Merge pull request #10834 from samford/livecheck-replace-open-uri-with-curl
Livecheck: Replace OpenURI#open with Curl
This commit is contained in:
commit
f1f7dc151d
@ -19,7 +19,68 @@ module Homebrew
|
|||||||
# the middle of this range. Strategies with a priority of 0 (or lower)
|
# the middle of this range. Strategies with a priority of 0 (or lower)
|
||||||
# are ignored.
|
# are ignored.
|
||||||
DEFAULT_PRIORITY = 5
|
DEFAULT_PRIORITY = 5
|
||||||
private_constant :DEFAULT_PRIORITY
|
|
||||||
|
# cURL's default `--connect-timeout` value can be up to two minutes, so
|
||||||
|
# we need to use a more reasonable duration (in seconds) to avoid a
|
||||||
|
# lengthy wait when a connection can't be established.
|
||||||
|
CURL_CONNECT_TIMEOUT = 10
|
||||||
|
|
||||||
|
# cURL does not set a default `--max-time` value, so we provide a value
|
||||||
|
# to ensure cURL will time out in a reasonable amount of time.
|
||||||
|
CURL_MAX_TIME = CURL_CONNECT_TIMEOUT + 5
|
||||||
|
|
||||||
|
# The `curl` process will sometimes hang indefinitely (despite setting
|
||||||
|
# the `--max-time` argument) and it needs to be quit for livecheck to
|
||||||
|
# continue. This value is used to set the `timeout` argument on
|
||||||
|
# `Utils::Curl` method calls in `Strategy`.
|
||||||
|
CURL_PROCESS_TIMEOUT = CURL_MAX_TIME + 5
|
||||||
|
|
||||||
|
# Baseline `curl` arguments used in `Strategy` methods.
|
||||||
|
DEFAULT_CURL_ARGS = [
|
||||||
|
# Follow redirections to handle mirrors, relocations, etc.
|
||||||
|
"--location",
|
||||||
|
"--connect-timeout", CURL_CONNECT_TIMEOUT,
|
||||||
|
"--max-time", CURL_MAX_TIME
|
||||||
|
].freeze
|
||||||
|
|
||||||
|
# `curl` arguments used in `Strategy#page_headers` method.
|
||||||
|
PAGE_HEADERS_CURL_ARGS = ([
|
||||||
|
# We only need the response head (not the body)
|
||||||
|
"--head",
|
||||||
|
# Some servers may not allow a HEAD request, so we use GET
|
||||||
|
"--request", "GET",
|
||||||
|
"--silent"
|
||||||
|
] + DEFAULT_CURL_ARGS).freeze
|
||||||
|
|
||||||
|
# `curl` arguments used in `Strategy#page_content` method.
|
||||||
|
PAGE_CONTENT_CURL_ARGS = ([
|
||||||
|
"--compressed",
|
||||||
|
# Include HTTP response headers in output, so we can identify the
|
||||||
|
# final URL after any redirections
|
||||||
|
"--include",
|
||||||
|
] + DEFAULT_CURL_ARGS).freeze
|
||||||
|
|
||||||
|
# Baseline `curl` options used in `Strategy` methods.
|
||||||
|
DEFAULT_CURL_OPTIONS = {
|
||||||
|
print_stdout: false,
|
||||||
|
print_stderr: false,
|
||||||
|
debug: false,
|
||||||
|
verbose: false,
|
||||||
|
timeout: CURL_PROCESS_TIMEOUT,
|
||||||
|
retry: false,
|
||||||
|
}.freeze
|
||||||
|
|
||||||
|
# HTTP response head(s) and body are typically separated by a double
|
||||||
|
# `CRLF` (whereas HTTP header lines are separated by a single `CRLF`).
|
||||||
|
# In rare cases, this can also be a double newline (`\n\n`).
|
||||||
|
HTTP_HEAD_BODY_SEPARATOR = "\r\n\r\n"
|
||||||
|
|
||||||
|
# The `#strategies` method expects `Strategy` constants to be strategies,
|
||||||
|
# so constants we create need to be private for this to work properly.
|
||||||
|
private_constant :DEFAULT_PRIORITY, :CURL_CONNECT_TIMEOUT, :CURL_MAX_TIME,
|
||||||
|
:CURL_PROCESS_TIMEOUT, :DEFAULT_CURL_ARGS,
|
||||||
|
:PAGE_HEADERS_CURL_ARGS, :PAGE_CONTENT_CURL_ARGS,
|
||||||
|
:DEFAULT_CURL_OPTIONS, :HTTP_HEAD_BODY_SEPARATOR
|
||||||
|
|
||||||
# Creates and/or returns a `@strategies` `Hash`, which maps a snake
|
# Creates and/or returns a `@strategies` `Hash`, which maps a snake
|
||||||
# case strategy name symbol (e.g. `:page_match`) to the associated
|
# case strategy name symbol (e.g. `:page_match`) to the associated
|
||||||
@ -87,20 +148,10 @@ module Homebrew
|
|||||||
headers = []
|
headers = []
|
||||||
|
|
||||||
[:default, :browser].each do |user_agent|
|
[:default, :browser].each do |user_agent|
|
||||||
args = [
|
|
||||||
"--head", # Only work with the response headers
|
|
||||||
"--request", "GET", # Use a GET request (instead of HEAD)
|
|
||||||
"--silent", # Silent mode
|
|
||||||
"--location", # Follow redirects
|
|
||||||
"--connect-timeout", "5", # Max time allowed for connection (secs)
|
|
||||||
"--max-time", "10" # Max time allowed for transfer (secs)
|
|
||||||
]
|
|
||||||
|
|
||||||
stdout, _, status = curl_with_workarounds(
|
stdout, _, status = curl_with_workarounds(
|
||||||
*args, url,
|
*PAGE_HEADERS_CURL_ARGS, url,
|
||||||
print_stdout: false, print_stderr: false,
|
**DEFAULT_CURL_OPTIONS,
|
||||||
debug: false, verbose: false,
|
user_agent: user_agent
|
||||||
user_agent: user_agent, retry: false
|
|
||||||
)
|
)
|
||||||
|
|
||||||
while stdout.match?(/\AHTTP.*\r$/)
|
while stdout.match?(/\AHTTP.*\r$/)
|
||||||
@ -119,6 +170,8 @@ module Homebrew
|
|||||||
|
|
||||||
# Fetches the content at the URL and returns a hash containing the
|
# Fetches the content at the URL and returns a hash containing the
|
||||||
# content and, if there are any redirections, the final URL.
|
# content and, if there are any redirections, the final URL.
|
||||||
|
# If `curl` encounters an error, the hash will contain a `:messages`
|
||||||
|
# array with the error message instead.
|
||||||
#
|
#
|
||||||
# @param url [String] the URL of the content to check
|
# @param url [String] the URL of the content to check
|
||||||
# @return [Hash]
|
# @return [Hash]
|
||||||
@ -126,18 +179,45 @@ module Homebrew
|
|||||||
def self.page_content(url)
|
def self.page_content(url)
|
||||||
original_url = url
|
original_url = url
|
||||||
|
|
||||||
# Manually handling `URI#open` redirections allows us to detect the
|
stdout, stderr, status = curl_with_workarounds(
|
||||||
# resolved URL while also supporting HTTPS to HTTP redirections (which
|
*PAGE_CONTENT_CURL_ARGS, url,
|
||||||
# are normally forbidden by `OpenURI`).
|
**DEFAULT_CURL_OPTIONS
|
||||||
begin
|
)
|
||||||
content = URI.parse(url).open(redirect: false, &:read)
|
|
||||||
rescue OpenURI::HTTPRedirect => e
|
unless status.success?
|
||||||
url = e.uri.to_s
|
/^(?<error_msg>curl: \(\d+\) .+)/ =~ stderr
|
||||||
retry
|
return {
|
||||||
|
messages: [error_msg.presence || "cURL failed without an error"],
|
||||||
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
data = { content: content }
|
# stdout contains the header information followed by the page content.
|
||||||
data[:final_url] = url unless url == original_url
|
# We use #scrub here to avoid "invalid byte sequence in UTF-8" errors.
|
||||||
|
output = stdout.scrub
|
||||||
|
|
||||||
|
# Separate the head(s)/body and identify the final URL (after any
|
||||||
|
# redirections)
|
||||||
|
max_iterations = 5
|
||||||
|
iterations = 0
|
||||||
|
output = output.lstrip
|
||||||
|
while output.match?(%r{\AHTTP/[\d.]+ \d+}) && output.include?(HTTP_HEAD_BODY_SEPARATOR)
|
||||||
|
iterations += 1
|
||||||
|
raise "Too many redirects (max = #{max_iterations})" if iterations > max_iterations
|
||||||
|
|
||||||
|
head_text, _, output = output.partition(HTTP_HEAD_BODY_SEPARATOR)
|
||||||
|
output = output.lstrip
|
||||||
|
|
||||||
|
location = head_text[/^Location:\s*(.*)$/i, 1]
|
||||||
|
next if location.blank?
|
||||||
|
|
||||||
|
location.chomp!
|
||||||
|
# Convert a relative redirect URL to an absolute URL
|
||||||
|
location = URI.join(url, location) unless location.match?(PageMatch::URL_MATCH_REGEX)
|
||||||
|
final_url = location
|
||||||
|
end
|
||||||
|
|
||||||
|
data = { content: output }
|
||||||
|
data[:final_url] = final_url if final_url.present? && final_url != original_url
|
||||||
data
|
data
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@ -1,8 +1,6 @@
|
|||||||
# typed: true
|
# typed: true
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
require "open-uri"
|
|
||||||
|
|
||||||
module Homebrew
|
module Homebrew
|
||||||
module Livecheck
|
module Livecheck
|
||||||
module Strategy
|
module Strategy
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user