Strategy: Replace open-uri with curl

This commit is contained in:
Sam Ford 2020-12-14 13:03:10 -05:00
parent 654c78c2d7
commit ccfd01ba38
No known key found for this signature in database
GPG Key ID: 95209E46C7FFDEFE
2 changed files with 57 additions and 13 deletions

View File

@ -100,7 +100,8 @@ module Homebrew
*args, url, *args, url,
print_stdout: false, print_stderr: false, print_stdout: false, print_stderr: false,
debug: false, verbose: false, debug: false, verbose: false,
user_agent: user_agent, retry: false user_agent: user_agent, timeout: 20,
retry: false
) )
while stdout.match?(/\AHTTP.*\r$/) while stdout.match?(/\AHTTP.*\r$/)
@ -119,6 +120,8 @@ module Homebrew
# Fetches the content at the URL and returns a hash containing the # Fetches the content at the URL and returns a hash containing the
# content and, if there are any redirections, the final URL. # content and, if there are any redirections, the final URL.
# If `curl` encounters an error, the hash will instead contain only a
# `:messages` array holding the error message (no content or final URL).
# #
# @param url [String] the URL of the content to check # @param url [String] the URL of the content to check
# @return [Hash] # @return [Hash]
@ -126,18 +129,61 @@ module Homebrew
def self.page_content(url) def self.page_content(url)
original_url = url original_url = url
# Manually handling `URI#open` redirections allows us to detect the args = curl_args(
# resolved URL while also supporting HTTPS to HTTP redirections (which "--compressed",
# are normally forbidden by `OpenURI`). # Include HTTP response headers in output, so we can identify the
begin # final URL after any redirections
content = URI.parse(url).open(redirect: false, &:read) "--include",
rescue OpenURI::HTTPRedirect => e # Follow redirections to handle mirrors, relocations, etc.
url = e.uri.to_s "--location",
retry # cURL's default timeout can be up to two minutes, so we need to
# set our own timeout settings to avoid a lengthy wait
"--connect-timeout", "10",
"--max-time", "15"
)
stdout, stderr, status = curl_with_workarounds(
*args, url,
print_stdout: false, print_stderr: false,
debug: false, verbose: false,
user_agent: :default, timeout: 20,
retry: false
)
unless status.success?
/^(?<error_msg>curl: \(\d+\) .+)/ =~ stderr
return {
messages: [error_msg.presence || "cURL failed without an error"],
}
end end
data = { content: content } # stdout contains the header information followed by the page content.
data[:final_url] = url unless url == original_url # We use #scrub here to avoid "invalid byte sequence in UTF-8" errors.
output = stdout.scrub
# Separate the head(s)/body and identify the final URL (after any
# redirections)
max_iterations = 5
iterations = 0
output = output.lstrip
while output.match?(%r{\AHTTP/[\d.]+ \d+}) && output.include?("\r\n\r\n")
iterations += 1
raise "Too many redirects (max = #{max_iterations})" if iterations > max_iterations
head_text, _, output = output.partition("\r\n\r\n")
output = output.lstrip
location = head_text[/^Location:\s*(.*)$/i, 1]
next if location.blank?
location.chomp!
# Convert a relative redirect URL to an absolute URL
location = URI.join(url, location) unless location.match?(PageMatch::URL_MATCH_REGEX)
final_url = location
end
data = { content: output }
data[:final_url] = final_url if final_url.present? && final_url != original_url
data data
end end
end end

View File

@ -1,8 +1,6 @@
# typed: true # typed: true
# frozen_string_literal: true # frozen_string_literal: true
require "open-uri"
module Homebrew module Homebrew
module Livecheck module Livecheck
module Strategy module Strategy