diff --git a/Library/Homebrew/cask/audit.rb b/Library/Homebrew/cask/audit.rb index 629573079a..69faa8f2b9 100644 --- a/Library/Homebrew/cask/audit.rb +++ b/Library/Homebrew/cask/audit.rb @@ -603,7 +603,8 @@ module Cask version_stanza = cask.version.to_s adjusted_version_stanza = cask.appcast.must_contain.presence || version_stanza.match(/^[[:alnum:].]+/)[0] - return if appcast_contents.include? adjusted_version_stanza + return if appcast_contents.blank? + return if appcast_contents.include?(adjusted_version_stanza) add_error <<~EOS.chomp appcast at URL '#{Formatter.url(appcast_url)}' does not contain \ diff --git a/Library/Homebrew/download_strategy.rb b/Library/Homebrew/download_strategy.rb index 5498bb662f..30bb8d9a71 100644 --- a/Library/Homebrew/download_strategy.rb +++ b/Library/Homebrew/download_strategy.rb @@ -461,27 +461,16 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy url = url.sub(%r{^(https?://#{GitHubPackages::URL_DOMAIN}/)?}o, "#{domain.chomp("/")}/") end - out, _, status= curl_output("--location", "--silent", "--head", "--request", "GET", url.to_s, timeout: timeout) + output, _, _status = curl_output( + "--location", "--silent", "--head", "--request", "GET", url.to_s, + timeout: timeout + ) + parsed_output = parse_curl_output(output) - lines = status.success? ? 
out.lines.map(&:chomp) : [] + lines = output.to_s.lines.map(&:chomp) - locations = lines.map { |line| line[/^Location:\s*(.*)$/i, 1] } - .compact - - redirect_url = locations.reduce(url) do |current_url, location| - if location.start_with?("//") - uri = URI(current_url) - "#{uri.scheme}:#{location}" - elsif location.start_with?("/") - uri = URI(current_url) - "#{uri.scheme}://#{uri.host}#{location}" - elsif location.start_with?("./") - uri = URI(current_url) - "#{uri.scheme}://#{uri.host}#{Pathname(uri.path).dirname/location}" - else - location - end - end + final_url = curl_response_last_location(parsed_output[:responses], absolutize: true, base_url: url) + final_url ||= url content_disposition_parser = Mechanize::HTTP::ContentDispositionParser.new @@ -515,10 +504,10 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy .map(&:to_i) .last - is_redirection = url != redirect_url - basename = filenames.last || parse_basename(redirect_url, search_query: !is_redirection) + is_redirection = url != final_url + basename = filenames.last || parse_basename(final_url, search_query: !is_redirection) - @resolved_info_cache[url] = [redirect_url, basename, time, file_size, is_redirection] + @resolved_info_cache[url] = [final_url, basename, time, file_size, is_redirection] end def _fetch(url:, resolved_url:, timeout:) diff --git a/Library/Homebrew/livecheck/strategy.rb b/Library/Homebrew/livecheck/strategy.rb index f0aacdfd93..3f98a9faf4 100644 --- a/Library/Homebrew/livecheck/strategy.rb +++ b/Library/Homebrew/livecheck/strategy.rb @@ -72,11 +72,6 @@ module Homebrew retries: 0, }.freeze - # HTTP response head(s) and body are typically separated by a double - # `CRLF` (whereas HTTP header lines are separated by a single `CRLF`). - # In rare cases, this can also be a double newline (`\n\n`). - HTTP_HEAD_BODY_SEPARATOR = "\r\n\r\n" - # A regex used to identify a tarball extension at the end of a string. 
TARBALL_EXTENSION_REGEX = / \.t @@ -180,22 +175,17 @@ module Homebrew headers = [] [:default, :browser].each do |user_agent| - stdout, _, status = curl_with_workarounds( + output, _, status = curl_with_workarounds( *PAGE_HEADERS_CURL_ARGS, url, **DEFAULT_CURL_OPTIONS, use_homebrew_curl: homebrew_curl, user_agent: user_agent ) + next unless status.success? - while stdout.match?(/\AHTTP.*\r$/) - h, stdout = stdout.split("\r\n\r\n", 2) - - headers << h.split("\r\n").drop(1) - .to_h { |header| header.split(/:\s*/, 2) } - .transform_keys(&:downcase) - end - - return headers if status.success? + parsed_output = parse_curl_output(output) + parsed_output[:responses].each { |response| headers << response[:headers] } + break if headers.present? end headers @@ -211,8 +201,6 @@ module Homebrew # @return [Hash] sig { params(url: String, homebrew_curl: T::Boolean).returns(T::Hash[Symbol, T.untyped]) } def self.page_content(url, homebrew_curl: false) - original_url = url - stderr = nil [:default, :browser].each do |user_agent| stdout, stderr, status = curl_with_workarounds( @@ -229,27 +217,11 @@ module Homebrew # Separate the head(s)/body and identify the final URL (after any # redirections) - max_iterations = 5 - iterations = 0 - output = output.lstrip - while output.match?(%r{\AHTTP/[\d.]+ \d+}) && output.include?(HTTP_HEAD_BODY_SEPARATOR) - iterations += 1 - raise "Too many redirects (max = #{max_iterations})" if iterations > max_iterations + parsed_output = parse_curl_output(output) + final_url = curl_response_last_location(parsed_output[:responses], absolutize: true, base_url: url) - head_text, _, output = output.partition(HTTP_HEAD_BODY_SEPARATOR) - output = output.lstrip - - location = head_text[/^Location:\s*(.*)$/i, 1] - next if location.blank? - - location.chomp! 
- # Convert a relative redirect URL to an absolute URL - location = URI.join(url, location) unless location.match?(PageMatch::URL_MATCH_REGEX) - final_url = location - end - - data = { content: output } - data[:final_url] = final_url if final_url.present? && final_url != original_url + data = { content: parsed_output[:body] } + data[:final_url] = final_url if final_url.present? && final_url != url return data end diff --git a/Library/Homebrew/test/utils/curl_spec.rb b/Library/Homebrew/test/utils/curl_spec.rb index 454d5d1f95..d38d369eb3 100644 --- a/Library/Homebrew/test/utils/curl_spec.rb +++ b/Library/Homebrew/test/utils/curl_spec.rb @@ -4,6 +4,173 @@ require "utils/curl" describe "Utils::Curl" do + let(:location_urls) { + %w[ + https://example.com/example/ + https://example.com/example1/ + https://example.com/example2/ + ] + } + + let(:response_hash) { + response_hash = {} + + response_hash[:ok] = { + status_code: "200", + status_text: "OK", + headers: { + "cache-control" => "max-age=604800", + "content-type" => "text/html; charset=UTF-8", + "date" => "Wed, 1 Jan 2020 01:23:45 GMT", + "expires" => "Wed, 31 Jan 2020 01:23:45 GMT", + "last-modified" => "Thu, 1 Jan 2019 01:23:45 GMT", + "content-length" => "123", + }, + } + + response_hash[:redirection] = { + status_code: "301", + status_text: "Moved Permanently", + headers: { + "cache-control" => "max-age=604800", + "content-type" => "text/html; charset=UTF-8", + "date" => "Wed, 1 Jan 2020 01:23:45 GMT", + "expires" => "Wed, 31 Jan 2020 01:23:45 GMT", + "last-modified" => "Thu, 1 Jan 2019 01:23:45 GMT", + "content-length" => "123", + "location" => location_urls[0], + }, + } + + response_hash[:redirection1] = { + status_code: "301", + status_text: "Moved Permanently", + headers: { + "cache-control" => "max-age=604800", + "content-type" => "text/html; charset=UTF-8", + "date" => "Wed, 1 Jan 2020 01:23:45 GMT", + "expires" => "Wed, 31 Jan 2020 01:23:45 GMT", + "last-modified" => "Thu, 1 Jan 2019 01:23:45 GMT", + 
"content-length" => "123", + "location" => location_urls[1], + }, + } + + response_hash[:redirection2] = { + status_code: "301", + status_text: "Moved Permanently", + headers: { + "cache-control" => "max-age=604800", + "content-type" => "text/html; charset=UTF-8", + "date" => "Wed, 1 Jan 2020 01:23:45 GMT", + "expires" => "Wed, 31 Jan 2020 01:23:45 GMT", + "last-modified" => "Thu, 1 Jan 2019 01:23:45 GMT", + "content-length" => "123", + "location" => location_urls[2], + }, + } + + response_hash[:redirection_no_scheme] = { + status_code: "301", + status_text: "Moved Permanently", + headers: { + "cache-control" => "max-age=604800", + "content-type" => "text/html; charset=UTF-8", + "date" => "Wed, 1 Jan 2020 01:23:45 GMT", + "expires" => "Wed, 31 Jan 2020 01:23:45 GMT", + "last-modified" => "Thu, 1 Jan 2019 01:23:45 GMT", + "content-length" => "123", + "location" => "//www.example.com/example/", + }, + } + + response_hash[:redirection_root_relative] = { + status_code: "301", + status_text: "Moved Permanently", + headers: { + "cache-control" => "max-age=604800", + "content-type" => "text/html; charset=UTF-8", + "date" => "Wed, 1 Jan 2020 01:23:45 GMT", + "expires" => "Wed, 31 Jan 2020 01:23:45 GMT", + "last-modified" => "Thu, 1 Jan 2019 01:23:45 GMT", + "content-length" => "123", + "location" => "/example/", + }, + } + + response_hash[:redirection_parent_relative] = { + status_code: "301", + status_text: "Moved Permanently", + headers: { + "cache-control" => "max-age=604800", + "content-type" => "text/html; charset=UTF-8", + "date" => "Wed, 1 Jan 2020 01:23:45 GMT", + "expires" => "Wed, 31 Jan 2020 01:23:45 GMT", + "last-modified" => "Thu, 1 Jan 2019 01:23:45 GMT", + "content-length" => "123", + "location" => "./example/", + }, + } + + response_hash + } + + let(:response_text) { + response_text = {} + + response_text[:ok] = <<~EOS + HTTP/1.1 #{response_hash[:ok][:status_code]} #{response_hash[:ok][:status_text]}\r + Cache-Control: 
#{response_hash[:ok][:headers]["cache-control"]}\r + Content-Type: #{response_hash[:ok][:headers]["content-type"]}\r + Date: #{response_hash[:ok][:headers]["date"]}\r + Expires: #{response_hash[:ok][:headers]["expires"]}\r + Last-Modified: #{response_hash[:ok][:headers]["last-modified"]}\r + Content-Length: #{response_hash[:ok][:headers]["content-length"]}\r + \r + EOS + + response_text[:redirection] = response_text[:ok].sub( + "HTTP/1.1 #{response_hash[:ok][:status_code]} #{response_hash[:ok][:status_text]}\r", + "HTTP/1.1 #{response_hash[:redirection][:status_code]} #{response_hash[:redirection][:status_text]}\r\n" \ + "Location: #{response_hash[:redirection][:headers]["location"]}\r", + ) + + response_text[:redirection_to_ok] = "#{response_text[:redirection]}#{response_text[:ok]}" + + response_text[:redirections_to_ok] = <<~EOS + #{response_text[:redirection].sub(location_urls[0], location_urls[2])} + #{response_text[:redirection].sub(location_urls[0], location_urls[1])} + #{response_text[:redirection]} + #{response_text[:ok]} + EOS + + response_text + } + + let(:body) { + body = {} + + body[:default] = <<~EOS + + + + + Example + + +
<h1>Example</h1>
+ <p>Hello, world!</p>
+ + + EOS + + body[:with_carriage_returns] = body[:default].sub("\n", "\r\n\r\n") + + body[:with_http_status_line] = body[:default].sub("", "HTTP/1.1 200\r\n") + + body + } + describe "curl_args" do let(:args) { ["foo"] } let(:user_agent_string) { "Lorem ipsum dolor sit amet" } @@ -101,4 +268,117 @@ describe "Utils::Curl" do expect(curl_args(*args, show_output: true).join(" ")).not_to include("--fail") end end + + describe "#parse_curl_output" do + it "returns a correct hash when curl output contains response(s) and body" do + expect(parse_curl_output("#{response_text[:ok]}#{body[:default]}")) + .to eq({ responses: [response_hash[:ok]], body: body[:default] }) + expect(parse_curl_output("#{response_text[:ok]}#{body[:with_carriage_returns]}")) + .to eq({ responses: [response_hash[:ok]], body: body[:with_carriage_returns] }) + expect(parse_curl_output("#{response_text[:ok]}#{body[:with_http_status_line]}")) + .to eq({ responses: [response_hash[:ok]], body: body[:with_http_status_line] }) + expect(parse_curl_output("#{response_text[:redirection_to_ok]}#{body[:default]}")) + .to eq({ responses: [response_hash[:redirection], response_hash[:ok]], body: body[:default] }) + expect(parse_curl_output("#{response_text[:redirections_to_ok]}#{body[:default]}")) + .to eq({ + responses: [ + response_hash[:redirection2], + response_hash[:redirection1], + response_hash[:redirection], + response_hash[:ok], + ], + body: body[:default], + }) + end + + it "returns a correct hash when curl output contains HTTP response text and no body" do + expect(parse_curl_output(response_text[:ok])).to eq({ responses: [response_hash[:ok]], body: "" }) + end + + it "returns a correct hash when curl output contains body and no HTTP response text" do + expect(parse_curl_output(body[:default])).to eq({ responses: [], body: body[:default] }) + expect(parse_curl_output(body[:with_carriage_returns])) + .to eq({ responses: [], body: body[:with_carriage_returns] }) + 
expect(parse_curl_output(body[:with_http_status_line])) + .to eq({ responses: [], body: body[:with_http_status_line] }) + end + + it "returns correct hash when curl output is blank" do + expect(parse_curl_output("")).to eq({ responses: [], body: "" }) + end + end + + describe "#parse_curl_response" do + it "returns a correct hash when given HTTP response text" do + expect(parse_curl_response(response_text[:ok])).to eq(response_hash[:ok]) + expect(parse_curl_response(response_text[:redirection])).to eq(response_hash[:redirection]) + end + + it "returns an empty hash when given an empty string" do + expect(parse_curl_response("")).to eq({}) + end + end + + describe "#curl_response_last_location" do + it "returns the last location header when given an array of HTTP response hashes" do + expect(curl_response_last_location([ + response_hash[:redirection], + response_hash[:ok], + ])).to eq(response_hash[:redirection][:headers]["location"]) + + expect(curl_response_last_location([ + response_hash[:redirection2], + response_hash[:redirection1], + response_hash[:redirection], + response_hash[:ok], + ])).to eq(response_hash[:redirection][:headers]["location"]) + end + + it "returns the location as given, by default or when absolutize is false" do + expect(curl_response_last_location([ + response_hash[:redirection_no_scheme], + response_hash[:ok], + ])).to eq(response_hash[:redirection_no_scheme][:headers]["location"]) + + expect(curl_response_last_location([ + response_hash[:redirection_root_relative], + response_hash[:ok], + ])).to eq(response_hash[:redirection_root_relative][:headers]["location"]) + + expect(curl_response_last_location([ + response_hash[:redirection_parent_relative], + response_hash[:ok], + ])).to eq(response_hash[:redirection_parent_relative][:headers]["location"]) + end + + it "returns an absolute URL when absolutize is true and a base URL is provided" do + expect( + curl_response_last_location( + [response_hash[:redirection_no_scheme], 
response_hash[:ok]], + absolutize: true, + base_url: "https://brew.sh/test", + ), + ).to eq("https:#{response_hash[:redirection_no_scheme][:headers]["location"]}") + + expect( + curl_response_last_location( + [response_hash[:redirection_root_relative], response_hash[:ok]], + absolutize: true, + base_url: "https://brew.sh/test", + ), + ).to eq("https://brew.sh#{response_hash[:redirection_root_relative][:headers]["location"]}") + + expect( + curl_response_last_location( + [response_hash[:redirection_parent_relative], response_hash[:ok]], + absolutize: true, + base_url: "https://brew.sh/test1/test2", + ), + ).to eq(response_hash[:redirection_parent_relative][:headers]["location"].sub(/^\./, "https://brew.sh/test1")) + end + + it "returns nil when the response hash doesn't contain a location header" do + expect(curl_response_last_location([response_hash[:ok]])).to be_nil + end + end end diff --git a/Library/Homebrew/utils/curl.rb b/Library/Homebrew/utils/curl.rb index 4b08d1fa77..20792f25a5 100644 --- a/Library/Homebrew/utils/curl.rb +++ b/Library/Homebrew/utils/curl.rb @@ -14,6 +14,22 @@ module Utils using TimeRemaining + # This regex is used to extract the part of an ETag within quotation marks, + # ignoring any leading weak validator indicator (`W/`). This simplifies + # ETag comparison in `#curl_check_http_content`. + ETAG_VALUE_REGEX = %r{^(?:[wW]/)?"((?:[^"]|\\")*)"}.freeze + + # HTTP responses and body content are typically separated by a double + # `CRLF` (whereas HTTP header lines are separated by a single `CRLF`). + # In rare cases, this can also be a double newline (`\n\n`). + HTTP_RESPONSE_BODY_SEPARATOR = "\r\n\r\n" + + # This regex is used to isolate the parts of an HTTP status line, namely + # the status code and any following descriptive text (e.g., `Not Found`). 
+ HTTP_STATUS_LINE_REGEX = %r{^HTTP/.* (?<code>\d+)(?: (?<text>[^\r\n]+))?}.freeze + + private_constant :ETAG_VALUE_REGEX, :HTTP_RESPONSE_BODY_SEPARATOR, :HTTP_STATUS_LINE_REGEX + module_function def curl_executable(use_homebrew_curl: false) @@ -145,23 +161,19 @@ module Utils result end - def parse_headers(headers) - return {} if headers.blank? - - # Skip status code - headers.split("\r\n")[1..].to_h do |h| - name, content = h.split(": ") - [name.downcase, content] - end - end - def curl_download(*args, to: nil, try_partial: true, **options) destination = Pathname(to) destination.dirname.mkpath if try_partial range_stdout = curl_output("--location", "--head", *args, **options).stdout - headers = parse_headers(range_stdout.split("\r\n\r\n").first) + parsed_output = parse_curl_output(range_stdout) + + headers = if parsed_output[:responses].present? + parsed_output[:responses].last[:headers] + else + {} + end # Any value for `accept-ranges` other than none indicates that the server supports partial requests. # Its absence indicates no support. @@ -187,6 +199,8 @@ module Utils # Check if a URL is protected by CloudFlare (e.g. badlion.net and jaxx.io). def url_protected_by_cloudflare?(details) + return false if details[:headers].blank? + [403, 503].include?(details[:status].to_i) && details[:headers].match?(/^Set-Cookie: (__cfduid|__cf_bm)=/i) && details[:headers].match?(/^Server: cloudflare/i) @@ -194,6 +208,8 @@ module Utils # Check if a URL is protected by Incapsula (e.g. corsair.com). def url_protected_by_incapsula?(details) + return false if details[:headers].blank? + details[:status].to_i == 403 && details[:headers].match?(/^Set-Cookie: visid_incap_/i) && details[:headers].match?(/^Set-Cookie: incap_ses_/i) @@ -255,7 +271,7 @@ module Utils end if url.start_with?("https://") && Homebrew::EnvConfig.no_insecure_redirect? && - !details[:final_url].start_with?("https://") + (details[:final_url].present? 
&& !details[:final_url].start_with?("https://")) return "The #{url_type} #{url} redirects back to HTTP" end @@ -270,9 +286,11 @@ module Utils details[:content_length] == secure_details[:content_length] file_match = details[:file_hash] == secure_details[:file_hash] - if (etag_match || content_length_match || file_match) && - secure_details[:final_url].start_with?("https://") && - url.start_with?("http://") + http_with_https_available = + url.start_with?("http://") && + (secure_details[:final_url].present? && secure_details[:final_url].start_with?("https://")) + + if (etag_match || content_length_match || file_match) && http_with_https_available return "The #{url_type} #{url} should use HTTPS rather than HTTP" end @@ -283,8 +301,7 @@ module Utils https_content = secure_details[:file]&.gsub(no_protocol_file_contents, "/") # Check for the same content after removing all protocols - if (http_content && https_content) && (http_content == https_content) && - url.start_with?("http://") && secure_details[:final_url].start_with?("https://") + if (http_content && https_content) && (http_content == https_content) && http_with_https_available return "The #{url_type} #{url} should use HTTPS rather than HTTP" end @@ -328,30 +345,33 @@ module Utils user_agent: user_agent ) - status_code = :unknown - while status_code == :unknown || status_code.to_s.start_with?("3") - headers, _, output = output.partition("\r\n\r\n") - status_code = headers[%r{HTTP/.* (\d+)}, 1] - location = headers[/^Location:\s*(.*)$/i, 1] - final_url = location.chomp if location - end - if status.success? + parsed_output = parse_curl_output(output) + responses = parsed_output[:responses] + + final_url = curl_response_last_location(responses) + headers = if responses.last.present? + status_code = responses.last[:status_code] + responses.last[:headers] + else + {} + end + etag = headers["etag"][ETAG_VALUE_REGEX, 1] if headers["etag"].present? 
+ content_length = headers["content-length"] + file_contents = File.read(file.path) file_hash = Digest::SHA2.hexdigest(file_contents) if hash_needed end - final_url ||= url - { url: url, final_url: final_url, status: status_code, - etag: headers[%r{ETag: ([wW]/)?"(([^"]|\\")*)"}, 2], - content_length: headers[/Content-Length: (\d+)/, 1], headers: headers, - file_hash: file_hash, + etag: etag, + content_length: content_length, file: file_contents, + file_hash: file_hash, } ensure file.unlink @@ -367,6 +387,95 @@ module Utils def http_status_ok?(status) (100..299).cover?(status.to_i) end + + # Separates the output text from `curl` into an array of HTTP responses and + # the final response body (i.e. content). Response hashes contain the + # `:status_code`, `:status_text`, and `:headers`. + # @param output [String] The output text from `curl` containing HTTP + # responses, body content, or both. + # @return [Hash] A hash containing an array of response hashes and the body + # content, if found. + sig { params(output: String).returns(T::Hash[Symbol, T.untyped]) } + def parse_curl_output(output) + responses = [] + + max_iterations = 5 + iterations = 0 + output = output.lstrip + while output.match?(%r{\AHTTP/[\d.]+ \d+}) && output.include?(HTTP_RESPONSE_BODY_SEPARATOR) + iterations += 1 + raise "Too many redirects (max = #{max_iterations})" if iterations > max_iterations + + response_text, _, output = output.partition(HTTP_RESPONSE_BODY_SEPARATOR) + output = output.lstrip + next if response_text.blank? + + response_text.chomp! + response = parse_curl_response(response_text) + responses << response if response.present? + end + + { responses: responses, body: output } + end + + # Returns the URL from the last location header found in cURL responses, + # if any. + # @param responses [Array] An array of hashes containing response + # status information and headers from `#parse_curl_response`. + # @param absolutize [true, false] Whether to make the location URL absolute. 
+ # @param base_url [String, nil] The URL to use as a base for making the + # `location` URL absolute. + # @return [String, nil] The URL from the last-occurring `location` header + # in the responses or `nil` (if no `location` headers found). + sig { + params( + responses: T::Array[T::Hash[Symbol, T.untyped]], + absolutize: T::Boolean, + base_url: T.nilable(String), + ).returns(T.nilable(String)) + } + def curl_response_last_location(responses, absolutize: false, base_url: nil) + responses.reverse_each do |response| + next if response[:headers].blank? + + location = response[:headers]["location"] + next if location.blank? + + absolute_url = URI.join(base_url, location).to_s if absolutize && base_url.present? + return absolute_url || location + end + + nil + end + + private + + # Parses HTTP response text from `curl` output into a hash containing the + # information from the status line (status code and, optionally, + # descriptive text) and headers. + # @param response_text [String] The text of a `curl` response, consisting + # of a status line followed by header lines. + # @return [Hash] A hash containing the response status information and + # headers (as a hash with header names as keys). + sig { params(response_text: String).returns(T::Hash[Symbol, T.untyped]) } + def parse_curl_response(response_text) + response = {} + return response unless response_text.match?(HTTP_STATUS_LINE_REGEX) + + # Parse the status line and remove it + match = response_text.match(HTTP_STATUS_LINE_REGEX) + response[:status_code] = match["code"] if match["code"].present? + response[:status_text] = match["text"] if match["text"].present? + response_text = response_text.sub(%r{^HTTP/.* (\d+).*$\s*}, "") + + # Create a hash from the header lines + response[:headers] = + response_text.split("\r\n") + .to_h { |header| header.split(/:\s*/, 2) } + .transform_keys(&:downcase) + + response + end end end