Merge pull request #11252 from samford/curl-add-response-parsing-methods
Curl: Add methods to parse response
Commit 92e4a5e583
@@ -603,7 +603,8 @@ module Cask

      version_stanza = cask.version.to_s
      adjusted_version_stanza = cask.appcast.must_contain.presence || version_stanza.match(/^[[:alnum:].]+/)[0]
      return if appcast_contents.include? adjusted_version_stanza
      return if appcast_contents.blank?
      return if appcast_contents.include?(adjusted_version_stanza)

      add_error <<~EOS.chomp
        appcast at URL '#{Formatter.url(appcast_url)}' does not contain \
@@ -461,27 +461,16 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
      url = url.sub(%r{^(https?://#{GitHubPackages::URL_DOMAIN}/)?}o, "#{domain.chomp("/")}/")
    end

    out, _, status = curl_output("--location", "--silent", "--head", "--request", "GET", url.to_s, timeout: timeout)
    output, _, _status = curl_output(
      "--location", "--silent", "--head", "--request", "GET", url.to_s,
      timeout: timeout
    )
    parsed_output = parse_curl_output(output)

    lines = status.success? ? out.lines.map(&:chomp) : []
    lines = output.to_s.lines.map(&:chomp)

    locations = lines.map { |line| line[/^Location:\s*(.*)$/i, 1] }
                     .compact

    redirect_url = locations.reduce(url) do |current_url, location|
      if location.start_with?("//")
        uri = URI(current_url)
        "#{uri.scheme}:#{location}"
      elsif location.start_with?("/")
        uri = URI(current_url)
        "#{uri.scheme}://#{uri.host}#{location}"
      elsif location.start_with?("./")
        uri = URI(current_url)
        "#{uri.scheme}://#{uri.host}#{Pathname(uri.path).dirname/location}"
      else
        location
      end
    end
    final_url = curl_response_last_location(parsed_output[:responses], absolutize: true, base_url: url)
    final_url ||= url

    content_disposition_parser = Mechanize::HTTP::ContentDispositionParser.new

@@ -515,10 +504,10 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
           .map(&:to_i)
           .last

    is_redirection = url != redirect_url
    basename = filenames.last || parse_basename(redirect_url, search_query: !is_redirection)
    is_redirection = url != final_url
    basename = filenames.last || parse_basename(final_url, search_query: !is_redirection)

    @resolved_info_cache[url] = [redirect_url, basename, time, file_size, is_redirection]
    @resolved_info_cache[url] = [final_url, basename, time, file_size, is_redirection]
  end

  def _fetch(url:, resolved_url:, timeout:)

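For orientation, a minimal sketch (values hypothetical) of the data the refactored strategy works with: `parse_curl_output` turns the `--head` output into one response hash per hop, and `curl_response_last_location` walks them from the end to produce the effective URL.

# Illustrative only; assumes url = "https://example.com/latest" and that
# `output` is the header dump from the curl_output call shown above.
parsed_output = parse_curl_output(output)
parsed_output[:responses]
# => [
#      { status_code: "302", status_text: "Found",
#        headers: { "location" => "/releases/example-1.2.3.tar.gz" } },
#      { status_code: "200", status_text: "OK",
#        headers: { "content-disposition" => "attachment; filename=example-1.2.3.tar.gz" } },
#    ]

# The relative location is joined onto the original URL; this is the value
# that feeds final_url, is_redirection and basename above.
curl_response_last_location(parsed_output[:responses], absolutize: true, base_url: url)
# => "https://example.com/releases/example-1.2.3.tar.gz"
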
@@ -72,11 +72,6 @@ module Homebrew
        retries:         0,
      }.freeze

      # HTTP response head(s) and body are typically separated by a double
      # `CRLF` (whereas HTTP header lines are separated by a single `CRLF`).
      # In rare cases, this can also be a double newline (`\n\n`).
      HTTP_HEAD_BODY_SEPARATOR = "\r\n\r\n"

      # A regex used to identify a tarball extension at the end of a string.
      TARBALL_EXTENSION_REGEX = /
        \.t
@@ -180,22 +175,17 @@ module Homebrew
        headers = []

        [:default, :browser].each do |user_agent|
          stdout, _, status = curl_with_workarounds(
          output, _, status = curl_with_workarounds(
            *PAGE_HEADERS_CURL_ARGS, url,
            **DEFAULT_CURL_OPTIONS,
            use_homebrew_curl: homebrew_curl,
            user_agent:        user_agent
          )
          next unless status.success?

          while stdout.match?(/\AHTTP.*\r$/)
            h, stdout = stdout.split("\r\n\r\n", 2)

            headers << h.split("\r\n").drop(1)
                        .to_h { |header| header.split(/:\s*/, 2) }
                        .transform_keys(&:downcase)
          end

          return headers if status.success?
          parsed_output = parse_curl_output(output)
          parsed_output[:responses].each { |response| headers << response[:headers] }
          break if headers.present?
        end

        headers

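A small sketch of what the rewritten loop collects for livecheck's header-based strategies (header values hypothetical): each parsed response contributes one lower-cased headers hash, so strategies no longer re-split raw CRLF-delimited header text.

parsed_output = parse_curl_output(output)
parsed_output[:responses].map { |response| response[:headers] }
# => [
#      { "location" => "https://example.com/download/1.2.3/" },
#      { "content-type" => "text/html; charset=UTF-8" },
#    ]
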
@@ -211,8 +201,6 @@ module Homebrew
      # @return [Hash]
      sig { params(url: String, homebrew_curl: T::Boolean).returns(T::Hash[Symbol, T.untyped]) }
      def self.page_content(url, homebrew_curl: false)
        original_url = url

        stderr = nil
        [:default, :browser].each do |user_agent|
          stdout, stderr, status = curl_with_workarounds(
@@ -229,27 +217,11 @@ module Homebrew

          # Separate the head(s)/body and identify the final URL (after any
          # redirections)
          max_iterations = 5
          iterations = 0
          output = output.lstrip
          while output.match?(%r{\AHTTP/[\d.]+ \d+}) && output.include?(HTTP_HEAD_BODY_SEPARATOR)
            iterations += 1
            raise "Too many redirects (max = #{max_iterations})" if iterations > max_iterations
          parsed_output = parse_curl_output(output)
          final_url = curl_response_last_location(parsed_output[:responses], absolutize: true, base_url: url)

            head_text, _, output = output.partition(HTTP_HEAD_BODY_SEPARATOR)
            output = output.lstrip

            location = head_text[/^Location:\s*(.*)$/i, 1]
            next if location.blank?

            location.chomp!
            # Convert a relative redirect URL to an absolute URL
            location = URI.join(url, location) unless location.match?(PageMatch::URL_MATCH_REGEX)
            final_url = location
          end

          data = { content: output }
          data[:final_url] = final_url if final_url.present? && final_url != original_url
          data = { content: parsed_output[:body] }
          data[:final_url] = final_url if final_url.present? && final_url != url
          return data
        end

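With that change, `page_content` effectively returns a hash like the following for a request that was redirected once (values hypothetical); `:final_url` is only included when it differs from the requested `url`.

{
  content:   parsed_output[:body],           # e.g. "<!DOCTYPE html>\n<html>..."
  final_url: "https://example.com/example/", # last `location` header, absolutized against `url`
}
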
@@ -4,6 +4,173 @@
require "utils/curl"

describe "Utils::Curl" do
  let(:location_urls) {
    %w[
      https://example.com/example/
      https://example.com/example1/
      https://example.com/example2/
    ]
  }

  let(:response_hash) {
    response_hash = {}

    response_hash[:ok] = {
      status_code: "200",
      status_text: "OK",
      headers:     {
        "cache-control"  => "max-age=604800",
        "content-type"   => "text/html; charset=UTF-8",
        "date"           => "Wed, 1 Jan 2020 01:23:45 GMT",
        "expires"        => "Wed, 31 Jan 2020 01:23:45 GMT",
        "last-modified"  => "Thu, 1 Jan 2019 01:23:45 GMT",
        "content-length" => "123",
      },
    }

    response_hash[:redirection] = {
      status_code: "301",
      status_text: "Moved Permanently",
      headers:     {
        "cache-control"  => "max-age=604800",
        "content-type"   => "text/html; charset=UTF-8",
        "date"           => "Wed, 1 Jan 2020 01:23:45 GMT",
        "expires"        => "Wed, 31 Jan 2020 01:23:45 GMT",
        "last-modified"  => "Thu, 1 Jan 2019 01:23:45 GMT",
        "content-length" => "123",
        "location"       => location_urls[0],
      },
    }

    response_hash[:redirection1] = {
      status_code: "301",
      status_text: "Moved Permanently",
      headers:     {
        "cache-control"  => "max-age=604800",
        "content-type"   => "text/html; charset=UTF-8",
        "date"           => "Wed, 1 Jan 2020 01:23:45 GMT",
        "expires"        => "Wed, 31 Jan 2020 01:23:45 GMT",
        "last-modified"  => "Thu, 1 Jan 2019 01:23:45 GMT",
        "content-length" => "123",
        "location"       => location_urls[1],
      },
    }

    response_hash[:redirection2] = {
      status_code: "301",
      status_text: "Moved Permanently",
      headers:     {
        "cache-control"  => "max-age=604800",
        "content-type"   => "text/html; charset=UTF-8",
        "date"           => "Wed, 1 Jan 2020 01:23:45 GMT",
        "expires"        => "Wed, 31 Jan 2020 01:23:45 GMT",
        "last-modified"  => "Thu, 1 Jan 2019 01:23:45 GMT",
        "content-length" => "123",
        "location"       => location_urls[2],
      },
    }

    response_hash[:redirection_no_scheme] = {
      status_code: "301",
      status_text: "Moved Permanently",
      headers:     {
        "cache-control"  => "max-age=604800",
        "content-type"   => "text/html; charset=UTF-8",
        "date"           => "Wed, 1 Jan 2020 01:23:45 GMT",
        "expires"        => "Wed, 31 Jan 2020 01:23:45 GMT",
        "last-modified"  => "Thu, 1 Jan 2019 01:23:45 GMT",
        "content-length" => "123",
        "location"       => "//www.example.com/example/",
      },
    }

    response_hash[:redirection_root_relative] = {
      status_code: "301",
      status_text: "Moved Permanently",
      headers:     {
        "cache-control"  => "max-age=604800",
        "content-type"   => "text/html; charset=UTF-8",
        "date"           => "Wed, 1 Jan 2020 01:23:45 GMT",
        "expires"        => "Wed, 31 Jan 2020 01:23:45 GMT",
        "last-modified"  => "Thu, 1 Jan 2019 01:23:45 GMT",
        "content-length" => "123",
        "location"       => "/example/",
      },
    }

    response_hash[:redirection_parent_relative] = {
      status_code: "301",
      status_text: "Moved Permanently",
      headers:     {
        "cache-control"  => "max-age=604800",
        "content-type"   => "text/html; charset=UTF-8",
        "date"           => "Wed, 1 Jan 2020 01:23:45 GMT",
        "expires"        => "Wed, 31 Jan 2020 01:23:45 GMT",
        "last-modified"  => "Thu, 1 Jan 2019 01:23:45 GMT",
        "content-length" => "123",
        "location"       => "./example/",
      },
    }

    response_hash
  }

  let(:response_text) {
    response_text = {}

    response_text[:ok] = <<~EOS
      HTTP/1.1 #{response_hash[:ok][:status_code]} #{response_hash[:ok][:status_text]}\r
      Cache-Control: #{response_hash[:ok][:headers]["cache-control"]}\r
      Content-Type: #{response_hash[:ok][:headers]["content-type"]}\r
      Date: #{response_hash[:ok][:headers]["date"]}\r
      Expires: #{response_hash[:ok][:headers]["expires"]}\r
      Last-Modified: #{response_hash[:ok][:headers]["last-modified"]}\r
      Content-Length: #{response_hash[:ok][:headers]["content-length"]}\r
      \r
    EOS

    response_text[:redirection] = response_text[:ok].sub(
      "HTTP/1.1 #{response_hash[:ok][:status_code]} #{response_hash[:ok][:status_text]}\r",
      "HTTP/1.1 #{response_hash[:redirection][:status_code]} #{response_hash[:redirection][:status_text]}\r\n" \
      "Location: #{response_hash[:redirection][:headers]["location"]}\r",
    )

    response_text[:redirection_to_ok] = "#{response_text[:redirection]}#{response_text[:ok]}"

    response_text[:redirections_to_ok] = <<~EOS
      #{response_text[:redirection].sub(location_urls[0], location_urls[2])}
      #{response_text[:redirection].sub(location_urls[0], location_urls[1])}
      #{response_text[:redirection]}
      #{response_text[:ok]}
    EOS

    response_text
  }

  let(:body) {
    body = {}

    body[:default] = <<~EOS
      <!DOCTYPE html>
      <html>
        <head>
          <meta charset="utf-8">
          <title>Example</title>
        </head>
        <body>
          <h1>Example</h1>
          <p>Hello, world!</p>
        </body>
      </html>
    EOS

    body[:with_carriage_returns] = body[:default].sub("<html>\n", "<html>\r\n\r\n")

    body[:with_http_status_line] = body[:default].sub("<html>", "HTTP/1.1 200\r\n<html>")

    body
  }

  describe "curl_args" do
    let(:args) { ["foo"] }
    let(:user_agent_string) { "Lorem ipsum dolor sit amet" }
@@ -101,4 +268,117 @@ describe "Utils::Curl" do
      expect(curl_args(*args, show_output: true).join(" ")).not_to include("--fail")
    end
  end

  describe "#parse_curl_output" do
    it "returns a correct hash when curl output contains response(s) and body" do
      expect(parse_curl_output("#{response_text[:ok]}#{body[:default]}"))
        .to eq({ responses: [response_hash[:ok]], body: body[:default] })
      expect(parse_curl_output("#{response_text[:ok]}#{body[:with_carriage_returns]}"))
        .to eq({ responses: [response_hash[:ok]], body: body[:with_carriage_returns] })
      expect(parse_curl_output("#{response_text[:ok]}#{body[:with_http_status_line]}"))
        .to eq({ responses: [response_hash[:ok]], body: body[:with_http_status_line] })
      expect(parse_curl_output("#{response_text[:redirection_to_ok]}#{body[:default]}"))
        .to eq({ responses: [response_hash[:redirection], response_hash[:ok]], body: body[:default] })
      expect(parse_curl_output("#{response_text[:redirections_to_ok]}#{body[:default]}"))
        .to eq({
          responses: [
            response_hash[:redirection2],
            response_hash[:redirection1],
            response_hash[:redirection],
            response_hash[:ok],
          ],
          body:      body[:default],
        })
    end

    it "returns a correct hash when curl output contains HTTP response text and no body" do
      expect(parse_curl_output(response_text[:ok])).to eq({ responses: [response_hash[:ok]], body: "" })
    end

    it "returns a correct hash when curl output contains body and no HTTP response text" do
      expect(parse_curl_output(body[:default])).to eq({ responses: [], body: body[:default] })
      expect(parse_curl_output(body[:with_carriage_returns]))
        .to eq({ responses: [], body: body[:with_carriage_returns] })
      expect(parse_curl_output(body[:with_http_status_line]))
        .to eq({ responses: [], body: body[:with_http_status_line] })
    end

    it "returns correct hash when curl output is blank" do
      expect(parse_curl_output("")).to eq({ responses: [], body: "" })
    end
  end

  describe "#parse_curl_response" do
    it "returns a correct hash when given HTTP response text" do
      expect(parse_curl_response(response_text[:ok])).to eq(response_hash[:ok])
      expect(parse_curl_response(response_text[:redirection])).to eq(response_hash[:redirection])
    end

    it "returns an empty hash when given an empty string" do
      expect(parse_curl_response("")).to eq({})
    end
  end

  describe "#curl_response_last_location" do
    it "returns the last location header when given an array of HTTP response hashes" do
      expect(curl_response_last_location([
        response_hash[:redirection],
        response_hash[:ok],
      ])).to eq(response_hash[:redirection][:headers]["location"])

      expect(curl_response_last_location([
        response_hash[:redirection2],
        response_hash[:redirection1],
        response_hash[:redirection],
        response_hash[:ok],
      ])).to eq(response_hash[:redirection][:headers]["location"])
    end

    it "returns the location as given, by default or when absolutize is false" do
      expect(curl_response_last_location([
        response_hash[:redirection_no_scheme],
        response_hash[:ok],
      ])).to eq(response_hash[:redirection_no_scheme][:headers]["location"])

      expect(curl_response_last_location([
        response_hash[:redirection_root_relative],
        response_hash[:ok],
      ])).to eq(response_hash[:redirection_root_relative][:headers]["location"])

      expect(curl_response_last_location([
        response_hash[:redirection_parent_relative],
        response_hash[:ok],
      ])).to eq(response_hash[:redirection_parent_relative][:headers]["location"])
    end

    it "returns an absolute URL when absolutize is true and a base URL is provided" do
      expect(
        curl_response_last_location(
          [response_hash[:redirection_no_scheme], response_hash[:ok]],
          absolutize: true,
          base_url:   "https://brew.sh/test",
        ),
      ).to eq("https:#{response_hash[:redirection_no_scheme][:headers]["location"]}")

      expect(
        curl_response_last_location(
          [response_hash[:redirection_root_relative], response_hash[:ok]],
          absolutize: true,
          base_url:   "https://brew.sh/test",
        ),
      ).to eq("https://brew.sh#{response_hash[:redirection_root_relative][:headers]["location"]}")

      expect(
        curl_response_last_location(
          [response_hash[:redirection_parent_relative], response_hash[:ok]],
          absolutize: true,
          base_url:   "https://brew.sh/test1/test2",
        ),
      ).to eq(response_hash[:redirection_parent_relative][:headers]["location"].sub(/^\./, "https://brew.sh/test1"))
    end

    it "returns nil when the response hash doesn't contain a location header" do
      expect(curl_response_last_location([response_hash[:ok]])).to be_nil
    end
  end
end

@@ -14,6 +14,22 @@ module Utils

    using TimeRemaining

    # This regex is used to extract the part of an ETag within quotation marks,
    # ignoring any leading weak validator indicator (`W/`). This simplifies
    # ETag comparison in `#curl_check_http_content`.
    ETAG_VALUE_REGEX = %r{^(?:[wW]/)?"((?:[^"]|\\")*)"}.freeze

    # HTTP responses and body content are typically separated by a double
    # `CRLF` (whereas HTTP header lines are separated by a single `CRLF`).
    # In rare cases, this can also be a double newline (`\n\n`).
    HTTP_RESPONSE_BODY_SEPARATOR = "\r\n\r\n"

    # This regex is used to isolate the parts of an HTTP status line, namely
    # the status code and any following descriptive text (e.g., `Not Found`).
    HTTP_STATUS_LINE_REGEX = %r{^HTTP/.* (?<code>\d+)(?: (?<text>[^\r\n]+))?}.freeze

    private_constant :ETAG_VALUE_REGEX, :HTTP_RESPONSE_BODY_SEPARATOR, :HTTP_STATUS_LINE_REGEX

    module_function

    def curl_executable(use_homebrew_curl: false)
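A quick illustration of the two regexes above against made-up strings; local copies are used here because the constants are private.

etag_value_regex = %r{^(?:[wW]/)?"((?:[^"]|\\")*)"}
'W/"0815"'[etag_value_regex, 1]
# => "0815" (weak validator prefix and quotation marks stripped)

http_status_line_regex = %r{^HTTP/.* (?<code>\d+)(?: (?<text>[^\r\n]+))?}
match = "HTTP/1.1 301 Moved Permanently".match(http_status_line_regex)
[match["code"], match["text"]]
# => ["301", "Moved Permanently"]
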
@@ -145,23 +161,19 @@ module Utils
      result
    end

    def parse_headers(headers)
      return {} if headers.blank?

      # Skip status code
      headers.split("\r\n")[1..].to_h do |h|
        name, content = h.split(": ")
        [name.downcase, content]
      end
    end

    def curl_download(*args, to: nil, try_partial: true, **options)
      destination = Pathname(to)
      destination.dirname.mkpath

      if try_partial
        range_stdout = curl_output("--location", "--head", *args, **options).stdout
        headers = parse_headers(range_stdout.split("\r\n\r\n").first)
        parsed_output = parse_curl_output(range_stdout)

        headers = if parsed_output[:responses].present?
          parsed_output[:responses].last[:headers]
        else
          {}
        end

        # Any value for `accept-ranges` other than none indicates that the server supports partial requests.
        # Its absence indicates no support.

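The decision described by that comment can be sketched as follows (hypothetical names; the diff shows only the comment and the header lookup, not the exact check).

# Hypothetical sketch of the check the comment above describes:
accept_ranges = headers["accept-ranges"]
supports_partial_download = accept_ranges.present? && accept_ranges != "none"
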
@@ -187,6 +199,8 @@ module Utils

    # Check if a URL is protected by CloudFlare (e.g. badlion.net and jaxx.io).
    def url_protected_by_cloudflare?(details)
      return false if details[:headers].blank?

      [403, 503].include?(details[:status].to_i) &&
        details[:headers].match?(/^Set-Cookie: (__cfduid|__cf_bm)=/i) &&
        details[:headers].match?(/^Server: cloudflare/i)
@@ -194,6 +208,8 @@ module Utils

    # Check if a URL is protected by Incapsula (e.g. corsair.com).
    def url_protected_by_incapsula?(details)
      return false if details[:headers].blank?

      details[:status].to_i == 403 &&
        details[:headers].match?(/^Set-Cookie: visid_incap_/i) &&
        details[:headers].match?(/^Set-Cookie: incap_ses_/i)
@@ -255,7 +271,7 @@ module Utils
      end

      if url.start_with?("https://") && Homebrew::EnvConfig.no_insecure_redirect? &&
         !details[:final_url].start_with?("https://")
         (details[:final_url].present? && !details[:final_url].start_with?("https://"))
        return "The #{url_type} #{url} redirects back to HTTP"
      end

@@ -270,9 +286,11 @@ module Utils
        details[:content_length] == secure_details[:content_length]
      file_match = details[:file_hash] == secure_details[:file_hash]

      if (etag_match || content_length_match || file_match) &&
         secure_details[:final_url].start_with?("https://") &&
         url.start_with?("http://")
      http_with_https_available =
        url.start_with?("http://") &&
        (secure_details[:final_url].present? && secure_details[:final_url].start_with?("https://"))

      if (etag_match || content_length_match || file_match) && http_with_https_available
        return "The #{url_type} #{url} should use HTTPS rather than HTTP"
      end

@@ -283,8 +301,7 @@ module Utils
      https_content = secure_details[:file]&.gsub(no_protocol_file_contents, "/")

      # Check for the same content after removing all protocols
      if (http_content && https_content) && (http_content == https_content) &&
         url.start_with?("http://") && secure_details[:final_url].start_with?("https://")
      if (http_content && https_content) && (http_content == https_content) && http_with_https_available
        return "The #{url_type} #{url} should use HTTPS rather than HTTP"
      end

@@ -328,30 +345,33 @@ module Utils
        user_agent:        user_agent
      )

      status_code = :unknown
      while status_code == :unknown || status_code.to_s.start_with?("3")
        headers, _, output = output.partition("\r\n\r\n")
        status_code = headers[%r{HTTP/.* (\d+)}, 1]
        location = headers[/^Location:\s*(.*)$/i, 1]
        final_url = location.chomp if location
      end

      if status.success?
        parsed_output = parse_curl_output(output)
        responses = parsed_output[:responses]

        final_url = curl_response_last_location(responses)
        headers = if responses.last.present?
          status_code = responses.last[:status_code]
          responses.last[:headers]
        else
          {}
        end
        etag = headers["etag"][ETAG_VALUE_REGEX, 1] if headers["etag"].present?
        content_length = headers["content-length"]

        file_contents = File.read(file.path)
        file_hash = Digest::SHA2.hexdigest(file_contents) if hash_needed
      end

      final_url ||= url

      {
        url:            url,
        final_url:      final_url,
        status:         status_code,
        etag:           headers[%r{ETag: ([wW]/)?"(([^"]|\\")*)"}, 2],
        content_length: headers[/Content-Length: (\d+)/, 1],
        headers:        headers,
        file_hash:      file_hash,
        etag:           etag,
        content_length: content_length,
        file:           file_contents,
        file_hash:      file_hash,
      }
    ensure
      file.unlink

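For reference, the hash assembled at the end of `curl_http_content_headers_and_checksum` now looks roughly like this (values hypothetical; keys as in the diff).

{
  url:            "http://example.com/pkg.tar.gz",
  final_url:      "https://example.com/pkg.tar.gz",
  status:         "200",
  headers:        { "content-length" => "123", "etag" => "W/\"0815\"" },
  etag:           "0815",
  content_length: "123",
  file:           "...",       # downloaded file contents
  file_hash:      "3a7bd3...", # SHA-256 via Digest::SHA2, only when hash_needed
}
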
@@ -367,6 +387,95 @@ module Utils
    def http_status_ok?(status)
      (100..299).cover?(status.to_i)
    end

    # Separates the output text from `curl` into an array of HTTP responses and
    # the final response body (i.e. content). Response hashes contain the
    # `:status_code`, `:status_text`, and `:headers`.
    # @param output [String] The output text from `curl` containing HTTP
    #   responses, body content, or both.
    # @return [Hash] A hash containing an array of response hashes and the body
    #   content, if found.
    sig { params(output: String).returns(T::Hash[Symbol, T.untyped]) }
    def parse_curl_output(output)
      responses = []

      max_iterations = 5
      iterations = 0
      output = output.lstrip
      while output.match?(%r{\AHTTP/[\d.]+ \d+}) && output.include?(HTTP_RESPONSE_BODY_SEPARATOR)
        iterations += 1
        raise "Too many redirects (max = #{max_iterations})" if iterations > max_iterations

        response_text, _, output = output.partition(HTTP_RESPONSE_BODY_SEPARATOR)
        output = output.lstrip
        next if response_text.blank?

        response_text.chomp!
        response = parse_curl_response(response_text)
        responses << response if response.present?
      end

      { responses: responses, body: output }
    end

    # Returns the URL from the last location header found in cURL responses,
    # if any.
    # @param responses [Array<Hash>] An array of hashes containing response
    #   status information and headers from `#parse_curl_response`.
    # @param absolutize [true, false] Whether to make the location URL absolute.
    # @param base_url [String, nil] The URL to use as a base for making the
    #   `location` URL absolute.
    # @return [String, nil] The URL from the last-occurring `location` header
    #   in the responses or `nil` (if no `location` headers found).
    sig {
      params(
        responses:  T::Array[T::Hash[Symbol, T.untyped]],
        absolutize: T::Boolean,
        base_url:   T.nilable(String),
      ).returns(T.nilable(String))
    }
    def curl_response_last_location(responses, absolutize: false, base_url: nil)
      responses.reverse_each do |response|
        next if response[:headers].blank?

        location = response[:headers]["location"]
        next if location.blank?

        absolute_url = URI.join(base_url, location).to_s if absolutize && base_url.present?
        return absolute_url || location
      end

      nil
    end

    private

    # Parses HTTP response text from `curl` output into a hash containing the
    # information from the status line (status code and, optionally,
    # descriptive text) and headers.
    # @param response_text [String] The text of a `curl` response, consisting
    #   of a status line followed by header lines.
    # @return [Hash] A hash containing the response status information and
    #   headers (as a hash with header names as keys).
    sig { params(response_text: String).returns(T::Hash[Symbol, T.untyped]) }
    def parse_curl_response(response_text)
      response = {}
      return response unless response_text.match?(HTTP_STATUS_LINE_REGEX)

      # Parse the status line and remove it
      match = response_text.match(HTTP_STATUS_LINE_REGEX)
      response[:status_code] = match["code"] if match["code"].present?
      response[:status_text] = match["text"] if match["text"].present?
      response_text = response_text.sub(%r{^HTTP/.* (\d+).*$\s*}, "")

      # Create a hash from the header lines
      response[:headers] =
        response_text.split("\r\n")
                     .to_h { |header| header.split(/:\s*/, 2) }
                     .transform_keys(&:downcase)

      response
    end
  end
end

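Finally, a worked example of the new public helpers (runnable in `brew irb`; the header text is made up but mirrors the spec fixtures above).

output = "HTTP/1.1 301 Moved Permanently\r\n" \
         "Location: https://example.com/example/\r\n" \
         "\r\n" \
         "HTTP/1.1 200 OK\r\n" \
         "Content-Type: text/html; charset=UTF-8\r\n" \
         "\r\n" \
         "<!DOCTYPE html>..."

parsed = Utils::Curl.parse_curl_output(output)
parsed[:body]
# => "<!DOCTYPE html>..."
parsed[:responses].last
# => { status_code: "200", status_text: "OK",
#      headers: { "content-type" => "text/html; charset=UTF-8" } }

Utils::Curl.curl_response_last_location(parsed[:responses])
# => "https://example.com/example/"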