diff --git a/Library/Homebrew/test/utils/curl_spec.rb b/Library/Homebrew/test/utils/curl_spec.rb index 454d5d1f95..d38d369eb3 100644 --- a/Library/Homebrew/test/utils/curl_spec.rb +++ b/Library/Homebrew/test/utils/curl_spec.rb @@ -4,6 +4,173 @@ require "utils/curl" describe "Utils::Curl" do + let(:location_urls) { + %w[ + https://example.com/example/ + https://example.com/example1/ + https://example.com/example2/ + ] + } + + let(:response_hash) { + response_hash = {} + + response_hash[:ok] = { + status_code: "200", + status_text: "OK", + headers: { + "cache-control" => "max-age=604800", + "content-type" => "text/html; charset=UTF-8", + "date" => "Wed, 1 Jan 2020 01:23:45 GMT", + "expires" => "Wed, 31 Jan 2020 01:23:45 GMT", + "last-modified" => "Thu, 1 Jan 2019 01:23:45 GMT", + "content-length" => "123", + }, + } + + response_hash[:redirection] = { + status_code: "301", + status_text: "Moved Permanently", + headers: { + "cache-control" => "max-age=604800", + "content-type" => "text/html; charset=UTF-8", + "date" => "Wed, 1 Jan 2020 01:23:45 GMT", + "expires" => "Wed, 31 Jan 2020 01:23:45 GMT", + "last-modified" => "Thu, 1 Jan 2019 01:23:45 GMT", + "content-length" => "123", + "location" => location_urls[0], + }, + } + + response_hash[:redirection1] = { + status_code: "301", + status_text: "Moved Permanently", + headers: { + "cache-control" => "max-age=604800", + "content-type" => "text/html; charset=UTF-8", + "date" => "Wed, 1 Jan 2020 01:23:45 GMT", + "expires" => "Wed, 31 Jan 2020 01:23:45 GMT", + "last-modified" => "Thu, 1 Jan 2019 01:23:45 GMT", + "content-length" => "123", + "location" => location_urls[1], + }, + } + + response_hash[:redirection2] = { + status_code: "301", + status_text: "Moved Permanently", + headers: { + "cache-control" => "max-age=604800", + "content-type" => "text/html; charset=UTF-8", + "date" => "Wed, 1 Jan 2020 01:23:45 GMT", + "expires" => "Wed, 31 Jan 2020 01:23:45 GMT", + "last-modified" => "Thu, 1 Jan 2019 01:23:45 GMT", + 
"content-length" => "123", + "location" => location_urls[2], + }, + } + + response_hash[:redirection_no_scheme] = { + status_code: "301", + status_text: "Moved Permanently", + headers: { + "cache-control" => "max-age=604800", + "content-type" => "text/html; charset=UTF-8", + "date" => "Wed, 1 Jan 2020 01:23:45 GMT", + "expires" => "Wed, 31 Jan 2020 01:23:45 GMT", + "last-modified" => "Thu, 1 Jan 2019 01:23:45 GMT", + "content-length" => "123", + "location" => "//www.example.com/example/", + }, + } + + response_hash[:redirection_root_relative] = { + status_code: "301", + status_text: "Moved Permanently", + headers: { + "cache-control" => "max-age=604800", + "content-type" => "text/html; charset=UTF-8", + "date" => "Wed, 1 Jan 2020 01:23:45 GMT", + "expires" => "Wed, 31 Jan 2020 01:23:45 GMT", + "last-modified" => "Thu, 1 Jan 2019 01:23:45 GMT", + "content-length" => "123", + "location" => "/example/", + }, + } + + response_hash[:redirection_parent_relative] = { + status_code: "301", + status_text: "Moved Permanently", + headers: { + "cache-control" => "max-age=604800", + "content-type" => "text/html; charset=UTF-8", + "date" => "Wed, 1 Jan 2020 01:23:45 GMT", + "expires" => "Wed, 31 Jan 2020 01:23:45 GMT", + "last-modified" => "Thu, 1 Jan 2019 01:23:45 GMT", + "content-length" => "123", + "location" => "./example/", + }, + } + + response_hash + } + + let(:response_text) { + response_text = {} + + response_text[:ok] = <<~EOS + HTTP/1.1 #{response_hash[:ok][:status_code]} #{response_hash[:ok][:status_text]}\r + Cache-Control: #{response_hash[:ok][:headers]["cache-control"]}\r + Content-Type: #{response_hash[:ok][:headers]["content-type"]}\r + Date: #{response_hash[:ok][:headers]["date"]}\r + Expires: #{response_hash[:ok][:headers]["expires"]}\r + Last-Modified: #{response_hash[:ok][:headers]["last-modified"]}\r + Content-Length: #{response_hash[:ok][:headers]["content-length"]}\r + \r + EOS + + response_text[:redirection] = response_text[:ok].sub( + "HTTP/1.1 
#{response_hash[:ok][:status_code]} #{response_hash[:ok][:status_text]}\r", + "HTTP/1.1 #{response_hash[:redirection][:status_code]} #{response_hash[:redirection][:status_text]}\r\n" \ + "Location: #{response_hash[:redirection][:headers]["location"]}\r", + ) + + response_text[:redirection_to_ok] = "#{response_text[:redirection]}#{response_text[:ok]}" + + response_text[:redirections_to_ok] = <<~EOS + #{response_text[:redirection].sub(location_urls[0], location_urls[2])} + #{response_text[:redirection].sub(location_urls[0], location_urls[1])} + #{response_text[:redirection]} + #{response_text[:ok]} + EOS + + response_text + } + + let(:body) { + body = {} + + body[:default] = <<~EOS + + +
+ +Hello, world!
+ + + EOS + + body[:with_carriage_returns] = body[:default].sub("\n", "\r\n\r\n") + + body[:with_http_status_line] = body[:default].sub("", "HTTP/1.1 200\r\n") + + body + } + describe "curl_args" do let(:args) { ["foo"] } let(:user_agent_string) { "Lorem ipsum dolor sit amet" } @@ -101,4 +268,117 @@ describe "Utils::Curl" do expect(curl_args(*args, show_output: true).join(" ")).not_to include("--fail") end end + + describe "#parse_curl_output" do + it "returns a correct hash when curl output contains response(s) and body" do + expect(parse_curl_output("#{response_text[:ok]}#{body[:default]}")) + .to eq({ responses: [response_hash[:ok]], body: body[:default] }) + expect(parse_curl_output("#{response_text[:ok]}#{body[:with_carriage_returns]}")) + .to eq({ responses: [response_hash[:ok]], body: body[:with_carriage_returns] }) + expect(parse_curl_output("#{response_text[:ok]}#{body[:with_http_status_line]}")) + .to eq({ responses: [response_hash[:ok]], body: body[:with_http_status_line] }) + expect(parse_curl_output("#{response_text[:redirection_to_ok]}#{body[:default]}")) + .to eq({ responses: [response_hash[:redirection], response_hash[:ok]], body: body[:default] }) + expect(parse_curl_output("#{response_text[:redirections_to_ok]}#{body[:default]}")) + .to eq({ + responses: [ + response_hash[:redirection2], + response_hash[:redirection1], + response_hash[:redirection], + response_hash[:ok], + ], + body: body[:default], + }) + end + + it "returns a correct hash when curl output contains HTTP response text and no body" do + expect(parse_curl_output(response_text[:ok])).to eq({ responses: [response_hash[:ok]], body: "" }) + end + + it "returns a correct hash when curl output contains body and no HTTP response text" do + expect(parse_curl_output(body[:default])).to eq({ responses: [], body: body[:default] }) + expect(parse_curl_output(body[:with_carriage_returns])) + .to eq({ responses: [], body: body[:with_carriage_returns] }) + 
expect(parse_curl_output(body[:with_http_status_line])) + .to eq({ responses: [], body: body[:with_http_status_line] }) + end + + it "returns correct hash when curl output is blank" do + expect(parse_curl_output("")).to eq({ responses: [], body: "" }) + end + end + + describe "#parse_curl_response" do + it "returns a correct hash when given HTTP response text" do + expect(parse_curl_response(response_text[:ok])).to eq(response_hash[:ok]) + expect(parse_curl_response(response_text[:redirection])).to eq(response_hash[:redirection]) + end + + it "returns an empty hash when given an empty string" do + expect(parse_curl_response("")).to eq({}) + end + end + + describe "#curl_response_last_location" do + it "returns the last location header when given an array of HTTP response hashes" do + expect(curl_response_last_location([ + response_hash[:redirection], + response_hash[:ok], + ])).to eq(response_hash[:redirection][:headers]["location"]) + + expect(curl_response_last_location([ + response_hash[:redirection2], + response_hash[:redirection1], + response_hash[:redirection], + response_hash[:ok], + ])).to eq(response_hash[:redirection][:headers]["location"]) + end + + it "returns the location as given, by default or when absolutize is false" do + expect(curl_response_last_location([ + response_hash[:redirection_no_scheme], + response_hash[:ok], + ])).to eq(response_hash[:redirection_no_scheme][:headers]["location"]) + + expect(curl_response_last_location([ + response_hash[:redirection_root_relative], + response_hash[:ok], + ])).to eq(response_hash[:redirection_root_relative][:headers]["location"]) + + expect(curl_response_last_location([ + response_hash[:redirection_parent_relative], + response_hash[:ok], + ])).to eq(response_hash[:redirection_parent_relative][:headers]["location"]) + end + + it "returns an absolute URL when absolutize is true and a base URL is provided" do + expect( + curl_response_last_location( + [response_hash[:redirection_no_scheme], 
response_hash[:ok]], + absolutize: true, + base_url: "https://brew.sh/test", + ), + ).to eq("https:#{response_hash[:redirection_no_scheme][:headers]["location"]}") + + expect( + curl_response_last_location( + [response_hash[:redirection_root_relative], response_hash[:ok]], + absolutize: true, + base_url: "https://brew.sh/test", + ), + ).to eq("https://brew.sh#{response_hash[:redirection_root_relative][:headers]["location"]}") + + expect( + curl_response_last_location( + [response_hash[:redirection_parent_relative], response_hash[:ok]], + absolutize: true, + base_url: "https://brew.sh/test1/test2", + ), + ).to eq(response_hash[:redirection_parent_relative][:headers]["location"].sub(/^\./, "https://brew.sh/test1")) + end + + it "returns nil when the response hash doesn't contain a location header" do + expect(curl_response_last_location([response_hash[:ok]])).to be_nil + end + end end diff --git a/Library/Homebrew/utils/curl.rb b/Library/Homebrew/utils/curl.rb index 4b08d1fa77..5e9583e80f 100644 --- a/Library/Homebrew/utils/curl.rb +++ b/Library/Homebrew/utils/curl.rb @@ -14,6 +14,17 @@ module Utils using TimeRemaining + # HTTP responses and body content are typically separated by a double + # `CRLF` (whereas HTTP header lines are separated by a single `CRLF`). + # In rare cases, this can also be a double newline (`\n\n`). + HTTP_RESPONSE_BODY_SEPARATOR = "\r\n\r\n" + + # This regex is used to isolate the parts of an HTTP status line, namely + # the status code and any following descriptive text (e.g., `Not Found`). + HTTP_STATUS_LINE_REGEX = %r{^HTTP/.* (?\d+)(?: (?[^\r\n]+))?}.freeze
+
+ private_constant :HTTP_RESPONSE_BODY_SEPARATOR, :HTTP_STATUS_LINE_REGEX
+
module_function
def curl_executable(use_homebrew_curl: false)
@@ -367,6 +378,95 @@ module Utils
def http_status_ok?(status)
(100..299).cover?(status.to_i)
end
+
+    # Separates the output text from `curl` into an array of HTTP responses and
+    # the final response body (i.e. content). Response hashes contain the
+    # `:status_code`, `:status_text`, and `:headers`.
+    #
+    # Responses are consumed from the front of the output for as long as the
+    # remaining text starts with an HTTP status line and still contains a
+    # header/body separator; whatever is left over is returned as the body.
+    # @param output [String] The output text from `curl` containing HTTP
+    #   responses, body content, or both.
+    # @param max_iterations [Integer] The maximum number of responses to parse
+    #   before giving up.
+    # @return [Hash] A hash containing an array of response hashes
+    #   (`:responses`) and the body content (`:body`), if found.
+    # @raise [RuntimeError] if the output contains more than `max_iterations`
+    #   responses.
+    sig { params(output: String, max_iterations: Integer).returns(T::Hash[Symbol, T.untyped]) }
+    def parse_curl_output(output, max_iterations: 5)
+      responses = []
+
+      iterations = 0
+      output = output.lstrip
+      while output.match?(%r{\AHTTP/[\d.]+ \d+}) && output.include?(HTTP_RESPONSE_BODY_SEPARATOR)
+        iterations += 1
+        raise "Too many redirects (max = #{max_iterations})" if iterations > max_iterations
+
+        # Everything before the first separator is one response (status line
+        # and headers); the remainder is either another response or the body.
+        response_text, _, output = output.partition(HTTP_RESPONSE_BODY_SEPARATOR)
+        output = output.lstrip
+        next if response_text.blank?
+
+        response = parse_curl_response(response_text.chomp)
+        responses << response if response.present?
+      end
+
+      { responses: responses, body: output }
+    end
+
+    # Finds the most recent `location` header among cURL responses and returns
+    # its URL, if any.
+    # @param responses [Array<Hash>] Response hashes (status information and
+    #   headers) as produced by `#parse_curl_response`.
+    # @param absolutize [true, false] Whether to resolve the location against
+    #   `base_url` to produce an absolute URL.
+    # @param base_url [String, nil] The URL the `location` value is resolved
+    #   against; only used when `absolutize` is true.
+    # @return [String, nil] The value of the last-occurring `location` header
+    #   (made absolute when requested and a base URL is given), or `nil` when
+    #   no response carries one.
+    sig {
+      params(
+        responses:  T::Array[T::Hash[Symbol, T.untyped]],
+        absolutize: T::Boolean,
+        base_url:   T.nilable(String),
+      ).returns(T.nilable(String))
+    }
+    def curl_response_last_location(responses, absolutize: false, base_url: nil)
+      # Walk the responses from last to first so the latest redirection wins.
+      redirect = responses.reverse_each.find do |candidate|
+        headers = candidate[:headers]
+        headers.present? && headers["location"].present?
+      end
+      return if redirect.nil?
+
+      location = redirect[:headers]["location"]
+      return location unless absolutize && base_url.present?
+
+      URI.join(base_url, location).to_s
+    end
+
+ private
+
+    # Parses HTTP response text from `curl` output into a hash containing the
+    # information from the status line (status code and, optionally,
+    # descriptive text) and headers.
+    # @param response_text [String] The text of a `curl` response, consisting
+    #   of a status line followed by header lines.
+    # @return [Hash] A hash containing the response status information
+    #   (`:status_code`, `:status_text`) and `:headers` (a hash with downcased
+    #   header names as keys), or an empty hash if the text contains no HTTP
+    #   status line.
+    sig { params(response_text: String).returns(T::Hash[Symbol, T.untyped]) }
+    def parse_curl_response(response_text)
+      response = {}
+
+      # Match once and reuse the result instead of scanning the text twice.
+      match = response_text.match(HTTP_STATUS_LINE_REGEX)
+      return response if match.nil?
+
+      # Record the status line information (read from the `code` and `text`
+      # named captures) and then remove the status line from the text.
+      response[:status_code] = match["code"] if match["code"].present?
+      response[:status_text] = match["text"] if match["text"].present?
+      header_text = response_text.sub(%r{^HTTP/.* (\d+).*$\s*}, "")
+
+      # Create a hash from the header lines. Lines without a `:` separator
+      # (e.g. a folded continuation line) are skipped, as a one-element pair
+      # would otherwise make hash construction raise an `ArgumentError`.
+      response[:headers] =
+        header_text.split("\r\n").each_with_object({}) do |line, headers|
+          name, value = line.split(/:\s*/, 2)
+          next if value.nil?
+
+          headers[name.downcase] = value
+        end
+
+      response
+    end
end
end