From 40b8fd3406b2c50873f021f0787f7c253acd009e Mon Sep 17 00:00:00 2001 From: Sam Ford <1584702+samford@users.noreply.github.com> Date: Mon, 2 May 2022 15:56:39 -0400 Subject: [PATCH] url_protected_by_*: Check multiple headers Before `#parse_curl_output` was introduced and related methods were updated to use it, `#url_protected_by_cloudflare?` and `#url_protected_by_incapsula?` were checking a string of all the headers from a response and using a regex to check related header values. However, when `#curl_http_content_headers_and_checksum` was updated to use `#parse_curl_output` internally, the `:headers` value became a hash generated by `#parse_curl_response`. The `#url_protected_by_*` methods were updated to work with the hash value but this wasn't able to fully replicate the previous behavior because `#parse_curl_response` was only keeping the last instance of a given header (maintaining pre-existing behavior). This is an issue for these methods because they check `Set-Cookie` headers and there can be multiple instances of this header in a response. This commit updates these methods to handle an array of strings in addition to the existing string support. This change ensures that these methods properly check all `Set-Cookie` headers, effectively reinstating the previous behavior. Past that, this updates one of the early return values in `#url_protected_by_cloudflare?` to be `false` instead of an implicit `nil`. After adding a type signature to this method, it became clear that it wasn't always returning a boolean value and this fixes it. 
--- Library/Homebrew/test/utils/curl_spec.rb | 149 +++++++++++++++++++++++ Library/Homebrew/utils/curl.rb | 28 ++++- 2 files changed, 173 insertions(+), 4 deletions(-) diff --git a/Library/Homebrew/test/utils/curl_spec.rb b/Library/Homebrew/test/utils/curl_spec.rb index 3f0e6cfa28..06a2d7e8bd 100644 --- a/Library/Homebrew/test/utils/curl_spec.rb +++ b/Library/Homebrew/test/utils/curl_spec.rb @@ -4,6 +4,115 @@ require "utils/curl" describe "Utils::Curl" do + let(:details) { + details = { + normal: {}, + cloudflare: {}, + incapsula: {}, + } + + details[:normal][:no_cookie] = { + url: "https://www.example.com/", + final_url: nil, + status: "403", + headers: { + "age" => "123456", + "cache-control" => "max-age=604800", + "content-type" => "text/html; charset=UTF-8", + "date" => "Wed, 1 Jan 2020 01:23:45 GMT", + "etag" => "\"3147526947+ident\"", + "expires" => "Wed, 31 Jan 2020 01:23:45 GMT", + "last-modified" => "Wed, 1 Jan 2020 00:00:00 GMT", + "server" => "ECS (dcb/7EA2)", + "vary" => "Accept-Encoding", + "x-cache" => "HIT", + "content-length" => "3", + }, + etag: "3147526947+ident", + content_length: "3", + file: "...", + file_hash: nil, + } + + details[:normal][:ok] = Marshal.load(Marshal.dump(details[:normal][:no_cookie])) + details[:normal][:ok][:status] = "200" + + details[:normal][:single_cookie] = Marshal.load(Marshal.dump(details[:normal][:no_cookie])) + details[:normal][:single_cookie][:headers]["set-cookie"] = "a_cookie=for_testing" + + details[:normal][:multiple_cookies] = Marshal.load(Marshal.dump(details[:normal][:no_cookie])) + details[:normal][:multiple_cookies][:headers]["set-cookie"] = [ + "first_cookie=for_testing", + "last_cookie=also_for_testing", + ] + + details[:normal][:blank_headers] = Marshal.load(Marshal.dump(details[:normal][:no_cookie])) + details[:normal][:blank_headers][:headers] = {} + + details[:cloudflare][:single_cookie] = { + url: "https://www.example.com/", + final_url: nil, + status: "403", + headers: { + "date" => "Wed, 1 Jan 
2020 01:23:45 GMT", + "content-type" => "text/plain; charset=UTF-8", + "content-length" => "16", + "x-frame-options" => "SAMEORIGIN", + "referrer-policy" => "same-origin", + "cache-control" => "private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0", + "expires" => "Thu, 01 Jan 1970 00:00:01 GMT", + "expect-ct" => "max-age=604800, report-uri=\"https://report-uri.cloudflare.com/cdn-cgi/beacon/expect-ct\"", + "set-cookie" => "__cf_bm=0123456789abcdef; path=/; expires=Wed, 31-Jan-20 01:23:45 GMT;" \ + " domain=www.example.com; HttpOnly; Secure; SameSite=None", + "server" => "cloudflare", + "cf-ray" => "0123456789abcdef-IAD", + "alt-svc" => "h3=\":443\"; ma=86400, h3-29=\":443\"; ma=86400", + }, + etag: nil, + content_length: "16", + file: "error code: 1020", + file_hash: nil, + } + + details[:cloudflare][:multiple_cookies] = Marshal.load(Marshal.dump(details[:cloudflare][:single_cookie])) + details[:cloudflare][:multiple_cookies][:headers]["set-cookie"] = [ + "first_cookie=for_testing", + "__cf_bm=abcdef0123456789; path=/; expires=Thu, 28-Apr-22 18:38:40 GMT; domain=www.example.com; HttpOnly;" \ + " Secure; SameSite=None", + "last_cookie=also_for_testing", + ] + + details[:cloudflare][:no_server] = Marshal.load(Marshal.dump(details[:cloudflare][:single_cookie])) + details[:cloudflare][:no_server][:headers].delete("server") + + details[:cloudflare][:wrong_server] = Marshal.load(Marshal.dump(details[:cloudflare][:single_cookie])) + details[:cloudflare][:wrong_server][:headers]["server"] = "nginx 1.2.3" + + # TODO: Make the Incapsula test data more realistic once we can find an + # example website to reference. 
+ details[:incapsula][:single_cookie_visid_incap] = Marshal.load(Marshal.dump(details[:normal][:no_cookie])) + details[:incapsula][:single_cookie_visid_incap][:headers]["set-cookie"] = "visid_incap_something=something" + + details[:incapsula][:single_cookie_incap_ses] = Marshal.load(Marshal.dump(details[:normal][:no_cookie])) + details[:incapsula][:single_cookie_incap_ses][:headers]["set-cookie"] = "incap_ses_something=something" + + details[:incapsula][:multiple_cookies_visid_incap] = Marshal.load(Marshal.dump(details[:normal][:no_cookie])) + details[:incapsula][:multiple_cookies_visid_incap][:headers]["set-cookie"] = [ + "first_cookie=for_testing", + "visid_incap_something=something", + "last_cookie=also_for_testing", + ] + + details[:incapsula][:multiple_cookies_incap_ses] = Marshal.load(Marshal.dump(details[:normal][:no_cookie])) + details[:incapsula][:multiple_cookies_incap_ses][:headers]["set-cookie"] = [ + "first_cookie=for_testing", + "incap_ses_something=something", + "last_cookie=also_for_testing", + ] + + details + } + let(:location_urls) { %w[ https://example.com/example/ @@ -294,6 +403,46 @@ describe "Utils::Curl" do end end + describe "url_protected_by_cloudflare?" 
do + it "returns `true` when a URL is protected by Cloudflare" do + expect(url_protected_by_cloudflare?(details[:cloudflare][:single_cookie])).to be(true) + expect(url_protected_by_cloudflare?(details[:cloudflare][:multiple_cookies])).to be(true) + end + + it "returns `false` when a URL is not protected by Cloudflare" do + expect(url_protected_by_cloudflare?(details[:cloudflare][:no_server])).to be(false) + expect(url_protected_by_cloudflare?(details[:cloudflare][:wrong_server])).to be(false) + expect(url_protected_by_cloudflare?(details[:normal][:no_cookie])).to be(false) + expect(url_protected_by_cloudflare?(details[:normal][:ok])).to be(false) + expect(url_protected_by_cloudflare?(details[:normal][:single_cookie])).to be(false) + expect(url_protected_by_cloudflare?(details[:normal][:multiple_cookies])).to be(false) + end + + it "returns `false` when response headers are blank" do + expect(url_protected_by_cloudflare?(details[:normal][:blank_headers])).to be(false) + end + end + + describe "url_protected_by_incapsula?" 
do + it "returns `true` when a URL is protected by Incapsula" do + expect(url_protected_by_incapsula?(details[:incapsula][:single_cookie_visid_incap])).to be(true) + expect(url_protected_by_incapsula?(details[:incapsula][:single_cookie_incap_ses])).to be(true) + expect(url_protected_by_incapsula?(details[:incapsula][:multiple_cookies_visid_incap])).to be(true) + expect(url_protected_by_incapsula?(details[:incapsula][:multiple_cookies_incap_ses])).to be(true) + end + + it "returns `false` when a URL is not protected by Incapsula" do + expect(url_protected_by_incapsula?(details[:normal][:no_cookie])).to be(false) + expect(url_protected_by_incapsula?(details[:normal][:ok])).to be(false) + expect(url_protected_by_incapsula?(details[:normal][:single_cookie])).to be(false) + expect(url_protected_by_incapsula?(details[:normal][:multiple_cookies])).to be(false) + end + + it "returns `false` when response headers are blank" do + expect(url_protected_by_incapsula?(details[:normal][:blank_headers])).to be(false) + end + end + describe "#parse_curl_output" do it "returns a correct hash when curl output contains response(s) and body" do expect(parse_curl_output("#{response_text[:ok]}#{body[:default]}")) diff --git a/Library/Homebrew/utils/curl.rb b/Library/Homebrew/utils/curl.rb index 08949d6360..ded6c28720 100644 --- a/Library/Homebrew/utils/curl.rb +++ b/Library/Homebrew/utils/curl.rb @@ -198,20 +198,40 @@ module Utils end # Check if a URL is protected by CloudFlare (e.g. badlion.net and jaxx.io). + # @param details [Hash] Response information from + # `#curl_http_content_headers_and_checksum`. + # @return [true, false] Whether a response contains headers indicating that + # the URL is protected by Cloudflare. + sig { params(details: T::Hash[Symbol, T.untyped]).returns(T::Boolean) } def url_protected_by_cloudflare?(details) return false if details[:headers].blank? 
- return unless [403, 503].include?(details[:status].to_i) + return false unless [403, 503].include?(details[:status].to_i) - details[:headers].fetch("set-cookie", nil)&.match?(/^(__cfduid|__cf_bm)=/i) && - details[:headers].fetch("server", nil)&.match?(/^cloudflare/i) + set_cookie_header = Array(details[:headers]["set-cookie"]) + has_cloudflare_cookie_header = set_cookie_header.compact.any? do |cookie| + cookie.match?(/^(__cfduid|__cf_bm)=/i) + end + + server_header = Array(details[:headers]["server"]) + has_cloudflare_server = server_header.compact.any? do |server| + server.match?(/^cloudflare/i) + end + + has_cloudflare_cookie_header && has_cloudflare_server end # Check if a URL is protected by Incapsula (e.g. corsair.com). + # @param details [Hash] Response information from + # `#curl_http_content_headers_and_checksum`. + # @return [true, false] Whether a response contains headers indicating that + # the URL is protected by Incapsula. + sig { params(details: T::Hash[Symbol, T.untyped]).returns(T::Boolean) } def url_protected_by_incapsula?(details) return false if details[:headers].blank? return false if details[:status].to_i != 403 - details[:headers].fetch("set-cookie", nil)&.match?(/^(visid_incap|incap_ses)_/i) + set_cookie_header = Array(details[:headers]["set-cookie"]) + set_cookie_header.compact.any? { |cookie| cookie.match?(/^(visid_incap|incap_ses)_/i) } end def curl_check_http_content(url, url_type, specs: {}, user_agents: [:default],