Curl: Check all responses for protected cookies

A URL protected by Cloudflare may provide a relevant cookie only in
its first response, but `#curl_http_content_headers_and_checksum`
only returns the headers of the final response. In this scenario,
`#curl_check_http_content` isn't able to properly detect the
protected URL, so an error is surfaced instead of the URL being
skipped.

This resolves the issue by including the array of response hashes in
the return value from `#curl_http_content_headers_and_checksum`, so
we can check all the responses in `#curl_check_http_content`.
This commit is contained in:
Sam Ford 2022-05-25 13:45:31 -04:00
parent 7b23bc64e5
commit 403a4d4a49
No known key found for this signature in database
GPG Key ID: 7AF5CBEE1DD6F76D

View File

@ -198,21 +198,20 @@ module Utils
end end
# Check if a URL is protected by CloudFlare (e.g. badlion.net and jaxx.io). # Check if a URL is protected by CloudFlare (e.g. badlion.net and jaxx.io).
# @param details [Hash] Response information from # @param response [Hash] A response hash from `#parse_curl_response`.
# `#curl_http_content_headers_and_checksum`.
# @return [true, false] Whether a response contains headers indicating that # @return [true, false] Whether a response contains headers indicating that
# the URL is protected by Cloudflare. # the URL is protected by Cloudflare.
sig { params(details: T::Hash[Symbol, T.untyped]).returns(T::Boolean) } sig { params(response: T::Hash[Symbol, T.untyped]).returns(T::Boolean) }
def url_protected_by_cloudflare?(details) def url_protected_by_cloudflare?(response)
return false if details[:headers].blank? return false if response[:headers].blank?
return false unless [403, 503].include?(details[:status_code].to_i) return false unless [403, 503].include?(response[:status_code].to_i)
set_cookie_header = Array(details[:headers]["set-cookie"]) set_cookie_header = Array(response[:headers]["set-cookie"])
has_cloudflare_cookie_header = set_cookie_header.compact.any? do |cookie| has_cloudflare_cookie_header = set_cookie_header.compact.any? do |cookie|
cookie.match?(/^(__cfduid|__cf_bm)=/i) cookie.match?(/^(__cfduid|__cf_bm)=/i)
end end
server_header = Array(details[:headers]["server"]) server_header = Array(response[:headers]["server"])
has_cloudflare_server = server_header.compact.any? do |server| has_cloudflare_server = server_header.compact.any? do |server|
server.match?(/^cloudflare/i) server.match?(/^cloudflare/i)
end end
@ -221,16 +220,15 @@ module Utils
end end
# Check if a URL is protected by Incapsula (e.g. corsair.com). # Check if a URL is protected by Incapsula (e.g. corsair.com).
# @param details [Hash] Response information from # @param response [Hash] A response hash from `#parse_curl_response`.
# `#curl_http_content_headers_and_checksum`.
# @return [true, false] Whether a response contains headers indicating that # @return [true, false] Whether a response contains headers indicating that
# the URL is protected by Incapsula. # the URL is protected by Incapsula.
sig { params(details: T::Hash[Symbol, T.untyped]).returns(T::Boolean) } sig { params(response: T::Hash[Symbol, T.untyped]).returns(T::Boolean) }
def url_protected_by_incapsula?(details) def url_protected_by_incapsula?(response)
return false if details[:headers].blank? return false if response[:headers].blank?
return false if details[:status_code].to_i != 403 return false if response[:status_code].to_i != 403
set_cookie_header = Array(details[:headers]["set-cookie"]) set_cookie_header = Array(response[:headers]["set-cookie"])
set_cookie_header.compact.any? { |cookie| cookie.match?(/^(visid_incap|incap_ses)_/i) } set_cookie_header.compact.any? { |cookie| cookie.match?(/^(visid_incap|incap_ses)_/i) }
end end
@ -284,7 +282,9 @@ module Utils
end end
unless http_status_ok?(details[:status_code]) unless http_status_ok?(details[:status_code])
return if url_protected_by_cloudflare?(details) || url_protected_by_incapsula?(details) return if details[:responses].any? do |response|
url_protected_by_cloudflare?(response) || url_protected_by_incapsula?(response)
end
return "The #{url_type} #{url} is not reachable (HTTP status code #{details[:status_code]})" return "The #{url_type} #{url} is not reachable (HTTP status code #{details[:status_code]})"
end end
@ -403,6 +403,7 @@ module Utils
content_length: content_length, content_length: content_length,
file: file_contents, file: file_contents,
file_hash: file_hash, file_hash: file_hash,
responses: responses,
} }
ensure ensure
file.unlink file.unlink