url_protected_by_*: Check multiple headers
Before `#parse_curl_output` was introduced and related methods were updated to use it, `#url_protected_by_cloudflare?` and `#url_protected_by_incapsula?` were checking a string of all the headers from a response and using a regex to check related header values. However, when `#curl_http_content_headers_and_checksum` was updated to use `#parse_curl_output` internally, the `:headers` value became a hash generated by `#parse_curl_response`. The `#url_protected_by_*` methods were updated to work with the hash value but this wasn't able to fully replicate the previous behavior because `#parse_curl_response` was only keeping the last instance of a given header (maintaining pre-existing behavior). This is an issue for these methods because they check `Set-Cookie` headers and there can be multiple instances of this header in a response. This commit updates these methods to handle an array of strings in addition to the existing string support. This change ensures that these methods properly check all `Set-Cookie` headers, effectively reinstating the previous behavior. Past that, this updates one of the early return values in `#url_protected_by_cloudflare?` to be `false` instead of an implicit `nil`. After adding a type signature to this method, it became clear that it wasn't always returning a boolean value and this fixes it.
This commit is contained in:
parent
94449d07c0
commit
40b8fd3406
@ -4,6 +4,115 @@
|
||||
require "utils/curl"
|
||||
|
||||
describe "Utils::Curl" do
|
||||
let(:details) {
  # Deep copy via a Marshal round trip so that changing a derived fixture
  # (including its nested `:headers` hash) never alters the fixture it was
  # copied from. Only ever applied to the literal test data defined below.
  deep_copy = ->(fixture) { Marshal.load(Marshal.dump(fixture)) }

  # Baseline response: a 403 status with ordinary headers and no cookies.
  no_cookie = {
    url:            "https://www.example.com/",
    final_url:      nil,
    status:         "403",
    headers:        {
      "age"            => "123456",
      "cache-control"  => "max-age=604800",
      "content-type"   => "text/html; charset=UTF-8",
      "date"           => "Wed, 1 Jan 2020 01:23:45 GMT",
      "etag"           => "\"3147526947+ident\"",
      "expires"        => "Wed, 31 Jan 2020 01:23:45 GMT",
      "last-modified"  => "Wed, 1 Jan 2020 00:00:00 GMT",
      "server"         => "ECS (dcb/7EA2)",
      "vary"           => "Accept-Encoding",
      "x-cache"        => "HIT",
      "content-length" => "3",
    },
    etag:           "3147526947+ident",
    content_length: "3",
    file:           "...",
    file_hash:      nil,
  }

  # Variants of the baseline response.
  ok = deep_copy.call(no_cookie)
  ok[:status] = "200"

  single_cookie = deep_copy.call(no_cookie)
  single_cookie[:headers]["set-cookie"] = "a_cookie=for_testing"

  multiple_cookies = deep_copy.call(no_cookie)
  multiple_cookies[:headers]["set-cookie"] = [
    "first_cookie=for_testing",
    "last_cookie=also_for_testing",
  ]

  blank_headers = deep_copy.call(no_cookie)
  blank_headers[:headers] = {}

  # Response carrying a Cloudflare cookie and a Cloudflare `server` header.
  cloudflare_single_cookie = {
    url:            "https://www.example.com/",
    final_url:      nil,
    status:         "403",
    headers:        {
      "date"            => "Wed, 1 Jan 2020 01:23:45 GMT",
      "content-type"    => "text/plain; charset=UTF-8",
      "content-length"  => "16",
      "x-frame-options" => "SAMEORIGIN",
      "referrer-policy" => "same-origin",
      "cache-control"   => "private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0",
      "expires"         => "Thu, 01 Jan 1970 00:00:01 GMT",
      "expect-ct"       => "max-age=604800, report-uri=\"https://report-uri.cloudflare.com/cdn-cgi/beacon/expect-ct\"",
      "set-cookie"      => "__cf_bm=0123456789abcdef; path=/; expires=Wed, 31-Jan-20 01:23:45 GMT;" \
                           " domain=www.example.com; HttpOnly; Secure; SameSite=None",
      "server"          => "cloudflare",
      "cf-ray"          => "0123456789abcdef-IAD",
      "alt-svc"         => "h3=\":443\"; ma=86400, h3-29=\":443\"; ma=86400",
    },
    etag:           nil,
    content_length: "16",
    file:           "error code: 1020",
    file_hash:      nil,
  }

  # The Cloudflare cookie sits between two unrelated cookies.
  cloudflare_multiple_cookies = deep_copy.call(cloudflare_single_cookie)
  cloudflare_multiple_cookies[:headers]["set-cookie"] = [
    "first_cookie=for_testing",
    "__cf_bm=abcdef0123456789; path=/; expires=Thu, 28-Apr-22 18:38:40 GMT; domain=www.example.com; HttpOnly;" \
    " Secure; SameSite=None",
    "last_cookie=also_for_testing",
  ]

  cloudflare_no_server = deep_copy.call(cloudflare_single_cookie)
  cloudflare_no_server[:headers].delete("server")

  cloudflare_wrong_server = deep_copy.call(cloudflare_single_cookie)
  cloudflare_wrong_server[:headers]["server"] = "nginx 1.2.3"

  # TODO: Make the Incapsula test data more realistic once we can find an
  #   example website to reference.
  # Builds a copy of the baseline response with the given `set-cookie` value.
  incapsula_response = lambda do |set_cookie|
    deep_copy.call(no_cookie).tap { |response| response[:headers]["set-cookie"] = set_cookie }
  end

  {
    normal:     {
      no_cookie:        no_cookie,
      ok:               ok,
      single_cookie:    single_cookie,
      multiple_cookies: multiple_cookies,
      blank_headers:    blank_headers,
    },
    cloudflare: {
      single_cookie:    cloudflare_single_cookie,
      multiple_cookies: cloudflare_multiple_cookies,
      no_server:        cloudflare_no_server,
      wrong_server:     cloudflare_wrong_server,
    },
    incapsula:  {
      single_cookie_visid_incap:    incapsula_response.call("visid_incap_something=something"),
      single_cookie_incap_ses:      incapsula_response.call("incap_ses_something=something"),
      multiple_cookies_visid_incap: incapsula_response.call(
        [
          "first_cookie=for_testing",
          "visid_incap_something=something",
          "last_cookie=also_for_testing",
        ],
      ),
      multiple_cookies_incap_ses:   incapsula_response.call(
        [
          "first_cookie=for_testing",
          "incap_ses_something=something",
          "last_cookie=also_for_testing",
        ],
      ),
    },
  }
}
|
||||
|
||||
let(:location_urls) {
|
||||
%w[
|
||||
https://example.com/example/
|
||||
@ -294,6 +403,46 @@ describe "Utils::Curl" do
|
||||
end
|
||||
end
|
||||
|
||||
# Specs for Cloudflare detection, driven by the fixtures from `details`.
describe "url_protected_by_cloudflare?" do
  it "returns `true` when a URL is protected by Cloudflare" do
    # Cover both the single-string and the array form of `set-cookie`.
    protected_responses = details[:cloudflare].values_at(:single_cookie, :multiple_cookies)

    protected_responses.each do |response|
      expect(url_protected_by_cloudflare?(response)).to be(true)
    end
  end

  it "returns `false` when a URL is not protected by Cloudflare" do
    unprotected_responses = [
      *details[:cloudflare].values_at(:no_server, :wrong_server),
      *details[:normal].values_at(:no_cookie, :ok, :single_cookie, :multiple_cookies),
    ]

    unprotected_responses.each do |response|
      expect(url_protected_by_cloudflare?(response)).to be(false)
    end
  end

  it "returns `false` when response headers are blank" do
    response = details[:normal][:blank_headers]

    expect(url_protected_by_cloudflare?(response)).to be(false)
  end
end
|
||||
|
||||
# Specs for Incapsula detection, driven by the fixtures from `details`.
describe "url_protected_by_incapsula?" do
  # The description previously said "Cloudflare" (copied from the Cloudflare
  # specs), although these expectations exercise Incapsula detection.
  it "returns `true` when a URL is protected by Incapsula" do
    # Both cookie prefixes are covered, each as a single string and as one
    # entry in an array of `set-cookie` values.
    expect(url_protected_by_incapsula?(details[:incapsula][:single_cookie_visid_incap])).to be(true)
    expect(url_protected_by_incapsula?(details[:incapsula][:single_cookie_incap_ses])).to be(true)
    expect(url_protected_by_incapsula?(details[:incapsula][:multiple_cookies_visid_incap])).to be(true)
    expect(url_protected_by_incapsula?(details[:incapsula][:multiple_cookies_incap_ses])).to be(true)
  end

  it "returns `false` when a URL is not protected by Incapsula" do
    expect(url_protected_by_incapsula?(details[:normal][:no_cookie])).to be(false)
    expect(url_protected_by_incapsula?(details[:normal][:ok])).to be(false)
    expect(url_protected_by_incapsula?(details[:normal][:single_cookie])).to be(false)
    expect(url_protected_by_incapsula?(details[:normal][:multiple_cookies])).to be(false)
  end

  it "returns `false` when response headers are blank" do
    expect(url_protected_by_incapsula?(details[:normal][:blank_headers])).to be(false)
  end
end
|
||||
|
||||
describe "#parse_curl_output" do
|
||||
it "returns a correct hash when curl output contains response(s) and body" do
|
||||
expect(parse_curl_output("#{response_text[:ok]}#{body[:default]}"))
|
||||
|
@ -198,20 +198,40 @@ module Utils
|
||||
end
|
||||
|
||||
# Check if a URL is protected by Cloudflare (e.g. badlion.net and jaxx.io).
# @param details [Hash] Response information from
#   `#curl_http_content_headers_and_checksum`.
# @return [true, false] Whether a response contains headers indicating that
#   the URL is protected by Cloudflare.
sig { params(details: T::Hash[Symbol, T.untyped]).returns(T::Boolean) }
def url_protected_by_cloudflare?(details)
  return false if details[:headers].blank?
  # Return an explicit `false` (not a bare `return`, which yields `nil`) so
  # the result always satisfies the `T::Boolean` signature.
  return false unless [403, 503].include?(details[:status].to_i)

  # A header value is either a string or, when the header occurs more than
  # once in the response (as `Set-Cookie` can), an array of strings.
  # `Array()` normalizes both forms (and a missing header) to an array so
  # every value gets checked.
  set_cookie_header = Array(details[:headers]["set-cookie"])
  has_cloudflare_cookie_header = set_cookie_header.compact.any? do |cookie|
    cookie.match?(/^(__cfduid|__cf_bm)=/i)
  end

  server_header = Array(details[:headers]["server"])
  has_cloudflare_server = server_header.compact.any? do |server|
    server.match?(/^cloudflare/i)
  end

  # Both a Cloudflare cookie and a Cloudflare `server` header are required.
  has_cloudflare_cookie_header && has_cloudflare_server
end
|
||||
|
||||
# Check if a URL is protected by Incapsula (e.g. corsair.com).
# @param details [Hash] Response information from
#   `#curl_http_content_headers_and_checksum`.
# @return [true, false] Whether a response contains headers indicating that
#   the URL is protected by Incapsula.
sig { params(details: T::Hash[Symbol, T.untyped]).returns(T::Boolean) }
def url_protected_by_incapsula?(details)
  return false if details[:headers].blank?
  return false if details[:status].to_i != 403

  # `set-cookie` is either a string or, when the header occurs more than once
  # in the response, an array of strings. `Array()` normalizes both forms
  # (and a missing header) to an array so every cookie gets checked; calling
  # `match?` on the raw value would fail for the array form.
  set_cookie_header = Array(details[:headers]["set-cookie"])
  set_cookie_header.compact.any? { |cookie| cookie.match?(/^(visid_incap|incap_ses)_/i) }
end
|
||||
|
||||
def curl_check_http_content(url, url_type, specs: {}, user_agents: [:default],
|
||||
|
Loading…
x
Reference in New Issue
Block a user