url_protected_by_*: Check multiple headers
Before `#parse_curl_output` was introduced and related methods were updated to use it, `#url_protected_by_cloudflare?` and `#url_protected_by_incapsula?` were checking a string of all the headers from a response and using a regex to check related header values. However, when `#curl_http_content_headers_and_checksum` was updated to use `#parse_curl_output` internally, the `:headers` value became a hash generated by `#parse_curl_response`. The `#url_protected_by_*` methods were updated to work with the hash value but this wasn't able to fully replicate the previous behavior because `#parse_curl_response` was only keeping the last instance of a given header (maintaining pre-existing behavior). This is an issue for these methods because they check `Set-Cookie` headers and there can be multiple instances of this header in a response. This commit updates these methods to handle an array of strings in addition to the existing string support. This change ensures that these methods properly check all `Set-Cookie` headers, effectively reinstating the previous behavior. Past that, this updates one of the early return values in `#url_protected_by_cloudflare?` to be `false` instead of an implicit `nil`. After adding a type signature to this method, it became clear that it wasn't always returning a boolean value and this fixes it.
This commit is contained in:
parent
94449d07c0
commit
40b8fd3406
@ -4,6 +4,115 @@
|
|||||||
require "utils/curl"
|
require "utils/curl"
|
||||||
|
|
||||||
describe "Utils::Curl" do
|
describe "Utils::Curl" do
|
||||||
|
let(:details) {
|
||||||
|
details = {
|
||||||
|
normal: {},
|
||||||
|
cloudflare: {},
|
||||||
|
incapsula: {},
|
||||||
|
}
|
||||||
|
|
||||||
|
details[:normal][:no_cookie] = {
|
||||||
|
url: "https://www.example.com/",
|
||||||
|
final_url: nil,
|
||||||
|
status: "403",
|
||||||
|
headers: {
|
||||||
|
"age" => "123456",
|
||||||
|
"cache-control" => "max-age=604800",
|
||||||
|
"content-type" => "text/html; charset=UTF-8",
|
||||||
|
"date" => "Wed, 1 Jan 2020 01:23:45 GMT",
|
||||||
|
"etag" => "\"3147526947+ident\"",
|
||||||
|
"expires" => "Wed, 31 Jan 2020 01:23:45 GMT",
|
||||||
|
"last-modified" => "Wed, 1 Jan 2020 00:00:00 GMT",
|
||||||
|
"server" => "ECS (dcb/7EA2)",
|
||||||
|
"vary" => "Accept-Encoding",
|
||||||
|
"x-cache" => "HIT",
|
||||||
|
"content-length" => "3",
|
||||||
|
},
|
||||||
|
etag: "3147526947+ident",
|
||||||
|
content_length: "3",
|
||||||
|
file: "...",
|
||||||
|
file_hash: nil,
|
||||||
|
}
|
||||||
|
|
||||||
|
details[:normal][:ok] = Marshal.load(Marshal.dump(details[:normal][:no_cookie]))
|
||||||
|
details[:normal][:ok][:status] = "200"
|
||||||
|
|
||||||
|
details[:normal][:single_cookie] = Marshal.load(Marshal.dump(details[:normal][:no_cookie]))
|
||||||
|
details[:normal][:single_cookie][:headers]["set-cookie"] = "a_cookie=for_testing"
|
||||||
|
|
||||||
|
details[:normal][:multiple_cookies] = Marshal.load(Marshal.dump(details[:normal][:no_cookie]))
|
||||||
|
details[:normal][:multiple_cookies][:headers]["set-cookie"] = [
|
||||||
|
"first_cookie=for_testing",
|
||||||
|
"last_cookie=also_for_testing",
|
||||||
|
]
|
||||||
|
|
||||||
|
details[:normal][:blank_headers] = Marshal.load(Marshal.dump(details[:normal][:no_cookie]))
|
||||||
|
details[:normal][:blank_headers][:headers] = {}
|
||||||
|
|
||||||
|
details[:cloudflare][:single_cookie] = {
|
||||||
|
url: "https://www.example.com/",
|
||||||
|
final_url: nil,
|
||||||
|
status: "403",
|
||||||
|
headers: {
|
||||||
|
"date" => "Wed, 1 Jan 2020 01:23:45 GMT",
|
||||||
|
"content-type" => "text/plain; charset=UTF-8",
|
||||||
|
"content-length" => "16",
|
||||||
|
"x-frame-options" => "SAMEORIGIN",
|
||||||
|
"referrer-policy" => "same-origin",
|
||||||
|
"cache-control" => "private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0",
|
||||||
|
"expires" => "Thu, 01 Jan 1970 00:00:01 GMT",
|
||||||
|
"expect-ct" => "max-age=604800, report-uri=\"https://report-uri.cloudflare.com/cdn-cgi/beacon/expect-ct\"",
|
||||||
|
"set-cookie" => "__cf_bm=0123456789abcdef; path=/; expires=Wed, 31-Jan-20 01:23:45 GMT;" \
|
||||||
|
" domain=www.example.com; HttpOnly; Secure; SameSite=None",
|
||||||
|
"server" => "cloudflare",
|
||||||
|
"cf-ray" => "0123456789abcdef-IAD",
|
||||||
|
"alt-svc" => "h3=\":443\"; ma=86400, h3-29=\":443\"; ma=86400",
|
||||||
|
},
|
||||||
|
etag: nil,
|
||||||
|
content_length: "16",
|
||||||
|
file: "error code: 1020",
|
||||||
|
file_hash: nil,
|
||||||
|
}
|
||||||
|
|
||||||
|
details[:cloudflare][:multiple_cookies] = Marshal.load(Marshal.dump(details[:cloudflare][:single_cookie]))
|
||||||
|
details[:cloudflare][:multiple_cookies][:headers]["set-cookie"] = [
|
||||||
|
"first_cookie=for_testing",
|
||||||
|
"__cf_bm=abcdef0123456789; path=/; expires=Thu, 28-Apr-22 18:38:40 GMT; domain=www.example.com; HttpOnly;" \
|
||||||
|
" Secure; SameSite=None",
|
||||||
|
"last_cookie=also_for_testing",
|
||||||
|
]
|
||||||
|
|
||||||
|
details[:cloudflare][:no_server] = Marshal.load(Marshal.dump(details[:cloudflare][:single_cookie]))
|
||||||
|
details[:cloudflare][:no_server][:headers].delete("server")
|
||||||
|
|
||||||
|
details[:cloudflare][:wrong_server] = Marshal.load(Marshal.dump(details[:cloudflare][:single_cookie]))
|
||||||
|
details[:cloudflare][:wrong_server][:headers]["server"] = "nginx 1.2.3"
|
||||||
|
|
||||||
|
# TODO: Make the Incapsula test data more realistic once we can find an
|
||||||
|
# example website to reference.
|
||||||
|
details[:incapsula][:single_cookie_visid_incap] = Marshal.load(Marshal.dump(details[:normal][:no_cookie]))
|
||||||
|
details[:incapsula][:single_cookie_visid_incap][:headers]["set-cookie"] = "visid_incap_something=something"
|
||||||
|
|
||||||
|
details[:incapsula][:single_cookie_incap_ses] = Marshal.load(Marshal.dump(details[:normal][:no_cookie]))
|
||||||
|
details[:incapsula][:single_cookie_incap_ses][:headers]["set-cookie"] = "incap_ses_something=something"
|
||||||
|
|
||||||
|
details[:incapsula][:multiple_cookies_visid_incap] = Marshal.load(Marshal.dump(details[:normal][:no_cookie]))
|
||||||
|
details[:incapsula][:multiple_cookies_visid_incap][:headers]["set-cookie"] = [
|
||||||
|
"first_cookie=for_testing",
|
||||||
|
"visid_incap_something=something",
|
||||||
|
"last_cookie=also_for_testing",
|
||||||
|
]
|
||||||
|
|
||||||
|
details[:incapsula][:multiple_cookies_incap_ses] = Marshal.load(Marshal.dump(details[:normal][:no_cookie]))
|
||||||
|
details[:incapsula][:multiple_cookies_incap_ses][:headers]["set-cookie"] = [
|
||||||
|
"first_cookie=for_testing",
|
||||||
|
"incap_ses_something=something",
|
||||||
|
"last_cookie=also_for_testing",
|
||||||
|
]
|
||||||
|
|
||||||
|
details
|
||||||
|
}
|
||||||
|
|
||||||
let(:location_urls) {
|
let(:location_urls) {
|
||||||
%w[
|
%w[
|
||||||
https://example.com/example/
|
https://example.com/example/
|
||||||
@ -294,6 +403,46 @@ describe "Utils::Curl" do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe "url_protected_by_cloudflare?" do
|
||||||
|
it "returns `true` when a URL is protected by Cloudflare" do
|
||||||
|
expect(url_protected_by_cloudflare?(details[:cloudflare][:single_cookie])).to be(true)
|
||||||
|
expect(url_protected_by_cloudflare?(details[:cloudflare][:multiple_cookies])).to be(true)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns `false` when a URL is not protected by Cloudflare" do
|
||||||
|
expect(url_protected_by_cloudflare?(details[:cloudflare][:no_server])).to be(false)
|
||||||
|
expect(url_protected_by_cloudflare?(details[:cloudflare][:wrong_server])).to be(false)
|
||||||
|
expect(url_protected_by_cloudflare?(details[:normal][:no_cookie])).to be(false)
|
||||||
|
expect(url_protected_by_cloudflare?(details[:normal][:ok])).to be(false)
|
||||||
|
expect(url_protected_by_cloudflare?(details[:normal][:single_cookie])).to be(false)
|
||||||
|
expect(url_protected_by_cloudflare?(details[:normal][:multiple_cookies])).to be(false)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns `false` when response headers are blank" do
|
||||||
|
expect(url_protected_by_cloudflare?(details[:normal][:blank_headers])).to be(false)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
describe "url_protected_by_incapsula?" do
|
||||||
|
it "returns `true` when a URL is protected by Incapsula" do
|
||||||
|
expect(url_protected_by_incapsula?(details[:incapsula][:single_cookie_visid_incap])).to be(true)
|
||||||
|
expect(url_protected_by_incapsula?(details[:incapsula][:single_cookie_incap_ses])).to be(true)
|
||||||
|
expect(url_protected_by_incapsula?(details[:incapsula][:multiple_cookies_visid_incap])).to be(true)
|
||||||
|
expect(url_protected_by_incapsula?(details[:incapsula][:multiple_cookies_incap_ses])).to be(true)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns `false` when a URL is not protected by Incapsula" do
|
||||||
|
expect(url_protected_by_incapsula?(details[:normal][:no_cookie])).to be(false)
|
||||||
|
expect(url_protected_by_incapsula?(details[:normal][:ok])).to be(false)
|
||||||
|
expect(url_protected_by_incapsula?(details[:normal][:single_cookie])).to be(false)
|
||||||
|
expect(url_protected_by_incapsula?(details[:normal][:multiple_cookies])).to be(false)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "returns `false` when response headers are blank" do
|
||||||
|
expect(url_protected_by_incapsula?(details[:normal][:blank_headers])).to be(false)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
describe "#parse_curl_output" do
|
describe "#parse_curl_output" do
|
||||||
it "returns a correct hash when curl output contains response(s) and body" do
|
it "returns a correct hash when curl output contains response(s) and body" do
|
||||||
expect(parse_curl_output("#{response_text[:ok]}#{body[:default]}"))
|
expect(parse_curl_output("#{response_text[:ok]}#{body[:default]}"))
|
||||||
|
@ -198,20 +198,40 @@ module Utils
|
|||||||
end
|
end
|
||||||
|
|
||||||
# Check if a URL is protected by Cloudflare (e.g. badlion.net and jaxx.io).
|
# Check if a URL is protected by Cloudflare (e.g. badlion.net and jaxx.io).
|
||||||
|
# @param details [Hash] Response information from
|
||||||
|
# `#curl_http_content_headers_and_checksum`.
|
||||||
|
# @return [true, false] Whether a response contains headers indicating that
|
||||||
|
# the URL is protected by Cloudflare.
|
||||||
|
sig { params(details: T::Hash[Symbol, T.untyped]).returns(T::Boolean) }
|
||||||
def url_protected_by_cloudflare?(details)
|
def url_protected_by_cloudflare?(details)
|
||||||
return false if details[:headers].blank?
|
return false if details[:headers].blank?
|
||||||
return unless [403, 503].include?(details[:status].to_i)
|
return false unless [403, 503].include?(details[:status].to_i)
|
||||||
|
|
||||||
details[:headers].fetch("set-cookie", nil)&.match?(/^(__cfduid|__cf_bm)=/i) &&
|
set_cookie_header = Array(details[:headers]["set-cookie"])
|
||||||
details[:headers].fetch("server", nil)&.match?(/^cloudflare/i)
|
has_cloudflare_cookie_header = set_cookie_header.compact.any? do |cookie|
|
||||||
|
cookie.match?(/^(__cfduid|__cf_bm)=/i)
|
||||||
|
end
|
||||||
|
|
||||||
|
server_header = Array(details[:headers]["server"])
|
||||||
|
has_cloudflare_server = server_header.compact.any? do |server|
|
||||||
|
server.match?(/^cloudflare/i)
|
||||||
|
end
|
||||||
|
|
||||||
|
has_cloudflare_cookie_header && has_cloudflare_server
|
||||||
end
|
end
|
||||||
|
|
||||||
# Check if a URL is protected by Incapsula (e.g. corsair.com).
|
# Check if a URL is protected by Incapsula (e.g. corsair.com).
|
||||||
|
# @param details [Hash] Response information from
|
||||||
|
# `#curl_http_content_headers_and_checksum`.
|
||||||
|
# @return [true, false] Whether a response contains headers indicating that
|
||||||
|
# the URL is protected by Incapsula.
|
||||||
|
sig { params(details: T::Hash[Symbol, T.untyped]).returns(T::Boolean) }
|
||||||
def url_protected_by_incapsula?(details)
|
def url_protected_by_incapsula?(details)
|
||||||
return false if details[:headers].blank?
|
return false if details[:headers].blank?
|
||||||
return false if details[:status].to_i != 403
|
return false if details[:status].to_i != 403
|
||||||
|
|
||||||
details[:headers].fetch("set-cookie", nil)&.match?(/^(visid_incap|incap_ses)_/i)
|
set_cookie_header = Array(details[:headers]["set-cookie"])
|
||||||
|
set_cookie_header.compact.any? { |cookie| cookie.match?(/^(visid_incap|incap_ses)_/i) }
|
||||||
end
|
end
|
||||||
|
|
||||||
def curl_check_http_content(url, url_type, specs: {}, user_agents: [:default],
|
def curl_check_http_content(url, url_type, specs: {}, user_agents: [:default],
|
||||||
|
Loading…
x
Reference in New Issue
Block a user