audit: further refactor http content checks.

Check homepages and don’t check mirrors unless `—strict`.
This commit is contained in:
Mike McQuaid 2017-02-23 10:15:06 +00:00
parent 1a436b4d24
commit 9fa014710d

View File

@ -174,30 +174,62 @@ class FormulaAuditor
@specs = %w[stable devel head].map { |s| formula.send(s) }.compact @specs = %w[stable devel head].map { |s| formula.send(s) }.compact
end end
def url_status_code(url, range: false) def self.check_http_content(url, user_agents: [:default])
# The system Curl is too old and unreliable with HTTPS homepages on details = nil
# Yosemite and below. user_agent = nil
return "200" unless DevelopmentTools.curl_handles_most_https_homepages? user_agents.each do |ua|
details = http_content_headers_and_checksum(url, user_agent: ua)
extra_args = [ user_agent = ua
"--connect-timeout", "15", break if details[:status].to_s.start_with?("2")
"--output", "/dev/null",
"--write-out", "%{http_code}"
]
extra_args << "--range" << "0-0" if range
extra_args << url
status_code = nil
[:browser, :default].each do |user_agent|
args = curl_args(
extra_args: extra_args,
show_output: true,
user_agent: user_agent,
)
status_code = Open3.popen3(*args) { |_, stdout, _, _| stdout.read }
break if status_code.start_with? "2"
end end
status_code
return "The URL #{url} is not reachable" unless details[:status]
unless details[:status].start_with? "2"
return "The URL #{url} is not reachable (HTTP status code #{details[:status]})"
end
return unless url.start_with? "http:"
secure_url = url.sub "http", "https"
secure_details =
http_content_headers_and_checksum(secure_url, user_agent: user_agent)
if !details[:status].to_s.start_with?("2") ||
!secure_details[:status].to_s.start_with?("2")
return
end
etag_match = details[:etag] &&
details[:etag] == secure_details[:etag]
content_length_match =
details[:content_length] &&
details[:content_length] == secure_details[:content_length]
file_match = details[:file_hash] == secure_details[:file_hash]
return if !etag_match && !content_length_match && !file_match
"The URL #{url} could use HTTPS rather than HTTP"
end
def self.http_content_headers_and_checksum(url, user_agent: :default)
args = curl_args(
extra_args: ["--connect-timeout", "15", "--include", url],
show_output: true,
user_agent: user_agent,
)
output = Open3.popen3(*args) { |_, stdout, _, _| stdout.read }
status_code = :unknown
while status_code == :unknown || status_code.to_s.start_with?("3")
headers, _, output = output.partition("\r\n\r\n")
status_code = headers[%r{HTTP\/.* (\d+)}, 1]
end
{
status: status_code,
etag: headers[%r{ETag: ([wW]\/)?"(([^"]|\\")*)"}, 2],
content_length: headers[/Content-Length: (\d+)/, 1],
file_hash: Digest::SHA256.digest(output),
}
end end
def audit_style def audit_style
@ -619,9 +651,13 @@ class FormulaAuditor
return unless @online return unless @online
status_code = url_status_code(homepage) # The system Curl is too old and unreliable with HTTPS homepages on
return if status_code.start_with? "2" # Yosemite and below.
problem "The homepage #{homepage} is not reachable (HTTP status code #{status_code})" return unless DevelopmentTools.curl_handles_most_https_homepages?
if http_content_problem = FormulaAuditor.check_http_content(homepage,
user_agents: [:browser, :default])
problem http_content_problem
end
end end
def audit_bottle_spec def audit_bottle_spec
@ -671,11 +707,11 @@ class FormulaAuditor
%w[Stable Devel HEAD].each do |name| %w[Stable Devel HEAD].each do |name|
next unless spec = formula.send(name.downcase) next unless spec = formula.send(name.downcase)
ra = ResourceAuditor.new(spec, online: @online).audit ra = ResourceAuditor.new(spec, online: @online, strict: @strict).audit
problems.concat ra.problems.map { |problem| "#{name}: #{problem}" } problems.concat ra.problems.map { |problem| "#{name}: #{problem}" }
spec.resources.each_value do |resource| spec.resources.each_value do |resource|
ra = ResourceAuditor.new(resource, online: @online).audit ra = ResourceAuditor.new(resource, online: @online, strict: @strict).audit
problems.concat ra.problems.map { |problem| problems.concat ra.problems.map { |problem|
"#{name} resource #{resource.name.inspect}: #{problem}" "#{name} resource #{resource.name.inspect}: #{problem}"
} }
@ -1231,6 +1267,7 @@ class ResourceAuditor
@using = resource.using @using = resource.using
@specs = resource.specs @specs = resource.specs
@online = options[:online] @online = options[:online]
@strict = options[:strict]
@problems = [] @problems = []
end end
@ -1492,7 +1529,10 @@ class ResourceAuditor
urls.each do |url| urls.each do |url|
strategy = DownloadStrategyDetector.detect(url) strategy = DownloadStrategyDetector.detect(url)
if strategy <= CurlDownloadStrategy && !url.start_with?("file") if strategy <= CurlDownloadStrategy && !url.start_with?("file")
check_http_content url next if !@strict && mirrors.include?(url)
if http_content_problem = FormulaAuditor.check_http_content(url)
problem http_content_problem
end
elsif strategy <= GitDownloadStrategy elsif strategy <= GitDownloadStrategy
unless Utils.git_remote_exists url unless Utils.git_remote_exists url
problem "The URL #{url} is not a valid git URL" problem "The URL #{url} is not a valid git URL"
@ -1505,53 +1545,7 @@ class ResourceAuditor
end end
end end
def check_http_content(url)
details = get_content_details(url)
if details[:status].nil?
problem "The URL #{url} is not reachable"
elsif !details[:status].start_with? "2"
problem "The URL #{url} is not reachable (HTTP status code #{details[:status]})"
end
return unless url.start_with? "http:"
secure_url = url.sub "http", "https"
secure_details = get_content_details(secure_url)
if !details[:status].to_s.start_with?("2") ||
!secure_details[:status].to_s.start_with?("2")
return
end
etag_match = details[:etag] &&
details[:etag] == secure_details[:etag]
content_length_match =
details[:content_length] &&
details[:content_length] == secure_details[:content_length]
file_match = details[:file_hash] == secure_details[:file_hash]
return if !etag_match && !content_length_match && !file_match
problem "The URL #{url} could use HTTPS rather than HTTP"
end
def problem(text) def problem(text)
@problems << text @problems << text
end end
def get_content_details(url)
out = {}
output, = curl_output "--connect-timeout", "15", "--include", url
status_code = :unknown
while status_code == :unknown || status_code.to_s.start_with?("3")
headers, _, output = output.partition("\r\n\r\n")
status_code = headers[%r{HTTP\/.* (\d+)}, 1]
end
out[:status] = status_code
out[:etag] = headers[%r{ETag: ([wW]\/)?"(([^"]|\\")*)"}, 2]
out[:content_length] = headers[/Content-Length: (\d+)/, 1]
out[:file_hash] = Digest::SHA256.digest output
out
end
end end