Merge pull request #3318 from Git-Jiro/improve_audit

Add audit check for URL schema
This commit is contained in:
Markus Reiter 2017-12-05 21:34:50 +01:00 committed by GitHub
commit 497348a8b7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 111 additions and 97 deletions

View File

@ -2,6 +2,7 @@ require "hbc/checkable"
require "hbc/download"
require "digest"
require "utils/git"
require "utils/curl"
module Hbc
class Audit
@ -30,6 +31,7 @@ module Hbc
check_url
check_generic_artifacts
check_token_conflicts
check_https_availability
check_download
check_single_pre_postflight
check_single_uninstall_zap
@ -275,6 +277,17 @@ module Hbc
"#{core_tap.default_remote}/blob/master/Formula/#{cask.token}.rb"
end
def check_https_availability
check_url_for_https_availability(cask.url, user_agents: [cask.url.user_agent]) unless cask.url.to_s.empty?
check_url_for_https_availability(cask.appcast) unless cask.appcast.to_s.empty?
check_url_for_https_availability(cask.homepage) unless cask.homepage.to_s.empty?
end
def check_url_for_https_availability(url_to_check, user_agents: [:default])
problem = curl_check_http_content(url_to_check.to_s, user_agents: user_agents)
add_error problem unless problem.nil?
end
def check_download
return unless download && cask.url
odebug "Auditing download"

View File

@ -40,6 +40,7 @@
require "formula"
require "formula_versions"
require "utils"
require "utils/curl"
require "extend/ENV"
require "formula_cellar_checks"
require "official_taps"
@ -202,98 +203,6 @@ class FormulaAuditor
@specs = %w[stable devel head].map { |s| formula.send(s) }.compact
end
def self.check_http_content(url, user_agents: [:default], check_content: false, strict: false, require_http: false)
return unless url.start_with? "http"
details = nil
user_agent = nil
hash_needed = url.start_with?("http:") && !require_http
user_agents.each do |ua|
details = http_content_headers_and_checksum(url, hash_needed: hash_needed, user_agent: ua)
user_agent = ua
break if details[:status].to_s.start_with?("2")
end
unless details[:status]
# Hack around https://github.com/Homebrew/brew/issues/3199
return if MacOS.version == :el_capitan
return "The URL #{url} is not reachable"
end
unless details[:status].start_with? "2"
return "The URL #{url} is not reachable (HTTP status code #{details[:status]})"
end
return unless hash_needed
secure_url = url.sub "http", "https"
secure_details =
http_content_headers_and_checksum(secure_url, hash_needed: true, user_agent: user_agent)
if !details[:status].to_s.start_with?("2") ||
!secure_details[:status].to_s.start_with?("2")
return
end
etag_match = details[:etag] &&
details[:etag] == secure_details[:etag]
content_length_match =
details[:content_length] &&
details[:content_length] == secure_details[:content_length]
file_match = details[:file_hash] == secure_details[:file_hash]
if etag_match || content_length_match || file_match
return "The URL #{url} should use HTTPS rather than HTTP"
end
return unless check_content
no_protocol_file_contents = %r{https?:\\?/\\?/}
details[:file] = details[:file].gsub(no_protocol_file_contents, "/")
secure_details[:file] = secure_details[:file].gsub(no_protocol_file_contents, "/")
# Check for the same content after removing all protocols
if details[:file] == secure_details[:file]
return "The URL #{url} should use HTTPS rather than HTTP"
end
return unless strict
# Same size, different content after normalization
# (typical causes: Generated ID, Timestamp, Unix time)
if details[:file].length == secure_details[:file].length
return "The URL #{url} may be able to use HTTPS rather than HTTP. Please verify it in a browser."
end
lenratio = (100 * secure_details[:file].length / details[:file].length).to_i
return unless (90..110).cover?(lenratio)
"The URL #{url} may be able to use HTTPS rather than HTTP. Please verify it in a browser."
end
def self.http_content_headers_and_checksum(url, hash_needed: false, user_agent: :default)
max_time = hash_needed ? "600" : "25"
output, = curl_output(
"--connect-timeout", "15", "--include", "--max-time", max_time, "--location", url,
user_agent: user_agent
)
status_code = :unknown
while status_code == :unknown || status_code.to_s.start_with?("3")
headers, _, output = output.partition("\r\n\r\n")
status_code = headers[%r{HTTP\/.* (\d+)}, 1]
end
output_hash = Digest::SHA256.digest(output) if hash_needed
{
status: status_code,
etag: headers[%r{ETag: ([wW]\/)?"(([^"]|\\")*)"}, 2],
content_length: headers[/Content-Length: (\d+)/, 1],
file_hash: output_hash,
file: output,
}
end
def audit_style
return unless @style_offenses
display_cop_names = ARGV.include?("--display-cop-names")
@ -558,10 +467,10 @@ class FormulaAuditor
return unless @online
return unless DevelopmentTools.curl_handles_most_https_certificates?
if http_content_problem = FormulaAuditor.check_http_content(homepage,
user_agents: [:browser, :default],
check_content: true,
strict: @strict)
if http_content_problem = curl_check_http_content(homepage,
user_agents: [:browser, :default],
check_content: true,
strict: @strict)
problem http_content_problem
end
end
@ -1037,7 +946,7 @@ class ResourceAuditor
# A `brew mirror`'ed URL is usually not yet reachable at the time of
# pull request.
next if url =~ %r{^https://dl.bintray.com/homebrew/mirror/}
if http_content_problem = FormulaAuditor.check_http_content(url, require_http: curl_openssl_or_deps)
if http_content_problem = curl_check_http_content(url, require_http: curl_openssl_or_deps)
problem http_content_problem
end
elsif strategy <= GitDownloadStrategy

View File

@ -59,3 +59,95 @@ end
def curl_output(*args, **options)
Open3.capture3(*curl_args(*args, show_output: true, **options))
end
def curl_check_http_content(url, user_agents: [:default], check_content: false, strict: false, require_http: false)
return unless url.start_with? "http"
details = nil
user_agent = nil
hash_needed = url.start_with?("http:") && !require_http
user_agents.each do |ua|
details = curl_http_content_headers_and_checksum(url, hash_needed: hash_needed, user_agent: ua)
user_agent = ua
break if details[:status].to_s.start_with?("2")
end
unless details[:status]
# Hack around https://github.com/Homebrew/brew/issues/3199
return if MacOS.version == :el_capitan
return "The URL #{url} is not reachable"
end
unless details[:status].start_with? "2"
return "The URL #{url} is not reachable (HTTP status code #{details[:status]})"
end
return unless hash_needed
secure_url = url.sub "http", "https"
secure_details =
curl_http_content_headers_and_checksum(secure_url, hash_needed: true, user_agent: user_agent)
if !details[:status].to_s.start_with?("2") ||
!secure_details[:status].to_s.start_with?("2")
return
end
etag_match = details[:etag] &&
details[:etag] == secure_details[:etag]
content_length_match =
details[:content_length] &&
details[:content_length] == secure_details[:content_length]
file_match = details[:file_hash] == secure_details[:file_hash]
if etag_match || content_length_match || file_match
return "The URL #{url} should use HTTPS rather than HTTP"
end
return unless check_content
no_protocol_file_contents = %r{https?:\\?/\\?/}
details[:file] = details[:file].gsub(no_protocol_file_contents, "/")
secure_details[:file] = secure_details[:file].gsub(no_protocol_file_contents, "/")
# Check for the same content after removing all protocols
if details[:file] == secure_details[:file]
return "The URL #{url} should use HTTPS rather than HTTP"
end
return unless strict
# Same size, different content after normalization
# (typical causes: Generated ID, Timestamp, Unix time)
if details[:file].length == secure_details[:file].length
return "The URL #{url} may be able to use HTTPS rather than HTTP. Please verify it in a browser."
end
lenratio = (100 * secure_details[:file].length / details[:file].length).to_i
return unless (90..110).cover?(lenratio)
"The URL #{url} may be able to use HTTPS rather than HTTP. Please verify it in a browser."
end
def curl_http_content_headers_and_checksum(url, hash_needed: false, user_agent: :default)
max_time = hash_needed ? "600" : "25"
output, = curl_output(
"--connect-timeout", "15", "--include", "--max-time", max_time, "--location", url,
user_agent: user_agent
)
status_code = :unknown
while status_code == :unknown || status_code.to_s.start_with?("3")
headers, _, output = output.partition("\r\n\r\n")
status_code = headers[%r{HTTP\/.* (\d+)}, 1]
end
output_hash = Digest::SHA256.digest(output) if hash_needed
{
status: status_code,
etag: headers[%r{ETag: ([wW]\/)?"(([^"]|\\")*)"}, 2],
content_length: headers[/Content-Length: (\d+)/, 1],
file_hash: output_hash,
file: output,
}
end