314 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
			
		
		
	
	
			314 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
| # frozen_string_literal: true
 | |
| 
 | |
| require "rubocops/extend/formula"
 | |
| 
 | |
| module RuboCop
 | |
|   module Cop
 | |
|     module FormulaAudit
 | |
|       # This cop audits URLs and mirrors in Formulae.
 | |
|       class Urls < FormulaCop
 | |
|         # These are parts of URLs that look like binaries but actually aren't.
 | |
|         NOT_A_BINARY_URL_PREFIX_ALLOWLIST = %w[
 | |
|           https://downloads.sourceforge.net/project/astyle/astyle/
 | |
|           https://downloads.sourceforge.net/project/bittwist/
 | |
|           https://downloads.sourceforge.net/project/launch4j/
 | |
|           https://github.com/ChrisJohnsen/tmux-MacOSX-pasteboard/archive/
 | |
|           https://github.com/obihann/archey-osx
 | |
|           https://github.com/sindresorhus/macos-wallpaper/archive/
 | |
|           https://raw.githubusercontent.com/liyanage/macosx-shell-scripts/
 | |
|           https://osxbook.com/book/bonus/chapter8/core/download/gcore
 | |
|           https://naif.jpl.nasa.gov/pub/naif/toolkit/C/MacIntel_OSX_AppleC_64bit/packages/
 | |
|           https://artifacts.videolan.org/x264/release-macos/
 | |
|         ].freeze
 | |
| 
 | |
|         # These are formulae that, sadly, require an upstream binary to bootstrap.
 | |
|         BINARY_BOOTSTRAP_FORMULA_URLS_ALLOWLIST = %w[
 | |
|           clozure-cl
 | |
|           crystal
 | |
|           fpc
 | |
|           ghc
 | |
|           ghc@8.6
 | |
|           ghc@8.8
 | |
|           go
 | |
|           go@1.9
 | |
|           go@1.10
 | |
|           go@1.11
 | |
|           go@1.12
 | |
|           go@1.13
 | |
|           haskell-stack
 | |
|           ldc
 | |
|           mlton
 | |
|           openjdk
 | |
|           openjdk@11
 | |
|           pypy
 | |
|           sbcl
 | |
|           rust
 | |
|         ].freeze
 | |
| 
 | |
|         def audit_formula(_node, _class_node, _parent_class_node, body_node)
 | |
|           urls = find_every_func_call_by_name(body_node, :url)
 | |
|           mirrors = find_every_func_call_by_name(body_node, :mirror)
 | |
| 
 | |
|           # Identify livecheck urls, to skip some checks for them
 | |
|           livecheck_url = if (livecheck = find_every_func_call_by_name(body_node, :livecheck).first) &&
 | |
|                              (livecheck_url = find_every_func_call_by_name(livecheck.parent, :url).first)
 | |
|             string_content(parameters(livecheck_url).first)
 | |
|           end
 | |
| 
 | |
|           # GNU urls; doesn't apply to mirrors
 | |
|           gnu_pattern = %r{^(?:https?|ftp)://ftpmirror.gnu.org/(.*)}
 | |
|           audit_urls(urls, gnu_pattern) do |match, url|
 | |
|             problem "Please use \"https://ftp.gnu.org/gnu/#{match[1]}\" instead of #{url}."
 | |
|           end
 | |
| 
 | |
|           # Fossies upstream requests they aren't used as primary URLs
 | |
|           # https://github.com/Homebrew/homebrew-core/issues/14486#issuecomment-307753234
 | |
|           fossies_pattern = %r{^https?://fossies\.org/}
 | |
|           audit_urls(urls, fossies_pattern) do
 | |
|             problem "Please don't use fossies.org in the url (using as a mirror is fine)"
 | |
|           end
 | |
| 
 | |
|           apache_pattern = %r{^https?://(?:[^/]*\.)?apache\.org/(?:dyn/closer\.cgi\?path=/?|dist/)(.*)}i
 | |
|           audit_urls(urls, apache_pattern) do |match, url|
 | |
|             next if url == livecheck_url
 | |
| 
 | |
|             problem "#{url} should be `https://www.apache.org/dyn/closer.lua?path=#{match[1]}`"
 | |
|           end
 | |
| 
 | |
|           version_control_pattern = %r{^(cvs|bzr|hg|fossil)://}
 | |
|           audit_urls(urls, version_control_pattern) do |match, _|
 | |
|             problem "Use of the #{match[1]}:// scheme is deprecated, pass `:using => :#{match[1]}` instead"
 | |
|           end
 | |
| 
 | |
|           svn_pattern = %r{^svn\+http://}
 | |
|           audit_urls(urls, svn_pattern) do |_, _|
 | |
|             problem "Use of the svn+http:// scheme is deprecated, pass `:using => :svn` instead"
 | |
|           end
 | |
| 
 | |
|           audit_urls(mirrors, /.*/) do |_, mirror|
 | |
|             urls.each do |url|
 | |
|               url_string = string_content(parameters(url).first)
 | |
|               next unless url_string.eql?(mirror)
 | |
| 
 | |
|               problem "URL should not be duplicated as a mirror: #{url_string}"
 | |
|             end
 | |
|           end
 | |
| 
 | |
|           urls += mirrors
 | |
| 
 | |
|           # Check a variety of SSL/TLS URLs that don't consistently auto-redirect
 | |
|           # or are overly common errors that need to be reduced & fixed over time.
 | |
|           http_to_https_patterns = Regexp.union([%r{^http://ftp\.gnu\.org/},
 | |
|                                                  %r{^http://ftpmirror\.gnu\.org/},
 | |
|                                                  %r{^http://download\.savannah\.gnu\.org/},
 | |
|                                                  %r{^http://download-mirror\.savannah\.gnu\.org/},
 | |
|                                                  %r{^http://(?:[^/]*\.)?apache\.org/},
 | |
|                                                  %r{^http://code\.google\.com/},
 | |
|                                                  %r{^http://fossies\.org/},
 | |
|                                                  %r{^http://mirrors\.kernel\.org/},
 | |
|                                                  %r{^http://mirrors\.ocf\.berkeley\.edu/},
 | |
|                                                  %r{^http://(?:[^/]*\.)?bintray\.com/},
 | |
|                                                  %r{^http://tools\.ietf\.org/},
 | |
|                                                  %r{^http://launchpad\.net/},
 | |
|                                                  %r{^http://github\.com/},
 | |
|                                                  %r{^http://bitbucket\.org/},
 | |
|                                                  %r{^http://anonscm\.debian\.org/},
 | |
|                                                  %r{^http://cpan\.metacpan\.org/},
 | |
|                                                  %r{^http://hackage\.haskell\.org/},
 | |
|                                                  %r{^http://(?:[^/]*\.)?archive\.org},
 | |
|                                                  %r{^http://(?:[^/]*\.)?freedesktop\.org},
 | |
|                                                  %r{^http://(?:[^/]*\.)?mirrorservice\.org/}])
 | |
|           audit_urls(urls, http_to_https_patterns) do |_, url|
 | |
|             problem "Please use https:// for #{url}"
 | |
|           end
 | |
| 
 | |
|           apache_mirror_pattern = %r{^https?://(?:[^/]*\.)?apache\.org/dyn/closer\.(?:cgi|lua)\?path=/?(.*)}i
 | |
|           audit_urls(mirrors, apache_mirror_pattern) do |match, mirror|
 | |
|             problem "Please use `https://archive.apache.org/dist/#{match[1]}` as a mirror instead of #{mirror}."
 | |
|           end
 | |
| 
 | |
|           cpan_pattern = %r{^http://search\.mcpan\.org/CPAN/(.*)}i
 | |
|           audit_urls(urls, cpan_pattern) do |match, url|
 | |
|             problem "#{url} should be `https://cpan.metacpan.org/#{match[1]}`"
 | |
|           end
 | |
| 
 | |
|           gnome_pattern = %r{^(http|ftp)://ftp\.gnome\.org/pub/gnome/(.*)}i
 | |
|           audit_urls(urls, gnome_pattern) do |match, url|
 | |
|             problem "#{url} should be `https://download.gnome.org/#{match[2]}`"
 | |
|           end
 | |
| 
 | |
|           debian_pattern = %r{^git://anonscm\.debian\.org/users/(.*)}i
 | |
|           audit_urls(urls, debian_pattern) do |match, url|
 | |
|             problem "#{url} should be `https://anonscm.debian.org/git/users/#{match[1]}`"
 | |
|           end
 | |
| 
 | |
|           # Prefer HTTP/S when possible over FTP protocol due to possible firewalls.
 | |
|           mirror_service_pattern = %r{^ftp://ftp\.mirrorservice\.org}
 | |
|           audit_urls(urls, mirror_service_pattern) do |_, url|
 | |
|             problem "Please use https:// for #{url}"
 | |
|           end
 | |
| 
 | |
|           cpan_ftp_pattern = %r{^ftp://ftp\.cpan\.org/pub/CPAN(.*)}i
 | |
|           audit_urls(urls, cpan_ftp_pattern) do |match_obj, url|
 | |
|             problem "#{url} should be `http://search.cpan.org/CPAN#{match_obj[1]}`"
 | |
|           end
 | |
| 
 | |
|           # SourceForge url patterns
 | |
|           sourceforge_patterns = %r{^https?://.*\b(sourceforge|sf)\.(com|net)}
 | |
|           audit_urls(urls, sourceforge_patterns) do |_, url|
 | |
|             # Skip if the URL looks like a SVN repo
 | |
|             next if url.include? "/svnroot/"
 | |
|             next if url.include? "svn.sourceforge"
 | |
|             next if url.include? "/p/"
 | |
| 
 | |
|             if url =~ /(\?|&)use_mirror=/
 | |
|               problem "Don't use #{Regexp.last_match(1)}use_mirror in SourceForge urls (url is #{url})."
 | |
|             end
 | |
| 
 | |
|             problem "Don't use /download in SourceForge urls (url is #{url})." if url.end_with?("/download")
 | |
| 
 | |
|             if url.match?(%r{^https?://sourceforge\.}) && url != livecheck_url
 | |
|               problem "Use https://downloads.sourceforge.net to get geolocation (url is #{url})."
 | |
|             end
 | |
| 
 | |
|             if url.match?(%r{^https?://prdownloads\.})
 | |
|               problem <<~EOS.chomp
 | |
|                 Don't use prdownloads in SourceForge urls (url is #{url}).
 | |
|                         See: http://librelist.com/browser/homebrew/2011/1/12/prdownloads-is-bad/
 | |
|               EOS
 | |
|             end
 | |
| 
 | |
|             if url.match?(%r{^http://\w+\.dl\.})
 | |
|               problem "Don't use specific dl mirrors in SourceForge urls (url is #{url})."
 | |
|             end
 | |
| 
 | |
|             problem "Please use https:// for #{url}" if url.start_with? "http://downloads"
 | |
|           end
 | |
| 
 | |
|           # Debian has an abundance of secure mirrors. Let's not pluck the insecure
 | |
|           # one out of the grab bag.
 | |
|           unsecure_deb_pattern = %r{^http://http\.debian\.net/debian/(.*)}i
 | |
|           audit_urls(urls, unsecure_deb_pattern) do |match, _|
 | |
|             problem <<~EOS
 | |
|               Please use a secure mirror for Debian URLs.
 | |
|               We recommend:
 | |
|                 https://deb.debian.org/debian/#{match[1]}
 | |
|             EOS
 | |
|           end
 | |
| 
 | |
|           # Check to use canonical urls for Debian packages
 | |
|           noncanon_deb_pattern =
 | |
|             Regexp.union([%r{^https://mirrors\.kernel\.org/debian/},
 | |
|                           %r{^https://mirrors\.ocf\.berkeley\.edu/debian/},
 | |
|                           %r{^https://(?:[^/]*\.)?mirrorservice\.org/sites/ftp\.debian\.org/debian/}])
 | |
|           audit_urls(urls, noncanon_deb_pattern) do |_, url|
 | |
|             problem "Please use https://deb.debian.org/debian/ for #{url}"
 | |
|           end
 | |
| 
 | |
|           # Check for new-url Google Code download urls, https:// is preferred
 | |
|           google_code_pattern = Regexp.union([%r{^http://.*\.googlecode\.com/files.*},
 | |
|                                               %r{^http://code\.google\.com/}])
 | |
|           audit_urls(urls, google_code_pattern) do |_, url|
 | |
|             problem "Please use https:// for #{url}"
 | |
|           end
 | |
| 
 | |
|           # Check for git:// GitHub repo urls, https:// is preferred.
 | |
|           git_gh_pattern = %r{^git://[^/]*github\.com/}
 | |
|           audit_urls(urls, git_gh_pattern) do |_, url|
 | |
|             problem "Please use https:// for #{url}"
 | |
|           end
 | |
| 
 | |
|           # Check for git:// Gitorious repo urls, https:// is preferred.
 | |
|           git_gitorious_pattern = %r{^git://[^/]*gitorious\.org/}
 | |
|           audit_urls(urls, git_gitorious_pattern) do |_, url|
 | |
|             problem "Please use https:// for #{url}"
 | |
|           end
 | |
| 
 | |
|           # Check for http:// GitHub repo urls, https:// is preferred.
 | |
|           gh_pattern = %r{^http://github\.com/.*\.git$}
 | |
|           audit_urls(urls, gh_pattern) do |_, url|
 | |
|             problem "Please use https:// for #{url}"
 | |
|           end
 | |
| 
 | |
|           # Check for master branch GitHub archives.
 | |
|           tarball_gh_pattern = %r{^https://github\.com/.*archive/master\.(tar\.gz|zip)$}
 | |
|           audit_urls(urls, tarball_gh_pattern) do
 | |
|             problem "Use versioned rather than branch tarballs for stable checksums."
 | |
|           end
 | |
| 
 | |
|           # Use new-style archive downloads
 | |
|           archive_gh_pattern = %r{https://.*github.*/(?:tar|zip)ball/}
 | |
|           audit_urls(urls, archive_gh_pattern) do |_, url|
 | |
|             next if url.end_with?(".git")
 | |
| 
 | |
|             problem "Use /archive/ URLs for GitHub tarballs (url is #{url})."
 | |
|           end
 | |
| 
 | |
|           # Don't use GitHub .zip files
 | |
|           zip_gh_pattern = %r{https://.*github.*/(archive|releases)/.*\.zip$}
 | |
|           audit_urls(urls, zip_gh_pattern) do |_, url|
 | |
|             next if url.match?(%r{releases/download})
 | |
| 
 | |
|             problem "Use GitHub tarballs rather than zipballs (url is #{url})."
 | |
|           end
 | |
| 
 | |
|           # Don't use GitHub codeload URLs
 | |
|           codeload_gh_pattern = %r{https?://codeload\.github\.com/(.+)/(.+)/(?:tar\.gz|zip)/(.+)}
 | |
|           audit_urls(urls, codeload_gh_pattern) do |match, url|
 | |
|             problem <<~EOS
 | |
|               Use GitHub archive URLs:
 | |
|                 https://github.com/#{match[1]}/#{match[2]}/archive/#{match[3]}.tar.gz
 | |
|               Rather than codeload:
 | |
|                 #{url}
 | |
|             EOS
 | |
|           end
 | |
| 
 | |
|           # Check for Maven Central urls, prefer HTTPS redirector over specific host
 | |
|           maven_pattern = %r{https?://(?:central|repo\d+)\.maven\.org/maven2/(.+)$}
 | |
|           audit_urls(urls, maven_pattern) do |match, url|
 | |
|             problem "#{url} should be `https://search.maven.org/remotecontent?filepath=#{match[1]}`"
 | |
|           end
 | |
| 
 | |
|           return if formula_tap != "homebrew-core"
 | |
| 
 | |
|           # Check for binary URLs
 | |
|           audit_urls(urls, /(darwin|macos|osx)/i) do |match, url|
 | |
|             next if @formula_name.include?(match.to_s.downcase)
 | |
|             next if url.match?(/.(patch|diff)(\?full_index=1)?$/)
 | |
|             next if NOT_A_BINARY_URL_PREFIX_ALLOWLIST.any? { |prefix| url.start_with?(prefix) }
 | |
|             next if BINARY_BOOTSTRAP_FORMULA_URLS_ALLOWLIST.include?(@formula_name)
 | |
| 
 | |
|             problem "#{url} looks like a binary package, not a source archive; " \
 | |
|                     "homebrew/core is source-only."
 | |
|           end
 | |
|         end
 | |
|       end
 | |
| 
 | |
|       class PyPiUrls < FormulaCop
 | |
|         def audit_formula(_node, _class_node, _parent_class_node, body_node)
 | |
|           urls = find_every_func_call_by_name(body_node, :url)
 | |
|           mirrors = find_every_func_call_by_name(body_node, :mirror)
 | |
|           urls += mirrors
 | |
| 
 | |
|           # Check pypi urls
 | |
|           @pypi_pattern = %r{^https?://pypi.python.org/(.*)}
 | |
|           audit_urls(urls, @pypi_pattern) do |match, url|
 | |
|             problem "#{url} should be `https://files.pythonhosted.org/#{match[1]}`"
 | |
|           end
 | |
|         end
 | |
| 
 | |
|         def autocorrect(node)
 | |
|           lambda do |corrector|
 | |
|             url_string_node = parameters(node).first
 | |
|             url = string_content(url_string_node)
 | |
|             match = regex_match_group(url_string_node, @pypi_pattern)
 | |
|             correction = node.source.sub(url, "https://files.pythonhosted.org/#{match[1]}")
 | |
|             corrector.insert_before(node.source_range, correction)
 | |
|             corrector.remove(node.source_range)
 | |
|           end
 | |
|         end
 | |
|       end
 | |
|     end
 | |
|   end
 | |
| end
 | 
