From d5b3ae095c3e172559494e2a841507c6277c1803 Mon Sep 17 00:00:00 2001 From: Colin Dean Date: Tue, 20 May 2025 11:06:20 -0400 Subject: [PATCH] Prohibit non-ASCII characters in URLs, nudge toward punycode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inspired by curl's blog post, [Detecting malicious Unicode][1], this likely captures most if not all cases and nudges the user toward supplying IDNs with punycode. A possible improvement would be telling the user exactly what punycode domain to use instead, but that may require another library as I can't quickly find something built into the Ruby stdlib that handles punycode encoding. [1]: https://daniel.haxx.se/blog/2025/05/16/detecting-malicious-unicode/ Co-authored-by: Štefan Baebler <319826+stefanb@users.noreply.github.com> --- Library/Homebrew/rubocops/shared/url_helper.rb | 6 ++++++ Library/Homebrew/test/rubocops/urls_spec.rb | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/Library/Homebrew/rubocops/shared/url_helper.rb b/Library/Homebrew/rubocops/shared/url_helper.rb index 1d5e6a32ff..0a49816f13 100644 --- a/Library/Homebrew/rubocops/shared/url_helper.rb +++ b/Library/Homebrew/rubocops/shared/url_helper.rb @@ -35,6 +35,12 @@ module RuboCop def audit_url(type, urls, mirrors, livecheck_url: false) @type = type + # URLs must be ASCII; IDNs must be punycode + ascii_pattern = /[^\p{ASCII}]+/ + audit_urls(urls, ascii_pattern) do |_, url| + problem "Please use the ASCII (Punycode encoded host, URL-encoded path and query) version of #{url}." + end + # GNU URLs; doesn't apply to mirrors gnu_pattern = %r{^(?:https?|ftp)://ftpmirror\.gnu\.org/(.*)} audit_urls(urls, gnu_pattern) do |match, url| diff --git a/Library/Homebrew/test/rubocops/urls_spec.rb b/Library/Homebrew/test/rubocops/urls_spec.rb index 80bf49199a..fab6d84ada 100644 --- a/Library/Homebrew/test/rubocops/urls_spec.rb +++ b/Library/Homebrew/test/rubocops/urls_spec.rb @@ -177,6 +177,14 @@ RSpec.describe RuboCop::Cop::FormulaAudit::Urls do "url" => "svn+http://brew.sh/foo/bar", "msg" => "Use of the svn+http:// scheme is deprecated, pass `:using => :svn` instead", "col" => 2, + }, { + "url" => "https://🫠.sh/foo/bar", + "msg" => "Please use the ASCII (Punycode encoded host, URL-encoded path and query) version of https://🫠.sh/foo/bar.", + "col" => 2, + }, { + "url" => "https://ßrew.sh/foo/bar", + "msg" => "Please use the ASCII (Punycode encoded host, URL-encoded path and query) version of https://ßrew.sh/foo/bar.", + "col" => 2, }] end