From cd313adbe4fcccae7f7102e23d134fced26a2fbd Mon Sep 17 00:00:00 2001 From: Mike McQuaid Date: Fri, 16 Jun 2023 15:39:49 +0100 Subject: [PATCH] cmd/info: add (undocumented) --github-packages-downloads option. This screen scrapes GitHub Packages download counts from HTML for a core formula. It's useful when figuring out the difference between our analytics numbers (which people can opt-out of) and our bottle downloads (which people cannot due to the way GitHub Packages works). --- Library/Homebrew/cmd/info.rb | 3 ++ Library/Homebrew/utils/analytics.rb | 51 +++++++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/Library/Homebrew/cmd/info.rb b/Library/Homebrew/cmd/info.rb index 266cf96b82..1091a73e93 100644 --- a/Library/Homebrew/cmd/info.rb +++ b/Library/Homebrew/cmd/info.rb @@ -42,6 +42,9 @@ module Homebrew "The value for must be `install`, `install-on-request` or `build-error`; " \ "`cask-install` or `os-version` may be specified if is not. " \ "The default is `install`." + switch "--github-packages-downloads", + description: "Scrape GitHub Packages download counts from HTML for a core formula.", + hidden: true switch "--github", description: "Open the GitHub source page for and in a browser. " \ "To view the history locally: `brew log -p` or " diff --git a/Library/Homebrew/utils/analytics.rb b/Library/Homebrew/utils/analytics.rb index 04ea281aa6..f61f72a8f3 100644 --- a/Library/Homebrew/utils/analytics.rb +++ b/Library/Homebrew/utils/analytics.rb @@ -182,7 +182,7 @@ options: options) table_output(category, days, results, os_version: os_version, cask_install: cask_install) end - def get_analytics(json, args:) + def output_analytics(json, args:) full_analytics = args.analytics? || verbose? ohai "Analytics" @@ -207,13 +207,58 @@ options: options) end end + # This method is undocumented because it is not intended for general use. + # It relies on screen scraping some GitHub HTML that's not available as an API. 
+ # This seems very likely to break in the future. + # That said, it's the only way to get the data we want right now. + def output_github_packages_downloads(formula, args:) + return unless args.github_packages_downloads? + return unless formula.core_formula? + + escaped_formula_name = GitHubPackages.image_formula_name(formula.name) + formula_url_suffix = "container/core%2F#{escaped_formula_name}/" + formula_url = "https://github.com/Homebrew/homebrew-core/pkgs/#{formula_url_suffix}" + output = Utils::Curl.curl_output("--fail", formula_url) + return unless output.success? + + formula_version_urls = output.stdout + .scan(%r{/orgs/Homebrew/packages/#{formula_url_suffix}\d+\?tag=[^"]+}) + .map do |url| + url.sub("/orgs/Homebrew/packages/", "/Homebrew/homebrew-core/pkgs/") + end + return if formula_version_urls.empty? + + thirty_day_download_count = 0 + formula_version_urls.each do |formula_version_url_suffix| + formula_version_url = "https://github.com#{formula_version_url_suffix}" + output = Utils::Curl.curl_output("--fail", formula_version_url) + next unless output.success? + + last_thirty_days_match = output.stdout.match( + %r{Last 30 days\s*([\d.M,]+)}m, + ) + next if last_thirty_days_match.blank? + + last_thirty_days_downloads = last_thirty_days_match.captures.first.tr(",", "") + thirty_day_download_count += if (millions_match = last_thirty_days_downloads.match(/(\d+\.\d+)M/).presence) + (millions_match.captures.first.to_f * 1_000_000).round # to_i would truncate "1.5M" to 1 + else + last_thirty_days_downloads.to_i + end + end + + ohai "GitHub Packages Downloads" + puts "#{number_readable(thirty_day_download_count)} (30 days)" + end + def formula_output(formula, args:) return if Homebrew::EnvConfig.no_analytics? || Homebrew::EnvConfig.no_github_api? json = Homebrew::API::Formula.fetch formula.name return if json.blank? || json["analytics"].blank? 
- get_analytics(json, args: args) + output_analytics(json, args: args) + output_github_packages_downloads(formula, args: args) rescue ArgumentError # Ignore failed API requests nil @@ -225,7 +270,7 @@ options: options) json = Homebrew::API::Cask.fetch cask.token return if json.blank? || json["analytics"].blank? - get_analytics(json, args: args) + output_analytics(json, args: args) rescue ArgumentError # Ignore failed API requests nil