brew/Library/Homebrew/dev-cmd/formula-analytics.rb
Mike McQuaid 435158884d
Move formula analytics pretty name logic to MacOSVersion.
This makes sense to centralize these so when we support new macOS
versions we can just add them to the
`VERSIONS_TO_ANALYTICS_PRETTY_NAMES` hash.
2025-08-18 16:57:20 +01:00

390 lines
16 KiB
Ruby
Executable File

# typed: strict
# frozen_string_literal: true
require "abstract_command"
require "fileutils"
module Homebrew
module DevCmd
class FormulaAnalytics < AbstractCommand
cmd_args do
usage_banner <<~EOS
`formula-analytics`
Query Homebrew's analytics.
EOS
flag "--days-ago=",
description: "Query from the specified days ago until the present. The default is 30 days."
switch "--install",
description: "Output the number of specifically requested installations or installation as " \
"dependencies of formulae. This is the default."
switch "--install-on-request",
description: "Output the number of specifically requested installations of formulae."
switch "--cask-install",
description: "Output the number of installations of casks."
switch "--build-error",
description: "Output the number of build errors for formulae."
switch "--os-version",
description: "Output the number of events by OS name and version."
switch "--homebrew-devcmdrun-developer",
description: "Output the number of devcmdrun/HOMEBREW_DEVELOPER events."
switch "--homebrew-os-arch-ci",
description: "Output the number of OS/Architecture/CI events."
switch "--homebrew-prefixes",
description: "Output Homebrew prefixes."
switch "--homebrew-versions",
description: "Output Homebrew versions."
switch "--brew-command-run",
description: "Output `brew` commands run."
switch "--brew-command-run-options",
description: "Output `brew` commands run with options."
switch "--brew-test-bot-test",
description: "Output `brew test-bot` steps run."
switch "--json",
description: "Output JSON. This is required: plain text support has been removed."
switch "--all-core-formulae-json",
description: "Output a different JSON format containing the JSON data for all " \
"Homebrew/homebrew-core formulae."
switch "--setup",
description: "Install the necessary gems, require them and exit without running a query."
conflicts "--install", "--cask-install", "--install-on-request", "--build-error", "--os-version",
"--homebrew-devcmdrun-developer", "--homebrew-os-arch-ci", "--homebrew-prefixes",
"--homebrew-versions", "--brew-command-run", "--brew-command-run-options", "--brew-test-bot-test"
conflicts "--json", "--all-core-formulae-json", "--setup"
named_args :none
end
FIRST_INFLUXDB_ANALYTICS_DATE = T.let(Date.new(2023, 03, 27).freeze, Date)
sig { override.void }
def run
Homebrew.install_bundler_gems!(groups: ["formula_analytics"])
setup_python
influx_analytics(args)
end
sig { void }
def setup_python
formula_analytics_root = HOMEBREW_LIBRARY/"Homebrew/formula-analytics"
vendor_python = Pathname.new("~/.brew-formula-analytics/vendor/python").expand_path
python_version = (formula_analytics_root/".python-version").read.chomp
which_python = which("python#{python_version}", ORIGINAL_PATHS)
odie <<~EOS if which_python.nil?
Python #{python_version} is required. Try:
brew install python@#{python_version}
EOS
venv_root = vendor_python/python_version
vendor_python.children.reject { |path| path == venv_root }.each(&:rmtree) if vendor_python.exist?
venv_python = venv_root/"bin/python"
repo_requirements = HOMEBREW_LIBRARY/"Homebrew/formula-analytics/requirements.txt"
venv_requirements = venv_root/"requirements.txt"
if !venv_requirements.exist? || !FileUtils.identical?(repo_requirements, venv_requirements)
safe_system which_python, "-I", "-m", "venv", "--clear", venv_root, out: :err
safe_system venv_python, "-m", "pip", "install",
"--disable-pip-version-check",
"--require-hashes",
"--requirement", repo_requirements,
out: :err
FileUtils.cp repo_requirements, venv_requirements
end
ENV["PATH"] = "#{venv_root}/bin:#{ENV.fetch("PATH")}"
ENV["__PYVENV_LAUNCHER__"] = venv_python.to_s # support macOS framework Pythons
require "pycall"
PyCall.init(venv_python)
require formula_analytics_root/"pycall-setup"
end
sig { params(args: Homebrew::DevCmd::FormulaAnalytics::Args).void }
def influx_analytics(args)
require "utils/analytics"
require "json"
return if args.setup?
odie "`$HOMEBREW_NO_ANALYTICS` is set!" if ENV["HOMEBREW_NO_ANALYTICS"]
token = ENV.fetch("HOMEBREW_INFLUXDB_TOKEN", nil)
odie "No InfluxDB credentials found in `$HOMEBREW_INFLUXDB_TOKEN`!" unless token
client = InfluxDBClient3.new(
token:,
host: URI.parse(Utils::Analytics::INFLUX_HOST).host,
org: Utils::Analytics::INFLUX_ORG,
database: Utils::Analytics::INFLUX_BUCKET,
)
max_days_ago = (Date.today - FIRST_INFLUXDB_ANALYTICS_DATE).to_s.to_i
days_ago = (args.days_ago || 30).to_i
if days_ago > max_days_ago
opoo "Analytics started #{FIRST_INFLUXDB_ANALYTICS_DATE}. `--days-ago` set to maximum value."
days_ago = max_days_ago
end
if days_ago > 365
opoo "Analytics are only retained for 1 year, setting `--days-ago=365`."
days_ago = 365
end
all_core_formulae_json = args.all_core_formulae_json?
categories = []
categories << :build_error if args.build_error?
categories << :cask_install if args.cask_install?
categories << :formula_install if args.install?
categories << :formula_install_on_request if args.install_on_request?
categories << :homebrew_devcmdrun_developer if args.homebrew_devcmdrun_developer?
categories << :homebrew_os_arch_ci if args.homebrew_os_arch_ci?
categories << :homebrew_prefixes if args.homebrew_prefixes?
categories << :homebrew_versions if args.homebrew_versions?
categories << :os_versions if args.os_version?
categories << :command_run if args.brew_command_run?
categories << :command_run_options if args.brew_command_run_options?
categories << :test_bot_test if args.brew_test_bot_test?
category_matching_buckets = [:build_error, :cask_install, :command_run, :test_bot_test]
categories.each do |category|
additional_where = all_core_formulae_json ? " AND tap_name ~ '^homebrew/(core|cask)$'" : ""
bucket = if category_matching_buckets.include?(category)
category
elsif category == :command_run_options
:command_run
else
:formula_install
end
case category
when :homebrew_devcmdrun_developer
dimension_key = "devcmdrun_developer"
groups = [:devcmdrun, :developer]
when :homebrew_os_arch_ci
dimension_key = "os_arch_ci"
groups = [:os, :arch, :ci]
when :homebrew_prefixes
dimension_key = "prefix"
groups = [:prefix, :os, :arch]
when :homebrew_versions
dimension_key = "version"
groups = [:version]
when :os_versions
dimension_key = :os_version
groups = [:os_name_and_version]
when :command_run
dimension_key = "command_run"
groups = [:command]
when :command_run_options
dimension_key = "command_run_options"
groups = [:command, :options, :devcmdrun, :developer]
additional_where += " AND ci = 'false'"
when :test_bot_test
dimension_key = "test_bot_test"
groups = [:command, :passed, :arch, :os]
when :cask_install
dimension_key = :cask
groups = [:package, :tap_name]
else
dimension_key = :formula
additional_where += " AND on_request = 'true'" if category == :formula_install_on_request
groups = [:package, :tap_name, :options]
end
sql_groups = groups.map { |e| "\"#{e}\"" }.join(",")
query = <<~EOS
SELECT #{sql_groups}, COUNT(*) AS "count" FROM "#{bucket}" WHERE time >= now() - INTERVAL '#{days_ago} day'#{additional_where} GROUP BY #{sql_groups}
EOS
batches = begin
client.query(query:, language: "sql").to_batches
rescue PyCall::PyError => e
if e.message.include?("message: unauthenticated")
odie "Could not authenticate with InfluxDB! Please check your `$HOMEBREW_INFLUXDB_TOKEN`!"
end
raise
end
json = T.let({
category:,
total_items: 0,
start_date: Date.today - days_ago.to_i,
end_date: Date.today,
total_count: 0,
items: [],
}, T::Hash[Symbol, T.untyped])
batches.each do |batch|
batch.to_pylist.each do |record|
dimension = case category
when :homebrew_devcmdrun_developer
"devcmdrun=#{record["devcmdrun"]} HOMEBREW_DEVELOPER=#{record["developer"]}"
when :homebrew_os_arch_ci
if record["ci"] == "true"
"#{record["os"]} #{record["arch"]} (CI)"
else
"#{record["os"]} #{record["arch"]}"
end
when :homebrew_prefixes
if record["prefix"] == "custom-prefix"
"#{record["prefix"]} (#{record["os"]} #{record["arch"]})"
else
record["prefix"].to_s
end
when :os_versions
format_os_version_dimension(record["os_name_and_version"])
when :command_run_options
options = record["options"].split
# Cleanup bad data before TODO
# Can delete this code after 18th July 2025.
options.reject! { |option| option.match?(/^--with(out)?-/) }
next if options.any? { |option| option.match?(/^TMPDIR=/) }
"#{record["command"]} #{options.sort.join(" ")}"
when :test_bot_test
command_and_package, options = record["command"].split.partition { |arg| !arg.start_with?("-") }
# Cleanup bad data before https://github.com/Homebrew/homebrew-test-bot/pull/1043
# Can delete this code after 27th April 2025.
next if %w[audit install linkage style test].exclude?(command_and_package.first)
next if command_and_package.last.include?("/")
next if options.include?("--tap=")
next if options.include?("--only-dependencies")
next if options.include?("--cached")
command_and_options = (command_and_package + options.sort).join(" ")
passed = (record["passed"] == "true") ? "PASSED" : "FAILED"
"#{command_and_options} (#{record["os"]} #{record["arch"]}) (#{passed})"
else
record[groups.first.to_s]
end
next if dimension.blank?
if (tap_name = record["tap_name"].presence) &&
((tap_name != "homebrew/cask" && dimension_key == :cask) ||
(tap_name != "homebrew/core" && dimension_key == :formula))
dimension = "#{tap_name}/#{dimension}"
end
if (all_core_formulae_json || category == :build_error) &&
(options = record["options"].presence)
# homebrew/core formulae don't have non-HEAD options but they ended up in our analytics anyway.
if all_core_formulae_json
options = options.split.include?("--HEAD") ? "--HEAD" : ""
end
dimension = "#{dimension} #{options}"
end
dimension = dimension.strip
next if dimension.match?(/[<>]/)
count = record["count"]
json[:total_items] += 1
json[:total_count] += count
json[:items] << {
number: nil,
dimension_key => dimension,
count:,
}
end
end
odie "No data returned" if json[:total_count].zero?
# Combine identical values
deduped_items = {}
json[:items].each do |item|
key = item[dimension_key]
if deduped_items.key?(key)
deduped_items[key][:count] += item[:count]
else
deduped_items[key] = item
end
end
json[:items] = deduped_items.values
if all_core_formulae_json
core_formula_items = {}
json[:items].each do |item|
item.delete(:number)
formula_name, = item[dimension_key].split.first
next if formula_name.include?("/")
core_formula_items[formula_name] ||= []
core_formula_items[formula_name] << item
end
json.delete(:items)
core_formula_items.each_value do |items|
items.sort_by! { |item| -item[:count] }
items.each do |item|
item[:count] = format_count(item[:count])
end
end
json[:formulae] = core_formula_items.sort_by { |name, _| name }.to_h
else
json[:items].sort_by! do |item|
-item[:count]
end
json[:items].each_with_index do |item, index|
item[:number] = index + 1
percent = (item[:count].to_f / json[:total_count]) * 100
item[:percent] = format_percent(percent)
item[:count] = format_count(item[:count])
end
end
puts JSON.pretty_generate json
end
end
sig { params(count: Integer).returns(String) }
def format_count(count)
count.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
end
sig { params(percent: Float).returns(String) }
def format_percent(percent)
format("%<percent>.2f", percent:).gsub(/\.00$/, "")
end
sig { params(dimension: T.nilable(String)).returns(T.nilable(String)) }
def format_os_version_dimension(dimension)
return if dimension.blank?
require "macos_version"
dimension = dimension.gsub(/^Intel ?/, "")
.gsub(/^macOS ?/, "")
.gsub(/ \(.+\)$/, "")
case dimension
when (macos_pretty_name = ::MacOSVersion.analytics_pretty_name(dimension))
macos_pretty_name
when /Ubuntu(-Server)? (14|16|18|20|22)\.04/ then "Ubuntu #{Regexp.last_match(2)}.04 LTS"
when /Ubuntu(-Server)? (\d+\.\d+).\d ?(LTS)?/
"Ubuntu #{Regexp.last_match(2)} #{Regexp.last_match(3)}".strip
when %r{Debian GNU/Linux (\d+)\.\d+} then "Debian #{Regexp.last_match(1)} #{Regexp.last_match(2)}"
when /CentOS (\w+) (\d+)/ then "CentOS #{Regexp.last_match(1)} #{Regexp.last_match(2)}"
when /Fedora Linux (\d+)[.\d]*/ then "Fedora Linux #{Regexp.last_match(1)}"
when /KDE neon .*?([\d.]+)/ then "KDE neon #{Regexp.last_match(1)}"
when /Amazon Linux (\d+)\.[.\d]*/ then "Amazon Linux #{Regexp.last_match(1)}"
else dimension
end
end
end
end
end