Homebrew (opt-in) Analytics tweaks. (#57)

- add `HOMEBREW_PRODUCT` global variable
- only differentiate between `/usr/local` and `non-/usr/local` Homebrew
  prefixes to avoid sharing sensitive user information
- note if e.g. build errors are occurring under CI
- Add `HOMEBREW_NO_ANALYTICS` variable (this will be how people opt-out
  when this is enabled for everyone)
- Add `HOMEBREW_ANALYTICS_DEBUG` variable to output all the analytics
  that are sent
- Move Bash analytics code to `Library/Homebrew/utils/analytics.sh`
- Add documentation for our analytics and why/what/when/how and opt-out
- Only official Homebrew commands are reported
- Ruby analytics are now reported in a forked, background process
This commit is contained in:
Mike McQuaid 2016-04-12 11:02:22 +01:00
parent 279df8ec81
commit 0c85113053
8 changed files with 164 additions and 62 deletions

View File

@ -3,6 +3,7 @@ require "fileutils"
module Homebrew
def tests
(HOMEBREW_LIBRARY/"Homebrew/test").cd do
ENV["HOMEBREW_NO_ANALYTICS"] = "1"
ENV["TESTOPTS"] = "-v" if ARGV.verbose?
ENV["HOMEBREW_NO_COMPAT"] = "1" if ARGV.include? "--no-compat"
if ARGV.include? "--coverage"

View File

@ -202,7 +202,10 @@ class FormulaInstaller
oh1 "Installing #{Tty.green}#{formula.full_name}#{Tty.reset}" if show_header?
report_analytics_event("install", formula.full_name)
if formula.tap && !formula.tap.private?
options = effective_build_options_for(formula).used_options.to_a.join(" ")
report_analytics_event("install", "#{formula.full_name} #{options}".strip)
end
@@attempted << formula

View File

@ -12,6 +12,7 @@ require "rbconfig"
ARGV.extend(HomebrewArgvExtension)
HOMEBREW_PRODUCT = ENV["HOMEBREW_PRODUCT"]
HOMEBREW_VERSION = ENV["HOMEBREW_VERSION"]
HOMEBREW_WWW = "http://brew.sh"

View File

@ -1,52 +1,66 @@
def analytics_anonymous_prefix_and_os
def analytics_label
@analytics_anonymous_prefix_and_os ||= begin
"#{OS_VERSION}, #{HOMEBREW_PREFIX.to_s.gsub(ENV["HOME"], "~")}"
os = OS_VERSION
prefix = ", non-/usr/local" if HOMEBREW_PREFIX.to_s != "/usr/local"
ci = ", CI" if ENV["CI"]
"#{os}#{prefix}#{ci}"
end
end
def report_analytics(type, metadata={})
def report_analytics(type, metadata = {})
return unless ENV["HOMEBREW_ANALYTICS"]
return if ENV["HOMEBREW_NO_ANALYTICS"]
metadata_args = metadata.map do |key, value|
["-d", "#{key}=#{value}"] if key && value
end.compact.flatten
args = %W[
--max-time 3
--user-agent #{HOMEBREW_USER_AGENT_CURL}
-d v=1
-d tid=#{ENV["HOMEBREW_ANALYTICS_ID"]}
-d cid=#{ENV["HOMEBREW_ANALYTICS_USER_UUID"]}
-d aip=1
-d an=#{HOMEBREW_PRODUCT}
-d av=#{HOMEBREW_VERSION}
-d t=#{type}
]
metadata.each { |k, v| args << "-d" << "#{k}=#{v}" if k && v }
# Send analytics. Anonymise the IP address (aip=1) and don't send or store
# any personally identifiable information.
# Send analytics. Don't send or store any personally identifiable information.
# https://github.com/Homebrew/brew/blob/master/share/doc/homebrew/Analytics.md
# https://developers.google.com/analytics/devguides/collection/protocol/v1/devguide
# https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters
system ENV["HOMEBREW_CURL"], "https://www.google-analytics.com/collect",
"-d", "v=1", "--silent", "--max-time", "3", "--output", "/dev/null",
"--user-agent", HOMEBREW_USER_AGENT_CURL,
"-d", "tid=#{ENV["HOMEBREW_ANALYTICS_ID"]}",
"-d", "cid=#{ENV["HOMEBREW_ANALYTICS_USER_UUID"]}",
"-d", "aip=1",
"-d", "an=Homebrew",
"-d", "av=#{HOMEBREW_VERSION}",
"-d", "t=#{type}",
*metadata_args
if ENV["HOMEBREW_ANALYTICS_DEBUG"]
puts Utils.popen_read ENV["HOMEBREW_CURL"],
"https://www.google-analytics.com/debug/collect",
*args
else
pid = fork do
exec ENV["HOMEBREW_CURL"],
"https://www.google-analytics.com/collect",
"--silent", "--output", "/dev/null",
*args
end
Process.detach pid
end
end
def report_analytics_event(category, action, label=analytics_anonymous_prefix_and_os, value=nil)
report_analytics(:event, {
def report_analytics_event(category, action, label = analytics_label, value = nil)
report_analytics(:event,
:ec => category,
:ea => action,
:el => label,
:ev => value,
})
:ev => value)
end
def report_analytics_exception(exception, options={})
if exception.is_a? BuildError
def report_analytics_exception(exception, options = {})
if exception.is_a?(BuildError) &&
exception.formula.tap && !exception.formula.tap.private?
report_analytics_event("BuildError", exception.formula.full_name)
end
fatal = options.fetch(:fatal, true) ? "1" : "0"
report_analytics(:exception, {
report_analytics(:exception,
:exd => exception.class.name,
:exf => fatal,
})
:exf => fatal)
end
def report_analytics_screenview(screen_name)

View File

@ -0,0 +1,71 @@
# Prepare the analytics environment for this `brew` invocation.
#
# Does nothing unless analytics are opted in to (`HOMEBREW_ANALYTICS` is
# non-empty) and not opted out of (`HOMEBREW_NO_ANALYTICS` is empty).
# Otherwise it loads the per-user UUID from `~/.homebrew_analytics_user_uuid`,
# generating one with `uuidgen` and storing it when the file is missing,
# unreadable or empty, and exports `HOMEBREW_ANALYTICS_ID` and
# `HOMEBREW_ANALYTICS_USER_UUID`.
#
# If no UUID can be obtained, `HOMEBREW_NO_ANALYTICS=1` is exported instead so
# that no hit with an empty client ID is ever sent.
setup-analytics() {
  [[ -z "$HOMEBREW_ANALYTICS" ]] && return
  [[ -n "$HOMEBREW_NO_ANALYTICS" ]] && return

  # User UUID file. Used for Homebrew user counting. Can be deleted and
  # recreated with no adverse effect (beyond our user counts being inflated).
  HOMEBREW_ANALYTICS_USER_UUID_FILE="$HOME/.homebrew_analytics_user_uuid"
  HOMEBREW_ANALYTICS_USER_UUID=""
  if [[ -r "$HOMEBREW_ANALYTICS_USER_UUID_FILE" ]]
  then
    HOMEBREW_ANALYTICS_USER_UUID="$(<"$HOMEBREW_ANALYTICS_USER_UUID_FILE")"
  fi

  # A missing, unreadable or empty file all mean a fresh UUID is needed.
  if [[ -z "$HOMEBREW_ANALYTICS_USER_UUID" ]]
  then
    HOMEBREW_ANALYTICS_USER_UUID="$(uuidgen 2>/dev/null)"
    if [[ -z "$HOMEBREW_ANALYTICS_USER_UUID" ]]
    then
      # Avoid sending bogus analytics (and storing an empty UUID) when no
      # UUID could be generated.
      export HOMEBREW_NO_ANALYTICS="1"
      return
    fi
    echo "$HOMEBREW_ANALYTICS_USER_UUID" > "$HOMEBREW_ANALYTICS_USER_UUID_FILE"
  fi

  export HOMEBREW_ANALYTICS_ID="UA-75654628-1"
  export HOMEBREW_ANALYTICS_USER_UUID
}
# Report the command being run to Google Analytics as a `screenview` hit.
#
# Nothing is sent unless `HOMEBREW_ANALYTICS` is non-empty and
# `HOMEBREW_NO_ANALYTICS` is empty, and only for official commands other than
# `commands`. With `HOMEBREW_ANALYTICS_DEBUG` set the hit goes to the
# `/debug/collect` endpoint in the foreground and the response is printed;
# otherwise it is sent silently by a disowned background `curl`.
report-analytics-screenview-command() {
  # Analytics must be enabled and not opted out of.
  [[ -n "$HOMEBREW_ANALYTICS" && -z "$HOMEBREW_NO_ANALYTICS" ]] || return 0

  case "$HOMEBREW_COMMAND" in
    commands)
      # Don't report commands used mostly by our scripts and not users.
      # TODO: list more e.g. shell completion things here perhaps using a single
      # script as a shell-completion entry point.
      return 0
      ;;
    bundle|cask|services)
      # Official external commands: always reported.
      ;;
    *)
      # Don't report non-official commands: anything else must ship with
      # Homebrew itself as a Ruby or shell (dev-)command.
      local command_dir command_ext
      local official_command=""
      for command_dir in cmd dev-cmd
      do
        for command_ext in rb sh
        do
          if [[ -f "$HOMEBREW_LIBRARY/Homebrew/$command_dir/$HOMEBREW_COMMAND.$command_ext" ]]
          then
            official_command="1"
          fi
        done
      done
      [[ -n "$official_command" ]] || return 0
      ;;
  esac

  # curl options shared by the real and the debug endpoint.
  local hit_args=()
  hit_args+=(--max-time 3)
  hit_args+=(--user-agent "$HOMEBREW_USER_AGENT_CURL")

  # One `-d key=value` pair per Measurement Protocol parameter.
  local hit_param
  for hit_param in \
    "v=1" \
    "tid=$HOMEBREW_ANALYTICS_ID" \
    "cid=$HOMEBREW_ANALYTICS_USER_UUID" \
    "aip=1" \
    "an=$HOMEBREW_PRODUCT" \
    "av=$HOMEBREW_VERSION" \
    "t=screenview" \
    "cd=$HOMEBREW_COMMAND"
  do
    hit_args+=(-d "$hit_param")
  done

  # Send analytics. Don't send or store any personally identifiable information.
  # https://github.com/Homebrew/brew/blob/master/share/doc/homebrew/Analytics.md
  # https://developers.google.com/analytics/devguides/collection/protocol/v1/devguide#screenView
  # https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters
  if [[ -n "$HOMEBREW_ANALYTICS_DEBUG" ]]
  then
    # Debug: run in the foreground and show what the endpoint replies.
    "$HOMEBREW_CURL" https://www.google-analytics.com/debug/collect \
      "${hit_args[@]}"
  else
    # Normal: fire and forget so the command itself is never delayed.
    "$HOMEBREW_CURL" https://www.google-analytics.com/collect \
      "${hit_args[@]}" \
      --silent --output /dev/null &>/dev/null & disown
  fi
}

View File

@ -111,6 +111,7 @@ export HOMEBREW_VERSION
export HOMEBREW_CELLAR
export HOMEBREW_RUBY_PATH
export HOMEBREW_CURL
export HOMEBREW_PRODUCT
export HOMEBREW_OS_VERSION
export HOMEBREW_OSX_VERSION
export HOMEBREW_USER_AGENT
@ -196,39 +197,9 @@ EOS
esac
fi
if [[ -n "$HOMEBREW_ANALYTICS" ]]
then
# User UUID file. Used for Homebrew user counting. Can be deleted and
# recreated with no adverse effect (beyond our user counts being inflated).
HOMEBREW_ANALYTICS_USER_UUID_FILE="$HOME/.homebrew_analytics_user_uuid"
if [[ -r "$HOMEBREW_ANALYTICS_USER_UUID_FILE" ]]
then
HOMEBREW_ANALYTICS_USER_UUID="$(<"$HOMEBREW_ANALYTICS_USER_UUID_FILE")"
else
HOMEBREW_ANALYTICS_USER_UUID="$(uuidgen)"
echo "$HOMEBREW_ANALYTICS_USER_UUID" > "$HOMEBREW_ANALYTICS_USER_UUID_FILE"
fi
export HOMEBREW_ANALYTICS_ID="UA-75654628-1"
export HOMEBREW_ANALYTICS_USER_UUID
# Send the to-be-executed command as an "App Screen View". Anonymise the IP
# address (aip=1) and don't send or store any personally identifiable
# information.
# https://developers.google.com/analytics/devguides/collection/protocol/v1/devguide#screenView
# https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters
"$HOMEBREW_CURL" https://www.google-analytics.com/collect -d v=1 \
--silent --max-time 3 --output /dev/null \
--user-agent "$HOMEBREW_USER_AGENT_CURL" \
-d tid="$HOMEBREW_ANALYTICS_ID" \
-d cid="$HOMEBREW_ANALYTICS_USER_UUID" \
-d aip=1 \
-d an=Homebrew \
-d av="$HOMEBREW_VERSION" \
-d t=screenview \
-d cd="$HOMEBREW_COMMAND" \
&> /dev/null \
& disown
fi
source "$HOMEBREW_LIBRARY/Homebrew/utils/analytics.sh"
setup-analytics
report-analytics-screenview-command
update-preinstall() {
[[ -n "$HOMEBREW_AUTO_UPDATE" ]] || return

View File

@ -0,0 +1,41 @@
# Analytics
Homebrew will shortly begin gathering anonymous aggregate user behaviour analytics and reporting these to Google Analytics.
## Why?
Homebrew is provided free of charge and run entirely by volunteers in their spare time. As a result, we do not have the resources to do detailed user studies of Homebrew users to decide on how best to design future features and prioritise current work. Anonymous aggregate user analytics allow us to prioritise fixes and features based on how, where and when people use Homebrew. For example, if a formula is widely used and is failing often it will enable us to prioritise fixing that formula over others.
## What?
Homebrew's analytics record some shared information for every event:
- The Homebrew user agent e.g. `Homebrew/0.9.9 (Macintosh; Intel Mac OS X 10.11.4) curl/7.43.0`
- The Google Analytics version i.e. `1` (https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters#v)
- The Homebrew analytics tracking ID e.g. `UA-75654628-1` (https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters#tid)
- A Homebrew analytics user ID e.g. `1BAB65CC-FE7F-4D8C-AB45-B7DB5A6BA9CB`. This is generated by `uuidgen` and stored in `~/.homebrew_analytics_user_uuid`. This does not allow us to track individual users but does enable us to accurately measure user counts vs. event counts (https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters#cid)
- The Google Analytics anonymous IP setting is enabled i.e. `1` (https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters#aip)
- The Homebrew application name e.g. `Homebrew` (https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters#an)
- The Homebrew application version e.g. `0.9.9` (https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters#av)
- The Homebrew analytics hit type e.g. `screenview` (https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters#t)
Homebrew's analytics record the following different events:
- a `screenview` hit type with the official Homebrew command you have run (with arguments stripped) e.g. `brew list` (not `brew list foo` or any external commands except `bundle`, `cask` and `services`)
- an `event` hit type with the `install` event category, the Homebrew formula from a non-private GitHub tap you have requested to install and any used options e.g. `wget --with-pcre` as the action and an event label e.g. `Mac OS X 10.11, non-/usr/local, CI` to indicate the OS version, non-standard installation location and invocation as part of CI
- an `event` hit type with the `BuildError` event category, the Homebrew formula that failed to install e.g. `wget` as the action and an event label e.g. `Mac OS X 10.11`
- an `exception` hit type with the `exception` event category, an exception description containing the exception name e.g. `FormulaUnavailableError` and whether the exception was fatal e.g. `1`
You can also view all the information that is sent by Homebrew's analytics by setting `HOMEBREW_ANALYTICS_DEBUG=1` in your environment. Please note this will also stop any analytics being sent.
It is impossible for the Homebrew developers to match any particular event to any particular user, even if we had access to the Homebrew analytics user ID (which we do not). An example of the most user-specific information we can see from Google Analytics:
![Aggregate user analytics](images/analytics.png)
It is impossible for Google to match a Homebrew analytics user ID to any other Google Analytics user ID. If Google turned evil the only thing they could do would be to lie about anonymising IP addresses and attempt to match users based on IP addresses.
## When?
Homebrew's analytics are sent throughout Homebrew's execution to Google Analytics.
## How?
The code is viewable in https://github.com/Homebrew/brew/blob/master/Library/Homebrew/utils/analytics.rb and https://github.com/Homebrew/brew/blob/master/Library/Homebrew/utils/analytics.sh. Analytics are sent in a separate background process and fail fast to avoid delaying any execution. They will fail immediately and silently if you have no network connection.
## Opting-out
If, after everything you've read, you still wish to opt out of Homebrew's analytics, you may set `HOMEBREW_NO_ANALYTICS=1` in your environment; no analytics will be sent while it is set.

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB