diff --git a/Library/Homebrew/livecheck/strategy.rb b/Library/Homebrew/livecheck/strategy.rb new file mode 100644 index 0000000000..aa6d196ffe --- /dev/null +++ b/Library/Homebrew/livecheck/strategy.rb @@ -0,0 +1,86 @@ +# frozen_string_literal: true + +module Homebrew + module Livecheck + # The `Livecheck::Strategy` module contains the various strategies as well + # as some general-purpose methods for working with them. Within the context + # of the `brew livecheck` command, strategies are established procedures + # for finding new software versions at a given source. + # + # @api private + module Strategy + module_function + + # Strategy priorities informally range from 1 to 10, where 10 is the + # highest priority. 5 is the default priority because it's roughly in + # the middle of this range. Strategies with a priority of 0 (or lower) + # are ignored. + DEFAULT_PRIORITY = 5 + private_constant :DEFAULT_PRIORITY + + # Creates and/or returns a `@strategies` `Hash`, which maps a snake + # case strategy name symbol (e.g., `:page_match`) to the associated + # `Strategy`. + # + # At present, this should only be called after tap strategies have been + # loaded, otherwise livecheck won't be able to use them. + # @return [Hash] + def strategies + return @strategies if defined? @strategies + + @strategies = {} + constants.sort.each do |strategy_symbol| + key = strategy_symbol.to_s.underscore.to_sym + strategy = const_get(strategy_symbol) + @strategies[key] = strategy + end + @strategies + end + private_class_method :strategies + + # Returns the `Strategy` that corresponds to the provided `Symbol` (or + # `nil` if there is no matching `Strategy`). + # @param symbol [Symbol] the strategy name in snake case as a `Symbol` + # (e.g., `:page_match`) + # @return [Strategy, nil] + def from_symbol(symbol) + strategies[symbol] + end + + # Returns an array of strategies that apply to the provided URL. 
+ # @param url [String] the URL to check for matching strategies + # @param regex_provided [Boolean] whether a regex is provided in a + # `livecheck` block + # @return [Array] + def from_url(url, regex_provided = nil) + usable_strategies = strategies.values.select do |strategy| + # Ignore strategies with a priority of 0 or lower + next if strategy.const_defined?(:PRIORITY) && !strategy::PRIORITY.positive? + + strategy.respond_to?(:match?) && strategy.match?(url) + end + + usable_strategies << strategies[:page_match] if strategies.key?(:page_match) && regex_provided + + # Sort usable strategies in descending order by priority, using the + # DEFAULT_PRIORITY when a strategy doesn't contain a PRIORITY constant + usable_strategies.sort_by do |strategy| + (strategy.const_defined?(:PRIORITY) ? -strategy::PRIORITY : -DEFAULT_PRIORITY) + end + end + end + end +end + +require_relative "strategy/apache" +require_relative "strategy/bitbucket" +require_relative "strategy/git" +require_relative "strategy/gnome" +require_relative "strategy/gnu" +require_relative "strategy/hackage" +require_relative "strategy/launchpad" +require_relative "strategy/npm" +require_relative "strategy/page_match" +require_relative "strategy/pypi" +require_relative "strategy/sourceforge" +require_relative "strategy/xorg" diff --git a/Library/Homebrew/livecheck/strategy/apache.rb b/Library/Homebrew/livecheck/strategy/apache.rb new file mode 100644 index 0000000000..5e8cb9e122 --- /dev/null +++ b/Library/Homebrew/livecheck/strategy/apache.rb @@ -0,0 +1,65 @@ +# frozen_string_literal: true + +module Homebrew + module Livecheck + module Strategy + # The `Apache` strategy identifies versions of software at apache.org + # by checking directory listing pages. + # + # Apache URLs start with `https://www.apache.org/dyn/closer.lua?path=`. 
+ # + # The `path` parameter takes one of the following formats: + # * `example/1.2.3/example-1.2.3.tar.gz` + # * `example/example-1.2.3/example-1.2.3.tar.gz` + # * `example/example-1.2.3-bin.tar.gz` + # + # When the `path` contains a version directory (e.g., `/1.2.3/`, + # `/example-1.2.3/`, etc.), the default regex matches numeric versions + # in directory names. Otherwise, the default regex matches numeric + # versions in filenames. + # + # @api public + class Apache + # The `Regexp` used to determine if the strategy applies to the URL. + URL_MATCH_REGEX = %r{www\.apache\.org/dyn/.+path=.+}i.freeze + + # Whether the strategy can be applied to the provided URL. + # @param url [String] the URL to match against + # @return [Boolean] + def self.match?(url) + URL_MATCH_REGEX.match?(url) + end + + # Generates a URL and regex (if one isn't provided) and passes them + # to the `PageMatch#find_versions` method to identify versions in the + # content. + # @param url [String] the URL of the content to check + # @param regex [Regexp] a regex used for matching versions in content + # @return [Hash] + def self.find_versions(url, regex = nil) + %r{ + path= + (?.+?)/ # Path to directory of files or version directories + (?[^/]*?) 
# Any text in filename or directory before version + v?\d+(?:\.\d+)+ # The numeric version + (?/|[^/]*) # Any text in filename or directory after version + }ix =~ url + + # Use `\.t` instead of specific tarball extensions (e.g., .tar.gz) + suffix.sub!(/\.t(?:ar\..+|[a-z0-9]+)$/i, "\.t") + + # Example URL: `https://archive.apache.org/dist/example/` + page_url = "https://archive.apache.org/dist/#{path}/" + + # Example directory regex: `%r{href=["']?v?(\d+(?:\.\d+)+)/}i` + # Example file regexes: + # * `/href=["']?example-v?(\d+(?:\.\d+)+)\.t/i` + # * `/href=["']?example-v?(\d+(?:\.\d+)+)-bin\.zip/i` + regex ||= /href=["']?#{Regexp.escape(prefix)}v?(\d+(?:\.\d+)+)#{Regexp.escape(suffix)}/i + + Homebrew::Livecheck::Strategy::PageMatch.find_versions(page_url, regex) + end + end + end + end +end diff --git a/Library/Homebrew/livecheck/strategy/bitbucket.rb b/Library/Homebrew/livecheck/strategy/bitbucket.rb new file mode 100644 index 0000000000..a696d21252 --- /dev/null +++ b/Library/Homebrew/livecheck/strategy/bitbucket.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +module Homebrew + module Livecheck + module Strategy + # The `Bitbucket` strategy identifies versions of software at + # bitbucket.org by checking a repository's available downloads. + # + # Bitbucket URLs generally take one of the following formats: + # * `https://bitbucket.org/example/example/get/1.2.3.tar.gz` + # * `https://bitbucket.org/example/example/downloads/example-1.2.3.tar.gz` + # + # The `/get/` archive files are simply automated snapshots of the files + # for a given tag. The `/downloads/` archive files are files that have + # been uploaded instead. + # + # It's also possible for an archive to come from a repository's wiki, + # like: + # `https://bitbucket.org/example/example/wiki/downloads/example-1.2.3.zip`. + # This scenario is handled by this strategy as well and the `path` in + # this example would be `example/example/wiki` (instead of + # `example/example` with the previous URLs). 
+ # + # The default regex identifies versions in archive files found in `href` + # attributes. + # + # @api public + class Bitbucket + # The `Regexp` used to determine if the strategy applies to the URL. + URL_MATCH_REGEX = %r{bitbucket\.org(/[^/]+){4}\.\w+}i.freeze + + # Whether the strategy can be applied to the provided URL. + # @param url [String] the URL to match against + # @return [Boolean] + def self.match?(url) + URL_MATCH_REGEX.match?(url) + end + + # Generates a URL and regex (if one isn't provided) and passes them + # to the `PageMatch#find_versions` method to identify versions in the + # content. + # @param url [String] the URL of the content to check + # @param regex [Regexp] a regex used for matching versions in content + # @return [Hash] + def self.find_versions(url, regex = nil) + %r{ + bitbucket\.org/ + (?.+?)/ # The path leading up to the get or downloads part + (?get|downloads)/ # An indicator of the file download type + (?(?:[^/]+?[_-])?) # Filename text before the version + v?\d+(?:\.\d+)+ # The numeric version + (?[^/]+) # Filename text after the version + }ix =~ url + + # Use `\.t` instead of specific tarball extensions (e.g., .tar.gz) + suffix.sub!(/\.t(?:ar\..+|[a-z0-9]+)$/i, "\.t") + + # `/get/` archives are Git tag snapshots, so we need to check that tab + # instead of the main `/downloads/` page + page_url = if dl_type == "get" + "https://bitbucket.org/#{path}/downloads/?tab=tags" + else + "https://bitbucket.org/#{path}/downloads/" + end + + # Example regexes: + # * `/href=.*?v?(\d+(?:\.\d+)+)\.t/i` + # * `/href=.*?example-v?(\d+(?:\.\d+)+)\.t/i` + regex ||= /href=.*?#{Regexp.escape(prefix)}v?(\d+(?:\.\d+)+)#{Regexp.escape(suffix)}/i + + Homebrew::Livecheck::Strategy::PageMatch.find_versions(page_url, regex) + end + end + end + end +end diff --git a/Library/Homebrew/livecheck/strategy/git.rb b/Library/Homebrew/livecheck/strategy/git.rb new file mode 100644 index 0000000000..f93127af9f --- /dev/null +++ 
b/Library/Homebrew/livecheck/strategy/git.rb @@ -0,0 +1,107 @@ +# frozen_string_literal: true + +require "open3" + +module Homebrew + module Livecheck + module Strategy + # The `Git` strategy identifies versions of software in a Git repository + # by checking the tags using `git ls-remote --tags`. + # + # Livecheck has historically prioritized the `Git` strategy over others + # and this behavior was continued when the priority setup was created. + # This is partly related to livecheck checking formula URLs in order of + # `head`, `stable`, and then `homepage`. The higher priority here may + # be removed (or altered) in the future if we reevaluate this particular + # behavior. + # + # This strategy does not have a default regex. Instead, it simply removes + # any non-digit text from the start of tags and parses the rest as a + # `Version`. This works for some simple situations but even one unusual + # tag can cause a bad result. It's better to provide a regex in a + # `livecheck` block, so `livecheck` only matches what we really want. + # + # @api public + class Git + # The priority of the strategy on an informal scale of 1 to 10 (from + # lowest to highest). + PRIORITY = 8 + + # Fetches a remote Git repository's tags using `git ls-remote --tags` + # and parses the command's output. If a regex is provided, it will be + # used to filter out any tags that don't match it. + # @param url [String] the URL of the Git repository to check + # @param regex [Regexp] the regex to use for filtering tags + # @return [Hash] + def self.tag_info(url, regex = nil) + # Open3#capture3 is used here because we need to capture stderr + # output and handle it in an appropriate manner. Alternatives like + # SystemCommand always print errors (as well as debug output) and + # don't meet the same goals. 
+ stdout_str, stderr_str, _status = Open3.capture3( + { "GIT_TERMINAL_PROMPT" => "0" }, "git", "ls-remote", "--tags", url + ) + + tags_data = { tags: [] } + tags_data[:messages] = stderr_str.split("\n") if stderr_str.present? + return tags_data if stdout_str.blank? + + # Isolate tag strings by removing leading/trailing text + stdout_str.gsub!(%r{^.*\trefs/tags/}, "") + stdout_str.gsub!("^{}", "") + + tags = stdout_str.split("\n").uniq.sort + tags.select! { |t| t =~ regex } if regex + tags_data[:tags] = tags + + tags_data + end + + # Whether the strategy can be applied to the provided URL. + # @param url [String] the URL to match against + # @return [Boolean] + def self.match?(url) + (DownloadStrategyDetector.detect(url) <= GitDownloadStrategy) == true + end + + # Checks the Git tags for new versions. When a regex isn't provided, + # the `Git` strategy simply removes non-digits from the start of tag + # strings and parses the remaining text as a `Version`. + # @param url [String] the URL of the Git repository to check + # @param regex [Regexp] the regex to use for matching versions + # @return [Hash] + def self.find_versions(url, regex = nil) + match_data = { matches: {}, regex: regex, url: url } + + tags_data = tag_info(url, regex) + + if tags_data.key?(:messages) + match_data[:messages] = tags_data[:messages] + return match_data if tags_data[:tags].blank? + end + + tags_only_debian = tags_data[:tags].all? { |tag| tag.start_with?("debian/") } + + tags_data[:tags].each do |tag| + # Skip tag if it has a 'debian/' prefix and upstream does not do + # only 'debian/' prefixed tags + next if tag =~ %r{^debian/} && !tags_only_debian + + captures = regex.is_a?(Regexp) ? 
tag.scan(regex) : [] + tag_cleaned = if captures[0].is_a?(Array) + captures[0][0] # Use the first capture group (the version) + else + tag[/\D*(.*)/, 1] # Remove non-digits from the start of the tag + end + + match_data[:matches][tag] = Version.new(tag_cleaned) + rescue TypeError + nil + end + + match_data + end + end + end + end +end diff --git a/Library/Homebrew/livecheck/strategy/gnome.rb b/Library/Homebrew/livecheck/strategy/gnome.rb new file mode 100644 index 0000000000..72a6c68ff2 --- /dev/null +++ b/Library/Homebrew/livecheck/strategy/gnome.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +module Homebrew + module Livecheck + module Strategy + # The `Gnome` strategy identifies versions of software at gnome.org by + # checking the available downloads found in a project's `cache.json` + # file. + # + # GNOME URLs generally follow a format like: + # `https://download.gnome.org/sources/example/1.2/example-1.2.3.tar.xz`. + # + # The default regex restricts matching to filenames containing a version + # with an even-numbered minor below 90, as these are stable releases. + # + # @api public + class Gnome + NICE_NAME = "GNOME" + + # The `Regexp` used to determine if the strategy applies to the URL. + URL_MATCH_REGEX = /download\.gnome\.org/i.freeze + + # Whether the strategy can be applied to the provided URL. + # @param url [String] the URL to match against + # @return [Boolean] + def self.match?(url) + URL_MATCH_REGEX.match?(url) + end + + # Generates a URL and regex (if one isn't provided) and passes them + # to the `PageMatch#find_versions` method to identify versions in the + # content. 
+ # @param url [String] the URL of the content to check + # @param regex [Regexp] a regex used for matching versions in content + # @return [Hash] + def self.find_versions(url, regex = nil) + %r{/sources/(?.*?)/}i =~ url + + page_url = "https://download.gnome.org/sources/#{package_name}/cache.json" + + # GNOME archive files seem to use a standard filename format, so we + # count on the delimiter between the package name and numeric version + # being a hyphen and the file being a tarball. + # + # The `([0-8]\d*?)?[02468]` part of the regex is intended to restrict + # matching to versions with an even-numbered minor, as these are + # stable releases. This also excludes x.90+ versions, which are + # development versions. See: https://www.gnome.org/gnome-3/source/ + # + # Example regex: `/example-(\d+\.([0-8]\d*?)?[02468](?:\.\d+)*?)\.t/i` + regex ||= /#{Regexp.escape(package_name)}-(\d+\.([0-8]\d*?)?[02468](?:\.\d+)*?)\.t/i + + Homebrew::Livecheck::Strategy::PageMatch.find_versions(page_url, regex) + end + end + end + end +end diff --git a/Library/Homebrew/livecheck/strategy/gnu.rb b/Library/Homebrew/livecheck/strategy/gnu.rb new file mode 100644 index 0000000000..396c7e25a6 --- /dev/null +++ b/Library/Homebrew/livecheck/strategy/gnu.rb @@ -0,0 +1,95 @@ +# frozen_string_literal: true + +module Homebrew + module Livecheck + module Strategy + # The `Gnu` strategy identifies versions of software at gnu.org by + # checking directory listing pages. 
+ # + # GNU URLs use a variety of formats: + # + # Archive file URLs: + # * `https://ftp.gnu.org/gnu/example/example-1.2.3.tar.gz` + # * `https://ftp.gnu.org/gnu/example/1.2.3/example-1.2.3.tar.gz` + # + # Homepage URLs: + # * `https://www.gnu.org/software/example/` + # * `https://example.gnu.org` + # + # There are other URL formats that this strategy currently doesn't + # support: + # * `https://ftp.gnu.org/non-gnu/example/source/feature/1.2.3/example-1.2.3.tar.gz` + # * `https://savannah.nongnu.org/download/example/example-1.2.3.tar.gz` + # * `https://download.savannah.gnu.org/releases/example/example-1.2.3.tar.gz` + # * `https://download.savannah.nongnu.org/releases/example/example-1.2.3.tar.gz` + # + # The default regex identifies versions in archive files found in `href` + # attributes. + # + # @api public + class Gnu + NICE_NAME = "GNU" + + # The `Regexp` used to determine if the strategy applies to the URL. + URL_MATCH_REGEX = %r{ + //.+?\.gnu\.org$| + gnu\.org/(?:gnu|software)/ + }ix.freeze + + # The `Regexp` used to parse the project name from the provided URL. + # The strategy uses this information to create the URL to check and + # the default regex. + PROJECT_NAME_REGEXES = [ + %r{/(?:gnu|software)/(?.+?)/}i, + %r{//(?.+?)\.gnu\.org(?:/)?$}i, + ].freeze + + # Whether the strategy can be applied to the provided URL. + # @param url [String] the URL to match against + # @return [Boolean] + def self.match?(url) + URL_MATCH_REGEX.match?(url) && !url.include?("savannah.") + end + + # Generates a URL and regex (if one isn't provided) and passes them + # to the `PageMatch#find_versions` method to identify versions in the + # content. 
+ # @param url [String] the URL of the content to check + # @param regex [Regexp] a regex used for matching versions in content + # @return [Hash] + def self.find_versions(url, regex = nil) + project_names = PROJECT_NAME_REGEXES.map do |project_name_regex| + m = url.match(project_name_regex) + m["project_name"] if m + end.compact + return { matches: {}, regex: regex, url: url } if project_names.blank? + + if project_names.length > 1 + odebug <<~EOS + + Multiple project names found: #{match_list} + + EOS + end + + project_name = project_names.first + + # The directory listing page for the project's files + page_url = "http://ftp.gnu.org/gnu/#{project_name}/?C=M&O=D" + + # The default regex consists of the following parts: + # * `href=.*?`: restricts matching to URLs in `href` attributes + # * The project name + # * `[._-]`: the generic delimiter between project name and version + # * `v?(\d+(?:\.\d+)*)`: the numeric version + # * `(?:\.[a-z]+|/)`: the file extension (a trailing delimiter) + # + # Example regex: `%r{href=.*?example[._-]v?(\d+(?:\.\d+)*)(?:\.[a-z]+|/)}i` + regex ||= %r{href=.*?#{project_name}[._-]v?(\d+(?:\.\d+)*)(?:\.[a-z]+|/)}i + + Homebrew::Livecheck::Strategy::PageMatch.find_versions(page_url, regex) + end + end + end + end +end diff --git a/Library/Homebrew/livecheck/strategy/hackage.rb b/Library/Homebrew/livecheck/strategy/hackage.rb new file mode 100644 index 0000000000..4f0303fe68 --- /dev/null +++ b/Library/Homebrew/livecheck/strategy/hackage.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +module Homebrew + module Livecheck + module Strategy + # The `Hackage` strategy identifies versions of software at + # hackage.haskell.org by checking directory listing pages. 
+ # + # Hackage URLs take one of the following formats: + # * `https://hackage.haskell.org/package/example-1.2.3/example-1.2.3.tar.gz` + # * `https://downloads.haskell.org/~ghc/8.10.1/ghc-8.10.1-src.tar.xz` + # + # The default regex checks for the latest version an `h3` heading element + # with a format like `

example-1.2.3/

`. + # + # @api public + class Hackage + # The `Regexp` used to determine if the strategy applies to the URL. + URL_MATCH_REGEX = /(?:downloads|hackage)\.haskell\.org/i.freeze + + # Whether the strategy can be applied to the provided URL. + # @param url [String] the URL to match against + # @return [Boolean] + def self.match?(url) + URL_MATCH_REGEX.match?(url) + end + + # Generates a URL and regex (if one isn't provided) and passes them + # to the `PageMatch#find_versions` method to identify versions in the + # content. + # @param url [String] the URL of the content to check + # @param regex [Regexp] a regex used for matching versions in content + # @return [Hash] + def self.find_versions(url, regex = nil) + /^(?.+?)-\d+/i =~ File.basename(url) + + # A page containing a directory listing of the latest source tarball + page_url = "https://hackage.haskell.org/package/#{package_name}/src" + + # Example regex: `%r{

example-(.*?)/?

}i` + regex ||= %r{

#{Regexp.escape(package_name)}-(.*?)/?

}i + + Homebrew::Livecheck::Strategy::PageMatch.find_versions(page_url, regex) + end + end + end + end +end diff --git a/Library/Homebrew/livecheck/strategy/launchpad.rb b/Library/Homebrew/livecheck/strategy/launchpad.rb new file mode 100644 index 0000000000..b9253521b5 --- /dev/null +++ b/Library/Homebrew/livecheck/strategy/launchpad.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +module Homebrew + module Livecheck + module Strategy + # The `Launchpad` strategy identifies versions of software at + # launchpad.net by checking the main page for a project. + # + # Launchpad URLs take a variety of formats but all the current formats + # contain the project name as the first part of the URL path: + # * `https://launchpad.net/example/1.2/1.2.3/+download/example-1.2.3.tar.gz` + # * `https://launchpad.net/example/trunk/1.2.3/+download/example-1.2.3.tar.gz` + # * `https://code.launchpad.net/example/1.2/1.2.3/+download/example-1.2.3.tar.gz` + # + # The default regex identifies the latest version within an HTML element + # found on the main page for a project: + # ```html + #
+ # Latest version is 1.2.3 + #
+ # ``` + # + # @api public + class Launchpad + # The `Regexp` used to determine if the strategy applies to the URL. + URL_MATCH_REGEX = /launchpad\.net/i.freeze + + # Whether the strategy can be applied to the provided URL. + # @param url [String] the URL to match against + # @return [Boolean] + def self.match?(url) + URL_MATCH_REGEX.match?(url) + end + + # Generates a URL and regex (if one isn't provided) and passes them + # to the `PageMatch#find_versions` method to identify versions in the + # content. + # @param url [String] the URL of the content to check + # @param regex [Regexp] a regex used for matching versions in content + # @return [Hash] + def self.find_versions(url, regex = nil) + %r{launchpad\.net/(?[^/]+)}i =~ url + + # The main page for the project on Launchpad + page_url = "https://launchpad.net/#{project_name}" + + # The default regex is the same for all URLs using this strategy + regex ||= %r{class="[^"]*version[^"]*"[^>]*>\s*Latest version is (.+)\s*.+)/-/}i =~ url + + page_url = "https://www.npmjs.com/package/#{package_name}?activeTab=versions" + + # Example regexes: + # * `%r{href=.*?/package/example/v/(\d+(?:\.\d+)+)"}i` + # * `%r{href=.*?/package/@example/example/v/(\d+(?:\.\d+)+)"}i` + regex ||= %r{href=.*?/package/#{Regexp.escape(package_name)}/v/(\d+(?:\.\d+)+)"}i + + Homebrew::Livecheck::Strategy::PageMatch.find_versions(page_url, regex) + end + end + end + end +end diff --git a/Library/Homebrew/livecheck/strategy/page_match.rb b/Library/Homebrew/livecheck/strategy/page_match.rb new file mode 100644 index 0000000000..3e9ddc85f2 --- /dev/null +++ b/Library/Homebrew/livecheck/strategy/page_match.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +require "open-uri" + +module Homebrew + module Livecheck + module Strategy + # The `PageMatch` strategy fetches content at a URL and scans it for + # matching text using the provided regex. 
+ # + # This strategy can be used in a `livecheck` block when no specific + # strategies apply to a given URL. Though `PageMatch` will technically + # match any HTTP URL, the strategy also requires a regex to function. + # + # The `PageMatch#find_versions` method is also used within other + # strategies, to handle the process of identifying version text in + # content. + # + # @api public + class PageMatch + NICE_NAME = "Page match" + + # A priority of zero causes livecheck to skip the strategy. We do this + # for `PageMatch` so we can selectively apply the strategy only when a + # regex is provided in a `livecheck` block. + PRIORITY = 0 + + # The `Regexp` used to determine if the strategy applies to the URL. + URL_MATCH_REGEX = %r{^https?://}i.freeze + + # Whether the strategy can be applied to the provided URL. + # + # PageMatch will technically match any HTTP URL but it's only usable + # when the formula has a `livecheck` block containing a regex. + # @param url [String] the URL to match against + # @return [Boolean] + def self.match?(url) + URL_MATCH_REGEX.match?(url) + end + + # Fetches the content at the URL, uses the regex to match text, and + # returns an array of unique matches. + # @param url [String] the URL of the content to check + # @param regex [Regexp] a regex used for matching versions in the + # content + # @return [Array] + def self.page_matches(url, regex) + page = URI.open(url).read + matches = page.scan(regex) + matches.map(&:first).uniq + end + + # Checks the content at the URL for new versions, using the provided + # regex for matching. 
+ # @param url [String] the URL of the content to check + # @param regex [Regexp] a regex used for matching versions in content + # @return [Hash] + def self.find_versions(url, regex) + match_data = { matches: {}, regex: regex, url: url } + + page_matches(url, regex).each do |match| + match_data[:matches][match] = Version.new(match) + end + + match_data + end + end + end + end +end diff --git a/Library/Homebrew/livecheck/strategy/pypi.rb b/Library/Homebrew/livecheck/strategy/pypi.rb new file mode 100644 index 0000000000..fda0c9f4e1 --- /dev/null +++ b/Library/Homebrew/livecheck/strategy/pypi.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +module Homebrew + module Livecheck + module Strategy + # The `Pypi` strategy identifies versions of software at pypi.org by + # checking project pages for archive files. + # + # PyPI URLs have a standard format but the hexadecimal text between + # `/packages/` and the filename varies: + # * `https://files.pythonhosted.org/packages////example-1.2.3.tar.gz` + # + # As such, the default regex only targets the filename at the end of the + # URL. + # + # @api public + class Pypi + NICE_NAME = "PyPI" + + # The `Regexp` used to determine if the strategy applies to the URL. + URL_MATCH_REGEX = /files\.pythonhosted\.org/i.freeze + + # Whether the strategy can be applied to the provided URL. + # @param url [String] the URL to match against + # @return [Boolean] + def self.match?(url) + URL_MATCH_REGEX.match?(url) + end + + # Generates a URL and regex (if one isn't provided) and passes them + # to the `PageMatch#find_versions` method to identify versions in the + # content. + # @param url [String] the URL of the content to check + # @param regex [Regexp] a regex used for matching versions in content + # @return [Hash] + def self.find_versions(url, regex = nil) + / + (?.+)- # The package name followed by a hyphen + .*? 
# The version string + (?\.tar\.[a-z0-9]+|\.[a-z0-9]+)$ # Filename extension + /ix =~ File.basename(url) + + # Use `\.t` instead of specific tarball extensions (e.g., .tar.gz) + suffix.sub!(/\.t(?:ar\..+|[a-z0-9]+)$/i, "\.t") + + # It's not technically necessary to have the `#files` fragment at the + # end of the URL but it makes the debug output a bit more useful. + page_url = "https://pypi.org/project/#{package_name.gsub(/%20|_/, "-")}#files" + + # Example regex: `%r{href=.*?/packages.*?/example[._-]v?(\d+(?:\.\d+)*).t}i`. + regex ||= + %r{href=.*?/packages.*?/#{Regexp.escape(package_name)}[._-]v?(\d+(?:\.\d+)*)#{Regexp.escape(suffix)}}i + + Homebrew::Livecheck::Strategy::PageMatch.find_versions(page_url, regex) + end + end + end + end +end diff --git a/Library/Homebrew/livecheck/strategy/sourceforge.rb b/Library/Homebrew/livecheck/strategy/sourceforge.rb new file mode 100644 index 0000000000..336d32ebfd --- /dev/null +++ b/Library/Homebrew/livecheck/strategy/sourceforge.rb @@ -0,0 +1,71 @@ +# frozen_string_literal: true + +module Homebrew + module Livecheck + module Strategy + # The `Sourceforge` strategy identifies versions of software at + # sourceforge.net by checking a project's RSS feed. + # + # SourceForge URLs take a few different formats: + # * https://downloads.sourceforge.net/project/example/example-1.2.3.tar.gz + # * https://svn.code.sf.net/p/example/code/trunk + # * :pserver:anonymous:@example.cvs.sourceforge.net:/cvsroot/example + # + # The RSS feed for a project contains the most recent release archives + # and this is fine for most projects but this approach has some + # shortcomings. Some project releases involve so many files that the one + # we're interested in isn't present in the feed content. Some projects + # contain additional software and the archive we're interested in is + # pushed out of the feed (especially if it hasn't been updated recently). 
+ # + # Usually we address this situation by adding a `livecheck` block to + # the formula that checks the page for the relevant directory in the + # project instead. In this situation, it's necessary to use + # `strategy :page_match` to prevent the `Sourceforge` stratgy from + # being used. + # + # The default regex matches within `url` attributes in the RSS feed + # and identifies versions within directory names or filenames. + # + # @api public + class Sourceforge + NICE_NAME = "SourceForge" + + # The `Regexp` used to determine if the strategy applies to the URL. + URL_MATCH_REGEX = /(?:sourceforge|sf)\.net/i.freeze + + # Whether the strategy can be applied to the provided URL. + # @param url [String] the URL to match against + # @return [Boolean] + def self.match?(url) + URL_MATCH_REGEX.match?(url) + end + + # Generates a URL and regex (if one isn't provided) and passes them + # to the `PageMatch#find_versions` method to identify versions in the + # content. + # @param url [String] the URL of the content to check + # @param regex [Regexp] a regex used for matching versions in content + # @return [Hash] + def self.find_versions(url, regex = nil) + if url.include?("/project") + %r{/projects?/(?[^/]+)/}i =~ url + elsif url.include?(".net/p/") + %r{\.net/p/(?[^/]+)/}i =~ url + else + %r{\.net(?::/cvsroot)?/(?[^/]+)}i =~ url + end + + page_url = "https://sourceforge.net/projects/#{project_name}/rss" + + # It may be possible to improve the default regex but there's quite a + # bit of variation between projects and it can be challenging to + # create something that works for most URLs. 
+ regex ||= %r{url=.*?/#{Regexp.escape(project_name)}/files/.*?[-_/](\d+(?:[-.]\d+)+)[-_/%.]}i + + Homebrew::Livecheck::Strategy::PageMatch.find_versions(page_url, regex) + end + end + end + end +end diff --git a/Library/Homebrew/livecheck/strategy/xorg.rb b/Library/Homebrew/livecheck/strategy/xorg.rb new file mode 100644 index 0000000000..f9de6660df --- /dev/null +++ b/Library/Homebrew/livecheck/strategy/xorg.rb @@ -0,0 +1,97 @@ +# frozen_string_literal: true + +require "open-uri" + +module Homebrew + module Livecheck + module Strategy + # The `Xorg` strategy identifies versions of software at x.org by + # checking directory listing pages. + # + # X.Org URLs take one of the following formats, among several others: + # * `https://www.x.org/archive/individual/app/example-1.2.3.tar.bz2` + # * `https://www.x.org/archive/individual/font/example-1.2.3.tar.bz2` + # * `https://www.x.org/archive/individual/lib/libexample-1.2.3.tar.bz2` + # * `https://ftp.x.org/archive/individual/lib/libexample-1.2.3.tar.bz2` + # * `https://www.x.org/pub/individual/doc/example-1.2.3.tar.gz` + # + # The notable differences between URLs are as follows: + # * `www.x.org` and `ftp.x.org` seem to be interchangeable (we prefer + # `www.x.org`). + # * `/archive/` is the current top-level directory and `/pub/` will + # redirect to the same URL using `/archive/` instead. [The strategy + # handles this replacement to avoid the redirection.] + # * The `/individual/` directory contains a number of directories (e.g., + # app, data, doc, driver, font, lib, etc.) which contain a number of + # different archive files. + # + # Since this strategy ends up checking the same directory listing pages + # for multiple formulae, we've included a simple method of page caching. + # This prevents livecheck from fetching the same page more than once and + # also dramatically speeds up these checks. 
Eventually we hope to + # implement a more sophisticated page cache that all strategies using + # `PageMatch` can use (and we can simplify this strategy accordingly). + # + # The default regex identifies versions in archive files found in `href` + # attributes. + # + # @api public + class Xorg + NICE_NAME = "X.Org" + + # The `Regexp` used to determine if the strategy applies to the URL. + URL_MATCH_REGEX = %r{ + [/.]x\.org.*?/individual/| + freedesktop\.org/(?:archive|dist|software)/ + }ix.freeze + + # Used to cache page content, so we don't fetch the same pages + # repeatedly. + @page_data = {} + + # Whether the strategy can be applied to the provided URL. + # @param url [String] the URL to match against + # @return [Boolean] + def self.match?(url) + URL_MATCH_REGEX.match?(url) + end + + # Generates a URL and regex (if one isn't provided) and checks the + # content at the URL for new versions (using the regex for matching). + # + # The behavior in this method for matching text in the content using a + # regex is copied and modified from the `PageMatch` strategy, so that + # we can add some simple page caching. If this behavior is expanded to + # apply to all strategies that use `PageMatch` to identify versions, + # then the `Xorg` strategy can be brought in line with the others. + # @param url [String] the URL of the content to check + # @param regex [Regexp] a regex used for matching versions in content + # @return [Hash] + def self.find_versions(url, regex) + file_name = File.basename(url) + + /^(?<module_name>.+)-\d+/i =~ file_name + + # /pub/ URLs redirect to the same URL with /archive/, so we replace + # it to avoid the redirection. Removing the filename from the end of + # the URL gives us the relevant directory listing page. 
+ page_url = url.sub("x.org/pub/", "x.org/archive/").delete_suffix(file_name) + + regex ||= /href=.*?#{Regexp.escape(module_name)}[._-]v?(\d+(?:\.\d+)+)\.t/i + + match_data = { matches: {}, regex: regex, url: page_url } + + # Cache responses to avoid unnecessary duplicate fetches + @page_data[page_url] = URI.open(page_url).read unless @page_data.key?(page_url) + + matches = @page_data[page_url].scan(regex) + matches.map(&:first).uniq.each do |match| + match_data[:matches][match] = Version.new(match) + end + + match_data + end + end + end + end +end diff --git a/Library/Homebrew/test/livecheck/strategy/apache_spec.rb b/Library/Homebrew/test/livecheck/strategy/apache_spec.rb new file mode 100644 index 0000000000..b44ca71ea2 --- /dev/null +++ b/Library/Homebrew/test/livecheck/strategy/apache_spec.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require "livecheck/strategy/apache" + +describe Homebrew::Livecheck::Strategy::Apache do + subject(:apache) { described_class } + + let(:apache_url) { "https://www.apache.org/dyn/closer.lua?path=abc/1.2.3/def-1.2.3.tar.gz" } + let(:non_apache_url) { "https://brew.sh/test" } + + describe "::match?" 
do + it "returns true if the argument provided is an Apache URL" do + expect(apache.match?(apache_url)).to be true + end + + it "returns false if the argument provided is not an Apache URL" do + expect(apache.match?(non_apache_url)).to be false + end + end +end diff --git a/Library/Homebrew/test/livecheck/strategy/bitbucket_spec.rb b/Library/Homebrew/test/livecheck/strategy/bitbucket_spec.rb new file mode 100644 index 0000000000..2026d157a6 --- /dev/null +++ b/Library/Homebrew/test/livecheck/strategy/bitbucket_spec.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require "livecheck/strategy/bitbucket" + +describe Homebrew::Livecheck::Strategy::Bitbucket do + subject(:bitbucket) { described_class } + + let(:bitbucket_url) { "https://bitbucket.org/abc/def/get/1.2.3.tar.gz" } + let(:non_bitbucket_url) { "https://brew.sh/test" } + + describe "::match?" do + it "returns true if the argument provided is a Bitbucket URL" do + expect(bitbucket.match?(bitbucket_url)).to be true + end + + it "returns false if the argument provided is not a Bitbucket URL" do + expect(bitbucket.match?(non_bitbucket_url)).to be false + end + end +end diff --git a/Library/Homebrew/test/livecheck/strategy/git_spec.rb b/Library/Homebrew/test/livecheck/strategy/git_spec.rb new file mode 100644 index 0000000000..366ed35586 --- /dev/null +++ b/Library/Homebrew/test/livecheck/strategy/git_spec.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require "livecheck/strategy/git" + +describe Homebrew::Livecheck::Strategy::Git do + subject(:git) { described_class } + + let(:git_url) { "https://github.com/Homebrew/brew.git" } + let(:non_git_url) { "https://brew.sh/test" } + + describe "::tag_info", :needs_network do + it "returns the Git tags for the provided remote URL that match the regex provided" do + expect(git.tag_info(git_url, /^v?(\d+(?:\.\d+))$/)) + .not_to be_empty + end + end + + describe "::match?" 
do + it "returns true if the argument provided is a Git repository" do + expect(git.match?(git_url)).to be true + end + + it "returns false if the argument provided is not a Git repository" do + expect(git.match?(non_git_url)).to be false + end + end +end diff --git a/Library/Homebrew/test/livecheck/strategy/gnome_spec.rb b/Library/Homebrew/test/livecheck/strategy/gnome_spec.rb new file mode 100644 index 0000000000..79145e02c1 --- /dev/null +++ b/Library/Homebrew/test/livecheck/strategy/gnome_spec.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require "livecheck/strategy/gnome" + +describe Homebrew::Livecheck::Strategy::Gnome do + subject(:gnome) { described_class } + + let(:gnome_url) { "https://download.gnome.org/sources/abc/1.2/def-1.2.3.tar.xz" } + let(:non_gnome_url) { "https://brew.sh/test" } + + describe "::match?" do + it "returns true if the argument provided is a GNOME URL" do + expect(gnome.match?(gnome_url)).to be true + end + + it "returns false if the argument provided is not a GNOME URL" do + expect(gnome.match?(non_gnome_url)).to be false + end + end +end diff --git a/Library/Homebrew/test/livecheck/strategy/gnu_spec.rb b/Library/Homebrew/test/livecheck/strategy/gnu_spec.rb new file mode 100644 index 0000000000..82cfc98c73 --- /dev/null +++ b/Library/Homebrew/test/livecheck/strategy/gnu_spec.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "livecheck/strategy/gnu" + +describe Homebrew::Livecheck::Strategy::Gnu do + subject(:gnu) { described_class } + + let(:gnu_url) { "https://ftp.gnu.org/gnu/abc/def-1.2.3.tar.gz" } + let(:savannah_gnu_url) { "https://download.savannah.gnu.org/releases/abc/def-1.2.3.tar.gz" } + let(:non_gnu_url) { "https://brew.sh/test" } + + describe "::match?" 
do + it "returns true if the argument provided is a non-Savannah GNU URL" do + expect(gnu.match?(gnu_url)).to be true + end + + it "returns false if the argument provided is a Savannah GNU URL" do + expect(gnu.match?(savannah_gnu_url)).to be false + end + + it "returns false if the argument provided is not a GNU URL" do + expect(gnu.match?(non_gnu_url)).to be false + end + end +end diff --git a/Library/Homebrew/test/livecheck/strategy/hackage_spec.rb b/Library/Homebrew/test/livecheck/strategy/hackage_spec.rb new file mode 100644 index 0000000000..63e3346bdb --- /dev/null +++ b/Library/Homebrew/test/livecheck/strategy/hackage_spec.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require "livecheck/strategy/hackage" + +describe Homebrew::Livecheck::Strategy::Hackage do + subject(:hackage) { described_class } + + let(:hackage_url) { "https://hackage.haskell.org/package/abc-1.2.3/def-1.2.3.tar.gz" } + let(:non_hackage_url) { "https://brew.sh/test" } + + describe "::match?" do + it "returns true if the argument provided is a Hackage URL" do + expect(hackage.match?(hackage_url)).to be true + end + + it "returns false if the argument provided is not a Hackage URL" do + expect(hackage.match?(non_hackage_url)).to be false + end + end +end diff --git a/Library/Homebrew/test/livecheck/strategy/launchpad_spec.rb b/Library/Homebrew/test/livecheck/strategy/launchpad_spec.rb new file mode 100644 index 0000000000..54630bb5d6 --- /dev/null +++ b/Library/Homebrew/test/livecheck/strategy/launchpad_spec.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require "livecheck/strategy/launchpad" + +describe Homebrew::Livecheck::Strategy::Launchpad do + subject(:launchpad) { described_class } + + let(:launchpad_url) { "https://launchpad.net/abc/1.2/1.2.3/+download/def-1.2.3.tar.gz" } + let(:non_launchpad_url) { "https://brew.sh/test" } + + describe "::match?" 
do + it "returns true if the argument provided is a Launchpad URL" do + expect(launchpad.match?(launchpad_url)).to be true + end + + it "returns false if the argument provided is not a Launchpad URL" do + expect(launchpad.match?(non_launchpad_url)).to be false + end + end +end diff --git a/Library/Homebrew/test/livecheck/strategy/npm_spec.rb b/Library/Homebrew/test/livecheck/strategy/npm_spec.rb new file mode 100644 index 0000000000..3bee724891 --- /dev/null +++ b/Library/Homebrew/test/livecheck/strategy/npm_spec.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require "livecheck/strategy/npm" + +describe Homebrew::Livecheck::Strategy::Npm do + subject(:npm) { described_class } + + let(:npm_url) { "https://registry.npmjs.org/abc/-/def-1.2.3.tgz" } + let(:non_npm_url) { "https://brew.sh/test" } + + describe "::match?" do + it "returns true if the argument provided is an npm URL" do + expect(npm.match?(npm_url)).to be true + end + + it "returns false if the argument provided is not an npm URL" do + expect(npm.match?(non_npm_url)).to be false + end + end +end diff --git a/Library/Homebrew/test/livecheck/strategy/page_match_spec.rb b/Library/Homebrew/test/livecheck/strategy/page_match_spec.rb new file mode 100644 index 0000000000..1b615b2467 --- /dev/null +++ b/Library/Homebrew/test/livecheck/strategy/page_match_spec.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +require "livecheck/strategy/page_match" + +describe Homebrew::Livecheck::Strategy::PageMatch do + subject(:page_match) { described_class } + + let(:url) { "http://api.github.com/Homebrew/brew/releases/latest" } + + describe "::match?" 
do + it "returns true for any URL" do + expect(page_match.match?(url)).to be true + end + end +end diff --git a/Library/Homebrew/test/livecheck/strategy/pypi_spec.rb b/Library/Homebrew/test/livecheck/strategy/pypi_spec.rb new file mode 100644 index 0000000000..b8c49817ec --- /dev/null +++ b/Library/Homebrew/test/livecheck/strategy/pypi_spec.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require "livecheck/strategy/pypi" + +describe Homebrew::Livecheck::Strategy::Pypi do + subject(:pypi) { described_class } + + let(:pypi_url) { "https://files.pythonhosted.org/packages/ab/cd/efg/hij-1.2.3.tar.gz" } + let(:non_pypi_url) { "https://brew.sh/test" } + + describe "::match?" do + it "returns true if the argument provided is a PyPI URL" do + expect(pypi.match?(pypi_url)).to be true + end + + it "returns false if the argument provided is not a PyPI URL" do + expect(pypi.match?(non_pypi_url)).to be false + end + end +end diff --git a/Library/Homebrew/test/livecheck/strategy/sourceforge_spec.rb b/Library/Homebrew/test/livecheck/strategy/sourceforge_spec.rb new file mode 100644 index 0000000000..8ca98945a3 --- /dev/null +++ b/Library/Homebrew/test/livecheck/strategy/sourceforge_spec.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require "livecheck/strategy/sourceforge" + +describe Homebrew::Livecheck::Strategy::Sourceforge do + subject(:sourceforge) { described_class } + + let(:sourceforge_url) { "https://downloads.sourceforge.net/project/abc/def-1.2.3.tar.gz" } + let(:non_sourceforge_url) { "https://brew.sh/test" } + + describe "::match?" 
do + it "returns true if the argument provided is a SourceForge URL" do + expect(sourceforge.match?(sourceforge_url)).to be true + end + + it "returns false if the argument provided is not a SourceForge URL" do + expect(sourceforge.match?(non_sourceforge_url)).to be false + end + end +end diff --git a/Library/Homebrew/test/livecheck/strategy/xorg_spec.rb b/Library/Homebrew/test/livecheck/strategy/xorg_spec.rb new file mode 100644 index 0000000000..7db151cc78 --- /dev/null +++ b/Library/Homebrew/test/livecheck/strategy/xorg_spec.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require "livecheck/strategy/xorg" + +describe Homebrew::Livecheck::Strategy::Xorg do + subject(:xorg) { described_class } + + let(:xorg_url) { "https://www.x.org/archive/individual/app/abc-1.2.3.tar.bz2" } + let(:non_xorg_url) { "https://brew.sh/test" } + + describe "::match?" do + it "returns true if the argument provided is an X.Org URL" do + expect(xorg.match?(xorg_url)).to be true + end + + it "returns false if the argument provided is not an X.Org URL" do + expect(xorg.match?(non_xorg_url)).to be false + end + end +end diff --git a/Library/Homebrew/test/livecheck/strategy_spec.rb b/Library/Homebrew/test/livecheck/strategy_spec.rb new file mode 100644 index 0000000000..de0869a700 --- /dev/null +++ b/Library/Homebrew/test/livecheck/strategy_spec.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require "livecheck/strategy" + +describe Homebrew::Livecheck::Strategy do + subject(:strategy) { described_class } + + describe "::from_symbol" do + it "returns the Strategy module represented by the Symbol argument" do + expect(strategy.from_symbol(:page_match)).to eq(Homebrew::Livecheck::Strategy::PageMatch) + end + end + + describe "::from_url" do + let(:url) { "https://sourceforge.net/projects/test" } + + context "when no regex is provided" do + it "returns an array of usable strategies which doesn't include PageMatch" do + expect(strategy.from_url(url)).to 
eq([Homebrew::Livecheck::Strategy::Sourceforge]) + end + end + + context "when a regex is provided" do + it "returns an array of usable strategies including PageMatch, sorted in descending order by priority" do + expect(strategy.from_url(url, regex_provided: true)) + .to eq( + [Homebrew::Livecheck::Strategy::Sourceforge, Homebrew::Livecheck::Strategy::PageMatch], + ) + end + end + end +end