# typed: strict
# frozen_string_literal: true

require "livecheck/strategic"

module Homebrew
  module Livecheck
    module Strategy
      # The {Xorg} strategy identifies versions of software at x.org by
      # checking directory listing pages.
      #
      # X.Org URLs take one of the following formats, among several others:
      #
      # * `https://www.x.org/archive/individual/app/example-1.2.3.tar.bz2`
      # * `https://www.x.org/archive/individual/font/example-1.2.3.tar.bz2`
      # * `https://www.x.org/archive/individual/lib/libexample-1.2.3.tar.bz2`
      # * `https://ftp.x.org/archive/individual/lib/libexample-1.2.3.tar.bz2`
      # * `https://www.x.org/pub/individual/doc/example-1.2.3.tar.gz`
      # * `https://xorg.freedesktop.org/archive/individual/util/example-1.2.3.tar.xz`
      #
      # The notable differences between URLs are as follows:
      #
      # * `www.x.org` and `ftp.x.org` seem to be interchangeable (we prefer
      #   `www.x.org`).
      # * `/archive/` is the current top-level directory and `/pub/` will
      #   redirect to the same URL using `/archive/` instead. (The strategy
      #   handles this replacement to avoid the redirection.)
      # * The `/individual/` directory contains a number of directories (e.g.
      #   app, data, doc, driver, font, lib, etc.) which contain a number of
      #   different archive files.
      #
      # Since this strategy ends up checking the same directory listing pages
      # for multiple formulae, we've included a simple method of page caching.
      # This prevents livecheck from fetching the same page more than once and
      # also dramatically speeds up these checks. Eventually we hope to
      # implement a more sophisticated page cache that all strategies using
      # {PageMatch} can use (allowing us to simplify this strategy accordingly).
      #
      # The default regex identifies versions in archive files found in `href`
      # attributes.
      #
      # @api public
      class Xorg
        extend Strategic

        NICE_NAME = "X.Org"

        # A `Regexp` used in determining if the strategy applies to the URL and
        # also as part of extracting the module name from the URL basename.
        # The `module_name` named capture is read by {generate_input_values}.
        MODULE_REGEX = /(?<module_name>.+)-\d+/i

        # A `Regexp` used to extract the module name from the URL basename.
        FILENAME_REGEX = /^#{MODULE_REGEX.source.strip}/i

        # The `Regexp` used to determine if the strategy applies to the URL.
        URL_MATCH_REGEX = %r{
          ^https?://(?:[^/]+?\.)* # Scheme and any leading subdomains
          (?:x\.org/(?:[^/]+/)*individual
          |freedesktop\.org/(?:archive|dist|software)
          |archive\.mesa3d\.org)
          /(?:[^/]+/)*#{MODULE_REGEX.source.strip}
        }ix

        # Used to cache page content, so we don't fetch the same pages
        # repeatedly. Keys are the generated directory listing URLs and
        # values are the content returned for them.
        @page_data = T.let({}, T::Hash[String, String])

        # Whether the strategy can be applied to the provided URL.
        #
        # @param url [String] the URL to match against
        # @return [Boolean]
        sig { override.params(url: String).returns(T::Boolean) }
        def self.match?(url)
          URL_MATCH_REGEX.match?(url)
        end

        # Extracts information from a provided URL and uses it to generate
        # various input values used by the strategy to check for new versions.
        # Some of these values act as defaults and can be overridden in a
        # `livecheck` block.
        #
        # @param url [String] the URL used to generate values
        # @return [Hash] a hash with `:url` and `:regex` keys, or an empty
        #   hash if the URL basename doesn't match {FILENAME_REGEX}
        sig { params(url: String).returns(T::Hash[Symbol, T.untyped]) }
        def self.generate_input_values(url)
          values = {}

          file_name = File.basename(url)
          match = file_name.match(FILENAME_REGEX)
          return values if match.blank?

          # /pub/ URLs redirect to the same URL with /archive/, so we replace
          # it to avoid the redirection. Removing the filename from the end of
          # the URL gives us the relevant directory listing page.
          values[:url] = url.sub("x.org/pub/", "x.org/archive/").delete_suffix(file_name)

          # `Regexp.escape` also escapes hyphens (as `\-`); the `gsub` turns
          # them back into plain hyphens in the generated regex.
          regex_name = Regexp.escape(T.must(match[:module_name])).gsub("\\-", "-")

          # Example regex: `/href=.*?example[._-]v?(\d+(?:\.\d+)+)\.t/i`
          values[:regex] = /href=.*?#{regex_name}[._-]v?(\d+(?:\.\d+)+)\.t/i

          values
        end

        # Generates a URL and regex (if one isn't provided) and checks the
        # content at the URL for new versions (using the regex for matching).
        #
        # The behavior in this method for matching text in the content using a
        # regex is copied and modified from the {PageMatch} strategy, so that
        # we can add some simple page caching. If this behavior is expanded to
        # apply to all strategies that use {PageMatch} to identify versions,
        # then this strategy can be brought in line with the others.
        #
        # @param url [String] the URL of the content to check
        # @param regex [Regexp] a regex used for matching versions in content
        # @param options [Options] options to modify behavior
        # @param block [Proc, nil] a block forwarded to
        #   {PageMatch.find_versions}
        # @return [Hash]
        sig {
          override(allow_incompatible: true).params(
            url:     String,
            regex:   T.nilable(Regexp),
            options: Options,
            block:   T.nilable(Proc),
          ).returns(T::Hash[Symbol, T.anything])
        }
        def self.find_versions(url:, regex: nil, options: Options.new, &block)
          generated = generate_input_values(url)
          generated_url = generated[:url]

          # Use the cached page content to avoid duplicate fetches
          cached_content = @page_data[generated_url]
          match_data = PageMatch.find_versions(
            url:              generated_url,
            regex:            regex || generated[:regex],
            provided_content: cached_content,
            options:,
            &block
          )
          content = match_data[:content]
          return match_data if content.blank?

          # Cache any new page content (an existing cache entry is left
          # untouched)
          @page_data[generated_url] = content unless cached_content

          match_data
        end
      end
    end
  end
end