Refactor livecheck strategies around match regex
This commit is contained in:
parent
86fee106a3
commit
2060f13de1
@ -22,7 +22,14 @@ module Homebrew
|
|||||||
# @api public
|
# @api public
|
||||||
class Apache
|
class Apache
|
||||||
# The `Regexp` used to determine if the strategy applies to the URL.
|
# The `Regexp` used to determine if the strategy applies to the URL.
|
||||||
URL_MATCH_REGEX = %r{www\.apache\.org/dyn/.+path=.+}i.freeze
|
URL_MATCH_REGEX = %r{
|
||||||
|
^https?://www\.apache\.org
|
||||||
|
/dyn/.+path=
|
||||||
|
(?<path>.+?)/ # Path to directory of files or version directories
|
||||||
|
(?<prefix>[^/]*?) # Any text in filename or directory before version
|
||||||
|
v?\d+(?:\.\d+)+ # The numeric version
|
||||||
|
(?<suffix>/|[^/]*) # Any text in filename or directory after version
|
||||||
|
}ix.freeze
|
||||||
|
|
||||||
# Whether the strategy can be applied to the provided URL.
|
# Whether the strategy can be applied to the provided URL.
|
||||||
#
|
#
|
||||||
@ -39,25 +46,19 @@ module Homebrew
|
|||||||
# @param regex [Regexp] a regex used for matching versions in content
|
# @param regex [Regexp] a regex used for matching versions in content
|
||||||
# @return [Hash]
|
# @return [Hash]
|
||||||
def self.find_versions(url, regex = nil, &block)
|
def self.find_versions(url, regex = nil, &block)
|
||||||
%r{
|
match = url.match(URL_MATCH_REGEX)
|
||||||
path=
|
|
||||||
(?<path>.+?)/ # Path to directory of files or version directories
|
|
||||||
(?<prefix>[^/]*?) # Any text in filename or directory before version
|
|
||||||
v?\d+(?:\.\d+)+ # The numeric version
|
|
||||||
(?<suffix>/|[^/]*) # Any text in filename or directory after version
|
|
||||||
}ix =~ url
|
|
||||||
|
|
||||||
# Use `\.t` instead of specific tarball extensions (e.g. .tar.gz)
|
# Use `\.t` instead of specific tarball extensions (e.g. .tar.gz)
|
||||||
suffix.sub!(/\.t(?:ar\..+|[a-z0-9]+)$/i, "\.t")
|
suffix = match[:suffix].sub(/\.t(?:ar\..+|[a-z0-9]+)$/i, "\.t")
|
||||||
|
|
||||||
# Example URL: `https://archive.apache.org/dist/example/`
|
# Example URL: `https://archive.apache.org/dist/example/`
|
||||||
page_url = "https://archive.apache.org/dist/#{path}/"
|
page_url = "https://archive.apache.org/dist/#{match[:path]}/"
|
||||||
|
|
||||||
# Example directory regex: `%r{href=["']?v?(\d+(?:\.\d+)+)/}i`
|
# Example directory regex: `%r{href=["']?v?(\d+(?:\.\d+)+)/}i`
|
||||||
# Example file regexes:
|
# Example file regexes:
|
||||||
# * `/href=["']?example-v?(\d+(?:\.\d+)+)\.t/i`
|
# * `/href=["']?example-v?(\d+(?:\.\d+)+)\.t/i`
|
||||||
# * `/href=["']?example-v?(\d+(?:\.\d+)+)-bin\.zip/i`
|
# * `/href=["']?example-v?(\d+(?:\.\d+)+)-bin\.zip/i`
|
||||||
regex ||= /href=["']?#{Regexp.escape(prefix)}v?(\d+(?:\.\d+)+)#{Regexp.escape(suffix)}/i
|
regex ||= /href=["']?#{Regexp.escape(match[:prefix])}v?(\d+(?:\.\d+)+)#{Regexp.escape(suffix)}/i
|
||||||
|
|
||||||
PageMatch.find_versions(page_url, regex, &block)
|
PageMatch.find_versions(page_url, regex, &block)
|
||||||
end
|
end
|
||||||
|
|||||||
@ -21,7 +21,13 @@ module Homebrew
|
|||||||
NICE_NAME = "CPAN"
|
NICE_NAME = "CPAN"
|
||||||
|
|
||||||
# The `Regexp` used to determine if the strategy applies to the URL.
|
# The `Regexp` used to determine if the strategy applies to the URL.
|
||||||
URL_MATCH_REGEX = %r{^https?://cpan\.metacpan\.org/authors/id(?:/[^/]+){3,}/[^/]+}i.freeze
|
URL_MATCH_REGEX = %r{
|
||||||
|
^https?://cpan\.metacpan\.org
|
||||||
|
(?<path>/authors/id(?:/[^/]+){3,}/) # Path before the filename
|
||||||
|
(?<prefix>[^/]+) # Filename text before the version
|
||||||
|
-v?\d+(?:\.\d+)* # The numeric version
|
||||||
|
(?<suffix>[^/]+) # Filename text after the version
|
||||||
|
}ix.freeze
|
||||||
|
|
||||||
# Whether the strategy can be applied to the provided URL.
|
# Whether the strategy can be applied to the provided URL.
|
||||||
#
|
#
|
||||||
@ -38,21 +44,16 @@ module Homebrew
|
|||||||
# @param regex [Regexp] a regex used for matching versions in content
|
# @param regex [Regexp] a regex used for matching versions in content
|
||||||
# @return [Hash]
|
# @return [Hash]
|
||||||
def self.find_versions(url, regex = nil, &block)
|
def self.find_versions(url, regex = nil, &block)
|
||||||
%r{
|
match = url.match(URL_MATCH_REGEX)
|
||||||
(?<path>/authors/id(?:/[^/]+){3,}/) # Path before the filename
|
|
||||||
(?<prefix>[^/]+) # Filename text before the version
|
|
||||||
-v?\d+(?:\.\d+)* # The numeric version
|
|
||||||
(?<suffix>[^/]+) # Filename text after the version
|
|
||||||
}ix =~ url
|
|
||||||
|
|
||||||
# Use `\.t` instead of specific tarball extensions (e.g. .tar.gz)
|
# Use `\.t` instead of specific tarball extensions (e.g. .tar.gz)
|
||||||
suffix.sub!(/\.t(?:ar\..+|[a-z0-9]+)$/i, "\.t")
|
suffix = match[:suffix].sub(/\.t(?:ar\..+|[a-z0-9]+)$/i, "\.t")
|
||||||
|
|
||||||
# The directory listing page where the archive files are found
|
# The directory listing page where the archive files are found
|
||||||
page_url = "https://cpan.metacpan.org#{path}"
|
page_url = "https://cpan.metacpan.org#{match[:path]}"
|
||||||
|
|
||||||
# Example regex: `/href=.*?Brew[._-]v?(\d+(?:\.\d+)*)\.t/i`
|
# Example regex: `/href=.*?Brew[._-]v?(\d+(?:\.\d+)*)\.t/i`
|
||||||
regex ||= /href=.*?#{prefix}[._-]v?(\d+(?:\.\d+)*)#{Regexp.escape(suffix)}/i
|
regex ||= /href=.*?#{match[:prefix]}[._-]v?(\d+(?:\.\d+)*)#{Regexp.escape(suffix)}/i
|
||||||
|
|
||||||
PageMatch.find_versions(page_url, regex, &block)
|
PageMatch.find_versions(page_url, regex, &block)
|
||||||
end
|
end
|
||||||
|
|||||||
@ -40,7 +40,11 @@ module Homebrew
|
|||||||
PRIORITY = 0
|
PRIORITY = 0
|
||||||
|
|
||||||
# The `Regexp` used to determine if the strategy applies to the URL.
|
# The `Regexp` used to determine if the strategy applies to the URL.
|
||||||
URL_MATCH_REGEX = %r{//github\.com(?:/downloads)?(?:/[^/]+){2}}i.freeze
|
URL_MATCH_REGEX = %r{
|
||||||
|
^https?://github\.com
|
||||||
|
/(?:downloads/)?(?<username>[^/]+) # The GitHub username
|
||||||
|
/(?<repository>[^/]+) # The GitHub repository name
|
||||||
|
}ix.freeze
|
||||||
|
|
||||||
# Whether the strategy can be applied to the provided URL.
|
# Whether the strategy can be applied to the provided URL.
|
||||||
#
|
#
|
||||||
@ -57,10 +61,10 @@ module Homebrew
|
|||||||
# @param regex [Regexp] a regex used for matching versions in content
|
# @param regex [Regexp] a regex used for matching versions in content
|
||||||
# @return [Hash]
|
# @return [Hash]
|
||||||
def self.find_versions(url, regex = nil, &block)
|
def self.find_versions(url, regex = nil, &block)
|
||||||
%r{github\.com/(?:downloads/)?(?<username>[^/]+)/(?<repository>[^/]+)}i =~ url.sub(/\.git$/i, "")
|
match = url.sub(/\.git$/i, "").match(URL_MATCH_REGEX)
|
||||||
|
|
||||||
# Example URL: `https://github.com/example/example/releases/latest`
|
# Example URL: `https://github.com/example/example/releases/latest`
|
||||||
page_url = "https://github.com/#{username}/#{repository}/releases/latest"
|
page_url = "https://github.com/#{match[:username]}/#{match[:repository]}/releases/latest"
|
||||||
|
|
||||||
# The default regex is the same for all URLs using this strategy
|
# The default regex is the same for all URLs using this strategy
|
||||||
regex ||= %r{href=.*?/tag/v?(\d+(?:\.\d+)+)["' >]}i
|
regex ||= %r{href=.*?/tag/v?(\d+(?:\.\d+)+)["' >]}i
|
||||||
|
|||||||
@ -20,7 +20,11 @@ module Homebrew
|
|||||||
NICE_NAME = "GNOME"
|
NICE_NAME = "GNOME"
|
||||||
|
|
||||||
# The `Regexp` used to determine if the strategy applies to the URL.
|
# The `Regexp` used to determine if the strategy applies to the URL.
|
||||||
URL_MATCH_REGEX = %r{^https?://download\.gnome\.org/sources/[^/]+/}i.freeze
|
URL_MATCH_REGEX = %r{
|
||||||
|
^https?://download\.gnome\.org
|
||||||
|
/sources
|
||||||
|
/(?<package_name>[^/]+)/ # The GNOME package name
|
||||||
|
}ix.freeze
|
||||||
|
|
||||||
# Whether the strategy can be applied to the provided URL.
|
# Whether the strategy can be applied to the provided URL.
|
||||||
#
|
#
|
||||||
@ -37,9 +41,9 @@ module Homebrew
|
|||||||
# @param regex [Regexp] a regex used for matching versions in content
|
# @param regex [Regexp] a regex used for matching versions in content
|
||||||
# @return [Hash]
|
# @return [Hash]
|
||||||
def self.find_versions(url, regex = nil, &block)
|
def self.find_versions(url, regex = nil, &block)
|
||||||
%r{/sources/(?<package_name>[^/]+)/}i =~ url
|
match = url.match(URL_MATCH_REGEX)
|
||||||
|
|
||||||
page_url = "https://download.gnome.org/sources/#{package_name}/cache.json"
|
page_url = "https://download.gnome.org/sources/#{match[:package_name]}/cache.json"
|
||||||
|
|
||||||
# GNOME archive files seem to use a standard filename format, so we
|
# GNOME archive files seem to use a standard filename format, so we
|
||||||
# count on the delimiter between the package name and numeric version
|
# count on the delimiter between the package name and numeric version
|
||||||
@ -51,7 +55,7 @@ module Homebrew
|
|||||||
# development versions. See: https://www.gnome.org/gnome-3/source/
|
# development versions. See: https://www.gnome.org/gnome-3/source/
|
||||||
#
|
#
|
||||||
# Example regex: `/example-(\d+\.([0-8]\d*?)?[02468](?:\.\d+)*?)\.t/i`
|
# Example regex: `/example-(\d+\.([0-8]\d*?)?[02468](?:\.\d+)*?)\.t/i`
|
||||||
regex ||= /#{Regexp.escape(package_name)}-(\d+\.([0-8]\d*?)?[02468](?:\.\d+)*?)\.t/i
|
regex ||= /#{Regexp.escape(match[:package_name])}-(\d+\.([0-8]\d*?)?[02468](?:\.\d+)*?)\.t/i
|
||||||
|
|
||||||
PageMatch.find_versions(page_url, regex, &block)
|
PageMatch.find_versions(page_url, regex, &block)
|
||||||
end
|
end
|
||||||
|
|||||||
@ -33,18 +33,11 @@ module Homebrew
|
|||||||
|
|
||||||
# The `Regexp` used to determine if the strategy applies to the URL.
|
# The `Regexp` used to determine if the strategy applies to the URL.
|
||||||
URL_MATCH_REGEX = %r{
|
URL_MATCH_REGEX = %r{
|
||||||
//.+?\.gnu\.org$|
|
^https?://
|
||||||
gnu\.org/(?:gnu|software)/
|
(?:(?:[^/]+?\.)*gnu\.org/(?:gnu|software)/(?<project_name>[^/]+)/
|
||||||
|
|(?<project_name>[^/]+)\.gnu\.org/?$)
|
||||||
}ix.freeze
|
}ix.freeze
|
||||||
|
|
||||||
# The `Regexp` used to parse the project name from the provided URL.
|
|
||||||
# The strategy uses this information to create the URL to check and
|
|
||||||
# the default regex.
|
|
||||||
PROJECT_NAME_REGEXES = [
|
|
||||||
%r{/(?:gnu|software)/(?<project_name>.+?)/}i,
|
|
||||||
%r{//(?<project_name>.+?)\.gnu\.org(?:/)?$}i,
|
|
||||||
].freeze
|
|
||||||
|
|
||||||
# Whether the strategy can be applied to the provided URL.
|
# Whether the strategy can be applied to the provided URL.
|
||||||
#
|
#
|
||||||
# @param url [String] the URL to match against
|
# @param url [String] the URL to match against
|
||||||
@ -60,24 +53,10 @@ module Homebrew
|
|||||||
# @param regex [Regexp] a regex used for matching versions in content
|
# @param regex [Regexp] a regex used for matching versions in content
|
||||||
# @return [Hash]
|
# @return [Hash]
|
||||||
def self.find_versions(url, regex = nil, &block)
|
def self.find_versions(url, regex = nil, &block)
|
||||||
project_names = PROJECT_NAME_REGEXES.map do |project_name_regex|
|
match = url.match(URL_MATCH_REGEX)
|
||||||
m = url.match(project_name_regex)
|
|
||||||
m["project_name"] if m
|
|
||||||
end.compact
|
|
||||||
return { matches: {}, regex: regex, url: url } if project_names.blank?
|
|
||||||
|
|
||||||
if project_names.length > 1
|
|
||||||
odebug <<~EOS
|
|
||||||
|
|
||||||
Multiple project names found: #{match_list}
|
|
||||||
|
|
||||||
EOS
|
|
||||||
end
|
|
||||||
|
|
||||||
project_name = project_names.first
|
|
||||||
|
|
||||||
# The directory listing page for the project's files
|
# The directory listing page for the project's files
|
||||||
page_url = "http://ftp.gnu.org/gnu/#{project_name}/?C=M&O=D"
|
page_url = "http://ftp.gnu.org/gnu/#{match[:project_name]}/?C=M&O=D"
|
||||||
|
|
||||||
# The default regex consists of the following parts:
|
# The default regex consists of the following parts:
|
||||||
# * `href=.*?`: restricts matching to URLs in `href` attributes
|
# * `href=.*?`: restricts matching to URLs in `href` attributes
|
||||||
@ -87,7 +66,7 @@ module Homebrew
|
|||||||
# * `(?:\.[a-z]+|/)`: the file extension (a trailing delimiter)
|
# * `(?:\.[a-z]+|/)`: the file extension (a trailing delimiter)
|
||||||
#
|
#
|
||||||
# Example regex: `%r{href=.*?example[._-]v?(\d+(?:\.\d+)*)(?:\.[a-z]+|/)}i`
|
# Example regex: `%r{href=.*?example[._-]v?(\d+(?:\.\d+)*)(?:\.[a-z]+|/)}i`
|
||||||
regex ||= %r{href=.*?#{project_name}[._-]v?(\d+(?:\.\d+)*)(?:\.[a-z]+|/)}i
|
regex ||= %r{href=.*?#{match[:project_name]}[._-]v?(\d+(?:\.\d+)*)(?:\.[a-z]+|/)}i
|
||||||
|
|
||||||
PageMatch.find_versions(page_url, regex, &block)
|
PageMatch.find_versions(page_url, regex, &block)
|
||||||
end
|
end
|
||||||
|
|||||||
@ -17,8 +17,19 @@ module Homebrew
|
|||||||
#
|
#
|
||||||
# @api public
|
# @api public
|
||||||
class Hackage
|
class Hackage
|
||||||
# The `Regexp` used to determine if the strategy applies to the URL.
|
# A `Regexp` used in determining if the strategy applies to the URL and
|
||||||
URL_MATCH_REGEX = %r{^https?://(?:downloads|hackage)\.haskell\.org(?:/[^/]+){3}}i.freeze
|
# also as part of extracting the package name from the URL basename.
|
||||||
|
PACKAGE_NAME_REGEX = /(?<package_name>.+?)-\d+/i.freeze
|
||||||
|
|
||||||
|
# A `Regexp` used to extract the package name from the URL basename.
|
||||||
|
FILENAME_REGEX = /^#{PACKAGE_NAME_REGEX.source.strip}/i.freeze
|
||||||
|
|
||||||
|
# A `Regexp` used in determining if the strategy applies to the URL.
|
||||||
|
URL_MATCH_REGEX = %r{
|
||||||
|
^https?://(?:downloads|hackage)\.haskell\.org
|
||||||
|
(?:/[^/]+)+ # Path before the filename
|
||||||
|
#{PACKAGE_NAME_REGEX.source.strip}
|
||||||
|
}ix.freeze
|
||||||
|
|
||||||
# Whether the strategy can be applied to the provided URL.
|
# Whether the strategy can be applied to the provided URL.
|
||||||
#
|
#
|
||||||
@ -35,13 +46,13 @@ module Homebrew
|
|||||||
# @param regex [Regexp] a regex used for matching versions in content
|
# @param regex [Regexp] a regex used for matching versions in content
|
||||||
# @return [Hash]
|
# @return [Hash]
|
||||||
def self.find_versions(url, regex = nil, &block)
|
def self.find_versions(url, regex = nil, &block)
|
||||||
/^(?<package_name>.+?)-\d+/i =~ File.basename(url)
|
match = File.basename(url).match(FILENAME_REGEX)
|
||||||
|
|
||||||
# A page containing a directory listing of the latest source tarball
|
# A page containing a directory listing of the latest source tarball
|
||||||
page_url = "https://hackage.haskell.org/package/#{package_name}/src/"
|
page_url = "https://hackage.haskell.org/package/#{match[:package_name]}/src/"
|
||||||
|
|
||||||
# Example regex: `%r{<h3>example-(.*?)/?</h3>}i`
|
# Example regex: `%r{<h3>example-(.*?)/?</h3>}i`
|
||||||
regex ||= %r{<h3>#{Regexp.escape(package_name)}-(.*?)/?</h3>}i
|
regex ||= %r{<h3>#{Regexp.escape(match[:package_name])}-(.*?)/?</h3>}i
|
||||||
|
|
||||||
PageMatch.find_versions(page_url, regex, &block)
|
PageMatch.find_versions(page_url, regex, &block)
|
||||||
end
|
end
|
||||||
|
|||||||
@ -24,7 +24,10 @@ module Homebrew
|
|||||||
# @api public
|
# @api public
|
||||||
class Launchpad
|
class Launchpad
|
||||||
# The `Regexp` used to determine if the strategy applies to the URL.
|
# The `Regexp` used to determine if the strategy applies to the URL.
|
||||||
URL_MATCH_REGEX = /launchpad\.net/i.freeze
|
URL_MATCH_REGEX = %r{
|
||||||
|
^https?://(?:[^/]+?\.)*launchpad\.net
|
||||||
|
/(?<project_name>[^/]+) # The Launchpad project name
|
||||||
|
}ix.freeze
|
||||||
|
|
||||||
# Whether the strategy can be applied to the provided URL.
|
# Whether the strategy can be applied to the provided URL.
|
||||||
#
|
#
|
||||||
@ -41,10 +44,10 @@ module Homebrew
|
|||||||
# @param regex [Regexp] a regex used for matching versions in content
|
# @param regex [Regexp] a regex used for matching versions in content
|
||||||
# @return [Hash]
|
# @return [Hash]
|
||||||
def self.find_versions(url, regex = nil, &block)
|
def self.find_versions(url, regex = nil, &block)
|
||||||
%r{launchpad\.net/(?<project_name>[^/]+)}i =~ url
|
match = url.match(URL_MATCH_REGEX)
|
||||||
|
|
||||||
# The main page for the project on Launchpad
|
# The main page for the project on Launchpad
|
||||||
page_url = "https://launchpad.net/#{project_name}"
|
page_url = "https://launchpad.net/#{match[:project_name]}"
|
||||||
|
|
||||||
# The default regex is the same for all URLs using this strategy
|
# The default regex is the same for all URLs using this strategy
|
||||||
regex ||= %r{class="[^"]*version[^"]*"[^>]*>\s*Latest version is (.+)\s*</}
|
regex ||= %r{class="[^"]*version[^"]*"[^>]*>\s*Latest version is (.+)\s*</}
|
||||||
|
|||||||
@ -20,7 +20,10 @@ module Homebrew
|
|||||||
NICE_NAME = "npm"
|
NICE_NAME = "npm"
|
||||||
|
|
||||||
# The `Regexp` used to determine if the strategy applies to the URL.
|
# The `Regexp` used to determine if the strategy applies to the URL.
|
||||||
URL_MATCH_REGEX = %r{^https?://registry\.npmjs\.org(?:/[^/]+)?/[^/]+/-/}i.freeze
|
URL_MATCH_REGEX = %r{
|
||||||
|
^https?://registry\.npmjs\.org
|
||||||
|
/(?<package_name>.+?)/-/ # The npm package name
|
||||||
|
}ix.freeze
|
||||||
|
|
||||||
# Whether the strategy can be applied to the provided URL.
|
# Whether the strategy can be applied to the provided URL.
|
||||||
#
|
#
|
||||||
@ -37,14 +40,14 @@ module Homebrew
|
|||||||
# @param regex [Regexp] a regex used for matching versions in content
|
# @param regex [Regexp] a regex used for matching versions in content
|
||||||
# @return [Hash]
|
# @return [Hash]
|
||||||
def self.find_versions(url, regex = nil, &block)
|
def self.find_versions(url, regex = nil, &block)
|
||||||
%r{registry\.npmjs\.org/(?<package_name>(?:[^/]+/)?[^/]+)/-/}i =~ url
|
match = url.match(URL_MATCH_REGEX)
|
||||||
|
|
||||||
page_url = "https://www.npmjs.com/package/#{package_name}?activeTab=versions"
|
page_url = "https://www.npmjs.com/package/#{match[:package_name]}?activeTab=versions"
|
||||||
|
|
||||||
# Example regexes:
|
# Example regexes:
|
||||||
# * `%r{href=.*?/package/example/v/(\d+(?:\.\d+)+)"}i`
|
# * `%r{href=.*?/package/example/v/(\d+(?:\.\d+)+)"}i`
|
||||||
# * `%r{href=.*?/package/@example/example/v/(\d+(?:\.\d+)+)"}i`
|
# * `%r{href=.*?/package/@example/example/v/(\d+(?:\.\d+)+)"}i`
|
||||||
regex ||= %r{href=.*?/package/#{Regexp.escape(package_name)}/v/(\d+(?:\.\d+)+)"}i
|
regex ||= %r{href=.*?/package/#{Regexp.escape(match[:package_name])}/v/(\d+(?:\.\d+)+)"}i
|
||||||
|
|
||||||
PageMatch.find_versions(page_url, regex, &block)
|
PageMatch.find_versions(page_url, regex, &block)
|
||||||
end
|
end
|
||||||
|
|||||||
@ -19,8 +19,21 @@ module Homebrew
|
|||||||
class Pypi
|
class Pypi
|
||||||
NICE_NAME = "PyPI"
|
NICE_NAME = "PyPI"
|
||||||
|
|
||||||
|
# The `Regexp` used to extract the package name and suffix (e.g., file
|
||||||
|
# extension) from the URL basename.
|
||||||
|
FILENAME_REGEX = /
|
||||||
|
(?<package_name>.+)- # The package name followed by a hyphen
|
||||||
|
.*? # The version string
|
||||||
|
(?<suffix>\.tar\.[a-z0-9]+|\.[a-z0-9]+)$ # Filename extension
|
||||||
|
/ix.freeze
|
||||||
|
|
||||||
# The `Regexp` used to determine if the strategy applies to the URL.
|
# The `Regexp` used to determine if the strategy applies to the URL.
|
||||||
URL_MATCH_REGEX = %r{^https?://files\.pythonhosted\.org/packages(?:/[^/]+){4}i}.freeze
|
URL_MATCH_REGEX = %r{
|
||||||
|
^https?://files\.pythonhosted\.org
|
||||||
|
/packages
|
||||||
|
(?:/[^/]+)+ # The hexadecimal paths before the filename
|
||||||
|
/#{FILENAME_REGEX.source.strip} # The filename
|
||||||
|
}ix.freeze
|
||||||
|
|
||||||
# Whether the strategy can be applied to the provided URL.
|
# Whether the strategy can be applied to the provided URL.
|
||||||
#
|
#
|
||||||
@ -37,23 +50,19 @@ module Homebrew
|
|||||||
# @param regex [Regexp] a regex used for matching versions in content
|
# @param regex [Regexp] a regex used for matching versions in content
|
||||||
# @return [Hash]
|
# @return [Hash]
|
||||||
def self.find_versions(url, regex = nil, &block)
|
def self.find_versions(url, regex = nil, &block)
|
||||||
/
|
match = File.basename(url).match(FILENAME_REGEX)
|
||||||
(?<package_name>.+)- # The package name followed by a hyphen
|
|
||||||
.*? # The version string
|
|
||||||
(?<suffix>\.tar\.[a-z0-9]+|\.[a-z0-9]+)$ # Filename extension
|
|
||||||
/ix =~ File.basename(url)
|
|
||||||
|
|
||||||
# Use `\.t` instead of specific tarball extensions (e.g. .tar.gz)
|
# Use `\.t` instead of specific tarball extensions (e.g. .tar.gz)
|
||||||
suffix.sub!(/\.t(?:ar\..+|[a-z0-9]+)$/i, "\.t")
|
suffix = match[:suffix].sub(/\.t(?:ar\..+|[a-z0-9]+)$/i, "\.t")
|
||||||
|
|
||||||
# It's not technically necessary to have the `#files` fragment at the
|
# It's not technically necessary to have the `#files` fragment at the
|
||||||
# end of the URL but it makes the debug output a bit more useful.
|
# end of the URL but it makes the debug output a bit more useful.
|
||||||
page_url = "https://pypi.org/project/#{package_name.gsub(/%20|_/, "-")}/#files"
|
page_url = "https://pypi.org/project/#{match[:package_name].gsub(/%20|_/, "-")}/#files"
|
||||||
|
|
||||||
# Example regex: `%r{href=.*?/packages.*?/example[._-]v?(\d+(?:\.\d+)*).t}i`.
|
# Example regex: `%r{href=.*?/packages.*?/example[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)\.t}i`
|
||||||
regex ||=
|
re_package_name = Regexp.escape(match[:package_name])
|
||||||
%r{href=.*?/packages.*?/#{Regexp.escape(package_name)}[._-]
|
re_suffix = Regexp.escape(suffix)
|
||||||
v?(\d+(?:\.\d+)*(.post\d+)?)#{Regexp.escape(suffix)}}ix
|
regex ||= %r{href=.*?/packages.*?/#{re_package_name}[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)#{re_suffix}}i
|
||||||
|
|
||||||
PageMatch.find_versions(page_url, regex, &block)
|
PageMatch.find_versions(page_url, regex, &block)
|
||||||
end
|
end
|
||||||
|
|||||||
@ -34,7 +34,12 @@ module Homebrew
|
|||||||
NICE_NAME = "SourceForge"
|
NICE_NAME = "SourceForge"
|
||||||
|
|
||||||
# The `Regexp` used to determine if the strategy applies to the URL.
|
# The `Regexp` used to determine if the strategy applies to the URL.
|
||||||
URL_MATCH_REGEX = /(?:sourceforge|sf)\.net/i.freeze
|
URL_MATCH_REGEX = %r{
|
||||||
|
^https?://(?:[^/]+?\.)*(?:sourceforge|sf)\.net
|
||||||
|
(?:/projects?/(?<project_name>[^/]+)/
|
||||||
|
|/p/(?<project_name>[^/]+)/
|
||||||
|
|(?::/cvsroot)?/(?<project_name>[^/]+))
|
||||||
|
}ix.freeze
|
||||||
|
|
||||||
# Whether the strategy can be applied to the provided URL.
|
# Whether the strategy can be applied to the provided URL.
|
||||||
#
|
#
|
||||||
@ -51,20 +56,14 @@ module Homebrew
|
|||||||
# @param regex [Regexp] a regex used for matching versions in content
|
# @param regex [Regexp] a regex used for matching versions in content
|
||||||
# @return [Hash]
|
# @return [Hash]
|
||||||
def self.find_versions(url, regex = nil, &block)
|
def self.find_versions(url, regex = nil, &block)
|
||||||
if url.include?("/project")
|
match = url.match(URL_MATCH_REGEX)
|
||||||
%r{/projects?/(?<project_name>[^/]+)/}i =~ url
|
|
||||||
elsif url.include?(".net/p/")
|
|
||||||
%r{\.net/p/(?<project_name>[^/]+)/}i =~ url
|
|
||||||
else
|
|
||||||
%r{\.net(?::/cvsroot)?/(?<project_name>[^/]+)}i =~ url
|
|
||||||
end
|
|
||||||
|
|
||||||
page_url = "https://sourceforge.net/projects/#{project_name}/rss"
|
page_url = "https://sourceforge.net/projects/#{match[:project_name]}/rss"
|
||||||
|
|
||||||
# It may be possible to improve the default regex but there's quite a
|
# It may be possible to improve the default regex but there's quite a
|
||||||
# bit of variation between projects and it can be challenging to
|
# bit of variation between projects and it can be challenging to
|
||||||
# create something that works for most URLs.
|
# create something that works for most URLs.
|
||||||
regex ||= %r{url=.*?/#{Regexp.escape(project_name)}/files/.*?[-_/](\d+(?:[-.]\d+)+)[-_/%.]}i
|
regex ||= %r{url=.*?/#{Regexp.escape(match[:project_name])}/files/.*?[-_/](\d+(?:[-.]\d+)+)[-_/%.]}i
|
||||||
|
|
||||||
PageMatch.find_versions(page_url, regex, &block)
|
PageMatch.find_versions(page_url, regex, &block)
|
||||||
end
|
end
|
||||||
|
|||||||
@ -40,10 +40,18 @@ module Homebrew
|
|||||||
class Xorg
|
class Xorg
|
||||||
NICE_NAME = "X.Org"
|
NICE_NAME = "X.Org"
|
||||||
|
|
||||||
|
# A `Regexp` used in determining if the strategy applies to the URL and
|
||||||
|
# also as part of extracting the module name from the URL basename.
|
||||||
|
MODULE_REGEX = /(?<module_name>.+)-\d+/i.freeze
|
||||||
|
|
||||||
|
# A `Regexp` used to extract the module name from the URL basename.
|
||||||
|
FILENAME_REGEX = /^#{MODULE_REGEX.source.strip}/i.freeze
|
||||||
|
|
||||||
# The `Regexp` used to determine if the strategy applies to the URL.
|
# The `Regexp` used to determine if the strategy applies to the URL.
|
||||||
URL_MATCH_REGEX = %r{
|
URL_MATCH_REGEX = %r{
|
||||||
[/.]x\.org.*?/individual/|
|
^https?://(?:[^/]+?\.)* # Scheme and any leading subdomains
|
||||||
freedesktop\.org/(?:archive|dist|software)/
|
(?:x\.org/(?:[^/]+/)*individual/(?:[^/]+/)*#{MODULE_REGEX.source.strip}
|
||||||
|
|freedesktop\.org/(?:archive|dist|software)/(?:[^/]+/)*#{MODULE_REGEX.source.strip})
|
||||||
}ix.freeze
|
}ix.freeze
|
||||||
|
|
||||||
# Used to cache page content, so we don't fetch the same pages
|
# Used to cache page content, so we don't fetch the same pages
|
||||||
@ -72,15 +80,15 @@ module Homebrew
|
|||||||
# @return [Hash]
|
# @return [Hash]
|
||||||
def self.find_versions(url, regex = nil, &block)
|
def self.find_versions(url, regex = nil, &block)
|
||||||
file_name = File.basename(url)
|
file_name = File.basename(url)
|
||||||
/^(?<module_name>.+)-\d+/i =~ file_name
|
match = file_name.match(FILENAME_REGEX)
|
||||||
|
|
||||||
# /pub/ URLs redirect to the same URL with /archive/, so we replace
|
# /pub/ URLs redirect to the same URL with /archive/, so we replace
|
||||||
# it to avoid the redirection. Removing the filename from the end of
|
# it to avoid the redirection. Removing the filename from the end of
|
||||||
# the URL gives us the relevant directory listing page.
|
# the URL gives us the relevant directory listing page.
|
||||||
page_url = url.sub("x.org/pub/", "x.org/archive/").delete_suffix(file_name)
|
page_url = url.sub("x.org/pub/", "x.org/archive/").delete_suffix(file_name)
|
||||||
|
|
||||||
# Example regex: /href=.*?example[._-]v?(\d+(?:\.\d+)+)\.t/i
|
# Example regex: `/href=.*?example[._-]v?(\d+(?:\.\d+)+)\.t/i`
|
||||||
regex ||= /href=.*?#{Regexp.escape(module_name)}[._-]v?(\d+(?:\.\d+)+)\.t/i
|
regex ||= /href=.*?#{Regexp.escape(match[:module_name])}[._-]v?(\d+(?:\.\d+)+)\.t/i
|
||||||
|
|
||||||
# Use the cached page content to avoid duplicate fetches
|
# Use the cached page content to avoid duplicate fetches
|
||||||
cached_content = @page_data[page_url]
|
cached_content = @page_data[page_url]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user