Merge pull request #18895 from chenrui333/update-to-use-json-endpoint

fix(livecheck/pypi): update to use json endpoint to query version
This commit is contained in:
Sam Ford 2024-12-07 23:23:01 -05:00 committed by GitHub
commit b13a4c5dbb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 125 additions and 35 deletions

View File

@ -4,21 +4,28 @@
module Homebrew
module Livecheck
module Strategy
# The {Pypi} strategy identifies versions of software at pypi.org by
# checking project pages for archive files.
# The {Pypi} strategy identifies the newest version of a PyPI package by
# checking the JSON API endpoint for the project and using the
# `info.version` field from the response.
#
# PyPI URLs have a standard format but the hexadecimal text between
# `/packages/` and the filename varies:
# PyPI URLs have a standard format:
# `https://files.pythonhosted.org/packages/<hex>/<hex>/<long_hex>/example-1.2.3.tar.gz`
#
# * `https://files.pythonhosted.org/packages/<hex>/<hex>/<long_hex>/example-1.2.3.tar.gz`
#
# As such, the default regex only targets the filename at the end of the
# URL.
# Upstream documentation for the PyPI JSON API can be found at:
# https://docs.pypi.org/api/json/#get-a-project
#
# @api public
class Pypi
NICE_NAME = "PyPI"
# The default `strategy` block used to extract version information when
# a `strategy` block isn't provided. It reads the `info.version` field
# from the parsed JSON response. When a regex is supplied, the version
# string is matched against it and the first capture group is returned,
# so a `regex` given without a custom block is honored instead of being
# silently discarded.
#
# @param json [Hash] the parsed JSON API response for the project
# @param regex [Regexp, nil] an optional regex applied to `info.version`
# @return [String, nil] the version text, or `nil` when the field is
#   missing/blank or the regex doesn't match
DEFAULT_BLOCK = T.let(proc do |json, regex|
  version = json.dig("info", "version")
  next if version.blank?

  # `String#[]` with a regex and capture index returns `nil` on no match.
  regex ? version[regex, 1] : version
end.freeze, T.proc.params(
  json:  T::Hash[String, T.untyped],
  regex: T.nilable(Regexp),
).returns(T.nilable(String)))
# The `Regexp` used to extract the package name and suffix (e.g. file
# extension) from the URL basename.
FILENAME_REGEX = /
@ -44,10 +51,8 @@ module Homebrew
URL_MATCH_REGEX.match?(url)
end
# Extracts information from a provided URL and uses it to generate
# various input values used by the strategy to check for new versions.
# Some of these values act as defaults and can be overridden in a
# `livecheck` block.
# Extracts the package name from the provided URL and uses it to
# generate the PyPI JSON API URL for the project.
#
# @param url [String] the URL used to generate values
# @return [Hash]
@ -58,40 +63,41 @@ module Homebrew
match = File.basename(url).match(FILENAME_REGEX)
return values if match.blank?
# It's not technically necessary to have the `#files` fragment at the
# end of the URL but it makes the debug output a bit more useful.
values[:url] = "https://pypi.org/project/#{T.must(match[:package_name]).gsub(/%20|_/, "-")}/#files"
# Use `\.t` instead of specific tarball extensions (e.g. .tar.gz)
suffix = T.must(match[:suffix]).sub(Strategy::TARBALL_EXTENSION_REGEX, ".t")
regex_suffix = Regexp.escape(suffix).gsub("\\-", "-")
# Example regex: `%r{href=.*?/packages.*?/example[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)\.t}i`
regex_name = Regexp.escape(T.must(match[:package_name])).gsub(/\\[_-]/, "[_-]")
values[:regex] =
%r{href=.*?/packages.*?/#{regex_name}[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)#{regex_suffix}}i
values[:url] = "https://pypi.org/pypi/#{T.must(match[:package_name]).gsub(/%20|_/, "-")}/json"
values
end
# Generates a URL and regex (if one isn't provided) and passes them
# to {PageMatch.find_versions} to identify versions in the content.
# Generates a PyPI JSON API URL for the project and identifies new
# versions using {Json.find_versions} with a block.
#
# @param url [String] the URL of the content to check
# @param regex [Regexp, nil] a regex used for matching versions in content
# @param provided_content [String, nil] content to check instead of
# fetching
# @return [Hash]
sig {
params(
url: String,
regex: T.nilable(Regexp),
unused: T.untyped,
block: T.nilable(Proc),
url: String,
regex: T.nilable(Regexp),
provided_content: T.nilable(String),
unused: T.untyped,
block: T.nilable(Proc),
).returns(T::Hash[Symbol, T.untyped])
}
def self.find_versions(url:, regex: nil, **unused, &block)
generated = generate_input_values(url)
def self.find_versions(url:, regex: nil, provided_content: nil, **unused, &block)
match_data = { matches: {}, regex:, url: }
PageMatch.find_versions(url: generated[:url], regex: regex || generated[:regex], **unused, &block)
generated = generate_input_values(url)
return match_data if generated.blank?
Json.find_versions(
url: generated[:url],
regex:,
provided_content:,
**unused,
&block || DEFAULT_BLOCK
)
end
end
end

View File

@ -8,13 +8,42 @@ RSpec.describe Homebrew::Livecheck::Strategy::Pypi do
let(:pypi_url) { "https://files.pythonhosted.org/packages/ab/cd/efg/example-package-1.2.3.tar.gz" }
let(:non_pypi_url) { "https://brew.sh/test" }
let(:regex) { /^v?(\d+(?:\.\d+)+)$/i }
let(:generated) do
{
url: "https://pypi.org/project/example-package/#files",
regex: %r{href=.*?/packages.*?/example[_-]package[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)\.t}i,
url: "https://pypi.org/pypi/example-package/json",
}
end
# This is a limited subset of a PyPI JSON API response object, for the sake
# of testing.
let(:content) do
<<~JSON
{
"info": {
"version": "1.2.3"
}
}
JSON
end
let(:matches) { ["1.2.3"] }
# Hash shaped like a `find_versions` result: the single version "1.2.3",
# a nil regex and the generated JSON API URL.
# NOTE(review): no example visible in this diff references this helper (the
# `::find_versions` examples build their own `match_data`) — confirm whether
# it is still needed.
let(:find_versions_return_hash) do
{
matches: {
"1.2.3" => Version.new("1.2.3"),
},
regex: nil,
url: generated[:url],
}
end
# The hash above with `cached: true` merged in.
let(:find_versions_cached_return_hash) do
find_versions_return_hash.merge({ cached: true })
end
describe "::match?" do
it "returns true for a PyPI URL" do
expect(pypi.match?(pypi_url)).to be true
@ -34,4 +63,59 @@ RSpec.describe Homebrew::Livecheck::Strategy::Pypi do
expect(pypi.generate_input_values(non_pypi_url)).to eq({})
end
end
# Exercises `Pypi.find_versions`. Most examples supply `provided_content`,
# which the strategy documents as content to check instead of fetching.
describe "::find_versions" do
# Expected results keyed by scenario: `:cached` carries the versions from
# `matches`; `:cached_default` is the same hash with `matches` emptied.
let(:match_data) do
cached = {
matches: matches.to_h { |v| [v, Version.new(v)] },
regex: nil,
url: generated[:url],
# presumably reported by the underlying strategy when `provided_content`
# is used — confirm
cached: true,
}
{
cached:,
cached_default: cached.merge({ matches: {} }),
}
end
# No block is passed here, so the strategy falls back to `DEFAULT_BLOCK`.
it "finds versions in provided content" do
expect(pypi.find_versions(url: pypi_url, provided_content: content))
.to eq(match_data[:cached])
end
it "finds versions in provided content using a block" do
# NOTE: We only use a regex here to make sure it can be passed into the
# block, if necessary.
expect(pypi.find_versions(url: pypi_url, regex:, provided_content: content) do |json, regex|
match = json.dig("info", "version")&.match(regex)
next if match.blank?
match[1]
end).to eq(match_data[:cached].merge({ regex: }))
# Explicit block that reads `info.version` directly, with no regex.
expect(pypi.find_versions(url: pypi_url, provided_content: content) do |json|
json.dig("info", "version").presence
end).to eq(match_data[:cached])
end
# An empty version string and a missing `info` key are both expected to
# yield no matches.
it "returns default match_data when block doesn't return version information" do
expect(pypi.find_versions(url: pypi_url, provided_content: '{"info":{"version":""}}'))
.to eq(match_data[:cached_default])
expect(pypi.find_versions(url: pypi_url, provided_content: '{"other":true}'))
.to eq(match_data[:cached_default])
end
# The expected hash has no `cached` key and echoes the original (blank)
# URL, which matches the early-return `match_data` built in `find_versions`.
it "returns default match_data when url is blank" do
expect(pypi.find_versions(url: "") { "1.2.3" })
.to eq({ matches: {}, regex: nil, url: "" })
end
# Even with a block that always returns "1.2.3", an empty JSON object or an
# empty string is expected to produce no matches.
it "returns default match_data when content is blank" do
expect(pypi.find_versions(url: pypi_url, provided_content: "{}") { "1.2.3" })
.to eq(match_data[:cached_default])
expect(pypi.find_versions(url: pypi_url, provided_content: "") { "1.2.3" })
.to eq(match_data[:cached_default])
end
end
end