From d49e01b82bd6e8a5d96853963467d68f20f835ba Mon Sep 17 00:00:00 2001 From: Rui Chen Date: Sat, 7 Dec 2024 01:15:40 -0500 Subject: [PATCH 1/3] fix(livecheck/pypi): update to use json endpoint to query version Signed-off-by: Rui Chen --- Library/Homebrew/livecheck/strategy/pypi.rb | 70 +++++++++++-------- .../test/livecheck/strategy/pypi_spec.rb | 2 +- 2 files changed, 40 insertions(+), 32 deletions(-) diff --git a/Library/Homebrew/livecheck/strategy/pypi.rb b/Library/Homebrew/livecheck/strategy/pypi.rb index 86f27df8a8..c598b33168 100644 --- a/Library/Homebrew/livecheck/strategy/pypi.rb +++ b/Library/Homebrew/livecheck/strategy/pypi.rb @@ -1,19 +1,21 @@ # typed: strict # frozen_string_literal: true +require "json" +require "utils/curl" + module Homebrew module Livecheck module Strategy # The {Pypi} strategy identifies versions of software at pypi.org by - # checking project pages for archive files. + # using the JSON API endpoint. # - # PyPI URLs have a standard format but the hexadecimal text between - # `/packages/` and the filename varies: + # PyPI URLs have a standard format: # # * `https://files.pythonhosted.org/packages////example-1.2.3.tar.gz` # - # As such, the default regex only targets the filename at the end of the - # URL. + # This method uses the `info.version` field in the JSON response to + # determine the latest stable version. # # @api public class Pypi @@ -44,10 +46,8 @@ module Homebrew URL_MATCH_REGEX.match?(url) end - # Extracts information from a provided URL and uses it to generate - # various input values used by the strategy to check for new versions. - # Some of these values act as defaults and can be overridden in a - # `livecheck` block. + # Extracts the package name from the provided URL and generates the + # PyPI JSON API endpoint. # # @param url [String] the URL used to generate values # @return [Hash] @@ -58,40 +58,48 @@ module Homebrew match = File.basename(url).match(FILENAME_REGEX) return values if match.blank? 
- # It's not technically necessary to have the `#files` fragment at the - # end of the URL but it makes the debug output a bit more useful. - values[:url] = "https://pypi.org/project/#{T.must(match[:package_name]).gsub(/%20|_/, "-")}/#files" - - # Use `\.t` instead of specific tarball extensions (e.g. .tar.gz) - suffix = T.must(match[:suffix]).sub(Strategy::TARBALL_EXTENSION_REGEX, ".t") - regex_suffix = Regexp.escape(suffix).gsub("\\-", "-") - - # Example regex: `%r{href=.*?/packages.*?/example[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)\.t}i` - regex_name = Regexp.escape(T.must(match[:package_name])).gsub(/\\[_-]/, "[_-]") - values[:regex] = - %r{href=.*?/packages.*?/#{regex_name}[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)#{regex_suffix}}i + package_name = T.must(match[:package_name]).gsub(/[_-]/, "-") + values[:url] = "https://pypi.org/project/#{package_name}/#files" + values[:regex] = %r{href=.*?/packages.*?/#{package_name}[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)\.t}i values end - # Generates a URL and regex (if one isn't provided) and passes them - # to {PageMatch.find_versions} to identify versions in the content. + # Fetches the latest version of the package from the PyPI JSON API. 
# # @param url [String] the URL of the content to check - # @param regex [Regexp] a regex used for matching versions in content + # @param regex [Regexp] a regex used for matching versions in content (optional) # @return [Hash] sig { params( - url: String, - regex: T.nilable(Regexp), - unused: T.untyped, - block: T.nilable(Proc), + url: String, + regex: T.nilable(Regexp), + _unused: T.untyped, + _block: T.nilable(Proc), ).returns(T::Hash[Symbol, T.untyped]) } - def self.find_versions(url:, regex: nil, **unused, &block) - generated = generate_input_values(url) + def self.find_versions(url:, regex: nil, **_unused, &_block) + match_data = { matches: {}, regex:, url: } - PageMatch.find_versions(url: generated[:url], regex: regex || generated[:regex], **unused, &block) + generated = generate_input_values(url) + return match_data if generated.blank? + + match_data[:url] = generated[:url] + + # Parse JSON and get the latest version + begin + response = Utils::Curl.curl_output(generated[:url]) + data = JSON.parse(response.stdout, symbolize_names: true) + latest_version = data.dig(:info, :version) + rescue => e + puts "Error fetching version from PyPI: #{e.message}" + return {} + end + + # Return the version if found + return {} if latest_version.blank? 
+ + { matches: { latest_version => Version.new(latest_version) } } end end end diff --git a/Library/Homebrew/test/livecheck/strategy/pypi_spec.rb b/Library/Homebrew/test/livecheck/strategy/pypi_spec.rb index 2cdd945c11..f42a3d174c 100644 --- a/Library/Homebrew/test/livecheck/strategy/pypi_spec.rb +++ b/Library/Homebrew/test/livecheck/strategy/pypi_spec.rb @@ -11,7 +11,7 @@ RSpec.describe Homebrew::Livecheck::Strategy::Pypi do let(:generated) do { url: "https://pypi.org/project/example-package/#files", - regex: %r{href=.*?/packages.*?/example[_-]package[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)\.t}i, + regex: %r{href=.*?/packages.*?/example-package[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)\.t}i, } end From 935eb89ecaf27435def207fd64d51090c3bae75c Mon Sep 17 00:00:00 2001 From: Sam Ford <1584702+samford@users.noreply.github.com> Date: Sat, 7 Dec 2024 10:56:46 -0500 Subject: [PATCH 2/3] Pypi: Rework to use Json::find_versions This reworks the new `Pypi` JSON API implementation to use `Json::find_versions` in `Pypi::find_versions`, borrowing some of the approach from the `Crate` strategy. Besides that, this pares down the fields in the `::generate_input_values` return hash to only `:url`, as we're not using a generated regex to match version information in this setup. This adds a `provided_content` parameter to `::find_versions` as part of this process and I will expand the `Pypi` tests to increase coverage (like the `Crate` tests) in a later PR. 75% of `Pypi` checks are failing at the moment (with some returning inaccurate version information), so the current priority is getting this fix merged in the short term. 
--- Library/Homebrew/livecheck/strategy/pypi.rb | 72 +++++++++---------- .../test/livecheck/strategy/pypi_spec.rb | 3 +- 2 files changed, 36 insertions(+), 39 deletions(-) diff --git a/Library/Homebrew/livecheck/strategy/pypi.rb b/Library/Homebrew/livecheck/strategy/pypi.rb index c598b33168..66351f884c 100644 --- a/Library/Homebrew/livecheck/strategy/pypi.rb +++ b/Library/Homebrew/livecheck/strategy/pypi.rb @@ -1,26 +1,31 @@ # typed: strict # frozen_string_literal: true -require "json" -require "utils/curl" - module Homebrew module Livecheck module Strategy - # The {Pypi} strategy identifies versions of software at pypi.org by - # using the JSON API endpoint. + # The {Pypi} strategy identifies the newest version of a PyPI package by + # checking the JSON API endpoint for the project and using the + # `info.version` field from the response. # # PyPI URLs have a standard format: + # `https://files.pythonhosted.org/packages////example-1.2.3.tar.gz` # - # * `https://files.pythonhosted.org/packages////example-1.2.3.tar.gz` - # - # This method uses the `info.version` field in the JSON response to - # determine the latest stable version. + # Upstream documentation for the PyPI JSON API can be found at: + # https://docs.pypi.org/api/json/#get-a-project # # @api public class Pypi NICE_NAME = "PyPI" + # The default `strategy` block used to extract version information when + # a `strategy` block isn't provided. + DEFAULT_BLOCK = T.let(proc do |json| + json.dig("info", "version").presence + end.freeze, T.proc.params( + arg0: T::Hash[String, T.untyped], + ).returns(T.nilable(String))) + # The `Regexp` used to extract the package name and suffix (e.g. file # extension) from the URL basename. FILENAME_REGEX = / @@ -46,8 +51,8 @@ module Homebrew URL_MATCH_REGEX.match?(url) end - # Extracts the package name from the provided URL and generates the - # PyPI JSON API endpoint. 
+ # Extracts the package name from the provided URL and uses it to + # generate the PyPI JSON API URL for the project. # # @param url [String] the URL used to generate values # @return [Hash] @@ -58,48 +63,41 @@ module Homebrew match = File.basename(url).match(FILENAME_REGEX) return values if match.blank? - package_name = T.must(match[:package_name]).gsub(/[_-]/, "-") - values[:url] = "https://pypi.org/project/#{package_name}/#files" - values[:regex] = %r{href=.*?/packages.*?/#{package_name}[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)\.t}i + values[:url] = "https://pypi.org/pypi/#{T.must(match[:package_name]).gsub(/%20|_/, "-")}/json" values end - # Fetches the latest version of the package from the PyPI JSON API. + # Generates a PyPI JSON API URL for the project and identifies new + # versions using {Json#find_versions} with a block. # # @param url [String] the URL of the content to check - # @param regex [Regexp] a regex used for matching versions in content (optional) + # @param regex [Regexp] a regex used for matching versions in content + # @param provided_content [String, nil] content to check instead of + # fetching # @return [Hash] sig { params( - url: String, - regex: T.nilable(Regexp), - _unused: T.untyped, - _block: T.nilable(Proc), + url: String, + regex: T.nilable(Regexp), + provided_content: T.nilable(String), + unused: T.untyped, + block: T.nilable(Proc), ).returns(T::Hash[Symbol, T.untyped]) } - def self.find_versions(url:, regex: nil, **_unused, &_block) + def self.find_versions(url:, regex: nil, provided_content: nil, **unused, &block) match_data = { matches: {}, regex:, url: } generated = generate_input_values(url) return match_data if generated.blank? 
- match_data[:url] = generated[:url] - - # Parse JSON and get the latest version - begin - response = Utils::Curl.curl_output(generated[:url]) - data = JSON.parse(response.stdout, symbolize_names: true) - latest_version = data.dig(:info, :version) - rescue => e - puts "Error fetching version from PyPI: #{e.message}" - return {} - end - - # Return the version if found - return {} if latest_version.blank? - - { matches: { latest_version => Version.new(latest_version) } } + Json.find_versions( + url: generated[:url], + regex:, + provided_content:, + **unused, + &block || DEFAULT_BLOCK + ) end end end diff --git a/Library/Homebrew/test/livecheck/strategy/pypi_spec.rb b/Library/Homebrew/test/livecheck/strategy/pypi_spec.rb index f42a3d174c..2ee5aa35e8 100644 --- a/Library/Homebrew/test/livecheck/strategy/pypi_spec.rb +++ b/Library/Homebrew/test/livecheck/strategy/pypi_spec.rb @@ -10,8 +10,7 @@ RSpec.describe Homebrew::Livecheck::Strategy::Pypi do let(:generated) do { - url: "https://pypi.org/project/example-package/#files", - regex: %r{href=.*?/packages.*?/example-package[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)\.t}i, + url: "https://pypi.org/pypi/example-package/json", } end From ac4854ef9e525d3b5b080e302d10f1b71b7b54f6 Mon Sep 17 00:00:00 2001 From: Sam Ford <1584702+samford@users.noreply.github.com> Date: Sat, 7 Dec 2024 20:42:46 -0500 Subject: [PATCH 3/3] Pypi: Expand test coverage Among other things, the previous commit added a `provided_content` parameter to `Pypi::find_versions`, so this takes advantage of that to expand `Pypi` test coverage to 100%. 
--- .../test/livecheck/strategy/pypi_spec.rb | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/Library/Homebrew/test/livecheck/strategy/pypi_spec.rb b/Library/Homebrew/test/livecheck/strategy/pypi_spec.rb index 2ee5aa35e8..be93644477 100644 --- a/Library/Homebrew/test/livecheck/strategy/pypi_spec.rb +++ b/Library/Homebrew/test/livecheck/strategy/pypi_spec.rb @@ -8,12 +8,42 @@ RSpec.describe Homebrew::Livecheck::Strategy::Pypi do let(:pypi_url) { "https://files.pythonhosted.org/packages/ab/cd/efg/example-package-1.2.3.tar.gz" } let(:non_pypi_url) { "https://brew.sh/test" } + let(:regex) { /^v?(\d+(?:\.\d+)+)$/i } + let(:generated) do { url: "https://pypi.org/pypi/example-package/json", } end + # This is a limited subset of a PyPI JSON API response object, for the sake + # of testing. + let(:content) do + <<~JSON + { + "info": { + "version": "1.2.3" + } + } + JSON + end + + let(:matches) { ["1.2.3"] } + + let(:find_versions_return_hash) do + { + matches: { + "1.2.3" => Version.new("1.2.3"), + }, + regex: nil, + url: generated[:url], + } + end + + let(:find_versions_cached_return_hash) do + find_versions_return_hash.merge({ cached: true }) + end + describe "::match?" do it "returns true for a PyPI URL" do expect(pypi.match?(pypi_url)).to be true @@ -33,4 +63,59 @@ RSpec.describe Homebrew::Livecheck::Strategy::Pypi do expect(pypi.generate_input_values(non_pypi_url)).to eq({}) end end + + describe "::find_versions" do + let(:match_data) do + cached = { + matches: matches.to_h { |v| [v, Version.new(v)] }, + regex: nil, + url: generated[:url], + cached: true, + } + + { + cached:, + cached_default: cached.merge({ matches: {} }), + } + end + + it "finds versions in provided content" do + expect(pypi.find_versions(url: pypi_url, provided_content: content)) + .to eq(match_data[:cached]) + end + + it "finds versions in provided content using a block" do + # NOTE: We only use a regex here to make sure it can be passed into the + # block, if necessary. 
+ expect(pypi.find_versions(url: pypi_url, regex:, provided_content: content) do |json, regex| + match = json.dig("info", "version")&.match(regex) + next if match.blank? + + match[1] + end).to eq(match_data[:cached].merge({ regex: })) + + expect(pypi.find_versions(url: pypi_url, provided_content: content) do |json| + json.dig("info", "version").presence + end).to eq(match_data[:cached]) + end + + it "returns default match_data when block doesn't return version information" do + expect(pypi.find_versions(url: pypi_url, provided_content: '{"info":{"version":""}}')) + .to eq(match_data[:cached_default]) + expect(pypi.find_versions(url: pypi_url, provided_content: '{"other":true}')) + .to eq(match_data[:cached_default]) + end + + it "returns default match_data when url is blank" do + expect(pypi.find_versions(url: "") { "1.2.3" }) + .to eq({ matches: {}, regex: nil, url: "" }) + end + + it "returns default match_data when content is blank" do + expect(pypi.find_versions(url: pypi_url, provided_content: "{}") { "1.2.3" }) + .to eq(match_data[:cached_default]) + expect(pypi.find_versions(url: pypi_url, provided_content: "") { "1.2.3" }) + .to eq(match_data[:cached_default]) + end + end end