diff --git a/Library/Homebrew/.rubocop.yml b/Library/Homebrew/.rubocop.yml
index 3015754abc..74c41a77b6 100644
--- a/Library/Homebrew/.rubocop.yml
+++ b/Library/Homebrew/.rubocop.yml
@@ -52,6 +52,7 @@ Style/Documentation:
- livecheck/strategy/pypi.rb
- livecheck/strategy/sourceforge.rb
- livecheck/strategy/sparkle.rb
+ - livecheck/strategy/xml.rb
- livecheck/strategy/xorg.rb
- os.rb
- resource.rb
diff --git a/Library/Homebrew/livecheck/strategy.rb b/Library/Homebrew/livecheck/strategy.rb
index 0d4c6c022c..0f3457e83d 100644
--- a/Library/Homebrew/livecheck/strategy.rb
+++ b/Library/Homebrew/livecheck/strategy.rb
@@ -156,7 +156,7 @@ module Homebrew
# Only treat the strategy as usable if the `livecheck` block
# contains a regex and/or `strategy` block
next if !regex_provided && !block_provided
- elsif strategy == Json
+ elsif [Json, Xml].include?(strategy)
# Only treat the strategy as usable if the `livecheck` block
# specifies the strategy and contains a `strategy` block
next if (livecheck_strategy != strategy_symbol) || !block_provided
@@ -284,4 +284,5 @@ require_relative "strategy/page_match"
require_relative "strategy/pypi"
require_relative "strategy/sourceforge"
require_relative "strategy/sparkle"
+require_relative "strategy/xml"
require_relative "strategy/xorg"
diff --git a/Library/Homebrew/livecheck/strategy/sparkle.rb b/Library/Homebrew/livecheck/strategy/sparkle.rb
index 5fbb74842b..87c6aefedd 100644
--- a/Library/Homebrew/livecheck/strategy/sparkle.rb
+++ b/Library/Homebrew/livecheck/strategy/sparkle.rb
@@ -66,25 +66,8 @@ module Homebrew
# @return [Item, nil]
sig { params(content: String).returns(T::Array[Item]) }
def self.items_from_content(content)
- require "rexml/document"
-
- parsing_tries = 0
- xml = begin
- REXML::Document.new(content)
- rescue REXML::UndefinedNamespaceException => e
- undefined_prefix = e.to_s[/Undefined prefix ([^ ]+) found/i, 1]
- raise if undefined_prefix.blank?
-
- # Only retry parsing once after removing prefix from content
- parsing_tries += 1
- raise if parsing_tries > 1
-
- # When an XML document contains a prefix without a corresponding
- # namespace, it's necessary to remove the prefix from the content
- # to be able to successfully parse it using REXML
- content = content.gsub(%r{(</?| )#{Regexp.escape(undefined_prefix)}:}, '\1')
- retry
- end
+ xml = Xml.parse_xml(content)
+ return [] if xml.blank?
# Remove prefixes, so we can reliably identify elements and attributes
xml.root&.each_recursive do |node|
diff --git a/Library/Homebrew/livecheck/strategy/xml.rb b/Library/Homebrew/livecheck/strategy/xml.rb
new file mode 100644
index 0000000000..5fee37529b
--- /dev/null
+++ b/Library/Homebrew/livecheck/strategy/xml.rb
@@ -0,0 +1,153 @@
+# typed: true
+# frozen_string_literal: true
+
+module Homebrew
+ module Livecheck
+ module Strategy
+ # The {Xml} strategy fetches content at a URL, parses it as XML using
+ # `REXML`, and provides the `REXML::Document` to a `strategy` block.
+ # If a regex is present in the `livecheck` block, it should be passed
+ # as the second argument to the `strategy` block.
+ #
+ # This is a generic strategy that doesn't contain any logic for finding
+ # versions, as the structure of XML data varies. Instead, a `strategy`
+ # block must be used to extract version information from the XML data.
+ # For more information on how to work with an `REXML::Document` object,
+ # please refer to the [`REXML::Document`](https://ruby.github.io/rexml/REXML/Document.html)
+ # and [`REXML::Element`](https://ruby.github.io/rexml/REXML/Element.html)
+ # documentation.
+ #
+ # This strategy is not applied automatically and it is necessary to use
+ # `strategy :xml` in a `livecheck` block (in conjunction with a
+ # `strategy` block) to use it.
+ #
+ # This strategy's {find_versions} method can be used in other strategies
+ # that work with XML content, so it should only be necessary to write
+ # the version-finding logic that works with the parsed XML data.
+ #
+ # @api public
+ class Xml
+ extend T::Sig
+
+ NICE_NAME = "XML"
+
+ # A priority of zero causes livecheck to skip the strategy. We do this
+ # for {Xml} so we can selectively apply it only when a strategy block
+ # is provided in a `livecheck` block.
+ PRIORITY = 0
+
+ # The `Regexp` used to determine if the strategy applies to the URL.
+ URL_MATCH_REGEX = %r{^https?://}i.freeze
+
+ # Whether the strategy can be applied to the provided URL.
+ # {Xml} will technically match any HTTP URL but is only usable with
+ # a `livecheck` block containing a `strategy` block.
+ #
+ # @param url [String] the URL to match against
+ # @return [Boolean]
+ sig { params(url: String).returns(T::Boolean) }
+ def self.match?(url)
+ URL_MATCH_REGEX.match?(url)
+ end
+
+ # Parses XML text and returns an `REXML::Document` object.
+ # @param content [String] the XML text to parse
+ # @return [REXML::Document, nil]
+ sig { params(content: String).returns(T.nilable(REXML::Document)) }
+ def self.parse_xml(content)
+ require "rexml/document"
+
+ parsing_tries = 0
+ begin
+ REXML::Document.new(content)
+ rescue REXML::UndefinedNamespaceException => e
+ undefined_prefix = e.to_s[/Undefined prefix ([^ ]+) found/i, 1]
+ raise "Could not identify undefined prefix." if undefined_prefix.blank?
+
+ # Only retry parsing once after removing prefix from content
+ parsing_tries += 1
+ raise "Could not parse XML after removing undefined prefix." if parsing_tries > 1
+
+ # When an XML document contains a prefix without a corresponding
+ # namespace, it's necessary to remove the prefix from the content
+ # to be able to successfully parse it using REXML
+ content = content.gsub(%r{(?| )#{Regexp.escape(undefined_prefix)}:}, '\1')
+ retry
+ end
+ end
+
+ # Parses XML text and identifies versions using a `strategy` block.
+ # If a regex is provided, it will be passed as the second argument to
+ # the `strategy` block (after the parsed XML data).
+ # @param content [String] the XML text to parse and check
+ # @param regex [Regexp, nil] a regex used for matching versions in the
+ # content
+ # @return [Array]
+ sig {
+ params(
+ content: String,
+ regex: T.nilable(Regexp),
+ block: T.untyped,
+ ).returns(T::Array[String])
+ }
+ def self.versions_from_content(content, regex = nil, &block)
+ return [] if content.blank? || block.blank?
+
+ require "rexml"
+ xml = parse_xml(content)
+ return [] if xml.blank?
+
+ block_return_value = if regex.present?
+ yield(xml, regex)
+ elsif block.arity == 2
+ raise "Two arguments found in `strategy` block but no regex provided."
+ else
+ yield(xml)
+ end
+ Strategy.handle_block_return(block_return_value)
+ end
+
+ # Checks the XML content at the URL for versions, using the provided
+ # `strategy` block to extract version information.
+ #
+ # @param url [String] the URL of the content to check
+ # @param regex [Regexp, nil] a regex used for matching versions
+ # @param provided_content [String, nil] page content to use in place of
+ # fetching via `Strategy#page_content`
+ # @param homebrew_curl [Boolean] whether to use brewed curl with the URL
+ # @return [Hash]
+ sig {
+ params(
+ url: String,
+ regex: T.nilable(Regexp),
+ provided_content: T.nilable(String),
+ homebrew_curl: T::Boolean,
+ _unused: T.nilable(T::Hash[Symbol, T.untyped]),
+ block: T.untyped,
+ ).returns(T::Hash[Symbol, T.untyped])
+ }
+ def self.find_versions(url:, regex: nil, provided_content: nil, homebrew_curl: false, **_unused, &block)
+ raise ArgumentError, "#{Utils.demodulize(T.must(name))} requires a `strategy` block" if block.blank?
+
+ match_data = { matches: {}, regex: regex, url: url }
+ return match_data if url.blank? || block.blank?
+
+ content = if provided_content.is_a?(String)
+ match_data[:cached] = true
+ provided_content
+ else
+ match_data.merge!(Strategy.page_content(url, homebrew_curl: homebrew_curl))
+ match_data[:content]
+ end
+ return match_data if content.blank?
+
+ versions_from_content(content, regex, &block).each do |match_text|
+ match_data[:matches][match_text] = Version.new(match_text)
+ end
+
+ match_data
+ end
+ end
+ end
+ end
+end
diff --git a/Library/Homebrew/test/livecheck/strategy/xml_spec.rb b/Library/Homebrew/test/livecheck/strategy/xml_spec.rb
new file mode 100644
index 0000000000..41a2e6688d
--- /dev/null
+++ b/Library/Homebrew/test/livecheck/strategy/xml_spec.rb
@@ -0,0 +1,205 @@
+# typed: false
+# frozen_string_literal: true
+
+require "livecheck/strategy"
+require "rexml/document"
+
+describe Homebrew::Livecheck::Strategy::Xml do
+ subject(:xml) { described_class }
+
+ let(:http_url) { "https://brew.sh/blog/" }
+ let(:non_http_url) { "ftp://brew.sh/" }
+
+ let(:regex) { /^v?(\d+(?:\.\d+)+)$/i }
+
+ let(:content_version_text) {
+ <<~EOS
+
+
+ 1.1.2
+ 1.1.2b
+ 1.1.2a
+ 1.1.1
+ 1.1.0
+ 1.1.0-rc3
+ 1.1.0-rc2
+ 1.1.0-rc1
+ 1.0.x-last
+ 1.0.3
+ 1.0.3-rc3
+ 1.0.3-rc2
+ 1.0.3-rc1
+ 1.0.2
+ 1.0.2-rc1
+ 1.0.1
+ 1.0.1-rc1
+ 1.0.0
+ 1.0.0-rc1
+
+ EOS
+ }
+
+ let(:content_version_attr) {
+ <<~EOS
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ EOS
+ }
+
+ let(:content_simple) {
+ <<~EOS
+
+ 1.2.3
+ EOS
+ }
+
+ let(:content_undefined_namespace) {
+ <<~EOS
+
+ 1.2.3
+ EOS
+ }
+
+ let(:content_matches) { ["1.1.2", "1.1.1", "1.1.0", "1.0.3", "1.0.2", "1.0.1", "1.0.0"] }
+ let(:content_simple_matches) { ["1.2.3"] }
+
+ let(:find_versions_return_hash) {
+ {
+ matches: {
+ "1.1.2" => Version.new("1.1.2"),
+ "1.1.1" => Version.new("1.1.1"),
+ "1.1.0" => Version.new("1.1.0"),
+ "1.0.3" => Version.new("1.0.3"),
+ "1.0.2" => Version.new("1.0.2"),
+ "1.0.1" => Version.new("1.0.1"),
+ "1.0.0" => Version.new("1.0.0"),
+ },
+ regex: regex,
+ url: http_url,
+ }
+ }
+
+ let(:find_versions_cached_return_hash) {
+ find_versions_return_hash.merge({ cached: true })
+ }
+
+ describe "::match?" do
+ it "returns true for an HTTP URL" do
+ expect(xml.match?(http_url)).to be true
+ end
+
+ it "returns false for a non-HTTP URL" do
+ expect(xml.match?(non_http_url)).to be false
+ end
+ end
+
+ describe "::parse_xml" do
+ # TODO: Should we be comparing against an actual REXML::Document object?
+ it "returns an REXML::Document when given XML content" do
+ expect(xml.parse_xml(content_version_text)).to be_an_instance_of(REXML::Document)
+ end
+
+ it "returns an REXML::Document when given XML content with an undefined namespace" do
+ expect(xml.parse_xml(content_undefined_namespace)).to be_an_instance_of(REXML::Document)
+ end
+ end
+
+ describe "::versions_from_content" do
+ it "returns an empty array when given a block but content is blank" do
+ expect(xml.versions_from_content("", regex) { "1.2.3" }).to eq([])
+ end
+
+ it "returns an array of version strings when given content and a block" do
+ # Returning a string from block
+ expect(xml.versions_from_content(content_simple) do |xml|
+ xml.elements["version"]&.text
+ end).to eq(content_simple_matches)
+ expect(xml.versions_from_content(content_simple, regex) do |xml|
+ version = xml.elements["version"]&.text
+ next if version.blank?
+
+ version[regex, 1]
+ end).to eq(content_simple_matches)
+
+ # Returning an array of strings from block
+ expect(xml.versions_from_content(content_version_text, regex) do |xml, regex|
+ xml.get_elements("versions//version").map { |item| item.text[regex, 1] }
+ end).to eq(content_matches)
+
+ expect(xml.versions_from_content(content_version_attr, regex) do |xml, regex|
+ xml.get_elements("items//item").map do |item|
+ version = item["version"]
+ next if version.blank?
+
+ version[regex, 1]
+ end
+ end).to eq(content_matches)
+ end
+
+ it "allows a nil return from a block" do
+ expect(xml.versions_from_content(content_simple, regex) { next }).to eq([])
+ end
+
+ it "errors if a block uses two arguments but a regex is not given" do
+ expect { xml.versions_from_content(content_simple) { |xml, regex| xml["version"][regex, 1] } }
+ .to raise_error("Two arguments found in `strategy` block but no regex provided.")
+ end
+
+ it "errors on an invalid return type from a block" do
+ expect { xml.versions_from_content(content_simple, regex) { 123 } }
+ .to raise_error(TypeError, Homebrew::Livecheck::Strategy::INVALID_BLOCK_RETURN_VALUE_MSG)
+ end
+ end
+
+ describe "::find_versions?" do
+ it "finds versions in provided_content using a block" do
+ expect(xml.find_versions(url: http_url, regex: regex, provided_content: content_version_text) do |xml, regex|
+ xml.get_elements("versions//version").map { |item| item.text[regex, 1] }
+ end).to eq(find_versions_cached_return_hash)
+
+ # NOTE: A regex should be provided using the `#regex` method in a
+ # `livecheck` block but we're using a regex literal in the `strategy`
+ # block here simply to ensure this method works as expected when a
+ # regex isn't provided.
+ expect(xml.find_versions(url: http_url, provided_content: content_version_text) do |xml|
+ regex = /^v?(\d+(?:\.\d+)+)$/i.freeze
+ xml.get_elements("versions//version").map { |item| item.text[regex, 1] }
+ end).to eq(find_versions_cached_return_hash.merge({ regex: nil }))
+ end
+
+ it "errors if a block is not provided" do
+ expect { xml.find_versions(url: http_url, provided_content: content_simple) }
+ .to raise_error(ArgumentError, "Xml requires a `strategy` block")
+ end
+
+ it "returns default match_data when url is blank" do
+ expect(xml.find_versions(url: "") { "1.2.3" })
+ .to eq({ matches: {}, regex: nil, url: "" })
+ end
+
+ it "returns default match_data when content is blank" do
+ expect(xml.find_versions(url: http_url, provided_content: "") { "1.2.3" })
+ .to eq({ matches: {}, regex: nil, url: http_url, cached: true })
+ end
+ end
+end
diff --git a/docs/Brew-Livecheck.md b/docs/Brew-Livecheck.md
index 28919e37db..f32dd17a67 100644
--- a/docs/Brew-Livecheck.md
+++ b/docs/Brew-Livecheck.md
@@ -171,6 +171,22 @@ livecheck do
end
```
+#### `Xml` `strategy` block
+
+A `strategy` block for `Xml` receives an `REXML::Document` object and, if provided, a regex. For example, if the XML contains a `versions` element with nested `version` elements and their inner text contains the version string, we could extract it using a regex as follows:
+
+```ruby
+livecheck do
+ url "https://www.example.com/example.xml"
+ regex(/v?(\d+(?:\.\d+)+)/i)
+ strategy :xml do |xml, regex|
+ xml.get_elements("versions//version").map { |item| item.text[regex, 1] }
+ end
+end
+```
+
+For more information on how to work with an `REXML::Document` object, please refer to the [`REXML::Document`](https://ruby.github.io/rexml/REXML/Document.html) and [`REXML::Element`](https://ruby.github.io/rexml/REXML/Element.html) documentation.
+
### `skip`
Livecheck automatically skips some formulae/casks for a number of reasons (deprecated, disabled, discontinued, etc.). However, on rare occasions we need to use a `livecheck` block to do a manual skip. The `skip` method takes a string containing a very brief reason for skipping.