Merge pull request #14845 from samford/livecheck/add-xml-strategy
livecheck: Add Xml strategy
This commit is contained in:
commit
97fbd89a57
@ -52,6 +52,7 @@ Style/Documentation:
|
||||
- livecheck/strategy/pypi.rb
|
||||
- livecheck/strategy/sourceforge.rb
|
||||
- livecheck/strategy/sparkle.rb
|
||||
- livecheck/strategy/xml.rb
|
||||
- livecheck/strategy/xorg.rb
|
||||
- os.rb
|
||||
- resource.rb
|
||||
|
@ -156,7 +156,7 @@ module Homebrew
|
||||
# Only treat the strategy as usable if the `livecheck` block
|
||||
# contains a regex and/or `strategy` block
|
||||
next if !regex_provided && !block_provided
|
||||
elsif strategy == Json
|
||||
elsif [Json, Xml].include?(strategy)
|
||||
# Only treat the strategy as usable if the `livecheck` block
|
||||
# specifies the strategy and contains a `strategy` block
|
||||
next if (livecheck_strategy != strategy_symbol) || !block_provided
|
||||
@ -284,4 +284,5 @@ require_relative "strategy/page_match"
|
||||
require_relative "strategy/pypi"
|
||||
require_relative "strategy/sourceforge"
|
||||
require_relative "strategy/sparkle"
|
||||
require_relative "strategy/xml"
|
||||
require_relative "strategy/xorg"
|
||||
|
@ -66,25 +66,8 @@ module Homebrew
|
||||
# @return [Array<Item>]
|
||||
sig { params(content: String).returns(T::Array[Item]) }
|
||||
def self.items_from_content(content)
|
||||
require "rexml/document"
|
||||
|
||||
parsing_tries = 0
|
||||
xml = begin
|
||||
REXML::Document.new(content)
|
||||
rescue REXML::UndefinedNamespaceException => e
|
||||
undefined_prefix = e.to_s[/Undefined prefix ([^ ]+) found/i, 1]
|
||||
raise if undefined_prefix.blank?
|
||||
|
||||
# Only retry parsing once after removing prefix from content
|
||||
parsing_tries += 1
|
||||
raise if parsing_tries > 1
|
||||
|
||||
# When an XML document contains a prefix without a corresponding
|
||||
# namespace, it's necessary to remove the prefix from the content
|
||||
# to be able to successfully parse it using REXML
|
||||
content = content.gsub(%r{(</?| )#{Regexp.escape(undefined_prefix)}:}, '\1')
|
||||
retry
|
||||
end
|
||||
xml = Xml.parse_xml(content)
|
||||
return [] if xml.blank?
|
||||
|
||||
# Remove prefixes, so we can reliably identify elements and attributes
|
||||
xml.root&.each_recursive do |node|
|
||||
|
153
Library/Homebrew/livecheck/strategy/xml.rb
Normal file
153
Library/Homebrew/livecheck/strategy/xml.rb
Normal file
@ -0,0 +1,153 @@
|
||||
# typed: true
|
||||
# frozen_string_literal: true
|
||||
|
||||
module Homebrew
|
||||
module Livecheck
|
||||
module Strategy
|
||||
# The {Xml} strategy fetches content at a URL, parses it as XML using
|
||||
# `REXML`, and provides the `REXML::Document` to a `strategy` block.
|
||||
# If a regex is present in the `livecheck` block, it should be passed
|
||||
# as the second argument to the `strategy` block.
|
||||
#
|
||||
# This is a generic strategy that doesn't contain any logic for finding
|
||||
# versions, as the structure of XML data varies. Instead, a `strategy`
|
||||
# block must be used to extract version information from the XML data.
|
||||
# For more information on how to work with an `REXML::Document` object,
|
||||
# please refer to the [`REXML::Document`](https://ruby.github.io/rexml/REXML/Document.html)
|
||||
# and [`REXML::Element`](https://ruby.github.io/rexml/REXML/Element.html)
|
||||
# documentation.
|
||||
#
|
||||
# This strategy is not applied automatically and it is necessary to use
|
||||
# `strategy :xml` in a `livecheck` block (in conjunction with a
|
||||
# `strategy` block) to use it.
|
||||
#
|
||||
# This strategy's {find_versions} method can be used in other strategies
|
||||
# that work with XML content, so it should only be necessary to write
|
||||
# the version-finding logic that works with the parsed XML data.
|
||||
#
|
||||
# @api public
|
||||
class Xml
|
||||
extend T::Sig
|
||||
|
||||
NICE_NAME = "XML"
|
||||
|
||||
# A priority of zero causes livecheck to skip the strategy. We do this
|
||||
# for {Xml} so we can selectively apply it only when a strategy block
|
||||
# is provided in a `livecheck` block.
|
||||
PRIORITY = 0
|
||||
|
||||
# The `Regexp` used to determine if the strategy applies to the URL.
|
||||
URL_MATCH_REGEX = %r{^https?://}i.freeze
|
||||
|
||||
# Whether the strategy can be applied to the provided URL.
|
||||
# {Xml} will technically match any HTTP URL but is only usable with
|
||||
# a `livecheck` block containing a `strategy` block.
|
||||
#
|
||||
# @param url [String] the URL to match against
|
||||
# @return [Boolean]
|
||||
sig { params(url: String).returns(T::Boolean) }
|
||||
def self.match?(url)
  # The scheme check lives entirely in URL_MATCH_REGEX; this method only
  # reports whether the given URL satisfies it.
  URL_MATCH_REGEX.match?(url)
end
|
||||
|
||||
# Parses XML text and returns an `REXML::Document` object.
|
||||
# @param content [String] the XML text to parse
|
||||
# @return [REXML::Document, nil]
|
||||
sig { params(content: String).returns(T.nilable(REXML::Document)) }
|
||||
def self.parse_xml(content)
  # Loaded lazily so REXML is only required when XML is actually parsed.
  require "rexml/document"

  parsing_tries = 0
  begin
    REXML::Document.new(content)
  rescue REXML::UndefinedNamespaceException => e
    # REXML names the offending prefix in the exception message.
    undefined_prefix = e.to_s[/Undefined prefix ([^ ]+) found/i, 1]
    raise "Could not identify undefined prefix." if undefined_prefix.blank?

    # Only retry parsing once after removing prefix from content
    parsing_tries += 1
    raise "Could not parse XML after removing undefined prefix." if parsing_tries > 1

    # When an XML document contains a prefix without a corresponding
    # namespace, it's necessary to remove the prefix from the content
    # to be able to successfully parse it using REXML. The prefix is
    # stripped from tag names (`<prefix:` / `</prefix:`) and from
    # attribute names, which may follow any whitespace (space, tab or
    # newline), not only a literal space.
    content = content.gsub(%r{(</?|\s)#{Regexp.escape(undefined_prefix)}:}, '\1')
    retry
  end
end
|
||||
|
||||
# Parses XML text and identifies versions using a `strategy` block.
|
||||
# If a regex is provided, it will be passed as the second argument to
|
||||
# the `strategy` block (after the parsed XML data).
|
||||
# @param content [String] the XML text to parse and check
|
||||
# @param regex [Regexp, nil] a regex used for matching versions in the
|
||||
# content
|
||||
# @return [Array]
|
||||
sig {
|
||||
params(
|
||||
content: String,
|
||||
regex: T.nilable(Regexp),
|
||||
block: T.untyped,
|
||||
).returns(T::Array[String])
|
||||
}
|
||||
def self.versions_from_content(content, regex = nil, &block)
  # Nothing to do without both content and a `strategy` block.
  return [] if content.blank? || block.blank?

  require "rexml"
  document = parse_xml(content)
  return [] if document.blank?

  # A two-parameter block expects the regex as its second argument, so
  # refuse to call it when no regex was supplied.
  regex_missing_for_block = regex.blank? && block.arity == 2
  raise "Two arguments found in `strategy` block but no regex provided." if regex_missing_for_block

  # The regex is only passed along when one was supplied.
  block_return_value = regex.present? ? yield(document, regex) : yield(document)
  Strategy.handle_block_return(block_return_value)
end
|
||||
|
||||
# Checks the XML content at the URL for versions, using the provided
|
||||
# `strategy` block to extract version information.
|
||||
#
|
||||
# @param url [String] the URL of the content to check
|
||||
# @param regex [Regexp, nil] a regex used for matching versions
|
||||
# @param provided_content [String, nil] page content to use in place of
|
||||
# fetching via `Strategy#page_content`
|
||||
# @param homebrew_curl [Boolean] whether to use brewed curl with the URL
|
||||
# @return [Hash]
|
||||
sig {
|
||||
params(
|
||||
url: String,
|
||||
regex: T.nilable(Regexp),
|
||||
provided_content: T.nilable(String),
|
||||
homebrew_curl: T::Boolean,
|
||||
_unused: T.nilable(T::Hash[Symbol, T.untyped]),
|
||||
block: T.untyped,
|
||||
).returns(T::Hash[Symbol, T.untyped])
|
||||
}
|
||||
def self.find_versions(url:, regex: nil, provided_content: nil, homebrew_curl: false, **_unused, &block)
  # This strategy has no version-finding logic of its own, so a
  # `strategy` block is mandatory.
  raise ArgumentError, "#{Utils.demodulize(T.must(name))} requires a `strategy` block" if block.blank?

  match_data = { matches: {}, regex: regex, url: url }
  # The block is known to be present here (checked above), so only the
  # URL needs to be validated.
  return match_data if url.blank?

  content = if provided_content.is_a?(String)
    # Caller-supplied content is used as-is and flagged as cached.
    match_data[:cached] = true
    provided_content
  else
    # Merge the result of the page fetch into the returned data and use
    # its `:content` entry as the text to parse.
    match_data.merge!(Strategy.page_content(url, homebrew_curl: homebrew_curl))
    match_data[:content]
  end
  return match_data if content.blank?

  # Key each match by its version text, with a `Version` object as the value.
  versions_from_content(content, regex, &block).each do |match_text|
    match_data[:matches][match_text] = Version.new(match_text)
  end

  match_data
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
205
Library/Homebrew/test/livecheck/strategy/xml_spec.rb
Normal file
205
Library/Homebrew/test/livecheck/strategy/xml_spec.rb
Normal file
@ -0,0 +1,205 @@
|
||||
# typed: false
|
||||
# frozen_string_literal: true
|
||||
|
||||
require "livecheck/strategy"
|
||||
require "rexml/document"
|
||||
|
||||
# Exercises the class-level API of the Xml livecheck strategy: URL matching,
# XML parsing (including an undefined namespace prefix), version extraction
# through a `strategy` block and the `find_versions` return hash.
describe Homebrew::Livecheck::Strategy::Xml do
  subject(:xml) { described_class }

  let(:http_url) { "https://brew.sh/blog/" }
  let(:non_http_url) { "ftp://brew.sh/" }

  let(:regex) { /^v?(\d+(?:\.\d+)+)$/i }

  # Versions as element text, mixing stable and unstable version strings.
  let(:content_version_text) {
    <<~EOS
      <?xml version="1.0" encoding="utf-8"?>
      <versions>
        <version>1.1.2</version>
        <version>1.1.2b</version>
        <version>1.1.2a</version>
        <version>1.1.1</version>
        <version>1.1.0</version>
        <version>1.1.0-rc3</version>
        <version>1.1.0-rc2</version>
        <version>1.1.0-rc1</version>
        <version>1.0.x-last</version>
        <version>1.0.3</version>
        <version>1.0.3-rc3</version>
        <version>1.0.3-rc2</version>
        <version>1.0.3-rc1</version>
        <version>1.0.2</version>
        <version>1.0.2-rc1</version>
        <version>1.0.1</version>
        <version>1.0.1-rc1</version>
        <version>1.0.0</version>
        <version>1.0.0-rc1</version>
      </versions>
    EOS
  }

  # The same versions, carried in a `version` attribute instead.
  let(:content_version_attr) {
    <<~EOS
      <?xml version="1.0" encoding="utf-8"?>
      <items>
        <item version="1.1.2" />
        <item version="1.1.2b" />
        <item version="1.1.2a" />
        <item version="1.1.1" />
        <item version="1.1.0" />
        <item version="1.1.0-rc3" />
        <item version="1.1.0-rc2" />
        <item version="1.1.0-rc1" />
        <item version="1.0.x-last" />
        <item version="1.0.3" />
        <item version="1.0.3-rc3" />
        <item version="1.0.3-rc2" />
        <item version="1.0.3-rc1" />
        <item version="1.0.2" />
        <item version="1.0.2-rc1" />
        <item version="1.0.1" />
        <item version="1.0.1-rc1" />
        <item version="1.0.0" />
        <item version="1.0.0-rc1" />
      </items>
    EOS
  }

  let(:content_simple) {
    <<~EOS
      <?xml version="1.0" encoding="utf-8"?>
      <version>1.2.3</version>
    EOS
  }

  # Uses a prefix that has no namespace declaration.
  let(:content_undefined_namespace) {
    <<~EOS
      <?xml version="1.0" encoding="utf-8"?>
      <something:version>1.2.3</something:version>
    EOS
  }

  let(:content_matches) { ["1.1.2", "1.1.1", "1.1.0", "1.0.3", "1.0.2", "1.0.1", "1.0.0"] }
  let(:content_simple_matches) { ["1.2.3"] }

  let(:find_versions_return_hash) {
    {
      matches: {
        "1.1.2" => Version.new("1.1.2"),
        "1.1.1" => Version.new("1.1.1"),
        "1.1.0" => Version.new("1.1.0"),
        "1.0.3" => Version.new("1.0.3"),
        "1.0.2" => Version.new("1.0.2"),
        "1.0.1" => Version.new("1.0.1"),
        "1.0.0" => Version.new("1.0.0"),
      },
      regex:   regex,
      url:     http_url,
    }
  }

  let(:find_versions_cached_return_hash) {
    find_versions_return_hash.merge({ cached: true })
  }

  describe "::match?" do
    it "returns true for an HTTP URL" do
      expect(xml.match?(http_url)).to be true
    end

    it "returns false for a non-HTTP URL" do
      expect(xml.match?(non_http_url)).to be false
    end
  end

  describe "::parse_xml" do
    # TODO: Should we be comparing against an actual REXML::Document object?
    it "returns an REXML::Document when given XML content" do
      expect(xml.parse_xml(content_version_text)).to be_an_instance_of(REXML::Document)
    end

    it "returns an REXML::Document when given XML content with an undefined namespace" do
      expect(xml.parse_xml(content_undefined_namespace)).to be_an_instance_of(REXML::Document)
    end
  end

  describe "::versions_from_content" do
    it "returns an empty array when given a block but content is blank" do
      expect(xml.versions_from_content("", regex) { "1.2.3" }).to eq([])
    end

    it "returns an array of version strings when given content and a block" do
      # Returning a string from block
      expect(xml.versions_from_content(content_simple) do |xml|
        xml.elements["version"]&.text
      end).to eq(content_simple_matches)
      expect(xml.versions_from_content(content_simple, regex) do |xml|
        version = xml.elements["version"]&.text
        next if version.blank?

        version[regex, 1]
      end).to eq(content_simple_matches)

      # Returning an array of strings from block
      expect(xml.versions_from_content(content_version_text, regex) do |xml, regex|
        xml.get_elements("versions//version").map { |item| item.text[regex, 1] }
      end).to eq(content_matches)

      expect(xml.versions_from_content(content_version_attr, regex) do |xml, regex|
        xml.get_elements("items//item").map do |item|
          version = item["version"]
          next if version.blank?

          version[regex, 1]
        end
      end).to eq(content_matches)
    end

    it "allows a nil return from a block" do
      expect(xml.versions_from_content(content_simple, regex) { next }).to eq([])
    end

    it "errors if a block uses two arguments but a regex is not given" do
      expect { xml.versions_from_content(content_simple) { |xml, regex| xml["version"][regex, 1] } }
        .to raise_error("Two arguments found in `strategy` block but no regex provided.")
    end

    it "errors on an invalid return type from a block" do
      expect { xml.versions_from_content(content_simple, regex) { 123 } }
        .to raise_error(TypeError, Homebrew::Livecheck::Strategy::INVALID_BLOCK_RETURN_VALUE_MSG)
    end
  end

  describe "::find_versions" do
    it "finds versions in provided_content using a block" do
      expect(xml.find_versions(url: http_url, regex: regex, provided_content: content_version_text) do |xml, regex|
        xml.get_elements("versions//version").map { |item| item.text[regex, 1] }
      end).to eq(find_versions_cached_return_hash)

      # NOTE: A regex should be provided using the `#regex` method in a
      # `livecheck` block but we're using a regex literal in the `strategy`
      # block here simply to ensure this method works as expected when a
      # regex isn't provided.
      expect(xml.find_versions(url: http_url, provided_content: content_version_text) do |xml|
        regex = /^v?(\d+(?:\.\d+)+)$/i.freeze
        xml.get_elements("versions//version").map { |item| item.text[regex, 1] }
      end).to eq(find_versions_cached_return_hash.merge({ regex: nil }))
    end

    it "errors if a block is not provided" do
      expect { xml.find_versions(url: http_url, provided_content: content_simple) }
        .to raise_error(ArgumentError, "Xml requires a `strategy` block")
    end

    it "returns default match_data when url is blank" do
      expect(xml.find_versions(url: "") { "1.2.3" })
        .to eq({ matches: {}, regex: nil, url: "" })
    end

    it "returns default match_data when content is blank" do
      expect(xml.find_versions(url: http_url, provided_content: "") { "1.2.3" })
        .to eq({ matches: {}, regex: nil, url: http_url, cached: true })
    end
  end
end
|
@ -171,6 +171,22 @@ livecheck do
|
||||
end
|
||||
```
|
||||
|
||||
#### `Xml` `strategy` block
|
||||
|
||||
A `strategy` block for `Xml` receives an `REXML::Document` object and, if provided, a regex. For example, if the XML contains a `versions` element with nested `version` elements and their inner text contains the version string, we could extract it using a regex as follows:
|
||||
|
||||
```ruby
|
||||
livecheck do
|
||||
url "https://www.example.com/example.xml"
|
||||
regex(/v?(\d+(?:\.\d+)+)/i)
|
||||
strategy :xml do |xml, regex|
|
||||
xml.get_elements("versions//version").map { |item| item.text[regex, 1] }
|
||||
end
|
||||
end
|
||||
```
|
||||
|
||||
For more information on how to work with an `REXML::Document` object, please refer to the [`REXML::Document`](https://ruby.github.io/rexml/REXML/Document.html) and [`REXML::Element`](https://ruby.github.io/rexml/REXML/Element.html) documentation.
|
||||
|
||||
### `skip`
|
||||
|
||||
Livecheck automatically skips some formulae/casks for a number of reasons (deprecated, disabled, discontinued, etc.). However, on rare occasions we need to use a `livecheck` block to do a manual skip. The `skip` method takes a string containing a very brief reason for skipping.
|
||||
|
Loading…
x
Reference in New Issue
Block a user