Merge pull request #14845 from samford/livecheck/add-xml-strategy
livecheck: Add Xml strategy
This commit is contained in:
commit
97fbd89a57
@ -52,6 +52,7 @@ Style/Documentation:
|
|||||||
- livecheck/strategy/pypi.rb
|
- livecheck/strategy/pypi.rb
|
||||||
- livecheck/strategy/sourceforge.rb
|
- livecheck/strategy/sourceforge.rb
|
||||||
- livecheck/strategy/sparkle.rb
|
- livecheck/strategy/sparkle.rb
|
||||||
|
- livecheck/strategy/xml.rb
|
||||||
- livecheck/strategy/xorg.rb
|
- livecheck/strategy/xorg.rb
|
||||||
- os.rb
|
- os.rb
|
||||||
- resource.rb
|
- resource.rb
|
||||||
|
@ -156,7 +156,7 @@ module Homebrew
|
|||||||
# Only treat the strategy as usable if the `livecheck` block
|
# Only treat the strategy as usable if the `livecheck` block
|
||||||
# contains a regex and/or `strategy` block
|
# contains a regex and/or `strategy` block
|
||||||
next if !regex_provided && !block_provided
|
next if !regex_provided && !block_provided
|
||||||
elsif strategy == Json
|
elsif [Json, Xml].include?(strategy)
|
||||||
# Only treat the strategy as usable if the `livecheck` block
|
# Only treat the strategy as usable if the `livecheck` block
|
||||||
# specifies the strategy and contains a `strategy` block
|
# specifies the strategy and contains a `strategy` block
|
||||||
next if (livecheck_strategy != strategy_symbol) || !block_provided
|
next if (livecheck_strategy != strategy_symbol) || !block_provided
|
||||||
@ -284,4 +284,5 @@ require_relative "strategy/page_match"
|
|||||||
require_relative "strategy/pypi"
|
require_relative "strategy/pypi"
|
||||||
require_relative "strategy/sourceforge"
|
require_relative "strategy/sourceforge"
|
||||||
require_relative "strategy/sparkle"
|
require_relative "strategy/sparkle"
|
||||||
|
require_relative "strategy/xml"
|
||||||
require_relative "strategy/xorg"
|
require_relative "strategy/xorg"
|
||||||
|
@ -66,25 +66,8 @@ module Homebrew
|
|||||||
# @return [Item, nil]
|
# @return [Item, nil]
|
||||||
sig { params(content: String).returns(T::Array[Item]) }
|
sig { params(content: String).returns(T::Array[Item]) }
|
||||||
def self.items_from_content(content)
|
def self.items_from_content(content)
|
||||||
require "rexml/document"
|
xml = Xml.parse_xml(content)
|
||||||
|
return [] if xml.blank?
|
||||||
parsing_tries = 0
|
|
||||||
xml = begin
|
|
||||||
REXML::Document.new(content)
|
|
||||||
rescue REXML::UndefinedNamespaceException => e
|
|
||||||
undefined_prefix = e.to_s[/Undefined prefix ([^ ]+) found/i, 1]
|
|
||||||
raise if undefined_prefix.blank?
|
|
||||||
|
|
||||||
# Only retry parsing once after removing prefix from content
|
|
||||||
parsing_tries += 1
|
|
||||||
raise if parsing_tries > 1
|
|
||||||
|
|
||||||
# When an XML document contains a prefix without a corresponding
|
|
||||||
# namespace, it's necessary to remove the prefix from the content
|
|
||||||
# to be able to successfully parse it using REXML
|
|
||||||
content = content.gsub(%r{(</?| )#{Regexp.escape(undefined_prefix)}:}, '\1')
|
|
||||||
retry
|
|
||||||
end
|
|
||||||
|
|
||||||
# Remove prefixes, so we can reliably identify elements and attributes
|
# Remove prefixes, so we can reliably identify elements and attributes
|
||||||
xml.root&.each_recursive do |node|
|
xml.root&.each_recursive do |node|
|
||||||
|
153
Library/Homebrew/livecheck/strategy/xml.rb
Normal file
153
Library/Homebrew/livecheck/strategy/xml.rb
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
# typed: true
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
module Homebrew
|
||||||
|
module Livecheck
|
||||||
|
module Strategy
|
||||||
|
# The {Xml} strategy fetches content at a URL, parses it as XML using
# `REXML`, and provides the `REXML::Document` to a `strategy` block.
# If a regex is present in the `livecheck` block, it should be passed
# as the second argument to the `strategy` block.
#
# This is a generic strategy that doesn't contain any logic for finding
# versions, as the structure of XML data varies. Instead, a `strategy`
# block must be used to extract version information from the XML data.
# For more information on how to work with an `REXML::Document` object,
# please refer to the [`REXML::Document`](https://ruby.github.io/rexml/REXML/Document.html)
# and [`REXML::Element`](https://ruby.github.io/rexml/REXML/Element.html)
# documentation.
#
# This strategy is not applied automatically and it is necessary to use
# `strategy :xml` in a `livecheck` block (in conjunction with a
# `strategy` block) to use it.
#
# This strategy's {find_versions} method can be used in other strategies
# that work with XML content, so it should only be necessary to write
# the version-finding logic that works with the parsed XML data.
#
# @api public
class Xml
  extend T::Sig

  NICE_NAME = "XML"

  # A priority of zero causes livecheck to skip the strategy. We do this
  # for {Xml} so we can selectively apply it only when a strategy block
  # is provided in a `livecheck` block.
  PRIORITY = 0

  # The `Regexp` used to determine if the strategy applies to the URL.
  URL_MATCH_REGEX = %r{^https?://}i.freeze

  # Whether the strategy can be applied to the provided URL.
  # {Xml} will technically match any HTTP URL but is only usable with
  # a `livecheck` block containing a `strategy` block.
  #
  # @param url [String] the URL to match against
  # @return [Boolean]
  sig { params(url: String).returns(T::Boolean) }
  def self.match?(url)
    URL_MATCH_REGEX.match?(url)
  end

  # Parses XML text and returns an `REXML::Document` object.
  #
  # If parsing fails because the document uses a prefix that has no
  # corresponding namespace, the prefix is stripped from the content and
  # parsing is attempted one more time.
  #
  # @param content [String] the XML text to parse
  # @return [REXML::Document, nil]
  # @raise [RuntimeError] if the undefined prefix can't be identified from
  #   the error message, or if an undefined-namespace error is raised
  #   again after the single retry
  sig { params(content: String).returns(T.nilable(REXML::Document)) }
  def self.parse_xml(content)
    # Loaded lazily, so REXML is only required when XML is actually parsed
    require "rexml/document"

    parsing_tries = 0
    begin
      REXML::Document.new(content)
    rescue REXML::UndefinedNamespaceException => e
      undefined_prefix = e.to_s[/Undefined prefix ([^ ]+) found/i, 1]
      raise "Could not identify undefined prefix." if undefined_prefix.blank?

      # Only retry parsing once after removing prefix from content
      parsing_tries += 1
      raise "Could not parse XML after removing undefined prefix." if parsing_tries > 1

      # When an XML document contains a prefix without a corresponding
      # namespace, it's necessary to remove the prefix from the content
      # to be able to successfully parse it using REXML
      content = content.gsub(%r{(</?| )#{Regexp.escape(undefined_prefix)}:}, '\1')
      retry
    end
  end

  # Parses XML text and identifies versions using a `strategy` block.
  # If a regex is provided, it will be passed as the second argument to
  # the `strategy` block (after the parsed XML data).
  #
  # @param content [String] the XML text to parse and check
  # @param regex [Regexp, nil] a regex used for matching versions in the
  #   content
  # @return [Array]
  # @raise [RuntimeError] if the block takes two arguments but no regex
  #   was provided
  sig {
    params(
      content: String,
      regex:   T.nilable(Regexp),
      block:   T.untyped,
    ).returns(T::Array[String])
  }
  def self.versions_from_content(content, regex = nil, &block)
    return [] if content.blank? || block.blank?

    # `parse_xml` requires REXML itself, so no separate `require` is
    # needed before calling it
    xml = parse_xml(content)
    return [] if xml.blank?

    block_return_value = if regex.present?
      yield(xml, regex)
    elsif block.arity == 2
      # The block expects a regex as its second argument but none exists
      raise "Two arguments found in `strategy` block but no regex provided."
    else
      yield(xml)
    end
    Strategy.handle_block_return(block_return_value)
  end

  # Checks the XML content at the URL for versions, using the provided
  # `strategy` block to extract version information.
  #
  # @param url [String] the URL of the content to check
  # @param regex [Regexp, nil] a regex used for matching versions
  # @param provided_content [String, nil] page content to use in place of
  #   fetching via `Strategy#page_content`
  # @param homebrew_curl [Boolean] whether to use brewed curl with the URL
  # @return [Hash]
  # @raise [ArgumentError] if a `strategy` block is not provided
  sig {
    params(
      url:              String,
      regex:            T.nilable(Regexp),
      provided_content: T.nilable(String),
      homebrew_curl:    T::Boolean,
      _unused:          T.nilable(T::Hash[Symbol, T.untyped]),
      block:            T.untyped,
    ).returns(T::Hash[Symbol, T.untyped])
  }
  def self.find_versions(url:, regex: nil, provided_content: nil, homebrew_curl: false, **_unused, &block)
    raise ArgumentError, "#{Utils.demodulize(T.must(name))} requires a `strategy` block" if block.blank?

    match_data = { matches: {}, regex: regex, url: url }
    # A missing block has already been rejected above, so only the URL
    # needs to be checked here
    return match_data if url.blank?

    content = if provided_content.is_a?(String)
      # Content supplied by the caller is flagged as cached and used as is
      match_data[:cached] = true
      provided_content
    else
      match_data.merge!(Strategy.page_content(url, homebrew_curl: homebrew_curl))
      match_data[:content]
    end
    return match_data if content.blank?

    versions_from_content(content, regex, &block).each do |match_text|
      match_data[:matches][match_text] = Version.new(match_text)
    end

    match_data
  end
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
205
Library/Homebrew/test/livecheck/strategy/xml_spec.rb
Normal file
205
Library/Homebrew/test/livecheck/strategy/xml_spec.rb
Normal file
@ -0,0 +1,205 @@
|
|||||||
|
# typed: false
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require "livecheck/strategy"
|
||||||
|
require "rexml/document"
|
||||||
|
|
||||||
|
# Specs for the {Homebrew::Livecheck::Strategy::Xml} strategy, covering URL
# matching, XML parsing and version extraction using `strategy` blocks.
describe Homebrew::Livecheck::Strategy::Xml do
  subject(:xml) { described_class }

  let(:http_url) { "https://brew.sh/blog/" }
  let(:non_http_url) { "ftp://brew.sh/" }

  let(:regex) { /^v?(\d+(?:\.\d+)+)$/i }

  # XML where the version strings are the text of `version` elements
  let(:content_version_text) {
    <<~EOS
      <?xml version="1.0" encoding="utf-8"?>
      <versions>
      <version>1.1.2</version>
      <version>1.1.2b</version>
      <version>1.1.2a</version>
      <version>1.1.1</version>
      <version>1.1.0</version>
      <version>1.1.0-rc3</version>
      <version>1.1.0-rc2</version>
      <version>1.1.0-rc1</version>
      <version>1.0.x-last</version>
      <version>1.0.3</version>
      <version>1.0.3-rc3</version>
      <version>1.0.3-rc2</version>
      <version>1.0.3-rc1</version>
      <version>1.0.2</version>
      <version>1.0.2-rc1</version>
      <version>1.0.1</version>
      <version>1.0.1-rc1</version>
      <version>1.0.0</version>
      <version>1.0.0-rc1</version>
      </versions>
    EOS
  }

  # XML where the version strings are `version` attributes of `item` elements
  let(:content_version_attr) {
    <<~EOS
      <?xml version="1.0" encoding="utf-8"?>
      <items>
      <item version="1.1.2" />
      <item version="1.1.2b" />
      <item version="1.1.2a" />
      <item version="1.1.1" />
      <item version="1.1.0" />
      <item version="1.1.0-rc3" />
      <item version="1.1.0-rc2" />
      <item version="1.1.0-rc1" />
      <item version="1.0.x-last" />
      <item version="1.0.3" />
      <item version="1.0.3-rc3" />
      <item version="1.0.3-rc2" />
      <item version="1.0.3-rc1" />
      <item version="1.0.2" />
      <item version="1.0.2-rc1" />
      <item version="1.0.1" />
      <item version="1.0.1-rc1" />
      <item version="1.0.0" />
      <item version="1.0.0-rc1" />
      </items>
    EOS
  }

  let(:content_simple) {
    <<~EOS
      <?xml version="1.0" encoding="utf-8"?>
      <version>1.2.3</version>
    EOS
  }

  # XML using a `something:` prefix that has no corresponding namespace
  let(:content_undefined_namespace) {
    <<~EOS
      <?xml version="1.0" encoding="utf-8"?>
      <something:version>1.2.3</something:version>
    EOS
  }

  let(:content_matches) { ["1.1.2", "1.1.1", "1.1.0", "1.0.3", "1.0.2", "1.0.1", "1.0.0"] }
  let(:content_simple_matches) { ["1.2.3"] }

  let(:find_versions_return_hash) {
    {
      matches: {
        "1.1.2" => Version.new("1.1.2"),
        "1.1.1" => Version.new("1.1.1"),
        "1.1.0" => Version.new("1.1.0"),
        "1.0.3" => Version.new("1.0.3"),
        "1.0.2" => Version.new("1.0.2"),
        "1.0.1" => Version.new("1.0.1"),
        "1.0.0" => Version.new("1.0.0"),
      },
      regex:   regex,
      url:     http_url,
    }
  }

  let(:find_versions_cached_return_hash) {
    find_versions_return_hash.merge({ cached: true })
  }

  describe "::match?" do
    it "returns true for an HTTP URL" do
      expect(xml.match?(http_url)).to be true
    end

    it "returns false for a non-HTTP URL" do
      expect(xml.match?(non_http_url)).to be false
    end
  end

  describe "::parse_xml" do
    # TODO: Should we be comparing against an actual REXML::Document object?
    it "returns an REXML::Document when given XML content" do
      expect(xml.parse_xml(content_version_text)).to be_an_instance_of(REXML::Document)
    end

    it "returns an REXML::Document when given XML content with an undefined namespace" do
      expect(xml.parse_xml(content_undefined_namespace)).to be_an_instance_of(REXML::Document)
    end
  end

  describe "::versions_from_content" do
    it "returns an empty array when given a block but content is blank" do
      expect(xml.versions_from_content("", regex) { "1.2.3" }).to eq([])
    end

    it "returns an array of version strings when given content and a block" do
      # Returning a string from block
      expect(xml.versions_from_content(content_simple) do |xml|
        xml.elements["version"]&.text
      end).to eq(content_simple_matches)
      expect(xml.versions_from_content(content_simple, regex) do |xml|
        version = xml.elements["version"]&.text
        next if version.blank?

        version[regex, 1]
      end).to eq(content_simple_matches)

      # Returning an array of strings from block
      expect(xml.versions_from_content(content_version_text, regex) do |xml, regex|
        xml.get_elements("versions//version").map { |item| item.text[regex, 1] }
      end).to eq(content_matches)

      expect(xml.versions_from_content(content_version_attr, regex) do |xml, regex|
        xml.get_elements("items//item").map do |item|
          version = item["version"]
          next if version.blank?

          version[regex, 1]
        end
      end).to eq(content_matches)
    end

    it "allows a nil return from a block" do
      expect(xml.versions_from_content(content_simple, regex) { next }).to eq([])
    end

    it "errors if a block uses two arguments but a regex is not given" do
      expect { xml.versions_from_content(content_simple) { |xml, regex| xml["version"][regex, 1] } }
        .to raise_error("Two arguments found in `strategy` block but no regex provided.")
    end

    it "errors on an invalid return type from a block" do
      expect { xml.versions_from_content(content_simple, regex) { 123 } }
        .to raise_error(TypeError, Homebrew::Livecheck::Strategy::INVALID_BLOCK_RETURN_VALUE_MSG)
    end
  end

  # The method under test is `find_versions` (not a predicate), so the group
  # label carries no trailing `?`
  describe "::find_versions" do
    it "finds versions in provided_content using a block" do
      expect(xml.find_versions(url: http_url, regex: regex, provided_content: content_version_text) do |xml, regex|
        xml.get_elements("versions//version").map { |item| item.text[regex, 1] }
      end).to eq(find_versions_cached_return_hash)

      # NOTE: A regex should be provided using the `#regex` method in a
      # `livecheck` block but we're using a regex literal in the `strategy`
      # block here simply to ensure this method works as expected when a
      # regex isn't provided.
      expect(xml.find_versions(url: http_url, provided_content: content_version_text) do |xml|
        regex = /^v?(\d+(?:\.\d+)+)$/i.freeze
        xml.get_elements("versions//version").map { |item| item.text[regex, 1] }
      end).to eq(find_versions_cached_return_hash.merge({ regex: nil }))
    end

    it "errors if a block is not provided" do
      expect { xml.find_versions(url: http_url, provided_content: content_simple) }
        .to raise_error(ArgumentError, "Xml requires a `strategy` block")
    end

    it "returns default match_data when url is blank" do
      expect(xml.find_versions(url: "") { "1.2.3" })
        .to eq({ matches: {}, regex: nil, url: "" })
    end

    it "returns default match_data when content is blank" do
      expect(xml.find_versions(url: http_url, provided_content: "") { "1.2.3" })
        .to eq({ matches: {}, regex: nil, url: http_url, cached: true })
    end
  end
end
|
@ -171,6 +171,22 @@ livecheck do
|
|||||||
end
|
end
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### `Xml` `strategy` block
|
||||||
|
|
||||||
|
A `strategy` block for `Xml` receives an `REXML::Document` object and, if provided, a regex. For example, if the XML contains a `versions` element with nested `version` elements and their inner text contains the version string, we could extract it using a regex as follows:
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
livecheck do
|
||||||
|
url "https://www.example.com/example.xml"
|
||||||
|
regex(/v?(\d+(?:\.\d+)+)/i)
|
||||||
|
strategy :xml do |xml, regex|
|
||||||
|
xml.get_elements("versions//version").map { |item| item.text[regex, 1] }
|
||||||
|
end
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
|
For more information on how to work with an `REXML::Document` object, please refer to the [`REXML::Document`](https://ruby.github.io/rexml/REXML/Document.html) and [`REXML::Element`](https://ruby.github.io/rexml/REXML/Element.html) documentation.
|
||||||
|
|
||||||
### `skip`
|
### `skip`
|
||||||
|
|
||||||
Livecheck automatically skips some formulae/casks for a number of reasons (deprecated, disabled, discontinued, etc.). However, on rare occasions we need to use a `livecheck` block to do a manual skip. The `skip` method takes a string containing a very brief reason for skipping.
|
Livecheck automatically skips some formulae/casks for a number of reasons (deprecated, disabled, discontinued, etc.). However, on rare occasions we need to use a `livecheck` block to do a manual skip. The `skip` method takes a string containing a very brief reason for skipping.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user