188 lines
6.2 KiB
Ruby
Raw Normal View History

2020-12-18 17:58:21 +01:00
# typed: true
2020-12-12 21:59:04 +01:00
# frozen_string_literal: true
require "bundle_version"
2020-12-12 21:59:04 +01:00
module Homebrew
module Livecheck
module Strategy
# The {Sparkle} strategy fetches content at a URL and parses
2020-12-20 00:23:22 -05:00
# it as a Sparkle appcast in XML format.
2020-12-12 21:59:04 +01:00
#
# @api private
class Sparkle
2020-12-12 21:59:04 +01:00
extend T::Sig
# A priority of zero causes livecheck to skip the strategy. We only
# apply {Sparkle} using `strategy :sparkle` in a `livecheck` block,
# as we can't automatically determine when this can be successfully
# applied to a URL without fetching the content.
PRIORITY = 0
# The `Regexp` used to determine if the strategy applies to the URL.
URL_MATCH_REGEX = %r{^https?://}i.freeze
# Whether the strategy can be applied to the provided URL.
# The strategy will technically match any HTTP(S) URL but, because of
# its zero priority, it is only used when a `livecheck` block
# explicitly specifies `strategy :sparkle`.
sig { params(url: String).returns(T::Boolean) }
def self.match?(url)
  # Any URL beginning with an http:// or https:// scheme (case-insensitive)
  # satisfies the pattern.
  url.match?(URL_MATCH_REGEX)
end
2021-04-04 03:00:34 +02:00
# Value object holding the data extracted from one appcast `item`.
# Members are supplied as keyword arguments.
#
# @api private
Item = Struct.new(
  # @api public
  :title,
  # @api private
  :pub_date,
  # @api public
  :url,
  # @api private
  :bundle_version,
  keyword_init: true,
) do
  extend T::Sig
  extend Forwardable

  # `version` and `short_version` are forwarded to the item's
  # `bundle_version` object.
  #
  # @api public
  def_delegators :bundle_version, :version, :short_version
end
2020-12-17 15:40:23 +01:00
# Parses appcast XML and returns the most recent usable item.
#
# Items are skipped when their enclosure declares a non-macOS `os`, when
# their `minimumSystemVersion` is a prerelease macOS version or when they
# contain none of the fields we extract (title, publication date, URL,
# version information).
#
# @param content [String] the appcast XML to parse
# @return [Item, nil] the item with the latest `pub_date` (ties are broken
#   by `bundle_version`) or `nil` if there are no usable items
# @raise [REXML::UndefinedNamespaceException] if the content still has an
#   undefined namespace prefix after one repair attempt
sig { params(content: String).returns(T.nilable(Item)) }
def self.item_from_content(content)
  require "rexml/document"

  parsing_tries = 0
  xml = begin
    REXML::Document.new(content)
  rescue REXML::UndefinedNamespaceException => e
    undefined_prefix = e.to_s[/Undefined prefix ([^ ]+) found/i, 1]
    raise if undefined_prefix.blank?

    # Only retry parsing once after removing prefix from content
    parsing_tries += 1
    raise if parsing_tries > 1

    # When an XML document contains a prefix without a corresponding
    # namespace, it's necessary to remove the prefix from the
    # content to be able to successfully parse it using REXML
    content = content.gsub(%r{(</?| )#{Regexp.escape(undefined_prefix)}:}, '\1')
    retry
  end

  # Remove prefixes, so we can reliably identify elements and attributes
  xml.root&.each_recursive do |node|
    node.prefix = ""
    node.attributes.each_attribute do |attribute|
      attribute.prefix = ""
    end
  end

  items = xml.get_elements("//rss//channel//item").map do |item|
    enclosure = item.elements["enclosure"]

    if enclosure
      url = enclosure["url"]
      short_version = enclosure["shortVersionString"]
      version = enclosure["version"]
      os = enclosure["os"]
    end

    # Skip items for other platforms before doing any further work.
    # Both spellings of the macOS value are accepted ("macos" and the
    # older "osx"); an item without an `os` attribute is kept.
    next if os && os != "osx" && os != "macos"

    url ||= item.elements["link"]&.text
    short_version ||= item.elements["shortVersionString"]&.text&.strip
    version ||= item.elements["version"]&.text&.strip

    title = item.elements["title"]&.text&.strip
    pub_date = item.elements["pubDate"]&.text&.strip&.presence&.yield_self do |date_string|
      Time.parse(date_string)
    rescue ArgumentError
      # Omit unparseable strings (e.g. non-English dates)
      nil
    end

    # Fall back to version information at the end of the title,
    # e.g. "App 1.2.3 (456)"
    if (match = title&.match(/(\d+(?:\.\d+)*)\s*(\([^)]+\))?\Z/))
      short_version ||= match[1]
      version ||= match[2]
    end

    bundle_version = BundleVersion.new(short_version, version) if short_version || version

    if (minimum_system_version = item.elements["minimumSystemVersion"]&.text&.gsub(/\A\D+|\D+\z/, ""))
      macos_minimum_system_version = begin
        OS::Mac::Version.new(minimum_system_version).strip_patch
      rescue MacOSVersionError
        nil
      end

      next if macos_minimum_system_version&.prerelease?
    end

    data = {
      title:          title,
      pub_date:       pub_date,
      url:            url,
      bundle_version: bundle_version,
    }.compact

    # The emptiness check has to happen before the default `pub_date` is
    # applied, otherwise `data` is never empty and blank items are kept.
    next if data.empty?

    # Set a default `pub_date` (for sorting) if one isn't provided
    data[:pub_date] ||= Time.new(0)

    Item.new(**data)
  end.compact

  # Pick the newest item by date, then by bundle version. An item without
  # a bundle version sorts below one that has it, so a date tie between
  # such items no longer raises a comparison error.
  items.max do |a, b|
    date_order = a.pub_date <=> b.pub_date
    next date_order unless date_order.zero?

    a_version = a.bundle_version
    b_version = b.bundle_version
    if a_version && b_version
      a_version <=> b_version
    elsif a_version
      1
    elsif b_version
      -1
    else
      0
    end
  end
end
Standardize valid strategy block return types Valid `strategy` block return types currently vary between strategies. Some only accept a string whereas others accept a string or array of strings. [`strategy` blocks also accept a `nil` return (to simplify early returns) but this was already standardized across strategies.] While some strategies only identify one version by default (where a string is an appropriate return type), it could be that a strategy block identifies more than one version. In this situation, the strategy would need to be modified to accept (and work with) an array from a `strategy` block. Rather than waiting for this to become a problem, this modifies all strategies to standardize on allowing `strategy` blocks to return a string or array of strings (even if only one of these is currently used in practice). Standardizing valid return types helps to further simplify the mental model for `strategy` blocks and reduce cognitive load. This commit extracts related logic from `#find_versions` into methods like `#versions_from_content`, which is conceptually similar to `PageMatch#page_matches` (renamed to `#versions_from_content` for consistency). This allows us to write tests for the related code without having to make network requests (or stub them) at this point. In general, this also helps to better align the structure of strategies and how the various `#find_versions` methods work with versions. There's still more planned work to be done here but this is a step in the right direction.
2021-08-10 11:09:55 -04:00
# Extracts version strings from appcast content.
#
# The newest item is obtained from {item_from_content}. When a block is
# given, the item is passed to it and the block's return value is
# normalized using `Strategy.handle_block_return`. Otherwise, the item's
# `bundle_version.nice_version` is used.
#
# @param content [String] the content to pull version information from
# @param block [Proc, nil] optional block that receives the {Item}
# @return [Array<String>] the identified versions (empty if there is no
#   item or no version)
sig {
  params(
    content: String,
    block:   T.nilable(T.proc.params(arg0: Item).returns(T.any(String, T::Array[String], NilClass))),
  ).returns(T::Array[String])
}
def self.versions_from_content(content, &block)
  latest_item = item_from_content(content)
  return [] if latest_item.blank?

  if block
    block_result = block.call(latest_item)
    return Strategy.handle_block_return(block_result)
  end

  nice_version = latest_item.bundle_version&.nice_version
  return [] unless nice_version.present?

  [nice_version]
end
2020-12-19 01:07:56 -05:00
# Checks the content at the URL for new versions.
#
# @param url [String] the URL of the content to check
# @param regex [Regexp, nil] must be `nil`, as this strategy does not
#   support a regex
# @param cask [Cask::Cask, nil] accepted but not used by this method
# @param block [Proc, nil] optional block that receives the {Item} and
#   returns a version string, an array of version strings or `nil`
# @return [Hash] the match data, with identified versions stored under
#   `:matches` (version text => `Version`)
# @raise [ArgumentError] if a regex is provided
sig {
  params(
    url:   String,
    regex: T.nilable(Regexp),
    cask:  T.nilable(Cask::Cask),
    # The block is forwarded to `versions_from_content`, so it accepts the
    # same return types.
    block: T.nilable(
      T.proc.params(arg0: Item).returns(T.any(String, T::Array[String], NilClass)),
    ),
  ).returns(T::Hash[Symbol, T.untyped])
}
def self.find_versions(url, regex, cask: nil, &block)
  raise ArgumentError, "The #{T.must(name).demodulize} strategy does not support a regex." if regex

  match_data = { matches: {}, regex: regex, url: url }

  match_data.merge!(Strategy.page_content(url))
  content = match_data.delete(:content)

  # `versions_from_content` requires a string, so return what we have
  # when there is no content to parse (presumably when fetching the
  # page failed; confirm against `Strategy.page_content`).
  return match_data if content.blank?

  versions_from_content(content, &block).each do |version_text|
    match_data[:matches][version_text] = Version.new(version_text)
  end

  match_data
end
end
end
end
end