Strategy: Manually handle redirections

This commit is contained in:
Sam Ford 2020-12-22 22:46:52 -05:00
parent 53ef74f674
commit 12afb272bb
No known key found for this signature in database
GPG Key ID: 95209E46C7FFDEFE
4 changed files with 75 additions and 12 deletions

View File

@ -524,6 +524,7 @@ module Homebrew
if debug
puts "URL (strategy): #{strategy_data[:url]}" if strategy_data[:url] != url
puts "URL (final): #{strategy_data[:final_url]}" if strategy_data[:final_url]
puts "Regex (strategy): #{strategy_data[:regex].inspect}" if strategy_data[:regex] != livecheck_regex
end
@ -564,6 +565,7 @@ module Homebrew
}
version_info[:meta][:url][:processed] = url if url != original_url
version_info[:meta][:url][:strategy] = strategy_data[:url] if strategy_data[:url] != url
version_info[:meta][:url][:final] = strategy_data[:final_url] if strategy_data[:final_url]
version_info[:meta][:strategies] = strategies.map { |s| livecheck_strategy_names[s] } if strategies.present?
version_info[:meta][:regex] = regex.inspect if regex.present?
end

View File

@ -115,8 +115,27 @@ module Homebrew
headers
end
# Fetches the content at the URL and returns a hash containing the
# content and, if there are any redirections, the final URL.
#
# Redirections are followed manually instead of by `OpenURI`. This allows
# us to detect the resolved URL while also supporting HTTPS to HTTP
# redirections (which are normally forbidden by `OpenURI`).
#
# @param url [String] the URL of the content to check
# @return [Hash] a hash with a `:content` key and, only when at least one
#   redirection changed the URL, a `:final_url` key
# @raise [OpenURI::HTTPRedirect] if more redirections are encountered than
#   the internal limit allows (e.g. a redirection loop)
def self.page_content(url)
  original_url = url
  # Upper bound on followed redirections, so that a redirection loop ends
  # with an error instead of retrying forever.
  max_redirections = 10
  redirections = 0

  begin
    # `redirect: false` makes `OpenURI` raise `OpenURI::HTTPRedirect`
    # rather than following the redirection itself.
    content = URI.parse(url).open(redirect: false, &:read)
  rescue OpenURI::HTTPRedirect => e
    redirections += 1
    # Re-raise the redirection error once the limit is exceeded.
    raise if redirections > max_redirections

    url = e.uri.to_s
    retry
  end

  data = { content: content }
  data[:final_url] = url unless url == original_url
  data
end
end
end

View File

@ -39,18 +39,17 @@ module Homebrew
URL_MATCH_REGEX.match?(url)
end
# Fetches the content at the URL, uses the regex to match text, and
# returns an array of unique matches.
# Uses the regex to match text in the content or, if a block is
# provided, passes the page content to the block to handle matching.
# With either approach, an array of unique matches is returned.
#
# @param url [String] the URL of the content to check
# @param content [String] the page content to check
# @param regex [Regexp] a regex used for matching versions in the
# content
# @return [Array]
def self.page_matches(url, regex, &block)
page = Strategy.page_content(url)
def self.page_matches(content, regex, &block)
if block
case (value = block.call(page))
case (value = block.call(content))
when String
return [value]
when Array
@ -60,7 +59,7 @@ module Homebrew
end
end
page.scan(regex).map do |match|
content.scan(regex).map do |match|
case match
when String
match
@ -79,8 +78,12 @@ module Homebrew
def self.find_versions(url, regex, &block)
match_data = { matches: {}, regex: regex, url: url }
page_matches(url, regex, &block).each do |match|
match_data[:matches][match] = Version.new(match)
match_data.merge!(Strategy.page_content(url))
content = match_data.delete(:content)
return match_data if content.blank?
page_matches(content, regex, &block).each do |match_text|
match_data[:matches][match_text] = Version.new(match_text)
end
match_data

View File

@ -6,11 +6,50 @@ require "livecheck/strategy/page_match"
# Specs for the `PageMatch` strategy. `::page_matches` is exercised against
# the static `page_content` fixture below, which is passed to it directly.
describe Homebrew::Livecheck::Strategy::PageMatch do
  subject(:page_match) { described_class }

  # Only passed to `::match?` in this spec.
  let(:url) { "https://brew.sh/blog/" }
  let(:regex) { %r{href=.*?/homebrew[._-]v?(\d+(?:\.\d+)+)/?["' >]}i }

  # HTML fixture mixing release posts (which the regex should match) with
  # other posts (which it should not).
  let(:page_content) {
    <<~EOS
      <!DOCTYPE html>
      <html>
      <head>
      <meta charset="utf-8">
      <title>Homebrew Homebrew</title>
      </head>
      <body>
      <ul class="posts">
      <li><a href="/2020/12/01/homebrew-2.6.0/" title="2.6.0"><h2>2.6.0</h2><h3>01 Dec 2020</h3></a></li>
      <li><a href="/2020/11/18/homebrew-tap-with-bottles-uploaded-to-github-releases/" title="Homebrew tap with bottles uploaded to GitHub Releases"><h2>Homebrew tap with bottles uploaded to GitHub Releases</h2><h3>18 Nov 2020</h3></a></li>
      <li><a href="/2020/09/08/homebrew-2.5.0/" title="2.5.0"><h2>2.5.0</h2><h3>08 Sep 2020</h3></a></li>
      <li><a href="/2020/06/11/homebrew-2.4.0/" title="2.4.0"><h2>2.4.0</h2><h3>11 Jun 2020</h3></a></li>
      <li><a href="/2020/05/29/homebrew-2.3.0/" title="2.3.0"><h2>2.3.0</h2><h3>29 May 2020</h3></a></li>
      <li><a href="/2019/11/27/homebrew-2.2.0/" title="2.2.0"><h2>2.2.0</h2><h3>27 Nov 2019</h3></a></li>
      <li><a href="/2019/06/14/homebrew-maintainer-meeting/" title="Homebrew Maintainer Meeting"><h2>Homebrew Maintainer Meeting</h2><h3>14 Jun 2019</h3></a></li>
      <li><a href="/2019/04/04/homebrew-2.1.0/" title="2.1.0"><h2>2.1.0</h2><h3>04 Apr 2019</h3></a></li>
      <li><a href="/2019/02/02/homebrew-2.0.0/" title="2.0.0"><h2>2.0.0</h2><h3>02 Feb 2019</h3></a></li>
      <li><a href="/2019/01/09/homebrew-1.9.0/" title="1.9.0"><h2>1.9.0</h2><h3>09 Jan 2019</h3></a></li>
      </ul>
      </body>
      </html>
    EOS
  }

  # Versions expected from the fixture, in page order.
  let(:page_content_matches) { ["2.6.0", "2.5.0", "2.4.0", "2.3.0", "2.2.0", "2.1.0", "2.0.0", "1.9.0"] }

  describe "::match?" do
    it "returns true for any URL" do
      expect(page_match.match?(url)).to be true
    end
  end

  describe "::page_matches" do
    it "finds matching text in page content using a regex" do
      expect(page_match.page_matches(page_content, regex)).to eq(page_content_matches)
    end

    it "finds matching text in page content using a strategy block" do
      expect(page_match.page_matches(page_content, regex) { |content| content.scan(regex).map(&:first).uniq })
        .to eq(page_content_matches)
    end
  end
end