Strategy: Manually handle redirections
This commit is contained in:
parent
53ef74f674
commit
12afb272bb
@ -524,6 +524,7 @@ module Homebrew
|
|||||||
|
|
||||||
if debug
|
if debug
|
||||||
puts "URL (strategy): #{strategy_data[:url]}" if strategy_data[:url] != url
|
puts "URL (strategy): #{strategy_data[:url]}" if strategy_data[:url] != url
|
||||||
|
puts "URL (final): #{strategy_data[:final_url]}" if strategy_data[:final_url]
|
||||||
puts "Regex (strategy): #{strategy_data[:regex].inspect}" if strategy_data[:regex] != livecheck_regex
|
puts "Regex (strategy): #{strategy_data[:regex].inspect}" if strategy_data[:regex] != livecheck_regex
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -564,6 +565,7 @@ module Homebrew
|
|||||||
}
|
}
|
||||||
version_info[:meta][:url][:processed] = url if url != original_url
|
version_info[:meta][:url][:processed] = url if url != original_url
|
||||||
version_info[:meta][:url][:strategy] = strategy_data[:url] if strategy_data[:url] != url
|
version_info[:meta][:url][:strategy] = strategy_data[:url] if strategy_data[:url] != url
|
||||||
|
version_info[:meta][:url][:final] = strategy_data[:final_url] if strategy_data[:final_url]
|
||||||
version_info[:meta][:strategies] = strategies.map { |s| livecheck_strategy_names[s] } if strategies.present?
|
version_info[:meta][:strategies] = strategies.map { |s| livecheck_strategy_names[s] } if strategies.present?
|
||||||
version_info[:meta][:regex] = regex.inspect if regex.present?
|
version_info[:meta][:regex] = regex.inspect if regex.present?
|
||||||
end
|
end
|
||||||
|
|||||||
@ -115,8 +115,27 @@ module Homebrew
|
|||||||
headers
|
headers
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Fetches the content at the URL and returns a hash containing the
|
||||||
|
# content and, if there are any redirections, the final URL.
|
||||||
|
#
|
||||||
|
# @param url [String] the URL of the content to check
|
||||||
|
# @return [Hash]
|
||||||
def self.page_content(url)
|
def self.page_content(url)
|
||||||
URI.parse(url).open.read
|
original_url = url
|
||||||
|
|
||||||
|
# Manually handling `URI#open` redirections allows us to detect the
|
||||||
|
# resolved URL while also supporting HTTPS to HTTP redirections (which
|
||||||
|
# are normally forbidden by `OpenURI`).
|
||||||
|
begin
|
||||||
|
content = URI.parse(url).open(redirect: false, &:read)
|
||||||
|
rescue OpenURI::HTTPRedirect => e
|
||||||
|
url = e.uri.to_s
|
||||||
|
retry
|
||||||
|
end
|
||||||
|
|
||||||
|
data = { content: content }
|
||||||
|
data[:final_url] = url unless url == original_url
|
||||||
|
data
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@ -39,18 +39,17 @@ module Homebrew
|
|||||||
URL_MATCH_REGEX.match?(url)
|
URL_MATCH_REGEX.match?(url)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Fetches the content at the URL, uses the regex to match text, and
|
# Uses the regex to match text in the content or, if a block is
|
||||||
# returns an array of unique matches.
|
# provided, passes the page content to the block to handle matching.
|
||||||
|
# With either approach, an array of unique matches is returned.
|
||||||
#
|
#
|
||||||
# @param url [String] the URL of the content to check
|
# @param content [String] the page content to check
|
||||||
# @param regex [Regexp] a regex used for matching versions in the
|
# @param regex [Regexp] a regex used for matching versions in the
|
||||||
# content
|
# content
|
||||||
# @return [Array]
|
# @return [Array]
|
||||||
def self.page_matches(url, regex, &block)
|
def self.page_matches(content, regex, &block)
|
||||||
page = Strategy.page_content(url)
|
|
||||||
|
|
||||||
if block
|
if block
|
||||||
case (value = block.call(page))
|
case (value = block.call(content))
|
||||||
when String
|
when String
|
||||||
return [value]
|
return [value]
|
||||||
when Array
|
when Array
|
||||||
@ -60,7 +59,7 @@ module Homebrew
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
page.scan(regex).map do |match|
|
content.scan(regex).map do |match|
|
||||||
case match
|
case match
|
||||||
when String
|
when String
|
||||||
match
|
match
|
||||||
@ -79,8 +78,12 @@ module Homebrew
|
|||||||
def self.find_versions(url, regex, &block)
|
def self.find_versions(url, regex, &block)
|
||||||
match_data = { matches: {}, regex: regex, url: url }
|
match_data = { matches: {}, regex: regex, url: url }
|
||||||
|
|
||||||
page_matches(url, regex, &block).each do |match|
|
match_data.merge!(Strategy.page_content(url))
|
||||||
match_data[:matches][match] = Version.new(match)
|
content = match_data.delete(:content)
|
||||||
|
return match_data if content.blank?
|
||||||
|
|
||||||
|
page_matches(content, regex, &block).each do |match_text|
|
||||||
|
match_data[:matches][match_text] = Version.new(match_text)
|
||||||
end
|
end
|
||||||
|
|
||||||
match_data
|
match_data
|
||||||
|
|||||||
@ -6,11 +6,50 @@ require "livecheck/strategy/page_match"
|
|||||||
describe Homebrew::Livecheck::Strategy::PageMatch do
|
describe Homebrew::Livecheck::Strategy::PageMatch do
|
||||||
subject(:page_match) { described_class }
|
subject(:page_match) { described_class }
|
||||||
|
|
||||||
let(:url) { "http://api.github.com/Homebrew/brew/releases/latest" }
|
let(:url) { "https://brew.sh/blog/" }
|
||||||
|
let(:regex) { %r{href=.*?/homebrew[._-]v?(\d+(?:\.\d+)+)/?["' >]}i }
|
||||||
|
|
||||||
|
let(:page_content) {
|
||||||
|
<<~EOS
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Homebrew — Homebrew</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<ul class="posts">
|
||||||
|
<li><a href="/2020/12/01/homebrew-2.6.0/" title="2.6.0"><h2>2.6.0</h2><h3>01 Dec 2020</h3></a></li>
|
||||||
|
<li><a href="/2020/11/18/homebrew-tap-with-bottles-uploaded-to-github-releases/" title="Homebrew tap with bottles uploaded to GitHub Releases"><h2>Homebrew tap with bottles uploaded to GitHub Releases</h2><h3>18 Nov 2020</h3></a></li>
|
||||||
|
<li><a href="/2020/09/08/homebrew-2.5.0/" title="2.5.0"><h2>2.5.0</h2><h3>08 Sep 2020</h3></a></li>
|
||||||
|
<li><a href="/2020/06/11/homebrew-2.4.0/" title="2.4.0"><h2>2.4.0</h2><h3>11 Jun 2020</h3></a></li>
|
||||||
|
<li><a href="/2020/05/29/homebrew-2.3.0/" title="2.3.0"><h2>2.3.0</h2><h3>29 May 2020</h3></a></li>
|
||||||
|
<li><a href="/2019/11/27/homebrew-2.2.0/" title="2.2.0"><h2>2.2.0</h2><h3>27 Nov 2019</h3></a></li>
|
||||||
|
<li><a href="/2019/06/14/homebrew-maintainer-meeting/" title="Homebrew Maintainer Meeting"><h2>Homebrew Maintainer Meeting</h2><h3>14 Jun 2019</h3></a></li>
|
||||||
|
<li><a href="/2019/04/04/homebrew-2.1.0/" title="2.1.0"><h2>2.1.0</h2><h3>04 Apr 2019</h3></a></li>
|
||||||
|
<li><a href="/2019/02/02/homebrew-2.0.0/" title="2.0.0"><h2>2.0.0</h2><h3>02 Feb 2019</h3></a></li>
|
||||||
|
<li><a href="/2019/01/09/homebrew-1.9.0/" title="1.9.0"><h2>1.9.0</h2><h3>09 Jan 2019</h3></a></li>
|
||||||
|
</ul>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
EOS
|
||||||
|
}
|
||||||
|
let(:page_content_matches) { ["2.6.0", "2.5.0", "2.4.0", "2.3.0", "2.2.0", "2.1.0", "2.0.0", "1.9.0"] }
|
||||||
|
|
||||||
describe "::match?" do
|
describe "::match?" do
|
||||||
it "returns true for any URL" do
|
it "returns true for any URL" do
|
||||||
expect(page_match.match?(url)).to be true
|
expect(page_match.match?(url)).to be true
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe "::page_matches" do
|
||||||
|
it "finds matching text in page content using a regex" do
|
||||||
|
expect(page_match.page_matches(page_content, regex)).to eq(page_content_matches)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "finds matching text in page content using a strategy block" do
|
||||||
|
expect(page_match.page_matches(page_content, regex) { |content| content.scan(regex).map(&:first).uniq })
|
||||||
|
.to eq(page_content_matches)
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user