From 12afb272bbf0f1e0a219dfb738caa06e9cdce2c7 Mon Sep 17 00:00:00 2001 From: Sam Ford <1584702+samford@users.noreply.github.com> Date: Tue, 22 Dec 2020 22:46:52 -0500 Subject: [PATCH] Strategy: Manually handle redirections --- Library/Homebrew/livecheck/livecheck.rb | 2 + Library/Homebrew/livecheck/strategy.rb | 21 +++++++++- .../Homebrew/livecheck/strategy/page_match.rb | 23 ++++++----- .../livecheck/strategy/page_match_spec.rb | 41 ++++++++++++++++++- 4 files changed, 75 insertions(+), 12 deletions(-) diff --git a/Library/Homebrew/livecheck/livecheck.rb b/Library/Homebrew/livecheck/livecheck.rb index ece9c04656..7c832bf4e5 100644 --- a/Library/Homebrew/livecheck/livecheck.rb +++ b/Library/Homebrew/livecheck/livecheck.rb @@ -524,6 +524,7 @@ module Homebrew if debug puts "URL (strategy): #{strategy_data[:url]}" if strategy_data[:url] != url + puts "URL (final): #{strategy_data[:final_url]}" if strategy_data[:final_url] puts "Regex (strategy): #{strategy_data[:regex].inspect}" if strategy_data[:regex] != livecheck_regex end @@ -564,6 +565,7 @@ module Homebrew } version_info[:meta][:url][:processed] = url if url != original_url version_info[:meta][:url][:strategy] = strategy_data[:url] if strategy_data[:url] != url + version_info[:meta][:url][:final] = strategy_data[:final_url] if strategy_data[:final_url] version_info[:meta][:strategies] = strategies.map { |s| livecheck_strategy_names[s] } if strategies.present? version_info[:meta][:regex] = regex.inspect if regex.present? end diff --git a/Library/Homebrew/livecheck/strategy.rb b/Library/Homebrew/livecheck/strategy.rb index 76b4351b20..c08dcdd9c0 100644 --- a/Library/Homebrew/livecheck/strategy.rb +++ b/Library/Homebrew/livecheck/strategy.rb @@ -115,8 +115,27 @@ module Homebrew headers end + # Fetches the content at the URL and returns a hash containing the + # content and, if there are any redirections, the final URL. + # + # @param url [String] the URL of the content to check + # @return [Hash] def self.page_content(url) - URI.parse(url).open.read + original_url = url + + # Manually handling `URI#open` redirections allows us to detect the + # resolved URL while also supporting HTTPS to HTTP redirections (which + # are normally forbidden by `OpenURI`). + begin + content = URI.parse(url).open(redirect: false, &:read) + rescue OpenURI::HTTPRedirect => e + url = e.uri.to_s + retry + end + + data = { content: content } + data[:final_url] = url unless url == original_url + data end end end diff --git a/Library/Homebrew/livecheck/strategy/page_match.rb b/Library/Homebrew/livecheck/strategy/page_match.rb index 361cf04b97..2b24882299 100644 --- a/Library/Homebrew/livecheck/strategy/page_match.rb +++ b/Library/Homebrew/livecheck/strategy/page_match.rb @@ -39,18 +39,17 @@ module Homebrew URL_MATCH_REGEX.match?(url) end - # Fetches the content at the URL, uses the regex to match text, and - # returns an array of unique matches. + # Uses the regex to match text in the content or, if a block is + # provided, passes the page content to the block to handle matching. + # With either approach, an array of unique matches is returned. # - # @param url [String] the URL of the content to check + # @param content [String] the page content to check # @param regex [Regexp] a regex used for matching versions in the # content # @return [Array] - def self.page_matches(url, regex, &block) - page = Strategy.page_content(url) - + def self.page_matches(content, regex, &block) if block - case (value = block.call(page)) + case (value = block.call(content)) when String return [value] when Array @@ -60,7 +59,7 @@ module Homebrew end end - page.scan(regex).map do |match| + content.scan(regex).map do |match| case match when String match @@ -79,8 +78,12 @@ module Homebrew def self.find_versions(url, regex, &block) match_data = { matches: {}, regex: regex, url: url } - page_matches(url, regex, &block).each do |match| - match_data[:matches][match] = Version.new(match) + match_data.merge!(Strategy.page_content(url)) + content = match_data.delete(:content) + return match_data if content.blank? + + page_matches(content, regex, &block).each do |match_text| + match_data[:matches][match_text] = Version.new(match_text) end match_data diff --git a/Library/Homebrew/test/livecheck/strategy/page_match_spec.rb b/Library/Homebrew/test/livecheck/strategy/page_match_spec.rb index 96bf43ad53..4f853fee17 100644 --- a/Library/Homebrew/test/livecheck/strategy/page_match_spec.rb +++ b/Library/Homebrew/test/livecheck/strategy/page_match_spec.rb @@ -6,11 +6,50 @@ require "livecheck/strategy/page_match" describe Homebrew::Livecheck::Strategy::PageMatch do subject(:page_match) { described_class } - let(:url) { "http://api.github.com/Homebrew/brew/releases/latest" } + let(:url) { "https://brew.sh/blog/" } + let(:regex) { %r{href=.*?/homebrew[._-]v?(\d+(?:\.\d+)+)/?["' >]}i } + + let(:page_content) { + <<~EOS + + + + + Homebrew — Homebrew + + + + + + EOS + } + let(:page_content_matches) { ["2.6.0", "2.5.0", "2.4.0", "2.3.0", "2.2.0", "2.1.0", "2.0.0", "1.9.0"] } describe "::match?" do it "returns true for any URL" do expect(page_match.match?(url)).to be true end end + + describe "::page_matches" do + it "finds matching text in page content using a regex" do + expect(page_match.page_matches(page_content, regex)).to eq(page_content_matches) + end + + it "finds matching text in page content using a strategy block" do + expect(page_match.page_matches(page_content, regex) { |content| content.scan(regex).map(&:first).uniq }) + .to eq(page_content_matches) + end + end end