2020-10-10 14:16:11 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# typed: false
							 | 
						
					
						
							
								
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# frozen_string_literal: true
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								module Homebrew
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  module Livecheck
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    module Strategy
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-05 17:17:03 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      # The {Xorg} strategy identifies versions of software at x.org by
							 | 
						
					
						
							
								
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # checking directory listing pages.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      #
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # X.Org URLs take one of the following formats, among several others:
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-05 17:17:03 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      #
							 | 
						
					
						
							
								
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # * `https://www.x.org/archive/individual/app/example-1.2.3.tar.bz2`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # * `https://www.x.org/archive/individual/font/example-1.2.3.tar.bz2`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # * `https://www.x.org/archive/individual/lib/libexample-1.2.3.tar.bz2`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # * `https://ftp.x.org/archive/individual/lib/libexample-1.2.3.tar.bz2`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # * `https://www.x.org/pub/individual/doc/example-1.2.3.tar.gz`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      #
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # The notable differences between URLs are as follows:
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-05 17:17:03 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      #
							 | 
						
					
						
							
								
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # * `www.x.org` and `ftp.x.org` seem to be interchangeable (we prefer
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      #   `www.x.org`).
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # * `/archive/` is the current top-level directory and `/pub/` will
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-05 17:17:03 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      #   redirect to the same URL using `/archive/` instead. (The strategy
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      #   handles this replacement to avoid the redirection.)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # * The `/individual/` directory contains a number of directories (e.g.
							 | 
						
					
						
							
								
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      #   app, data, doc, driver, font, lib, etc.) which contain a number of
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      #   different archive files.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      #
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # Since this strategy ends up checking the same directory listing pages
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # for multiple formulae, we've included a simple method of page caching.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # This prevents livecheck from fetching the same page more than once and
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # also dramatically speeds up these checks. Eventually we hope to
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # implement a more sophisticated page cache that all strategies using
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-05 17:17:03 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      # {PageMatch} can use (allowing us to simplify this strategy accordingly).
							 | 
						
					
						
							
								
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      #
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # The default regex identifies versions in archive files found in `href`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # attributes.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      #
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      # @api public
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      class Xorg
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        NICE_NAME = "X.Org"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # The `Regexp` used to determine if the strategy applies to the URL.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        URL_MATCH_REGEX = %r{
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          [/.]x\.org.*?/individual/|
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          freedesktop\.org/(?:archive|dist|software)/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }ix.freeze
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # Used to cache page content, so we don't fetch the same pages
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # repeatedly.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        @page_data = {}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # Whether the strategy can be applied to the provided URL.
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-05 17:17:03 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        #
							 | 
						
					
						
							
								
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # @param url [String] the URL to match against
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # @return [Boolean]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        def self.match?(url)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          URL_MATCH_REGEX.match?(url)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # Generates a URL and regex (if one isn't provided) and checks the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # content at the URL for new versions (using the regex for matching).
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        #
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # The behavior in this method for matching text in the content using a
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-05 17:17:03 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # regex is copied and modified from the {PageMatch} strategy, so that
							 | 
						
					
						
							
								
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # we can add some simple page caching. If this behavior is expanded to
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-05 17:17:03 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # apply to all strategies that use {PageMatch} to identify versions,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # then this strategy can be brought in line with the others.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        #
							 | 
						
					
						
							
								
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # @param url [String] the URL of the content to check
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # @param regex [Regexp] a regex used for matching versions in content
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # @return [Hash]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        def self.find_versions(url, regex)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          file_name = File.basename(url)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          /^(?<module_name>.+)-\d+/i =~ file_name
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          # /pub/ URLs redirect to the same URL with /archive/, so we replace
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          # it to avoid the redirection. Removing the filename from the end of
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          # the URL gives us the relevant directory listing page.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          page_url = url.sub("x.org/pub/", "x.org/archive/").delete_suffix(file_name)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-23 09:12:53 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          # Example regex: /href=.*?example[._-]v?(\d+(?:\.\d+)+)\.t/i
							 | 
						
					
						
							
								
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          regex ||= /href=.*?#{Regexp.escape(module_name)}[._-]v?(\d+(?:\.\d+)+)\.t/i
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-23 09:12:53 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          # Use the cached page content to avoid duplicate fetches
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          cached_content = @page_data[page_url]
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-23 14:35:12 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          match_data = PageMatch.find_versions(page_url, regex, cached_content)
							 | 
						
					
						
							
								
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-23 09:12:53 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          # Cache any new page content
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          @page_data[page_url] = match_data[:content] if match_data[:content].present?
							 | 
						
					
						
							
								
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          match_data
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								end
							 |