| 
									
										
										
										
											2024-07-04 20:09:36 -04:00
										 |  |  | # typed: strict | 
					
						
							| 
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 |  |  | # frozen_string_literal: true | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | module Homebrew | 
					
						
							|  |  |  |   module Livecheck | 
					
						
							|  |  |  |     module Strategy | 
					
						
							| 
									
										
										
										
											2020-11-05 17:17:03 -05:00
										 |  |  |       # The {Xorg} strategy identifies versions of software at x.org by | 
					
						
							| 
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 |  |  |       # checking directory listing pages. | 
					
						
							|  |  |  |       # | 
					
						
							|  |  |  |       # X.Org URLs take one of the following formats, among several others: | 
					
						
							| 
									
										
										
										
											2020-11-05 17:17:03 -05:00
										 |  |  |       # | 
					
						
							| 
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 |  |  |       # * `https://www.x.org/archive/individual/app/example-1.2.3.tar.bz2` | 
					
						
							|  |  |  |       # * `https://www.x.org/archive/individual/font/example-1.2.3.tar.bz2` | 
					
						
							|  |  |  |       # * `https://www.x.org/archive/individual/lib/libexample-1.2.3.tar.bz2` | 
					
						
							|  |  |  |       # * `https://ftp.x.org/archive/individual/lib/libexample-1.2.3.tar.bz2` | 
					
						
							|  |  |  |       # * `https://www.x.org/pub/individual/doc/example-1.2.3.tar.gz` | 
					
						
							| 
									
										
										
										
											2024-12-27 11:27:15 -05:00
										 |  |  |       # * `https://xorg.freedesktop.org/archive/individual/util/example-1.2.3.tar.xz` | 
					
						
							| 
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 |  |  |       # | 
					
						
							|  |  |  |       # The notable differences between URLs are as follows: | 
					
						
							| 
									
										
										
										
											2020-11-05 17:17:03 -05:00
										 |  |  |       # | 
					
						
							| 
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 |  |  |       # * `www.x.org` and `ftp.x.org` seem to be interchangeable (we prefer | 
					
						
							|  |  |  |       #   `www.x.org`). | 
					
						
							|  |  |  |       # * `/archive/` is the current top-level directory and `/pub/` will | 
					
						
							| 
									
										
										
										
											2020-11-05 17:17:03 -05:00
										 |  |  |       #   redirect to the same URL using `/archive/` instead. (The strategy | 
					
						
							|  |  |  |       #   handles this replacement to avoid the redirection.) | 
					
						
							|  |  |  |       # * The `/individual/` directory contains a number of directories (e.g. | 
					
						
							| 
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 |  |  |       #   app, data, doc, driver, font, lib, etc.) which contain a number of | 
					
						
							|  |  |  |       #   different archive files. | 
					
						
							|  |  |  |       # | 
					
						
							|  |  |  |       # Since this strategy ends up checking the same directory listing pages | 
					
						
							|  |  |  |       # for multiple formulae, we've included a simple method of page caching. | 
					
						
							|  |  |  |       # This prevents livecheck from fetching the same page more than once and | 
					
						
							|  |  |  |       # also dramatically speeds up these checks. Eventually we hope to | 
					
						
							|  |  |  |       # implement a more sophisticated page cache that all strategies using | 
					
						
							| 
									
										
										
										
											2020-11-05 17:17:03 -05:00
										 |  |  |       # {PageMatch} can use (allowing us to simplify this strategy accordingly). | 
					
						
							| 
									
										
										
										
											2020-08-08 07:16:06 +05:30
										 |  |  |       # | 
					
						
							|  |  |  |       # The default regex identifies versions in archive files found in `href` | 
					
						
							|  |  |  |       # attributes. | 
					
						
							|  |  |  |       # | 
					
						
							|  |  |  |       # @api public | 
					
						
							class Xorg
							  NICE_NAME = "X.Org"

							  # A `Regexp` fragment that matches a module name followed by a hyphen
							  # and at least one digit. Its source is interpolated into both
							  # {FILENAME_REGEX} and {URL_MATCH_REGEX}, and its `module_name` capture
							  # is read by {generate_input_values}.
							  MODULE_REGEX = /(?<module_name>.+)-\d+/i

							  # {MODULE_REGEX} anchored at the start of the text, applied to the URL
							  # basename to pull out the module name.
							  FILENAME_REGEX = /^#{MODULE_REGEX.source.strip}/i

							  # The `Regexp` that decides whether this strategy applies to a URL.
							  URL_MATCH_REGEX = %r{
							    ^https?://(?:[^/]+?\.)* # Scheme and any leading subdomains
							    (?:x\.org/(?:[^/]+/)*individual
							      |freedesktop\.org/(?:archive|dist|software)
							      |archive\.mesa3d\.org)
							    /(?:[^/]+/)*#{MODULE_REGEX.source.strip}
							  }ix

							  # Page content keyed by the URL it was fetched from, kept so the same
							  # page is not fetched more than once.
							  @page_data = T.let({}, T::Hash[String, String])

							  # Reports whether the provided URL is one this strategy can handle.
							  #
							  # @param url [String] the URL to test
							  # @return [Boolean]
							  sig { params(url: String).returns(T::Boolean) }
							  def self.match?(url)
							    url.match?(URL_MATCH_REGEX)
							  end

							  # Derives the strategy's input values from a URL: the directory
							  # listing page to check (`:url`) and a regex for finding versions in
							  # it (`:regex`). An empty hash is returned when the URL basename does
							  # not match {FILENAME_REGEX}.
							  #
							  # @param url [String] the URL used to generate values
							  # @return [Hash]
							  sig { params(url: String).returns(T::Hash[Symbol, T.untyped]) }
							  def self.generate_input_values(url)
							    basename = File.basename(url)
							    name_match = FILENAME_REGEX.match(basename)
							    return {} if name_match.blank?

							    # Swap /pub/ for /archive/ up front (the former redirects to the
							    # latter) and drop the trailing file name, which leaves the
							    # directory listing page.
							    listing_url = url.sub("x.org/pub/", "x.org/archive/").delete_suffix(basename)

							    # `Regexp.escape` also escapes hyphens; undo that so the module name
							    # appears in the generated regex with plain hyphens.
							    escaped_name = Regexp.escape(T.must(name_match[:module_name])).gsub("\\-", "-")

							    {
							      url:   listing_url,
							      # Example regex: `/href=.*?example[._-]v?(\d+(?:\.\d+)+)\.t/i`
							      regex: /href=.*?#{escaped_name}[._-]v?(\d+(?:\.\d+)+)\.t/i,
							    }
							  end

							  # Checks the generated directory listing URL for versions, using the
							  # provided regex or, when none is given, the generated one.
							  #
							  # The matching itself is delegated to {PageMatch}; this method only
							  # adds the page cache around it, passing any previously stored
							  # content as `provided_content` and storing any content that comes
							  # back.
							  #
							  # @param url [String] the URL of the content to check
							  # @param regex [Regexp] a regex used for matching versions in content
							  # @return [Hash]
							  sig {
							    params(
							      url:    String,
							      regex:  T.nilable(Regexp),
							      unused: T.untyped,
							      block:  T.nilable(Proc),
							    ).returns(T::Hash[Symbol, T.untyped])
							  }
							  def self.find_versions(url:, regex: nil, **unused, &block)
							    inputs = generate_input_values(url)
							    listing_url = inputs[:url]

							    # Hand over cached content (if any) for this page
							    result = PageMatch.find_versions(
							      url:              listing_url,
							      regex:            regex || inputs[:regex],
							      provided_content: @page_data[listing_url],
							      **unused,
							      &block
							    )

							    # Remember any content that came back for later checks
							    @page_data[listing_url] = result[:content] if result[:content].present?

							    result
							  end
							end
					
						
							|  |  |  |     end | 
					
						
							|  |  |  |   end | 
					
						
							|  |  |  | end |