# typed: false
# frozen_string_literal: true

module Homebrew
  module Livecheck
    module Strategy
      # The {Pypi} strategy identifies versions of software at pypi.org by
      # checking project pages for archive files.
      #
      # PyPI URLs have a standard format but the hexadecimal text between
      # `/packages/` and the filename varies:
      #
      # * `https://files.pythonhosted.org/packages/<hex>/<hex>/<long_hex>/example-1.2.3.tar.gz`
      #
      # As such, the default regex only targets the filename at the end of the
      # URL.
      #
      # @api public
      class Pypi
        extend T::Sig

        NICE_NAME = "PyPI"

        # The `Regexp` used to extract the package name and suffix (e.g., file
        # extension) from the URL basename. `package_name` is greedy, so it
        # extends to the last hyphen that still lets the rest of the pattern
        # match.
        FILENAME_REGEX = /
          (?<package_name>.+)- # The package name followed by a hyphen
          .*? # The version string
          (?<suffix>\.tar\.[a-z0-9]+|\.[a-z0-9]+)$ # Filename extension
        /ix.freeze

        # The `Regexp` used to determine if the strategy applies to the URL.
        # The filename portion reuses the source of {FILENAME_REGEX}.
        URL_MATCH_REGEX = %r{
          ^https?://files\.pythonhosted\.org
          /packages
          (?:/[^/]+)+ # The hexadecimal paths before the filename
          /#{FILENAME_REGEX.source.strip} # The filename
        }ix.freeze

        # Whether the strategy can be applied to the provided URL.
        #
        # @param url [String] the URL to match against
        # @return [Boolean] `true` when `url` matches {URL_MATCH_REGEX}
        sig { params(url: String).returns(T::Boolean) }
        def self.match?(url)
          URL_MATCH_REGEX.match?(url)
        end

        # Generates a URL and regex (if one isn't provided) and passes them
        # to {PageMatch.find_versions} to identify versions in the content.
        #
        # When the basename of `url` does not match {FILENAME_REGEX}, there is
        # no package name to build a project page URL from, so an empty result
        # is returned without calling {PageMatch.find_versions}.
        #
        # @param url [String] the URL of the content to check
        # @param regex [Regexp, nil] a regex used for matching versions in
        #   content; a default is generated from the filename when `nil`
        # @param unused [Hash] extra keyword arguments, forwarded untouched to
        #   {PageMatch.find_versions}
        # @param block [Proc, nil] an optional block, forwarded to
        #   {PageMatch.find_versions}
        # @return [Hash]
        sig {
          params(
            url:    String,
            regex:  T.nilable(Regexp),
            # Sorbet types a `**` parameter by the type of each *value*, so
            # this must not be declared as a `Hash`.
            unused: T.untyped,
            block:  T.nilable(
              T.proc.params(arg0: String, arg1: Regexp).returns(T.any(String, T::Array[String], NilClass)),
            ),
          ).returns(T::Hash[Symbol, T.untyped])
        }
        def self.find_versions(url:, regex: nil, **unused, &block)
          match = File.basename(url).match(FILENAME_REGEX)

          # `String#match` returns `nil` when the basename lacks the
          # `<name>-<version><suffix>` shape; bail out instead of raising
          # `NoMethodError` on `nil` below.
          # NOTE(review): the keys assume the `matches`/`regex`/`url` result
          # shape of the livecheck strategies - confirm against
          # `PageMatch.find_versions`.
          return { matches: {}, regex: regex, url: url } if match.nil?

          # Use `\.t` instead of specific tarball extensions (e.g. .tar.gz)
          suffix = match[:suffix].sub(/\.t(?:ar\..+|[a-z0-9]+)$/i, ".t")

          # It's not technically necessary to have the `#files` fragment at the
          # end of the URL but it makes the debug output a bit more useful.
          page_url = "https://pypi.org/project/#{match[:package_name].gsub(/%20|_/, "-")}/#files"

          # Example regex: `%r{href=.*?/packages.*?/example[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)\.t}i`
          re_package_name = Regexp.escape(match[:package_name])
          re_suffix = Regexp.escape(suffix)
          regex ||= %r{href=.*?/packages.*?/#{re_package_name}[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)#{re_suffix}}i

          PageMatch.find_versions(url: page_url, regex: regex, **unused, &block)
        end
      end
    end
  end
end