download_strategy: fix bad unescaping in basename parsing

This commit is contained in:
Bo Anderson 2023-04-18 01:40:06 +01:00
parent cf25397186
commit 6cf18e3810
No known key found for this signature in database
GPG Key ID: 3DB94E204E137D65

View File

@@ -328,8 +328,11 @@ class AbstractFileDownloadStrategy < AbstractDownloadStrategy
@resolved_url_and_basename = [url, parse_basename(url)] @resolved_url_and_basename = [url, parse_basename(url)]
end end
sig { params(url: String, search_query: T::Boolean).returns(String) }
def parse_basename(url, search_query: true) def parse_basename(url, search_query: true)
uri_path = if url.match?(URI::DEFAULT_PARSER.make_regexp) components = { path: T.let([], T::Array[String]), query: T.let([], T::Array[String]) }
if url.match?(URI::DEFAULT_PARSER.make_regexp)
uri = URI(url) uri = URI(url)
if uri.query if uri.query
@@ -340,29 +343,32 @@ class AbstractFileDownloadStrategy < AbstractDownloadStrategy
end end
end end
if uri.query && search_query if (uri_path = uri.path.presence)
"#{uri.path}?#{uri.query}" components[:path] = uri_path.split("/").map do |part|
else URI::DEFAULT_PARSER.unescape(part).presence
uri.path end.compact
end
else
url
end end
uri_path = URI.decode_www_form_component(uri_path) if search_query && (uri_query = uri.query.presence)
query_regex = /[^?&]+/ components[:query] = URI.decode_www_form(uri_query).map(&:second)
end
else
components[:path] = [url]
end
# We need a Pathname because we've monkeypatched extname to support double # We need a Pathname because we've monkeypatched extname to support double
# extensions (e.g. tar.gz). # extensions (e.g. tar.gz).
# Given a URL like https://example.com/download.php?file=foo-1.0.tar.gz # Given a URL like https://example.com/download.php?file=foo-1.0.tar.gz
# the basename we want is "foo-1.0.tar.gz", not "download.php". # the basename we want is "foo-1.0.tar.gz", not "download.php".
Pathname.new(uri_path).ascend do |path| [*components[:path], *components[:query]].reverse_each do |path|
ext = path.extname[query_regex] path = Pathname(path)
return path.basename.to_s[/#{query_regex.source}#{Regexp.escape(ext)}/] if ext return path.basename.to_s if path.extname.present?
end end
# Strip query string filename = components[:path].last
File.basename(uri_path)[query_regex] return "" if filename.blank?
File.basename(filename)
end end
end end