download_strategy: fix bad unescaping in basename parsing

This commit is contained in:
Bo Anderson 2023-04-18 01:40:06 +01:00
parent cf25397186
commit 6cf18e3810
No known key found for this signature in database
GPG Key ID: 3DB94E204E137D65

View File

@ -328,8 +328,11 @@ class AbstractFileDownloadStrategy < AbstractDownloadStrategy
@resolved_url_and_basename = [url, parse_basename(url)]
end
sig { params(url: String, search_query: T::Boolean).returns(String) }
def parse_basename(url, search_query: true)
uri_path = if url.match?(URI::DEFAULT_PARSER.make_regexp)
components = { path: T.let([], T::Array[String]), query: T.let([], T::Array[String]) }
if url.match?(URI::DEFAULT_PARSER.make_regexp)
uri = URI(url)
if uri.query
@ -340,29 +343,32 @@ class AbstractFileDownloadStrategy < AbstractDownloadStrategy
end
end
if uri.query && search_query
"#{uri.path}?#{uri.query}"
else
uri.path
if (uri_path = uri.path.presence)
components[:path] = uri_path.split("/").map do |part|
URI::DEFAULT_PARSER.unescape(part).presence
end.compact
end
if search_query && (uri_query = uri.query.presence)
components[:query] = URI.decode_www_form(uri_query).map(&:second)
end
else
url
components[:path] = [url]
end
uri_path = URI.decode_www_form_component(uri_path)
query_regex = /[^?&]+/
# We need a Pathname because we've monkeypatched extname to support double
# extensions (e.g. tar.gz).
# Given a URL like https://example.com/download.php?file=foo-1.0.tar.gz
# the basename we want is "foo-1.0.tar.gz", not "download.php".
Pathname.new(uri_path).ascend do |path|
ext = path.extname[query_regex]
return path.basename.to_s[/#{query_regex.source}#{Regexp.escape(ext)}/] if ext
[*components[:path], *components[:query]].reverse_each do |path|
path = Pathname(path)
return path.basename.to_s if path.extname.present?
end
# Strip query string
File.basename(uri_path)[query_regex]
filename = components[:path].last
return "" if filename.blank?
File.basename(filename)
end
end