download_strategy: ignore query strings when parsing resolved URLs

This commit is contained in:
Bo Anderson 2022-04-08 19:22:40 +01:00
parent 3acdab2d44
commit a50db1378e
No known key found for this signature in database
GPG Key ID: 3DB94E204E137D65

View File

@@ -327,7 +327,7 @@ class AbstractFileDownloadStrategy < AbstractDownloadStrategy
@resolved_url_and_basename = [url, parse_basename(url)] @resolved_url_and_basename = [url, parse_basename(url)]
end end
def parse_basename(url) def parse_basename(url, search_query: true)
uri_path = if url.match?(URI::DEFAULT_PARSER.make_regexp) uri_path = if url.match?(URI::DEFAULT_PARSER.make_regexp)
uri = URI(url) uri = URI(url)
@@ -339,7 +339,11 @@ class AbstractFileDownloadStrategy < AbstractDownloadStrategy
end end
end end
uri.query ? "#{uri.path}?#{uri.query}" : uri.path if uri.query && search_query
"#{uri.path}?#{uri.query}"
else
uri.path
end
else else
url url
end end
@@ -509,8 +513,8 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
.map(&:to_i) .map(&:to_i)
.last .last
basename = filenames.last || parse_basename(redirect_url)
is_redirection = url != redirect_url is_redirection = url != redirect_url
basename = filenames.last || parse_basename(redirect_url, search_query: !is_redirection)
@resolved_info_cache[url] = [redirect_url, basename, time, file_size, is_redirection] @resolved_info_cache[url] = [redirect_url, basename, time, file_size, is_redirection]
end end