Merge pull request #11381 from cnnrmnn/fix-range-requests

Fix range requests with `curl`
This commit is contained in:
Rylan Polster 2021-05-15 16:40:20 -04:00 committed by GitHub
commit 9f5afcbf63
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 23 deletions

View File

@ -23,9 +23,10 @@ describe CurlDownloadStrategy do
it "calls curl with default arguments" do
expect(strategy).to receive(:curl).with(
# example.com supports partial requests.
"--continue-at", "-",
"--location",
"--remote-time",
"--continue-at", "0",
"--output", an_instance_of(Pathname),
url,
an_instance_of(Hash)

View File

@ -118,36 +118,40 @@ module Utils
result
end
def curl_download(*args, to: nil, partial: true, **options)
# Parses one raw HTTP response header block into a Hash.
#
# The first line is treated as the status line and discarded. Every following
# line is split at its first colon into a field name and a field value; the
# name is lower-cased so lookups are case-insensitive, and surrounding
# whitespace is stripped from both parts. Lines without a colon (or with an
# empty name) are ignored. If a field name occurs more than once, the last
# occurrence wins.
#
# NOTE(review): `blank?` is not a core Ruby method; presumably it is provided
# by ActiveSupport elsewhere in the project -- confirm.
#
# @param headers [String, nil] header lines separated by "\r\n", beginning
#   with the status line
# @return [Hash{String => String}] lower-cased field names mapped to values;
#   empty when `headers` is blank
def parse_headers(headers)
  return {} if headers.blank?

  # Skip the status line (e.g. "HTTP/1.1 200 OK"); it is not a header field.
  headers.split("\r\n").drop(1).each_with_object({}) do |line, parsed|
    # Split on the FIRST colon only: values such as URLs or dates may contain
    # further colons, and the space after the colon is optional.
    name, separator, value = line.partition(":")
    name = name.strip
    next if separator.empty? || name.empty?

    parsed[name.downcase] = value.strip
  end
end
def curl_download(*args, to: nil, try_partial: true, **options)
destination = Pathname(to)
destination.dirname.mkpath
if partial
range_stdout = curl_output("--location", "--range", "0-1",
"--dump-header", "-",
"--write-out", "%\{http_code}",
"--output", "/dev/null", *args, **options).stdout
headers, _, http_status = range_stdout.partition("\r\n\r\n")
if try_partial
range_stdout = curl_output("--location", "--head", *args, **options).stdout
headers = parse_headers(range_stdout.split("\r\n\r\n").first)
supports_partial_download = http_status.to_i == 206 # Partial Content
if supports_partial_download &&
# Any value for `accept-ranges` other than none indicates that the server supports partial requests.
# Its absence indicates no support.
supports_partial = headers.key?("accept-ranges") && headers["accept-ranges"] != "none"
if supports_partial &&
destination.exist? &&
destination.size == %r{^.*Content-Range: bytes \d+-\d+/(\d+)\r\n.*$}m.match(headers)&.[](1)&.to_i
destination.size == headers["content-length"].to_i
return # We've already downloaded all the bytes
end
else
supports_partial_download = false
end
continue_at = if destination.exist? && supports_partial_download
"-"
else
0
end
args = ["--location", "--remote-time", "--output", destination, *args]
# continue-at shouldn't be used with servers that don't support partial requests.
args = ["--continue-at", "-", *args] if destination.exist? && supports_partial
curl(
"--location", "--remote-time", "--continue-at", continue_at.to_s, "--output", destination, *args, **options
)
curl(*args, **options)
end
def curl_output(*args, **options)

View File

@ -34,8 +34,8 @@ module SPDX
def download_latest_license_data!(to: DATA_PATH)
data_url = "https://raw.githubusercontent.com/spdx/license-list-data/#{latest_tag}/json/"
curl_download("#{data_url}licenses.json", to: to/"spdx_licenses.json", partial: false)
curl_download("#{data_url}exceptions.json", to: to/"spdx_exceptions.json", partial: false)
curl_download("#{data_url}licenses.json", to: to/"spdx_licenses.json", try_partial: false)
curl_download("#{data_url}exceptions.json", to: to/"spdx_exceptions.json", try_partial: false)
end
def parse_license_expression(license_expression)