Merge pull request #14316 from alebcay/reproducible-gzip

utils: add reproducible_gzip helper function
This commit is contained in:
Mike McQuaid 2023-01-12 09:36:06 +00:00 committed by GitHub
commit aa077d5f86
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 184 additions and 23 deletions

View File

@ -9,7 +9,7 @@ require "formula_versions"
require "cli/parser" require "cli/parser"
require "utils/inreplace" require "utils/inreplace"
require "erb" require "erb"
require "zlib" require "utils/gzip"
require "api" require "api"
BOTTLE_ERB = <<-EOS BOTTLE_ERB = <<-EOS
@ -30,7 +30,6 @@ BOTTLE_ERB = <<-EOS
EOS EOS
MAXIMUM_STRING_MATCHES = 100 MAXIMUM_STRING_MATCHES = 100
GZIP_BUFFER_SIZE = 64 * 1024
ALLOWABLE_HOMEBREW_REPOSITORY_LINKS = [ ALLOWABLE_HOMEBREW_REPOSITORY_LINKS = [
%r{#{Regexp.escape(HOMEBREW_LIBRARY)}/Homebrew/os/(mac|linux)/pkgconfig}, %r{#{Regexp.escape(HOMEBREW_LIBRARY)}/Homebrew/os/(mac|linux)/pkgconfig},
@ -425,14 +424,10 @@ module Homebrew
mv tar_path, relocatable_tar_path mv tar_path, relocatable_tar_path
# Use gzip, faster to compress than bzip2, faster to uncompress than bzip2 # Use gzip, faster to compress than bzip2, faster to uncompress than bzip2
# or an uncompressed tarball (and more bandwidth friendly). # or an uncompressed tarball (and more bandwidth friendly).
gz = Zlib::GzipWriter.open(bottle_path) Utils::Gzip.compress_with_options(relocatable_tar_path,
gz.mtime = tab.source_modified_time mtime: tab.source_modified_time,
gz.orig_name = relocatable_tar_path orig_name: relocatable_tar_path,
File.open(relocatable_tar_path, "rb") do |tarfile| output: bottle_path)
gz.write(tarfile.read(GZIP_BUFFER_SIZE)) until tarfile.eof?
end
gz.close
rm_f relocatable_tar_path
sudo_purge sudo_purge
end end

View File

@ -0,0 +1,95 @@
# typed: false
# frozen_string_literal: true
require "utils/gzip"
describe Utils::Gzip do
  # Single-file entry point: the gzip header fields and the output location can
  # be overridden, so the resulting bytes are pinned with SHA-256 digests.
  describe "compress_with_options" do
    it "uses the explicitly specified mtime, orig_name, and output path when passed" do
      mktmpdir do |dir|
        source = dir/"somefile"
        target = dir/"subdir/anotherfile.gz"
        File.write(source, "Hello world")
        mkdir dir/"subdir"

        compressed = described_class.compress_with_options(
          source,
          mtime:     Time.at(12345).utc,
          orig_name: "someotherfile",
          output:    target,
        )

        expect(compressed).to eq(target)
        expect(Digest::SHA256.hexdigest(File.read(target)))
          .to eq("df509051b519faa8a1143157d2750d1694dc5fe6373e493c0d5c360be3e61516")
      end
    end

    it "uses SOURCE_DATE_EPOCH as mtime when not explicitly specified" do
      mktmpdir do |dir|
        # With no mtime: keyword the helper falls back to this environment variable.
        ENV["SOURCE_DATE_EPOCH"] = "23456"
        source = dir/"somefile"
        gz_name = "#{source}.gz"
        File.write(source, "Hello world")

        expect(described_class.compress_with_options(source).to_s).to eq(gz_name)
        expect(Digest::SHA256.hexdigest(File.read(gz_name)))
          .to eq("a579be88ec8073391a5753b1df4d87fbf008aaec6b5a03f8f16412e2e01f119a")
      end
    end
  end

  # Multi-file entry point: returns one gzip path per input path.
  describe "compress" do
    it "creates non-reproducible gz files from input files" do
      mktmpdir do |dir|
        inputs = Array.new(3) { |i| dir/"somefile#{i}" }
        FileUtils.touch inputs
        outputs = described_class.compress(*inputs, reproducible: false)

        # Only names and existence are checked here; no digest is pinned for
        # the non-reproducible path.
        inputs.each_with_index do |input, i|
          gz_name = "#{input}.gz"
          expect(outputs[i].to_s).to eq(gz_name)
          expect(Pathname.new(gz_name)).to exist
        end
      end
    end

    it "creates reproducible gz files from input files with explicit mtime" do
      # One digest per input file, in input order.
      digests = %w[
        5b45cabc7f0192854365aeccd82036e482e35131ba39fbbc6d0684266eb2e88a
        d422bf4cbede17ae242135d7f32ba5379fbffb288c29cd38b7e5e1a5f89073f8
        1d93a3808e2bd5d8c6371ea1c9b8b538774d6486af260719400fc3a5b7ac8d6f
      ]

      mktmpdir do |dir|
        inputs = Array.new(3) { |i| dir/"somefile#{i}" }
        inputs.each { |input| File.write(input, "Hello world") }
        outputs = described_class.compress(*inputs, mtime: Time.at(12345).utc)

        inputs.each_with_index do |input, i|
          expect(outputs[i].to_s).to eq("#{input}.gz")
          expect(Digest::SHA256.hexdigest(File.read(outputs[i]))).to eq(digests[i])
        end
      end
    end

    it "creates reproducible gz files from input files with SOURCE_DATE_EPOCH as mtime" do
      ENV["SOURCE_DATE_EPOCH"] = "23456"
      # One digest per input file, in input order.
      digests = %w[
        d5e0cc3259b1eb61d93ee5a30d41aef4a382c1cf2b759719c289f625e27b915c
        068657725bca5f9c2bc62bc6bf679eb63786e92d16cae575dee2fd9787a338f3
        e566e9fdaf9aa2a7c9501f9845fed1b70669bfa679b0de609e3b63f99988784d
      ]

      mktmpdir do |dir|
        inputs = Array.new(3) { |i| dir/"somefile#{i}" }
        inputs.each { |input| File.write(input, "Hello world") }
        outputs = described_class.compress(*inputs)

        inputs.each_with_index do |input, i|
          expect(outputs[i].to_s).to eq("#{input}.gz")
          expect(Digest::SHA256.hexdigest(File.read(outputs[i]))).to eq(digests[i])
        end
      end
    end
  end
end

View File

@ -183,15 +183,6 @@ describe "globally-scoped helper methods" do
expect(which_editor).to eq("vemate -w") expect(which_editor).to eq("vemate -w")
end end
specify "#gzip" do
mktmpdir do |path|
somefile = path/"somefile"
FileUtils.touch somefile
expect(gzip(somefile)[0].to_s).to eq("#{somefile}.gz")
expect(Pathname.new("#{somefile}.gz")).to exist
end
end
specify "#capture_stderr" do specify "#capture_stderr" do
err = capture_stderr do err = capture_stderr do
$stderr.print "test" $stderr.print "test"

View File

@ -11,6 +11,7 @@ require "utils/gems"
require "utils/git" require "utils/git"
require "utils/git_repository" require "utils/git_repository"
require "utils/github" require "utils/github"
require "utils/gzip"
require "utils/inreplace" require "utils/inreplace"
require "utils/link" require "utils/link"
require "utils/popen" require "utils/popen"
@ -397,11 +398,10 @@ module Kernel
end end
# GZips the given paths, and returns the gzipped paths. # GZips the given paths, and returns the gzipped paths.
# TODO: Add deprecation
# odeprecated "Utils.gzip", "Utils::Gzip.compress"
def gzip(*paths) def gzip(*paths)
paths.map do |path| Utils::Gzip.compress(*paths)
safe_system "gzip", path
Pathname.new("#{path}.gz")
end
end end
def ignore_interrupts(_opt = nil) def ignore_interrupts(_opt = nil)

View File

@ -0,0 +1,73 @@
# typed: true
# frozen_string_literal: true
# Number of bytes read from the input file per write to the gzip stream
# (64 KiB) in Utils::Gzip.compress_with_options.
#
# Apple's gzip also uses zlib so use the same buffer size here.
# https://github.com/apple-oss-distributions/file_cmds/blob/file_cmds-400/gzip/gzip.c#L147
GZIP_BUFFER_SIZE = 64 * 1024
module Utils
  # Helper functions for creating gzip files.
  #
  # @api private
  module Gzip
    extend T::Sig

    module_function

    # Gzips a single file with explicit control over the gzip header fields,
    # then removes the input file.
    #
    # @param path [String, Pathname] file to compress; deleted once the gzip
    #   file has been written successfully.
    # @param mtime [Integer, Time] modification time stored in the gzip header.
    #   Defaults to the integer value of `SOURCE_DATE_EPOCH` (0 when unset).
    # @param orig_name [String] original file name stored in the gzip header.
    #   Defaults to the basename of `path`.
    # @param output [String, Pathname] where to write the gzip file.
    #   Defaults to `path` with `.gz` appended.
    # @return [Pathname] the path of the gzip file.
    # @raise [ArgumentError] if `mtime` converts to zero.
    sig {
      params(
        path:      T.any(String, Pathname),
        mtime:     T.any(Integer, Time),
        orig_name: String,
        output:    T.any(String, Pathname),
      ).returns(Pathname)
    }
    def compress_with_options(path, mtime: ENV["SOURCE_DATE_EPOCH"].to_i, orig_name: File.basename(path),
                              output: "#{path}.gz")
      # Ideally, we would just set mtime = 0 if SOURCE_DATE_EPOCH is absent, but Ruby's
      # Zlib::GzipWriter does not properly handle the case of setting mtime = 0:
      # https://bugs.ruby-lang.org/issues/16285
      #
      # This was fixed in https://github.com/ruby/zlib/pull/10. Set mtime to 0 instead
      # of raising exception once we are using zlib gem version 1.1.0 or newer.
      if mtime.to_i.zero?
        raise ArgumentError,
              "Can't create reproducible gzip file without a valid mtime"
      end

      File.open(path, "rb") do |fp|
        odebug "Creating gzip file at #{output}"
        # The block form closes the writer on success and on error alike, and
        # only ever closes a writer that was actually opened. A manual
        # `ensure gz.close` would call `close` on nil when opening `output`
        # fails and replace the real error with a NoMethodError.
        Zlib::GzipWriter.open(output) do |gz|
          gz.mtime = mtime
          gz.orig_name = orig_name
          # Stream the input in fixed-size chunks rather than loading it whole.
          gz.write(fp.read(GZIP_BUFFER_SIZE)) until fp.eof?
        end
      end

      # Only reached when compression succeeded; a failure above leaves the input in place.
      FileUtils.rm_f path
      Pathname.new(output)
    end

    # Gzips each of the given paths and returns the resulting `.gz` paths in
    # the same order.
    #
    # @param paths [Array<String, Pathname>] files to compress.
    # @param reproducible [Boolean] when true, compress in-process through
    #   {compress_with_options} using `mtime`; when false, run the external
    #   `gzip` command on each path instead.
    # @param mtime [Integer, Time] header modification time for the
    #   reproducible mode. Defaults to the integer value of `SOURCE_DATE_EPOCH`.
    # @return [Array<Pathname>] one gzip path per input path.
    # @raise [ArgumentError] in reproducible mode if `mtime` converts to zero.
    sig {
      params(
        paths:        T.any(String, Pathname),
        reproducible: T::Boolean,
        mtime:        T.any(Integer, Time),
      ).returns(T::Array[Pathname])
    }
    def compress(*paths, reproducible: true, mtime: ENV["SOURCE_DATE_EPOCH"].to_i)
      if reproducible
        paths.map do |path|
          compress_with_options(path, mtime: mtime)
        end
      else
        paths.map do |path|
          # NOTE(review): presumably `safe_system` raises when `gzip` fails — confirm.
          safe_system "gzip", path
          Pathname.new("#{path}.gz")
        end
      end
    end
  end
end

View File

@ -0,0 +1,7 @@
# typed: strict
# Type-checker-only declaration for Utils::Gzip; it adds no methods of its own.
module Utils
module Gzip
# NOTE(review): presumably declared so that Kernel-level helpers called from
# inside Utils::Gzip resolve under `typed: strict` — confirm.
include Kernel
end
end