diff --git a/Library/Homebrew/test/utils/gzip_spec.rb b/Library/Homebrew/test/utils/gzip_spec.rb
new file mode 100644
index 0000000000..af2d2d9f28
--- /dev/null
+++ b/Library/Homebrew/test/utils/gzip_spec.rb
@@ -0,0 +1,119 @@
+# typed: false
+# frozen_string_literal: true
+
+require "utils/gzip"
+
+describe Utils::Gzip do
+  describe "compress_with_options" do
+    it "uses the explicitly specified mtime, orig_name, and output path when passed" do
+      mktmpdir do |path|
+        mtime = Time.at(12345).utc
+        orig_name = "someotherfile"
+        output = path/"subdir/anotherfile.gz"
+        file_content = "Hello world"
+        expected_checksum = "df509051b519faa8a1143157d2750d1694dc5fe6373e493c0d5c360be3e61516"
+
+        somefile = path/"somefile"
+        File.write(somefile, file_content)
+        mkdir path/"subdir"
+
+        expect(described_class.compress_with_options(somefile, mtime: mtime, orig_name: orig_name,
+                                                     output: output)).to eq(output)
+        expect(Digest::SHA256.hexdigest(File.read(output))).to eq(expected_checksum)
+      end
+    end
+
+    it "uses SOURCE_DATE_EPOCH as mtime when not explicitly specified" do
+      mktmpdir do |path|
+        ENV["SOURCE_DATE_EPOCH"] = "23456"
+        file_content = "Hello world"
+        expected_checksum = "a579be88ec8073391a5753b1df4d87fbf008aaec6b5a03f8f16412e2e01f119a"
+
+        somefile = path/"somefile"
+        File.write(somefile, file_content)
+
+        expect(described_class.compress_with_options(somefile).to_s).to eq("#{somefile}.gz")
+        expect(Digest::SHA256.hexdigest(File.read("#{somefile}.gz"))).to eq(expected_checksum)
+      end
+    end
+
+    it "raises an ArgumentError when mtime is not a positive timestamp" do
+      mktmpdir do |path|
+        somefile = path/"somefile"
+        File.write(somefile, "Hello world")
+
+        [0, -1].each do |mtime|
+          expect { described_class.compress_with_options(somefile, mtime: mtime) }.to raise_error(ArgumentError)
+          expect(somefile).to exist
+        end
+      end
+    end
+
+    it "raises the original error and keeps the input when the output cannot be created" do
+      mktmpdir do |path|
+        somefile = path/"somefile"
+        File.write(somefile, "Hello world")
+        output = path/"missing/anotherfile.gz"
+
+        expect { described_class.compress_with_options(somefile, mtime: 12345, output: output) }
+          .to raise_error(Errno::ENOENT)
+        expect(somefile).to exist
+      end
+    end
+  end
+
+  describe "compress" do
+    it "creates non-reproducible gz files from input files" do
+      mktmpdir do |path|
+        files = (0..2).map { |n| path/"somefile#{n}" }
+        FileUtils.touch files
+
+        results = described_class.compress(*files, reproducible: false)
+        3.times do |n|
+          expect(results[n].to_s).to eq("#{files[n]}.gz")
+          expect(Pathname.new("#{files[n]}.gz")).to exist
+        end
+      end
+    end
+
+    it "creates reproducible gz files from input files with explicit mtime" do
+      mtime = Time.at(12345).utc
+      expected_checksums = %w[
+        5b45cabc7f0192854365aeccd82036e482e35131ba39fbbc6d0684266eb2e88a
+        d422bf4cbede17ae242135d7f32ba5379fbffb288c29cd38b7e5e1a5f89073f8
+        1d93a3808e2bd5d8c6371ea1c9b8b538774d6486af260719400fc3a5b7ac8d6f
+      ]
+
+      mktmpdir do |path|
+        files = (0..2).map { |n| path/"somefile#{n}" }
+        files.each { |f| File.write(f, "Hello world") }
+
+        results = described_class.compress(*files, mtime: mtime)
+        3.times do |n|
+          expect(results[n].to_s).to eq("#{files[n]}.gz")
+          expect(Digest::SHA256.hexdigest(File.read(results[n]))).to eq(expected_checksums[n])
+        end
+      end
+    end
+
+    it "creates reproducible gz files from input files with SOURCE_DATE_EPOCH as mtime" do
+      ENV["SOURCE_DATE_EPOCH"] = "23456"
+      expected_checksums = %w[
+        d5e0cc3259b1eb61d93ee5a30d41aef4a382c1cf2b759719c289f625e27b915c
+        068657725bca5f9c2bc62bc6bf679eb63786e92d16cae575dee2fd9787a338f3
+        e566e9fdaf9aa2a7c9501f9845fed1b70669bfa679b0de609e3b63f99988784d
+      ]
+
+      mktmpdir do |path|
+        files = (0..2).map { |n| path/"somefile#{n}" }
+        files.each { |f| File.write(f, "Hello world") }
+
+        results = described_class.compress(*files)
+        3.times do |n|
+          expect(results[n].to_s).to eq("#{files[n]}.gz")
+          expect(Digest::SHA256.hexdigest(File.read(results[n]))).to eq(expected_checksums[n])
+        end
+      end
+    end
+  end
+end
diff --git a/Library/Homebrew/utils/gzip.rb b/Library/Homebrew/utils/gzip.rb
new file mode 100644
index 0000000000..e38703c9e5
--- /dev/null
+++ b/Library/Homebrew/utils/gzip.rb
@@ -0,0 +1,93 @@
+# typed: true
+# frozen_string_literal: true
+
+# Apple's gzip also uses zlib so use the same buffer size here.
+# https://github.com/apple-oss-distributions/file_cmds/blob/file_cmds-400/gzip/gzip.c#L147
+GZIP_BUFFER_SIZE = 64 * 1024
+
+module Utils
+  # Helper functions for creating gzip files.
+  #
+  # @api private
+  module Gzip
+    extend T::Sig
+
+    module_function
+
+    # Compresses `path` into a gzip file with a fixed mtime and original name so that
+    # the result is reproducible, then deletes `path`.
+    #
+    # @param path the file to compress
+    # @param mtime the timestamp stored in the gzip header; defaults to `SOURCE_DATE_EPOCH`
+    # @param orig_name the original file name stored in the gzip header
+    # @param output where the gzip file is written
+    # @return the path of the gzip file
+    # @raise [ArgumentError] if `mtime` is not a positive timestamp
+    sig {
+      params(
+        path:      T.any(String, Pathname),
+        mtime:     T.any(Integer, Time),
+        orig_name: String,
+        output:    T.any(String, Pathname),
+      ).returns(Pathname)
+    }
+    def compress_with_options(path, mtime: ENV["SOURCE_DATE_EPOCH"].to_i, orig_name: File.basename(path),
+                              output: "#{path}.gz")
+      # Ideally, we would just set mtime = 0 if SOURCE_DATE_EPOCH is absent, but Ruby's
+      # Zlib::GzipWriter does not properly handle the case of setting mtime = 0:
+      # https://bugs.ruby-lang.org/issues/16285
+      #
+      # This was fixed in https://github.com/ruby/zlib/pull/10. Set mtime to 0 instead
+      # of raising exception once we are using zlib gem version 1.1.0 or newer.
+      #
+      # A negative mtime is rejected as well: the gzip header stores mtime as an
+      # unsigned 32-bit value, so it cannot represent a time before the epoch.
+      if mtime.to_i <= 0
+        raise ArgumentError,
+              "Can't create reproducible gzip file without a valid mtime"
+      end
+
+      File.open(path, "rb") do |fp|
+        odebug "Creating gzip file at #{output}"
+        # The block form closes the GzipWriter in case of error as well. Unlike an
+        # `ensure` calling `gz.close`, it does not replace the real error with a
+        # `NoMethodError` on `nil` when the output file itself cannot be opened.
+        Zlib::GzipWriter.open(output) do |gz|
+          gz.mtime = mtime
+          gz.orig_name = orig_name
+          gz.write(fp.read(GZIP_BUFFER_SIZE)) until fp.eof?
+        end
+      end
+
+      FileUtils.rm_f path
+      Pathname.new(output)
+    end
+
+    # Compresses each of `paths` to `<path>.gz`.
+    #
+    # @param paths the files to compress
+    # @param reproducible whether to write gzip files with a fixed mtime via
+    #   {compress_with_options} instead of running the `gzip` command
+    # @param mtime the timestamp used for reproducible gzip files; defaults to `SOURCE_DATE_EPOCH`
+    # @return the paths of the gzip files, in the same order as `paths`
+    sig {
+      params(
+        paths:        T.any(String, Pathname),
+        reproducible: T::Boolean,
+        mtime:        T.any(Integer, Time),
+      ).returns(T::Array[Pathname])
+    }
+    def compress(*paths, reproducible: true, mtime: ENV["SOURCE_DATE_EPOCH"].to_i)
+      if reproducible
+        paths.map do |path|
+          compress_with_options(path, mtime: mtime)
+        end
+      else
+        paths.map do |path|
+          safe_system "gzip", path
+          Pathname.new("#{path}.gz")
+        end
+      end
+    end
+  end
+end
diff --git a/Library/Homebrew/utils/gzip.rbi b/Library/Homebrew/utils/gzip.rbi
new file mode 100644
index 0000000000..7b7201eaca
--- /dev/null
+++ b/Library/Homebrew/utils/gzip.rbi
@@ -0,0 +1,7 @@
+# typed: strict
+
+module Utils
+  module Gzip
+    include Kernel
+  end
+end