Merge pull request #12964 from danielnachun/new_binary_grep

Use simpler method to detect binaries
This commit is contained in:
Daniel Nachun 2022-03-09 16:39:21 -08:00 committed by GitHub
commit d2857e0dd8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 30 deletions

View File

@@ -186,7 +186,7 @@ class Keg
def egrep_args def egrep_args
grep_bin = "egrep" grep_bin = "egrep"
grep_args = recursive_fgrep_args grep_args = "--files-with-matches"
[grep_bin, grep_args] [grep_bin, grep_args]
end end
end end

View File

@@ -177,13 +177,18 @@ class Keg
def egrep_args def egrep_args
grep_bin = "grep" grep_bin = "grep"
grep_args = recursive_fgrep_args grep_args = [
grep_args += "Pa" "--files-with-matches",
"--perl-regexp",
"--binary-files=text",
]
[grep_bin, grep_args] [grep_bin, grep_args]
end end
alias generic_egrep_args egrep_args alias generic_egrep_args egrep_args
def each_unique_file(io, block) def each_unique_file_matching(string)
Utils.popen_read("fgrep", recursive_fgrep_args, string, to_s) do |io|
hardlinks = Set.new hardlinks = Set.new
until io.eof? until io.eof?
@@ -193,25 +198,18 @@ class Keg
# To avoid returning hardlinks, only return files with unique inodes. # To avoid returning hardlinks, only return files with unique inodes.
# Hardlinks will have the same inode as the file they point to. # Hardlinks will have the same inode as the file they point to.
block.call file if hardlinks.add? file.stat.ino yield file if hardlinks.add? file.stat.ino
end
end end
end end
def each_unique_file_matching(string, &block) def binary_file?(file)
Utils.popen_read("fgrep", recursive_fgrep_args, string, to_s) do |io|
each_unique_file(io, block)
end
end
def each_unique_binary_file(&block)
grep_bin, grep_args = egrep_args grep_bin, grep_args = egrep_args
# We need to pass NULL_BYTE_STRING, the literal string "\x00", to grep # We need to pass NULL_BYTE_STRING, the literal string "\x00", to grep
# rather than NULL_BYTE, a literal null byte, because grep will internally # rather than NULL_BYTE, a literal null byte, because grep will internally
# convert the literal string "\x00" to a null byte. # convert the literal string "\x00" to a null byte.
Utils.popen_read(grep_bin, grep_args, NULL_BYTE_STRING, to_s) do |io| Utils.popen_read(grep_bin, *grep_args, NULL_BYTE_STRING, file).present?
each_unique_file(io, block)
end
end end
def lib def lib

View File

@@ -43,16 +43,15 @@ describe Keg do
end end
end end
describe "#each_unique_binary_file" do describe "#binary_file?" do
specify "find null bytes in binaries" do specify "test if file has null bytes" do
setup_binary_file setup_binary_file
binary_matches = Set.new expect(keg.binary_file?(binary_file)).to be true
keg.each_unique_binary_file do |file|
binary_matches << file
end
expect(binary_matches.size).to eq 1 setup_text_file
expect(keg.binary_file?(text_file)).to be false
end end
end end
end end