Merge pull request #1298 from tdsmith/no-no-bad-unicode
Don't choke on invalid UTF-8 in `file` output
This commit is contained in:
commit
de880f1e87
@@ -84,9 +84,16 @@ class Keg
|
|||||||
}
|
}
|
||||||
output, _status = Open3.capture2("/usr/bin/xargs -0 /usr/bin/file --no-dereference --print0",
|
output, _status = Open3.capture2("/usr/bin/xargs -0 /usr/bin/file --no-dereference --print0",
|
||||||
stdin_data: files.to_a.join("\0"))
|
stdin_data: files.to_a.join("\0"))
|
||||||
|
# `file` output sometimes contains data from the file, which may include
|
||||||
|
# invalid UTF-8 entities, so tell Ruby this is just a bytestring
|
||||||
|
output.force_encoding(Encoding::ASCII_8BIT)
|
||||||
output.each_line do |line|
|
output.each_line do |line|
|
||||||
path, info = line.split("\0")
|
path, info = line.split("\0", 2)
|
||||||
next unless info.to_s.include?("text")
|
# `file` sometimes prints more than one line of output per file;
|
||||||
|
# subsequent lines do not contain a null-byte separator, so `info`
|
||||||
|
# will be `nil` for those lines
|
||||||
|
next unless info
|
||||||
|
next unless info.include?("text")
|
||||||
path = Pathname.new(path)
|
path = Pathname.new(path)
|
||||||
next unless files.include?(path)
|
next unless files.include?(path)
|
||||||
text_files << path
|
text_files << path
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user