From 371cd0dd3e6d31ef96b2e000a4fa9519938b0aab Mon Sep 17 00:00:00 2001 From: "Tim D. Smith" Date: Sat, 15 Oct 2016 21:21:08 -0700 Subject: [PATCH 1/2] Don't choke on invalid UTF-8 in `file` output Sometimes `file` output contains data from the file under examination, which may include binary data that is not valid UTF-8. String#split raises ArgumentError (invalid byte sequence) on such a string, so tell Ruby to treat `file` output as a bytestring. --- Library/Homebrew/keg_relocate.rb | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Library/Homebrew/keg_relocate.rb b/Library/Homebrew/keg_relocate.rb index ab037da987..3e777d0c12 100644 --- a/Library/Homebrew/keg_relocate.rb +++ b/Library/Homebrew/keg_relocate.rb @@ -84,9 +84,12 @@ class Keg } output, _status = Open3.capture2("/usr/bin/xargs -0 /usr/bin/file --no-dereference --print0", stdin_data: files.to_a.join("\0")) + # `file` output sometimes contains data from the file, which may include + # invalid UTF-8 entities, so tell Ruby this is just a bytestring + output.force_encoding(Encoding::ASCII_8BIT) output.each_line do |line| - path, info = line.split("\0") - next unless info.to_s.include?("text") + path, info = line.split("\0", 2) + next unless info.include?("text") path = Pathname.new(path) next unless files.include?(path) text_files << path From 22a64aa6c630ad73c3830cf3bbbf129bfbc4d19c Mon Sep 17 00:00:00 2001 From: "Tim D. 
Smith" Date: Sat, 15 Oct 2016 23:52:55 -0700 Subject: [PATCH 2/2] Explain why info could be nil --- Library/Homebrew/keg_relocate.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Library/Homebrew/keg_relocate.rb b/Library/Homebrew/keg_relocate.rb index 3e777d0c12..17911b90b1 100644 --- a/Library/Homebrew/keg_relocate.rb +++ b/Library/Homebrew/keg_relocate.rb @@ -89,6 +89,10 @@ class Keg output.force_encoding(Encoding::ASCII_8BIT) output.each_line do |line| path, info = line.split("\0", 2) + # `file` sometimes prints more than one line of output per file; + # subsequent lines do not contain a null-byte separator, so `info` + # will be `nil` for those lines + next unless info next unless info.include?("text") path = Pathname.new(path) next unless files.include?(path)