gist-logs: truncate log files to be gist-friendly (#279)

Truncates each log file to about 1 MB. This avoids upload and
download errors with the gists, and respects GitHub's suggested
size limits.
This commit is contained in:
Andrew Janke 2016-07-05 08:45:17 -04:00 committed by GitHub
parent 1b1bf56733
commit f518f57bd8
4 changed files with 63 additions and 11 deletions

View File

@ -91,6 +91,9 @@ module Homebrew
logs = {} logs = {}
dir.children.sort.each do |file| dir.children.sort.each do |file|
contents = file.size? ? file.read : "empty log" contents = file.size? ? file.read : "empty log"
# small enough to avoid GitHub "unicorn" page-load-timeout errors
max_file_size = 1_000_000
contents = truncate_text_to_approximate_size(contents, max_file_size, :front_weight => 0.2)
logs[file.basename.to_s] = { :content => contents } logs[file.basename.to_s] = { :content => contents }
end if dir.exist? end if dir.exist?
raise "No logs." if logs.empty? raise "No logs." if logs.empty?

View File

@ -45,10 +45,6 @@ module Homebrew
HOMEBREW_TAP_REGEX = %r{^([\w-]+)/homebrew-([\w-]+)$} HOMEBREW_TAP_REGEX = %r{^([\w-]+)/homebrew-([\w-]+)$}
def ruby_has_encoding?
String.method_defined?(:force_encoding)
end
if ruby_has_encoding? if ruby_has_encoding?
def fix_encoding!(str) def fix_encoding!(str)
# Assume we are starting from a "mostly" UTF-8 string # Assume we are starting from a "mostly" UTF-8 string
@ -1032,13 +1028,7 @@ module Homebrew
# Truncate to 1MB to avoid hitting CI limits # Truncate to 1MB to avoid hitting CI limits
if output.bytesize > MAX_STEP_OUTPUT_SIZE if output.bytesize > MAX_STEP_OUTPUT_SIZE
if ruby_has_encoding? output = truncate_text_to_approximate_size(output, MAX_STEP_OUTPUT_SIZE, :front_weight => 0.0)
binary_output = output.force_encoding("BINARY")
output = binary_output.slice(-MAX_STEP_OUTPUT_SIZE, MAX_STEP_OUTPUT_SIZE)
fix_encoding!(output)
else
output = output.slice(-MAX_STEP_OUTPUT_SIZE, MAX_STEP_OUTPUT_SIZE)
end
output = "truncated output to 1MB:\n" + output output = "truncated output to 1MB:\n" + output
end end
end end

View File

@ -210,4 +210,17 @@ class UtilTests < Homebrew::TestCase
assert_equal "1,000", number_readable(1_000) assert_equal "1,000", number_readable(1_000)
assert_equal "1,000,000", number_readable(1_000_000) assert_equal "1,000,000", number_readable(1_000_000)
end end
def test_truncate_text_to_approximate_size
  # Keep this in sync with the separator literal used inside
  # truncate_text_to_approximate_size.
  glue = "\n[...snip...]\n"
  limit = 20
  input = "x" * 40
  kept = "x" * (limit - glue.length)

  # Default weighting: text is kept on both sides of the separator.
  balanced = truncate_text_to_approximate_size(input, limit)
  assert_equal limit, balanced.length
  assert_match(/^x+#{Regexp.escape(glue)}x+$/, balanced)

  # A front weight of 0.0 keeps only the tail of the input.
  tail_only = truncate_text_to_approximate_size(input, limit, :front_weight => 0.0)
  assert_equal glue + kept, tail_only

  # A front weight of 1.0 keeps only the head of the input.
  head_only = truncate_text_to_approximate_size(input, limit, :front_weight => 1.0)
  assert_equal(kept + glue, head_only)
end
end end

View File

@ -534,3 +534,49 @@ def number_readable(number)
(numstr.size - 3).step(1, -3) { |i| numstr.insert(i, ",") } (numstr.size - 3).step(1, -3) { |i| numstr.insert(i, ",") }
numstr numstr
end end
# Reports whether String on the running Ruby defines #force_encoding,
# i.e. whether strings carry a text encoding that can be changed.
def ruby_has_encoding?
  encoding_api = :force_encoding
  String.method_defined?(encoding_api)
end
# Shortens a string to roughly max_bytes bytes by cutting bytes out of its
# middle and joining the kept head and tail with a "[...snip...]" marker.
#
# s         - the text to shorten. It is returned untouched (the very same
#             object) when its bytesize is already <= max_bytes.
# max_bytes - the target size in bytes. The marker's size is deducted from
#             it, but at least one byte of the input is always kept, so the
#             result can end up longer than max_bytes.
# options   - :front_weight is the fraction (0.0 to 1.0, default 0.5) of the
#             kept bytes that is taken from the start of the string; the
#             remainder is taken from the end.
#
# Raises a RuntimeError when :front_weight is outside 0.0..1.0.
#
# On Rubies with encoding support the cut is made on raw bytes, then the
# result is tagged as UTF-8 and round-tripped through UTF-16 with invalid
# sequences replaced, so a character split by the cut cannot leave invalid
# bytes behind (the replacement may change the final size slightly).
def truncate_text_to_approximate_size(s, max_bytes, options = {})
  front_weight = options.fetch(:front_weight, 0.5)
  raise "opts[:front_weight] must be between 0.0 and 1.0" if front_weight < 0.0 || front_weight > 1.0
  return s if s.bytesize <= max_bytes

  glue = "\n[...snip...]\n"
  # Bytes of the input that may survive; never fewer than one.
  budget = [max_bytes - glue.bytesize, 1].max

  # Work on plain bytes so slicing is not affected by character widths.
  raw, separator =
    if ruby_has_encoding?
      [s.dup.force_encoding("BINARY"), glue.encode("BINARY")]
    else
      [s, glue]
    end

  head_len = (budget * front_weight).floor
  tail_len = budget - head_len

  # raw[1..0] is an empty-range slice: it yields "" for any non-empty input.
  # The budget is at least one, so head_len and tail_len are never both zero.
  head = head_len == 0 ? raw[1..0] : raw[0, head_len]
  tail = tail_len == 0 ? raw[1..0] : raw[-tail_len, tail_len]

  joined = head + separator + tail
  if ruby_has_encoding?
    # Re-tag as UTF-8, then scrub any byte sequences broken by the cut.
    joined.force_encoding("UTF-8")
    joined.encode!("UTF-16", :invalid => :replace)
    joined.encode!("UTF-8")
  end
  joined
end