gist-logs: truncate log files to be gist-friendly (#279)

Truncates each log file to about 1 MB. This avoids upload and download errors with the gists, and respects GitHub's suggested size limits.
2016-07-05 08:45:17 -04:00 · 2016-07-05 08:45:17 -04:00 · f518f57bd8
commit f518f57bd8
parent 1b1bf56733
4 changed files with 63 additions and 11 deletions
--- a/Library/Homebrew/cmd/gist-logs.rb
+++ b/Library/Homebrew/cmd/gist-logs.rb
@ -91,6 +91,9 @@ module Homebrew
    logs = {}
    dir.children.sort.each do |file|
      contents = file.size? ? file.read : "empty log"
      # small enough to avoid GitHub "unicorn" page-load-timeout errors
      max_file_size = 1_000_000
      contents = truncate_text_to_approximate_size(contents, max_file_size, :front_weight => 0.2)
      logs[file.basename.to_s] = { :content => contents }
    end if dir.exist?
    raise "No logs." if logs.empty?
--- a/Library/Homebrew/dev-cmd/test-bot.rb
+++ b/Library/Homebrew/dev-cmd/test-bot.rb
@ -45,10 +45,6 @@ module Homebrew
  HOMEBREW_TAP_REGEX = %r{^([\w-]+)/homebrew-([\w-]+)$}
  # Reports whether strings in the running Ruby respond to force_encoding,
  # i.e. whether this interpreter has per-string text encodings.
  def ruby_has_encoding?
    "".respond_to?(:force_encoding)
  end
  if ruby_has_encoding?
    def fix_encoding!(str)
      # Assume we are starting from a "mostly" UTF-8 string
@ -1032,13 +1028,7 @@ module Homebrew
      # Truncate to 1MB to avoid hitting CI limits
      if output.bytesize > MAX_STEP_OUTPUT_SIZE
-        if ruby_has_encoding?
+        output = truncate_text_to_approximate_size(output, MAX_STEP_OUTPUT_SIZE, :front_weight => 0.0)
          binary_output = output.force_encoding("BINARY")
          output = binary_output.slice(-MAX_STEP_OUTPUT_SIZE, MAX_STEP_OUTPUT_SIZE)
          fix_encoding!(output)
        else
          output = output.slice(-MAX_STEP_OUTPUT_SIZE, MAX_STEP_OUTPUT_SIZE)
        end
        output = "truncated output to 1MB:\n" + output
      end
    end
--- a/Library/Homebrew/test/test_utils.rb
+++ b/Library/Homebrew/test/test_utils.rb
@ -210,4 +210,17 @@ class UtilTests < Homebrew::TestCase
    assert_equal "1,000", number_readable(1_000)
    assert_equal "1,000,000", number_readable(1_000_000)
  end
  # Cuts a 40-byte ASCII string down to 20 bytes using the default weighting,
  # then with everything kept from the back, then everything from the front.
  def test_truncate_text_to_approximate_size
    snip = "\n[...snip...]\n" # must mirror the separator hard-coded in truncate_text_to_approximate_size
    limit = 20
    input = "x" * 40
    kept = "x" * (limit - snip.length)

    balanced = truncate_text_to_approximate_size(input, limit)
    assert_equal limit, balanced.length
    assert_match(/^x+#{Regexp.escape(snip)}x+$/, balanced)

    tail_only = truncate_text_to_approximate_size(input, limit, :front_weight => 0.0)
    assert_equal snip + kept, tail_only

    head_only = truncate_text_to_approximate_size(input, limit, :front_weight => 1.0)
    assert_equal(kept + snip, head_only)
  end
 end
--- a/Library/Homebrew/utils.rb
+++ b/Library/Homebrew/utils.rb
@ -534,3 +534,49 @@ def number_readable(number)
  (numstr.size - 3).step(1, -3) { |i| numstr.insert(i, ",") }
  numstr
 end
 # Reports whether strings in the running Ruby respond to force_encoding,
 # i.e. whether this interpreter has per-string text encodings.
 def ruby_has_encoding?
   "".respond_to?(:force_encoding)
 end
 # Shortens text +s+ so that it occupies roughly +max_bytes+ bytes, cutting
 # out the middle and marking the cut with a "[...snip...]" separator.
 #
 # The cut is made on raw bytes; where Ruby supports encodings the result is
 # then re-encoded as UTF-8 with invalid sequences replaced, so the final
 # size can land slightly under or over +max_bytes+.
 #
 # s         - the text to shorten; returned untouched when it already fits.
 # max_bytes - target size in bytes, separator included.
 # options   - :front_weight (default 0.5), a number from 0.0 to 1.0 giving
 #             the share of the kept bytes taken from the start of +s+; the
 #             rest comes from the end.
 #
 # Raises RuntimeError when :front_weight is outside 0.0..1.0.
 def truncate_text_to_approximate_size(s, max_bytes, options = {})
   front_weight = options.fetch(:front_weight, 0.5)
   raise "opts[:front_weight] must be between 0.0 and 1.0" if front_weight < 0.0 || front_weight > 1.0
   return s unless s.bytesize > max_bytes

   separator = "\n[...snip...]\n"
   # Always keep at least one byte of the original text.
   budget = [max_bytes - separator.bytesize, 1].max
   head_len = (budget * front_weight).floor
   tail_len = budget - head_len

   encodings = ruby_has_encoding?
   # Slice on bytes, not characters, so the byte budget is what gets counted.
   raw = encodings ? s.dup.force_encoding("BINARY") : s
   raw_separator = encodings ? separator.encode("BINARY") : separator

   # A zero length needs an explicit empty slice: 0..-1 and -0..-1 would
   # both select the entire string.
   nothing = raw[1..0]
   head = head_len == 0 ? nothing : raw[0..(head_len - 1)]
   tail = tail_len == 0 ? nothing : raw[-tail_len..-1]

   result = head + raw_separator + tail
   return result unless encodings

   # Round-trip through UTF-16 to swap any byte sequence broken by the cut
   # for a replacement character, leaving valid UTF-8.
   result.force_encoding("UTF-8")
   result.encode!("UTF-16", :invalid => :replace)
   result.encode!("UTF-8")
   result
 end