Use Mechanize to parse Content-Disposition.
This commit is contained in:
parent
0f270d8115
commit
de5b35876f
19
.gitignore
vendored
19
.gitignore
vendored
@ -25,6 +25,7 @@
|
||||
**/vendor/bundle
|
||||
**/vendor/ruby
|
||||
**/vendor/bundle-standalone/ruby/*/bin
|
||||
**/vendor/bundle-standalone/ruby/*/build_info/
|
||||
**/vendor/bundle-standalone/ruby/*/cache
|
||||
**/vendor/bundle-standalone/ruby/*/extensions
|
||||
**/vendor/bundle-standalone/ruby/*/gems/*/*
|
||||
@ -93,15 +94,33 @@
|
||||
|
||||
# Ignore rubocop's (and other) dependencies we don't wish to vendor
|
||||
**/vendor/bundle-standalone/ruby/*/gems/ast-*/
|
||||
**/vendor/bundle-standalone/ruby/*/gems/connection_pool-*/lib
|
||||
**/vendor/bundle-standalone/ruby/*/gems/domain_name-*/lib
|
||||
**/vendor/bundle-standalone/ruby/*/gems/http-cookie-*/lib
|
||||
**/vendor/bundle-standalone/ruby/*/gems/jaro_winkler-*/
|
||||
**/vendor/bundle-standalone/ruby/*/gems/mime-types-data-*/lib
|
||||
**/vendor/bundle-standalone/ruby/*/gems/mime-types-*/lib
|
||||
**/vendor/bundle-standalone/ruby/*/gems/mini_portile2-*/lib
|
||||
**/vendor/bundle-standalone/ruby/*/gems/minitest-*/lib
|
||||
**/vendor/bundle-standalone/ruby/*/gems/net-http-digest_auth-*/lib
|
||||
**/vendor/bundle-standalone/ruby/*/gems/net-http-persistent-*/lib
|
||||
**/vendor/bundle-standalone/ruby/*/gems/nokogiri-*/lib
|
||||
**/vendor/bundle-standalone/ruby/*/gems/ntlm-http-*/lib
|
||||
**/vendor/bundle-standalone/ruby/*/gems/parallel-*/
|
||||
**/vendor/bundle-standalone/ruby/*/gems/parser-*/
|
||||
**/vendor/bundle-standalone/ruby/*/gems/powerpack-*/
|
||||
**/vendor/bundle-standalone/ruby/*/gems/rainbow-*/
|
||||
**/vendor/bundle-standalone/ruby/*/gems/rubocop-0*/
|
||||
**/vendor/bundle-standalone/ruby/*/gems/ruby-progressbar-*/
|
||||
**/vendor/bundle-standalone/ruby/*/gems/unf_ext-*/lib
|
||||
**/vendor/bundle-standalone/ruby/*/gems/unf-*/lib
|
||||
**/vendor/bundle-standalone/ruby/*/gems/unicode-display_width-*/
|
||||
**/vendor/bundle-standalone/ruby/*/gems/webrobots-*/lib
|
||||
|
||||
# Only include the `Mechanize::HTTP::ContentDispositionParser`.
|
||||
**/vendor/bundle-standalone/ruby/*/gems/mechanize-*/lib
|
||||
!**/vendor/bundle-standalone/ruby/*/gems/mechanize-*/lib/mechanize/http/content_disposition_parser.rb
|
||||
!**/vendor/bundle-standalone/ruby/*/gems/mechanize-*/lib/mechanize/version.rb
|
||||
|
||||
# Ignore `bin` contents (again).
|
||||
/bin
|
||||
|
||||
@ -5,6 +5,9 @@ require "unpack_strategy"
|
||||
require "lazy_object"
|
||||
require "cgi"
|
||||
|
||||
require "mechanize/version"
|
||||
require "mechanize/http/content_disposition_parser"
|
||||
|
||||
class AbstractDownloadStrategy
|
||||
extend Forwardable
|
||||
include FileUtils
|
||||
@ -363,9 +366,20 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
|
||||
end
|
||||
end
|
||||
|
||||
filenames =
|
||||
lines.map { |line| line[/^Content\-Disposition:\s*(?:inline|attachment);\s*filename=(["']?)([^;]+)\1/i, 2] }
|
||||
.compact
|
||||
content_disposition_parser = Mechanize::HTTP::ContentDispositionParser.new
|
||||
|
||||
# Extracts the suggested download filename from one raw response header line.
# Returns nil when the line is not a parsable Content-Disposition header or
# when the header names no file.
parse_content_disposition = lambda do |line|
  next unless (content_disposition = content_disposition_parser.parse(line, true))

  filename = nil

  if (extended_value = content_disposition.parameters["filename*"])
    # RFC 5987 ext-value: charset "'" [language] "'" percent-encoded octets.
    # Splitting on single quotes (rather than on "''") also accepts values
    # that carry a language tag, e.g. UTF-8'en'file.txt.
    charset, _language, encoded_filename = extended_value.split("'", 3)

    if encoded_filename && !charset.empty?
      begin
        # "+" is a literal character in an ext-value, so shield it from the
        # form decoder, which would otherwise turn it into a space. The
        # decoded octets are in `charset`; normalise the result to UTF-8.
        filename = URI.decode_www_form_component(encoded_filename.gsub("+", "%2B"), charset)
                      .encode(Encoding::UTF_8)
      rescue ArgumentError, EncodingError
        # Unknown charset, malformed percent-escape or bytes that cannot be
        # transcoded: ignore `filename*` and fall back to plain `filename`.
        filename = nil
      end
    end
  end

  filename ||= content_disposition.filename

  # The header is server-controlled; keep only the last path component so a
  # value such as "../../x" or "dir/file" cannot point outside the download
  # location.
  File.basename(filename) if filename
end
|
||||
|
||||
filenames = lines.map(&parse_content_disposition).compact
|
||||
|
||||
time =
|
||||
lines.map { |line| line[/^Last\-Modified:\s*(.+)/i, 1] }
|
||||
|
||||
1
Library/Homebrew/vendor/Gemfile
vendored
1
Library/Homebrew/vendor/Gemfile
vendored
@ -3,6 +3,7 @@ source "https://rubygems.org"
|
||||
gem "activesupport"
|
||||
gem "concurrent-ruby"
|
||||
gem "backports"
|
||||
gem "mechanize"
|
||||
gem "plist"
|
||||
gem "ruby-macho"
|
||||
gem "rubocop-rspec"
|
||||
|
||||
29
Library/Homebrew/vendor/Gemfile.lock
vendored
29
Library/Homebrew/vendor/Gemfile.lock
vendored
@ -9,10 +9,34 @@ GEM
|
||||
ast (2.4.0)
|
||||
backports (3.11.4)
|
||||
concurrent-ruby (1.1.4)
|
||||
connection_pool (2.2.2)
|
||||
domain_name (0.5.20180417)
|
||||
unf (>= 0.0.5, < 1.0.0)
|
||||
http-cookie (1.0.3)
|
||||
domain_name (~> 0.5)
|
||||
i18n (1.5.3)
|
||||
concurrent-ruby (~> 1.0)
|
||||
jaro_winkler (1.5.2)
|
||||
mechanize (2.7.6)
|
||||
domain_name (~> 0.5, >= 0.5.1)
|
||||
http-cookie (~> 1.0)
|
||||
mime-types (>= 1.17.2)
|
||||
net-http-digest_auth (~> 1.1, >= 1.1.1)
|
||||
net-http-persistent (>= 2.5.2)
|
||||
nokogiri (~> 1.6)
|
||||
ntlm-http (~> 0.1, >= 0.1.1)
|
||||
webrobots (>= 0.0.9, < 0.2)
|
||||
mime-types (3.2.2)
|
||||
mime-types-data (~> 3.2015)
|
||||
mime-types-data (3.2018.0812)
|
||||
mini_portile2 (2.4.0)
|
||||
minitest (5.11.3)
|
||||
net-http-digest_auth (1.4.1)
|
||||
net-http-persistent (3.0.0)
|
||||
connection_pool (~> 2.2)
|
||||
nokogiri (1.10.1)
|
||||
mini_portile2 (~> 2.4.0)
|
||||
ntlm-http (0.1.1)
|
||||
parallel (1.13.0)
|
||||
parser (2.6.0.0)
|
||||
ast (~> 2.4.0)
|
||||
@ -34,7 +58,11 @@ GEM
|
||||
thread_safe (0.3.6)
|
||||
tzinfo (1.2.5)
|
||||
thread_safe (~> 0.1)
|
||||
unf (0.1.4)
|
||||
unf_ext
|
||||
unf_ext (0.0.7.5)
|
||||
unicode-display_width (1.4.1)
|
||||
webrobots (0.1.2)
|
||||
|
||||
PLATFORMS
|
||||
ruby
|
||||
@ -43,6 +71,7 @@ DEPENDENCIES
|
||||
activesupport
|
||||
backports
|
||||
concurrent-ruby
|
||||
mechanize
|
||||
plist
|
||||
rubocop
|
||||
rubocop-rspec
|
||||
|
||||
@ -12,8 +12,24 @@ $:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/activesupport-5.2.2/l
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/ast-2.4.0/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/backports-3.11.4/lib"
|
||||
$:.unshift "#{path}/"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/connection_pool-2.2.2/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/extensions/universal-darwin-18/2.3.0/unf_ext-0.0.7.5"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/unf_ext-0.0.7.5/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/unf-0.1.4/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/domain_name-0.5.20180417/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/http-cookie-1.0.3/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/extensions/universal-darwin-18/2.3.0/jaro_winkler-1.5.2"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/jaro_winkler-1.5.2/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/mime-types-data-3.2018.0812/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/mime-types-3.2.2/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/net-http-digest_auth-1.4.1/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/net-http-persistent-3.0.0/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/mini_portile2-2.4.0/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/extensions/universal-darwin-18/2.3.0/nokogiri-1.10.1"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/nokogiri-1.10.1/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/ntlm-http-0.1.1/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/webrobots-0.1.2/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/mechanize-2.7.6/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/parallel-1.13.0/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/parser-2.6.0.0/lib"
|
||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/plist-3.5.0/lib"
|
||||
|
||||
@ -0,0 +1,191 @@
|
||||
# coding: BINARY
|
||||
|
||||
require 'strscan'
|
||||
require 'time'
|
||||
|
||||
class Mechanize::HTTP
  # Result of parsing a Content-Disposition header: the disposition type, the
  # well-known parameters as dedicated members, and every remaining parameter
  # in +parameters+.
  ContentDisposition = Struct.new(
    :type,
    :filename,
    :creation_date,
    :modification_date,
    :read_date,
    :size,
    :parameters
  )
end
|
||||
|
||||
##
|
||||
# Parser for Content-Disposition headers that loosely follows RFC 2183.
|
||||
#
|
||||
# Beyond RFC 2183, this parser allows:
|
||||
#
|
||||
# * Missing disposition-type
|
||||
# * Multiple semicolons
|
||||
# * Whitespace around semicolons
|
||||
|
||||
class Mechanize::HTTP::ContentDispositionParser
|
||||
|
||||
attr_accessor :scanner # :nodoc:
|
||||
|
||||
@parser = nil
|
||||
|
||||
##
|
||||
# Parses the disposition type and params in the +content_disposition+
|
||||
# string. The "Content-Disposition:" must be removed.
|
||||
|
||||
def self.parse(content_disposition)
  # Build the class-level parser on first use and reuse it afterwards.
  shared_parser = (@parser ||= new)
  shared_parser.parse(content_disposition)
end
|
||||
|
||||
##
|
||||
# Creates a new parser for Content-Disposition headers.
|
||||
|
||||
def initialize
  # No input yet: #parse installs a fresh StringScanner on every call.
  @scanner = nil
end
|
||||
|
||||
##
|
||||
# Parses the +content_disposition+ header. If +header+ is set to true the
|
||||
# "Content-Disposition:" portion will be parsed
|
||||
|
||||
def parse(content_disposition, header = false)
  # content_disposition: the header value, or the whole header line when
  #                      +header+ is true.
  # header:              when true, a leading "Content-Disposition:" (any
  #                      case) is required and skipped first.
  # Returns a Mechanize::HTTP::ContentDisposition, or nil when the input is
  # empty, the header name is missing in header mode, or no parameters could
  # be parsed.
  return nil if content_disposition.empty?

  @scanner = StringScanner.new content_disposition

  if header
    return nil unless @scanner.scan(/Content-Disposition/i)
    return nil unless @scanner.scan(/:/)

    spaces
  end

  # Where the header value starts. Needed to rewind when the disposition
  # type turns out to be missing; rewinding to 0 would land on the
  # "Content-Disposition:" prefix again in header mode.
  value_start = @scanner.pos

  type = rfc_2045_token
  # Tolerate whitespace between the type and the first semicolon.
  spaces
  @scanner.scan(/;+/)

  # "name=value" with no disposition type: what was read as the type is
  # really the first parameter name, so start over from the value.
  if @scanner.peek(1) == '='
    @scanner.pos = value_start
    type = nil
  end

  disposition = Mechanize::HTTP::ContentDisposition.new type

  spaces

  return nil unless (parameters = parse_parameters)

  # Move the well-known parameters into their own members; whatever remains
  # stays available through #parameters.
  disposition.filename          = parameters.delete 'filename'
  disposition.creation_date     = parameters.delete 'creation-date'
  disposition.modification_date = parameters.delete 'modification-date'
  disposition.read_date         = parameters.delete 'read-date'
  disposition.size              = parameters.delete 'size'
  disposition.parameters        = parameters

  disposition
end
|
||||
|
||||
##
|
||||
# Extracts disposition-parm and returns a Hash.
|
||||
|
||||
def parse_parameters
  # Reads "name=value" pairs separated by one or more semicolons, starting at
  # the current scanner position. Names are lower-cased. Returns the Hash of
  # parameters, or nil as soon as a name, the "=" or a value is missing or
  # malformed.
  parameters = {}

  loop do
    return nil unless (param = rfc_2045_token)

    param = param.downcase
    return nil unless @scanner.scan(/=/)

    value =
      case param
      when /^(creation|modification|read)-date$/
        date = rfc_2045_quoted_string
        begin
          # A missing or unparsable date is treated like any other malformed
          # value (nil) instead of raising out of the parser.
          date && Time.rfc822(date)
        rescue ArgumentError
          nil
        end
      when /^size$/
        size = rfc_2045_value
        size && size.to_i(10)
      else
        # "filename" and every unrecognised parameter: token or quoted-string.
        rfc_2045_value
      end

    return nil unless value

    parameters[param] = value

    spaces

    break if @scanner.eos? || !@scanner.scan(/;+/)

    spaces

    # A trailing semicolon ("attachment; filename=x;") ends the list; it must
    # not discard the parameters that were already parsed.
    break if @scanner.eos?
  end

  parameters
end
|
||||
|
||||
##
|
||||
# quoted-string = <"> *(qtext/quoted-pair) <">
|
||||
# qtext = <any CHAR excepting <">, "\" & CR,
|
||||
# and including linear-white-space
|
||||
# quoted-pair = "\" CHAR
|
||||
#
|
||||
# Parses an RFC 2045 quoted-string
|
||||
|
||||
def rfc_2045_quoted_string
  # Reads a quoted-string at the current scanner position and returns its
  # unescaped content. Returns nil when the input does not start with a
  # double quote, contains a character that is not allowed, or ends before
  # the closing quote.
  return nil unless @scanner.scan(/"/)

  text = String.new

  loop do
    if (chunk = @scanner.scan(/[\000-\014\016-\041\043-\133\135-\177]+/)) # qtext: not \r " \
      text << chunk
    elsif @scanner.scan(/\\/)
      # quoted-pair: the byte after the backslash is taken literally. This is
      # accepted at any position, including directly after the opening quote
      # or after another quoted-pair.
      return nil if @scanner.eos?

      text << @scanner.get_byte
    elsif @scanner.scan(/\r\n[\t ]+/)
      # Folded line: CRLF plus leading whitespace collapses to one space.
      text << " "
    elsif @scanner.scan(/"/)
      return text
    else
      # Bare CR, a non-ASCII byte, or end of input before the closing quote.
      return nil
    end
  end
end
|
||||
|
||||
##
|
||||
# token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, or tspecials>
|
||||
#
|
||||
# Parses an RFC 2045 token
|
||||
|
||||
def rfc_2045_token
  # Longest run of bytes containing no control character (0x00-0x1F, DEL),
  # no space and no tspecial; nil when the next byte is one of those.
  token_pattern = /[^\000-\037\177()<>@,;:\\"\/\[\]?= ]+/
  @scanner.scan(token_pattern)
end
|
||||
|
||||
##
|
||||
# value := token / quoted-string
|
||||
#
|
||||
# Parses an RFC 2045 value
|
||||
|
||||
def rfc_2045_value
  # A value that starts with a double quote is a quoted-string; anything else
  # is read as a token.
  quoted = @scanner.peek(1) == '"'
  quoted ? rfc_2045_quoted_string : rfc_2045_token
end
|
||||
|
||||
##
|
||||
# 1*SP
|
||||
#
|
||||
# Parses spaces
|
||||
|
||||
def spaces
  # Skip a run of one or more space characters; returns the run, or nil when
  # the next character is not a space.
  space_run = / +/
  @scanner.scan(space_run)
end
|
||||
|
||||
end
|
||||
|
||||
@ -0,0 +1,3 @@
|
||||
class Mechanize
  # Version of the vendored Mechanize gem. Frozen so the constant's string
  # cannot be mutated in place.
  VERSION = "2.7.6".freeze
end
|
||||
Loading…
x
Reference in New Issue
Block a user