Use Mechanize to parse Content-Disposition.
This commit is contained in:
parent
0f270d8115
commit
de5b35876f
19
.gitignore
vendored
19
.gitignore
vendored
@ -25,6 +25,7 @@
|
|||||||
**/vendor/bundle
|
**/vendor/bundle
|
||||||
**/vendor/ruby
|
**/vendor/ruby
|
||||||
**/vendor/bundle-standalone/ruby/*/bin
|
**/vendor/bundle-standalone/ruby/*/bin
|
||||||
|
**/vendor/bundle-standalone/ruby/*/build_info/
|
||||||
**/vendor/bundle-standalone/ruby/*/cache
|
**/vendor/bundle-standalone/ruby/*/cache
|
||||||
**/vendor/bundle-standalone/ruby/*/extensions
|
**/vendor/bundle-standalone/ruby/*/extensions
|
||||||
**/vendor/bundle-standalone/ruby/*/gems/*/*
|
**/vendor/bundle-standalone/ruby/*/gems/*/*
|
||||||
@ -93,15 +94,33 @@
|
|||||||
|
|
||||||
# Ignore rubocop's (and other) dependencies we don't wish to vendor
|
# Ignore rubocop's (and other) dependencies we don't wish to vendor
|
||||||
**/vendor/bundle-standalone/ruby/*/gems/ast-*/
|
**/vendor/bundle-standalone/ruby/*/gems/ast-*/
|
||||||
|
**/vendor/bundle-standalone/ruby/*/gems/connection_pool-*/lib
|
||||||
|
**/vendor/bundle-standalone/ruby/*/gems/domain_name-*/lib
|
||||||
|
**/vendor/bundle-standalone/ruby/*/gems/http-cookie-*/lib
|
||||||
**/vendor/bundle-standalone/ruby/*/gems/jaro_winkler-*/
|
**/vendor/bundle-standalone/ruby/*/gems/jaro_winkler-*/
|
||||||
|
**/vendor/bundle-standalone/ruby/*/gems/mime-types-data-*/lib
|
||||||
|
**/vendor/bundle-standalone/ruby/*/gems/mime-types-*/lib
|
||||||
|
**/vendor/bundle-standalone/ruby/*/gems/mini_portile2-*/lib
|
||||||
**/vendor/bundle-standalone/ruby/*/gems/minitest-*/lib
|
**/vendor/bundle-standalone/ruby/*/gems/minitest-*/lib
|
||||||
|
**/vendor/bundle-standalone/ruby/*/gems/net-http-digest_auth-*/lib
|
||||||
|
**/vendor/bundle-standalone/ruby/*/gems/net-http-persistent-*/lib
|
||||||
|
**/vendor/bundle-standalone/ruby/*/gems/nokogiri-*/lib
|
||||||
|
**/vendor/bundle-standalone/ruby/*/gems/ntlm-http-*/lib
|
||||||
**/vendor/bundle-standalone/ruby/*/gems/parallel-*/
|
**/vendor/bundle-standalone/ruby/*/gems/parallel-*/
|
||||||
**/vendor/bundle-standalone/ruby/*/gems/parser-*/
|
**/vendor/bundle-standalone/ruby/*/gems/parser-*/
|
||||||
**/vendor/bundle-standalone/ruby/*/gems/powerpack-*/
|
**/vendor/bundle-standalone/ruby/*/gems/powerpack-*/
|
||||||
**/vendor/bundle-standalone/ruby/*/gems/rainbow-*/
|
**/vendor/bundle-standalone/ruby/*/gems/rainbow-*/
|
||||||
**/vendor/bundle-standalone/ruby/*/gems/rubocop-0*/
|
**/vendor/bundle-standalone/ruby/*/gems/rubocop-0*/
|
||||||
**/vendor/bundle-standalone/ruby/*/gems/ruby-progressbar-*/
|
**/vendor/bundle-standalone/ruby/*/gems/ruby-progressbar-*/
|
||||||
|
**/vendor/bundle-standalone/ruby/*/gems/unf_ext-*/lib
|
||||||
|
**/vendor/bundle-standalone/ruby/*/gems/unf-*/lib
|
||||||
**/vendor/bundle-standalone/ruby/*/gems/unicode-display_width-*/
|
**/vendor/bundle-standalone/ruby/*/gems/unicode-display_width-*/
|
||||||
|
**/vendor/bundle-standalone/ruby/*/gems/webrobots-*/lib
|
||||||
|
|
||||||
|
# Only include the `Mechanize::HTTP::ContentDispositionParser`.
|
||||||
|
**/vendor/bundle-standalone/ruby/*/gems/mechanize-*/lib
|
||||||
|
!**/vendor/bundle-standalone/ruby/*/gems/mechanize-*/lib/mechanize/http/content_disposition_parser.rb
|
||||||
|
!**/vendor/bundle-standalone/ruby/*/gems/mechanize-*/lib/mechanize/version.rb
|
||||||
|
|
||||||
# Ignore `bin` contents (again).
|
# Ignore `bin` contents (again).
|
||||||
/bin
|
/bin
|
||||||
|
|||||||
@ -5,6 +5,9 @@ require "unpack_strategy"
|
|||||||
require "lazy_object"
|
require "lazy_object"
|
||||||
require "cgi"
|
require "cgi"
|
||||||
|
|
||||||
|
require "mechanize/version"
|
||||||
|
require "mechanize/http/content_disposition_parser"
|
||||||
|
|
||||||
class AbstractDownloadStrategy
|
class AbstractDownloadStrategy
|
||||||
extend Forwardable
|
extend Forwardable
|
||||||
include FileUtils
|
include FileUtils
|
||||||
@ -363,9 +366,20 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
filenames =
|
content_disposition_parser = Mechanize::HTTP::ContentDispositionParser.new
|
||||||
lines.map { |line| line[/^Content\-Disposition:\s*(?:inline|attachment);\s*filename=(["']?)([^;]+)\1/i, 2] }
|
|
||||||
.compact
|
# Extracts the download filename from a single response header line.
# Evaluates to nil when the line is not a parseable Content-Disposition
# header, or when it carries no usable filename.
parse_content_disposition = lambda do |line|
  content_disposition = content_disposition_parser.parse(line, true)
  next unless content_disposition

  # RFC 5987 extended value: charset "'" [ language ] "'" percent-encoded
  # bytes. Splitting on single quotes (instead of "''") also accepts the
  # form that carries a language tag, e.g. "UTF-8'en'file%20name.txt".
  extended_filename = content_disposition.parameters["filename*"]
  charset, _language, encoded_filename = extended_filename.split("'", 3) if extended_filename

  if charset && !charset.empty? && encoded_filename && !encoded_filename.empty?
    begin
      # "+" is a literal character in RFC 5987 values, but
      # `decode_www_form_component` would turn it into a space.
      # Passing the charset tags the decoded bytes with the declared encoding
      # rather than transcoding them from UTF-8.
      decoded_filename = URI.decode_www_form_component(encoded_filename.gsub("+", "%2B"), charset)
      next decoded_filename if decoded_filename.valid_encoding?
    rescue ArgumentError
      # Unknown charset name or malformed %-escape: fall back to the plain
      # `filename` parameter below instead of aborting.
    end
  end

  content_disposition.filename
end
|
||||||
|
|
||||||
|
filenames = lines.map(&parse_content_disposition).compact
|
||||||
|
|
||||||
time =
|
time =
|
||||||
lines.map { |line| line[/^Last\-Modified:\s*(.+)/i, 1] }
|
lines.map { |line| line[/^Last\-Modified:\s*(.+)/i, 1] }
|
||||||
|
|||||||
1
Library/Homebrew/vendor/Gemfile
vendored
1
Library/Homebrew/vendor/Gemfile
vendored
@ -3,6 +3,7 @@ source "https://rubygems.org"
|
|||||||
gem "activesupport"
|
gem "activesupport"
|
||||||
gem "concurrent-ruby"
|
gem "concurrent-ruby"
|
||||||
gem "backports"
|
gem "backports"
|
||||||
|
gem "mechanize"
|
||||||
gem "plist"
|
gem "plist"
|
||||||
gem "ruby-macho"
|
gem "ruby-macho"
|
||||||
gem "rubocop-rspec"
|
gem "rubocop-rspec"
|
||||||
|
|||||||
29
Library/Homebrew/vendor/Gemfile.lock
vendored
29
Library/Homebrew/vendor/Gemfile.lock
vendored
@ -9,10 +9,34 @@ GEM
|
|||||||
ast (2.4.0)
|
ast (2.4.0)
|
||||||
backports (3.11.4)
|
backports (3.11.4)
|
||||||
concurrent-ruby (1.1.4)
|
concurrent-ruby (1.1.4)
|
||||||
|
connection_pool (2.2.2)
|
||||||
|
domain_name (0.5.20180417)
|
||||||
|
unf (>= 0.0.5, < 1.0.0)
|
||||||
|
http-cookie (1.0.3)
|
||||||
|
domain_name (~> 0.5)
|
||||||
i18n (1.5.3)
|
i18n (1.5.3)
|
||||||
concurrent-ruby (~> 1.0)
|
concurrent-ruby (~> 1.0)
|
||||||
jaro_winkler (1.5.2)
|
jaro_winkler (1.5.2)
|
||||||
|
mechanize (2.7.6)
|
||||||
|
domain_name (~> 0.5, >= 0.5.1)
|
||||||
|
http-cookie (~> 1.0)
|
||||||
|
mime-types (>= 1.17.2)
|
||||||
|
net-http-digest_auth (~> 1.1, >= 1.1.1)
|
||||||
|
net-http-persistent (>= 2.5.2)
|
||||||
|
nokogiri (~> 1.6)
|
||||||
|
ntlm-http (~> 0.1, >= 0.1.1)
|
||||||
|
webrobots (>= 0.0.9, < 0.2)
|
||||||
|
mime-types (3.2.2)
|
||||||
|
mime-types-data (~> 3.2015)
|
||||||
|
mime-types-data (3.2018.0812)
|
||||||
|
mini_portile2 (2.4.0)
|
||||||
minitest (5.11.3)
|
minitest (5.11.3)
|
||||||
|
net-http-digest_auth (1.4.1)
|
||||||
|
net-http-persistent (3.0.0)
|
||||||
|
connection_pool (~> 2.2)
|
||||||
|
nokogiri (1.10.1)
|
||||||
|
mini_portile2 (~> 2.4.0)
|
||||||
|
ntlm-http (0.1.1)
|
||||||
parallel (1.13.0)
|
parallel (1.13.0)
|
||||||
parser (2.6.0.0)
|
parser (2.6.0.0)
|
||||||
ast (~> 2.4.0)
|
ast (~> 2.4.0)
|
||||||
@ -34,7 +58,11 @@ GEM
|
|||||||
thread_safe (0.3.6)
|
thread_safe (0.3.6)
|
||||||
tzinfo (1.2.5)
|
tzinfo (1.2.5)
|
||||||
thread_safe (~> 0.1)
|
thread_safe (~> 0.1)
|
||||||
|
unf (0.1.4)
|
||||||
|
unf_ext
|
||||||
|
unf_ext (0.0.7.5)
|
||||||
unicode-display_width (1.4.1)
|
unicode-display_width (1.4.1)
|
||||||
|
webrobots (0.1.2)
|
||||||
|
|
||||||
PLATFORMS
|
PLATFORMS
|
||||||
ruby
|
ruby
|
||||||
@ -43,6 +71,7 @@ DEPENDENCIES
|
|||||||
activesupport
|
activesupport
|
||||||
backports
|
backports
|
||||||
concurrent-ruby
|
concurrent-ruby
|
||||||
|
mechanize
|
||||||
plist
|
plist
|
||||||
rubocop
|
rubocop
|
||||||
rubocop-rspec
|
rubocop-rspec
|
||||||
|
|||||||
@ -12,8 +12,24 @@ $:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/activesupport-5.2.2/l
|
|||||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/ast-2.4.0/lib"
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/ast-2.4.0/lib"
|
||||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/backports-3.11.4/lib"
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/backports-3.11.4/lib"
|
||||||
$:.unshift "#{path}/"
|
$:.unshift "#{path}/"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/connection_pool-2.2.2/lib"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/extensions/universal-darwin-18/2.3.0/unf_ext-0.0.7.5"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/unf_ext-0.0.7.5/lib"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/unf-0.1.4/lib"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/domain_name-0.5.20180417/lib"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/http-cookie-1.0.3/lib"
|
||||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/extensions/universal-darwin-18/2.3.0/jaro_winkler-1.5.2"
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/extensions/universal-darwin-18/2.3.0/jaro_winkler-1.5.2"
|
||||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/jaro_winkler-1.5.2/lib"
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/jaro_winkler-1.5.2/lib"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/mime-types-data-3.2018.0812/lib"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/mime-types-3.2.2/lib"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/net-http-digest_auth-1.4.1/lib"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/net-http-persistent-3.0.0/lib"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/mini_portile2-2.4.0/lib"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/extensions/universal-darwin-18/2.3.0/nokogiri-1.10.1"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/nokogiri-1.10.1/lib"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/ntlm-http-0.1.1/lib"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/webrobots-0.1.2/lib"
|
||||||
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/mechanize-2.7.6/lib"
|
||||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/parallel-1.13.0/lib"
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/parallel-1.13.0/lib"
|
||||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/parser-2.6.0.0/lib"
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/parser-2.6.0.0/lib"
|
||||||
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/plist-3.5.0/lib"
|
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/plist-3.5.0/lib"
|
||||||
|
|||||||
@ -0,0 +1,191 @@
|
|||||||
|
# coding: BINARY
|
||||||
|
|
||||||
|
require 'strscan'
|
||||||
|
require 'time'
|
||||||
|
|
||||||
|
class Mechanize::HTTP
  # Value object for a parsed Content-Disposition header: the disposition
  # type, the well-known parameters, and a Hash of any remaining parameters.
  ContentDisposition = Struct.new(
    :type,
    :filename,
    :creation_date,
    :modification_date,
    :read_date,
    :size,
    :parameters
  )
end
|
||||||
|
|
||||||
|
##
|
||||||
|
# Parses Content-Disposition headers, loosely following RFC 2183.
|
||||||
|
#
|
||||||
|
# Beyond RFC 2183, this parser allows:
|
||||||
|
#
|
||||||
|
# * Missing disposition-type
|
||||||
|
# * Multiple semicolons
|
||||||
|
# * Whitespace around semicolons
|
||||||
|
|
||||||
|
class Mechanize::HTTP::ContentDispositionParser

  attr_accessor :scanner # :nodoc:

  @parser = nil

  ##
  # Parses the disposition type and params in the +content_disposition+
  # string using a parser instance that is created once and then reused.
  # The "Content-Disposition:" prefix must already be removed.
  #
  # Returns a Mechanize::HTTP::ContentDisposition, or nil when the string
  # cannot be parsed.

  def self.parse(content_disposition)
    @parser ||= new
    @parser.parse content_disposition
  end

  ##
  # Creates a new parser for Content-Disposition headers.

  def initialize
    @scanner = nil
  end

  ##
  # Parses the +content_disposition+ header.  If +header+ is set to true the
  # leading "Content-Disposition:" portion is expected and consumed first.
  #
  # Returns a Mechanize::HTTP::ContentDisposition, or nil when the input is
  # nil, empty or malformed.

  def parse(content_disposition, header = false)
    return nil if content_disposition.nil? || content_disposition.empty?

    @scanner = StringScanner.new content_disposition

    if header
      return nil unless @scanner.scan(/Content-Disposition/i)
      return nil unless @scanner.scan(/:/)
      spaces
    end

    # Remember where the header value starts.  When the disposition type is
    # missing we must rewind to here, not to the start of the string, or the
    # "Content-Disposition:" prefix would be re-read as a parameter name.
    value_start = @scanner.pos

    type = rfc_2045_token
    @scanner.scan(/;+/)

    # A token directly followed by "=" was a parameter name, not a type.
    if @scanner.peek(1) == '='
      @scanner.pos = value_start
      type = nil
    end

    disposition = Mechanize::HTTP::ContentDisposition.new type

    spaces

    parameters = parse_parameters
    return nil unless parameters

    disposition.filename          = parameters.delete 'filename'
    disposition.creation_date     = parameters.delete 'creation-date'
    disposition.modification_date = parameters.delete 'modification-date'
    disposition.read_date         = parameters.delete 'read-date'
    disposition.size              = parameters.delete 'size'
    disposition.parameters        = parameters

    disposition
  end

  ##
  # Extracts disposition-parm and returns a Hash keyed by the downcased
  # parameter name.  Returns nil as soon as a parameter name, its "=" or its
  # value is missing or malformed.

  def parse_parameters
    parameters = {}

    loop do
      param = rfc_2045_token
      return nil unless param

      param = param.downcase
      return nil unless @scanner.scan(/=/)

      value =
        case param
        when 'creation-date', 'modification-date', 'read-date'
          parse_date rfc_2045_quoted_string
        when 'size'
          # Guard against a missing value instead of calling to_i on nil.
          size = rfc_2045_value
          size && size.to_i(10)
        else
          rfc_2045_value
        end

      return nil unless value

      parameters[param] = value

      spaces

      break if @scanner.eos? || !@scanner.scan(/;+/)

      spaces
    end

    parameters
  end

  ##
  #   quoted-string = <"> *(qtext/quoted-pair) <">
  #   qtext         = <any CHAR excepting <">, "\" & CR,
  #                   and including linear-white-space
  #   quoted-pair   = "\" CHAR
  #
  # Parses an RFC 2045 quoted-string and returns its unescaped content, or
  # nil when the string is unterminated or contains a character that is not
  # allowed.

  def rfc_2045_quoted_string
    return nil unless @scanner.scan(/"/)

    text = ''

    loop do
      # Plain text: anything in 0-127 except CR, '"' and "\".
      if (chunk = @scanner.scan(/[\000-\014\016-\041\043-\133\135-\177]+/))
        text << chunk
      elsif @scanner.scan(/\\/)
        # quoted-pair: the byte after the backslash is taken literally.
        return nil if @scanner.eos?
        text << @scanner.get_byte
      elsif @scanner.scan(/\r\n[\t ]+/)
        # Folded line: collapse the fold into a single space.
        text << " "
      elsif @scanner.scan(/"/)
        break
      else
        return nil
      end
    end

    text
  end

  ##
  #   token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, or tspecials>
  #
  # Parses an RFC 2045 token.  Returns nil when no token is present.

  def rfc_2045_token
    @scanner.scan(/[^\000-\037\177()<>@,;:\\"\/\[\]?= ]+/)
  end

  ##
  #   value := token / quoted-string
  #
  # Parses an RFC 2045 value.

  def rfc_2045_value
    if @scanner.peek(1) == '"'
      rfc_2045_quoted_string
    else
      rfc_2045_token
    end
  end

  ##
  #   1*SP
  #
  # Parses spaces.

  def spaces
    @scanner.scan(/ +/)
  end

  private

  ##
  # Converts a date parameter value to a Time.  RFC 822 is tried first, then
  # ISO 8601.  Returns nil, instead of raising, when +date+ is nil or matches
  # neither format, so a bad date is handled like any other malformed value.

  def parse_date(date)
    return nil unless date

    begin
      Time.rfc822 date
    rescue ArgumentError
      begin
        Time.iso8601 date
      rescue ArgumentError
        nil
      end
    end
  end

end
|
||||||
|
|
||||||
@ -0,0 +1,3 @@
|
|||||||
|
class Mechanize
  # Mechanize release string.  Frozen so the shared constant cannot be
  # mutated in place by code that reads it.
  VERSION = "2.7.6".freeze
end
|
||||||
Loading…
x
Reference in New Issue
Block a user