utils/curl: get encoding from header
This commit is contained in:
		
							parent
							
								
									4575ddf909
								
							
						
					
					
						commit
						6643f58b49
					
				@ -296,8 +296,8 @@ module Utils
 | 
			
		||||
      return unless check_content
 | 
			
		||||
 | 
			
		||||
      no_protocol_file_contents = %r{https?:\\?/\\?/}
 | 
			
		||||
      http_content = details[:file]&.gsub(no_protocol_file_contents, "/")
 | 
			
		||||
      https_content = secure_details[:file]&.gsub(no_protocol_file_contents, "/")
 | 
			
		||||
      http_content = details[:file]&.scrub&.gsub(no_protocol_file_contents, "/")
 | 
			
		||||
      https_content = secure_details[:file]&.scrub&.gsub(no_protocol_file_contents, "/")
 | 
			
		||||
 | 
			
		||||
      # Check for the same content after removing all protocols
 | 
			
		||||
      if (http_content && https_content) && (http_content == https_content) && http_with_https_available
 | 
			
		||||
@ -358,8 +358,19 @@ module Utils
 | 
			
		||||
      content_length = headers["content-length"]
 | 
			
		||||
 | 
			
		||||
      if status.success?
 | 
			
		||||
        file_contents = File.read(file.path)
 | 
			
		||||
        file_contents.encode!(Encoding::UTF_8, invalid: :replace) if headers["content-type"]&.start_with?("text/")
 | 
			
		||||
        open_args = {}
 | 
			
		||||
        # Try to get encoding from Content-Type header
 | 
			
		||||
        # TODO: add guessing encoding by <meta http-equiv="Content-Type" ...> tag
 | 
			
		||||
        if (content_type = headers["content-type"]) &&
 | 
			
		||||
           (match = content_type.match(/;\s*charset\s*=\s*([^\s]+)/)) &&
 | 
			
		||||
           (charset = match[1])
 | 
			
		||||
          begin
 | 
			
		||||
            open_args[:encoding] = Encoding.find(charset)
 | 
			
		||||
          rescue ArgumentError
 | 
			
		||||
            # Unknown charset in Content-Type header
 | 
			
		||||
          end
 | 
			
		||||
        end
 | 
			
		||||
        file_contents = File.read(file.path, open_args)
 | 
			
		||||
        file_hash = Digest::SHA2.hexdigest(file_contents) if hash_needed
 | 
			
		||||
      end
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user