vendor/bundle/ruby: unvendor REXML
This commit is contained in:
parent
3eba477b7c
commit
d6c389f4df
1
.gitignore
vendored
1
.gitignore
vendored
@ -117,6 +117,7 @@
|
|||||||
**/vendor/bundle/ruby/*/gems/rainbow-*/
|
**/vendor/bundle/ruby/*/gems/rainbow-*/
|
||||||
**/vendor/bundle/ruby/*/gems/rdiscount-*/
|
**/vendor/bundle/ruby/*/gems/rdiscount-*/
|
||||||
**/vendor/bundle/ruby/*/gems/regexp_parser-*/
|
**/vendor/bundle/ruby/*/gems/regexp_parser-*/
|
||||||
|
**/vendor/bundle/ruby/*/gems/rexml-*/
|
||||||
**/vendor/bundle/ruby/*/gems/ronn-*/
|
**/vendor/bundle/ruby/*/gems/ronn-*/
|
||||||
**/vendor/bundle/ruby/*/gems/rspec-*/
|
**/vendor/bundle/ruby/*/gems/rspec-*/
|
||||||
**/vendor/bundle/ruby/*/gems/rspec-core-*/
|
**/vendor/bundle/ruby/*/gems/rspec-core-*/
|
||||||
|
|||||||
@ -1,63 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
#vim:ts=2 sw=2 noexpandtab:
|
|
||||||
require_relative 'child'
|
|
||||||
require_relative 'source'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
# Represents a DTD attribute-list declaration (<!ATTLIST ...>).
#
# This class needs:
# * Documentation
# * Work!  Not all types of attlists are intelligently parsed, so we just
#   spew back out what we get in.  This works, but it would be better if
#   we formatted the output ourselves.
#
# AttlistDecls provide *just* enough support to allow namespace
# declarations.  If you need some sort of generalized support, or have an
# interesting idea about how to map the hideous, terrible design of DTD
# AttlistDecls onto an intuitive Ruby interface, let me know.  I'm desperate
# for anything to make DTDs more palatable.
class AttlistDecl < Child
  include Enumerable

  # First entry of the array handed to the constructor; DocType compares it
  # against an element name when looking up declared attributes.
  attr_reader :element_name

  # Create an AttlistDecl from already-parsed data.  Notice that this isn't
  # very convenient; to create an AttlistDecl, you basically have to format
  # it yourself.
  #
  # source::
  #   An Array of the form [element_name, pairs, contents].  +pairs+ is the
  #   attribute-name => value mapping used by #[], #include? and #each;
  #   +contents+ is the raw declaration text echoed back by #write.
  #   Any other kind of argument is ignored and leaves the declaration
  #   without a name, with no pairs and with no contents.
  def initialize(source)
    super()
    # Defaults keep the accessors below usable (and warning-free) even when
    # +source+ is not an Array.
    @element_name = nil
    @pairs = {}
    @contents = nil
    return unless source.kind_of?(Array)

    @element_name, @pairs, @contents = *source
  end

  # Access the attlist attribute/value pairs.
  #  value = attlist_decl[ attribute_name ]
  def [](key)
    @pairs[key]
  end

  # Whether an attlist declaration includes the given attribute definition
  #  if attlist_decl.include? "xmlns:foobar"
  def include?(key)
    # Direct hash lookup; avoids building a throw-away array of keys.
    @pairs.key?(key)
  end

  # Iterate over the key/value pairs:
  #  attlist_decl.each { |attribute_name, attribute_value| ... }
  def each(&block)
    @pairs.each(&block)
  end

  # Write out exactly what we got in.
  #
  # out::
  #   Anything that accepts the raw contents via <<
  # indent::
  #   Accepted for API compatibility; not used.
  def write(out, indent=-1)
    out << @contents
  end

  # Returns the node type symbol for this class, :attlistdecl.
  def node_type
    :attlistdecl
  end
end
|
|
||||||
end
|
|
||||||
@ -1,205 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative "namespace"
|
|
||||||
require_relative 'text'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
# Defines an Element Attribute; i.e., an attribute=value pair, as in:
# <element attribute="value"/>.  Attributes can be in their own
# namespaces.  General users of REXML will not interact with the
# Attribute class much.
class Attribute
  include Node
  include Namespace

  # The element to which this attribute belongs
  attr_reader :element
  # Writer for the normalized value of this attribute.  That is, the
  # attribute value with entities intact.
  attr_writer :normalized
  PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um

  NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um

  # Constructor.
  # FIXME: The parser doesn't catch illegal characters in attributes
  #
  # first::
  #   Either: an Attribute, which this new attribute will become a
  #   clone of; or a String, which is the name of this attribute
  # second::
  #   If +first+ is an Attribute, then this may be an Element, or nil.
  #   If nil, then the Element parent of this attribute is the parent
  #   of the +first+ Attribute.  If the first argument is a String,
  #   then this must also be a String, and is the content of the attribute.
  #   If this is the content, it must be fully normalized (contain no
  #   illegal characters).
  # parent::
  #   Ignored unless +first+ is a String; otherwise, may be the Element
  #   parent of this attribute, or nil.
  #
  # Raises a RuntimeError when +first+ is neither an Attribute nor a String.
  #
  #  Attribute.new( attribute_to_clone )
  #  Attribute.new( attribute_to_clone, parent_element )
  #  Attribute.new( "attr", "attr_value" )
  #  Attribute.new( "attr", "attr_value", parent_element )
  def initialize( first, second=nil, parent=nil )
    @normalized = @unnormalized = @element = nil
    if first.kind_of? Attribute
      self.name = first.expanded_name
      @unnormalized = first.value
      if second.kind_of? Element
        @element = second
      else
        @element = first.element
      end
    elsif first.kind_of? String
      @element = parent
      self.name = first
      @normalized = second.to_s
    else
      raise "illegal argument #{first.class.name} to Attribute constructor"
    end
  end

  # Returns the namespace prefix of the attribute.
  #
  #  e = Element.new( "elns:myelement" )
  #  e.add_attribute( "nsa:a", "aval" )
  #  e.add_attribute( "b", "bval" )
  #  e.attributes.get_attribute( "a" ).prefix   # -> "nsa"
  #  e.attributes.get_attribute( "b" ).prefix   # -> ""
  #  a = Attribute.new( "x", "y" )
  #  a.prefix                                   # -> ""
  def prefix
    super
  end

  # Returns the namespace URL, if defined, or nil otherwise
  #
  #  e = Element.new("el")
  #  e.add_namespace("ns", "http://url")
  #  e.add_attribute("ns:a", "b")
  #  e.add_attribute("nsx:a", "c")
  #  e.attribute("ns:a").namespace # => "http://url"
  #  e.attribute("nsx:a").namespace # => nil
  #
  # This method always returns "" for an attribute without a prefix,
  # because the default namespace doesn't apply to attribute names.
  #
  # From https://www.w3.org/TR/xml-names/#uniqAttrs
  #
  # > the default namespace does not apply to attribute names
  #
  #  e = REXML::Element.new("el")
  #  e.add_namespace("", "http://example.com/")
  #  e.namespace # => "http://example.com/"
  #  e.add_attribute("a", "b")
  #  e.attribute("a").namespace # => ""
  def namespace arg=nil
    arg = prefix if arg.nil?
    if arg == ""
      ""
    else
      # The prefix is resolved by the owning element.
      @element.namespace(arg)
    end
  end

  # Returns true if other is an Attribute and has the same name and value,
  # false otherwise.
  def ==( other )
    other.kind_of?(Attribute) and other.name==name and other.value==value
  end

  # Creates (and returns) a hash from both the name and value
  def hash
    name.hash + value.hash
  end

  # Returns this attribute out as XML source, expanding the name.  The value
  # is wrapped in double quotes when the owning element's context sets
  # :attribute_quote to :quote, and in apostrophes otherwise; occurrences of
  # the chosen quote character inside the value are escaped as &quot; or
  # &apos; so the emitted attribute stays well-formed.
  #
  #  a = Attribute.new( "x", "y" )
  #  a.to_string     # -> "x='y'"
  #  b = Attribute.new( "ns:x", "y" )
  #  b.to_string     # -> "ns:x='y'"
  def to_string
    if @element and @element.context and @element.context[:attribute_quote] == :quote
      %Q^#@expanded_name="#{to_s().gsub(/"/, '&quot;')}"^
    else
      "#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
    end
  end

  # Returns the doctype of the document that the owning element belongs to,
  # or nil when there is no owning element or no document.
  def doctype
    if @element
      doc = @element.document
      doc.doctype if doc
    end
  end

  # Returns the normalized attribute value.  If only the unnormalized form
  # is held, it is converted with Text.normalize, cached, and the
  # unnormalized form is dropped.
  def to_s
    return @normalized if @normalized

    @normalized = Text::normalize( @unnormalized, doctype )
    @unnormalized = nil
    @normalized
  end

  # Returns the UNNORMALIZED value of this attribute.  That is, entities
  # have been expanded to their values.  If only the normalized form is
  # held, it is converted with Text.unnormalize, cached, and the normalized
  # form is dropped.
  def value
    return @unnormalized if @unnormalized
    @unnormalized = Text::unnormalize( @normalized, doctype )
    @normalized = nil
    @unnormalized
  end

  # Returns a copy of this attribute
  def clone
    Attribute.new self
  end

  # Sets the element of which this object is an attribute.  Normally, this
  # is not directly called.
  #
  # Returns this attribute
  def element=( element )
    @element = element

    # Now that a doctype may be reachable through the element, re-check the
    # normalized value against it.
    if @normalized
      Text.check( @normalized, NEEDS_A_SECOND_CHECK, doctype )
    end

    self
  end

  # Removes this Attribute from the tree, and returns true if successful
  #
  # This method is usually not called directly.
  def remove
    @element.attributes.delete self.name unless @element.nil?
  end

  # Writes this attribute (EG, puts 'key="value"' to the output)
  #
  # output::
  #   Anything that accepts a String via <<
  # indent::
  #   Accepted for API compatibility; not used.
  def write( output, indent=-1 )
    output << to_string
  end

  # Returns the node type symbol for this class, :attribute.
  def node_type
    :attribute
  end

  # Returns the same text that #write produces, as a new String.
  def inspect
    rv = ""
    write( rv )
    rv
  end

  # Returns the XPath of this attribute: the owning element's xpath followed
  # by "/@" and the expanded attribute name.
  def xpath
    path = @element.xpath
    path += "/@#{self.expanded_name}"
    return path
  end
end
|
|
||||||
end
|
|
||||||
#vim:ts=2 sw=2 noexpandtab:
|
|
||||||
@ -1,68 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative "text"
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
# A CDATA section: text held between <![CDATA[ and ]]>.
class CData < Text
  START = '<![CDATA['
  STOP = ']]>'
  ILLEGAL = /(\]\]>)/

  # Constructor.  CData is data between <![CDATA[ ... ]]>
  #
  # The three arguments are handed to Text#initialize, followed by the fixed
  # values false, true and ILLEGAL.
  # NOTE(review): presumably those select "no entity filter", "raw" and the
  # illegal-content pattern -- confirm against Text#initialize.
  #
  # _Examples_
  #  CData.new( source )
  #  CData.new( "Here is some CDATA" )
  #  CData.new( "Some unprocessed data", respect_whitespace_TF, parent_element )
  def initialize( first, whitespace=true, parent=nil )
    super( first, whitespace, parent, false, true, ILLEGAL )
  end

  # Make a copy of this object
  #
  # _Examples_
  #  c = CData.new( "Some text" )
  #  d = c.clone
  #  d.to_s        # -> "Some text"
  def clone
    CData.new( self )
  end

  # Returns the content of this CData object
  #
  # _Examples_
  #  c = CData.new( "Some text" )
  #  c.to_s        # -> "Some text"
  def to_s
    @string
  end

  # Returns the stored string, exactly like #to_s.
  def value
    @string
  end

  # == DEPRECATED
  # See the rexml/formatters package
  #
  # Generates XML output of this object
  #
  # output::
  #   Where to write the string.  Defaults to $stdout
  # indent::
  #   The amount to indent this node by
  # transitive::
  #   Ignored
  # ie_hack::
  #   Ignored
  #
  # _Examples_
  #  c = CData.new( " Some text " )
  #  c.write( $stdout )     #->  <![CDATA[ Some text ]]>
  def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
    Kernel.warn( "#{self.class.name}.write is deprecated", uplevel: 1)
    indent( output, indent )
    # Each piece is appended with its own << call on +output+; the calls are
    # deliberately not chained, so every piece goes through +output+ itself.
    [START, @string].each { |piece| output << piece }
    output << STOP
  end
end
|
|
||||||
end
|
|
||||||
@ -1,97 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative "node"
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
##
# A Child object is something contained by a parent, and this class
# contains methods to support that.  Most user code will not use this
# class directly.
class Child
  include Node

  # The Parent of this object
  attr_reader :parent

  # Constructor.  Any inheritors of this class should call super to make
  # sure this method is called.
  # parent::
  #   if supplied, self is added to it via parent.add; @parent itself is
  #   only initialised to nil here.
  def initialize( parent = nil )
    @parent = nil
    parent.add( self ) if parent
  end

  # Replaces this object with another object.  Basically, calls
  # Parent.replace_child
  #
  # Returns:: self
  def replace_with( child )
    @parent.replace_child( self, child )
    self
  end

  # Removes this child from the parent.  Does nothing when there is no
  # parent.
  #
  # Returns:: self
  def remove
    @parent.delete( self ) unless @parent.nil?
    self
  end

  # Sets the parent of this child to the supplied argument.
  #
  # other::
  #   Must be a Parent object.  If this object is the same object as the
  #   existing parent of this child, no action is taken. Otherwise, this
  #   child is removed from the current parent (if one exists) and
  #   +other+ is recorded as the new parent.
  # Returns:: The parent that is now set
  def parent=( other )
    return @parent if @parent == other

    @parent.delete( self ) if @parent
    @parent = other
  end

  alias_method :next_sibling, :next_sibling_node
  alias_method :previous_sibling, :previous_sibling_node

  # Sets the next sibling of this child.  This can be used to insert a child
  # after some other child.
  #  a = Element.new("a")
  #  b = a.add_element("b")
  #  c = Element.new("c")
  #  b.next_sibling = c
  #  # => <a><b/><c/></a>
  def next_sibling=( other )
    parent.insert_after( self, other )
  end

  # Sets the previous sibling of this child.  This can be used to insert a
  # child before some other child.
  #  a = Element.new("a")
  #  b = a.add_element("b")
  #  c = Element.new("c")
  #  b.previous_sibling = c
  #  # => <a><c/><b/></a>
  def previous_sibling=( other )
    parent.insert_before( self, other )
  end

  # Returns:: the document this child belongs to, or nil if this child
  # belongs to no document
  def document
    parent.nil? ? nil : parent.document
  end

  # This doesn't yet handle encodings: the document's encoding is looked up
  # but its value is not used, and the plain #to_s result is returned.
  def bytes
    document.encoding

    to_s
  end
end
|
|
||||||
end
|
|
||||||
@ -1,80 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative "child"
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
##
# Represents an XML comment; that is, text between \<!-- ... -->
class Comment < Child
  include Comparable
  START = "<!--"
  STOP = "-->"

  # The content text
  attr_accessor :string

  ##
  # Constructor.  The first argument can be one of two supported types:
  # @param first If String, the contents of this comment are set to the
  # argument.  If Comment, the argument's contents are reused.  For any
  # other type the contents are left as nil.
  # @param second Should be nil, not supplied, or a Parent to which this
  # comment is added.
  def initialize( first, second = nil )
    super(second)
    # Always define @string so #to_s and #<=> never read an unset variable.
    @string = nil
    if first.kind_of? String
      @string = first
    elsif first.kind_of? Comment
      @string = first.string
    end
  end

  # Returns a new Comment with the same contents as this one.
  def clone
    Comment.new self
  end

  # == DEPRECATED
  # See REXML::Formatters
  #
  # output::
  #    Where to write the string
  # indent::
  #    An integer.    If -1, no indenting will be used; otherwise, the
  #    indentation will be this number of spaces, and children will be
  #    indented an additional amount.
  # transitive::
  #    Ignored by this class.    The contents of comments are never modified.
  # ie_hack::
  #    Needed for conformity to the child API, but not used by this class.
  def write( output, indent=-1, transitive=false, ie_hack=false )
    Kernel.warn("Comment.write is deprecated. See REXML::Formatters", uplevel: 1)
    indent( output, indent )
    output << START
    output << @string
    output << STOP
  end

  alias :to_s :string

  ##
  # Compares this Comment to another; the contents of the comment are used
  # in the comparison.  The receiver is the left-hand operand, so the
  # result follows the usual Comparable contract: negative when this
  # comment's text sorts before +other+'s, zero when equal, positive when
  # after.
  def <=>(other)
    @string <=> other.to_s
  end

  ##
  # Returns true when +other+ is a Comment whose contents compare equal to
  # this comment's contents.
  def ==( other )
    other.kind_of? Comment and
    (other <=> self) == 0
  end

  # Returns the node type symbol for this class, :comment.
  def node_type
    :comment
  end
end
|
|
||||||
end
|
|
||||||
#vim:ts=2 sw=2 noexpandtab:
|
|
||||||
@ -1,287 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative "parent"
|
|
||||||
require_relative "parseexception"
|
|
||||||
require_relative "namespace"
|
|
||||||
require_relative 'entity'
|
|
||||||
require_relative 'attlistdecl'
|
|
||||||
require_relative 'xmltokens'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
# Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
# ... >.  DOCTYPES can be used to declare the DTD of a document, as well as
# being used to declare entities used in the document.
class DocType < Parent
  include XMLTokens
  START = "<!DOCTYPE"
  STOP = ">"
  SYSTEM = "SYSTEM"
  PUBLIC = "PUBLIC"
  # Entity table shared by every DocType until its first #add, which swaps
  # in a private copy.  Must stay unfrozen: #add copies it with #clone,
  # which would preserve a frozen state.
  DEFAULT_ENTITIES = {
    'gt'=>EntityConst::GT,
    'lt'=>EntityConst::LT,
    'quot'=>EntityConst::QUOT,
    "apos"=>EntityConst::APOS
  }

  # name is the name of the doctype
  # external_id is the referenced DTD, if given
  # NOTE(review): no visible code in this class assigns @namespaces, so
  # #namespaces looks like it always returns nil -- confirm.
  attr_reader :name, :external_id, :entities, :namespaces

  # Constructor
  #
  #   dt = DocType.new( 'foo', '-//I/Hate/External/IDs' )
  #   # <!DOCTYPE foo '-//I/Hate/External/IDs'>
  #   dt = DocType.new( doctype_to_clone )
  #   # Shallow clone of doctype: name, external id, long name and uri are
  #   # copied; children are not.
  #   dt = DocType.new( [name, external_id, long_name, uri], parent )
  #
  # +Note+ that the constructor:
  #
  #  Doctype.new( Source.new( "<!DOCTYPE foo 'bar'>" ) )
  #
  # is _deprecated_.  Do not use it.  It will probably disappear.
  def initialize( first, parent=nil )
    @entities = DEFAULT_ENTITIES
    @long_name = @uri = nil
    if first.kind_of? String
      super()
      @name = first
      # In this form the second argument carries the external id, not a
      # parent node.
      @external_id = parent
    elsif first.kind_of? DocType
      super( parent )
      @name = first.name
      @external_id = first.external_id
      # Copy the identifiers too; without them a clone would be written out
      # without its public/system literals.
      @long_name = first.instance_variable_get(:@long_name)
      @uri = first.instance_variable_get(:@uri)
    elsif first.kind_of? Array
      super( parent )
      @name = first[0]
      @external_id = first[1]
      @long_name = first[2]
      @uri = first[3]
    elsif first.kind_of? Source
      super( parent )
      parser = Parsers::BaseParser.new( first )
      event = parser.pull
      if event[0] == :start_doctype
        @name, @external_id, @long_name, @uri, = event[1..-1]
      end
    else
      super()
    end
  end

  # Returns the node type symbol for this class, :doctype.
  def node_type
    :doctype
  end

  # Returns an Array of new Attribute objects, one for every name/value pair
  # of every AttlistDecl child declared for +element+.
  def attributes_of element
    rv = []
    each do |child|
      next unless child.kind_of?(AttlistDecl) && child.element_name == element

      child.each do |key,val|
        rv << Attribute.new(key,val)
      end
    end
    rv
  end

  # Returns the value declared for +attribute+ on +element+ by the first
  # matching AttlistDecl child, or nil when there is no such declaration.
  def attribute_of element, attribute
    att_decl = find do |child|
      child.kind_of?(AttlistDecl) &&
        child.element_name == element &&
        child.include?(attribute)
    end
    return nil unless att_decl
    att_decl[attribute]
  end

  # Returns a shallow copy of this doctype (see the constructor).
  def clone
    DocType.new self
  end

  # output::
  #   Where to write the string
  # indent::
  #   An integer.  If -1, no indentation will be used; otherwise, the
  #   indentation will be this number of spaces, and children will be
  #   indented an additional amount.
  # transitive::
  #   Ignored
  # ie_hack::
  #   Ignored
  def write( output, indent=0, transitive=false, ie_hack=false )
    f = REXML::Formatters::Default.new
    c = context
    # The context may request apostrophes instead of double quotes around
    # the long name and uri.
    if c and c[:prologue_quote] == :apostrophe
      quote = "'"
    else
      quote = "\""
    end
    indent( output, indent )
    output << START
    output << ' '
    output << @name
    output << " #{@external_id}" if @external_id
    output << " #{quote}#{@long_name}#{quote}" if @long_name
    output << " #{quote}#{@uri}#{quote}" if @uri
    unless @children.empty?
      output << ' ['
      @children.each { |child|
        output << "\n"
        f.write( child, output )
      }
      output << "\n]"
    end
    output << STOP
  end

  # Returns the parent's context, or nil when there is no parent.
  def context
    if @parent
      @parent.context
    else
      nil
    end
  end

  # Returns the unnormalized value of the entity called +name+, or nil when
  # no such entity is known.
  def entity( name )
    @entities[name].unnormalized if @entities[name]
  end

  # Adds +child+ and, when it is an Entity, registers it by name in the
  # entity table.
  def add child
    super(child)
    # Copy-on-write: swap the shared default table for a private copy
    # exactly once.  The identity check (rather than ==) avoids re-cloning
    # on every add while the copy still has the same contents.
    @entities = DEFAULT_ENTITIES.clone if @entities.equal?(DEFAULT_ENTITIES)
    @entities[ child.name ] = child if child.kind_of? Entity
  end

  # This method retrieves the public identifier identifying the document's
  # DTD.  Returns nil unless the external id is "PUBLIC".
  #
  # Method contributed by Henrik Martensson
  def public
    case @external_id
    when SYSTEM
      nil
    when PUBLIC
      strip_quotes(@long_name)
    end
  end

  # This method retrieves the system identifier identifying the document's DTD
  #
  # Method contributed by Henrik Martensson
  def system
    case @external_id
    when SYSTEM
      strip_quotes(@long_name)
    when PUBLIC
      @uri.kind_of?(String) ? strip_quotes(@uri) : nil
    end
  end

  # This method returns a list of notations that have been declared in the
  # _internal_ DTD subset. Notations in the external DTD subset are not
  # listed.
  #
  # Method contributed by Henrik Martensson
  def notations
    children().select {|node| node.kind_of?(REXML::NotationDecl)}
  end

  # Retrieves a named notation. Only notations declared in the internal
  # DTD subset can be retrieved.
  #
  # Method contributed by Henrik Martensson
  def notation(name)
    notations.find { |notation_decl|
      notation_decl.name == name
    }
  end

  private

  # Returns +quoted_string+ without its first and last character when the
  # whole string starts and ends with a quote character; otherwise returns
  # it unchanged.  The pattern is anchored to the entire string (\A/\z with
  # the multiline flag) so a quoted line in the middle of a multi-line
  # value cannot trigger stripping of unrelated characters.
  #
  # Method contributed by Henrik Martensson
  def strip_quotes(quoted_string)
    quoted_string =~ /\A['"].*['"]\z/m ?
      quoted_string[1, quoted_string.length-2] :
      quoted_string
  end
end
|
|
||||||
|
|
||||||
# We don't really handle any of these since we're not a validating
# parser, so we can be pretty dumb about them.  All we need to be able
# to do is spew them back out on a write()

# This is an abstract class.  You never use this directly; it serves as a
# parent class for the specific declarations.
class Declaration < Child
  # src:: the declaration text, stored as given (without the closing ">")
  def initialize(src)
    super()
    @string = src
  end

  # Returns the stored text with the closing ">" appended.
  def to_s
    @string + '>'
  end

  # == DEPRECATED
  # See REXML::Formatters
  #
  # Appends #to_s to +output+; +indent+ is accepted but not used.
  def write(output, indent)
    output << to_s
  end
end
|
|
||||||
|
|
||||||
public
|
|
||||||
# An element declaration; it adds nothing to Declaration beyond its own
# class identity.
class ElementDecl < Declaration
  # src:: the declaration text, passed straight on to Declaration
  def initialize(src)
    super(src)
  end
end
|
|
||||||
|
|
||||||
# Holds the text of an external entity and writes it back out unchanged.
class ExternalEntity < Child
  # src:: the entity text, stored as given
  def initialize(src)
    super()
    @entity = src
  end

  # Returns the stored entity text.
  def to_s
    @entity
  end

  # Appends the stored entity text to +output+; +indent+ is accepted but
  # not used.
  def write(output, indent)
    output << @entity
  end
end
|
|
||||||
|
|
||||||
# A <!NOTATION ...> declaration.
class NotationDecl < Child
  # The public and system identifiers; either may be nil, in which case it
  # is left out of the written declaration.
  attr_accessor :public, :system

  # This method retrieves the name of the notation.
  #
  # Method contributed by Henrik Martensson
  attr_reader :name

  # name::   the notation name
  # middle:: text written right after the name
  #          (presumably the PUBLIC / SYSTEM keyword -- confirm with caller)
  # pub::    the public identifier, or nil
  # sys::    the system identifier, or nil
  def initialize(name, middle, pub, sys)
    super(nil)
    @name = name
    @middle = middle
    @public = pub
    @system = sys
  end

  # Returns the declaration as XML source.  Identifiers are wrapped in
  # apostrophes when the parent's context sets :prologue_quote to
  # :apostrophe, and in double quotes otherwise.
  def to_s
    ctx = parent ? parent.context : nil
    quote = (ctx and ctx[:prologue_quote] == :apostrophe) ? "'" : "\""
    notation = "<!NOTATION #{@name} #{@middle}"
    [@public, @system].each do |identifier|
      notation << " #{quote}#{identifier}#{quote}" if identifier
    end
    notation << ">"
    notation
  end

  # Appends #to_s to +output+; +indent+ is accepted but not used.
  def write(output, indent=-1)
    output << to_s
  end
end
|
|
||||||
end
|
|
||||||
@ -1,291 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative "security"
|
|
||||||
require_relative "element"
|
|
||||||
require_relative "xmldecl"
|
|
||||||
require_relative "source"
|
|
||||||
require_relative "comment"
|
|
||||||
require_relative "doctype"
|
|
||||||
require_relative "instruction"
|
|
||||||
require_relative "rexml"
|
|
||||||
require_relative "parseexception"
|
|
||||||
require_relative "output"
|
|
||||||
require_relative "parsers/baseparser"
|
|
||||||
require_relative "parsers/streamparser"
|
|
||||||
require_relative "parsers/treeparser"
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
# Represents a full XML document, including PIs, a doctype, etc. A
|
|
||||||
# Document has a single child that can be accessed by root().
|
|
||||||
# Note that if you want to have an XML declaration written for a document
|
|
||||||
# you create, you must add one; REXML documents do not write a default
|
|
||||||
# declaration for you. See |DECLARATION| and |write|.
|
|
||||||
class Document < Element
|
|
||||||
# A convenient default XML declaration. If you want an XML declaration,
|
|
||||||
# the easiest way to add one is mydoc << Document::DECLARATION
|
|
||||||
# +DEPRECATED+
|
|
||||||
# Use: mydoc << XMLDecl.default
|
|
||||||
DECLARATION = XMLDecl.default
|
|
||||||
|
|
||||||
# Constructor
|
|
||||||
# @param source if supplied, must be a Document, String, or IO.
|
|
||||||
# Documents have their context and Element attributes cloned.
|
|
||||||
# Strings are expected to be valid XML documents. IOs are expected
|
|
||||||
# to be sources of valid XML documents.
|
|
||||||
# @param context if supplied, contains the context of the document;
|
|
||||||
# this should be a Hash.
|
|
||||||
def initialize( source = nil, context = {} )
|
|
||||||
@entity_expansion_count = 0
|
|
||||||
super()
|
|
||||||
@context = context
|
|
||||||
return if source.nil?
|
|
||||||
if source.kind_of? Document
|
|
||||||
@context = source.context
|
|
||||||
super source
|
|
||||||
else
|
|
||||||
build( source )
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def node_type
|
|
||||||
:document
|
|
||||||
end
|
|
||||||
|
|
||||||
# Should be obvious
|
|
||||||
def clone
|
|
||||||
Document.new self
|
|
||||||
end
|
|
||||||
|
|
||||||
# According to the XML spec, a root node has no expanded name
|
|
||||||
def expanded_name
|
|
||||||
''
|
|
||||||
#d = doc_type
|
|
||||||
#d ? d.name : "UNDEFINED"
|
|
||||||
end
|
|
||||||
|
|
||||||
alias :name :expanded_name
|
|
||||||
|
|
||||||
# We override this, because XMLDecls and DocTypes must go at the start
|
|
||||||
# of the document
|
|
||||||
def add( child )
|
|
||||||
if child.kind_of? XMLDecl
|
|
||||||
if @children[0].kind_of? XMLDecl
|
|
||||||
@children[0] = child
|
|
||||||
else
|
|
||||||
@children.unshift child
|
|
||||||
end
|
|
||||||
child.parent = self
|
|
||||||
elsif child.kind_of? DocType
|
|
||||||
# Find first Element or DocType node and insert the decl right
|
|
||||||
# before it. If there is no such node, just insert the child at the
|
|
||||||
# end. If there is a child and it is an DocType, then replace it.
|
|
||||||
insert_before_index = @children.find_index { |x|
|
|
||||||
x.kind_of?(Element) || x.kind_of?(DocType)
|
|
||||||
}
|
|
||||||
if insert_before_index # Not null = not end of list
|
|
||||||
if @children[ insert_before_index ].kind_of? DocType
|
|
||||||
@children[ insert_before_index ] = child
|
|
||||||
else
|
|
||||||
@children[ insert_before_index-1, 0 ] = child
|
|
||||||
end
|
|
||||||
else # Insert at end of list
|
|
||||||
@children << child
|
|
||||||
end
|
|
||||||
child.parent = self
|
|
||||||
else
|
|
||||||
rv = super
|
|
||||||
raise "attempted adding second root element to document" if @elements.size > 1
|
|
||||||
rv
|
|
||||||
end
|
|
||||||
end
|
|
||||||
alias :<< :add
|
|
||||||
|
|
||||||
def add_element(arg=nil, arg2=nil)
|
|
||||||
rv = super
|
|
||||||
raise "attempted adding second root element to document" if @elements.size > 1
|
|
||||||
rv
|
|
||||||
end
|
|
||||||
|
|
||||||
# @return the root Element of the document, or nil if this document
|
|
||||||
# has no children.
|
|
||||||
# Looks up entry 1 of the +elements+ collection.
#
# @return the root Element, or nil when the lookup finds nothing
def root
  elements[1]
end
|
|
||||||
|
|
||||||
# @return the DocType child of the document, if one exists,
|
|
||||||
# and nil otherwise.
|
|
||||||
# Scans the children in order for a DocType node.
#
# @return the first DocType child, or nil when there is none
def doctype
  @children.detect { |node| node.kind_of?(DocType) }
end
|
|
||||||
|
|
||||||
# @return the XMLDecl of this document; if no XMLDecl has been
|
|
||||||
# set, the default declaration is returned.
|
|
||||||
# Returns the first child when it is an XMLDecl. Otherwise XMLDecl.default
# is inserted at the front of the children and returned, so calling this
# method can modify the document.
#
# @return the XMLDecl of this document
def xml_decl
  declaration = @children[0]
  unless declaration.kind_of?(XMLDecl)
    declaration = XMLDecl.default
    @children.unshift(declaration)
  end
  declaration
end
|
|
||||||
|
|
||||||
# @return the XMLDecl version of this document as a String.
|
|
||||||
# If no XMLDecl has been set, returns the default version.
|
|
||||||
# Delegates to the document's XMLDecl (see xml_decl).
#
# @return the version reported by the XMLDecl
def version
  xml_decl.version
end
|
|
||||||
|
|
||||||
# @return the XMLDecl encoding of this document as an
|
|
||||||
# Encoding object.
|
|
||||||
# If no XMLDecl has been set, returns the default encoding.
|
|
||||||
# Delegates to the document's XMLDecl (see xml_decl).
#
# @return the encoding reported by the XMLDecl
def encoding
  xml_decl.encoding
end
|
|
||||||
|
|
||||||
# @return the XMLDecl standalone value of this document as a String.
|
|
||||||
# If no XMLDecl has been set, returns the default setting.
|
|
||||||
# Delegates to the document's XMLDecl (see xml_decl).
#
# @return the standalone value reported by the XMLDecl
def stand_alone?
  xml_decl.stand_alone?
end
|
|
||||||
|
|
||||||
# :call-seq:
|
|
||||||
# doc.write(output=$stdout, indent=-1, transitive=false, ie_hack=false, encoding=nil)
|
|
||||||
# doc.write(options={:output => $stdout, :indent => -1, :transitive => false, :ie_hack => false, :encoding => nil})
|
|
||||||
#
|
|
||||||
# Write the XML tree out, optionally with indent. This writes out the
|
|
||||||
# entire XML document, including XML declarations, doctype declarations,
|
|
||||||
# and processing instructions (if any are given).
|
|
||||||
#
|
|
||||||
# A controversial point is whether Document should always write the XML
|
|
||||||
# declaration (<?xml version='1.0'?>) whether or not one is given by the
|
|
||||||
# user (or source document). REXML does not write one if one was not
|
|
||||||
# specified, because it adds unnecessary bandwidth to applications such
|
|
||||||
# as XML-RPC.
|
|
||||||
#
|
|
||||||
# Accept Nth argument style and options Hash style as argument.
|
|
||||||
# The recommended style is options Hash style for one or more
|
|
||||||
# arguments case.
|
|
||||||
#
|
|
||||||
# _Examples_
|
|
||||||
# Document.new("<a><b/></a>").write
|
|
||||||
#
|
|
||||||
# output = ""
|
|
||||||
# Document.new("<a><b/></a>").write(output)
|
|
||||||
#
|
|
||||||
# output = ""
|
|
||||||
# Document.new("<a><b/></a>").write(:output => output, :indent => 2)
|
|
||||||
#
|
|
||||||
# See also the classes in the rexml/formatters package for the proper way
|
|
||||||
# to change the default formatting of XML output.
|
|
||||||
#
|
|
||||||
# _Examples_
|
|
||||||
#
|
|
||||||
# output = ""
|
|
||||||
# tr = Transitive.new
|
|
||||||
# tr.write(Document.new("<a><b/></a>"), output)
|
|
||||||
#
|
|
||||||
# output::
|
|
||||||
# output an object which supports '<< string'; this is where the
|
|
||||||
# document will be written.
|
|
||||||
# indent::
|
|
||||||
# An integer. If -1, no indenting will be used; otherwise, the
|
|
||||||
# indentation will be twice this number of spaces, and children will be
|
|
||||||
# indented an additional amount. For a value of 3, every item will be
|
|
||||||
# indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1
|
|
||||||
# transitive::
|
|
||||||
# If transitive is true and indent is >= 0, then the output will be
|
|
||||||
# pretty-printed in such a way that the added whitespace does not affect
|
|
||||||
# the absolute *value* of the document -- that is, it leaves the value
|
|
||||||
# and number of Text nodes in the document unchanged.
|
|
||||||
# ie_hack::
|
|
||||||
# This hack inserts a space before the /> on empty tags to address
|
|
||||||
# a limitation of Internet Explorer. Defaults to false
|
|
||||||
# encoding::
|
|
||||||
# Encoding name as String. Change output encoding to specified encoding
|
|
||||||
# instead of encoding in XML declaration.
|
|
||||||
# Defaults to nil. It means encoding in XML declaration is used.
|
|
||||||
# Serializes the document to an output object.
#
# Accepts either positional arguments (output, indent, transitive,
# ie_hack, encoding) or a single options Hash with the keys :output,
# :indent, :transitive, :ie_hack and :encoding. Missing values default to
# $stdout, -1, false, false and the encoding of the XML declaration.
#
# A non-negative indent selects the Pretty formatter, or the Transitive
# formatter (loaded on demand) when transitive is set; otherwise the
# Default formatter is used.
def write(*arguments)
  if arguments.size == 1 && arguments[0].instance_of?(Hash)
    options = arguments[0]
    output, indent, transitive, ie_hack, encoding =
      options.values_at(:output, :indent, :transitive, :ie_hack, :encoding)
  else
    output, indent, transitive, ie_hack, encoding, = *arguments
  end

  output ||= $stdout
  indent ||= -1
  transitive = false if transitive.nil?
  ie_hack = false if ie_hack.nil?
  encoding ||= xml_decl.encoding

  # Wrap the target so that text is transcoded, unless the output is
  # already UTF-8 or already wrapped.
  unless encoding == 'UTF-8' || output.kind_of?(Output)
    output = Output.new( output, encoding )
  end

  pretty_print = indent > -1
  formatter =
    if pretty_print && transitive
      require_relative "formatters/transitive"
      REXML::Formatters::Transitive.new( indent, ie_hack )
    elsif pretty_print
      REXML::Formatters::Pretty.new( indent, ie_hack )
    else
      REXML::Formatters::Default.new( ie_hack )
    end
  formatter.write( self, output )
end
|
|
||||||
|
|
||||||
|
|
||||||
# Creates a Parsers::StreamParser for +source+ and +listener+ and runs it.
#
# @return the result of the parser's +parse+ call
def Document.parse_stream(source, listener)
  stream_parser = Parsers::StreamParser.new(source, listener)
  stream_parser.parse
end
|
|
||||||
|
|
||||||
# Set the entity expansion limit. By default the limit is set to 10000.
|
|
||||||
#
|
|
||||||
# Deprecated. Use REXML::Security.entity_expansion_limit= instead.
|
|
||||||
# Forwards +val+ to REXML::Security.entity_expansion_limit=.
def Document.entity_expansion_limit=(val)
  Security.entity_expansion_limit = val
end
|
|
||||||
|
|
||||||
# Get the entity expansion limit. By default the limit is set to 10000.
|
|
||||||
#
|
|
||||||
# Deprecated. Use REXML::Security.entity_expansion_limit instead.
|
|
||||||
# Reads the limit from REXML::Security.entity_expansion_limit.
def Document.entity_expansion_limit
  Security.entity_expansion_limit
end
|
|
||||||
|
|
||||||
# Set the entity expansion text limit. By default the limit is set to 10240.
|
|
||||||
#
|
|
||||||
# Deprecated. Use REXML::Security.entity_expansion_text_limit= instead.
|
|
||||||
# Forwards +val+ to REXML::Security.entity_expansion_text_limit=.
def Document.entity_expansion_text_limit=(val)
  Security.entity_expansion_text_limit = val
end
|
|
||||||
|
|
||||||
# Get the entity expansion text limit. By default the limit is set to 10240.
|
|
||||||
#
|
|
||||||
# Deprecated. Use REXML::Security.entity_expansion_text_limit instead.
|
|
||||||
# Reads the limit from REXML::Security.entity_expansion_text_limit.
def Document.entity_expansion_text_limit
  Security.entity_expansion_text_limit
end
|
|
||||||
|
|
||||||
attr_reader :entity_expansion_count
|
|
||||||
|
|
||||||
# Counts one more entity expansion for this document.
#
# Raises a RuntimeError once the counter exceeds
# Security.entity_expansion_limit; returns nil otherwise.
def record_entity_expansion
  @entity_expansion_count += 1
  return unless @entity_expansion_count > Security.entity_expansion_limit
  raise "number of entity expansions exceeded, processing aborted."
end
|
|
||||||
|
|
||||||
# A document is its own owning document.
#
# @return [Document] self
def document
  self
end
|
|
||||||
|
|
||||||
private
|
|
||||||
# Runs a Parsers::TreeParser over +source+ with this document as target.
#
# @return the result of the parser's +parse+ call
def build( source )
  tree_parser = Parsers::TreeParser.new( source, self )
  tree_parser.parse
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@ -1,11 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative "../child"
|
|
||||||
module REXML
  module DTD
    # Patterns for recognising a DTD <!ATTLIST ...> declaration.
    class AttlistDecl < Child
      # Literal text that opens an attribute-list declaration.
      START = "<!ATTLIST"
      # Matches the opening keyword at the start of a line, allowing
      # leading whitespace.
      START_RE = %r{^\s*#{START}}um
      # Captures a declaration from the opening keyword up to the first ">".
      PATTERN_RE = %r{\s*(#{START}.*?>)}um
    end
  end
end
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative "elementdecl"
|
|
||||||
require_relative "entitydecl"
|
|
||||||
require_relative "../comment"
|
|
||||||
require_relative "notationdecl"
|
|
||||||
require_relative "attlistdecl"
|
|
||||||
require_relative "../parent"
|
|
||||||
|
|
||||||
module REXML
  module DTD
    # Turns DTD text into a Parent holding one object per declaration.
    #
    # NOTE(review): as written this class does not look functional; see the
    # notes inside parse_helper before relying on it.
    class Parser
      # Dispatches on the type of +input+: a String is parsed as is, a File
      # is read in full first. Any other type matches no branch, so the
      # method returns nil.
      def Parser.parse( input )
        case input
        when String
          parse_helper input
        when File
          parse_helper input.read
        end
      end

      # Takes a String and parses it out
      def Parser.parse_helper( input )
        contents = Parent.new
        # NOTE(review): nothing in this loop shortens +input+, so a
        # non-empty input that reaches it can never make the condition
        # false -- confirm whether the matched text was meant to be consumed.
        while input.size > 0
          case input
          # NOTE(review): `ElementDecl.PATTERN_RE` is method-call syntax,
          # not a constant lookup; `ElementDecl::PATTERN_RE` was presumably
          # intended here and in the branches below -- confirm.
          when ElementDecl.PATTERN_RE
            # $& is the matched String, whereas the other branches pass the
            # MatchData ($~).
            match = $&
            contents << ElementDecl.new( match )
          when AttlistDecl.PATTERN_RE
            matchdata = $~
            contents << AttlistDecl.new( matchdata )
          # NOTE(review): the DTD::EntityDecl and DTD::NotationDecl classes
          # that accompany this file define PUBLIC/SYSTEM/... patterns but
          # no PATTERN_RE -- verify which patterns were meant.
          when EntityDecl.PATTERN_RE
            matchdata = $~
            contents << EntityDecl.new( matchdata )
          when Comment.PATTERN_RE
            matchdata = $~
            contents << Comment.new( matchdata )
          when NotationDecl.PATTERN_RE
            matchdata = $~
            contents << NotationDecl.new( matchdata )
          end
        end
        contents
      end
    end
  end
end
|
|
||||||
@ -1,18 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative "../child"
|
|
||||||
module REXML
  module DTD
    # Holds the name and the remaining text of a DTD <!ELEMENT ...>
    # declaration.
    class ElementDecl < Child
      # Literal text that opens an element declaration.
      START = "<!ELEMENT"
      # Matches the opening keyword, allowing leading whitespace.
      START_RE = /^\s*#{START}/um
      # Group 1 captures the (optionally prefixed) element name, group 2
      # the text between the name and the closing ">".
      PATTERN_RE = /^\s*#{START}\s+((?:[:\w][-\.\w]*:)?[-!\*\.\w]*)(.*?)>/

      # match:: an object indexable with [1] (stored as the name) and [2]
      #         (stored as the rest of the declaration)
      def initialize(match)
        @name, @rest = match[1], match[2]
      end
    end
  end
end
|
|
||||||
@ -1,57 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative "../child"
|
|
||||||
module REXML
  module DTD
    # A DTD <!ENTITY ...> declaration read from a source object.
    class EntityDecl < Child
      START = "<!ENTITY"
      START_RE = /^\s*#{START}/um
      PUBLIC = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+PUBLIC\s+((["']).*?\3)\s+((["']).*?\5)\s*>/um
      SYSTEM = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+SYSTEM\s+((["']).*?\3)(?:\s+NDATA\s+\w+)?\s*>/um
      PLAIN = /^\s*#{START}\s+(\w+)\s+((["']).*?\3)\s*>/um
      PERCENT = /^\s*#{START}\s+%\s+(\w+)\s+((["']).*?\3)\s*>/um

      # Tries the PUBLIC, SYSTEM, PLAIN and PERCENT patterns in that order.
      # The first pattern that +src+ matches is matched a second time with
      # +true+ as the extra argument, and its groups become the name,
      # keyword and content of the declaration.
      #
      # Raises ParseException when no pattern matches.
      #
      #   <!ENTITY name SYSTEM "...">
      #   <!ENTITY name "...">
      def initialize(src)
        super()
        md = nil
        [[PUBLIC, "PUBLIC"], [SYSTEM, "SYSTEM"], [PLAIN, ""], [PERCENT, ""]].each do |pattern, keyword|
          next unless src.match( pattern )
          md = src.match( pattern, true )
          @middle = keyword
          # Only the PUBLIC form carries two quoted literals.
          @content = pattern.equal?(PUBLIC) ? "#{md[2]} #{md[4]}" : md[2]
          break
        end
        raise ParseException.new("failed Entity match", src) if md.nil?
        @name = md[1]
      end

      # Renders "<!ENTITY name [keyword ]content". Note that no closing
      # ">" is appended.
      def to_s
        text = "<!ENTITY #{@name} "
        text << "#{@middle} " unless @middle.empty?
        text << @content
      end

      # Writes the indentation followed by the declaration text to +output+.
      def write( output, indent )
        indent( output, indent )
        output << to_s
      end

      # NOTE(review): PATTERN_RE is not defined in this class, and +inspect+
      # is evaluated on the class itself, so the message sent to +listener+
      # is the lower-cased class name -- confirm the intent.
      def EntityDecl.parse_source(source, listener)
        md = source.match( PATTERN_RE, true )
        squeezed = md[0].squeeze(" \t\n\r")
        listener.send(inspect.downcase, squeezed)
      end
    end
  end
end
|
|
||||||
@ -1,40 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative "../child"
|
|
||||||
module REXML
  module DTD
    # A DTD <!NOTATION ...> declaration read from a source object.
    class NotationDecl < Child
      START = "<!NOTATION"
      START_RE = /^\s*#{START}/um
      PUBLIC = /^\s*#{START}\s+(\w[\w-]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
      SYSTEM = /^\s*#{START}\s+(\w[\w-]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um

      # Tries PUBLIC first, then SYSTEM. The pattern that +src+ matches is
      # matched a second time with +true+ as the extra argument, and groups
      # 1-3 become the name, keyword and remainder of the declaration.
      #
      # Raises ParseException when neither pattern matches.
      def initialize(src)
        super()
        pattern = [PUBLIC, SYSTEM].find { |candidate| src.match( candidate ) }
        unless pattern
          raise ParseException.new( "error parsing notation: no matching pattern", src )
        end
        md = src.match( pattern, true )
        @name, @middle, @rest = md[1], md[2], md[3]
      end

      # Renders the declaration as "<!NOTATION name keyword rest>".
      def to_s
        "<!NOTATION #{@name} #{@middle} #{@rest}>"
      end

      # Writes the indentation followed by the declaration text to +output+.
      def write( output, indent )
        indent( output, indent )
        output << to_s
      end

      # NOTE(review): PATTERN_RE is not defined in this class, and +inspect+
      # is evaluated on the class itself, so the message sent to +listener+
      # is the lower-cased class name -- confirm the intent.
      def NotationDecl.parse_source(source, listener)
        md = source.match( PATTERN_RE, true )
        squeezed = md[0].squeeze(" \t\n\r")
        listener.send(inspect.downcase, squeezed)
      end
    end
  end
end
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,51 +0,0 @@
|
|||||||
# coding: US-ASCII
|
|
||||||
# frozen_string_literal: false
|
|
||||||
module REXML
  # Mixin that stores an encoding name and converts strings between that
  # encoding and UTF-8.
  module Encoding
    # ID ---> Encoding name
    attr_reader :encoding

    # Sets the encoding.
    #
    # encoding::
    #   An encoding name (String), a core ::Encoding object, or nil. nil
    #   selects 'UTF-8'.
    #
    # Returns false when the encoding is unchanged and true otherwise.
    # Raises ArgumentError when a name cannot be converted to UTF-8.
    def encoding=(encoding)
      # Inside this module the bare constant Encoding is REXML::Encoding,
      # so the core class has to be named ::Encoding explicitly. Without
      # that, a core encoding object was never turned into its name and
      # failed further down on +upcase+. The original check is kept for
      # backward compatibility.
      if encoding.is_a?(::Encoding) || encoding.is_a?(Encoding)
        encoding = encoding.name
      end
      if encoding.is_a?(String)
        original_encoding = encoding
        encoding = find_encoding(encoding)
        unless encoding
          raise ArgumentError, "Bad encoding name #{original_encoding}"
        end
      end
      return false if defined?(@encoding) and encoding == @encoding
      if encoding
        @encoding = encoding.upcase
      else
        @encoding = 'UTF-8'
      end
      true
    end

    # Converts +string+ to the configured encoding.
    def encode(string)
      string.encode(@encoding)
    end

    # Converts +string+ from the configured encoding to UTF-8.
    def decode(string)
      string.encode(::Encoding::UTF_8, @encoding)
    end

    private

    # Normalizes a few spellings ("shift-jis", "CP-<digits>") and checks
    # that a conversion path from +name+ to UTF-8 exists.
    #
    # Returns the usable name, or nil when no converter is found.
    def find_encoding(name)
      case name
      when /\Ashift-jis\z/i
        return "SHIFT_JIS"
      when /\ACP-(\d+)\z/
        name = "CP#{$1}"
      when /\AUTF-8\z/i
        return name
      end
      begin
        ::Encoding::Converter.search_convpath(name, 'UTF-8')
      rescue ::Encoding::ConverterNotFoundError
        return nil
      end
      name
    end
  end
end
|
|
||||||
@ -1,171 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative 'child'
|
|
||||||
require_relative 'source'
|
|
||||||
require_relative 'xmltokens'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
# An entity declaration: either an internal entity holding a value or an
# external entity described by SYSTEM/PUBLIC identifiers.
class Entity < Child
  include XMLTokens
  PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
  SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
  PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
  EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
  NDATADECL = "\\s+NDATA\\s+#{NAME}"
  PEREFERENCE = "%#{NAME};"
  ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
  PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
  ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
  PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
  GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
  ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um

  attr_reader :name, :external, :ref, :ndata, :pubid

  # Creates an entity.
  #
  # When +stream+ is an Array it is read positionally: [1] is the name and
  # [2] is either the value or the keyword SYSTEM/PUBLIC followed by the
  # identifiers (and, for a five-element SYSTEM array, the NDATA name). A
  # trailing '%' element marks a parameter entity and is removed from the
  # array. Otherwise +stream+ is the name and +value+ the value.
  #
  # +WARNING+: nothing here validates the resulting state, so the accessors
  # make it easy to build a non-conformant Entity.
  #
  #  e = Entity.new( 'amp', '&' )
  def initialize(stream, value=nil, parent=nil, reference=false)
    super(parent)
    @ndata = @pubid = @value = @external = nil

    unless stream.kind_of?(Array)
      @reference = reference
      @external = nil
      @name = stream
      @value = value
      return
    end

    @name = stream[1]
    @reference = stream[-1] == '%' ? true : false
    stream.pop if @reference

    if stream[2] =~ /SYSTEM|PUBLIC/
      @external = stream[2]
      if @external == 'SYSTEM'
        @ref = stream[3]
        @ndata = stream[4] if stream.size == 5
      else
        @pubid = stream[3]
        @ref = stream[4]
      end
    else
      @value = stream[2]
    end
  end

  # Evaluates whether the given string matches an entity definition at
  # offset 0, returning true if so, and false otherwise.
  def Entity.matches?(string)
    position = ENTITYDECL =~ string
    position == 0
  end

  # Evaluates to the unnormalized value of this entity; that is, replacing
  # all entities -- both %ent; and &ent; entities. This differs from
  # +value()+ in that +value+ only replaces %ent; entities.
  #
  # Each call is recorded against the owning document's expansion counter
  # when a document exists. Returns nil when the entity has no value.
  def unnormalized
    unless document.nil?
      document.record_entity_expansion
    end
    expanded = value
    return nil if expanded.nil?
    @unnormalized = Text.unnormalize(expanded, parent)
  end

  # Returns the value of this entity unprocessed -- raw. This is the
  # normalized value; that is, with all %ent; and &ent; entities intact
  def normalized
    @value
  end

  # Writes out the entity definition (assuming the Entity object itself is
  # valid).
  #
  # out::
  #   An object implementing <TT><<</TT> to which the entity will be
  #   output
  # indent::
  #   *DEPRECATED* and ignored
  def write(out, indent=-1)
    # Prefer double quotes; fall back to single quotes when the literal
    # itself contains a double quote.
    quote = lambda { |literal| literal.include?('"') ? "'" : '"' }

    out << '<!ENTITY '
    out << '% ' if @reference
    out << @name
    out << ' '
    if @external
      out << @external << ' '
      if @pubid
        q = quote.call(@pubid)
        out << q << @pubid << q << ' '
      end
      q = quote.call(@ref)
      out << q << @ref << q
      out << ' NDATA ' << @ndata if @ndata
    else
      q = quote.call(@value)
      out << q << @value << q
    end
    out << '>'
  end

  # Returns this entity as a string. See write().
  def to_s
    ''.tap { |buffer| write(buffer) }
  end

  PEREFERENCE_RE = /#{PEREFERENCE}/um
  # Returns the value of this entity. At the moment, only internal entities
  # are processed. If the value contains internal references (IE,
  # %blah;), those are replaced with their values. IE, if the doctype
  # contains:
  #  <!ENTITY % foo "bar">
  #  <!ENTITY yada "nanoo %foo; nanoo>
  # then:
  #  doctype.entity('yada').value   #-> "nanoo bar nanoo"
  #
  # Raises a RuntimeError when the combined size of the substituted values
  # exceeds Security.entity_expansion_text_limit. Returns nil when the
  # entity has no value.
  def value
    return nil unless @value

    references = @value.scan(PEREFERENCE_RE)
    expanded = @value.clone
    if @parent
      total_bytes = 0
      references.each do |reference|
        replacement = @parent.entity( reference[0] )
        if total_bytes + replacement.bytesize > Security.entity_expansion_text_limit
          raise "entity expansion has grown too large"
        end
        total_bytes += replacement.bytesize
        expanded.gsub!( /%#{reference.join};/um, replacement )
      end
    end
    expanded
  end
end
|
|
||||||
|
|
||||||
# This is a set of entity constants -- the ones defined in the XML
|
|
||||||
# specification. These are +gt+, +lt+, +amp+, +quot+ and +apos+.
|
|
||||||
# CAUTION: these entities do not have a parent or a document
|
|
||||||
module EntityConst
  # +>+
  GT = Entity.new('gt', '>')
  # +<+
  LT = Entity.new('lt', '<')
  # +&+
  AMP = Entity.new('amp', '&')
  # +"+
  QUOT = Entity.new('quot', '"')
  # +'+
  APOS = Entity.new('apos', "'")
end
|
|
||||||
end
|
|
||||||
@ -1,116 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
|
|
||||||
module REXML
  module Formatters
    # Writes nodes without adding any formatting of its own.
    class Default
      # Prints out the XML document with no formatting -- except if ie_hack is
      # set.
      #
      # ie_hack::
      #   If set to true, then inserts whitespace before the close of an empty
      #   tag, so that IE's bad XML parser doesn't choke.
      def initialize( ie_hack=false )
        @ie_hack = ie_hack
      end

      # Writes the node to some output, dispatching on the node's class.
      #
      # node::
      #   The node to write
      # output::
      #   A class implementing <TT><<</TT>.  Pass in an Output object to
      #   change the output encoding.
      #
      # Raises Exception when the node's class is not handled.
      def write( node, output )
        case node
        when Document
          # Wrap the target so that a non-UTF-8 document is transcoded,
          # unless the caller already supplied an Output.
          unless node.xml_decl.encoding == 'UTF-8' || output.kind_of?(Output)
            output = Output.new( output, node.xml_decl.encoding )
          end
          write_document( node, output )
        when Element
          write_element( node, output )
        when Declaration, ElementDecl, NotationDecl, ExternalEntity, Entity,
             Attribute, AttlistDecl
          node.write( output,-1 )
        when Instruction
          write_instruction( node, output )
        when DocType, XMLDecl
          node.write( output )
        when Comment
          write_comment( node, output )
        when CData
          write_cdata( node, output )
        when Text
          write_text( node, output )
        else
          raise Exception.new("XML FORMATTING ERROR")
        end
      end

      protected

      # Writes every child of the document in order.
      def write_document( node, output )
        node.children.each { |child| write( child, output ) }
      end

      # Writes the start tag with its attributes sorted by name, then either
      # closes the tag as empty or writes the children and the end tag.
      def write_element( node, output )
        output << "<#{node.expanded_name}"

        unless node.attributes.empty?
          # An entry may be a Hash of attributes; expand those to their values.
          entries = node.attributes.to_a.map { |entry|
            Hash === entry ? entry.values : entry
          }
          entries.flatten.sort_by { |attribute| attribute.name }.each do |attribute|
            output << " "
            attribute.write( output )
          end
        end

        if node.children.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          node.children.each { |child| write( child, output ) }
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      # Writes the text node's string form.
      def write_text( node, output )
        output << node.to_s
      end

      # Writes the comment between the Comment delimiters.
      def write_comment( node, output )
        output << Comment::START
        output << node.to_s
        output << Comment::STOP
      end

      # Writes the CDATA section between the CData delimiters.
      def write_cdata( node, output )
        output << CData::START
        output << node.to_s
        output << CData::STOP
      end

      # Writes the processing instruction; the content, when present, is
      # separated from the target by one space.
      def write_instruction( node, output )
        output << Instruction::START
        output << node.target
        body = node.content
        if body
          output << ' '
          output << body
        end
        output << Instruction::STOP
      end
    end
  end
end
|
|
||||||
@ -1,142 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative 'default'
|
|
||||||
|
|
||||||
module REXML
  module Formatters
    # Pretty-prints an XML document.  This destroys whitespace in text nodes
    # and will insert carriage returns and indentations.
    #
    # TODO: Add an option to print attributes on new lines
    class Pretty < Default

      # If compact is set to true, then the formatter will attempt to use as
      # little space as possible
      attr_accessor :compact
      # The width of a page.  Used for formatting text
      attr_accessor :width

      # Create a new pretty printer.
      #
      # indentation::
      #   An integer greater than 0.  The indentation of each level will be
      #   this number of spaces.  If this is < 1, the behavior of this object
      #   is undefined.  Defaults to 2.
      # ie_hack::
      #   If true, the printer will insert whitespace before closing empty
      #   tags, thereby allowing Internet Explorer's XML parser to
      #   function. Defaults to false.
      def initialize( indentation=2, ie_hack=false )
        @indentation = indentation
        @level = 0
        @ie_hack = ie_hack
        @width = 80
        @compact = false
      end

      protected

      # Writes the element at the current indentation level. The children
      # are written on one line when compact output applies, otherwise one
      # per line at the next level.
      def write_element(node, output)
        output << ' '*@level
        output << "<#{node.expanded_name}"

        unless node.attributes.empty?
          node.attributes.each_attribute do |attribute|
            output << " "
            attribute.write( output )
          end
        end

        if node.children.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          unless write_compact_children( node, output )
            write_indented_children( node, output )
          end
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      # Collapses all whitespace to single spaces, wraps the text to the
      # remaining page width and indents it to the current level. The
      # string returned by the node's to_s is modified in place.
      def write_text( node, output )
        text = node.to_s
        text.gsub!(/\s/,' ')
        text.squeeze!(" ")
        text = wrap(text, @width - @level)
        text = indent_text(text, @level, " ", true)
        output << (' '*@level + text)
      end

      # Indents, then writes the comment as the Default formatter does.
      def write_comment( node, output)
        output << ' ' * @level
        super
      end

      # Indents, then writes the CDATA section as the Default formatter does.
      def write_cdata( node, output)
        output << ' ' * @level
        super
      end

      def write_document( node, output )
        # Ok, this is a bit odd.  All XML documents have an XML declaration,
        # but it may not write itself if the user didn't specifically add it,
        # either through the API or in the input document.  If it doesn't write
        # itself, then we don't need a carriage return... which makes this
        # logic more complex.
        node.children.each { |child|
          # A trailing Text child is dropped entirely.
          next if child == node.children[-1] && child.instance_of?(Text)

          at_start = child == node.children[0] ||
                     child.instance_of?(Text) ||
                     (child == node.children[1] && !node.children[0].writethis)
          output << "\n" unless at_start
          write( child, output )
        }
      end

      private

      # Writes the children on a single line when compact mode is on, all
      # of them are Text nodes and the result is shorter than the page
      # width. Returns true when it wrote them, false otherwise.
      def write_compact_children(node, output)
        return false unless compact
        return false unless node.children.all? { |child| child.kind_of?(Text) }

        buffer = ""
        saved_level = @level
        @level = 0
        node.children.each { |child| write( child, buffer ) }
        @level = saved_level

        return false unless buffer.length < @width
        output << buffer
        true
      end

      # Writes each child on its own line one level deeper, skipping Text
      # nodes that contain only whitespace, then indents for the end tag.
      def write_indented_children(node, output)
        output << "\n"
        @level += @indentation
        node.children.each { |child|
          next if child.kind_of?(Text) && child.to_s.strip.empty?
          write( child, output )
          output << "\n"
        }
        @level -= @indentation
        output << ' '*@level
      end

      # Appends +level+ copies of +style+ after every newline in +string+.
      # A negative level returns the string untouched; +indentfirstline+ is
      # not used.
      def indent_text(string, level=1, style="\t", indentfirstline=true)
        return string if level < 0
        string.gsub("\n", "\n#{style*level}")
      end

      # Breaks +string+ into lines by cutting at the last space found at or
      # before +width+, for as long as the remainder is longer than +width+
      # and such a space exists.
      def wrap(string, width)
        lines = []
        remaining = string
        loop do
          break unless remaining.length > width
          cut = remaining.rindex(' ', width)
          break unless cut
          lines << remaining[0...cut]
          remaining = remaining[(cut + 1)..-1]
        end
        lines << remaining
        lines.join("\n")
      end

    end
  end
end
|
|
||||||
|
|
||||||
@ -1,58 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative 'pretty'
|
|
||||||
|
|
||||||
module REXML
  module Formatters
    # The Transitive formatter writes an XML document that parses to an
    # identical document as the source document.  This means that no extra
    # whitespace nodes are inserted, and whitespace within text nodes is
    # preserved.  Within these constraints, the document is pretty-printed,
    # with whitespace inserted into the metadata to introduce formatting.
    #
    # Note that this is only useful if the original XML is not already
    # formatted.  Since this formatter does not alter whitespace nodes, the
    # results of formatting already formatted XML will be odd.
    class Transitive < Default
      # indentation::
      #   Number of spaces added per nesting level. Defaults to 2.
      # ie_hack::
      #   If true, a space is written before the "/" of an empty tag.
      #   Defaults to false.
      def initialize( indentation=2, ie_hack=false )
        @indentation = indentation
        @level = 0
        @ie_hack = ie_hack
      end

      protected

      # Writes the element with the line break and indentation placed
      # inside the tags, just before the closing ">", so that no
      # whitespace is added to the element content.
      def write_element( node, output )
        output << "<#{node.expanded_name}"

        unless node.attributes.empty?
          node.attributes.each_attribute do |attribute|
            output << " "
            attribute.write( output )
          end
        end

        output << "\n"
        output << ' '*@level
        if node.children.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          @level += @indentation
          node.children.each { |child| write( child, output ) }
          @level -= @indentation
          output << "</#{node.expanded_name}"
          output << "\n"
          output << ' '*@level
        end
        output << ">"
      end

      # Writes the text node's string form without changing its whitespace.
      def write_text( node, output )
        output << node.to_s
      end
    end
  end
end
|
|
||||||
@ -1,447 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
module REXML
  # The XPath core function library, implemented as singleton methods of
  # this module. The evaluation context (node, index, size) is held in
  # module-level state set through Functions::context=.
  #
  # If you add a method, keep in mind two things:
  # (1) the first argument will always be a list of nodes from which to
  # filter.  In the case of context methods (such as position), the function
  # should return an array with a value for each child in the array.
  # (2) all method calls from XML will have "-" replaced with "_".
  # Therefore, in XML, "local-name()" is identical (and actually becomes)
  # "local_name()"
  module Functions
    @@available_functions = {}
    @@context = nil
    @@namespace_context = {}
    @@variables = {}

    # Singleton methods that are plumbing rather than XPath functions; they
    # are never registered in @@available_functions.
    INTERNAL_METHODS = [
      :namespace_context,
      :namespace_context=,
      :variables,
      :variables=,
      :context=,
      :get_namespace,
      :send,
    ]
    class << self
      # Records every singleton method defined on Functions, except the
      # internal ones, as an available XPath function (consulted by send).
      def singleton_method_added(name)
        unless INTERNAL_METHODS.include?(name)
          @@available_functions[name] = true
        end
      end
    end

    def Functions::namespace_context=(x) ; @@namespace_context=x ; end
    def Functions::variables=(x) ; @@variables=x ; end
    def Functions::namespace_context ; @@namespace_context ; end
    def Functions::variables ; @@variables ; end

    def Functions::context=(value); @@context = value; end

    # For an element context node, returns an array with the value of each
    # of its text children; for a text context node, returns its value;
    # otherwise returns false.
    def Functions::text( )
      if @@context[:node].node_type == :element
        return @@context[:node].find_all{|n| n.node_type == :text}.collect{|n| n.value}
      elsif @@context[:node].node_type == :text
        return @@context[:node].value
      else
        return false
      end
    end

    # Returns the context size (the :size entry of the current context).
    def Functions::last( )
      @@context[:size]
    end

    # Returns the context position (the :index entry of the current context).
    def Functions::position( )
      @@context[:index]
    end

    # Returns the size of the given list of nodes.
    def Functions::count( node_set )
      node_set.size
    end

    # Since REXML is non-validating, this method is not implemented as it
    # requires a DTD. It always returns nil.
    def Functions::id( object )
    end

    # Returns the local name of the first node visited by get_namespace, or
    # "" when no node responds to :namespace.
    def Functions::local_name(node_set=nil)
      get_namespace(node_set) do |node|
        return node.local_name
      end
      ""
    end

    def Functions::namespace_uri( node_set=nil )
      get_namespace( node_set ) {|node| node.namespace}
    end

    def Functions::name( node_set=nil )
      get_namespace( node_set ) do |node|
        node.expanded_name
      end
    end

    # Helper method. Yields each candidate node that responds to :namespace:
    # the context node when +node_set+ is nil, every such member when
    # +node_set+ is enumerable (the block results are returned as an array),
    # or +node_set+ itself otherwise.
    def Functions::get_namespace( node_set = nil )
      if node_set == nil
        yield @@context[:node] if @@context[:node].respond_to?(:namespace)
      else
        if node_set.respond_to? :each
          result = []
          node_set.each do |node|
            result << yield(node) if node.respond_to?(:namespace)
          end
          result
        elsif node_set.respond_to? :namespace
          yield node_set
        end
      end
    end

    # A node-set is converted to a string by returning the string-value of the
    # node in the node-set that is first in document order. If the node-set is
    # empty, an empty string is returned.
    #
    # A number is converted to a string as follows
    #
    # NaN is converted to the string NaN
    #
    # positive zero is converted to the string 0
    #
    # negative zero is converted to the string 0
    #
    # positive infinity is converted to the string Infinity
    #
    # negative infinity is converted to the string -Infinity
    #
    # if the number is an integer, the number is represented in decimal form
    # as a Number with no decimal point and no leading zeros, preceded by a
    # minus sign (-) if the number is negative
    #
    # otherwise, the number is represented in decimal form as a Number
    # including a decimal point with at least one digit before the decimal
    # point and at least one digit after the decimal point, preceded by a
    # minus sign (-) if the number is negative; there must be no leading zeros
    # before the decimal point apart possibly from the one required digit
    # immediately before the decimal point; beyond the one required digit
    # after the decimal point there must be as many, but only as many, more
    # digits as are needed to uniquely distinguish the number from all other
    # IEEE 754 numeric values.
    #
    # The boolean false value is converted to the string false. The boolean
    # true value is converted to the string true.
    #
    # An object of a type other than the four basic types is converted to a
    # string in a way that is dependent on that type.
    def Functions::string( object=@@context[:node] )
      if object.respond_to?(:node_type)
        case object.node_type
        when :attribute
          object.value
        when :element
          string_value(object)
        when :document
          string_value(object.root)
        when :processing_instruction
          object.content
        else
          object.to_s
        end
      else
        case object
        when Array
          string(object[0])
        when Float
          if object.nan?
            "NaN"
          elsif object.infinite?
            # Float#to_i raises FloatDomainError for infinities; Float#to_s
            # already yields "Infinity" / "-Infinity".
            object.to_s
          else
            integer = object.to_i
            if object == integer
              "%d" % integer
            else
              object.to_s
            end
          end
        else
          object.to_s
        end
      end
    end

    # Returns the concatenation of the text of every text and element
    # descendant of +o+, in child order.
    def Functions::string_value( o )
      rv = ""
      o.children.each { |e|
        if e.node_type == :text
          rv << e.to_s
        elsif e.node_type == :element
          rv << string_value( e )
        end
      }
      rv
    end

    def Functions::concat( *objects )
      concatenated = ""
      objects.each do |object|
        concatenated << string(object)
      end
      concatenated
    end

    # Fixed by Mike Stok
    def Functions::starts_with( string, test )
      string(string).start_with?(string(test))
    end

    # Fixed by Mike Stok
    def Functions::contains( string, test )
      string(string).include?(string(test))
    end

    # Kouhei fixed this
    def Functions::substring_before( string, test )
      ruby_string = string(string)
      ruby_index = ruby_string.index(string(test))
      if ruby_index.nil?
        ""
      else
        ruby_string[ 0...ruby_index ]
      end
    end

    # Returns the part of +string+ that follows the first occurrence of
    # +test+, or "" when +test+ does not occur. +test+ is matched literally
    # (it is not interpreted as a regular expression) and the result may
    # span line breaks.
    def Functions::substring_after( string, test )
      ruby_string = string(string)
      ruby_test = string(test)
      ruby_index = ruby_string.index(ruby_test)
      if ruby_index.nil?
        ""
      else
        ruby_string[ (ruby_index + ruby_test.length)..-1 ]
      end
    end

    # Take equal portions of Mike Stok and Sean Russell; mix
    # vigorously, and pour into a tall, chilled glass.  Serves 10,000.
    def Functions::substring( string, start, length=nil )
      ruby_string = string(string)
      ruby_length = if length.nil?
                      ruby_string.length.to_f
                    else
                      number(length)
                    end
      ruby_start = number(start)

      # Handle the special cases
      return '' if (
        ruby_length.nan? or
        ruby_start.nan? or
        ruby_start.infinite?
      )

      infinite_length = ruby_length.infinite? == 1
      ruby_length = ruby_string.length if infinite_length

      # Now, get the bounds.  The XPath bounds are 1..length; the ruby bounds
      # are 0..length.  Therefore, we have to offset the bounds by one.
      ruby_start = round(ruby_start) - 1
      ruby_length = round(ruby_length)

      if ruby_start < 0
        ruby_length += ruby_start unless infinite_length
        ruby_start = 0
      end
      return '' if ruby_length <= 0
      ruby_string[ruby_start,ruby_length]
    end

    # UNTESTED
    def Functions::string_length( string )
      string(string).length
    end

    # Strips leading and trailing whitespace and collapses every internal
    # run of whitespace to a single space. Without an argument the string
    # value of the context node is used. For an Array argument each element
    # is normalized individually (nil elements stay nil).
    def Functions::normalize_space( string=nil )
      string = string(@@context[:node]) if string.nil?
      if string.kind_of? Array
        string.collect{|x| x.to_s.strip.gsub(/\s+/um, ' ') if x}
      else
        string.to_s.strip.gsub(/\s+/um, ' ')
      end
    end

    # This is entirely Mike Stok's beast
    def Functions::translate( string, tr1, tr2 )
      from = string(tr1)
      to = string(tr2)

      # the map is our translation table.
      #
      # if a character occurs more than once in the
      # from string then we ignore the second &
      # subsequent mappings
      #
      # if a character maps to nil then we delete it
      # in the output.  This happens if the from
      # string is longer than the to string
      #
      # there's nothing about - or ^ being special in
      # http://www.w3.org/TR/xpath#function-translate
      # so we don't build ranges or negated classes

      map = Hash.new
      0.upto(from.length - 1) { |pos|
        from_char = from[pos]
        unless map.has_key? from_char
          map[from_char] =
            if pos < to.length
              to[pos]
            else
              nil
            end
        end
      }

      string(string).chars.collect { |c|
        if map.has_key? c then map[c] else c end
      }.compact.join
    end

    # Converts +object+ to a boolean: numbers are true unless zero or NaN,
    # strings and arrays are true unless empty, anything else by Ruby
    # truthiness.
    def Functions::boolean(object=@@context[:node])
      case object
      when true, false
        object
      when Float
        return false if object.zero?
        return false if object.nan?
        true
      when Numeric
        not object.zero?
      when String
        not object.empty?
      when Array
        not object.empty?
      else
        object ? true : false
      end
    end

    # UNTESTED
    def Functions::not( object )
      not boolean( object )
    end

    # UNTESTED
    def Functions::true( )
      true
    end

    # UNTESTED
    def Functions::false( )
      false
    end

    # UNTESTED
    # Walks from the context node towards the root and compares +language+
    # with the first xml:lang attribute found on an element; returns false
    # when no element on the way carries that attribute.
    def Functions::lang( language )
      lang = false
      node = @@context[:node]
      attr = nil
      until node.nil?
        if node.node_type == :element
          attr = node.attributes["xml:lang"]
          unless attr.nil?
            lang = compare_language(string(language), attr)
            break
          end
        end
        node = node.parent
      end
      lang
    end

    # True when +lang2+ starts with +lang1+, ignoring case.
    def Functions::compare_language lang1, lang2
      lang2.downcase.index(lang1.downcase) == 0
    end

    # a string that consists of optional whitespace followed by an optional
    # minus sign followed by a Number followed by whitespace is converted to
    # the IEEE 754 number that is nearest (according to the IEEE 754
    # round-to-nearest rule) to the mathematical value represented by the
    # string; any other string is converted to NaN
    #
    # boolean true is converted to 1; boolean false is converted to 0
    #
    # a node-set is first converted to a string as if by a call to the string
    # function and then converted in the same way as a string argument
    #
    # an object of a type other than the four basic types is converted to a
    # number in a way that is dependent on that type
    def Functions::number(object=@@context[:node])
      case object
      when true
        Float(1)
      when false
        Float(0)
      when Array
        number(string(object))
      when Numeric
        object.to_f
      else
        str = string(object)
        case str.strip
        when /\A\s*(-?(?:\d+(?:\.\d*)?|\.\d+))\s*\z/
          $1.to_f
        else
          Float::NAN
        end
      end
    end

    def Functions::sum( nodes )
      nodes = [nodes] unless nodes.kind_of? Array
      nodes.inject(0) { |r,n| r + number(string(n)) }
    end

    def Functions::floor( number )
      number(number).floor
    end

    def Functions::ceiling( number )
      number(number).ceil
    end

    # Rounds the magnitude and restores the sign afterwards. NaN and
    # infinities cannot be rounded (FloatDomainError) and are returned
    # unchanged.
    def Functions::round( number )
      number = number(number)
      begin
        neg = number.negative?
        number = number.abs.round
        neg ? -number : number
      rescue FloatDomainError
        number
      end
    end

    def Functions::processing_instruction( node )
      node.node_type == :processing_instruction
    end

    # Dispatches to a registered function; any other name is evaluated as
    # an XPath expression against the context node instead.
    def Functions::send(name, *args)
      if @@available_functions[name.to_sym]
        super
      else
        # TODO: Maybe, this is not XPath spec behavior.
        # This behavior must be reconsidered.
        XPath.match(@@context[:node], name.to_s)
      end
    end
  end
end
|
|
||||||
@ -1,79 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
|
|
||||||
require_relative "child"
|
|
||||||
require_relative "source"
|
|
||||||
|
|
||||||
module REXML
  # Represents an XML Instruction; IE, <? ... ?>
  # TODO: Add parent arg (3rd arg) to constructor
  class Instruction < Child
    START = "<?"
    STOP = "?>"

    # target is the "name" of the Instruction; IE, the "tag" in <?tag ...?>
    # content is everything else.
    attr_accessor :target, :content

    # Constructs a new Instruction
    # @param target either a String, which becomes the target of this
    #   instruction, or an Instruction, which is shallowly cloned (its
    #   target and content are copied).
    # @param content when target is a String, the content of this
    #   instruction (surrounding whitespace is removed). When target is an
    #   Instruction, this argument is forwarded to the superclass
    #   constructor instead.
    # @raise ArgumentError if target is neither a String nor an Instruction
    def initialize(target, content=nil)
      case target
      when String
        super()
        @target = target
        @content = content
      when Instruction
        super(content)
        @target = target.target
        @content = target.content
      else
        raise ArgumentError,
              "processing instruction target must be String or REXML::Instruction: " \
              "<#{target.inspect}>"
      end
      # Strip into a fresh string: the caller's argument (or the content of
      # the instruction being cloned) is left untouched and may be frozen.
      @content = @content.strip if @content
    end

    def clone
      Instruction.new self
    end

    # == DEPRECATED
    # See the rexml/formatters package
    #
    def write writer, indent=-1, transitive=false, ie_hack=false
      Kernel.warn( "#{self.class.name}.write is deprecated", uplevel: 1)
      indent(writer, indent)
      writer << START
      writer << @target
      if @content
        writer << ' '
        writer << @content
      end
      writer << STOP
    end

    # @return true if other is an Instruction, and the content and target
    # of the other matches the target and content of this object.
    def ==( other )
      other.kind_of?(Instruction) &&
        other.target == @target &&
        other.content == @content
    end

    def node_type
      :processing_instruction
    end

    def inspect
      "<?p-i #{target} ...?>"
    end
  end
end
|
|
||||||
@ -1,196 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative '../xmltokens'
|
|
||||||
|
|
||||||
# [ :element, parent, name, attributes, children* ]
|
|
||||||
# a = Node.new
|
|
||||||
# a << "B" # => <a>B</a>
|
|
||||||
# a.b # => <a>B<b/></a>
|
|
||||||
# a.b[1] # => <a>B<b/><b/><a>
|
|
||||||
# a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
|
|
||||||
# a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
|
|
||||||
# a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
|
|
||||||
module REXML
  module Light
    # Represents a tagged XML element.  Elements are characterized by
    # having children, attributes, and names, and can themselves be
    # children.
    #
    # The accessors below address fixed slots: 0 is the node type, 1 the
    # parent, 2 the name, 3 the attributes and 4 onwards the children.
    #
    # NOTE(review): at, _old_get, _old_put and push are called throughout but
    # are not defined in this class as shown here; presumably Array-style
    # accessors supplied elsewhere -- confirm before relying on this class.
    class Node
      # Splits "prefix:local" into an optional prefix ($1) and local name ($2).
      NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
      # Node types for which size reports children.
      PARENTS = [ :element, :document, :doctype ]
      # Create a new element.
      # The :start_* event types are rewritten in place to their node types.
      # NOTE(review): @node is assigned before normalization, so for a String
      # or nil argument the array built below only lands in the local
      # variable and @node keeps the raw argument -- confirm intent.
      def initialize node=nil
        @node = node
        if node.kind_of? String
          node = [ :text, node ]
        elsif node.nil?
          node = [ :document, nil, nil ]
        elsif node[0] == :start_element
          node[0] = :element
        elsif node[0] == :start_doctype
          node[0] = :doctype
        elsif node[0] == :start_document
          node[0] = :document
        end
      end

      # Size of the last slot of @node for parent-type nodes, 0 otherwise.
      def size
        if PARENTS.include? @node[0]
          @node[-1].size
        else
          0
        end
      end

      # Yields slots 4, 5, ... (the children), size times.
      def each
        size.times { |x| yield( at(x+4) ) }
      end

      # The name stored in slot 2.
      def name
        at(2)
      end

      # Stores name_str in slot 2, prefixed with the prefix for ns if given.
      def name=( name_str, ns=nil )
        pfx = ''
        pfx = "#{prefix(ns)}:" if ns
        _old_put(2, "#{pfx}#{name_str}")
      end

      # Stores the parent in slot 1.
      def parent=( node )
        _old_put(1,node)
      end

      # The local part of the name, computed by namesplit.
      def local_name
        namesplit
        @name
      end

      # NOTE(review): writes slot 1, which parent/parent= use for the parent,
      # while name reads slot 2 -- looks like it should target slot 2; confirm.
      def local_name=( name_str )
        _old_put( 1, "#@prefix:#{name_str}" )
      end

      def prefix( namespace=nil )
        prefix_of( self, namespace )
      end

      def namespace( prefix=prefix() )
        namespace_of( self, prefix )
      end

      # NOTE(review): like local_name=, this writes slot 1 rather than the
      # name slot 2 -- confirm.
      def namespace=( namespace )
        @prefix = prefix( namespace )
        pfx = ''
        pfx = "#@prefix:" if @prefix.size > 0
        _old_put(1, "#{pfx}#@name")
      end

      # String reference: attribute lookup in slot 3 (optionally prefixed for
      # ns). Range or other reference: child access offset by 4.
      def []( reference, ns=nil )
        if reference.kind_of? String
          pfx = ''
          pfx = "#{prefix(ns)}:" if ns
          at(3)["#{pfx}#{reference}"]
        elsif reference.kind_of? Range
          _old_get( Range.new(4+reference.begin, reference.end, reference.exclude_end?) )
        else
          _old_get( 4+reference )
        end
      end

      # Matches the XPath +path+ against this node.
      def =~( path )
        XPath.match( self, path )
      end

      # Doesn't handle namespaces yet
      # NOTE(review): the Range branch offsets by 3 while [] offsets by 4 --
      # confirm which is intended.
      def []=( reference, ns, value=nil )
        if reference.kind_of? String
          value = ns unless value
          at( 3 )[reference] = value
        elsif reference.kind_of? Range
          _old_put( Range.new(3+reference.begin, reference.end, reference.exclude_end?), ns )
        else
          if value
            _old_put( 4+reference, ns, value )
          else
            _old_put( 4+reference, ns )
          end
        end
      end

      # Append a child to this element, optionally under a provided namespace.
      # The namespace argument is ignored if the element argument is an Element
      # object.  Otherwise, the element argument is a string, the namespace (if
      # provided) is the namespace the element is created in.
      # For a :text node the argument is appended to the last slot instead of
      # creating a child. Returns the last slot.
      def << element
        if node_type() == :text
          at(-1) << element
        else
          newnode = Node.new( element )
          newnode.parent = self
          self.push( newnode )
        end
        at(-1)
      end

      # The node type stored in slot 0.
      def node_type
        _old_get(0)
      end

      # NOTE(review): normalizefoo is not defined anywhere in this class as
      # shown; calling this method presumably raises NameError -- confirm.
      def text=( foo )
        replace = at(4).kind_of?(String)? 1 : 0
        self._old_put(4,replace, normalizefoo)
      end

      # Follows slot 1 (the parent) upwards until there is none.
      # NOTE(review): the last expression is the `while` modifier statement,
      # so the method's value is not the local +context+ -- confirm whether
      # returning +context+ was intended.
      def root
        context = self
        context = context.at(1) while context.at(1)
      end

      # NOTE(review): compares slot 3 (used as the attributes elsewhere) with
      # the name -- confirm.
      def has_name?( name, namespace = '' )
        at(3) == name and namespace() == namespace
      end

      def children
        self
      end

      # The parent stored in slot 1.
      def parent
        at(1)
      end

      # Empty body: always returns nil.
      def to_s

      end

      private

      # Splits the name in slot 2 into @prefix and @name.
      # NOTE(review): `@name.defined?` is a method call on @name, not the
      # `defined?` keyword, and `'' || $1` always evaluates to '' -- confirm.
      def namesplit
        return if @name.defined?
        at(2) =~ NAMESPLIT
        @prefix = '' || $1
        @name = $2
      end

      # Looks up "xmlns" or "xmlns:<prefix>" in slot 3 of this node.
      # NOTE(review): the +node+ argument is unused and the fallback recurses
      # with @node[0] (the node type) -- confirm.
      def namespace_of( node, prefix=nil )
        if not prefix
          name = at(2)
          name =~ NAMESPLIT
          prefix = $1
        end
        to_find = 'xmlns'
        to_find = "xmlns:#{prefix}" if not prefix.nil?
        ns = at(3)[ to_find ]
        ns ? ns : namespace_of( @node[0], prefix )
      end

      # Without a namespace, returns the prefix part of node's name;
      # otherwise searches slot 3 for an entry whose value is the namespace
      # and falls back to node's parent.
      def prefix_of( node, namespace=nil )
        if not namespace
          name = node.name
          name =~ NAMESPLIT
          $1
        else
          ns = at(3).find { |k,v| v == namespace }
          ns ? ns : prefix_of( node.parent, namespace )
        end
      end
    end
  end
end
|
|
||||||
@ -1,59 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
|
|
||||||
require_relative 'xmltokens'
|
|
||||||
|
|
||||||
module REXML
  # Adds named attributes to an object.
  module Namespace
    # name is the local part of the name; expanded_name is the name exactly
    # as it was passed to name=. Both are valid once name= has been called.
    attr_reader :name, :expanded_name
    # The prefix part of the name ("" when the name has no prefix, nil when
    # the name was set to "").
    attr_accessor :prefix
    include XMLTokens
    NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u

    # Sets the name and the expanded name
    # @param name "prefix:local", "local", or "" to clear name, prefix and
    #   namespace
    # @raise ArgumentError if name has none of those forms
    def name=( name )
      @expanded_name = name
      case name
      when NAMESPLIT
        if $1
          @prefix = $1
        else
          @prefix = ""
          @namespace = ""
        end
        @name = $2
      when ""
        @prefix = nil
        @namespace = nil
        @name = nil
      else
        message = "name must be \#{PREFIX}:\#{LOCAL_NAME} or \#{LOCAL_NAME}: "
        message += "<#{name.inspect}>"
        raise ArgumentError, message
      end
    end

    # Compares names optionally WITH namespaces
    # With +ns+, both the namespace and the local name must match; a
    # colon in +other+ compares against the fully expanded name; otherwise
    # only the local name is compared.
    def has_name?( other, ns=nil )
      if ns
        namespace() == ns && name() == other
      elsif other.include? ":"
        fully_expanded_name == other
      else
        name == other
      end
    end

    alias :local_name :name

    # Fully expand the name, even if the prefix wasn't specified in the
    # source file.
    def fully_expanded_name
      ns = prefix
      # prefix is nil after name = "", so guard before asking for its size
      return "#{ns}:#@name" if ns && ns.size > 0
      @name
    end
  end
end
|
|
||||||
@ -1,76 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative "parseexception"
|
|
||||||
require_relative "formatters/pretty"
|
|
||||||
require_relative "formatters/default"
|
|
||||||
|
|
||||||
module REXML
  # Represents a node in the tree.  Nodes are never encountered except as
  # superclasses of other objects.  Nodes have siblings.
  module Node
    # @return the sibling that follows this node in its parent, or nil when
    # there is no parent or no following sibling
    def next_sibling_node
      return nil if @parent.nil?
      position = @parent.index(self)
      @parent[ position + 1 ]
    end

    # @return the sibling that precedes this node in its parent, or nil when
    # there is no parent or this node is the first child
    def previous_sibling_node
      return nil if @parent.nil?
      position = @parent.index(self)
      position == 0 ? nil : @parent[ position - 1 ]
    end

    # indent::
    #   *DEPRECATED* This parameter is now ignored.  See the formatters in the
    #   REXML::Formatters package for changing the output style.
    def to_s indent=nil
      rv = ""
      if indent.nil?
        formatter = REXML::Formatters::Default.new
        formatter.write( self, rv )
      else
        Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated", uplevel: 1)
        formatter = REXML::Formatters::Pretty.new( indent )
        formatter.write( self, rv )
      end
      rv
    end

    # Appends +ind+ copies of the indent string to +to+. The indent string
    # is the parent context's :indentstyle when one is set, a single space
    # otherwise. Nothing is appended when +ind+ is less than 1.
    def indent to, ind
      indentstyle =
        if @parent && @parent.context && !@parent.context[:indentstyle].nil?
          @parent.context[:indentstyle]
        else
          ' '
        end
      to << indentstyle*ind unless ind<1
    end

    # Plain nodes are not parents.
    def parent?
      false
    end


    # Visit all subnodes of +self+ recursively
    def each_recursive(&block) # :yields: node
      self.elements.each do |node|
        block.call(node)
        node.each_recursive(&block)
      end
    end

    # Find (and return) first subnode (recursively) for which the block
    # evaluates to true. Returns +nil+ if none was found.
    def find_first_recursive(&block) # :yields: node
      each_recursive { |node| return node if block.call(node) }
      nil
    end

    # Returns the position that +self+ holds in its parent's array, indexed
    # from 1.
    def index_in_parent
      parent.index(self) + 1
    end
  end
end
|
|
||||||
@ -1,30 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative 'encoding'
|
|
||||||
|
|
||||||
module REXML
  # Wraps an output object and encodes everything appended to it, unless
  # the configured encoding is UTF-8, in which case content is passed
  # through unchanged.
  class Output
    include Encoding

    attr_reader :encoding

    # real_IO:: the object content is appended to with <<
    # encd:: name of the output encoding (defaults to "iso-8859-1")
    def initialize real_IO, encd="iso-8859-1"
      @output = real_IO
      self.encoding = encd

      # despite its name, this flag means "content must be re-encoded"
      @to_utf = (encoding != 'UTF-8')

      return unless encoding == "UTF-16"

      # UTF-16 output starts with a big-endian byte order mark and is
      # written as UTF-16BE from then on
      @output << "\ufeff".encode("UTF-16BE")
      self.encoding = "UTF-16BE"
    end

    # Appends +content+ to the wrapped output, encoding it first when needed.
    def <<( content )
      payload = @to_utf ? self.encode(content) : content
      @output << payload
    end

    def to_s
      "Output[#{encoding}]"
    end
  end
end
|
|
||||||
@ -1,166 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative "child"
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
# A parent has children, and has methods for accessing them. The Parent
|
|
||||||
# class is never encountered except as the superclass for some other
|
|
||||||
# object.
|
|
||||||
class Parent < Child
|
|
||||||
include Enumerable
|
|
||||||
|
|
||||||
# Constructor
|
|
||||||
# @param parent if supplied, will be set as the parent of this object
|
|
||||||
def initialize(parent = nil)
  super(parent)
  # every parent starts out with an empty child list
  @children = []
end
|
|
||||||
|
|
||||||
# Appends +object+ to the child list and makes this node its parent.
# Returns +object+.
def add( object )
  object.parent = self
  @children.push(object)
  object
end
|
|
||||||
|
|
||||||
alias :push :add
|
|
||||||
alias :<< :push
|
|
||||||
|
|
||||||
# Inserts +object+ at the front of the child list and makes this node its
# parent. Returns the child list.
def unshift( object )
  object.parent = self
  @children.unshift(object)
end
|
|
||||||
|
|
||||||
# Removes +object+ (matched by identity) from the child list and clears
# its parent. Returns +object+, or nil when it was not a child.
def delete( object )
  removed = @children.reject! { |child| child.equal?(object) }
  return nil if removed.nil?

  object.parent = nil
  object
end
|
|
||||||
|
|
||||||
# Iterates over the children in order, delegating to the child list.
def each(&visitor)
  @children.each(&visitor)
end
|
|
||||||
|
|
||||||
# Removes every child for which the block returns true. The parent of a
# removed child is left unchanged.
def delete_if( &predicate )
  @children.delete_if(&predicate)
end
|
|
||||||
|
|
||||||
# Removes and returns the child at +index+ (nil when out of range). The
# parent of the removed child is left unchanged.
def delete_at( index )
  @children.delete_at(index)
end
|
|
||||||
|
|
||||||
# Yields each valid child index in order.
def each_index( &visitor )
  @children.each_index(&visitor)
end
|
|
||||||
|
|
||||||
# Fetches a child at a given index
|
|
||||||
# @param index the Integer index of the child to fetch
|
|
||||||
def []( index )
  # plain delegation to the child list
  @children[index]
end
|
|
||||||
|
|
||||||
alias :each_child :each
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Set an index entry. See Array.[]=
|
|
||||||
# @param index the index of the element to set
|
|
||||||
# @param opt either the object to set, or an Integer length
|
|
||||||
# @param child if opt is an Integer, this is the child to set
|
|
||||||
# @return the parent (self)
|
|
||||||
def []=( *args )
  # the last argument is the child; everything before it addresses the
  # slot(s) in the child list, exactly as for Array#[]=
  *position, child = args
  child.parent = self
  @children[*position] = child
end
|
|
||||||
|
|
||||||
# Inserts a child before another child
|
|
||||||
# @param child1 this is either an xpath or an Element. If an Element,
|
|
||||||
# child2 will be inserted before child1 in the child list of the parent.
|
|
||||||
# If an xpath, child2 will be inserted before the first child to match
|
|
||||||
# the xpath.
|
|
||||||
# @param child2 the child to insert
|
|
||||||
# @return the parent (self)
|
|
||||||
def insert_before( child1, child2 )
  if child1.kind_of? String
    # an xpath: resolve it, then let the matched node's parent insert
    anchor = XPath.first( self, child1 )
    anchor.parent.insert_before anchor, child2
    return self
  end

  position = index(child1)
  # detach child2 from wherever it currently lives
  child2.parent.delete(child2) if child2.parent
  @children[position, 0] = child2
  child2.parent = self
  self
end
|
|
||||||
|
|
||||||
# Inserts a child after another child
|
|
||||||
# @param child1 this is either an xpath or an Element. If an Element,
|
|
||||||
# child2 will be inserted after child1 in the child list of the parent.
|
|
||||||
# If an xpath, child2 will be inserted after the first child to match
|
|
||||||
# the xpath.
|
|
||||||
# @param child2 the child to insert
|
|
||||||
# @return the parent (self)
|
|
||||||
def insert_after(child1, child2)
  if child1.kind_of?(String)
    # child1 is an XPath: resolve it and let the matched node's own parent
    # perform the insertion.
    target = XPath.first(self, child1)
    target.parent.insert_after(target, child2)
    return self
  end

  # The position is computed before child2 is detached from its old parent.
  position = index(child1) + 1
  child2.parent.delete(child2) if child2.parent
  @children[position, 0] = child2
  child2.parent = self
  self
end
|
|
||||||
|
|
||||||
def to_a
  # A shallow copy: the array is duplicated, the children are not.
  @children.dup
end
|
|
||||||
|
|
||||||
# Fetches the index of a given child
|
|
||||||
# @param child the child to get the index of
|
|
||||||
# @return the index of the child, or nil if the object is not a child
|
|
||||||
# of this parent.
|
|
||||||
def index(child)
  # Children are still matched by #hash. Array#index returns nil for a miss,
  # which is what the documented contract promises; the previous
  # counter-based scan returned the last position instead (or -1 when there
  # were no children at all).
  @children.index { |candidate| candidate.hash == child.hash }
end
|
|
||||||
|
|
||||||
# @return the number of children of this parent
|
|
||||||
def size
  # Number of direct children held by this parent.
  @children.length
end
|
|
||||||
|
|
||||||
alias :length :size
|
|
||||||
|
|
||||||
# Replaces one child with another, making sure the nodelist is correct
|
|
||||||
# @param to_replace the child to replace (must be a Child)
|
|
||||||
# @param replacement the child to insert into the nodelist (must be a
|
|
||||||
# Child)
|
|
||||||
def replace_child(to_replace, replacement)
  # Identity (equal?) decides which slot is swapped, so an equal-but-distinct
  # node is left alone.
  @children.each_with_index do |child, position|
    @children[position] = replacement if child.equal?(to_replace)
  end
  to_replace.parent = nil
  replacement.parent = self
end
|
|
||||||
|
|
||||||
# Deeply clones this object. This creates a complete duplicate of this
|
|
||||||
# Parent, including all descendants.
|
|
||||||
def deep_clone
  copy = clone
  each do |child|
    # Parents recurse so their whole subtree is duplicated; any other child
    # is simply cloned.
    copy << (child.kind_of?(Parent) ? child.deep_clone : child.clone)
  end
  copy
end
|
|
||||||
|
|
||||||
alias :children :to_a
|
|
||||||
|
|
||||||
def parent?
  # Unconditionally true for this node type.
  true
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@ -1,52 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
module REXML
|
|
||||||
# Raised when the XML input cannot be parsed. Besides the message it can
# carry the source being read, the parser that failed, and an underlying
# exception that triggered the failure.
class ParseException < RuntimeError
  attr_accessor :source, :parser, :continued_exception

  # @param message   description of the failure
  # @param source    the input source being read when the failure occurred
  # @param parser    the parser that raised
  # @param exception an underlying exception wrapped by this one
  def initialize( message, source=nil, parser=nil, exception=nil )
    super(message)
    @source = source
    @parser = parser
    @continued_exception = exception
  end

  # Builds the full report: the wrapped exception (if any), the message,
  # and, when a source is attached, the line, position and the next 80
  # characters of unread input.
  def to_s
    err = "".dup

    # Quote the original exception, if there was one
    if @continued_exception
      err << @continued_exception.inspect
      err << "\n"
      # An exception that was never raised has a nil backtrace.
      err << (@continued_exception.backtrace || []).join("\n")
      err << "\n...\n"
    end

    # Get the error message
    err << super

    # Add contextual information
    if @source
      err << "\nLine: #{line}\n"
      err << "Position: #{position}\n"
      err << "Last 80 unconsumed characters:\n"
      # [0, 80] yields exactly 80 characters; [0..80] produced 81.
      err << @source.buffer[0, 80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
    end

    err
  end

  # @return element 0 of the source's current_line, or nil when unavailable
  def position
    info = current_line_info
    info[0] if info
  end

  # @return element 2 of the source's current_line, or nil when unavailable
  def line
    info = current_line_info
    info[2] if info
  end

  # @return the source's current_line, or nil when there is no source or the
  #   source does not provide one
  def context
    current_line_info
  end

  private

  # Shared guard for #position, #line and #context.
  def current_line_info
    @source.current_line if @source && defined?(@source.current_line)
  end
end
|
|
||||||
end
|
|
||||||
@ -1,594 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative '../parseexception'
|
|
||||||
require_relative '../undefinednamespaceexception'
|
|
||||||
require_relative '../source'
|
|
||||||
require 'set'
|
|
||||||
require "strscan"
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
module Parsers
|
|
||||||
# = Using the Pull Parser
|
|
||||||
# <em>This API is experimental, and subject to change.</em>
|
|
||||||
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
|
||||||
# while parser.has_next?
|
|
||||||
# res = parser.next
|
|
||||||
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
|
|
||||||
# end
|
|
||||||
# See the PullEvent class for information on the content of the results.
|
|
||||||
# The data is identical to the arguments passed for the various events to
|
|
||||||
# the StreamListener API.
|
|
||||||
#
|
|
||||||
# Notice that:
|
|
||||||
# parser = PullParser.new( "<a>BAD DOCUMENT" )
|
|
||||||
# while parser.has_next?
|
|
||||||
# res = parser.next
|
|
||||||
# raise res[1] if res.error?
|
|
||||||
# end
|
|
||||||
#
|
|
||||||
# Nat Price gave me some good ideas for the API.
|
|
||||||
class BaseParser
|
|
||||||
LETTER = '[:alpha:]'
|
|
||||||
DIGIT = '[:digit:]'
|
|
||||||
|
|
||||||
COMBININGCHAR = '' # TODO
|
|
||||||
EXTENDER = '' # TODO
|
|
||||||
|
|
||||||
NCNAME_STR= "[#{LETTER}_][-[:alnum:]._#{COMBININGCHAR}#{EXTENDER}]*"
|
|
||||||
QNAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
|
|
||||||
QNAME = /(#{QNAME_STR})/
|
|
||||||
|
|
||||||
# Just for backward compatibility. For example, kramdown uses this.
|
|
||||||
# It's not used in REXML.
|
|
||||||
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
|
|
||||||
|
|
||||||
NAMECHAR = '[\-\w\.:]'
|
|
||||||
NAME = "([\\w:]#{NAMECHAR}*)"
|
|
||||||
NMTOKEN = "(?:#{NAMECHAR})+"
|
|
||||||
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
|
|
||||||
REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
|
|
||||||
REFERENCE_RE = /#{REFERENCE}/
|
|
||||||
|
|
||||||
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
|
|
||||||
DOCTYPE_END = /\A\s*\]\s*>/um
|
|
||||||
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
|
|
||||||
ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
|
|
||||||
COMMENT_START = /\A<!--/u
|
|
||||||
COMMENT_PATTERN = /<!--(.*?)-->/um
|
|
||||||
CDATA_START = /\A<!\[CDATA\[/u
|
|
||||||
CDATA_END = /\A\s*\]\s*>/um
|
|
||||||
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
|
|
||||||
XMLDECL_START = /\A<\?xml\s/u;
|
|
||||||
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
|
|
||||||
INSTRUCTION_START = /\A<\?/u
|
|
||||||
INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
|
|
||||||
TAG_MATCH = /^<((?>#{QNAME_STR}))/um
|
|
||||||
CLOSE_MATCH = /^\s*<\/(#{QNAME_STR})\s*>/um
|
|
||||||
|
|
||||||
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
|
|
||||||
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
|
|
||||||
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
|
|
||||||
|
|
||||||
ENTITY_START = /\A\s*<!ENTITY/
|
|
||||||
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
|
|
||||||
ELEMENTDECL_START = /\A\s*<!ELEMENT/um
|
|
||||||
ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
|
|
||||||
SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
|
|
||||||
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
|
|
||||||
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
|
|
||||||
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
|
|
||||||
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
|
|
||||||
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
|
|
||||||
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
|
|
||||||
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
|
|
||||||
ATTDEF_RE = /#{ATTDEF}/
|
|
||||||
ATTLISTDECL_START = /\A\s*<!ATTLIST/um
|
|
||||||
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
|
||||||
NOTATIONDECL_START = /\A\s*<!NOTATION/um
|
|
||||||
PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
|
|
||||||
SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
|
|
||||||
|
|
||||||
TEXT_PATTERN = /\A([^<]*)/um
|
|
||||||
|
|
||||||
# Entity constants
|
|
||||||
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
|
|
||||||
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
|
|
||||||
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
|
|
||||||
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
|
|
||||||
NDATADECL = "\\s+NDATA\\s+#{NAME}"
|
|
||||||
PEREFERENCE = "%#{NAME};"
|
|
||||||
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
|
|
||||||
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
|
|
||||||
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
|
||||||
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
|
||||||
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
|
||||||
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
|
||||||
|
|
||||||
EREFERENCE = /&(?!#{NAME};)/
|
|
||||||
|
|
||||||
# The predefined XML entities. Each value is
#   [ pattern for the escaped form, escaped form,
#     literal character, pattern for the literal character ]
# The escaped forms had been decoded to the bare characters, which turned
# every substitution built on this table into a no-op.
DEFAULT_ENTITIES = {
  'gt' => [/&gt;/, '&gt;', '>', />/],
  'lt' => [/&lt;/, '&lt;', '<', /</],
  'quot' => [/&quot;/, '&quot;', '"', /"/],
  "apos" => [/&apos;/, "&apos;", "'", /'/]
}
|
|
||||||
|
|
||||||
# Builds a parser over +source+. Assigning through #stream= wraps the input
# and resets the per-document state; the listener list starts out empty.
def initialize(source)
  @listeners = []
  self.stream = source
end
|
|
||||||
|
|
||||||
# Registers +listener+; #pull passes every event to each registered
# listener's #receive.
def add_listener(listener)
  @listeners.push(listener)
end
|
|
||||||
|
|
||||||
attr_reader :source
|
|
||||||
|
|
||||||
# Points the parser at a new input (wrapped through SourceFactory) and
# clears all parsing state left over from a previous document.
def stream=(source)
  @source = SourceFactory.create_from(source)
  @closed = @document_status = nil
  @tags = []
  @stack = []
  @entities = []
  @nsstack = []
end
|
|
||||||
|
|
||||||
# Current position as reported by the source.
def position
  # FIXME: sources that cannot report a position yield 0.
  return 0 unless @source.respond_to?(:position)

  @source.position
end
|
|
||||||
|
|
||||||
# Returns true if there are no more events
|
|
||||||
def empty?
  # Done only when the source is exhausted AND no events are queued.
  @source.empty? && @stack.empty?
end
|
|
||||||
|
|
||||||
# Returns true if there are more events. Synonymous with !empty?
|
|
||||||
def has_next?
  # More to do while the source has unread input or events are queued.
  !@source.empty? || !@stack.empty?
end
|
|
||||||
|
|
||||||
# Push an event back on the head of the stream. This method
|
|
||||||
# has (theoretically) infinite depth.
|
|
||||||
def unshift(token)
  # Position 0 of the queue is what the next pull returns.
  @stack.insert(0, token)
end
|
|
||||||
|
|
||||||
# Peek at the +depth+ event in the stack. The first element on the stack
|
|
||||||
# is at depth 0. If +depth+ is -1, will parse to the end of the input
|
|
||||||
# stream and return the last event, which is always :end_document.
|
|
||||||
# Be aware that this causes the stream to be parsed up to the +depth+
|
|
||||||
# event, so you can effectively pre-parse the entire document (pull the
|
|
||||||
# entire thing into memory) using this method.
|
|
||||||
def peek(depth=0)
  raise %Q[Illegal argument "#{depth}"] if depth < -1

  pulled = []
  if depth == -1
    # Drain everything that is left.
    pulled << pull until empty?
  else
    # pull may move queued events out of @stack, so the queue size is
    # re-read on every iteration.
    needed = depth + 1
    pulled << pull while @stack.size + pulled.size < needed
  end
  # Everything pulled is put back on the queue so later pulls see it again.
  @stack += pulled unless pulled.empty?
  @stack[depth]
end
|
|
||||||
|
|
||||||
# Returns the next event. This is a +PullEvent+ object.
|
|
||||||
def pull
  event = pull_event
  # Every registered listener sees the event before the caller does.
  @listeners.each { |listener| listener.receive(event) }
  event
end
|
|
||||||
|
|
||||||
# Reads the next event from the source and returns it as an Array whose
# first element is the event type.
#
# Compared with the previous version, unterminated constructs (comment, XML
# declaration, DOCTYPE internals, entity declaration) now raise
# REXML::ParseException instead of failing on a nil match.
def pull_event
  # A self-closing tag was reported as :start_element on the previous call;
  # report its :end_element now.
  if @closed
    x, @closed = @closed, nil
    return [ :end_element, x ]
  end
  return [ :end_document ] if empty?
  # Queued events (see #unshift and #peek) come first.
  return @stack.shift if @stack.size > 0
  @source.read if @source.buffer.size<2

  # ---- Prolog: nothing but markup/whitespace seen so far ----
  if @document_status == nil
    word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
    word = word[1] unless word.nil?
    case word
    when COMMENT_START
      md = @source.match( COMMENT_PATTERN, true )
      raise REXML::ParseException.new("Unclosed comment", @source) if md.nil?
      return [ :comment, md[1] ]
    when XMLDECL_START
      md = @source.match( XMLDECL_PATTERN, true )
      if md.nil?
        raise REXML::ParseException.new("Malformed XML declaration", @source)
      end
      results = md[1]
      version = VERSION.match( results )
      version = version[1] unless version.nil?
      encoding = ENCODING.match(results)
      encoding = encoding[1] unless encoding.nil?
      if need_source_encoding_update?(encoding)
        @source.encoding = encoding
      end
      if encoding.nil? && /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
        encoding = "UTF-16"
      end
      standalone = STANDALONE.match(results)
      standalone = standalone[1] unless standalone.nil?
      return [ :xmldecl, version, encoding, standalone ]
    when INSTRUCTION_START
      return process_instruction
    when DOCTYPE_START
      md = @source.match( DOCTYPE_PATTERN, true )
      @nsstack.unshift(Set.new)
      identity = md[1]
      close = md[2]
      identity =~ IDENTITY
      name = $1
      raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
      pub_sys = $2.nil? ? nil : $2.strip
      long_name = $4.nil? ? nil : $4.strip
      uri = $6.nil? ? nil : $6.strip
      args = [ :start_doctype, name, pub_sys, long_name, uri ]
      if close == ">"
        # No internal subset: queue the matching :end_doctype right away.
        @document_status = :after_doctype
        @source.read if @source.buffer.size<2
        @source.match(/^\s*/um, true)
        @stack << [ :end_doctype ]
      else
        @document_status = :in_doctype
      end
      return args
    when /^\s+/
      # Leading whitespace: left in the buffer, handled by the text branch.
    else
      @document_status = :after_doctype
      @source.read if @source.buffer.size<2
      @source.match(/\s*/um, true)
      if @source.encoding == "UTF-8"
        @source.buffer.force_encoding(::Encoding::UTF_8)
      end
    end
  end

  # ---- Inside the DOCTYPE internal subset ----
  if @document_status == :in_doctype
    md = @source.match(/\s*(.*?>)/um)
    if md.nil?
      raise REXML::ParseException.new("Malformed DOCTYPE: unclosed declaration", @source)
    end
    case md[1]
    when SYSTEMENTITY
      match = @source.match( SYSTEMENTITY, true )[1]
      return [ :externalentity, match ]

    when ELEMENTDECL_START
      return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]

    when ENTITY_START
      entity_md = @source.match( ENTITYDECL, true )
      if entity_md.nil?
        raise REXML::ParseException.new("Malformed entity declaration", @source)
      end
      match = entity_md.to_a.compact
      match[0] = :entitydecl
      ref = false
      if match[1] == '%'
        ref = true
        match.delete_at 1
      end
      # Now we have to sort out what kind of entity reference this is
      if match[2] == 'SYSTEM'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match.delete_at(4) if match.size > 4 # Chop out NDATA decl
        # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
      elsif match[2] == 'PUBLIC'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match[4] = match[4][1..-2] # HREF
        match.delete_at(5) if match.size > 5 # Chop out NDATA decl
        # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
      else
        match[2] = match[2][1..-2]
        match.pop if match.size == 4
        # match is [ :entity, name, value ]
      end
      match << '%' if ref
      return match
    when ATTLISTDECL_START
      md = @source.match( ATTLISTDECL_PATTERN, true )
      raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
      element = md[1]
      contents = md[0]

      pairs = {}
      values = md[0].scan( ATTDEF_RE )
      values.each do |attdef|
        unless attdef[3] == "#IMPLIED"
          attdef.compact!
          val = attdef[3]
          val = attdef[4] if val == "#FIXED "
          pairs[attdef[0]] = val
          # Namespace prefixes declared through ATTLIST defaults count as
          # declared for the document.
          if attdef[0] =~ /^xmlns:(.*)/
            @nsstack[0] << $1
          end
        end
      end
      return [ :attlistdecl, element, pairs, contents ]
    when NOTATIONDECL_START
      md = nil
      if @source.match( PUBLIC )
        md = @source.match( PUBLIC, true )
        vals = [md[1],md[2],md[4],md[6]]
      elsif @source.match( SYSTEM )
        md = @source.match( SYSTEM, true )
        vals = [md[1],md[2],nil,md[4]]
      else
        raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
      end
      return [ :notationdecl, *vals ]
    when DOCTYPE_END
      @document_status = :after_doctype
      @source.match( DOCTYPE_END, true )
      return [ :end_doctype ]
    end
  end

  # ---- Document content ----
  begin
    if @source.buffer[0] == ?<
      if @source.buffer[1] == ?/
        # End tag: must match the most recently opened element.
        @nsstack.shift
        last_tag = @tags.pop
        md = @source.match( CLOSE_MATCH, true )
        if md and !last_tag
          message = "Unexpected top-level end tag (got '#{md[1]}')"
          raise REXML::ParseException.new(message, @source)
        end
        if md.nil? or last_tag != md[1]
          message = "Missing end tag for '#{last_tag}'"
          message << " (got '#{md[1]}')" if md
          raise REXML::ParseException.new(message, @source)
        end
        return [ :end_element, last_tag ]
      elsif @source.buffer[1] == ?!
        md = @source.match(/\A(\s*[^>]*>)/um)
        raise REXML::ParseException.new("Malformed node", @source) unless md
        if md[0][2] == ?-
          md = @source.match( COMMENT_PATTERN, true )
          raise REXML::ParseException.new("Unclosed comment", @source) if md.nil?

          case md[1]
          when /--/, /-\z/
            raise REXML::ParseException.new("Malformed comment", @source)
          end

          return [ :comment, md[1] ]
        else
          md = @source.match( CDATA_PATTERN, true )
          return [ :cdata, md[1] ] if md
        end
        raise REXML::ParseException.new( "Declarations can only occur "+
          "in the doctype declaration.", @source)
      elsif @source.buffer[1] == ??
        return process_instruction
      else
        # Get the next tag
        md = @source.match(TAG_MATCH, true)
        unless md
          raise REXML::ParseException.new("malformed XML: missing tag start", @source)
        end
        prefixes = Set.new
        prefixes << md[2] if md[2]
        @nsstack.unshift(curr_ns=Set.new)
        attributes, closed = parse_attributes(prefixes, curr_ns)
        # Verify that all of the prefixes have been defined
        prefixes.each do |prefix|
          unless @nsstack.find{|k| k.member?(prefix)}
            raise UndefinedNamespaceException.new(prefix,@source,self)
          end
        end

        if closed
          # Self-closing: remember the name so the next call emits
          # :end_element.
          @closed = md[1]
          @nsstack.shift
        else
          @tags.push( md[1] )
        end
        return [ :start_element, md[1], attributes ]
      end
    else
      md = @source.match( TEXT_PATTERN, true )
      if md[0].length == 0
        @source.match( /(\s+)/, true )
      end
      return [ :text, md[1] ]
    end
  rescue REXML::UndefinedNamespaceException
    raise
  rescue REXML::ParseException
    raise
  rescue => error
    # Anything unexpected is reported as a parse failure that carries the
    # original exception.
    raise REXML::ParseException.new( "Exception parsing", @source, self, error )
  end
  return [ :dummy ]
end
|
|
||||||
private :pull_event
|
|
||||||
|
|
||||||
# Resolves the entity named +reference+: first through +entities+ (when
# given), then through DEFAULT_ENTITIES. The value found is passed through
# #unnormalize; nil is returned when the name is unknown.
def entity(reference, entities)
  value = entities && entities[reference]
  unless value
    predefined = DEFAULT_ENTITIES[reference]
    value = predefined && predefined[2]
  end
  return nil unless value

  unnormalize(value, entities)
end
|
|
||||||
|
|
||||||
# Escapes all possible entities
|
|
||||||
def normalize( input, entities=nil, entity_filter=nil )
  # Work on a copy; dup (unlike clone) also accepts a frozen input.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  # Escape every ampersand that does not already start a reference.
  copy.gsub!( EREFERENCE, '&amp;' )
  entities.each do |key, value|
    # Entities named in the filter are left unescaped. (This used to call
    # #entity without arguments, which raised ArgumentError.)
    next if entity_filter and entity_filter.include?(key)
    copy.gsub!( value, "&#{key};" )
  end if entities
  copy.gsub!( EREFERENCE, '&amp;' )
  # Finally replace the literal form of each predefined entity (slot 3)
  # with its escaped form (slot 1).
  DEFAULT_ENTITIES.each do |key, value|
    copy.gsub!( value[3], value[1] )
  end
  copy
end
|
|
||||||
|
|
||||||
# Unescapes all possible entities
|
|
||||||
def unnormalize( string, entities=nil, filter=nil )
  # Work on a copy; dup (unlike clone) also accepts a frozen input.
  rv = string.dup
  # Line endings are normalized to "\n" first.
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE_RE )
  return rv if matches.size == 0
  # Numeric character references: decimal (&#65;) and hexadecimal (&#x41;).
  rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
    m=$1
    m = "0#{m}" if m[0] == ?x
    [Integer(m)].pack('U*')
  }
  # Keep only the named references; numeric ones scanned as nil.
  matches.collect!{|x|x[0]}.compact!
  if matches.size > 0
    matches.each do |entity_reference|
      unless filter and filter.include?(entity_reference)
        entity_value = entity( entity_reference, entities )
        if entity_value
          # Literal pattern + block: neither regex metacharacters in the
          # name nor backslashes in the value get reinterpreted.
          rv.gsub!( "&#{entity_reference};" ) { entity_value }
        else
          er = DEFAULT_ENTITIES[entity_reference]
          rv.gsub!( er[0], er[2] ) if er
        end
      end
    end
    # &amp; goes last so a decoded "&" cannot form a new reference.
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
|
|
||||||
|
|
||||||
private
|
|
||||||
# Decides whether the encoding named in the XML declaration should be
# applied to the source: not when none was declared, and not for a bare
# "UTF-16" (any letter case).
def need_source_encoding_update?(xml_declaration_encoding)
  if xml_declaration_encoding.nil?
    false
  elsif /\AUTF-16\z/i =~ xml_declaration_encoding
    false
  else
    true
  end
end
|
|
||||||
|
|
||||||
# Consumes a processing instruction from the source and returns
# [:processing_instruction, target, content]. Raises
# REXML::ParseException when the input does not match INSTRUCTION_PATTERN.
def process_instruction
  instruction = @source.match(INSTRUCTION_PATTERN, true)
  raise REXML::ParseException.new("Invalid processing instruction node", @source) unless instruction

  target, content = instruction[1], instruction[2]
  [:processing_instruction, target, content]
end
|
|
||||||
|
|
||||||
# Consumes the remainder of a start tag from the source and parses its
# attributes.
#
# prefixes receives every namespace prefix used by an attribute name
# (except "xml"); curr_ns receives every prefix declared via xmlns:*.
# Returns [attributes, closed]: a Hash of qualified name => raw value, and
# whether the tag ended in "/>".
def parse_attributes(prefixes, curr_ns)
  attributes = {}
  closed = false
  tag_rest = @source.match(/^(.*?)(\/)?>/um, true)
  if tag_rest.nil?
    message = "Start tag isn't ended"
    raise REXML::ParseException.new(message, @source)
  end

  raw_attributes = tag_rest[1]
  closed = !tag_rest[2].nil?
  return attributes, closed if raw_attributes.nil? || raw_attributes.empty?

  scanner = StringScanner.new(raw_attributes)
  until scanner.eos?
    break if scanner.scan(/\s+/) && scanner.eos?

    pos = scanner.pos
    loop do
      # Fast path: a complete name="value" pair.
      break if scanner.scan(ATTRIBUTE_PATTERN)

      # Slow path: scan piece by piece to find out what is wrong.
      unless scanner.scan(QNAME)
        message = "Invalid attribute name: <#{scanner.rest}>"
        raise REXML::ParseException.new(message, @source)
      end
      attr_name = scanner[0]
      unless scanner.scan(/\s*=\s*/um)
        message = "Missing attribute equal: <#{attr_name}>"
        raise REXML::ParseException.new(message, @source)
      end
      quote = scanner.scan(/['"]/)
      unless quote
        message = "Missing attribute value start quote: <#{attr_name}>"
        raise REXML::ParseException.new(message, @source)
      end
      unless scanner.scan(/.*#{Regexp.escape(quote)}/um)
        # The value holds a ">" that cut the tag short. Pull the next chunk
        # up to ">" from the source, re-attach what was cut off, and rescan
        # this attribute from its start.
        more = @source.match(/^(.*?)(\/)?>/um, true)
        if more
          scanner << "/" if closed
          scanner << ">"
          scanner << more[1]
          scanner.pos = pos
          closed = !more[2].nil?
          next
        end
        message =
          "Missing attribute value end quote: <#{attr_name}>: <#{quote}>"
        raise REXML::ParseException.new(message, @source)
      end
    end

    # Groups of ATTRIBUTE_PATTERN: 1 qualified name, 2 prefix, 3 local part,
    # 4 quote character, 5 value.
    name = scanner[1]
    prefix = scanner[2]
    local_part = scanner[3]
    value = scanner[5]

    if prefix == "xmlns"
      case local_part
      when "xml"
        if value != "http://www.w3.org/XML/1998/namespace"
          msg = "The 'xml' prefix must not be bound to any other namespace "+
            "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
          raise REXML::ParseException.new( msg, @source, self )
        end
      when "xmlns"
        msg = "The 'xmlns' prefix must not be declared "+
          "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
        raise REXML::ParseException.new( msg, @source, self)
      end
      curr_ns << local_part
    elsif prefix
      prefixes << prefix unless prefix == "xml"
    end

    if attributes.has_key?(name)
      msg = "Duplicate attribute #{name.inspect}"
      raise REXML::ParseException.new(msg, @source, self)
    end

    attributes[name] = value
  end
  return attributes, closed
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
=begin
|
|
||||||
case event[0]
|
|
||||||
when :start_element
|
|
||||||
when :text
|
|
||||||
when :end_element
|
|
||||||
when :processing_instruction
|
|
||||||
when :cdata
|
|
||||||
when :comment
|
|
||||||
when :xmldecl
|
|
||||||
when :start_doctype
|
|
||||||
when :end_doctype
|
|
||||||
when :externalentity
|
|
||||||
when :elementdecl
|
|
||||||
when :entity
|
|
||||||
when :attlistdecl
|
|
||||||
when :notationdecl
|
|
||||||
when :end_doctype
|
|
||||||
end
|
|
||||||
=end
|
|
||||||
@ -1,59 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative 'streamparser'
|
|
||||||
require_relative 'baseparser'
|
|
||||||
require_relative '../light/node'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
module Parsers
|
|
||||||
# Builds a lightweight nested-Array tree from the events of a BaseParser.
class LightParser
  # +stream+ is handed to a BaseParser and kept so #rewind can reuse it.
  def initialize(stream)
    @stream = stream
    @parser = REXML::Parsers::BaseParser.new(stream)
  end

  # Forwards the listener to the underlying parser.
  def add_listener(listener)
    @parser.add_listener(listener)
  end

  # Rewinds the stream and re-attaches it to the parser.
  def rewind
    @stream.rewind
    @parser.stream = @stream
  end

  # Pulls events until :end_document and returns the root node
  # [ :document, child, ... ]. Each event array becomes a node by having its
  # parent node inserted at index 1.
  def parse
    root = context = [ :document ]
    loop do
      event = @parser.pull
      type = event[0]
      break if type == :end_document

      if type == :end_element || type == :end_doctype
        # Step back up: index 1 of a node holds its parent.
        context = context[1]
        next
      end

      context << event
      event[1, 0] = [context]
      # Only containers become the new insertion point.
      context = event if type == :start_element || type == :start_doctype
    end
    root
  end
end
|
|
||||||
|
|
||||||
# An element is an array. The array contains:
|
|
||||||
# 0 The parent element
|
|
||||||
# 1 The tag name
|
|
||||||
# 2 A hash of attributes
|
|
||||||
# 3..-1 The child elements
|
|
||||||
# An element is an array of size > 3
|
|
||||||
# Text is a String
|
|
||||||
# PIs are [ :processing_instruction, target, data ]
|
|
||||||
# Comments are [ :comment, data ]
|
|
||||||
# DocTypes are DocType structs
|
|
||||||
# The root is an array with XMLDecls, Text, DocType, Array, Text
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@ -1,197 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require 'forwardable'
|
|
||||||
|
|
||||||
require_relative '../parseexception'
|
|
||||||
require_relative 'baseparser'
|
|
||||||
require_relative '../xmltokens'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
module Parsers
|
|
||||||
# = Using the Pull Parser
|
|
||||||
# <em>This API is experimental, and subject to change.</em>
|
|
||||||
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
|
||||||
# while parser.has_next?
|
|
||||||
# res = parser.pull
|
|
||||||
# puts res[1]['att'] if res.start_element? and res[0] == 'b'
|
|
||||||
# end
|
|
||||||
# See the PullEvent class for information on the content of the results.
|
|
||||||
# The data is identical to the arguments passed for the various events to
|
|
||||||
# the StreamListener API.
|
|
||||||
#
|
|
||||||
# Notice that:
|
|
||||||
# parser = PullParser.new( "<a>BAD DOCUMENT" )
|
|
||||||
# while parser.has_next?
|
|
||||||
# res = parser.pull
|
|
||||||
# raise res[1] if res.error?
|
|
||||||
# end
|
|
||||||
#
|
|
||||||
# Nat Price gave me some good ideas for the API.
|
|
||||||
class PullParser
  include XMLTokens
  extend Forwardable

  def_delegators( :@parser, :entity )
  def_delegators( :@parser, :source )

  # Wraps a BaseParser reading from +stream+. @entities records internal
  # entity declarations seen so far; @my_stack holds events that were
  # peeked or pushed back.
  def initialize stream
    @entities = {}
    @listeners = nil
    @parser = BaseParser.new( stream )
    @my_stack = []
  end

  # Stores +listener+. NOTE(review): nothing in this class notifies the
  # stored listeners.
  def add_listener( listener )
    @listeners = [] unless @listeners
    @listeners << listener
  end

  # True while an event is buffered here or the parser has more input.
  # (Previously delegated straight to the parser, which hid events still
  # waiting in the local buffer.)
  def has_next?
    !@my_stack.empty? || @parser.has_next?
  end

  # Opposite of #has_next?.
  def empty?
    !has_next?
  end

  # Yields each remaining event; returns an Enumerator without a block.
  def each
    return enum_for(:each) unless block_given?
    while has_next?
      yield self.pull
    end
  end

  # Returns the event +depth+ positions ahead without consuming it.
  # Peeked events go through the same processing as pulled ones, so entity
  # declarations are recorded and text events carry their unnormalized form.
  def peek depth=0
    @my_stack.push(next_event) while @my_stack.length <= depth
    @my_stack[depth]
  end

  # Returns the next event, taking buffered events first.
  def pull
    return @my_stack.shift if @my_stack.length > 0

    next_event
  end

  # Pushes +token+ back so the next #pull returns it.
  def unshift token
    @my_stack.unshift token
  end

  private

  # Reads one raw event from the parser, updates the entity table, appends
  # the unnormalized text to :text events, and wraps it in a PullEvent.
  def next_event
    event = @parser.pull
    case event[0]
    when :entitydecl
      @entities[ event[1] ] =
        event[2] unless event[2] =~ /PUBLIC|SYSTEM/
    when :text
      unnormalized = @parser.unnormalize( event[1], @entities )
      event << unnormalized
    end
    PullEvent.new( event )
  end
end
|
|
||||||
|
|
||||||
# A parsing event. The contents of the event are accessed as an +Array+,
|
|
||||||
# and the type is given either by the ...? methods, or by accessing the
|
|
||||||
# +type+ accessor. The contents of this object vary from event to event,
|
|
||||||
# but are identical to the arguments passed to +StreamListener+s for each
|
|
||||||
# event.
|
|
||||||
class PullEvent
|
|
||||||
# The type of this event. Will be one of :tag_start, :tag_end, :text,
|
|
||||||
# :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl,
|
|
||||||
# :notationdecl, :entity, :cdata, :xmldecl, or :error.
|
|
||||||
def initialize(arg)
  # arg is stored as given; element 0 is read as the event type by the
  # other methods.
  @contents = arg
end
|
|
||||||
|
|
||||||
# Array-style access to the event's contents, not counting the type symbol
# stored in slot 0 of @contents.
#
# start may be a Range or a Numeric; with a Numeric, endd is an optional
# length. Raises RuntimeError for any other kind of start.
def []( start, endd=nil)
  # Caller index i lives at i + 1. Negative indices already count from the
  # end and nil marks an open range end, so both pass through unchanged.
  shift = lambda { |i| i.nil? || i < 0 ? i : i + 1 }
  if start.kind_of? Range
    # Both bounds are shifted (previously only the lower one was, so
    # ev[0..1] returned a single element) and exclude_end? is kept.
    first = start.begin.nil? ? 1 : shift.call(start.begin)
    @contents.slice( Range.new(first, shift.call(start.end), start.exclude_end?) )
  elsif start.kind_of? Numeric
    if endd.nil?
      @contents.slice( shift.call(start) )
    else
      @contents.slice( shift.call(start), endd )
    end
  else
    raise "Illegal argument #{start.inspect} (#{start.class})"
  end
end
|
|
||||||
|
|
||||||
def event_type
|
|
||||||
@contents[0]
|
|
||||||
end
|
|
||||||
|
|
||||||
# Content: [ String tag_name, Hash attributes ]
|
|
||||||
def start_element?
|
|
||||||
@contents[0] == :start_element
|
|
||||||
end
|
|
||||||
|
|
||||||
# Content: [ String tag_name ]
|
|
||||||
def end_element?
|
|
||||||
@contents[0] == :end_element
|
|
||||||
end
|
|
||||||
|
|
||||||
# Content: [ String raw_text, String unnormalized_text ]
|
|
||||||
def text?
|
|
||||||
@contents[0] == :text
|
|
||||||
end
|
|
||||||
|
|
||||||
# Content: [ String text ]
|
|
||||||
def instruction?
|
|
||||||
@contents[0] == :processing_instruction
|
|
||||||
end
|
|
||||||
|
|
||||||
# Content: [ String text ]
|
|
||||||
def comment?
|
|
||||||
@contents[0] == :comment
|
|
||||||
end
|
|
||||||
|
|
||||||
# Content: [ String name, String pub_sys, String long_name, String uri ]
|
|
||||||
def doctype?
|
|
||||||
@contents[0] == :start_doctype
|
|
||||||
end
|
|
||||||
|
|
||||||
# Content: [ String text ]
|
|
||||||
def attlistdecl?
|
|
||||||
@contents[0] == :attlistdecl
|
|
||||||
end
|
|
||||||
|
|
||||||
# Content: [ String text ]
|
|
||||||
def elementdecl?
|
|
||||||
@contents[0] == :elementdecl
|
|
||||||
end
|
|
||||||
|
|
||||||
# Due to the wonders of DTDs, an entity declaration can be just about
|
|
||||||
# anything. There's no way to normalize it; you'll have to interpret the
|
|
||||||
# content yourself. However, the following is true:
|
|
||||||
#
|
|
||||||
# * If the entity declaration is an internal entity:
|
|
||||||
# [ String name, String value ]
|
|
||||||
# Content: [ String text ]
|
|
||||||
def entitydecl?
|
|
||||||
@contents[0] == :entitydecl
|
|
||||||
end
|
|
||||||
|
|
||||||
# Content: [ String text ]
|
|
||||||
def notationdecl?
|
|
||||||
@contents[0] == :notationdecl
|
|
||||||
end
|
|
||||||
|
|
||||||
# Content: [ String text ]
|
|
||||||
def entity?
|
|
||||||
@contents[0] == :entity
|
|
||||||
end
|
|
||||||
|
|
||||||
# Content: [ String text ]
|
|
||||||
def cdata?
|
|
||||||
@contents[0] == :cdata
|
|
||||||
end
|
|
||||||
|
|
||||||
# Content: [ String version, String encoding, String standalone ]
|
|
||||||
def xmldecl?
|
|
||||||
@contents[0] == :xmldecl
|
|
||||||
end
|
|
||||||
|
|
||||||
def error?
|
|
||||||
@contents[0] == :error
|
|
||||||
end
|
|
||||||
|
|
||||||
def inspect
|
|
||||||
@contents[0].to_s + ": " + @contents[1..-1].inspect
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@ -1,273 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative 'baseparser'
|
|
||||||
require_relative '../parseexception'
|
|
||||||
require_relative '../namespace'
|
|
||||||
require_relative '../text'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
module Parsers
|
|
||||||
# SAX2Parser
#
# Pulls events from a BaseParser and dispatches them, SAX2 style, to the
# blocks and listener objects registered with #listen.
class SAX2Parser
  def initialize source
    @parser = BaseParser.new(source)
    @listeners = []
    @procs = []
    @namespace_stack = []
    @has_listeners = false
    @tag_stack = []
    @entities = {}
  end

  # The source of the underlying parser.
  def source
    @parser.source
  end

  # Forwards +listener+ to the underlying parser.
  def add_listener( listener )
    @parser.add_listener( listener )
  end

  # Listen arguments:
  #
  # Symbol, Array, Block
  #     Listen to Symbol events on Array elements
  # Symbol, Block
  #   Listen to Symbol events
  # Array, Listener
  #     Listen to all events on Array elements
  # Array, Block
  #     Listen to :start_element events on Array elements
  # Listener
  #     Listen to All events
  #
  # Symbol can be one of: :start_element, :end_element,
  # :start_prefix_mapping, :end_prefix_mapping, :characters,
  # :processing_instruction, :doctype, :attlistdecl, :elementdecl,
  # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
  #
  # There is an additional symbol that can be listened for: :progress.
  # This will be called for every event generated, passing in the current
  # stream position.
  #
  # Array contains regular expressions or strings which will be matched
  # against fully qualified element names.
  #
  # Listener must implement the methods in SAX2Listener
  #
  # Block will be passed the same arguments as a SAX2Listener method would
  # be, where the method name is the same as the matched Symbol.
  # See the SAX2Listener for more information.
  #
  # Every registration goes through #add, so registering the identical
  # (symbol, match, handler) triple twice has no additional effect.
  def listen( *args, &blok )
    if args[0].kind_of? Symbol
      if args.size == 2
        args[1].each { |match| add( [args[0], match, blok] ) }
      else
        add( [args[0], nil, blok] )
      end
    elsif args[0].kind_of? Array
      if args.size == 2
        args[0].each { |match| add( [nil, match, args[1]] ) }
      else
        args[0].each { |match| add( [ :start_element, match, blok ] ) }
      end
    else
      add([nil, nil, args[0]])
    end
  end

  # Removes every registration of +listener+, or, when no listener is
  # given, every registration of the block +blok+.
  def deafen( listener=nil, &blok )
    if listener
      @listeners.delete_if {|item| item[-1] == listener }
      @has_listeners = false if @listeners.size == 0
    else
      @procs.delete_if {|item| item[-1] == blok }
    end
  end

  # Pulls events until :end_document, notifying the matching blocks and
  # listeners for each one.  A :progress notification carrying the parser
  # position follows every event except :end_document.
  def parse
    @procs.each { |sym,match,block| block.call if sym == :start_document }
    @listeners.each { |sym,match,block|
      block.start_document if sym == :start_document or sym.nil?
    }
    # `while true` rather than `loop`: `loop` would silently swallow a
    # StopIteration raised by a handler.
    while true
      event = @parser.pull
      case event[0]
      when :end_document
        handle( :end_document )
        break
      when :start_doctype
        handle( :doctype, *event[1..-1])
      when :end_doctype
        # No notification is sent for the end of the doctype; only the
        # :progress notification below is emitted.
      when :start_element
        @tag_stack.push(event[1])
        # find the observers for namespaces
        procs = get_procs( :start_prefix_mapping, event[1] )
        listeners = get_listeners( :start_prefix_mapping, event[1] )
        if procs or listeners
          # break out the namespace declarations
          # The attributes live in event[2]
          event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
          nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
          # "xmlns:foo" => "foo"; a bare "xmlns" yields a nil prefix
          nsdecl.collect! { |n, value| [ n[6..-1], value ] }
          @namespace_stack.push({})
          nsdecl.each do |n,v|
            @namespace_stack[-1][n] = v
            # notify observers of namespaces
            procs.each { |ob| ob.call( n, v ) } if procs
            listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
          end
        end
        event[1] =~ Namespace::NAMESPLIT
        prefix = $1
        local = $2
        uri = get_namespace(prefix)
        # find the observers for start_element
        procs = get_procs( :start_element, event[1] )
        listeners = get_listeners( :start_element, event[1] )
        # notify observers
        procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
        listeners.each { |ob|
          ob.start_element( uri, local, event[1], event[2] )
        } if listeners
      when :end_element
        @tag_stack.pop
        event[1] =~ Namespace::NAMESPLIT
        prefix = $1
        local = $2
        uri = get_namespace(prefix)
        # find the observers for end_element
        procs = get_procs( :end_element, event[1] )
        listeners = get_listeners( :end_element, event[1] )
        # notify observers
        procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
        listeners.each { |ob|
          ob.end_element( uri, local, event[1] )
        } if listeners

        namespace_mapping = @namespace_stack.pop
        # find the observers for namespaces
        procs = get_procs( :end_prefix_mapping, event[1] )
        listeners = get_listeners( :end_prefix_mapping, event[1] )
        if procs or listeners
          namespace_mapping.each do |ns_prefix, ns_uri|
            # notify observers of namespaces
            procs.each { |ob| ob.call( ns_prefix ) } if procs
            listeners.each { |ob| ob.end_prefix_mapping(ns_prefix) } if listeners
          end
        end
      when :text
        copy = event[1].clone

        # Expands references to entities recorded in @entities, recursing
        # into the replacement text; unknown references are left as-is.
        esub = proc { |match|
          if @entities.has_key?($1)
            @entities[$1].gsub(Text::REFERENCE, &esub)
          else
            match
          end
        }

        copy.gsub!( Text::REFERENCE, &esub )
        copy.gsub!( Text::NUMERICENTITY ) {|m|
          m=$1
          # "x41" => "0x41" so that Integer() reads it as hexadecimal
          m = "0#{m}" if m[0] == ?x
          [Integer(m)].pack('U*')
        }
        handle( :characters, copy )
      when :entitydecl
        handle_entitydecl( event )
      when :processing_instruction, :comment, :attlistdecl,
        :elementdecl, :cdata, :notationdecl, :xmldecl
        handle( *event )
      end
      handle( :progress, @parser.position )
    end
  end

  private

  # Notifies the blocks and listeners registered for +symbol+ on the
  # innermost open element.  Listeners receive +symbol+ as a method call.
  def handle( symbol, *arguments )
    tag = @tag_stack[-1]
    procs = get_procs( symbol, tag )
    listeners = get_listeners( symbol, tag )
    # notify observers
    procs.each { |ob| ob.call( *arguments ) } if procs
    listeners.each { |l|
      l.send( symbol.to_s, *arguments )
    } if listeners
  end

  # Records internal entities (three-element events) in @entities,
  # re-inserts the "NDATA" keyword for SYSTEM/PUBLIC declarations that
  # carry one more element, moves a trailing "%" marker to the front, and
  # notifies observers with the declaration contents as a single Array.
  def handle_entitydecl( event )
    @entities[ event[1] ] = event[2] if event.size == 3
    parameter_reference_p = false
    case event[2]
    when "SYSTEM"
      if event.size == 5
        if event.last == "%"
          parameter_reference_p = true
        else
          event[4, 0] = "NDATA"
        end
      end
    when "PUBLIC"
      if event.size == 6
        if event.last == "%"
          parameter_reference_p = true
        else
          event[5, 0] = "NDATA"
        end
      end
    else
      parameter_reference_p = (event.size == 4)
    end
    event[1, 0] = event.pop if parameter_reference_p
    handle( event[0], event[1..-1] )
  end

  # The following methods are duplicates, but it is faster than using
  # a helper
  #
  # Returns the blocks registered for +symbol+ whose match is nil, equal
  # to +name+, or a Regexp matching +name+; nil when no block is
  # registered at all.
  def get_procs( symbol, name )
    return nil if @procs.size == 0
    @procs.find_all do |sym, match, block|
      (sym.nil? or symbol == sym) and
        (match.nil? or name == match or
          (match.kind_of? Regexp and name =~ match))
    end.collect{|x| x[-1]}
  end

  # Listener counterpart of #get_procs.
  def get_listeners( symbol, name )
    return nil if @listeners.size == 0
    @listeners.find_all do |sym, match, block|
      (sym.nil? or symbol == sym) and
        (match.nil? or name == match or
          (match.kind_of? Regexp and name =~ match))
    end.collect{|x| x[-1]}
  end

  # Stores a [symbol, match, handler] triple: handlers responding to
  # +call+ go to @procs, everything else to @listeners.  Duplicates are
  # ignored.
  def add( pair )
    if pair[-1].respond_to? :call
      @procs << pair unless @procs.include? pair
    else
      @listeners << pair unless @listeners.include? pair
      @has_listeners = true
    end
  end

  # Returns the URI bound to +prefix+ by the innermost scope on the
  # namespace stack that declares it, or nil when no scope does.
  def get_namespace( prefix )
    scope = @namespace_stack.reverse_each.find { |ns| not ns[prefix].nil? }
    scope[prefix] if scope
  end
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@ -1,61 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative "baseparser"
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
module Parsers
|
|
||||||
# Pulls events from a BaseParser and forwards each one to a single
# listener object as a method call.
class StreamParser
  def initialize(source, listener)
    @listener = listener
    @parser = BaseParser.new(source)
    @tag_stack = []
  end

  # Forwards +listener+ to the underlying parser.
  def add_listener(listener)
    @parser.add_listener(listener)
  end

  # Reads events until :end_document, notifying the listener for each.
  # Raises a ParseException if elements are still open at the end.
  def parse
    while true
      event = @parser.pull
      case event[0]
      when :end_document
        return if @tag_stack.empty?

        tag_path = "/" + @tag_stack.join("/")
        raise ParseException.new("Missing end tag for '#{tag_path}'",
                                 @parser.source)
      when :start_element
        @tag_stack << event[1]
        # Attribute values are unnormalized in place before being reported.
        attributes = event[2]
        attributes.each do |name, value|
          attributes[name] = @parser.unnormalize(value)
        end
        @listener.tag_start(event[1], attributes)
      when :end_element
        @listener.tag_end(event[1])
        @tag_stack.pop
      when :text
        @listener.text(@parser.unnormalize(event[1]))
      when :processing_instruction
        @listener.instruction(*event[1, 2])
      when :start_doctype
        @listener.doctype(*event[1..-1])
      when :end_doctype
        # FIXME: remove this condition for milestone:3.2
        @listener.doctype_end if @listener.respond_to?(:doctype_end)
      when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
        # Arguments are splatted: one listener argument per event element.
        @listener.send(event[0].to_s, *event[1..-1])
      when :entitydecl, :notationdecl
        # The whole declaration is passed as a single Array argument.
        @listener.send(event[0].to_s, event[1..-1])
      when :externalentity
        # Strip the leading "%" and trailing ";" of the reference.
        @listener.entity(event[1].gsub(/\A%|;\z/, ""))
      end
    end
  end
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@ -1,101 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative '../validation/validationexception'
|
|
||||||
require_relative '../undefinednamespaceexception'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
module Parsers
|
|
||||||
# Pulls events from a BaseParser and assembles them into a node tree
# rooted at the supplied build context.
class TreeParser
  def initialize(source, build_context = Document.new)
    @build_context = build_context
    @parser = Parsers::BaseParser.new(source)
  end

  # Forwards +listener+ to the underlying parser.
  def add_listener(listener)
    @parser.add_listener(listener)
  end

  # Consumes events until :end_document, adding a node to the current
  # build context for each.  ValidationException and ParseException pass
  # through untouched; any other StandardError is wrapped in a
  # ParseException.
  def parse
    open_tags = []
    inside_doctype = false
    declared = nil
    while true
      event = @parser.pull
      #STDERR.puts "TREEPARSER GOT #{event.inspect}"
      case event[0]
      when :end_document
        return if open_tags.empty?

        raise ParseException.new("No close tag for #{@build_context.xpath}",
                                 @parser.source, @parser)
      when :start_element
        open_tags.push(event[1])
        # The new element becomes the context for subsequent events.
        element = @build_context.add_element(event[1])
        @build_context = element
        event[2].each do |key, value|
          element.attributes[key] = Attribute.new(key, value, self)
        end
      when :end_element
        open_tags.pop
        @build_context = @build_context.parent
      when :text
        unless inside_doctype
          if @build_context[-1].instance_of?(Text)
            # Consecutive text is merged into the preceding Text node.
            @build_context[-1] << event[1]
          elsif !(@build_context.ignore_whitespace_nodes and
                  event[1].strip.size == 0)
            @build_context.add(
              Text.new(event[1], @build_context.whitespace, nil, true)
            )
          end
        end
      when :comment
        @build_context.add(Comment.new(event[1]))
      when :cdata
        @build_context.add(CData.new(event[1]))
      when :processing_instruction
        @build_context.add(Instruction.new(event[1], event[2]))
      when :end_doctype
        inside_doctype = false
        declared.each { |k, v| declared[k] = @build_context.entities[k].value }
        @build_context = @build_context.parent
      when :start_doctype
        @build_context = DocType.new(event[1..-1], @build_context)
        declared = {}
        inside_doctype = true
      when :attlistdecl
        @build_context.add(AttlistDecl.new(event[1..-1]))
      when :externalentity
        @build_context.add(ExternalEntity.new(event[1]))
      when :elementdecl
        @build_context.add(ElementDecl.new(event[1]))
      when :entitydecl
        declared[event[1]] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
        @build_context.add(Entity.new(event))
      when :notationdecl
        @build_context.add(NotationDecl.new(*event[1..-1]))
      when :xmldecl
        @build_context.add(XMLDecl.new(event[1], event[2], event[3]))
      end
    end
  rescue REXML::Validation::ValidationException
    raise
  rescue REXML::ParseException
    raise
  rescue => error
    raise ParseException.new(error.message, @parser.source, @parser, error)
  end
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@ -1,57 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative 'streamparser'
|
|
||||||
require_relative 'baseparser'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
module Parsers
|
|
||||||
# Builds a tree of nested Arrays directly out of the raw events pulled
# from a BaseParser.
class UltraLightParser
  def initialize(stream)
    @stream = stream
    @parser = REXML::Parsers::BaseParser.new(stream)
  end

  # Forwards +listener+ to the underlying parser.
  def add_listener(listener)
    @parser.add_listener(listener)
  end

  # Rewinds the stream and hands it to the underlying parser again.
  def rewind
    @stream.rewind
    @parser.stream = @stream
  end

  # Pulls events until :end_document and returns the root Array.
  #
  # :start_element and :start_doctype events are appended to the current
  # container, get that container inserted at index 1, and then become the
  # current container themselves.  :end_element and :end_doctype step back
  # to the container stored at index 1.  Every other event is appended to
  # the current container unchanged.
  def parse
    root = []
    context = root
    while true
      event = @parser.pull
      case event[0]
      when :end_document
        break
      when :end_doctype, :end_element
        context = context[1]
      when :start_element, :start_doctype
        context << event
        event.insert(1, context)
        context = event
      else
        context << event
      end
    end
    root
  end
end
|
|
||||||
|
|
||||||
# An element is an array. The array contains:
#  0 The event type (:start_element)
#  1 The parent element
#  2 The tag name
#  3 A hash of attributes
#  4..-1 The child elements
# An element is an array of size > 3
|
|
||||||
# Text is a String
|
|
||||||
# PIs are [ :processing_instruction, target, data ]
|
|
||||||
# Comments are [ :comment, data ]
|
|
||||||
# DocTypes are DocType structs
|
|
||||||
# The root is an array with XMLDecls, Text, DocType, Array, Text
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@ -1,675 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative '../namespace'
|
|
||||||
require_relative '../xmltokens'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
module Parsers
|
|
||||||
# You don't want to use this class. Really. Use XPath, which is a wrapper
|
|
||||||
# for this class. Believe me. You don't want to poke around in here.
|
|
||||||
# There is strange, dark magic at work in this code. Beware. Go back! Go
|
|
||||||
# back while you still can!
|
|
||||||
class XPathParser
|
|
||||||
include XMLTokens
|
|
||||||
LITERAL = /^'([^']*)'|^"([^"]*)"/u
|
|
||||||
|
|
||||||
# Installs +namespaces+ as the namespace context of Functions and keeps a
# reference to it in @namespaces.
def namespaces=(namespaces)
  Functions.namespace_context = namespaces
  @namespaces = namespaces
end
|
|
||||||
|
|
||||||
# Parses the XPath expression +path+ and returns the parsed form built by
# OrExpr.  The argument itself is left untouched; whitespace directly
# after "(" / "[" and directly before "]" / ")" is removed from a copy
# before parsing.
def parse(path)
  expression = path.gsub(/([\(\[])\s+/, '\1') # Strip ignorable spaces
  expression = expression.gsub(/\s+([\]\)])/, '\1')
  [].tap { |parsed| OrExpr(expression, parsed) }
end
|
|
||||||
|
|
||||||
# Parses +path+ as the body of a predicate: it is wrapped in square
# brackets, handed to Predicate, and the collected result is returned.
def predicate(path)
  [].tap { |parsed| Predicate("[#{path}]", parsed) }
end
|
|
||||||
|
|
||||||
# Renders a parsed path (or a path String, which is parsed first) using
# abbreviated XPath syntax and returns the resulting String.
#
# When an Array is given it is consumed: operations are removed from it
# with +shift+ as they are rendered.
#
# A :qname whose prefix is nil -- the form NodeTest produces for the
# "*:name" wildcard -- is rendered as "*:name".
def abbreviate( path )
  path = path.kind_of?(String) ? parse( path ) : path
  string = ""
  document = false
  while path.size > 0
    op = path.shift
    case op
    when :node
      # rendered as nothing
    when :attribute
      string << "/" if string.size > 0
      string << "@"
    when :child
      string << "/" if string.size > 0
    when :descendant_or_self
      string << "/"
    when :self
      string << "."
    when :parent
      string << ".."
    when :any
      string << "*"
    when :text
      string << "text()"
    when :following, :following_sibling,
         :ancestor, :ancestor_or_self, :descendant,
         :namespace, :preceding, :preceding_sibling
      string << "/" unless string.size == 0
      string << op.to_s.tr("_", "-")
      string << "::"
    when :qname
      prefix = path.shift
      name = path.shift
      if prefix.nil?
        # any-namespace wildcard
        string << "*:"
      elsif prefix.size > 0
        string << prefix+":"
      end
      string << name
    when :predicate
      string << '['
      string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
      string << ']'
    when :document
      document = true
    when :function
      string << path.shift
      string << "( "
      string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
      string << " )"
    when :literal
      string << %Q{ "#{path.shift}" }
    else
      string << "/" unless string.size == 0
      string << "UNKNOWN("
      string << op.inspect
      string << ")"
    end
  end
  string = "/"+string if document
  return string
end
|
|
||||||
|
|
||||||
# Renders a parsed path (or a path String, which is parsed first) using
# unabbreviated XPath syntax ("child::name", "attribute::name", ...) and
# returns the resulting String.
#
# When an Array is given it is consumed: operations are removed from it
# with +shift+ as they are rendered.
#
# A :qname whose prefix is nil -- the form NodeTest produces for the
# "*:name" wildcard -- is rendered as "*:name".
def expand( path )
  path = path.kind_of?(String) ? parse( path ) : path
  string = ""
  document = false
  while path.size > 0
    op = path.shift
    case op
    when :node
      string << "node()"
    when :attribute, :child, :following, :following_sibling,
         :ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
         :namespace, :preceding, :preceding_sibling, :self, :parent
      string << "/" unless string.size == 0
      string << op.to_s.tr("_", "-")
      string << "::"
    when :any
      string << "*"
    when :qname
      prefix = path.shift
      name = path.shift
      if prefix.nil?
        # any-namespace wildcard
        string << "*:"
      elsif prefix.size > 0
        string << prefix+":"
      end
      string << name
    when :predicate
      string << '['
      string << predicate_to_string( path.shift ) { |x| expand(x) }
      string << ']'
    when :document
      document = true
    else
      string << "/" unless string.size == 0
      string << "UNKNOWN("
      string << op.inspect
      string << ")"
    end
  end
  string = "/"+string if document
  return string
end
|
|
||||||
|
|
||||||
# Renders a parsed predicate expression as a String.
#
# Binary operators render both operands recursively around the operator
# text, :function renders its name and argument, and :literal renders the
# inspected value.  Anything else is handed to the block, whose result is
# inserted.  +path+ is consumed with +shift+ while rendering, and runs of
# spaces in the result are squeezed to a single space.
def predicate_to_string( path, &block )
  rendered = ""
  case path[0]
  when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
    operator = path.shift
    # Operators without an entry are written out by name (and, or, ...).
    spelling = {
      :eq => "=", :lt => "<", :gt => ">", :lteq => "<=",
      :gteq => ">=", :neq => "!=", :union => "|"
    }.fetch(operator, operator)
    left = predicate_to_string( path.shift, &block )
    right = predicate_to_string( path.shift, &block )
    rendered << " " << left << " " << spelling.to_s << " " << right << " "
  when :function
    path.shift
    name = path.shift
    rendered << name
    rendered << "( "
    rendered << predicate_to_string( path.shift, &block )
    rendered << " )"
  when :literal
    path.shift
    rendered << " " << path.shift.inspect << " "
  else
    rendered << " "
    rendered << yield( path )
    rendered << " "
  end
  rendered.squeeze(" ")
end
|
|
||||||
|
|
||||||
private
|
|
||||||
#LocationPath
|
|
||||||
# | RelativeLocationPath
|
|
||||||
# | '/' RelativeLocationPath?
|
|
||||||
# | '//' RelativeLocationPath
|
|
||||||
# Parses a location path.  A leading "/" records :document; a leading
# "//" additionally records :descendant_or_self and :node.  Whatever
# follows is handed to RelativeLocationPath, whose result is returned.
# Returns nil when nothing is left after the leading slashes.
def LocationPath(path, parsed)
  remaining = path.lstrip
  if remaining.start_with?("//")
    parsed.push(:document, :descendant_or_self, :node)
    remaining = remaining[2..-1]
  elsif remaining.start_with?("/")
    parsed << :document
    remaining = remaining[1..-1]
  end
  remaining.empty? ? nil : RelativeLocationPath(remaining, parsed)
end
|
|
||||||
|
|
||||||
#RelativeLocationPath
|
|
||||||
# | Step
|
|
||||||
# | (AXIS_NAME '::' | '@' | '') AxisSpecifier
|
|
||||||
# NodeTest
|
|
||||||
# Predicate
|
|
||||||
# | '.' | '..' AbbreviatedStep
|
|
||||||
# | RelativeLocationPath '/' Step
|
|
||||||
# | RelativeLocationPath '//' Step
|
|
||||||
AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
|
|
||||||
# Parses a sequence of steps separated by "/" or "//", appending the
# parsed operations to +parsed+, and returns the unparsed remainder of
# +path+ (with its leading whitespace intact).
#
# Each step is either "." / ".." or an optional axis ("@", "name::",
# default :child) followed by a NodeTest and optional Predicates.
def RelativeLocationPath(path, parsed)
  loop do
    stripped = path.lstrip
    return path if stripped.empty?

    if stripped.start_with?("..")
      parsed.push(:parent, :node)
      path = stripped[2..-1]
    elsif stripped.start_with?(".")
      parsed.push(:self, :node)
      path = stripped[1..-1]
    else
      if stripped.start_with?("@")
        parsed << :attribute
        stripped = stripped[1..-1]
      elsif (axis = AXIS.match(stripped))
        parsed << axis[1].tr('-','_').intern
        stripped = axis.post_match
      else
        parsed << :child
      end

      # Node test and predicates are collected separately, then appended.
      step = []
      stripped = NodeTest(stripped, step)
      stripped = Predicate(stripped, step)
      parsed.concat(step)
      path = stripped
    end

    # Continue only when another step follows.
    stripped = path.lstrip
    return path if stripped.empty?
    return path unless stripped.start_with?("/")

    if stripped.start_with?("//")
      parsed.push(:descendant_or_self, :node)
      path = stripped[2..-1]
    else
      path = stripped[1..-1]
    end
  end
end
|
|
||||||
|
|
||||||
# Returns a 1-1 map of the nodeset
|
|
||||||
# The contents of the resulting array are either:
|
|
||||||
# true/false, if a positive match
|
|
||||||
# String, if a name match
|
|
||||||
#NodeTest
|
|
||||||
# | ('*' | NCNAME ':' '*' | QNAME) NameTest
|
|
||||||
# | '*' ':' NCNAME NameTest since XPath 2.0
|
|
||||||
# | NODE_TYPE '(' ')' NodeType
|
|
||||||
# | PI '(' LITERAL ')' PI
|
|
||||||
# | '[' expr ']' Predicate
|
|
||||||
PREFIX_WILDCARD = /^\*:(#{NCNAME_STR})/u
|
|
||||||
LOCAL_NAME_WILDCARD = /^(#{NCNAME_STR}):\*/u
|
|
||||||
QNAME = Namespace::NAMESPLIT
|
|
||||||
NODE_TYPE = /^(comment|text|node)\(\s*\)/m
|
|
||||||
PI = /^processing-instruction\(/
|
|
||||||
# Parses one node test at the start of +path+, appends its parsed form to
# +parsed+, and returns the rest of the path.  When nothing matches,
# +path+ is returned unchanged and +parsed+ is left alone.
#
# Recognized forms and what they append:
#   *:name                           :qname, nil, name
#   *                                :any
#   comment() / text() / node()      the type as a Symbol
#   processing-instruction(literal?) :processing_instruction, literal or ''
#   prefix:*                         :namespace, prefix
#   prefix:name / name               :qname, prefix ("" if absent), name
#
# Raises ParseException when a processing-instruction test has an argument
# that is not a quoted literal, or when its closing ")" is missing.
def NodeTest path, parsed
  original_path = path
  path = path.lstrip
  case path
  when PREFIX_WILDCARD
    prefix = nil
    name = $1
    path = $'
    parsed << :qname
    parsed << prefix
    parsed << name
  when /^\*/
    path = $'
    parsed << :any
  when NODE_TYPE
    type = $1
    path = $'
    parsed << type.tr('-', '_').intern
  when PI
    path = $'
    literal = nil
    if path =~ /^\s*\)/
      # No argument: consume the closing parenthesis so that a following
      # step or predicate is not left stranded behind it.
      path = $'
    else
      unless path =~ LITERAL
        raise ParseException.new("Missing literal in processing instruction")
      end
      # LITERAL captures single-quoted text in $1, double-quoted in $2.
      literal = $1 || $2
      path = $'
      raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
      path = path[1..-1]
    end
    parsed << :processing_instruction
    parsed << (literal || '')
  when LOCAL_NAME_WILDCARD
    prefix = $1
    path = $'
    parsed << :namespace
    parsed << prefix
  when QNAME
    prefix = $1
    name = $2
    path = $'
    prefix = "" unless prefix
    parsed << :qname
    parsed << prefix
    parsed << name
  else
    path = original_path
  end
  return path
end
|
|
||||||
|
|
||||||
# Filters the supplied nodeset on the predicate(s)
|
|
||||||
# Parses the run of "[...]" predicates at the start of +path+.  For each
# one, :predicate and the Array filled in by OrExpr are appended to
# +parsed+.  Returns what follows the predicates, or +path+ unchanged when
# it does not start with "[".
def Predicate(path, parsed)
  remaining = path.lstrip
  return path unless remaining[0] == "["

  # Collect the bracketed groups first; the brackets themselves are
  # dropped from each group.
  bodies = []
  while remaining[0] == "["
    remaining, group = get_group(remaining)
    bodies << group[1..-2] if group
  end

  bodies.each do |body|
    expression = []
    parsed << :predicate
    parsed << expression
    OrExpr(body, expression)
  end
  remaining
end
|
|
||||||
|
|
||||||
# The following return arrays of true/false, a 1-1 mapping of the
|
|
||||||
# supplied nodeset, except for axe(), which returns a filtered
|
|
||||||
# nodeset
|
|
||||||
|
|
||||||
#| OrExpr S 'or' S AndExpr
|
|
||||||
#| AndExpr
|
|
||||||
# Parses AndExpr operands joined by " or ", building a left-nested
# [:or, left, right] tree.  The tree replaces the contents of +parsed+
# when +parsed+ is empty and is appended to it otherwise.  Returns the
# unparsed remainder.
def OrExpr(path, parsed)
  tree = []
  remainder = AndExpr(path, tree)
  unless remainder == path
    while (hit = /^\s*( or )/.match(remainder))
      rhs = []
      tree = [:or, tree, rhs]
      remainder = AndExpr(hit.post_match, rhs)
    end
  end
  unless tree.empty?
    parsed.empty? ? parsed.replace(tree) : parsed << tree
  end
  remainder
end
|
|
||||||
|
|
||||||
#| AndExpr S 'and' S EqualityExpr
|
|
||||||
#| EqualityExpr
|
|
||||||
# Parses EqualityExpr operands joined by " and ", building a left-nested
# [:and, left, right] tree.  The tree replaces the contents of +parsed+
# when +parsed+ is empty and is appended to it otherwise.  Returns the
# unparsed remainder.
def AndExpr(path, parsed)
  tree = []
  remainder = EqualityExpr(path, tree)
  unless remainder == path
    while (hit = /^\s*( and )/.match(remainder))
      rhs = []
      tree = [:and, tree, rhs]
      remainder = EqualityExpr(hit.post_match, rhs)
    end
  end
  unless tree.empty?
    parsed.empty? ? parsed.replace(tree) : parsed << tree
  end
  remainder
end
|
|
||||||
|
|
||||||
#| EqualityExpr ('=' | '!=') RelationalExpr
|
|
||||||
#| RelationalExpr
|
|
||||||
# Parses one RelationalExpr followed by any number of "=" / "!="
# continuations, folding them left-to-right into [:eq, lhs, rhs] and
# [:neq, lhs, rhs] arrays.
#
# path::   XPath text still to be parsed
# parsed:: output array; replaced by the parsed tree when it is empty,
#          otherwise the tree is appended to it as one nested element
#
# Returns the text left over after the expression.
def EqualityExpr(path, parsed)
  tree = []
  remainder = RelationalExpr(path, tree)
  unless remainder == path
    while (found = /^\s*(!?=)\s*/.match(remainder))
      operator = found[1][0] == "!" ? :neq : :eq
      rhs = []
      tree = [operator, tree, rhs]
      remainder = RelationalExpr(found.post_match, rhs)
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace(tree)
    else
      parsed << tree
    end
  end
  remainder
end
|
|
||||||
|
|
||||||
#| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
|
|
||||||
#| AdditiveExpr
|
|
||||||
# Parses one AdditiveExpr followed by any number of "<", ">", "<=" or ">="
# continuations, folding them left-to-right into [op, lhs, rhs] arrays where
# op is :lt, :gt, :lteq or :gteq.
#
# path::   XPath text still to be parsed
# parsed:: output array; replaced by the parsed tree when it is empty,
#          otherwise the tree is appended to it as one nested element
#
# Returns the text left over after the expression.
def RelationalExpr(path, parsed)
  tree = []
  remainder = AdditiveExpr(path, tree)
  unless remainder == path
    while (found = /^\s*([<>]=?)\s*/.match(remainder))
      token = found[1]
      label = token[0] == "<" ? "lt" : "gt"
      label += "eq" if token[-1] == "="
      rhs = []
      tree = [label.intern, tree, rhs]
      remainder = AdditiveExpr(found.post_match, rhs)
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace(tree)
    else
      parsed << tree
    end
  end
  remainder
end
|
|
||||||
|
|
||||||
#| AdditiveExpr ('+' | '-') MultiplicativeExpr
|
|
||||||
#| MultiplicativeExpr
|
|
||||||
# Parses one MultiplicativeExpr followed by any number of "+" / "-"
# continuations, folding them left-to-right into [:plus, lhs, rhs] and
# [:minus, lhs, rhs] arrays.
#
# path::   XPath text still to be parsed
# parsed:: output array; replaced by the parsed tree when it is empty,
#          otherwise the tree is appended to it as one nested element
#
# Returns the text left over after the expression.
def AdditiveExpr(path, parsed)
  tree = []
  remainder = MultiplicativeExpr(path, tree)
  unless remainder == path
    while (found = /^\s*(\+|-)\s*/.match(remainder))
      operator = found[1][0] == "+" ? :plus : :minus
      rhs = []
      tree = [operator, tree, rhs]
      remainder = MultiplicativeExpr(found.post_match, rhs)
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace(tree)
    else
      parsed << tree
    end
  end
  remainder
end
|
|
||||||
|
|
||||||
#| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
|
|
||||||
#| UnaryExpr
|
|
||||||
# Parses one UnaryExpr followed by any number of "*", " div " or " mod "
# continuations (the word operators must be surrounded by single spaces),
# folding them left-to-right into [:mult|:div|:mod, lhs, rhs] arrays.
#
# path::   XPath text still to be parsed
# parsed:: output array; replaced by the parsed tree when it is empty,
#          otherwise the tree is appended to it as one nested element
#
# Returns the text left over after the expression.
def MultiplicativeExpr(path, parsed)
  tree = []
  remainder = UnaryExpr(path, tree)
  unless remainder == path
    while (found = /^\s*(\*| div | mod )\s*/.match(remainder))
      token = found[1]
      operator =
        if token[0] == "*"
          :mult
        elsif token.include?("div")
          :div
        else
          :mod
        end
      rhs = []
      tree = [operator, tree, rhs]
      remainder = UnaryExpr(found.post_match, rhs)
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace(tree)
    else
      parsed << tree
    end
  end
  remainder
end
|
|
||||||
|
|
||||||
#| '-' UnaryExpr
|
|
||||||
#| UnionExpr
|
|
||||||
# Strips any run of leading "-" signs from +path+ and parses the rest as a
# UnionExpr. An odd number of signs appends :neg to +parsed+ before the
# operand's parse result; an even number cancels out.
#
# Returns the text left over after the operand.
def UnaryExpr(path, parsed)
  signs = /^(\-*)/.match(path)
  operand = signs && signs.post_match
  parsed << :neg if signs && signs[1] && signs[1].size.odd?

  tree = []
  remainder = UnionExpr(operand, tree)
  parsed.concat(tree)
  remainder
end
|
|
||||||
|
|
||||||
#| UnionExpr '|' PathExpr
|
|
||||||
#| PathExpr
|
|
||||||
# Parses one PathExpr followed by any number of "|" PathExpr continuations,
# folding them left-to-right into [:union, lhs, rhs] arrays.
#
# path::   XPath text still to be parsed
# parsed:: output array; replaced by the parsed tree when it is empty,
#          otherwise the tree is appended to it as one nested element
#
# Returns the text left over after the expression.
def UnionExpr(path, parsed)
  tree = []
  remainder = PathExpr(path, tree)
  unless remainder == path
    while (found = /^\s*(\|)\s*/.match(remainder))
      rhs = []
      tree = [:union, tree, rhs]
      remainder = PathExpr(found.post_match, rhs)
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace(tree)
    else
      parsed << tree
    end
  end
  remainder
end
|
|
||||||
|
|
||||||
#| LocationPath
|
|
||||||
#| FilterExpr ('/' | '//') RelativeLocationPath
|
|
||||||
# Parses a path expression: either a FilterExpr optionally continued by a
# "/"-prefixed relative location path, or a plain location path.
#
# path::   XPath text still to be parsed (leading whitespace is ignored)
# parsed:: output array that receives the parsed steps
#
# Returns the text left over after the expression.
def PathExpr(path, parsed)
  trimmed = path.lstrip
  steps = []
  remainder = FilterExpr(trimmed, steps)

  if remainder != trimmed && remainder && remainder[0] == "/"
    # A filter expression was recognised and a path continues after it.
    remainder = RelativeLocationPath(remainder, steps)
  elsif remainder =~ /\A[\/\.\@\[\w*]/
    remainder = LocationPath(remainder, steps)
  end

  parsed.concat(steps)
  remainder
end
|
|
||||||
|
|
||||||
#| FilterExpr Predicate
|
|
||||||
#| PrimaryExpr
|
|
||||||
# Parses a primary expression followed by any predicates attached to it.
#
# path::   XPath text still to be parsed
# parsed:: output array that receives everything both sub-parsers produced
#
# Returns the text left over after the expression.
def FilterExpr(path, parsed)
  collected = []
  after_primary = PrimaryExpr(path, collected)
  remainder = Predicate(after_primary, collected)
  parsed.concat(collected)
  remainder
end
|
|
||||||
|
|
||||||
#| VARIABLE_REFERENCE
|
|
||||||
#| '(' expr ')'
|
|
||||||
#| LITERAL
|
|
||||||
#| NUMBER
|
|
||||||
#| FunctionCall
|
|
||||||
# Matches a "$name" variable reference at the start of a line; group 1 is
# the name without the dollar sign.
VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u
|
|
||||||
# Matches an unsigned decimal number such as "12", "1.5" or ".5" at the start
# of a line; group 1 is the matched text.
NUMBER = /^(\d*\.?\d+)/
|
|
||||||
# Matches a name that is exactly one of the XPath node-type keywords.
# PrimaryExpr uses it to tell node tests such as text() apart from function
# calls. The alternation is grouped so the anchors apply to every
# alternative; ungrouped, a function name that merely contained "text"
# (e.g. "context") was treated as a node type.
NT = /\A(?:comment|text|processing-instruction|node)\z/
|
|
||||||
# Parses a primary expression from the start of +path+: a variable
# reference, a function call, a number, a quoted literal or a parenthesised
# sub-expression. The alternatives are tried in that order.
#
# path::   XPath text still to be parsed
# parsed:: output array; receives :variable/name, :function/name/arguments,
#          :literal/value, or the parsed contents of a parenthesised group
#
# Returns the text left over, or +path+ unchanged when nothing matched or
# the "call" is really a node-type test such as text().
def PrimaryExpr(path, parsed)
  case path
  when VARIABLE_REFERENCE
    hit = Regexp.last_match
    parsed.push(:variable, hit[1])
    hit.post_match
  when /^(\w[-\w]*)(?:\()/
    hit = Regexp.last_match
    fname = hit[1]
    # Node-type tests look like calls but are handled by the location-path
    # parser, so leave the input untouched for it.
    return path if fname =~ NT
    parsed.push(:function, fname)
    FunctionCall(hit.post_match, parsed)
  when NUMBER
    hit = Regexp.last_match
    digits = hit[1] || hit[2]
    parsed.push(:literal, digits.include?('.') ? digits.to_f : digits.to_i)
    hit.post_match
  when LITERAL
    hit = Regexp.last_match
    parsed.push(:literal, hit[1] || hit[2])
    hit.post_match
  when /^\(/
    remainder, group = get_group(path)
    tree = []
    # Strip the enclosing parentheses before parsing the contents.
    OrExpr(group[1..-2], tree)
    parsed.concat(tree)
    remainder
  else
    path
  end
end
|
|
||||||
|
|
||||||
#| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
|
|
||||||
# Parses the argument list of a function call. +rest+ is the text that
# follows the opening parenthesis. Each argument is parsed as an expression
# into its own array, and the array of those arrays is appended to +parsed+.
#
# Returns the text that follows the closing parenthesis.
def FunctionCall(rest, parsed)
  remainder, arguments = parse_args(rest)
  argument_trees = arguments.map do |argument|
    tree = []
    OrExpr(argument, tree)
    tree
  end
  parsed << argument_trees
  remainder
end
|
|
||||||
|
|
||||||
# get_group( '[foo]bar' ) -> ['bar', '[foo]']
|
|
||||||
# Splits a balanced bracket group off the front of +string+.
#
#   get_group( '[foo]bar' ) -> ['bar', '[foo]']
#
# The first character is taken as the opening delimiter; the closing
# delimiter is ")" when that is "(" and "]" otherwise. Nesting is tracked,
# quoting is not.
#
# Returns [text after the group, the group including its delimiters], or
# nil when the group is never closed.
def get_group(string)
  opener = string[0, 1]
  closer = opener == "(" ? ")" : "]"
  depth = 0
  pos = 0
  loop do
    char = string[pos, 1]
    if char == opener
      depth += 1
    elsif char == closer
      depth -= 1
    end
    pos += 1
    break unless depth > 0 && pos < string.length
  end
  return nil unless depth.zero?
  [string[pos..-1], string[0...pos]]
end
|
|
||||||
|
|
||||||
# Splits a function-call argument list into its top-level arguments.
#
# string:: the text that follows the call's opening parenthesis,
#          e.g. "a, f(b,c)) rest"
#
# Commas only separate arguments at the outermost nesting level and outside
# single- or double-quoted sections. Arguments are whitespace-stripped and
# empty ones are dropped. The caller's string is not modified.
#
# Returns [text after the closing parenthesis, array of argument strings],
# or nil when the closing parenthesis is missing.
def parse_args(string)
  arguments = []
  ind = 0
  inquot = false
  inapos = false
  depth = 1 # the opening parenthesis was already consumed by the caller
  loop do
    char = string[ind]
    if char == '"'
      inquot = !inquot unless inapos
    elsif char == "'"
      inapos = !inapos unless inquot
    elsif !(inquot || inapos)
      case char
      when "("
        depth += 1
      when ")"
        depth -= 1
        if depth == 0
          s = string[0, ind].strip
          arguments << s unless s == ""
          string = string[ind + 1..-1]
        end
      when ","
        if depth == 1
          s = string[0, ind].strip
          arguments << s unless s == ""
          # Continue scanning from the start of the text after the comma.
          string = string[ind + 1..-1]
          ind = -1
        end
      end
    end
    ind += 1
    break unless depth > 0 && ind < string.length
  end
  return nil unless depth == 0
  [string, arguments]
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@ -1,266 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative 'functions'
|
|
||||||
require_relative 'xmltokens'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
class QuickPath
|
|
||||||
include Functions
|
|
||||||
include XMLTokens
|
|
||||||
|
|
||||||
# A base Hash object to be used when initializing a
|
|
||||||
# default empty namespaces set.
|
|
||||||
EMPTY_HASH = {}
|
|
||||||
|
|
||||||
# Returns the first node matched by +path+ relative to +element+, or nil
# when nothing matches. +namespaces+ maps prefixes to namespace URIs.
def QuickPath.first(element, path, namespaces = EMPTY_HASH)
  found = match(element, path, namespaces)
  found[0]
end
|
|
||||||
|
|
||||||
# Yields every node matched by +path+ relative to +element+. A missing path
# is treated as "*". +namespaces+ maps prefixes to namespace URIs.
def QuickPath.each(element, path, namespaces = EMPTY_HASH, &block)
  path ||= "*"
  match(element, path, namespaces).each(&block)
end
|
|
||||||
|
|
||||||
# Evaluates +path+ against +element+ and returns the array of matching nodes.
#
# The shape of the path's beginning selects the starting node set: the
# document root for a leading single "/", the element itself for an explicit
# axis, the element's children for "*" or a bare name/predicate, and the
# element itself for anything else.
#
# Stores +namespaces+ as the Functions namespace context before matching.
# Raises RuntimeError when +path+ is nil.
def QuickPath.match(element, path, namespaces = EMPTY_HASH)
  raise "nil is not a valid xpath" unless path
  Functions.namespace_context = namespaces

  case path
  when /^\/([^\/]|$)/u
    # match on root
    remainder = path[1..-1]
    return [element.root.parent] if remainder == ''
    filter([element.root], remainder)
  when /^[-\w]*::/u
    filter([element], path)
  when /^\*/u, /^[\[!\w:]/u
    # match on children
    filter(element.to_a, path)
  else
    filter([element], path)
  end
end
|
|
||||||
|
|
||||||
# Given an array of nodes it filters the array based on the path. The
|
|
||||||
# result is that when this method returns, the array will contain elements
|
|
||||||
# which match the path
|
|
||||||
# Narrows +elements+ to the nodes selected by +path+ and returns them.
#
# The leading token of +path+ picks the step (descendant, explicit axis,
# child, function, element name, predicate, ancestor, parent, self, any) and
# the remainder of the path is applied recursively. An empty or nil path, or
# an empty node list, returns +elements+ as given. A path that matches none
# of the step patterns yields [].
#
# Note that the element-name step removes non-matching entries from the
# +elements+ array it was passed.
def QuickPath.filter(elements, path)
  return elements if path.nil? || path == '' || elements.size == 0

  case path
  when /^\/\//u                                   # Descendant
    axe(elements, "descendant-or-self", $')
  when /^\/?\b(\w[-\w]*)\b::/u                    # Axe
    axe(elements, $1, $')
  when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u    # Child
    remainder = $'
    elements.inject([]) do |found, element|
      found | filter(element.to_a, remainder)
    end
  when /^\/?(\w[-\w]*)\(/u                        # / Function
    function(elements, $1, $')
  when Namespace::NAMESPLIT                       # Element name
    ns = $1
    name = $2
    remainder = $'
    elements.keep_if do |element|
      element.kind_of?(Element) &&
        (element.expanded_name == name ||
         (element.name == name &&
          element.namespace == Functions.namespace_context[ns]))
    end
    filter(elements, remainder)
  when /^\/\[/u
    elements.inject([]) do |found, element|
      if element.kind_of?(Element)
        found | predicate(element.to_a, path[1..-1])
      else
        found
      end
    end
  when /^\[/u                                     # Predicate
    predicate(elements, path)
  when /^\/?\.\.\./u                              # Ancestor
    axe(elements, "ancestor", $')
  when /^\/?\.\./u                                # Parent
    filter(elements.map { |e| e.parent }, $')
  when /^\/?\./u                                  # Self
    filter(elements, $')
  when /^\*/u                                     # Any
    remainder = $'
    elements.inject([]) do |found, element|
      if element.kind_of?(Element)
        found | filter([element], remainder)
      else
        found
      end
    end
  else
    []
  end
end
|
|
||||||
|
|
||||||
# Applies the named axis to +elements+ and filters the result with +rest+.
#
# elements:: the context nodes
# axe_name:: axis name, e.g. "child", "descendant-or-self", "ancestor",
#            "self", "attribute", "parent", "following-sibling" or
#            "previous-sibling"; unknown names select nothing
# rest::     the remaining path (for "attribute", the attribute name)
#
# Returns the matching nodes without duplicates.
def QuickPath.axe(elements, axe_name, rest)
  # "-or-self" axes start from the context nodes that already satisfy rest.
  matches = axe_name =~ /-or-self$/u ? filter(elements.dup, rest) : []

  case axe_name
  when /^descendant/u
    elements.each do |element|
      next unless element.kind_of?(Element)
      matches |= filter(element.to_a, "descendant-or-self::#{rest}")
    end
  when /^ancestor/u
    elements.each do |element|
      node = element
      while (up = node.parent)
        matches << up
        node = up
      end
    end
    matches = filter(matches, rest)
  when "self"
    matches = filter(elements, rest)
  when "child"
    elements.each do |element|
      next unless element.kind_of?(Element)
      matches |= filter(element.to_a, rest)
    end
  when "attribute"
    elements.each do |element|
      next unless element.kind_of?(Element)
      matches << element.attributes[rest]
    end
  when "parent"
    parents = elements.map { |element| element.parent }.uniq
    matches = filter(parents, rest)
  when "following-sibling"
    followers = elements.map { |element| element.next_sibling }.uniq
    matches = filter(followers, rest)
  when "previous-sibling"
    predecessors = elements.map { |element| element.previous_sibling }.uniq
    matches = filter(predecessors, rest)
  end
  matches.uniq
end
|
|
||||||
|
|
||||||
# Regexp source for one operand of a chained comparison; it is interpolated
# into the pattern used by QuickPath::predicate. The capture is a run of
# characters containing no whitespace and none of <, > or =.
OPERAND_ = '((?=(?:(?!and|or).)*[^\s<>=])[^\s<>=]+)'
|
|
||||||
# A predicate filters a node-set with respect to an axis to produce a
|
|
||||||
# new node-set. For each node in the node-set to be filtered, the
|
|
||||||
# PredicateExpr is evaluated with that node as the context node, with
|
|
||||||
# the number of nodes in the node-set as the context size, and with the
|
|
||||||
# proximity position of the node in the node-set with respect to the
|
|
||||||
# axis as the context position; if PredicateExpr evaluates to true for
|
|
||||||
# that node, the node is included in the new node-set; otherwise, it is
|
|
||||||
# not included.
|
|
||||||
#
|
|
||||||
# A PredicateExpr is evaluated by evaluating the Expr and converting
|
|
||||||
# the result to a boolean. If the result is a number, the result will
|
|
||||||
# be converted to true if the number is equal to the context position
|
|
||||||
# and will be converted to false otherwise; if the result is not a
|
|
||||||
# number, then the result will be converted as if by a call to the
|
|
||||||
# boolean function. Thus a location path para[3] is equivalent to
|
|
||||||
# para[position()=3].
|
|
||||||
# Applies the leading "[...]" predicate of +path+ to +elements+ and filters
# the survivors with whatever follows the predicate.
#
# elements:: the nodes to test
# path::     text starting with "[" and containing the matching "]"
#
# The predicate text is rewritten into a Ruby expression and evaluated once
# per node, with Functions.node set to the node and Functions.pair set to
# [position, size]. A node is kept when the result is true, is a String, or
# is an Integer equal to the node's 1-based position.
#
# Raises RuntimeError when the opening bracket is never closed (previously
# this looped forever).
def QuickPath.predicate(elements, path)
  # Locate the "]" that closes the bracket at index 0, honouring nesting.
  ind = 1
  bcount = 1
  while bcount > 0
    raise "unbalanced brackets in predicate: #{path}" if ind >= path.length
    bcount += 1 if path[ind] == "["
    bcount -= 1 if path[ind] == "]"
    ind += 1
  end
  ind -= 1
  predicate = path[1..ind - 1]
  rest = path[ind + 1..-1]

  # have to change 'a [=<>] b [=<>] c' into 'a [=<>] b and b [=<>] c'
  predicate.gsub!(
    /#{OPERAND_}\s*([<>=])\s*#{OPERAND_}\s*([<>=])\s*#{OPERAND_}/u,
    '\1 \2 \3 and \3 \4 \5')
  # Turn the XPath syntax into something Ruby can evaluate directly.
  predicate.gsub!(/&/u, "&&")
  predicate.gsub!(/=/u, "==")
  predicate.gsub!(/@(\w[-\w.]*)/u, 'attribute("\1")')
  predicate.gsub!(/\bmod\b/u, "%")
  predicate.gsub!(/\b(\w[-\w.]*\()/u) do
    fname = $1
    fname.gsub(/-/u, "_")
  end

  Functions.pair = [0, elements.size]
  results = []
  elements.each do |element|
    Functions.pair[0] += 1
    Functions.node = element
    # NOTE(review): this evaluates text derived from the path as Ruby code;
    # paths from untrusted sources must not reach this method.
    res = eval(predicate)
    case res
    when true, String
      results << element
    when Integer
      results << element if Functions.pair[0] == res
    end
  end
  filter(results, rest)
end
|
|
||||||
|
|
||||||
# Returns the attribute called +name+ of the current context node
# (Functions.node), or nil when that node is not an Element.
def QuickPath.attribute(name)
  node = Functions.node
  node.attributes[name] if node.kind_of?(Element)
end
|
|
||||||
|
|
||||||
# Returns the name of the current context node (Functions.node), or nil
# when that node is not an Element.
def QuickPath.name
  node = Functions.node
  node.name if node.kind_of?(Element)
end
|
|
||||||
|
|
||||||
# Forwards any unknown class-level call to the Functions module, so that
# function names appearing in an evaluated predicate resolve there.
#
# Raises RuntimeError naming the method and its arguments when the forwarded
# call fails. Only StandardError is wrapped; interrupts and exit requests
# propagate unchanged instead of being turned into a RuntimeError.
def QuickPath.method_missing(id, *args)
  Functions.send(id.id2name, *args)
rescue StandardError
  raise "METHOD: #{id.id2name}(#{args.join ', '})\n#{$!.message}"
end
|
|
||||||
|
|
||||||
# Calls the Functions method +fname+ once per node and keeps the nodes for
# which it returns true, or an Integer equal to the node's 1-based position.
#
# elements:: the nodes to test
# fname::    name of the method to invoke on Functions
# rest::     path text following the function's opening parenthesis; its
#            arguments are extracted with parse_args
#
# Functions.node and Functions.pair are updated for every node.
# NOTE(review): fname comes from the path text and is passed to send
# unchecked - confirm callers never feed untrusted paths.
def QuickPath.function(elements, fname, rest)
  args = parse_args(elements, rest)
  Functions.pair = [0, elements.size]
  elements.select do |element|
    Functions.pair[0] += 1
    Functions.node = element
    outcome = Functions.send(fname, *args)
    true.equal?(outcome) ||
      (Integer === outcome && Functions.pair[0] == outcome)
  end
end
|
|
||||||
|
|
||||||
# Consumes a function argument list from +string+, one character at a time,
# removing each consumed character from +string+ in place.
#
# element:: node list handed on to nested function calls
# string::  text following a function's opening parenthesis
#
# A "," evaluates the text buffered so far into an argument, a "(" starts a
# nested function call named by the buffer, and a ")" ends the list.
#
# Returns the collected arguments when a ")" is reached, or "" when the
# input runs out first.
def QuickPath.parse_args(element, string)
  # /.*?(?:\)|,)/
  arguments = []
  buffer = ""
  until !string || string == ""
    c = string[0]
    string.sub!(/^./u, "")
    if c == ","
      # if depth = 1, then we start a new argument
      arguments << evaluate(buffer)
    elsif c == "("
      # start a new method call
      function(element, buffer, string)
      buffer = ""
    elsif c == ")"
      # close the method call and return arguments
      return arguments
    else
      buffer << c
    end
  end
  ""
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@ -1,32 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
# frozen_string_literal: false
|
|
||||||
# REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
|
|
||||||
#
|
|
||||||
# REXML is a _pure_ Ruby, XML 1.0 conforming,
|
|
||||||
# non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
|
|
||||||
# toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
|
|
||||||
# tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
|
|
||||||
# and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
|
|
||||||
# includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
|
|
||||||
# Ruby 1.8, REXML is included in the standard Ruby distribution.
|
|
||||||
#
|
|
||||||
# Main page:: http://www.germane-software.com/software/rexml
|
|
||||||
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
|
|
||||||
# Date:: 2008/019
|
|
||||||
# Version:: 3.2.4
|
|
||||||
#
|
|
||||||
# This API documentation can be downloaded from the REXML home page, or can
|
|
||||||
# be accessed online[http://www.germane-software.com/software/rexml_doc]
|
|
||||||
#
|
|
||||||
# A tutorial is available in the REXML distribution in docs/tutorial.html,
|
|
||||||
# or can be accessed
|
|
||||||
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
|
|
||||||
module REXML
  # Release identification.
  VERSION = "3.2.4"
  REVISION = ""
  DATE = "2008/019"
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"

  # CamelCase aliases of the constants above.
  Version = VERSION
  Copyright = COPYRIGHT
end
|
|
||||||
@ -1,98 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
module REXML
  # A template for stream parser listeners: every callback is defined here
  # with an empty body, so a listener only overrides the events it needs.
  #
  # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
  # processed; REXML doesn't yet handle doctype entity declarations, so you
  # have to parse them out yourself.
  #
  # === Missing methods from SAX2
  #  ignorable_whitespace
  #
  # === Methods extending SAX2
  # +WARNING+
  # These methods are certainly going to change, until DTDs are fully
  # supported. Be aware of this.
  #  start_document
  #  end_document
  #  doctype
  #  elementdecl
  #  attlistdecl
  #  entitydecl
  #  notationdecl
  #  cdata
  #  xmldecl
  #  comment
  module SAX2Listener
    def start_document; end

    def end_document; end

    def start_prefix_mapping(prefix, uri); end

    def end_prefix_mapping(prefix); end

    def start_element(uri, localname, qname, attributes); end

    def end_element(uri, localname, qname); end

    def characters(text); end

    def processing_instruction(target, data); end

    # Handles a doctype declaration. Any attributes of the doctype which are
    # not supplied will be nil. EG, <!DOCTYPE me PUBLIC "foo" "bar">
    # @p name the name of the doctype; EG, "me"
    # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
    # @p long_name the supplied long name, or nil. EG, "foo"
    # @p uri the uri of the doctype, or nil. EG, "bar"
    def doctype(name, pub_sys, long_name, uri); end

    # If a doctype includes an ATTLIST declaration, it will cause this
    # method to be called. The content is the declaration itself, unparsed.
    # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
    # attr CDATA #REQUIRED". This is the same for all of the .*decl
    # methods.
    def attlistdecl(element, pairs, contents); end

    # <!ELEMENT ...>
    def elementdecl(content); end

    # <!ENTITY ...>
    # The argument passed to this method is an array of the entity
    # declaration. It can be in a number of formats, but in general it
    # returns (example, result):
    #  <!ENTITY % YN '"Yes"'>
    #  ["%", "YN", "\"Yes\""]
    #  <!ENTITY % YN 'Yes'>
    #  ["%", "YN", "Yes"]
    #  <!ENTITY WhatHeSaid "He said %YN;">
    #  ["WhatHeSaid", "He said %YN;"]
    #  <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "SYSTEM", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
    #  <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "PUBLIC", "-//Textuality//TEXT Standard open-hatch boilerplate//EN", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
    #  <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
    #  ["hatch-pic", "SYSTEM", "../grafix/OpenHatch.gif", "NDATA", "gif"]
    def entitydecl(declaration); end

    # <!NOTATION ...>
    def notationdecl(name, public_or_system, public_id, system_id); end

    # Called when <![CDATA[ ... ]]> is encountered in a document.
    # @p content "..."
    def cdata(content); end

    # Called when an XML PI is encountered in the document.
    # EG: <?xml version="1.0" encoding="utf"?>
    # @p version the version attribute value. EG, "1.0"
    # @p encoding the encoding attribute value, or nil. EG, "utf"
    # @p standalone the standalone attribute value, or nil. EG, nil
    def xmldecl(version, encoding, standalone); end

    # Called when a comment is encountered.
    # @p comment The content of the comment
    def comment(comment); end

    def progress(position); end
  end
end
|
|
||||||
@ -1,28 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
module REXML
  # Holds the limits applied to entity expansion, shared by every user of
  # the library within the process.
  module Security
    @@entity_expansion_limit = 10_000
    @@entity_expansion_text_limit = 10_240

    # Set the entity expansion limit. By default the limit is set to 10000.
    def Security.entity_expansion_limit=(val)
      @@entity_expansion_limit = val
    end

    # Get the entity expansion limit. By default the limit is set to 10000.
    def Security.entity_expansion_limit
      @@entity_expansion_limit
    end

    # Set the entity expansion text limit. By default the limit is set to
    # 10240.
    def Security.entity_expansion_text_limit=(val)
      @@entity_expansion_text_limit = val
    end

    # Get the entity expansion text limit. By default the limit is set to
    # 10240.
    def Security.entity_expansion_text_limit
      @@entity_expansion_text_limit
    end
  end
end
|
|
||||||
@ -1,298 +0,0 @@
|
|||||||
# coding: US-ASCII
|
|
||||||
# frozen_string_literal: false
|
|
||||||
require_relative 'encoding'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
# Generates Source-s. USE THIS CLASS.
|
|
||||||
class SourceFactory
  # Generates a Source object
  # @param arg Either a String, an IO, or a Source
  # @return an IOSource wrapping +arg+ when it behaves like an IO or a
  #   String, or +arg+ itself when it is already a Source
  # @raise RuntimeError when +arg+ is none of those
  def SourceFactory.create_from(arg)
    io_like = [:read, :readline, :nil?, :eof?].all? do |message|
      arg.respond_to?(message)
    end
    return IOSource.new(arg) if io_like

    if arg.respond_to?(:to_str)
      # Loaded on demand: only string input needs StringIO.
      require 'stringio'
      return IOSource.new(StringIO.new(arg))
    end

    return arg if arg.kind_of?(Source)

    raise "#{arg.class} is not a valid input stream. It must walk \n" +
          "like either a String, an IO, or a Source."
  end
end
|
|
||||||
|
|
||||||
# A Source can be searched for patterns, and wraps buffers and other
|
|
||||||
# objects and provides consumption of text
|
|
||||||
class Source
  include Encoding

  # buffer::   the current buffer (what we're going to read next)
  # line::     the line number of the last consumed text
  # encoding:: the encoding in effect for this source
  attr_reader :buffer, :line, :encoding

  # Constructor
  # @param arg must be a String, and should be a valid XML document
  # @param encoding if non-null, sets the encoding of the source to this
  # value, overriding all encoding detection
  def initialize(arg, encoding=nil)
    @orig = @buffer = arg
    encoding ? self.encoding = encoding : detect_encoding
    @line = 0
  end

  # Inherited from Encoding
  # Overridden to support optimized en/decoding
  def encoding=(enc)
    encoding_updated if super
  end

  # Scans the source for a given pattern. Note, that this is not your
  # usual scan() method. For one thing, the pattern argument has some
  # requirements; for another, the source can be consumed. You can easily
  # confuse this method. Originally, the patterns were easier
  # to construct and this method more robust, because this method
  # generated search regexps on the fly; however, this was
  # computationally expensive and slowed down the entire REXML package
  # considerably, since this is by far the most commonly called method.
  # @param pattern must be a Regexp, and must be in the form of
  # /^\s*(#{your pattern, with no groups})(.*)/. The first group
  # will be returned; the second group is used if the consume flag is
  # set.
  # @param cons if true, the pattern returned will be consumed, leaving
  # everything after it in the Source.
  # @return the pattern, if found, or nil if the Source is empty or the
  # pattern is not found.
  def scan(pattern, cons=false)
    return nil if @buffer.nil?
    found = @buffer.scan(pattern)
    @buffer = $' if cons && found.size > 0
    found
  end

  # Does nothing here; sources that can fetch more data override it.
  def read
  end

  # Drops everything up to and including the first match of +pattern+ from
  # the buffer; the buffer is left alone when nothing matches.
  def consume(pattern)
    hit = pattern.match(@buffer)
    @buffer = hit.post_match if hit
  end

  # Matches +pattern+ against the buffer without consuming anything.
  # +char+ is not used.
  def match_to(char, pattern)
    pattern.match(@buffer)
  end

  # Matches +pattern+ against the buffer and replaces the buffer with the
  # text after the match (nil when there is no match). +char+ is not used.
  # @return the MatchData, or nil
  def match_to_consume(char, pattern)
    hit = pattern.match(@buffer)
    @buffer = hit && hit.post_match
    hit
  end

  # Matches +pattern+ against the buffer; when +cons+ is true and the match
  # succeeds, the buffer is replaced with the text after the match.
  # @return the MatchData, or nil
  def match(pattern, cons=false)
    hit = pattern.match(@buffer)
    @buffer = hit.post_match if cons && hit
    hit
  end

  # @return true if the Source is exhausted
  def empty?
    @buffer == ""
  end

  # @return the index of the current buffer within the original text
  def position
    @orig.index(@buffer)
  end

  # @return the current line in the source
  def current_line
    lines = @orig.split
    hit = lines.grep(@buffer[0..30]).last
    lines.index(hit) if hit
  end

  private

  # Looks for a byte order mark at the start of the buffer, removes it when
  # present, and sets the encoding it implies (UTF-8 when there is none).
  def detect_encoding
    original_encoding = @buffer.encoding
    detected = "UTF-8"
    begin
      # Compare raw bytes, whatever encoding the buffer claims to have.
      @buffer.force_encoding("ASCII-8BIT")
      [
        ["\xfe\xff", "UTF-16BE"],
        ["\xff\xfe", "UTF-16LE"],
        ["\xef\xbb\xbf", "UTF-8"],
      ].each do |bom, name|
        width = bom.bytesize
        next unless @buffer[0, width] == bom
        @buffer[0, width] = ""
        detected = name
        break
      end
    ensure
      @buffer.force_encoding(original_encoding)
    end
    self.encoding = detected
  end

  # Brings the buffer in line with a newly set encoding: UTF-8 input is
  # only re-tagged, anything else is decoded.
  def encoding_updated
    if @encoding == 'UTF-8'
      @to_utf = false
      @buffer.force_encoding ::Encoding::UTF_8
    else
      @buffer = decode(@buffer)
      @to_utf = true
    end
  end
end
|
|
||||||
|
|
||||||
# A Source that wraps an IO. See the Source class for method
|
|
||||||
# documentation
|
|
||||||
class IOSource < Source
|
|
||||||
#attr_reader :block_size
|
|
||||||
|
|
||||||
# block_size has been deprecated
|
|
||||||
# Wraps the IO-like object +arg+.
#
# arg::        the object to read from
# block_size:: block_size has been deprecated; the value is not used
# encoding::   when given, used as-is; otherwise the first three bytes are
#              read so the parent constructor can detect the encoding
def initialize(arg, block_size=500, encoding=nil)
  @er_source = @source = arg
  @to_utf = false
  @pending_buffer = nil

  encoding ? super("", encoding) : super(@source.read(3) || "")

  # Data that is not being decoded, but comes from a stream whose declared
  # external encoding is not UTF-8, has to be re-tagged as UTF-8 when read.
  needs_forcing = !@to_utf &&
                  @buffer.respond_to?(:force_encoding) &&
                  @source.respond_to?(:external_encoding) &&
                  @source.external_encoding != ::Encoding::UTF_8
  @force_utf8 = needs_forcing ? true : false
end
|
|
||||||
|
|
||||||
# Scans the buffer for +pattern+ (see Source#scan), reading further lines
# from the underlying source until the pattern matches or the source is
# exhausted.
#
# pattern:: the Regexp to scan for
# cons::    when true, matched text is consumed from the buffer
#
# Returns the scan result. Encoding conversion errors raised while reading
# propagate to the caller; any other read failure is treated as end of
# input.
def scan(pattern, cons=false)
  rv = super
  # You'll notice that this next section is very similar to the same
  # section in match(), but just a liiittle different. This is
  # because it is a touch faster to do it this way with scan()
  # than the way match() does it; enough faster to warrant duplicating
  # some code
  if rv.size == 0
    until @buffer =~ pattern or @source.nil?
      begin
        @buffer << readline
      rescue ::Encoding::InvalidByteSequenceError,
             ::Encoding::UndefinedConversionError
        # These replace the former Iconv::IllegalSequence clause: Iconv no
        # longer exists on Ruby 2.0+, so merely evaluating that constant
        # raised NameError for every read failure, including plain EOF.
        raise
      rescue
        @source = nil
      end
    end
    rv = super
  end
  rv.taint if RUBY_VERSION < '2.7'
  rv
end
|
|
||||||
|
|
||||||
def read
|
|
||||||
begin
|
|
||||||
@buffer << readline
|
|
||||||
rescue Exception, NameError
|
|
||||||
@source = nil
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def consume( pattern )
|
|
||||||
match( pattern, true )
|
|
||||||
end
|
|
||||||
|
|
||||||
def match( pattern, cons=false )
|
|
||||||
rv = pattern.match(@buffer)
|
|
||||||
@buffer = $' if cons and rv
|
|
||||||
while !rv and @source
|
|
||||||
begin
|
|
||||||
@buffer << readline
|
|
||||||
rv = pattern.match(@buffer)
|
|
||||||
@buffer = $' if cons and rv
|
|
||||||
rescue
|
|
||||||
@source = nil
|
|
||||||
end
|
|
||||||
end
|
|
||||||
rv.taint if RUBY_VERSION < '2.7'
|
|
||||||
rv
|
|
||||||
end
|
|
||||||
|
|
||||||
def empty?
|
|
||||||
super and ( @source.nil? || @source.eof? )
|
|
||||||
end
|
|
||||||
|
|
||||||
def position
|
|
||||||
@er_source.pos rescue 0
|
|
||||||
end
|
|
||||||
|
|
||||||
# @return the current line in the source
|
|
||||||
def current_line
|
|
||||||
begin
|
|
||||||
pos = @er_source.pos # The byte position in the source
|
|
||||||
lineno = @er_source.lineno # The XML < position in the source
|
|
||||||
@er_source.rewind
|
|
||||||
line = 0 # The \r\n position in the source
|
|
||||||
begin
|
|
||||||
while @er_source.pos < pos
|
|
||||||
@er_source.readline
|
|
||||||
line += 1
|
|
||||||
end
|
|
||||||
rescue
|
|
||||||
end
|
|
||||||
@er_source.seek(pos)
|
|
||||||
rescue IOError
|
|
||||||
pos = -1
|
|
||||||
line = -1
|
|
||||||
end
|
|
||||||
[pos, lineno, line]
|
|
||||||
end
|
|
||||||
|
|
||||||
private
|
|
||||||
def readline
|
|
||||||
str = @source.readline(@line_break)
|
|
||||||
if @pending_buffer
|
|
||||||
if str.nil?
|
|
||||||
str = @pending_buffer
|
|
||||||
else
|
|
||||||
str = @pending_buffer + str
|
|
||||||
end
|
|
||||||
@pending_buffer = nil
|
|
||||||
end
|
|
||||||
return nil if str.nil?
|
|
||||||
|
|
||||||
if @to_utf
|
|
||||||
decode(str)
|
|
||||||
else
|
|
||||||
str.force_encoding(::Encoding::UTF_8) if @force_utf8
|
|
||||||
str
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def encoding_updated
|
|
||||||
case @encoding
|
|
||||||
when "UTF-16BE", "UTF-16LE"
|
|
||||||
@source.binmode
|
|
||||||
@source.set_encoding(@encoding, @encoding)
|
|
||||||
end
|
|
||||||
@line_break = encode(">")
|
|
||||||
@pending_buffer, @buffer = @buffer, ""
|
|
||||||
@pending_buffer.force_encoding(@encoding)
|
|
||||||
super
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@ -1,93 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
module REXML
  # A template for stream parser listeners.  Every callback is a no-op;
  # include the module and override the ones you need.
  # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
  # processed; REXML doesn't yet handle doctype entity declarations, so you
  # have to parse them out yourself.
  module StreamListener
    # Called when a start tag is encountered.
    # @p name the tag name
    # @p attrs an array of arrays of attribute/value pairs, suitable for
    # use with assoc or rassoc.  IE, <tag attr1="value1" attr2="value2">
    # will result in
    # tag_start( "tag", [["attr1","value1"],["attr2","value2"]])
    def tag_start(name, attrs)
    end

    # Called when the end tag is reached.  For <tag/>, tag_end is called
    # immediately after tag_start.
    # @p name the name of the tag
    def tag_end(name)
    end

    # Called when text is encountered in the document.
    # @p text the text content.
    def text(text)
    end

    # Called when a processing instruction is encountered.
    # EG: <?xsl sheet='foo'?>
    # @p name the instruction name; in the example, "xsl"
    # @p instruction the rest of the instruction.  In the example,
    # "sheet='foo'"
    def instruction(name, instruction)
    end

    # Called when a comment is encountered.
    # @p comment The content of the comment
    def comment(comment)
    end

    # Handles a doctype declaration.  Any attributes of the doctype which
    # are not supplied will be nil.  EG, <!DOCTYPE me PUBLIC "foo" "bar">
    # @p name the name of the doctype; EG, "me"
    # @p pub_sys "PUBLIC", "SYSTEM", or nil.  EG, "PUBLIC"
    # @p long_name the supplied long name, or nil.  EG, "foo"
    # @p uri the uri of the doctype, or nil.  EG, "bar"
    def doctype(name, pub_sys, long_name, uri)
    end

    # Called when the doctype is done
    def doctype_end
    end

    # If a doctype includes an ATTLIST declaration, it will cause this
    # method to be called.  The content is the declaration itself, unparsed.
    # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
    # attr CDATA #REQUIRED".  This is the same for all of the .*decl
    # methods.
    def attlistdecl(element_name, attributes, raw_content)
    end

    # <!ELEMENT ...>
    def elementdecl(content)
    end

    # <!ENTITY ...>
    # The argument passed to this method is an array of the entity
    # declaration.  It can be in a number of formats, but in general it
    # returns (example, result):
    #  <!ENTITY % YN '"Yes"'>
    #  ["YN", "\"Yes\"", "%"]
    #  <!ENTITY % YN 'Yes'>
    #  ["YN", "Yes", "%"]
    #  <!ENTITY WhatHeSaid "He said %YN;">
    #  ["WhatHeSaid", "He said %YN;"]
    #  <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "SYSTEM", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
    #  <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "PUBLIC", "-//Textuality//TEXT Standard open-hatch boilerplate//EN", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
    #  <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
    #  ["hatch-pic", "SYSTEM", "../grafix/OpenHatch.gif", "gif"]
    def entitydecl(content)
    end

    # <!NOTATION ...>
    def notationdecl(content)
    end

    # Called when %foo; is encountered in a doctype declaration.
    # @p content "foo"
    def entity(content)
    end

    # Called when <![CDATA[ ... ]]> is encountered in a document.
    # @p content "..."
    def cdata(content)
    end

    # Called when an XML PI is encountered in the document.
    # EG: <?xml version="1.0" encoding="utf"?>
    # @p version the version attribute value.  EG, "1.0"
    # @p encoding the encoding attribute value, or nil.  EG, "utf"
    # @p standalone the standalone attribute value, or nil.  EG, nil
    def xmldecl(version, encoding, standalone)
    end
  end
end
|
|
||||||
@ -1,424 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative 'security'
|
|
||||||
require_relative 'entity'
|
|
||||||
require_relative 'doctype'
|
|
||||||
require_relative 'child'
|
|
||||||
require_relative 'doctype'
|
|
||||||
require_relative 'parseexception'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
# Represents text nodes in an XML document
|
|
||||||
class Text < Child
|
|
||||||
include Comparable
|
|
||||||
# The order in which the substitutions occur.  SPECIALS[i] is replaced by
# SUBSTITUTES[i]; the ampersand pattern comes first and skips ampersands
# that already start an entity or character reference.
SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]
SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
# Characters which are substituted in written strings.  SETUTITSBUS[i]
# (an escaped form) is replaced by SLAICEPS[i] (the literal character);
# the ampersand is handled last.
SLAICEPS = [ '<', '>', '"', "'", '&' ]
SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]
|
|
||||||
|
|
||||||
# If +raw+ is true, then REXML leaves the value alone
|
|
||||||
attr_accessor :raw
|
|
||||||
|
|
||||||
NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
|
|
||||||
# Matches a numeric character reference such as &#65; or &#x41;.  Leading
# zeros are skipped; group 1 holds the decimal digits, or "x" followed by
# the hexadecimal digits.
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
|
||||||
# Code points accepted in XML text: single values and inclusive ranges.
VALID_CHAR = [
  0x9, 0xA, 0xD,
  (0x20..0xD7FF),
  (0xE000..0xFFFD),
  (0x10000..0x10FFFF)
]

# Matches strings that consist solely of VALID_CHAR code points.  The
# pattern is anchored with \A and \z: with ^ and $ a single valid *line*
# was enough for a match, so an illegal character on any other line of a
# multi-line string went undetected.
if String.method_defined? :encode
  VALID_XML_CHARS = Regexp.new('\A['+
    VALID_CHAR.map { |item|
      case item
      when Integer
        [item].pack('U').force_encoding('utf-8')
      when Range
        [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
      end
    }.join +
  ']*\z')
else
  # Byte-oriented fallback for Strings without #encode.
  VALID_XML_CHARS = /\A(
       [\x09\x0A\x0D\x20-\x7E]            # ASCII
     | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
     |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
     | [\xE1-\xEC\xEE][\x80-\xBF]{2}      # straight 3-byte
     |  \xEF[\x80-\xBE]{2}                #
     |  \xEF\xBF[\x80-\xBD]               # excluding U+fffe and U+ffff
     |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
     |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
     | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
     |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
   )*\z/nx;
end
|
|
||||||
|
|
||||||
# Constructor
# +arg+ if a String, the content is set to (a copy of) the String.  If a
# Text, the object is shallowly cloned: its string is copied and its raw
# flag and entity filter are taken over.  Anything else raises.
#
# +respect_whitespace+ (boolean, false) if true, whitespace is
# respected; otherwise runs of the same space, newline or tab character
# are squeezed.
#
# +parent+ (nil) if this is a Parent object, the parent
# will be set to this.
#
# +raw+ (nil) This argument can be given three values.
# If true, then the value used to construct this object is expected to
# contain no unescaped XML markup, and REXML will not change the text. If
# this value is false, the string may contain any characters, and REXML will
# escape any and all defined entities whose values are contained in the
# text.  If this value is nil (the default), then the raw value of the
# parent will be used as the raw value for this node.  If there is no raw
# value for the parent, and no value is supplied, the default is false.
# Use this field if you have entities defined for some text, and you don't
# want REXML to escape that text in output.
#   Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
#   Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;"
#   Text.new( "<&", false, nil, true )  #-> Parse exception
#   Text.new( "&lt;&amp;", false, nil, true )  #-> "&lt;&amp;"
#   # Assume that the entity "s" is defined to be "sean"
#   # and that the entity    "r" is defined to be "russell"
#   Text.new( "sean russell" )          #-> "&s; &r;"
#   Text.new( "sean russell", false, nil, true ) #-> "sean russell"
#
# +entity_filter+ (nil) This can be an array of entities to match in the
# supplied text.  This argument is only useful if +raw+ is set to false.
#   Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
#   Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
# In the last example, the +entity_filter+ argument is ignored.
#
# +illegal+ INTERNAL USE ONLY
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
  entity_filter=nil, illegal=NEEDS_A_SECOND_CHECK )

  @raw = false
  @parent = nil
  @entity_filter = nil

  if parent
    super( parent )
    @raw = parent.raw
  end

  if arg.kind_of? String
    @string = arg.dup
  elsif arg.kind_of? Text
    @string = arg.instance_variable_get(:@string).dup
    @raw = arg.raw
    @entity_filter = arg.instance_variable_get(:@entity_filter)
  else
    # Object#type no longer exists; calling it raised NoMethodError and
    # hid this message.
    raise "Illegal argument of type #{arg.class} for Text constructor (#{arg})"
  end

  @string.squeeze!(" \n\t") unless respect_whitespace
  @string.gsub!(/\r\n?/, "\n")
  # An explicit +raw+ wins over the value inherited from parent or source.
  @raw = raw unless raw.nil?
  @entity_filter = entity_filter if entity_filter
  clear_cache

  Text.check(@string, illegal, doctype) if @raw
end
|
|
||||||
|
|
||||||
# Re-parents the node, then re-validates the string when the node is raw
# and now has a parent.
def parent=(parent)
  super(parent)
  return unless @raw and @parent
  Text.check(@string, NEEDS_A_SECOND_CHECK, doctype)
end
|
|
||||||
|
|
||||||
# check for illegal characters
# Raises a RuntimeError naming the offending character or reference when
# +string+ contains a character outside VALID_CHAR, or when a match of
# +pattern+ is not a well-formed reference.  +doctype+ is currently unused.
def Text.check(string, pattern, doctype)

  # illegal anywhere
  unless string =~ VALID_XML_CHARS
    if String.method_defined? :encode
      string.each_char do |char|
        next if VALID_CHAR.any? { |allowed| allowed === char.ord }
        raise "Illegal character #{char.inspect} in raw string #{string.inspect}"
      end
    else
      string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |bytes|
        next if VALID_CHAR.any? { |allowed| allowed === bytes.unpack('U') }
        raise "Illegal character #{bytes.inspect} in raw string #{string.inspect}"
      end
    end
  end

  # context sensitive
  string.scan(pattern) do
    found = $~
    token = found[1]
    if token[-1] != ?;
      # a bare "<" or an "&" that does not start a complete reference
      raise "Illegal character #{token.inspect} in raw string #{string.inspect}"
    elsif token[0] == ?&
      numeric = found[5]
      if numeric and numeric[0] == ?#
        code = numeric[1] == ?x ? numeric[2..-1].to_i(16) : numeric[1..-1].to_i
        unless VALID_CHAR.any? { |allowed| allowed === code }
          raise "Illegal character #{token.inspect} in raw string #{string.inspect}"
        end
      # FIXME: below can't work but this needs API change.
      # elsif @parent and $3 and !SUBSTITUTES.include?($1)
      #   if !doctype or !doctype.entities.has_key?($3)
      #     raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
      #   end
      end
    end
  end
end
|
|
||||||
|
|
||||||
# Node type tag for text nodes.
def node_type; :text; end
|
|
||||||
|
|
||||||
# True when the stored string has no characters.
def empty?
  @string.empty?
end
|
|
||||||
|
|
||||||
|
|
||||||
# Builds a new Text from this one, with whitespace respected so the
# copied string is not squeezed again.
def clone
  Text.new(self, true)
end
|
|
||||||
|
|
||||||
|
|
||||||
# Appends text to this text node.  The text is appended in the +raw+ mode
# of this text node.  Line endings (\r\n and \r) are converted to \n first
# and the cached normalized/unnormalized values are dropped.
#
# +returns+ the text itself to enable method chain like
# 'text << "XXX" << "YYY"'.
def <<( to_append )
  addition = to_append.gsub( /\r\n?/, "\n" )
  @string << addition
  clear_cache
  self
end
|
|
||||||
|
|
||||||
|
|
||||||
# +other+ a String or a Text
# +returns+ the result of (to_s <=> arg.to_s)
def <=>( other )
  mine = to_s()
  theirs = other.to_s
  mine <=> theirs
end
|
|
||||||
|
|
||||||
# The doctype of the document this node's parent belongs to; nil when the
# node has no parent or the parent has no document.
def doctype
  return unless @parent
  document = @parent.document
  document.doctype if document
end
|
|
||||||
|
|
||||||
REFERENCE = /#{Entity::REFERENCE}/
|
|
||||||
# Returns the string value of this text node. This string is always
|
|
||||||
# escaped, meaning that it is a valid XML text node string, and all
|
|
||||||
# entities that can be escaped, have been inserted. This method respects
|
|
||||||
# the entity filter set in the constructor.
|
|
||||||
#
|
|
||||||
# # Assume that the entity "s" is defined to be "sean", and that the
|
|
||||||
# # entity "r" is defined to be "russell"
|
|
||||||
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
|
|
||||||
# t.to_s #-> "< & &s; russell"
|
|
||||||
# t = Text.new( "< & &s; russell", false, nil, false )
|
|
||||||
# t.to_s #-> "< & &s; russell"
|
|
||||||
# u = Text.new( "sean russell", false, nil, true )
|
|
||||||
# u.to_s #-> "sean russell"
|
|
||||||
def to_s
|
|
||||||
return @string if @raw
|
|
||||||
@normalized ||= Text::normalize( @string, doctype, @entity_filter )
|
|
||||||
end
|
|
||||||
|
|
||||||
def inspect
|
|
||||||
@string.inspect
|
|
||||||
end
|
|
||||||
|
|
||||||
# Returns the string value of this text.  This is the text without
# entities, as it might be used programmatically, or printed to the
# console.  This ignores the 'raw' attribute setting, and any
# entity_filter.  The result is computed once and cached.
#
#   # Assume that the entity "s" is defined to be "sean", and that the
#   # entity "r" is defined to be "russell"
#   t = Text.new( "< & sean russell", false, nil, false, ['s'] )
#   t.value   #-> "< & sean russell"
#   t = Text.new( "&lt; &amp; &s; russell", false, nil, false )
#   t.value   #-> "< & sean russell"
#   u = Text.new( "sean russell", false, nil, true )
#   u.value   #-> "sean russell"
def value
  return @unnormalized if @unnormalized
  @unnormalized = Text::unnormalize( @string, doctype )
end
|
|
||||||
|
|
||||||
# Sets the contents of this text node.  This expects the text to be
# unnormalized.  Line endings are converted to \n, the cached values are
# dropped and the node is switched to non-raw mode.
#
#   e = Element.new( "a" )
#   e.add_text( "foo" )      # <a>foo</a>
#   e[0].value = "bar"       # <a>bar</a>
#   e[0].value = "<a>"       # <a>&lt;a&gt;</a>
def value=( val )
  unix_text = val.gsub( /\r\n?/, "\n" )
  @string = unix_text
  clear_cache
  @raw = false
end
|
|
||||||
|
|
||||||
# Breaks +string+ into lines of at most +width+ characters by replacing
# spaces with newlines.
#
# +string+ the text to wrap; returned untouched when it already fits
# +width+ the maximum line length
# +addnewline+ when true and at least one break was made, the result is
# prefixed with a newline
#
# A word longer than +width+ cannot be split: the break is then made at
# the first space after it, and text without any remaining space is left
# as is.  (Such input used to fail with a TypeError because no space was
# found before the cutoff.)
def wrap(string, width, addnewline=false)
  return string if string.length <= width

  lines = []
  rest = string
  while rest.length > width
    # last space at or before the cutoff, else the first one after it
    place = rest.rindex(' ', width) || rest.index(' ', width)
    break if place.nil?
    lines << rest[0, place]
    rest = rest[place+1..-1]
  end
  return string if lines.empty?

  lines << rest
  wrapped = lines.join("\n")
  addnewline ? "\n" + wrapped : wrapped
end
|
|
||||||
|
|
||||||
# Prefixes every line of +string+ with +style+ repeated +level+ times and
# removes the trailing whitespace of each line.
# NOTE(review): the line terminator counts as trailing whitespace, so the
# resulting lines are joined without newlines -- confirm this is intended.
#
# +level+ a negative level returns +string+ unchanged
# +indentfirstline+ when false, the result is stripped of leading and
# trailing whitespace
def indent_text(string, level=1, style="\t", indentfirstline=true)
  return string if level < 0
  indented = string.each_line.each_with_object('') do |line, buffer|
    prefix = style * level
    buffer << (prefix + line).sub(/[\s]+$/,'')
  end
  indented.strip! unless indentfirstline
  indented
end
|
|
||||||
|
|
||||||
# == DEPRECATED
# See REXML::Formatters
#
# Emits a deprecation warning, then writes this node to +writer+ through
# the pretty formatter when +indent+ is greater than -1 and through the
# default formatter otherwise.  +transitive+ and +ie_hack+ are ignored.
def write( writer, indent=-1, transitive=false, ie_hack=false )
  Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters", uplevel: 1)
  formatter = indent > -1 ? REXML::Formatters::Pretty.new( indent ) : REXML::Formatters::Default.new
  formatter.write( self, writer )
end
|
|
||||||
|
|
||||||
# FIXME
# This probably won't work properly
#
# XPath of this node: the parent's xpath followed by "/text()".
def xpath
  @parent.xpath + "/text()"
end
|
|
||||||
|
|
||||||
# Writes out text, substituting special characters beforehand.
# +out+ A String, IO, or any other object supporting <<( String )
# +input+ the text to substitute and then write out; it is not modified
#
# Returns whatever +out+ returns from <<.
def write_with_substitution out, input
  # dup rather than clone: clone keeps the frozen flag, which made the
  # in-place substitutions below raise on frozen input.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
  copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
  copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
  copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
  copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
  copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
  out << copy
end
|
|
||||||
|
|
||||||
private
|
|
||||||
# Drops the memoized normalized and unnormalized forms of the string.
def clear_cache
  @normalized = @unnormalized = nil
end
|
|
||||||
|
|
||||||
# Reads text, substituting entities
# +input+ the escaped text; it is not modified
# +illegal+ (nil) an optional pattern; if it matches, a ParseException is
# raised
#
# Returns a copy with line endings converted to \n and, when the text
# contains an ampersand, the escaped forms in SETUTITSBUS and numeric
# character references replaced by the characters they stand for.
def Text::read_with_substitution( input, illegal=nil )
  # dup rather than clone, so that frozen input can be processed
  copy = input.dup

  if illegal and copy =~ illegal
    raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
  end

  copy.gsub!( /\r\n?/, "\n" )
  if copy.include? ?&
    copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
    copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
    copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
    copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
    copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
    # numeric character references: &#65; or &#x41;
    copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {
      m=$1
      #m='0' if m==''
      # "x41" becomes "0x41" so that Integer() reads it as hexadecimal
      m = "0#{m}" if m[0] == ?x
      [Integer(m)].pack('U*')
    }
  end
  copy
end
|
|
||||||
|
|
||||||
EREFERENCE = /&(?!#{Entity::NAME};)/
|
|
||||||
# Escapes all possible entities
# +input+ the text to escape (converted with to_s)
# +doctype+ (nil) when given, its entities are used; otherwise
# DocType::DEFAULT_ENTITIES
# +entity_filter+ (nil) names of doctype entities that must not be
# substituted
#
# Returns the escaped copy.
def Text::normalize( input, doctype=nil, entity_filter=nil )
  copy = input.to_s
  # Ampersands are escaped first, so that the "&" of the references
  # inserted below is not escaped a second time.
  copy = copy.gsub( "&", "&amp;" )
  if doctype
    # Replace every occurrence of an entity's value by a reference to it
    doctype.entities.each_value do |entity|
      copy = copy.gsub( entity.value,
        "&#{entity.name};" ) if entity.value and
          not( entity_filter and entity_filter.include?(entity.name) )
    end
  else
    DocType::DEFAULT_ENTITIES.each_value do |entity|
      copy = copy.gsub(entity.value, "&#{entity.name};" )
    end
  end
  copy
end
|
|
||||||
|
|
||||||
# Unescapes all possible entities
# Line endings are converted to \n and every match of REFERENCE is
# replaced by its expansion.  Raises once the accumulated size of the
# expansions exceeds Security.entity_expansion_text_limit.
# +illegal+ is accepted but not used.
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
  expanded_bytes = 0
  string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
    expansion = Text.expand($&, doctype, filter)
    expanded_bytes += expansion.bytesize
    if expanded_bytes > Security.entity_expansion_text_limit
      raise "entity expansion has grown too large"
    end
    expansion
  }
end
|
|
||||||
|
|
||||||
# Expands a single reference.
# +ref+ the complete reference, including the leading "&" and the
# trailing ";"
# +doctype+ the doctype used to look up named entities, or nil
# +filter+ names of entities that are left unexpanded, or nil
#
# Numeric references become the character they denote and "&amp;" becomes
# "&".  Other names are looked up in +doctype+, or in
# DocType::DEFAULT_ENTITIES when there is no doctype; unknown or filtered
# references are returned as they are.
def Text.expand(ref, doctype, filter)
  if ref[1] == ?#
    if ref[2] == ?x
      [ref[3...-1].to_i(16)].pack('U*')
    else
      [ref[2...-1].to_i].pack('U*')
    end
  elsif ref == '&amp;'
    '&'
  elsif filter and filter.include?( ref[1...-1] )
    ref
  elsif doctype
    doctype.entity( ref[1...-1] ) or ref
  else
    entity_value = DocType::DEFAULT_ENTITIES[ ref[1...-1] ]
    entity_value ? entity_value.value : ref
  end
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@ -1,9 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative 'parseexception'
|
|
||||||
module REXML
  # Raised for a namespace prefix that has not been declared.
  class UndefinedNamespaceException < ParseException
    # +prefix+ the undeclared prefix; it is named in the message.
    # +source+ and +parser+ are accepted but not passed on.
    def initialize(prefix, source, parser)
      message = "Undefined prefix #{prefix} found"
      super(message)
    end
  end
end
|
|
||||||
@ -1,539 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative "validation"
|
|
||||||
require_relative "../parsers/baseparser"
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
module Validation
|
|
||||||
# Implemented:
# * empty
# * element
# * attribute
# * text
# * optional
# * choice
# * oneOrMore
# * zeroOrMore
# * group
# * value
# * interleave
# * mixed
# * ref
# * grammar
# * start
# * define
#
# Not implemented:
# * data
# * param
# * include
# * externalRef
# * notAllowed
# * anyName
# * nsName
# * except
# * name
class RelaxNG
  include Validator

  INFINITY = 1.0 / 0.0
  EMPTY = Event.new( nil )
  TEXT = [:start_element, "text"]
  attr_accessor :current
  attr_accessor :count
  attr_reader :references

  # Builds the state tree by pulling parse events for the schema in
  # +source+ until the end of the document.
  # FIXME: Namespaces
  def initialize source
    parser = REXML::Parsers::BaseParser.new( source )

    @count = 0
    @references = {}
    @root = @current = Sequence.new(self)
    @root.previous = true
    states = [ @current ]

    # Makes +state+ the innermost open state and registers it with the
    # state that was innermost until now.
    nest = lambda do |state|
      states << state
      states[-2] << state
    end

    event = nil
    until event and event[0] == :end_document
      event = parser.pull
      case event[0]
      when :start_element
        case event[1]
        when "empty"
        when "element", "attribute", "text", "value"
          states[-1] << event
        when "optional"   then nest.call( Optional.new( self ) )
        when "choice"     then nest.call( Choice.new( self ) )
        when "oneOrMore"  then nest.call( OneOrMore.new( self ) )
        when "zeroOrMore" then nest.call( ZeroOrMore.new( self ) )
        when "group"      then nest.call( Sequence.new( self ) )
        when "interleave" then nest.call( Interleave.new( self ) )
        when "mixed"
          # interleave that additionally accepts text
          nest.call( Interleave.new( self ) )
          states[-1] << TEXT
        when "define"
          # a plain array whose first entry is the definition's name
          states << [ event[2]["name"] ]
        when "ref"
          states[-1] << Ref.new( event[2]["name"] )
        when "anyName"
          nest.call( AnyName.new( self ) )
        when "nsName", "except", "name", "data", "param", "include",
             "grammar", "start", "externalRef", "notAllowed"
          # nothing is done for these
        end
      when :end_element
        case event[1]
        when "element", "attribute"
          states[-1] << event
        when "zeroOrMore", "oneOrMore", "choice", "optional",
             "interleave", "group", "mixed"
          states.pop
        when "define"
          ref = states.pop
          @references[ ref.shift ] = ref
        #when "empty"
        end
      when :end_document, :text
        states[-1] << event
      end
    end
  end

  # Passes +event+ on to validate.
  def receive event
    validate( event )
  end
end
|
|
||||||
|
|
||||||
# A node of the validation state tree.  It holds an ordered list of
# entries (events or nested states), the index of the entry expected next
# and a stack of the states to return to.
class State
  # +context+ must respond to count, count= and references; its counter is
  # incremented and the new value becomes this state's number.
  def initialize( context )
    @previous = []
    @events = []
    @current = 0
    @count = (context.count += 1)
    @references = context.references
    @value = false
  end

  # Rewinds this state and its nested states; nothing happens when the
  # state is already at its first entry.
  def reset
    return if @current == 0
    @current = 0
    @events.each do |entry|
      entry.reset if entry.kind_of? State
    end
  end

  # Pushes +previous+ on the stack of states to return to.
  def previous=( previous )
    @previous << previous
  end

  # Consumes +event+.  Returns the state that expects the following event,
  # or nil when +event+ does not match the current entry.
  def next( event )
    #print "In next with #{event.inspect}.  "
    #p @previous
    return @previous.pop.next( event ) if @events[@current].nil?
    expand_ref_in( @events, @current ) if @events[@current].class == Ref

    entry = @events[@current]
    if entry.kind_of? State
      # descend into the nested state and let it consume the event
      @current += 1
      entry.previous = self
      return entry.next( event )
    end
    return nil unless entry.matches?(event)

    @current += 1
    upcoming = @events[@current]
    if upcoming.nil?
      @previous.pop
    elsif upcoming.kind_of? State
      @current += 1
      upcoming.previous = self
      upcoming
    else
      self
    end
  end

  def to_s
    # Abbreviated:
    self.class.name =~ /(?:::)(\w)\w+$/
    # Full:
    #self.class.name =~ /(?:::)(\w+)$/
    "#{$1}.#{@count}"
  end

  # Lists the entries, with the one expected next marked by '#'.
  def inspect
    listing = @events.collect do |e|
      marker = e == @events[@current] ? '#' : ''
      marker + e.inspect unless self == e
    end
    "< #{to_s} #{listing.join(', ')} >"
  end

  # The entries that may come next, as an array.
  def expected
    [@events[@current]]
  end

  # Appends a parse event, state or reference to this state's entries.
  def <<( event )
    add_event_to_arry( @events, event )
  end


  protected
  # Replaces the reference at arry[ind] by the entries recorded for it in
  # @references.
  def expand_ref_in( arry, ind )
    replacement = @references[ arry[ind].to_s ].each_with_object([]) do |evt, acc|
      add_event_to_arry(acc, evt)
    end
    arry[ind,1] = replacement
  end

  # Converts +evt+ and appends it to +arry+.  A String is not appended: it
  # becomes the argument of the last Event when a value is pending.
  def add_event_to_arry( arry, evt )
    generated = generate_event( evt )
    return arry << generated unless generated.kind_of? String

    last = arry[-1]
    last.event_arg = generated if last.kind_of? Event and @value
    @value = false
  end

  # Converts a parse event into an Event.  States and references are
  # returned as they are and text events yield their string.
  def generate_event( event )
    return event if event.kind_of? State or event.class == Ref
    evt = arg = nil
    case event[0]
    when :start_element
      case event[1]
      when "element"
        evt, arg = :start_element, event[2]["name"]
      when "attribute"
        evt, arg = :start_attribute, event[2]["name"]
      when "text"
        evt = :text
      when "value"
        evt = :text
        # the text that follows is the required value
        @value = true
      end
    when :text
      return event[1]
    when :end_document
      return Event.new( event[0] )
    else # then :end_element
      case event[1]
      when "element"   then evt = :end_element
      when "attribute" then evt = :end_attribute
      end
    end
    Event.new( evt, arg )
  end
end
|
|
||||||
|
|
||||||
|
|
||||||
# A state whose entries have to occur one after the other.
class Sequence < State
  # True when +event+ matches the entry expected next.
  def matches?(event)
    upcoming = @events[@current]
    upcoming.matches?( event )
  end
end
|
|
||||||
|
|
||||||
|
|
||||||
# A state whose entries may be skipped as a whole.
class Optional < State
  # While at the first entry, an event that does not match is passed on to
  # the state this one was entered from.
  def next( event )
    if @current == 0
      rv = super
      return rv if rv
      @prior = @previous.pop
      return @prior.next( event )
    end
    super
  end

  # True when +event+ matches the current entry or, at the first entry,
  # the state this one was entered from.
  def matches?(event)
    @events[@current].matches?(event) ||
      (@current == 0 and @previous[-1].matches?(event))
  end

  # The entries that may come next.  At the first entry this also includes
  # what the enclosing state expects.  @prior is only set once the optional
  # has been skipped; before that the enclosing state is still on top of
  # @previous, which is what matches? consults as well, so it is used as
  # the fallback instead of calling expected on nil.
  def expected
    return [@events[@current]] unless @current == 0
    enclosing = @prior || @previous[-1]
    following = enclosing ? enclosing.expected : []
    [ following, @events[0] ].flatten
  end
end
|
|
||||||
|
|
||||||
|
|
||||||
# A state whose entries may occur any number of times, including never.
class ZeroOrMore < Optional
  # Consumes +event+.  After the last entry the state starts over; an
  # event that does not match at the first entry is passed on to the state
  # this one was entered from.
  def next( event )
    expand_ref_in( @events, @current ) if @events[@current].class == Ref
    if ( @events[@current].matches?(event) )
      @current += 1
      if @events[@current].nil?
        @current = 0
        return self
      elsif @events[@current].kind_of? State
        @current += 1
        @events[@current-1].previous = self
        return @events[@current-1]
      else
        return self
      end
    else
      @prior = @previous.pop
      return @prior.next( event ) if @current == 0
      return nil
    end
  end

  # The entries that may come next.  At the first entry this also includes
  # what the enclosing state expects.  @prior is only set after a mismatch;
  # before that the enclosing state is still on top of @previous and is
  # used as the fallback instead of calling expected on nil.
  def expected
    return [@events[@current]] unless @current == 0
    enclosing = @prior || @previous[-1]
    following = enclosing ? enclosing.expected : []
    [ following, @events[0] ].flatten
  end
end
|
|
||||||
|
|
||||||
|
|
||||||
# Repeating state that counts matched events in @ord; it only lets the
# previous state take over once that counter is above zero.
class OneOrMore < State
  def initialize(context)
    super
    @ord = 0
  end

  # Resets the inherited state and the match counter.
  def reset
    super
    @ord = 0
  end

  # Advances on +event+.
  #
  # A match bumps both the position and @ord; reaching the end of the list
  # wraps the position to 0, a nested State is linked back to self and
  # returned, otherwise self is returned. A mismatch is delegated to the
  # popped previous state only when we are at position 0 with @ord > 0;
  # in every other case nil is returned.
  def next(event)
    expand_ref_in(@events, @current) if @events[@current].class == Ref

    unless @events[@current].matches?(event)
      return nil unless @current == 0 && @ord > 0

      return @previous.pop.next(event)
    end

    @current += 1
    @ord += 1
    upcoming = @events[@current]
    if upcoming.nil?
      @current = 0
      self
    elsif upcoming.kind_of?(State)
      @current += 1
      upcoming.previous = self
      upcoming
    else
      self
    end
  end

  # True when the pending entry accepts +event+, or when we sit at position
  # 0 with @ord > 0 and the state on top of @previous accepts it.
  def matches?(event)
    @events[@current].matches?(event) ||
      (@current == 0 && @ord > 0 && @previous[-1].matches?(event))
  end

  # Entries acceptable next.
  def expected
    return [@events[@current]] unless @current == 0 && @ord > 0

    [@previous[-1].expected, @events[0]].flatten
  end
end
|
|
||||||
|
|
||||||
|
|
||||||
# State holding several alternative event sequences in @choices. The first
# event seen selects one alternative, which then becomes @events.
class Choice < State
  def initialize(context)
    super
    @choices = []
  end

  # Resets the inherited state, forgets the selected alternative and resets
  # every nested State found inside any alternative.
  def reset
    super
    @events = []
    @choices.each do |alternative|
      alternative.each { |entry| entry.reset if entry.kind_of?(State) }
    end
  end

  # Records +event+ in the list of alternatives.
  def <<(event)
    add_event_to_arry(@choices, event)
  end

  # Advances on +event+. When no alternative is selected yet, leading Ref
  # entries are expanded in place and the first alternative whose head
  # matches +event+ is chosen. Returns nil (leaving @events empty) when none
  # matches; otherwise defers to the inherited transition.
  def next(event)
    if @events.size == 0
      idx = 0
      limit = @choices.size
      while idx < limit
        if @choices[idx][0].class == Ref
          # Replace the reference by its expansion: the expanded entries are
          # appended to @choices and the original slot is removed, so idx is
          # not advanced and the scan limit shrinks by one.
          expand_ref_in(@choices[idx], 0)
          @choices += @choices[idx]
          @choices.delete(@choices[idx])
          limit -= 1
        else
          idx += 1
        end
      end
      @events = @choices.find { |alternative| alternative[0].matches?(event) }
    end

    unless @events
      @events = []
      return nil
    end

    super
  end

  # With a selected alternative, asks its pending entry; otherwise reports
  # whether the head of any alternative accepts +event+.
  def matches?(event)
    return @events[@current].matches?(event) if @events.size > 0

    @choices.any? { |alternative| alternative[0].matches?(event) }
  end

  # Entries acceptable next: the pending entry of the selected alternative,
  # or the (flattened) heads of every alternative.
  def expected
    return [@events[@current]] if @events.size > 0

    heads = @choices.collect do |alternative|
      head = alternative[0]
      head.kind_of?(State) ? head.expected : head
    end
    heads.flatten
  end

  def inspect
    label = to_s
    alternatives = @choices.collect { |seq| seq.collect { |entry| entry.to_s }.join(', ') }
    "< #{label} #{alternatives.join(' or ')} >"
  end

  protected

  # Appends +evt+ to +arry+, a list of event sequences:
  # * a State or Ref becomes a new one-element sequence;
  # * a :text event overwrites the argument of the trailing :text Event,
  #   but only while @value is set (the flag is cleared afterwards);
  # * any other event is converted with generate_event and appended to the
  #   last sequence, a :start_element first opening a fresh sequence.
  def add_event_to_arry(arry, evt)
    if evt.kind_of?(State) || evt.class == Ref
      arry << [evt]
    elsif evt[0] == :text
      tail_seq = arry[-1]
      if tail_seq &&
         tail_seq[-1].kind_of?(Event) &&
         tail_seq[-1].event_type == :text && @value

        tail_seq[-1].event_arg = evt[1]
        @value = false
      end
    else
      arry << [] if evt[0] == :start_element
      arry[-1] << generate_event(evt)
    end
  end
end
|
|
||||||
|
|
||||||
|
|
||||||
# Choice variant in which every alternative is consumed in turn, in whatever
# order the incoming events select them. @choice is the index of the first
# alternative that has not been selected yet; alternatives that were already
# selected are swapped to the front of @choices.
class Interleave < Choice
  def initialize context
    super
    @choice = 0
  end

  # Resets this state for a fresh validation run.
  #
  # Chains to Choice#reset (visible above: it clears @events and resets the
  # nested states) before rewinding @choice. Previously only @choice was
  # rewound, so a reset left the stale @events/@current of the last run in
  # place.
  def reset
    super
    @choice = 0
  end

  # Selects the next alternative for +event+.
  #
  # Leading Ref entries are expanded in place first. Then the first not yet
  # selected alternative (index >= @choice) whose head matches +event+
  # becomes @events and is swapped into slot @choice, which is advanced.
  # When nothing matches, @events is left as an empty array.
  def next_current( event )
    # Expand references
    c = 0 ; max = @choices.size
    while c < max
      if @choices[c][0].class == Ref
        expand_ref_in( @choices[c], 0 )
        @choices += @choices[c]
        @choices.delete( @choices[c] )
        max -= 1
      else
        c += 1
      end
    end
    @events = @choices[@choice..-1].find { |evt| evt[0].matches? event }
    @current = 0
    if @events
      # reorder the choices
      old = @choices[@choice]
      idx = @choices.index( @events )
      @choices[@choice] = @events
      @choices[idx] = old
      @choice += 1
    end

    @events = [] unless @events
  end

  # Advances on +event+.
  #
  # Returns nil when no alternative can take the event or the pending entry
  # rejects it, self while more work remains in this state, a nested State
  # when one is entered, or the popped previous state once the last
  # alternative has been completed.
  def next( event )
    # Find the next series
    next_current(event) unless @events[@current]
    return nil unless @events[@current]

    expand_ref_in( @events, @current ) if @events[@current].class == Ref
    if ( @events[@current].kind_of? State )
      # Enter the nested state immediately and let it consume the event.
      @current += 1
      @events[@current-1].previous = self
      return @events[@current-1].next( event )
    end
    return @previous.pop.next( event ) if @events[@current].nil?
    if ( @events[@current].matches?(event) )
      @current += 1
      if @events[@current].nil?
        # Current alternative finished: stay here while unselected
        # alternatives remain, otherwise return to the previous state.
        return self unless @choices[@choice].nil?
        return @previous.pop
      elsif @events[@current].kind_of? State
        @current += 1
        @events[@current-1].previous = self
        return @events[@current-1]
      else
        return self
      end
    else
      return nil
    end
  end

  # With a pending entry, asks it; otherwise reports whether the head of any
  # not yet selected alternative accepts +event+.
  def matches?( event )
    return @events[@current].matches?( event ) if @events[@current]
    !@choices[@choice..-1].find{|evt| evt[0].matches?(event)}.nil?
  end

  # Entries acceptable next: the pending entry, or the (flattened) heads of
  # every alternative that has not been selected yet.
  def expected
    return [@events[@current]] if @events[@current]
    return @choices[@choice..-1].collect do |x|
      if x[0].kind_of? State
        x[0].expected
      else
        x[0]
      end
    end.flatten
  end

  def inspect
    "< #{to_s} #{@choices.collect{|e| e.collect{|f|f.to_s}.join(', ')}.join(' and ')} >"
  end
end
|
|
||||||
|
|
||||||
# Placeholder wrapping a single value; the State classes test for this
# class before calling expand_ref_in on an entry.
class Ref
  def initialize(value)
    @value = value
  end

  # Returns the wrapped value unchanged.
  def to_s
    @value
  end

  # The wrapped value between curly braces.
  def inspect
    shown = to_s
    "{#{shown}}"
  end
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@ -1,144 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative 'validationexception'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
module Validation
|
|
||||||
# Mixin driving a state machine rooted at @root: events are fed to
# #validate one at a time and @current tracks the active state.
module Validator
  NILEVENT = [ nil ]

  # Rewinds the machine to @root, clears the attribute stack and returns
  # self.
  def reset
    @current = @root
    @root.reset
    @root.previous = true
    @attr_stack = []
    self
  end

  # Prints the inspected root state to standard output.
  def dump
    puts @root.inspect
  end

  # Feeds +event+ to the current state.
  #
  # Raises ValidationException when the state has no transition for the
  # event, or when an element ends while attributes recorded for it are
  # still unconsumed. For :start_element the attribute hash (event[2]) is
  # drained: each attribute the machine accepts is removed from it.
  def validate( event )
    @attr_stack = [] unless defined? @attr_stack
    match = @current.next(event)
    unless match
      raise ValidationException.new( "Validation error. Expected: "+
        @current.expected.join( " or " )+" from #{@current.inspect} "+
        " but got #{Event.new( event[0], event[1] ).inspect}" )
    end
    @current = match

    # Check for attributes
    case event[0]
    when :start_element
      @attr_stack << event[2]
      # Keep consuming attributes until a full pass finds none the machine
      # accepts (the body always runs at least once).
      loop do
        sattr = [:start_attribute, nil]
        eattr = [:end_attribute]
        text = [:text, nil]
        k, = event[2].find { |key,value|
          sattr[1] = key
          opened = @current.next( sattr )
          next false unless opened

          # If the state has text children...
          if opened.matches?( eattr )
            @current = opened
          else
            text[1] = value
            opened = opened.next( text )
            text[1] = nil
            # Leaves #validate altogether, not just this block.
            return false unless opened
            @current = opened if opened
          end

          closed = @current.next( eattr )
          next false unless closed

          @current = closed
          true
        }
        event[2].delete(k) if k
        break unless k
      end
    when :end_element
      attrs = @attr_stack.pop
      if attrs.length > 0
        raise ValidationException.new( "Validation error. Illegal "+
          " attributes: #{attrs.inspect}")
      end
    end
  end
end
|
|
||||||
|
|
||||||
# A typed event pattern: an event type plus an optional argument (a name for
# start events, a literal for text).
class Event
  attr_reader :event_type
  attr_accessor :event_arg

  def initialize(event_type, event_arg=nil )
    @event_type = event_type
    @event_arg = event_arg
  end

  # Returns @done, which this class never assigns.
  def done?
    @done
  end

  # False for :start_element and :start_attribute, true for everything else.
  def single?
    !(@event_type == :start_element || @event_type == :start_attribute)
  end

  # Tests +event+ (an array: type first, argument second) against this
  # pattern.
  #
  # * different type: false
  # * nil type and the three end_* types: true
  # * start_element / start_attribute: true when the names are equal,
  #   nil otherwise
  # * text: true when no argument is required or the text is equal
  # * any other type (processing instructions, doctype parts, ...): false
  def matches?( event )
    kind = event[0]
    return false unless kind == @event_type

    case kind
    when nil, :end_element, :end_attribute, :end_document
      true
    when :start_element, :start_attribute
      true if event[1] == @event_arg
    when :text
      @event_arg.nil? || @event_arg == event[1]
    else
      false
    end
  end

  # Equal to another Event with the same type and argument.
  def ==( other )
    other.kind_of?(Event) &&
      @event_type == other.event_type &&
      @event_arg == other.event_arg
  end

  def to_s
    inspect
  end

  def inspect
    "#{@event_type.inspect}( #{@event_arg} )"
  end
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@ -1,10 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
module REXML
|
|
||||||
module Validation
|
|
||||||
# Error raised by the validator; a RuntimeError that always carries a
# message.
class ValidationException < RuntimeError
  # msg:: the error message (required)
  def initialize(msg)
    super(msg)
  end
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@ -1,130 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
|
|
||||||
require_relative 'encoding'
|
|
||||||
require_relative 'source'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
# NEEDS DOCUMENTATION
|
|
||||||
# The <?xml ... ?> declaration of a document: version, encoding and
# standalone flag, plus two switches controlling whether the declaration
# (@writethis) and its encoding attribute (@writeencoding) are written.
class XMLDecl < Child
  include Encoding

  DEFAULT_VERSION = "1.0"
  DEFAULT_ENCODING = "UTF-8"
  DEFAULT_STANDALONE = "no"
  START = "<?xml"
  STOP = "?>"

  attr_accessor :version, :standalone
  attr_reader :writeencoding, :writethis

  # version::
  #   A version string, or another XMLDecl whose settings are copied.
  # encoding::
  #   Encoding name; nil selects UTF-8 without writing the attribute.
  # standalone::
  #   Value for the standalone attribute, or nil to omit it.
  def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
    @writethis = true
    @writeencoding = !encoding.nil?
    super()
    if version.kind_of?(XMLDecl)
      source = version
      @version = source.version
      self.encoding = source.encoding
      # The encoding writer above switches both flags on; restore the
      # values of the declaration being copied.
      @writeencoding = source.writeencoding
      @standalone = source.standalone
      @writethis = source.writethis
    else
      @version = version
      self.encoding = encoding
      @standalone = standalone
    end
    @version = DEFAULT_VERSION if @version.nil?
  end

  # Returns a new XMLDecl copied from this one.
  def clone
    XMLDecl.new(self)
  end

  # indent::
  #   Ignored.  There must be no whitespace before an XML declaration
  # transitive::
  #   Ignored
  # ie_hack::
  #   Ignored
  #
  # Appends the declaration to +writer+. Returns nil without writing when
  # writing is switched off and +writer+ is not an Output.
  def write(writer, indent=-1, transitive=false, ie_hack=false)
    return nil if !@writethis && !writer.kind_of?(Output)

    writer << START
    writer << " #{content encoding}"
    writer << STOP
  end

  # Equal to another XMLDecl with the same version, encoding and
  # standalone value.
  def ==( other )
    other.kind_of?(XMLDecl) &&
      other.version == @version &&
      other.encoding == self.encoding &&
      other.standalone == @standalone
  end

  # Overwrites version, encoding and standalone in one call.
  def xmldecl version, encoding, standalone
    @version = version
    self.encoding = encoding
    @standalone = standalone
  end

  def node_type
    :xmldecl
  end

  alias :stand_alone? :standalone
  # Keep the encoding writer that is in scope before it is redefined below.
  alias :old_enc= :encoding=

  # Sets the encoding. nil selects "UTF-8" and suppresses the encoding
  # attribute; any other value is stored and written. Either way the
  # declaration is switched back on for writing.
  def encoding=( enc )
    explicit = !enc.nil?
    self.old_enc = explicit ? enc : "UTF-8"
    @writeencoding = explicit
    self.dowrite
  end

  # Only use this if you do not want the XML declaration to be written;
  # this object is ignored by the XML writer.  Otherwise, instantiate your
  # own XMLDecl and add it to the document.
  #
  # Note that XML 1.1 documents *must* include an XML declaration
  def self.default
    XMLDecl.new( "1.0" ).tap { |decl| decl.nowrite }
  end

  # Switches writing of this declaration off.
  def nowrite
    @writethis = false
  end

  # Switches writing of this declaration on.
  def dowrite
    @writethis = true
  end

  def inspect
    "#{START} ... #{STOP}"
  end

  private

  # Builds the attribute list placed between START and STOP. Double quotes
  # are used when the parent's context sets :prologue_quote to :quote,
  # single quotes otherwise. The encoding attribute is emitted when it was
  # set explicitly or is anything other than UTF-8.
  def content(enc)
    context = parent ? parent.context : nil
    quote = (context && context[:prologue_quote] == :quote) ? "\"" : "'"

    rv = "version=#{quote}#{@version}#{quote}"
    rv << " encoding=#{quote}#{enc}#{quote}" if @writeencoding || enc !~ /\Autf-8\z/i
    rv << " standalone=#{quote}#{@standalone}#{quote}" if @standalone
    rv
  end
end
|
|
||||||
end
|
|
||||||
@ -1,85 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
module REXML
|
|
||||||
# Defines a number of tokens used for parsing XML. Not for general
|
|
||||||
# consumption.
|
|
||||||
module XMLTokens
  # Character ranges of production [4] NameStartChar in
  # http://www.w3.org/TR/REC-xml/#sec-common-syn, spelled as regular
  # expression character-class fragments:
  #
  #   ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] |
  #   [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] |
  #   [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
  #   [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
  #
  # Single quotes keep the backslashes literal so the escapes are
  # interpreted by the regexp engine, not by the string parser.
  name_start_chars = [
    ':',
    'A-Z',
    '_',
    'a-z',
    '\u00C0-\u00D6',
    '\u00D8-\u00F6',
    '\u00F8-\u02FF',
    '\u0370-\u037D',
    '\u037F-\u1FFF',
    '\u200C-\u200D',
    '\u2070-\u218F',
    '\u2C00-\u2FEF',
    '\u3001-\uD7FF',
    '\uF900-\uFDCF',
    '\uFDF0-\uFFFD',
    '\u{10000}-\u{EFFFF}',
  ]

  # Production [4a] NameChar from the same section: NameStartChar plus
  #
  #   "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
  name_chars = name_start_chars + [
    '\-',
    '\.',
    '0-9',
    '\u00B7',
    '\u0300-\u036F',
    '\u203F-\u2040',
  ]

  NAME_START_CHAR = "[#{name_start_chars.join}]"
  NAME_CHAR = "[#{name_chars.join}]"
  NAMECHAR = NAME_CHAR # deprecated. Use NAME_CHAR instead.

  # From http://www.w3.org/TR/xml-names11/#NT-NCName
  #
  #   [6] NCNameStartChar ::= NameStartChar - ':'
  #   [5] NCNameChar      ::= NameChar - ':'
  ncname_start_chars = name_start_chars.reject { |fragment| fragment == ':' }
  ncname_chars = name_chars.reject { |fragment| fragment == ':' }

  NCNAME_STR = "[#{ncname_start_chars.join}][#{ncname_chars.join}]*"
  NAME_STR = "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"

  NAME = "(#{NAME_START_CHAR}#{NAME_CHAR}*)"
  NMTOKEN = "(?:#{NAME_CHAR})+"
  NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
  REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"

  #REFERENCE = "(?:#{ENTITYREF}|#{CHARREF})"
  #ENTITYREF = "&#{NAME};"
  #CHARREF = "&#\\d+;|&#x[0-9a-fA-F]+;"
end
|
|
||||||
end
|
|
||||||
@ -1,81 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
require_relative 'functions'
|
|
||||||
require_relative 'xpath_parser'
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
# Wrapper class. Use this class to access the XPath functions.
|
|
||||||
class XPath
  include Functions
  # A base Hash object, supposed to be used when initializing a
  # default empty namespaces set, but currently unused.
  # TODO: either set the namespaces=EMPTY_HASH, or deprecate this.
  EMPTY_HASH = {}

  # Finds and returns the first node that matches the supplied xpath.
  # element::
  #   The context element (or an Array of context nodes)
  # path::
  #   The xpath to search for.  If not supplied or nil, returns the first
  #   node matching '*'.
  # namespaces::
  #   If supplied, a Hash which defines a namespace mapping.
  # variables::
  #   If supplied, a Hash which maps $variables in the query
  #   to values. This can be used to avoid XPath injection attacks
  #   or to automatically handle escaping string values.
  # options::
  #   Keyword options forwarded to XPathParser.new.
  #
  #  XPath.first( node )
  #  XPath.first( doc, "//b" )
  #  XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
  #  XPath.first( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"})
  def self.first(element, path=nil, namespaces=nil, variables={}, options={})
    unless namespaces.nil? || namespaces.kind_of?(Hash)
      raise "The namespaces argument, if supplied, must be a hash object."
    end
    unless variables.kind_of?(Hash)
      raise "The variables argument, if supplied, must be a hash object."
    end

    parser = XPathParser.new(**options)
    parser.namespaces = namespaces
    parser.variables = variables
    context_nodes = element.kind_of?(Array) ? element : [element]
    parser.parse(path || "*", context_nodes).flatten[0]
  end

  # Iterates over nodes that match the given path, calling the supplied
  # block with the match.
  # element::
  #   The context element (or an Array of context nodes)
  # path::
  #   The xpath to search for.  If not supplied or nil, defaults to '*'
  # namespaces::
  #   If supplied, a Hash which defines a namespace mapping
  # variables::
  #   If supplied, a Hash which maps $variables in the query
  #   to values. This can be used to avoid XPath injection attacks
  #   or to automatically handle escaping string values.
  # options::
  #   Keyword options forwarded to XPathParser.new.
  #
  #  XPath.each( node ) { |el| ... }
  #  XPath.each( node, "/*[@attr='v']" ) { |el| ... }
  #  XPath.each( node, 'ancestor::x' ) { |el| ... }
  #  XPath.each( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"}) \
  #    {|el| ... }
  def self.each(element, path=nil, namespaces=nil, variables={}, options={}, &block)
    unless namespaces.nil? || namespaces.kind_of?(Hash)
      raise "The namespaces argument, if supplied, must be a hash object."
    end
    unless variables.kind_of?(Hash)
      raise "The variables argument, if supplied, must be a hash object."
    end

    parser = XPathParser.new(**options)
    parser.namespaces = namespaces
    parser.variables = variables
    context_nodes = element.kind_of?(Array) ? element : [element]
    parser.parse(path || "*", context_nodes).each(&block)
  end

  # Returns an array of nodes matching a given XPath. Unlike ::first and
  # ::each, the namespaces and variables arguments are not type-checked
  # here.
  def self.match(element, path=nil, namespaces=nil, variables={}, options={})
    parser = XPathParser.new(**options)
    parser.namespaces = namespaces
    parser.variables = variables
    context_nodes = element.kind_of?(Array) ? element : [element]
    parser.parse(path || "*", context_nodes)
  end
end
|
|
||||||
end
|
|
||||||
@ -1,968 +0,0 @@
|
|||||||
# frozen_string_literal: false
|
|
||||||
|
|
||||||
require "pp"
|
|
||||||
|
|
||||||
require_relative 'namespace'
|
|
||||||
require_relative 'xmltokens'
|
|
||||||
require_relative 'attribute'
|
|
||||||
require_relative 'parsers/xpathparser'
|
|
||||||
|
|
||||||
class Object
  # Deep-clone hook used by REXML::XPathParser so that every kind of object
  # answers the same message; for a generic object it is a plain +clone+.
  def dclone
    self.clone
  end
end
|
|
||||||
class Symbol
  # Deep-clone hook used by REXML::XPathParser; a symbol is returned as is.
  def dclone
    self
  end
end
|
|
||||||
class Integer
  # Deep-clone hook used by REXML::XPathParser; an integer is returned as
  # is.
  def dclone
    self
  end
end
|
|
||||||
class Float
  # Deep-clone hook used by REXML::XPathParser; a float is returned as is.
  def dclone
    self
  end
end
|
|
||||||
class Array
  # Deep-clone hook used by REXML::XPathParser: returns a clone of this
  # array whose elements have been replaced by their own +dclone+.
  def dclone
    # Start from an emptied clone (rather than a fresh Array) and refill it
    # with deep copies of the elements.
    each_with_object(clone.clear) { |item, copy| copy << item.dclone }
  end
end
|
|
||||||
|
|
||||||
module REXML
|
|
||||||
# You don't want to use this class. Really. Use XPath, which is a wrapper
|
|
||||||
# for this class. Believe me. You don't want to poke around in here.
|
|
||||||
# There is strange, dark magic at work in this code. Beware. Go back! Go
|
|
||||||
# back while you still can!
|
|
||||||
class XPathParser
|
|
||||||
include XMLTokens
|
|
||||||
LITERAL = /^'([^']*)'|^"([^"]*)"/u
|
|
||||||
|
|
||||||
DEBUG = (ENV["REXML_XPATH_PARSER_DEBUG"] == "true")
|
|
||||||
|
|
||||||
# Creates a parser with no namespace mapping and no variables.
# strict:: stored in @strict and reported by #strict?
def initialize(strict: false)
  @strict = strict
  @debug = DEBUG
  @nest = 0
  @namespaces = nil
  @variables = {}
  @parser = REXML::Parsers::XPathParser.new
end
|
|
||||||
|
|
||||||
# Installs +namespaces+ as the prefix mapping, both on the Functions module
# and on this parser.
def namespaces=( namespaces={} )
  Functions.namespace_context = namespaces
  @namespaces = namespaces
end
|
|
||||||
|
|
||||||
# Installs +vars+ as the variable bindings, both on the Functions module
# and on this parser.
def variables=( vars={} )
  Functions.variables = vars
  @variables = vars
end
|
|
||||||
|
|
||||||
# Parses the xpath string +path+ and evaluates it against +nodeset+ via
# #match.
def parse(path, nodeset)
  match(@parser.parse(path), nodeset)
end
|
|
||||||
|
|
||||||
# Parses the xpath string +path+ and hands the result to #first together
# with +nodeset+.
def get_first(path, nodeset)
  first(@parser.parse(path), nodeset)
end
|
|
||||||
|
|
||||||
# Parses +path+ and evaluates it against +nodeset+ via #match (same steps
# as #parse).
def predicate(path, nodeset)
  match(@parser.parse(path), nodeset)
end
|
|
||||||
|
|
||||||
# Binds +value+ to +variable_name+ in this parser's variable table.
def []=(variable_name, value)
  @variables[variable_name] = value
end
|
|
||||||
|
|
||||||
|
|
||||||
# Performs a depth-first (document order) XPath search, and returns the
|
|
||||||
# first match. This is the fastest, lightest way to return a single result.
|
|
||||||
#
|
|
||||||
# FIXME: This method is incomplete!
|
|
||||||
def first( path_stack, node )
  # The body used to read an undefined local +path+, so every call raised
  # NameError; it now works on the +path_stack+ parameter.
  #
  # path_stack:: parsed path; only :document, :child, :qname,
  #              :descendant_or_self, :node and :any steps are handled
  # node::       context node; must answer +name+ and +children+ for the
  #              steps that use them
  # Returns the first matching node in document order, or nil.
  return nil if path_stack.size == 0

  rest = path_stack[1..-1]
  case path_stack[0]
  when :document, :node, :any
    # These steps do not narrow the context; continue with the rest.
    return first( rest, node )
  when :child
    node.children.each do |c|
      r = first( rest, c )
      return r if r
    end
  when :qname
    # A qname step occupies three slots; the name is the third one.
    name = path_stack[2]
    return nil unless node.name == name
    return node if path_stack.size == 3
    return first( path_stack[3..-1], node )
  when :descendant_or_self
    # Try the node itself, then search its children with the same path.
    r = first( rest, node )
    return r if r
    node.children.each do |c|
      r = first( path_stack, c )
      return r if r
    end
  end
  nil
end
|
|
||||||
|
|
||||||
|
|
||||||
# Evaluates the parsed +path_stack+ against +nodeset+.
#
# Each node is wrapped in an XPathNode carrying its 1-based position before
# evaluation. A node-set result (Array) is unwrapped with +unnode+; any
# other result is returned inside a one-element array.
def match(path_stack, nodeset)
  wrapped = nodeset.each_with_index.collect do |raw, index|
    XPathNode.new(raw, position: index + 1)
  end
  result = expr(path_stack, wrapped)
  result.kind_of?(Array) ? unnode(result) : [result]
end
|
|
||||||
|
|
||||||
private
|
|
||||||
# The +strict+ value this parser was created with.
def strict?
  @strict
end
|
|
||||||
|
|
||||||
# Returns a String namespace for a node, given a prefix
|
|
||||||
# The rules are:
|
|
||||||
#
|
|
||||||
# 1. Use the supplied namespace mapping first.
|
|
||||||
# 2. If no mapping was supplied, use the context node to look up the namespace
|
|
||||||
def get_namespace( node, prefix )
  # An explicit mapping always wins; unknown prefixes map to ''.
  return @namespaces[prefix] || '' if @namespaces

  # Without a mapping only element nodes are asked for their namespace.
  node.node_type == :element ? node.namespace( prefix ) : ''
end
|
|
||||||
|
|
||||||
|
|
||||||
# Evaluates a parsed XPath expression.
#
# path_stack:: Array of parsed operators and operands. It is consumed
#              destructively (+shift+ / +clear+) during evaluation.
# nodeset::    Array of context nodes (XPathNode wrappers in the axis
#              branches; the function branch also tolerates raw nodes).
# context::    optional evaluation context passed on to function calls;
#              when nil one is built from the single context node.
#
# Axis operators narrow +nodeset+ through #step and keep looping; literal,
# variable, comparison, boolean, arithmetic, union, negation and function
# operators return their value immediately. When the stack is exhausted the
# current node set is returned; an empty node set short-circuits to [].
def expr(path_stack, nodeset, context = nil)
  enter(:expr, path_stack, nodeset) if @debug
  return nodeset if path_stack.length.zero? || nodeset.length.zero?

  until path_stack.length.zero?
    trace(:while, path_stack, nodeset) if @debug
    if nodeset.length.zero?
      path_stack.clear
      return []
    end

    op = path_stack.shift
    case op
    when :document
      document_root = nodeset.first.raw_node.root_node
      nodeset = [XPathNode.new(document_root, position: 1)]
    when :self
      nodeset = step(path_stack) { [nodeset] }
    when :child
      nodeset = step(path_stack) { child(nodeset) }
    when :literal
      trace(:literal, path_stack, nodeset) if @debug
      return path_stack.shift
    when :attribute
      nodeset = step(path_stack, any_type: :attribute) do
        nodeset.each_with_object([]) do |context_node, groups|
          element = context_node.raw_node
          next unless element.node_type == :element

          attributes = element.attributes
          next if attributes.empty?

          wrapped = attributes.each_attribute.map.with_index(1) do |attribute, position|
            XPathNode.new(attribute, position: position)
          end
          groups << wrapped
        end
      end
    when :namespace
      pre_defined_namespaces = {
        "xml" => "http://www.w3.org/XML/1998/namespace",
      }
      nodeset = step(path_stack, any_type: :namespace) do
        nodeset.each_with_object([]) do |context_node, groups|
          raw = context_node.raw_node
          # A supplied mapping wins over the declarations found on the node.
          case raw.node_type
          when :element
            groups << pre_defined_namespaces.merge(@namespaces || raw.namespaces)
          when :attribute
            groups << pre_defined_namespaces.merge(@namespaces || raw.element.namespaces)
          end
        end
      end
    when :parent
      nodeset = step(path_stack) do
        nodeset.each_with_object([]) do |context_node, groups|
          raw = context_node.raw_node
          # Attributes reach their owner through #element, not #parent.
          parent = raw.node_type == :attribute ? raw.element : raw.parent
          groups << [XPathNode.new(parent, position: 1)] if parent
        end
      end
    when :ancestor
      nodeset = step(path_stack) do
        nodeset.each_with_object([]) do |context_node, groups|
          current = context_node.raw_node
          chain = []
          while current.parent
            current = current.parent
            chain << XPathNode.new(current, position: chain.size + 1)
          end
          groups << chain unless chain.empty?
        end
      end
    when :ancestor_or_self
      nodeset = step(path_stack) do
        nodeset.each_with_object([]) do |context_node, groups|
          current = context_node.raw_node
          next unless current.node_type == :element

          chain = [XPathNode.new(current, position: 1)]
          while current.parent
            current = current.parent
            chain << XPathNode.new(current, position: chain.size + 1)
          end
          groups << chain unless chain.empty?
        end
      end
    when :descendant_or_self
      nodeset = step(path_stack) { descendant(nodeset, true) }
    when :descendant
      nodeset = step(path_stack) { descendant(nodeset, false) }
    when :following_sibling
      nodeset = step(path_stack) do
        nodeset.each_with_object([]) do |context_node, groups|
          raw = context_node.raw_node
          next unless raw.respond_to?(:parent)
          next if raw.parent.nil?

          siblings = raw.parent.children
          own_index = siblings.index(raw)
          later = siblings[(own_index + 1)..-1]
          next if later.empty?

          wrapped = later.map.with_index(1) do |sibling, position|
            XPathNode.new(sibling, position: position)
          end
          groups << wrapped
        end
      end
    when :preceding_sibling
      nodeset = step(path_stack, order: :reverse) do
        nodeset.each_with_object([]) do |context_node, groups|
          raw = context_node.raw_node
          next unless raw.respond_to?(:parent)
          next if raw.parent.nil?

          siblings = raw.parent.children
          own_index = siblings.index(raw)
          # Nearest sibling first, so positions count backwards from the node.
          earlier = siblings[0, own_index].reverse
          next if earlier.empty?

          wrapped = earlier.map.with_index(1) do |sibling, position|
            XPathNode.new(sibling, position: position)
          end
          groups << wrapped
        end
      end
    when :preceding
      nodeset = step(path_stack, order: :reverse) do
        unnode(nodeset) { |raw| preceding(raw) }
      end
    when :following
      nodeset = step(path_stack) do
        unnode(nodeset) { |raw| following(raw) }
      end
    when :variable
      var_name = path_stack.shift
      return [@variables[var_name]]

    when :eq, :neq, :lt, :lteq, :gt, :gteq
      # Each operand is its own sub-expression, evaluated on a copy of the
      # current node set.
      left = expr(path_stack.shift, nodeset.dup, context)
      right = expr(path_stack.shift, nodeset.dup, context)
      res = equality_relational_compare(left, op, right)
      trace(op, left, right, res) if @debug
      return res

    when :or
      left = expr(path_stack.shift, nodeset.dup, context)
      return true if Functions.boolean(left)

      right = expr(path_stack.shift, nodeset.dup, context)
      return Functions.boolean(right)

    when :and
      left = expr(path_stack.shift, nodeset.dup, context)
      return false unless Functions.boolean(left)

      right = expr(path_stack.shift, nodeset.dup, context)
      return Functions.boolean(right)

    when :div, :mod, :mult, :plus, :minus
      left = expr(path_stack.shift, nodeset, context)
      right = expr(path_stack.shift, nodeset, context)
      left = unnode(left) if left.is_a?(Array)
      right = unnode(right) if right.is_a?(Array)
      left = Functions::number(left)
      right = Functions::number(right)
      case op
      when :div   then return left / right
      when :mod   then return left % right
      when :mult  then return left * right
      when :plus  then return left + right
      when :minus then return left - right
      else
        raise "[BUG] Unexpected operator: <#{op.inspect}>"
      end
    when :union
      left = expr(path_stack.shift, nodeset, context)
      right = expr(path_stack.shift, nodeset, context)
      left = unnode(left) if left.is_a?(Array)
      right = unnode(right) if right.is_a?(Array)
      return (left | right)
    when :neg
      res = expr(path_stack, nodeset, context)
      res = unnode(res) if res.is_a?(Array)
      return -Functions.number(res)
    when :not
      # Intentionally empty: the operator is consumed and evaluation continues.
    when :function
      func_name = path_stack.shift.tr('-','_')
      arguments = path_stack.shift

      if nodeset.size != 1
        raise "[BUG] Node set size must be 1 for function call: " \
              "<#{func_name}>: <#{nodeset.inspect}>: " \
              "<#{arguments.inspect}>"
      end

      target_context = context
      unless target_context
        context_node = nodeset.first
        target_context = { :size => nodeset.size }
        if context_node.is_a?(XPathNode)
          target_context[:node] = context_node.raw_node
          target_context[:index] = context_node.position
        else
          target_context[:node] = context_node
          target_context[:index] = 1
        end
      end
      # Arguments are evaluated on a deep copy because evaluation consumes
      # the expression it is given.
      args = arguments.dclone.map do |arg|
        value = expr(arg, nodeset, target_context)
        value.is_a?(Array) ? unnode(value) : value
      end
      Functions.context = target_context
      return Functions.send(func_name, *args)

    else
      raise "[BUG] Unexpected path: <#{op.inspect}>: <#{path_stack.inspect}>"
    end
  end
  return nodeset
ensure
  leave(:expr, path_stack, nodeset) if @debug
end
|
|
||||||
|
|
||||||
# Runs one location step.
#
# The block supplies the candidate node sets for the axis (one inner
# collection per context node). They are narrowed by the node test at the
# head of +path_stack+ and then by every directly following :predicate
# entry. A single surviving node set keeps its own order; several are
# flattened to raw nodes and ordered with #sort according to +order+.
#
# Returns a fresh Array of XPathNode with positions renumbered from 1.
def step(path_stack, any_type: :element, order: :forward)
  nodesets = yield
  begin
    enter(:step, path_stack, nodesets) if @debug
    nodesets = node_test(path_stack, nodesets, any_type: any_type)
    while path_stack[0] == :predicate
      path_stack.shift # drop the :predicate marker
      nodesets = evaluate_predicate(path_stack.shift.dclone, nodesets)
    end

    if nodesets.size == 1
      ordered_nodeset = nodesets[0]
    else
      raw_nodes = nodesets.each_with_object([]) do |candidates, collected|
        candidates.each do |candidate|
          collected << (candidate.respond_to?(:raw_node) ? candidate.raw_node : candidate)
        end
      end
      ordered_nodeset = sort(raw_nodes, order)
    end

    new_nodeset = []
    ordered_nodeset.each do |node|
      # TODO: Remove duplicated
      new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
    end
    new_nodeset
  ensure
    leave(:step, path_stack, new_nodeset) if @debug
  end
end
|
|
||||||
|
|
||||||
# Applies the node test at the head of +path_stack+ to every node set.
#
# The operator (and its operands, if any) is shifted off +path_stack+.
# Supported tests: :qname (prefix + local name), :namespace (prefix),
# :any (nodes whose type equals +any_type+), :comment, :text,
# :processing_instruction (optional target) and :node (everything).
#
# Returns one filtered node set per input node set, each renumbered by
# #filter_nodeset. Raises for an unknown operator.
def node_test(path_stack, nodesets, any_type: :element)
  enter(:node_test, path_stack, nodesets) if @debug
  operator = path_stack.shift
  case operator
  when :qname
    prefix = path_stack.shift
    name = path_stack.shift
    new_nodesets = nodesets.map do |candidates|
      filter_nodeset(candidates) do |candidate|
        raw = candidate.raw_node
        case raw.node_type
        when :element
          if prefix.nil?
            raw.name == name
          elsif prefix.empty? && strict?
            raw.name == name && raw.namespace == ""
          else
            # FIXME: This DOUBLES the time XPath searches take
            ns = get_namespace(raw, prefix)
            raw.name == name && raw.namespace == ns
          end
        when :attribute
          if prefix.nil?
            raw.name == name
          elsif prefix.empty?
            raw.name == name && raw.namespace == ""
          else
            # FIXME: This DOUBLES the time XPath searches take
            ns = get_namespace(raw.element, prefix)
            raw.name == name && raw.namespace == ns
          end
        else
          false
        end
      end
    end
  when :namespace
    prefix = path_stack.shift
    new_nodesets = nodesets.map do |candidates|
      filter_nodeset(candidates) do |candidate|
        raw = candidate.raw_node
        case raw.node_type
        when :element
          namespaces = @namespaces || raw.namespaces
          raw.namespace == namespaces[prefix]
        when :attribute
          namespaces = @namespaces || raw.element.namespaces
          raw.namespace == namespaces[prefix]
        else
          false
        end
      end
    end
  when :any
    new_nodesets = nodesets.map do |candidates|
      filter_nodeset(candidates) { |candidate| candidate.raw_node.node_type == any_type }
    end
  when :comment
    new_nodesets = nodesets.map do |candidates|
      filter_nodeset(candidates) { |candidate| candidate.raw_node.node_type == :comment }
    end
  when :text
    new_nodesets = nodesets.map do |candidates|
      filter_nodeset(candidates) { |candidate| candidate.raw_node.node_type == :text }
    end
  when :processing_instruction
    target = path_stack.shift
    new_nodesets = nodesets.map do |candidates|
      filter_nodeset(candidates) do |candidate|
        raw = candidate.raw_node
        # An empty target matches every processing instruction.
        (raw.node_type == :processing_instruction) &&
          (target.empty? || (raw.target == target))
      end
    end
  when :node
    new_nodesets = nodesets.map do |candidates|
      filter_nodeset(candidates) { |_candidate| true }
    end
  else
    message = "[BUG] Unexpected node test: " +
      "<#{operator.inspect}>: <#{path_stack.inspect}>"
    raise message
  end
  new_nodesets
ensure
  leave(:node_test, path_stack, new_nodesets) if @debug
end
|
|
||||||
|
|
||||||
# Keeps the entries of +nodeset+ for which the block returns a truthy
# value. Survivors are re-wrapped in XPathNode with consecutive positions
# starting at 1.
def filter_nodeset(nodeset)
  nodeset.each_with_object([]) do |node, kept|
    next unless yield(node)

    kept << XPathNode.new(node, position: kept.size + 1)
  end
end
|
|
||||||
|
|
||||||
# Filters every node set in +nodesets+ with the predicate +expression+.
#
# For each node a context Hash (:size, :node, :index) is built and a deep
# copy of +expression+ is evaluated against it. A one-element Array result
# is first reduced to its single element. The node is then kept when
#
# * the result is Numeric and equals the node's context position,
# * the result is an Array containing at least one truthy element, or
# * the result is any other truthy value.
#
# The context position is +node.position+ for XPathNode entries and the
# 1-based index for raw entries; the same value is used for the numeric
# comparison, so raw entries no longer fail on a missing #position.
#
# Returns one filtered node set per input node set, renumbered from 1.
def evaluate_predicate(expression, nodesets)
  enter(:predicate, expression, nodesets) if @debug
  new_nodesets = nodesets.collect do |nodeset|
    new_nodeset = []
    subcontext = { :size => nodeset.size }
    nodeset.each_with_index do |node, index|
      if node.is_a?(XPathNode)
        subcontext[:node] = node.raw_node
        position = node.position
      else
        subcontext[:node] = node
        position = index + 1
      end
      subcontext[:index] = position

      result = expr(expression.dclone, [node], subcontext)
      trace(:predicate_evaluate, expression, node, subcontext, result) if @debug
      result = result[0] if result.kind_of?(Array) && result.length == 1

      selected =
        if result.kind_of?(Numeric)
          result == position
        elsif result.instance_of?(Array)
          result.any?
        else
          result
        end
      new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) if selected
    end
    new_nodeset
  end
  new_nodesets
ensure
  leave(:predicate, new_nodesets) if @debug
end
|
|
||||||
|
|
||||||
# Debug helper: pretty-prints +args+ and writes every resulting line with
# +puts+, prefixed by one indent unit per current nesting level (+@nest+).
def trace(*args)
  prefix = " " * @nest
  rendered = PP.pp(args, "")
  rendered.each_line { |line| puts("#{prefix}#{line}") }
end
|
|
||||||
|
|
||||||
# Debug helper: traces an :enter record for +tag+ at the current nesting
# level, then increases the level so nested traces are indented deeper.
def enter(tag, *args)
  trace(:enter, tag, *args)
  @nest += 1
end
|
|
||||||
|
|
||||||
# Debug helper: decreases the nesting level first, then traces a :leave
# record for +tag+ so it lines up with the matching :enter record.
def leave(tag, *args)
  @nest -= 1
  trace(:leave, tag, *args)
end
|
|
||||||
|
|
||||||
# Orders an array of raw nodes by their position in the document tree.
#
# Each node gets a sort key: the list of child indexes leading from its
# outermost element ancestor down to the node (attributes use the key of
# their owning element). The walk stops at the first parent that is not an
# element, so nodes directly below a non-element parent get an empty key.
#
# array_of_nodes:: raw nodes (not XPathNode wrappers)
# order::          :forward sorts by the key as is; any other value sorts
#                  by the key with every index negated, which reverses the
#                  order of siblings.
#
# The reverse case used to apply unary minus to the whole key Array, which
# Array does not implement, so it raised NoMethodError; the indexes are now
# negated one by one.
#
# FIXME: I need to get rid of this, but the issue is that most of the XPath
# interpreter functions as a filter, which means that we lose context going
# in and out of function calls. If I knew what the index of the nodes was,
# I wouldn't have to do this. Maybe add a document IDX for each node?
# Problems with mutable documents. Or, rewrite everything.
def sort(array_of_nodes, order)
  keyed = array_of_nodes.map do |node|
    path = []
    current = node.node_type == :attribute ? node.element : node
    while current.parent && current.parent.node_type == :element
      path << current.parent.index(current)
      current = current.parent
    end
    path.reverse!
    path = path.map(&:-@) unless order == :forward
    [path, node]
  end
  keyed.sort_by(&:first).map(&:last)
end
|
|
||||||
|
|
||||||
# Collects the descendants of every node in +nodeset+.
#
# One node set is built per context node via #descendant_recursive, each
# with its own duplicate-tracking Hash; context nodes that yield nothing
# are skipped. +include_self+ decides whether the context node itself is
# part of its result.
def descendant(nodeset, include_self)
  nodeset.each_with_object([]) do |node, groups|
    found = []
    descendant_recursive(node.raw_node, found, {}, include_self)
    groups << found unless found.empty?
  end
end
|
|
||||||
|
|
||||||
# Depth-first walk that appends +raw_node+ (when +include_self+ is set) and
# all of its descendants to +new_nodeset+ as XPathNode entries.
#
# +new_nodes+ is a Hash of the raw nodes already emitted; a node found
# there is skipped together with its subtree. Only element and document
# nodes are descended into, and every child is visited with
# +include_self+ = true.
def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self)
  if include_self
    return if new_nodes.key?(raw_node)

    new_nodeset << XPathNode.new(raw_node, position: new_nodeset.size + 1)
    new_nodes[raw_node] = true
  end

  return unless [:element, :document].include?(raw_node.node_type)

  raw_node.children.each do |child|
    descendant_recursive(child, new_nodeset, new_nodes, true)
  end
end
|
|
||||||
|
|
||||||
# Builds the node set for the preceding:: axis of +node+: every node that
# #preceding_node_of reaches when walking backwards from +node+, nearest
# first, with the ancestors of +node+ left out.
#
# Returns an Array of XPathNode numbered from 1 in that (reverse) order.
def preceding(node)
  ancestors = []
  ancestor = node.parent
  until ancestor.nil? || ancestor == false
    ancestors << ancestor
    ancestor = ancestor.parent
  end

  precedings = []
  cursor = preceding_node_of(node)
  while cursor
    if ancestors.include?(cursor)
      # Each ancestor is skipped once and then forgotten.
      ancestors.delete(cursor)
    else
      precedings << XPathNode.new(cursor, position: precedings.size + 1)
    end
    cursor = preceding_node_of(cursor)
  end
  precedings
end
|
|
||||||
|
|
||||||
# Returns the node visited just before +node+ when walking the tree
# backwards, or nil when there is none.
#
# Without a previous sibling the parent is returned, unless the parent is
# missing or is exactly a Document. With a previous sibling the walk
# descends through last children while the current node is an Element
# with children, and returns the node it ends on.
def preceding_node_of(node)
  candidate = node.previous_sibling_node
  if candidate.nil?
    container = node.parent
    return nil if container.nil? || container.class == Document

    return container
  end

  while candidate && candidate.kind_of?(Element) && candidate.children.size > 0
    candidate = candidate.children[-1]
  end
  candidate
end
|
|
||||||
|
|
||||||
# Builds the node set for the following:: axis of +node+.
#
# The walk starts at #next_sibling_node of +node+ (so the node's own
# descendants are skipped) and advances with #following_node_of until that
# returns a falsy value. Returns an Array of XPathNode numbered from 1.
def following(node)
  followings = []
  cursor = next_sibling_node(node)
  while cursor
    followings << XPathNode.new(cursor, position: followings.size + 1)
    cursor = following_node_of(cursor)
  end
  followings
end
|
|
||||||
|
|
||||||
# Returns the node visited right after +node+ when walking the tree
# forwards: the first child when +node+ is an Element with children,
# otherwise whatever #next_sibling_node finds.
def following_node_of(node)
  has_children = node.kind_of?(Element) && node.children.size > 0
  has_children ? node.children[0] : next_sibling_node(node)
end
|
|
||||||
|
|
||||||
# Finds the next sibling of +node+, climbing to the parent (and its next
# sibling, and so on) while the current node has none.
#
# Returns nil once the climb reaches a node whose parent is missing or is
# exactly a Document.
def next_sibling_node(node)
  current = node
  loop do
    sibling = current.next_sibling_node
    return sibling unless sibling.nil?

    container = current.parent
    return nil if container.nil? || container.class == Document

    current = container
  end
end
|
|
||||||
|
|
||||||
# Builds the node sets for the child:: axis.
#
# * An element contributes all of its children (an empty node set is still
#   added when it has none).
# * A document contributes its children except XMLDecl and Text entries,
#   and only when something remains.
# * Every other node type contributes nothing.
#
# Each node set is an Array of XPathNode numbered from 1.
def child(nodeset)
  nodeset.each_with_object([]) do |context_node, groups|
    raw = context_node.raw_node
    case raw.node_type
    when :element
      groups << raw.children.map.with_index(1) do |kid, position|
        XPathNode.new(kid, position: position)
      end
    when :document
      visible = raw.children.reject { |kid| XMLDecl === kid || Text === kid }
      wrapped = visible.map.with_index(1) do |kid, position|
        XPathNode.new(kid, position: position)
      end
      groups << wrapped unless wrapped.empty?
    end
  end
end
|
|
||||||
|
|
||||||
# Normalizes +b+ to an XPath primitive.
#
# * true / false are returned unchanged.
# * The strings 'true' and 'false' go through Functions::boolean.
# * Numerics, and strings that consist entirely of an unsigned decimal
#   number, go through Functions::number.
# * Everything else goes through Functions::string.
#
# The numeric-string check is anchored to the whole string (\A ... \z),
# matching the check in #equality_relational_compare. The previous
# line-based anchors treated any multi-line string containing a numeric
# line as a number.
def norm(b)
  case b
  when true, false
    b
  when 'true', 'false'
    Functions::boolean(b)
  when /\A\d+(\.\d+)?\z/, Numeric
    Functions::number(b)
  else
    Functions::string(b)
  end
end
|
|
||||||
|
|
||||||
# Compares two XPath values with +op+ (:eq, :neq, :lt, :lteq, :gt, :gteq).
#
# Array operands are treated as node-sets and unwrapped with #unnode first.
#
# * Two node-sets: true if some pair (one node from each) compares true on
#   the string values of the nodes.
# * One node-set and one scalar: true if some node compares true against
#   the scalar. Nodes are converted to the scalar's kind: boolean for
#   true/false, number for Numerics and for strings that are entirely an
#   unsigned decimal number (the scalar is converted too), string
#   otherwise.
# * Two scalars: delegated to #compare.
#
# Operand order is preserved in the mixed case: with the node-set on the
# right, +scalar op node+ is evaluated. Previously the operands were
# swapped and never swapped back, so relational operators gave the
# mirrored answer (e.g. "2 < nodeset" was evaluated as "node < 2").
def equality_relational_compare(set1, op, set2)
  set1 = unnode(set1) if set1.is_a?(Array)
  set2 = unnode(set2) if set2.is_a?(Array)

  left_is_set = set1.kind_of?(Array)
  right_is_set = set2.kind_of?(Array)

  if left_is_set && right_is_set
    set1.product(set2).any? do |node1, node2|
      compare(Functions.string(node1), op, Functions.string(node2))
    end
  elsif left_is_set || right_is_set
    nodes, scalar = left_is_set ? [set1, set2] : [set2, set1]

    case scalar
    when true, false
      convert = ->(item) { Functions.boolean(item) }
    when Numeric
      convert = ->(item) { Functions.number(item) }
    when /\A\d+(\.\d+)?\z/
      scalar = Functions.number(scalar)
      convert = ->(item) { Functions.number(item) }
    else
      scalar = Functions::string(scalar)
      convert = ->(item) { Functions::string(item) }
    end

    each_unnode(nodes).any? do |item|
      value = convert.call(item)
      # Keep each operand on the side it was written on.
      left_is_set ? compare(value, op, scalar) : compare(scalar, op, value)
    end
  else
    compare(set1, op, set2)
  end
end
|
|
||||||
|
|
||||||
# Classifies a scalar XPath value as :boolean, :number or :string.
# Raises for anything else, since callers are expected to pass only those
# three kinds.
def value_type(value)
  return :boolean if true.equal?(value) || false.equal?(value)
  return :number if value.is_a?(Numeric)
  return :string if value.is_a?(String)

  raise "[BUG] Unexpected value type: <#{value.inspect}>"
end
|
|
||||||
|
|
||||||
# Brings the two operands of a comparison to a common type.
#
# * :eq / :neq - boolean if either operand is a boolean, else number if
#   either is a number, else string.
# * :lt / :lteq / :gt / :gteq - always number.
#
# An operand already of the target type is passed through untouched; the
# other one is converted with the matching Functions helper. Returns the
# pair [a, b]; raises for an unknown operator.
def normalize_compare_values(a, operator, b)
  a_type = value_type(a)
  b_type = value_type(b)
  kinds = [a_type, b_type]

  target =
    case operator
    when :eq, :neq
      if kinds.include?(:boolean)
        :boolean
      elsif kinds.include?(:number)
        :number
      else
        :string
      end
    when :lt, :lteq, :gt, :gteq
      :number
    else
      message = "[BUG] Unexpected compare operator: " +
        "<#{operator.inspect}>: <#{a.inspect}>: <#{b.inspect}>"
      raise message
    end

  coerce = lambda do |value, kind|
    next value if kind == target

    case target
    when :boolean then Functions.boolean(value)
    when :number then Functions.number(value)
    else Functions.string(value)
    end
  end

  [coerce.call(a, a_type), coerce.call(b, b_type)]
end
|
|
||||||
|
|
||||||
# Compares two scalar values with +operator+ after bringing them to a
# common type through #normalize_compare_values.
#
# Returns the result of the corresponding Ruby operator; raises for an
# operator outside :eq, :neq, :lt, :lteq, :gt, :gteq.
def compare(a, operator, b)
  lhs, rhs = normalize_compare_values(a, operator, b)
  case operator
  when :eq   then lhs == rhs
  when :neq  then lhs != rhs
  when :lt   then lhs < rhs
  when :lteq then lhs <= rhs
  when :gt   then lhs > rhs
  when :gteq then lhs >= rhs
  else
    message = "[BUG] Unexpected compare operator: " +
      "<#{operator.inspect}>: <#{lhs.inspect}>: <#{rhs.inspect}>"
    raise message
  end
end
|
|
||||||
|
|
||||||
# Yields the raw node behind every entry of +nodeset+: XPathNode entries
# are unwrapped, anything else is yielded as is. Without a block an
# Enumerator over the same sequence is returned.
def each_unnode(nodeset)
  return to_enum(__method__, nodeset) unless block_given?

  nodeset.each do |entry|
    yield(entry.is_a?(XPathNode) ? entry.raw_node : entry)
  end
end
|
|
||||||
|
|
||||||
# Returns an Array of the raw nodes behind +nodeset+ (see #each_unnode).
# When a block is given, each raw node is replaced by the block's result.
def unnode(nodeset)
  raw_nodes = each_unnode(nodeset)
  return raw_nodes.to_a unless block_given?

  raw_nodes.map { |raw| yield(raw) }
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# @private
#
# Pairs a raw document node with the context it was selected in (notably
# its position within the current node set).
class XPathNode
  attr_reader :raw_node, :context

  # node::    a raw node, or another XPathNode whose raw node is reused so
  #           wrappers never nest.
  # context:: Hash of context data (e.g. +position:+); defaults to an
  #           empty Hash.
  def initialize(node, context = nil)
    @raw_node = node.is_a?(XPathNode) ? node.raw_node : node
    @context = context || {}
  end

  # The :position entry of the context, or nil when none was given.
  def position
    @context[:position]
  end
end
|
|
||||||
end
|
|
||||||
Loading…
x
Reference in New Issue
Block a user