Merge pull request #8309 from Bo98/rexml

vendor/bundle/ruby: unvendor REXML
This commit is contained in:
Mike McQuaid 2020-08-12 10:53:34 +01:00 committed by GitHub
commit 845d3a2f20
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
50 changed files with 1 additions and 9398 deletions

1
.gitignore vendored
View File

@ -117,6 +117,7 @@
**/vendor/bundle/ruby/*/gems/rainbow-*/
**/vendor/bundle/ruby/*/gems/rdiscount-*/
**/vendor/bundle/ruby/*/gems/regexp_parser-*/
**/vendor/bundle/ruby/*/gems/rexml-*/
**/vendor/bundle/ruby/*/gems/ronn-*/
**/vendor/bundle/ruby/*/gems/rspec-*/
**/vendor/bundle/ruby/*/gems/rspec-core-*/

View File

@ -1,63 +0,0 @@
# frozen_string_literal: false
#vim:ts=2 sw=2 noexpandtab:
require_relative 'child'
require_relative 'source'
module REXML
# This class needs:
# * Documentation
# * Work! Not all types of attlists are intelligently parsed, so we just
# spew back out what we get in. This works, but it would be better if
# we formatted the output ourselves.
#
# AttlistDecls provide *just* enough support to allow namespace
# declarations. If you need some sort of generalized support, or have an
# interesting idea about how to map the hideous, terrible design of DTD
# AttlistDecls onto an intuitive Ruby interface, let me know. I'm desperate
# for anything to make DTDs more palateable.
class AttlistDecl < Child
include Enumerable
# What is this? Got me.
attr_reader :element_name
# Create an AttlistDecl, pulling the information from a Source. Notice
# that this isn't very convenient; to create an AttlistDecl, you basically
# have to format it yourself, and then have the initializer parse it.
# Sorry, but for the foreseeable future, DTD support in REXML is pretty
# weak on convenience. Have I mentioned how much I hate DTDs?
def initialize(source)
super()
if (source.kind_of? Array)
@element_name, @pairs, @contents = *source
end
end
# Access the attlist attribute/value pairs.
# value = attlist_decl[ attribute_name ]
def [](key)
@pairs[key]
end
# Whether an attlist declaration includes the given attribute definition
# if attlist_decl.include? "xmlns:foobar"
def include?(key)
@pairs.keys.include? key
end
# Iterate over the key/value pairs:
# attlist_decl.each { |attribute_name, attribute_value| ... }
def each(&block)
@pairs.each(&block)
end
# Write out exactly what we got in.
def write out, indent=-1
out << @contents
end
def node_type
:attlistdecl
end
end
end

View File

@ -1,205 +0,0 @@
# frozen_string_literal: false
require_relative "namespace"
require_relative 'text'
module REXML
# Defines an Element Attribute; IE, a attribute=value pair, as in:
# <element attribute="value"/>. Attributes can be in their own
# namespaces. General users of REXML will not interact with the
# Attribute class much.
class Attribute
include Node
include Namespace
# The element to which this attribute belongs
attr_reader :element
# The normalized value of this attribute. That is, the attribute with
# entities intact.
attr_writer :normalized
PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
# Constructor.
# FIXME: The parser doesn't catch illegal characters in attributes
#
# first::
# Either: an Attribute, which this new attribute will become a
# clone of; or a String, which is the name of this attribute
# second::
# If +first+ is an Attribute, then this may be an Element, or nil.
# If nil, then the Element parent of this attribute is the parent
# of the +first+ Attribute. If the first argument is a String,
# then this must also be a String, and is the content of the attribute.
# If this is the content, it must be fully normalized (contain no
# illegal characters).
# parent::
# Ignored unless +first+ is a String; otherwise, may be the Element
# parent of this attribute, or nil.
#
#
# Attribute.new( attribute_to_clone )
# Attribute.new( attribute_to_clone, parent_element )
# Attribute.new( "attr", "attr_value" )
# Attribute.new( "attr", "attr_value", parent_element )
def initialize( first, second=nil, parent=nil )
@normalized = @unnormalized = @element = nil
if first.kind_of? Attribute
self.name = first.expanded_name
@unnormalized = first.value
if second.kind_of? Element
@element = second
else
@element = first.element
end
elsif first.kind_of? String
@element = parent
self.name = first
@normalized = second.to_s
else
raise "illegal argument #{first.class.name} to Attribute constructor"
end
end
# Returns the namespace of the attribute.
#
# e = Element.new( "elns:myelement" )
# e.add_attribute( "nsa:a", "aval" )
# e.add_attribute( "b", "bval" )
# e.attributes.get_attribute( "a" ).prefix # -> "nsa"
# e.attributes.get_attribute( "b" ).prefix # -> ""
# a = Attribute.new( "x", "y" )
# a.prefix # -> ""
def prefix
super
end
# Returns the namespace URL, if defined, or nil otherwise
#
# e = Element.new("el")
# e.add_namespace("ns", "http://url")
# e.add_attribute("ns:a", "b")
# e.add_attribute("nsx:a", "c")
# e.attribute("ns:a").namespace # => "http://url"
# e.attribute("nsx:a").namespace # => nil
#
# This method always returns "" for no namespace attribute. Because
# the default namespace doesn't apply to attribute names.
#
# From https://www.w3.org/TR/xml-names/#uniqAttrs
#
# > the default namespace does not apply to attribute names
#
# e = REXML::Element.new("el")
# e.add_namespace("", "http://example.com/")
# e.namespace # => "http://example.com/"
# e.add_attribute("a", "b")
# e.attribute("a").namespace # => ""
def namespace arg=nil
arg = prefix if arg.nil?
if arg == ""
""
else
@element.namespace(arg)
end
end
# Returns true if other is an Attribute and has the same name and value,
# false otherwise.
def ==( other )
other.kind_of?(Attribute) and other.name==name and other.value==value
end
# Creates (and returns) a hash from both the name and value
def hash
name.hash + value.hash
end
# Returns this attribute out as XML source, expanding the name
#
# a = Attribute.new( "x", "y" )
# a.to_string # -> "x='y'"
# b = Attribute.new( "ns:x", "y" )
# b.to_string # -> "ns:x='y'"
def to_string
if @element and @element.context and @element.context[:attribute_quote] == :quote
%Q^#@expanded_name="#{to_s().gsub(/"/, '&quot;')}"^
else
"#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
end
end
def doctype
if @element
doc = @element.document
doc.doctype if doc
end
end
# Returns the attribute value, with entities replaced
def to_s
return @normalized if @normalized
@normalized = Text::normalize( @unnormalized, doctype )
@unnormalized = nil
@normalized
end
# Returns the UNNORMALIZED value of this attribute. That is, entities
# have been expanded to their values
def value
return @unnormalized if @unnormalized
@unnormalized = Text::unnormalize( @normalized, doctype )
@normalized = nil
@unnormalized
end
# Returns a copy of this attribute
def clone
Attribute.new self
end
# Sets the element of which this object is an attribute. Normally, this
# is not directly called.
#
# Returns this attribute
def element=( element )
@element = element
if @normalized
Text.check( @normalized, NEEDS_A_SECOND_CHECK, doctype )
end
self
end
# Removes this Attribute from the tree, and returns true if successful
#
# This method is usually not called directly.
def remove
@element.attributes.delete self.name unless @element.nil?
end
# Writes this attribute (EG, puts 'key="value"' to the output)
def write( output, indent=-1 )
output << to_string
end
def node_type
:attribute
end
def inspect
rv = ""
write( rv )
rv
end
def xpath
path = @element.xpath
path += "/@#{self.expanded_name}"
return path
end
end
end
#vim:ts=2 sw=2 noexpandtab:

View File

@ -1,68 +0,0 @@
# frozen_string_literal: false
require_relative "text"
module REXML
class CData < Text
START = '<![CDATA['
STOP = ']]>'
ILLEGAL = /(\]\]>)/
# Constructor. CData is data between <![CDATA[ ... ]]>
#
# _Examples_
# CData.new( source )
# CData.new( "Here is some CDATA" )
# CData.new( "Some unprocessed data", respect_whitespace_TF, parent_element )
def initialize( first, whitespace=true, parent=nil )
super( first, whitespace, parent, false, true, ILLEGAL )
end
# Make a copy of this object
#
# _Examples_
# c = CData.new( "Some text" )
# d = c.clone
# d.to_s # -> "Some text"
def clone
CData.new self
end
# Returns the content of this CData object
#
# _Examples_
# c = CData.new( "Some text" )
# c.to_s # -> "Some text"
def to_s
@string
end
def value
@string
end
# == DEPRECATED
# See the rexml/formatters package
#
# Generates XML output of this object
#
# output::
# Where to write the string. Defaults to $stdout
# indent::
# The amount to indent this node by
# transitive::
# Ignored
# ie_hack::
# Ignored
#
# _Examples_
# c = CData.new( " Some text " )
# c.write( $stdout ) #-> <![CDATA[ Some text ]]>
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
Kernel.warn( "#{self.class.name}.write is deprecated", uplevel: 1)
indent( output, indent )
output << START
output << @string
output << STOP
end
end
end

View File

@ -1,97 +0,0 @@
# frozen_string_literal: false
require_relative "node"
module REXML
##
# A Child object is something contained by a parent, and this class
# contains methods to support that. Most user code will not use this
# class directly.
class Child
include Node
attr_reader :parent # The Parent of this object
# Constructor. Any inheritors of this class should call super to make
# sure this method is called.
# parent::
# if supplied, the parent of this child will be set to the
# supplied value, and self will be added to the parent
def initialize( parent = nil )
@parent = nil
# Declare @parent, but don't define it. The next line sets the
# parent.
parent.add( self ) if parent
end
# Replaces this object with another object. Basically, calls
# Parent.replace_child
#
# Returns:: self
def replace_with( child )
@parent.replace_child( self, child )
self
end
# Removes this child from the parent.
#
# Returns:: self
def remove
unless @parent.nil?
@parent.delete self
end
self
end
# Sets the parent of this child to the supplied argument.
#
# other::
# Must be a Parent object. If this object is the same object as the
# existing parent of this child, no action is taken. Otherwise, this
# child is removed from the current parent (if one exists), and is added
# to the new parent.
# Returns:: The parent added
def parent=( other )
return @parent if @parent == other
@parent.delete self if defined? @parent and @parent
@parent = other
end
alias :next_sibling :next_sibling_node
alias :previous_sibling :previous_sibling_node
# Sets the next sibling of this child. This can be used to insert a child
# after some other child.
# a = Element.new("a")
# b = a.add_element("b")
# c = Element.new("c")
# b.next_sibling = c
# # => <a><b/><c/></a>
def next_sibling=( other )
parent.insert_after self, other
end
# Sets the previous sibling of this child. This can be used to insert a
# child before some other child.
# a = Element.new("a")
# b = a.add_element("b")
# c = Element.new("c")
# b.previous_sibling = c
# # => <a><b/><c/></a>
def previous_sibling=(other)
parent.insert_before self, other
end
# Returns:: the document this child belongs to, or nil if this child
# belongs to no document
def document
return parent.document unless parent.nil?
nil
end
# This doesn't yet handle encodings
def bytes
document.encoding
to_s
end
end
end

View File

@ -1,80 +0,0 @@
# frozen_string_literal: false
require_relative "child"
module REXML
##
# Represents an XML comment; that is, text between \<!-- ... -->
class Comment < Child
include Comparable
START = "<!--"
STOP = "-->"
# The content text
attr_accessor :string
##
# Constructor. The first argument can be one of three types:
# @param first If String, the contents of this comment are set to the
# argument. If Comment, the argument is duplicated. If
# Source, the argument is scanned for a comment.
# @param second If the first argument is a Source, this argument
# should be nil, not supplied, or a Parent to be set as the parent
# of this object
def initialize( first, second = nil )
super(second)
if first.kind_of? String
@string = first
elsif first.kind_of? Comment
@string = first.string
end
end
def clone
Comment.new self
end
# == DEPRECATED
# See REXML::Formatters
#
# output::
# Where to write the string
# indent::
# An integer. If -1, no indenting will be used; otherwise, the
# indentation will be this number of spaces, and children will be
# indented an additional amount.
# transitive::
# Ignored by this class. The contents of comments are never modified.
# ie_hack::
# Needed for conformity to the child API, but not used by this class.
def write( output, indent=-1, transitive=false, ie_hack=false )
Kernel.warn("Comment.write is deprecated. See REXML::Formatters", uplevel: 1)
indent( output, indent )
output << START
output << @string
output << STOP
end
alias :to_s :string
##
# Compares this Comment to another; the contents of the comment are used
# in the comparison.
def <=>(other)
other.to_s <=> @string
end
##
# Compares this Comment to another; the contents of the comment are used
# in the comparison.
def ==( other )
other.kind_of? Comment and
(other <=> self) == 0
end
def node_type
:comment
end
end
end
#vim:ts=2 sw=2 noexpandtab:

View File

@ -1,287 +0,0 @@
# frozen_string_literal: false
require_relative "parent"
require_relative "parseexception"
require_relative "namespace"
require_relative 'entity'
require_relative 'attlistdecl'
require_relative 'xmltokens'
module REXML
# Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
# ... >. DOCTYPES can be used to declare the DTD of a document, as well as
# being used to declare entities used in the document.
class DocType < Parent
include XMLTokens
START = "<!DOCTYPE"
STOP = ">"
SYSTEM = "SYSTEM"
PUBLIC = "PUBLIC"
DEFAULT_ENTITIES = {
'gt'=>EntityConst::GT,
'lt'=>EntityConst::LT,
'quot'=>EntityConst::QUOT,
"apos"=>EntityConst::APOS
}
# name is the name of the doctype
# external_id is the referenced DTD, if given
attr_reader :name, :external_id, :entities, :namespaces
# Constructor
#
# dt = DocType.new( 'foo', '-//I/Hate/External/IDs' )
# # <!DOCTYPE foo '-//I/Hate/External/IDs'>
# dt = DocType.new( doctype_to_clone )
# # Incomplete. Shallow clone of doctype
#
# +Note+ that the constructor:
#
# Doctype.new( Source.new( "<!DOCTYPE foo 'bar'>" ) )
#
# is _deprecated_. Do not use it. It will probably disappear.
def initialize( first, parent=nil )
@entities = DEFAULT_ENTITIES
@long_name = @uri = nil
if first.kind_of? String
super()
@name = first
@external_id = parent
elsif first.kind_of? DocType
super( parent )
@name = first.name
@external_id = first.external_id
elsif first.kind_of? Array
super( parent )
@name = first[0]
@external_id = first[1]
@long_name = first[2]
@uri = first[3]
elsif first.kind_of? Source
super( parent )
parser = Parsers::BaseParser.new( first )
event = parser.pull
if event[0] == :start_doctype
@name, @external_id, @long_name, @uri, = event[1..-1]
end
else
super()
end
end
def node_type
:doctype
end
def attributes_of element
rv = []
each do |child|
child.each do |key,val|
rv << Attribute.new(key,val)
end if child.kind_of? AttlistDecl and child.element_name == element
end
rv
end
def attribute_of element, attribute
att_decl = find do |child|
child.kind_of? AttlistDecl and
child.element_name == element and
child.include? attribute
end
return nil unless att_decl
att_decl[attribute]
end
def clone
DocType.new self
end
# output::
# Where to write the string
# indent::
# An integer. If -1, no indentation will be used; otherwise, the
# indentation will be this number of spaces, and children will be
# indented an additional amount.
# transitive::
# Ignored
# ie_hack::
# Ignored
def write( output, indent=0, transitive=false, ie_hack=false )
f = REXML::Formatters::Default.new
c = context
if c and c[:prologue_quote] == :apostrophe
quote = "'"
else
quote = "\""
end
indent( output, indent )
output << START
output << ' '
output << @name
output << " #{@external_id}" if @external_id
output << " #{quote}#{@long_name}#{quote}" if @long_name
output << " #{quote}#{@uri}#{quote}" if @uri
unless @children.empty?
output << ' ['
@children.each { |child|
output << "\n"
f.write( child, output )
}
output << "\n]"
end
output << STOP
end
def context
if @parent
@parent.context
else
nil
end
end
def entity( name )
@entities[name].unnormalized if @entities[name]
end
def add child
super(child)
@entities = DEFAULT_ENTITIES.clone if @entities == DEFAULT_ENTITIES
@entities[ child.name ] = child if child.kind_of? Entity
end
# This method retrieves the public identifier identifying the document's
# DTD.
#
# Method contributed by Henrik Martensson
def public
case @external_id
when "SYSTEM"
nil
when "PUBLIC"
strip_quotes(@long_name)
end
end
# This method retrieves the system identifier identifying the document's DTD
#
# Method contributed by Henrik Martensson
def system
case @external_id
when "SYSTEM"
strip_quotes(@long_name)
when "PUBLIC"
@uri.kind_of?(String) ? strip_quotes(@uri) : nil
end
end
# This method returns a list of notations that have been declared in the
# _internal_ DTD subset. Notations in the external DTD subset are not
# listed.
#
# Method contributed by Henrik Martensson
def notations
children().select {|node| node.kind_of?(REXML::NotationDecl)}
end
# Retrieves a named notation. Only notations declared in the internal
# DTD subset can be retrieved.
#
# Method contributed by Henrik Martensson
def notation(name)
notations.find { |notation_decl|
notation_decl.name == name
}
end
private
# Method contributed by Henrik Martensson
def strip_quotes(quoted_string)
quoted_string =~ /^[\'\"].*[\'\"]$/ ?
quoted_string[1, quoted_string.length-2] :
quoted_string
end
end
# We don't really handle any of these since we're not a validating
# parser, so we can be pretty dumb about them. All we need to be able
# to do is spew them back out on a write()
# This is an abstract class. You never use this directly; it serves as a
# parent class for the specific declarations.
class Declaration < Child
def initialize src
super()
@string = src
end
def to_s
@string+'>'
end
# == DEPRECATED
# See REXML::Formatters
#
def write( output, indent )
output << to_s
end
end
public
class ElementDecl < Declaration
def initialize( src )
super
end
end
class ExternalEntity < Child
def initialize( src )
super()
@entity = src
end
def to_s
@entity
end
def write( output, indent )
output << @entity
end
end
class NotationDecl < Child
attr_accessor :public, :system
def initialize name, middle, pub, sys
super(nil)
@name = name
@middle = middle
@public = pub
@system = sys
end
def to_s
c = nil
c = parent.context if parent
if c and c[:prologue_quote] == :apostrophe
quote = "'"
else
quote = "\""
end
notation = "<!NOTATION #{@name} #{@middle}"
notation << " #{quote}#{@public}#{quote}" if @public
notation << " #{quote}#{@system}#{quote}" if @system
notation << ">"
notation
end
def write( output, indent=-1 )
output << to_s
end
# This method retrieves the name of the notation.
#
# Method contributed by Henrik Martensson
def name
@name
end
end
end

View File

@ -1,291 +0,0 @@
# frozen_string_literal: false
require_relative "security"
require_relative "element"
require_relative "xmldecl"
require_relative "source"
require_relative "comment"
require_relative "doctype"
require_relative "instruction"
require_relative "rexml"
require_relative "parseexception"
require_relative "output"
require_relative "parsers/baseparser"
require_relative "parsers/streamparser"
require_relative "parsers/treeparser"
module REXML
# Represents a full XML document, including PIs, a doctype, etc. A
# Document has a single child that can be accessed by root().
# Note that if you want to have an XML declaration written for a document
# you create, you must add one; REXML documents do not write a default
# declaration for you. See |DECLARATION| and |write|.
class Document < Element
# A convenient default XML declaration. If you want an XML declaration,
# the easiest way to add one is mydoc << Document::DECLARATION
# +DEPRECATED+
# Use: mydoc << XMLDecl.default
DECLARATION = XMLDecl.default
# Constructor
# @param source if supplied, must be a Document, String, or IO.
# Documents have their context and Element attributes cloned.
# Strings are expected to be valid XML documents. IOs are expected
# to be sources of valid XML documents.
# @param context if supplied, contains the context of the document;
# this should be a Hash.
def initialize( source = nil, context = {} )
@entity_expansion_count = 0
super()
@context = context
return if source.nil?
if source.kind_of? Document
@context = source.context
super source
else
build( source )
end
end
def node_type
:document
end
# Should be obvious
def clone
Document.new self
end
# According to the XML spec, a root node has no expanded name
def expanded_name
''
#d = doc_type
#d ? d.name : "UNDEFINED"
end
alias :name :expanded_name
# We override this, because XMLDecls and DocTypes must go at the start
# of the document
def add( child )
if child.kind_of? XMLDecl
if @children[0].kind_of? XMLDecl
@children[0] = child
else
@children.unshift child
end
child.parent = self
elsif child.kind_of? DocType
# Find first Element or DocType node and insert the decl right
# before it. If there is no such node, just insert the child at the
# end. If there is a child and it is an DocType, then replace it.
insert_before_index = @children.find_index { |x|
x.kind_of?(Element) || x.kind_of?(DocType)
}
if insert_before_index # Not null = not end of list
if @children[ insert_before_index ].kind_of? DocType
@children[ insert_before_index ] = child
else
@children[ insert_before_index-1, 0 ] = child
end
else # Insert at end of list
@children << child
end
child.parent = self
else
rv = super
raise "attempted adding second root element to document" if @elements.size > 1
rv
end
end
alias :<< :add
def add_element(arg=nil, arg2=nil)
rv = super
raise "attempted adding second root element to document" if @elements.size > 1
rv
end
# @return the root Element of the document, or nil if this document
# has no children.
def root
elements[1]
#self
#@children.find { |item| item.kind_of? Element }
end
# @return the DocType child of the document, if one exists,
# and nil otherwise.
def doctype
@children.find { |item| item.kind_of? DocType }
end
# @return the XMLDecl of this document; if no XMLDecl has been
# set, the default declaration is returned.
def xml_decl
rv = @children[0]
return rv if rv.kind_of? XMLDecl
@children.unshift(XMLDecl.default)[0]
end
# @return the XMLDecl version of this document as a String.
# If no XMLDecl has been set, returns the default version.
def version
xml_decl().version
end
# @return the XMLDecl encoding of this document as an
# Encoding object.
# If no XMLDecl has been set, returns the default encoding.
def encoding
xml_decl().encoding
end
# @return the XMLDecl standalone value of this document as a String.
# If no XMLDecl has been set, returns the default setting.
def stand_alone?
xml_decl().stand_alone?
end
# :call-seq:
# doc.write(output=$stdout, indent=-1, transtive=false, ie_hack=false, encoding=nil)
# doc.write(options={:output => $stdout, :indent => -1, :transtive => false, :ie_hack => false, :encoding => nil})
#
# Write the XML tree out, optionally with indent. This writes out the
# entire XML document, including XML declarations, doctype declarations,
# and processing instructions (if any are given).
#
# A controversial point is whether Document should always write the XML
# declaration (<?xml version='1.0'?>) whether or not one is given by the
# user (or source document). REXML does not write one if one was not
# specified, because it adds unnecessary bandwidth to applications such
# as XML-RPC.
#
# Accept Nth argument style and options Hash style as argument.
# The recommended style is options Hash style for one or more
# arguments case.
#
# _Examples_
# Document.new("<a><b/></a>").write
#
# output = ""
# Document.new("<a><b/></a>").write(output)
#
# output = ""
# Document.new("<a><b/></a>").write(:output => output, :indent => 2)
#
# See also the classes in the rexml/formatters package for the proper way
# to change the default formatting of XML output.
#
# _Examples_
#
# output = ""
# tr = Transitive.new
# tr.write(Document.new("<a><b/></a>"), output)
#
# output::
# output an object which supports '<< string'; this is where the
# document will be written.
# indent::
# An integer. If -1, no indenting will be used; otherwise, the
# indentation will be twice this number of spaces, and children will be
# indented an additional amount. For a value of 3, every item will be
# indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1
# transitive::
# If transitive is true and indent is >= 0, then the output will be
# pretty-printed in such a way that the added whitespace does not affect
# the absolute *value* of the document -- that is, it leaves the value
# and number of Text nodes in the document unchanged.
# ie_hack::
# This hack inserts a space before the /> on empty tags to address
# a limitation of Internet Explorer. Defaults to false
# encoding::
# Encoding name as String. Change output encoding to specified encoding
# instead of encoding in XML declaration.
# Defaults to nil. It means encoding in XML declaration is used.
def write(*arguments)
if arguments.size == 1 and arguments[0].class == Hash
options = arguments[0]
output = options[:output]
indent = options[:indent]
transitive = options[:transitive]
ie_hack = options[:ie_hack]
encoding = options[:encoding]
else
output, indent, transitive, ie_hack, encoding, = *arguments
end
output ||= $stdout
indent ||= -1
transitive = false if transitive.nil?
ie_hack = false if ie_hack.nil?
encoding ||= xml_decl.encoding
if encoding != 'UTF-8' && !output.kind_of?(Output)
output = Output.new( output, encoding )
end
formatter = if indent > -1
if transitive
require_relative "formatters/transitive"
REXML::Formatters::Transitive.new( indent, ie_hack )
else
REXML::Formatters::Pretty.new( indent, ie_hack )
end
else
REXML::Formatters::Default.new( ie_hack )
end
formatter.write( self, output )
end
def Document::parse_stream( source, listener )
Parsers::StreamParser.new( source, listener ).parse
end
# Set the entity expansion limit. By default the limit is set to 10000.
#
# Deprecated. Use REXML::Security.entity_expansion_limit= instead.
def Document::entity_expansion_limit=( val )
Security.entity_expansion_limit = val
end
# Get the entity expansion limit. By default the limit is set to 10000.
#
# Deprecated. Use REXML::Security.entity_expansion_limit= instead.
def Document::entity_expansion_limit
return Security.entity_expansion_limit
end
# Set the entity expansion limit. By default the limit is set to 10240.
#
# Deprecated. Use REXML::Security.entity_expansion_text_limit= instead.
def Document::entity_expansion_text_limit=( val )
Security.entity_expansion_text_limit = val
end
# Get the entity expansion limit. By default the limit is set to 10240.
#
# Deprecated. Use REXML::Security.entity_expansion_text_limit instead.
def Document::entity_expansion_text_limit
return Security.entity_expansion_text_limit
end
attr_reader :entity_expansion_count
def record_entity_expansion
@entity_expansion_count += 1
if @entity_expansion_count > Security.entity_expansion_limit
raise "number of entity expansions exceeded, processing aborted."
end
end
def document
self
end
private
def build( source )
Parsers::TreeParser.new( source, self ).parse
end
end
end

View File

@ -1,11 +0,0 @@
# frozen_string_literal: false
require_relative "../child"
module REXML
module DTD
class AttlistDecl < Child
START = "<!ATTLIST"
START_RE = /^\s*#{START}/um
PATTERN_RE = /\s*(#{START}.*?>)/um
end
end
end

View File

@ -1,47 +0,0 @@
# frozen_string_literal: false
require_relative "elementdecl"
require_relative "entitydecl"
require_relative "../comment"
require_relative "notationdecl"
require_relative "attlistdecl"
require_relative "../parent"
module REXML
module DTD
class Parser
def Parser.parse( input )
case input
when String
parse_helper input
when File
parse_helper input.read
end
end
# Takes a String and parses it out
def Parser.parse_helper( input )
contents = Parent.new
while input.size > 0
case input
when ElementDecl.PATTERN_RE
match = $&
contents << ElementDecl.new( match )
when AttlistDecl.PATTERN_RE
matchdata = $~
contents << AttlistDecl.new( matchdata )
when EntityDecl.PATTERN_RE
matchdata = $~
contents << EntityDecl.new( matchdata )
when Comment.PATTERN_RE
matchdata = $~
contents << Comment.new( matchdata )
when NotationDecl.PATTERN_RE
matchdata = $~
contents << NotationDecl.new( matchdata )
end
end
contents
end
end
end
end

View File

@ -1,18 +0,0 @@
# frozen_string_literal: false
require_relative "../child"
module REXML
module DTD
class ElementDecl < Child
START = "<!ELEMENT"
START_RE = /^\s*#{START}/um
# PATTERN_RE = /^\s*(#{START}.*?)>/um
PATTERN_RE = /^\s*#{START}\s+((?:[:\w][-\.\w]*:)?[-!\*\.\w]*)(.*?)>/
#\s*((((["']).*?\5)|[^\/'">]*)*?)(\/)?>/um, true)
def initialize match
@name = match[1]
@rest = match[2]
end
end
end
end

View File

@ -1,57 +0,0 @@
# frozen_string_literal: false
require_relative "../child"
module REXML
module DTD
class EntityDecl < Child
START = "<!ENTITY"
START_RE = /^\s*#{START}/um
PUBLIC = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+PUBLIC\s+((["']).*?\3)\s+((["']).*?\5)\s*>/um
SYSTEM = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+SYSTEM\s+((["']).*?\3)(?:\s+NDATA\s+\w+)?\s*>/um
PLAIN = /^\s*#{START}\s+(\w+)\s+((["']).*?\3)\s*>/um
PERCENT = /^\s*#{START}\s+%\s+(\w+)\s+((["']).*?\3)\s*>/um
# <!ENTITY name SYSTEM "...">
# <!ENTITY name "...">
def initialize src
super()
md = nil
if src.match( PUBLIC )
md = src.match( PUBLIC, true )
@middle = "PUBLIC"
@content = "#{md[2]} #{md[4]}"
elsif src.match( SYSTEM )
md = src.match( SYSTEM, true )
@middle = "SYSTEM"
@content = md[2]
elsif src.match( PLAIN )
md = src.match( PLAIN, true )
@middle = ""
@content = md[2]
elsif src.match( PERCENT )
md = src.match( PERCENT, true )
@middle = ""
@content = md[2]
end
raise ParseException.new("failed Entity match", src) if md.nil?
@name = md[1]
end
def to_s
rv = "<!ENTITY #@name "
rv << "#@middle " if @middle.size > 0
rv << @content
rv
end
def write( output, indent )
indent( output, indent )
output << to_s
end
def EntityDecl.parse_source source, listener
md = source.match( PATTERN_RE, true )
thing = md[0].squeeze(" \t\n\r")
listener.send inspect.downcase, thing
end
end
end
end

View File

@ -1,40 +0,0 @@
# frozen_string_literal: false
require_relative "../child"
module REXML
module DTD
class NotationDecl < Child
START = "<!NOTATION"
START_RE = /^\s*#{START}/um
PUBLIC = /^\s*#{START}\s+(\w[\w-]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
SYSTEM = /^\s*#{START}\s+(\w[\w-]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
def initialize src
super()
if src.match( PUBLIC )
md = src.match( PUBLIC, true )
elsif src.match( SYSTEM )
md = src.match( SYSTEM, true )
else
raise ParseException.new( "error parsing notation: no matching pattern", src )
end
@name = md[1]
@middle = md[2]
@rest = md[3]
end
def to_s
"<!NOTATION #@name #@middle #@rest>"
end
def write( output, indent )
indent( output, indent )
output << to_s
end
def NotationDecl.parse_source source, listener
md = source.match( PATTERN_RE, true )
thing = md[0].squeeze(" \t\n\r")
listener.send inspect.downcase, thing
end
end
end
end

View File

@ -1,51 +0,0 @@
# coding: US-ASCII
# frozen_string_literal: false
module REXML
module Encoding
# ID ---> Encoding name
attr_reader :encoding
def encoding=(encoding)
encoding = encoding.name if encoding.is_a?(Encoding)
if encoding.is_a?(String)
original_encoding = encoding
encoding = find_encoding(encoding)
unless encoding
raise ArgumentError, "Bad encoding name #{original_encoding}"
end
end
return false if defined?(@encoding) and encoding == @encoding
if encoding
@encoding = encoding.upcase
else
@encoding = 'UTF-8'
end
true
end
def encode(string)
string.encode(@encoding)
end
def decode(string)
string.encode(::Encoding::UTF_8, @encoding)
end
private
def find_encoding(name)
case name
when /\Ashift-jis\z/i
return "SHIFT_JIS"
when /\ACP-(\d+)\z/
name = "CP#{$1}"
when /\AUTF-8\z/i
return name
end
begin
::Encoding::Converter.search_convpath(name, 'UTF-8')
rescue ::Encoding::ConverterNotFoundError
return nil
end
name
end
end
end

View File

@ -1,171 +0,0 @@
# frozen_string_literal: false
require_relative 'child'
require_relative 'source'
require_relative 'xmltokens'
module REXML
class Entity < Child
include XMLTokens
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
NDATADECL = "\\s+NDATA\\s+#{NAME}"
PEREFERENCE = "%#{NAME};"
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
attr_reader :name, :external, :ref, :ndata, :pubid
# Create a new entity. Simple entities can be constructed by passing a
# name, value to the constructor; this creates a generic, plain entity
# reference. For anything more complicated, you have to pass a Source to
# the constructor with the entity definition, or use the accessor methods.
# +WARNING+: There is no validation of entity state except when the entity
# is read from a stream. If you start poking around with the accessors,
# you can easily create a non-conformant Entity.
#
# e = Entity.new( 'amp', '&' )
def initialize stream, value=nil, parent=nil, reference=false
super(parent)
@ndata = @pubid = @value = @external = nil
if stream.kind_of? Array
@name = stream[1]
if stream[-1] == '%'
@reference = true
stream.pop
else
@reference = false
end
if stream[2] =~ /SYSTEM|PUBLIC/
@external = stream[2]
if @external == 'SYSTEM'
@ref = stream[3]
@ndata = stream[4] if stream.size == 5
else
@pubid = stream[3]
@ref = stream[4]
end
else
@value = stream[2]
end
else
@reference = reference
@external = nil
@name = stream
@value = value
end
end
# Evaluates whether the given string matches an entity definition,
# returning true if so, and false otherwise.
def Entity::matches? string
(ENTITYDECL =~ string) == 0
end
# Evaluates to the unnormalized value of this entity; that is, replacing
# all entities -- both %ent; and &ent; entities. This differs from
# +value()+ in that +value+ only replaces %ent; entities.
def unnormalized
document.record_entity_expansion unless document.nil?
v = value()
return nil if v.nil?
@unnormalized = Text::unnormalize(v, parent)
@unnormalized
end
#once :unnormalized
# Returns the value of this entity unprocessed -- raw. This is the
# normalized value; that is, with all %ent; and &ent; entities intact
def normalized
@value
end
# Write out a fully formed, correct entity definition (assuming the Entity
# object itself is valid.)
#
# out::
# An object implementing <TT>&lt;&lt;</TT> to which the entity will be
# output
# indent::
# *DEPRECATED* and ignored
def write out, indent=-1
out << '<!ENTITY '
out << '% ' if @reference
out << @name
out << ' '
if @external
out << @external << ' '
if @pubid
q = @pubid.include?('"')?"'":'"'
out << q << @pubid << q << ' '
end
q = @ref.include?('"')?"'":'"'
out << q << @ref << q
out << ' NDATA ' << @ndata if @ndata
else
q = @value.include?('"')?"'":'"'
out << q << @value << q
end
out << '>'
end
# Returns this entity as a string. See write().
def to_s
rv = ''
write rv
rv
end
PEREFERENCE_RE = /#{PEREFERENCE}/um
# Returns the value of this entity. At the moment, only internal entities
# are processed. If the value contains internal references (IE,
# %blah;), those are replaced with their values. IE, if the doctype
# contains:
# <!ENTITY % foo "bar">
# <!ENTITY yada "nanoo %foo; nanoo>
# then:
# doctype.entity('yada').value #-> "nanoo bar nanoo"
def value
if @value
matches = @value.scan(PEREFERENCE_RE)
rv = @value.clone
if @parent
sum = 0
matches.each do |entity_reference|
entity_value = @parent.entity( entity_reference[0] )
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
raise "entity expansion has grown too large"
else
sum += entity_value.bytesize
end
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
end
end
return rv
end
nil
end
end
# This is a set of entity constants -- the ones defined in the XML
# specification. These are +gt+, +lt+, +amp+, +quot+ and +apos+.
# CAUTION: these entities does not have parent and document
module EntityConst
# +>+
GT = Entity.new( 'gt', '>' )
# +<+
LT = Entity.new( 'lt', '<' )
# +&+
AMP = Entity.new( 'amp', '&' )
# +"+
QUOT = Entity.new( 'quot', '"' )
# +'+
APOS = Entity.new( 'apos', "'" )
end
end

View File

@ -1,116 +0,0 @@
# frozen_string_literal: false
module REXML
module Formatters
class Default
# Prints out the XML document with no formatting -- except if ie_hack is
# set.
#
# ie_hack::
# If set to true, then inserts whitespace before the close of an empty
# tag, so that IE's bad XML parser doesn't choke.
def initialize( ie_hack=false )
@ie_hack = ie_hack
end
# Writes the node to some output.
#
# node::
# The node to write
# output::
# A class implementing <TT>&lt;&lt;</TT>. Pass in an Output object to
# change the output encoding.
def write( node, output )
case node
when Document
if node.xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
output = Output.new( output, node.xml_decl.encoding )
end
write_document( node, output )
when Element
write_element( node, output )
when Declaration, ElementDecl, NotationDecl, ExternalEntity, Entity,
Attribute, AttlistDecl
node.write( output,-1 )
when Instruction
write_instruction( node, output )
when DocType, XMLDecl
node.write( output )
when Comment
write_comment( node, output )
when CData
write_cdata( node, output )
when Text
write_text( node, output )
else
raise Exception.new("XML FORMATTING ERROR")
end
end
protected
def write_document( node, output )
node.children.each { |child| write( child, output ) }
end
def write_element( node, output )
output << "<#{node.expanded_name}"
node.attributes.to_a.map { |a|
Hash === a ? a.values : a
}.flatten.sort_by {|attr| attr.name}.each do |attr|
output << " "
attr.write( output )
end unless node.attributes.empty?
if node.children.empty?
output << " " if @ie_hack
output << "/"
else
output << ">"
node.children.each { |child|
write( child, output )
}
output << "</#{node.expanded_name}"
end
output << ">"
end
def write_text( node, output )
output << node.to_s()
end
def write_comment( node, output )
output << Comment::START
output << node.to_s
output << Comment::STOP
end
def write_cdata( node, output )
output << CData::START
output << node.to_s
output << CData::STOP
end
def write_instruction( node, output )
output << Instruction::START
output << node.target
content = node.content
if content
output << ' '
output << content
end
output << Instruction::STOP
end
end
end
end

View File

@ -1,142 +0,0 @@
# frozen_string_literal: false
require_relative 'default'
module REXML
module Formatters
# Pretty-prints an XML document. This destroys whitespace in text nodes
# and will insert carriage returns and indentations.
#
# TODO: Add an option to print attributes on new lines
class Pretty < Default
# If compact is set to true, then the formatter will attempt to use as
# little space as possible
attr_accessor :compact
# The width of a page. Used for formatting text
attr_accessor :width
# Create a new pretty printer.
#
# output::
# An object implementing '<<(String)', to which the output will be written.
# indentation::
# An integer greater than 0. The indentation of each level will be
# this number of spaces. If this is < 1, the behavior of this object
# is undefined. Defaults to 2.
# ie_hack::
# If true, the printer will insert whitespace before closing empty
# tags, thereby allowing Internet Explorer's XML parser to
# function. Defaults to false.
def initialize( indentation=2, ie_hack=false )
@indentation = indentation
@level = 0
@ie_hack = ie_hack
@width = 80
@compact = false
end
protected
def write_element(node, output)
output << ' '*@level
output << "<#{node.expanded_name}"
node.attributes.each_attribute do |attr|
output << " "
attr.write( output )
end unless node.attributes.empty?
if node.children.empty?
if @ie_hack
output << " "
end
output << "/"
else
output << ">"
# If compact and all children are text, and if the formatted output
# is less than the specified width, then try to print everything on
# one line
skip = false
if compact
if node.children.inject(true) {|s,c| s & c.kind_of?(Text)}
string = ""
old_level = @level
@level = 0
node.children.each { |child| write( child, string ) }
@level = old_level
if string.length < @width
output << string
skip = true
end
end
end
unless skip
output << "\n"
@level += @indentation
node.children.each { |child|
next if child.kind_of?(Text) and child.to_s.strip.length == 0
write( child, output )
output << "\n"
}
@level -= @indentation
output << ' '*@level
end
output << "</#{node.expanded_name}"
end
output << ">"
end
def write_text( node, output )
s = node.to_s()
s.gsub!(/\s/,' ')
s.squeeze!(" ")
s = wrap(s, @width - @level)
s = indent_text(s, @level, " ", true)
output << (' '*@level + s)
end
def write_comment( node, output)
output << ' ' * @level
super
end
def write_cdata( node, output)
output << ' ' * @level
super
end
def write_document( node, output )
# Ok, this is a bit odd. All XML documents have an XML declaration,
# but it may not write itself if the user didn't specifically add it,
# either through the API or in the input document. If it doesn't write
# itself, then we don't need a carriage return... which makes this
# logic more complex.
node.children.each { |child|
next if child == node.children[-1] and child.instance_of?(Text)
unless child == node.children[0] or child.instance_of?(Text) or
(child == node.children[1] and !node.children[0].writethis)
output << "\n"
end
write( child, output )
}
end
private
def indent_text(string, level=1, style="\t", indentfirstline=true)
return string if level < 0
string.gsub(/\n/, "\n#{style*level}")
end
def wrap(string, width)
parts = []
while string.length > width and place = string.rindex(' ', width)
parts << string[0...place]
string = string[place+1..-1]
end
parts << string
parts.join("\n")
end
end
end
end

View File

@ -1,58 +0,0 @@
# frozen_string_literal: false
require_relative 'pretty'
module REXML
module Formatters
# The Transitive formatter writes an XML document that parses to an
# identical document as the source document. This means that no extra
# whitespace nodes are inserted, and whitespace within text nodes is
# preserved. Within these constraints, the document is pretty-printed,
# with whitespace inserted into the metadata to introduce formatting.
#
# Note that this is only useful if the original XML is not already
# formatted. Since this formatter does not alter whitespace nodes, the
# results of formatting already formatted XML will be odd.
class Transitive < Default
def initialize( indentation=2, ie_hack=false )
@indentation = indentation
@level = 0
@ie_hack = ie_hack
end
protected
def write_element( node, output )
output << "<#{node.expanded_name}"
node.attributes.each_attribute do |attr|
output << " "
attr.write( output )
end unless node.attributes.empty?
output << "\n"
output << ' '*@level
if node.children.empty?
output << " " if @ie_hack
output << "/"
else
output << ">"
# If compact and all children are text, and if the formatted output
# is less than the specified width, then try to print everything on
# one line
@level += @indentation
node.children.each { |child|
write( child, output )
}
@level -= @indentation
output << "</#{node.expanded_name}"
output << "\n"
output << ' '*@level
end
output << ">"
end
def write_text( node, output )
output << node.to_s()
end
end
end
end

View File

@ -1,447 +0,0 @@
# frozen_string_literal: false
module REXML
# If you add a method, keep in mind two things:
# (1) the first argument will always be a list of nodes from which to
# filter. In the case of context methods (such as position), the function
# should return an array with a value for each child in the array.
# (2) all method calls from XML will have "-" replaced with "_".
# Therefore, in XML, "local-name()" is identical (and actually becomes)
# "local_name()"
module Functions
@@available_functions = {}
@@context = nil
@@namespace_context = {}
@@variables = {}
INTERNAL_METHODS = [
:namespace_context,
:namespace_context=,
:variables,
:variables=,
:context=,
:get_namespace,
:send,
]
class << self
def singleton_method_added(name)
unless INTERNAL_METHODS.include?(name)
@@available_functions[name] = true
end
end
end
def Functions::namespace_context=(x) ; @@namespace_context=x ; end
def Functions::variables=(x) ; @@variables=x ; end
def Functions::namespace_context ; @@namespace_context ; end
def Functions::variables ; @@variables ; end
def Functions::context=(value); @@context = value; end
def Functions::text( )
if @@context[:node].node_type == :element
return @@context[:node].find_all{|n| n.node_type == :text}.collect{|n| n.value}
elsif @@context[:node].node_type == :text
return @@context[:node].value
else
return false
end
end
# Returns the last node of the given list of nodes.
def Functions::last( )
@@context[:size]
end
def Functions::position( )
@@context[:index]
end
# Returns the size of the given list of nodes.
def Functions::count( node_set )
node_set.size
end
# Since REXML is non-validating, this method is not implemented as it
# requires a DTD
def Functions::id( object )
end
def Functions::local_name(node_set=nil)
get_namespace(node_set) do |node|
return node.local_name
end
""
end
def Functions::namespace_uri( node_set=nil )
get_namespace( node_set ) {|node| node.namespace}
end
def Functions::name( node_set=nil )
get_namespace( node_set ) do |node|
node.expanded_name
end
end
# Helper method.
def Functions::get_namespace( node_set = nil )
if node_set == nil
yield @@context[:node] if @@context[:node].respond_to?(:namespace)
else
if node_set.respond_to? :each
result = []
node_set.each do |node|
result << yield(node) if node.respond_to?(:namespace)
end
result
elsif node_set.respond_to? :namespace
yield node_set
end
end
end
# A node-set is converted to a string by returning the string-value of the
# node in the node-set that is first in document order. If the node-set is
# empty, an empty string is returned.
#
# A number is converted to a string as follows
#
# NaN is converted to the string NaN
#
# positive zero is converted to the string 0
#
# negative zero is converted to the string 0
#
# positive infinity is converted to the string Infinity
#
# negative infinity is converted to the string -Infinity
#
# if the number is an integer, the number is represented in decimal form
# as a Number with no decimal point and no leading zeros, preceded by a
# minus sign (-) if the number is negative
#
# otherwise, the number is represented in decimal form as a Number
# including a decimal point with at least one digit before the decimal
# point and at least one digit after the decimal point, preceded by a
# minus sign (-) if the number is negative; there must be no leading zeros
# before the decimal point apart possibly from the one required digit
# immediately before the decimal point; beyond the one required digit
# after the decimal point there must be as many, but only as many, more
# digits as are needed to uniquely distinguish the number from all other
# IEEE 754 numeric values.
#
# The boolean false value is converted to the string false. The boolean
# true value is converted to the string true.
#
# An object of a type other than the four basic types is converted to a
# string in a way that is dependent on that type.
def Functions::string( object=@@context[:node] )
if object.respond_to?(:node_type)
case object.node_type
when :attribute
object.value
when :element
string_value(object)
when :document
string_value(object.root)
when :processing_instruction
object.content
else
object.to_s
end
else
case object
when Array
string(object[0])
when Float
if object.nan?
"NaN"
else
integer = object.to_i
if object == integer
"%d" % integer
else
object.to_s
end
end
else
object.to_s
end
end
end
# A node-set is converted to a string by
# returning the concatenation of the string-value
# of each of the children of the node in the
# node-set that is first in document order.
# If the node-set is empty, an empty string is returned.
def Functions::string_value( o )
rv = ""
o.children.each { |e|
if e.node_type == :text
rv << e.to_s
elsif e.node_type == :element
rv << string_value( e )
end
}
rv
end
def Functions::concat( *objects )
concatenated = ""
objects.each do |object|
concatenated << string(object)
end
concatenated
end
# Fixed by Mike Stok
def Functions::starts_with( string, test )
string(string).index(string(test)) == 0
end
# Fixed by Mike Stok
def Functions::contains( string, test )
string(string).include?(string(test))
end
# Kouhei fixed this
def Functions::substring_before( string, test )
ruby_string = string(string)
ruby_index = ruby_string.index(string(test))
if ruby_index.nil?
""
else
ruby_string[ 0...ruby_index ]
end
end
# Kouhei fixed this too
def Functions::substring_after( string, test )
ruby_string = string(string)
return $1 if ruby_string =~ /#{test}(.*)/
""
end
# Take equal portions of Mike Stok and Sean Russell; mix
# vigorously, and pour into a tall, chilled glass. Serves 10,000.
def Functions::substring( string, start, length=nil )
ruby_string = string(string)
ruby_length = if length.nil?
ruby_string.length.to_f
else
number(length)
end
ruby_start = number(start)
# Handle the special cases
return '' if (
ruby_length.nan? or
ruby_start.nan? or
ruby_start.infinite?
)
infinite_length = ruby_length.infinite? == 1
ruby_length = ruby_string.length if infinite_length
# Now, get the bounds. The XPath bounds are 1..length; the ruby bounds
# are 0..length. Therefore, we have to offset the bounds by one.
ruby_start = round(ruby_start) - 1
ruby_length = round(ruby_length)
if ruby_start < 0
ruby_length += ruby_start unless infinite_length
ruby_start = 0
end
return '' if ruby_length <= 0
ruby_string[ruby_start,ruby_length]
end
# UNTESTED
def Functions::string_length( string )
string(string).length
end
# UNTESTED
def Functions::normalize_space( string=nil )
string = string(@@context[:node]) if string.nil?
if string.kind_of? Array
string.collect{|x| string.to_s.strip.gsub(/\s+/um, ' ') if string}
else
string.to_s.strip.gsub(/\s+/um, ' ')
end
end
# This is entirely Mike Stok's beast
def Functions::translate( string, tr1, tr2 )
from = string(tr1)
to = string(tr2)
# the map is our translation table.
#
# if a character occurs more than once in the
# from string then we ignore the second &
# subsequent mappings
#
# if a character maps to nil then we delete it
# in the output. This happens if the from
# string is longer than the to string
#
# there's nothing about - or ^ being special in
# http://www.w3.org/TR/xpath#function-translate
# so we don't build ranges or negated classes
map = Hash.new
0.upto(from.length - 1) { |pos|
from_char = from[pos]
unless map.has_key? from_char
map[from_char] =
if pos < to.length
to[pos]
else
nil
end
end
}
if ''.respond_to? :chars
string(string).chars.collect { |c|
if map.has_key? c then map[c] else c end
}.compact.join
else
string(string).unpack('U*').collect { |c|
if map.has_key? c then map[c] else c end
}.compact.pack('U*')
end
end
def Functions::boolean(object=@@context[:node])
case object
when true, false
object
when Float
return false if object.zero?
return false if object.nan?
true
when Numeric
not object.zero?
when String
not object.empty?
when Array
not object.empty?
else
object ? true : false
end
end
# UNTESTED
def Functions::not( object )
not boolean( object )
end
# UNTESTED
def Functions::true( )
true
end
# UNTESTED
def Functions::false( )
false
end
# UNTESTED
def Functions::lang( language )
lang = false
node = @@context[:node]
attr = nil
until node.nil?
if node.node_type == :element
attr = node.attributes["xml:lang"]
unless attr.nil?
lang = compare_language(string(language), attr)
break
else
end
end
node = node.parent
end
lang
end
def Functions::compare_language lang1, lang2
lang2.downcase.index(lang1.downcase) == 0
end
# a string that consists of optional whitespace followed by an optional
# minus sign followed by a Number followed by whitespace is converted to
# the IEEE 754 number that is nearest (according to the IEEE 754
# round-to-nearest rule) to the mathematical value represented by the
# string; any other string is converted to NaN
#
# boolean true is converted to 1; boolean false is converted to 0
#
# a node-set is first converted to a string as if by a call to the string
# function and then converted in the same way as a string argument
#
# an object of a type other than the four basic types is converted to a
# number in a way that is dependent on that type
def Functions::number(object=@@context[:node])
case object
when true
Float(1)
when false
Float(0)
when Array
number(string(object))
when Numeric
object.to_f
else
str = string(object)
case str.strip
when /\A\s*(-?(?:\d+(?:\.\d*)?|\.\d+))\s*\z/
$1.to_f
else
Float::NAN
end
end
end
def Functions::sum( nodes )
nodes = [nodes] unless nodes.kind_of? Array
nodes.inject(0) { |r,n| r + number(string(n)) }
end
def Functions::floor( number )
number(number).floor
end
def Functions::ceiling( number )
number(number).ceil
end
def Functions::round( number )
number = number(number)
begin
neg = number.negative?
number = number.abs.round
neg ? -number : number
rescue FloatDomainError
number
end
end
def Functions::processing_instruction( node )
node.node_type == :processing_instruction
end
def Functions::send(name, *args)
if @@available_functions[name.to_sym]
super
else
# TODO: Maybe, this is not XPath spec behavior.
# This behavior must be reconsidered.
XPath.match(@@context[:node], name.to_s)
end
end
end
end

View File

@ -1,79 +0,0 @@
# frozen_string_literal: false
require_relative "child"
require_relative "source"
module REXML
# Represents an XML Instruction; IE, <? ... ?>
# TODO: Add parent arg (3rd arg) to constructor
class Instruction < Child
START = "<?"
STOP = "?>"
# target is the "name" of the Instruction; IE, the "tag" in <?tag ...?>
# content is everything else.
attr_accessor :target, :content
# Constructs a new Instruction
# @param target can be one of a number of things. If String, then
# the target of this instruction is set to this. If an Instruction,
# then the Instruction is shallowly cloned (target and content are
# copied).
# @param content Must be either a String, or a Parent. Can only
# be a Parent if the target argument is a Source. Otherwise, this
# String is set as the content of this instruction.
def initialize(target, content=nil)
case target
when String
super()
@target = target
@content = content
when Instruction
super(content)
@target = target.target
@content = target.content
else
message =
"processing instruction target must be String or REXML::Instruction: "
message << "<#{target.inspect}>"
raise ArgumentError, message
end
@content.strip! if @content
end
def clone
Instruction.new self
end
# == DEPRECATED
# See the rexml/formatters package
#
def write writer, indent=-1, transitive=false, ie_hack=false
Kernel.warn( "#{self.class.name}.write is deprecated", uplevel: 1)
indent(writer, indent)
writer << START
writer << @target
if @content
writer << ' '
writer << @content
end
writer << STOP
end
# @return true if other is an Instruction, and the content and target
# of the other matches the target and content of this object.
def ==( other )
other.kind_of? Instruction and
other.target == @target and
other.content == @content
end
def node_type
:processing_instruction
end
def inspect
"<?p-i #{target} ...?>"
end
end
end

View File

@ -1,196 +0,0 @@
# frozen_string_literal: false
require_relative '../xmltokens'
# [ :element, parent, name, attributes, children* ]
# a = Node.new
# a << "B" # => <a>B</a>
# a.b # => <a>B<b/></a>
# a.b[1] # => <a>B<b/><b/><a>
# a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
# a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
# a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
module REXML
module Light
# Represents a tagged XML element. Elements are characterized by
# having children, attributes, and names, and can themselves be
# children.
class Node
NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
PARENTS = [ :element, :document, :doctype ]
# Create a new element.
def initialize node=nil
@node = node
if node.kind_of? String
node = [ :text, node ]
elsif node.nil?
node = [ :document, nil, nil ]
elsif node[0] == :start_element
node[0] = :element
elsif node[0] == :start_doctype
node[0] = :doctype
elsif node[0] == :start_document
node[0] = :document
end
end
def size
if PARENTS.include? @node[0]
@node[-1].size
else
0
end
end
def each
size.times { |x| yield( at(x+4) ) }
end
def name
at(2)
end
def name=( name_str, ns=nil )
pfx = ''
pfx = "#{prefix(ns)}:" if ns
_old_put(2, "#{pfx}#{name_str}")
end
def parent=( node )
_old_put(1,node)
end
def local_name
namesplit
@name
end
def local_name=( name_str )
_old_put( 1, "#@prefix:#{name_str}" )
end
def prefix( namespace=nil )
prefix_of( self, namespace )
end
def namespace( prefix=prefix() )
namespace_of( self, prefix )
end
def namespace=( namespace )
@prefix = prefix( namespace )
pfx = ''
pfx = "#@prefix:" if @prefix.size > 0
_old_put(1, "#{pfx}#@name")
end
def []( reference, ns=nil )
if reference.kind_of? String
pfx = ''
pfx = "#{prefix(ns)}:" if ns
at(3)["#{pfx}#{reference}"]
elsif reference.kind_of? Range
_old_get( Range.new(4+reference.begin, reference.end, reference.exclude_end?) )
else
_old_get( 4+reference )
end
end
def =~( path )
XPath.match( self, path )
end
# Doesn't handle namespaces yet
def []=( reference, ns, value=nil )
if reference.kind_of? String
value = ns unless value
at( 3 )[reference] = value
elsif reference.kind_of? Range
_old_put( Range.new(3+reference.begin, reference.end, reference.exclude_end?), ns )
else
if value
_old_put( 4+reference, ns, value )
else
_old_put( 4+reference, ns )
end
end
end
# Append a child to this element, optionally under a provided namespace.
# The namespace argument is ignored if the element argument is an Element
# object. Otherwise, the element argument is a string, the namespace (if
# provided) is the namespace the element is created in.
def << element
if node_type() == :text
at(-1) << element
else
newnode = Node.new( element )
newnode.parent = self
self.push( newnode )
end
at(-1)
end
def node_type
_old_get(0)
end
def text=( foo )
replace = at(4).kind_of?(String)? 1 : 0
self._old_put(4,replace, normalizefoo)
end
def root
context = self
context = context.at(1) while context.at(1)
end
def has_name?( name, namespace = '' )
at(3) == name and namespace() == namespace
end
def children
self
end
def parent
at(1)
end
def to_s
end
private
def namesplit
return if @name.defined?
at(2) =~ NAMESPLIT
@prefix = '' || $1
@name = $2
end
def namespace_of( node, prefix=nil )
if not prefix
name = at(2)
name =~ NAMESPLIT
prefix = $1
end
to_find = 'xmlns'
to_find = "xmlns:#{prefix}" if not prefix.nil?
ns = at(3)[ to_find ]
ns ? ns : namespace_of( @node[0], prefix )
end
def prefix_of( node, namespace=nil )
if not namespace
name = node.name
name =~ NAMESPLIT
$1
else
ns = at(3).find { |k,v| v == namespace }
ns ? ns : prefix_of( node.parent, namespace )
end
end
end
end
end

View File

@ -1,59 +0,0 @@
# frozen_string_literal: false
require_relative 'xmltokens'
module REXML
# Adds named attributes to an object.
module Namespace
# The name of the object, valid if set
attr_reader :name, :expanded_name
# The expanded name of the object, valid if name is set
attr_accessor :prefix
include XMLTokens
NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
# Sets the name and the expanded name
def name=( name )
@expanded_name = name
case name
when NAMESPLIT
if $1
@prefix = $1
else
@prefix = ""
@namespace = ""
end
@name = $2
when ""
@prefix = nil
@namespace = nil
@name = nil
else
message = "name must be \#{PREFIX}:\#{LOCAL_NAME} or \#{LOCAL_NAME}: "
message += "<#{name.inspect}>"
raise ArgumentError, message
end
end
# Compares names optionally WITH namespaces
def has_name?( other, ns=nil )
if ns
return (namespace() == ns and name() == other)
elsif other.include? ":"
return fully_expanded_name == other
else
return name == other
end
end
alias :local_name :name
# Fully expand the name, even if the prefix wasn't specified in the
# source file.
def fully_expanded_name
ns = prefix
return "#{ns}:#@name" if ns.size > 0
return @name
end
end
end

View File

@ -1,76 +0,0 @@
# frozen_string_literal: false
require_relative "parseexception"
require_relative "formatters/pretty"
require_relative "formatters/default"
module REXML
# Represents a node in the tree. Nodes are never encountered except as
# superclasses of other objects. Nodes have siblings.
module Node
# @return the next sibling (nil if unset)
def next_sibling_node
return nil if @parent.nil?
@parent[ @parent.index(self) + 1 ]
end
# @return the previous sibling (nil if unset)
def previous_sibling_node
return nil if @parent.nil?
ind = @parent.index(self)
return nil if ind == 0
@parent[ ind - 1 ]
end
# indent::
# *DEPRECATED* This parameter is now ignored. See the formatters in the
# REXML::Formatters package for changing the output style.
def to_s indent=nil
unless indent.nil?
Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated", uplevel: 1)
f = REXML::Formatters::Pretty.new( indent )
f.write( self, rv = "" )
else
f = REXML::Formatters::Default.new
f.write( self, rv = "" )
end
return rv
end
def indent to, ind
if @parent and @parent.context and not @parent.context[:indentstyle].nil? then
indentstyle = @parent.context[:indentstyle]
else
indentstyle = ' '
end
to << indentstyle*ind unless ind<1
end
def parent?
false;
end
# Visit all subnodes of +self+ recursively
def each_recursive(&block) # :yields: node
self.elements.each {|node|
block.call(node)
node.each_recursive(&block)
}
end
# Find (and return) first subnode (recursively) for which the block
# evaluates to true. Returns +nil+ if none was found.
def find_first_recursive(&block) # :yields: node
each_recursive {|node|
return node if block.call(node)
}
return nil
end
# Returns the position that +self+ holds in its parent's array, indexed
# from 1.
def index_in_parent
parent.index(self)+1
end
end
end

View File

@ -1,30 +0,0 @@
# frozen_string_literal: false
require_relative 'encoding'
module REXML
class Output
include Encoding
attr_reader :encoding
def initialize real_IO, encd="iso-8859-1"
@output = real_IO
self.encoding = encd
@to_utf = encoding != 'UTF-8'
if encoding == "UTF-16"
@output << "\ufeff".encode("UTF-16BE")
self.encoding = "UTF-16BE"
end
end
def <<( content )
@output << (@to_utf ? self.encode(content) : content)
end
def to_s
"Output[#{encoding}]"
end
end
end

View File

@ -1,166 +0,0 @@
# frozen_string_literal: false
require_relative "child"
module REXML
# A parent has children, and has methods for accessing them. The Parent
# class is never encountered except as the superclass for some other
# object.
class Parent < Child
include Enumerable
# Constructor
# @param parent if supplied, will be set as the parent of this object
def initialize parent=nil
super(parent)
@children = []
end
def add( object )
object.parent = self
@children << object
object
end
alias :push :add
alias :<< :push
def unshift( object )
object.parent = self
@children.unshift object
end
def delete( object )
found = false
@children.delete_if {|c| c.equal?(object) and found = true }
object.parent = nil if found
found ? object : nil
end
def each(&block)
@children.each(&block)
end
def delete_if( &block )
@children.delete_if(&block)
end
def delete_at( index )
@children.delete_at index
end
def each_index( &block )
@children.each_index(&block)
end
# Fetches a child at a given index
# @param index the Integer index of the child to fetch
def []( index )
@children[index]
end
alias :each_child :each
# Set an index entry. See Array.[]=
# @param index the index of the element to set
# @param opt either the object to set, or an Integer length
# @param child if opt is an Integer, this is the child to set
# @return the parent (self)
def []=( *args )
args[-1].parent = self
@children[*args[0..-2]] = args[-1]
end
# Inserts an child before another child
# @param child1 this is either an xpath or an Element. If an Element,
# child2 will be inserted before child1 in the child list of the parent.
# If an xpath, child2 will be inserted before the first child to match
# the xpath.
# @param child2 the child to insert
# @return the parent (self)
def insert_before( child1, child2 )
if child1.kind_of? String
child1 = XPath.first( self, child1 )
child1.parent.insert_before child1, child2
else
ind = index(child1)
child2.parent.delete(child2) if child2.parent
@children[ind,0] = child2
child2.parent = self
end
self
end
# Inserts an child after another child
# @param child1 this is either an xpath or an Element. If an Element,
# child2 will be inserted after child1 in the child list of the parent.
# If an xpath, child2 will be inserted after the first child to match
# the xpath.
# @param child2 the child to insert
# @return the parent (self)
def insert_after( child1, child2 )
if child1.kind_of? String
child1 = XPath.first( self, child1 )
child1.parent.insert_after child1, child2
else
ind = index(child1)+1
child2.parent.delete(child2) if child2.parent
@children[ind,0] = child2
child2.parent = self
end
self
end
def to_a
@children.dup
end
# Fetches the index of a given child
# @param child the child to get the index of
# @return the index of the child, or nil if the object is not a child
# of this parent.
def index( child )
count = -1
@children.find { |i| count += 1 ; i.hash == child.hash }
count
end
# @return the number of children of this parent
def size
@children.size
end
alias :length :size
# Replaces one child with another, making sure the nodelist is correct
# @param to_replace the child to replace (must be a Child)
# @param replacement the child to insert into the nodelist (must be a
# Child)
def replace_child( to_replace, replacement )
@children.map! {|c| c.equal?( to_replace ) ? replacement : c }
to_replace.parent = nil
replacement.parent = self
end
# Deeply clones this object. This creates a complete duplicate of this
# Parent, including all descendants.
def deep_clone
cl = clone()
each do |child|
if child.kind_of? Parent
cl << child.deep_clone
else
cl << child.clone
end
end
cl
end
alias :children :to_a
def parent?
true
end
end
end

View File

@ -1,52 +0,0 @@
# frozen_string_literal: false
module REXML
class ParseException < RuntimeError
attr_accessor :source, :parser, :continued_exception
def initialize( message, source=nil, parser=nil, exception=nil )
super(message)
@source = source
@parser = parser
@continued_exception = exception
end
def to_s
# Quote the original exception, if there was one
if @continued_exception
err = @continued_exception.inspect
err << "\n"
err << @continued_exception.backtrace.join("\n")
err << "\n...\n"
else
err = ""
end
# Get the stack trace and error message
err << super
# Add contextual information
if @source
err << "\nLine: #{line}\n"
err << "Position: #{position}\n"
err << "Last 80 unconsumed characters:\n"
err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
end
err
end
def position
@source.current_line[0] if @source and defined? @source.current_line and
@source.current_line
end
def line
@source.current_line[2] if @source and defined? @source.current_line and
@source.current_line
end
def context
@source.current_line
end
end
end

View File

@ -1,594 +0,0 @@
# frozen_string_literal: false
require_relative '../parseexception'
require_relative '../undefinednamespaceexception'
require_relative '../source'
require 'set'
require "strscan"
module REXML
module Parsers
# = Using the Pull Parser
# <em>This API is experimental, and subject to change.</em>
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
# while parser.has_next?
# res = parser.next
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
# end
# See the PullEvent class for information on the content of the results.
# The data is identical to the arguments passed for the various events to
# the StreamListener API.
#
# Notice that:
# parser = PullParser.new( "<a>BAD DOCUMENT" )
# while parser.has_next?
# res = parser.next
# raise res[1] if res.error?
# end
#
# Nat Price gave me some good ideas for the API.
class BaseParser
LETTER = '[:alpha:]'
DIGIT = '[:digit:]'
COMBININGCHAR = '' # TODO
EXTENDER = '' # TODO
NCNAME_STR= "[#{LETTER}_][-[:alnum:]._#{COMBININGCHAR}#{EXTENDER}]*"
QNAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
QNAME = /(#{QNAME_STR})/
# Just for backward compatibility. For example, kramdown uses this.
# It's not used in REXML.
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
NAMECHAR = '[\-\w\.:]'
NAME = "([\\w:]#{NAMECHAR}*)"
NMTOKEN = "(?:#{NAMECHAR})+"
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
REFERENCE_RE = /#{REFERENCE}/
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_END = /\A\s*\]\s*>/um
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u
CDATA_END = /\A\s*\]\s*>/um
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
XMLDECL_START = /\A<\?xml\s/u;
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
TAG_MATCH = /^<((?>#{QNAME_STR}))/um
CLOSE_MATCH = /^\s*<\/(#{QNAME_STR})\s*>/um
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
ENTITY_START = /\A\s*<!ENTITY/
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
ELEMENTDECL_START = /\A\s*<!ELEMENT/um
ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
ATTDEF_RE = /#{ATTDEF}/
ATTLISTDECL_START = /\A\s*<!ATTLIST/um
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
NOTATIONDECL_START = /\A\s*<!NOTATION/um
PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
TEXT_PATTERN = /\A([^<]*)/um
# Entity constants
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
NDATADECL = "\\s+NDATA\\s+#{NAME}"
PEREFERENCE = "%#{NAME};"
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
EREFERENCE = /&(?!#{NAME};)/
DEFAULT_ENTITIES = {
'gt' => [/&gt;/, '&gt;', '>', />/],
'lt' => [/&lt;/, '&lt;', '<', /</],
'quot' => [/&quot;/, '&quot;', '"', /"/],
"apos" => [/&apos;/, "&apos;", "'", /'/]
}
def initialize( source )
self.stream = source
@listeners = []
end
def add_listener( listener )
@listeners << listener
end
attr_reader :source
def stream=( source )
@source = SourceFactory.create_from( source )
@closed = nil
@document_status = nil
@tags = []
@stack = []
@entities = []
@nsstack = []
end
def position
if @source.respond_to? :position
@source.position
else
# FIXME
0
end
end
# Returns true if there are no more events
def empty?
return (@source.empty? and @stack.empty?)
end
# Returns true if there are more events. Synonymous with !empty?
def has_next?
return !(@source.empty? and @stack.empty?)
end
# Push an event back on the head of the stream. This method
# has (theoretically) infinite depth.
def unshift token
@stack.unshift(token)
end
# Peek at the +depth+ event in the stack. The first element on the stack
# is at depth 0. If +depth+ is -1, will parse to the end of the input
# stream and return the last event, which is always :end_document.
# Be aware that this causes the stream to be parsed up to the +depth+
# event, so you can effectively pre-parse the entire document (pull the
# entire thing into memory) using this method.
def peek depth=0
raise %Q[Illegal argument "#{depth}"] if depth < -1
temp = []
if depth == -1
temp.push(pull()) until empty?
else
while @stack.size+temp.size < depth+1
temp.push(pull())
end
end
@stack += temp if temp.size > 0
@stack[depth]
end
# Returns the next event. This is a +PullEvent+ object.
def pull
pull_event.tap do |event|
@listeners.each do |listener|
listener.receive event
end
end
end
def pull_event
if @closed
x, @closed = @closed, nil
return [ :end_element, x ]
end
return [ :end_document ] if empty?
return @stack.shift if @stack.size > 0
#STDERR.puts @source.encoding
@source.read if @source.buffer.size<2
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
if @document_status == nil
#@source.consume( /^\s*/um )
word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
word = word[1] unless word.nil?
#STDERR.puts "WORD = #{word.inspect}"
case word
when COMMENT_START
return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
when XMLDECL_START
#STDERR.puts "XMLDECL"
results = @source.match( XMLDECL_PATTERN, true )[1]
version = VERSION.match( results )
version = version[1] unless version.nil?
encoding = ENCODING.match(results)
encoding = encoding[1] unless encoding.nil?
if need_source_encoding_update?(encoding)
@source.encoding = encoding
end
if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
encoding = "UTF-16"
end
standalone = STANDALONE.match(results)
standalone = standalone[1] unless standalone.nil?
return [ :xmldecl, version, encoding, standalone ]
when INSTRUCTION_START
return process_instruction
when DOCTYPE_START
md = @source.match( DOCTYPE_PATTERN, true )
@nsstack.unshift(curr_ns=Set.new)
identity = md[1]
close = md[2]
identity =~ IDENTITY
name = $1
raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
pub_sys = $2.nil? ? nil : $2.strip
long_name = $4.nil? ? nil : $4.strip
uri = $6.nil? ? nil : $6.strip
args = [ :start_doctype, name, pub_sys, long_name, uri ]
if close == ">"
@document_status = :after_doctype
@source.read if @source.buffer.size<2
md = @source.match(/^\s*/um, true)
@stack << [ :end_doctype ]
else
@document_status = :in_doctype
end
return args
when /^\s+/
else
@document_status = :after_doctype
@source.read if @source.buffer.size<2
md = @source.match(/\s*/um, true)
if @source.encoding == "UTF-8"
@source.buffer.force_encoding(::Encoding::UTF_8)
end
end
end
if @document_status == :in_doctype
md = @source.match(/\s*(.*?>)/um)
case md[1]
when SYSTEMENTITY
match = @source.match( SYSTEMENTITY, true )[1]
return [ :externalentity, match ]
when ELEMENTDECL_START
return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
when ENTITY_START
match = @source.match( ENTITYDECL, true ).to_a.compact
match[0] = :entitydecl
ref = false
if match[1] == '%'
ref = true
match.delete_at 1
end
# Now we have to sort out what kind of entity reference this is
if match[2] == 'SYSTEM'
# External reference
match[3] = match[3][1..-2] # PUBID
match.delete_at(4) if match.size > 4 # Chop out NDATA decl
# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
elsif match[2] == 'PUBLIC'
# External reference
match[3] = match[3][1..-2] # PUBID
match[4] = match[4][1..-2] # HREF
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
else
match[2] = match[2][1..-2]
match.pop if match.size == 4
# match is [ :entity, name, value ]
end
match << '%' if ref
return match
when ATTLISTDECL_START
md = @source.match( ATTLISTDECL_PATTERN, true )
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
element = md[1]
contents = md[0]
pairs = {}
values = md[0].scan( ATTDEF_RE )
values.each do |attdef|
unless attdef[3] == "#IMPLIED"
attdef.compact!
val = attdef[3]
val = attdef[4] if val == "#FIXED "
pairs[attdef[0]] = val
if attdef[0] =~ /^xmlns:(.*)/
@nsstack[0] << $1
end
end
end
return [ :attlistdecl, element, pairs, contents ]
when NOTATIONDECL_START
md = nil
if @source.match( PUBLIC )
md = @source.match( PUBLIC, true )
vals = [md[1],md[2],md[4],md[6]]
elsif @source.match( SYSTEM )
md = @source.match( SYSTEM, true )
vals = [md[1],md[2],nil,md[4]]
else
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
end
return [ :notationdecl, *vals ]
when DOCTYPE_END
@document_status = :after_doctype
@source.match( DOCTYPE_END, true )
return [ :end_doctype ]
end
end
begin
if @source.buffer[0] == ?<
if @source.buffer[1] == ?/
@nsstack.shift
last_tag = @tags.pop
md = @source.match( CLOSE_MATCH, true )
if md and !last_tag
message = "Unexpected top-level end tag (got '#{md[1]}')"
raise REXML::ParseException.new(message, @source)
end
if md.nil? or last_tag != md[1]
message = "Missing end tag for '#{last_tag}'"
message << " (got '#{md[1]}')" if md
raise REXML::ParseException.new(message, @source)
end
return [ :end_element, last_tag ]
elsif @source.buffer[1] == ?!
md = @source.match(/\A(\s*[^>]*>)/um)
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
raise REXML::ParseException.new("Malformed node", @source) unless md
if md[0][2] == ?-
md = @source.match( COMMENT_PATTERN, true )
case md[1]
when /--/, /-\z/
raise REXML::ParseException.new("Malformed comment", @source)
end
return [ :comment, md[1] ] if md
else
md = @source.match( CDATA_PATTERN, true )
return [ :cdata, md[1] ] if md
end
raise REXML::ParseException.new( "Declarations can only occur "+
"in the doctype declaration.", @source)
elsif @source.buffer[1] == ??
return process_instruction
else
# Get the next tag
md = @source.match(TAG_MATCH, true)
unless md
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
end
prefixes = Set.new
prefixes << md[2] if md[2]
@nsstack.unshift(curr_ns=Set.new)
attributes, closed = parse_attributes(prefixes, curr_ns)
# Verify that all of the prefixes have been defined
for prefix in prefixes
unless @nsstack.find{|k| k.member?(prefix)}
raise UndefinedNamespaceException.new(prefix,@source,self)
end
end
if closed
@closed = md[1]
@nsstack.shift
else
@tags.push( md[1] )
end
return [ :start_element, md[1], attributes ]
end
else
md = @source.match( TEXT_PATTERN, true )
if md[0].length == 0
@source.match( /(\s+)/, true )
end
#STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
#return [ :text, "" ] if md[0].length == 0
# unnormalized = Text::unnormalize( md[1], self )
# return PullEvent.new( :text, md[1], unnormalized )
return [ :text, md[1] ]
end
rescue REXML::UndefinedNamespaceException
raise
rescue REXML::ParseException
raise
rescue => error
raise REXML::ParseException.new( "Exception parsing",
@source, self, (error ? error : $!) )
end
return [ :dummy ]
end
private :pull_event
def entity( reference, entities )
value = nil
value = entities[ reference ] if entities
if not value
value = DEFAULT_ENTITIES[ reference ]
value = value[2] if value
end
unnormalize( value, entities ) if value
end
# Escapes all possible entities
def normalize( input, entities=nil, entity_filter=nil )
copy = input.clone
# Doing it like this rather than in a loop improves the speed
copy.gsub!( EREFERENCE, '&amp;' )
entities.each do |key, value|
copy.gsub!( value, "&#{key};" ) unless entity_filter and
entity_filter.include?(entity)
end if entities
copy.gsub!( EREFERENCE, '&amp;' )
DEFAULT_ENTITIES.each do |key, value|
copy.gsub!( value[3], value[1] )
end
copy
end
# Unescapes all possible entities
def unnormalize( string, entities=nil, filter=nil )
rv = string.clone
rv.gsub!( /\r\n?/, "\n" )
matches = rv.scan( REFERENCE_RE )
return rv if matches.size == 0
rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
m=$1
m = "0#{m}" if m[0] == ?x
[Integer(m)].pack('U*')
}
matches.collect!{|x|x[0]}.compact!
if matches.size > 0
matches.each do |entity_reference|
unless filter and filter.include?(entity_reference)
entity_value = entity( entity_reference, entities )
if entity_value
re = /&#{entity_reference};/
rv.gsub!( re, entity_value )
else
er = DEFAULT_ENTITIES[entity_reference]
rv.gsub!( er[0], er[2] ) if er
end
end
end
rv.gsub!( /&amp;/, '&' )
end
rv
end
private
def need_source_encoding_update?(xml_declaration_encoding)
return false if xml_declaration_encoding.nil?
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
true
end
def process_instruction
match_data = @source.match(INSTRUCTION_PATTERN, true)
unless match_data
message = "Invalid processing instruction node"
raise REXML::ParseException.new(message, @source)
end
[:processing_instruction, match_data[1], match_data[2]]
end
def parse_attributes(prefixes, curr_ns)
attributes = {}
closed = false
match_data = @source.match(/^(.*?)(\/)?>/um, true)
if match_data.nil?
message = "Start tag isn't ended"
raise REXML::ParseException.new(message, @source)
end
raw_attributes = match_data[1]
closed = !match_data[2].nil?
return attributes, closed if raw_attributes.nil?
return attributes, closed if raw_attributes.empty?
scanner = StringScanner.new(raw_attributes)
until scanner.eos?
if scanner.scan(/\s+/)
break if scanner.eos?
end
pos = scanner.pos
loop do
break if scanner.scan(ATTRIBUTE_PATTERN)
unless scanner.scan(QNAME)
message = "Invalid attribute name: <#{scanner.rest}>"
raise REXML::ParseException.new(message, @source)
end
name = scanner[0]
unless scanner.scan(/\s*=\s*/um)
message = "Missing attribute equal: <#{name}>"
raise REXML::ParseException.new(message, @source)
end
quote = scanner.scan(/['"]/)
unless quote
message = "Missing attribute value start quote: <#{name}>"
raise REXML::ParseException.new(message, @source)
end
unless scanner.scan(/.*#{Regexp.escape(quote)}/um)
match_data = @source.match(/^(.*?)(\/)?>/um, true)
if match_data
scanner << "/" if closed
scanner << ">"
scanner << match_data[1]
scanner.pos = pos
closed = !match_data[2].nil?
next
end
message =
"Missing attribute value end quote: <#{name}>: <#{quote}>"
raise REXML::ParseException.new(message, @source)
end
end
name = scanner[1]
prefix = scanner[2]
local_part = scanner[3]
# quote = scanner[4]
value = scanner[5]
if prefix == "xmlns"
if local_part == "xml"
if value != "http://www.w3.org/XML/1998/namespace"
msg = "The 'xml' prefix must not be bound to any other namespace "+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
raise REXML::ParseException.new( msg, @source, self )
end
elsif local_part == "xmlns"
msg = "The 'xmlns' prefix must not be declared "+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
raise REXML::ParseException.new( msg, @source, self)
end
curr_ns << local_part
elsif prefix
prefixes << prefix unless prefix == "xml"
end
if attributes.has_key?(name)
msg = "Duplicate attribute #{name.inspect}"
raise REXML::ParseException.new(msg, @source, self)
end
attributes[name] = value
end
return attributes, closed
end
end
end
end
=begin
case event[0]
when :start_element
when :text
when :end_element
when :processing_instruction
when :cdata
when :comment
when :xmldecl
when :start_doctype
when :end_doctype
when :externalentity
when :elementdecl
when :entity
when :attlistdecl
when :notationdecl
when :end_doctype
end
=end

View File

@ -1,59 +0,0 @@
# frozen_string_literal: false
require_relative 'streamparser'
require_relative 'baseparser'
require_relative '../light/node'
module REXML
module Parsers
class LightParser
def initialize stream
@stream = stream
@parser = REXML::Parsers::BaseParser.new( stream )
end
def add_listener( listener )
@parser.add_listener( listener )
end
def rewind
@stream.rewind
@parser.stream = @stream
end
def parse
root = context = [ :document ]
while true
event = @parser.pull
case event[0]
when :end_document
break
when :start_element, :start_doctype
new_node = event
context << new_node
new_node[1,0] = [context]
context = new_node
when :end_element, :end_doctype
context = context[1]
else
new_node = event
context << new_node
new_node[1,0] = [context]
end
end
root
end
end
# An element is an array. The array contains:
# 0 The parent element
# 1 The tag name
# 2 A hash of attributes
# 3..-1 The child elements
# An element is an array of size > 3
# Text is a String
# PIs are [ :processing_instruction, target, data ]
# Comments are [ :comment, data ]
# DocTypes are DocType structs
# The root is an array with XMLDecls, Text, DocType, Array, Text
end
end

View File

@ -1,197 +0,0 @@
# frozen_string_literal: false
require 'forwardable'
require_relative '../parseexception'
require_relative 'baseparser'
require_relative '../xmltokens'
module REXML
module Parsers
# = Using the Pull Parser
# <em>This API is experimental, and subject to change.</em>
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
# while parser.has_next?
# res = parser.next
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
# end
# See the PullEvent class for information on the content of the results.
# The data is identical to the arguments passed for the various events to
# the StreamListener API.
#
# Notice that:
# parser = PullParser.new( "<a>BAD DOCUMENT" )
# while parser.has_next?
# res = parser.next
# raise res[1] if res.error?
# end
#
# Nat Price gave me some good ideas for the API.
class PullParser
include XMLTokens
extend Forwardable
def_delegators( :@parser, :has_next? )
def_delegators( :@parser, :entity )
def_delegators( :@parser, :empty? )
def_delegators( :@parser, :source )
def initialize stream
@entities = {}
@listeners = nil
@parser = BaseParser.new( stream )
@my_stack = []
end
def add_listener( listener )
@listeners = [] unless @listeners
@listeners << listener
end
def each
while has_next?
yield self.pull
end
end
def peek depth=0
if @my_stack.length <= depth
(depth - @my_stack.length + 1).times {
e = PullEvent.new(@parser.pull)
@my_stack.push(e)
}
end
@my_stack[depth]
end
def pull
return @my_stack.shift if @my_stack.length > 0
event = @parser.pull
case event[0]
when :entitydecl
@entities[ event[1] ] =
event[2] unless event[2] =~ /PUBLIC|SYSTEM/
when :text
unnormalized = @parser.unnormalize( event[1], @entities )
event << unnormalized
end
PullEvent.new( event )
end
def unshift token
@my_stack.unshift token
end
end
# A parsing event. The contents of the event are accessed as an +Array?,
# and the type is given either by the ...? methods, or by accessing the
# +type+ accessor. The contents of this object vary from event to event,
# but are identical to the arguments passed to +StreamListener+s for each
# event.
class PullEvent
# The type of this event. Will be one of :tag_start, :tag_end, :text,
# :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl,
# :notationdecl, :entity, :cdata, :xmldecl, or :error.
def initialize(arg)
@contents = arg
end
def []( start, endd=nil)
if start.kind_of? Range
@contents.slice( start.begin+1 .. start.end )
elsif start.kind_of? Numeric
if endd.nil?
@contents.slice( start+1 )
else
@contents.slice( start+1, endd )
end
else
raise "Illegal argument #{start.inspect} (#{start.class})"
end
end
def event_type
@contents[0]
end
# Content: [ String tag_name, Hash attributes ]
def start_element?
@contents[0] == :start_element
end
# Content: [ String tag_name ]
def end_element?
@contents[0] == :end_element
end
# Content: [ String raw_text, String unnormalized_text ]
def text?
@contents[0] == :text
end
# Content: [ String text ]
def instruction?
@contents[0] == :processing_instruction
end
# Content: [ String text ]
def comment?
@contents[0] == :comment
end
# Content: [ String name, String pub_sys, String long_name, String uri ]
def doctype?
@contents[0] == :start_doctype
end
# Content: [ String text ]
def attlistdecl?
@contents[0] == :attlistdecl
end
# Content: [ String text ]
def elementdecl?
@contents[0] == :elementdecl
end
# Due to the wonders of DTDs, an entity declaration can be just about
# anything. There's no way to normalize it; you'll have to interpret the
# content yourself. However, the following is true:
#
# * If the entity declaration is an internal entity:
# [ String name, String value ]
# Content: [ String text ]
def entitydecl?
@contents[0] == :entitydecl
end
# Content: [ String text ]
def notationdecl?
@contents[0] == :notationdecl
end
# Content: [ String text ]
def entity?
@contents[0] == :entity
end
# Content: [ String text ]
def cdata?
@contents[0] == :cdata
end
# Content: [ String version, String encoding, String standalone ]
def xmldecl?
@contents[0] == :xmldecl
end
def error?
@contents[0] == :error
end
def inspect
@contents[0].to_s + ": " + @contents[1..-1].inspect
end
end
end
end

View File

@ -1,273 +0,0 @@
# frozen_string_literal: false
require_relative 'baseparser'
require_relative '../parseexception'
require_relative '../namespace'
require_relative '../text'
module REXML
module Parsers
# SAX2Parser
class SAX2Parser
def initialize source
@parser = BaseParser.new(source)
@listeners = []
@procs = []
@namespace_stack = []
@has_listeners = false
@tag_stack = []
@entities = {}
end
def source
@parser.source
end
def add_listener( listener )
@parser.add_listener( listener )
end
# Listen arguments:
#
# Symbol, Array, Block
# Listen to Symbol events on Array elements
# Symbol, Block
# Listen to Symbol events
# Array, Listener
# Listen to all events on Array elements
# Array, Block
# Listen to :start_element events on Array elements
# Listener
# Listen to All events
#
# Symbol can be one of: :start_element, :end_element,
# :start_prefix_mapping, :end_prefix_mapping, :characters,
# :processing_instruction, :doctype, :attlistdecl, :elementdecl,
# :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
#
# There is an additional symbol that can be listened for: :progress.
# This will be called for every event generated, passing in the current
# stream position.
#
# Array contains regular expressions or strings which will be matched
# against fully qualified element names.
#
# Listener must implement the methods in SAX2Listener
#
# Block will be passed the same arguments as a SAX2Listener method would
# be, where the method name is the same as the matched Symbol.
# See the SAX2Listener for more information.
def listen( *args, &blok )
if args[0].kind_of? Symbol
if args.size == 2
args[1].each { |match| @procs << [args[0], match, blok] }
else
add( [args[0], nil, blok] )
end
elsif args[0].kind_of? Array
if args.size == 2
args[0].each { |match| add( [nil, match, args[1]] ) }
else
args[0].each { |match| add( [ :start_element, match, blok ] ) }
end
else
add([nil, nil, args[0]])
end
end
def deafen( listener=nil, &blok )
if listener
@listeners.delete_if {|item| item[-1] == listener }
@has_listeners = false if @listeners.size == 0
else
@procs.delete_if {|item| item[-1] == blok }
end
end
def parse
@procs.each { |sym,match,block| block.call if sym == :start_document }
@listeners.each { |sym,match,block|
block.start_document if sym == :start_document or sym.nil?
}
context = []
while true
event = @parser.pull
case event[0]
when :end_document
handle( :end_document )
break
when :start_doctype
handle( :doctype, *event[1..-1])
when :end_doctype
context = context[1]
when :start_element
@tag_stack.push(event[1])
# find the observers for namespaces
procs = get_procs( :start_prefix_mapping, event[1] )
listeners = get_listeners( :start_prefix_mapping, event[1] )
if procs or listeners
# break out the namespace declarations
# The attributes live in event[2]
event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
nsdecl.collect! { |n, value| [ n[6..-1], value ] }
@namespace_stack.push({})
nsdecl.each do |n,v|
@namespace_stack[-1][n] = v
# notify observers of namespaces
procs.each { |ob| ob.call( n, v ) } if procs
listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
end
end
event[1] =~ Namespace::NAMESPLIT
prefix = $1
local = $2
uri = get_namespace(prefix)
# find the observers for start_element
procs = get_procs( :start_element, event[1] )
listeners = get_listeners( :start_element, event[1] )
# notify observers
procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
listeners.each { |ob|
ob.start_element( uri, local, event[1], event[2] )
} if listeners
when :end_element
@tag_stack.pop
event[1] =~ Namespace::NAMESPLIT
prefix = $1
local = $2
uri = get_namespace(prefix)
# find the observers for start_element
procs = get_procs( :end_element, event[1] )
listeners = get_listeners( :end_element, event[1] )
# notify observers
procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
listeners.each { |ob|
ob.end_element( uri, local, event[1] )
} if listeners
namespace_mapping = @namespace_stack.pop
# find the observers for namespaces
procs = get_procs( :end_prefix_mapping, event[1] )
listeners = get_listeners( :end_prefix_mapping, event[1] )
if procs or listeners
namespace_mapping.each do |ns_prefix, ns_uri|
# notify observers of namespaces
procs.each { |ob| ob.call( ns_prefix ) } if procs
listeners.each { |ob| ob.end_prefix_mapping(ns_prefix) } if listeners
end
end
when :text
#normalized = @parser.normalize( event[1] )
#handle( :characters, normalized )
copy = event[1].clone
esub = proc { |match|
if @entities.has_key?($1)
@entities[$1].gsub(Text::REFERENCE, &esub)
else
match
end
}
copy.gsub!( Text::REFERENCE, &esub )
copy.gsub!( Text::NUMERICENTITY ) {|m|
m=$1
m = "0#{m}" if m[0] == ?x
[Integer(m)].pack('U*')
}
handle( :characters, copy )
when :entitydecl
handle_entitydecl( event )
when :processing_instruction, :comment, :attlistdecl,
:elementdecl, :cdata, :notationdecl, :xmldecl
handle( *event )
end
handle( :progress, @parser.position )
end
end
private
def handle( symbol, *arguments )
tag = @tag_stack[-1]
procs = get_procs( symbol, tag )
listeners = get_listeners( symbol, tag )
# notify observers
procs.each { |ob| ob.call( *arguments ) } if procs
listeners.each { |l|
l.send( symbol.to_s, *arguments )
} if listeners
end
def handle_entitydecl( event )
@entities[ event[1] ] = event[2] if event.size == 3
parameter_reference_p = false
case event[2]
when "SYSTEM"
if event.size == 5
if event.last == "%"
parameter_reference_p = true
else
event[4, 0] = "NDATA"
end
end
when "PUBLIC"
if event.size == 6
if event.last == "%"
parameter_reference_p = true
else
event[5, 0] = "NDATA"
end
end
else
parameter_reference_p = (event.size == 4)
end
event[1, 0] = event.pop if parameter_reference_p
handle( event[0], event[1..-1] )
end
# The following methods are duplicates, but it is faster than using
# a helper
def get_procs( symbol, name )
return nil if @procs.size == 0
@procs.find_all do |sym, match, block|
(
(sym.nil? or symbol == sym) and
((name.nil? and match.nil?) or match.nil? or (
(name == match) or
(match.kind_of? Regexp and name =~ match)
)
)
)
end.collect{|x| x[-1]}
end
def get_listeners( symbol, name )
return nil if @listeners.size == 0
@listeners.find_all do |sym, match, block|
(
(sym.nil? or symbol == sym) and
((name.nil? and match.nil?) or match.nil? or (
(name == match) or
(match.kind_of? Regexp and name =~ match)
)
)
)
end.collect{|x| x[-1]}
end
def add( pair )
if pair[-1].respond_to? :call
@procs << pair unless @procs.include? pair
else
@listeners << pair unless @listeners.include? pair
@has_listeners = true
end
end
def get_namespace( prefix )
uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
(@namespace_stack.find { |ns| not ns[nil].nil? })
uris[-1][prefix] unless uris.nil? or 0 == uris.size
end
end
end
end

View File

@ -1,61 +0,0 @@
# frozen_string_literal: false
require_relative "baseparser"
module REXML
module Parsers
class StreamParser
def initialize source, listener
@listener = listener
@parser = BaseParser.new( source )
@tag_stack = []
end
def add_listener( listener )
@parser.add_listener( listener )
end
def parse
# entity string
while true
event = @parser.pull
case event[0]
when :end_document
unless @tag_stack.empty?
tag_path = "/" + @tag_stack.join("/")
raise ParseException.new("Missing end tag for '#{tag_path}'",
@parser.source)
end
return
when :start_element
@tag_stack << event[1]
attrs = event[2].each do |n, v|
event[2][n] = @parser.unnormalize( v )
end
@listener.tag_start( event[1], attrs )
when :end_element
@listener.tag_end( event[1] )
@tag_stack.pop
when :text
normalized = @parser.unnormalize( event[1] )
@listener.text( normalized )
when :processing_instruction
@listener.instruction( *event[1,2] )
when :start_doctype
@listener.doctype( *event[1..-1] )
when :end_doctype
# FIXME: remove this condition for milestone:3.2
@listener.doctype_end if @listener.respond_to? :doctype_end
when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
@listener.send( event[0].to_s, *event[1..-1] )
when :entitydecl, :notationdecl
@listener.send( event[0].to_s, event[1..-1] )
when :externalentity
entity_reference = event[1]
content = entity_reference.gsub(/\A%|;\z/, "")
@listener.entity(content)
end
end
end
end
end
end

View File

@ -1,101 +0,0 @@
# frozen_string_literal: false
require_relative '../validation/validationexception'
require_relative '../undefinednamespaceexception'
module REXML
module Parsers
class TreeParser
def initialize( source, build_context = Document.new )
@build_context = build_context
@parser = Parsers::BaseParser.new( source )
end
def add_listener( listener )
@parser.add_listener( listener )
end
def parse
tag_stack = []
in_doctype = false
entities = nil
begin
while true
event = @parser.pull
#STDERR.puts "TREEPARSER GOT #{event.inspect}"
case event[0]
when :end_document
unless tag_stack.empty?
raise ParseException.new("No close tag for #{@build_context.xpath}",
@parser.source, @parser)
end
return
when :start_element
tag_stack.push(event[1])
el = @build_context = @build_context.add_element( event[1] )
event[2].each do |key, value|
el.attributes[key]=Attribute.new(key,value,self)
end
when :end_element
tag_stack.pop
@build_context = @build_context.parent
when :text
if not in_doctype
if @build_context[-1].instance_of? Text
@build_context[-1] << event[1]
else
@build_context.add(
Text.new(event[1], @build_context.whitespace, nil, true)
) unless (
@build_context.ignore_whitespace_nodes and
event[1].strip.size==0
)
end
end
when :comment
c = Comment.new( event[1] )
@build_context.add( c )
when :cdata
c = CData.new( event[1] )
@build_context.add( c )
when :processing_instruction
@build_context.add( Instruction.new( event[1], event[2] ) )
when :end_doctype
in_doctype = false
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
@build_context = @build_context.parent
when :start_doctype
doctype = DocType.new( event[1..-1], @build_context )
@build_context = doctype
entities = {}
in_doctype = true
when :attlistdecl
n = AttlistDecl.new( event[1..-1] )
@build_context.add( n )
when :externalentity
n = ExternalEntity.new( event[1] )
@build_context.add( n )
when :elementdecl
n = ElementDecl.new( event[1] )
@build_context.add(n)
when :entitydecl
entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
@build_context.add(Entity.new(event))
when :notationdecl
n = NotationDecl.new( *event[1..-1] )
@build_context.add( n )
when :xmldecl
x = XMLDecl.new( event[1], event[2], event[3] )
@build_context.add( x )
end
end
rescue REXML::Validation::ValidationException
raise
rescue REXML::ParseException
raise
rescue
raise ParseException.new( $!.message, @parser.source, @parser, $! )
end
end
end
end
end

View File

@ -1,57 +0,0 @@
# frozen_string_literal: false
require_relative 'streamparser'
require_relative 'baseparser'
module REXML
module Parsers
class UltraLightParser
def initialize stream
@stream = stream
@parser = REXML::Parsers::BaseParser.new( stream )
end
def add_listener( listener )
@parser.add_listener( listener )
end
def rewind
@stream.rewind
@parser.stream = @stream
end
def parse
root = context = []
while true
event = @parser.pull
case event[0]
when :end_document
break
when :end_doctype
context = context[1]
when :start_element, :start_doctype
context << event
event[1,0] = [context]
context = event
when :end_element
context = context[1]
else
context << event
end
end
root
end
end
# An element is an array. The array contains:
# 0 The parent element
# 1 The tag name
# 2 A hash of attributes
# 3..-1 The child elements
# An element is an array of size > 3
# Text is a String
# PIs are [ :processing_instruction, target, data ]
# Comments are [ :comment, data ]
# DocTypes are DocType structs
# The root is an array with XMLDecls, Text, DocType, Array, Text
end
end

View File

@ -1,675 +0,0 @@
# frozen_string_literal: false
require_relative '../namespace'
require_relative '../xmltokens'
module REXML
module Parsers
# You don't want to use this class. Really. Use XPath, which is a wrapper
# for this class. Believe me. You don't want to poke around in here.
# There is strange, dark magic at work in this code. Beware. Go back! Go
# back while you still can!
class XPathParser
include XMLTokens
LITERAL = /^'([^']*)'|^"([^"]*)"/u
def namespaces=( namespaces )
Functions::namespace_context = namespaces
@namespaces = namespaces
end
def parse path
path = path.dup
path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
path.gsub!( /\s+([\]\)])/, '\1')
parsed = []
OrExpr(path, parsed)
parsed
end
def predicate path
parsed = []
Predicate( "[#{path}]", parsed )
parsed
end
def abbreviate( path )
path = path.kind_of?(String) ? parse( path ) : path
string = ""
document = false
while path.size > 0
op = path.shift
case op
when :node
when :attribute
string << "/" if string.size > 0
string << "@"
when :child
string << "/" if string.size > 0
when :descendant_or_self
string << "/"
when :self
string << "."
when :parent
string << ".."
when :any
string << "*"
when :text
string << "text()"
when :following, :following_sibling,
:ancestor, :ancestor_or_self, :descendant,
:namespace, :preceding, :preceding_sibling
string << "/" unless string.size == 0
string << op.to_s.tr("_", "-")
string << "::"
when :qname
prefix = path.shift
name = path.shift
string << prefix+":" if prefix.size > 0
string << name
when :predicate
string << '['
string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
string << ']'
when :document
document = true
when :function
string << path.shift
string << "( "
string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
string << " )"
when :literal
string << %Q{ "#{path.shift}" }
else
string << "/" unless string.size == 0
string << "UNKNOWN("
string << op.inspect
string << ")"
end
end
string = "/"+string if document
return string
end
def expand( path )
path = path.kind_of?(String) ? parse( path ) : path
string = ""
document = false
while path.size > 0
op = path.shift
case op
when :node
string << "node()"
when :attribute, :child, :following, :following_sibling,
:ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
:namespace, :preceding, :preceding_sibling, :self, :parent
string << "/" unless string.size == 0
string << op.to_s.tr("_", "-")
string << "::"
when :any
string << "*"
when :qname
prefix = path.shift
name = path.shift
string << prefix+":" if prefix.size > 0
string << name
when :predicate
string << '['
string << predicate_to_string( path.shift ) { |x| expand(x) }
string << ']'
when :document
document = true
else
string << "/" unless string.size == 0
string << "UNKNOWN("
string << op.inspect
string << ")"
end
end
string = "/"+string if document
return string
end
def predicate_to_string( path, &block )
string = ""
case path[0]
when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
op = path.shift
case op
when :eq
op = "="
when :lt
op = "<"
when :gt
op = ">"
when :lteq
op = "<="
when :gteq
op = ">="
when :neq
op = "!="
when :union
op = "|"
end
left = predicate_to_string( path.shift, &block )
right = predicate_to_string( path.shift, &block )
string << " "
string << left
string << " "
string << op.to_s
string << " "
string << right
string << " "
when :function
path.shift
name = path.shift
string << name
string << "( "
string << predicate_to_string( path.shift, &block )
string << " )"
when :literal
path.shift
string << " "
string << path.shift.inspect
string << " "
else
string << " "
string << yield( path )
string << " "
end
return string.squeeze(" ")
end
private
#LocationPath
# | RelativeLocationPath
# | '/' RelativeLocationPath?
# | '//' RelativeLocationPath
def LocationPath path, parsed
path = path.lstrip
if path[0] == ?/
parsed << :document
if path[1] == ?/
parsed << :descendant_or_self
parsed << :node
path = path[2..-1]
else
path = path[1..-1]
end
end
return RelativeLocationPath( path, parsed ) if path.size > 0
end
#RelativeLocationPath
# | Step
# | (AXIS_NAME '::' | '@' | '') AxisSpecifier
# NodeTest
# Predicate
# | '.' | '..' AbbreviatedStep
# | RelativeLocationPath '/' Step
# | RelativeLocationPath '//' Step
AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
def RelativeLocationPath path, parsed
loop do
original_path = path
path = path.lstrip
return original_path if path.empty?
# (axis or @ or <child::>) nodetest predicate >
# OR > / Step
# (. or ..) >
if path[0] == ?.
if path[1] == ?.
parsed << :parent
parsed << :node
path = path[2..-1]
else
parsed << :self
parsed << :node
path = path[1..-1]
end
else
if path[0] == ?@
parsed << :attribute
path = path[1..-1]
# Goto Nodetest
elsif path =~ AXIS
parsed << $1.tr('-','_').intern
path = $'
# Goto Nodetest
else
parsed << :child
end
n = []
path = NodeTest( path, n)
path = Predicate( path, n )
parsed.concat(n)
end
original_path = path
path = path.lstrip
return original_path if path.empty?
return original_path if path[0] != ?/
if path[1] == ?/
parsed << :descendant_or_self
parsed << :node
path = path[2..-1]
else
path = path[1..-1]
end
end
end
# Returns a 1-1 map of the nodeset
# The contents of the resulting array are either:
# true/false, if a positive match
# String, if a name match
#NodeTest
# | ('*' | NCNAME ':' '*' | QNAME) NameTest
# | '*' ':' NCNAME NameTest since XPath 2.0
# | NODE_TYPE '(' ')' NodeType
# | PI '(' LITERAL ')' PI
# | '[' expr ']' Predicate
PREFIX_WILDCARD = /^\*:(#{NCNAME_STR})/u
LOCAL_NAME_WILDCARD = /^(#{NCNAME_STR}):\*/u
QNAME = Namespace::NAMESPLIT
NODE_TYPE = /^(comment|text|node)\(\s*\)/m
PI = /^processing-instruction\(/
def NodeTest path, parsed
original_path = path
path = path.lstrip
case path
when PREFIX_WILDCARD
prefix = nil
name = $1
path = $'
parsed << :qname
parsed << prefix
parsed << name
when /^\*/
path = $'
parsed << :any
when NODE_TYPE
type = $1
path = $'
parsed << type.tr('-', '_').intern
when PI
path = $'
literal = nil
if path !~ /^\s*\)/
path =~ LITERAL
literal = $1
path = $'
raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
path = path[1..-1]
end
parsed << :processing_instruction
parsed << (literal || '')
when LOCAL_NAME_WILDCARD
prefix = $1
path = $'
parsed << :namespace
parsed << prefix
when QNAME
prefix = $1
name = $2
path = $'
prefix = "" unless prefix
parsed << :qname
parsed << prefix
parsed << name
else
path = original_path
end
return path
end
# Filters the supplied nodeset on the predicate(s)
def Predicate path, parsed
original_path = path
path = path.lstrip
return original_path unless path[0] == ?[
predicates = []
while path[0] == ?[
path, expr = get_group(path)
predicates << expr[1..-2] if expr
end
predicates.each{ |pred|
preds = []
parsed << :predicate
parsed << preds
OrExpr(pred, preds)
}
path
end
# The following return arrays of true/false, a 1-1 mapping of the
# supplied nodeset, except for axe(), which returns a filtered
# nodeset
#| OrExpr S 'or' S AndExpr
#| AndExpr
def OrExpr path, parsed
n = []
rest = AndExpr( path, n )
if rest != path
while rest =~ /^\s*( or )/
n = [ :or, n, [] ]
rest = AndExpr( $', n[-1] )
end
end
if parsed.size == 0 and n.size != 0
parsed.replace(n)
elsif n.size > 0
parsed << n
end
rest
end
#| AndExpr S 'and' S EqualityExpr
#| EqualityExpr
def AndExpr path, parsed
n = []
rest = EqualityExpr( path, n )
if rest != path
while rest =~ /^\s*( and )/
n = [ :and, n, [] ]
rest = EqualityExpr( $', n[-1] )
end
end
if parsed.size == 0 and n.size != 0
parsed.replace(n)
elsif n.size > 0
parsed << n
end
rest
end
#| EqualityExpr ('=' | '!=') RelationalExpr
#| RelationalExpr
def EqualityExpr path, parsed
n = []
rest = RelationalExpr( path, n )
if rest != path
while rest =~ /^\s*(!?=)\s*/
if $1[0] == ?!
n = [ :neq, n, [] ]
else
n = [ :eq, n, [] ]
end
rest = RelationalExpr( $', n[-1] )
end
end
if parsed.size == 0 and n.size != 0
parsed.replace(n)
elsif n.size > 0
parsed << n
end
rest
end
#| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
#| AdditiveExpr
def RelationalExpr path, parsed
n = []
rest = AdditiveExpr( path, n )
if rest != path
while rest =~ /^\s*([<>]=?)\s*/
if $1[0] == ?<
sym = "lt"
else
sym = "gt"
end
sym << "eq" if $1[-1] == ?=
n = [ sym.intern, n, [] ]
rest = AdditiveExpr( $', n[-1] )
end
end
if parsed.size == 0 and n.size != 0
parsed.replace(n)
elsif n.size > 0
parsed << n
end
rest
end
#| AdditiveExpr ('+' | '-') MultiplicativeExpr
#| MultiplicativeExpr
def AdditiveExpr path, parsed
n = []
rest = MultiplicativeExpr( path, n )
if rest != path
while rest =~ /^\s*(\+|-)\s*/
if $1[0] == ?+
n = [ :plus, n, [] ]
else
n = [ :minus, n, [] ]
end
rest = MultiplicativeExpr( $', n[-1] )
end
end
if parsed.size == 0 and n.size != 0
parsed.replace(n)
elsif n.size > 0
parsed << n
end
rest
end
#| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
#| UnaryExpr
def MultiplicativeExpr path, parsed
n = []
rest = UnaryExpr( path, n )
if rest != path
while rest =~ /^\s*(\*| div | mod )\s*/
if $1[0] == ?*
n = [ :mult, n, [] ]
elsif $1.include?( "div" )
n = [ :div, n, [] ]
else
n = [ :mod, n, [] ]
end
rest = UnaryExpr( $', n[-1] )
end
end
if parsed.size == 0 and n.size != 0
parsed.replace(n)
elsif n.size > 0
parsed << n
end
rest
end
#| '-' UnaryExpr
#| UnionExpr
def UnaryExpr path, parsed
path =~ /^(\-*)/
path = $'
if $1 and (($1.size % 2) != 0)
mult = -1
else
mult = 1
end
parsed << :neg if mult < 0
n = []
path = UnionExpr( path, n )
parsed.concat( n )
path
end
#| UnionExpr '|' PathExpr
#| PathExpr
def UnionExpr path, parsed
n = []
rest = PathExpr( path, n )
if rest != path
while rest =~ /^\s*(\|)\s*/
n = [ :union, n, [] ]
rest = PathExpr( $', n[-1] )
end
end
if parsed.size == 0 and n.size != 0
parsed.replace( n )
elsif n.size > 0
parsed << n
end
rest
end
#| LocationPath
#| FilterExpr ('/' | '//') RelativeLocationPath
def PathExpr path, parsed
path = path.lstrip
n = []
rest = FilterExpr( path, n )
if rest != path
if rest and rest[0] == ?/
rest = RelativeLocationPath(rest, n)
parsed.concat(n)
return rest
end
end
rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w*]/
parsed.concat(n)
return rest
end
#| FilterExpr Predicate
#| PrimaryExpr
def FilterExpr path, parsed
n = []
path = PrimaryExpr( path, n )
path = Predicate(path, n)
parsed.concat(n)
path
end
#| VARIABLE_REFERENCE
#| '(' expr ')'
#| LITERAL
#| NUMBER
#| FunctionCall
VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u
NUMBER = /^(\d*\.?\d+)/
NT = /^comment|text|processing-instruction|node$/
def PrimaryExpr path, parsed
case path
when VARIABLE_REFERENCE
varname = $1
path = $'
parsed << :variable
parsed << varname
#arry << @variables[ varname ]
when /^(\w[-\w]*)(?:\()/
fname = $1
tmp = $'
return path if fname =~ NT
path = tmp
parsed << :function
parsed << fname
path = FunctionCall(path, parsed)
when NUMBER
varname = $1.nil? ? $2 : $1
path = $'
parsed << :literal
parsed << (varname.include?('.') ? varname.to_f : varname.to_i)
when LITERAL
varname = $1.nil? ? $2 : $1
path = $'
parsed << :literal
parsed << varname
when /^\(/ #/
path, contents = get_group(path)
contents = contents[1..-2]
n = []
OrExpr( contents, n )
parsed.concat(n)
end
path
end
#| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
def FunctionCall rest, parsed
path, arguments = parse_args(rest)
argset = []
for argument in arguments
args = []
OrExpr( argument, args )
argset << args
end
parsed << argset
path
end
# get_group( '[foo]bar' ) -> ['bar', '[foo]']
def get_group string
ind = 0
depth = 0
st = string[0,1]
en = (st == "(" ? ")" : "]")
begin
case string[ind,1]
when st
depth += 1
when en
depth -= 1
end
ind += 1
end while depth > 0 and ind < string.length
return nil unless depth==0
[string[ind..-1], string[0..ind-1]]
end
def parse_args( string )
arguments = []
ind = 0
inquot = false
inapos = false
depth = 1
begin
case string[ind]
when ?"
inquot = !inquot unless inapos
when ?'
inapos = !inapos unless inquot
else
unless inquot or inapos
case string[ind]
when ?(
depth += 1
if depth == 1
string = string[1..-1]
ind -= 1
end
when ?)
depth -= 1
if depth == 0
s = string[0,ind].strip
arguments << s unless s == ""
string = string[ind+1..-1]
end
when ?,
if depth == 1
s = string[0,ind].strip
arguments << s unless s == ""
string = string[ind+1..-1]
ind = -1
end
end
end
end
ind += 1
end while depth > 0 and ind < string.length
return nil unless depth==0
[string,arguments]
end
end
end
end

View File

@ -1,266 +0,0 @@
# frozen_string_literal: false
require_relative 'functions'
require_relative 'xmltokens'
module REXML
class QuickPath
include Functions
include XMLTokens
# A base Hash object to be used when initializing a
# default empty namespaces set.
EMPTY_HASH = {}
def QuickPath::first element, path, namespaces=EMPTY_HASH
match(element, path, namespaces)[0]
end
def QuickPath::each element, path, namespaces=EMPTY_HASH, &block
path = "*" unless path
match(element, path, namespaces).each( &block )
end
def QuickPath::match element, path, namespaces=EMPTY_HASH
raise "nil is not a valid xpath" unless path
results = nil
Functions::namespace_context = namespaces
case path
when /^\/([^\/]|$)/u
# match on root
path = path[1..-1]
return [element.root.parent] if path == ''
results = filter([element.root], path)
when /^[-\w]*::/u
results = filter([element], path)
when /^\*/u
results = filter(element.to_a, path)
when /^[\[!\w:]/u
# match on child
children = element.to_a
results = filter(children, path)
else
results = filter([element], path)
end
return results
end
# Given an array of nodes it filters the array based on the path. The
# result is that when this method returns, the array will contain elements
# which match the path
def QuickPath::filter elements, path
return elements if path.nil? or path == '' or elements.size == 0
case path
when /^\/\//u # Descendant
return axe( elements, "descendant-or-self", $' )
when /^\/?\b(\w[-\w]*)\b::/u # Axe
return axe( elements, $1, $' )
when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u # Child
rest = $'
results = []
elements.each do |element|
results |= filter( element.to_a, rest )
end
return results
when /^\/?(\w[-\w]*)\(/u # / Function
return function( elements, $1, $' )
when Namespace::NAMESPLIT # Element name
name = $2
ns = $1
rest = $'
elements.delete_if do |element|
!(element.kind_of? Element and
(element.expanded_name == name or
(element.name == name and
element.namespace == Functions.namespace_context[ns])))
end
return filter( elements, rest )
when /^\/\[/u
matches = []
elements.each do |element|
matches |= predicate( element.to_a, path[1..-1] ) if element.kind_of? Element
end
return matches
when /^\[/u # Predicate
return predicate( elements, path )
when /^\/?\.\.\./u # Ancestor
return axe( elements, "ancestor", $' )
when /^\/?\.\./u # Parent
return filter( elements.collect{|e|e.parent}, $' )
when /^\/?\./u # Self
return filter( elements, $' )
when /^\*/u # Any
results = []
elements.each do |element|
results |= filter( [element], $' ) if element.kind_of? Element
#if element.kind_of? Element
# children = element.to_a
# children.delete_if { |child| !child.kind_of?(Element) }
# results |= filter( children, $' )
#end
end
return results
end
return []
end
def QuickPath::axe( elements, axe_name, rest )
matches = []
matches = filter( elements.dup, rest ) if axe_name =~ /-or-self$/u
case axe_name
when /^descendant/u
elements.each do |element|
matches |= filter( element.to_a, "descendant-or-self::#{rest}" ) if element.kind_of? Element
end
when /^ancestor/u
elements.each do |element|
while element.parent
matches << element.parent
element = element.parent
end
end
matches = filter( matches, rest )
when "self"
matches = filter( elements, rest )
when "child"
elements.each do |element|
matches |= filter( element.to_a, rest ) if element.kind_of? Element
end
when "attribute"
elements.each do |element|
matches << element.attributes[ rest ] if element.kind_of? Element
end
when "parent"
matches = filter(elements.collect{|element| element.parent}.uniq, rest)
when "following-sibling"
matches = filter(elements.collect{|element| element.next_sibling}.uniq,
rest)
when "previous-sibling"
matches = filter(elements.collect{|element|
element.previous_sibling}.uniq, rest )
end
return matches.uniq
end
OPERAND_ = '((?=(?:(?!and|or).)*[^\s<>=])[^\s<>=]+)'
# A predicate filters a node-set with respect to an axis to produce a
# new node-set. For each node in the node-set to be filtered, the
# PredicateExpr is evaluated with that node as the context node, with
# the number of nodes in the node-set as the context size, and with the
# proximity position of the node in the node-set with respect to the
# axis as the context position; if PredicateExpr evaluates to true for
# that node, the node is included in the new node-set; otherwise, it is
# not included.
#
# A PredicateExpr is evaluated by evaluating the Expr and converting
# the result to a boolean. If the result is a number, the result will
# be converted to true if the number is equal to the context position
# and will be converted to false otherwise; if the result is not a
# number, then the result will be converted as if by a call to the
# boolean function. Thus a location path para[3] is equivalent to
# para[position()=3].
def QuickPath::predicate( elements, path )
ind = 1
bcount = 1
while bcount > 0
bcount += 1 if path[ind] == ?[
bcount -= 1 if path[ind] == ?]
ind += 1
end
ind -= 1
predicate = path[1..ind-1]
rest = path[ind+1..-1]
# have to change 'a [=<>] b [=<>] c' into 'a [=<>] b and b [=<>] c'
#
predicate.gsub!(
/#{OPERAND_}\s*([<>=])\s*#{OPERAND_}\s*([<>=])\s*#{OPERAND_}/u,
'\1 \2 \3 and \3 \4 \5' )
# Let's do some Ruby trickery to avoid some work:
predicate.gsub!( /&/u, "&&" )
predicate.gsub!( /=/u, "==" )
predicate.gsub!( /@(\w[-\w.]*)/u, 'attribute("\1")' )
predicate.gsub!( /\bmod\b/u, "%" )
predicate.gsub!( /\b(\w[-\w.]*\()/u ) {
fname = $1
fname.gsub( /-/u, "_" )
}
Functions.pair = [ 0, elements.size ]
results = []
elements.each do |element|
Functions.pair[0] += 1
Functions.node = element
res = eval( predicate )
case res
when true
results << element
when Integer
results << element if Functions.pair[0] == res
when String
results << element
end
end
return filter( results, rest )
end
def QuickPath::attribute( name )
return Functions.node.attributes[name] if Functions.node.kind_of? Element
end
def QuickPath::name()
return Functions.node.name if Functions.node.kind_of? Element
end
def QuickPath::method_missing( id, *args )
begin
Functions.send( id.id2name, *args )
rescue Exception
raise "METHOD: #{id.id2name}(#{args.join ', '})\n#{$!.message}"
end
end
def QuickPath::function( elements, fname, rest )
args = parse_args( elements, rest )
Functions.pair = [0, elements.size]
results = []
elements.each do |element|
Functions.pair[0] += 1
Functions.node = element
res = Functions.send( fname, *args )
case res
when true
results << element
when Integer
results << element if Functions.pair[0] == res
end
end
return results
end
def QuickPath::parse_args( element, string )
# /.*?(?:\)|,)/
arguments = []
buffer = ""
while string and string != ""
c = string[0]
string.sub!(/^./u, "")
case c
when ?,
# if depth = 1, then we start a new argument
arguments << evaluate( buffer )
#arguments << evaluate( string[0..count] )
when ?(
# start a new method call
function( element, buffer, string )
buffer = ""
when ?)
# close the method call and return arguments
return arguments
else
buffer << c
end
end
""
end
end
end

View File

@ -1,32 +0,0 @@
# -*- coding: utf-8 -*-
# frozen_string_literal: false
# REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
#
# REXML is a _pure_ Ruby, XML 1.0 conforming,
# non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
# toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
# tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
# and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
# includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
# Ruby 1.8, REXML is included in the standard Ruby distribution.
#
# Main page:: http://www.germane-software.com/software/rexml
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
# Date:: 2008/019
# Version:: 3.1.7.3
#
# This API documentation can be downloaded from the REXML home page, or can
# be accessed online[http://www.germane-software.com/software/rexml_doc]
#
# A tutorial is available in the REXML distribution in docs/tutorial.html,
# or can be accessed
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
module REXML
COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
DATE = "2008/019"
VERSION = "3.2.4"
REVISION = ""
Copyright = COPYRIGHT
Version = VERSION
end

View File

@ -1,98 +0,0 @@
# frozen_string_literal: false
module REXML
# A template for stream parser listeners.
# Note that the declarations (attlistdecl, elementdecl, etc) are trivially
# processed; REXML doesn't yet handle doctype entity declarations, so you
# have to parse them out yourself.
# === Missing methods from SAX2
# ignorable_whitespace
# === Methods extending SAX2
# +WARNING+
# These methods are certainly going to change, until DTDs are fully
# supported. Be aware of this.
# start_document
# end_document
# doctype
# elementdecl
# attlistdecl
# entitydecl
# notationdecl
# cdata
# xmldecl
# comment
module SAX2Listener
def start_document
end
def end_document
end
def start_prefix_mapping prefix, uri
end
def end_prefix_mapping prefix
end
def start_element uri, localname, qname, attributes
end
def end_element uri, localname, qname
end
def characters text
end
def processing_instruction target, data
end
# Handles a doctype declaration. Any attributes of the doctype which are
# not supplied will be nil. # EG, <!DOCTYPE me PUBLIC "foo" "bar">
# @p name the name of the doctype; EG, "me"
# @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
# @p long_name the supplied long name, or nil. EG, "foo"
# @p uri the uri of the doctype, or nil. EG, "bar"
def doctype name, pub_sys, long_name, uri
end
# If a doctype includes an ATTLIST declaration, it will cause this
# method to be called. The content is the declaration itself, unparsed.
# EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
# attr CDATA #REQUIRED". This is the same for all of the .*decl
# methods.
def attlistdecl(element, pairs, contents)
end
# <!ELEMENT ...>
def elementdecl content
end
# <!ENTITY ...>
# The argument passed to this method is an array of the entity
# declaration. It can be in a number of formats, but in general it
# returns (example, result):
# <!ENTITY % YN '"Yes"'>
# ["%", "YN", "\"Yes\""]
# <!ENTITY % YN 'Yes'>
# ["%", "YN", "Yes"]
# <!ENTITY WhatHeSaid "He said %YN;">
# ["WhatHeSaid", "He said %YN;"]
# <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
# ["open-hatch", "SYSTEM", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
# <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
# ["open-hatch", "PUBLIC", "-//Textuality//TEXT Standard open-hatch boilerplate//EN", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
# <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
# ["hatch-pic", "SYSTEM", "../grafix/OpenHatch.gif", "NDATA", "gif"]
def entitydecl declaration
end
# <!NOTATION ...>
def notationdecl name, public_or_system, public_id, system_id
end
# Called when <![CDATA[ ... ]]> is encountered in a document.
# @p content "..."
def cdata content
end
# Called when an XML PI is encountered in the document.
# EG: <?xml version="1.0" encoding="utf"?>
# @p version the version attribute value. EG, "1.0"
# @p encoding the encoding attribute value, or nil. EG, "utf"
# @p standalone the standalone attribute value, or nil. EG, nil
# @p spaced the declaration is followed by a line break
def xmldecl version, encoding, standalone
end
# Called when a comment is encountered.
# @p comment The content of the comment
def comment comment
end
def progress position
end
end
end

View File

@ -1,28 +0,0 @@
# frozen_string_literal: false
module REXML
module Security
@@entity_expansion_limit = 10_000
# Set the entity expansion limit. By default the limit is set to 10000.
def self.entity_expansion_limit=( val )
@@entity_expansion_limit = val
end
# Get the entity expansion limit. By default the limit is set to 10000.
def self.entity_expansion_limit
return @@entity_expansion_limit
end
@@entity_expansion_text_limit = 10_240
# Set the entity expansion limit. By default the limit is set to 10240.
def self.entity_expansion_text_limit=( val )
@@entity_expansion_text_limit = val
end
# Get the entity expansion limit. By default the limit is set to 10240.
def self.entity_expansion_text_limit
return @@entity_expansion_text_limit
end
end
end

View File

@ -1,298 +0,0 @@
# coding: US-ASCII
# frozen_string_literal: false
require_relative 'encoding'
module REXML
# Generates Source-s. USE THIS CLASS.
class SourceFactory
# Generates a Source object
# @param arg Either a String, or an IO
# @return a Source, or nil if a bad argument was given
def SourceFactory::create_from(arg)
if arg.respond_to? :read and
arg.respond_to? :readline and
arg.respond_to? :nil? and
arg.respond_to? :eof?
IOSource.new(arg)
elsif arg.respond_to? :to_str
require 'stringio'
IOSource.new(StringIO.new(arg))
elsif arg.kind_of? Source
arg
else
raise "#{arg.class} is not a valid input stream. It must walk \n"+
"like either a String, an IO, or a Source."
end
end
end
# A Source can be searched for patterns, and wraps buffers and other
# objects and provides consumption of text
class Source
include Encoding
# The current buffer (what we're going to read next)
attr_reader :buffer
# The line number of the last consumed text
attr_reader :line
attr_reader :encoding
# Constructor
# @param arg must be a String, and should be a valid XML document
# @param encoding if non-null, sets the encoding of the source to this
# value, overriding all encoding detection
def initialize(arg, encoding=nil)
@orig = @buffer = arg
if encoding
self.encoding = encoding
else
detect_encoding
end
@line = 0
end
# Inherited from Encoding
# Overridden to support optimized en/decoding
def encoding=(enc)
return unless super
encoding_updated
end
# Scans the source for a given pattern. Note, that this is not your
# usual scan() method. For one thing, the pattern argument has some
# requirements; for another, the source can be consumed. You can easily
# confuse this method. Originally, the patterns were easier
# to construct and this method more robust, because this method
# generated search regexps on the fly; however, this was
# computationally expensive and slowed down the entire REXML package
# considerably, since this is by far the most commonly called method.
# @param pattern must be a Regexp, and must be in the form of
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
# will be returned; the second group is used if the consume flag is
# set.
# @param consume if true, the pattern returned will be consumed, leaving
# everything after it in the Source.
# @return the pattern, if found, or nil if the Source is empty or the
# pattern is not found.
def scan(pattern, cons=false)
return nil if @buffer.nil?
rv = @buffer.scan(pattern)
@buffer = $' if cons and rv.size>0
rv
end
def read
end
def consume( pattern )
@buffer = $' if pattern.match( @buffer )
end
def match_to( char, pattern )
return pattern.match(@buffer)
end
def match_to_consume( char, pattern )
md = pattern.match(@buffer)
@buffer = $'
return md
end
def match(pattern, cons=false)
md = pattern.match(@buffer)
@buffer = $' if cons and md
return md
end
# @return true if the Source is exhausted
def empty?
@buffer == ""
end
def position
@orig.index( @buffer )
end
# @return the current line in the source
def current_line
lines = @orig.split
res = lines.grep @buffer[0..30]
res = res[-1] if res.kind_of? Array
lines.index( res ) if res
end
private
def detect_encoding
buffer_encoding = @buffer.encoding
detected_encoding = "UTF-8"
begin
@buffer.force_encoding("ASCII-8BIT")
if @buffer[0, 2] == "\xfe\xff"
@buffer[0, 2] = ""
detected_encoding = "UTF-16BE"
elsif @buffer[0, 2] == "\xff\xfe"
@buffer[0, 2] = ""
detected_encoding = "UTF-16LE"
elsif @buffer[0, 3] == "\xef\xbb\xbf"
@buffer[0, 3] = ""
detected_encoding = "UTF-8"
end
ensure
@buffer.force_encoding(buffer_encoding)
end
self.encoding = detected_encoding
end
def encoding_updated
if @encoding != 'UTF-8'
@buffer = decode(@buffer)
@to_utf = true
else
@to_utf = false
@buffer.force_encoding ::Encoding::UTF_8
end
end
end
# A Source that wraps an IO. See the Source class for method
# documentation
class IOSource < Source
#attr_reader :block_size
# block_size has been deprecated
def initialize(arg, block_size=500, encoding=nil)
@er_source = @source = arg
@to_utf = false
@pending_buffer = nil
if encoding
super("", encoding)
else
super(@source.read(3) || "")
end
if !@to_utf and
@buffer.respond_to?(:force_encoding) and
@source.respond_to?(:external_encoding) and
@source.external_encoding != ::Encoding::UTF_8
@force_utf8 = true
else
@force_utf8 = false
end
end
def scan(pattern, cons=false)
rv = super
# You'll notice that this next section is very similar to the same
# section in match(), but just a liiittle different. This is
# because it is a touch faster to do it this way with scan()
# than the way match() does it; enough faster to warrant duplicating
# some code
if rv.size == 0
until @buffer =~ pattern or @source.nil?
begin
@buffer << readline
rescue Iconv::IllegalSequence
raise
rescue
@source = nil
end
end
rv = super
end
rv.taint if RUBY_VERSION < '2.7'
rv
end
def read
begin
@buffer << readline
rescue Exception, NameError
@source = nil
end
end
def consume( pattern )
match( pattern, true )
end
def match( pattern, cons=false )
rv = pattern.match(@buffer)
@buffer = $' if cons and rv
while !rv and @source
begin
@buffer << readline
rv = pattern.match(@buffer)
@buffer = $' if cons and rv
rescue
@source = nil
end
end
rv.taint if RUBY_VERSION < '2.7'
rv
end
def empty?
super and ( @source.nil? || @source.eof? )
end
def position
@er_source.pos rescue 0
end
# @return the current line in the source
def current_line
begin
pos = @er_source.pos # The byte position in the source
lineno = @er_source.lineno # The XML < position in the source
@er_source.rewind
line = 0 # The \r\n position in the source
begin
while @er_source.pos < pos
@er_source.readline
line += 1
end
rescue
end
@er_source.seek(pos)
rescue IOError
pos = -1
line = -1
end
[pos, lineno, line]
end
private
def readline
str = @source.readline(@line_break)
if @pending_buffer
if str.nil?
str = @pending_buffer
else
str = @pending_buffer + str
end
@pending_buffer = nil
end
return nil if str.nil?
if @to_utf
decode(str)
else
str.force_encoding(::Encoding::UTF_8) if @force_utf8
str
end
end
def encoding_updated
case @encoding
when "UTF-16BE", "UTF-16LE"
@source.binmode
@source.set_encoding(@encoding, @encoding)
end
@line_break = encode(">")
@pending_buffer, @buffer = @buffer, ""
@pending_buffer.force_encoding(@encoding)
super
end
end
end

View File

@ -1,93 +0,0 @@
# frozen_string_literal: false
module REXML
# A template for stream parser listeners.
# Note that the declarations (attlistdecl, elementdecl, etc) are trivially
# processed; REXML doesn't yet handle doctype entity declarations, so you
# have to parse them out yourself.
module StreamListener
# Called when a tag is encountered.
# @p name the tag name
# @p attrs an array of arrays of attribute/value pairs, suitable for
# use with assoc or rassoc. IE, <tag attr1="value1" attr2="value2">
# will result in
# tag_start( "tag", # [["attr1","value1"],["attr2","value2"]])
def tag_start name, attrs
end
# Called when the end tag is reached. In the case of <tag/>, tag_end
# will be called immediately after tag_start
# @p the name of the tag
def tag_end name
end
# Called when text is encountered in the document
# @p text the text content.
def text text
end
# Called when an instruction is encountered. EG: <?xsl sheet='foo'?>
# @p name the instruction name; in the example, "xsl"
# @p instruction the rest of the instruction. In the example,
# "sheet='foo'"
def instruction name, instruction
end
# Called when a comment is encountered.
# @p comment The content of the comment
def comment comment
end
# Handles a doctype declaration. Any attributes of the doctype which are
# not supplied will be nil. # EG, <!DOCTYPE me PUBLIC "foo" "bar">
# @p name the name of the doctype; EG, "me"
# @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
# @p long_name the supplied long name, or nil. EG, "foo"
# @p uri the uri of the doctype, or nil. EG, "bar"
def doctype name, pub_sys, long_name, uri
end
# Called when the doctype is done
def doctype_end
end
# If a doctype includes an ATTLIST declaration, it will cause this
# method to be called. The content is the declaration itself, unparsed.
# EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
# attr CDATA #REQUIRED". This is the same for all of the .*decl
# methods.
def attlistdecl element_name, attributes, raw_content
end
# <!ELEMENT ...>
def elementdecl content
end
# <!ENTITY ...>
# The argument passed to this method is an array of the entity
# declaration. It can be in a number of formats, but in general it
# returns (example, result):
# <!ENTITY % YN '"Yes"'>
# ["YN", "\"Yes\"", "%"]
# <!ENTITY % YN 'Yes'>
# ["YN", "Yes", "%"]
# <!ENTITY WhatHeSaid "He said %YN;">
# ["WhatHeSaid", "He said %YN;"]
# <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
# ["open-hatch", "SYSTEM", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
# <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
# ["open-hatch", "PUBLIC", "-//Textuality//TEXT Standard open-hatch boilerplate//EN", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
# <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
# ["hatch-pic", "SYSTEM", "../grafix/OpenHatch.gif", "gif"]
def entitydecl content
end
# <!NOTATION ...>
def notationdecl content
end
# Called when %foo; is encountered in a doctype declaration.
# @p content "foo"
def entity content
end
# Called when <![CDATA[ ... ]]> is encountered in a document.
# @p content "..."
def cdata content
end
# Called when an XML PI is encountered in the document.
# EG: <?xml version="1.0" encoding="utf"?>
# @p version the version attribute value. EG, "1.0"
# @p encoding the encoding attribute value, or nil. EG, "utf"
# @p standalone the standalone attribute value, or nil. EG, nil
def xmldecl version, encoding, standalone
end
end
end

View File

@ -1,424 +0,0 @@
# frozen_string_literal: false
require_relative 'security'
require_relative 'entity'
require_relative 'doctype'
require_relative 'child'
require_relative 'doctype'
require_relative 'parseexception'
module REXML
# Represents text nodes in an XML document
class Text < Child
include Comparable
# The order in which the substitutions occur
SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]
SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
# Characters which are substituted in written strings
SLAICEPS = [ '<', '>', '"', "'", '&' ]
SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]
# If +raw+ is true, then REXML leaves the value alone
attr_accessor :raw
NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
VALID_CHAR = [
0x9, 0xA, 0xD,
(0x20..0xD7FF),
(0xE000..0xFFFD),
(0x10000..0x10FFFF)
]
if String.method_defined? :encode
VALID_XML_CHARS = Regexp.new('^['+
VALID_CHAR.map { |item|
case item
when Integer
[item].pack('U').force_encoding('utf-8')
when Range
[item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
end
}.join +
']*$')
else
VALID_XML_CHARS = /^(
[\x09\x0A\x0D\x20-\x7E] # ASCII
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
| [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
| \xEF[\x80-\xBE]{2} #
| \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
)*$/nx;
end
# Constructor
# +arg+ if a String, the content is set to the String. If a Text,
# the object is shallowly cloned.
#
# +respect_whitespace+ (boolean, false) if true, whitespace is
# respected
#
# +parent+ (nil) if this is a Parent object, the parent
# will be set to this.
#
# +raw+ (nil) This argument can be given three values.
# If true, then the value of used to construct this object is expected to
# contain no unescaped XML markup, and REXML will not change the text. If
# this value is false, the string may contain any characters, and REXML will
# escape any and all defined entities whose values are contained in the
# text. If this value is nil (the default), then the raw value of the
# parent will be used as the raw value for this node. If there is no raw
# value for the parent, and no value is supplied, the default is false.
# Use this field if you have entities defined for some text, and you don't
# want REXML to escape that text in output.
# Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
# Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;"
# Text.new( "<&", false, nil, true ) #-> Parse exception
# Text.new( "&lt;&amp;", false, nil, true ) #-> "&lt;&amp;"
# # Assume that the entity "s" is defined to be "sean"
# # and that the entity "r" is defined to be "russell"
# Text.new( "sean russell" ) #-> "&s; &r;"
# Text.new( "sean russell", false, nil, true ) #-> "sean russell"
#
# +entity_filter+ (nil) This can be an array of entities to match in the
# supplied text. This argument is only useful if +raw+ is set to false.
# Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
# Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
# In the last example, the +entity_filter+ argument is ignored.
#
# +illegal+ INTERNAL USE ONLY
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
entity_filter=nil, illegal=NEEDS_A_SECOND_CHECK )
@raw = false
@parent = nil
@entity_filter = nil
if parent
super( parent )
@raw = parent.raw
end
if arg.kind_of? String
@string = arg.dup
elsif arg.kind_of? Text
@string = arg.instance_variable_get(:@string).dup
@raw = arg.raw
@entity_filter = arg.instance_variable_get(:@entity_filter)
else
raise "Illegal argument of type #{arg.type} for Text constructor (#{arg})"
end
@string.squeeze!(" \n\t") unless respect_whitespace
@string.gsub!(/\r\n?/, "\n")
@raw = raw unless raw.nil?
@entity_filter = entity_filter if entity_filter
clear_cache
Text.check(@string, illegal, doctype) if @raw
end
def parent= parent
super(parent)
Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent
end
# check for illegal characters
def Text.check string, pattern, doctype
# illegal anywhere
if string !~ VALID_XML_CHARS
if String.method_defined? :encode
string.chars.each do |c|
case c.ord
when *VALID_CHAR
else
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
end
end
else
string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
case c.unpack('U')
when *VALID_CHAR
else
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
end
end
end
end
# context sensitive
string.scan(pattern) do
if $1[-1] != ?;
raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
elsif $1[0] == ?&
if $5 and $5[0] == ?#
case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
when *VALID_CHAR
else
raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
end
# FIXME: below can't work but this needs API change.
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
# if !doctype or !doctype.entities.has_key?($3)
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
# end
end
end
end
end
def node_type
:text
end
def empty?
@string.size==0
end
def clone
return Text.new(self, true)
end
# Appends text to this text node. The text is appended in the +raw+ mode
# of this text node.
#
# +returns+ the text itself to enable method chain like
# 'text << "XXX" << "YYY"'.
def <<( to_append )
@string << to_append.gsub( /\r\n?/, "\n" )
clear_cache
self
end
# +other+ a String or a Text
# +returns+ the result of (to_s <=> arg.to_s)
def <=>( other )
to_s() <=> other.to_s
end
def doctype
if @parent
doc = @parent.document
doc.doctype if doc
end
end
REFERENCE = /#{Entity::REFERENCE}/
# Returns the string value of this text node. This string is always
# escaped, meaning that it is a valid XML text node string, and all
# entities that can be escaped, have been inserted. This method respects
# the entity filter set in the constructor.
#
# # Assume that the entity "s" is defined to be "sean", and that the
# # entity "r" is defined to be "russell"
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
# t.to_s #-> "&lt; &amp; &s; russell"
# t = Text.new( "< & &s; russell", false, nil, false )
# t.to_s #-> "&lt; &amp; &s; russell"
# u = Text.new( "sean russell", false, nil, true )
# u.to_s #-> "sean russell"
def to_s
return @string if @raw
@normalized ||= Text::normalize( @string, doctype, @entity_filter )
end
def inspect
@string.inspect
end
# Returns the string value of this text. This is the text without
# entities, as it might be used programmatically, or printed to the
# console. This ignores the 'raw' attribute setting, and any
# entity_filter.
#
# # Assume that the entity "s" is defined to be "sean", and that the
# # entity "r" is defined to be "russell"
# t = Text.new( "< & sean russell", false, nil, false, ['s'] )
# t.value #-> "< & sean russell"
# t = Text.new( "< & &s; russell", false, nil, false )
# t.value #-> "< & sean russell"
# u = Text.new( "sean russell", false, nil, true )
# u.value #-> "sean russell"
def value
@unnormalized ||= Text::unnormalize( @string, doctype )
end
# Sets the contents of this text node. This expects the text to be
# unnormalized. It returns self.
#
# e = Element.new( "a" )
# e.add_text( "foo" ) # <a>foo</a>
# e[0].value = "bar" # <a>bar</a>
# e[0].value = "<a>" # <a>&lt;a&gt;</a>
def value=( val )
@string = val.gsub( /\r\n?/, "\n" )
clear_cache
@raw = false
end
def wrap(string, width, addnewline=false)
# Recursively wrap string at width.
return string if string.length <= width
place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
if addnewline then
return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
else
return string[0,place] + "\n" + wrap(string[place+1..-1], width)
end
end
def indent_text(string, level=1, style="\t", indentfirstline=true)
return string if level < 0
new_string = ''
string.each_line { |line|
indent_string = style * level
new_line = (indent_string + line).sub(/[\s]+$/,'')
new_string << new_line
}
new_string.strip! unless indentfirstline
return new_string
end
# == DEPRECATED
# See REXML::Formatters
#
def write( writer, indent=-1, transitive=false, ie_hack=false )
Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters", uplevel: 1)
formatter = if indent > -1
REXML::Formatters::Pretty.new( indent )
else
REXML::Formatters::Default.new
end
formatter.write( self, writer )
end
# FIXME
# This probably won't work properly
def xpath
path = @parent.xpath
path += "/text()"
return path
end
# Writes out text, substituting special characters beforehand.
# +out+ A String, IO, or any other object supporting <<( String )
# +input+ the text to substitute and the write out
#
# z=utf8.unpack("U*")
# ascOut=""
# z.each{|r|
# if r < 0x100
# ascOut.concat(r.chr)
# else
# ascOut.concat(sprintf("&#x%x;", r))
# end
# }
# puts ascOut
def write_with_substitution out, input
copy = input.clone
# Doing it like this rather than in a loop improves the speed
copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
out << copy
end
private
def clear_cache
@normalized = nil
@unnormalized = nil
end
# Reads text, substituting entities
def Text::read_with_substitution( input, illegal=nil )
copy = input.clone
if copy =~ illegal
raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
end if illegal
copy.gsub!( /\r\n?/, "\n" )
if copy.include? ?&
copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {
m=$1
#m='0' if m==''
m = "0#{m}" if m[0] == ?x
[Integer(m)].pack('U*')
}
end
copy
end
EREFERENCE = /&(?!#{Entity::NAME};)/
# Escapes all possible entities
def Text::normalize( input, doctype=nil, entity_filter=nil )
copy = input.to_s
# Doing it like this rather than in a loop improves the speed
#copy = copy.gsub( EREFERENCE, '&amp;' )
copy = copy.gsub( "&", "&amp;" )
if doctype
# Replace all ampersands that aren't part of an entity
doctype.entities.each_value do |entity|
copy = copy.gsub( entity.value,
"&#{entity.name};" ) if entity.value and
not( entity_filter and entity_filter.include?(entity.name) )
end
else
# Replace all ampersands that aren't part of an entity
DocType::DEFAULT_ENTITIES.each_value do |entity|
copy = copy.gsub(entity.value, "&#{entity.name};" )
end
end
copy
end
# Unescapes all possible entities
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
sum = 0
string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
s = Text.expand($&, doctype, filter)
if sum + s.bytesize > Security.entity_expansion_text_limit
raise "entity expansion has grown too large"
else
sum += s.bytesize
end
s
}
end
def Text.expand(ref, doctype, filter)
if ref[1] == ?#
if ref[2] == ?x
[ref[3...-1].to_i(16)].pack('U*')
else
[ref[2...-1].to_i].pack('U*')
end
elsif ref == '&amp;'
'&'
elsif filter and filter.include?( ref[1...-1] )
ref
elsif doctype
doctype.entity( ref[1...-1] ) or ref
else
entity_value = DocType::DEFAULT_ENTITIES[ ref[1...-1] ]
entity_value ? entity_value.value : ref
end
end
end
end

View File

@ -1,9 +0,0 @@
# frozen_string_literal: false
require_relative 'parseexception'
module REXML
class UndefinedNamespaceException < ParseException
def initialize( prefix, source, parser )
super( "Undefined prefix #{prefix} found" )
end
end
end

View File

@ -1,539 +0,0 @@
# frozen_string_literal: false
require_relative "validation"
require_relative "../parsers/baseparser"
module REXML
module Validation
# Implemented:
# * empty
# * element
# * attribute
# * text
# * optional
# * choice
# * oneOrMore
# * zeroOrMore
# * group
# * value
# * interleave
# * mixed
# * ref
# * grammar
# * start
# * define
#
# Not implemented:
# * data
# * param
# * include
# * externalRef
# * notAllowed
# * anyName
# * nsName
# * except
# * name
class RelaxNG
include Validator
INFINITY = 1.0 / 0.0
EMPTY = Event.new( nil )
TEXT = [:start_element, "text"]
attr_accessor :current
attr_accessor :count
attr_reader :references
# FIXME: Namespaces
def initialize source
parser = REXML::Parsers::BaseParser.new( source )
@count = 0
@references = {}
@root = @current = Sequence.new(self)
@root.previous = true
states = [ @current ]
begin
event = parser.pull
case event[0]
when :start_element
case event[1]
when "empty"
when "element", "attribute", "text", "value"
states[-1] << event
when "optional"
states << Optional.new( self )
states[-2] << states[-1]
when "choice"
states << Choice.new( self )
states[-2] << states[-1]
when "oneOrMore"
states << OneOrMore.new( self )
states[-2] << states[-1]
when "zeroOrMore"
states << ZeroOrMore.new( self )
states[-2] << states[-1]
when "group"
states << Sequence.new( self )
states[-2] << states[-1]
when "interleave"
states << Interleave.new( self )
states[-2] << states[-1]
when "mixed"
states << Interleave.new( self )
states[-2] << states[-1]
states[-1] << TEXT
when "define"
states << [ event[2]["name"] ]
when "ref"
states[-1] << Ref.new( event[2]["name"] )
when "anyName"
states << AnyName.new( self )
states[-2] << states[-1]
when "nsName"
when "except"
when "name"
when "data"
when "param"
when "include"
when "grammar"
when "start"
when "externalRef"
when "notAllowed"
end
when :end_element
case event[1]
when "element", "attribute"
states[-1] << event
when "zeroOrMore", "oneOrMore", "choice", "optional",
"interleave", "group", "mixed"
states.pop
when "define"
ref = states.pop
@references[ ref.shift ] = ref
#when "empty"
end
when :end_document
states[-1] << event
when :text
states[-1] << event
end
end while event[0] != :end_document
end
def receive event
validate( event )
end
end
class State
def initialize( context )
@previous = []
@events = []
@current = 0
@count = context.count += 1
@references = context.references
@value = false
end
def reset
return if @current == 0
@current = 0
@events.each {|s| s.reset if s.kind_of? State }
end
def previous=( previous )
@previous << previous
end
def next( event )
#print "In next with #{event.inspect}. "
#p @previous
return @previous.pop.next( event ) if @events[@current].nil?
expand_ref_in( @events, @current ) if @events[@current].class == Ref
if ( @events[@current].kind_of? State )
@current += 1
@events[@current-1].previous = self
return @events[@current-1].next( event )
end
if ( @events[@current].matches?(event) )
@current += 1
if @events[@current].nil?
return @previous.pop
elsif @events[@current].kind_of? State
@current += 1
@events[@current-1].previous = self
return @events[@current-1]
else
return self
end
else
return nil
end
end
def to_s
# Abbreviated:
self.class.name =~ /(?:::)(\w)\w+$/
# Full:
#self.class.name =~ /(?:::)(\w+)$/
"#$1.#@count"
end
def inspect
"< #{to_s} #{@events.collect{|e|
pre = e == @events[@current] ? '#' : ''
pre + e.inspect unless self == e
}.join(', ')} >"
end
def expected
return [@events[@current]]
end
def <<( event )
add_event_to_arry( @events, event )
end
protected
def expand_ref_in( arry, ind )
new_events = []
@references[ arry[ind].to_s ].each{ |evt|
add_event_to_arry(new_events,evt)
}
arry[ind,1] = new_events
end
def add_event_to_arry( arry, evt )
evt = generate_event( evt )
if evt.kind_of? String
arry[-1].event_arg = evt if arry[-1].kind_of? Event and @value
@value = false
else
arry << evt
end
end
def generate_event( event )
return event if event.kind_of? State or event.class == Ref
evt = nil
arg = nil
case event[0]
when :start_element
case event[1]
when "element"
evt = :start_element
arg = event[2]["name"]
when "attribute"
evt = :start_attribute
arg = event[2]["name"]
when "text"
evt = :text
when "value"
evt = :text
@value = true
end
when :text
return event[1]
when :end_document
return Event.new( event[0] )
else # then :end_element
case event[1]
when "element"
evt = :end_element
when "attribute"
evt = :end_attribute
end
end
return Event.new( evt, arg )
end
end
class Sequence < State
def matches?(event)
@events[@current].matches?( event )
end
end
class Optional < State
def next( event )
if @current == 0
rv = super
return rv if rv
@prior = @previous.pop
return @prior.next( event )
end
super
end
def matches?(event)
@events[@current].matches?(event) ||
(@current == 0 and @previous[-1].matches?(event))
end
def expected
return [ @prior.expected, @events[0] ].flatten if @current == 0
return [@events[@current]]
end
end
class ZeroOrMore < Optional
def next( event )
expand_ref_in( @events, @current ) if @events[@current].class == Ref
if ( @events[@current].matches?(event) )
@current += 1
if @events[@current].nil?
@current = 0
return self
elsif @events[@current].kind_of? State
@current += 1
@events[@current-1].previous = self
return @events[@current-1]
else
return self
end
else
@prior = @previous.pop
return @prior.next( event ) if @current == 0
return nil
end
end
def expected
return [ @prior.expected, @events[0] ].flatten if @current == 0
return [@events[@current]]
end
end
class OneOrMore < State
def initialize context
super
@ord = 0
end
def reset
super
@ord = 0
end
def next( event )
expand_ref_in( @events, @current ) if @events[@current].class == Ref
if ( @events[@current].matches?(event) )
@current += 1
@ord += 1
if @events[@current].nil?
@current = 0
return self
elsif @events[@current].kind_of? State
@current += 1
@events[@current-1].previous = self
return @events[@current-1]
else
return self
end
else
return @previous.pop.next( event ) if @current == 0 and @ord > 0
return nil
end
end
def matches?( event )
@events[@current].matches?(event) ||
(@current == 0 and @ord > 0 and @previous[-1].matches?(event))
end
def expected
if @current == 0 and @ord > 0
return [@previous[-1].expected, @events[0]].flatten
else
return [@events[@current]]
end
end
end
class Choice < State
def initialize context
super
@choices = []
end
def reset
super
@events = []
@choices.each { |c| c.each { |s| s.reset if s.kind_of? State } }
end
def <<( event )
add_event_to_arry( @choices, event )
end
def next( event )
# Make the choice if we haven't
if @events.size == 0
c = 0 ; max = @choices.size
while c < max
if @choices[c][0].class == Ref
expand_ref_in( @choices[c], 0 )
@choices += @choices[c]
@choices.delete( @choices[c] )
max -= 1
else
c += 1
end
end
@events = @choices.find { |evt| evt[0].matches? event }
# Remove the references
# Find the events
end
unless @events
@events = []
return nil
end
super
end
def matches?( event )
return @events[@current].matches?( event ) if @events.size > 0
!@choices.find{|evt| evt[0].matches?(event)}.nil?
end
def expected
return [@events[@current]] if @events.size > 0
return @choices.collect do |x|
if x[0].kind_of? State
x[0].expected
else
x[0]
end
end.flatten
end
def inspect
"< #{to_s} #{@choices.collect{|e| e.collect{|f|f.to_s}.join(', ')}.join(' or ')} >"
end
protected
def add_event_to_arry( arry, evt )
if evt.kind_of? State or evt.class == Ref
arry << [evt]
elsif evt[0] == :text
if arry[-1] and
arry[-1][-1].kind_of?( Event ) and
arry[-1][-1].event_type == :text and @value
arry[-1][-1].event_arg = evt[1]
@value = false
end
else
arry << [] if evt[0] == :start_element
arry[-1] << generate_event( evt )
end
end
end
class Interleave < Choice
def initialize context
super
@choice = 0
end
def reset
@choice = 0
end
def next_current( event )
# Expand references
c = 0 ; max = @choices.size
while c < max
if @choices[c][0].class == Ref
expand_ref_in( @choices[c], 0 )
@choices += @choices[c]
@choices.delete( @choices[c] )
max -= 1
else
c += 1
end
end
@events = @choices[@choice..-1].find { |evt| evt[0].matches? event }
@current = 0
if @events
# reorder the choices
old = @choices[@choice]
idx = @choices.index( @events )
@choices[@choice] = @events
@choices[idx] = old
@choice += 1
end
@events = [] unless @events
end
def next( event )
# Find the next series
next_current(event) unless @events[@current]
return nil unless @events[@current]
expand_ref_in( @events, @current ) if @events[@current].class == Ref
if ( @events[@current].kind_of? State )
@current += 1
@events[@current-1].previous = self
return @events[@current-1].next( event )
end
return @previous.pop.next( event ) if @events[@current].nil?
if ( @events[@current].matches?(event) )
@current += 1
if @events[@current].nil?
return self unless @choices[@choice].nil?
return @previous.pop
elsif @events[@current].kind_of? State
@current += 1
@events[@current-1].previous = self
return @events[@current-1]
else
return self
end
else
return nil
end
end
def matches?( event )
return @events[@current].matches?( event ) if @events[@current]
!@choices[@choice..-1].find{|evt| evt[0].matches?(event)}.nil?
end
def expected
return [@events[@current]] if @events[@current]
return @choices[@choice..-1].collect do |x|
if x[0].kind_of? State
x[0].expected
else
x[0]
end
end.flatten
end
def inspect
"< #{to_s} #{@choices.collect{|e| e.collect{|f|f.to_s}.join(', ')}.join(' and ')} >"
end
end
class Ref
def initialize value
@value = value
end
def to_s
@value
end
def inspect
"{#{to_s}}"
end
end
end
end

View File

@ -1,144 +0,0 @@
# frozen_string_literal: false
require_relative 'validationexception'
module REXML
module Validation
module Validator
NILEVENT = [ nil ]
def reset
@current = @root
@root.reset
@root.previous = true
@attr_stack = []
self
end
def dump
puts @root.inspect
end
def validate( event )
@attr_stack = [] unless defined? @attr_stack
match = @current.next(event)
raise ValidationException.new( "Validation error. Expected: "+
@current.expected.join( " or " )+" from #{@current.inspect} "+
" but got #{Event.new( event[0], event[1] ).inspect}" ) unless match
@current = match
# Check for attributes
case event[0]
when :start_element
@attr_stack << event[2]
begin
sattr = [:start_attribute, nil]
eattr = [:end_attribute]
text = [:text, nil]
k, = event[2].find { |key,value|
sattr[1] = key
m = @current.next( sattr )
if m
# If the state has text children...
if m.matches?( eattr )
@current = m
else
text[1] = value
m = m.next( text )
text[1] = nil
return false unless m
@current = m if m
end
m = @current.next( eattr )
if m
@current = m
true
else
false
end
else
false
end
}
event[2].delete(k) if k
end while k
when :end_element
attrs = @attr_stack.pop
raise ValidationException.new( "Validation error. Illegal "+
" attributes: #{attrs.inspect}") if attrs.length > 0
end
end
end
class Event
def initialize(event_type, event_arg=nil )
@event_type = event_type
@event_arg = event_arg
end
attr_reader :event_type
attr_accessor :event_arg
def done?
@done
end
def single?
return (@event_type != :start_element and @event_type != :start_attribute)
end
def matches?( event )
return false unless event[0] == @event_type
case event[0]
when nil
return true
when :start_element
return true if event[1] == @event_arg
when :end_element
return true
when :start_attribute
return true if event[1] == @event_arg
when :end_attribute
return true
when :end_document
return true
when :text
return (@event_arg.nil? or @event_arg == event[1])
=begin
when :processing_instruction
false
when :xmldecl
false
when :start_doctype
false
when :end_doctype
false
when :externalentity
false
when :elementdecl
false
when :entity
false
when :attlistdecl
false
when :notationdecl
false
when :end_doctype
false
=end
else
false
end
end
def ==( other )
return false unless other.kind_of? Event
@event_type == other.event_type and @event_arg == other.event_arg
end
def to_s
inspect
end
def inspect
"#{@event_type.inspect}( #@event_arg )"
end
end
end
end

View File

@ -1,10 +0,0 @@
# frozen_string_literal: false
module REXML
module Validation
class ValidationException < RuntimeError
def initialize msg
super
end
end
end
end

View File

@ -1,130 +0,0 @@
# frozen_string_literal: false
require_relative 'encoding'
require_relative 'source'
module REXML
# NEEDS DOCUMENTATION
class XMLDecl < Child
include Encoding
DEFAULT_VERSION = "1.0"
DEFAULT_ENCODING = "UTF-8"
DEFAULT_STANDALONE = "no"
START = "<?xml"
STOP = "?>"
attr_accessor :version, :standalone
attr_reader :writeencoding, :writethis
def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
@writethis = true
@writeencoding = !encoding.nil?
if version.kind_of? XMLDecl
super()
@version = version.version
self.encoding = version.encoding
@writeencoding = version.writeencoding
@standalone = version.standalone
@writethis = version.writethis
else
super()
@version = version
self.encoding = encoding
@standalone = standalone
end
@version = DEFAULT_VERSION if @version.nil?
end
def clone
XMLDecl.new(self)
end
# indent::
# Ignored. There must be no whitespace before an XML declaration
# transitive::
# Ignored
# ie_hack::
# Ignored
def write(writer, indent=-1, transitive=false, ie_hack=false)
return nil unless @writethis or writer.kind_of? Output
writer << START
writer << " #{content encoding}"
writer << STOP
end
def ==( other )
other.kind_of?(XMLDecl) and
other.version == @version and
other.encoding == self.encoding and
other.standalone == @standalone
end
def xmldecl version, encoding, standalone
@version = version
self.encoding = encoding
@standalone = standalone
end
def node_type
:xmldecl
end
alias :stand_alone? :standalone
alias :old_enc= :encoding=
def encoding=( enc )
if enc.nil?
self.old_enc = "UTF-8"
@writeencoding = false
else
self.old_enc = enc
@writeencoding = true
end
self.dowrite
end
# Only use this if you do not want the XML declaration to be written;
# this object is ignored by the XML writer. Otherwise, instantiate your
# own XMLDecl and add it to the document.
#
# Note that XML 1.1 documents *must* include an XML declaration
def XMLDecl.default
rv = XMLDecl.new( "1.0" )
rv.nowrite
rv
end
def nowrite
@writethis = false
end
def dowrite
@writethis = true
end
def inspect
"#{START} ... #{STOP}"
end
private
def content(enc)
context = nil
context = parent.context if parent
if context and context[:prologue_quote] == :quote
quote = "\""
else
quote = "'"
end
rv = "version=#{quote}#{@version}#{quote}"
if @writeencoding or enc !~ /\Autf-8\z/i
rv << " encoding=#{quote}#{enc}#{quote}"
end
if @standalone
rv << " standalone=#{quote}#{@standalone}#{quote}"
end
rv
end
end
end

View File

@ -1,85 +0,0 @@
# frozen_string_literal: false
module REXML
# Defines a number of tokens used for parsing XML. Not for general
# consumption.
module XMLTokens
# From http://www.w3.org/TR/REC-xml/#sec-common-syn
#
# [4] NameStartChar ::=
# ":" |
# [A-Z] |
# "_" |
# [a-z] |
# [#xC0-#xD6] |
# [#xD8-#xF6] |
# [#xF8-#x2FF] |
# [#x370-#x37D] |
# [#x37F-#x1FFF] |
# [#x200C-#x200D] |
# [#x2070-#x218F] |
# [#x2C00-#x2FEF] |
# [#x3001-#xD7FF] |
# [#xF900-#xFDCF] |
# [#xFDF0-#xFFFD] |
# [#x10000-#xEFFFF]
name_start_chars = [
":",
"A-Z",
"_",
"a-z",
"\\u00C0-\\u00D6",
"\\u00D8-\\u00F6",
"\\u00F8-\\u02FF",
"\\u0370-\\u037D",
"\\u037F-\\u1FFF",
"\\u200C-\\u200D",
"\\u2070-\\u218F",
"\\u2C00-\\u2FEF",
"\\u3001-\\uD7FF",
"\\uF900-\\uFDCF",
"\\uFDF0-\\uFFFD",
"\\u{10000}-\\u{EFFFF}",
]
# From http://www.w3.org/TR/REC-xml/#sec-common-syn
#
# [4a] NameChar ::=
# NameStartChar |
# "-" |
# "." |
# [0-9] |
# #xB7 |
# [#x0300-#x036F] |
# [#x203F-#x2040]
name_chars = name_start_chars + [
"\\-",
"\\.",
"0-9",
"\\u00B7",
"\\u0300-\\u036F",
"\\u203F-\\u2040",
]
NAME_START_CHAR = "[#{name_start_chars.join('')}]"
NAME_CHAR = "[#{name_chars.join('')}]"
NAMECHAR = NAME_CHAR # deprecated. Use NAME_CHAR instead.
# From http://www.w3.org/TR/xml-names11/#NT-NCName
#
# [6] NCNameStartChar ::= NameStartChar - ':'
ncname_start_chars = name_start_chars - [":"]
# From http://www.w3.org/TR/xml-names11/#NT-NCName
#
# [5] NCNameChar ::= NameChar - ':'
ncname_chars = name_chars - [":"]
NCNAME_STR = "[#{ncname_start_chars.join('')}][#{ncname_chars.join('')}]*"
NAME_STR = "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
NAME = "(#{NAME_START_CHAR}#{NAME_CHAR}*)"
NMTOKEN = "(?:#{NAME_CHAR})+"
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
#REFERENCE = "(?:#{ENTITYREF}|#{CHARREF})"
#ENTITYREF = "&#{NAME};"
#CHARREF = "&#\\d+;|&#x[0-9a-fA-F]+;"
end
end

View File

@ -1,81 +0,0 @@
# frozen_string_literal: false
require_relative 'functions'
require_relative 'xpath_parser'
module REXML
# Wrapper class. Use this class to access the XPath functions.
class XPath
include Functions
# A base Hash object, supposing to be used when initializing a
# default empty namespaces set, but is currently unused.
# TODO: either set the namespaces=EMPTY_HASH, or deprecate this.
EMPTY_HASH = {}
# Finds and returns the first node that matches the supplied xpath.
# element::
# The context element
# path::
# The xpath to search for. If not supplied or nil, returns the first
# node matching '*'.
# namespaces::
# If supplied, a Hash which defines a namespace mapping.
# variables::
# If supplied, a Hash which maps $variables in the query
# to values. This can be used to avoid XPath injection attacks
# or to automatically handle escaping string values.
#
# XPath.first( node )
# XPath.first( doc, "//b"} )
# XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
# XPath.first( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"})
def XPath::first(element, path=nil, namespaces=nil, variables={}, options={})
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash)
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash)
parser = XPathParser.new(**options)
parser.namespaces = namespaces
parser.variables = variables
path = "*" unless path
element = [element] unless element.kind_of? Array
parser.parse(path, element).flatten[0]
end
# Iterates over nodes that match the given path, calling the supplied
# block with the match.
# element::
# The context element
# path::
# The xpath to search for. If not supplied or nil, defaults to '*'
# namespaces::
# If supplied, a Hash which defines a namespace mapping
# variables::
# If supplied, a Hash which maps $variables in the query
# to values. This can be used to avoid XPath injection attacks
# or to automatically handle escaping string values.
#
# XPath.each( node ) { |el| ... }
# XPath.each( node, '/*[@attr='v']' ) { |el| ... }
# XPath.each( node, 'ancestor::x' ) { |el| ... }
# XPath.each( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"}) \
# {|el| ... }
def XPath::each(element, path=nil, namespaces=nil, variables={}, options={}, &block)
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash)
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash)
parser = XPathParser.new(**options)
parser.namespaces = namespaces
parser.variables = variables
path = "*" unless path
element = [element] unless element.kind_of? Array
parser.parse(path, element).each( &block )
end
# Returns an array of nodes matching a given XPath.
def XPath::match(element, path=nil, namespaces=nil, variables={}, options={})
parser = XPathParser.new(**options)
parser.namespaces = namespaces
parser.variables = variables
path = "*" unless path
element = [element] unless element.kind_of? Array
parser.parse(path,element)
end
end
end

View File

@ -1,968 +0,0 @@
# frozen_string_literal: false
require "pp"
require_relative 'namespace'
require_relative 'xmltokens'
require_relative 'attribute'
require_relative 'parsers/xpathparser'
class Object
# provides a unified +clone+ operation, for REXML::XPathParser
# to use across multiple Object types
def dclone
clone
end
end
class Symbol
# provides a unified +clone+ operation, for REXML::XPathParser
# to use across multiple Object types
def dclone ; self ; end
end
class Integer
# provides a unified +clone+ operation, for REXML::XPathParser
# to use across multiple Object types
def dclone ; self ; end
end
class Float
# provides a unified +clone+ operation, for REXML::XPathParser
# to use across multiple Object types
def dclone ; self ; end
end
class Array
# provides a unified +clone+ operation, for REXML::XPathParser
# to use across multiple Object+ types
def dclone
klone = self.clone
klone.clear
self.each{|v| klone << v.dclone}
klone
end
end
module REXML
# You don't want to use this class. Really. Use XPath, which is a wrapper
# for this class. Believe me. You don't want to poke around in here.
# There is strange, dark magic at work in this code. Beware. Go back! Go
# back while you still can!
class XPathParser
include XMLTokens
LITERAL = /^'([^']*)'|^"([^"]*)"/u
DEBUG = (ENV["REXML_XPATH_PARSER_DEBUG"] == "true")
def initialize(strict: false)
@debug = DEBUG
@parser = REXML::Parsers::XPathParser.new
@namespaces = nil
@variables = {}
@nest = 0
@strict = strict
end
def namespaces=( namespaces={} )
Functions::namespace_context = namespaces
@namespaces = namespaces
end
def variables=( vars={} )
Functions::variables = vars
@variables = vars
end
def parse path, nodeset
path_stack = @parser.parse( path )
match( path_stack, nodeset )
end
def get_first path, nodeset
path_stack = @parser.parse( path )
first( path_stack, nodeset )
end
def predicate path, nodeset
path_stack = @parser.parse( path )
match( path_stack, nodeset )
end
def []=( variable_name, value )
@variables[ variable_name ] = value
end
# Performs a depth-first (document order) XPath search, and returns the
# first match. This is the fastest, lightest way to return a single result.
#
# FIXME: This method is incomplete!
def first( path_stack, node )
return nil if path.size == 0
case path[0]
when :document
# do nothing
return first( path[1..-1], node )
when :child
for c in node.children
r = first( path[1..-1], c )
return r if r
end
when :qname
name = path[2]
if node.name == name
return node if path.size == 3
return first( path[3..-1], node )
else
return nil
end
when :descendant_or_self
r = first( path[1..-1], node )
return r if r
for c in node.children
r = first( path, c )
return r if r
end
when :node
return first( path[1..-1], node )
when :any
return first( path[1..-1], node )
end
return nil
end
def match(path_stack, nodeset)
nodeset = nodeset.collect.with_index do |node, i|
position = i + 1
XPathNode.new(node, position: position)
end
result = expr(path_stack, nodeset)
case result
when Array # nodeset
unnode(result)
else
[result]
end
end
private
def strict?
@strict
end
# Returns a String namespace for a node, given a prefix
# The rules are:
#
# 1. Use the supplied namespace mapping first.
# 2. If no mapping was supplied, use the context node to look up the namespace
def get_namespace( node, prefix )
if @namespaces
return @namespaces[prefix] || ''
else
return node.namespace( prefix ) if node.node_type == :element
return ''
end
end
# Expr takes a stack of path elements and a set of nodes (either a Parent
# or an Array and returns an Array of matching nodes
def expr( path_stack, nodeset, context=nil )
enter(:expr, path_stack, nodeset) if @debug
return nodeset if path_stack.length == 0 || nodeset.length == 0
while path_stack.length > 0
trace(:while, path_stack, nodeset) if @debug
if nodeset.length == 0
path_stack.clear
return []
end
op = path_stack.shift
case op
when :document
first_raw_node = nodeset.first.raw_node
nodeset = [XPathNode.new(first_raw_node.root_node, position: 1)]
when :self
nodeset = step(path_stack) do
[nodeset]
end
when :child
nodeset = step(path_stack) do
child(nodeset)
end
when :literal
trace(:literal, path_stack, nodeset) if @debug
return path_stack.shift
when :attribute
nodeset = step(path_stack, any_type: :attribute) do
nodesets = []
nodeset.each do |node|
raw_node = node.raw_node
next unless raw_node.node_type == :element
attributes = raw_node.attributes
next if attributes.empty?
nodesets << attributes.each_attribute.collect.with_index do |attribute, i|
XPathNode.new(attribute, position: i + 1)
end
end
nodesets
end
when :namespace
pre_defined_namespaces = {
"xml" => "http://www.w3.org/XML/1998/namespace",
}
nodeset = step(path_stack, any_type: :namespace) do
nodesets = []
nodeset.each do |node|
raw_node = node.raw_node
case raw_node.node_type
when :element
if @namespaces
nodesets << pre_defined_namespaces.merge(@namespaces)
else
nodesets << pre_defined_namespaces.merge(raw_node.namespaces)
end
when :attribute
if @namespaces
nodesets << pre_defined_namespaces.merge(@namespaces)
else
nodesets << pre_defined_namespaces.merge(raw_node.element.namespaces)
end
end
end
nodesets
end
when :parent
nodeset = step(path_stack) do
nodesets = []
nodeset.each do |node|
raw_node = node.raw_node
if raw_node.node_type == :attribute
parent = raw_node.element
else
parent = raw_node.parent
end
nodesets << [XPathNode.new(parent, position: 1)] if parent
end
nodesets
end
when :ancestor
nodeset = step(path_stack) do
nodesets = []
# new_nodes = {}
nodeset.each do |node|
raw_node = node.raw_node
new_nodeset = []
while raw_node.parent
raw_node = raw_node.parent
# next if new_nodes.key?(node)
new_nodeset << XPathNode.new(raw_node,
position: new_nodeset.size + 1)
# new_nodes[node] = true
end
nodesets << new_nodeset unless new_nodeset.empty?
end
nodesets
end
when :ancestor_or_self
nodeset = step(path_stack) do
nodesets = []
# new_nodes = {}
nodeset.each do |node|
raw_node = node.raw_node
next unless raw_node.node_type == :element
new_nodeset = [XPathNode.new(raw_node, position: 1)]
# new_nodes[node] = true
while raw_node.parent
raw_node = raw_node.parent
# next if new_nodes.key?(node)
new_nodeset << XPathNode.new(raw_node,
position: new_nodeset.size + 1)
# new_nodes[node] = true
end
nodesets << new_nodeset unless new_nodeset.empty?
end
nodesets
end
when :descendant_or_self
nodeset = step(path_stack) do
descendant(nodeset, true)
end
when :descendant
nodeset = step(path_stack) do
descendant(nodeset, false)
end
when :following_sibling
nodeset = step(path_stack) do
nodesets = []
nodeset.each do |node|
raw_node = node.raw_node
next unless raw_node.respond_to?(:parent)
next if raw_node.parent.nil?
all_siblings = raw_node.parent.children
current_index = all_siblings.index(raw_node)
following_siblings = all_siblings[(current_index + 1)..-1]
next if following_siblings.empty?
nodesets << following_siblings.collect.with_index do |sibling, i|
XPathNode.new(sibling, position: i + 1)
end
end
nodesets
end
when :preceding_sibling
nodeset = step(path_stack, order: :reverse) do
nodesets = []
nodeset.each do |node|
raw_node = node.raw_node
next unless raw_node.respond_to?(:parent)
next if raw_node.parent.nil?
all_siblings = raw_node.parent.children
current_index = all_siblings.index(raw_node)
preceding_siblings = all_siblings[0, current_index].reverse
next if preceding_siblings.empty?
nodesets << preceding_siblings.collect.with_index do |sibling, i|
XPathNode.new(sibling, position: i + 1)
end
end
nodesets
end
when :preceding
nodeset = step(path_stack, order: :reverse) do
unnode(nodeset) do |node|
preceding(node)
end
end
when :following
nodeset = step(path_stack) do
unnode(nodeset) do |node|
following(node)
end
end
when :variable
var_name = path_stack.shift
return [@variables[var_name]]
when :eq, :neq, :lt, :lteq, :gt, :gteq
left = expr( path_stack.shift, nodeset.dup, context )
right = expr( path_stack.shift, nodeset.dup, context )
res = equality_relational_compare( left, op, right )
trace(op, left, right, res) if @debug
return res
when :or
left = expr(path_stack.shift, nodeset.dup, context)
return true if Functions.boolean(left)
right = expr(path_stack.shift, nodeset.dup, context)
return Functions.boolean(right)
when :and
left = expr(path_stack.shift, nodeset.dup, context)
return false unless Functions.boolean(left)
right = expr(path_stack.shift, nodeset.dup, context)
return Functions.boolean(right)
when :div, :mod, :mult, :plus, :minus
left = expr(path_stack.shift, nodeset, context)
right = expr(path_stack.shift, nodeset, context)
left = unnode(left) if left.is_a?(Array)
right = unnode(right) if right.is_a?(Array)
left = Functions::number(left)
right = Functions::number(right)
case op
when :div
return left / right
when :mod
return left % right
when :mult
return left * right
when :plus
return left + right
when :minus
return left - right
else
raise "[BUG] Unexpected operator: <#{op.inspect}>"
end
when :union
left = expr( path_stack.shift, nodeset, context )
right = expr( path_stack.shift, nodeset, context )
left = unnode(left) if left.is_a?(Array)
right = unnode(right) if right.is_a?(Array)
return (left | right)
when :neg
res = expr( path_stack, nodeset, context )
res = unnode(res) if res.is_a?(Array)
return -Functions.number(res)
when :not
when :function
func_name = path_stack.shift.tr('-','_')
arguments = path_stack.shift
if nodeset.size != 1
message = "[BUG] Node set size must be 1 for function call: "
message += "<#{func_name}>: <#{nodeset.inspect}>: "
message += "<#{arguments.inspect}>"
raise message
end
node = nodeset.first
if context
target_context = context
else
target_context = {:size => nodeset.size}
if node.is_a?(XPathNode)
target_context[:node] = node.raw_node
target_context[:index] = node.position
else
target_context[:node] = node
target_context[:index] = 1
end
end
args = arguments.dclone.collect do |arg|
result = expr(arg, nodeset, target_context)
result = unnode(result) if result.is_a?(Array)
result
end
Functions.context = target_context
return Functions.send(func_name, *args)
else
raise "[BUG] Unexpected path: <#{op.inspect}>: <#{path_stack.inspect}>"
end
end # while
return nodeset
ensure
leave(:expr, path_stack, nodeset) if @debug
end
def step(path_stack, any_type: :element, order: :forward)
nodesets = yield
begin
enter(:step, path_stack, nodesets) if @debug
nodesets = node_test(path_stack, nodesets, any_type: any_type)
while path_stack[0] == :predicate
path_stack.shift # :predicate
predicate_expression = path_stack.shift.dclone
nodesets = evaluate_predicate(predicate_expression, nodesets)
end
if nodesets.size == 1
ordered_nodeset = nodesets[0]
else
raw_nodes = []
nodesets.each do |nodeset|
nodeset.each do |node|
if node.respond_to?(:raw_node)
raw_nodes << node.raw_node
else
raw_nodes << node
end
end
end
ordered_nodeset = sort(raw_nodes, order)
end
new_nodeset = []
ordered_nodeset.each do |node|
# TODO: Remove duplicated
new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
end
new_nodeset
ensure
leave(:step, path_stack, new_nodeset) if @debug
end
end
def node_test(path_stack, nodesets, any_type: :element)
enter(:node_test, path_stack, nodesets) if @debug
operator = path_stack.shift
case operator
when :qname
prefix = path_stack.shift
name = path_stack.shift
new_nodesets = nodesets.collect do |nodeset|
filter_nodeset(nodeset) do |node|
raw_node = node.raw_node
case raw_node.node_type
when :element
if prefix.nil?
raw_node.name == name
elsif prefix.empty?
if strict?
raw_node.name == name and raw_node.namespace == ""
else
# FIXME: This DOUBLES the time XPath searches take
ns = get_namespace(raw_node, prefix)
raw_node.name == name and raw_node.namespace == ns
end
else
# FIXME: This DOUBLES the time XPath searches take
ns = get_namespace(raw_node, prefix)
raw_node.name == name and raw_node.namespace == ns
end
when :attribute
if prefix.nil?
raw_node.name == name
elsif prefix.empty?
raw_node.name == name and raw_node.namespace == ""
else
# FIXME: This DOUBLES the time XPath searches take
ns = get_namespace(raw_node.element, prefix)
raw_node.name == name and raw_node.namespace == ns
end
else
false
end
end
end
when :namespace
prefix = path_stack.shift
new_nodesets = nodesets.collect do |nodeset|
filter_nodeset(nodeset) do |node|
raw_node = node.raw_node
case raw_node.node_type
when :element
namespaces = @namespaces || raw_node.namespaces
raw_node.namespace == namespaces[prefix]
when :attribute
namespaces = @namespaces || raw_node.element.namespaces
raw_node.namespace == namespaces[prefix]
else
false
end
end
end
when :any
new_nodesets = nodesets.collect do |nodeset|
filter_nodeset(nodeset) do |node|
raw_node = node.raw_node
raw_node.node_type == any_type
end
end
when :comment
new_nodesets = nodesets.collect do |nodeset|
filter_nodeset(nodeset) do |node|
raw_node = node.raw_node
raw_node.node_type == :comment
end
end
when :text
new_nodesets = nodesets.collect do |nodeset|
filter_nodeset(nodeset) do |node|
raw_node = node.raw_node
raw_node.node_type == :text
end
end
when :processing_instruction
target = path_stack.shift
new_nodesets = nodesets.collect do |nodeset|
filter_nodeset(nodeset) do |node|
raw_node = node.raw_node
(raw_node.node_type == :processing_instruction) and
(target.empty? or (raw_node.target == target))
end
end
when :node
new_nodesets = nodesets.collect do |nodeset|
filter_nodeset(nodeset) do |node|
true
end
end
else
message = "[BUG] Unexpected node test: " +
"<#{operator.inspect}>: <#{path_stack.inspect}>"
raise message
end
new_nodesets
ensure
leave(:node_test, path_stack, new_nodesets) if @debug
end
def filter_nodeset(nodeset)
new_nodeset = []
nodeset.each do |node|
next unless yield(node)
new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
end
new_nodeset
end
def evaluate_predicate(expression, nodesets)
enter(:predicate, expression, nodesets) if @debug
new_nodesets = nodesets.collect do |nodeset|
new_nodeset = []
subcontext = { :size => nodeset.size }
nodeset.each_with_index do |node, index|
if node.is_a?(XPathNode)
subcontext[:node] = node.raw_node
subcontext[:index] = node.position
else
subcontext[:node] = node
subcontext[:index] = index + 1
end
result = expr(expression.dclone, [node], subcontext)
trace(:predicate_evaluate, expression, node, subcontext, result) if @debug
result = result[0] if result.kind_of? Array and result.length == 1
if result.kind_of? Numeric
if result == node.position
new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
end
elsif result.instance_of? Array
if result.size > 0 and result.inject(false) {|k,s| s or k}
if result.size > 0
new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
end
end
else
if result
new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
end
end
end
new_nodeset
end
new_nodesets
ensure
leave(:predicate, new_nodesets) if @debug
end
def trace(*args)
indent = " " * @nest
PP.pp(args, "").each_line do |line|
puts("#{indent}#{line}")
end
end
def enter(tag, *args)
trace(:enter, tag, *args)
@nest += 1
end
def leave(tag, *args)
@nest -= 1
trace(:leave, tag, *args)
end
# Reorders an array of nodes so that they are in document order
# It tries to do this efficiently.
#
# FIXME: I need to get rid of this, but the issue is that most of the XPath
# interpreter functions as a filter, which means that we lose context going
# in and out of function calls. If I knew what the index of the nodes was,
# I wouldn't have to do this. Maybe add a document IDX for each node?
# Problems with mutable documents. Or, rewrite everything.
def sort(array_of_nodes, order)
new_arry = []
array_of_nodes.each { |node|
node_idx = []
np = node.node_type == :attribute ? node.element : node
while np.parent and np.parent.node_type == :element
node_idx << np.parent.index( np )
np = np.parent
end
new_arry << [ node_idx.reverse, node ]
}
ordered = new_arry.sort_by do |index, node|
if order == :forward
index
else
-index
end
end
ordered.collect do |_index, node|
node
end
end
def descendant(nodeset, include_self)
nodesets = []
nodeset.each do |node|
new_nodeset = []
new_nodes = {}
descendant_recursive(node.raw_node, new_nodeset, new_nodes, include_self)
nodesets << new_nodeset unless new_nodeset.empty?
end
nodesets
end
def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self)
if include_self
return if new_nodes.key?(raw_node)
new_nodeset << XPathNode.new(raw_node, position: new_nodeset.size + 1)
new_nodes[raw_node] = true
end
node_type = raw_node.node_type
if node_type == :element or node_type == :document
raw_node.children.each do |child|
descendant_recursive(child, new_nodeset, new_nodes, true)
end
end
end
# Builds a nodeset of all of the preceding nodes of the supplied node,
# in reverse document order
# preceding:: includes every element in the document that precedes this node,
# except for ancestors
def preceding(node)
ancestors = []
parent = node.parent
while parent
ancestors << parent
parent = parent.parent
end
precedings = []
preceding_node = preceding_node_of(node)
while preceding_node
if ancestors.include?(preceding_node)
ancestors.delete(preceding_node)
else
precedings << XPathNode.new(preceding_node,
position: precedings.size + 1)
end
preceding_node = preceding_node_of(preceding_node)
end
precedings
end
def preceding_node_of( node )
psn = node.previous_sibling_node
if psn.nil?
if node.parent.nil? or node.parent.class == Document
return nil
end
return node.parent
#psn = preceding_node_of( node.parent )
end
while psn and psn.kind_of? Element and psn.children.size > 0
psn = psn.children[-1]
end
psn
end
def following(node)
followings = []
following_node = next_sibling_node(node)
while following_node
followings << XPathNode.new(following_node,
position: followings.size + 1)
following_node = following_node_of(following_node)
end
followings
end
def following_node_of( node )
if node.kind_of? Element and node.children.size > 0
return node.children[0]
end
return next_sibling_node(node)
end
def next_sibling_node(node)
psn = node.next_sibling_node
while psn.nil?
if node.parent.nil? or node.parent.class == Document
return nil
end
node = node.parent
psn = node.next_sibling_node
end
return psn
end
def child(nodeset)
nodesets = []
nodeset.each do |node|
raw_node = node.raw_node
node_type = raw_node.node_type
# trace(:child, node_type, node)
case node_type
when :element
nodesets << raw_node.children.collect.with_index do |child_node, i|
XPathNode.new(child_node, position: i + 1)
end
when :document
new_nodeset = []
raw_node.children.each do |child|
case child
when XMLDecl, Text
# Ignore
else
new_nodeset << XPathNode.new(child, position: new_nodeset.size + 1)
end
end
nodesets << new_nodeset unless new_nodeset.empty?
end
end
nodesets
end
def norm b
case b
when true, false
return b
when 'true', 'false'
return Functions::boolean( b )
when /^\d+(\.\d+)?$/, Numeric
return Functions::number( b )
else
return Functions::string( b )
end
end
def equality_relational_compare(set1, op, set2)
set1 = unnode(set1) if set1.is_a?(Array)
set2 = unnode(set2) if set2.is_a?(Array)
if set1.kind_of? Array and set2.kind_of? Array
# If both objects to be compared are node-sets, then the
# comparison will be true if and only if there is a node in the
# first node-set and a node in the second node-set such that the
# result of performing the comparison on the string-values of
# the two nodes is true.
set1.product(set2).any? do |node1, node2|
node_string1 = Functions.string(node1)
node_string2 = Functions.string(node2)
compare(node_string1, op, node_string2)
end
elsif set1.kind_of? Array or set2.kind_of? Array
# If one is nodeset and other is number, compare number to each item
# in nodeset s.t. number op number(string(item))
# If one is nodeset and other is string, compare string to each item
# in nodeset s.t. string op string(item)
# If one is nodeset and other is boolean, compare boolean to each item
# in nodeset s.t. boolean op boolean(item)
if set1.kind_of? Array
a = set1
b = set2
else
a = set2
b = set1
end
case b
when true, false
each_unnode(a).any? do |unnoded|
compare(Functions.boolean(unnoded), op, b)
end
when Numeric
each_unnode(a).any? do |unnoded|
compare(Functions.number(unnoded), op, b)
end
when /\A\d+(\.\d+)?\z/
b = Functions.number(b)
each_unnode(a).any? do |unnoded|
compare(Functions.number(unnoded), op, b)
end
else
b = Functions::string(b)
each_unnode(a).any? do |unnoded|
compare(Functions::string(unnoded), op, b)
end
end
else
# If neither is nodeset,
# If op is = or !=
# If either boolean, convert to boolean
# If either number, convert to number
# Else, convert to string
# Else
# Convert both to numbers and compare
compare(set1, op, set2)
end
end
def value_type(value)
case value
when true, false
:boolean
when Numeric
:number
when String
:string
else
raise "[BUG] Unexpected value type: <#{value.inspect}>"
end
end
def normalize_compare_values(a, operator, b)
a_type = value_type(a)
b_type = value_type(b)
case operator
when :eq, :neq
if a_type == :boolean or b_type == :boolean
a = Functions.boolean(a) unless a_type == :boolean
b = Functions.boolean(b) unless b_type == :boolean
elsif a_type == :number or b_type == :number
a = Functions.number(a) unless a_type == :number
b = Functions.number(b) unless b_type == :number
else
a = Functions.string(a) unless a_type == :string
b = Functions.string(b) unless b_type == :string
end
when :lt, :lteq, :gt, :gteq
a = Functions.number(a) unless a_type == :number
b = Functions.number(b) unless b_type == :number
else
message = "[BUG] Unexpected compare operator: " +
"<#{operator.inspect}>: <#{a.inspect}>: <#{b.inspect}>"
raise message
end
[a, b]
end
def compare(a, operator, b)
a, b = normalize_compare_values(a, operator, b)
case operator
when :eq
a == b
when :neq
a != b
when :lt
a < b
when :lteq
a <= b
when :gt
a > b
when :gteq
a >= b
else
message = "[BUG] Unexpected compare operator: " +
"<#{operator.inspect}>: <#{a.inspect}>: <#{b.inspect}>"
raise message
end
end
def each_unnode(nodeset)
return to_enum(__method__, nodeset) unless block_given?
nodeset.each do |node|
if node.is_a?(XPathNode)
unnoded = node.raw_node
else
unnoded = node
end
yield(unnoded)
end
end
def unnode(nodeset)
each_unnode(nodeset).collect do |unnoded|
unnoded = yield(unnoded) if block_given?
unnoded
end
end
end
# @private
class XPathNode
attr_reader :raw_node, :context
def initialize(node, context=nil)
if node.is_a?(XPathNode)
@raw_node = node.raw_node
else
@raw_node = node
end
@context = context || {}
end
def position
@context[:position]
end
end
end