package XML::Parser::Lite::Tree;
use 5.006;
use strict;
use warnings;
use XML::Parser::LiteCopy;
our $VERSION = '0.14';
use vars qw( $parser );
sub instance {
return $parser if $parser;
$parser = __PACKAGE__->new;
}
sub new {
my $class = shift;
my $self = bless {}, $class;
my %opts = (ref $_[0]) ? ((ref $_[0] eq 'HASH') ? %{$_[0]} : () ) : @_;
$self->{opts} = \%opts;
$self->{__parser} = new XML::Parser::LiteCopy
Handlers => {
Start => sub { $self->_start_tag(@_); },
Char => sub { $self->_do_char(@_); },
CData => sub { $self->_do_cdata(@_); },
End => sub { $self->_end_tag(@_); },
Comment => sub { $self->_do_comment(@_); },
PI => sub { $self->_do_pi(@_); },
Doctype => sub { $self->_do_doctype(@_); },
};
$self->{process_ns} = $self->{opts}->{process_ns} || 0;
$self->{skip_white} = $self->{opts}->{skip_white} || 0;
return $self;
}
sub parse {
my ($self, $content) = @_;
my $root = {
'type' => 'root',
'children' => [],
};
$self->{tag_stack} = [$root];
$self->{__parser}->parse($content);
$self->cleanup($root);
if ($self->{skip_white}){
$self->strip_white($root);
}
if ($self->{process_ns}){
$self->{ns_stack} = {};
$self->mark_namespaces($root);
}
return $root;
}
sub _start_tag {
my $self = shift;
shift;
my $new_tag = {
'type' => 'element',
'name' => shift,
'attributes' => {},
'children' => [],
};
while (my $a_name = shift @_){
my $a_value = shift @_;
$new_tag->{attributes}->{$a_name} = $a_value;
}
push @{$self->{tag_stack}->[-1]->{children}}, $new_tag;
push @{$self->{tag_stack}}, $new_tag;
1;
}
sub _do_char {
my $self = shift;
shift;
for my $content(@_){
my $new_tag = {
'type' => 'text',
'content' => $content,
};
push @{$self->{tag_stack}->[-1]->{children}}, $new_tag;
}
1;
}
sub _do_cdata {
my $self = shift;
shift;
for my $content(@_){
my $new_tag = {
'type' => 'cdata',
'content' => $content,
};
push @{$self->{tag_stack}->[-1]->{children}}, $new_tag;
}
1;
}
sub _end_tag {
my $self = shift;
pop @{$self->{tag_stack}};
1;
}
sub _do_comment {
my $self = shift;
shift;
for my $content(@_){
my $new_tag = {
'type' => 'comment',
'content' => $content,
};
push @{$self->{tag_stack}->[-1]->{children}}, $new_tag;
}
1;
}
sub _do_pi {
my $self = shift;
shift;
push @{$self->{tag_stack}->[-1]->{children}}, {
'type' => 'pi',
'content' => shift,
};
1;
}
sub _do_doctype {
my $self = shift;
shift;
push @{$self->{tag_stack}->[-1]->{children}}, {
'type' => 'dtd',
'content' => shift,
};
1;
}
sub mark_namespaces {
my ($self, $obj) = @_;
my @ns_keys;
#
# mark
#
if ($obj->{type} eq 'element'){
#
# first, add any new NS's to the stack
#
my @keys = keys %{$obj->{attributes}};
for my $k(@keys){
if ($k =~ /^xmlns:(.*)$/){
push @{$self->{ns_stack}->{$1}}, $obj->{attributes}->{$k};
push @ns_keys, $1;
delete $obj->{attributes}->{$k};
}
if ($k eq 'xmlns'){
push @{$self->{ns_stack}->{__default__}}, $obj->{attributes}->{$k};
push @ns_keys, '__default__';
delete $obj->{attributes}->{$k};
}
}
#
# now - does this tag have a NS?
#
if ($obj->{name} =~ /^(.*?):(.*)$/){
$obj->{local_name} = $2;
$obj->{ns_key} = $1;
$obj->{ns} = $self->{ns_stack}->{$1}->[-1];
}else{
$obj->{local_name} = $obj->{name};
$obj->{ns} = $self->{ns_stack}->{__default__}->[-1];
}
#
# finally, add xpath-style namespace nodes
#
$obj->{namespaces} = {};
for my $key (keys %{$self->{ns_stack}}){
if (scalar @{$self->{ns_stack}->{$key}}){
my $uri = $self->{ns_stack}->{$key}->[-1];
$obj->{namespaces}->{$key} = $uri;
}
}
}
#
# descend
#
if ($obj->{type} eq 'root' || $obj->{type} eq 'element'){
for my $child (@{$obj->{children}}){
$self->mark_namespaces($child);
}
}
#
# pop from stack
#
for my $k (@ns_keys){
pop @{$self->{ns_stack}->{$k}};
}
}
sub strip_white {
my ($self, $obj) = @_;
if ($obj->{type} eq 'root' || $obj->{type} eq 'element'){
my $new_kids = [];
for my $child (@{$obj->{children}}){
if ($child->{type} eq 'text'){
if ($child->{content} =~ m/\S/){
push @{$new_kids}, $child;
}
}elsif ($child->{type} eq 'element'){
$self->strip_white($child);
push @{$new_kids}, $child;
}else{
push @{$new_kids}, $child;
}
}
$obj->{children} = $new_kids;
}
}
sub cleanup {
my ($self, $obj) = @_;
#
# cleanup PIs
#
if ($obj->{type} eq 'pi'){
my ($x, $y) = split /\s+/, $obj->{content}, 2;
$obj->{target} = $x;
$obj->{content} = $y;
}
#
# cleanup DTDs
#
if ($obj->{type} eq 'dtd'){
my ($x, $y) = split /\s+/, $obj->{content}, 2;
$obj->{name} = $x;
$obj->{content} = $y;
}
#
# recurse
#
if ($obj->{type} eq 'root' || $obj->{type} eq 'element'){
for my $child (@{$obj->{children}}){
$self->cleanup($child);
}
}
}
1;
__END__
=head1 NAME
XML::Parser::Lite::Tree - Lightweight XML tree builder
=head1 SYNOPSIS
use XML::Parser::Lite::Tree;
my $tree_parser = XML::Parser::Lite::Tree::instance();
my $tree = $tree_parser->parse($xml_data);
OR
my $tree = XML::Parser::Lite::Tree::instance()->parse($xml_data);
=head1 DESCRIPTION
This is a singleton class for parsing XML into a tree structure. How does this
differ from other XML tree generators? By using XML::Parser::Lite, which is a
pure perl XML parser. Using this module you can tree-ify simple XML without
having to compile any C.
For example, the following XML:
hoopla
Parses into the following tree:
{
'children' => [
{
'children' => [
{
'children' => [],
'attributes' => {
'a' => 'b',
'c' => 'd'
},
'type' => 'element',
'name' => 'bar'
},
{
'content' => 'hoopla',
'type' => 'text'
}
],
'attributes' => {
'woo' => 'yay'
},
'type' => 'element',
'name' => 'foo'
}
],
'type' => 'root'
};
Each node contains a C key, one of C, C and C. C is the
document root, and only contains an array ref C. C represents a normal
tag, and contains an array ref C, a hash ref C and a string C.
C nodes contain only a C string.
=head1 METHODS
=over 4
=item C
Returns an instance of the tree parser.
=item C
Creates a new parser. Valid options include C to process namespaces.
=item C
Parses the xml in C<$xml> and returns the tree as a hash ref.
=back
=head1 AUTHOR
Copyright (C) 2004-2008, Cal Henderson, Ecal@iamcal.comE
=head1 SEE ALSO
L.
=cut