Parsing XML with Perl

#! /usr/bin/perl

use strict;
use warnings;

use XML::Parser;

#
# A simple script that shows how the Expat stream parser works
#

# Current element nesting level; the Start/End handlers raise and lower it.
my $depth = 0;

# Prefix every line of the given text with four spaces per nesting level
# ($depth) and return the result terminated by exactly one newline.
sub indent
{
    my ($text) = @_;

    # Guard the repetition so a non-positive depth simply yields no padding.
    my $pad = $depth > 0 ? '    ' x $depth : '';

    my @padded;
    foreach my $line (split("\n", $text)) {
        push @padded, $pad . $line;
    }
    return join("\n", @padded) . "\n";
}

# Stream parsing table: Expat event name => callback.
# Each callback receives the parser object first, then the event's own data,
# and prints one indented trace line per event.
my $handlers = {
    # Opening tag: report the element and its attributes, then go one level
    # deeper.
    Start => sub {
        my ($parser, $tag, %attrs) = @_;
        my @report = ("start: [$tag]\n");
        push @report, "    ----$_=$attrs{$_}\n" for keys %attrs;
        print indent(join('', @report));
        $depth++;
    },
    # Closing tag: come back up one level before reporting so the line
    # aligns with the matching "start".
    End => sub {
        my ($parser, $tag) = @_;
        $depth--;
        print indent("end: [$tag]");
    },
    # Character data: trim surrounding whitespace and skip chunks that end up
    # empty.
    Char => sub {
        my ($parser, $text) = @_;
        $text =~ s/\A\s+//;
        $text =~ s/\s+\z//;
        return unless length($text);
        print indent("char: [$text]");
    },
    # Comment: report the comment text as received.
    Comment => sub {
        my (undef, $body) = @_;
        print indent("comment: [$body]");
    },
    # Start of a CDATA section.
    CdataStart => sub {
        print indent("cdataStart");
    },
    # End of a CDATA section.
    CdataEnd => sub {
        print indent("cdataEnd");
    },
};


# Entry point: parse the XML file named by the first command-line argument,
# letting the handlers print one trace line per parser event.
my $fname = $ARGV[0];

# Without a file name there is nothing to parse; say so instead of handing
# undef to the parser.
if (!defined $fname || !length $fname) {
    print STDERR "Usage: $0 <xml-file>\n";
    exit 2;
}

my $p = XML::Parser->new( Handlers => $handlers );

# parsefile() dies on failure. Test the eval's own result rather than $@
# alone, send the message to STDERR so it does not mix with the event trace
# on STDOUT, and exit non-zero so callers can detect the failure.
if (!eval { $p->parsefile($fname); 1 }) {
    my $err = $@ || 'unknown error';
    print STDERR "ERROR: $err\n";
    exit 1;
}


Run the script with this input

<?xml version="1.0" encoding="utf-8"?>
<TestEnvironment name="test.xml" id="1" envFile="~/testhome/test.xml">
    <!--This is a comment-->
    <SqlWCharEncoding>UTF-16</SqlWCharEncoding>
    <empty />
    <BaselineDirectory>
        ResultSets
 <TSE-Sql2TableColumnMap>stcmap.txt</TSE-Sql2TableColumnMap>
        Post char
    </BaselineDirectory>
    <SQL>
      before
      <![CDATA[select * from mytable; <tag /> ]]>
      after
    </SQL>
</TestEnvironment>

and the result is

jasonz@jzdebian$ ./xml-test.pl ~/testhome/test.xml
start: [TestEnvironment]
    ----name=test.xml
    ----envFile=~/testhome/test.xml
    ----id=1
    comment: [This is a comment]
    start: [SqlWCharEncoding]
        char: [UTF-16]
    end: [SqlWCharEncoding]
    start: [empty]
    end: [empty]
    start: [BaselineDirectory]
        char: [ResultSets]
        start: [TSE-Sql2TableColumnMap]
            char: [stcmap.txt]
        end: [TSE-Sql2TableColumnMap]
        char: [Post char]
    end: [BaselineDirectory]
    start: [SQL]
        char: [before]
        cdataStart
        char: [select * from mytable; <tag />]
        cdataEnd
        char: [after]
    end: [SQL]
end: [TestEnvironment]
jasonz@jzdebian$
Advertisements