#!/usr/bin/perl -w
# Using XML::Parser and Handlers
# Author: Ingo Macherius
#
# Extracts every <prod> element from an XML document and prints it as
#     [n] lhs ::= rhs
# one production per line, numbered in document order.
#
# Usage: perl this_script.pl [file.xml]
# With no argument the original hard-coded sample path is parsed.

use strict;
use warnings;
use XML::Parser;

# Input used when no file name is given on the command line.
my $DEFAULT_FILE = '/article/ways_to_rome/ex_ps_parser_2/REC_xml_19980210.xml';

my $state = {};    # holds the "global" values like rhs, lhs and counter

# Every handler is wrapped in a closure so that it receives $state as its
# first argument, followed by the arguments XML::Parser passes.
my $parser = XML::Parser->new(
    Handlers => {
        Start => sub { tag_start( $state, @_ ) },
        End   => sub { tag_end( $state, @_ ) },
        Char  => sub { characters( $state, @_ ) },
        Init  => sub { init( $state, @_ ) },
    },
);

my $file = @ARGV ? $ARGV[0] : $DEFAULT_FILE;
$parser->parsefile($file);

# Start handler: reset the text accumulators whenever a <prod> opens.
#   $state - shared state hash reference
#   $p     - parser object (unused here)
#   $el    - name of the element being opened
sub tag_start {
    my ( $state, $p, $el ) = @_;

    if ( $el eq 'prod' ) {
        $state->{rhs} = '';
        $state->{lhs} = '';
    }
    return;
}

# End handler: when a <prod> closes, bump the counter and print the
# production assembled from the accumulated lhs/rhs text.
#   $state - shared state hash reference
#   $p     - parser object (unused here)
#   $el    - name of the element being closed
sub tag_end {
    my ( $state, $p, $el ) = @_;

    if ( $el eq 'prod' ) {
        $state->{counter}++;
        my $prod = "[$state->{counter}] $state->{lhs} ::= $state->{rhs}";
        print clean($prod), "\n";
    }
    return;
}

# Char handler: append character data to the lhs or rhs accumulator,
# depending on which of those elements currently encloses the text.
# Text outside both elements is ignored; lhs wins if both enclose it.
#   $state - shared state hash reference
#   $p     - parser object, queried with within_element()
#   $txt   - the chunk of character data
sub characters {
    my ( $state, $p, $txt ) = @_;

    if ( $p->within_element('lhs') ) {
        $state->{lhs} .= $txt;
    }
    elsif ( $p->within_element('rhs') ) {
        $state->{rhs} .= $txt;
    }
    return;
}

# Init handler: start the production counter at zero.
#   $state - shared state hash reference
#   $p     - parser object (unused here)
sub init {
    my ( $state, $p ) = @_;

    $state->{counter} = 0;
    return;
}

# Normalise whitespace in a production string and return the result:
# non-breaking spaces become plain spaces, runs of whitespace collapse to
# a single space, and trailing whitespace is removed.
#   $string - the text to tidy
sub clean {
    my ($string) = @_;

    if ( utf8::is_utf8($string) ) {
        # Decoded character string: a non-breaking space is the single
        # character U+00A0, which the byte pattern below would not match.
        $string =~ s/\x{a0}/ /g;
    }
    else {
        # Undecoded string: a non-breaking space is the UTF-8 byte pair.
        $string =~ s/\xc2\xa0/ /g;
    }

    $string =~ s/\s+/ /g;
    $string =~ s/\s+\z//;

    return $string;
}