#!/usr/bin/perl
# Using XML::SAX::Expat
# author: Michel Rodriguez
#
# Prints every grammar production (<prod> element) found in an XML document,
# one per line, in the form "[n] lhs ::= rhs".
#
# Usage: pass the file or URI to parse as the first argument; when no
# argument is given the original default document is parsed.

use strict;
use warnings;

use XML::SAX::Expat;

my $DEFAULT_SOURCE =
    '/article/ways_to_rome/ex_ps_sax_expat/REC_xml_19980210.xml';

# An optional first command-line argument overrides the default document.
my $source = @ARGV ? $ARGV[0] : $DEFAULT_SOURCE;

my $handler = my_handler->new();
my $parser  = XML::SAX::Expat->new(
    Handler  => $handler,
    Features => {
        # Namespace processing is switched off; the handler below matches
        # on the plain element name ($el->{Name}).
        'http://xml.org/sax/features/namespaces' => 0,
    },
);

$parser->parse_uri($source);

exit;

# ---------------------------------------------------------------------------
# SAX handler: collects the text of <lhs> and <rhs> inside each <prod> and
# prints the production when </prod> is reached.
# ---------------------------------------------------------------------------
package my_handler;

use base qw(XML::SAX::Base);

# Constructor. Takes no arguments besides the invocant and returns a handler
# with all of its state fields explicitly initialised.
sub new {
    my ($class) = @_;

    my $self = {
        counter => 0,     # number of productions printed so far
        lhs     => '',    # text collected from the current <lhs>
        rhs     => '',    # text collected from the current <rhs>
        in_lhs  => 0,     # true while inside an <lhs> element
        in_rhs  => 0,     # true while inside an <rhs> element
    };

    # Two-argument bless so that subclasses are blessed into their own class.
    return bless $self, ref($class) || $class;
}

# SAX callback: an element opens. Raises the matching "inside" flag for
# <lhs>/<rhs>, and empties both text buffers when a new <prod> starts.
sub start_element {
    my ($h, $el) = @_;

    my $name = $el->{Name};

    if ($name eq 'rhs') {
        $h->in_rhs = 1;
    }
    elsif ($name eq 'lhs') {
        $h->in_lhs = 1;
    }
    elsif ($name eq 'prod') {
        $h->rhs = '';
        $h->lhs = '';
    }

    return;
}

# SAX callback: an element closes. Lowers the matching "inside" flag for
# <lhs>/<rhs>; on </prod> it numbers, cleans and prints the production.
sub end_element {
    my ($h, $el) = @_;

    my $name = $el->{Name};

    if ($name eq 'rhs') {
        $h->in_rhs = undef;
    }
    elsif ($name eq 'lhs') {
        $h->in_lhs = undef;
    }
    elsif ($name eq 'prod') {
        $h->counter++;
        my $prod = clean(
            '[' . $h->counter . '] ' . $h->lhs . ' ::= ' . $h->rhs
        );
        print $prod . "\n";
    }

    return;
}

# SAX callback: character data. Appends the text to whichever buffer is
# active; text outside <lhs>/<rhs> is ignored. Uses .= because a single text
# node may be delivered in several chunks.
sub characters {
    my ($h, $characters) = @_;

    if ($h->in_lhs) {
        $h->lhs .= $characters->{Data};
    }
    elsif ($h->in_rhs) {
        $h->rhs .= $characters->{Data};
    }

    return;
}

# accessors, to be completely OO kosher
# works only for perl 5.6.0 and above
#
# Each accessor is an lvalue sub, so it can be read ($h->lhs) and assigned
# to ($h->lhs = 'x'). The hash element must stay the last evaluated
# expression (no explicit "return") for the lvalue to work.
sub in_lhs  : lvalue { my $self = shift; $self->{in_lhs}; }
sub lhs     : lvalue { my $self = shift; $self->{lhs}; }
sub in_rhs  : lvalue { my $self = shift; $self->{in_rhs}; }
sub rhs     : lvalue { my $self = shift; $self->{rhs}; }
sub counter : lvalue { my $self = shift; $self->{counter}; }

# Plain function (not a method): normalises the whitespace of a string and
# returns the cleaned copy.
sub clean {
    my ($s) = @_;

    # \xc2\xa0 is the UTF-8 byte sequence of a no-break space.
    # NOTE(review): this only matches if the parser hands over undecoded
    # bytes -- confirm against the XML::SAX::Expat version in use.
    $s =~ s/\xc2\xa0/ /sg;
    $s =~ s/\s+/ /g;       # collapse every whitespace run to one space
    $s =~ s{\s$}{};        # drop the single trailing space, if any

    return $s;
}