#!/usr/bin/perl -w

# Using XML::Parser and Handlers
# Author: Ingo Macherius
# updated by Michel Rodriguez
#
# Streams an XML document through XML::Parser and, for every <prod> element,
# prints one line of the form "[n] LHS ::= RHS", where LHS and RHS are the
# accumulated text of the <lhs> and <rhs> elements seen inside that <prod>.
#
# Usage: script [xml_file]
#   xml_file  optional path of the document to parse; when omitted the
#             original hard-coded location is used.

use strict;
use XML::Parser;

# Document parsed when no path is given on the command line.
my $default_file = '/article/ways_to_rome/ex_ps_parser/REC_xml_19980210.xml';

# Variables used to track the state of the parser. They are initialised
# explicitly so that text arriving before the first <prod> cannot trigger
# "uninitialized value" warnings.
my $in_lhs = 0;     # true while inside an <lhs> element
my $in_rhs = 0;     # true while inside an <rhs> element
my $lhs    = '';    # text collected from <lhs> for the current production
my $rhs    = '';    # text collected from <rhs> for the current production
my $i      = 0;     # number of productions printed so far

my $xml_file = defined $ARGV[0] ? $ARGV[0] : $default_file;

my $parser = XML::Parser->new(
    Handlers => {
        Start => \&tag_start,
        End   => \&tag_end,
        Char  => \&characters,
    },
);

# parsefile() dies with a diagnostic if the file cannot be opened or is
# not well-formed, so no extra error check is needed here.
$parser->parsefile($xml_file);

# Start-tag handler.
#   $xp - the parser object passed in by XML::Parser (unused)
#   $el - name of the element being opened
# Entering <prod> resets both text buffers; entering <lhs>/<rhs> raises the
# matching flag so that characters() knows where to append text.
sub tag_start {
    my ($xp, $el) = @_;

    if ($el eq 'rhs') {
        $in_rhs = 1;
    }
    elsif ($el eq 'lhs') {
        $in_lhs = 1;
    }
    elsif ($el eq 'prod') {
        $rhs = '';
        $lhs = '';
    }
    return;
}

# End-tag handler.
#   $xp - the parser object passed in by XML::Parser (unused)
#   $el - name of the element being closed
# Leaving <lhs>/<rhs> lowers the matching flag; leaving <prod> emits the
# production collected so far with the next sequence number.
sub tag_end {
    my ($xp, $el) = @_;

    if ($el eq 'rhs') {
        $in_rhs = 0;
    }
    elsif ($el eq 'lhs') {
        $in_lhs = 0;
    }
    elsif ($el eq 'prod') {
        print_production(++$i, $lhs, $rhs);
    }
    return;
}

# Character-data handler.
#   $xp  - the parser object passed in by XML::Parser (unused)
#   $txt - a chunk of text; it is appended rather than assigned, so text
#          delivered in several chunks is accumulated
# Text outside <lhs> and <rhs> is ignored; <lhs> wins if both flags are set.
sub characters {
    my ($xp, $txt) = @_;

    if ($in_lhs) {
        $lhs .= $txt;
    }
    elsif ($in_rhs) {
        $rhs .= $txt;
    }
    return;
}

# Prints one production as "[i] lhs ::= rhs" followed by a newline, after
# normalising its whitespace with clean().
#   $i   - sequence number of the production
#   $lhs - left-hand side text
#   $rhs - right-hand side text
sub print_production {
    my ($i, $lhs, $rhs) = @_;

    my $prod = "[$i] $lhs ::= $rhs";
    print clean($prod) . "\n";
    return;
}

# Returns a copy of $string with every no-break space turned into a plain
# space, each run of whitespace collapsed to a single space, and one
# trailing whitespace character removed. Leading whitespace is kept
# (collapsed to one space), as before.
sub clean {
    my ($string) = @_;

    # A no-break space is the single character U+00A0 in a character
    # (UTF-8 flagged) string, but the byte pair C2 A0 in a raw UTF-8 byte
    # string. Matching the byte pair against a character string would
    # instead eat a legitimate "A-circumflex + NBSP" sequence, so pick the
    # pattern that fits the string's representation.
    if (utf8::is_utf8($string)) {
        $string =~ s/\x{a0}/ /g;
    }
    else {
        $string =~ s/\xc2\xa0/ /g;
    }

    $string =~ s/\s+/ /g;

    # After the collapse above at most one whitespace character can remain
    # at the very end of the string.
    $string =~ s/\s\z//;

    return $string;
}