#!/usr/bin/perl
# Using XML::Parser and the Subs-style
# Author: Ingo Macherius
# modified by Michel Rodriguez
#
# Prints one line per <prod> element found in an XML document, in the form
#     [n] lhs ::= rhs
# where lhs/rhs are the character data collected inside the <lhs> and <rhs>
# elements of that <prod>, and n is a running counter starting at 1.
#
# Usage: script [file.xml]
# When no file is given, the original hard-coded document path is used.

use strict;
use warnings;

use XML::Parser;

# Input used when no file name is supplied on the command line.
my $DEFAULT_FILE = '/article/ways_to_rome/ex_ps_subs/REC_xml_19980210.xml';

# Parser state shared by the handlers below.
my $in_lhs = 0;     # true while inside an <lhs> element
my $in_rhs = 0;     # true while inside an <rhs> element
my $lhs    = '';    # text accumulated for the current production's <lhs>
my $rhs    = '';    # text accumulated for the current production's <rhs>
my $count  = 0;     # number of productions printed so far

my $file = defined $ARGV[0] ? $ARGV[0] : $DEFAULT_FILE;

# With the Subs style, the parser calls a sub named after each element for
# its start tag, and the same name with a trailing underscore for its end
# tag, when such a sub exists in this package.
my $parser = XML::Parser->new('Style' => 'Subs');
$parser->setHandlers('Char', \&characters);
$parser->parsefile($file);

# Start-tag handlers -------------------------------------------------------

# <rhs>: start collecting right-hand-side text.
sub rhs { $in_rhs = 1; return; }

# <lhs>: start collecting left-hand-side text.
sub lhs { $in_lhs = 1; return; }

# <prod>: a new production begins, so discard text from the previous one.
sub prod {
    $rhs = '';
    $lhs = '';
    return;
}

# End-tag handlers ---------------------------------------------------------

# </rhs>: stop collecting right-hand-side text.
sub rhs_ { $in_rhs = 0; return; }

# </lhs>: stop collecting left-hand-side text.
sub lhs_ { $in_lhs = 0; return; }

# </prod>: number the finished production and print it on one line.
sub prod_ {
    $count++;
    my $prod = "[$count] $lhs ::= $rhs";
    print clean($prod) . "\n";
    return;
}

# Character-data handler ---------------------------------------------------

# Appends character data to whichever side is currently being collected.
# Text outside <lhs>/<rhs> is ignored; <lhs> takes precedence if both flags
# happen to be set.
sub characters {
    my ($xp, $txt) = @_;

    if ($in_lhs) {
        $lhs .= $txt;
    }
    elsif ($in_rhs) {
        $rhs .= $txt;
    }
    return;
}

# Helpers ------------------------------------------------------------------

# Normalises whitespace in a string and returns the result:
#   - the byte pair \xc2\xa0 (UTF-8 encoding of U+00A0, no-break space)
#     becomes a plain space,
#   - every run of whitespace collapses to a single space,
#   - a trailing space is removed.
# Leading whitespace is collapsed but not removed.
sub clean {
    my ($string) = @_;

    $string =~ s/\xc2\xa0/ /sg;
    $string =~ s/\s+/ /g;
    # Runs were collapsed above, so at most one trailing space remains.
    $string =~ s/\s\z//;

    return $string;
}