#!/usr/bin/perl
# Author: Michel Rodriguez
#
# Prints every <prod> element found in an XML document, one per line, in
# the form "[n] lhs ::= rhs", where n is a running counter starting at 1
# and lhs/rhs are the string values of the element's <lhs> and <rhs>
# children.
#
# Usage: perl this_script [xml_file]
#   xml_file  Optional path of the document to read. When omitted, the
#             historical hard-coded location of REC_xml_19980210.xml is
#             used, so running without arguments behaves as before.

use strict;
use warnings;

use XML::LibXML;

# Location read when no file is given on the command line.
my $DEFAULT_XML_FILE =
    '/article/ways_to_rome/ex_ps_libxml/REC_xml_19980210.xml';

my $xml_file = @ARGV ? $ARGV[0] : $DEFAULT_XML_FILE;

my $parser = XML::LibXML->new();
my $doc    = $parser->parse_file($xml_file);

# Number the productions in the order findnodes() returns them.
my $i = 0;
foreach my $prod ( $doc->findnodes('//prod') ) {
    my $lhs = $prod->findvalue('./lhs');
    my $rhs = $prod->findvalue('./rhs');
    $i++;
    print clean("[$i] $lhs ::= $rhs"), "\n";
}

# clean( $string )
#
# Returns a copy of $string normalised for single-line output:
#   - each 0xC2 0xA0 pair (the UTF-8 encoding of a no-break space) becomes
#     a plain space,
#   - every run of whitespace, newlines included, collapses to one space,
#   - trailing whitespace is removed.
# Leading whitespace is collapsed to a single space but not removed.
sub clean {
    my ($string) = @_;

    $string =~ s/\xc2\xa0/ /g;

    $string =~ s/\s+/ /g;

    # Runs are already collapsed, so at most one space can remain at the end.
    $string =~ s/\s+\z//;

    return $string;
}