#!/bin/perl -w

# Using XML::DOM
# Authors: Ingo Macherius
#          modified by Michel Rodriguez
#          with input from the XML::DOM author Enno Derksen
#
# Prints every <prod> element of the parsed XML document as one line of
# the form "[n] lhs ::= rhs", where n is the 1-based position of the
# <prod> element in document order.
#
# This example uses features from the Perl implementation of the DOM,
# namely the fact that you can get an array from the getElementsByTagName
# method, as in @nodes = $doc->getElementsByTagName ("prod")
#
# Usage: <this script> [file.xml]
# When no file is given, the original hard-coded document path is used.

use strict;
use warnings;
use XML::DOM;

# Document parsed when no file name is passed on the command line.
my $default_file = "/article/ways_to_rome/ex_ps_dom2/REC_xml_19980210.xml";
my $file         = @ARGV ? $ARGV[0] : $default_file;

my $parser = XML::DOM::Parser->new;
my $doc    = $parser->parsefile($file);

my @nodes = $doc->getElementsByTagName("prod");

my $i = 0;
foreach my $node (@nodes) {
    # Count every <prod>, even one skipped below, so the printed numbers
    # keep matching the position of the production in the document.
    $i++;

    my $lhs = $node->getElementsByTagName("lhs")->item(0);
    my @rhs = $node->getElementsByTagName("rhs");

    # item(0) and getFirstChild are both checked for undef here: a <prod>
    # without <lhs>, or with an empty <lhs>, is skipped with a warning
    # rather than aborting the whole run.
    my $lhs_child = defined $lhs       ? $lhs->getFirstChild          : undef;
    my $lhs_text  = defined $lhs_child ? $lhs_child->getNodeValue()   : undef;
    unless (defined $lhs_text) {
        warn "production $i has no <lhs> text, skipped\n";
        next;
    }

    my $prod = "[$i] " . $lhs_text . " ::= " . rhs(@rhs);
    print clean($prod), "\n";
}

# rhs(@rhs_elements)
#
# Concatenates the text found directly under each given element node.
# For a child that is itself an element, the value of its first child is
# used; for any other child, the child's own value is used.  Comment
# nodes are ignored in both cases.
#
# Returns the concatenated string; the empty string (never undef) when
# nothing was collected, so the caller can concatenate it safely.
sub rhs {
    my $text = '';
    foreach my $rhs (@_) {
        foreach my $node ($rhs->getChildNodes()) {
            my $source = $node;
            if ($node->getNodeType() == XML::DOM::Node::ELEMENT_NODE()) {
                # Only the first child of a nested element contributes.
                $source = $node->getFirstChild();

                # An empty element has no first child: nothing to add.
                next unless defined $source;
            }
            next if $source->getNodeName eq '#comment';

            # Skip nodes whose value is undef instead of appending undef.
            my $value = $source->getNodeValue();
            $text .= $value if defined $value;
        }
    }
    return $text;
}

# clean($string)
#
# Normalizes whitespace for single-line output: replaces the byte pair
# \xc2\xa0 (the UTF-8 encoding of U+00A0, no-break space) with a plain
# space, collapses every run of whitespace into one space and removes
# trailing whitespace.  Leading whitespace is left alone.
#
# Returns the cleaned copy; an undefined argument yields the empty string.
sub clean {
    my ($string) = @_;
    return '' unless defined $string;

    $string =~ s/\xc2\xa0/ /sg;
    $string =~ s/\s+/ /g;
    $string =~ s{\s$}{}g;
    return $string;
}