Recipe 21.4 Parsing XML with DOM


Problem

You want to examine an XML file in detail.

Solution

Use DOM to parse the document and process the resulting in-memory tree.

Discussion

The Document Object Model (DOM) is a tree-structured representation of the information in an XML document. It consists of several interfaces, the most important of which is Node. All are in the package org.w3c.dom, reflecting the influence of the World Wide Web Consortium (http://www.w3.org) in creating and promulgating the DOM. The major DOM interfaces are shown in Table 21-1.

Table 21-1. Major DOM interfaces

Interface

Function

Document

Top-level representation of an XML document

Node

Representation of any node in the XML tree

Element

An XML element

Text

A textual string


You don't have to implement these interfaces; the parser generates them. When you start creating or modifying XML documents in Recipe 21.6, you can create nodes. But even then there are implementing classes. Parsing an XML document with DOM is syntactically similar to processing a file with XSL; that is, you get a reference to a parser and call its methods with objects representing the input files. The difference is that the parser returns an XML DOM, a tree of objects in memory. XParse in Example 21-6 simply parses an XML document. Despite the simplicity, I use it a lot; whenever I have an XML file whose validity is in question, I just pass it to XParse.

Example 21-6. XParse.java
import java.io.File import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.w3c.dom.Document; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; /** Parse an XML file using DOM, via JAXP.  * @author Ian Darwin, http://www.darwinsys.com/  * @version $Id: ch21.xml,v 1.5 2004/05/04 20:13:38 ian Exp $  */ public class XParse {     /** Parse the file */     public static void parse(String fileName, boolean validate) {         try {             System.err.println("Parsing " + fileName + "...");             // Make the document a URL so relative DTD works.             String uri = "file:" + new File(fileName).getAbsolutePath( );             DocumentBuilderFactory f = DocumentBuilderFactory.newInstance( );             if (validate)                 f.setValidating(true);             DocumentBuilder p = f.newDocumentBuilder( );             // Get local copies of DTDs...             p.setEntityResolver(new MyDTDResolver( ));             Document doc = p.parse(uri);             System.out.println("Parsed OK");         } catch (SAXParseException ex) {             System.err.println("+================================+");             System.err.println("|       *SAX Parse Error*        |");             System.err.println("+================================+");             System.err.println(ex.toString( ));             System.err.println("At line " + ex.getLineNumber( ));             System.err.println("+================================+");         } catch (SAXException ex) {             System.err.println("+================================+");             System.err.println("|          *SAX Error*           |");             System.err.println("+================================+");             System.err.println(ex.toString( ));             System.err.println("+================================+");         } catch (Exception ex) {             System.err.println("+================================+");             
System.err.println("|           *XML Error*          |");             System.err.println("+================================+");             System.err.println(ex.toString( ));          }     }     public static void main(String[] av) {         if (av.length == 0) {             System.err.println("Usage: XParse file");             return;         }         boolean validate = false;         for (int i=0; i<av.length; i++) {             if (av[i].equals("-v"))                 validate = true;             else                 parse(av[i], validate);         }     } }

DOM also provides tools to traverse the document. You can use the defined TreeWalker interface, or you can just use the algorithm shown in Example 21-7.

Example 21-7. XTW.java
import java.io.File; import java.io.Reader; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import com.darwinsys.util.Debug; /** XML Tree Walker  * UPDATED FOR JAXP.  * @author Ian Darwin, http://www.darwinsys.com/  * @version $Id: ch21.xml,v 1.5 2004/05/04 20:13:38 ian Exp $  */ public class XTW {     public static void main(String[] av) {         if (av.length == 0) {             System.err.println("Usage: XTW file [...]");             return;         }         for (int i=0; i<av.length; i++) {             String name = av[i];             new XTW( ).convert(name, true);         }     }     /** Convert the file */     protected void convert(String fileName, boolean verbose) {         Reader is;         try {             if (verbose)                 System.err.println(">>>Parsing " + fileName + "...");             // Make the document a URL so relative DTD works.             String uri = "file:" + new File(fileName).getAbsolutePath( );             DocumentBuilderFactory factory =                 DocumentBuilderFactory.newInstance( );             DocumentBuilder builder = factory.newDocumentBuilder( );             Document doc = builder.parse( uri );               if (verbose)                 System.err.println(">>>Walking " + fileName + "...");             doRecursive(doc);         } catch (Exception ex) {             System.err.println("+============================+");             System.err.println("|        XTW Error           |");             System.err.println("+============================+");             System.err.println(ex.getClass( ));             System.err.println(ex.getMessage( ));             System.err.println("+============================+");         }         if (verbose) {             System.err.println(">>>Done " + fileName + "...");         }     }     /* Process all the nodes, recursively. 
*/     protected void doRecursive(Node p) {         if (p == null) {             return;         }         NodeList nodes = p.getChildNodes( );         Debug.println("xml-tree", "Element has " +              nodes.getLength( ) + " children");         for (int i = 0; i < nodes.getLength( ); i++) {             Node n = nodes.item(i);             if (n == null) {                 continue;             }             doNode(n);         }     }     protected void doNode(Node n) {         switch(n.getNodeType( )) {             case Node.ELEMENT_NODE:                 System.out.println("ELEMENT<" + n.getNodeName( ) + ">");                 doRecursive(n);                 break;             case Node.TEXT_NODE:                 String text = n.getNodeValue( );                 if (text.length( ) == 0 ||                      text.equals("\n") || text.equals("\\r")) {                     break;                 }                 System.out.println("TEXT: " + text);                 break;             default:                 System.err.println( "OTHER NODE " +                     n.getNodeType( ) + ": " + n.getClass( ));                 break;         }     } }.



Java Cookbook
Java Cookbook, Second Edition
ISBN: 0596007019
EAN: 2147483647
Year: 2003
Pages: 409
Authors: Ian F Darwin

flylib.com © 2008-2017.
If you have any questions, please contact us: flylib@qtcs.net