Problem: You want to examine an XML file in detail. Solution: Use DOM to parse the document and process the resulting in-memory tree. Discussion: The Document Object Model (DOM) is a tree-structured representation of the information in an XML document. It consists of several interfaces, the most important of which is Node. All are in the package org.w3c.dom, reflecting the influence of the World Wide Web Consortium (http://www.w3.org) in creating and promulgating the DOM. The major DOM interfaces are shown in Table 21-1.
You don't have to implement these interfaces; the parser generates them. When you start creating or modifying XML documents in Recipe 21.6, you can create nodes. But even then there are implementing classes. Parsing an XML document with DOM is syntactically similar to processing a file with XSL, that is, you get a reference to a parser and call its methods with objects representing the input files. The difference is that the parser returns an XML DOM, a tree of objects in memory. XParse in Example 21-6 simply parses an XML document. Despite the simplicity, I use it a lot; whenever I have an XML file whose validity is in question, I just pass it to XParse. Example 21-6. XParse.javaimport java.io.File import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.w3c.dom.Document; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; /** Parse an XML file using DOM, via JAXP. * @author Ian Darwin, http://www.darwinsys.com/ * @version $Id: ch21.xml,v 1.5 2004/05/04 20:13:38 ian Exp $ */ public class XParse { /** Parse the file */ public static void parse(String fileName, boolean validate) { try { System.err.println("Parsing " + fileName + "..."); // Make the document a URL so relative DTD works. String uri = "file:" + new File(fileName).getAbsolutePath( ); DocumentBuilderFactory f = DocumentBuilderFactory.newInstance( ); if (validate) f.setValidating(true); DocumentBuilder p = f.newDocumentBuilder( ); // Get local copies of DTDs... 
p.setEntityResolver(new MyDTDResolver( )); Document doc = p.parse(uri); System.out.println("Parsed OK"); } catch (SAXParseException ex) { System.err.println("+================================+"); System.err.println("| *SAX Parse Error* |"); System.err.println("+================================+"); System.err.println(ex.toString( )); System.err.println("At line " + ex.getLineNumber( )); System.err.println("+================================+"); } catch (SAXException ex) { System.err.println("+================================+"); System.err.println("| *SAX Error* |"); System.err.println("+================================+"); System.err.println(ex.toString( )); System.err.println("+================================+"); } catch (Exception ex) { System.err.println("+================================+"); System.err.println("| *XML Error* |"); System.err.println("+================================+"); System.err.println(ex.toString( )); } } public static void main(String[] av) { if (av.length == 0) { System.err.println("Usage: XParse file"); return; } boolean validate = false; for (int i=0; i<av.length; i++) { if (av[i].equals("-v")) validate = true; else parse(av[i], validate); } } } DOM also provides tools to traverse the document. You can use the defined TreeWalker interface, or you can just use the algorithm shown in Example 21-7. Example 21-7. XTW.javaimport java.io.File; import java.io.Reader; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import com.darwinsys.util.Debug; /** XML Tree Walker * UPDATED FOR JAXP. 
* @author Ian Darwin, http://www.darwinsys.com/ * @version $Id: ch21.xml,v 1.5 2004/05/04 20:13:38 ian Exp $ */ public class XTW { public static void main(String[] av) { if (av.length == 0) { System.err.println("Usage: XTW file [...]"); return; } for (int i=0; i<av.length; i++) { String name = av[i]; new XTW( ).convert(name, true); } } /** Convert the file */ protected void convert(String fileName, boolean verbose) { Reader is; try { if (verbose) System.err.println(">>>Parsing " + fileName + "..."); // Make the document a URL so relative DTD works. String uri = "file:" + new File(fileName).getAbsolutePath( ); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance( ); DocumentBuilder builder = factory.newDocumentBuilder( ); Document doc = builder.parse( uri ); if (verbose) System.err.println(">>>Walking " + fileName + "..."); doRecursive(doc); } catch (Exception ex) { System.err.println("+============================+"); System.err.println("| XTW Error |"); System.err.println("+============================+"); System.err.println(ex.getClass( )); System.err.println(ex.getMessage( )); System.err.println("+============================+"); } if (verbose) { System.err.println(">>>Done " + fileName + "..."); } } /* Process all the nodes, recursively. 
*/ protected void doRecursive(Node p) { if (p == null) { return; } NodeList nodes = p.getChildNodes( ); Debug.println("xml-tree", "Element has " + nodes.getLength( ) + " children"); for (int i = 0; i < nodes.getLength( ); i++) { Node n = nodes.item(i); if (n == null) { continue; } doNode(n); } } protected void doNode(Node n) { switch(n.getNodeType( )) { case Node.ELEMENT_NODE: System.out.println("ELEMENT<" + n.getNodeName( ) + ">"); doRecursive(n); break; case Node.TEXT_NODE: String text = n.getNodeValue( ); if (text.length( ) == 0 || text.equals("\n") || text.equals("\\r")) { break; } System.out.println("TEXT: " + text); break; default: System.err.println( "OTHER NODE " + n.getNodeType( ) + ": " + n.getClass( )); break; } } }. |