Writing a SAX Parser

I l @ ve RuBoard

Two modes are supported for XML parsing in Xerces. You can use SAX or DOM. SAX implements event-driven parsing. DOM loads the entire XML document into memory and hands you a pointer to an object hierarchy that represents it.

You'll write a SAX parser first. To parse this XML file, you need to write a class that extends the org.xml.sax.helpers.DefaultHandler class. This class provides a default set of handlers that are called whenever an event occurs during the parsing of the file. Events include finding the start of the document, encountering a start or end element tag, or reading character data.

You can leave most of these event handlers as is because they implement functionality that you don't need to parse a simple data file; a few you will override in your class, to be able to decipher the contents of the XML file, as you can see in Listing 14.3.

Listing 14.3 SAXProductImporter.java
 package com.bfg.xml;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import java.sql.*;
import java.util.HashMap;
import java.util.Vector;
import java.util.Iterator;
import java.text.NumberFormat;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ResourceBundle;

/**
 * SAX content/error handler that imports products from an XML document into
 * the BFG MySQL database.
 *
 * <p>The handler collects the text of each sub-element of a
 * <code>Product</code> into holding fields and writes the product to the
 * database when the closing <code>Product</code> tag is seen. All work is done
 * on one connection with auto-commit switched off; the transaction is
 * committed in {@link #endDocument()} and rolled back by
 * {@link #rollbackAndQuit()}, which also terminates the JVM.
 *
 * <p>New author and category IDs are allocated from in-memory counters seeded
 * in {@link #startDocument()}, with no database locking.
 */
public class SAXProductImporter extends DefaultHandler {

    private static final String DEFAULT_PARSER_NAME =
        "org.apache.xerces.parsers.SAXParser";

    /** SQL text for every statement, keyed by name. */
    private static ResourceBundle sql_bundle =
        ResourceBundle.getBundle("com.bfg.xml.SQLQueries");

    /**
     * Parses the document at <code>uri</code> with validation enabled and
     * imports every product it contains. Any failure is printed to
     * <code>System.err</code> and ends in {@link #rollbackAndQuit()}.
     *
     * @param uri system identifier of the XML document to import
     */
    public static void loadProducts(String uri) {
        SAXProductImporter myclass = new SAXProductImporter();
        try {
            XMLReader parser =
                (XMLReader) Class.forName(DEFAULT_PARSER_NAME).newInstance();
            parser.setContentHandler(myclass);
            parser.setErrorHandler(myclass);
            parser.setFeature("http://xml.org/sax/features/validation", true);
            parser.parse(uri);
        } catch (SAXException se) {
            // SAXParseException is a SAXException and was handled identically,
            // so a single catch covers both.
            printRootCause(se);
            myclass.rollbackAndQuit();
        } catch (Exception e) {
            e.printStackTrace(System.err);
            myclass.rollbackAndQuit();
        }
    }

    /**
     * Prints the exception wrapped by <code>se</code> if there is one,
     * otherwise <code>se</code> itself.
     */
    private static void printRootCause(SAXException se) {
        Exception wrapped = se.getException();
        if (wrapped != null) {
            wrapped.printStackTrace(System.err);
        } else {
            se.printStackTrace(System.err);
        }
    }

    /**
     * Error-handler callback: prints the problem, then rolls back and exits.
     *
     * @param se the error reported by the parser
     */
    public void error(SAXParseException se) {
        printRootCause(se);
        rollbackAndQuit();
    }

    Connection conn = null;
    /** Author name -> Integer author ID, for every author known so far. */
    HashMap authors = new HashMap();
    /** Category name -> Integer category ID, for every category known so far. */
    HashMap categories = new HashMap();
    /** Highest author ID seen or allocated so far. */
    int max_author = 0;
    /** Highest category ID seen or allocated so far. */
    int max_cat = 0;

    /**
     * Opens the database connection, caches the existing categories and
     * authors together with their highest IDs, and switches auto-commit off.
     * Any failure ends in {@link #rollbackAndQuit()}.
     */
    public void startDocument() {
        try {
            Class.forName("org.gjt.mm.mysql.Driver").newInstance();
            conn = DriverManager.getConnection("jdbc:mysql://localhost/BFG");
            Statement st = conn.createStatement();
            try {
                max_cat = loadLookup(st, "SELECT * FROM CATEGORY",
                                     "CATEGORY_NAME", "CATEGORY_ID",
                                     categories, max_cat);
                max_author = loadLookup(st, "SELECT * FROM AUTHOR",
                                        "AUTHOR_NAME", "AUTHOR_ID",
                                        authors, max_author);
            } finally {
                st.close();
            }
            // Use the JDBC API rather than a raw "SET AUTOCOMMIT=0" statement
            // so the driver's own view of the transaction state stays correct.
            conn.setAutoCommit(false);
        } catch (Exception ex) {
            ex.printStackTrace(System.err);
            rollbackAndQuit();
        }
    }

    /**
     * Runs <code>query</code> and stores each row's name/ID pair in
     * <code>target</code>.
     *
     * @return the larger of <code>highest</code> and the largest ID read
     */
    private static int loadLookup(Statement st, String query,
                                  String nameColumn, String idColumn,
                                  HashMap target, int highest)
            throws SQLException {
        ResultSet rs = st.executeQuery(query);
        try {
            while (rs.next()) {
                String name = rs.getString(nameColumn);
                int id = rs.getInt(idColumn);
                target.put(name, new Integer(id));
                if (id > highest) {
                    highest = id;
                }
            }
        } finally {
            rs.close();
        }
        return highest;
    }

    /**
     * Rolls back the open transaction (if any), closes the connection, prints
     * an abort message and terminates the JVM with exit status 1. A failure
     * while rolling back is reported on <code>System.err</code> but does not
     * prevent the exit.
     */
    public void rollbackAndQuit() {
        try {
            if (conn != null) {
                try {
                    // This can be reached before auto-commit was switched off;
                    // only roll back when a transaction can actually be open.
                    if (!conn.getAutoCommit()) {
                        conn.rollback();
                    }
                } finally {
                    conn.close();
                }
            }
        } catch (Exception ex) {
            System.err.println("Rollback failed: " + ex);
        }
        System.out.println("Aborting import, rolling back and quitting");
        System.exit(1);
    }

    /**
     * Commits the import and closes the connection. A commit failure is
     * printed to <code>System.err</code>; the connection is closed either way.
     */
    public void endDocument() {
        try {
            try {
                conn.commit();
            } finally {
                conn.close();
            }
        } catch (java.sql.SQLException ex) {
            ex.printStackTrace(System.err);
        }
    }

    // Holding fields for the product currently being parsed.
    public String prod_ISBN = null;
    public String prod_title = null;
    public Vector prod_authors = new Vector();
    public Vector prod_categories = new Vector();
    public Date prod_pubdate = null;
    public double prod_price = 0D;
    public String prod_description = null;
    /** Character data collected since the most recent start tag. */
    public StringBuffer chars = new StringBuffer();

    /**
     * Clears the character buffer and, on a <code>Product</code> start tag,
     * resets every holding field and records the <code>ISBN</code> attribute.
     */
    public void startElement(String uri, String local, String raw,
                             Attributes attrs) {
        chars.setLength(0);
        if (local.equals("Product")) {
            prod_ISBN = attrs.getValue("ISBN");
            prod_authors.clear();
            prod_categories.clear();
            prod_pubdate = null;
            prod_description = null;
            prod_title = null;
            // Reset the price too, so a product without a Price element does
            // not inherit the previous product's price.
            prod_price = 0D;
        }
    }

    /**
     * Stores the buffered character data in the holding field that matches the
     * closing tag, and creates the product on a closing <code>Product</code>
     * tag. An unparsable price or date ends in {@link #rollbackAndQuit()}.
     */
    public void endElement(String uri, String local, String raw) {
        if (local.equals("Description")) {
            prod_description = chars.toString();
        } else if (local.equals("Title")) {
            prod_title = chars.toString();
        } else if (local.equals("Category")) {
            prod_categories.add(chars.toString());
        } else if (local.equals("Author")) {
            prod_authors.add(chars.toString());
        } else if (local.equals("Price")) {
            try {
                NumberFormat nf = new DecimalFormat();
                prod_price = nf.parse(chars.toString()).doubleValue();
            } catch (Exception ex) {
                System.out.println("Invalid value for price: " + chars);
                rollbackAndQuit();
            }
        } else if (local.equals("Pubdate")) {
            try {
                SimpleDateFormat df = new SimpleDateFormat("MM/dd/yy");
                prod_pubdate = new Date(df.parse(chars.toString()).getTime());
            } catch (Exception ex) {
                System.out.println("Invalid value for date: " + chars);
                rollbackAndQuit();
            }
        } else if (local.equals("Product")) {
            createProduct();
        }
    }

    /**
     * Appends a run of character data to the buffer; the parser may deliver
     * one piece of text in several calls.
     */
    public void characters(char ch[], int start, int length) {
        chars.append(ch, start, length);
    }

    /**
     * Replaces the current product in the database: deletes the product row
     * and its author/category cross-references, inserts the product, inserts
     * any author or category not seen before, and inserts the
     * cross-references. An SQL failure ends in {@link #rollbackAndQuit()}.
     */
    public void createProduct() {
        try {
            executeForIsbn("deleteProd");
            executeForIsbn("deleteProdXref");
            executeForIsbn("deleteCatXref");
            insertProduct();

            Iterator author_it = prod_authors.iterator();
            while (author_it.hasNext()) {
                int author_id = authorIdFor((String) author_it.next());
                insertXref("insertAuthorXref", author_id);
            }

            Iterator cat_it = prod_categories.iterator();
            while (cat_it.hasNext()) {
                int cat_id = categoryIdFor((String) cat_it.next());
                insertXref("insertCatXref", cat_id);
            }
        } catch (java.sql.SQLException ex) {
            ex.printStackTrace(System.err);
            rollbackAndQuit();
        }
    }

    /** Runs the named one-parameter statement with the current ISBN. */
    private void executeForIsbn(String queryKey) throws SQLException {
        PreparedStatement pstmt =
            conn.prepareStatement(sql_bundle.getString(queryKey));
        try {
            pstmt.setString(1, prod_ISBN);
            pstmt.executeUpdate();
        } finally {
            pstmt.close();
        }
    }

    /** Inserts the product row from the current holding fields. */
    private void insertProduct() throws SQLException {
        PreparedStatement pstmt =
            conn.prepareStatement(sql_bundle.getString("insertProd"));
        try {
            pstmt.setString(1, prod_ISBN);
            pstmt.setString(2, prod_title);
            pstmt.setDouble(3, prod_price);
            pstmt.setDate(4, prod_pubdate);
            pstmt.setString(5, prod_description);
            pstmt.executeUpdate();
        } finally {
            pstmt.close();
        }
    }

    /** Runs the named (id, name) insert statement. */
    private void insertIdAndName(String queryKey, int id, String name)
            throws SQLException {
        PreparedStatement pstmt =
            conn.prepareStatement(sql_bundle.getString(queryKey));
        try {
            pstmt.setInt(1, id);
            pstmt.setString(2, name);
            pstmt.executeUpdate();
        } finally {
            pstmt.close();
        }
    }

    /** Runs the named (ISBN, id) cross-reference insert for the current ISBN. */
    private void insertXref(String queryKey, int id) throws SQLException {
        PreparedStatement pstmt =
            conn.prepareStatement(sql_bundle.getString(queryKey));
        try {
            pstmt.setString(1, prod_ISBN);
            pstmt.setInt(2, id);
            pstmt.executeUpdate();
        } finally {
            pstmt.close();
        }
    }

    /** Returns the ID for <code>author</code>, inserting the author if new. */
    private int authorIdFor(String author) throws SQLException {
        Integer known = (Integer) authors.get(author);
        if (known != null) {
            return known.intValue();
        }
        int author_id = ++max_author;
        insertIdAndName("insertAuthor", author_id, author);
        authors.put(author, new Integer(author_id));
        return author_id;
    }

    /** Returns the ID for <code>cat</code>, inserting the category if new. */
    private int categoryIdFor(String cat) throws SQLException {
        Integer known = (Integer) categories.get(cat);
        if (known != null) {
            return known.intValue();
        }
        int cat_id = ++max_cat;
        insertIdAndName("insertCategory", cat_id, cat);
        categories.put(cat, new Integer(cat_id));
        return cat_id;
    }

    /**
     * Command-line entry point.
     *
     * @param argv <code>argv[0]</code> is the URI of the XML file to import
     */
    public static void main(String argv[]) {
        if (argv.length == 0) {
            System.err.println("Usage: SAXProductImporter <xml-uri>");
            System.exit(1);
        }
        loadProducts(argv[0]);
    }
}

The loadProducts method is the standard accessor that you're providing to this class. It begins by instantiating a copy of the SAX parser and sets the content and error handlers to the class you've created. These handlers are called whenever an element is encountered in the XML document or when errors or other events, such as the start of the document, occur.

You can think about the handlers by imagining that the SAX parser is responsible for breaking apart the structure of the document but doesn't know how to do anything with the contents. The handlers that you write are called by SAX to interpret the data as the parser encounters it.

You might want to configure a few features of the parser. Specifically, you'll want to validate against the DTD that you specified; the default for SAX parsers is not to validate the structure. In other words, unless validation is set to on, the DTD declaration of the XML file is ignored and any validly structured XML document is reported as successfully parsed, even if it doesn't match the DTD. After the feature is set, call the parser, wrapping the call in a catch to handle any fatal exceptions from the parse or your handler. All the rest of the functionality occurs as callbacks to the overriding methods you'll supply.

If the XML file doesn't validate against the DTD, the error method is called. You want the code to print the error message so that you can find the syntax error; then you want the code to abort the processing and roll back the database.

When you first start processing a document, the startDocument method opens a database connection and gets the current list of categories and authors. It also gets the highest value currently being used for the author and category IDs so that the importer can use them to create new ones, if needed. Because no database locking is occurring in this code, you can run only one of these imports at any given time; otherwise, the same ID numbers would be used for different data.

rollbackAndQuit is a helper that rolls back the database and exits the program. Unfortunately, MySQL supports rollback for only certain types of databases, so this might not work 100% of the time. If you were going to take advantage of rollback functionality, you'd need to download the correct version of the MySQL binary.

When the SAX parser encounters the end of the document, endDocument commits the changes made and closes the connection.

When the parser sees an element start, it calls startElement, which checks to see if it's a product tag. If so, it clears all the values from any previous products encountered during this run.

On an end tag, endElement is called, which takes any character data that has been collected since the start tag and writes it to a holding variable. If it's an end product, the new product is created from the data collected. The result of this is that as each subelement of a product is encountered, it is stored into a holding variable. When the product end tag is found, all those variables are used to create the new product.

When the parser encounters character data, characters is called, which appends the data to a buffer. The buffer will be assigned to a tag when the tag ends. Note that because you can have multiple calls to this method for what seems to be a "single" piece of text, you must append all the data together to get the whole string. Some care must be exercised because any whitespace in the document (even the newline after a tag) is considered character data and will call this method. You want to make sure that you process characters only where they are meaningful.

createProduct deletes the product if it already exists, along with any cross-references from the product to authors or categories. Then it inserts the product, creates any new authors or categories as needed, and creates the cross-references.

As usual, with the JDBC code you've written, you will need your bundle of SQL queries to make it work (see Listing 14.4).

Listing 14.4 SQLQueries.properties
 # SQL statements used by SAXProductImporter, looked up by key.
# Each ? is a JDBC parameter placeholder; a trailing backslash continues the
# value on the next line.

# Remove an existing product and its cross-references before re-inserting it.
deleteProd=DELETE FROM PRODUCT WHERE ISBN=?
deleteProdXref=DELETE FROM PRODUCT_AUTHOR_XREF WHERE PRODUCT_ISBN=?
deleteCatXref=DELETE FROM CATEGORY_PRODUCT_XREF WHERE PRODUCT_ISBN=?

# Insert the product row and any new author or category.
insertProd=INSERT INTO PRODUCT (ISBN, TITLE, PRICE, PUB_DATE, DESCRIPTION) \
                   VALUES (?, ?, ?, ?, ?)
insertAuthor=INSERT INTO AUTHOR (AUTHOR_ID, AUTHOR_NAME) \
                   VALUES (?, ?)
insertCategory=INSERT INTO CATEGORY (CATEGORY_ID, CATEGORY_NAME) \
                   VALUES (?, ?)

# Link the product to its authors and categories.
insertAuthorXref=INSERT INTO PRODUCT_AUTHOR_XREF (PRODUCT_ISBN, AUTHOR_ID) \
                VALUES (?, ?)
insertCatXref=INSERT INTO CATEGORY_PRODUCT_XREF (PRODUCT_ISBN, CATEGORY_ID) \
                VALUES (?, ?)

You can then add a bit of code to your build.xml Ant script (see Listing 14.5) to run the program.

Listing 14.5 Additions to the Ant Script
 <!-- Runs the SAX product importer on xml/Products.xml in a forked JVM.
     The classpath is Ant's own class path plus every jar under the Tomcat
     lib directory and the web application's WEB-INF/lib directory. -->
<target name="testxml" depends="dist">
  <java classname="com.bfg.xml.SAXProductImporter" fork="yes">
    <classpath>
      <pathelement path="${java.class.path}"/>
      <fileset dir="c:\tomcat\lib">
        <include name="**/*.jar"/>
      </fileset>
      <fileset dir="c:\tomcat\webapps\bfg\WEB-INF\lib">
        <include name="**/*.jar"/>
      </fileset>
    </classpath>
    <arg value="xml/Products.xml"/>
  </java>
</target>

When you run it, everything works as expected.

 C:\CARTAPP\bfg>ant testxml Buildfile: build.xml init: compile: dist: testxml: BUILD SUCCESSFUL Total time: 2 seconds 
I l @ ve RuBoard


MySQL and JSP Web Applications. Data-Driven Programming Using Tomcat and MySQL
MySQL and JSP Web Applications: Data-Driven Programming Using Tomcat and MySQL
ISBN: 0672323095
EAN: 2147483647
Year: 2002
Pages: 203
Authors: James Turner

flylib.com © 2008-2017.
If you have any questions, please contact us: flylib@qtcs.net