package xml;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Reads an XML ground-truth file (ICDAR 2003 format) and turns every
 * {@code <image>} element into a {@code register} object.
 *
 * <p>All methods are static; the class keeps no state.
 */
public class parser {

    /**
     * Parses the given XML file and returns one {@code register} per
     * {@code <image>} element found in it, in document order.
     *
     * @param xml_file location of the XML file; it is handed unchanged to
     *                 {@link DocumentBuilder#parse(String)}, which treats it as a URI
     * @return a list of {@code register} objects (empty when the document has
     *         no {@code <image>} element); never {@code null}
     * @throws IllegalArgumentException if the file cannot be read or is not
     *                                  well-formed XML, or if an image has no
     *                                  {@code <resolution>} element
     * @throws IllegalStateException    if no XML document builder can be created
     */
    public static List run (String xml_file) {
        /*parse the XML file and get the document object*/
        Document doc = parse_xml_file(xml_file);

        /*get each image element and create a register object*/
        return parse_document(doc);
    }

    /**
     * Loads the XML file into a DOM document.
     *
     * <p>Failures are reported as unchecked exceptions that carry the original
     * cause, instead of being printed and turned into a {@code null} document
     * that would fail later with an unrelated {@code NullPointerException}.
     *
     * @param xml_file location of the XML file (interpreted as a URI by the parser)
     * @return the parsed document; never {@code null}
     * @throws IllegalArgumentException if the file cannot be read or parsed
     * @throws IllegalStateException    if the document builder cannot be configured
     */
    private static Document parse_xml_file (String xml_file) {
        /*get the factory*/
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        try {
            /*using factory get an instance of document builder*/
            DocumentBuilder db = dbf.newDocumentBuilder();

            /*parse using builder to get the document representation of the XML file*/
            return db.parse(xml_file);
        } catch (ParserConfigurationException pce) {
            throw new IllegalStateException("Cannot create an XML document builder", pce);
        } catch (SAXException se) {
            throw new IllegalArgumentException("Malformed XML file: " + xml_file, se);
        } catch (IOException ioe) {
            throw new IllegalArgumentException("Cannot read XML file: " + xml_file, ioe);
        }
    }

    /**
     * Converts every {@code <image>} descendant of the document root into a
     * {@code register}.
     *
     * @param doc the parsed document
     * @return the registers in document order; empty when there is no image
     */
    private static List<register> parse_document (Document doc) {
        List<register> list = new ArrayList<register>();

        /*get the root element*/
        Element docEle = doc.getDocumentElement();

        /*get a nodelist of image elements (never null, possibly empty)*/
        NodeList nl = docEle.getElementsByTagName("image");
        for (int i = 0; i < nl.getLength(); i++) {
            /*get the image information and add it to the list*/
            list.add(extract((Element) nl.item(i)));
        }
        return list;
    }

    /**
     * Builds the {@code register} for one {@code <image>} element: its name,
     * its width and height (attributes {@code x} and {@code y} of the first
     * {@code <resolution>} element) and one {@code fields} entry per
     * {@code <taggedRectangle>} element.
     *
     * @param node the {@code <image>} element
     * @return the register describing that image
     * @throws IllegalArgumentException if the image has no {@code <resolution>} element
     * @throws NumberFormatException    if a numeric attribute is missing or not a number
     */
    private static register extract (Element node) {
        LinkedList<fields> list = new LinkedList<fields>();

        /*getting image name*/
        String image_name = getTextValue(node, "imageName");

        /*getting image dimensions*/
        NodeList dimension = node.getElementsByTagName("resolution");
        if (dimension.getLength() == 0) {
            throw new IllegalArgumentException(
                    "Image '" + image_name + "' has no <resolution> element");
        }
        Element d = (Element) dimension.item(0);
        int image_width = Integer.parseInt(d.getAttribute("x"));  /*image width*/
        int image_height = Integer.parseInt(d.getAttribute("y")); /*image height*/

        /*getting image regions*/
        NodeList regions = node.getElementsByTagName("taggedRectangle");

        /*getting the text region parameters*/
        for (int i = 0; i < regions.getLength(); i++) {
            Element reg = (Element) regions.item(i);
            fields f = new fields();
            f.set_x(Double.parseDouble(reg.getAttribute("x")));
            f.set_y(Double.parseDouble(reg.getAttribute("y")));
            f.set_w(Double.parseDouble(reg.getAttribute("width")));
            f.set_h(Double.parseDouble(reg.getAttribute("height")));
            list.add(f);
        }

        return new register(image_name, image_width, image_height, list, regions.getLength());
    }

    /**
     * Returns the value of the first child node of the first descendant of
     * {@code ele} named {@code tagName}.
     *
     * @param ele     element to search in
     * @param tagName tag name to look for
     * @return the node value, or {@code null} when no such descendant exists
     *         or when it has no child node (e.g. {@code <imageName/>})
     */
    private static String getTextValue (Element ele, String tagName) {
        NodeList nl = ele.getElementsByTagName(tagName);
        if (nl.getLength() == 0) {
            return null;
        }

        /*an empty element has no child node: report it as "no value" instead of failing*/
        Node firstChild = nl.item(0).getFirstChild();
        return firstChild == null ? null : firstChild.getNodeValue();
    }
}