List of usage examples for org.dom4j.io SAXReader setXMLReaderClassName
public void setXMLReaderClassName(String xmlReaderClassName) throws SAXException
Sets the class name of the XMLReader to be used to parse SAX events.
From source file: com.globalsight.everest.tm.util.ttx.TtxToTmx.java
License:Apache License
/** * Main method to call, returns the new filename of the result. *///from w w w. j a va 2 s. com public String convertTtxToTmx(String p_url) throws Exception { final String baseName = getBaseName(p_url); final String extension = getExtension(baseName); info("Converting TTX file to TMX: `" + p_url + "'"); m_entryCount = 0; // Reading from a file, need to use Xerces. SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); //reader.setEntityResolver(DtdResolver.getInstance()); //reader.setValidation(true); // Fetch the version info early. reader.addHandler("/TRADOStag", new ElementHandler() { public void onStart(ElementPath path) { Element element = path.getCurrent(); m_version = element.attributeValue(Ttx.VERSION); } public void onEnd(ElementPath path) { } }); // Fetch the header info early. reader.addHandler("/TRADOStag/FrontMatter", new ElementHandler() { public void onStart(ElementPath path) { } public void onEnd(ElementPath path) { Element element = path.getCurrent(); setTtxHeader(element); try { startOutputFile(baseName); } catch (Exception ex) { error(ex.toString()); System.exit(1); } // prune the current element to reduce memory element.detach(); element = null; } }); ElementHandler tuHandler = new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount; } public void onEnd(ElementPath path) { Element element = path.getCurrent(); element = cleanupTu(element); writeEntry(element.asXML()); // prune the current element to reduce memory element.detach(); element = null; if (m_entryCount % 50 == 0) { debug("Entry " + m_entryCount); } } }; // Path handlers cannot use "//", sooo specify all known paths. reader.addHandler("/TRADOStag/Body/Raw/Tu", tuHandler); reader.addHandler("/TRADOStag/Body/Raw/df/Tu", tuHandler); reader.addHandler("/TRADOStag/Body/Raw/ut/Tu", tuHandler); reader.addHandler("/TRADOStag/Body/Raw/df/ut/Tu", tuHandler); // Read in the entire file (it's not too big normally). 
Document document = reader.read(p_url); closeOutputFile(); info("Processed " + m_entryCount + " TUs into file `" + m_filename + "'"); return m_filename; }
From source file:com.globalsight.ling.docproc.DiplomatWordCounter.java
License:Apache License
private SAXReader createXmlParser() { SAXReader result = new SAXReader(); try {/*w ww . j a v a 2 s . co m*/ result.setXMLReaderClassName("org.dom4j.io.aelfred.SAXDriver"); result.setValidation(false); } catch (Exception ex) { System.err.println("org.dom4j.io.aelfred.SAXDriver not found"); // Use the system default parser, better than nothing. result = new SAXReader(); result.setValidation(false); } return result; }
From source file:com.globalsight.terminology.importer.GTXmlReader.java
License:Apache License
/** * Reads an XML file and checks for correctness. If there's any * error in the file, an exception is thrown. *//* w w w .j ava 2s . com*/ private void analyzeXml(String p_url) throws Exception { SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); CATEGORY.debug("Analyzing document: " + p_url); // enable element complete notifications to conserve memory reader.addHandler("/entries/conceptGrp", new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount; } public void onEnd(ElementPath path) { Element element = path.getCurrent(); // prune the current element to reduce memory element.detach(); } }); ImportUtil.filterateXmlIllegal(p_url, m_options.getEncoding()); Document document = reader.read(p_url); // all done }
From source file:com.globalsight.terminology.importer.GTXmlReaderThread.java
License:Apache License
public void run() { try {//from w w w.j a v a2s . c o m SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); // enable pruning to call me back as each Element is complete reader.addHandler("/entries/conceptGrp", new ElementHandler() { public void onStart(ElementPath path) { } public void onEnd(ElementPath path) { Element element = path.getCurrent(); // prune the current element to reduce memory element.detach(); Document doc = m_factory.createDocument(element); Entry entry = new Entry(doc); m_result = m_results.hireResult(); m_result.setResultObject(entry); boolean done = m_results.put(m_result); m_result = null; // Stop reading the TMX file. if (done) { throw new ThreadDeath(); } } }); String url = m_options.getFileName(); reader.read(url); } catch (ThreadDeath ignore) { CATEGORY.info("ReaderThread: interrupted."); } catch (Throwable ignore) { // Should never happen, and I don't know how to handle // this case other than passing the exception in // m_results, which I won't do for now. } finally { if (m_result != null) { m_results.fireResult(m_result); m_result = null; } m_results.producerDone(); m_results = null; CATEGORY.debug("ReaderThread: done."); } }
From source file:com.globalsight.terminology.importer.MtfReader.java
License:Apache License
/** * Reads an XML file and checks for correctness. If there's any * error in the file, an exception is thrown. *//* w w w.j a v a2 s . com*/ private void analyzeXml(String p_url) throws Exception { SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); CATEGORY.debug("Analyzing document: " + p_url); // enable element complete notifications to conserve memory reader.addHandler("/mtf/conceptGrp", new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount; } public void onEnd(ElementPath path) { Element element = path.getCurrent(); // prune the current element to reduce memory element.detach(); // TODO: validate entry and report errors. } }); Document document = reader.read(p_url); // all done }
From source file:com.globalsight.terminology.importer.MtfReaderThread.java
License:Apache License
public void run() { try {// www . ja v a 2 s .c o m SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); // enable pruning to call me back as each Element is complete reader.addHandler("/mtf/conceptGrp", new ElementHandler() { public void onStart(ElementPath path) { m_count++; } public void onEnd(ElementPath path) { Element element = path.getCurrent(); // prune the current element to reduce memory element.detach(); m_result = m_results.hireResult(); try { // Convert MultiTerm to GlobalSight. element = convertMtf(element); Document doc = m_factory.createDocument(element); Entry entry = new Entry(doc); if (CATEGORY.isDebugEnabled()) { CATEGORY.debug(entry.getXml()); } m_result.setResultObject(entry); } catch (Throwable ex) { String msg = "Entry " + m_count + ": " + ex.getMessage(); m_result.setError(msg); if (CATEGORY.isDebugEnabled()) { CATEGORY.debug(msg, ex); } else { CATEGORY.warn(msg, ex); } } boolean done = m_results.put(m_result); m_result = null; // Stop reading the XML file. if (done) { throw new ThreadDeath(); } } }); String url = m_options.getFileName(); Document document = reader.read(url); } catch (ThreadDeath ignore) { CATEGORY.info("ReaderThread: interrupted."); } catch (Throwable ignore) { // Should never happen, and I don't know how to handle // this case other than passing the exception in // m_results, which I won't do for now. CATEGORY.error("unexpected error", ignore); } finally { if (m_result != null) { m_results.fireResult(m_result); m_result = null; } m_results.producerDone(); m_results = null; CATEGORY.debug("ReaderThread: done."); } }
From source file:com.globalsight.terminology.importer.TbxReader.java
License:Apache License
private void analyzeTbx(String p_url) throws Exception { SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); CATEGORY.debug("Analyzing document: " + p_url); // enable element complete notifications to conserve memory reader.addHandler("/martif/text/body/termEntry", new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount;//from w w w. j ava2s. com } public void onEnd(ElementPath path) { Element element = path.getCurrent(); // prune the current element to reduce memory element.detach(); } }); Document document = reader.read(p_url); }
From source file:com.globalsight.terminology.importer.TbxReaderThread.java
License:Apache License
public void run() { try {/*from w ww . j a v a2 s . co m*/ SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); // enable pruning to call me back as each Element is complete reader.addHandler("/martif/text/body/termEntry", new ElementHandler() { public void onStart(ElementPath path) { } public void onEnd(ElementPath path) { Element element = path.getCurrent(); // prune the current element to reduce memory element.detach(); Document doc = m_factory.createDocument(element); Entry entry = new Entry(doc); m_result = m_results.hireResult(); m_result.setResultObject(entry); boolean done = m_results.put(m_result); m_result = null; // Stop reading the TMX file. if (done) { throw new ThreadDeath(); } } }); String url = m_options.getFileName(); Document document = reader.read(url); } catch (ThreadDeath ignore) { CATEGORY.info("ReaderThread: interrupted."); } catch (Throwable ignore) { // Should never happen, and I don't know how to handle // this case other than passing the exception in // m_results, which I won't do for now. } finally { if (m_result != null) { m_results.fireResult(m_result); m_result = null; } m_results.producerDone(); m_results = null; CATEGORY.debug("ReaderThread: done."); } }
From source file:com.globalsight.terminology.util.MtfAnalyzer.java
License:Apache License
public void analyze(String p_url) throws Exception { m_entryCount = 0;//from ww w. j a va 2 s .c o m SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); System.err.println("Analyzing document: " + p_url); // enable element complete notifications to conserve memory reader.addHandler("/mtf/conceptGrp", new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount; if (m_entryCount % 200 == 0) { log("Entry " + m_entryCount); } } public void onEnd(ElementPath path) { Element element = path.getCurrent(); // prune the current element to reduce memory element.detach(); element = null; } }); Document document = reader.read(p_url); log("Total entries: " + m_entryCount); // all done }
From source file:com.globalsight.terminology.util.MtfSplitter.java
License:Apache License
public void split(String p_url, String p_numEntries) throws Exception { final int maxEntries = Integer.parseInt(p_numEntries); final String baseName = getBaseName(p_url); final String extension = getExtension(p_url); m_entryCount = 0;/*from w w w . j a v a2 s . c o m*/ SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); log("Splitting document `" + p_url + "'"); startFile(baseName, extension); // enable element complete notifications to conserve memory reader.addHandler("/mtf/conceptGrp", new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount; if (m_entryCount % maxEntries == 0) { try { closeFile(); startFile(baseName, extension); } catch (Exception ex) { log(ex.toString()); System.exit(1); } } } public void onEnd(ElementPath path) { Element element = path.getCurrent(); writeEntry(element.asXML()); // prune the current element to reduce memory element.detach(); element = null; } }); Document document = reader.read(p_url); closeFile(); // all done }