List of usage examples for the org.dom4j.io.SAXReader read method
public Document read(InputSource in) throws DocumentException
Reads a Document from the given InputSource using SAX.
From source file:com.globalsight.terminology.importer.MtfReader.java
License:Apache License
/** * Reads an XML file and checks for correctness. If there's any * error in the file, an exception is thrown. *///from ww w.jav a 2 s. c o m private void analyzeXml(String p_url) throws Exception { SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); CATEGORY.debug("Analyzing document: " + p_url); // enable element complete notifications to conserve memory reader.addHandler("/mtf/conceptGrp", new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount; } public void onEnd(ElementPath path) { Element element = path.getCurrent(); // prune the current element to reduce memory element.detach(); // TODO: validate entry and report errors. } }); Document document = reader.read(p_url); // all done }
From source file:com.globalsight.terminology.importer.MtfReaderThread.java
License:Apache License
public void run() { try {/* ww w . ja v a 2 s.c o m*/ SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); // enable pruning to call me back as each Element is complete reader.addHandler("/mtf/conceptGrp", new ElementHandler() { public void onStart(ElementPath path) { m_count++; } public void onEnd(ElementPath path) { Element element = path.getCurrent(); // prune the current element to reduce memory element.detach(); m_result = m_results.hireResult(); try { // Convert MultiTerm to GlobalSight. element = convertMtf(element); Document doc = m_factory.createDocument(element); Entry entry = new Entry(doc); if (CATEGORY.isDebugEnabled()) { CATEGORY.debug(entry.getXml()); } m_result.setResultObject(entry); } catch (Throwable ex) { String msg = "Entry " + m_count + ": " + ex.getMessage(); m_result.setError(msg); if (CATEGORY.isDebugEnabled()) { CATEGORY.debug(msg, ex); } else { CATEGORY.warn(msg, ex); } } boolean done = m_results.put(m_result); m_result = null; // Stop reading the XML file. if (done) { throw new ThreadDeath(); } } }); String url = m_options.getFileName(); Document document = reader.read(url); } catch (ThreadDeath ignore) { CATEGORY.info("ReaderThread: interrupted."); } catch (Throwable ignore) { // Should never happen, and I don't know how to handle // this case other than passing the exception in // m_results, which I won't do for now. CATEGORY.error("unexpected error", ignore); } finally { if (m_result != null) { m_results.fireResult(m_result); m_result = null; } m_results.producerDone(); m_results = null; CATEGORY.debug("ReaderThread: done."); } }
From source file:com.globalsight.terminology.importer.TbxReader.java
License:Apache License
private void analyzeTbx(String p_url) throws Exception { SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); CATEGORY.debug("Analyzing document: " + p_url); // enable element complete notifications to conserve memory reader.addHandler("/martif/text/body/termEntry", new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount;//from w w w . j a va2s.co m } public void onEnd(ElementPath path) { Element element = path.getCurrent(); // prune the current element to reduce memory element.detach(); } }); Document document = reader.read(p_url); }
From source file:com.globalsight.terminology.importer.TbxReaderThread.java
License:Apache License
public void run() { try {/*from w ww . j a v a2 s . c om*/ SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); // enable pruning to call me back as each Element is complete reader.addHandler("/martif/text/body/termEntry", new ElementHandler() { public void onStart(ElementPath path) { } public void onEnd(ElementPath path) { Element element = path.getCurrent(); // prune the current element to reduce memory element.detach(); Document doc = m_factory.createDocument(element); Entry entry = new Entry(doc); m_result = m_results.hireResult(); m_result.setResultObject(entry); boolean done = m_results.put(m_result); m_result = null; // Stop reading the TMX file. if (done) { throw new ThreadDeath(); } } }); String url = m_options.getFileName(); Document document = reader.read(url); } catch (ThreadDeath ignore) { CATEGORY.info("ReaderThread: interrupted."); } catch (Throwable ignore) { // Should never happen, and I don't know how to handle // this case other than passing the exception in // m_results, which I won't do for now. } finally { if (m_result != null) { m_results.fireResult(m_result); m_result = null; } m_results.producerDone(); m_results = null; CATEGORY.debug("ReaderThread: done."); } }
From source file:com.globalsight.terminology.util.MtfAnalyzer.java
License:Apache License
public void analyze(String p_url) throws Exception { m_entryCount = 0;// w ww. j a v a 2 s .co m SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); System.err.println("Analyzing document: " + p_url); // enable element complete notifications to conserve memory reader.addHandler("/mtf/conceptGrp", new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount; if (m_entryCount % 200 == 0) { log("Entry " + m_entryCount); } } public void onEnd(ElementPath path) { Element element = path.getCurrent(); // prune the current element to reduce memory element.detach(); element = null; } }); Document document = reader.read(p_url); log("Total entries: " + m_entryCount); // all done }
From source file:com.globalsight.terminology.util.MtfSplitter.java
License:Apache License
public void split(String p_url, String p_numEntries) throws Exception { final int maxEntries = Integer.parseInt(p_numEntries); final String baseName = getBaseName(p_url); final String extension = getExtension(p_url); m_entryCount = 0;//from w ww . j a va 2 s . co m SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); log("Splitting document `" + p_url + "'"); startFile(baseName, extension); // enable element complete notifications to conserve memory reader.addHandler("/mtf/conceptGrp", new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount; if (m_entryCount % maxEntries == 0) { try { closeFile(); startFile(baseName, extension); } catch (Exception ex) { log(ex.toString()); System.exit(1); } } } public void onEnd(ElementPath path) { Element element = path.getCurrent(); writeEntry(element.asXML()); // prune the current element to reduce memory element.detach(); element = null; } }); Document document = reader.read(p_url); closeFile(); // all done }
From source file:com.globalsight.util.ConfigUtil.java
License:Apache License
/**
 * Loads the configuration from {@code Constants.CONFIG_FILE_NAME}.
 *
 * <p>The server settings are read from {@code /Configuration/server}
 * ({@code host}, {@code port}, {@code username}, {@code password},
 * {@code https}); the two intervals are read from the first
 * {@code intervalTimeForArchive} and {@code intervalTime} elements found
 * anywhere in the document.
 *
 * @return the configuration, or {@code null} if the file cannot be parsed,
 *         a required element is missing, or a numeric value is malformed
 *         (the failure is logged)
 */
public static ConfigBO getConfigBO() {
    try {
        SAXReader saxReader = new SAXReader();
        Document document = saxReader.read(new File(Constants.CONFIG_FILE_NAME));

        Node serverNode = document.selectSingleNode("/Configuration/server");
        if (serverNode == null) {
            throw new DocumentException("Missing configuration element: /Configuration/server");
        }

        String hostName = readRequiredText(serverNode, "host");
        int port = readRequiredInt(serverNode, "port");
        String userName = readRequiredText(serverNode, "username");
        String password = readRequiredText(serverNode, "password");
        boolean isUseHTTPS = Boolean.parseBoolean(readRequiredText(serverNode, "https"));

        int intervalTimeForArchive = readRequiredInt(document, "//intervalTimeForArchive");
        int intervalTime = readRequiredInt(document, "//intervalTime");

        return new ConfigBO(hostName, port, userName, password, intervalTimeForArchive, intervalTime,
                isUseHTTPS);
    } catch (DocumentException e) {
        LogUtil.info("Fail to read configuration info", e);
    }

    return null;
}

/**
 * Returns the text of the node selected by {@code xpath} relative to
 * {@code parent}, failing with a {@code DocumentException} instead of a
 * {@code NullPointerException} when no such node exists.
 */
private static String readRequiredText(Node parent, String xpath) throws DocumentException {
    Node node = parent.selectSingleNode(xpath);
    if (node == null) {
        throw new DocumentException("Missing configuration element: " + xpath);
    }
    return node.getText();
}

/**
 * Returns the integer value of the node selected by {@code xpath}, turning a
 * malformed number into a {@code DocumentException} that keeps the cause.
 */
private static int readRequiredInt(Node parent, String xpath) throws DocumentException {
    String text = readRequiredText(parent, xpath);
    try {
        return Integer.parseInt(text);
    } catch (NumberFormatException e) {
        throw new DocumentException("Invalid integer for configuration element " + xpath + ": " + text, e);
    }
}
From source file:com.globalsight.util.file.XliffFileUtil.java
License:Apache License
/**
 * Checks whether the specified file contains more than one {@code <file>}
 * element directly under its root element.
 *
 * <p>The element name is compared case-insensitively and independently of
 * the default locale.
 *
 * @param p_filename
 *            File name
 * @return {@code true} if the file contains multiple {@code <file>} tags;
 *         {@code false} otherwise, including when the name is empty, the
 *         file does not exist, or it cannot be parsed (the error is logged)
 *
 * @version 1.0
 * @since 8.2.2
 */
public static boolean isMultipleFileTags(String p_filename) {
    if (StringUtil.isEmpty(p_filename)) {
        return false;
    }

    try {
        File file = new File(p_filename);
        if (!file.exists() || !file.isFile()) {
            return false;
        }

        Document document = new SAXReader().read(file);
        Element rootElement = document.getRootElement();

        int numOfFileTags = 0;
        for (Iterator<Element> iterator = rootElement.elementIterator(); iterator.hasNext();) {
            String tag = iterator.next().getName().trim();
            // equalsIgnoreCase avoids the locale-sensitive toLowerCase(),
            // which maps 'I' to a dotless i under e.g. a Turkish locale.
            if ("file".equalsIgnoreCase(tag)) {
                numOfFileTags++;
                if (numOfFileTags > 1) {
                    // The answer is known; no need to scan the rest.
                    return true;
                }
            }
        }

        return false;
    } catch (Exception e) {
        logger.error("Can not verify if current file contains multiple file tags.", e);
        return false;
    }
}
From source file:com.globalsight.util.file.XliffFileUtil.java
License:Apache License
public static boolean containsFileTag(String p_filename) { if (StringUtil.isEmpty(p_filename)) return false; try {// ww w .j a v a2 s . co m SAXReader saxReader = new SAXReader(); Document document = null; Element rootElement = null; File file = new File(p_filename); if (file.exists() && file.isFile()) { document = saxReader.read(file); rootElement = document.getRootElement(); String tag = ""; List fileTags = rootElement.elements("file"); if (fileTags != null) { for (int i = 0; i < fileTags.size(); i++) { Element element = (Element) fileTags.get(i); String attr = element.attributeValue("tool"); if (attr == null) return false; else { if (attr.toLowerCase().contains("worldserver")) return true; } } } } return false; } catch (Exception e) { logger.error("Can not verify if current file contains multiple file tags.", e); return false; } }
From source file:com.globalsight.util.file.XliffFileUtil.java
License:Apache License
/** * Generate separated files in single file level according with content in * orignal Xliff file//from w w w . j a va2 s. co m * * @param multipleFileTagsXliff * Object contains all separated files * @param absoluteFilename * File name with absolute path * @param header * Header content of original Xliff file * @param content * Main content of original Xliff file * @param footer * Footer content of original Xliff file * * @version 1.0 * @since 8.2.2 */ private static void generateSeparatedFiles(MultipleFileTagsXliff multipleFileTagsXliff, String absoluteFilename, String header, String content, String footer) { try { String absolutePath = getBaseAbsoluatePath(absoluteFilename); String mainName = getMainFilename(absoluteFilename); String separatedDirPath = mainName + SEPARATE_FLAG; String absouateSeparatedPath = absolutePath + File.separator + separatedDirPath; File file = new File(absouateSeparatedPath); file.mkdirs(); int lengthOfFileEndTag = "</file>".length(); int beginIndex = -1, endIndex = -1; String subContent = ""; SAXReader saxReader = new SAXReader(); Document document = null; Element rootElement = null, fileTagElement = null, fileHeaderElement = null; String originalSubFilename = ""; String segAssetId = ""; BufferedWriter fout = null; ArrayList<String> separatedFiles = new ArrayList<String>(); String subFilename = ""; int count = 1; while ((beginIndex = content.indexOf("<file ")) > -1) { endIndex = content.indexOf("</file>") + lengthOfFileEndTag; subContent = header + content.substring(beginIndex, endIndex) + footer; // Use XML parser to get element value document = saxReader.read(new StringReader(subContent)); rootElement = document.getRootElement(); fileTagElement = rootElement.element("file"); originalSubFilename = fileTagElement.attributeValue("original").replace("/", File.separator); originalSubFilename = originalSubFilename .substring(originalSubFilename.lastIndexOf(File.separator) + 1); fileHeaderElement = fileTagElement.element("header"); if 
(fileHeaderElement != null && fileHeaderElement.element("asset-data") != null) { segAssetId = fileHeaderElement.element("asset-data").elementText("seg_asset_id"); } // Generate file name of separated file subFilename = originalSubFilename + "_" + segAssetId + "_" + count + ".xlf"; // Write separated file into disk file = new File(absouateSeparatedPath, subFilename); fout = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8")); fout.write(subContent); fout.close(); separatedFiles.add(getRelativePath(file.getAbsolutePath())); content = content.substring(endIndex); count++; } multipleFileTagsXliff.setCount(count); multipleFileTagsXliff.setSeparatedFiles(separatedFiles); multipleFileTagsXliff.setSeparatedFolderName(separatedDirPath); } catch (Exception e) { logger.error("Error in generateExtractedFile.", e); } }