List of usage examples for org.dom4j.io SAXReader SAXReader
public SAXReader()
From source file:com.globalsight.everest.edit.offline.OfflineEditManagerLocal.java
License:Apache License
/** * Convert xliff elements to Pseudo//from w w w . j a v a2s . co m * * @param file * : the file to offline upload. * @return * @throws Exception */ private String convertXlif2Pseudo(DetectionResult detect, File file, User p_user, String p_fileName) throws Exception { String errMsg = null; org.dom4j.Document doc = null; try { SAXReader reader = new SAXReader(); doc = reader.read(file); } catch (Exception e) { s_category.error(e.getMessage(), e); throw new AmbassadorDwUpException(AmbassadorDwUpExceptionConstants.INVALID_FILE_FORMAT, e); } // Get all jobIds from uploading file. If combined, there will be // multiple tasks/pages/jobs in one file. HashSet<Long> jobIds = getJobIdsFromDoc(doc); PtagErrorPageWriter errWriter = new PtagErrorPageWriter(); errWriter.setFileName(p_fileName); errWriter.setPageId(XliffFileUtil.getPageId(doc)); errWriter.setTaskId(XliffFileUtil.getTaskId(doc)); errWriter.setWorkflowId(XliffFileUtil.getWorkflowId(doc)); reWrapXliff(doc, jobIds); errMsg = convertNode2Pseudo(doc, XliffConstants.SOURCE, p_fileName, jobIds, errWriter); if (errMsg != null) return errMsg; errMsg = convertNode2Pseudo(doc, XliffConstants.TARGET, p_fileName, jobIds, errWriter); if (errMsg != null) return errMsg; Transaction tx = HibernateUtil.getTransaction(); try { addComment(doc, p_user, jobIds); HibernateUtil.commit(tx); } catch (Exception e) { HibernateUtil.rollback(tx); throw e; } try { XlfParser parser = new XlfParser(); String xlfContent = parser.parseToTxt(doc); StringReader sr = new StringReader(xlfContent); detect.m_reader = new BufferedReader(sr); } catch (Exception e) { s_category.error(e.getMessage(), e); throw e; } return errMsg; }
From source file:com.globalsight.everest.edit.offline.ttx.TTXParser.java
License:Apache License
/**
 * Parses the XML supplied by the given reader with a new SAXReader.
 *
 * @param reader
 *            the character stream to parse
 * @return the parsed document
 * @throws DocumentException
 *             if the reader's content cannot be parsed
 */
public Document getDocument(Reader reader) throws DocumentException {
    return new SAXReader().read(reader);
}
From source file:com.globalsight.everest.edit.offline.ttx.TTXParser.java
License:Apache License
/**
 * Parses the given XML file with a new SAXReader.
 *
 * @param file
 *            the file to parse
 * @return the parsed document
 * @throws Exception
 *             if the file cannot be parsed
 */
public Document getDocument(File file) throws Exception {
    return new SAXReader().read(file);
}
From source file:com.globalsight.everest.edit.offline.xliff.ListViewWorkXLIFFWriter.java
License:Apache License
/**
 * Returns the SAXReader held in the {@code reader} field, creating and
 * storing it on the first call. No synchronization is performed here.
 *
 * @return the shared reader instance
 */
private static SAXReader getSAXReader() {
    if (reader != null) {
        return reader;
    }
    reader = new SAXReader();
    return reader;
}
From source file:com.globalsight.everest.page.WsPageTemplateExtention.java
License:Apache License
/**
 * Concatenates the strings that {@code ListViewWorkXLIFFWriter.getAltByMatch}
 * returns for every leverage match accepted by {@code judgeIfneedAdd}.
 * <p>
 * The list is first handed to {@code LeverageMatch.orderMatchResult}; one
 * SAXReader is created and reused for all matches.
 *
 * @param p_list
 *            the leverage matches; may be {@code null}
 * @param p_jobId
 *            job id passed through to {@code getAltByMatch}
 * @return the concatenated result, or an empty string when the list is
 *         {@code null} or no match is accepted
 */
private String getAltTransOfMatch(List<LeverageMatch> p_list, long p_jobId) {
    if (p_list == null) {
        return "";
    }

    LeverageMatch.orderMatchResult(p_list);

    ListViewWorkXLIFFWriter lvwx = new ListViewWorkXLIFFWriter();
    SAXReader reader = new SAXReader();

    // A builder avoids re-copying the accumulated text for every match.
    StringBuilder altTrans = new StringBuilder();
    for (LeverageMatch leverageMatch : p_list) {
        if (judgeIfneedAdd(leverageMatch)) {
            altTrans.append(lvwx.getAltByMatch(leverageMatch, null, reader, p_jobId));
        }
    }
    return altTrans.toString();
}
From source file:com.globalsight.everest.projecthandler.importer.XmlReader.java
License:Apache License
/** * Reads an XML file and checks its correctness by validating * against the TMX DTD. If there's any error in the file, an * exception is thrown./*w w w .ja v a 2s .c o m*/ */ private void analyzeXml(String p_url) throws Exception { CATEGORY.debug("Analyzing document: " + p_url); SAXReader reader = new SAXReader(); // TODO: Read the DTD and validate. // See com.globalsight.everest.tm.util.DtdResolver; // reader.setEntityResolver(DtdResolver.getInstance()); // reader.setValidation(true); // enable element complete notifications to conserve memory //TODO reader.addHandler("/projectdata/data", new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount; } public void onEnd(ElementPath path) { Element element = path.getCurrent(); // prune the current element to reduce memory element.detach(); } }); Document document = reader.read(p_url); // all done }
From source file:com.globalsight.everest.projecthandler.importer.XmlReaderThread.java
License:Apache License
public void run() { try {// w ww . j ava 2 s.co m SAXReader reader = new SAXReader(); // TODO: Read the DTD and validate. // See com.globalsight.everest.tm.util.DtdResolver; // reader.setEntityResolver(DtdResolver.getInstance()); // reader.setValidation(true); reader.addHandler("/projectdata", new ElementHandler() { public void onStart(ElementPath path) { Element element = path.getCurrent(); } public void onEnd(ElementPath path) { } }); // enable pruning to call me back as each Element is complete reader.addHandler("/projectdata/data", new ElementHandler() { public void onStart(ElementPath path) { m_count++; } public void onEnd(ElementPath path) { Element element = path.getCurrent(); // prune the current element to reduce memory element.detach(); m_result = m_results.hireResult(); try { // TODO: Create data objects Object o = /*createObject*/(element); if (CATEGORY.isDebugEnabled()) { CATEGORY.debug(o); } m_result.setResultObject(o); } catch (Throwable ex) { m_result.setError(ex.toString()); CATEGORY.warn("Error in object " + m_count, ex); } boolean done = m_results.put(m_result); m_result = null; // Stop reading the file. if (done) { throw new ThreadDeath(); } } }); String url = m_options.getFileName(); Document document = reader.read(url); } catch (ThreadDeath ignore) { CATEGORY.info("ReaderThread: interrupted"); } catch (Throwable ignore) { // Should never happen, and I don't know how to handle // this case other than passing the exception in // m_results, which I won't do for now. CATEGORY.error("unexpected error", ignore); } finally { if (m_result != null) { m_results.fireResult(m_result); } m_results.producerDone(); m_results = null; CATEGORY.debug("ReaderThread: done."); } }
From source file:com.globalsight.everest.segmentationhelper.XmlLoader.java
License:Apache License
/** * Transfer xml file into Document./*from w w w.j av a2 s . c o m*/ * * @param file */ private static Document parserWithSAX(File file) throws Exception { SAXReader xmlReader = new SAXReader(); Document doc = null; try { doc = xmlReader.read(file); } catch (Exception e) { e.printStackTrace(); throw new Exception(e.getMessage()); } return doc; }
From source file:com.globalsight.everest.segmentationhelper.XmlLoader.java
License:Apache License
/**
 * Parses XML text into a Document.
 *
 * @param xmltext
 *            the XML content to parse
 * @return the parsed document
 * @throws Exception
 *             if the text cannot be parsed; the message is that of the
 *             underlying failure, which is attached as the cause
 */
private static Document parseWithSAX(String xmltext) throws Exception {
    StringReader sr = new StringReader(xmltext);
    SAXReader xmlReader = new SAXReader();
    try {
        return xmlReader.read(sr);
    } catch (Exception e) {
        // Chain the cause so the original stack trace travels with the
        // exception instead of being printed to stderr and lost.
        throw new Exception(e.getMessage(), e);
    }
}
From source file:com.globalsight.everest.tm.importer.ImportUtil.java
License:Apache License
/** * Saves a TM file with sample validation. * /*from w w w . ja v a2 s . co m*/ * For some TM files, it vary easy to happen encoding error or xml role * error and can't be import correct. This method try to do some sample * validations for each tu. If a tu will be give up if inducing a error. * * @param fileName * @throws Exception */ public void saveTmFileWithValidation(File file, File newFile, TmProcessStatus status) throws Exception { String encoding = "UTF-8"; String outEncoding = "UTF-8"; String logEncoding = "Unicode"; String strLine = System.getProperty("line.separator"); int errorCount = 0; int totalCount = 0; long lineCounter = 0; String s = null; try { if (file.exists()) { CATEGORY.info("Validating TM file: " + newFile.getAbsolutePath()); Date startTime = new Date(); File errorFile = getErrorFile(newFile); File infoFile = getInfoFile(newFile); File logFile = getLogFile(newFile); encoding = getEncodingOfXml(file); // GBS-2932 : UTF-8 by default if (encoding == null) { encoding = "UTF-8"; } // Initialize IO. FileInputStream fIn = new FileInputStream(file); BufferedReader in = new BufferedReader(new InputStreamReader(fIn, encoding)); FileOutputStream fOut = new FileOutputStream(newFile); BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fOut, outEncoding)); FileOutputStream fError = new FileOutputStream(errorFile); OutputStreamWriter error = new OutputStreamWriter(fError, logEncoding); FileOutputStream fInfo = new FileOutputStream(infoFile); BufferedWriter info = new BufferedWriter(new OutputStreamWriter(fInfo, logEncoding)); FileOutputStream fLog = new FileOutputStream(logFile); OutputStreamWriter log = new OutputStreamWriter(fLog, logEncoding); writeHead(error); writeHead(log); StringBuilder sb = new StringBuilder(); // It must be <?xml ... 
s = in.readLine(); s = changeXmlEncodingDec(s, outEncoding); status.addSize(s.getBytes(encoding).length); if (CATEGORY.isDebugEnabled()) { CATEGORY.debug("The content of in.readLine for encoding is " + s); } sb.append(s); sb.append(strLine); status.addSize(s.getBytes(encoding).length); // If the second line is define dtd s = in.readLine(); if (CATEGORY.isDebugEnabled()) { CATEGORY.debug("The content of in.readLine for doctype is " + s); } if (s != null && s.indexOf("<!DOCTYPE") > -1) { status.addSize(s.getBytes(encoding).length); sb.append(s); sb.append(strLine); s = in.readLine(); if (CATEGORY.isDebugEnabled()) { CATEGORY.debug("The content of in.readLine is " + s); } } else if (newFile.getName().endsWith("tmx")) { // Don't define the dtd, add it. sb.append(TMX_DTD_LINE); sb.append(strLine); } boolean isRemoved = false; int count = 0; SAXReader reader = new SAXReader(); while (s != null) { if (status.isCanceled()) { CATEGORY.info("Cancelled validating"); break; } status.addSize(s.getBytes(encoding).length); if (isHeaderStart(s) && isTradosFontTableStart(s) && isHeaderEnd(s)) { int headerEndTag = s.indexOf(">"); sb.append(s.subSequence(0, headerEndTag + 1)); int endHeaderTag = s.indexOf("</header>"); sb.append(s.substring(endHeaderTag)); sb.append(endHeaderTag); sb.append(strLine); } if (isRemoved) { if (isTradosFontTableEnd(s)) { isRemoved = false; } s = in.readLine(); continue; } if (isTuStartTag(s)) { /* The begin of the tu */ // Saves information recoded. if (sb.length() > 0) { out.write(sb.toString()); out.flush(); } sb = resetStringBuilder(sb); sb.append(s); sb.append(strLine); totalCount++; } // Validate for the tu. else if (isTuEndTag(s)) { /* The end of the tu */ sb.append(s); sb.append(strLine); String content = sb.toString(); try { /* verify the content */ reader.read(new StringReader(content)); // Saves the tu if no exception happen. out.write(content); out.flush(); } catch (Exception e) { // Give up the tu if any exception happened. 
error.write(content); log.write(strLine); log.write(SPLIT_LINE); log.write(Integer.toString(++errorCount)); log.write(SPLIT_LINE); log.write(strLine); log.write(content); log.write(strLine); log.write(e.getMessage()); log.write(strLine); } sb = resetStringBuilder(sb); } else if (isTradosFontTableStart(s)) { count++; isRemoved = true; } else if (count > 0 && isTradosFontTableEnd(s)) { isRemoved = false; } else if ((count > 0) && isHeaderEnd(s)) { sb.append("</header>"); sb.append(strLine); } else { // Records informations which not included in tu, first // line // etc. sb.append(s); sb.append(strLine); } s = in.readLine(); if (CATEGORY.isDebugEnabled()) { CATEGORY.debug("The content of in.readLine is " + s); } lineCounter++; } // Records informations which not included in tu and not saved // to // file. Usually it is "</body> </tmx>". if (sb.length() > 0) { out.write(sb.toString()); out.flush(); } in.close(); out.close(); CATEGORY.info("Done validating"); log.write(SPLIT_LINE + SPLIT_LINE + strLine + strLine); log.write("Error: " + errorCount + strLine); log.write("Total: " + totalCount + strLine); // Gets the cost time. Date endTime = new Date(); long costTime = endTime.getTime() - startTime.getTime(); long h = costTime / (1000 * 60 * 60); costTime = costTime % (1000 * 60 * 60); long m = costTime / (1000 * 60); costTime = costTime % (1000 * 60); long se = costTime / 1000; StringBuffer time = new StringBuffer("Cost time: "); time.append(h).append(" h ").append(m).append(" m ").append(se).append(" s "); // Recodes some sample informations. String msg = "Error: " + errorCount + strLine; info.write(msg); msg = "Total: " + totalCount + strLine; info.write(msg); info.write(time.toString()); writeFoot(error); writeFoot(log); error.flush(); error.close(); info.flush(); info.close(); log.flush(); log.close(); if (lineCounter > 10000) { CATEGORY.debug("forces jvm to perform gc when the line count reaches 10000. 
line count: " + lineCounter); System.gc(); } } } catch (IOException ie) { CATEGORY.error("IO Exception occured when save the tm file."); CATEGORY.error("The content of current line is " + s); CATEGORY.error("The stacktrace of the exception is ", ie); throw ie; } catch (Exception e) { CATEGORY.error("error occured when save the tm file."); CATEGORY.error("The content of current line is " + s); CATEGORY.error("The stacktrace of the exception is ", e); throw e; } status.setErrorTus(Integer.toString(errorCount)); status.setTotalTus(Integer.toString(totalCount)); }