List of usage examples for the read method of org.dom4j.io.SAXReader
public Document read(InputSource in) throws DocumentException
Reads a Document from the given InputSource using SAX.
From source file:com.globalsight.everest.tm.util.trados.TradosFmTmxToGxml.java
License:Apache License
/**
 * Main method to call, returns the new filename of the result.
 *
 * <p>Parses the TMX file at {@code p_url} with a validating Xerces-backed
 * {@code SAXReader}. Element handlers process the header, each TU and each
 * seg as soon as the element is complete and then detach it, so the whole
 * document is never held in memory.
 *
 * @param p_url location of the TMX file, passed to {@code SAXReader.read(String)}
 * @return the value of {@code m_filename} after the output file was closed
 * @throws Exception if preparing the output or reading the input fails
 */
public String convertToGxml(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);

    info("Converting TMX file to GXML: `" + p_url + "'");

    startOutputFile(baseName);
    m_entryCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // Capture the TMX version as soon as the root element starts.
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            Element element = path.getCurrent();
            m_version = element.attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // Convert the header once it is complete, then prune it.
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();
            setOldHeader(element);
            createNewHeader();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Write each TU once it is complete, unless one of its segs failed.
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
            m_tuError = false;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            if (m_tuError) {
                m_errorCount++;
            } else {
                writeEntry(element.asXML());
            }

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    // Replace the content of each seg with its GXML conversion.
    reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            try {
                String gxml = handleTuv(element);
                Document doc = parse("<root>" + gxml + "</root>");

                // Remove old content of seg. Walk backwards because
                // detach() removes the node from the list being indexed.
                List content = element.content();
                for (int i = content.size() - 1; i >= 0; --i) {
                    ((Node) content.get(i)).detach();
                }

                // Add new GXML content. The list is reversed and then
                // consumed from the end, which yields the original order
                // while keeping the remaining indexes valid after detach().
                content = doc.getRootElement().content();
                Collections.reverse(content);
                for (int i = content.size() - 1; i >= 0; --i) {
                    Node node = (Node) content.get(i);
                    element.add(node.detach());
                }
            } catch (Throwable ex) {
                // Best effort: a failing seg marks the enclosing TU as bad
                // so it is counted and skipped, but the cause is reported
                // instead of being dropped silently.
                m_tuError = true;
                info("Error converting segment in TU " + m_entryCount + ": " + ex);
            }
        }
    });

    // The returned Document is not needed; all work happens in the handlers.
    reader.read(p_url);

    closeOutputFile();

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "', "
            + m_errorCount + " errors.");

    return m_filename;
}
From source file:com.globalsight.everest.tm.util.trados.TradosHtmlTmxToGxml.java
License:Apache License
/**
 * Main method to call, returns the new filename of the result.
 *
 * <p>Parses the TMX file at {@code p_url} with a validating Xerces-backed
 * {@code SAXReader}. Element handlers process the header, each TU and each
 * seg as soon as the element is complete and then detach it, so the whole
 * document is never held in memory.
 *
 * @param p_url location of the TMX file, passed to {@code SAXReader.read(String)}
 * @return the value of {@code m_filename} after the output file was closed
 * @throws Exception if preparing the output or reading the input fails
 */
public String convertToGxml(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);

    info("Converting TMX file to GXML: `" + p_url + "'");

    startOutputFile(baseName);
    m_entryCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // Capture the TMX version as soon as the root element starts.
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            Element element = path.getCurrent();
            m_version = element.attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // Convert the header once it is complete, then prune it.
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();
            setOldHeader(element);
            createNewHeader();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Write each TU once it is complete, unless one of its segs failed.
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
            m_tuError = false;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            if (m_tuError) {
                m_errorCount++;
            } else {
                writeEntry(element.asXML());
            }

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    // Replace the content of each seg with its GXML conversion.
    reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            try {
                // From here on work with whatever removeUtElements returns.
                element = removeUtElements(element);

                String gxml = handleTuv(element.getText());
                Document doc = parse("<root>" + gxml + "</root>");

                // Remove old content of seg. Walk backwards because
                // detach() removes the node from the list being indexed.
                List content = element.content();
                for (int i = content.size() - 1; i >= 0; --i) {
                    ((Node) content.get(i)).detach();
                }

                // Add new GXML content. The list is reversed and then
                // consumed from the end, which yields the original order
                // while keeping the remaining indexes valid after detach().
                content = doc.getRootElement().content();
                Collections.reverse(content);
                for (int i = content.size() - 1; i >= 0; --i) {
                    Node node = (Node) content.get(i);
                    element.add(node.detach());
                }
            } catch (Throwable ex) {
                // Best effort: a failing seg marks the enclosing TU as bad
                // so it is counted and skipped, but the cause is reported
                // instead of being dropped silently.
                m_tuError = true;
                info("Error converting segment in TU " + m_entryCount + ": " + ex);
            }
        }
    });

    // The returned Document is not needed; all work happens in the handlers.
    reader.read(p_url);

    closeOutputFile();

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "', "
            + m_errorCount + " errors.");

    return m_filename;
}
From source file:com.globalsight.everest.tm.util.trados.TradosTmxToRtf.java
License:Apache License
/** * Main method to call, returns the new filename of the result. *///www . ja va 2 s . c o m public String convertToRtf(String p_url) throws Exception { final String baseName = getBaseName(p_url); final String extension = getExtension(p_url); info("Converting TMX file to RTF: `" + p_url + "'"); startOutputFile(baseName); m_entryCount = 0; // Reading from a file, need to use Xerces. SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); reader.setEntityResolver(DtdResolver.getInstance()); reader.setValidation(true); // enable element complete notifications to conserve memory reader.addHandler("/tmx", new ElementHandler() { public void onStart(ElementPath path) { Element element = path.getCurrent(); m_version = element.attributeValue("version"); } public void onEnd(ElementPath path) { } }); // enable element complete notifications to conserve memory reader.addHandler("/tmx/header", new ElementHandler() { public void onStart(ElementPath path) { } public void onEnd(ElementPath path) { Element element = path.getCurrent(); setOldHeader(element); Element prop = (Element) element.selectSingleNode("/prop[@type='RTFFontTable']"); if (prop != null) writeEntry(prop.getText()); prop = (Element) element.selectSingleNode("/prop[@type='RTFStyleSheet']"); if (prop != null) writeEntry(prop.getText()); writeOtherRtfHeader(); writeDummyParagraph(); // prune the current element to reduce memory element.detach(); element = null; } }); // enable element complete notifications to conserve memory reader.addHandler("/tmx/body/tu", new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount; } public void onEnd(ElementPath path) { Element element = path.getCurrent(); element = removeUtElements(element); writeEntry(replaceUnicodeChars(removeRtfParagraphs(element.asXML()))); writeEntry("\\par"); // prune the current element to reduce memory element.detach(); element = null; if (m_entryCount % 1000 == 0) { debug("Entry " + 
m_entryCount); } } }); Document document = reader.read(p_url); closeOutputFile(); info("Processed " + m_entryCount + " TUs into file `" + m_filename + "'"); return m_filename; }
From source file:com.globalsight.everest.tm.util.ttx.TtxClean.java
License:Apache License
/** * Main method to call, returns the new filename of the result. *///from w w w . j av a 2 s . c o m public String cleanTtx(String p_url, boolean p_cleanTarget, String p_encoding) throws Exception { m_cleanTarget = p_cleanTarget; // File is called <file>.<ext>.<ttx> final String origName = getBaseName(p_url); final String baseName = getBaseName(origName); final String extension = getExtension(origName); info("Cleaning TTX file to " + (m_cleanTarget ? "target" : "source") + ": `" + p_url + "'"); m_entryCount = 0; // Reading from a file, need to use Xerces. SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); //reader.setEntityResolver(DtdResolver.getInstance()); //reader.setValidation(true); // Fetch the version info early. reader.addHandler("/TRADOStag", new ElementHandler() { public void onStart(ElementPath path) { Element element = path.getCurrent(); m_version = element.attributeValue(Ttx.VERSION); } public void onEnd(ElementPath path) { } }); // Fetch the header info early. reader.addHandler("/TRADOStag/FrontMatter", new ElementHandler() { public void onStart(ElementPath path) { } public void onEnd(ElementPath path) { Element element = path.getCurrent(); setOldHeader(element); } }); // Read in the entire file (it's not too big normally). Document document = reader.read(p_url); Element body = (Element) document.getRootElement().selectSingleNode("//Body/Raw"); // Remove <ut>, <df> and pull out one TUV. 
processBody(body); String content = getInnerText(body); String encoding; if (m_cleanTarget) { if (p_encoding != null) { encoding = p_encoding; } else { encoding = "UTF-8"; } } else { // reuse original encoding encoding = m_header.getOriginalEncoding(); } String locale; if (m_cleanTarget) { locale = m_header.getTargetLanguage(); } else { locale = m_header.getSourceLanguage(); } startOutputFile(baseName, locale, extension, encoding); writeEntry(content); closeOutputFile(); info("Result written to file `" + m_filename + "'."); return m_filename; }
From source file:com.globalsight.everest.tm.util.ttx.TtxToTmx.java
License:Apache License
/**
 * Main method to call, returns the new filename of the result.
 *
 * <p>Parses the TTX file at {@code p_url}. The output file is started once
 * the front matter has been read, and every {@code Tu} element found on one
 * of the known paths is cleaned up, written and then detached.
 *
 * @param p_url location of the TTX file, passed to {@code SAXReader.read(String)}
 * @return the value of {@code m_filename} after the output file was closed
 * @throws Exception if reading the input fails
 */
public String convertTtxToTmx(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    // Not used below; the call is kept as in the original code.
    final String extension = getExtension(baseName);

    info("Converting TTX file to TMX: `" + p_url + "'");

    m_entryCount = 0;

    // Reading from a file, need to use Xerces. No entity resolver is set
    // and validation is not enabled for TTX input.
    SAXReader ttxReader = new SAXReader();
    ttxReader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    // Fetch the version info early.
    ElementHandler versionHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
            m_version = path.getCurrent().attributeValue(Ttx.VERSION);
        }

        public void onEnd(ElementPath path) {
        }
    };
    ttxReader.addHandler("/TRADOStag", versionHandler);

    // Fetch the header info early and open the output file.
    ElementHandler frontMatterHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element frontMatter = path.getCurrent();
            setTtxHeader(frontMatter);

            try {
                startOutputFile(baseName);
            } catch (Exception ex) {
                // NOTE(review): terminates the whole JVM when the output
                // file cannot be started.
                error(ex.toString());
                System.exit(1);
            }

            // prune the current element to reduce memory
            frontMatter.detach();
        }
    };
    ttxReader.addHandler("/TRADOStag/FrontMatter", frontMatterHandler);

    ElementHandler tuHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
        }

        public void onEnd(ElementPath path) {
            Element tu = cleanupTu(path.getCurrent());
            writeEntry(tu.asXML());

            // prune the current element to reduce memory
            tu.detach();

            if (m_entryCount % 50 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    };

    // Path handlers cannot use "//", sooo specify all known paths.
    final String[] tuPaths = {
        "/TRADOStag/Body/Raw/Tu",
        "/TRADOStag/Body/Raw/df/Tu",
        "/TRADOStag/Body/Raw/ut/Tu",
        "/TRADOStag/Body/Raw/df/ut/Tu"
    };
    for (int i = 0; i < tuPaths.length; i++) {
        ttxReader.addHandler(tuPaths[i], tuHandler);
    }

    // Run the parse; the handlers above do the actual work.
    Document document = ttxReader.read(p_url);

    closeOutputFile();

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "'");

    return m_filename;
}
From source file:com.globalsight.everest.webapp.pagehandler.administration.config.xmldtd.XmlDtdManager.java
License:Apache License
/**
 * Validates an XML file against the DTD file registered for it.
 *
 * <p>Nothing is validated when the file name does not end with ".xml" or
 * when no existing DTD file is found for the given id and file.
 *
 * @param id
 *            The xml dtd id.
 * @param file
 *            The xml file that needs to be validated.
 * @throws DtdException
 *             wrapping any exception raised while reading the file with
 *             validation turned on
 */
public static void validateXmlFile(long id, File file) throws DtdException {
    Assert.assertFileExist(file);

    if (!file.getName().endsWith(".xml")) {
        return;
    }
    logger.debug("File: " + file.getPath());

    File dtd = DtdFileManager.getDtdFile(id, file);
    if (dtd == null || !dtd.exists()) {
        return;
    }
    logger.debug("DTD: " + dtd.getPath());

    SAXReader validatingReader = new SAXReader();
    validatingReader.setEntityResolver(new DtdEntityResolver(dtd));
    validatingReader.setValidation(true);

    try {
        // Only the parse outcome matters; the content is discarded right away.
        Document parsed = validatingReader.read(file);
        parsed.clearContent();
        logger.debug("Successful");
    } catch (Exception e) {
        logger.info("DTD validation failed: " + e.getMessage());
        throw new DtdException(e);
    }
}
From source file:com.globalsight.everest.webapp.pagehandler.administration.workflow.WorkflowTemplateHandler.java
License:Apache License
private void importWorkFlow(HttpServletRequest p_request, HttpSession session) { FileUploadHelper o_upload = new FileUploadHelper(); try {// ww w . ja v a2s . co m o_upload.doUpload(p_request); String list = o_upload.getFieldValue("localePairs"); String name = o_upload.getFieldValue("nameTF"); String projectId = o_upload.getFieldValue("project"); String fileName = o_upload.getSavedFilepath(); SAXReader reader = new SAXReader(); Document doc = reader.read(new File(fileName)); ArrayList<LocalePair> alist = new ArrayList<LocalePair>(); StringTokenizer st = new StringTokenizer(list, ","); while (st.hasMoreTokens()) { String id = (String) st.nextToken(); alist.add(ServerProxy.getLocaleManager().getLocalePairById(Long.parseLong(id))); } WorkflowTemplateHandlerHelper.importWorkflowTemplateInfo(doc, alist, name, projectId, getBundle(session)); for (LocalePair localePair : alist) { String importWorkflowTemplateName = generateName(name, localePair); OperationLog.log(m_userId, OperationLog.EVENT_ADD, OperationLog.COMPONET_WORKFLOW, importWorkflowTemplateName); } } catch (Exception e) { throw new EnvoyServletException(e); } }
From source file:com.globalsight.jobsAutoArchiver.AutoArchiver.java
License:Apache License
/**
 * Reads the configuration file and starts one thread per configured
 * {@code server} element; each thread calls {@code autoArchive} for every
 * user listed under that server.
 *
 * @throws Exception if the configuration file cannot be read or the global
 *         settings cannot be extracted
 */
public void doWork() throws Exception {
    Document config = new SAXReader().read(new File(Constants.CONFIG_FILE_NAME));
    Element root = config.getRootElement();

    runOnce = config.selectSingleNode("//runOnce").getText();
    intervalTime = Integer.valueOf(config.selectSingleNode("//intervalTime").getText());

    for (Iterator servers = root.elementIterator("server"); servers.hasNext();) {
        final Element server = (Element) servers.next();

        Runnable archiveTask = new Runnable() {
            public void run() {
                try {
                    String hostName = server.elementTextTrim("host");
                    int port = Integer.parseInt(server.elementTextTrim("port"));
                    boolean isUseHTTPS = Boolean.parseBoolean(server.elementTextTrim("https"));
                    int intervalTimeForArchive =
                            Integer.parseInt(server.elementTextTrim("intervalTimeForArchive"));

                    // A server may hold several <users> groups, each with
                    // any number of <user> entries.
                    for (Iterator groups = server.elementIterator("users"); groups.hasNext();) {
                        Element group = (Element) groups.next();
                        for (Iterator users = group.elementIterator("user"); users.hasNext();) {
                            Element user = (Element) users.next();
                            String userName = user.elementTextTrim("username");
                            String password = user.elementTextTrim("password");
                            autoArchive(hostName, port, isUseHTTPS, userName, password,
                                    intervalTimeForArchive);
                        }
                    }
                } catch (Throwable e) {
                    LogUtil.info("error : " + e);
                }
            }
        };

        new Thread(archiveTask).start();
    }
}
From source file:com.globalsight.jobsAutoArchiver.AutoArchiver.java
License:Apache License
private void autoArchive(String hostName, int port, boolean isUseHTTPS, String userName, String password, int intervalTimeForArchive) { try {//from ww w.j a va2s .co m Ambassador ambassador = WebServiceClientHelper.getClientAmbassador(hostName, port, userName, password, isUseHTTPS); String accessToken = ambassador.login(userName, password); String result = ambassador.fetchJobsByState(accessToken, Constants.JOB_STATE_EXPORTED, 0, 100, false); if (result == null) { LogUtil.info( "server " + hostName + " , user " + userName + " : no jobs that are in exported state."); } else { LogUtil.info("server " + hostName + " , user " + userName + " , returning of fetchJobsByState API:\n" + result); SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yy HH:mm:ss"); long diffInHours = 0; long now = (new Date()).getTime(); SAXReader saxReader = new SAXReader(); Document document2 = saxReader.read(new ByteArrayInputStream(result.getBytes("UTF-8"))); Element rootEltJob = document2.getRootElement(); Iterator iterJob = rootEltJob.elementIterator("Job"); Set<Long> jobIds = new HashSet<Long>(); while (iterJob.hasNext()) { String jobId = null; String completedDateStr = null; try { Element jobElement = (Element) iterJob.next(); jobId = jobElement.elementTextTrim("id"); completedDateStr = jobElement.elementTextTrim("completedDate"); Date completedDate = sdf.parse(completedDateStr.substring(0, 18).trim()); long completedDateLong = completedDate.getTime(); diffInHours = (now - completedDateLong) / 1000 / 60 / 60; if (diffInHours >= intervalTimeForArchive) { jobIds.add(Long.parseLong(jobId)); } } catch (Exception e) { LogUtil.info("Error to check job with jobID: " + jobId + " and completedDate '" + completedDateStr + "'.", e); } } StringBuffer jobs = new StringBuffer(); for (long id : jobIds) { jobs.append(id).append(","); } if (jobs.length() > 0 && jobs.toString().endsWith(",")) { String jobs2 = jobs.toString().substring(0, jobs.length() - 1); ambassador.archiveJob(accessToken, jobs2); 
String[] jobs2_array = jobs2.toString().split(","); for (String job : jobs2_array) { LogUtil.info("server " + hostName + ", user " + userName + " : the job " + job + " can be archived"); } } else { LogUtil.info("server " + hostName + ", user " + userName + " : no jobs that can be archived."); } } } catch (Exception e) { LogUtil.info("server " + hostName + ", user " + userName + ", error : " + e); } }
From source file:com.globalsight.reports.util.ReportHandlerFactory.java
License:Apache License
/** * Reads in the Report handler mapping from an XML file, parses it * and populates this data structure//from ww w .j a v a2 s . c o m * * @param reportConfigXML the name of the XML file that * holds the report mapping. * @return <code>true</code> if successful, <code>false</code> * otherwise. */ public static boolean createReportHandlerMap(String reportConfigXML) { boolean retVal = false; SAXReader reader = new SAXReader(); Document document = null; try { InputStream is = ReportHandlerFactory.class.getResourceAsStream(reportConfigXML); document = reader.read(new InputSource(is)); // parse XML file in order to get className counterpart to pagename List<?> moduleList = document.selectNodes(Constants.REPORTMODULE_NODE_XPATH); for (Iterator<?> iter = moduleList.iterator(); iter.hasNext();) { Element element = (Element) iter.next(); Attribute attribute = element.attribute(Constants.REPORTNAME_ATTRIBUTE); Element elementNode = element.element(Constants.REPORTHANDLER_NODE); Attribute attributeNode = elementNode.attribute(Constants.CLASS_ATTRIBUTE); if (CATEGORY.isDebugEnabled()) { CATEGORY.debug("CreateReportHandlerMap key: " + attribute.getValue() + " value: " + attributeNode.getValue()); } reportHandlerMap.put(attribute.getValue(), attributeNode.getValue()); } // parse XML file in order to get target Url counterpart to pagename List<?> urlList = document.selectNodes(Constants.REPORTURL_NODE_XPATH); for (Iterator<?> iterUrl = urlList.iterator(); iterUrl.hasNext();) { Element urlElement = (Element) iterUrl.next(); Attribute jspNameAttribute = urlElement.attribute(Constants.JSPNAME_ATTRIBUTE); Attribute urlAttribute = urlElement.attribute(Constants.TARGETURL_ATTRIBUTE); reportTargetUrlMap.put(jspNameAttribute.getValue(), urlAttribute.getValue()); } if (instance == null) { instance = new ReportHandlerFactory(); retVal = true; } } catch (DocumentException e) { CATEGORY.error("Cannot read the ReportConfig.xml", e); } return retVal; }