Example usage for org.dom4j.io SAXReader read

List of usage examples for org.dom4j.io SAXReader read

Introduction

On this page you can find example usages of org.dom4j.io SAXReader read.

Prototype

public Document read(InputSource in) throws DocumentException 

Source Link

Document

Reads a Document from the given InputSource using SAX

Usage

From source file:com.globalsight.everest.tm.util.trados.TradosFmTmxToGxml.java

License:Apache License

/**
 * Main method to call: converts the TMX file at the given URL to GXML and
 * returns the new filename of the result.
 *
 * <p>The file is read with dom4j path handlers. Each completed
 * {@code <tu>} is written out and detached immediately so the document
 * tree does not grow with the size of the input.
 *
 * @param p_url location of the TMX file to convert
 * @return the value of {@code m_filename} after the conversion
 *         (presumably set by {@code startOutputFile} - confirm)
 * @throws Exception if the output file cannot be started or closed, or if
 *         reading or validating the TMX file fails
 */
public String convertToGxml(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);

    info("Converting TMX file to GXML: `" + p_url + "'");
    startOutputFile(baseName);

    // Both counters feed the summary line below, so both start from zero
    // for every conversion run.
    m_entryCount = 0;
    m_errorCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // Pick up the TMX version as soon as the root element starts.
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            m_version = path.getCurrent().attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // Once the header is complete, remember it and emit the new header.
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            setOldHeader(element);
            createNewHeader();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Write each TU when it is complete, then drop it from the tree.
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
            m_tuError = false;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            if (m_tuError) {
                m_errorCount++;
            } else {
                writeEntry(element.asXML());
            }

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    // Replace the content of every <seg> with its GXML conversion before
    // the enclosing TU is written.
    reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            try {
                String gxml = handleTuv(element);
                Document doc = parse("<root>" + gxml + "</root>");

                // Remove old content of seg
                List content = element.content();
                for (int i = content.size() - 1; i >= 0; --i) {
                    ((Node) content.get(i)).detach();
                }

                // Add new GXML content. The list is reversed and then
                // consumed from its tail, so nodes are always detached
                // from the end of the list while still being appended to
                // <seg> in their original order.
                content = doc.getRootElement().content();
                Collections.reverse(content);
                for (int i = content.size() - 1; i >= 0; --i) {
                    Node node = (Node) content.get(i);
                    element.add(node.detach());
                }
            } catch (Throwable ex) {
                // Best effort: a broken segment only invalidates its own
                // TU (counted in the /tmx/body/tu handler). Leave a trace
                // so the failure is not completely silent.
                m_tuError = true;
                debug("Entry " + m_entryCount + " skipped: " + ex);
            }
        }
    });

    try {
        // All work happens in the handlers; the returned Document is not needed.
        reader.read(p_url);
    } finally {
        // Finish the output file even when parsing or validation fails,
        // so a failed conversion does not leave it open.
        closeOutputFile();
    }

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "', " + m_errorCount
            + " errors.");

    return m_filename;
}

From source file:com.globalsight.everest.tm.util.trados.TradosHtmlTmxToGxml.java

License:Apache License

/**
 * Main method to call: converts the TMX file at the given URL to GXML and
 * returns the new filename of the result.
 *
 * <p>The file is read with dom4j path handlers. Each completed
 * {@code <tu>} is written out and detached immediately so the document
 * tree does not grow with the size of the input.
 *
 * @param p_url location of the TMX file to convert
 * @return the value of {@code m_filename} after the conversion
 *         (presumably set by {@code startOutputFile} - confirm)
 * @throws Exception if the output file cannot be started or closed, or if
 *         reading or validating the TMX file fails
 */
public String convertToGxml(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);

    info("Converting TMX file to GXML: `" + p_url + "'");

    startOutputFile(baseName);

    // Both counters feed the summary line below, so both start from zero
    // for every conversion run.
    m_entryCount = 0;
    m_errorCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // Pick up the TMX version as soon as the root element starts.
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            m_version = path.getCurrent().attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // Once the header is complete, remember it and emit the new header.
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();
            setOldHeader(element);
            createNewHeader();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Write each TU when it is complete, then drop it from the tree.
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
            m_tuError = false;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            if (m_tuError) {
                m_errorCount++;
            } else {
                writeEntry(element.asXML());
            }

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    // Replace the content of every <seg> with its GXML conversion before
    // the enclosing TU is written.
    reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            try {
                element = removeUtElements(element);

                // Only the text of the cleaned segment is converted.
                String gxml = handleTuv(element.getText());
                Document doc = parse("<root>" + gxml + "</root>");

                // Remove old content of seg
                List content = element.content();
                for (int i = content.size() - 1; i >= 0; --i) {
                    ((Node) content.get(i)).detach();
                }

                // Add new GXML content. The list is reversed and then
                // consumed from its tail, so nodes are always detached
                // from the end of the list while still being appended to
                // <seg> in their original order.
                content = doc.getRootElement().content();
                Collections.reverse(content);
                for (int i = content.size() - 1; i >= 0; --i) {
                    Node node = (Node) content.get(i);
                    element.add(node.detach());
                }
            } catch (Throwable ex) {
                // Best effort: a broken segment only invalidates its own
                // TU (counted in the /tmx/body/tu handler). Leave a trace
                // so the failure is not completely silent.
                m_tuError = true;
                debug("Entry " + m_entryCount + " skipped: " + ex);
            }
        }
    });

    try {
        // All work happens in the handlers; the returned Document is not needed.
        reader.read(p_url);
    } finally {
        // Finish the output file even when parsing or validation fails,
        // so a failed conversion does not leave it open.
        closeOutputFile();
    }

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "', " + m_errorCount + " errors.");

    return m_filename;
}

From source file:com.globalsight.everest.tm.util.trados.TradosTmxToRtf.java

License:Apache License

/**
 * Main method to call: converts the TMX file at the given URL to RTF and
 * returns the new filename of the result.
 *
 * <p>The file is read with dom4j path handlers. The header handler emits
 * the RTF preamble, and every completed {@code <tu>} is written out and
 * detached immediately to keep memory usage low.
 *
 * @param p_url location of the TMX file to convert
 * @return the value of {@code m_filename} after the conversion
 *         (presumably set by {@code startOutputFile} - confirm)
 * @throws Exception if the output file cannot be started or closed, or if
 *         reading or validating the TMX file fails
 */
public String convertToRtf(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);

    info("Converting TMX file to RTF: `" + p_url + "'");

    startOutputFile(baseName);

    m_entryCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // Pick up the TMX version as soon as the root element starts.
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            m_version = path.getCurrent().attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // Once the header is complete, copy its RTF properties to the output.
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();
            setOldHeader(element);

            // The XPath must be relative to the header element. An
            // absolute "/prop[...]" is evaluated from the document root,
            // whose only child is <tmx>, and therefore never matches.
            Element prop = (Element) element.selectSingleNode("prop[@type='RTFFontTable']");

            if (prop != null) {
                writeEntry(prop.getText());
            }

            prop = (Element) element.selectSingleNode("prop[@type='RTFStyleSheet']");

            if (prop != null) {
                writeEntry(prop.getText());
            }

            writeOtherRtfHeader();

            writeDummyParagraph();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Write each TU when it is complete, then drop it from the tree.
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            element = removeUtElements(element);

            writeEntry(replaceUnicodeChars(removeRtfParagraphs(element.asXML())));
            writeEntry("\\par");

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    try {
        // All work happens in the handlers; the returned Document is not needed.
        reader.read(p_url);
    } finally {
        // Finish the output file even when parsing or validation fails,
        // so a failed conversion does not leave it open.
        closeOutputFile();
    }

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "'");

    return m_filename;
}

From source file:com.globalsight.everest.tm.util.ttx.TtxClean.java

License:Apache License

/**
 * Main method to call: reduces a TTX file to the plain content of either
 * its source or its target side and returns the new filename of the result.
 *
 * @param p_url location of the TTX file, expected to be named
 *        {@code <file>.<ext>.ttx}
 * @param p_cleanTarget {@code true} to keep the target side, {@code false}
 *        to keep the source side
 * @param p_encoding encoding for the output when cleaning to target;
 *        {@code null} selects UTF-8. Not used when cleaning to source,
 *        where the header's original encoding is reused.
 * @return the value of {@code m_filename} after the output was written
 * @throws Exception if the TTX file cannot be read or the output file
 *         cannot be written
 */
public String cleanTtx(String p_url, boolean p_cleanTarget, String p_encoding) throws Exception {
    m_cleanTarget = p_cleanTarget;

    // Strip ".ttx" first, then split what remains into name and extension.
    final String nameWithoutTtx = getBaseName(p_url);
    final String baseName = getBaseName(nameWithoutTtx);
    final String extension = getExtension(nameWithoutTtx);

    info("Cleaning TTX file to " + (m_cleanTarget ? "target" : "source") + ": `" + p_url + "'");

    m_entryCount = 0;

    // Xerces is required for reading from a file. No entity resolver is
    // installed and DTD validation stays off for TTX input.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    // The version attribute is available as soon as the root element opens.
    reader.addHandler("/TRADOStag", new ElementHandler() {
        public void onStart(ElementPath path) {
            m_version = path.getCurrent().attributeValue(Ttx.VERSION);
        }

        public void onEnd(ElementPath path) {
        }
    });

    // The header is captured once the FrontMatter element is complete.
    reader.addHandler("/TRADOStag/FrontMatter", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            setOldHeader(path.getCurrent());
        }
    });

    // TTX files are normally small, so the whole document is kept in memory.
    Document document = reader.read(p_url);
    Element body = (Element) document.getRootElement().selectSingleNode("//Body/Raw");

    // Remove <ut>, <df> and pull out one TUV.
    processBody(body);
    final String content = getInnerText(body);

    // Encoding and locale both depend on which side is being kept.
    final String encoding;
    final String locale;
    if (m_cleanTarget) {
        encoding = (p_encoding != null) ? p_encoding : "UTF-8";
        locale = m_header.getTargetLanguage();
    } else {
        // reuse original encoding
        encoding = m_header.getOriginalEncoding();
        locale = m_header.getSourceLanguage();
    }

    startOutputFile(baseName, locale, extension, encoding);
    writeEntry(content);
    closeOutputFile();

    info("Result written to file `" + m_filename + "'.");

    return m_filename;
}

From source file:com.globalsight.everest.tm.util.ttx.TtxToTmx.java

License:Apache License

/**
 * Main method to call: converts the TTX file at the given URL to TMX and
 * returns the new filename of the result.
 *
 * <p>The output file is started from inside the FrontMatter handler, i.e.
 * only after the TTX header has been read. Every completed {@code <Tu>}
 * is cleaned, written out and detached from the tree.
 *
 * @param p_url location of the TTX file to convert
 * @return the value of {@code m_filename} after the conversion
 * @throws Exception if the TTX file cannot be read, or if the output file
 *         cannot be started or closed. A failure to start the output file
 *         now aborts the parse and is reported through this exception
 *         instead of terminating the JVM.
 */
public String convertTtxToTmx(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);

    info("Converting TTX file to TMX: `" + p_url + "'");

    m_entryCount = 0;

    // Xerces is required for reading from a file. No entity resolver is
    // installed and DTD validation stays off for TTX input.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    // Fetch the version info early.
    reader.addHandler("/TRADOStag", new ElementHandler() {
        public void onStart(ElementPath path) {
            m_version = path.getCurrent().attributeValue(Ttx.VERSION);
        }

        public void onEnd(ElementPath path) {
        }
    });

    // Fetch the header info early and start the output file with it.
    reader.addHandler("/TRADOStag/FrontMatter", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            setTtxHeader(element);

            try {
                startOutputFile(baseName);
            } catch (Exception ex) {
                error(ex.toString());
                // Abort the parse and let the caller decide what to do.
                // Calling System.exit() here would take down the whole
                // JVM, including any server this converter runs in.
                throw new IllegalStateException("Cannot start output file for `" + baseName + "'", ex);
            }

            // prune the current element to reduce memory
            element.detach();
        }
    });

    ElementHandler tuHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
        }

        public void onEnd(ElementPath path) {
            Element element = cleanupTu(path.getCurrent());

            writeEntry(element.asXML());

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 50 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    };

    // Path handlers cannot use "//", sooo specify all known paths.
    reader.addHandler("/TRADOStag/Body/Raw/Tu", tuHandler);
    reader.addHandler("/TRADOStag/Body/Raw/df/Tu", tuHandler);
    reader.addHandler("/TRADOStag/Body/Raw/ut/Tu", tuHandler);
    reader.addHandler("/TRADOStag/Body/Raw/df/ut/Tu", tuHandler);

    // All work happens in the handlers; the returned Document is not needed.
    reader.read(p_url);

    closeOutputFile();

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "'");

    return m_filename;
}

From source file:com.globalsight.everest.webapp.pagehandler.administration.config.xmldtd.XmlDtdManager.java

License:Apache License

/**
 * Validates an XML file against the DTD file registered for it.
 *
 * <p>Nothing is validated when the file name does not end in
 * {@code .xml}, or when no existing DTD file is found for the given id
 * and file.
 *
 * @param id
 *            The xml dtd id.
 * @param file
 *            The xml file to validate.
 * @throws DtdException
 *             if reading the file with validation enabled fails; the
 *             original exception is wrapped.
 */
public static void validateXmlFile(long id, File file) throws DtdException {
    Assert.assertFileExist(file);

    // Only files named *.xml are candidates for validation.
    if (!file.getName().endsWith(".xml")) {
        return;
    }
    logger.debug("File: " + file.getPath());

    File dtdFile = DtdFileManager.getDtdFile(id, file);
    if (dtdFile == null || !dtdFile.exists()) {
        return;
    }
    logger.debug("DTD: " + dtdFile.getPath());

    SAXReader reader = new SAXReader();
    reader.setEntityResolver(new DtdEntityResolver(dtdFile));
    reader.setValidation(true);
    try {
        Document parsed = reader.read(file);
        // Only the validation result matters; drop the parsed content.
        parsed.clearContent();
        logger.debug("Successful");
    } catch (Exception e) {
        logger.info("DTD validation failed: " + e.getMessage());
        throw new DtdException(e);
    }
}

From source file:com.globalsight.everest.webapp.pagehandler.administration.workflow.WorkflowTemplateHandler.java

License:Apache License

/**
 * Imports a workflow template from an uploaded XML file and creates it for
 * every selected locale pair.
 *
 * <p>Reads the form fields {@code localePairs} (comma-separated locale
 * pair ids), {@code nameTF} and {@code project} from the upload, parses
 * the saved file and logs one "add" operation per locale pair.
 *
 * @param p_request the multipart request carrying the upload
 * @param session the current session, used to look up the resource bundle
 * @throws EnvoyServletException wrapping any failure during upload,
 *         parsing or import
 */
private void importWorkFlow(HttpServletRequest p_request, HttpSession session) {
    FileUploadHelper o_upload = new FileUploadHelper();
    try {
        o_upload.doUpload(p_request);
        String list = o_upload.getFieldValue("localePairs");
        String name = o_upload.getFieldValue("nameTF");
        String projectId = o_upload.getFieldValue("project");
        String fileName = o_upload.getSavedFilepath();

        SAXReader reader = new SAXReader();
        // The template comes from a user upload: do not let it pull in
        // external entities (XXE - local file disclosure, SSRF).
        reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
        reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        Document doc = reader.read(new File(fileName));

        ArrayList<LocalePair> alist = new ArrayList<LocalePair>();
        StringTokenizer st = new StringTokenizer(list, ",");
        while (st.hasMoreTokens()) {
            String id = st.nextToken();
            alist.add(ServerProxy.getLocaleManager().getLocalePairById(Long.parseLong(id)));
        }

        WorkflowTemplateHandlerHelper.importWorkflowTemplateInfo(doc, alist, name, projectId,
                getBundle(session));
        for (LocalePair localePair : alist) {
            String importWorkflowTemplateName = generateName(name, localePair);
            OperationLog.log(m_userId, OperationLog.EVENT_ADD, OperationLog.COMPONET_WORKFLOW,
                    importWorkflowTemplateName);
        }

    } catch (Exception e) {
        throw new EnvoyServletException(e);
    }

}

From source file:com.globalsight.jobsAutoArchiver.AutoArchiver.java

License:Apache License

/**
 * Reads the archiver configuration file and starts one worker thread per
 * configured {@code <server>} element.
 *
 * <p>Each worker reads the server's host, port, https flag and archive
 * interval, then calls {@code autoArchive} for every {@code <user>} found
 * under the server's {@code <users>} elements. Failures inside a worker
 * are logged and do not propagate.
 *
 * @throws Exception if the configuration file cannot be read or the
 *         top-level settings cannot be parsed
 */
public void doWork() throws Exception {
    Document config = new SAXReader().read(new File(Constants.CONFIG_FILE_NAME));
    Element root = config.getRootElement();

    // Global settings shared by all servers.
    runOnce = config.selectSingleNode("//runOnce").getText();
    intervalTime = Integer.valueOf(config.selectSingleNode("//intervalTime").getText());

    for (Iterator servers = root.elementIterator("server"); servers.hasNext();) {
        final Element serverElement = (Element) servers.next();

        Runnable worker = new Runnable() {
            public void run() {
                try {
                    String hostName = serverElement.elementTextTrim("host");
                    int port = Integer.parseInt(serverElement.elementTextTrim("port"));
                    boolean isUseHTTPS = Boolean.parseBoolean(serverElement.elementTextTrim("https"));
                    int intervalTimeForArchive = Integer
                            .parseInt(serverElement.elementTextTrim("intervalTimeForArchive"));

                    for (Iterator groups = serverElement.elementIterator("users"); groups.hasNext();) {
                        Element usersElement = (Element) groups.next();
                        for (Iterator users = usersElement.elementIterator("user"); users.hasNext();) {
                            Element userElement = (Element) users.next();
                            String userName = userElement.elementTextTrim("username");
                            String password = userElement.elementTextTrim("password");
                            autoArchive(hostName, port, isUseHTTPS, userName, password, intervalTimeForArchive);
                        }
                    }
                } catch (Throwable e) {
                    // A failure for one server must not affect the others.
                    LogUtil.info("error : " + e);
                }
            }
        };
        new Thread(worker).start();
    }
}

From source file:com.globalsight.jobsAutoArchiver.AutoArchiver.java

License:Apache License

/**
 * Archives the exported jobs of one user on one server whose completion
 * date is at least {@code intervalTimeForArchive} hours old.
 *
 * <p>Logs in through the web service client, fetches the jobs in exported
 * state, parses the returned XML and sends the ids of all sufficiently old
 * jobs to {@code archiveJob} as one comma-separated string. All failures
 * are logged; nothing is thrown to the caller.
 *
 * @param hostName server host
 * @param port server port
 * @param isUseHTTPS whether to connect via HTTPS
 * @param userName login name
 * @param password login password
 * @param intervalTimeForArchive minimum age of a completed job in hours
 */
private void autoArchive(String hostName, int port, boolean isUseHTTPS, String userName, String password,
        int intervalTimeForArchive) {
    try {
        Ambassador ambassador = WebServiceClientHelper.getClientAmbassador(hostName, port, userName, password,
                isUseHTTPS);
        String accessToken = ambassador.login(userName, password);
        // NOTE(review): the literal arguments 0 and 100 look like a paging
        // window, in which case only the first 100 jobs are considered
        // per run - confirm against the fetchJobsByState API.
        String result = ambassador.fetchJobsByState(accessToken, Constants.JOB_STATE_EXPORTED, 0, 100, false);
        if (result == null) {
            LogUtil.info(
                    "server " + hostName + " , user " + userName + " : no jobs that are in exported state.");
            return;
        }

        LogUtil.info("server " + hostName + " , user " + userName
                + " , returning of fetchJobsByState API:\n" + result);
        SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yy HH:mm:ss");
        long now = (new Date()).getTime();

        SAXReader saxReader = new SAXReader();
        Document document2 = saxReader.read(new ByteArrayInputStream(result.getBytes("UTF-8")));
        Iterator iterJob = document2.getRootElement().elementIterator("Job");
        Set<Long> jobIds = new HashSet<Long>();
        while (iterJob.hasNext()) {
            String jobId = null;
            String completedDateStr = null;
            try {
                Element jobElement = (Element) iterJob.next();
                jobId = jobElement.elementTextTrim("id");
                completedDateStr = jobElement.elementTextTrim("completedDate");
                // Only the leading date/time part is parsed; anything
                // after the first 18 characters is ignored.
                Date completedDate = sdf.parse(completedDateStr.substring(0, 18).trim());
                long diffInHours = (now - completedDate.getTime()) / 1000 / 60 / 60;
                if (diffInHours >= intervalTimeForArchive) {
                    jobIds.add(Long.parseLong(jobId));
                }
            } catch (Exception e) {
                // A malformed job entry is skipped; the rest are still processed.
                LogUtil.info("Error to check job with jobID: " + jobId + " and completedDate '"
                        + completedDateStr + "'.", e);
            }
        }

        if (jobIds.isEmpty()) {
            LogUtil.info("server " + hostName + ", user " + userName + " : no jobs that can be archived.");
            return;
        }

        // Build the comma-separated id list directly, without a trailing
        // separator that has to be trimmed afterwards.
        StringBuilder jobs = new StringBuilder();
        for (Long id : jobIds) {
            if (jobs.length() > 0) {
                jobs.append(',');
            }
            jobs.append(id);
        }
        ambassador.archiveJob(accessToken, jobs.toString());
        for (Long id : jobIds) {
            LogUtil.info("server " + hostName + ", user " + userName + " : the job " + id
                    + " can be archived");
        }
    } catch (Exception e) {
        // Pass the exception along as well so the stack trace is kept,
        // matching the per-job error logging above.
        LogUtil.info("server " + hostName + ", user " + userName + ", error : " + e, e);
    }
}

From source file:com.globalsight.reports.util.ReportHandlerFactory.java

License:Apache License

/**
 * Reads in the Report handler mapping from an XML file, parses it
 * and populates this data structure.
 *
 * <p>Two maps are filled: report name to handler class name, and JSP name
 * to target URL. The singleton {@code instance} is created on the first
 * successful call.
 *
 * @param reportConfigXML the name of the XML file that
 * holds the report mapping, resolved as a class-path resource.
 * @return <code>true</code> if the mapping was read and the singleton
 * instance was created by this call; <code>false</code> if the resource
 * is missing or unreadable, or if the instance already existed.
 */
public static boolean createReportHandlerMap(String reportConfigXML) {
    boolean retVal = false;

    InputStream is = ReportHandlerFactory.class.getResourceAsStream(reportConfigXML);
    if (is == null) {
        // Report a missing resource explicitly instead of handing a null
        // stream to the parser.
        CATEGORY.error("Cannot find the report config resource: " + reportConfigXML);
        return retVal;
    }

    try {
        SAXReader reader = new SAXReader();
        Document document = reader.read(new InputSource(is));

        // parse XML file in order to get className counterpart to pagename
        List<?> moduleList = document.selectNodes(Constants.REPORTMODULE_NODE_XPATH);
        for (Iterator<?> iter = moduleList.iterator(); iter.hasNext();) {
            Element element = (Element) iter.next();
            Attribute attribute = element.attribute(Constants.REPORTNAME_ATTRIBUTE);
            Element elementNode = element.element(Constants.REPORTHANDLER_NODE);

            Attribute attributeNode = elementNode.attribute(Constants.CLASS_ATTRIBUTE);

            if (CATEGORY.isDebugEnabled()) {
                CATEGORY.debug("CreateReportHandlerMap key: " + attribute.getValue() + " value: "
                        + attributeNode.getValue());
            }

            reportHandlerMap.put(attribute.getValue(), attributeNode.getValue());
        }

        // parse XML file in order to get target Url counterpart to pagename
        List<?> urlList = document.selectNodes(Constants.REPORTURL_NODE_XPATH);
        for (Iterator<?> iterUrl = urlList.iterator(); iterUrl.hasNext();) {
            Element urlElement = (Element) iterUrl.next();
            Attribute jspNameAttribute = urlElement.attribute(Constants.JSPNAME_ATTRIBUTE);
            Attribute urlAttribute = urlElement.attribute(Constants.TARGETURL_ATTRIBUTE);
            reportTargetUrlMap.put(jspNameAttribute.getValue(), urlAttribute.getValue());
        }

        if (instance == null) {
            instance = new ReportHandlerFactory();
            retVal = true;
        }

    } catch (DocumentException e) {
        CATEGORY.error("Cannot read the ReportConfig.xml", e);
    } finally {
        // The stream was opened here, so it is released here on every path.
        try {
            is.close();
        } catch (java.io.IOException ignored) {
            // Closing a resource stream that has already been read;
            // nothing useful can be done if this fails.
        }
    }

    return retVal;
}