Example usage for org.dom4j.io SAXReader setValidation

List of usage examples for org.dom4j.io SAXReader setValidation

Introduction

On this page you can find example usages of org.dom4j.io SAXReader setValidation.

Prototype

public void setValidation(boolean validation) 

Source Link

Document

Sets the validation mode.

Usage

From source file:com.globalsight.everest.tm.util.TmxLevelSplitter.java

License:Apache License

/**
 * Parses the TMX document at {@code p_url} with a validating Xerces parser
 * and hands every {@code <tu>} either to {@code writeTagsEntry} (when
 * {@code containsTags} reports tags) or to {@code writeTextEntry}.
 *
 * Elements are processed through dom4j element handlers and detached as soon
 * as they have been handled, so the whole document is never kept in memory.
 *
 * @param p_url location of the TMX document to split
 * @throws Exception if reading or parsing the document fails
 */
public void split(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    final String extension = getExtension(p_url);

    m_entryCount = 0;

    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    log("Splitting document `" + p_url + "'");

    // Root element: remember the TMX version attribute.
    ElementHandler rootHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
            m_version = path.getCurrent().attributeValue("version");
        }

        public void onEnd(ElementPath path) {
            // nothing to do when the root element closes
        }
    };
    reader.addHandler("/tmx", rootHandler);

    // Header: keep it, start the output files, then prune it from the tree.
    ElementHandler headerHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
            // the header is only usable once it is complete
        }

        public void onEnd(ElementPath path) {
            Element header = path.getCurrent();
            m_header = header;

            try {
                startFiles(baseName, extension);
            } catch (Exception ex) {
                // the output files are required; give up on failure
                log(ex.toString());
                System.exit(1);
            }

            // prune the processed element to reduce memory
            header.detach();
        }
    };
    reader.addHandler("/tmx/header", headerHandler);

    // Translation units: count, route by tag content, then prune.
    ElementHandler tuHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
        }

        public void onEnd(ElementPath path) {
            Element tu = path.getCurrent();

            if (!containsTags(tu)) {
                writeTextEntry(tu.asXML());
                ++m_textCount;
            } else {
                writeTagsEntry(tu.asXML());
                ++m_tagsCount;
            }

            // prune the processed element to reduce memory
            tu.detach();
        }
    };
    reader.addHandler("/tmx/body/tu", tuHandler);

    // All work happens in the handlers; the returned document is not needed.
    reader.read(p_url);

    closeFiles();

    String summary = "Processed " + m_entryCount + " TUs, " + m_textCount + " level 1 (text), "
            + m_tagsCount + " level 2 (tags)";
    log(summary);
}

From source file:com.globalsight.everest.tm.util.TmxSplitter.java

License:Apache License

/**
 * Parses the TMX document at {@code p_url} with a validating Xerces parser
 * and writes its {@code <tu>} elements to a sequence of output files, starting
 * a new file after every {@code p_numEntries} translation units.
 *
 * Elements are processed through dom4j element handlers and detached as soon
 * as they have been handled, so the whole document is never kept in memory.
 *
 * @param p_url        location of the TMX document to split
 * @param p_numEntries number of TUs per output file, as a decimal string;
 *                     must not be zero (a negative value splits like its
 *                     absolute value)
 * @throws NumberFormatException    if {@code p_numEntries} is not an integer
 * @throws IllegalArgumentException if {@code p_numEntries} is zero
 * @throws Exception                if reading or parsing the document fails
 */
public void split(String p_url, String p_numEntries) throws Exception {
    final int maxEntries = Integer.parseInt(p_numEntries);
    if (maxEntries == 0) {
        // Fail before any file is opened instead of dividing by zero in the
        // TU handler in the middle of the parse.
        throw new IllegalArgumentException("Number of entries per file must not be zero: " + p_numEntries);
    }

    final String baseName = getBaseName(p_url);
    final String extension = getExtension(p_url);

    m_entryCount = 0;

    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    log("Splitting document `" + p_url + "'");

    // Root element: remember the TMX version attribute.
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            Element element = path.getCurrent();

            m_version = element.attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // Header: keep it, start the first output file, then prune it.
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            m_header = element;

            try {
                startFile(baseName, extension);
            } catch (Exception ex) {
                log(ex.toString());
                System.exit(1);
            }

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Translation units: roll over to a new file when needed, write, prune.
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;

            // Roll over only when the current file already holds a full
            // batch, i.e. when this TU is the first one of the next batch.
            // Testing m_entryCount % maxEntries here would switch files one
            // TU too early and leave the first file one entry short.
            if (m_entryCount > 1 && (m_entryCount - 1) % maxEntries == 0) {
                try {
                    closeFile();
                    startFile(baseName, extension);
                } catch (Exception ex) {
                    log(ex.toString());
                    System.exit(1);
                }
            }
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            writeEntry(element.asXML());

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // All work happens in the handlers; the returned document is not needed.
    reader.read(p_url);

    closeFile();
}

From source file:com.globalsight.everest.tm.util.trados.TradosFmSgmlTmxToGxml.java

License:Apache License

/**
 * Main method to call, returns the new filename of the result.
 *
 * Parses the TMX file at {@code p_url} with a validating Xerces parser.
 * The content of each {@code <seg>} is replaced by the GXML produced by
 * {@code handleTuv}, and every {@code <tu>} whose segments all converted is
 * written to the output file. TUs with a failed segment are counted in
 * {@code m_errorCount} and skipped.
 *
 * @param p_url location of the TMX file to convert
 * @return the name of the output file ({@code m_filename})
 * @throws Exception if reading or parsing the document fails
 */
public String convertToGxml(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    final String extension = getExtension(p_url);

    info("Converting TMX file to GXML: `" + p_url + "'");
    startOutputFile(baseName);

    m_entryCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // Root element: remember the TMX version attribute.
    reader.addHandler("/tmx", new ElementHandler() {
        public final void onStart(ElementPath path) {
            Element element = path.getCurrent();

            m_version = element.attributeValue("version");
        }

        public final void onEnd(ElementPath path) {
        }
    });

    // Header: convert it, then prune it from the tree.
    reader.addHandler("/tmx/header", new ElementHandler() {
        public final void onStart(ElementPath path) {
        }

        public final void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            setOldHeader(element);
            createNewHeader();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Translation units: write the converted TU (or count the error), prune.
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public final void onStart(ElementPath path) {
            ++m_entryCount;
            m_tuError = false;
        }

        public final void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            if (m_tuError) {
                m_errorCount++;
            } else {
                writeEntry(element.asXML());
            }

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    // Segments: swap the original content for the converted GXML. These
    // elements are not pruned here; they leave the tree with their TU.
    reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() {
        public final void onStart(ElementPath path) {
        }

        public final void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            try {
                String gxml = handleTuv(element);
                Document doc = parse("<root>" + gxml + "</root>");

                // Remove old content of seg
                List content = element.content();
                for (int i = content.size() - 1; i >= 0; --i) {
                    ((Node) content.get(i)).detach();
                }

                // Add new GXML content. The list is reversed and then walked
                // from the end, so nodes are detached from the tail of the
                // list while being added to seg in their original order.
                content = doc.getRootElement().content();
                Collections.reverse(content);
                for (int i = content.size() - 1; i >= 0; --i) {
                    Node node = (Node) content.get(i);
                    element.add(node.detach());
                }
            } catch (Throwable ex) {
                // Best effort: a segment that cannot be converted only
                // invalidates its TU. Record why, so the error count in the
                // summary can be traced back to a cause.
                m_tuError = true;
                debug("Entry " + m_entryCount + " could not be converted: " + ex);
            }
        }
    });

    // All work happens in the handlers; the returned document is not needed.
    reader.read(p_url);

    closeOutputFile();

    info("Processed " + m_entryCount + " TUs " + "into file `" + m_filename + "', " + m_errorCount
            + " errors.");

    return m_filename;
}

From source file:com.globalsight.everest.tm.util.trados.TradosFmTmxToGxml.java

License:Apache License

/**
 * Main method to call, returns the new filename of the result.
 *
 * Parses the TMX file at {@code p_url} with a validating Xerces parser.
 * The content of each {@code <seg>} is replaced by the GXML produced by
 * {@code handleTuv}, and every {@code <tu>} whose segments all converted is
 * written to the output file. TUs with a failed segment are counted in
 * {@code m_errorCount} and skipped.
 *
 * @param p_url location of the TMX file to convert
 * @return the name of the output file ({@code m_filename})
 * @throws Exception if reading or parsing the document fails
 */
public String convertToGxml(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    final String extension = getExtension(p_url);

    info("Converting TMX file to GXML: `" + p_url + "'");
    startOutputFile(baseName);

    m_entryCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // Root element: remember the TMX version attribute.
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            Element element = path.getCurrent();

            m_version = element.attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // Header: convert it, then prune it from the tree.
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            setOldHeader(element);
            createNewHeader();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Translation units: write the converted TU (or count the error), prune.
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
            m_tuError = false;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            if (m_tuError) {
                m_errorCount++;
            } else {
                writeEntry(element.asXML());
            }

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    // Segments: swap the original content for the converted GXML. These
    // elements are not pruned here; they leave the tree with their TU.
    reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            try {
                String gxml = handleTuv(element);
                Document doc = parse("<root>" + gxml + "</root>");

                // Remove old content of seg
                List content = element.content();
                for (int i = content.size() - 1; i >= 0; --i) {
                    ((Node) content.get(i)).detach();
                }

                // Add new GXML content. The list is reversed and then walked
                // from the end, so nodes are detached from the tail of the
                // list while being added to seg in their original order.
                content = doc.getRootElement().content();
                Collections.reverse(content);
                for (int i = content.size() - 1; i >= 0; --i) {
                    Node node = (Node) content.get(i);
                    element.add(node.detach());
                }
            } catch (Throwable ex) {
                // Best effort: a segment that cannot be converted only
                // invalidates its TU. Record why, so the error count in the
                // summary can be traced back to a cause.
                m_tuError = true;
                debug("Entry " + m_entryCount + " could not be converted: " + ex);
            }
        }
    });

    // All work happens in the handlers; the returned document is not needed.
    reader.read(p_url);

    closeOutputFile();

    info("Processed " + m_entryCount + " TUs " + "into file `" + m_filename + "', " + m_errorCount
            + " errors.");

    return m_filename;
}

From source file:com.globalsight.everest.tm.util.trados.TradosHtmlTmxToGxml.java

License:Apache License

/**
 * Main method to call, returns the new filename of the result.
 *
 * Parses the TMX file at {@code p_url} with a validating Xerces parser.
 * Each {@code <seg>} is first passed through {@code removeUtElements}; its
 * text is then converted by {@code handleTuv} and the resulting GXML replaces
 * the segment content. Every {@code <tu>} whose segments all converted is
 * written to the output file. TUs with a failed segment are counted in
 * {@code m_errorCount} and skipped.
 *
 * @param p_url location of the TMX file to convert
 * @return the name of the output file ({@code m_filename})
 * @throws Exception if reading or parsing the document fails
 */
public String convertToGxml(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    final String extension = getExtension(p_url);

    info("Converting TMX file to GXML: `" + p_url + "'");

    startOutputFile(baseName);

    m_entryCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // Root element: remember the TMX version attribute.
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            Element element = path.getCurrent();

            m_version = element.attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // Header: convert it, then prune it from the tree.
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();
            setOldHeader(element);
            createNewHeader();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Translation units: write the converted TU (or count the error), prune.
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
            m_tuError = false;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            if (m_tuError) {
                m_errorCount++;
            } else {
                writeEntry(element.asXML());
            }

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    // Segments: swap the original content for the converted GXML. These
    // elements are not pruned here; they leave the tree with their TU.
    reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            try {
                element = removeUtElements(element);

                String gxml = handleTuv(element.getText());
                Document doc = parse("<root>" + gxml + "</root>");

                // Remove old content of seg
                List content = element.content();
                for (int i = content.size() - 1; i >= 0; --i) {
                    ((Node) content.get(i)).detach();
                }

                // Add new GXML content. The list is reversed and then walked
                // from the end, so nodes are detached from the tail of the
                // list while being added to seg in their original order.
                content = doc.getRootElement().content();
                Collections.reverse(content);
                for (int i = content.size() - 1; i >= 0; --i) {
                    Node node = (Node) content.get(i);
                    element.add(node.detach());
                }
            } catch (Throwable ex) {
                // Best effort: a segment that cannot be converted only
                // invalidates its TU. Record why, so the error count in the
                // summary can be traced back to a cause.
                m_tuError = true;
                debug("Entry " + m_entryCount + " could not be converted: " + ex);
            }
        }
    });

    // All work happens in the handlers; the returned document is not needed.
    reader.read(p_url);

    closeOutputFile();

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "', " + m_errorCount + " errors.");

    return m_filename;
}

From source file:com.globalsight.everest.tm.util.trados.TradosTmxToRtf.java

License:Apache License

/**
 * Main method to call, returns the new filename of the result.
 *
 * Parses the TMX file at {@code p_url} with a validating Xerces parser.
 * From the header, the text of the {@code RTFFontTable} and
 * {@code RTFStyleSheet} {@code <prop>} children (when present) is written to
 * the output, followed by the remaining RTF header and a dummy paragraph.
 * Each {@code <tu>} is then written as one entry followed by {@code \par}.
 *
 * @param p_url location of the TMX file to convert
 * @return the name of the output file ({@code m_filename})
 * @throws Exception if reading or parsing the document fails
 */
public String convertToRtf(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    final String extension = getExtension(p_url);

    info("Converting TMX file to RTF: `" + p_url + "'");

    startOutputFile(baseName);

    m_entryCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // Root element: remember the TMX version attribute.
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            Element element = path.getCurrent();

            m_version = element.attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // Header: emit the RTF preamble, then prune the header from the tree.
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();
            setOldHeader(element);

            // The paths must be relative to the header element. A leading
            // "/" makes XPath start at the document root, where no <prop>
            // exists, so the lookup could never match.
            Element prop = (Element) element.selectSingleNode("prop[@type='RTFFontTable']");

            if (prop != null) {
                writeEntry(prop.getText());
            }

            prop = (Element) element.selectSingleNode("prop[@type='RTFStyleSheet']");

            if (prop != null) {
                writeEntry(prop.getText());
            }

            writeOtherRtfHeader();

            writeDummyParagraph();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Translation units: clean up, write as one RTF paragraph, prune.
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            element = removeUtElements(element);

            writeEntry(replaceUnicodeChars(removeRtfParagraphs(element.asXML())));
            writeEntry("\\par");

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    // All work happens in the handlers; the returned document is not needed.
    reader.read(p_url);

    closeOutputFile();

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "'");

    return m_filename;
}

From source file:com.globalsight.everest.webapp.pagehandler.administration.config.xmldtd.XmlDtdManager.java

License:Apache License

/**
 * Validates an XML file against the DTD registered for the given id.
 *
 * Files whose name does not end in ".xml" are skipped, as are files for
 * which no existing DTD file is found.
 *
 * @param id
 *            The xml dtd id.
 * @param file
 *            The xml file to validate.
 * @throws DtdException
 *             if the file cannot be parsed or fails DTD validation
 */
public static void validateXmlFile(long id, File file) throws DtdException {
    Assert.assertFileExist(file);

    // Only XML files are subject to DTD validation.
    if (!file.getName().endsWith(".xml")) {
        return;
    }

    logger.debug("File: " + file.getPath());

    File dtd = DtdFileManager.getDtdFile(id, file);
    if (dtd == null || !dtd.exists()) {
        // No DTD available for this file: nothing to validate against.
        return;
    }

    logger.debug("DTD: " + dtd.getPath());

    SAXReader validatingReader = new SAXReader();
    validatingReader.setEntityResolver(new DtdEntityResolver(dtd));
    validatingReader.setValidation(true);

    try {
        Document parsed = validatingReader.read(file);
        // Only the parse outcome matters; release the tree right away.
        parsed.clearContent();
        logger.debug("Successful");
    } catch (Exception e) {
        logger.info("DTD validation failed: " + e.getMessage());
        throw new DtdException(e);
    }
}

From source file:com.globalsight.ling.docproc.DiplomatWordCounter.java

License:Apache License

/**
 * Creates a non-validating SAXReader, preferring the Aelfred SAX driver
 * and falling back to the default parser when that driver cannot be set.
 *
 * @return a SAXReader with validation switched off
 */
private SAXReader createXmlParser() {
    SAXReader aelfredReader = new SAXReader();

    try {
        aelfredReader.setXMLReaderClassName("org.dom4j.io.aelfred.SAXDriver");
        aelfredReader.setValidation(false);
        return aelfredReader;
    } catch (Exception ex) {
        System.err.println("org.dom4j.io.aelfred.SAXDriver not found");
    }

    // Use the system default parser, better than nothing.
    SAXReader defaultReader = new SAXReader();
    defaultReader.setValidation(false);
    return defaultReader;
}

From source file:com.globalsight.smartbox.bussiness.process.Usecase01PreProcess.java

License:Apache License

/**
 * Reads the bookmap XML that accompanies the PDF among the source files and
 * derives from it the file profile ({@code fp}), the target locale
 * ({@code trgLocale}), the un-extracted file profile ({@code unExtractedFP})
 * and the basic job name ({@code basicJobName}).
 *
 * The XML file is expected to have the same name as the PDF with an ".xml"
 * extension. Its GSDATA value has the form {@code jobName~fileProfileName}.
 * On any lookup failure a message is logged and the method returns without
 * setting the remaining fields.
 *
 * @param p_sourceFiles paths of all source files of the job
 */
private void parseFile(Vector<String> p_sourceFiles) {
    // Parse Bookmark XML File to get fileProfileName and job name.
    Map<String, String> srcMap = new HashMap<String, String>();
    String fileName = null;
    for (String sourcePath : p_sourceFiles) {
        String temp = sourcePath.substring(sourcePath.lastIndexOf(File.separator) + 1);
        srcMap.put(temp, sourcePath);
        if (temp.endsWith(".pdf")) {
            // Swap only the extension; String.replace would also rewrite
            // any ".pdf" occurring earlier in the file name.
            fileName = temp.substring(0, temp.length() - ".pdf".length()) + ".xml";
        }
    }
    String path = srcMap.get(fileName);
    try {
        SAXReader saxReader = new SAXReader();
        saxReader.setValidation(false);
        saxReader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        Document doc = saxReader.read(path);

        // A missing node or a value without "~" raises a runtime exception
        // below, which the catch block reports as a read error.
        Element node = (Element) doc.selectSingleNode("//bookmeta/data[@datatype='GSDATA']");
        String gsDataValue = node.attributeValue("value");
        int separator = gsDataValue.indexOf("~");
        String jobName = gsDataValue.substring(0, separator);
        String customerFPName = gsDataValue.substring(separator + 1);
        FileProfile configFP = getConfigFP(customerFPName);
        if (configFP == null) {
            String message = "Can't find the fileProfileNameMapping for " + customerFPName;
            LogUtil.info(message);
            return;
        }
        fp = gsFPMap.get(configFP.getName());
        if (fp == null) {
            String message = "Can't find the file profile.";
            LogUtil.info(message);
            return;
        }

        // Get Target Locale.
        node = (Element) doc.selectSingleNode("/bookmap");
        String lang = node.attributeValue("lang");
        for (String locale : fp.getTargetLocale()) {
            if (locale.startsWith(lang)) {
                trgLocale = locale;
                break;
            }
        }
        if (trgLocale == null || trgLocale.trim().length() == 0) {
            String message = "Can't find the correct Target Locale in File Profile.";
            LogUtil.info(message);
            return;
        }

        String unExtractedFPName = configFP.getGsUnExtractedFPName();
        unExtractedFP = gsFPMap.get(unExtractedFPName);
        if (unExtractedFP == null) {
            // Report the name that was actually looked up.
            String message = "Can't find the UnExtracted file profile: " + unExtractedFPName;
            LogUtil.info(message);
            return;
        }

        basicJobName = jobName + "_" + customerFPName + "_" + lang;
    } catch (Exception e) {
        String message = "Read XML error: " + path;
        LogUtil.fail(message, e);
        return;
    }
}

From source file:com.globalsight.smartbox.bussiness.process.Usecase04PreProcess.java

License:Apache License

/**
 * Reads the bookmap XML found among the source files and derives from it
 * the file profile ({@code fp}), the target locale ({@code trgLocale}), the
 * un-extracted file profile ({@code unExtractedFP}) and the basic job name
 * ({@code basicJobName}).
 *
 * The GSDATA value in the XML has the form {@code jobName~fileProfileName}.
 * On any lookup failure a message is logged and the method returns without
 * setting the remaining fields.
 *
 * @param p_sourceFiles paths of all source files of the job
 */
private void parseFile(Vector<String> p_sourceFiles) {
    String path = null;

    try {
        // Parse Bookmark XML File to get fileProfileName and job name.
        path = getBookMapFile(p_sourceFiles);

        SAXReader saxReader = new SAXReader();
        saxReader.setValidation(false);
        saxReader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        Document doc = saxReader.read(path);

        // A missing node or a value without "~" raises a runtime exception
        // below, which the generic catch block reports as a read error.
        Element node = (Element) doc.selectSingleNode("//bookmeta/data[@datatype='GSDATA']");
        String gsDataValue = node.attributeValue("value");
        int separator = gsDataValue.indexOf("~");
        String jobName = gsDataValue.substring(0, separator);
        String customerFPName = gsDataValue.substring(separator + 1);
        FileProfile configFP = getConfigFP(customerFPName);
        if (configFP == null) {
            String message = "Can't find the fileProfileNameMapping for " + customerFPName;
            LogUtil.info(message);
            return;
        }
        fp = gsFPMap.get(configFP.getName());
        if (fp == null) {
            String message = "Can't find the file profile.";
            LogUtil.info(message);
            return;
        }

        // Get Target Locale.
        node = (Element) doc.selectSingleNode("/bookmap");
        String lang = node.attributeValue("lang");
        for (String locale : fp.getTargetLocale()) {
            if (locale.startsWith(lang)) {
                trgLocale = locale;
                break;
            }
        }
        if (trgLocale == null || trgLocale.trim().length() == 0) {
            String message = "Can't find the correct Target Locale in File Profile.";
            LogUtil.info(message);
            return;
        }

        String unExtractedFPName = configFP.getGsUnExtractedFPName();
        unExtractedFP = gsFPMap.get(unExtractedFPName);
        if (unExtractedFP == null) {
            // Report the name that was actually looked up.
            String message = "Can't find the UnExtracted file profile: " + unExtractedFPName;
            LogUtil.info(message);
            return;
        }

        basicJobName = jobName + "_" + customerFPName + "_" + lang;
    } catch (FileNotFoundException e) {
        LogUtil.fail("", e);
        return;
    } catch (Exception e) {
        String message = "Read XML error: " + path;
        LogUtil.fail(message, e);
        return;
    }
}