Example usage for org.dom4j.io SAXReader read

Introduction

On this page you can find example usages of the org.dom4j.io SAXReader read method.

Prototype

public Document read(InputSource in) throws DocumentException

Source Link

Document

Reads a Document from the given InputSource using SAX

Usage

From source file:com.globalsight.terminology.importer.MtfReader.java

License:Apache License

/**
 * Parses the XML document at the given location to check that it can be
 * read. A parse failure is propagated to the caller as an exception.
 * Each {@code /mtf/conceptGrp} element that is started increments
 * {@code m_entryCount}.
 *
 * @param p_url location of the document, passed straight to the reader
 * @throws Exception if the reader cannot be configured or the document
 *             cannot be parsed
 */
private void analyzeXml(String p_url) throws Exception {
    SAXReader saxReader = new SAXReader();
    saxReader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    CATEGORY.debug("Analyzing document: " + p_url);

    // Count every entry as it opens and detach it from the tree as soon
    // as it closes, so completed entries do not pile up in memory.
    ElementHandler entryCounter = new ElementHandler() {
        public void onStart(ElementPath entryPath) {
            ++m_entryCount;
        }

        public void onEnd(ElementPath entryPath) {
            // TODO: validate entry and report errors.
            entryPath.getCurrent().detach();
        }
    };
    saxReader.addHandler("/mtf/conceptGrp", entryCounter);

    // Only the handler's side effects matter; the returned tree is unused.
    saxReader.read(p_url);
}

From source file:com.globalsight.terminology.importer.MtfReaderThread.java

License:Apache License

/**
 * Reads the file named by {@code m_options} and, for every completed
 * {@code /mtf/conceptGrp} element, hands one result object (holding either
 * an {@code Entry} or an error message) to {@code m_results}.
 * {@code m_results.producerDone()} is always called when reading ends.
 */
public void run() {
    try {
        SAXReader saxReader = new SAXReader();
        saxReader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

        // Invoked by the reader as each entry element starts and completes.
        ElementHandler entryHandler = new ElementHandler() {
            public void onStart(ElementPath entryPath) {
                m_count++;
            }

            public void onEnd(ElementPath entryPath) {
                Element current = entryPath.getCurrent();

                // Detach the finished element from the tree to save memory.
                current.detach();

                m_result = m_results.hireResult();

                try {
                    // Convert MultiTerm to GlobalSight.
                    Element converted = convertMtf(current);
                    Entry entry = new Entry(m_factory.createDocument(converted));

                    if (CATEGORY.isDebugEnabled()) {
                        CATEGORY.debug(entry.getXml());
                    }

                    m_result.setResultObject(entry);
                } catch (Throwable failure) {
                    // A bad entry is reported through the result object
                    // rather than aborting the whole read.
                    String message = "Entry " + m_count + ": " + failure.getMessage();

                    m_result.setError(message);

                    if (CATEGORY.isDebugEnabled()) {
                        CATEGORY.debug(message, failure);
                    } else {
                        CATEGORY.warn(message, failure);
                    }
                }

                boolean stopRequested = m_results.put(m_result);
                // Ownership has passed to m_results; the finally block
                // below must not release this result again.
                m_result = null;

                // Stop reading the XML file: the ThreadDeath unwinds out
                // of the parser and is caught by run() below.
                if (stopRequested) {
                    throw new ThreadDeath();
                }
            }
        };
        saxReader.addHandler("/mtf/conceptGrp", entryHandler);

        saxReader.read(m_options.getFileName());
    } catch (ThreadDeath stop) {
        CATEGORY.info("ReaderThread: interrupted.");
    } catch (Throwable unexpected) {
        // Should never happen. The failure is only logged; it is not
        // passed on through m_results for now.
        CATEGORY.error("unexpected error", unexpected);
    } finally {
        // Release a result that was hired but never handed over.
        if (m_result != null) {
            m_results.fireResult(m_result);
            m_result = null;
        }

        m_results.producerDone();
        m_results = null;

        CATEGORY.debug("ReaderThread: done.");
    }
}

From source file:com.globalsight.terminology.importer.TbxReader.java

License:Apache License

/**
 * Parses the TBX document at the given location and increments
 * {@code m_entryCount} for every {@code /martif/text/body/termEntry}
 * element that is started.
 *
 * @param p_url location of the document, passed straight to the reader
 * @throws Exception if the reader cannot be configured or the document
 *             cannot be parsed
 */
private void analyzeTbx(String p_url) throws Exception {
    SAXReader saxReader = new SAXReader();
    saxReader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    CATEGORY.debug("Analyzing document: " + p_url);

    // Count each term entry when it opens; detach it when it closes so
    // completed entries are not retained in the tree.
    ElementHandler entryCounter = new ElementHandler() {
        public void onStart(ElementPath entryPath) {
            ++m_entryCount;
        }

        public void onEnd(ElementPath entryPath) {
            entryPath.getCurrent().detach();
        }
    };
    saxReader.addHandler("/martif/text/body/termEntry", entryCounter);

    // Only the handler's side effects matter; the returned tree is unused.
    saxReader.read(p_url);
}

From source file:com.globalsight.terminology.importer.TbxReaderThread.java

License:Apache License

/**
 * Reads the file named by {@code m_options} and, for every completed
 * {@code /martif/text/body/termEntry} element, wraps it in an
 * {@code Entry} and hands it to {@code m_results}.
 * {@code m_results.producerDone()} is always called when reading ends.
 *
 * Unexpected failures are logged instead of being silently discarded, so
 * an aborted import leaves a trace in the log.
 */
public void run() {
    try {
        SAXReader reader = new SAXReader();
        reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

        // enable pruning to call me back as each Element is complete
        reader.addHandler("/martif/text/body/termEntry", new ElementHandler() {
            public void onStart(ElementPath path) {
                // Nothing to do when an entry opens.
            }

            public void onEnd(ElementPath path) {
                Element element = path.getCurrent();

                // prune the current element to reduce memory
                element.detach();

                Document doc = m_factory.createDocument(element);
                Entry entry = new Entry(doc);

                m_result = m_results.hireResult();
                m_result.setResultObject(entry);

                boolean done = m_results.put(m_result);
                // Ownership has passed to m_results; the finally block
                // below must not release this result again.
                m_result = null;

                // Stop reading the TBX file: the ThreadDeath unwinds out
                // of the parser and is caught by run() below.
                if (done) {
                    throw new ThreadDeath();
                }
            }
        });

        String url = m_options.getFileName();

        reader.read(url);
    } catch (ThreadDeath ignored) {
        // Deliberate early stop requested through m_results.put().
        CATEGORY.info("ReaderThread: interrupted.");
    } catch (Throwable ex) {
        // Should never happen. The failure is not passed on through
        // m_results for now, but it must at least be logged; otherwise
        // a failed read is indistinguishable from a completed one.
        CATEGORY.error("unexpected error", ex);
    } finally {
        // Release a result that was hired but never handed over.
        if (m_result != null) {
            m_results.fireResult(m_result);
            m_result = null;
        }

        m_results.producerDone();
        m_results = null;

        CATEGORY.debug("ReaderThread: done.");
    }
}

From source file:com.globalsight.terminology.util.MtfAnalyzer.java

License:Apache License

/**
 * Counts the {@code /mtf/conceptGrp} elements of the document at the
 * given location. Progress is logged every 200 entries and the total is
 * logged once parsing has finished.
 *
 * @param p_url location of the document, passed straight to the reader
 * @throws Exception if the reader cannot be configured or the document
 *             cannot be parsed
 */
public void analyze(String p_url) throws Exception {
    m_entryCount = 0;

    SAXReader saxReader = new SAXReader();
    saxReader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    System.err.println("Analyzing document: " + p_url);

    // Count each entry when it opens; detach it when it closes so
    // completed entries are not retained in the tree.
    ElementHandler progressCounter = new ElementHandler() {
        public void onStart(ElementPath entryPath) {
            ++m_entryCount;

            boolean atProgressStep = (m_entryCount % 200 == 0);
            if (atProgressStep) {
                log("Entry " + m_entryCount);
            }
        }

        public void onEnd(ElementPath entryPath) {
            entryPath.getCurrent().detach();
        }
    };
    saxReader.addHandler("/mtf/conceptGrp", progressCounter);

    // Only the handler's side effects matter; the returned tree is unused.
    saxReader.read(p_url);

    log("Total entries: " + m_entryCount);
}

From source file:com.globalsight.terminology.util.MtfSplitter.java

License:Apache License

/**
 * Splits the document at the given location into several output files,
 * each holding at most {@code p_numEntries} {@code /mtf/conceptGrp}
 * entries.
 *
 * A new output file is started only once the current one already holds
 * the maximum number of entries, so every file except possibly the last
 * contains exactly that many entries.
 *
 * @param p_url location of the document to split
 * @param p_numEntries maximum number of entries per output file, as a
 *            decimal string; must be at least 1
 * @throws NumberFormatException if {@code p_numEntries} is not an integer
 * @throws IllegalArgumentException if {@code p_numEntries} is below 1
 * @throws Exception if the document cannot be read or an output file
 *             cannot be opened or closed
 */
public void split(String p_url, String p_numEntries) throws Exception {
    final int maxEntries = Integer.parseInt(p_numEntries);
    if (maxEntries < 1) {
        // Reject up front: zero would otherwise cause a divide-by-zero
        // inside the parser callback after an output file is open.
        throw new IllegalArgumentException("Number of entries per file must be at least 1: " + p_numEntries);
    }

    final String baseName = getBaseName(p_url);
    final String extension = getExtension(p_url);

    m_entryCount = 0;

    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    log("Splitting document `" + p_url + "'");

    startFile(baseName, extension);

    // enable element complete notifications to conserve memory
    reader.addHandler("/mtf/conceptGrp", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;

            // Rotate when the entries before this one have exactly
            // filled the current file. Testing (count - 1) instead of
            // count keeps the first file from ending one entry short.
            if (m_entryCount > 1 && (m_entryCount - 1) % maxEntries == 0) {
                try {
                    closeFile();
                    startFile(baseName, extension);
                } catch (Exception ex) {
                    // The handler cannot throw checked exceptions, so
                    // the tool gives up here.
                    log(ex.toString());
                    System.exit(1);
                }
            }
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            writeEntry(element.asXML());

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Only the handler's side effects matter; the returned tree is unused.
    reader.read(p_url);

    closeFile();
}

From source file:com.globalsight.util.ConfigUtil.java

License:Apache License

/**
 * Loads the server connection settings and polling intervals from the
 * XML file named by {@code Constants.CONFIG_FILE_NAME}.
 *
 * A missing element or a non-numeric port/interval is handled the same
 * way as an unreadable file: it is logged and {@code null} is returned.
 *
 * @return the parsed configuration, or {@code null} if the file could
 *         not be read or is incomplete or malformed
 */
public static ConfigBO getConfigBO() {
    try {
        SAXReader saxReader = new SAXReader();
        Document document = saxReader.read(new File(Constants.CONFIG_FILE_NAME));
        Node serverNode = document.selectSingleNode("/Configuration/server");
        if (serverNode == null) {
            throw new DocumentException("Missing configuration element: /Configuration/server");
        }

        String hostName = configText(serverNode, "host");
        int port = configInt(serverNode, "port");
        String userName = configText(serverNode, "username");
        String password = configText(serverNode, "password");
        boolean isUseHTTPS = Boolean.parseBoolean(configText(serverNode, "https").trim());

        int intervalTimeForArchive = configInt(document, "//intervalTimeForArchive");
        int intervalTime = configInt(document, "//intervalTime");

        return new ConfigBO(hostName, port, userName, password, intervalTimeForArchive, intervalTime,
                isUseHTTPS);
    } catch (DocumentException e) {
        LogUtil.info("Fail to read configuration info", e);
    }

    return null;
}

/** Returns the text of the node selected by {@code xpath}, failing if no such node exists. */
private static String configText(Node context, String xpath) throws DocumentException {
    Node node = context.selectSingleNode(xpath);
    if (node == null) {
        throw new DocumentException("Missing configuration element: " + xpath);
    }
    return node.getText();
}

/** Returns the text of the node selected by {@code xpath} parsed as an int, failing if it is absent or not a number. */
private static int configInt(Node context, String xpath) throws DocumentException {
    String text = configText(context, xpath).trim();
    try {
        return Integer.parseInt(text);
    } catch (NumberFormatException e) {
        // Re-thrown as DocumentException so the caller's single
        // log-and-return-null path covers malformed numbers too.
        throw new DocumentException("Configuration element " + xpath + " is not an integer: " + text, e);
    }
}

From source file:com.globalsight.util.file.XliffFileUtil.java

License:Apache License

/**
 * Checks whether the specified file has more than one {@code <file>}
 * element directly under its root element. Element names are compared
 * case-insensitively after trimming.
 *
 * @param p_filename
 *            File name
 * @return {@code true} if the file contains multiple {@code <file>}
 *         elements; {@code false} otherwise, including when the name is
 *         empty, the file does not exist or it cannot be parsed
 *
 * @version 1.0
 * @since 8.2.2
 */
public static boolean isMultipleFileTags(String p_filename) {
    if (StringUtil.isEmpty(p_filename)) {
        return false;
    }

    try {
        File file = new File(p_filename);
        if (!file.exists() || !file.isFile()) {
            return false;
        }

        Element rootElement = new SAXReader().read(file).getRootElement();
        int numOfFileTags = 0;
        for (Iterator<Element> iterator = rootElement.elementIterator(); iterator.hasNext();) {
            String tag = iterator.next().getName().trim();
            // equalsIgnoreCase does not depend on the default locale,
            // unlike toLowerCase(), which maps "FILE" to "fıle" (dotless
            // i) under a Turkish locale and so would miss the match.
            if ("file".equalsIgnoreCase(tag)) {
                numOfFileTags++;
            }
        }
        return numOfFileTags > 1;
    } catch (Exception e) {
        logger.error("Can not verify if current file contains multiple file tags.", e);
        return false;
    }
}

From source file:com.globalsight.util.file.XliffFileUtil.java

License:Apache License

/**
 * Checks whether the specified file has a {@code <file>} element directly
 * under its root whose {@code tool} attribute contains "worldserver"
 * (compared case-insensitively).
 *
 * The {@code <file>} elements are examined in document order, and the
 * first one without a {@code tool} attribute ends the search with
 * {@code false}.
 * NOTE(review): that early exit means later elements are never examined;
 * confirm this is intended rather than a "skip and continue".
 *
 * @param p_filename
 *            File name
 * @return {@code true} if a matching {@code <file>} element is found;
 *         {@code false} otherwise, including when the name is empty, the
 *         file does not exist or it cannot be parsed
 */
public static boolean containsFileTag(String p_filename) {
    if (StringUtil.isEmpty(p_filename)) {
        return false;
    }

    try {
        File file = new File(p_filename);
        if (!file.exists() || !file.isFile()) {
            return false;
        }

        Element rootElement = new SAXReader().read(file).getRootElement();
        List fileTags = rootElement.elements("file");
        if (fileTags == null) {
            return false;
        }

        for (Object item : fileTags) {
            String tool = ((Element) item).attributeValue("tool");
            if (tool == null) {
                return false;
            }
            // Explicit locale keeps the comparison independent of the
            // JVM's default locale.
            if (tool.toLowerCase(java.util.Locale.ROOT).contains("worldserver")) {
                return true;
            }
        }
        return false;
    } catch (Exception e) {
        logger.error("Can not verify if current file contains a WorldServer file tag.", e);
        return false;
    }
}

From source file:com.globalsight.util.file.XliffFileUtil.java

License:Apache License

/**
 * Generates one separate file per {@code <file>} element found in the
 * content of the original Xliff file. Each generated file consists of the
 * header, one {@code <file>...</file>} section and the footer, and is
 * written in UTF-8 into a sub-directory named after the original file
 * plus {@code SEPARATE_FLAG}.
 *
 * Any failure is logged; in that case {@code multipleFileTagsXliff} is
 * left unchanged, although files written before the failure remain on
 * disk.
 *
 * @param multipleFileTagsXliff
 *            Object that receives the list of separated files
 * @param absoluteFilename
 *            File name with absolute path
 * @param header
 *            Header content of original Xliff file
 * @param content
 *            Main content of original Xliff file
 * @param footer
 *            Footer content of original Xliff file
 *
 * @version 1.0
 * @since 8.2.2
 */
private static void generateSeparatedFiles(MultipleFileTagsXliff multipleFileTagsXliff, String absoluteFilename,
        String header, String content, String footer) {
    try {
        String absolutePath = getBaseAbsoluatePath(absoluteFilename);
        String mainName = getMainFilename(absoluteFilename);
        String separatedDirPath = mainName + SEPARATE_FLAG;
        String absouateSeparatedPath = absolutePath + File.separator + separatedDirPath;
        new File(absouateSeparatedPath).mkdirs();

        final String fileEndTag = "</file>";
        SAXReader saxReader = new SAXReader();
        ArrayList<String> separatedFiles = new ArrayList<String>();
        int count = 1;
        int beginIndex;
        while ((beginIndex = content.indexOf("<file ")) > -1) {
            // Look for the closing tag after the opening one, and fail
            // clearly when it is missing instead of computing a bogus
            // end index from -1.
            int closeIndex = content.indexOf(fileEndTag, beginIndex);
            if (closeIndex < 0) {
                throw new IllegalStateException("Unterminated <file> element in " + absoluteFilename);
            }
            int endIndex = closeIndex + fileEndTag.length();
            String subContent = header + content.substring(beginIndex, endIndex) + footer;

            // Use XML parser to get element value
            Document document = saxReader.read(new StringReader(subContent));
            Element fileTagElement = document.getRootElement().element("file");
            String originalSubFilename = fileTagElement.attributeValue("original").replace("/",
                    File.separator);
            originalSubFilename = originalSubFilename
                    .substring(originalSubFilename.lastIndexOf(File.separator) + 1);

            // Reset for every <file>: a section without asset data must
            // not inherit the id of the previous section.
            String segAssetId = "";
            Element fileHeaderElement = fileTagElement.element("header");
            if (fileHeaderElement != null && fileHeaderElement.element("asset-data") != null) {
                segAssetId = fileHeaderElement.element("asset-data").elementText("seg_asset_id");
            }

            // Generate file name of separated file
            String subFilename = originalSubFilename + "_" + segAssetId + "_" + count + ".xlf";

            // Write separated file into disk; the writer is closed even
            // if the write fails.
            File file = new File(absouateSeparatedPath, subFilename);
            BufferedWriter fout = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(file), "UTF-8"));
            try {
                fout.write(subContent);
            } finally {
                fout.close();
            }

            separatedFiles.add(getRelativePath(file.getAbsolutePath()));

            content = content.substring(endIndex);
            count++;
        }
        // NOTE(review): count starts at 1 and is incremented after each
        // file, so this value is one more than the number of files
        // written. Kept as is; confirm what consumers of getCount expect.
        multipleFileTagsXliff.setCount(count);
        multipleFileTagsXliff.setSeparatedFiles(separatedFiles);
        multipleFileTagsXliff.setSeparatedFolderName(separatedDirPath);
    } catch (Exception e) {
        logger.error("Error in generateExtractedFile.", e);
    }
}