Example usage for the org.dom4j.io.SAXReader constructor SAXReader()

List of usage examples for the org.dom4j.io.SAXReader constructor SAXReader()

Introduction

On this page you can find example usages of the no-argument constructor SAXReader() of org.dom4j.io.SAXReader.

Prototype

public SAXReader() 

Source Link

Usage

From source file:com.globalsight.everest.edit.offline.OfflineEditManagerLocal.java

License:Apache License

/**
 * Parses an uploaded XLIFF file, converts its source and target elements to
 * pseudo text and installs the resulting text as the reader of the given
 * detection result.
 *
 * @param detect
 *            the detection result whose {@code m_reader} is set to a reader
 *            over the converted content.
 * @param file
 *            the file to offline upload.
 * @param p_user
 *            the user passed on to {@code addComment}.
 * @param p_fileName
 *            the file name reported to the error page writer and to the
 *            conversion step.
 * @return the error message produced by the source or target conversion,
 *         or {@code null} when both conversions reported no error.
 * @throws AmbassadorDwUpException
 *             with {@code INVALID_FILE_FORMAT} if the file cannot be parsed.
 * @throws Exception
 *             if adding comments (after the transaction has been rolled
 *             back) or producing the text content fails.
 */
private String convertXlif2Pseudo(DetectionResult detect, File file, User p_user, String p_fileName)
        throws Exception {
    String errMsg = null;
    org.dom4j.Document doc = null;
    try {
        SAXReader reader = new SAXReader();

        // The file comes from a user upload, so do not let the parser fetch
        // external DTDs or resolve external entities (XXE). These are the
        // same features dom4j's own hardened factory switches off. A parser
        // that does not know a feature must not make every upload fail, so
        // the failure is logged and parsing continues.
        try {
            reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
            reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        } catch (Exception featureError) {
            s_category.error("Unable to disable external entity processing for the uploaded file",
                    featureError);
        }

        doc = reader.read(file);
    } catch (Exception e) {
        s_category.error(e.getMessage(), e);
        throw new AmbassadorDwUpException(AmbassadorDwUpExceptionConstants.INVALID_FILE_FORMAT, e);
    }

    // Get all jobIds from uploading file. If combined, there will be
    // multiple tasks/pages/jobs in one file.
    HashSet<Long> jobIds = getJobIdsFromDoc(doc);

    PtagErrorPageWriter errWriter = new PtagErrorPageWriter();
    errWriter.setFileName(p_fileName);
    errWriter.setPageId(XliffFileUtil.getPageId(doc));
    errWriter.setTaskId(XliffFileUtil.getTaskId(doc));
    errWriter.setWorkflowId(XliffFileUtil.getWorkflowId(doc));

    reWrapXliff(doc, jobIds);

    // Convert source first, then target; stop at the first reported error.
    errMsg = convertNode2Pseudo(doc, XliffConstants.SOURCE, p_fileName, jobIds, errWriter);
    if (errMsg != null) {
        return errMsg;
    }

    errMsg = convertNode2Pseudo(doc, XliffConstants.TARGET, p_fileName, jobIds, errWriter);
    if (errMsg != null) {
        return errMsg;
    }

    // Comments are stored inside a transaction of their own.
    Transaction tx = HibernateUtil.getTransaction();
    try {
        addComment(doc, p_user, jobIds);
        HibernateUtil.commit(tx);
    } catch (Exception e) {
        HibernateUtil.rollback(tx);
        throw e;
    }

    try {
        XlfParser parser = new XlfParser();
        String xlfContent = parser.parseToTxt(doc);
        StringReader sr = new StringReader(xlfContent);
        detect.m_reader = new BufferedReader(sr);
    } catch (Exception e) {
        s_category.error(e.getMessage(), e);
        throw e;
    }

    // Still null here: both conversions succeeded.
    return errMsg;
}

From source file:com.globalsight.everest.edit.offline.ttx.TTXParser.java

License:Apache License

/**
 * Parses the XML supplied by the given reader with a fresh
 * {@code SAXReader}.
 *
 * @param reader
 *            the character stream to parse.
 * @return the parsed document.
 * @throws DocumentException
 *             if the content cannot be parsed.
 */
public Document getDocument(Reader reader) throws DocumentException {
    return (Document) new SAXReader().read(reader);
}

From source file:com.globalsight.everest.edit.offline.ttx.TTXParser.java

License:Apache License

/**
 * Parses the given XML file with a fresh {@code SAXReader}.
 *
 * @param file
 *            the file to parse.
 * @return the parsed document.
 * @throws Exception
 *             if the file cannot be read or parsed.
 */
public Document getDocument(File file) throws Exception {
    final SAXReader parser = new SAXReader();
    return (Document) parser.read(file);
}

From source file:com.globalsight.everest.edit.offline.xliff.ListViewWorkXLIFFWriter.java

License:Apache License

/**
 * Returns the shared {@code reader} instance, creating it on first use.
 *
 * @return the shared reader, never {@code null}.
 */
private static SAXReader getSAXReader() {
    if (reader != null) {
        return reader;
    }

    reader = new SAXReader();
    return reader;
}

From source file:com.globalsight.everest.page.WsPageTemplateExtention.java

License:Apache License

/**
 * Concatenates the text returned by
 * {@code ListViewWorkXLIFFWriter.getAltByMatch} for every leverage match in
 * the list that {@code judgeIfneedAdd} accepts.
 *
 * The list is first handed to {@code LeverageMatch.orderMatchResult}, so the
 * caller's list may be reordered (presumably in place - confirm against that
 * method).
 *
 * @param p_list
 *            the leverage matches to render; may be {@code null}.
 * @param p_jobId
 *            the job id passed through to {@code getAltByMatch}.
 * @return the concatenated text, or an empty string when the list is
 *         {@code null} or no match is accepted.
 */
private String getAltTransOfMatch(List<LeverageMatch> p_list, long p_jobId) {
    ListViewWorkXLIFFWriter lvwx = new ListViewWorkXLIFFWriter();
    // A builder avoids re-copying the accumulated text on every match.
    StringBuilder altStr = new StringBuilder();

    if (p_list != null) {
        LeverageMatch.orderMatchResult(p_list);

        // One reader is shared by all getAltByMatch calls of this invocation.
        SAXReader reader = new SAXReader();
        for (LeverageMatch leverageMatch : p_list) {
            if (judgeIfneedAdd(leverageMatch)) {
                altStr.append(lvwx.getAltByMatch(leverageMatch, null, reader, p_jobId));
            }
        }
    }

    return altStr.toString();
}

From source file:com.globalsight.everest.projecthandler.importer.XmlReader.java

License:Apache License

/**
 * Reads an XML file and checks its correctness by validating
 * against the TMX DTD. If there's any error in the file, an
 * exception is thrown./*w w  w .ja  v a 2s .c o m*/
 */
private void analyzeXml(String p_url) throws Exception {
    CATEGORY.debug("Analyzing document: " + p_url);

    SAXReader reader = new SAXReader();

    // TODO: Read the DTD and validate.
    // See com.globalsight.everest.tm.util.DtdResolver;

    // reader.setEntityResolver(DtdResolver.getInstance());
    // reader.setValidation(true);

    // enable element complete notifications to conserve memory
    //TODO
    reader.addHandler("/projectdata/data", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    Document document = reader.read(p_url);

    // all done
}

From source file:com.globalsight.everest.projecthandler.importer.XmlReaderThread.java

License:Apache License

/**
 * Parses the file named by {@code m_options.getFileName()} and hands one
 * result per {@code /projectdata/data} element to {@code m_results}.
 *
 * Parsing is aborted by throwing {@link ThreadDeath} from the element
 * handler as soon as {@code m_results.put} returns {@code true}. Whatever
 * happens, the finally block gives back a result that is still held, calls
 * {@code m_results.producerDone()} and clears {@code m_results}.
 */
public void run() {
    try {
        SAXReader reader = new SAXReader();

        // TODO: Read the DTD and validate.
        // See com.globalsight.everest.tm.util.DtdResolver;

        // reader.setEntityResolver(DtdResolver.getInstance());
        // reader.setValidation(true);

        // Handler for the root element; currently it does nothing with it.
        reader.addHandler("/projectdata", new ElementHandler() {
            public void onStart(ElementPath path) {
                Element element = path.getCurrent();
            }

            public void onEnd(ElementPath path) {
            }
        });

        // enable pruning to call me back as each Element is complete
        reader.addHandler("/projectdata/data", new ElementHandler() {
            public void onStart(ElementPath path) {
                // Running count of entries, used in the warning below.
                m_count++;
            }

            public void onEnd(ElementPath path) {
                Element element = path.getCurrent();

                // prune the current element to reduce memory
                element.detach();

                // Obtain a result holder for this entry.
                m_result = m_results.hireResult();

                try {
                    // TODO: Create data objects
                    // Until createObject exists, the detached element itself
                    // is used as the result object.
                    Object o = /*createObject*/(element);

                    if (CATEGORY.isDebugEnabled()) {
                        CATEGORY.debug(o);
                    }

                    m_result.setResultObject(o);
                } catch (Throwable ex) {
                    // A failing entry is reported through the result rather
                    // than stopping the parse.
                    m_result.setError(ex.toString());

                    CATEGORY.warn("Error in object " + m_count, ex);
                }

                // Hand the result over; m_result is cleared so the finally
                // block of run() does not give it back a second time.
                boolean done = m_results.put(m_result);
                m_result = null;

                // Stop reading the file.
                // Thrown outside the try above, so it propagates through
                // reader.read() to the catch (ThreadDeath) in run().
                if (done) {
                    throw new ThreadDeath();
                }
            }
        });

        String url = m_options.getFileName();

        // The returned document is not used; all work happens in the handlers.
        Document document = reader.read(url);
    } catch (ThreadDeath ignore) {
        // Expected path when the handler aborted the parse.
        CATEGORY.info("ReaderThread: interrupted");
    } catch (Throwable ignore) {
        // Should never happen, and I don't know how to handle
        // this case other than passing the exception in
        // m_results, which I won't do for now.
        CATEGORY.error("unexpected error", ignore);
    } finally {
        // A result that was hired but never put is given back.
        if (m_result != null) {
            m_results.fireResult(m_result);
        }

        m_results.producerDone();
        m_results = null;

        CATEGORY.debug("ReaderThread: done.");
    }
}

From source file:com.globalsight.everest.segmentationhelper.XmlLoader.java

License:Apache License

/**
 * Transfer xml file into Document./*from   w w  w.j av  a2  s  .  c o  m*/
 * 
 * @param file
 */
private static Document parserWithSAX(File file) throws Exception {
    SAXReader xmlReader = new SAXReader();
    Document doc = null;
    try {
        doc = xmlReader.read(file);
    } catch (Exception e) {
        e.printStackTrace();
        throw new Exception(e.getMessage());
    }

    return doc;
}

From source file:com.globalsight.everest.segmentationhelper.XmlLoader.java

License:Apache License

/**
 * Parses XML text into a Document.
 *
 * @param xmltext
 *            the XML content to parse.
 * @return the parsed document.
 * @throws Exception
 *             if the text cannot be parsed; the message is that of the
 *             underlying failure, which is attached as the cause.
 */
private static Document parseWithSAX(String xmltext) throws Exception {
    StringReader sr = new StringReader(xmltext);
    SAXReader xmlReader = new SAXReader();
    try {
        return xmlReader.read(sr);
    } catch (Exception e) {
        // Same exception type and message as before, but the original
        // failure is chained so its stack trace reaches the caller instead
        // of only being printed to stderr.
        throw new Exception(e.getMessage(), e);
    }
}

From source file:com.globalsight.everest.tm.importer.ImportUtil.java

License:Apache License

/**
 * Saves a TM file with sample validation.
 * /*from  w w  w . ja v a2 s . co  m*/
 * For some TM files, it vary easy to happen encoding error or xml role
 * error and can't be import correct. This method try to do some sample
 * validations for each tu. If a tu will be give up if inducing a error.
 * 
 * @param fileName
 * @throws Exception
 */
public void saveTmFileWithValidation(File file, File newFile, TmProcessStatus status) throws Exception {
    String encoding = "UTF-8";
    String outEncoding = "UTF-8";
    String logEncoding = "Unicode";
    String strLine = System.getProperty("line.separator");

    int errorCount = 0;
    int totalCount = 0;
    long lineCounter = 0;

    String s = null;

    try {
        if (file.exists()) {
            CATEGORY.info("Validating TM file: " + newFile.getAbsolutePath());

            Date startTime = new Date();

            File errorFile = getErrorFile(newFile);
            File infoFile = getInfoFile(newFile);
            File logFile = getLogFile(newFile);

            encoding = getEncodingOfXml(file);
            // GBS-2932 : UTF-8 by default
            if (encoding == null) {
                encoding = "UTF-8";
            }

            // Initialize IO.
            FileInputStream fIn = new FileInputStream(file);
            BufferedReader in = new BufferedReader(new InputStreamReader(fIn, encoding));
            FileOutputStream fOut = new FileOutputStream(newFile);
            BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fOut, outEncoding));
            FileOutputStream fError = new FileOutputStream(errorFile);
            OutputStreamWriter error = new OutputStreamWriter(fError, logEncoding);
            FileOutputStream fInfo = new FileOutputStream(infoFile);
            BufferedWriter info = new BufferedWriter(new OutputStreamWriter(fInfo, logEncoding));
            FileOutputStream fLog = new FileOutputStream(logFile);
            OutputStreamWriter log = new OutputStreamWriter(fLog, logEncoding);

            writeHead(error);
            writeHead(log);

            StringBuilder sb = new StringBuilder();

            // It must be <?xml ...
            s = in.readLine();
            s = changeXmlEncodingDec(s, outEncoding);

            status.addSize(s.getBytes(encoding).length);
            if (CATEGORY.isDebugEnabled()) {
                CATEGORY.debug("The content of in.readLine for encoding is " + s);
            }
            sb.append(s);
            sb.append(strLine);

            status.addSize(s.getBytes(encoding).length);

            // If the second line is define dtd
            s = in.readLine();
            if (CATEGORY.isDebugEnabled()) {
                CATEGORY.debug("The content of in.readLine for doctype is " + s);
            }
            if (s != null && s.indexOf("<!DOCTYPE") > -1) {
                status.addSize(s.getBytes(encoding).length);

                sb.append(s);
                sb.append(strLine);
                s = in.readLine();
                if (CATEGORY.isDebugEnabled()) {
                    CATEGORY.debug("The content of in.readLine is " + s);
                }
            } else if (newFile.getName().endsWith("tmx")) {
                // Don't define the dtd, add it.
                sb.append(TMX_DTD_LINE);
                sb.append(strLine);
            }
            boolean isRemoved = false;
            int count = 0;
            SAXReader reader = new SAXReader();
            while (s != null) {
                if (status.isCanceled()) {
                    CATEGORY.info("Cancelled validating");
                    break;
                }
                status.addSize(s.getBytes(encoding).length);

                if (isHeaderStart(s) && isTradosFontTableStart(s) && isHeaderEnd(s)) {
                    int headerEndTag = s.indexOf(">");
                    sb.append(s.subSequence(0, headerEndTag + 1));
                    int endHeaderTag = s.indexOf("</header>");
                    sb.append(s.substring(endHeaderTag));
                    sb.append(endHeaderTag);
                    sb.append(strLine);
                }
                if (isRemoved) {
                    if (isTradosFontTableEnd(s)) {
                        isRemoved = false;
                    }
                    s = in.readLine();
                    continue;
                }
                if (isTuStartTag(s)) {
                    /* The begin of the tu */
                    // Saves information recoded.
                    if (sb.length() > 0) {
                        out.write(sb.toString());
                        out.flush();
                    }

                    sb = resetStringBuilder(sb);
                    sb.append(s);
                    sb.append(strLine);

                    totalCount++;
                }

                // Validate for the tu.
                else if (isTuEndTag(s)) {
                    /* The end of the tu */
                    sb.append(s);
                    sb.append(strLine);
                    String content = sb.toString();

                    try {
                        /* verify the content */
                        reader.read(new StringReader(content));

                        // Saves the tu if no exception happen.
                        out.write(content);
                        out.flush();
                    } catch (Exception e) {
                        // Give up the tu if any exception happened.
                        error.write(content);

                        log.write(strLine);
                        log.write(SPLIT_LINE);
                        log.write(Integer.toString(++errorCount));
                        log.write(SPLIT_LINE);
                        log.write(strLine);

                        log.write(content);
                        log.write(strLine);
                        log.write(e.getMessage());
                        log.write(strLine);

                    }

                    sb = resetStringBuilder(sb);
                } else if (isTradosFontTableStart(s)) {
                    count++;
                    isRemoved = true;
                } else if (count > 0 && isTradosFontTableEnd(s)) {
                    isRemoved = false;
                } else if ((count > 0) && isHeaderEnd(s)) {
                    sb.append("</header>");
                    sb.append(strLine);
                } else {
                    // Records informations which not included in tu, first
                    // line
                    // etc.
                    sb.append(s);
                    sb.append(strLine);
                }

                s = in.readLine();
                if (CATEGORY.isDebugEnabled()) {
                    CATEGORY.debug("The content of in.readLine is " + s);
                }
                lineCounter++;
            }

            // Records informations which not included in tu and not saved
            // to
            // file. Usually it is "</body> </tmx>".
            if (sb.length() > 0) {
                out.write(sb.toString());
                out.flush();
            }

            in.close();

            out.close();

            CATEGORY.info("Done validating");

            log.write(SPLIT_LINE + SPLIT_LINE + strLine + strLine);
            log.write("Error: " + errorCount + strLine);
            log.write("Total: " + totalCount + strLine);

            // Gets the cost time.
            Date endTime = new Date();
            long costTime = endTime.getTime() - startTime.getTime();
            long h = costTime / (1000 * 60 * 60);
            costTime = costTime % (1000 * 60 * 60);
            long m = costTime / (1000 * 60);
            costTime = costTime % (1000 * 60);
            long se = costTime / 1000;
            StringBuffer time = new StringBuffer("Cost time: ");
            time.append(h).append(" h ").append(m).append(" m ").append(se).append(" s ");

            // Recodes some sample informations.
            String msg = "Error: " + errorCount + strLine;
            info.write(msg);
            msg = "Total: " + totalCount + strLine;
            info.write(msg);
            info.write(time.toString());

            writeFoot(error);
            writeFoot(log);

            error.flush();
            error.close();
            info.flush();
            info.close();
            log.flush();
            log.close();

            if (lineCounter > 10000) {
                CATEGORY.debug("forces jvm to perform gc when the line count reaches 10000. line count: "
                        + lineCounter);
                System.gc();
            }
        }
    } catch (IOException ie) {
        CATEGORY.error("IO Exception occured when save the tm file.");
        CATEGORY.error("The content of current line is " + s);
        CATEGORY.error("The stacktrace of the exception is ", ie);
        throw ie;
    } catch (Exception e) {
        CATEGORY.error("error occured when save the tm file.");
        CATEGORY.error("The content of current line is " + s);
        CATEGORY.error("The stacktrace of the exception is ", e);
        throw e;
    }

    status.setErrorTus(Integer.toString(errorCount));
    status.setTotalTus(Integer.toString(totalCount));
}