Example usage for org.dom4j ElementHandler ElementHandler

List of usage examples for org.dom4j ElementHandler ElementHandler

Introduction

On this page you can find example usages of org.dom4j ElementHandler.

Prototype

ElementHandler

Source Link

Usage

From source file:StreamFlusher.java

License:Apache License

/**
 * Reads an XML file representing a network and returns that network.
 *
 * <p>The file is parsed event-style with a dom4j SAXReader: one handler per
 * element path fills in the Fst (sigma, states, start state, arcs, final
 * states) while the document is being read. The sym, arc and final elements
 * are detached as soon as they have been handled.
 *
 * @param filepath path of the XML file to read
 * @return the Fst built from the file, after correctSigmaOther() was applied
 * @throws Exception whatever opening or parsing the file raised; its stack
 *         trace is printed before it is rethrown
 */
private Fst xml2fst(String filepath) throws Exception {

    final Fst fst = lib.EmptyLanguageFst();

    final HashSet<Integer> sigma = fst.getSigma();

    SAXReader reader = new SAXReader(); // SAXReader from dom4j

    // each SAXReader handler must define onStart() and onEnd() methods

    // when the kleeneFst element is first found
    reader.addHandler("/kleeneFst", new ElementHandler() {
        public void onStart(ElementPath path) {
            // semiring is an attribute on the kleeneFst node; the value is
            // read but not used yet (semiring generalization point)
            String semiring = path.getCurrent().attribute("semiring").getValue();
        }

        public void onEnd(ElementPath path) {
        }
    });

    reader.addHandler("/kleeneFst/sigma", new ElementHandler() {
        public void onStart(ElementPath path) {
            String containsOther = path.getCurrent().attribute("containsOther").getValue();
            fst.setContainsOther("true".equals(containsOther));
        }

        public void onEnd(ElementPath path) {
        }
    });

    reader.addHandler("/kleeneFst/sigma/sym", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element sym = path.getCurrent();
            sigma.add(symmap.putsym(sym.getText()));

            sym.detach();
        }
    });

    // when the arcs element is first found
    reader.addHandler("/kleeneFst/arcs", new ElementHandler() {
        public void onStart(ElementPath path) {
            // grab the two attrs and convert to int
            Element arcs = path.getCurrent();
            int startState = Integer.parseInt(arcs.attribute("start").getValue());
            int numStates = Integer.parseInt(arcs.attribute("numStates").getValue());

            // native function, add this many states to the new Fst
            lib.AddStates(fst, numStates);

            lib.SetStart(fst, startState); // set the start state
        }

        public void onEnd(ElementPath path) {
        }
    });

    // handle each whole arc element
    reader.addHandler("/kleeneFst/arcs/arc", new ElementHandler() {

        // in an ElementHandler, need to supply .onStart(),
        // called when the start tag is found, and
        // .onEnd(), which is called when the end tag is found.

        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {

            // retrieve the entire arc element
            Element arc = path.getCurrent();

            // these two are always present
            int src_id = Integer.parseInt(arc.attribute("s").getValue());
            int dest_id = Integer.parseInt(arc.attribute("d").getValue());

            // there will be either one io attr xor separate i and o attrs
            // (keep the io option to facilitate hand-written XML files)

            String input;
            String output;
            Attribute io = arc.attribute("io");
            if (io != null) {
                input = io.getValue();
                output = io.getValue();
            } else {
                input = arc.attribute("i").getValue();
                output = arc.attribute("o").getValue();
            }

            if (!symmap.containsKey(input)) {
                // symbol name in XML file not in the
                //     current internal symtab
                symmap.putsym(input);
            }
            if (!symmap.containsKey(output)) {
                symmap.putsym(output);
            }

            // the w attr is optional in the arc elmt
            Attribute w = arc.attribute("w");
            if (w != null) {
                // call AddArc to add an arc to the fst
                //      being built from the XML file description
                // semiring generalization point
                lib.AddArc(fst, src_id, symmap.getint(input), symmap.getint(output),
                        Float.parseFloat(w.getValue()), dest_id);

            } else {
                // semiring generalization point
                lib.AddArcNeutralWeight(fst, src_id, symmap.getint(input), symmap.getint(output), dest_id);
            }

            arc.detach();
        }
    });

    // for each full final element
    reader.addHandler("/kleeneFst/arcs/final", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element arc = path.getCurrent();

            // s attr is always present
            int src_id = Integer.parseInt(arc.attribute("s").getValue());

            // the w attr is optional
            Attribute w = arc.attribute("w");
            if (w != null) {
                lib.SetFinal(fst, src_id, Float.parseFloat(w.getValue()));
            } else {
                lib.SetFinalNeutralWeight(fst, src_id);
            }

            arc.detach();
        }
    });

    // the actual XML reading/parsing is done here; all the work happens in
    // the handlers above, so the Document returned by read() is not kept
    FileInputStream in = null;
    try {
        in = new FileInputStream(filepath);

        // XmlReader detects the encoding of the XML document and
        // handles BOMs, including the UTF-8 BOMs that Java usually
        // chokes on
        reader.read(new XmlReader(in));
    } catch (Exception e) {
        // covers DocumentException and FileNotFoundException as well
        e.printStackTrace();
        throw e;
    } finally {
        // always release the file handle, whether or not parsing succeeded
        if (in != null) {
            try {
                in.close();
            } catch (java.io.IOException ignored) {
                // a failure to close must not mask the parse outcome
            }
        }
    }

    correctSigmaOther(fst);

    return fst;
}

From source file:com.globalsight.everest.projecthandler.importer.XmlReader.java

License:Apache License

/**
 * Reads an XML file and checks its correctness by validating
 * against the TMX DTD. If there's any error in the file, an
 * exception is thrown./*from   www.j a va  2 s.com*/
 */
private void analyzeXml(String p_url) throws Exception {
    CATEGORY.debug("Analyzing document: " + p_url);

    SAXReader reader = new SAXReader();

    // TODO: Read the DTD and validate.
    // See com.globalsight.everest.tm.util.DtdResolver;

    // reader.setEntityResolver(DtdResolver.getInstance());
    // reader.setValidation(true);

    // enable element complete notifications to conserve memory
    //TODO
    reader.addHandler("/projectdata/data", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    Document document = reader.read(p_url);

    // all done
}

From source file:com.globalsight.everest.projecthandler.importer.XmlReaderThread.java

License:Apache License

/**
 * Parses the file named by m_options and publishes one result per
 * /projectdata/data element through m_results.
 *
 * Parsing is aborted by throwing ThreadDeath from the element handler as
 * soon as m_results.put() returns true. Whatever happens, the producer is
 * marked done and m_results is cleared in the finally block.
 */
public void run() {
    try {
        SAXReader saxReader = new SAXReader();

        // TODO: Read the DTD and validate.
        // See com.globalsight.everest.tm.util.DtdResolver;

        // saxReader.setEntityResolver(DtdResolver.getInstance());
        // saxReader.setValidation(true);

        // Root element: looked up but nothing is recorded from it yet.
        ElementHandler rootHandler = new ElementHandler() {
            public void onStart(ElementPath path) {
                Element root = path.getCurrent();
            }

            public void onEnd(ElementPath path) {
            }
        };

        // Called back as each data element is complete so it can be pruned.
        ElementHandler dataHandler = new ElementHandler() {
            public void onStart(ElementPath path) {
                m_count++;
            }

            public void onEnd(ElementPath path) {
                Element entry = path.getCurrent();

                // prune the finished element to reduce memory
                entry.detach();

                m_result = m_results.hireResult();

                try {
                    // TODO: Create data objects (currently the element itself)
                    Object payload = entry;

                    if (CATEGORY.isDebugEnabled()) {
                        CATEGORY.debug(payload);
                    }

                    m_result.setResultObject(payload);
                } catch (Throwable ex) {
                    m_result.setError(ex.toString());

                    CATEGORY.warn("Error in object " + m_count, ex);
                }

                boolean stopRequested = m_results.put(m_result);
                m_result = null;

                // Stop reading the file.
                if (stopRequested) {
                    throw new ThreadDeath();
                }
            }
        };

        saxReader.addHandler("/projectdata", rootHandler);
        saxReader.addHandler("/projectdata/data", dataHandler);

        String fileName = m_options.getFileName();

        saxReader.read(fileName);
    } catch (ThreadDeath ignore) {
        CATEGORY.info("ReaderThread: interrupted");
    } catch (Throwable ignore) {
        // Should never happen, and I don't know how to handle
        // this case other than passing the exception in
        // m_results, which I won't do for now.
        CATEGORY.error("unexpected error", ignore);
    } finally {
        // hand back a result that was hired but never put
        if (m_result != null) {
            m_results.fireResult(m_result);
        }

        m_results.producerDone();
        m_results = null;

        CATEGORY.debug("ReaderThread: done.");
    }
}

From source file:com.globalsight.everest.tm.importer.TmxReader.java

License:Apache License

/**
 * Reads an XML file and checks its correctness by validating
 * against the TMX DTD. If there's any error in the file, an
 * exception is thrown.
 *
 * As a side effect, this method builds a list of source and
 * target locales found in the file, including the declared source
 * locale from the header. Trados-flavoured TMX levels are first
 * converted to a native TMX file, and m_options / m_tmxLevel are
 * updated to point at the converted file.
 *
 * @param p_url location of the TMX file to analyze
 * @throws Exception if conversion or reading the document fails
 */
private void analyzeXml(String p_url) throws Exception {
    if (m_tmxLevel == ImportUtil.TMX_LEVEL_TRADOS_RTF || m_tmxLevel == ImportUtil.TMX_LEVEL_TRADOS_HTML
            || m_tmxLevel == ImportUtil.TMX_LEVEL_TRADOS_FM || m_tmxLevel == ImportUtil.TMX_LEVEL_TRADOS_FM_SGML
            || m_tmxLevel == ImportUtil.TMX_LEVEL_TRADOS_IL
            || m_tmxLevel == ImportUtil.TMX_LEVEL_TRADOS_XPTAG) {
        // Convert the Trados codes to native System4 codes by
        // converting the file to RTF, saving it as HTML and
        // extracting the resulting TUVs.

        CATEGORY.info("Converting Trados TMX to native TMX: " + p_url);

        p_url = convertTradosTmx(p_url, m_tmxLevel);

        // Now we have a new file that contains native content.
        m_options.setFileName(p_url);
        m_options.setFileType(com.globalsight.everest.tm.importer.ImportOptions.TYPE_XML);

        m_tmxLevel = ImportUtil.TMX_LEVEL_NATIVE;
    }

    CATEGORY.debug("Analyzing document: " + p_url);

    // Reset list of locales found in the file.
    m_sourceLocales = new HashSet();
    m_targetLocales = new HashSet();

    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    // Read the DTD and validate.
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // remember the version declared on the root element for the header
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            Element element = path.getCurrent();

            m_tmxVersion = element.attributeValue(Tmx.VERSION);
        }

        public void onEnd(ElementPath path) {
        }
    });

    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            m_header = new Tmx(element);
            m_header.setTmxVersion(m_tmxVersion);

            element.detach();
        }
    });

    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            // Record optional source language declared on TU.
            String srclang = element.attributeValue(Tmx.SRCLANG);
            String normalizedSrclang = null;
            if (srclang != null) {
                normalizedSrclang = ImportUtil.normalizeLocale(srclang);
                m_sourceLocales.add(normalizedSrclang);
            }

            // Find target languages
            HashSet langs = new HashSet();
            List tuvs = element.selectNodes("./tuv");

            for (int i = 0, max = tuvs.size(); i < max; i++) {
                Element tuv = (Element) tuvs.get(i);

                String lang = tuv.attributeValue(Tmx.LANG);

                // Collect TUV locales
                langs.add(ImportUtil.normalizeLocale(lang));
            }

            // langs holds normalized spellings, so the source locale has
            // to be removed in its normalized form as well; removing the
            // raw attribute value would miss it whenever normalization
            // changes the spelling
            langs.remove(normalizedSrclang);
            m_targetLocales.addAll(langs);

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // only the handler side effects matter; the resulting Document is unused
    reader.read(p_url);

    // Add declared source language from header.
    String sourceLocale = ImportUtil.normalizeLocale(m_header.getSourceLang());

    m_sourceLocales.add(sourceLocale);
}

From source file:com.globalsight.everest.tm.importer.TmxReaderThread.java

License:Apache License

/**
 * Parses the TMX file named by m_options with DTD validation enabled and
 * publishes one result per /tmx/body/tu element through m_results.
 *
 * Each TU is normalized, filtered, validated and converted to a
 * SegmentTmTu; a failure in any of those steps is stored as the error of
 * that entry's result instead of stopping the import. Parsing is aborted
 * by throwing ThreadDeath from the element handler as soon as
 * m_results.put() returns true. Whatever happens, the producer is marked
 * done and m_results is cleared in the finally block.
 */
public void run() {
    try {
        SAXReader saxReader = new SAXReader();
        saxReader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

        // Read the DTD and validate.
        saxReader.setEntityResolver(DtdResolver.getInstance());
        saxReader.setValidation(true);

        // Root element: remember the declared TMX version.
        ElementHandler versionHandler = new ElementHandler() {
            public void onStart(ElementPath path) {
                m_tmxVersion = path.getCurrent().attributeValue(Tmx.VERSION);
            }

            public void onEnd(ElementPath path) {
            }
        };

        // Header: build the Tmx object and derive the default source language.
        ElementHandler headerHandler = new ElementHandler() {
            public void onStart(ElementPath path) {
            }

            public void onEnd(ElementPath path) {
                Element header = path.getCurrent();
                header.detach();

                m_tmx = new Tmx(header);
                m_tmx.setTmxVersion(m_tmxVersion);

                String declaredSrcLang = m_tmx.getSourceLang();
                m_defaultSrcLang = ImportUtil.normalizeLocale(declaredSrcLang);
            }
        };

        // TU: called back as each element is complete so it can be pruned.
        ElementHandler tuHandler = new ElementHandler() {
            public void onStart(ElementPath path) {
                m_count++;
            }

            public void onEnd(ElementPath path) {
                Element tuElement = path.getCurrent();
                tuElement.detach();

                m_result = m_results.hireResult();

                try {
                    // Normalize spelling of locales.
                    normalizeTu(tuElement);
                    // Filter out targets not to be imported.
                    filterTu(tuElement);
                    // Validate we have source and target.
                    validateTu(tuElement);

                    // Create TU objects
                    SegmentTmTu segmentTu = createTu(tuElement);

                    if (CATEGORY.isDebugEnabled()) {
                        CATEGORY.debug(segmentTu.toDebugString(true));
                    }

                    m_result.setResultObject(segmentTu);
                } catch (Throwable ex) {
                    String msg = "Entry " + m_count + ": " + ex.getMessage();

                    m_result.setError(msg);

                    // the stack trace is only of interest at debug level
                    if (!CATEGORY.isDebugEnabled()) {
                        CATEGORY.warn(msg);
                    } else {
                        CATEGORY.debug(msg, ex);
                    }
                }

                boolean stopRequested = m_results.put(m_result);
                m_result = null;

                // Stop reading the TMX file.
                if (stopRequested) {
                    throw new ThreadDeath();
                }
            }
        };

        saxReader.addHandler("/tmx", versionHandler);
        saxReader.addHandler("/tmx/header", headerHandler);
        saxReader.addHandler("/tmx/body/tu", tuHandler);

        String fileName = m_options.getFileName();

        saxReader.read(fileName);
    } catch (ThreadDeath ignore) {
        CATEGORY.info("ReaderThread: interrupted.");
    } catch (Throwable ignore) {
        // Should never happen, and I don't know how to handle
        // this case other than passing the exception in
        // m_results, which I won't do for now.
        CATEGORY.error("unexpected error", ignore);
    } finally {
        // hand back a result that was hired but never put
        if (m_result != null) {
            m_results.fireResult(m_result);
        }

        m_results.producerDone();
        m_results = null;

        CATEGORY.debug("ReaderThread: done.");
    }
}

From source file:com.globalsight.everest.tm.util.TmxAnalyzer.java

License:Apache License

/**
 * Parses the TMX file at the given URL with DTD validation enabled,
 * counting its TUs, its TUVs and the distinct "lang" values seen on the
 * TUVs, then logs a summary followed by each locale.
 *
 * All counters and the locale set are reset at the start of every call.
 *
 * @param p_url location of the TMX file to analyze
 * @throws Exception if reading the document fails
 */
public void analyze(String p_url) throws Exception {
    m_tuCount = 0;
    m_tuvCount = 0;
    m_localeCount = 0;
    m_locales = new HashSet();
    m_tmxVersion = "";
    m_tmx = null;

    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    log("Analyzing document: " + p_url);

    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            Element element = path.getCurrent();

            m_tmxVersion = element.attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            element.detach();

            m_tmx = new Tmx(element);
            m_tmx.setTmxVersion(m_tmxVersion);
        }
    });

    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_tuCount;

            if (m_tuCount % 1000 == 0) {
                log("TU " + m_tuCount);
            }
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            // Relative path: only the TUVs below this TU. The absolute
            // "//tuv" searches the whole document and is correct only as
            // long as every earlier TU has already been detached.
            List tuvs = element.selectNodes(".//tuv");

            m_tuvCount += tuvs.size();

            for (int i = 0, max = tuvs.size(); i < max; i++) {
                Element tuv = (Element) tuvs.get(i);

                String locale = tuv.attributeValue("lang");
                m_locales.add(locale);
            }

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // only the handler side effects matter; the resulting Document is unused
    reader.read(p_url);

    m_localeCount = m_locales.size();

    log("File: " + p_url);
    log("TMX version: " + m_tmxVersion);
    log("Total TUs: " + m_tuCount);
    log("Total TUVs: " + m_tuvCount);
    log("Total Locales: " + m_localeCount);

    for (Iterator it = m_locales.iterator(); it.hasNext();) {
        String locale = (String) it.next();

        log(locale);
    }
}

From source file:com.globalsight.everest.tm.util.TmxLevelSplitter.java

License:Apache License

/**
 * Reads the TMX file at the given URL with DTD validation enabled and
 * hands every TU either to writeTagsEntry() (when containsTags() is true,
 * "level 2") or to writeTextEntry() ("level 1"), then logs how many TUs
 * went each way.
 *
 * All three counters are reset at the start of every call so that the
 * summary line describes this call only.
 *
 * @param p_url location of the TMX file to split
 * @throws Exception if reading the document fails
 */
public void split(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    final String extension = getExtension(p_url);

    // m_entryCount used to be the only counter reset here, which made the
    // summary inconsistent when split() ran more than once on an instance
    m_entryCount = 0;
    m_textCount = 0;
    m_tagsCount = 0;

    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    log("Splitting document `" + p_url + "'");

    // remember the version declared on the root element
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            Element element = path.getCurrent();

            m_version = element.attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // the output files are started once the complete header is known
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            m_header = element;

            try {
                startFiles(baseName, extension);
            } catch (Exception ex) {
                // NOTE(review): terminates the whole JVM when the output
                // files cannot be started
                log(ex.toString());
                System.exit(1);
            }

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            if (containsTags(element)) {
                writeTagsEntry(element.asXML());

                m_tagsCount++;
            } else {
                writeTextEntry(element.asXML());

                m_textCount++;
            }

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // only the handler side effects matter; the resulting Document is unused
    reader.read(p_url);

    closeFiles();

    log("Processed " + m_entryCount + " TUs, " + m_textCount + " level 1 (text), " + m_tagsCount
            + " level 2 (tags)");
}

From source file:com.globalsight.everest.tm.util.TmxSplitter.java

License:Apache License

/**
 * Reads the TMX file at the given URL with DTD validation enabled and
 * writes its TUs out through writeEntry(), calling closeFile() and
 * startFile() to begin a new output file after every p_numEntries TUs.
 *
 * @param p_url location of the TMX file to split
 * @param p_numEntries maximum number of TUs per output file, as a decimal
 *        integer of at least 1
 * @throws IllegalArgumentException if p_numEntries is smaller than 1
 * @throws Exception if p_numEntries is not a number or reading the
 *         document fails
 */
public void split(String p_url, String p_numEntries) throws Exception {
    final int maxEntries = Integer.parseInt(p_numEntries);
    if (maxEntries < 1) {
        // fail before anything is opened; zero used to surface as a
        // division by zero in the middle of parsing
        throw new IllegalArgumentException("p_numEntries must be at least 1: " + p_numEntries);
    }

    final String baseName = getBaseName(p_url);
    final String extension = getExtension(p_url);

    m_entryCount = 0;

    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    log("Splitting document `" + p_url + "'");

    // remember the version declared on the root element
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            Element element = path.getCurrent();

            m_version = element.attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // the first output file is started once the complete header is known
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            m_header = element;

            try {
                startFile(baseName, extension);
            } catch (Exception ex) {
                // NOTE(review): terminates the whole JVM when the output
                // file cannot be started
                log(ex.toString());
                System.exit(1);
            }

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;

            // Rotate when the current file already holds maxEntries TUs,
            // i.e. right before entry maxEntries + 1, 2 * maxEntries + 1, ...
            // Testing m_entryCount % maxEntries rotated one entry too
            // early, leaving the first file with maxEntries - 1 TUs.
            if (m_entryCount > 1 && (m_entryCount - 1) % maxEntries == 0) {
                try {
                    closeFile();
                    startFile(baseName, extension);
                } catch (Exception ex) {
                    // NOTE(review): terminates the whole JVM on failure
                    log(ex.toString());
                    System.exit(1);
                }
            }
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            writeEntry(element.asXML());

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // only the handler side effects matter; the resulting Document is unused
    reader.read(p_url);

    closeFile();
}

From source file:com.globalsight.everest.tm.util.trados.TradosFmSgmlTmxToGxml.java

License:Apache License

/**
 * Main method to call, returns the new filename of the result.
 *
 * Reads the TMX file with DTD validation enabled, replaces the content of
 * every seg element with the parsed result of handleTuv(), and writes
 * each TU whose segments all converted cleanly through writeEntry(). TUs
 * with a failed segment are counted in m_errorCount and skipped.
 *
 * @param p_url location of the TMX file to convert
 * @return the value of m_filename after the conversion
 * @throws Exception if starting the output file or reading the document
 *         fails; the output file is closed in either case
 */
public String convertToGxml(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    // NOTE(review): unused here; kept because getExtension() is not visible
    final String extension = getExtension(p_url);

    info("Converting TMX file to GXML: `" + p_url + "'");
    startOutputFile(baseName);

    // reset both counters so the summary below describes this call only
    m_entryCount = 0;
    m_errorCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // remember the version declared on the root element
    reader.addHandler("/tmx", new ElementHandler() {
        final public void onStart(ElementPath path) {
            Element element = path.getCurrent();

            m_version = element.attributeValue("version");
        }

        final public void onEnd(ElementPath path) {
        }
    });

    // the new header is derived from the complete old one
    reader.addHandler("/tmx/header", new ElementHandler() {
        final public void onStart(ElementPath path) {
        }

        final public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            setOldHeader(element);
            createNewHeader();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        final public void onStart(ElementPath path) {
            ++m_entryCount;
            m_tuError = false;
        }

        final public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            // m_tuError is set by the seg handler below when a segment of
            // this TU could not be converted
            if (m_tuError) {
                m_errorCount++;
            } else {
                writeEntry(element.asXML());
            }

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    // rewrite each segment in place before its TU is written out
    reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() {
        final public void onStart(ElementPath path) {
        }

        final public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            try {
                String gxml = handleTuv(element);
                Document doc = parse("<root>" + gxml + "</root>");

                // Remove old content of seg
                List content = element.content();
                for (int i = content.size() - 1; i >= 0; --i) {
                    ((Node) content.get(i)).detach();
                }

                // Add new GXML content (backwards)
                content = doc.getRootElement().content();
                Collections.reverse(content);
                for (int i = content.size() - 1; i >= 0; --i) {
                    Node node = (Node) content.get(i);
                    element.add(node.detach());
                }
            } catch (Throwable ex) {
                // keep going, but leave a trace of why this TU is dropped
                debug("Entry " + m_entryCount + ": segment conversion failed: " + ex);
                m_tuError = true;
            }
        }
    });

    try {
        // only the handler side effects matter; the Document is unused
        reader.read(p_url);
    } finally {
        // the output file was started above, so close it even when
        // reading fails
        closeOutputFile();
    }

    info("Processed " + m_entryCount + " TUs " + "into file `" + m_filename + "', " + m_errorCount
            + " errors.");

    return m_filename;
}

From source file:com.globalsight.everest.tm.util.trados.TradosFmTmxToGxml.java

License:Apache License

/**
 * Main method to call, returns the new filename of the result.
 *
 * Reads the TMX file with DTD validation enabled, replaces the content of
 * every seg element with the parsed result of handleTuv(), and writes
 * each TU whose segments all converted cleanly through writeEntry(). TUs
 * with a failed segment are counted in m_errorCount and skipped.
 *
 * @param p_url location of the TMX file to convert
 * @return the value of m_filename after the conversion
 * @throws Exception if starting the output file or reading the document
 *         fails; the output file is closed in either case
 */
public String convertToGxml(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    // NOTE(review): unused here; kept because getExtension() is not visible
    final String extension = getExtension(p_url);

    info("Converting TMX file to GXML: `" + p_url + "'");
    startOutputFile(baseName);

    // reset both counters so the summary below describes this call only
    m_entryCount = 0;
    m_errorCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // remember the version declared on the root element
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            Element element = path.getCurrent();

            m_version = element.attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // the new header is derived from the complete old one
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            setOldHeader(element);
            createNewHeader();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
            m_tuError = false;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            // m_tuError is set by the seg handler below when a segment of
            // this TU could not be converted
            if (m_tuError) {
                m_errorCount++;
            } else {
                writeEntry(element.asXML());
            }

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    // rewrite each segment in place before its TU is written out
    reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            try {
                String gxml = handleTuv(element);
                Document doc = parse("<root>" + gxml + "</root>");

                // Remove old content of seg
                List content = element.content();
                for (int i = content.size() - 1; i >= 0; --i) {
                    ((Node) content.get(i)).detach();
                }

                // Add new GXML content (backwards)
                content = doc.getRootElement().content();
                Collections.reverse(content);
                for (int i = content.size() - 1; i >= 0; --i) {
                    Node node = (Node) content.get(i);
                    element.add(node.detach());
                }
            } catch (Throwable ex) {
                // keep going, but leave a trace of why this TU is dropped
                debug("Entry " + m_entryCount + ": segment conversion failed: " + ex);
                m_tuError = true;
            }
        }
    });

    try {
        // only the handler side effects matter; the Document is unused
        reader.read(p_url);
    } finally {
        // the output file was started above, so close it even when
        // reading fails
        closeOutputFile();
    }

    info("Processed " + m_entryCount + " TUs " + "into file `" + m_filename + "', " + m_errorCount
            + " errors.");

    return m_filename;
}