Example usage for org.dom4j.io SAXReader addHandler

List of usage examples for org.dom4j.io SAXReader addHandler

Introduction

In this page you can find the example usage for org.dom4j.io SAXReader addHandler.

Prototype

public void addHandler(String path, ElementHandler handler) 

Source Link

Document

Adds the ElementHandler to be called when the specified path is encountered.

Usage

From source file:com.globalsight.terminology.importer.TbxReaderThread.java

License:Apache License

/**
 * Parses the file named by {@code m_options.getFileName()} and hands every
 * {@code /martif/text/body/termEntry} element to {@code m_results}, wrapped
 * in an {@code Entry}.
 * <p>
 * Elements are detached from the tree as soon as they are complete so the
 * whole document is never held in memory. Whatever happens, the producer
 * side of {@code m_results} is marked as done before this method returns.
 */
public void run() {
    try {
        SAXReader reader = new SAXReader();
        reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

        // enable pruning to call me back as each Element is complete
        reader.addHandler("/martif/text/body/termEntry", new ElementHandler() {
            public void onStart(ElementPath path) {
                // nothing to do until the element is complete
            }

            public void onEnd(ElementPath path) {
                Element element = path.getCurrent();

                // prune the current element to reduce memory
                element.detach();

                Document doc = m_factory.createDocument(element);
                Entry entry = new Entry(doc);

                m_result = m_results.hireResult();
                m_result.setResultObject(entry);

                boolean done = m_results.put(m_result);
                // The result now belongs to m_results; forget it so the
                // finally block below does not hand it back a second time.
                m_result = null;

                // Stop reading the file: a ThreadDeath is used to break out
                // of the parser and is caught at the bottom of run().
                if (done) {
                    throw new ThreadDeath();
                }
            }
        });

        // The returned document is not needed; all work happens in the handler.
        reader.read(m_options.getFileName());
    } catch (ThreadDeath ignore) {
        CATEGORY.info("ReaderThread: interrupted.");
    } catch (Throwable ex) {
        // The failure is still not passed on through m_results, but it is
        // at least logged so that it does not disappear without a trace.
        CATEGORY.error("ReaderThread: error while reading.", ex);
    } finally {
        // Hand back a result that was hired but never put.
        if (m_result != null) {
            m_results.fireResult(m_result);
            m_result = null;
        }

        m_results.producerDone();
        m_results = null;

        CATEGORY.debug("ReaderThread: done.");
    }
}

From source file:com.globalsight.terminology.util.MtfAnalyzer.java

License:Apache License

/**
 * Counts the {@code /mtf/conceptGrp} elements of the given document and
 * logs progress every 200 entries as well as the final total.
 *
 * @param p_url location of the document, passed to {@code SAXReader.read}
 * @throws Exception if the parser cannot be configured or reading fails
 */
public void analyze(String p_url) throws Exception {
    m_entryCount = 0;

    SAXReader saxReader = new SAXReader();
    saxReader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    System.err.println("Analyzing document: " + p_url);

    // Count each entry when it starts and discard it when it ends, so the
    // document tree never grows beyond a single entry.
    ElementHandler entryCounter = new ElementHandler() {
        public void onStart(ElementPath path) {
            m_entryCount++;

            boolean progressDue = (m_entryCount % 200) == 0;
            if (progressDue) {
                log("Entry " + m_entryCount);
            }
        }

        public void onEnd(ElementPath path) {
            // prune the finished element to reduce memory
            path.getCurrent().detach();
        }
    };
    saxReader.addHandler("/mtf/conceptGrp", entryCounter);

    // Only the handler's side effects matter; the document is not kept.
    saxReader.read(p_url);

    log("Total entries: " + m_entryCount);
}

From source file:com.globalsight.terminology.util.MtfSplitter.java

License:Apache License

/**
 * Splits the document at {@code p_url} into several output files, each
 * holding at most {@code p_numEntries} {@code /mtf/conceptGrp} entries.
 *
 * @param p_url        location of the document to split
 * @param p_numEntries maximum number of entries per output file, as a
 *                     decimal string; must be a positive integer
 * @throws NumberFormatException    if {@code p_numEntries} is not an integer
 * @throws IllegalArgumentException if {@code p_numEntries} is not positive
 * @throws Exception                if reading the input or writing fails
 */
public void split(String p_url, String p_numEntries) throws Exception {
    final int maxEntries = Integer.parseInt(p_numEntries);
    if (maxEntries <= 0) {
        // A zero limit would otherwise surface as a division by zero
        // inside the parser callback.
        throw new IllegalArgumentException(
                "Number of entries per file must be positive: " + p_numEntries);
    }
    final String baseName = getBaseName(p_url);
    final String extension = getExtension(p_url);

    m_entryCount = 0;

    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    log("Splitting document `" + p_url + "'");

    startFile(baseName, extension);

    // enable element complete notifications to conserve memory
    reader.addHandler("/mtf/conceptGrp", new ElementHandler() {
        public void onStart(ElementPath path) {
            // Rotate BEFORE counting the new entry: the current file is
            // full once it already holds a multiple of maxEntries entries.
            // Checking after the increment made the first file one entry
            // short (and empty when maxEntries is 1).
            if (m_entryCount > 0 && m_entryCount % maxEntries == 0) {
                try {
                    closeFile();
                    startFile(baseName, extension);
                } catch (Exception ex) {
                    log(ex.toString());
                    System.exit(1);
                }
            }

            ++m_entryCount;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            writeEntry(element.asXML());

            // prune the current element to reduce memory
            element.detach();
        }
    });

    try {
        reader.read(p_url);
    } finally {
        // Close the output file that is currently open even when parsing
        // fails, so it is not leaked.
        closeFile();
    }
}

From source file:com.globalsight.webservices.Ambassador.java

License:Apache License

/**
 * Try to repair the segment./*from ww w. j a va2s . c  o  m*/
 * <p>
 * Will throw out a WebServiceException if the format is wrong and can not
 * be repaired.
 * <p>
 * 
 * @see SegmentHandler
 * @see #validateSegment(Element, IntHolder)
 * 
 * @param s
 *            The segment to be repaired
 * @return The repaired segment
 * @throws WebServiceException
 */
private String repairSegment(String s) throws WebServiceException {
    Assert.assertNotEmpty(s, "segment");
    SAXReader reader = new SAXReader();
    SegmentHandler segmentHandler = new SegmentHandler(s);
    reader.addHandler("/segment", segmentHandler);
    try {
        reader.read(new StringReader(s));
        if (segmentHandler.hasError()) {
            throw new WebServiceException(segmentHandler.getError());
        }
        return segmentHandler.getSegment();
    } catch (DocumentException e) {
        logger.error(e.getMessage(), e);
        throw new WebServiceException(e.getMessage());
    }
}

From source file:com.globalsight.webservices.Ambassador.java

License:Apache License

/**
 * Updates a translation unit (tu) in the database.
 *
 * @param accessToken
 *            Identifies the caller's session and must not be empty. It can
 *            be obtained by calling <code>login(username, password)</code>.
 * @param tmName
 *            TM name, used together with the company to look up the TM.
 * @param companyName
 *            Company name, used to look up the company that owns the TM.
 * @param tmx
 *            A TMX formatted string including all tu information.
 * @return "true" if the call succeeds
 * @throws WebServiceException
 *             if an argument is empty, the company or TM cannot be found,
 *             or the TMX string cannot be parsed
 */
public String editTu(String accessToken, String tmName, String companyName, String tmx)
        throws WebServiceException {
    try {
        Assert.assertNotEmpty(accessToken, "access token");
        Assert.assertNotEmpty(tmx, "tmx format");
    } catch (Exception invalidArgument) {
        logger.error(invalidArgument.getMessage(), invalidArgument);
        throw new WebServiceException(invalidArgument.getMessage());
    }

    checkAccess(accessToken, "editEntry");
    checkPermission(accessToken, Permission.TM_EDIT_ENTRY);

    Company owner = getCompanyByName(companyName);
    if (owner == null) {
        String message = "Can not find the company with name (" + companyName + ")";
        throw new WebServiceException(message);
    }

    final ProjectTM projectTm = getProjectTm(tmName, owner.getIdAsLong());
    if (projectTm == null) {
        String message = "Can not find the tm with tm name (" + tmName + ") and company name ("
                + companyName + ")";
        throw new WebServiceException(message);
    }

    SAXReader saxReader = new SAXReader();
    saxReader.addHandler("/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            // nothing to do until the tu element is complete
        }

        public void onEnd(ElementPath path) {
            Element tu = path.getCurrent();
            tu.detach();

            try {
                normalizeTu(tu);
                validateTu(tu);

                // The TM3 id decides which editing routine is used.
                if (projectTm.getTm3Id() != null) {
                    editTm3Tu(tu, projectTm);
                } else {
                    editTm2Tu(tu);
                }
            } catch (Throwable failure) {
                logger.error(failure.getMessage(), failure);
                // NOTE(review): ThreadDeath is an Error, so the
                // catch (DocumentException) further down does not handle
                // it - confirm that callers expect it to escape editTu.
                throw new ThreadDeath();
            }
        }
    });

    WebServicesLog.Start activityStart = null;
    try {
        Map<Object, Object> activityArgs = new HashMap<Object, Object>();
        activityArgs.put("loggedUserName", this.getUsernameFromSession(accessToken));
        activityStart = WebServicesLog.start(Ambassador.class, "editTu(accessToken,tmx)", activityArgs);

        saxReader.read(new StringReader(tmx));
    } catch (DocumentException parseFailure) {
        logger.error(parseFailure.getMessage(), parseFailure);
        throw new WebServiceException(parseFailure.getMessage());
    } finally {
        // End the activity log entry only if it was actually started.
        if (activityStart != null) {
            activityStart.end();
        }
    }

    return "true";
}

From source file:com.nokia.config.SAXConfigParser.java

License:Open Source License

/**
 * Parses the system definition file and appends the value of the
 * {@code name} attribute of every {@code /SystemDefinition/build/target}
 * element, followed by a comma, to the {@code configs} field.
 * <p>
 * A parsing failure is printed to standard error and otherwise ignored.
 *
 * @return the {@code configs} field after parsing
 */
public String getConfigs() {
    File sysdef = new File(sysdefFile);
    SAXReader saxReader = new SAXReader();

    saxReader.addHandler("/SystemDefinition/build/target", new ElementHandler() {
        public void onStart(ElementPath path) {
            // nothing to do until the target element is complete
        }

        public void onEnd(ElementPath path) {
            Element target = path.getCurrent();

            for (Iterator attributes = target.attributeIterator(); attributes.hasNext();) {
                Attribute attribute = (Attribute) attributes.next();
                if (attribute.getQualifiedName().equals("name")) {
                    configs += attribute.getValue() + ",";
                }
            }

            // prune the processed element from the tree
            target.detach();
        }
    });

    try {
        // Only the handler's side effects matter; the document is not kept.
        saxReader.read(sysdef);
    } catch (Exception e) {
        e.printStackTrace();
    }

    return configs;
}

From source file:com.nokia.helium.sbs.SAXSysdefParser.java

License:Open Source License

/**
 * Constructor/*from w  ww  .ja  v  a 2  s .com*/
 * 
 * @return list of available configurations that can be built.
 */
public void parseConfig(String nodeToGet) {
    layers = new ArrayList<String>();
    SAXReader reader = new SAXReader();
    reader.addHandler("/SystemDefinition/systemModel/" + nodeToGet, new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element row = path.getCurrent();
            Iterator itr = row.attributeIterator();
            while (itr.hasNext()) {
                Attribute child = (Attribute) itr.next();
                String attrName = child.getQualifiedName();
                if (attrName.equals("name")) {
                    layers.add(child.getValue());
                }
            }
            row.detach();
        }
    });
    try {
        reader.read(sysdefFile);
    } catch (DocumentException e) {
        e.printStackTrace();
    }
}

From source file:com.panet.imeta.trans.steps.getxmldata.GetXMLData.java

License:Open Source License

/**
 * Reads an XML document into {@code data.document}.
 * <p>
 * The source is chosen by the flags: the XML text itself when
 * {@code IsInXMLField} is set, otherwise a URL when {@code readurl} is set,
 * otherwise the given file. When {@code data.prunePath} is set, a handler
 * is registered that processes and detaches each matching element while
 * the document is still being read.
 *
 * @param StringXML    the XML text, or the URL to read when {@code readurl} is set
 * @param file         the file to read when neither flag is set
 * @param IsInXMLField whether {@code StringXML} holds the XML itself
 * @param readurl      whether {@code StringXML} holds a URL
 * @return {@code true} when the document was read, {@code false} when
 *         reading failed after streaming had been stopped on purpose
 * @throws KettleException on any other failure
 */
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl)
        throws KettleException {

    try {
        SAXReader reader = new SAXReader();
        data.stopPruning = false;

        // Validate XML against specified schema?
        if (meta.isValidating()) {
            reader.setValidation(true);
            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
        }

        // Ignore comments?
        if (meta.isIgnoreComments())
            reader.setIgnoreComments(true);

        if (data.prunePath != null) {
            // when pruning is on: reader.read() below will wait until all
            // is processed in the handler
            if (log.isDetailed())
                logDetailed(Messages.getString("GetXMLData.Log.StreamingMode.Activated"));
            reader.addHandler(data.prunePath, new ElementHandler() {
                public void onStart(ElementPath path) {
                    // do nothing here...
                }

                public void onEnd(ElementPath path) {
                    if (isStopped()) {
                        // when a large file is processed and it should be
                        // stopped it is still reading the whole thing
                        // the only solution I see is to prune / detach the
                        // document and this will lead into a
                        // NPE or other errors depending on the parsing
                        // location - this will be treated in the catch part
                        // below
                        // any better idea is welcome
                        if (log.isBasic())
                            logBasic(Messages.getString("GetXMLData.Log.StreamingMode.Stopped"));
                        data.stopPruning = true;
                        // trick to stop the reader
                        path.getCurrent().getDocument().detach();
                        return;
                    }

                    // process a ROW element
                    if (log.isDebug())
                        logDebug(Messages.getString("GetXMLData.Log.StreamingMode.StartProcessing"));
                    Element row = path.getCurrent();
                    try {
                        processStreaming(row.getDocument());
                    } catch (Exception e) {
                        // catch the KettleException or others and forward
                        // to caller, e.g. when applyXPath() has a problem
                        throw new RuntimeException(e);
                    }
                    // prune the tree
                    row.detach();
                    if (log.isDebug())
                        logDebug(Messages.getString("GetXMLData.Log.StreamingMode.EndProcessing"));
                }
            });
        }

        if (IsInXMLField) {
            // read string to parse
            data.document = reader.read(new StringReader(StringXML));
        } else if (readurl) {
            // read url as source
            data.document = reader.read(new URL(StringXML));
        } else {
            // get encoding. By default UTF-8
            String encoding = "UTF-8";
            if (!Const.isEmpty(meta.getEncoding()))
                encoding = meta.getEncoding();

            // Keep a handle on the stream so it can be closed whether or
            // not parsing succeeds; previously it was never closed.
            java.io.InputStream in = KettleVFS.getInputStream(file);
            try {
                // NOTE(review): dom4j documents the second argument of
                // read(InputStream, String) as a system ID - confirm that
                // passing the encoding here has the intended effect.
                data.document = reader.read(in, encoding);
            } finally {
                try {
                    in.close();
                } catch (java.io.IOException ignored) {
                    // best effort: a failed close must not hide the
                    // outcome of the read itself
                }
            }
        }

        if (meta.isNamespaceAware())
            prepareNSMap(data.document.getRootElement());
    } catch (Exception e) {
        if (data.stopPruning) {
            // ignore error when pruning
            return false;
        } else {
            throw new KettleException(e);
        }
    }
    return true;
}

From source file:galign.helpers.tmx.TmxFile.java

License:Apache License

/**
 * Reads TMX content from the given input source and fills this object
 * from it: the version of the {@code tmx} root element, the header, and
 * one {@code Tu} per {@code /tmx/body/tu} element.
 *
 * @param p_reader the reader on which the element handlers are registered
 *                 and which performs the parse
 * @param p_input  the source to read from
 * @throws org.dom4j.DocumentException if reading the input fails
 */
protected void init(SAXReader p_reader, InputSource p_input) throws org.dom4j.DocumentException {
    // Root element: remember the version as soon as the start tag is seen.
    p_reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            m_tmxVersion = path.getCurrent().attributeValue("version");
        }

        public void onEnd(ElementPath path) {
            // nothing to do
        }
    });

    // Header: build the header object, then drop the element to save memory.
    p_reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
            // nothing to do until the header is complete
        }

        public void onEnd(ElementPath path) {
            Element header = path.getCurrent();

            m_header = new TmxHeader(header);
            m_header.setTmxVersion(m_tmxVersion);

            // prune the finished element to reduce memory
            header.detach();
        }
    });

    // Translation units: convert each one, then drop the element to save memory.
    p_reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            // nothing to do until the tu is complete
        }

        public void onEnd(ElementPath path) {
            Element tu = path.getCurrent();

            addTu(new Tu(tu));

            // prune the finished element to reduce memory
            tu.detach();
        }
    });

    // Only the handlers' side effects matter; the document is not kept.
    p_reader.read(p_input);
}

From source file:musite.io.xml.ProteinsXMLReader.java

License:Open Source License

/**
 * Reads proteins from an XML stream using the element handlers registered
 * in {@code saxReaderHandler}.
 * <p>
 * When {@code nullData} is set, {@code data} is replaced by a new
 * {@code ProteinsImpl} before reading. The stream is wrapped in a buffer
 * but is not closed here.
 *
 * @param is the stream to read from, must not be {@code null}
 * @return the {@code data} object after reading
 * @throws IllegalArgumentException if {@code is} is {@code null}
 * @throws IOException              if the XML cannot be read; the
 *                                  underlying {@code DocumentException}
 *                                  is attached as the cause
 */
public Proteins read(InputStream is) throws IOException {
    if (is == null) {
        throw new IllegalArgumentException("Input stream must not be null.");
    }

    if (nullData) {
        data = new ProteinsImpl();
    }

    SAXReader saxReader = new SAXReader();

    for (Map.Entry<String, ElementHandler> entry : saxReaderHandler.entrySet()) {
        saxReader.addHandler(entry.getKey(), entry.getValue());
    }

    BufferedInputStream bis = new BufferedInputStream(is);

    try {
        System.out.println("Reading...");
        saxReader.read(bis);
        System.out.println("" + data.proteinCount() + " protein were read.");
    } catch (DocumentException e) {
        // Keep the original exception as the cause so the parser's stack
        // trace and location details are not lost.
        throw new IOException(e.getMessage(), e);
    }

    return data;
}