Example usage for the org.dom4j.io.SAXWriter constructor SAXWriter(ContentHandler)

List of usage examples for the org.dom4j.io.SAXWriter constructor SAXWriter(ContentHandler)

Introduction

This page collects usage examples for the org.dom4j.io.SAXWriter constructor SAXWriter(ContentHandler).

Prototype

public SAXWriter(ContentHandler contentHandler) 

Source Link

Usage

From source file:bixo.examples.webmining.AnalyzeHtml.java

License:Apache License

/**
 * Analyzes one parsed page and emits a single result tuple for it.
 *
 * <p>The document is replayed as SAX events into a
 * {@code SimpleBodyContentHandler}, and the handler's string form is scored
 * with {@code getScore}. Outlinks and image-based page results are then
 * extracted from the same document, copied into the shared {@code _result}
 * datum, and a clone of its tuple is added to the collector.
 *
 * @param datum parsed page metadata; only its URL is read here
 * @param doc document tree for the page
 * @param collector sink that receives the result tuple
 * @param process flow process handed to {@code BixoPlatform.clone}
 * @throws Exception if the SAX replay or any extraction step fails
 */
@SuppressWarnings("rawtypes")
@Override
protected void process(ParsedDatum datum, Document doc, TupleEntryCollector collector, FlowProcess process)
        throws Exception {
    // Feed the whole tree through the handler before asking it for its text.
    final SimpleBodyContentHandler bodyText = new SimpleBodyContentHandler();
    new SAXWriter(bodyText).write(doc);
    final float score = getScore(bodyText.toString());

    // Outlinks are computed first because image extraction takes them as input.
    final Outlink[] links = getOutlinks(doc);
    final PageResult[] imageResults = extractImages(datum.getUrl(), doc, links);

    _result.setUrl(datum.getUrl());
    _result.setPageScore(score);
    _result.setOutlinks(links);
    _result.setPageResults(imageResults);

    // _result is reused across calls, so a clone of its tuple is what gets emitted.
    collector.add(BixoPlatform.clone(_result.getTuple(), process));
}

From source file:com.alibaba.citrus.springext.util.DomUtil.java

License:Open Source License

/**
 * Replays an element as SAX events on the given content handler.
 *
 * <p>If the handler also implements {@link ErrorHandler} and/or
 * {@link LexicalHandler}, it is registered with the writer in those roles too.
 *
 * @param element element to convert; it is first passed through the
 *        single-argument {@code convertElement} overload (defined elsewhere)
 * @param contentHandler receiver of the generated SAX events
 * @throws SAXException if writing the SAX events fails
 */
public static void convertElement(Element element, ContentHandler contentHandler) throws SAXException {
    final SAXWriter saxWriter = new SAXWriter(contentHandler);

    // Wire up the optional SAX roles the handler happens to support.
    if (contentHandler instanceof ErrorHandler) {
        final ErrorHandler asErrorHandler = (ErrorHandler) contentHandler;
        saxWriter.setErrorHandler(asErrorHandler);
    }
    if (contentHandler instanceof LexicalHandler) {
        final LexicalHandler asLexicalHandler = (LexicalHandler) contentHandler;
        saxWriter.setLexicalHandler(asLexicalHandler);
    }

    saxWriter.write(convertElement(element));
}

From source file:com.finderbots.miner.AnalyzeHtml.java

License:Apache License

/**
 * Analyzes one parsed page and emits a single result tuple for it.
 *
 * <p>The document is still replayed as SAX events into a
 * {@code SimpleBodyContentHandler}, but the handler's text is not used:
 * content-based scoring is disabled and every page gets a score of 1.0.
 *
 * @param datum parsed page metadata; only its URL is read here
 * @param doc document tree for the page
 * @param collector sink that receives the result tuple
 * @throws Exception if the SAX replay or any extraction step fails
 */
@Override
protected void process(ParsedDatum datum, Document doc, TupleEntryCollector collector) throws Exception {
    final SimpleBodyContentHandler bodyText = new SimpleBodyContentHandler();
    new SAXWriter(bodyText).write(doc);

    // Scoring via getScore(bodyText.toString()) is switched off; use a constant.
    final float fixedScore = 1.0f;

    // Outlinks are computed first because image extraction takes them as input.
    final Outlink[] links = getOutlinks(doc);
    final PageResult[] imageResults = extractImages(datum.getUrl(), doc, links);

    _result.setUrl(datum.getUrl());
    _result.setPageScore(fixedScore);
    _result.setOutlinks(links);
    _result.setPageResults(imageResults);

    collector.add(_result.getTuple());
}

From source file:com.finderbots.miner2.pinterest.AnalyzeHtml.java

License:Apache License

/**
 * Analyzes one parsed page and emits a result tuple, unless the URL filter
 * rejects the page.
 *
 * <p>A page is processed when no filter is configured or when the filter's
 * {@code isRemove} returns false for its URL. Page results come from
 * {@code getPrefs}; image extraction is disabled. The score is fixed at 1.0.
 *
 * @param datum parsed page metadata; only its URL is read here
 * @param doc document tree for the page
 * @param collector sink that receives the result tuple
 * @throws Exception if the SAX replay or any extraction step fails
 */
@Override
protected void process(ParsedDatum datum, Document doc, TupleEntryCollector collector) throws Exception {
    // Bail out early for pages the filter wants removed.
    if (_urlsToMineFilter != null && _urlsToMineFilter.isRemove(datum.getUrl().toString())) {
        return;
    }

    final SimpleBodyContentHandler bodyText = new SimpleBodyContentHandler();
    new SAXWriter(bodyText).write(doc);

    // Scoring via getScore(bodyText.toString()) is switched off; use a constant.
    final float fixedScore = 1.0f;

    final Outlink[] links = getOutlinks(doc);
    // Preferences replace the image-based page results (extractImages is not called).
    final BooleanPreference[] preferences = getPrefs(datum.getUrl().toString(), doc);

    _result.setUrl(datum.getUrl());
    _result.setPageScore(fixedScore);
    _result.setOutlinks(links);
    _result.setPageResults(preferences);

    collector.add(_result.getTuple());
}

From source file:com.finderbots.miner2.tomatoes.MineRTCriticsPreferences.java

License:Apache License

/**
 * Mines one parsed page and emits a result tuple, unless the URL filter
 * rejects the page.
 *
 * <p>The shared {@code _result} datum is reused for every page, so every field
 * must be assigned on every call. Critic pages ({@code /critic/}) fill the
 * preferences and blank the media fields; media pages ({@code /m/}) fill the
 * media fields and use an empty preference array. Any other URL is logged and
 * emitted with all mined fields cleared, so values from the previously
 * processed page can never leak into its tuple.
 *
 * @param datum parsed page metadata; only its URL is read here
 * @param doc document tree for the page
 * @param collector sink that receives the result tuple
 * @throws Exception if the SAX replay or any mining step fails
 */
@Override
protected void process(ParsedDatum datum, Document doc, TupleEntryCollector collector) throws Exception {
    LOGGER.debug(this.getClass().toString() + " Got datum for url: " + datum.getUrl());
    if (_urlsToMineFilter == null || !_urlsToMineFilter.isRemove(datum.getUrl().toString())) {
        // Currently mines all pages, so the fields in the tuple/datum must
        // ALL be set every time. Either set for prefs OR media pages, not both.
        // TODO: split into two datum types and mine separately?
        SimpleBodyContentHandler bodyContentHandler = new SimpleBodyContentHandler();
        SAXWriter writer = new SAXWriter(bodyContentHandler);
        writer.write(doc);

        // Mine that data.
        String url = datum.getUrl();
        _result.setUrl(url);

        if (url.contains("/critic/")) { // mining a critic page
            _result.setItemId("");
            _result.setItemName("");
            _result.setPosterImageUrl("");
            MultiValuePreference[] prefs = minePrefs(url, doc);
            _result.setPrefs(prefs);
        } else if (url.contains("/m/")) { // mining a media page
            _result.setItemId(mineItemId(url));
            _result.setItemName(mineItemName(doc));
            _result.setPosterImageUrl(minePosterImageUrl(doc));
            _result.setPrefs(new MultiValuePreference[0]);
        } else { // not a page to mine; the URL filter should have removed it
            LOGGER.info("URLs to mine not working, getting urls that we don't mine like: " + url);
            // _result is shared across calls: clear every mined field so this
            // tuple does not carry the previous page's item data or preferences.
            _result.setItemId("");
            _result.setItemName("");
            _result.setPosterImageUrl("");
            _result.setPrefs(new MultiValuePreference[0]);
        }

        collector.add(_result.getTuple());
    }
}

From source file:de.tudarmstadt.ukp.clarin.webanno.tei.TeiReader.java

License:Apache License

/**
 * Fills the given CAS from the current TEI element and then advances to the
 * next one.
 *
 * <p>The current element is replayed as SAX events into a handler obtained
 * from {@code newSaxHandler()}, after which {@code endDocument()} is called on
 * the handler explicitly.
 *
 * @param aCAS CAS to initialize and populate
 * @throws IOException if SAX processing fails, or if any other exception
 *         occurs while reading the element (reported as an invalid file, with
 *         the original exception attached as the cause)
 * @throws CollectionException if the JCas view cannot be obtained
 */
@Override
public void getNext(CAS aCAS) throws IOException, CollectionException {
    initCas(aCAS, currentResource);

    // NOTE(review): this stream is never assigned in this method, so the
    // closeQuietly call below always receives null.
    InputStream is = null;

    try {
        JCas jcas = aCAS.getJCas();

        // Create handler
        Handler handler = newSaxHandler();
        handler.setJCas(jcas);
        handler.setLogger(getLogger());

        // Parse TEI text
        SAXWriter writer = new SAXWriter(handler);
        writer.write(currentTeiElement);
        handler.endDocument();
    } catch (CASException e) {
        throw new CollectionException(e);
    } catch (SAXException e) {
        throw new IOException(e);
    } catch (Exception e) {
        // Keep the original exception as the cause so the real failure stays diagnosable.
        throw new IOException("This is not a valid WebAnno CPH TEI file", e);
    } finally {
        closeQuietly(is);
    }

    // Move currentTeiElement to the next text
    nextTeiElement();
}

From source file:de.tudarmstadt.ukp.dkpro.core.io.tei.TeiReader.java

License:Apache License

/**
 * Fills the given CAS from the current TEI element and then advances to the
 * next one.
 *
 * <p>If {@code PARAM_LANGUAGE} is configured it becomes the document language,
 * and the POS mapping provider is configured afterwards. The current element
 * is then replayed as SAX events into a handler obtained from
 * {@code newSaxHandler()}, after which {@code endDocument()} is called on the
 * handler explicitly.
 *
 * @param aCAS CAS to initialize and populate
 * @throws IOException if configuring the mapping provider or SAX processing fails
 * @throws CollectionException if the JCas view cannot be obtained
 */
@Override
public void getNext(CAS aCAS) throws IOException, CollectionException {
    initCas(aCAS, currentResource);

    // Apply the configured language, if any, before the mapping is resolved.
    if (getConfigParameterValue(PARAM_LANGUAGE) != null) {
        aCAS.setDocumentLanguage((String) getConfigParameterValue(PARAM_LANGUAGE));
    }

    // The mapping can only be configured once the CAS carries its language.
    try {
        posMappingProvider.configure(aCAS);
    } catch (AnalysisEngineProcessException mappingFailure) {
        throw new IOException(mappingFailure);
    }

    // NOTE(review): this stream is never assigned in this method, so the
    // closeQuietly call below always receives null.
    InputStream is = null;

    try {
        final JCas view = aCAS.getJCas();

        // Prepare the SAX handler that receives the TEI events.
        final Handler saxHandler = newSaxHandler();
        saxHandler.setJCas(view);
        saxHandler.setLogger(getLogger());

        // Replay the current TEI element, then close the document explicitly.
        new SAXWriter(saxHandler).write(currentTeiElement);
        saxHandler.endDocument();
    } catch (CASException casFailure) {
        throw new CollectionException(casFailure);
    } catch (SAXException saxFailure) {
        throw new IOException(saxFailure);
    } finally {
        closeQuietly(is);
    }

    // Advance currentTeiElement to the next text.
    nextTeiElement();
}

From source file:edu.umd.cs.buildServer.inspection.FindBugsDocumentBuilder.java

License:Apache License

/**
 * Converts the finished document tree into the bug collection by replaying it
 * as SAX events into a {@code SAXBugCollectionHandler}.
 *
 * <p>A {@link SAXException} during the replay is logged at info level and
 * otherwise ignored.
 */
@Override
protected void documentFinished() {
    try {
        // The handler fills bugCollection as it receives the SAX events.
        final SAXBugCollectionHandler collectionHandler = new SAXBugCollectionHandler(bugCollection);
        new SAXWriter(collectionHandler).write(getDocument());
    } catch (SAXException saxFailure) {
        getLog().info("Couldn't generate BugCollection from findbugs XML output", saxFailure);
    }
}

From source file:itensil.io.xml.SAXHandler.java

License:Open Source License

/**
 * /* www.ja v  a  2  s  .  c  o  m*/
 * @param doc
 * @throws IOException
 * @throws SAXException
 */
public void parse(Document doc) throws IOException, SAXException {
    SAXWriter adapter = new SAXWriter(this);
    adapter.write(doc);
}

From source file:org.dom4j.samples.validate.JARVDemo.java

License:Open Source License

/**
 * Validates a document against a schema using MSV through the JARV API.
 *
 * <p>The schema at {@code schemaURI} is compiled, a verifier is created from
 * it, and the document is replayed as SAX events into the verifier's handler.
 * Errors, fatal errors and warnings are printed to standard output.
 *
 * @param document document to validate
 * @param schemaURI location of the schema to compile
 * @throws Exception if schema compilation or the SAX replay fails
 */
protected void process(Document document, String schemaURI) throws Exception {

    System.out.println("Loaded schema document: " + schemaURI);

    // Compile the schema with MSV's factory implementation.
    VerifierFactory verifierFactory = new com.sun.msv.verifier.jarv.TheFactoryImpl();
    Schema compiledSchema = verifierFactory.compileSchema(schemaURI);

    Verifier verifier = compiledSchema.newVerifier();

    // Report every validation problem on stdout instead of aborting.
    ErrorHandler consoleReporter = new ErrorHandler() {
        public void error(SAXParseException e) {
            System.out.println("ERROR: " + e);
        }

        public void fatalError(SAXParseException e) {
            System.out.println("FATAL: " + e);
        }

        public void warning(SAXParseException e) {
            System.out.println("WARNING: " + e);
        }
    };
    verifier.setErrorHandler(consoleReporter);

    System.out.println("Validating XML document");

    // Stream the document into the verifier as SAX events.
    VerifierHandler verifierHandler = verifier.getVerifierHandler();
    new SAXWriter(verifierHandler).write(document);
}