List of usage examples for the org.dom4j.io.SAXWriter constructor
public SAXWriter(ContentHandler contentHandler)
From source file:bixo.examples.webmining.AnalyzeHtml.java
License:Apache License
@SuppressWarnings("rawtypes") @Override//www . j a v a 2 s . c o m protected void process(ParsedDatum datum, Document doc, TupleEntryCollector collector, FlowProcess process) throws Exception { SimpleBodyContentHandler bodyContentHandler = new SimpleBodyContentHandler(); SAXWriter writer = new SAXWriter(bodyContentHandler); writer.write(doc); float pageScore = getScore(bodyContentHandler.toString()); // Get the outlinks. Outlink[] outlinks = getOutlinks(doc); // Extract all of the images, and use them as page results. PageResult[] pageResults = extractImages(datum.getUrl(), doc, outlinks); _result.setUrl(datum.getUrl()); _result.setPageScore(pageScore); _result.setOutlinks(outlinks); _result.setPageResults(pageResults); collector.add(BixoPlatform.clone(_result.getTuple(), process)); }
From source file:com.alibaba.citrus.springext.util.DomUtil.java
License:Open Source License
/**
 * Converts a W3C element and writes it to the given SAX content handler.
 *
 * <p>If the handler also implements {@code ErrorHandler} and/or
 * {@code LexicalHandler}, it is registered with the writer in those roles too.
 *
 * @param element the W3C element to convert
 * @param contentHandler the handler the writer is constructed with
 * @throws SAXException if writing fails
 */
public static void convertElement(Element element, ContentHandler contentHandler) throws SAXException {
    SAXWriter saxWriter = new SAXWriter(contentHandler);

    if (contentHandler instanceof ErrorHandler) {
        ErrorHandler asErrorHandler = (ErrorHandler) contentHandler;
        saxWriter.setErrorHandler(asErrorHandler);
    }

    if (contentHandler instanceof LexicalHandler) {
        LexicalHandler asLexicalHandler = (LexicalHandler) contentHandler;
        saxWriter.setLexicalHandler(asLexicalHandler);
    }

    // The single-argument overload produces the node that the writer emits.
    saxWriter.write(convertElement(element));
}
From source file:com.finderbots.miner.AnalyzeHtml.java
License:Apache License
@Override protected void process(ParsedDatum datum, Document doc, TupleEntryCollector collector) throws Exception { SimpleBodyContentHandler bodyContentHandler = new SimpleBodyContentHandler(); SAXWriter writer = new SAXWriter(bodyContentHandler); writer.write(doc);//w ww . ja va 2 s . c o m //float pageScore = getScore(bodyContentHandler.toString()); float pageScore = (float) 1.0; // Get the outlinks. Outlink[] outlinks = getOutlinks(doc); // Extract all of the images, and use them as page results. PageResult[] pageResults = extractImages(datum.getUrl(), doc, outlinks); _result.setUrl(datum.getUrl()); _result.setPageScore(pageScore); _result.setOutlinks(outlinks); _result.setPageResults(pageResults); collector.add(_result.getTuple()); }
From source file:com.finderbots.miner2.pinterest.AnalyzeHtml.java
License:Apache License
@Override protected void process(ParsedDatum datum, Document doc, TupleEntryCollector collector) throws Exception { if (_urlsToMineFilter == null || !_urlsToMineFilter.isRemove(datum.getUrl().toString())) { SimpleBodyContentHandler bodyContentHandler = new SimpleBodyContentHandler(); SAXWriter writer = new SAXWriter(bodyContentHandler); writer.write(doc);//from w ww . j av a2 s.c o m //float pageScore = getScore(bodyContentHandler.toString()); float pageScore = (float) 1.0; // Get the outlinks. Outlink[] outlinks = getOutlinks(doc); BooleanPreference[] pageResults = getPrefs(datum.getUrl().toString(), doc); // Extract all of the images, and use them as page results. //PageResult[] pageResults = extractImages(datum.getUrl(), doc, outlinks); _result.setUrl(datum.getUrl()); _result.setPageScore(pageScore); _result.setOutlinks(outlinks); _result.setPageResults(pageResults); collector.add(_result.getTuple()); } }
From source file:com.finderbots.miner2.tomatoes.MineRTCriticsPreferences.java
License:Apache License
@Override protected void process(ParsedDatum datum, Document doc, TupleEntryCollector collector) throws Exception { LOGGER.debug(this.getClass().toString() + " Got datum for url: " + datum.getUrl()); if (_urlsToMineFilter == null || !_urlsToMineFilter.isRemove(datum.getUrl().toString())) { // currently mines all pages so the fields in the tuple/datum must // ALL be set every time. Either set for prefs OR media pages, not both // todo: split into two datum types and mine separately? SimpleBodyContentHandler bodyContentHandler = new SimpleBodyContentHandler(); SAXWriter writer = new SAXWriter(bodyContentHandler); writer.write(doc);/* w ww.j av a 2 s.co m*/ // Mine that data. String url = datum.getUrl(); _result.setUrl(url); if (url.contains("/critic/")) {// mining a critic page _result.setItemId(""); _result.setItemName(""); _result.setPosterImageUrl(""); MultiValuePreference[] prefs = minePrefs(url, doc); _result.setPrefs(prefs); } else if (url.contains("/m/")) {//mining a media page _result.setItemId(mineItemId(url)); _result.setItemName(mineItemName(doc)); _result.setPosterImageUrl(minePosterImageUrl(doc)); _result.setPrefs(new MultiValuePreference[0]); } else {// not a page to mine, should be filtered out so throw an exception? //throw new Exception("Got a page that should not be mined: "+url); LOGGER.info("URLs to mine not working, getting urls that we don't mine like: " + url); } collector.add(_result.getTuple()); } }
From source file:de.tudarmstadt.ukp.clarin.webanno.tei.TeiReader.java
License:Apache License
@Override public void getNext(CAS aCAS) throws IOException, CollectionException { initCas(aCAS, currentResource);/*from w w w. java2 s. c o m*/ InputStream is = null; try { JCas jcas = aCAS.getJCas(); // Create handler Handler handler = newSaxHandler(); handler.setJCas(jcas); handler.setLogger(getLogger()); // Parse TEI text SAXWriter writer = new SAXWriter(handler); writer.write(currentTeiElement); handler.endDocument(); } catch (CASException e) { throw new CollectionException(e); } catch (SAXException e) { throw new IOException(e); } catch (Exception e) { throw new IOException("This is not a valid WebAnno CPH TEI file"); } finally { closeQuietly(is); } // Move currentTeiElement to the next text nextTeiElement(); }
From source file:de.tudarmstadt.ukp.dkpro.core.io.tei.TeiReader.java
License:Apache License
@Override public void getNext(CAS aCAS) throws IOException, CollectionException { initCas(aCAS, currentResource);/*from ww w. ja v a 2 s. c o m*/ // Set up language if (getConfigParameterValue(PARAM_LANGUAGE) != null) { aCAS.setDocumentLanguage((String) getConfigParameterValue(PARAM_LANGUAGE)); } // Configure mapping only now, because now the language is set in the CAS try { posMappingProvider.configure(aCAS); } catch (AnalysisEngineProcessException e1) { throw new IOException(e1); } InputStream is = null; try { JCas jcas = aCAS.getJCas(); // Create handler Handler handler = newSaxHandler(); handler.setJCas(jcas); handler.setLogger(getLogger()); // Parse TEI text SAXWriter writer = new SAXWriter(handler); writer.write(currentTeiElement); handler.endDocument(); } catch (CASException e) { throw new CollectionException(e); } catch (SAXException e) { throw new IOException(e); } finally { closeQuietly(is); } // Move currentTeiElement to the next text nextTeiElement(); }
From source file:edu.umd.cs.buildServer.inspection.FindBugsDocumentBuilder.java
License:Apache License
@Override protected void documentFinished() { try {//from www. j a v a 2 s.co m // Generate a BugCollection from the dom4j tree SAXBugCollectionHandler handler = new SAXBugCollectionHandler(bugCollection); SAXWriter saxWriter = new SAXWriter(handler); saxWriter.write(getDocument()); } catch (SAXException e) { getLog().info("Couldn't generate BugCollection from findbugs XML output", e); } }
From source file:itensil.io.xml.SAXHandler.java
License:Open Source License
/** * /* www.ja v a 2 s . c o m*/ * @param doc * @throws IOException * @throws SAXException */ public void parse(Document doc) throws IOException, SAXException { SAXWriter adapter = new SAXWriter(this); adapter.write(doc); }
From source file:org.dom4j.samples.validate.JARVDemo.java
License:Open Source License
/** Validate document using MSV */ protected void process(Document document, String schemaURI) throws Exception { System.out.println("Loaded schema document: " + schemaURI); // use autodetection of schemas VerifierFactory factory = new com.sun.msv.verifier.jarv.TheFactoryImpl(); Schema schema = factory.compileSchema(schemaURI); Verifier verifier = schema.newVerifier(); verifier.setErrorHandler(new ErrorHandler() { public void error(SAXParseException e) { System.out.println("ERROR: " + e); }// w w w . j ava 2 s. c o m public void fatalError(SAXParseException e) { System.out.println("FATAL: " + e); } public void warning(SAXParseException e) { System.out.println("WARNING: " + e); } }); System.out.println("Validating XML document"); VerifierHandler handler = verifier.getVerifierHandler(); SAXWriter writer = new SAXWriter(handler); writer.write(document); }