List of usage examples for org.dom4j.io SAXReader setEntityResolver
public void setEntityResolver(EntityResolver entityResolver)
From source file:au.com.acegi.xmlformat.FormatUtil.java
License:Apache License
/** * Ingest an input stream, writing formatted XML to the output stream. The * caller is responsible for closing the input and output streams. Any errors * in the input stream will cause an exception and the output stream should * not be relied upon./*from ww w.j a va2 s . c o m*/ * * @param in input XML stream * @param out output XML stream * @param fmt format configuration to apply * @throws DocumentException if input XML could not be parsed * @throws IOException if output XML stream could not be written */ static void format(final InputStream in, final OutputStream out, final OutputFormat fmt) throws DocumentException, IOException { final SAXReader reader = new SAXReader(); reader.setEntityResolver(new EntityResolver() { @Override public InputSource resolveEntity(final String publicId, final String systemId) throws SAXException, IOException { return new InputSource(new StringReader("")); } }); final Document xmlDoc = reader.read(in); final XMLWriter xmlWriter = new XMLWriter(out, fmt); xmlWriter.write(xmlDoc); xmlWriter.flush(); }
From source file:cn.kee.engine.common.SystemInitServlet.java
private Document readerDom(File flie) { Document doc = null;//ww w. j a v a 2 s .co m SAXReader reader = new SAXReader(); reader.setValidation(false); reader.setEntityResolver(new NoOpEntityResolver()); try { doc = reader.read(flie); } catch (DocumentException e) { // TODO Auto-generated catch block e.printStackTrace(); } return doc; }
From source file:com.ai.tools.generator.util.SAXReaderFactory.java
License:Open Source License
public static SAXReader getInstance(boolean validate) { // Crimson cannot do XSD validation. See the following links: ///*from w w w . j av a 2s .c o m*/ // http://www.geocities.com/herong_yang/jdk/xsd_validation.html // http://www.burnthacker.com/archives/000086.html // http://www.theserverside.com/news/thread.tss?thread_id=22525 SAXReader reader = null; try { reader = new SAXReader(_SAX_PARSER_IMPL, validate); reader.setEntityResolver(new EntityResolver()); reader.setFeature(_FEATURES_VALIDATION, validate); reader.setFeature(_FEATURES_VALIDATION_SCHEMA, validate); reader.setFeature(_FEATURES_VALIDATION_SCHEMA_FULL_CHECKING, validate); reader.setFeature(_FEATURES_DYNAMIC, validate); } catch (Exception e) { _log.warn("XSD validation is diasabled because " + e.getMessage()); reader = new SAXReader(validate); reader.setEntityResolver(new EntityResolver()); } return reader; }
From source file:com.alibaba.stonelab.toolkit.learning.xml.Dom4jParser.java
License:Open Source License
/**
 * Parses the classpath resource {@code XML_LOCATION} with a validating
 * dom4j reader (schema validation enabled) and prints the resulting
 * document to standard output.
 *
 * @throws java.io.FileNotFoundException if the resource is not on the classpath
 * @throws Exception if the parser features cannot be set or the document
 *         cannot be parsed
 */
public static void dom4j() throws Exception {
    SAXReader reader = new SAXReader(true);
    reader.setEntityResolver(new EntityResolver());
    reader.setFeature("http://xml.org/sax/features/validation", true);
    reader.setFeature("http://apache.org/xml/features/validation/schema", true);

    // getResourceAsStream returns null for a missing resource; fail with a
    // clear message instead of handing null to the parser.
    java.io.InputStream xml = Dom4jParser.class.getResourceAsStream(XML_LOCATION);
    if (xml == null) {
        throw new java.io.FileNotFoundException("Classpath resource not found: " + XML_LOCATION);
    }
    try {
        Document doc = reader.read(xml);
        System.out.println(doc);
    } finally {
        // The stream was opened here, so it is released here.
        xml.close();
    }
}
From source file:com.appeligo.ccdataindexer.Indexer.java
License:Apache License
public boolean indexProgram(File programFile, Network network) throws IOException { log.debug("processing file " + programFile + " for " + network.getStationName()); boolean needToClose = openIndex(); StringBuilder captions = new StringBuilder(); InputStream is = null;// www . j a va 2s .c om try { is = new GZIPInputStream(new BufferedInputStream(new FileInputStream(programFile))); SAXReader reader = new SAXReader(); reader.setEntityResolver(new ExternalResolver()); Document document = null; try { document = reader.read(is); } catch (DocumentException e) { log.warn("Could not open document " + programFile + "; ", e); return false; } //Node startTimeNode = document.selectSingleNode("//meta[@name='StartTime']"); Node startTimeNode = document.selectSingleNode("//*[name()='meta'][@name='StartTime']"); long startTime; try { startTime = Long.parseLong(startTimeNode.valueOf("@content")); } catch (NumberFormatException e) { log.warn("Error parsing StartTime " + startTimeNode + "; ", e); return false; } //Node programNode = document.selectSingleNode("//meta[@name='ProgramID']"); Node programNode = document.selectSingleNode("//*[name()='meta'][@name='ProgramID']"); String programId = programNode.valueOf("@content"); programId = updateProgramId(programId); //Node endTimeNode = document.selectSingleNode("//meta[@name='EndTime']"); Node endTimeNode = document.selectSingleNode("//*[name()='meta'][@name='EndTime']"); long endTime; try { endTime = Long.parseLong(endTimeNode.valueOf("@content")); } catch (NumberFormatException e) { log.warn("Error parsing endTime " + endTimeNode + "; ", e); return false; } //List divs = document.selectNodes("/html/body/div"); List divs = document.selectNodes("/*[name()='html']/*[name()='body']/*[name()='div']"); while (divs.size() > 0) { Element div = (Element) divs.remove(0); List children = div.selectNodes("child::node()"); while (children.size() > 0) { Node a = (Node) children.remove(0); while (!("a".equals(a.getName()))) { if (children.size() 
== 0) { break; } a = (Node) children.remove(0); } if (!("a".equals(a.getName()))) { break; } Node afterA = (Node) children.remove(0); if (afterA instanceof Element) { if (!("span".equals(afterA.getName()))) { throw new IOException("span expected... bad data in " + programFile); } //Don't include this in the captions or should we? //Element span = (Element)afterA; //captions.append(' '); //captions.append(span.getText().replace(">>", "").trim()); afterA = (Node) children.remove(0); } StringBuilder sentence = new StringBuilder(); if (afterA instanceof Text) { Text sentenceNode = (Text) afterA; sentence.append(sentenceNode.getText()); } else { Entity entity = (Entity) afterA; sentence.append(entity.asXML()); } /* while (children.get(0) instanceof Text) { Text moreText = (Text)children.remove(0); captions.append(' '); captions.append(DocumentUtil.prettySentence(moreText.getText())); } */ while (children.get(0) instanceof Text || children.get(0) instanceof Entity) { if (children.get(0) instanceof Text) { Text moreText = (Text) children.remove(0); sentence.append(moreText.getText()); } else { Entity entity = (Entity) children.remove(0); sentence.append(entity.asXML()); } } captions.append(DocumentUtil.prettySentence(sentence.toString().trim())); captions.append(' '); } } ArrayList<ScheduledProgram> skedulePrograms = new ArrayList<ScheduledProgram>(); long lookupTimeStart = System.currentTimeMillis(); for (String lineupId : networkLineups) { log.debug("looking for future scheduled program: " + programId + " on lineup " + lineupId); ScheduledProgram skedProg = epg.getNextShowing(lineupId, programId, false, true); if (skedProg == null) { log.debug("looking for past scheduled program: " + programId + " on lineup " + lineupId); skedProg = epg.getLastShowing(lineupId, programId); } log.debug("Sked prog for program: " + programId + " is null " + (skedProg == null)); //We Can we fake a scheduled program? 
//Program program = epg.getProgram(programId); if (skedProg == null) { log.debug("Unable to locate ScheduleProgram for " + programId); Program program = epg.getProgram(programId); if (program != null) { skedProg = new ScheduledProgram(); skedProg.setNetwork(network); skedProg.setDescription(program.getDescription()); skedProg.setDescriptionWithActors(program.getDescriptionWithActors()); skedProg.setEndTime(new Date(endTime)); skedProg.setStartTime(new Date(startTime)); skedProg.setCredits(program.getCredits()); skedProg.setGenreDescription(program.getGenreDescription()); skedProg.setProgramId(program.getProgramId()); skedProg.setProgramTitle(program.getProgramTitle()); skedProg.setEpisodeTitle(program.getEpisodeTitle()); skedProg.setLastModified(program.getLastModified()); skedProg.setOriginalAirDate(program.getOriginalAirDate()); skedProg.setRunTime(program.getRunTime()); skedProg.setScheduleId(0); skedProg.setTvRating(program.getTvRating()); skedProg.setNewEpisode(false); skedProg.setLineupId(lineupId); } } if (skedProg != null) { skedulePrograms.add(skedProg); if (indexWriter == null) { // probably only doing composite index, so one is all we need break; } } } lookupTime += (System.currentTimeMillis() - lookupTimeStart); if (captions.length() > 250 && skedulePrograms.size() > 0) { //Delete any old duplicates Term term = new Term("programID", skedulePrograms.get(0).getProgramId()); if (indexWriter != null) { indexWriter.deleteDocuments(term); //Now insert the new document. 
org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document(); DocumentUtil.addCaptions(doc, captions.toString()); DocumentUtil.populateDocument(doc, skedulePrograms, new Date()); indexWriter.addDocument(doc); } if (compositeIndexWriter != null) { compositeIndexWriter.deleteDocuments(term); org.apache.lucene.document.Document compositeDoc = new org.apache.lucene.document.Document(); DocumentUtil.populateCompositeDocument(compositeDoc, captions.toString().replaceAll("[.!?]* ", " "), skedulePrograms); compositeIndexWriter.addDocument(compositeDoc); } log.debug("Adding to index now:" + skedulePrograms.get(0).getProgramId() + " " + skedulePrograms.get(0).getProgramTitle()); return true; } else { log.debug("Limited CC data(" + captions.length() + " character) or unable to locate EPG date for the program. Not adding this program to the index:" + programId); return false; } } finally { if (is != null) { try { is.close(); } catch (IOException e) { log.error("Error closing file", e); } } if (needToClose) { closeIndex(); } } }
From source file:com.augmentum.common.util.SAXReaderFactory.java
License:Open Source License
public static SAXReader getInstance(boolean validate) { // Crimson cannot do XSD validation. See the following links: ////from w w w . java2 s .c o m // http://www.geocities.com/herong_yang/jdk/xsd_validation.html // http://www.burnthacker.com/archives/000086.html // http://www.theserverside.com/news/thread.tss?thread_id=22525 SAXReader reader = null; try { reader = new SAXReader(_SAX_PARSER_IMPL, validate); reader.setEntityResolver(new EntityResolver()); reader.setFeature(_FEATURES_VALIDATION, validate); reader.setFeature(_FEATURES_VALIDATION_SCHEMA, validate); reader.setFeature(_FEATURES_VALIDATION_SCHEMA_FULL_CHECKING, validate); reader.setFeature(_FEATURES_DYNAMIC, validate); } catch (Exception e) { _log.warn("XSD validation is diasabled because " + e.getMessage()); reader = new SAXReader(validate); reader.setEntityResolver(new EntityResolver()); } return reader; }
From source file:com.cladonia.xml.XMLUtilities.java
License:Open Source License
/** * Creates a new SAXReader./*from w ww. j av a 2s . c o m*/ * * @param validate when true the reader validates the input. * * @return the reader. */ public static SAXReader createReader(boolean validate, boolean loadExternalDTD) { SAXReader reader = new SAXReader(XDocumentFactory.getInstance(), validate); reader.setStripWhitespaceText(false); reader.setMergeAdjacentText(true); // reader.setMergeAdjacentText( true); if (!validate) { reader.setIncludeExternalDTDDeclarations(false); reader.setIncludeInternalDTDDeclarations(true); try { if (loadExternalDTD) { reader.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", true); // System.out.println( "http://apache.org/xml/features/nonvalidating/load-external-dtd = "+reader.getXMLReader().getFeature( "http://apache.org/xml/features/nonvalidating/load-external-dtd")); reader.setEntityResolver(getCatalogResolver()); } else { reader.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); } } catch (Exception e) { e.printStackTrace(); } } else { try { reader.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", true); } catch (Exception e) { e.printStackTrace(); } } return reader; }
From source file:com.cladonia.xml.XMLUtilities.java
License:Open Source License
/** * Creates a new SAXReader./*from w w w . java2s . c o m*/ * * @param validate when true the reader validates the input. * * @return the reader. */ public static SAXReader createReader(boolean validate, boolean loadExternalDTD, boolean stripWhiteSpace) { SAXReader reader = new SAXReader(XDocumentFactory.getInstance(), validate); reader.setStripWhitespaceText(stripWhiteSpace); reader.setMergeAdjacentText(true); // reader.setMergeAdjacentText( true); if (!validate) { reader.setIncludeExternalDTDDeclarations(false); reader.setIncludeInternalDTDDeclarations(true); try { if (loadExternalDTD) { reader.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", true); // System.out.println( "http://apache.org/xml/features/nonvalidating/load-external-dtd = "+reader.getXMLReader().getFeature( "http://apache.org/xml/features/nonvalidating/load-external-dtd")); reader.setEntityResolver(getCatalogResolver()); } else { reader.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); } } catch (Exception e) { e.printStackTrace(); } } else { try { reader.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", true); } catch (Exception e) { e.printStackTrace(); } } return reader; }
From source file:com.cladonia.xml.XMLUtilities.java
License:Open Source License
/** * Parses the document for this reader./*from w ww. ja va2 s . c om*/ * * @param reader the reader with all the information. * @param systemId the systemId of the document. * * @return the Dom4J document. */ public static synchronized XDocument parse(BufferedReader reader, int length, String systemId, String grammarLocation, boolean stripWhiteSpace) throws IOException, SAXParseException { if (DEBUG) System.out.println("XMLUtilities.parse( " + reader + ", " + systemId + ", " + grammarLocation + ")"); SAXReader saxReader = createReader(false, loadDTDGrammar, stripWhiteSpace); saxReader.setEntityResolver(new DummyEntityResolver(grammarLocation)); if (resolveEntities) { String encoding; try { reader.mark(length); encoding = getXMLDeclaration(reader).getEncoding(); } finally { reader.reset(); } reader.mark(length + 1); // // parse without substituting the entities parse(saxReader, createReader(reader, encoding), systemId); reader.reset(); } return parse(saxReader, replaceAmp(reader), systemId); }
From source file:com.cladonia.xml.XMLUtilities.java
License:Open Source License
/** * Parses the document for this reader.//from w w w . j a va 2s .com * * @param reader the reader with all the information. * @param systemId the systemId of the document. * * @return the Dom4J document. */ public static synchronized XDocument parse(BufferedReader reader, int length, String systemId, String grammarLocation) throws IOException, SAXParseException { if (DEBUG) System.out.println("XMLUtilities.parse( " + reader + ", " + systemId + ", " + grammarLocation + ")"); SAXReader saxReader = createReader(false, loadDTDGrammar); saxReader.setEntityResolver(new DummyEntityResolver(grammarLocation)); if (resolveEntities) { String encoding; try { reader.mark(length); encoding = getXMLDeclaration(reader).getEncoding(); } finally { reader.reset(); } reader.mark(length + 1); // // parse without substituting the entities parse(saxReader, createReader(reader, encoding), systemId); reader.reset(); } return parse(saxReader, replaceAmp(reader), systemId); }