Example usage for org.dom4j.io SAXReader setEntityResolver

List of usage examples for org.dom4j.io SAXReader setEntityResolver

Introduction

On this page you can find example usages of org.dom4j.io.SAXReader#setEntityResolver.

Prototype

public void setEntityResolver(EntityResolver entityResolver) 

Source Link

Document

Sets the entity resolver used to resolve entities.

Usage

From source file:au.com.acegi.xmlformat.FormatUtil.java

License:Apache License

/**
 * Ingest an input stream, writing formatted XML to the output stream. The
 * caller is responsible for closing the input and output streams. Any errors
 * in the input stream will cause an exception and the output stream should
 * not be relied upon./*from   ww  w.j a  va2 s .  c o m*/
 *
 * @param in  input XML stream
 * @param out output XML stream
 * @param fmt format configuration to apply
 * @throws DocumentException if input XML could not be parsed
 * @throws IOException       if output XML stream could not be written
 */
static void format(final InputStream in, final OutputStream out, final OutputFormat fmt)
        throws DocumentException, IOException {
    final SAXReader reader = new SAXReader();
    reader.setEntityResolver(new EntityResolver() {
        @Override
        public InputSource resolveEntity(final String publicId, final String systemId)
                throws SAXException, IOException {
            return new InputSource(new StringReader(""));
        }
    });
    final Document xmlDoc = reader.read(in);

    final XMLWriter xmlWriter = new XMLWriter(out, fmt);
    xmlWriter.write(xmlDoc);
    xmlWriter.flush();
}

From source file:cn.kee.engine.common.SystemInitServlet.java

/**
 * Reads the given file into a document using a reader with validation
 * switched off and a {@code NoOpEntityResolver} installed.
 *
 * @param flie the file to parse
 * @return the parsed document, or {@code null} when a DocumentException is raised
 */
private Document readerDom(File flie) {
    SAXReader saxReader = new SAXReader();
    saxReader.setValidation(false);
    saxReader.setEntityResolver(new NoOpEntityResolver());

    try {
        return saxReader.read(flie);
    } catch (DocumentException e) {
        // The failure is only printed; the caller sees it as a null result.
        e.printStackTrace();
        return null;
    }
}

From source file:com.ai.tools.generator.util.SAXReaderFactory.java

License:Open Source License

/**
 * Builds a SAXReader, preferring the parser named by {@code _SAX_PARSER_IMPL}
 * with the validation features set to {@code validate}. If anything in that
 * set-up throws, a warning is logged and a default SAXReader is returned instead.
 *
 * Crimson cannot do XSD validation. See the following links:
 *
 * http://www.geocities.com/herong_yang/jdk/xsd_validation.html
 * http://www.burnthacker.com/archives/000086.html
 * http://www.theserverside.com/news/thread.tss?thread_id=22525
 *
 * @param validate whether the reader (and its validation features) should validate
 * @return a reader with an entity resolver installed
 */
public static SAXReader getInstance(boolean validate) {
    try {
        SAXReader preferred = new SAXReader(_SAX_PARSER_IMPL, validate);

        preferred.setEntityResolver(new EntityResolver());

        preferred.setFeature(_FEATURES_VALIDATION, validate);
        preferred.setFeature(_FEATURES_VALIDATION_SCHEMA, validate);
        preferred.setFeature(_FEATURES_VALIDATION_SCHEMA_FULL_CHECKING, validate);
        preferred.setFeature(_FEATURES_DYNAMIC, validate);

        return preferred;
    } catch (Exception e) {
        _log.warn("XSD validation is diasabled because " + e.getMessage());
    }

    // Fallback path: default parser, same entity resolver, no feature tweaks.
    SAXReader fallback = new SAXReader(validate);

    fallback.setEntityResolver(new EntityResolver());

    return fallback;
}

From source file:com.alibaba.stonelab.toolkit.learning.xml.Dom4jParser.java

License:Open Source License

/**
 * Parses the classpath resource at {@code XML_LOCATION} with a validating
 * SAXReader (validation and schema features switched on) and prints the
 * resulting document to standard output.
 *
 * @throws Exception if configuring the reader or parsing the resource fails
 */
public static void dom4j() throws Exception {
    SAXReader validatingReader = new SAXReader(true);
    validatingReader.setEntityResolver(new EntityResolver());
    validatingReader.setFeature("http://xml.org/sax/features/validation", true);
    validatingReader.setFeature("http://apache.org/xml/features/validation/schema", true);

    Document parsed = validatingReader.read(Dom4jParser.class.getResourceAsStream(XML_LOCATION));
    System.out.println(parsed);
}

From source file:com.appeligo.ccdataindexer.Indexer.java

License:Apache License

/**
 * Parses one gzip-compressed caption file, collects its caption text, looks up
 * (or synthesizes) a ScheduledProgram for it and, when enough caption text and
 * a program are available, writes a document to each non-null index writer.
 *
 * @param programFile gzip-compressed XML caption file to read
 * @param network     network the file belongs to; used in log output and set
 *                    on a synthesized ScheduledProgram
 * @return {@code true} when the captions exceed 250 characters and at least one
 *         scheduled program was found or built; {@code false} when the document
 *         cannot be parsed, StartTime/EndTime is not a number, or those
 *         conditions are not met
 * @throws IOException if an element other than {@code span} directly follows
 *                     an {@code a} node, or if opening/decompressing the file fails
 */
public boolean indexProgram(File programFile, Network network) throws IOException {
    log.debug("processing file " + programFile + " for " + network.getStationName());
    // Remember whether the index has to be closed again in the finally block.
    boolean needToClose = openIndex();
    StringBuilder captions = new StringBuilder();
    InputStream is = null;
    try {
        is = new GZIPInputStream(new BufferedInputStream(new FileInputStream(programFile)));

        SAXReader reader = new SAXReader();
        reader.setEntityResolver(new ExternalResolver());
        Document document = null;
        try {
            document = reader.read(is);
        } catch (DocumentException e) {
            log.warn("Could not open document " + programFile + "; ", e);
            return false;
        }

        // The XPath expressions below match on name() rather than a plain element
        // step -- presumably to sidestep a default namespace; TODO confirm.
        // NOTE(review): none of the selectSingleNode results are null-checked.
        //Node startTimeNode = document.selectSingleNode("//meta[@name='StartTime']");
        Node startTimeNode = document.selectSingleNode("//*[name()='meta'][@name='StartTime']");
        long startTime;
        try {
            startTime = Long.parseLong(startTimeNode.valueOf("@content"));
        } catch (NumberFormatException e) {
            log.warn("Error parsing StartTime " + startTimeNode + "; ", e);
            return false;
        }
        //Node programNode = document.selectSingleNode("//meta[@name='ProgramID']");
        Node programNode = document.selectSingleNode("//*[name()='meta'][@name='ProgramID']");
        String programId = programNode.valueOf("@content");
        programId = updateProgramId(programId);
        //Node endTimeNode = document.selectSingleNode("//meta[@name='EndTime']");
        Node endTimeNode = document.selectSingleNode("//*[name()='meta'][@name='EndTime']");
        long endTime;
        try {
            endTime = Long.parseLong(endTimeNode.valueOf("@content"));
        } catch (NumberFormatException e) {
            log.warn("Error parsing endTime " + endTimeNode + "; ", e);
            return false;
        }

        //List divs = document.selectNodes("/html/body/div");
        List divs = document.selectNodes("/*[name()='html']/*[name()='body']/*[name()='div']");

        // Both lists are consumed destructively from the front with remove(0).
        while (divs.size() > 0) {
            Element div = (Element) divs.remove(0);
            List children = div.selectNodes("child::node()");
            while (children.size() > 0) {
                // Skip forward to the next node named "a"; stop with this div
                // if the children run out before one is found.
                Node a = (Node) children.remove(0);
                while (!("a".equals(a.getName()))) {
                    if (children.size() == 0) {
                        break;
                    }
                    a = (Node) children.remove(0);
                }
                if (!("a".equals(a.getName()))) {
                    break;
                }

                // NOTE(review): remove(0) here and below is not guarded by a size
                // check -- looks like it throws if "a" was the last child; confirm.
                Node afterA = (Node) children.remove(0);
                if (afterA instanceof Element) {
                    // The only element accepted after an "a" is a "span"; it is skipped.
                    if (!("span".equals(afterA.getName()))) {
                        throw new IOException("span expected... bad data in " + programFile);
                    }
                    //Don't include this in the captions or should we?
                    //Element span = (Element)afterA;
                    //captions.append(' ');
                    //captions.append(span.getText().replace(">>", "").trim());
                    afterA = (Node) children.remove(0);
                }

                // Build one sentence from the run of Text/Entity nodes that follows.
                StringBuilder sentence = new StringBuilder();
                if (afterA instanceof Text) {
                    Text sentenceNode = (Text) afterA;
                    sentence.append(sentenceNode.getText());
                } else {
                    // Anything that is not Text is cast to Entity without a type check.
                    Entity entity = (Entity) afterA;
                    sentence.append(entity.asXML());
                }
                /*
                while (children.get(0) instanceof Text) {
                   Text moreText = (Text)children.remove(0);
                   captions.append(' ');
                   captions.append(DocumentUtil.prettySentence(moreText.getText()));
                }
                */
                // NOTE(review): children.get(0) is called without checking for an
                // empty list -- appears to throw when a Text/Entity run reaches the
                // end of the div; confirm the input always ends with another node type.
                while (children.get(0) instanceof Text || children.get(0) instanceof Entity) {
                    if (children.get(0) instanceof Text) {
                        Text moreText = (Text) children.remove(0);
                        sentence.append(moreText.getText());
                    } else {
                        Entity entity = (Entity) children.remove(0);
                        sentence.append(entity.asXML());
                    }
                }
                captions.append(DocumentUtil.prettySentence(sentence.toString().trim()));
                captions.append(' ');
            }
        }
        // Find a ScheduledProgram per lineup: next showing first, then the last
        // showing, and finally one synthesized from the EPG program record.
        ArrayList<ScheduledProgram> skedulePrograms = new ArrayList<ScheduledProgram>();
        long lookupTimeStart = System.currentTimeMillis();
        for (String lineupId : networkLineups) {
            log.debug("looking for future scheduled program: " + programId + "  on lineup " + lineupId);
            ScheduledProgram skedProg = epg.getNextShowing(lineupId, programId, false, true);
            if (skedProg == null) {
                log.debug("looking for past scheduled program: " + programId + "  on lineup " + lineupId);
                skedProg = epg.getLastShowing(lineupId, programId);
            }
            log.debug("Sked prog for program: " + programId + " is null " + (skedProg == null));
            //We Can we fake a scheduled program?
            //Program program = epg.getProgram(programId);
            if (skedProg == null) {
                log.debug("Unable to locate ScheduleProgram for " + programId);
                Program program = epg.getProgram(programId);
                if (program != null) {
                    // Start/end times come from the caption file's meta tags;
                    // everything else is copied from the EPG program.
                    skedProg = new ScheduledProgram();
                    skedProg.setNetwork(network);
                    skedProg.setDescription(program.getDescription());
                    skedProg.setDescriptionWithActors(program.getDescriptionWithActors());
                    skedProg.setEndTime(new Date(endTime));
                    skedProg.setStartTime(new Date(startTime));
                    skedProg.setCredits(program.getCredits());
                    skedProg.setGenreDescription(program.getGenreDescription());
                    skedProg.setProgramId(program.getProgramId());
                    skedProg.setProgramTitle(program.getProgramTitle());
                    skedProg.setEpisodeTitle(program.getEpisodeTitle());
                    skedProg.setLastModified(program.getLastModified());
                    skedProg.setOriginalAirDate(program.getOriginalAirDate());
                    skedProg.setRunTime(program.getRunTime());
                    skedProg.setScheduleId(0);
                    skedProg.setTvRating(program.getTvRating());
                    skedProg.setNewEpisode(false);
                    skedProg.setLineupId(lineupId);
                }
            }
            if (skedProg != null) {
                skedulePrograms.add(skedProg);
                if (indexWriter == null) { // probably only doing composite index, so one is all we need
                    break;
                }
            }
        }
        // Accumulate the time spent on EPG lookups.
        lookupTime += (System.currentTimeMillis() - lookupTimeStart);
        // Only index programs with more than 250 caption characters and a known schedule entry.
        if (captions.length() > 250 && skedulePrograms.size() > 0) {
            //Delete any old duplicates
            Term term = new Term("programID", skedulePrograms.get(0).getProgramId());
            if (indexWriter != null) {
                indexWriter.deleteDocuments(term);
                //Now insert the new document.
                org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
                DocumentUtil.addCaptions(doc, captions.toString());
                DocumentUtil.populateDocument(doc, skedulePrograms, new Date());
                indexWriter.addDocument(doc);
            }

            if (compositeIndexWriter != null) {
                compositeIndexWriter.deleteDocuments(term);
                org.apache.lucene.document.Document compositeDoc = new org.apache.lucene.document.Document();
                // The regex removes any run of '.', '!' or '?' that directly precedes a space.
                DocumentUtil.populateCompositeDocument(compositeDoc,
                        captions.toString().replaceAll("[.!?]* ", " "), skedulePrograms);
                compositeIndexWriter.addDocument(compositeDoc);
            }

            log.debug("Adding to index now:" + skedulePrograms.get(0).getProgramId() + " "
                    + skedulePrograms.get(0).getProgramTitle());
            return true;
        } else {
            log.debug("Limited CC data(" + captions.length()
                    + " character) or unable to locate EPG date for the program. Not adding this program to the index:"
                    + programId);
            return false;
        }
    } finally {
        // Runs on every exit path: close the input stream, then the index if
        // openIndex() reported that it needs closing.
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                log.error("Error closing file", e);
            }
        }
        if (needToClose) {
            closeIndex();
        }
    }
}

From source file:com.augmentum.common.util.SAXReaderFactory.java

License:Open Source License

/**
 * Returns a SAXReader backed by the parser named by {@code _SAX_PARSER_IMPL}
 * with its validation features set to {@code validate}. When that set-up
 * throws, a warning is logged and a default SAXReader is returned instead.
 *
 * Crimson cannot do XSD validation. See the following links:
 *
 * http://www.geocities.com/herong_yang/jdk/xsd_validation.html
 * http://www.burnthacker.com/archives/000086.html
 * http://www.theserverside.com/news/thread.tss?thread_id=22525
 *
 * @param validate whether the reader (and its validation features) should validate
 * @return a reader with an entity resolver installed
 */
public static SAXReader getInstance(boolean validate) {
    try {
        SAXReader configured = new SAXReader(_SAX_PARSER_IMPL, validate);

        configured.setEntityResolver(new EntityResolver());

        configured.setFeature(_FEATURES_VALIDATION, validate);
        configured.setFeature(_FEATURES_VALIDATION_SCHEMA, validate);
        configured.setFeature(_FEATURES_VALIDATION_SCHEMA_FULL_CHECKING, validate);
        configured.setFeature(_FEATURES_DYNAMIC, validate);

        return configured;
    } catch (Exception e) {
        _log.warn("XSD validation is diasabled because " + e.getMessage());
    }

    // Reached only after a failure above: use the default parser instead.
    SAXReader plain = new SAXReader(validate);

    plain.setEntityResolver(new EntityResolver());

    return plain;
}

From source file:com.cladonia.xml.XMLUtilities.java

License:Open Source License

/**
 * Creates a new SAXReader./*from w  ww. j av  a  2s .  c o  m*/
 *
 * @param validate when true the reader validates the input.
 *
 * @return the reader.
 */
public static SAXReader createReader(boolean validate, boolean loadExternalDTD) {
    SAXReader reader = new SAXReader(XDocumentFactory.getInstance(), validate);

    reader.setStripWhitespaceText(false);
    reader.setMergeAdjacentText(true);
    //      reader.setMergeAdjacentText( true);

    if (!validate) {
        reader.setIncludeExternalDTDDeclarations(false);
        reader.setIncludeInternalDTDDeclarations(true);

        try {
            if (loadExternalDTD) {
                reader.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", true);
                //               System.out.println( "http://apache.org/xml/features/nonvalidating/load-external-dtd = "+reader.getXMLReader().getFeature( "http://apache.org/xml/features/nonvalidating/load-external-dtd"));
                reader.setEntityResolver(getCatalogResolver());
            } else {
                reader.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
                reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    } else {
        try {
            reader.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    return reader;
}

From source file:com.cladonia.xml.XMLUtilities.java

License:Open Source License

/**
 * Creates a new SAXReader./*from  w  w w .  java2s  .  c o  m*/
 *
 * @param validate when true the reader validates the input.
 *
 * @return the reader.
 */
public static SAXReader createReader(boolean validate, boolean loadExternalDTD, boolean stripWhiteSpace) {
    SAXReader reader = new SAXReader(XDocumentFactory.getInstance(), validate);

    reader.setStripWhitespaceText(stripWhiteSpace);
    reader.setMergeAdjacentText(true);
    //      reader.setMergeAdjacentText( true);

    if (!validate) {
        reader.setIncludeExternalDTDDeclarations(false);
        reader.setIncludeInternalDTDDeclarations(true);

        try {
            if (loadExternalDTD) {
                reader.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", true);
                //               System.out.println( "http://apache.org/xml/features/nonvalidating/load-external-dtd = "+reader.getXMLReader().getFeature( "http://apache.org/xml/features/nonvalidating/load-external-dtd"));
                reader.setEntityResolver(getCatalogResolver());
            } else {
                reader.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
                reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    } else {
        try {
            reader.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    return reader;
}

From source file:com.cladonia.xml.XMLUtilities.java

License:Open Source License

/**
 * Parses the document for this reader./*from  w ww.  ja  va2 s  . c  om*/
 *
 * @param reader the reader with all the information.
 * @param systemId the systemId of the document.
 *
 * @return the Dom4J document.
 */
public static synchronized XDocument parse(BufferedReader reader, int length, String systemId,
        String grammarLocation, boolean stripWhiteSpace) throws IOException, SAXParseException {
    if (DEBUG)
        System.out.println("XMLUtilities.parse( " + reader + ", " + systemId + ", " + grammarLocation + ")");

    SAXReader saxReader = createReader(false, loadDTDGrammar, stripWhiteSpace);
    saxReader.setEntityResolver(new DummyEntityResolver(grammarLocation));

    if (resolveEntities) {
        String encoding;

        try {
            reader.mark(length);
            encoding = getXMLDeclaration(reader).getEncoding();
        } finally {
            reader.reset();
        }

        reader.mark(length + 1);
        //         // parse without substituting the entities
        parse(saxReader, createReader(reader, encoding), systemId);
        reader.reset();
    }

    return parse(saxReader, replaceAmp(reader), systemId);
}

From source file:com.cladonia.xml.XMLUtilities.java

License:Open Source License

/**
 * Parses the document for this reader.
 *
 * If {@code resolveEntities} is set, a first parse of the unmodified content
 * is run (its result is discarded) and the reader is rewound before the final
 * parse of {@code replaceAmp(reader)}, whose document is returned.
 *
 * @param reader the reader with all the information.
 * @param length read-ahead limit handed to {@code reader.mark(...)}.
 * @param systemId the systemId of the document.
 * @param grammarLocation passed to the {@code DummyEntityResolver} installed
 *        on the SAX reader.
 *
 * @return the Dom4J document.
 */
public static synchronized XDocument parse(BufferedReader reader, int length, String systemId,
        String grammarLocation) throws IOException, SAXParseException {
    if (DEBUG) {
        System.out.println("XMLUtilities.parse( " + reader + ", " + systemId + ", " + grammarLocation + ")");
    }

    SAXReader documentReader = createReader(false, loadDTDGrammar);
    documentReader.setEntityResolver(new DummyEntityResolver(grammarLocation));

    if (!resolveEntities) {
        return parse(documentReader, replaceAmp(reader), systemId);
    }

    // Read the declared encoding from the head of the stream, then rewind.
    String declaredEncoding;
    try {
        reader.mark(length);
        declaredEncoding = getXMLDeclaration(reader).getEncoding();
    } finally {
        reader.reset();
    }

    // parse without substituting the entities
    reader.mark(length + 1);
    parse(documentReader, createReader(reader, declaredEncoding), systemId);
    reader.reset();

    return parse(documentReader, replaceAmp(reader), systemId);
}