Example usage for org.xml.sax XMLReader setFeature

List of usage examples for org.xml.sax XMLReader setFeature

Introduction

On this page you can find example usages of the org.xml.sax XMLReader setFeature method.

Prototype

public void setFeature(String name, boolean value) throws SAXNotRecognizedException, SAXNotSupportedException;

Source Link

Document

Set the value of a feature flag.

Usage

From source file:org.carrot2.source.pubmed.PubMedDocumentSource.java

/**
 * Runs a search against the PubMed search endpoint and collects the primary ids it reports.
 *
 * @param query the search phrase; it is URL-encoded as UTF-8 before being sent
 * @param requestedResults value sent as the <code>retmax</code> request parameter
 * @return the ids gathered by the {@link PubMedSearchHandler} while parsing the response
 * @throws IOException if the service answers with a status other than HTTP 200
 * @throws Exception if the parser cannot be created or the response cannot be parsed
 */
private List<String> getPubMedIds(final String query, final int requestedResults) throws Exception {
    final SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    final XMLReader xmlReader = parserFactory.newSAXParser().getXMLReader();
    xmlReader.setFeature("http://xml.org/sax/features/validation", false);
    xmlReader.setFeature("http://xml.org/sax/features/namespaces", true);

    final PubMedSearchHandler idCollector = new PubMedSearchHandler();
    xmlReader.setContentHandler(idCollector);

    final StringBuilder requestUrl = new StringBuilder();
    requestUrl.append(E_SEARCH_URL);
    requestUrl.append("?db=pubmed&usehistory=n&term=");
    requestUrl.append(StringUtils.urlEncodeWrapException(query, "UTF-8"));
    requestUrl.append("&retmax=");
    requestUrl.append(requestedResults);

    final HttpUtils.Response httpResponse = HttpUtils.doGET(requestUrl.toString(), null, null);

    // Anything but 200 OK is reported together with the raw payload to ease diagnosis.
    if (httpResponse.status != HttpStatus.SC_OK) {
        throw new IOException("PubMed returned HTTP Error: " + httpResponse.status + ", HTTP payload: "
                + new String(httpResponse.payload, "iso8859-1"));
    }

    // The handler accumulates the ids while the reader walks through the payload.
    xmlReader.parse(new InputSource(httpResponse.getPayloadAsStream()));
    return idCollector.getPubMedPrimaryIds();
}

From source file:org.carrot2.source.pubmed.PubMedDocumentSource.java

/**
 * Downloads and parses the PubMed abstracts for the given entry ids.
 *
 * @param ids PubMed ids to fetch; for an empty list an empty response is returned and no
 *        HTTP request is made
 * @return the response assembled by the {@link PubMedFetchHandler} while parsing
 * @throws IOException if the service answers with a status other than HTTP 200
 * @throws Exception if the parser cannot be created or the response cannot be parsed
 */
private SearchEngineResponse getPubMedAbstracts(List<String> ids) throws Exception {
    if (ids.isEmpty()) {
        return new SearchEngineResponse();
    }

    final SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    final XMLReader xmlReader = parserFactory.newSAXParser().getXMLReader();
    xmlReader.setFeature("http://xml.org/sax/features/validation", false);
    xmlReader.setFeature("http://xml.org/sax/features/namespaces", true);

    final PubMedFetchHandler abstractCollector = new PubMedFetchHandler();
    xmlReader.setContentHandler(abstractCollector);

    // The request URL is deliberately not logged: with many ids it can get really long.
    final String idList = getIdsString(ids);
    final String requestUrl = E_FETCH_URL + "?db=pubmed&retmode=xml&rettype=abstract&id=" + idList;
    final HttpUtils.Response httpResponse = HttpUtils.doGET(requestUrl, null, null);

    if (httpResponse.status != HttpStatus.SC_OK) {
        throw new IOException("PubMed returned HTTP Error: " + httpResponse.status + ", HTTP payload: "
                + new String(httpResponse.payload, "iso8859-1"));
    }

    // Feed the document contents to the handler, then hand back what it collected.
    xmlReader.parse(new InputSource(httpResponse.getPayloadAsStream()));
    return abstractCollector.getResponse();
}

From source file:org.carrot2.source.yahoo.YahooSearchService.java

/**
 * Parses the response stream, assuming it is XML.
 * <p>
 * The stream is read with a non-validating, namespace-aware SAX reader that feeds an
 * {@link XMLResponseParser}.
 *
 * @param is stream holding the XML response
 * @return the response collected by the {@link XMLResponseParser}
 * @throws IOException if reading the stream fails, the XML cannot be parsed, or no SAX parser
 *         can be obtained; the triggering exception is attached as the cause
 */
private static SearchEngineResponse parseResponseXML(final InputStream is) throws IOException {
    try {
        final XMLResponseParser parser = new XMLResponseParser();
        final XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();

        reader.setFeature("http://xml.org/sax/features/validation", false);
        reader.setFeature("http://xml.org/sax/features/namespaces", true);
        reader.setContentHandler(parser);

        reader.parse(new InputSource(is));

        return parser.response;
    } catch (final SAXException e) {
        // An IOException wrapped inside the SAXException is rethrown unchanged.
        // instanceof is already false for null, so no separate null check is needed.
        final Throwable cause = e.getException();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        }
        // Keep the SAXException as the cause so the stack trace is not lost.
        throw new IOException("XML parsing exception: " + e.getMessage(), e);
    } catch (final ParserConfigurationException e) {
        throw new IOException("Could not acquire XML parser.", e);
    }
}

From source file:org.chiba.xml.xforms.connector.SchemaValidator.java

/**
 * Validates the instance against the XML Schemas listed in the model's <code>schema</code>
 * attribute.
 * <p>
 * The schema locations are written onto the instance element as <code>schemaLocation</code> /
 * <code>noNamespaceSchemaLocation</code> attributes, the element is serialized to a temporary
 * file and that file is parsed with schema validation switched on. The temporary file is
 * removed again once parsing has finished. When the model declares no schema, nothing is
 * validated.
 *
 * @param model the model whose <code>schema</code> attribute holds the space separated schema URIs
 * @param instance the instance to validate; expected to be an element or a document node
 * @return <code>true</code> if the instance is valid or the model declares no schema
 * @throws XFormsException if the instance is reported invalid, if validation could not even be
 *         set up, or if the instance node offers no element to validate
 */
public boolean validateSchema(Model model, Node instance) throws XFormsException {
    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("SchemaValidator.validateSchema: validating instance");
    }

    //needed if we want to load schemas from Model + set it as "schemaLocation" attribute
    String schemas = model.getElement().getAttributeNS(NamespaceConstants.XFORMS_NS, "schema");
    if (schemas == null || schemas.equals("")) {
        // no schema declared on the model: nothing to validate against
        return true;
    }

    //add schemas to element
    //shouldn't it be done on a copy of the doc ?
    Element el = null;
    if (instance.getNodeType() == Node.ELEMENT_NODE) {
        el = (Element) instance;
    } else if (instance.getNodeType() == Node.DOCUMENT_NODE) {
        el = ((Document) instance).getDocumentElement();
    }
    if (el == null) {
        // Fail with a meaningful message instead of a NullPointerException further down.
        throw new XFormsException("XMLSchema validation failed. No element to validate for instance node type "
                + instance.getNodeType());
    }

    // NOTE(review): prefix is used unchecked below; confirm getPrefix never returns null here.
    String prefix = NamespaceResolver.getPrefix(el, XMLSCHEMA_INSTANCE_NS);
    //test if with targetNamespace or not
    //if more than one schema : namespaces are mandatory ! (optional only for 1)
    StringTokenizer tokenizer = new StringTokenizer(schemas, " ", false);
    String schemaLocations = null;
    String noNamespaceSchemaLocation = null;
    while (tokenizer.hasMoreTokens()) {
        String token = tokenizer.nextToken();
        //check that it is an URL
        URI uri = null;
        try {
            uri = new java.net.URI(token);
        } catch (java.net.URISyntaxException ex) {
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug(token + " is not an URI");
            }
        }

        if (uri != null) {
            String ns;
            try {
                ns = this.getSchemaNamespace(uri);

                if (ns != null && !ns.equals("")) {
                    if (schemaLocations == null) {
                        schemaLocations = ns + " " + token;
                    } else {
                        schemaLocations = schemaLocations + " " + ns + " " + token;
                    }

                    ///add the namespace declaration if it is not on the instance?
                    //TODO: how to know with which prefix ?
                    String nsPrefix = NamespaceResolver.getPrefix(el, ns);
                    if (nsPrefix == null) { //namespace not declared !
                        LOGGER.warn("SchemaValidator: targetNamespace " + ns + " of schema " + token
                                + " is not declared in instance: declaring it as default...");
                        el.setAttributeNS(NamespaceConstants.XMLNS_NS, NamespaceConstants.XMLNS_PREFIX, ns);
                    }
                } else if (noNamespaceSchemaLocation == null) {
                    noNamespaceSchemaLocation = token;
                } else { //we have more than one schema without namespace
                    LOGGER.warn("SchemaValidator: There is more than one schema without namespace !");
                }
            } catch (Exception ex) {
                LOGGER.warn(
                        "Exception while trying to load schema: " + uri.toString() + ": " + ex.getMessage(),
                        ex);
                //in case there was an exception: do nothing, do not set the schema
            }
        }
    }
    //write schemaLocations found
    if (schemaLocations != null && !schemaLocations.equals("")) {
        el.setAttributeNS(XMLSCHEMA_INSTANCE_NS, prefix + ":schemaLocation", schemaLocations);
    }
    if (noNamespaceSchemaLocation != null) {
        el.setAttributeNS(XMLSCHEMA_INSTANCE_NS, prefix + ":noNamespaceSchemaLocation",
                noNamespaceSchemaLocation);
    }

    //save and parse the doc
    ValidationErrorHandler handler = null;
    String setupFailure = null;
    File f = null;
    try {
        //save document
        f = File.createTempFile("instance", ".xml");
        TransformerFactory trFact = TransformerFactory.newInstance();
        Transformer trans = trFact.newTransformer();
        DOMSource source = new DOMSource(el);
        StreamResult result = new StreamResult(f);
        trans.transform(source, result);
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Validator.validateSchema: file temporarily saved in " + f.getAbsolutePath());
        }

        //parse it with error handler to validate it
        handler = new ValidationErrorHandler();
        SAXParserFactory parserFact = SAXParserFactory.newInstance();
        parserFact.setValidating(true);
        parserFact.setNamespaceAware(true);
        SAXParser parser = parserFact.newSAXParser();
        XMLReader reader = parser.getXMLReader();

        //validation activated
        reader.setFeature("http://xml.org/sax/features/validation", true);
        //schema validation activated
        reader.setFeature("http://apache.org/xml/features/validation/schema", true);
        //validate only if there is a grammar
        reader.setFeature("http://apache.org/xml/features/validation/dynamic", true);

        parser.parse(f, handler);
    } catch (Exception ex) {
        LOGGER.warn("Validator.validateSchema: Exception in XMLSchema validation: " + ex.getMessage(), ex);
        // remembered for the case where the failure happened before the handler existed
        setupFailure = ex.getMessage();
    } finally {
        // Remove the temporary file right away; only fall back to deletion at JVM exit
        // when the immediate delete does not succeed.
        if (f != null && !f.delete()) {
            f.deleteOnExit();
        }
    }

    if (handler == null) {
        // The document could not even be saved, so there is no handler to ask for a message.
        throw new XFormsException("XMLSchema validation failed. " + setupFailure);
    }
    if (!handler.isValid()) {
        //TODO: find a way to get the error message displayed
        throw new XFormsException("XMLSchema validation failed. " + handler.getMessage());
    }

    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("Validator.validateSchema: result=true");
    }

    return true;
}

From source file:org.commonjava.maven.galley.maven.parse.XMLInfrastructure.java

/**
 * Second-chance parse used after the regular DOM parse of the given XML has failed.
 * <p>
 * When a <code>safeInputFactory</code> is available, the XML declaration is repaired and the
 * text is read through a StAX event reader. Otherwise entity references that are not XML
 * entities are escaped and the text is read through a non-validating SAX reader. In both cases
 * the source is run through the transformer from <code>newTransformer()</code> into a DOM result.
 *
 * @param xml the XML text to parse
 * @param docSource describes where the XML came from; only used in log and error messages
 * @param e the failure of the original DOM parse; its message is included in log and error messages
 * @return the parsed document
 * @throws GalleyMavenXMLException if the fallback parse fails as well
 */
private Document fallbackParseDocument(String xml, final Object docSource, final Exception e)
        throws GalleyMavenXMLException {
    // Placeholder arguments go first, in placeholder order; the throwable goes last so the
    // logging framework treats it as the exception to report rather than as a format argument.
    logger.debug(
            "Failed to parse: {}. DOM error: {}. Trying STaX parse with IS_REPLACING_ENTITY_REFERENCES == false...",
            docSource, e.getMessage(), e);
    try {
        Source source;

        if (safeInputFactory != null) {
            xml = repairXmlDeclaration(xml);

            final XMLEventReader eventReader = safeInputFactory.createXMLEventReader(new StringReader(xml));
            source = new StAXSource(eventReader);
        } else {
            // Deal with &oslash; and other undeclared entities...
            xml = escapeNonXMLEntityRefs(xml);

            final XMLReader reader = XMLReaderFactory.createXMLReader();
            reader.setFeature("http://xml.org/sax/features/validation", false);

            source = new SAXSource(reader, new InputSource(new StringReader(xml)));
        }

        final DOMResult result = new DOMResult();

        final Transformer transformer = newTransformer();
        transformer.transform(source, result);

        return (Document) result.getNode();
    } catch (final TransformerException e1) {
        throw new GalleyMavenXMLException("Failed to parse: %s. Transformer error: %s.\nOriginal DOM error: %s",
                e1, docSource, e1.getMessage(), e.getMessage());
    } catch (final SAXException e1) {
        throw new GalleyMavenXMLException("Failed to parse: %s. SAX error: %s.\nOriginal DOM error: %s", e1,
                docSource, e1.getMessage(), e.getMessage());
    } catch (final XMLStreamException e1) {
        throw new GalleyMavenXMLException("Failed to parse: %s. STaX error: %s.\nOriginal DOM error: %s", e1,
                docSource, e1.getMessage(), e.getMessage());
    }
}

From source file:org.deegree.tools.metadata.ISO19139Validator.java

/**
 * Validates one metadata file, or every entry of a directory, against the bundled ISO 19139
 * schema and writes a textual report.
 *
 * @param srcOpt path of the file or directory to validate; must exist
 * @param schemaOpt name of a {@code SCHEMAVERSION} constant, or {@code null} for the default (V2007)
 * @param resultOpt path of the report file; when {@code null} or empty a temporary file is used
 * @throws IllegalArgumentException if the source does not exist or the schema version is unknown
 * @throws IOException if the report cannot be created or written, or the source directory
 *         cannot be listed
 * @throws SAXException if the validating parser cannot be created or configured
 */
public void run(String srcOpt, String schemaOpt, String resultOpt) throws IOException, SAXException {
    File src = new File(srcOpt);
    if (!src.exists()) {
        throw new IllegalArgumentException("src does not exist: " + srcOpt + ". Check parameter " + OPT_SRC);
    }
    File result;
    if (resultOpt != null && resultOpt.length() > 0) {
        result = new File(resultOpt);
        if (!result.exists()) {
            result.createNewFile();
        }
    } else {
        result = File.createTempFile(DEFAULT_FILENAME, ".txt");
    }

    SCHEMAVERSION schemaVersion = SCHEMAVERSION.V2007;
    if (schemaOpt != null) {
        try {
            schemaVersion = SCHEMAVERSION.valueOf(schemaOpt);
        } catch (Exception e) {
            throw new IllegalArgumentException("Invalid argument for " + OPT_SCHEMA_VERSION + ": " + schemaOpt);
        }
    }

    String schema = "/META-INF/SCHEMAS_OPENGIS_NET/iso/19139/20070417/gmd/metadataEntity.xsd";
    if (SCHEMAVERSION.V2006.equals(schemaVersion)) {
        schema = "/META-INF/SCHEMAS_OPENGIS_NET/iso/19139/20060504/gmd/metadataEntity.xsd";
    }
    URL u = ISO19139Validator.class.getResource(schema);
    XMLReader parser = XMLReaderFactory.createXMLReader();
    parser.setFeature("http://xml.org/sax/features/validation", true);
    parser.setFeature("http://apache.org/xml/features/validation/schema", true);
    parser.setProperty("http://apache.org/xml/properties/schema/external-schemaLocation",
            "http://www.isotc211.org/2005/gmd " + u.toExternalForm());

    final FileWriter fw = new FileWriter(result);
    // The writer is closed in the finally block so the report file handle is released even
    // when writing fails half way through.
    try {
        File[] filesToValidate;
        if (src.isDirectory()) {
            filesToValidate = src.listFiles();
            if (filesToValidate == null) {
                // listFiles() returns null when the directory cannot be read
                throw new IOException("Could not list files of directory " + src);
            }
            fw.write("validate " + filesToValidate.length + " files from directory " + src);
            fw.write("\n");
        } else {
            filesToValidate = new File[] { src };
        }
        System.out.println("Start validation");
        int noOfValidRecords = 0;
        for (int i = 0; i < filesToValidate.length; i++) {
            FileErrorHandler feh = new FileErrorHandler(fw);
            parser.setErrorHandler(feh);
            if (filesToValidate.length > 1) {
                fw.write("validate record " + i + " of " + filesToValidate.length);
                fw.write("\n");
            }
            File fileToValidate = filesToValidate[i];
            System.out.println(fileToValidate);
            fw.write("validate file " + fileToValidate.getAbsolutePath());
            fw.write("\n");
            FileInputStream in = null;
            try {
                in = new FileInputStream(fileToValidate);
                parser.parse(new InputSource(in));
            } catch (Exception e) {
                String msg = "Could not validate current occured: " + e.getMessage()
                        + ". Continue with next record";
                System.err.println(msg);
                fw.write(msg);
                fw.write("\n");
                continue;
            } finally {
                // Release the file handle whether or not the parser closed the stream itself.
                if (in != null) {
                    try {
                        in.close();
                    } catch (IOException ignored) {
                        // nothing sensible left to do for a stream that was only read
                    }
                }
            }
            fw.flush();
            if (feh.isValid()) {
                noOfValidRecords++;
            }
        }
        fw.write(noOfValidRecords + " of " + filesToValidate.length + " records are valid.");
    } finally {
        fw.close();
    }
    System.out.println("Validation finished, result file: " + result.getAbsolutePath());
}

From source file:org.dhatim.delivery.AbstractParser.java

/**
 * Creates and configures the XML reader used for filtering.
 * <p>
 * The reader is chosen in this order: the class named by the SAX driver configuration, a
 * reader matching a {@link JavaSource} filter source, or the default reader of
 * {@link XMLReaderFactory}. Namespace support, handlers and features are applied afterwards.
 *
 * @return the configured reader
 * @throws SAXException if the reader cannot be created or configured, or if event streaming is
 *         configured "on" while the supplied {@link JavaSource} has it switched off
 */
protected XMLReader createXMLReader() throws SAXException {
    final ExecutionContext executionContext = getExecContext();
    final Source filterSource = FilterSource.getSource(executionContext);
    final XMLReader xmlReader;

    if (saxDriverConfig != null && saxDriverConfig.getResource() != null) {
        // An explicitly configured driver class wins over everything else.
        xmlReader = XMLReaderFactory.createXMLReader(saxDriverConfig.getResource());
    } else if (!(filterSource instanceof JavaSource)) {
        xmlReader = XMLReaderFactory.createXMLReader();
    } else {
        final JavaSource javaInput = (JavaSource) filterSource;

        if (isFeatureOn(JavaSource.FEATURE_GENERATE_EVENT_STREAM, saxDriverConfig)
                && !javaInput.isEventStreamRequired()) {
            throw new SAXException("Invalid Smooks configuration.  Feature '"
                    + JavaSource.FEATURE_GENERATE_EVENT_STREAM
                    + "' is explicitly configured 'on' in the Smooks configuration, while the supplied JavaSource has explicitly configured event streaming to be off (through a call to JavaSource.setEventStreamRequired).");
        }

        // Streaming stays on unless it is switched off either in the Smooks configuration
        // (reader features) or on the supplied JavaSource.
        final boolean streamEvents = !isFeatureOff(JavaSource.FEATURE_GENERATE_EVENT_STREAM, saxDriverConfig)
                && javaInput.isEventStreamRequired();
        xmlReader = (streamEvents && javaInput.getSourceObjects() != null)
                ? new XStreamXMLReader()
                : new NullSourceXMLReader();
    }

    if (xmlReader instanceof SmooksXMLReader) {
        if (saxDriverConfig == null) {
            Configurator.initialise(xmlReader);
        } else {
            Configurator.configure(xmlReader, saxDriverConfig, executionContext.getContext());
        }
    }

    xmlReader.setFeature("http://xml.org/sax/features/namespaces", true);
    xmlReader.setFeature("http://xml.org/sax/features/namespace-prefixes", true);

    setHandlers(xmlReader);
    setFeatures(xmlReader);

    return xmlReader;
}

From source file:org.dhatim.delivery.AbstractParser.java

/**
 * Switches reader features on and off.
 * <p>
 * Two optional features are attempted first and any failure to set them is ignored. After
 * that, the features listed in the SAX driver configuration (if there is one) are applied;
 * failures there are propagated to the caller.
 *
 * @param reader the reader to configure
 * @throws SAXNotSupportedException if a configured feature cannot be set to the requested value
 * @throws SAXNotRecognizedException if a configured feature is unknown to the reader
 */
private void setFeatures(XMLReader reader) throws SAXNotSupportedException, SAXNotRecognizedException {
    // Xerces "notify-char-refs": may fail if the reader is not Xerces, but that's OK.
    try {
        reader.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
    } catch (Throwable ignored) {
        // best effort only
    }

    // Report namespace declarations as per the SAX 2.0.2 spec, see
    // http://www.saxproject.org/apidoc/org/xml/sax/package-summary.html#package_description
    try {
        reader.setFeature("http://xml.org/sax/features/xmlns-uris", true);
    } catch (Throwable ignored) {
        // not a SAX 2.0.2 compliant parser
    }

    if (saxDriverConfig == null) {
        return;
    }

    final List<Parameter> switchedOn = saxDriverConfig.getParameters(FEATURE_ON);
    if (switchedOn != null) {
        for (Parameter onFeature : switchedOn) {
            reader.setFeature(onFeature.getValue(), true);
        }
    }

    final List<Parameter> switchedOff = saxDriverConfig.getParameters(FEATURE_OFF);
    if (switchedOff != null) {
        for (Parameter offFeature : switchedOff) {
            reader.setFeature(offFeature.getValue(), false);
        }
    }
}

From source file:org.dita.dost.module.reader.AbstractReaderModule.java

/**
 * Read a file and process it for list information.
 * <p>
 * The file is parsed through the filter chain returned by {@code getProcessingPipe} and the
 * result is serialized to a file below the job's temporary directory. Depending on the outcome
 * the file ends up in {@code failureList} and/or {@code doneList}.
 *
 * @param ref reference to the file to process; its {@code filename} becomes the current file
 *            and its {@code format} is used to obtain the XML reader
 * @param parseFile file to parse, may be {@code null}, in which case the current file is parsed
 * @throws DITAOTException if processing failed for the root file or while the processing mode
 *                         is strict, or if the root file turns out not to be valid input
 */
void readFile(final Reference ref, final URI parseFile) throws DITAOTException {
    currentFile = ref.filename;
    assert currentFile.isAbsolute();
    final URI src = parseFile != null ? parseFile : currentFile;
    assert src.isAbsolute();
    // The output location is the generated temporary file name resolved against the job's temp directory.
    final URI rel = tempFileNameScheme.generateTempFileName(currentFile);
    outputFile = new File(job.tempDirURI.resolve(rel));
    final File outputDir = outputFile.getParentFile();
    if (!outputDir.exists() && !outputDir.mkdirs()) {
        // Without an output directory nothing can be written: log and give up on this file.
        logger.error("Failed to create output directory " + outputDir.getAbsolutePath());
        return;
    }
    validateMap = Collections.emptyMap();
    defaultValueMap = Collections.emptyMap();
    logger.info("Processing " + currentFile + " to " + outputFile.toURI());
    // Message parameter shared by all messages built below.
    final String[] params = { currentFile.toString() };

    // Verify stub for current file is in Job
    final FileInfo fi = job.getFileInfo(currentFile);
    if (fi == null) {
        final FileInfo stub = new FileInfo.Builder().src(currentFile).uri(rel).result(currentFile)
                .isInput(currentFile.equals(rootFile)).build();
        job.add(stub);
    }

    //        InputSource in = null;
    Result out = null;
    try {
        final TransformerFactory tf = TransformerFactory.newInstance();
        final SAXTransformerFactory stf = (SAXTransformerFactory) tf;
        final TransformerHandler serializer = stf.newTransformerHandler();

        // Chain the filters: each filter takes the previous source as its parent, so the last
        // filter is the one that is finally asked to parse.
        XMLReader parser = getXmlReader(ref.format);
        XMLReader xmlSource = parser;
        for (final XMLFilter f : getProcessingPipe(currentFile)) {
            f.setParent(xmlSource);
            f.setEntityResolver(CatalogUtils.getCatalogResolver());
            xmlSource = f;
        }

        // Registering the lexical handler is optional: a parser that does not recognize the
        // property or feature is silently accepted.
        try {
            final LexicalHandler lexicalHandler = new DTDForwardHandler(xmlSource);
            parser.setProperty("http://xml.org/sax/properties/lexical-handler", lexicalHandler);
            parser.setFeature("http://xml.org/sax/features/lexical-handler", true);
        } catch (final SAXNotRecognizedException e) {
        }

        //            in = new InputSource(src.toString());
        out = new StreamResult(new FileOutputStream(outputFile));
        serializer.setResult(out);
        xmlSource.setContentHandler(serializer);
        xmlSource.parse(src.toString());

        if (listFilter.isValidInput()) {
            processParseResult(currentFile);
            categorizeCurrentFile(ref);
        } else if (!currentFile.equals(rootFile)) {
            // Invalid input that is not the root file: warn and record it as a failure.
            // The root file case is handled after the try block.
            logger.warn(MessageUtils.getMessage("DOTJ021W", params).toString());
            failureList.add(currentFile);
        }
    } catch (final RuntimeException e) {
        // Runtime exceptions are passed on untouched instead of being handled by the generic catch below.
        throw e;
    } catch (final SAXParseException sax) {
        // A DITAOTException carried inside the parse exception is rethrown as-is.
        final Exception inner = sax.getException();
        if (inner != null && inner instanceof DITAOTException) {
            throw (DITAOTException) inner;
        }
        // Failures of the root file, or any failure in strict mode, abort; otherwise the
        // error is logged and the file is recorded as failed.
        if (currentFile.equals(rootFile)) {
            throw new DITAOTException(
                    MessageUtils.getMessage("DOTJ012F", params).toString() + ": " + sax.getMessage(), sax);
        } else if (processingMode == Mode.STRICT) {
            throw new DITAOTException(
                    MessageUtils.getMessage("DOTJ013E", params).toString() + ": " + sax.getMessage(), sax);
        } else {
            logger.error(MessageUtils.getMessage("DOTJ013E", params).toString() + ": " + sax.getMessage(), sax);
        }
        failureList.add(currentFile);
    } catch (final FileNotFoundException e) {
        // Different messages are used depending on whether the current file itself is missing
        // or exists but something could not be loaded.
        if (!exists(currentFile)) {
            if (currentFile.equals(rootFile)) {
                throw new DITAOTException(MessageUtils.getMessage("DOTA069F", params).toString(), e);
            } else if (processingMode == Mode.STRICT) {
                throw new DITAOTException(MessageUtils.getMessage("DOTX008E", params).toString(), e);
            } else {
                logger.error(MessageUtils.getMessage("DOTX008E", params).toString());
            }
        } else if (currentFile.equals(rootFile)) {
            throw new DITAOTException(MessageUtils.getMessage("DOTJ078F", params).toString()
                    + " Cannot load file: " + e.getMessage(), e);
        } else if (processingMode == Mode.STRICT) {
            throw new DITAOTException(MessageUtils.getMessage("DOTJ079E", params).toString()
                    + " Cannot load file: " + e.getMessage(), e);
        } else {
            logger.error(MessageUtils.getMessage("DOTJ079E", params).toString() + " Cannot load file: "
                    + e.getMessage());
        }
        failureList.add(currentFile);
    } catch (final Exception e) {
        // Any other checked failure follows the same root file / strict mode / lenient split.
        if (currentFile.equals(rootFile)) {
            throw new DITAOTException(
                    MessageUtils.getMessage("DOTJ012F", params).toString() + ": " + e.getMessage(), e);
        } else if (processingMode == Mode.STRICT) {
            throw new DITAOTException(
                    MessageUtils.getMessage("DOTJ013E", params).toString() + ": " + e.getMessage(), e);
        } else {
            logger.error(MessageUtils.getMessage("DOTJ013E", params).toString() + ": " + e.getMessage(), e);
        }
        failureList.add(currentFile);
    } finally {
        if (out != null) {
            try {
                close(out);
            } catch (final IOException e) {
                logger.error(e.getMessage(), e);
            }
        }
        // Do not leave output behind for a file that was recorded as failed.
        if (failureList.contains(currentFile)) {
            FileUtils.deleteQuietly(outputFile);
        }
    }

    // An invalid root file always stops the build; the message depends on the validate flag.
    if (!listFilter.isValidInput() && currentFile.equals(rootFile)) {
        if (validate) {
            // stop the build if all content in the input file was filtered out.
            throw new DITAOTException(MessageUtils.getMessage("DOTJ022F", params).toString());
        } else {
            // stop the build if the content of the file is not valid.
            throw new DITAOTException(MessageUtils.getMessage("DOTJ034F", params).toString());
        }
    }

    // Record the file as done and reset the filters.
    doneList.add(currentFile);
    listFilter.reset();
    keydefFilter.reset();

}

From source file:org.dita.dost.reader.TestGenListModuleReader.java

/**
 * Obtains an XML reader from {@link XMLUtils}, optionally switches validation on and installs
 * the catalog entity resolver.
 *
 * @param ditaDir directory handed to {@code CatalogUtils.setDitaDir} before the resolver is requested
 * @param validate whether the validation features are switched on
 * @param rootFile not used by this method
 * @return the prepared reader
 * @throws SAXException if a feature cannot be set on the reader
 * @throws IOException declared by the signature; not thrown directly by the code shown here
 */
private XMLReader initXMLReader(final File ditaDir, final boolean validate, final File rootFile)
        throws SAXException, IOException {
    final XMLReader xmlReader = XMLUtils.getXMLReader();

    if (!validate) {
        // NOTE(review): the DOTJ037W message is looked up but its text is discarded;
        // presumably it was meant to be logged as a warning - confirm.
        MessageUtils.getInstance().getMessage("DOTJ037W").toString();
    } else {
        xmlReader.setFeature(FEATURE_VALIDATION, true);
        try {
            xmlReader.setFeature(FEATURE_VALIDATION_SCHEMA, true);
        } catch (final SAXNotRecognizedException ignored) {
            // the reader does not know this feature (not Xerces); carry on without it
        }
    }

    CatalogUtils.setDitaDir(ditaDir);
    xmlReader.setEntityResolver(CatalogUtils.getCatalogResolver());
    return xmlReader;
}