Example usage for org.xml.sax XMLReader parse

List of usage examples for org.xml.sax XMLReader parse

Introduction

This page collects usage examples for the parse method of org.xml.sax.XMLReader.

Prototype

public void parse(String systemId) throws IOException, SAXException;

Source Link

Document

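Parse an XML document from a system identifier (URI). Note that most of the examples below call the overload parse(InputSource) instead; only the fallback paths in PepperJobImpl and SaltUtil pass a system identifier string.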

Usage

From source file:org.betaconceptframework.astroboa.test.util.JAXBValidationUtils.java

/**
 * Parses the given XML stream with a SAX reader obtained from this instance's parser factory.
 * The reader uses this instance's entity resolver and error handler; the error handler is told
 * not to ignore invalid element sequences before parsing starts.
 *
 * @param is the XML content; it is first passed through {@code encodeURLsFoundInXML}
 * @throws Exception if the parser cannot be created or parsing fails
 */
public void validateUsingSAX(InputStream is) throws Exception {
    final XMLReader reader = parserFactory.newSAXParser().getXMLReader();
    reader.setEntityResolver(entityResolver);
    reader.setErrorHandler(errorHandler);

    errorHandler.setIgnoreInvalidElementSequence(false);

    // The parser consumes the URL-encoded copy, not the caller's stream.
    final InputStream encoded = encodeURLsFoundInXML(is);
    reader.parse(new InputSource(encoded));
}

From source file:org.carrot2.source.pubmed.PubMedDocumentSource.java

/**
 * Gets PubMed entry ids matching the query.
 *
 * @param query the search term; it is URL-encoded as UTF-8 before being appended to the URL
 * @param requestedResults value sent as the {@code retmax} request parameter
 * @return the primary ids collected by the {@code PubMedSearchHandler} while parsing the response
 * @throws IOException if the service answers with a status other than {@code HttpStatus.SC_OK}
 */
private List<String> getPubMedIds(final String query, final int requestedResults) throws Exception {
    final SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    final XMLReader xmlReader = parserFactory.newSAXParser().getXMLReader();
    xmlReader.setFeature("http://xml.org/sax/features/validation", false);
    xmlReader.setFeature("http://xml.org/sax/features/namespaces", true);

    final PubMedSearchHandler idHandler = new PubMedSearchHandler();
    xmlReader.setContentHandler(idHandler);

    final String encodedQuery = StringUtils.urlEncodeWrapException(query, "UTF-8");
    final String url = E_SEARCH_URL + "?db=pubmed&usehistory=n&term=" + encodedQuery + "&retmax="
            + requestedResults;

    final HttpUtils.Response response = HttpUtils.doGET(url, null, null);

    // Anything but a plain OK is reported together with the raw payload.
    if (response.status != HttpStatus.SC_OK) {
        final String payloadText = new String(response.payload, "iso8859-1");
        throw new IOException(
                "PubMed returned HTTP Error: " + response.status + ", HTTP payload: " + payloadText);
    }

    // Get document IDs
    xmlReader.parse(new InputSource(response.getPayloadAsStream()));
    return idHandler.getPubMedPrimaryIds();
}

From source file:org.carrot2.source.pubmed.PubMedDocumentSource.java

/**
 * Gets PubMed abstracts corresponding to the provided ids.
 *
 * @param ids the entry ids to fetch; an empty list yields an empty response without any request
 * @return the response assembled by the {@code PubMedFetchHandler} while parsing the reply
 * @throws IOException if the service answers with a status other than {@code HttpStatus.SC_OK}
 */
private SearchEngineResponse getPubMedAbstracts(List<String> ids) throws Exception {
    if (ids.isEmpty()) {
        return new SearchEngineResponse();
    }

    final SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    final XMLReader xmlReader = parserFactory.newSAXParser().getXMLReader();
    xmlReader.setFeature("http://xml.org/sax/features/validation", false);
    xmlReader.setFeature("http://xml.org/sax/features/namespaces", true);

    final PubMedFetchHandler abstractHandler = new PubMedFetchHandler();
    xmlReader.setContentHandler(abstractHandler);

    // No URL logging here, as the url can get really long
    final String idList = getIdsString(ids);
    final String url = E_FETCH_URL + "?db=pubmed&retmode=xml&rettype=abstract&id=" + idList;

    final HttpUtils.Response response = HttpUtils.doGET(url, null, null);

    // Anything but a plain OK is reported together with the raw payload.
    if (response.status != HttpStatus.SC_OK) {
        final String payloadText = new String(response.payload, "iso8859-1");
        throw new IOException(
                "PubMed returned HTTP Error: " + response.status + ", HTTP payload: " + payloadText);
    }

    // Get document contents
    xmlReader.parse(new InputSource(response.getPayloadAsStream()));
    return abstractHandler.getResponse();
}

From source file:org.carrot2.source.yahoo.YahooSearchService.java

/**
 * Parse the response stream, assuming it is XML.
 *
 * @param is the stream to parse; it is not closed by this method
 * @return the response built by the {@code XMLResponseParser} content handler
 * @throws IOException if reading fails, the XML cannot be parsed, or no SAX parser can be
 *         created; in the latter two cases the original exception is attached as the cause
 */
private static SearchEngineResponse parseResponseXML(final InputStream is) throws IOException {
    try {
        final XMLResponseParser parser = new XMLResponseParser();
        final XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();

        reader.setFeature("http://xml.org/sax/features/validation", false);
        reader.setFeature("http://xml.org/sax/features/namespaces", true);
        reader.setContentHandler(parser);

        reader.parse(new InputSource(is));

        return parser.response;
    } catch (final SAXException e) {
        // A SAXException may merely wrap an I/O failure; surface that one directly.
        final Throwable cause = e.getException();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        }
        // Keep the SAXException as the cause so the parser's stack trace is not lost.
        throw new IOException("XML parsing exception: " + e.getMessage(), e);
    } catch (final ParserConfigurationException e) {
        throw new IOException("Could not acquire XML parser.", e);
    }
}

From source file:org.cauldron.tests.util.DigesterContext.java

/**
 * Parses {@code input} with {@code reader}, using the digester as content handler, and returns
 * the digester's root object.
 *
 * @param reader the XML reader to drive; its content handler is replaced by the digester
 * @param input the XML source to parse
 * @return whatever {@code digester.getRoot()} yields after a successful parse
 * @throws JAXBException wrapping any exception raised while parsing or fetching the root
 */
protected Object unmarshal(XMLReader reader, InputSource input) throws JAXBException {
    reader.setContentHandler(digester);

    final Object root;
    try {
        reader.parse(input);
        root = digester.getRoot();
    } catch (Exception failure) {
        throw new JAXBException(failure);
    }
    return root;
}

From source file:org.corpus_tools.pepper.core.PepperJobImpl.java

/**
 * Loads a workflow description from the given location into this job. All existing steps are
 * removed first, and the folder containing the file becomes the job's base directory.
 * <p>
 * The file is first parsed as UTF-8. If that attempt fails with a {@link SAXException}, the file
 * is parsed a second time by its absolute path, leaving encoding detection to the parser.
 *
 * @param uri location of the workflow description; must be a file URI
 * @throws UnsupportedOperationException if {@code uri} is not a file URI
 * @throws PepperModuleXMLResourceException if the parser cannot be created or the file cannot be
 *         loaded
 * @see PepperJob#load(URI)
 */
@Override
public void load(URI uri) {
    if (!uri.isFile()) {
        throw new UnsupportedOperationException(
                "Currently Pepper can only load workflow description from local files.");
    }

    File wdFile = new File(uri.toFileString());
    // set folder containing workflow description as base dir
    setBaseDir(uri.trimSegments(1));

    SAXParserFactory factory = SAXParserFactory.newInstance();

    WorkflowDescriptionReader contentHandler = new WorkflowDescriptionReader();
    contentHandler.setPepperJob(this);
    contentHandler.setLocation(uri);

    // remove all existing steps
    clear();

    XMLReader xmlReader;
    try {
        xmlReader = factory.newSAXParser().getXMLReader();
        xmlReader.setContentHandler(contentHandler);
    } catch (Exception e) {
        throw new PepperModuleXMLResourceException("Cannot load Pepper workflow description file '"
                + wdFile.getAbsolutePath() + "': " + e.getMessage() + ". ", e);
    }

    // try-with-resources closes the stream before any catch block runs, so the file handle is
    // already released when the fallback below reopens the same file by path.
    try (InputStream inputStream = new FileInputStream(wdFile);
            Reader reader = new InputStreamReader(inputStream, "UTF-8")) {
        InputSource is = new InputSource(reader);
        is.setEncoding("UTF-8");
        xmlReader.parse(is);
    } catch (SAXException e) {
        // Second attempt: fresh reader, parse by path instead of a forced UTF-8 reader.
        try {
            xmlReader = factory.newSAXParser().getXMLReader();
            xmlReader.setContentHandler(contentHandler);
            xmlReader.parse(wdFile.getAbsolutePath());
        } catch (Exception e1) {
            throw new PepperModuleXMLResourceException("Cannot load Pepper workflow description file '"
                    + wdFile.getAbsolutePath() + "': " + e1.getMessage() + ". ", e1);
        }
    } catch (Exception e) {
        if (e instanceof PepperModuleException) {
            throw (PepperModuleException) e;
        } else {
            throw new PepperModuleXMLResourceException("Cannot load Pepper workflow description file'"
                    + wdFile + "', because of a nested exception: " + e.getMessage() + ". ", e);
        }
    }
}

From source file:org.corpus_tools.salt.util.SaltUtil.java

/**
 * Loads a list of root objects coming from a SaltXML (.{@link #FILE_ENDING_SALT_XML})
 * and returns it.
 * <p>
 * The file is first parsed as UTF-8. If that attempt fails with a {@link SAXException}, the file
 * is parsed a second time by its absolute path, leaving encoding detection to the parser.
 *
 * @param location
 *            {@link URI} to SaltXML file containing the object
 * @return loaded objects
 * @throws SaltResourceException
 *             if {@code location} is null, the file does not exist, the parser cannot be
 *             created, or the file cannot be loaded
 */
public static List<Object> loadObjects(URI location) {
    if (location == null) {
        throw new SaltResourceException("Cannot load Salt object, because the given uri is null.");
    }
    File objectFile = new File(
            (location.toFileString() == null) ? location.toString() : location.toFileString());
    if (!objectFile.exists()) {
        throw new SaltResourceException("Cannot load Salt object, because the file '"
                + objectFile.getAbsolutePath() + "' does not exist.");
    }

    SAXParserFactory factory = SAXParserFactory.newInstance();
    SaltXML10Handler contentHandler = new SaltXML10Handler();

    XMLReader xmlReader;
    try {
        xmlReader = factory.newSAXParser().getXMLReader();
        xmlReader.setContentHandler(contentHandler);
    } catch (Exception e) {
        throw new SaltResourceException(
                "Cannot load Salt object from file '" + objectFile.getAbsolutePath() + "'.", e);
    }

    // try-with-resources closes the stream before any catch block runs, so the file handle is
    // already released when the fallback below reopens the same file by path.
    try (InputStream inputStream = new FileInputStream(objectFile);
            Reader reader = new InputStreamReader(inputStream, "UTF-8")) {
        InputSource is = new InputSource(reader);
        is.setEncoding("UTF-8");
        xmlReader.parse(is);
    } catch (SAXException e) {
        // Second attempt: fresh reader, parse by path instead of a forced UTF-8 reader.
        try {
            xmlReader = factory.newSAXParser().getXMLReader();
            xmlReader.setContentHandler(contentHandler);
            xmlReader.parse(objectFile.getAbsolutePath());
        } catch (Exception e1) {
            throw new SaltResourceException(
                    "Cannot load Salt object from file '" + objectFile.getAbsolutePath() + "'.", e1);
        }
    } catch (Exception e) {
        if (e instanceof SaltException) {
            throw (SaltException) e;
        } else {
            throw new SaltResourceException(
                    "Cannot load Salt object from file'" + objectFile + "', because of a nested exception. ",
                    e);
        }
    }
    return contentHandler.getRootObjects();
}

From source file:org.deegree.tools.metadata.ISO19139Validator.java

/**
 * @param srcOpt/*from  w  w w  . j av a2 s  . c om*/
 * @param schemaOpt
 * @param resultOpt
 * @throws IOException
 * @throws SAXException
 */
public void run(String srcOpt, String schemaOpt, String resultOpt) throws IOException, SAXException {
    File src = new File(srcOpt);
    if (!src.exists()) {
        throw new IllegalArgumentException("src does not exist: " + srcOpt + ". Check parameter " + OPT_SRC);
    }
    File result;
    if (resultOpt != null && resultOpt.length() > 0) {
        result = new File(resultOpt);
        if (!result.exists()) {
            result.createNewFile();
        }
    } else {
        result = File.createTempFile(DEFAULT_FILENAME, ".txt");
    }

    SCHEMAVERSION schemaVersion = SCHEMAVERSION.V2007;
    if (schemaOpt != null) {
        try {
            schemaVersion = SCHEMAVERSION.valueOf(schemaOpt);
        } catch (Exception e) {
            throw new IllegalArgumentException("Invalid argument for " + OPT_SCHEMA_VERSION + ": " + schemaOpt);
        }
    }

    String schema = "/META-INF/SCHEMAS_OPENGIS_NET/iso/19139/20070417/gmd/metadataEntity.xsd";
    if (SCHEMAVERSION.V2006.equals(schemaVersion)) {
        schema = "/META-INF/SCHEMAS_OPENGIS_NET/iso/19139/20060504/gmd/metadataEntity.xsd";
    }
    URL u = ISO19139Validator.class.getResource(schema);
    XMLReader parser = XMLReaderFactory.createXMLReader();
    parser.setFeature("http://xml.org/sax/features/validation", true);
    parser.setFeature("http://apache.org/xml/features/validation/schema", true);
    parser.setProperty("http://apache.org/xml/properties/schema/external-schemaLocation",
            "http://www.isotc211.org/2005/gmd " + u.toExternalForm());

    final FileWriter fw = new FileWriter(result);

    File[] filesToValidate;
    if (src.isDirectory()) {
        filesToValidate = src.listFiles();
        fw.write("validate " + filesToValidate.length + " files from directory " + src);
        fw.write("\n");
    } else {
        filesToValidate = new File[] { src };
    }
    System.out.println("Start validation");
    int noOfValidRecords = 0;
    for (int i = 0; i < filesToValidate.length; i++) {
        FileErrorHandler feh = new FileErrorHandler(fw);
        parser.setErrorHandler(feh);
        if (filesToValidate.length > 1) {
            fw.write("validate record " + i + " of " + filesToValidate.length);
            fw.write("\n");
        }
        File fileToValidate = filesToValidate[i];
        System.out.println(fileToValidate);
        fw.write("validate file " + fileToValidate.getAbsolutePath());
        fw.write("\n");
        try {
            parser.parse(new InputSource(new FileInputStream(fileToValidate)));
        } catch (Exception e) {
            String msg = "Could not validate current occured: " + e.getMessage()
                    + ". Continue with next record";
            System.err.println(msg);
            fw.write(msg);
            fw.write("\n");
            continue;
        }
        fw.flush();
        if (feh.isValid())
            noOfValidRecords++;
    }
    fw.write(noOfValidRecords + " of " + filesToValidate.length + " records are valid.");
    fw.close();
    System.out.println("Validation finished, result file: " + result.getAbsolutePath());
}

From source file:org.devtcg.five.meta.LastfmMetaTask.java

/**
 * Requests the method URL, feeds the XML reply to the content handler and then calls
 * {@code onPostParse()}. Returns early, without calling {@code onPostParse()}, if the task has
 * been cancelled or the server does not answer with {@code HttpStatus.SC_OK}.
 *
 * @throws Exception if the request, the parser setup or the parse itself fails
 */
public void run() throws Exception {
    System.out.println("Accessing " + getMethodUrl());
    HttpGet request = new HttpGet(getMethodUrl());

    HttpResponse response = mClient.execute(request);

    if (mTask.isCancelled()) {
        // The response body is never read on this path; abort the request so the underlying
        // connection is released instead of being left checked out.
        request.abort();
        return;
    }

    if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
        if (LOG.isWarnEnabled()) {
            LOG.warn(getMethodUrl() + " replied " + response.getStatusLine());
        }
        response.getEntity().consumeContent();
        return;
    }

    HttpEntity entity = response.getEntity();
    InputStream in = entity.getContent();

    try {
        XMLReader reader = XMLReaderFactory.createXMLReader();
        reader.setContentHandler(getContentHandler());
        reader.parse(new InputSource(in));
    } finally {
        IOUtils.closeQuietlyNullSafe(in);
    }

    if (mTask.isCancelled()) {
        return;
    }

    onPostParse();
}

From source file:org.dhatim.delivery.dom.DOMParser.java

/**
 * Perform the actual parse into the supplied content handler.
 * <p>
 * With an execution context, the reader is taken from {@code getXMLReader(executionContext)},
 * then from the delivery config, and only then newly created. After parsing, cleanup runs in
 * nested {@code finally} blocks so that each step is attempted even if an earlier one throws.
 * Without an execution context, a new reader is created and none of that cleanup is performed.
 *
 * @param source Source content stream to be parsed.
 * @param contentHandler Content handler instance that will build/append-to the DOM.
 * @throws SAXException Unable to parse the content.
 * @throws IOException Unable to read the input stream.
 */
private void parse(Source source, DOMBuilder contentHandler) throws SAXException, IOException {
    ExecutionContext executionContext = getExecContext();

    if (executionContext != null) {
        ContentDeliveryConfig deliveryConfig = executionContext.getDeliveryConfig();

        XMLReader domReader = getXMLReader(executionContext);

        try {
            // Fall back step by step: delivery config first, a newly created reader last.
            if (domReader == null) {
                domReader = deliveryConfig.getXMLReader();
            }
            if (domReader == null) {
                domReader = createXMLReader();
            }

            if (domReader instanceof HierarchyChangeReader) {
                ((HierarchyChangeReader) domReader)
                        .setHierarchyChangeListener(new XMLReaderHierarchyChangeListener(executionContext));
            }

            NamespaceDeclarationStack namespaceDeclarationStack = new NamespaceDeclarationStack();
            NamespaceMappings.setNamespaceDeclarationStack(namespaceDeclarationStack, executionContext);
            attachNamespaceDeclarationStack(domReader, executionContext);

            attachXMLReader(domReader, executionContext);
            configureReader(domReader, contentHandler, executionContext, source);
            domReader.parse(createInputSource(source, executionContext.getContentEncoding()));
        } finally {
            // Cleanup order: clear the hierarchy listener, detach the reader from the execution
            // context, hand the reader back to the delivery config, detach the content handler.
            try {
                if (domReader instanceof HierarchyChangeReader) {
                    ((HierarchyChangeReader) domReader).setHierarchyChangeListener(null);
                }
            } finally {
                try {
                    try {
                        detachXMLReader(executionContext);
                    } finally {
                        // domReader is still null here if obtaining or creating it threw.
                        if (domReader != null) {
                            deliveryConfig.returnXMLReader(domReader);
                        }
                    }
                } finally {
                    contentHandler.detachHandler();
                }
            }
        }
    } else {
        // No execution context: use a new reader and the platform default charset name.
        XMLReader domReader = createXMLReader();
        configureReader(domReader, contentHandler, null, source);
        domReader.parse(createInputSource(source, Charset.defaultCharset().name()));
    }
}