Example usage for org.xml.sax XMLReader setContentHandler

List of usage examples for org.xml.sax XMLReader setContentHandler

Introduction

On this page you can find example usages of org.xml.sax XMLReader setContentHandler.

Prototype

public void setContentHandler(ContentHandler handler);

Source Link

Document

Allow an application to register a content event handler.

Usage

From source file:org.archive.crawler.settings.XMLSettingsHandler.java

/**
 * Read the CrawlerSettings object from a specific file.
 *
 * <p>If the file exists on disk it is read from there. Otherwise it is
 * looked up on the CLASSPATH, unless its name starts with the settings
 * filename (see the comment in the body). Parser, configuration and I/O
 * failures are logged as warnings and make this method return null.
 *
 * @param settings the settings object to be updated with data from the
 *                 persistent storage.
 * @param f the file to read from.
 * @return the updated settings object or null if there was no data for this
 *         in the persistent storage.
 */
protected final CrawlerSettings readSettingsObject(CrawlerSettings settings, File f) {
    CrawlerSettings result = null;
    InputStream is = null;
    try {
        if (f.exists()) {
            is = new FileInputStream(f);
        } else if (!f.getName().startsWith(settingsFilename)) {
            // Perhaps the file we're looking for is on the CLASSPATH.
            // DON'T look on the CLASSPATH for 'settings.xml' files.  The
            // look for 'settings.xml' files happens frequently. Not looking
            // on classpath for 'settings.xml' is an optimization based on
            // ASSUMPTION that there will never be a 'settings.xml' saved
            // on classpath.
            is = XMLSettingsHandler.class.getResourceAsStream(toResourcePath(f));
        }
        if (is != null) {
            XMLReader parser = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
            parser.setContentHandler(new CrawlSettingsSAXHandler(settings));
            InputSource source = new InputSource(new BufferedInputStream(is));
            // Go through toURI() so that characters illegal in URLs get escaped.
            source.setSystemId(f.toURI().toURL().toExternalForm());
            parser.parse(source);
            result = settings;
        }
    } catch (SAXParseException e) {
        logger.warning(e.getMessage() + " in '" + e.getSystemId() + "', line: " + e.getLineNumber()
                + ", column: " + e.getColumnNumber());
    } catch (SAXException e) {
        logger.warning(withNestedMessage(e.getMessage(), e.getException()));
    } catch (ParserConfigurationException e) {
        logger.warning(withNestedMessage(e.getMessage(), e.getCause()));
    } catch (FactoryConfigurationError e) {
        logger.warning(withNestedMessage(e.getMessage(), e.getException()));
    } catch (IOException e) {
        logger.warning("Could not access file '" + f.getAbsolutePath() + "': " + e.getMessage());
    } finally {
        // Always release the underlying file/resource handle.
        if (is != null) {
            try {
                is.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails.
            }
        }
    }
    return result;
}

/**
 * Builds a log message from an exception message and an optional nested
 * throwable. The nested throwable may be null, in which case only the
 * outer message is returned.
 */
private static String withNestedMessage(String message, Throwable nested) {
    return nested == null ? message : message + ": " + nested.getMessage();
}

From source file:org.atombeat.xquery.functions.util.RequestGetData.java

/**
 * Returns the body of the HTTP request bound to the {@code $request} variable.
 *
 * <p>The result depends on the request:
 * <ul>
 * <li>content length of -1: the empty sequence;</li>
 * <li>a content type is given and its mime type is either missing from the
 *     mime table or not an XML type: the raw bytes as a Base64Binary;</li>
 * <li>otherwise the body is parsed as XML and its document element is
 *     returned; if parsing fails, the body is returned as a string decoded
 *     with the request's character encoding (UTF-8 when none is given).</li>
 * </ul>
 *
 * @param args the function arguments (not read by this implementation)
 * @param contextSequence the context sequence (not read by this implementation)
 * @return the request body in one of the forms described above
 * @throws XPathException if {@code $request} is missing or not bound to a
 *         request object, or if reading or decoding the body fails
 */
public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {

    RequestModule myModule = (RequestModule) context.getModule(RequestModule.NAMESPACE_URI);

    // request object is read from global variable $request
    Variable var = myModule.resolveVariable(RequestModule.REQUEST_VAR);

    if (var == null || var.getValue() == null) {
        throw new XPathException(this, "No request object found in the current XQuery context.");
    }

    if (var.getValue().getItemType() != Type.JAVA_OBJECT) {
        throw new XPathException(this, "Variable $request is not bound to an Java object.");
    }

    JavaObjectValue value = (JavaObjectValue) var.getValue().itemAt(0);

    if (!(value.getObject() instanceof RequestWrapper)) {
        throw new XPathException(this, "Variable $request is not bound to a Request object.");
    }
    RequestWrapper request = (RequestWrapper) value.getObject();

    //if the content length is unknown, return
    if (request.getContentLength() == -1) {
        return Sequence.EMPTY_SEQUENCE;
    }

    //first, get the content of the request
    byte[] bufRequestData;
    try {
        InputStream is = request.getInputStream();
        ByteArrayOutputStream bos = new ByteArrayOutputStream(request.getContentLength());
        byte[] buf = new byte[256];
        int l;
        while ((l = is.read(buf)) > -1) {
            bos.write(buf, 0, l);
        }
        bufRequestData = bos.toByteArray();
    } catch (IOException ioe) {
        throw new XPathException(this, "An IO exception ocurred: " + ioe.getMessage(), ioe);
    }

    //determine if the mime database considers this binary data
    String contentType = request.getContentType();
    if (contentType != null) {
        //strip off any charset encoding info
        int paramStart = contentType.indexOf(";");
        if (paramStart > -1) {
            contentType = contentType.substring(0, paramStart);
        }

        MimeType mimeType = MimeTable.getInstance().getContentType(contentType);
        //<atombeat>
        // Conservative position: a mime type that is not present in the
        // table is treated like a non-XML type, i.e. the request data is
        // considered binary and is encoded as base 64.
        if (mimeType == null || !mimeType.isXMLType()) {
            return new Base64Binary(bufRequestData);
        }
        //</atombeat>
    }

    //try and parse as an XML document, otherwise fall back to returning the data as a string
    context.pushDocumentContext();
    try {
        //try and construct xml document from input stream, we use eXist's in-memory DOM implementation
        // NOTE(review): the request body is untrusted and the parser is used with its
        // default DTD/external-entity settings - consider hardening against XXE.
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setNamespaceAware(true);
        //TODO : we should be able to cope with context.getBaseURI()
        InputSource src = new InputSource(new ByteArrayInputStream(bufRequestData));
        SAXParser parser = factory.newSAXParser();
        XMLReader reader = parser.getXMLReader();
        MemTreeBuilder builder = context.getDocumentBuilder();
        DocumentBuilderReceiver receiver = new DocumentBuilderReceiver(builder, true);
        reader.setContentHandler(receiver);
        reader.parse(src);
        Document doc = receiver.getDocument();
        return (NodeValue) doc.getDocumentElement();
    } catch (ParserConfigurationException e) {
        //deliberately ignored, we will default to trying to return a string below
    } catch (SAXException e) {
        //deliberately ignored, we will default to trying to return a string below
    } catch (IOException e) {
        //deliberately ignored, we will default to trying to return a string below
    } finally {
        context.popDocumentContext();
    }

    //not a valid XML document, return a string representation of the document
    String encoding = request.getCharacterEncoding();
    if (encoding == null) {
        encoding = "UTF-8";
    }
    try {
        String s = new String(bufRequestData, encoding);
        return new StringValue(s);
    } catch (IOException e) {
        // an unsupported charset name is reported as an IOException subclass
        throw new XPathException(this, "An IO exception ocurred: " + e.getMessage(), e);
    }
}

From source file:org.carrot2.source.pubmed.PubMedDocumentSource.java

/**
 * Gets PubMed entry ids matching the query.
 *
 * @param query the search term, URL-encoded as UTF-8 before being sent
 * @param requestedResults value passed as the {@code retmax} request parameter
 * @return the primary ids collected by the search handler
 * @throws Exception if the parser cannot be set up, the request or parsing
 *         fails, or the service replies with a status other than 200 OK
 */
private List<String> getPubMedIds(final String query, final int requestedResults) throws Exception {
    final XMLReader xmlReader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
    xmlReader.setFeature("http://xml.org/sax/features/validation", false);
    xmlReader.setFeature("http://xml.org/sax/features/namespaces", true);

    final PubMedSearchHandler idHandler = new PubMedSearchHandler();
    xmlReader.setContentHandler(idHandler);

    final String encodedQuery = StringUtils.urlEncodeWrapException(query, "UTF-8");
    final String requestUrl = E_SEARCH_URL + "?db=pubmed&usehistory=n&term=" + encodedQuery
            + "&retmax=" + requestedResults;

    final HttpUtils.Response httpResponse = HttpUtils.doGET(requestUrl, null, null);

    // Anything but 200 OK is reported together with the returned payload.
    if (httpResponse.status != HttpStatus.SC_OK) {
        throw new IOException("PubMed returned HTTP Error: " + httpResponse.status + ", HTTP payload: "
                + new String(httpResponse.payload, "iso8859-1"));
    }

    // Get document IDs
    xmlReader.parse(new InputSource(httpResponse.getPayloadAsStream()));
    return idHandler.getPubMedPrimaryIds();
}

From source file:org.carrot2.source.pubmed.PubMedDocumentSource.java

/**
 * Gets PubMed abstracts corresponding to the provided ids.
 *
 * @param ids the PubMed ids to fetch; an empty list yields an empty response
 *        without any request being made
 * @return the response assembled by the fetch handler
 * @throws Exception if the parser cannot be set up, the request or parsing
 *         fails, or the service replies with a status other than 200 OK
 */
private SearchEngineResponse getPubMedAbstracts(List<String> ids) throws Exception {
    if (ids.isEmpty()) {
        return new SearchEngineResponse();
    }

    final XMLReader xmlReader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
    xmlReader.setFeature("http://xml.org/sax/features/validation", false);
    xmlReader.setFeature("http://xml.org/sax/features/namespaces", true);

    final PubMedFetchHandler abstractHandler = new PubMedFetchHandler();
    xmlReader.setContentHandler(abstractHandler);

    final String requestUrl = E_FETCH_URL + "?db=pubmed&retmode=xml&rettype=abstract&id="
            + getIdsString(ids);

    final HttpUtils.Response httpResponse = HttpUtils.doGET(requestUrl, null, null);

    // No URL logging here, as the url can get really long
    if (httpResponse.status != HttpStatus.SC_OK) {
        throw new IOException("PubMed returned HTTP Error: " + httpResponse.status + ", HTTP payload: "
                + new String(httpResponse.payload, "iso8859-1"));
    }

    // Get document contents
    xmlReader.parse(new InputSource(httpResponse.getPayloadAsStream()));
    return abstractHandler.getResponse();
}

From source file:org.carrot2.source.yahoo.YahooSearchService.java

/**
 * Parse the response stream, assuming it is XML.
 *
 * @param is the stream to parse
 * @return the response collected by the parser's content handler
 * @throws IOException if reading fails, if the XML cannot be parsed, or if
 *         no XML parser can be acquired; parser failures are attached as
 *         the cause
 */
private static SearchEngineResponse parseResponseXML(final InputStream is) throws IOException {
    try {
        final XMLResponseParser parser = new XMLResponseParser();
        final XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();

        reader.setFeature("http://xml.org/sax/features/validation", false);
        reader.setFeature("http://xml.org/sax/features/namespaces", true);
        reader.setContentHandler(parser);

        reader.parse(new InputSource(is));

        return parser.response;
    } catch (final SAXException e) {
        // An IOException wrapped by the parser is rethrown as-is
        // (instanceof is already false for null).
        final Throwable cause = e.getException();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        }
        // Keep the SAX exception as the cause so the stack trace is not lost.
        throw new IOException("XML parsing exception: " + e.getMessage(), e);
    } catch (final ParserConfigurationException e) {
        throw new IOException("Could not acquire XML parser.", e);
    }
}

From source file:org.cauldron.tests.util.DigesterContext.java

/**
 * Parses the input with the given reader, using {@code digester} as the
 * content handler, and returns the root object the digester reports.
 *
 * @param reader the XML reader that performs the parse
 * @param input the source to parse
 * @return the value of {@code digester.getRoot()} after parsing
 * @throws JAXBException wrapping any exception raised while parsing or
 *         while fetching the root
 */
protected Object unmarshal(XMLReader reader, InputSource input) throws JAXBException {
    reader.setContentHandler(digester);

    final Object root;
    try {
        reader.parse(input);
        root = digester.getRoot();
    } catch (Exception failure) {
        throw new JAXBException(failure);
    }
    return root;
}

From source file:org.corpus_tools.pepper.core.PepperJobImpl.java

/**
 * {@inheritDoc PepperJob#load(URI)}/*  w  w w .  ja va  2s.c o  m*/
 */
@Override
public void load(URI uri) {
    if (uri.isFile()) {
        File wdFile = new File(uri.toFileString());
        // set folder containing workflow description as base dir
        setBaseDir(uri.trimSegments(1));

        SAXParser parser;
        XMLReader xmlReader;
        SAXParserFactory factory = SAXParserFactory.newInstance();

        WorkflowDescriptionReader contentHandler = new WorkflowDescriptionReader();
        contentHandler.setPepperJob(this);
        contentHandler.setLocation(uri);

        // remove all existing steps
        clear();

        try {
            parser = factory.newSAXParser();
            xmlReader = parser.getXMLReader();
            xmlReader.setContentHandler(contentHandler);
        } catch (ParserConfigurationException e) {
            throw new PepperModuleXMLResourceException("Cannot load Pepper workflow description file '"
                    + wdFile.getAbsolutePath() + "': " + e.getMessage() + ". ", e);
        } catch (Exception e) {
            throw new PepperModuleXMLResourceException("Cannot load Pepper workflow description file '"
                    + wdFile.getAbsolutePath() + "': " + e.getMessage() + ". ", e);
        }
        try {
            InputStream inputStream = new FileInputStream(wdFile);
            Reader reader = new InputStreamReader(inputStream, "UTF-8");
            InputSource is = new InputSource(reader);
            is.setEncoding("UTF-8");
            xmlReader.parse(is);
        } catch (SAXException e) {
            try {
                parser = factory.newSAXParser();
                xmlReader = parser.getXMLReader();
                xmlReader.setContentHandler(contentHandler);
                xmlReader.parse(wdFile.getAbsolutePath());
            } catch (Exception e1) {
                throw new PepperModuleXMLResourceException("Cannot load Pepper workflow description file '"
                        + wdFile.getAbsolutePath() + "': " + e1.getMessage() + ". ", e1);
            }
        } catch (Exception e) {
            if (e instanceof PepperModuleException) {
                throw (PepperModuleException) e;
            } else {
                throw new PepperModuleXMLResourceException("Cannot load Pepper workflow description file'"
                        + wdFile + "', because of a nested exception: " + e.getMessage() + ". ", e);
            }
        }
    } else {
        throw new UnsupportedOperationException(
                "Currently Pepper can only load workflow description from local files.");
    }
}

From source file:org.corpus_tools.salt.util.SaltUtil.java

/**
 * Loads a list of root objects coming from a SaltXML (.{@link #FILE_ENDING_SALT_XML})
 * and returns it.
 *
 * <p>The file is first parsed as UTF-8; if that raises a
 * {@link SAXException}, a second parse is attempted with a fresh reader
 * that opens the file by its absolute path.
 *
 * @param location
 *            {@link URI} to SaltXML file containing the object
 * @return loaded objects
 * @throws SaltResourceException
 *             if the location is null, the file does not exist, or the
 *             file cannot be read or parsed
 */
public static List<Object> loadObjects(URI location) {
    if (location == null) {
        throw new SaltResourceException("Cannot load Salt object, because the given uri is null.");
    }
    // Fall back to the plain string form when the URI has no file representation.
    String filePath = location.toFileString();
    File objectFile = new File((filePath == null) ? location.toString() : filePath);
    if (!objectFile.exists()) {
        throw new SaltResourceException("Cannot load Salt object, because the file '"
                + objectFile.getAbsolutePath() + "' does not exist.");
    }

    SAXParserFactory factory = SAXParserFactory.newInstance();
    SaltXML10Handler contentHandler = new SaltXML10Handler();

    XMLReader xmlReader;
    try {
        xmlReader = factory.newSAXParser().getXMLReader();
        xmlReader.setContentHandler(contentHandler);
    } catch (Exception e) {
        throw new SaltResourceException(
                "Cannot load Salt object from file '" + objectFile.getAbsolutePath() + "'.", e);
    }

    try {
        InputStream inputStream = new FileInputStream(objectFile);
        try {
            // First attempt: read the file explicitly as UTF-8.
            Reader reader = new InputStreamReader(inputStream, "UTF-8");
            InputSource is = new InputSource(reader);
            is.setEncoding("UTF-8");
            xmlReader.parse(is);
        } finally {
            // Release the file handle whatever the parse outcome was.
            try {
                inputStream.close();
            } catch (java.io.IOException ignored) {
                // a failed close must not mask the result of the parse
            }
        }
    } catch (SAXException e) {
        // Second attempt with a fresh reader that opens the file by path.
        try {
            XMLReader fallbackReader = factory.newSAXParser().getXMLReader();
            fallbackReader.setContentHandler(contentHandler);
            fallbackReader.parse(objectFile.getAbsolutePath());
        } catch (Exception e1) {
            throw new SaltResourceException(
                    "Cannot load Salt object from file '" + objectFile.getAbsolutePath() + "'.", e1);
        }
    } catch (Exception e) {
        if (e instanceof SaltException) {
            throw (SaltException) e;
        } else {
            throw new SaltResourceException(
                    "Cannot load Salt object from file'" + objectFile + "', because of a nested exception. ",
                    e);
        }
    }
    return contentHandler.getRootObjects();
}

From source file:org.devtcg.five.meta.LastfmMetaTask.java

/**
 * Fetches the method URL with an HTTP GET and feeds the response body to
 * the content handler returned by {@code getContentHandler()}.
 *
 * <p>The method returns early, without calling {@code onPostParse()}, when
 * the task has been cancelled or when the server replies with a status
 * other than 200 OK (in which case the reply is logged as a warning and its
 * content is consumed).
 *
 * @throws Exception if the request, reader creation or parsing fails
 */
public void run() throws Exception {
    System.out.println("Accessing " + getMethodUrl());
    HttpGet get = new HttpGet(getMethodUrl());

    HttpResponse reply = mClient.execute(get);

    if (mTask.isCancelled()) {
        return;
    }

    final int statusCode = reply.getStatusLine().getStatusCode();
    if (statusCode != HttpStatus.SC_OK) {
        if (LOG.isWarnEnabled()) {
            LOG.warn(getMethodUrl() + " replied " + reply.getStatusLine());
        }
        reply.getEntity().consumeContent();
        return;
    }

    InputStream body = reply.getEntity().getContent();

    try {
        XMLReader xmlReader = XMLReaderFactory.createXMLReader();
        xmlReader.setContentHandler(getContentHandler());
        xmlReader.parse(new InputSource(body));
    } finally {
        IOUtils.closeQuietlyNullSafe(body);
    }

    // Post-processing only happens when the task is still active.
    if (!mTask.isCancelled()) {
        onPostParse();
    }
}

From source file:org.dhatim.delivery.AbstractParser.java

/**
 * Prepares the reader for a parse: hands over the execution context and
 * source objects where the reader type accepts them, then registers the
 * handler as both content handler and lexical handler.
 *
 * @param reader the reader to configure
 * @param handler registered as content handler and, if the reader
 *        recognizes the property, as lexical handler
 * @param execContext passed to the reader when it is a {@code SmooksXMLReader}
 * @param source must be a {@code JavaSource} when the reader is a
 *        {@code JavaXMLReader}
 * @throws SAXException if a {@code JavaXMLReader} is given a source that is
 *         not a {@code JavaSource}, or if the reader rejects the lexical
 *         handler property for a reason other than not recognizing it
 */
protected void configureReader(XMLReader reader, DefaultHandler2 handler, ExecutionContext execContext,
        Source source) throws SAXException {
    if (reader instanceof SmooksXMLReader) {
        SmooksXMLReader smooksReader = (SmooksXMLReader) reader;
        smooksReader.setExecutionContext(execContext);
    }

    if (reader instanceof JavaXMLReader) {
        if (source instanceof JavaSource) {
            JavaSource javaSource = (JavaSource) source;
            ((JavaXMLReader) reader).setSourceObjects(javaSource.getSourceObjects());
        } else {
            throw new SAXException("A " + JavaSource.class.getName() + " source must be supplied for "
                    + JavaXMLReader.class.getName() + " implementations.");
        }
    }

    reader.setContentHandler(handler);

    final String lexicalHandlerProperty = "http://xml.org/sax/properties/lexical-handler";
    try {
        reader.setProperty(lexicalHandlerProperty, handler);
    } catch (SAXNotRecognizedException notRecognized) {
        // Lexical events are optional; only leave a debug trace.
        logger.debug("XMLReader property '" + lexicalHandlerProperty
                + "' not recognized by XMLReader '" + reader.getClass().getName() + "'.");
    }
}