List of usage examples for org.xml.sax XMLReader setContentHandler
/**
 * Registers the content handler on this XML reader. In the usage examples
 * of this listing it is always called before {@code parse(...)} so that the
 * given handler is the one in place while the document is read.
 *
 * @param handler the content handler to register
 */
public void setContentHandler(ContentHandler handler);
From source file:org.archive.crawler.settings.XMLSettingsHandler.java
/** Read the CrawlerSettings object from a specific file. * * @param settings the settings object to be updated with data from the * persistent storage.//from w w w.j a v a 2 s.c om * @param f the file to read from. * @return the updated settings object or null if there was no data for this * in the persistent storage. */ protected final CrawlerSettings readSettingsObject(CrawlerSettings settings, File f) { CrawlerSettings result = null; try { InputStream is = null; if (!f.exists()) { // Perhaps the file we're looking for is on the CLASSPATH. // DON'T look on the CLASSPATH for 'settings.xml' files. The // look for 'settings.xml' files happens frequently. Not looking // on classpath for 'settings.xml' is an optimization based on // ASSUMPTION that there will never be a 'settings.xml' saved // on classpath. if (!f.getName().startsWith(settingsFilename)) { is = XMLSettingsHandler.class.getResourceAsStream(toResourcePath(f)); } } else { is = new FileInputStream(f); } if (is != null) { XMLReader parser = SAXParserFactory.newInstance().newSAXParser().getXMLReader(); InputStream file = new BufferedInputStream(is); parser.setContentHandler(new CrawlSettingsSAXHandler(settings)); InputSource source = new InputSource(file); source.setSystemId(f.toURL().toExternalForm()); parser.parse(source); result = settings; } } catch (SAXParseException e) { logger.warning(e.getMessage() + " in '" + e.getSystemId() + "', line: " + e.getLineNumber() + ", column: " + e.getColumnNumber()); } catch (SAXException e) { logger.warning(e.getMessage() + ": " + e.getException().getMessage()); } catch (ParserConfigurationException e) { logger.warning(e.getMessage() + ": " + e.getCause().getMessage()); } catch (FactoryConfigurationError e) { logger.warning(e.getMessage() + ": " + e.getException().getMessage()); } catch (IOException e) { logger.warning("Could not access file '" + f.getAbsolutePath() + "': " + e.getMessage()); } return result; }
From source file:org.atombeat.xquery.functions.util.RequestGetData.java
public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException { RequestModule myModule = (RequestModule) context.getModule(RequestModule.NAMESPACE_URI); // request object is read from global variable $request Variable var = myModule.resolveVariable(RequestModule.REQUEST_VAR); if (var == null || var.getValue() == null) throw new XPathException(this, "No request object found in the current XQuery context."); if (var.getValue().getItemType() != Type.JAVA_OBJECT) throw new XPathException(this, "Variable $request is not bound to an Java object."); JavaObjectValue value = (JavaObjectValue) var.getValue().itemAt(0); if (value.getObject() instanceof RequestWrapper) { RequestWrapper request = (RequestWrapper) value.getObject(); //if the content length is unknown, return if (request.getContentLength() == -1) { return Sequence.EMPTY_SEQUENCE; }/*from w ww .j a v a2s . c o m*/ //first, get the content of the request byte[] bufRequestData = null; try { InputStream is = request.getInputStream(); ByteArrayOutputStream bos = new ByteArrayOutputStream(request.getContentLength()); byte[] buf = new byte[256]; int l = 0; while ((l = is.read(buf)) > -1) { bos.write(buf, 0, l); } bufRequestData = bos.toByteArray(); } catch (IOException ioe) { throw new XPathException(this, "An IO exception ocurred: " + ioe.getMessage(), ioe); } //was there any POST content if (bufRequestData != null) { //determine if exists mime database considers this binary data String contentType = request.getContentType(); if (contentType != null) { //strip off any charset encoding info if (contentType.indexOf(";") > -1) contentType = contentType.substring(0, contentType.indexOf(";")); MimeType mimeType = MimeTable.getInstance().getContentType(contentType); //<atombeat> // this code will only encode the request data if the mimeType // is present in the mime table, and the mimeType is stated // as binary... 
// if(mimeType != null) // { // if(!mimeType.isXMLType()) // { // //binary data // return new Base64Binary(bufRequestData); // } // } // this code takes a more conservative position and assumes that // if the mime type is not present in the table, the request // data should be treated as binary, and should be encoded as // base 64... if (mimeType == null || !mimeType.isXMLType()) { return new Base64Binary(bufRequestData); } //</atombeat> } //try and parse as an XML documemnt, otherwise fallback to returning the data as a string context.pushDocumentContext(); try { //try and construct xml document from input stream, we use eXist's in-memory DOM implementation SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); //TODO : we should be able to cope with context.getBaseURI() InputSource src = new InputSource(new ByteArrayInputStream(bufRequestData)); SAXParser parser = factory.newSAXParser(); XMLReader reader = parser.getXMLReader(); MemTreeBuilder builder = context.getDocumentBuilder(); DocumentBuilderReceiver receiver = new DocumentBuilderReceiver(builder, true); reader.setContentHandler(receiver); reader.parse(src); Document doc = receiver.getDocument(); return (NodeValue) doc.getDocumentElement(); } catch (ParserConfigurationException e) { //do nothing, we will default to trying to return a string below } catch (SAXException e) { //do nothing, we will default to trying to return a string below } catch (IOException e) { //do nothing, we will default to trying to return a string below } finally { context.popDocumentContext(); } //not a valid XML document, return a string representation of the document String encoding = request.getCharacterEncoding(); if (encoding == null) { encoding = "UTF-8"; } try { String s = new String(bufRequestData, encoding); return new StringValue(s); } catch (IOException e) { throw new XPathException(this, "An IO exception ocurred: " + e.getMessage(), e); } } else { //no post data return 
Sequence.EMPTY_SEQUENCE; } } else { throw new XPathException(this, "Variable $request is not bound to a Request object."); } }
From source file:org.carrot2.source.pubmed.PubMedDocumentSource.java
/** * Gets PubMed entry ids matching the query. *//*from w w w. j a v a 2 s .c om*/ private List<String> getPubMedIds(final String query, final int requestedResults) throws Exception { final XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader(); reader.setFeature("http://xml.org/sax/features/validation", false); reader.setFeature("http://xml.org/sax/features/namespaces", true); PubMedSearchHandler searchHandler = new PubMedSearchHandler(); reader.setContentHandler(searchHandler); final String url = E_SEARCH_URL + "?db=pubmed&usehistory=n&term=" + StringUtils.urlEncodeWrapException(query, "UTF-8") + "&retmax=" + Integer.toString(requestedResults); final HttpUtils.Response response = HttpUtils.doGET(url, null, null); // Get document IDs if (response.status == HttpStatus.SC_OK) { reader.parse(new InputSource(response.getPayloadAsStream())); } else { throw new IOException("PubMed returned HTTP Error: " + response.status + ", HTTP payload: " + new String(response.payload, "iso8859-1")); } return searchHandler.getPubMedPrimaryIds(); }
From source file:org.carrot2.source.pubmed.PubMedDocumentSource.java
/** * Gets PubMed abstracts corresponding to the provided ids. *//* w ww.java 2s .c o m*/ private SearchEngineResponse getPubMedAbstracts(List<String> ids) throws Exception { if (ids.isEmpty()) { return new SearchEngineResponse(); } final XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader(); reader.setFeature("http://xml.org/sax/features/validation", false); reader.setFeature("http://xml.org/sax/features/namespaces", true); final PubMedFetchHandler fetchHandler = new PubMedFetchHandler(); reader.setContentHandler(fetchHandler); final String url = E_FETCH_URL + "?db=pubmed&retmode=xml&rettype=abstract&id=" + getIdsString(ids); final HttpUtils.Response response = HttpUtils.doGET(url, null, null); // Get document contents // No URL logging here, as the url can get really long if (response.status == HttpStatus.SC_OK) { reader.parse(new InputSource(response.getPayloadAsStream())); } else { throw new IOException("PubMed returned HTTP Error: " + response.status + ", HTTP payload: " + new String(response.payload, "iso8859-1")); } return fetchHandler.getResponse(); }
From source file:org.carrot2.source.yahoo.YahooSearchService.java
/**
 * Parse the response stream, assuming it is XML.
 *
 * @param is the stream to parse; it is not closed by this method
 * @return the response built by the XML response parser
 * @throws IOException if reading fails, if the XML cannot be parsed, or if
 *             no XML parser could be acquired. An {@link IOException}
 *             wrapped by the SAX layer is rethrown as is; every other
 *             failure is attached as the cause of the thrown exception.
 */
private static SearchEngineResponse parseResponseXML(final InputStream is) throws IOException {
    try {
        final XMLResponseParser parser = new XMLResponseParser();
        final XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();

        reader.setFeature("http://xml.org/sax/features/validation", false);
        reader.setFeature("http://xml.org/sax/features/namespaces", true);
        reader.setContentHandler(parser);

        reader.parse(new InputSource(is));
        return parser.response;
    } catch (final SAXException e) {
        // instanceof is false for null, so no separate null check is needed.
        final Throwable cause = e.getException();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        }
        // Keep the SAX exception as the cause so the stack trace survives.
        throw new IOException("XML parsing exception: " + e.getMessage(), e);
    } catch (final ParserConfigurationException e) {
        throw new IOException("Could not acquire XML parser.", e);
    }
}
From source file:org.cauldron.tests.util.DigesterContext.java
protected Object unmarshal(XMLReader reader, InputSource input) throws JAXBException { reader.setContentHandler(digester); try {//from w ww . jav a 2 s . co m reader.parse(input); return digester.getRoot(); } catch (Exception e) { throw new JAXBException(e); } }
From source file:org.corpus_tools.pepper.core.PepperJobImpl.java
/** * {@inheritDoc PepperJob#load(URI)}/* w w w . ja va 2s.c o m*/ */ @Override public void load(URI uri) { if (uri.isFile()) { File wdFile = new File(uri.toFileString()); // set folder containing workflow description as base dir setBaseDir(uri.trimSegments(1)); SAXParser parser; XMLReader xmlReader; SAXParserFactory factory = SAXParserFactory.newInstance(); WorkflowDescriptionReader contentHandler = new WorkflowDescriptionReader(); contentHandler.setPepperJob(this); contentHandler.setLocation(uri); // remove all existing steps clear(); try { parser = factory.newSAXParser(); xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(contentHandler); } catch (ParserConfigurationException e) { throw new PepperModuleXMLResourceException("Cannot load Pepper workflow description file '" + wdFile.getAbsolutePath() + "': " + e.getMessage() + ". ", e); } catch (Exception e) { throw new PepperModuleXMLResourceException("Cannot load Pepper workflow description file '" + wdFile.getAbsolutePath() + "': " + e.getMessage() + ". ", e); } try { InputStream inputStream = new FileInputStream(wdFile); Reader reader = new InputStreamReader(inputStream, "UTF-8"); InputSource is = new InputSource(reader); is.setEncoding("UTF-8"); xmlReader.parse(is); } catch (SAXException e) { try { parser = factory.newSAXParser(); xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(contentHandler); xmlReader.parse(wdFile.getAbsolutePath()); } catch (Exception e1) { throw new PepperModuleXMLResourceException("Cannot load Pepper workflow description file '" + wdFile.getAbsolutePath() + "': " + e1.getMessage() + ". ", e1); } } catch (Exception e) { if (e instanceof PepperModuleException) { throw (PepperModuleException) e; } else { throw new PepperModuleXMLResourceException("Cannot load Pepper workflow description file'" + wdFile + "', because of a nested exception: " + e.getMessage() + ". 
", e); } } } else { throw new UnsupportedOperationException( "Currently Pepper can only load workflow description from local files."); } }
From source file:org.corpus_tools.salt.util.SaltUtil.java
/** * Loads a list of root objects coming from a SaltXML (.{@link #FILE_ENDING_SALT_XML}) * and returns it.//from www .j av a 2 s . c om * * @param objectURI * {@link URI} to SaltXML file containing the object * @return loaded objects */ public static List<Object> loadObjects(URI location) { if (location == null) { throw new SaltResourceException("Cannot load Salt object, because the given uri is null."); } File objectFile = new File( (location.toFileString() == null) ? location.toString() : location.toFileString()); if (!objectFile.exists()) { throw new SaltResourceException("Cannot load Salt object, because the file '" + objectFile.getAbsolutePath() + "' does not exist."); } SAXParser parser; XMLReader xmlReader; SAXParserFactory factory = SAXParserFactory.newInstance(); SaltXML10Handler contentHandler = new SaltXML10Handler(); try { parser = factory.newSAXParser(); xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(contentHandler); } catch (ParserConfigurationException e) { throw new SaltResourceException( "Cannot load Salt object from file '" + objectFile.getAbsolutePath() + "'.", e); } catch (Exception e) { throw new SaltResourceException( "Cannot load Salt object from file '" + objectFile.getAbsolutePath() + "'.", e); } try { InputStream inputStream = new FileInputStream(objectFile); Reader reader = new InputStreamReader(inputStream, "UTF-8"); InputSource is = new InputSource(reader); is.setEncoding("UTF-8"); xmlReader.parse(is); } catch (SAXException e) { try { parser = factory.newSAXParser(); xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(contentHandler); xmlReader.parse(objectFile.getAbsolutePath()); } catch (Exception e1) { throw new SaltResourceException( "Cannot load Salt object from file '" + objectFile.getAbsolutePath() + "'.", e1); } } catch (Exception e) { if (e instanceof SaltException) { throw (SaltException) e; } else { throw new SaltResourceException( "Cannot load Salt object from file'" + objectFile + "', because of 
a nested exception. ", e); } } return contentHandler.getRootObjects(); }
From source file:org.devtcg.five.meta.LastfmMetaTask.java
/**
 * Requests the method URL, feeds the XML response to the content handler
 * and finally calls {@code onPostParse()}.
 *
 * <p>The task's cancellation flag is checked after the request and again
 * after parsing; when it is set the method returns without calling
 * {@code onPostParse()}. A reply with a status other than 200 is logged as
 * a warning and otherwise ignored.
 *
 * @throws Exception if the request or the parsing fails, or if a 200 reply
 *             carries no body
 */
public void run() throws Exception {
    final String methodUrl = getMethodUrl();
    System.out.println("Accessing " + methodUrl);

    HttpGet request = new HttpGet(methodUrl);
    HttpResponse response = mClient.execute(request);

    if (mTask.isCancelled()) {
        // The response body will never be read; abort so that the
        // underlying connection is released instead of being left open.
        request.abort();
        return;
    }

    // getEntity() returns null when the response has no body.
    HttpEntity entity = response.getEntity();

    if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
        if (LOG.isWarnEnabled()) {
            LOG.warn(methodUrl + " replied " + response.getStatusLine());
        }
        if (entity != null) {
            entity.consumeContent();
        }
        return;
    }

    if (entity == null) {
        throw new java.io.IOException(methodUrl + " replied without a response body");
    }

    InputStream in = entity.getContent();
    try {
        XMLReader reader = XMLReaderFactory.createXMLReader();
        reader.setContentHandler(getContentHandler());
        reader.parse(new InputSource(in));
    } finally {
        IOUtils.closeQuietlyNullSafe(in);
    }

    if (mTask.isCancelled()) {
        return;
    }

    onPostParse();
}
From source file:org.dhatim.delivery.AbstractParser.java
/**
 * Prepares the reader for parsing: passes on the execution context and the
 * source objects where the reader type takes them, then installs the
 * handler as content handler and as lexical handler.
 *
 * @param reader the reader to configure
 * @param handler the handler registered for content and lexical events
 * @param execContext the execution context given to {@code SmooksXMLReader}s
 * @param source the source; must be a {@code JavaSource} when the reader is
 *            a {@code JavaXMLReader}
 * @throws SAXException if a {@code JavaXMLReader} is combined with a source
 *             that is not a {@code JavaSource}, or if setting the lexical
 *             handler fails for a reason other than the property not being
 *             recognized
 */
protected void configureReader(XMLReader reader, DefaultHandler2 handler, ExecutionContext execContext,
        Source source) throws SAXException {
    final String lexicalHandlerProperty = "http://xml.org/sax/properties/lexical-handler";

    if (reader instanceof SmooksXMLReader) {
        SmooksXMLReader smooksReader = (SmooksXMLReader) reader;
        smooksReader.setExecutionContext(execContext);
    }

    if (reader instanceof JavaXMLReader) {
        if (!(source instanceof JavaSource)) {
            throw new SAXException("A " + JavaSource.class.getName() + " source must be supplied for "
                    + JavaXMLReader.class.getName() + " implementations.");
        }
        JavaXMLReader javaReader = (JavaXMLReader) reader;
        JavaSource javaSource = (JavaSource) source;
        javaReader.setSourceObjects(javaSource.getSourceObjects());
    }

    reader.setContentHandler(handler);

    try {
        reader.setProperty(lexicalHandlerProperty, handler);
    } catch (SAXNotRecognizedException notRecognized) {
        // Lexical events are optional: a reader that does not know the
        // property is still usable, so this is only noted at debug level.
        logger.debug("XMLReader property '" + lexicalHandlerProperty + "' not recognized by XMLReader '"
                + reader.getClass().getName() + "'.");
    }
}