List of usage examples for org.xml.sax.XMLReader.parse
/**
 * Signature of the {@code parse} method of {@code org.xml.sax.XMLReader} that the
 * usage examples on this page refer to.
 *
 * @param systemId system identifier of the document to parse (presumably a URI or
 *                 file path; the examples on this page pass an absolute file path)
 * @throws IOException  declared by the signature
 * @throws SAXException declared by the signature
 */
public void parse(String systemId) throws IOException, SAXException;
From source file:org.betaconceptframework.astroboa.test.util.JAXBValidationUtils.java
public void validateUsingSAX(InputStream is) throws Exception { SAXParser saxParser = parserFactory.newSAXParser(); XMLReader xmlReader = saxParser.getXMLReader(); xmlReader.setEntityResolver(entityResolver); xmlReader.setErrorHandler(errorHandler); errorHandler.setIgnoreInvalidElementSequence(false); is = encodeURLsFoundInXML(is);//from w ww .ja v a 2s. c o m xmlReader.parse(new InputSource(is)); }
From source file:org.carrot2.source.pubmed.PubMedDocumentSource.java
/** * Gets PubMed entry ids matching the query. *//*from w w w. j av a 2 s.c o m*/ private List<String> getPubMedIds(final String query, final int requestedResults) throws Exception { final XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader(); reader.setFeature("http://xml.org/sax/features/validation", false); reader.setFeature("http://xml.org/sax/features/namespaces", true); PubMedSearchHandler searchHandler = new PubMedSearchHandler(); reader.setContentHandler(searchHandler); final String url = E_SEARCH_URL + "?db=pubmed&usehistory=n&term=" + StringUtils.urlEncodeWrapException(query, "UTF-8") + "&retmax=" + Integer.toString(requestedResults); final HttpUtils.Response response = HttpUtils.doGET(url, null, null); // Get document IDs if (response.status == HttpStatus.SC_OK) { reader.parse(new InputSource(response.getPayloadAsStream())); } else { throw new IOException("PubMed returned HTTP Error: " + response.status + ", HTTP payload: " + new String(response.payload, "iso8859-1")); } return searchHandler.getPubMedPrimaryIds(); }
From source file:org.carrot2.source.pubmed.PubMedDocumentSource.java
/** * Gets PubMed abstracts corresponding to the provided ids. *///from www . ja v a 2 s . co m private SearchEngineResponse getPubMedAbstracts(List<String> ids) throws Exception { if (ids.isEmpty()) { return new SearchEngineResponse(); } final XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader(); reader.setFeature("http://xml.org/sax/features/validation", false); reader.setFeature("http://xml.org/sax/features/namespaces", true); final PubMedFetchHandler fetchHandler = new PubMedFetchHandler(); reader.setContentHandler(fetchHandler); final String url = E_FETCH_URL + "?db=pubmed&retmode=xml&rettype=abstract&id=" + getIdsString(ids); final HttpUtils.Response response = HttpUtils.doGET(url, null, null); // Get document contents // No URL logging here, as the url can get really long if (response.status == HttpStatus.SC_OK) { reader.parse(new InputSource(response.getPayloadAsStream())); } else { throw new IOException("PubMed returned HTTP Error: " + response.status + ", HTTP payload: " + new String(response.payload, "iso8859-1")); } return fetchHandler.getResponse(); }
From source file:org.carrot2.source.yahoo.YahooSearchService.java
/** * Parse the response stream, assuming it is XML. *///from w ww .j a v a2s. co m private static SearchEngineResponse parseResponseXML(final InputStream is) throws IOException { try { final XMLResponseParser parser = new XMLResponseParser(); final XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader(); reader.setFeature("http://xml.org/sax/features/validation", false); reader.setFeature("http://xml.org/sax/features/namespaces", true); reader.setContentHandler(parser); reader.parse(new InputSource(is)); return parser.response; } catch (final SAXException e) { final Throwable cause = e.getException(); if (cause != null && cause instanceof IOException) { throw (IOException) cause; } throw new IOException("XML parsing exception: " + e.getMessage()); } catch (final ParserConfigurationException e) { throw new IOException("Could not acquire XML parser."); } }
From source file:org.cauldron.tests.util.DigesterContext.java
/**
 * Parses the input with the supplied reader, using the digester as content handler,
 * and hands back the digester's root object.
 *
 * @param reader reader that performs the parse
 * @param input  source to parse
 * @return whatever {@code digester.getRoot()} yields after the parse
 * @throws JAXBException wrapping any exception raised while parsing or fetching the root
 */
protected Object unmarshal(XMLReader reader, InputSource input) throws JAXBException {
    reader.setContentHandler(digester);

    final Object root;
    try {
        reader.parse(input);
        root = digester.getRoot();
    } catch (Exception failure) {
        throw new JAXBException(failure);
    }
    return root;
}
From source file:org.corpus_tools.pepper.core.PepperJobImpl.java
/** * {@inheritDoc PepperJob#load(URI)}//from w w w.ja v a2 s .c o m */ @Override public void load(URI uri) { if (uri.isFile()) { File wdFile = new File(uri.toFileString()); // set folder containing workflow description as base dir setBaseDir(uri.trimSegments(1)); SAXParser parser; XMLReader xmlReader; SAXParserFactory factory = SAXParserFactory.newInstance(); WorkflowDescriptionReader contentHandler = new WorkflowDescriptionReader(); contentHandler.setPepperJob(this); contentHandler.setLocation(uri); // remove all existing steps clear(); try { parser = factory.newSAXParser(); xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(contentHandler); } catch (ParserConfigurationException e) { throw new PepperModuleXMLResourceException("Cannot load Pepper workflow description file '" + wdFile.getAbsolutePath() + "': " + e.getMessage() + ". ", e); } catch (Exception e) { throw new PepperModuleXMLResourceException("Cannot load Pepper workflow description file '" + wdFile.getAbsolutePath() + "': " + e.getMessage() + ". ", e); } try { InputStream inputStream = new FileInputStream(wdFile); Reader reader = new InputStreamReader(inputStream, "UTF-8"); InputSource is = new InputSource(reader); is.setEncoding("UTF-8"); xmlReader.parse(is); } catch (SAXException e) { try { parser = factory.newSAXParser(); xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(contentHandler); xmlReader.parse(wdFile.getAbsolutePath()); } catch (Exception e1) { throw new PepperModuleXMLResourceException("Cannot load Pepper workflow description file '" + wdFile.getAbsolutePath() + "': " + e1.getMessage() + ". ", e1); } } catch (Exception e) { if (e instanceof PepperModuleException) { throw (PepperModuleException) e; } else { throw new PepperModuleXMLResourceException("Cannot load Pepper workflow description file'" + wdFile + "', because of a nested exception: " + e.getMessage() + ". 
", e); } } } else { throw new UnsupportedOperationException( "Currently Pepper can only load workflow description from local files."); } }
From source file:org.corpus_tools.salt.util.SaltUtil.java
/** * Loads a list of root objects coming from a SaltXML (.{@link #FILE_ENDING_SALT_XML}) * and returns it.//from w w w .ja va 2 s . co m * * @param objectURI * {@link URI} to SaltXML file containing the object * @return loaded objects */ public static List<Object> loadObjects(URI location) { if (location == null) { throw new SaltResourceException("Cannot load Salt object, because the given uri is null."); } File objectFile = new File( (location.toFileString() == null) ? location.toString() : location.toFileString()); if (!objectFile.exists()) { throw new SaltResourceException("Cannot load Salt object, because the file '" + objectFile.getAbsolutePath() + "' does not exist."); } SAXParser parser; XMLReader xmlReader; SAXParserFactory factory = SAXParserFactory.newInstance(); SaltXML10Handler contentHandler = new SaltXML10Handler(); try { parser = factory.newSAXParser(); xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(contentHandler); } catch (ParserConfigurationException e) { throw new SaltResourceException( "Cannot load Salt object from file '" + objectFile.getAbsolutePath() + "'.", e); } catch (Exception e) { throw new SaltResourceException( "Cannot load Salt object from file '" + objectFile.getAbsolutePath() + "'.", e); } try { InputStream inputStream = new FileInputStream(objectFile); Reader reader = new InputStreamReader(inputStream, "UTF-8"); InputSource is = new InputSource(reader); is.setEncoding("UTF-8"); xmlReader.parse(is); } catch (SAXException e) { try { parser = factory.newSAXParser(); xmlReader = parser.getXMLReader(); xmlReader.setContentHandler(contentHandler); xmlReader.parse(objectFile.getAbsolutePath()); } catch (Exception e1) { throw new SaltResourceException( "Cannot load Salt object from file '" + objectFile.getAbsolutePath() + "'.", e1); } } catch (Exception e) { if (e instanceof SaltException) { throw (SaltException) e; } else { throw new SaltResourceException( "Cannot load Salt object from file'" + objectFile + "', because of 
a nested exception. ", e); } } return contentHandler.getRootObjects(); }
From source file:org.deegree.tools.metadata.ISO19139Validator.java
/** * @param srcOpt/*from w w w . j av a2 s . c om*/ * @param schemaOpt * @param resultOpt * @throws IOException * @throws SAXException */ public void run(String srcOpt, String schemaOpt, String resultOpt) throws IOException, SAXException { File src = new File(srcOpt); if (!src.exists()) { throw new IllegalArgumentException("src does not exist: " + srcOpt + ". Check parameter " + OPT_SRC); } File result; if (resultOpt != null && resultOpt.length() > 0) { result = new File(resultOpt); if (!result.exists()) { result.createNewFile(); } } else { result = File.createTempFile(DEFAULT_FILENAME, ".txt"); } SCHEMAVERSION schemaVersion = SCHEMAVERSION.V2007; if (schemaOpt != null) { try { schemaVersion = SCHEMAVERSION.valueOf(schemaOpt); } catch (Exception e) { throw new IllegalArgumentException("Invalid argument for " + OPT_SCHEMA_VERSION + ": " + schemaOpt); } } String schema = "/META-INF/SCHEMAS_OPENGIS_NET/iso/19139/20070417/gmd/metadataEntity.xsd"; if (SCHEMAVERSION.V2006.equals(schemaVersion)) { schema = "/META-INF/SCHEMAS_OPENGIS_NET/iso/19139/20060504/gmd/metadataEntity.xsd"; } URL u = ISO19139Validator.class.getResource(schema); XMLReader parser = XMLReaderFactory.createXMLReader(); parser.setFeature("http://xml.org/sax/features/validation", true); parser.setFeature("http://apache.org/xml/features/validation/schema", true); parser.setProperty("http://apache.org/xml/properties/schema/external-schemaLocation", "http://www.isotc211.org/2005/gmd " + u.toExternalForm()); final FileWriter fw = new FileWriter(result); File[] filesToValidate; if (src.isDirectory()) { filesToValidate = src.listFiles(); fw.write("validate " + filesToValidate.length + " files from directory " + src); fw.write("\n"); } else { filesToValidate = new File[] { src }; } System.out.println("Start validation"); int noOfValidRecords = 0; for (int i = 0; i < filesToValidate.length; i++) { FileErrorHandler feh = new FileErrorHandler(fw); parser.setErrorHandler(feh); if (filesToValidate.length > 1) { 
fw.write("validate record " + i + " of " + filesToValidate.length); fw.write("\n"); } File fileToValidate = filesToValidate[i]; System.out.println(fileToValidate); fw.write("validate file " + fileToValidate.getAbsolutePath()); fw.write("\n"); try { parser.parse(new InputSource(new FileInputStream(fileToValidate))); } catch (Exception e) { String msg = "Could not validate current occured: " + e.getMessage() + ". Continue with next record"; System.err.println(msg); fw.write(msg); fw.write("\n"); continue; } fw.flush(); if (feh.isValid()) noOfValidRecords++; } fw.write(noOfValidRecords + " of " + filesToValidate.length + " records are valid."); fw.close(); System.out.println("Validation finished, result file: " + result.getAbsolutePath()); }
From source file:org.devtcg.five.meta.LastfmMetaTask.java
public void run() throws Exception { System.out.println("Accessing " + getMethodUrl()); HttpGet request = new HttpGet(getMethodUrl()); HttpResponse response = mClient.execute(request); if (mTask.isCancelled()) return;//w w w. ja v a 2 s . co m if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) { if (LOG.isWarnEnabled()) LOG.warn(getMethodUrl() + " replied " + response.getStatusLine()); response.getEntity().consumeContent(); return; } HttpEntity entity = response.getEntity(); InputStream in = entity.getContent(); try { XMLReader reader = XMLReaderFactory.createXMLReader(); reader.setContentHandler(getContentHandler()); reader.parse(new InputSource(in)); } finally { IOUtils.closeQuietlyNullSafe(in); } if (mTask.isCancelled()) return; onPostParse(); }
From source file:org.dhatim.delivery.dom.DOMParser.java
/** * Perform the actual parse into the supplied content handler. * @param source Source content stream to be parsed. * @param contentHandler Content handler instance that will build/append-to the DOM. * @throws SAXException Unable to parse the content. * @throws IOException Unable to read the input stream. *///from w ww. j a v a 2 s . c o m private void parse(Source source, DOMBuilder contentHandler) throws SAXException, IOException { ExecutionContext executionContext = getExecContext(); if (executionContext != null) { ContentDeliveryConfig deliveryConfig = executionContext.getDeliveryConfig(); XMLReader domReader = getXMLReader(executionContext); try { if (domReader == null) { domReader = deliveryConfig.getXMLReader(); } if (domReader == null) { domReader = createXMLReader(); } if (domReader instanceof HierarchyChangeReader) { ((HierarchyChangeReader) domReader) .setHierarchyChangeListener(new XMLReaderHierarchyChangeListener(executionContext)); } NamespaceDeclarationStack namespaceDeclarationStack = new NamespaceDeclarationStack(); NamespaceMappings.setNamespaceDeclarationStack(namespaceDeclarationStack, executionContext); attachNamespaceDeclarationStack(domReader, executionContext); attachXMLReader(domReader, executionContext); configureReader(domReader, contentHandler, executionContext, source); domReader.parse(createInputSource(source, executionContext.getContentEncoding())); } finally { try { if (domReader instanceof HierarchyChangeReader) { ((HierarchyChangeReader) domReader).setHierarchyChangeListener(null); } } finally { try { try { detachXMLReader(executionContext); } finally { if (domReader != null) { deliveryConfig.returnXMLReader(domReader); } } } finally { contentHandler.detachHandler(); } } } } else { XMLReader domReader = createXMLReader(); configureReader(domReader, contentHandler, null, source); domReader.parse(createInputSource(source, Charset.defaultCharset().name())); } }