List of usage examples for org.xml.sax XMLReader setFeature
/**
 * Sets the named feature flag of this reader to the given value.
 *
 * @param name  the feature name; the usage examples in this listing pass URIs such as
 *              {@code http://xml.org/sax/features/validation}
 * @param value the requested state of the feature ({@code true} or {@code false})
 * @throws SAXNotRecognizedException presumably when the reader does not know the feature name
 *                                   (callers in this listing catch it for non-Xerces parsers)
 * @throws SAXNotSupportedException  presumably when the reader knows the feature but cannot
 *                                   set it to the requested value - confirm against the SAX API docs
 */
public void setFeature(String name, boolean value) throws SAXNotRecognizedException, SAXNotSupportedException;
From source file:org.carrot2.source.pubmed.PubMedDocumentSource.java
/** * Gets PubMed entry ids matching the query. *//* w ww . ja v a 2 s . c o m*/ private List<String> getPubMedIds(final String query, final int requestedResults) throws Exception { final XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader(); reader.setFeature("http://xml.org/sax/features/validation", false); reader.setFeature("http://xml.org/sax/features/namespaces", true); PubMedSearchHandler searchHandler = new PubMedSearchHandler(); reader.setContentHandler(searchHandler); final String url = E_SEARCH_URL + "?db=pubmed&usehistory=n&term=" + StringUtils.urlEncodeWrapException(query, "UTF-8") + "&retmax=" + Integer.toString(requestedResults); final HttpUtils.Response response = HttpUtils.doGET(url, null, null); // Get document IDs if (response.status == HttpStatus.SC_OK) { reader.parse(new InputSource(response.getPayloadAsStream())); } else { throw new IOException("PubMed returned HTTP Error: " + response.status + ", HTTP payload: " + new String(response.payload, "iso8859-1")); } return searchHandler.getPubMedPrimaryIds(); }
From source file:org.carrot2.source.pubmed.PubMedDocumentSource.java
/** * Gets PubMed abstracts corresponding to the provided ids. *///from w w w .java 2s. c o m private SearchEngineResponse getPubMedAbstracts(List<String> ids) throws Exception { if (ids.isEmpty()) { return new SearchEngineResponse(); } final XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader(); reader.setFeature("http://xml.org/sax/features/validation", false); reader.setFeature("http://xml.org/sax/features/namespaces", true); final PubMedFetchHandler fetchHandler = new PubMedFetchHandler(); reader.setContentHandler(fetchHandler); final String url = E_FETCH_URL + "?db=pubmed&retmode=xml&rettype=abstract&id=" + getIdsString(ids); final HttpUtils.Response response = HttpUtils.doGET(url, null, null); // Get document contents // No URL logging here, as the url can get really long if (response.status == HttpStatus.SC_OK) { reader.parse(new InputSource(response.getPayloadAsStream())); } else { throw new IOException("PubMed returned HTTP Error: " + response.status + ", HTTP payload: " + new String(response.payload, "iso8859-1")); } return fetchHandler.getResponse(); }
From source file:org.carrot2.source.yahoo.YahooSearchService.java
/** * Parse the response stream, assuming it is XML. *///from w w w. ja va2s . c om private static SearchEngineResponse parseResponseXML(final InputStream is) throws IOException { try { final XMLResponseParser parser = new XMLResponseParser(); final XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader(); reader.setFeature("http://xml.org/sax/features/validation", false); reader.setFeature("http://xml.org/sax/features/namespaces", true); reader.setContentHandler(parser); reader.parse(new InputSource(is)); return parser.response; } catch (final SAXException e) { final Throwable cause = e.getException(); if (cause != null && cause instanceof IOException) { throw (IOException) cause; } throw new IOException("XML parsing exception: " + e.getMessage()); } catch (final ParserConfigurationException e) { throw new IOException("Could not acquire XML parser."); } }
From source file:org.chiba.xml.xforms.connector.SchemaValidator.java
/** * validate the instance according to the schema specified on the model * * @return false if the instance is not valid *//* ww w.ja v a 2 s .c o m*/ public boolean validateSchema(Model model, Node instance) throws XFormsException { boolean valid = true; String message; if (LOGGER.isDebugEnabled()) LOGGER.debug("SchemaValidator.validateSchema: validating instance"); //needed if we want to load schemas from Model + set it as "schemaLocation" attribute String schemas = model.getElement().getAttributeNS(NamespaceConstants.XFORMS_NS, "schema"); if (schemas != null && !schemas.equals("")) { // valid=false; //add schemas to element //shouldn't it be done on a copy of the doc ? Element el = null; if (instance.getNodeType() == Node.ELEMENT_NODE) el = (Element) instance; else if (instance.getNodeType() == Node.DOCUMENT_NODE) el = ((Document) instance).getDocumentElement(); else { if (LOGGER.isDebugEnabled()) LOGGER.debug("instance node type is: " + instance.getNodeType()); } String prefix = NamespaceResolver.getPrefix(el, XMLSCHEMA_INSTANCE_NS); //test if with targetNamespace or not //if more than one schema : namespaces are mandatory ! (optional only for 1) StringTokenizer tokenizer = new StringTokenizer(schemas, " ", false); String schemaLocations = null; String noNamespaceSchemaLocation = null; while (tokenizer.hasMoreElements()) { String token = (String) tokenizer.nextElement(); //check that it is an URL URI uri = null; try { uri = new java.net.URI(token); } catch (java.net.URISyntaxException ex) { if (LOGGER.isDebugEnabled()) LOGGER.debug(token + " is not an URI"); } if (uri != null) { String ns; try { ns = this.getSchemaNamespace(uri); if (ns != null && !ns.equals("")) { if (schemaLocations == null) schemaLocations = ns + " " + token; else schemaLocations = schemaLocations + " " + ns + " " + token; ///add the namespace declaration if it is not on the instance? //TODO: how to know with which prefix ? 
String nsPrefix = NamespaceResolver.getPrefix(el, ns); if (nsPrefix == null) { //namespace not declared ! LOGGER.warn("SchemaValidator: targetNamespace " + ns + " of schema " + token + " is not declared in instance: declaring it as default..."); el.setAttributeNS(NamespaceConstants.XMLNS_NS, NamespaceConstants.XMLNS_PREFIX, ns); } } else if (noNamespaceSchemaLocation == null) noNamespaceSchemaLocation = token; else { //we have more than one schema without namespace LOGGER.warn("SchemaValidator: There is more than one schema without namespace !"); } } catch (Exception ex) { LOGGER.warn( "Exception while trying to load schema: " + uri.toString() + ": " + ex.getMessage(), ex); //in case there was an exception: do nothing, do not set the schema } } } //write schemaLocations found if (schemaLocations != null && !schemaLocations.equals("")) el.setAttributeNS(XMLSCHEMA_INSTANCE_NS, prefix + ":schemaLocation", schemaLocations); if (noNamespaceSchemaLocation != null) el.setAttributeNS(XMLSCHEMA_INSTANCE_NS, prefix + ":noNamespaceSchemaLocation", noNamespaceSchemaLocation); //save and parse the doc ValidationErrorHandler handler = null; File f; try { //save document f = File.createTempFile("instance", ".xml"); f.deleteOnExit(); TransformerFactory trFact = TransformerFactory.newInstance(); Transformer trans = trFact.newTransformer(); DOMSource source = new DOMSource(el); StreamResult result = new StreamResult(f); trans.transform(source, result); if (LOGGER.isDebugEnabled()) LOGGER.debug("Validator.validateSchema: file temporarily saved in " + f.getAbsolutePath()); //parse it with error handler to validate it handler = new ValidationErrorHandler(); SAXParserFactory parserFact = SAXParserFactory.newInstance(); parserFact.setValidating(true); parserFact.setNamespaceAware(true); SAXParser parser = parserFact.newSAXParser(); XMLReader reader = parser.getXMLReader(); //validation activated reader.setFeature("http://xml.org/sax/features/validation", true); //schema validation 
activated reader.setFeature("http://apache.org/xml/features/validation/schema", true); //used only to validate the schema, not the instance //reader.setFeature( "http://apache.org/xml/features/validation/schema-full-checking", true); //validate only if there is a grammar reader.setFeature("http://apache.org/xml/features/validation/dynamic", true); parser.parse(f, handler); } catch (Exception ex) { LOGGER.warn("Validator.validateSchema: Exception in XMLSchema validation: " + ex.getMessage(), ex); //throw new XFormsException("XMLSchema validation failed. "+message); } //if no exception if (handler != null && handler.isValid()) valid = true; else { message = handler.getMessage(); //TODO: find a way to get the error message displayed throw new XFormsException("XMLSchema validation failed. " + message); } if (LOGGER.isDebugEnabled()) LOGGER.debug("Validator.validateSchema: result=" + valid); } return valid; }
From source file:org.commonjava.maven.galley.maven.parse.XMLInfrastructure.java
private Document fallbackParseDocument(String xml, final Object docSource, final Exception e) throws GalleyMavenXMLException { logger.debug(/* w w w. jav a 2 s.c om*/ "Failed to parse: {}. DOM error: {}. Trying STaX parse with IS_REPLACING_ENTITY_REFERENCES == false...", e, docSource, e.getMessage()); try { Source source; if (safeInputFactory != null) { xml = repairXmlDeclaration(xml); final XMLEventReader eventReader = safeInputFactory.createXMLEventReader(new StringReader(xml)); source = new StAXSource(eventReader); } else { // Deal with ø and other undeclared entities... xml = escapeNonXMLEntityRefs(xml); final XMLReader reader = XMLReaderFactory.createXMLReader(); reader.setFeature("http://xml.org/sax/features/validation", false); source = new SAXSource(reader, new InputSource(new StringReader(xml))); } final DOMResult result = new DOMResult(); final Transformer transformer = newTransformer(); transformer.transform(source, result); return (Document) result.getNode(); } catch (final TransformerException e1) { throw new GalleyMavenXMLException("Failed to parse: %s. Transformer error: %s.\nOriginal DOM error: %s", e1, docSource, e1.getMessage(), e.getMessage()); } catch (final SAXException e1) { throw new GalleyMavenXMLException("Failed to parse: %s. SAX error: %s.\nOriginal DOM error: %s", e1, docSource, e1.getMessage(), e.getMessage()); } catch (final XMLStreamException e1) { throw new GalleyMavenXMLException("Failed to parse: %s. STaX error: %s.\nOriginal DOM error: %s", e1, docSource, e1.getMessage(), e.getMessage()); } }
From source file:org.deegree.tools.metadata.ISO19139Validator.java
/**
 * Validates one metadata file, or every file of a directory, against the bundled ISO 19139
 * schema and writes a report to the result file.
 *
 * @param srcOpt
 *            path of the file or directory to validate; must exist
 * @param schemaOpt
 *            name of a {@code SCHEMAVERSION} constant, or {@code null} to use {@code V2007}
 * @param resultOpt
 *            path of the report file; when {@code null} or empty a temporary file is created
 * @throws IOException
 *             if the report file cannot be created or written, or the source directory
 *             cannot be listed
 * @throws SAXException
 *             if the XML reader cannot be created or configured
 * @throws IllegalArgumentException
 *             if the source does not exist or the schema version is unknown
 */
public void run(String srcOpt, String schemaOpt, String resultOpt) throws IOException, SAXException {
    File src = new File(srcOpt);
    if (!src.exists()) {
        throw new IllegalArgumentException("src does not exist: " + srcOpt + ". Check parameter " + OPT_SRC);
    }

    File result;
    if (resultOpt != null && resultOpt.length() > 0) {
        result = new File(resultOpt);
        if (!result.exists()) {
            result.createNewFile();
        }
    } else {
        result = File.createTempFile(DEFAULT_FILENAME, ".txt");
    }

    SCHEMAVERSION schemaVersion = SCHEMAVERSION.V2007;
    if (schemaOpt != null) {
        try {
            schemaVersion = SCHEMAVERSION.valueOf(schemaOpt);
        } catch (Exception e) {
            throw new IllegalArgumentException("Invalid argument for " + OPT_SCHEMA_VERSION + ": " + schemaOpt);
        }
    }

    String schema = "/META-INF/SCHEMAS_OPENGIS_NET/iso/19139/20070417/gmd/metadataEntity.xsd";
    if (SCHEMAVERSION.V2006.equals(schemaVersion)) {
        schema = "/META-INF/SCHEMAS_OPENGIS_NET/iso/19139/20060504/gmd/metadataEntity.xsd";
    }
    URL u = ISO19139Validator.class.getResource(schema);

    XMLReader parser = XMLReaderFactory.createXMLReader();
    parser.setFeature("http://xml.org/sax/features/validation", true);
    parser.setFeature("http://apache.org/xml/features/validation/schema", true);
    parser.setProperty("http://apache.org/xml/properties/schema/external-schemaLocation",
            "http://www.isotc211.org/2005/gmd " + u.toExternalForm());

    final FileWriter fw = new FileWriter(result);
    File[] filesToValidate;
    // The writer is closed in the finally block so it is released even when a write fails.
    try {
        if (src.isDirectory()) {
            filesToValidate = src.listFiles();
            if (filesToValidate == null) {
                // listFiles() returns null on an I/O error; fail clearly instead of with an NPE.
                throw new IOException("Could not list files of directory " + src);
            }
            fw.write("validate " + filesToValidate.length + " files from directory " + src);
            fw.write("\n");
        } else {
            filesToValidate = new File[] { src };
        }

        System.out.println("Start validation");
        int noOfValidRecords = 0;
        for (int i = 0; i < filesToValidate.length; i++) {
            FileErrorHandler feh = new FileErrorHandler(fw);
            parser.setErrorHandler(feh);
            if (filesToValidate.length > 1) {
                fw.write("validate record " + i + " of " + filesToValidate.length);
                fw.write("\n");
            }
            File fileToValidate = filesToValidate[i];
            System.out.println(fileToValidate);
            fw.write("validate file " + fileToValidate.getAbsolutePath());
            fw.write("\n");
            try {
                final FileInputStream in = new FileInputStream(fileToValidate);
                try {
                    parser.parse(new InputSource(in));
                } finally {
                    // Release the file handle for every record, not only at JVM exit / GC.
                    in.close();
                }
            } catch (Exception e) {
                String msg = "Could not validate current occured: " + e.getMessage()
                        + ". Continue with next record";
                System.err.println(msg);
                fw.write(msg);
                fw.write("\n");
                continue;
            }
            fw.flush();
            if (feh.isValid())
                noOfValidRecords++;
        }
        fw.write(noOfValidRecords + " of " + filesToValidate.length + " records are valid.");
    } finally {
        fw.close();
    }
    System.out.println("Validation finished, result file: " + result.getAbsolutePath());
}
From source file:org.dhatim.delivery.AbstractParser.java
protected XMLReader createXMLReader() throws SAXException { XMLReader reader; ExecutionContext execContext = getExecContext(); Source source = FilterSource.getSource(execContext); if (saxDriverConfig != null && saxDriverConfig.getResource() != null) { String className = saxDriverConfig.getResource(); reader = XMLReaderFactory.createXMLReader(className); } else if (source instanceof JavaSource) { JavaSource javaSource = (JavaSource) source; if (isFeatureOn(JavaSource.FEATURE_GENERATE_EVENT_STREAM, saxDriverConfig) && !javaSource.isEventStreamRequired()) { throw new SAXException("Invalid Smooks configuration. Feature '" + JavaSource.FEATURE_GENERATE_EVENT_STREAM + "' is explicitly configured 'on' in the Smooks configuration, while the supplied JavaSource has explicitly configured event streaming to be off (through a call to JavaSource.setEventStreamRequired)."); }/*from w ww . j av a2 s. c o m*/ // Event streaming must be explicitly turned off. If is on as long as it is (a) not configured "off" in // the smooks config (via the reader features) and (b) not turned off via the supplied JavaSource... boolean eventStreamingOn = (!isFeatureOff(JavaSource.FEATURE_GENERATE_EVENT_STREAM, saxDriverConfig) && javaSource.isEventStreamRequired()); if (eventStreamingOn && javaSource.getSourceObjects() != null) { reader = new XStreamXMLReader(); } else { reader = new NullSourceXMLReader(); } } else { reader = XMLReaderFactory.createXMLReader(); } if (reader instanceof SmooksXMLReader) { if (saxDriverConfig != null) { Configurator.configure(reader, saxDriverConfig, execContext.getContext()); } else { Configurator.initialise(reader); } } reader.setFeature("http://xml.org/sax/features/namespaces", true); reader.setFeature("http://xml.org/sax/features/namespace-prefixes", true); setHandlers(reader); setFeatures(reader); return reader; }
From source file:org.dhatim.delivery.AbstractParser.java
private void setFeatures(XMLReader reader) throws SAXNotSupportedException, SAXNotRecognizedException { // Try setting the xerces "notify-char-refs" feature, may fail if it's not Xerces but that's OK... try {/* w w w . j ava2s . c o m*/ reader.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true); } catch (Throwable t) { // Ignore } // Report namespace decls as per SAX 2.0.2 spec... try { // http://www.saxproject.org/apidoc/org/xml/sax/package-summary.html#package_description reader.setFeature("http://xml.org/sax/features/xmlns-uris", true); } catch (Throwable t) { // Not a SAX 2.0.2 compliant parser... Ignore } if (saxDriverConfig != null) { List<Parameter> features; features = saxDriverConfig.getParameters(FEATURE_ON); if (features != null) { for (Parameter feature : features) { reader.setFeature(feature.getValue(), true); } } features = saxDriverConfig.getParameters(FEATURE_OFF); if (features != null) { for (Parameter feature : features) { reader.setFeature(feature.getValue(), false); } } } }
From source file:org.dita.dost.module.reader.AbstractReaderModule.java
/**
 * Read a file and process it for list information.
 * <p>
 * The file is parsed through the reader for {@code ref.format} and the filters returned by
 * {@code getProcessingPipe}, and the filtered result is serialized to a temporary output
 * file. Failures are either rethrown as {@link DITAOTException} (root file, or strict
 * processing mode) or logged and recorded in {@code failureList}.
 *
 * @param ref reference to the file to process; its {@code filename} and {@code format} are read
 * @param parseFile file to parse, may be {@code null}, in which case {@code ref.filename} is parsed
 * @throws DITAOTException if processing failed
 */
void readFile(final Reference ref, final URI parseFile) throws DITAOTException {
    currentFile = ref.filename;
    assert currentFile.isAbsolute();
    final URI src = parseFile != null ? parseFile : currentFile;
    assert src.isAbsolute();
    final URI rel = tempFileNameScheme.generateTempFileName(currentFile);
    outputFile = new File(job.tempDirURI.resolve(rel));
    final File outputDir = outputFile.getParentFile();
    // Without an output directory nothing can be written; log and give up on this file.
    if (!outputDir.exists() && !outputDir.mkdirs()) {
        logger.error("Failed to create output directory " + outputDir.getAbsolutePath());
        return;
    }
    validateMap = Collections.emptyMap();
    defaultValueMap = Collections.emptyMap();
    logger.info("Processing " + currentFile + " to " + outputFile.toURI());
    final String[] params = { currentFile.toString() };

    // Verify stub for current file is in Job
    final FileInfo fi = job.getFileInfo(currentFile);
    if (fi == null) {
        final FileInfo stub = new FileInfo.Builder().src(currentFile).uri(rel).result(currentFile)
                .isInput(currentFile.equals(rootFile)).build();
        job.add(stub);
    }

    // InputSource in = null;
    Result out = null;
    try {
        final TransformerFactory tf = TransformerFactory.newInstance();
        final SAXTransformerFactory stf = (SAXTransformerFactory) tf;
        final TransformerHandler serializer = stf.newTransformerHandler();

        XMLReader parser = getXmlReader(ref.format);
        XMLReader xmlSource = parser;
        // Chain the filters: each one takes the previous stage as its parent, so the last
        // filter becomes the source that is actually parsed.
        for (final XMLFilter f : getProcessingPipe(currentFile)) {
            f.setParent(xmlSource);
            f.setEntityResolver(CatalogUtils.getCatalogResolver());
            xmlSource = f;
        }

        try {
            // Lexical events from the underlying parser are routed to a DTDForwardHandler
            // built around the end of the filter chain.
            final LexicalHandler lexicalHandler = new DTDForwardHandler(xmlSource);
            parser.setProperty("http://xml.org/sax/properties/lexical-handler", lexicalHandler);
            parser.setFeature("http://xml.org/sax/features/lexical-handler", true);
        } catch (final SAXNotRecognizedException e) {
            // Deliberately ignored: processing continues without the lexical handler when the
            // parser does not recognize the property or feature.
        }

        // in = new InputSource(src.toString());
        out = new StreamResult(new FileOutputStream(outputFile));
        serializer.setResult(out);
        xmlSource.setContentHandler(serializer);
        xmlSource.parse(src.toString());

        if (listFilter.isValidInput()) {
            processParseResult(currentFile);
            categorizeCurrentFile(ref);
        } else if (!currentFile.equals(rootFile)) {
            // Invalid non-root input is only a warning; the root file case is handled after
            // the try block.
            logger.warn(MessageUtils.getMessage("DOTJ021W", params).toString());
            failureList.add(currentFile);
        }
    } catch (final RuntimeException e) {
        // Rethrown untouched so the generic Exception handler below does not wrap it.
        throw e;
    } catch (final SAXParseException sax) {
        // A DITAOTException carried inside the parse exception is surfaced as-is.
        final Exception inner = sax.getException();
        if (inner != null && inner instanceof DITAOTException) {
            throw (DITAOTException) inner;
        }
        // Root file and strict mode abort; otherwise the error is logged and the file
        // is marked as failed.
        if (currentFile.equals(rootFile)) {
            throw new DITAOTException(
                    MessageUtils.getMessage("DOTJ012F", params).toString() + ": " + sax.getMessage(), sax);
        } else if (processingMode == Mode.STRICT) {
            throw new DITAOTException(
                    MessageUtils.getMessage("DOTJ013E", params).toString() + ": " + sax.getMessage(), sax);
        } else {
            logger.error(MessageUtils.getMessage("DOTJ013E", params).toString() + ": " + sax.getMessage(), sax);
        }
        failureList.add(currentFile);
    } catch (final FileNotFoundException e) {
        // Two cases: the current file itself does not exist, or it exists and something
        // else could not be loaded; each has its own message codes.
        if (!exists(currentFile)) {
            if (currentFile.equals(rootFile)) {
                throw new DITAOTException(MessageUtils.getMessage("DOTA069F", params).toString(), e);
            } else if (processingMode == Mode.STRICT) {
                throw new DITAOTException(MessageUtils.getMessage("DOTX008E", params).toString(), e);
            } else {
                logger.error(MessageUtils.getMessage("DOTX008E", params).toString());
            }
        } else if (currentFile.equals(rootFile)) {
            throw new DITAOTException(MessageUtils.getMessage("DOTJ078F", params).toString()
                    + " Cannot load file: " + e.getMessage(), e);
        } else if (processingMode == Mode.STRICT) {
            throw new DITAOTException(MessageUtils.getMessage("DOTJ079E", params).toString()
                    + " Cannot load file: " + e.getMessage(), e);
        } else {
            logger.error(MessageUtils.getMessage("DOTJ079E", params).toString() + " Cannot load file: "
                    + e.getMessage());
        }
        failureList.add(currentFile);
    } catch (final Exception e) {
        // Any other checked failure follows the same root / strict / lenient policy as
        // parse errors.
        if (currentFile.equals(rootFile)) {
            throw new DITAOTException(
                    MessageUtils.getMessage("DOTJ012F", params).toString() + ": " + e.getMessage(), e);
        } else if (processingMode == Mode.STRICT) {
            throw new DITAOTException(
                    MessageUtils.getMessage("DOTJ013E", params).toString() + ": " + e.getMessage(), e);
        } else {
            logger.error(MessageUtils.getMessage("DOTJ013E", params).toString() + ": " + e.getMessage(), e);
        }
        failureList.add(currentFile);
    } finally {
        // Always release the output; a close failure is logged, not propagated.
        if (out != null) {
            try {
                close(out);
            } catch (final IOException e) {
                logger.error(e.getMessage(), e);
            }
        }
        // Do not leave output behind for a file that was recorded as failed.
        if (failureList.contains(currentFile)) {
            FileUtils.deleteQuietly(outputFile);
        }
    }

    if (!listFilter.isValidInput() && currentFile.equals(rootFile)) {
        if (validate) {
            // stop the build if all content in the input file was filtered out.
            throw new DITAOTException(MessageUtils.getMessage("DOTJ022F", params).toString());
        } else {
            // stop the build if the content of the file is not valid.
            throw new DITAOTException(MessageUtils.getMessage("DOTJ034F", params).toString());
        }
    }

    doneList.add(currentFile);
    // Reset the filters' state after this file.
    listFilter.reset();
    keydefFilter.reset();
}
From source file:org.dita.dost.reader.TestGenListModuleReader.java
private XMLReader initXMLReader(final File ditaDir, final boolean validate, final File rootFile) throws SAXException, IOException { final XMLReader parser = XMLUtils.getXMLReader(); if (validate == true) { parser.setFeature(FEATURE_VALIDATION, true); try {/*from w w w . j a v a 2 s . c o m*/ parser.setFeature(FEATURE_VALIDATION_SCHEMA, true); } catch (final SAXNotRecognizedException e) { // Not Xerces, ignore exception } } else { final String msg = MessageUtils.getInstance().getMessage("DOTJ037W").toString(); } CatalogUtils.setDitaDir(ditaDir); parser.setEntityResolver(CatalogUtils.getCatalogResolver()); return parser; }