List of usage examples for the org.apache.commons.io.input.CloseShieldInputStream constructor
public CloseShieldInputStream(InputStream in)
From source file:org.apache.tika.parser.pkg.PackageParser.java
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { //lazily load the MediaTypeRegistry at parse time //only want to call getDefaultConfig() once, and can't //load statically because of the ForkParser TikaConfig config = context.get(TikaConfig.class); MediaTypeRegistry mediaTypeRegistry = null; if (config != null) { mediaTypeRegistry = config.getMediaTypeRegistry(); } else {// w ww. j av a 2s. c o m if (bufferedMediaTypeRegistry == null) { //buffer this for next time. synchronized (lock) { //now that we're locked, check again if (bufferedMediaTypeRegistry == null) { bufferedMediaTypeRegistry = TikaConfig.getDefaultConfig().getMediaTypeRegistry(); } } } mediaTypeRegistry = bufferedMediaTypeRegistry; } // Ensure that the stream supports the mark feature if (!stream.markSupported()) { stream = new BufferedInputStream(stream); } TemporaryResources tmp = new TemporaryResources(); ArchiveInputStream ais = null; try { ArchiveStreamFactory factory = context.get(ArchiveStreamFactory.class, new ArchiveStreamFactory()); // At the end we want to close the archive stream to release // any associated resources, but the underlying document stream // should not be closed ais = factory.createArchiveInputStream(new CloseShieldInputStream(stream)); } catch (StreamingNotSupportedException sne) { // Most archive formats work on streams, but a few need files if (sne.getFormat().equals(ArchiveStreamFactory.SEVEN_Z)) { // Rework as a file, and wrap stream.reset(); TikaInputStream tstream = TikaInputStream.get(stream, tmp); // Seven Zip suports passwords, was one given? 
String password = null; PasswordProvider provider = context.get(PasswordProvider.class); if (provider != null) { password = provider.getPassword(metadata); } SevenZFile sevenz; if (password == null) { sevenz = new SevenZFile(tstream.getFile()); } else { sevenz = new SevenZFile(tstream.getFile(), password.getBytes("UnicodeLittleUnmarked")); } // Pending a fix for COMPRESS-269 / TIKA-1525, this bit is a little nasty ais = new SevenZWrapper(sevenz); } else { tmp.close(); throw new TikaException("Unknown non-streaming format " + sne.getFormat(), sne); } } catch (ArchiveException e) { tmp.close(); throw new TikaException("Unable to unpack document stream", e); } updateMediaType(ais, mediaTypeRegistry, metadata); // Use the delegate parser to parse the contained document EmbeddedDocumentExtractor extractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); try { ArchiveEntry entry = ais.getNextEntry(); while (entry != null) { if (!entry.isDirectory()) { parseEntry(ais, entry, extractor, metadata, xhtml); } entry = ais.getNextEntry(); } } catch (UnsupportedZipFeatureException zfe) { // If it's an encrypted document of unknown password, report as such if (zfe.getFeature() == Feature.ENCRYPTION) { throw new EncryptedDocumentException(zfe); } // Otherwise throw the exception throw new TikaException("UnsupportedZipFeature", zfe); } catch (PasswordRequiredException pre) { throw new EncryptedDocumentException(pre); } finally { ais.close(); tmp.close(); } xhtml.endDocument(); }
From source file:org.apache.tika.parser.txt.TXTParser.java
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // Automatically detect the character encoding try (AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata, getEncodingDetector(context))) { //try to get detected content type; could be a subclass of text/plain //such as vcal, etc. String incomingMime = metadata.get(Metadata.CONTENT_TYPE); MediaType mediaType = MediaType.TEXT_PLAIN; if (incomingMime != null) { MediaType tmpMediaType = MediaType.parse(incomingMime); if (tmpMediaType != null) { mediaType = tmpMediaType; }//w ww .j a va 2s.c o m } Charset charset = reader.getCharset(); MediaType type = new MediaType(mediaType, charset); metadata.set(Metadata.CONTENT_TYPE, type.toString()); // deprecated, see TIKA-431 metadata.set(Metadata.CONTENT_ENCODING, charset.name()); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); xhtml.startElement("p"); char[] buffer = new char[4096]; int n = reader.read(buffer); while (n != -1) { xhtml.characters(buffer, 0, n); n = reader.read(buffer); } xhtml.endElement("p"); xhtml.endDocument(); } }
From source file:org.apache.tika.parser.xml.XMLParser.java
/**
 * Parses an XML document with the SAX parser from the parse context and
 * wraps the output in a single XHTML paragraph.
 *
 * <p>If the metadata has no content type yet, {@code application/xml} is set.
 * The caller's stream is shielded so that the SAX parser cannot close it. The
 * paragraph and the document are ended in a {@code finally} block, so they
 * are closed even when parsing fails.
 *
 * @param stream   the XML document
 * @param handler  receives the SAX events
 * @param metadata document metadata; content type is filled in when missing
 * @param context  parse context supplying the SAX parser
 * @throws IOException   if the stream cannot be read
 * @throws SAXException  if the failure was caused by the content handler
 * @throws TikaException with message "XML parse error" for other SAX failures
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    final boolean typeMissing = metadata.get(Metadata.CONTENT_TYPE) == null;
    if (typeMissing) {
        metadata.set(Metadata.CONTENT_TYPE, "application/xml");
    }

    final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    xhtml.startElement("p");

    // Tag exceptions coming from the caller's handler so they can be told
    // apart from genuine parse errors below.
    TaggedContentHandler taggedHandler = new TaggedContentHandler(handler);
    try {
        // Keep the SAX parser from closing the caller's stream.
        InputStream shielded = new CloseShieldInputStream(stream);
        context.getSAXParser().parse(
                shielded,
                new OfflineContentHandler(
                        new EmbeddedContentHandler(getContentHandler(taggedHandler, metadata, context))));
    } catch (SAXException parseFailure) {
        taggedHandler.throwIfCauseOf(parseFailure);
        throw new TikaException("XML parse error", parseFailure);
    } finally {
        xhtml.endElement("p");
        xhtml.endDocument();
    }
}
From source file:org.dataconservancy.packaging.tool.ser.JenaModelSerializer.java
/** * {@inheritDoc}/* w w w . j av a 2 s . c o m*/ * @param inputStream {@inheritDoc} * @return {@inheritDoc} * @throws XmlMappingException {@inheritDoc} * @throws IOException {@inheritDoc} */ @Override protected Object unmarshalInputStream(InputStream inputStream) throws XmlMappingException, IOException { Model model = modelFactory.newModel(); // Prevent Jena readers from closing the input stream model.read(new CloseShieldInputStream(inputStream), base, lang); return model; }
From source file:org.eclipse.packagedrone.repo.aspect.common.p2.internal.P2Unzipper.java
private void processMetaData(final Context context, final InputStream in, final String filename, final String xpath) throws Exception { // parse input final Document doc = this.xml.newDocumentBuilder().parse(new CloseShieldInputStream(in)); final XPathExpression path = this.xml.newXPathFactory().newXPath().compile(xpath); // filter/*w w w . j ava 2 s .co m*/ final NodeList result = XmlHelper.executePath(doc, path); // write filtered output final Document fragmentDoc = this.xml.newDocumentBuilder().newDocument(); Node node = result.item(0); node = fragmentDoc.adoptNode(node); fragmentDoc.appendChild(node); // create artifact context.createVirtualArtifact(filename, out -> { try { XmlHelper.write(this.xml.newTransformerFactory(), fragmentDoc, new StreamResult(out)); } catch (final Exception e) { throw new IOException(e); } }, null); }
From source file:org.erdc.cobie.shared.COBieUtility.java
/**
 * Checks whether a stream can be parsed as a document of the given schema type.
 *
 * <p>The stream is parsed through a {@link CloseShieldInputStream}, so the
 * caller's stream is not closed by the parser. The shield itself is closed
 * on every path, matching {@code isWorkbook}.
 *
 * @param inputStream the candidate document
 * @param type        the schema type to parse against
 * @return {@code true} if parsing completed without an exception,
 *         {@code false} otherwise
 */
public static boolean isValidSchemaDocument(InputStream inputStream, SchemaType type) {
    CloseShieldInputStream inputStreamCopy = new CloseShieldInputStream(inputStream);
    try {
        org.apache.xmlbeans.XmlBeans.getContextTypeLoader().parse(inputStreamCopy, type, null);
        return true;
    } catch (Exception ex) {
        // Any failure means "not valid"; the trace is kept as the only diagnostic.
        // NOTE(review): replace with the project's logger if one is available.
        ex.printStackTrace();
        return false;
    } finally {
        inputStreamCopy.close();
    }
}
From source file:org.erdc.cobie.shared.spreadsheetml.transformation.cobietab.COBieSpreadSheet.java
License:asdf
public static boolean isWorkbook(InputStream candidateWorksheet) { CloseShieldInputStream inputStreamCopy = new CloseShieldInputStream(candidateWorksheet); boolean isWorkbook = false; try {//w w w . j a v a2 s. c o m nl.fountain.xelem.lex.ExcelReader rdr = new nl.fountain.xelem.lex.ExcelReader(); Workbook workbook = rdr.getWorkbook(new InputSource(inputStreamCopy)); isWorkbook = ((workbook != null) && workbook.hasExcelWorkbook()); } catch (Exception ex) { } finally { inputStreamCopy.close(); } return isWorkbook; }
From source file:org.exist.collections.Collection.java
private InputSource closeShieldInputSource(final InputSource source) { final InputSource protectedInputSource = new InputSource(); protectedInputSource.setEncoding(source.getEncoding()); protectedInputSource.setSystemId(source.getSystemId()); protectedInputSource.setPublicId(source.getPublicId()); if (source.getByteStream() != null) { //TODO consider AutoCloseInputStream final InputStream closeShieldByteStream = new CloseShieldInputStream(source.getByteStream()); protectedInputSource.setByteStream(closeShieldByteStream); }/*from ww w . ja v a 2 s . c o m*/ if (source.getCharacterStream() != null) { //TODO consider AutoCloseReader final Reader closeShieldReader = new CloseShieldReader(source.getCharacterStream()); protectedInputSource.setCharacterStream(closeShieldReader); } return protectedInputSource; }
From source file:org.exist.collections.MutableCollection.java
private InputSource closeShieldInputSource(final InputSource source) { final InputSource protectedInputSource = new InputSource(); protectedInputSource.setEncoding(source.getEncoding()); protectedInputSource.setSystemId(source.getSystemId()); protectedInputSource.setPublicId(source.getPublicId()); if (source.getByteStream() != null) { //TODO consider AutoCloseInputStream final InputStream closeShieldByteStream = new CloseShieldInputStream(source.getByteStream()); protectedInputSource.setByteStream(closeShieldByteStream); }/*ww w . j a va 2 s . co m*/ if (source.getCharacterStream() != null) { //TODO consider AutoCloseReader final Reader closeShieldReader = new CloseShieldReader(source.getCharacterStream()); protectedInputSource.setCharacterStream(closeShieldReader); } return protectedInputSource; }
From source file:org.exist.xquery.functions.request.GetData.java
private Sequence parseAsXml(InputStream is) { Sequence result = Sequence.EMPTY_SEQUENCE; XMLReader reader = null;//w w w .ja v a2 s . c o m context.pushDocumentContext(); try { //try and construct xml document from input stream, we use eXist's in-memory DOM implementation //we have to use CloseShieldInputStream otherwise the parser closes the stream and we cant later reread final InputSource src = new InputSource(new CloseShieldInputStream(is)); reader = context.getBroker().getBrokerPool().getParserPool().borrowXMLReader(); final MemTreeBuilder builder = context.getDocumentBuilder(); final DocumentBuilderReceiver receiver = new DocumentBuilderReceiver(builder, true); reader.setContentHandler(receiver); reader.setProperty(Namespaces.SAX_LEXICAL_HANDLER, receiver); reader.parse(src); final Document doc = receiver.getDocument(); result = (NodeValue) doc; } catch (final SAXException saxe) { //do nothing, we will default to trying to return a string below } catch (final IOException ioe) { //do nothing, we will default to trying to return a string below } finally { context.popDocumentContext(); if (reader != null) { context.getBroker().getBrokerPool().getParserPool().returnXMLReader(reader); } } return result; }