List of usage examples for org.apache.commons.compress.archivers.StreamingNotSupportedException#getFormat
public String getFormat()
From source file: org.apache.tika.parser.pkg.PackageParser.java
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { //lazily load the MediaTypeRegistry at parse time //only want to call getDefaultConfig() once, and can't //load statically because of the ForkParser TikaConfig config = context.get(TikaConfig.class); MediaTypeRegistry mediaTypeRegistry = null; if (config != null) { mediaTypeRegistry = config.getMediaTypeRegistry(); } else {/* w ww . ja v a 2 s . co m*/ if (bufferedMediaTypeRegistry == null) { //buffer this for next time. synchronized (lock) { //now that we're locked, check again if (bufferedMediaTypeRegistry == null) { bufferedMediaTypeRegistry = TikaConfig.getDefaultConfig().getMediaTypeRegistry(); } } } mediaTypeRegistry = bufferedMediaTypeRegistry; } // Ensure that the stream supports the mark feature if (!stream.markSupported()) { stream = new BufferedInputStream(stream); } TemporaryResources tmp = new TemporaryResources(); ArchiveInputStream ais = null; try { ArchiveStreamFactory factory = context.get(ArchiveStreamFactory.class, new ArchiveStreamFactory()); // At the end we want to close the archive stream to release // any associated resources, but the underlying document stream // should not be closed ais = factory.createArchiveInputStream(new CloseShieldInputStream(stream)); } catch (StreamingNotSupportedException sne) { // Most archive formats work on streams, but a few need files if (sne.getFormat().equals(ArchiveStreamFactory.SEVEN_Z)) { // Rework as a file, and wrap stream.reset(); TikaInputStream tstream = TikaInputStream.get(stream, tmp); // Seven Zip suports passwords, was one given? 
String password = null; PasswordProvider provider = context.get(PasswordProvider.class); if (provider != null) { password = provider.getPassword(metadata); } SevenZFile sevenz; if (password == null) { sevenz = new SevenZFile(tstream.getFile()); } else { sevenz = new SevenZFile(tstream.getFile(), password.getBytes("UnicodeLittleUnmarked")); } // Pending a fix for COMPRESS-269 / TIKA-1525, this bit is a little nasty ais = new SevenZWrapper(sevenz); } else { tmp.close(); throw new TikaException("Unknown non-streaming format " + sne.getFormat(), sne); } } catch (ArchiveException e) { tmp.close(); throw new TikaException("Unable to unpack document stream", e); } updateMediaType(ais, mediaTypeRegistry, metadata); // Use the delegate parser to parse the contained document EmbeddedDocumentExtractor extractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); try { ArchiveEntry entry = ais.getNextEntry(); while (entry != null) { if (!entry.isDirectory()) { parseEntry(ais, entry, extractor, metadata, xhtml); } entry = ais.getNextEntry(); } } catch (UnsupportedZipFeatureException zfe) { // If it's an encrypted document of unknown password, report as such if (zfe.getFeature() == Feature.ENCRYPTION) { throw new EncryptedDocumentException(zfe); } // Otherwise throw the exception throw new TikaException("UnsupportedZipFeature", zfe); } catch (PasswordRequiredException pre) { throw new EncryptedDocumentException(pre); } finally { ais.close(); tmp.close(); } xhtml.endDocument(); }