List of usage examples for org.apache.poi.poifs.filesystem DirectoryNode getEntryNames
public Set<String> getEntryNames()
From source file: org.apache.tika.parser.wordperfect.QPWTextExtractor.java
License:Apache License
@SuppressWarnings("resource") public void extract(InputStream input, XHTMLContentHandler xhtml, Metadata metadata) throws IOException, SAXException, TikaException { POIFSFileSystem pfs = new POIFSFileSystem(input); DirectoryNode rootNode = pfs.getRoot(); if (rootNode == null || !rootNode.hasEntry(OLE_DOCUMENT_NAME)) { throw new UnsupportedFormatException("Unsupported QuattroPro file format. " + "Looking for OLE entry \"" + OLE_DOCUMENT_NAME + "\". Found: " + rootNode.getEntryNames()); }/* ww w.j av a 2 s .co m*/ //TODO shall we validate and throw warning/error if the file does not //start with a BOF and ends with a EOF? xhtml.startElement("p"); try (WPInputStream in = new WPInputStream(pfs.createDocumentInputStream(OLE_DOCUMENT_NAME))) { Context ctx = new Context(in, xhtml, metadata); while (hasNext(in)) { ctx.type = in.readWPShort(); ctx.bodyLength = in.readWPShort(); Extractor extractor = EXTRACTORS.get(ctx.type); if (extractor != null) { extractor.extract(ctx); } else { // Use DEBUG to find out what we are ignoring // Extractor.DEBUG.extract(ctx); Extractor.IGNORE.extract(ctx); } } } xhtml.endElement("p"); }