List of usage examples for org.apache.poi.poifs.filesystem.Entry#getName()
/** Returns the name of this {@code Entry} as a {@code String}. */
public String getName();
From source file:NewEmptyJUnitTest.java
/** * [RESOLVED FIXED] Bug 51686 - Update to POI 3.8 beta 4 causes * ConcurrentModificationException in Tika's OfficeParser */// w ww. j ava 2 s . c o m public void testBug51686() throws IOException { InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream("Bug51686.doc"); POIFSFileSystem fs = new POIFSFileSystem(is); String text = null; for (Entry entry : fs.getRoot()) { if ("WordDocument".equals(entry.getName())) { WordExtractor ex = new WordExtractor(fs); try { text = ex.getText(); } finally { ex.close(); } } } assertNotNull(text); }
From source file:com.argo.hwp.v5.HwpTextExtractorV5.java
License:Open Source License
/** * ? /*from ww w.j a va 2 s . c om*/ * * @param writer * @param source * * @return * @throws IOException */ private static void extractText(FileHeader header, NPOIFSFileSystem fs, Writer writer) throws IOException { DirectoryNode root = fs.getRoot(); // BodyText ? Entry bodyText = root.getEntry("BodyText"); if (bodyText == null || !bodyText.isDirectoryEntry()) throw new IOException("Invalid BodyText"); Iterator<Entry> iterator = ((DirectoryEntry) bodyText).getEntries(); while (iterator.hasNext()) { Entry entry = iterator.next(); if (entry.getName().startsWith("Section") && entry instanceof DocumentEntry) { log.debug("extract {}", entry.getName()); InputStream input = new NDocumentInputStream((DocumentEntry) entry); if (header.compressed) input = new InflaterInputStream(input, new Inflater(true)); HwpStreamReader sectionStream = new HwpStreamReader(input); try { extractText(sectionStream, writer); } finally { // ? ? ? try { input.close(); } catch (IOException e) { log.error("? ??", e); } } } else { log.warn(" Entry '{}'({})", entry.getName(), entry); } } }
From source file:com.hp.application.automation.tools.octane.actions.UFTParameterFactory.java
License:Apache License
public static String convertResourceMtrAsJSON(InputStream resourceMtrInputStream) throws IOException { //TODO: Check is exists poiFS = new POIFSFileSystem(resourceMtrInputStream); DirectoryNode root = poiFS.getRoot(); for (Entry entry : root) { String name = entry.getName(); if (name.equals("ComponentInfo")) { if (entry instanceof DirectoryEntry) { System.out.println(entry); } else if (entry instanceof DocumentEntry) { byte[] content = new byte[((DocumentEntry) entry).getSize()]; poiFS.createDocumentInputStream("ComponentInfo").read(content); String fromUnicodeLE = StringUtil.getFromUnicodeLE(content); xmlData = fromUnicodeLE.substring(fromUnicodeLE.indexOf('<')).replaceAll("\u0000", ""); // System.out.println(xmlData); }/*from ww w .j a v a2 s.co m*/ } } try { SAXBuilder saxBuilder = new SAXBuilder(XMLReaders.NONVALIDATING, (SAXHandlerFactory) null, (JDOMFactory) null); Document document = null; document = saxBuilder.build(new StringReader(xmlData)); Element classElement = document.getRootElement(); List<Element> studentList = classElement.getChildren(); ObjectMapper mapper = new ObjectMapper(); ArrayList<UFTParameter> uftParameters = new ArrayList<UFTParameter>(); UFTParameter uftParameter = new UFTParameter(); for (int temp = 0; temp < studentList.size(); temp++) { Element tag = studentList.get(temp); if ("ArgumentsCollection".equalsIgnoreCase(tag.getName())) { List<Element> children = tag.getChildren(); for (int i = 0; i < children.size(); i++) { Element element = children.get(i); List<Element> elements = element.getChildren(); for (int j = 0; j < elements.size(); j++) { Element element1 = elements.get(j); switch (element1.getName()) { case "ArgName": uftParameter.setArgName(element1.getValue()); break; case "ArgDirection": uftParameter.setArgDirection(Integer.parseInt(element1.getValue())); break; case "ArgDefaultValue": uftParameter.setArgDefaultValue(element1.getValue()); break; case "ArgType": uftParameter.setArgType(element1.getValue()); break; case 
"ArgIsExternal": uftParameter.setArgIsExternal(Integer.parseInt(element1.getValue())); break; default: logger.warning( String.format("Element name %s didn't match any case", element1.getName())); break; } } uftParameters.add(uftParameter); } return mapper.writerWithDefaultPrettyPrinter().writeValueAsString(uftParameters); } } } catch (Exception e) { logger.severe(e.getMessage()); } return null; }
From source file:com.hp.octane.integrations.uft.UftTestDiscoveryUtils.java
License:Apache License
private static String extractXmlContentFromTspFile(InputStream stream) throws IOException { POIFSFileSystem poiFS = new POIFSFileSystem(stream); DirectoryNode root = poiFS.getRoot(); String xmlData = ""; for (Entry entry : root) { String name = entry.getName(); if ("ComponentInfo".equals(name)) { if (entry instanceof DirectoryEntry) { System.out.println(entry); } else if (entry instanceof DocumentEntry) { byte[] content = new byte[((DocumentEntry) entry).getSize()]; int readBytes = poiFS.createDocumentInputStream("ComponentInfo").read(content); if (readBytes < content.length) { // [YG] probably should handle this case and continue to read logger.warn("expected to read " + content.length + " bytes, but read and stopped after " + readBytes); }/*from w w w. j av a 2 s .c o m*/ String fromUnicodeLE = StringUtil.getFromUnicodeLE(content); xmlData = fromUnicodeLE.substring(fromUnicodeLE.indexOf('<')).replaceAll("\u0000", ""); } } } return xmlData; }
From source file:com.hpe.application.automation.tools.octane.actions.UFTTestUtil.java
License:Open Source License
public static String decodeXmlContent(InputStream stream) throws IOException { POIFSFileSystem poiFS = new POIFSFileSystem(stream); DirectoryNode root = poiFS.getRoot(); String xmlData = ""; for (Entry entry : root) { String name = entry.getName(); if ("ComponentInfo".equals(name)) { if (entry instanceof DirectoryEntry) { System.out.println(entry); } else if (entry instanceof DocumentEntry) { byte[] content = new byte[((DocumentEntry) entry).getSize()]; poiFS.createDocumentInputStream("ComponentInfo").read(content); String fromUnicodeLE = StringUtil.getFromUnicodeLE(content); xmlData = fromUnicodeLE.substring(fromUnicodeLE.indexOf('<')).replaceAll("\u0000", ""); }//w w w.j a v a 2s . c o m } } return xmlData; }
From source file:com.pnf.plugin.ole.parser.streams.OleProcessor.java
License:Apache License
private void addAll(ContainerStream parent, DirectoryEntry root) throws IOException { // Iterate through all entries in the current fs directory for (Entry e : root) { // If it's another directory entry, recurse deeper if (e instanceof DirectoryEntry) { // Recurse and parse files, if any, within the current directory DirectoryEntry d = (DirectoryEntry) e; ClassID id = d.getStorageClsid(); if (Stream.isVbaStorage(d.getName())) { VbaContainerStream vba = new VbaContainerStream(parent, id, d.getName()); addAll(vba, d);//from ww w . ja va2s . com } else { ContainerStream currDir = new ContainerStream(parent, id, d.getName()); addAll(currDir, d); } } else if (e instanceof DocumentEntry) { // Retrieve chained representation of files in image DocumentEntry doc = (DocumentEntry) e; // Create byte array around contents of file. byte[] data = new byte[doc.getSize()]; // Read data from image file into buffer boolean error = false; try (DocumentInputStream stream = new DocumentInputStream(doc)) { try { stream.readFully(data); } catch (IndexOutOfBoundsException i) { error = true; } } catch (IOException e1) { error = true; } ByteBuffer buff = ByteBuffer.wrap(data); new DocumentStream(parent, e.getName(), buff, error); // constructor takes care of notifying parent of a new child } } }
From source file:Coop.argo.hwp.v5.HwpTextExtractorV5.java
License:Apache License
/** * ? // ww w . j av a2 s . co m * * @param writer * @param source * * @return * @throws IOException */ private static void extractBodyText(FileHeader header, NPOIFSFileSystem fs, Writer writer) throws IOException { DirectoryNode root = fs.getRoot(); // BodyText ? Entry bodyText = root.getEntry("BodyText"); if (bodyText == null || !bodyText.isDirectoryEntry()) throw new IOException("Invalid BodyText"); Iterator<Entry> iterator = ((DirectoryEntry) bodyText).getEntries(); while (iterator.hasNext()) { Entry entry = iterator.next(); if (entry.getName().startsWith("Section") && entry instanceof DocumentEntry) { log.debug("extract {}", entry.getName()); InputStream input = new NDocumentInputStream((DocumentEntry) entry); try { if (header.compressed) input = new InflaterInputStream(input, new Inflater(true)); HwpStreamReader sectionStream = new HwpStreamReader(input); extractText(sectionStream, writer); } finally { // ? ? ? try { input.close(); } catch (IOException e) { log.error("? ??", e); } } } else { log.warn(" Entry '{}'({})", entry.getName(), entry); } } }
From source file:Coop.argo.hwp.v5.HwpTextExtractorV5.java
License:Apache License
/** * ? /* w w w .j a va 2 s.c om*/ * * @param writer * @param source * * @return * @throws IOException */ private static void extractViewText(FileHeader header, NPOIFSFileSystem fs, Writer writer) throws IOException { DirectoryNode root = fs.getRoot(); // BodyText ? Entry bodyText = root.getEntry("ViewText"); if (bodyText == null || !bodyText.isDirectoryEntry()) throw new IOException("Invalid ViewText"); Iterator<Entry> iterator = ((DirectoryEntry) bodyText).getEntries(); while (iterator.hasNext()) { Entry entry = iterator.next(); if (entry.getName().startsWith("Section") && entry instanceof DocumentEntry) { log.debug("extract {}", entry.getName()); InputStream input = new NDocumentInputStream((DocumentEntry) entry); // FIXME ? Key key = readKey(input); try { input = createDecryptStream(input, key); if (header.compressed) input = new InflaterInputStream(input, new Inflater(true)); HwpStreamReader sectionStream = new HwpStreamReader(input); extractText(sectionStream, writer); } catch (InvalidKeyException e) { throw new IOException(e); } catch (NoSuchAlgorithmException e) { throw new IOException(e); } catch (NoSuchPaddingException e) { throw new IOException(e); } finally { // ? ? ? try { input.close(); } catch (IOException e) { log.error("? ??", e); } } } else { log.warn(" Entry '{}'({})", entry.getName(), entry); } } }
From source file:edu.tsinghua.lumaqq.customface.EIPImporter.java
License:Open Source License
/**
 * Opens an EIP package (an OLE2/POIFS container), locates its configuration and files
 * directories, parses the configuration document and prepares the group/face iterators.
 *
 * If an {@code IOException} occurs (including a missing directory or configuration
 * file), the input stream is closed and the exception is not propagated; in that case
 * {@code parser} and the iterators may be left unset.
 *
 * @param file    path of the EIP file to open
 * @param destDir destination directory stored for later use
 */
@SuppressWarnings("unchecked")
public EIPImporter(String file, String destDir) {
    this.destDir = destDir;
    buffer = new byte[8192];
    try {
        // Open the EIP file.
        eipStream = new FileInputStream(file);
        POIFSFileSystem eipSystem = new POIFSFileSystem(eipStream);

        // Find the config and files directories directly under the root.
        // NOTE(review): toLowerCase() uses the default locale; confirm that is intended.
        DirectoryEntry configDir = null, fileDir = null;
        DirectoryEntry root = eipSystem.getRoot();
        Iterator<Entry> i = root.getEntries();
        while (i.hasNext()) {
            Entry e = i.next();
            if (e.isDirectoryEntry()) {
                if (CONFIG_DIRECTORY.equals(e.getName().toLowerCase()))
                    configDir = (DirectoryEntry) e;
                else if (FILES_DIRECTORY.equals(e.getName().toLowerCase()))
                    fileDir = (DirectoryEntry) e;
            }
        }

        // Both directories are required.
        if (configDir == null || fileDir == null)
            throw new IOException("Can't find correct directories");

        // Parse the configuration document.
        i = configDir.getEntries();
        while (i.hasNext()) {
            Entry e = i.next();
            if (e.isDocumentEntry() && CONFIG_FILE.equals(e.getName().toLowerCase())) {
                DocumentInputStream dis = new DocumentInputStream((DocumentEntry) e);
                try {
                    parser = new FaceXMLParser(dis);
                } finally {
                    // Close the document stream even if parsing throws.
                    dis.close();
                }
                break;
            }
        }

        // The configuration document is required.
        if (parser == null)
            throw new IOException("Can't find " + CONFIG_FILE);

        // Prepare the iterators.
        groupIterator = fileDir.getEntries();
        currentDir = fileDir;
        faceIterator = currentDir.getEntries();
    } catch (IOException e) {
        // Best effort: release the file stream; the failure itself is not reported.
        try {
            if (eipStream != null) {
                eipStream.close();
                eipStream = null;
            }
        } catch (IOException ignored) {
            // Nothing more can be done if closing fails.
        }
    }
}
From source file:FeatureExtraction.FeatureExtractorDocStreamPaths.java
private static void GetStreamsPaths(DirectoryNode dir, String parentPath, Map<String, Integer> streamPaths) { // run over all directory chidlrens for (Iterator<Entry> entryIter = dir.getEntries(); entryIter.hasNext();) { Entry entry = entryIter.next(); String entryName = entry.getName(); // Some entry names starts with binary value that are not printable - remove it if (entryName.charAt(0) < 10) { entryName = entryName.substring(1); }/*w ww.ja v a 2 s . com*/ // Recursively search for directory (storage) children if (entry instanceof DirectoryNode) { GetStreamsPaths((DirectoryNode) entry, parentPath + "\\" + entryName, streamPaths); } else { // Add stream path to set of paths String filePath = parentPath + "\\" + entryName; AddStreamPath(filePath, streamPaths); } } }