List of usage examples for org.apache.poi.poifs.filesystem.POIFSFileSystem#getRoot()
public DirectoryNode getRoot()
From source file:com.progdan.doc2txt.WordExtractor.java
License:Apache License
/** * Gets the text from a Word document./*ww w .j a v a 2 s . c om*/ * * @param in The InputStream representing the Word file. */ public String extractText(InputStream in) throws Exception { ArrayList text = new ArrayList(); POIFSFileSystem fsys = new POIFSFileSystem(in); // load our POIFS document streams. DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument"); DocumentInputStream din = fsys.createDocumentInputStream("WordDocument"); byte[] header = new byte[headerProps.getSize()]; din.read(header); din.close(); int info = LittleEndian.getShort(header, 0xa); if ((info & 0x4) != 0) { throw new FastSavedException("Fast-saved files are unsupported at this time"); } if ((info & 0x100) != 0) { throw new PasswordProtectedException("This document is password protected"); } // determine the version of Word this document came from. int nFib = LittleEndian.getShort(header, 0x2); switch (nFib) { case 101: case 102: case 103: case 104: // this is a Word 6.0 doc send it to the extractor for that version. Word6Extractor oldExtractor = new Word6Extractor(); return oldExtractor.extractText(header); } //Get the information we need from the header boolean useTable1 = (info & 0x200) != 0; //get the location of the piece table int complexOffset = LittleEndian.getInt(header, 0x1a2); // determine which table stream we must use. 
String tableName = null; if (useTable1) { tableName = "1Table"; } else { tableName = "0Table"; } DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName); byte[] tableStream = new byte[table.getSize()]; din = fsys.createDocumentInputStream(tableName); din.read(tableStream); din.close(); int chpOffset = LittleEndian.getInt(header, 0xfa); int chpSize = LittleEndian.getInt(header, 0xfe); int fcMin = LittleEndian.getInt(header, 0x18); CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin); // load our text pieces and our character runs ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin); TextPieceTable tpt = cft.getTextPieceTable(); List textPieces = tpt.getTextPieces(); // make the POIFS objects available for garbage collection din = null; fsys = null; table = null; headerProps = null; List textRuns = cbt.getTextRuns(); Iterator runIt = textRuns.iterator(); Iterator textIt = textPieces.iterator(); TextPiece currentPiece = (TextPiece) textIt.next(); int currentTextStart = currentPiece.getStart(); int currentTextEnd = currentPiece.getEnd(); WordTextBuffer finalTextBuf = new WordTextBuffer(); // iterate through all text runs extract the text only if they haven't been // deleted while (runIt.hasNext()) { CHPX chpx = (CHPX) runIt.next(); boolean deleted = isDeleted(chpx.getGrpprl()); if (deleted) { continue; } int runStart = chpx.getStart(); int runEnd = chpx.getEnd(); while (runStart >= currentTextEnd) { currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); currentTextEnd = currentPiece.getEnd(); } if (runEnd < currentTextEnd) { String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart); finalTextBuf.append(str); } else if (runEnd > currentTextEnd) { while (runEnd > currentTextEnd) { String str = currentPiece.substring(runStart - currentTextStart, currentTextEnd - currentTextStart); finalTextBuf.append(str); if (textIt.hasNext()) { 
currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); runStart = currentTextStart; currentTextEnd = currentPiece.getEnd(); } else { return finalTextBuf.toString(); } } String str = currentPiece.substring(0, runEnd - currentTextStart); finalTextBuf.append(str); } else { String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart); if (textIt.hasNext()) { currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); currentTextEnd = currentPiece.getEnd(); } finalTextBuf.append(str); } } return finalTextBuf.toString(); }
From source file:com.vodafone.poms.ii.helpers.ExportManager.java
private static String addFile(XSSFSheet sh, String filePath, double oleId) throws IOException, InvalidFormatException { File file = new File(filePath); FileInputStream fin = new FileInputStream(file); byte[] data;//from ww w. ja va2 s . c om data = new byte[fin.available()]; fin.read(data); Ole10Native ole10 = new Ole10Native(file.getAbsolutePath(), file.getAbsolutePath(), file.getAbsolutePath(), data); ByteArrayOutputStream bos = new ByteArrayOutputStream(500); ole10.writeOut(bos); POIFSFileSystem poifs = new POIFSFileSystem(); poifs.getRoot().createDocument(Ole10Native.OLE10_NATIVE, new ByteArrayInputStream(bos.toByteArray())); poifs.getRoot().setStorageClsid(ClassID.OLE10_PACKAGE); final PackagePartName pnOLE = PackagingURIHelper .createPartName("/xl/embeddings/oleObject" + oleId + Math.random() + ".bin"); final PackagePart partOLE = sh.getWorkbook().getPackage().createPart(pnOLE, "application/vnd.openxmlformats-officedocument.oleObject"); PackageRelationship prOLE = sh.getPackagePart().addRelationship(pnOLE, TargetMode.INTERNAL, POIXMLDocument.OLE_OBJECT_REL_TYPE); OutputStream os = partOLE.getOutputStream(); poifs.writeFilesystem(os); os.close(); poifs.close(); return prOLE.getId(); }
From source file:edu.ku.brc.specify.tasks.subpane.wb.ConfigureXLS.java
License:Open Source License
/** * @param poifs/*from w ww . j a v a 2 s . c om*/ * @returns the DocumentSummaryInformation for poifs, or null if no DocumentSummaryInformation is found. */ protected DocumentSummaryInformation getDocSummary(final POIFSFileSystem poifs) { DirectoryEntry dir = poifs.getRoot(); DocumentSummaryInformation result = null; try { DocumentEntry dsiEntry = (DocumentEntry) dir.getEntry(DocumentSummaryInformation.DEFAULT_STREAM_NAME); DocumentInputStream dis = new DocumentInputStream(dsiEntry); PropertySet ps = new PropertySet(dis); dis.close(); result = new DocumentSummaryInformation(ps); } catch (FileNotFoundException ex) { // There is no document summary information. result = null; } /* * just returning null if anything weird happens. If there is a problem with the xls file, * something else will probably blow up later. */ catch (IOException ex) { edu.ku.brc.af.core.UsageTracker.incrHandledUsageCount(); edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(ConfigureXLS.class, ex); log.debug(ex); result = null; } catch (NoPropertySetStreamException ex) { //edu.ku.brc.af.core.UsageTracker.incrHandledUsageCount(); //edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(ConfigureXLS.class, ex); log.debug(ex); result = null; } catch (MarkUnsupportedException ex) { //edu.ku.brc.af.core.UsageTracker.incrHandledUsageCount(); //edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(ConfigureXLS.class, ex); log.debug(ex); result = null; } catch (UnexpectedPropertySetTypeException ex) { //edu.ku.brc.af.core.UsageTracker.incrHandledUsageCount(); //edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(ConfigureXLS.class, ex); log.debug(ex); result = null; } catch (IllegalPropertySetDataException ex) { //edu.ku.brc.af.core.UsageTracker.incrHandledUsageCount(); //edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(ConfigureXLS.class, ex); log.debug(ex); result = null; } return result; }
From source file:edu.ku.brc.specify.tasks.subpane.wb.XLSExport.java
License:Open Source License
public void writeData(final List<?> data) throws Exception { HSSFWorkbook workBook = new HSSFWorkbook(); HSSFSheet workSheet = workBook.createSheet(); DocumentSummaryInformation mappings = null; int rowNum = 0; if (config.getFirstRowHasHeaders() && !config.getAppendData()) { writeHeaders(workSheet);/*from www .java 2 s . c o m*/ rowNum++; String[] headers = config.getHeaders(); for (int i = 0; i < headers.length; i++) { workSheet.setColumnWidth(i, StringUtils.isNotEmpty(headers[i]) ? (256 * headers[i].length()) : 2560); } WorkbenchTemplate wbTemplate = null; if (data.get(0) instanceof WorkbenchTemplate) { wbTemplate = (WorkbenchTemplate) data.get(0); } else { wbTemplate = ((WorkbenchRow) data.get(0)).getWorkbench().getWorkbenchTemplate(); } mappings = writeMappings(wbTemplate); } //assuming data is never empty. boolean hasTemplate = data.get(0) instanceof WorkbenchTemplate; boolean hasRows = hasTemplate ? data.size() > 1 : data.size() > 0; if (hasRows) { int[] disciplinees; WorkbenchRow wbRow = (WorkbenchRow) data.get(hasTemplate ? 
1 : 0); Workbench workBench = wbRow.getWorkbench(); WorkbenchTemplate template = workBench.getWorkbenchTemplate(); int numCols = template.getWorkbenchTemplateMappingItems().size(); int geoDataCol = -1; Vector<Integer> imgCols = new Vector<Integer>(); disciplinees = bldColTypes(template); for (Object rowObj : data) { if (rowObj instanceof WorkbenchTemplate) { continue; } WorkbenchRow row = (WorkbenchRow) rowObj; HSSFRow hssfRow = workSheet.createRow(rowNum++); int colNum; boolean rowHasGeoData = false; for (colNum = 0; colNum < numCols; colNum++) { HSSFCell cell = hssfRow.createCell(colNum); cell.setCellType(disciplinees[colNum]); setCellValue(cell, row.getData(colNum)); } if (row.getBioGeomancerResults() != null && !row.getBioGeomancerResults().equals("")) { geoDataCol = colNum; rowHasGeoData = true; HSSFCell cell = hssfRow.createCell(colNum++); cell.setCellType(HSSFCell.CELL_TYPE_STRING); setCellValue(cell, row.getBioGeomancerResults()); } // if (row.getCardImage() != null) if (row.getRowImage(0) != null) { if (!rowHasGeoData) { colNum++; } int imgIdx = 0; WorkbenchRowImage img = row.getRowImage(imgIdx++); while (img != null) { if (imgCols.indexOf(colNum) < 0) { imgCols.add(colNum); } HSSFCell cell = hssfRow.createCell(colNum++); cell.setCellType(HSSFCell.CELL_TYPE_STRING); String cellValue = img.getCardImageFullPath(); String attachToTbl = img.getAttachToTableName(); if (attachToTbl != null) { cellValue += "\t" + attachToTbl; } setCellValue(cell, cellValue); img = row.getRowImage(imgIdx++); } } } if (imgCols.size() > 0 || geoDataCol != -1) { writeExtraHeaders(workSheet, imgCols, geoDataCol); } } try { // Write the workbook File file = new File(getConfig().getFileName()); if (file.canWrite() || (!file.exists() && file.createNewFile())) { FileOutputStream fos = new FileOutputStream(file); workBook.write(fos); fos.close(); //Now write the mappings. //NOT (hopefully) the best way to write the mappings, but (sadly) the easiest way. 
//May need to do this another way if this slows performance for big wbs. if (mappings != null) { InputStream is = new FileInputStream(file); POIFSFileSystem poifs = new POIFSFileSystem(is); is.close(); mappings.write(poifs.getRoot(), DocumentSummaryInformation.DEFAULT_STREAM_NAME); fos = new FileOutputStream(file); poifs.writeFilesystem(fos); fos.close(); } } else { UIRegistry.displayErrorDlgLocalized("WB_EXPORT_PERM_ERR"); } } catch (Exception e) { edu.ku.brc.af.core.UsageTracker.incrHandledUsageCount(); edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(XLSExport.class, e); throw (e); } }
From source file:edu.tsinghua.lumaqq.customface.EIPImporter.java
License:Open Source License
/**
 * Opens an EIP file and locates its configuration and face file directories.
 *
 * <p>On success the face XML parser and the group/face iterators are initialised and the
 * underlying file stream stays open. If the file cannot be read or lacks the expected
 * entries, the stream is closed and the remaining fields are left unset; no exception
 * leaves the constructor.
 *
 * @param file path of the EIP file to import
 * @param destDir destination directory, stored for later use
 */
@SuppressWarnings("unchecked")
public EIPImporter(String file, String destDir) {
    this.destDir = destDir;
    buffer = new byte[8192];
    try {
        // open the EIP file as a POIFS file system
        eipStream = new FileInputStream(file);
        POIFSFileSystem eipSystem = new POIFSFileSystem(eipStream);

        // look up the config and files directories below the root
        DirectoryEntry configDir = null, fileDir = null;
        DirectoryEntry root = eipSystem.getRoot();
        Iterator<Entry> i = root.getEntries();
        while (i.hasNext()) {
            Entry e = i.next();
            if (e.isDirectoryEntry()) {
                // Lower-case with a fixed locale: the default locale may map 'I' to a
                // dotless 'i' (e.g. Turkish) and make the comparison fail.
                String name = e.getName().toLowerCase(java.util.Locale.ENGLISH);
                if (CONFIG_DIRECTORY.equals(name))
                    configDir = (DirectoryEntry) e;
                else if (FILES_DIRECTORY.equals(name))
                    fileDir = (DirectoryEntry) e;
            }
        }

        // both directories are required
        if (configDir == null || fileDir == null)
            throw new IOException("Can't find correct directories");

        // find and parse face.xml
        i = configDir.getEntries();
        while (i.hasNext()) {
            Entry e = i.next();
            if (e.isDocumentEntry()
                    && CONFIG_FILE.equals(e.getName().toLowerCase(java.util.Locale.ENGLISH))) {
                DocumentInputStream dis = new DocumentInputStream((DocumentEntry) e);
                try {
                    parser = new FaceXMLParser(dis);
                } finally {
                    dis.close();
                }
                break;
            }
        }

        // face.xml is required
        if (parser == null)
            throw new IOException("Can't find " + CONFIG_FILE);

        // set up the iterators over the face files
        groupIterator = fileDir.getEntries();
        currentDir = fileDir;
        faceIterator = currentDir.getEntries();
    } catch (IOException e) {
        // Best effort: release the file and leave the importer without parser/iterators.
        try {
            if (eipStream != null) {
                eipStream.close();
                eipStream = null;
            }
        } catch (IOException ignored) {
            // nothing more can be done if closing fails
        }
    }
}
From source file:FeatureExtraction.FeatureExtractorDocStreamPaths.java
@Override public Map ExtractFeaturesFrequencyFromSingleElement(T element) { Map<String, Integer> streamPaths = new HashMap<>(); String filePath = (String) element; try {/*from www. ja v a 2s . co m*/ InputStream inputStream = new FileInputStream(filePath); POIFSFileSystem poiFileSystem = new POIFSFileSystem(inputStream); DirectoryNode directoryNode = poiFileSystem.getRoot(); //HWPFDocument document = new HWPFDocument(directoryNode); GetStreamsPaths(directoryNode, "", streamPaths); } catch (FileNotFoundException ex) { Console.PrintException(String.format("Error extracting DOC features from file: %s", filePath), ex); } catch (IOException ex) { Console.PrintException(String.format("Error extracting DOC features from file: %s", filePath), ex); } return streamPaths; }
From source file:gov.nih.nci.evs.app.neopl.XLSXMetadataUtils.java
License:Open Source License
public static String getSummaryData(String filename, String key) { String value = null;// w w w. j a v a 2 s .c o m FileInputStream stream = null; try { stream = new FileInputStream(new File(filename)); POIFSFileSystem poifs = null; try { poifs = new POIFSFileSystem(stream); } catch (Exception e) { stream.close(); return getPOISummaryData(filename, key); } DirectoryEntry dir = poifs.getRoot(); DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME); if (siEntry != null) { DocumentInputStream dis = new DocumentInputStream(siEntry); PropertySet ps = new PropertySet(dis); SummaryInformation si = new SummaryInformation(ps); if (key.compareTo(SUMMARY_DATA_AUTHOR) == 0) { value = si.getAuthor(); } else if (key.compareTo(SUMMARY_DATA_KEYWORDS) == 0) { value = si.getKeywords(); } else if (key.compareTo(SUMMARY_DATA_TITLE) == 0) { value = si.getTitle(); } else if (key.compareTo(SUMMARY_DATA_SUBJECT) == 0) { value = si.getSubject(); } } } catch (Exception ex) { ex.getStackTrace(); } finally { try { stream.close(); } catch (Exception ex) { ex.printStackTrace(); } } return value; }
From source file:gov.nih.nci.evs.app.neopl.XLSXMetadataUtils.java
License:Open Source License
public static String getAuthor(File file) { String author = null;//from w w w . jav a 2s.co m try { FileInputStream stream = new FileInputStream(file); POIFSFileSystem poifs = null; try { poifs = new POIFSFileSystem(stream); } catch (Exception e) { stream.close(); return getCreator(file); } DirectoryEntry dir = null; try { dir = poifs.getRoot(); } catch (Exception ex) { System.out.println("DirectoryEntry is NULL???"); return null; } DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME); if (siEntry != null) { DocumentInputStream dis = new DocumentInputStream(siEntry); PropertySet ps = new PropertySet(dis); SummaryInformation si = new SummaryInformation(ps); author = si.getAuthor(); } stream.close(); } catch (Exception ex) { ex.getStackTrace(); } return author; }
From source file:gov.nih.nci.evs.app.neopl.XLSXMetadataUtils.java
License:Open Source License
public static void setAuthor(String filename, String author) { try {// ww w . j ava 2 s. c o m FileInputStream stream = new FileInputStream(new File(filename)); POIFSFileSystem poifs = new POIFSFileSystem(stream); DirectoryEntry dir = poifs.getRoot(); System.out.println("SummaryInformation.DEFAULT_STREAM_NAME: " + SummaryInformation.DEFAULT_STREAM_NAME); DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME); DocumentInputStream dis = new DocumentInputStream(siEntry); PropertySet ps = new PropertySet(dis); SummaryInformation si = new SummaryInformation(ps); System.out.println("SummaryInformation setAuthor: " + author); si.setAuthor(author); OutputStream outStream = null; outStream = new FileOutputStream(new File(filename)); byte[] buffer = new byte[1024]; int length; while ((length = stream.read(buffer)) > 0) { outStream.write(buffer, 0, length); } outStream.close(); stream.close(); } catch (Exception ex) { ex.getStackTrace(); } }
From source file:gov.nih.nci.evs.app.neopl.XLSXMetadataUtils.java
License:Open Source License
/**
 * Sets the author in the summary information of a file and saves the file.
 *
 * <p>If the file cannot be opened as a POIFS file system, the change is delegated to
 * {@code setCreator}. Earlier code changed the author only on the in-memory property set,
 * so the file itself was never updated.
 *
 * @param file the file to change
 * @param author the author to store
 */
public static void setAuthor(File file, String author) {
    FileInputStream stream = null;
    try {
        stream = new FileInputStream(file);
        POIFSFileSystem poifs = null;
        try {
            poifs = new POIFSFileSystem(stream);
        } catch (Exception e) {
            // presumably not an OLE2 file; use the other writer
            stream.close();
            setCreator(file, author);
            return;
        }
        // the file system is in memory now; release the file before it is rewritten
        stream.close();

        DirectoryEntry dir = poifs.getRoot();
        DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME);
        if (siEntry != null) {
            SummaryInformation si;
            DocumentInputStream dis = new DocumentInputStream(siEntry);
            try {
                si = new SummaryInformation(new PropertySet(dis));
            } finally {
                dis.close();
            }
            si.setAuthor(author);

            // store the changed property set and save the file system back to the file
            si.write(dir, SummaryInformation.DEFAULT_STREAM_NAME);
            FileOutputStream out = new FileOutputStream(file);
            try {
                poifs.writeFilesystem(out);
            } finally {
                out.close();
            }
        }
    } catch (Exception ex) {
        // was ex.getStackTrace(), which discards the failure without reporting it
        ex.printStackTrace();
    } finally {
        if (stream != null) {
            try {
                stream.close();
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
    }
}