Example usage for org.apache.poi.poifs.filesystem POIFSFileSystem getRoot

List of usage examples for org.apache.poi.poifs.filesystem POIFSFileSystem getRoot

Introduction

On this page you can find example usages of org.apache.poi.poifs.filesystem POIFSFileSystem getRoot.

Prototype

public DirectoryNode getRoot() 

Source Link

Document

Get the root entry

Usage

From source file:com.progdan.doc2txt.WordExtractor.java

License:Apache License

/**
 * Gets the text from a Word document./*ww w .j  a  v  a  2 s . c  om*/
 *
 * @param in The InputStream representing the Word file.
 */
public String extractText(InputStream in) throws Exception {
    ArrayList text = new ArrayList();
    POIFSFileSystem fsys = new POIFSFileSystem(in);

    // load our POIFS document streams.
    DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument");
    DocumentInputStream din = fsys.createDocumentInputStream("WordDocument");
    byte[] header = new byte[headerProps.getSize()];

    din.read(header);
    din.close();

    int info = LittleEndian.getShort(header, 0xa);
    if ((info & 0x4) != 0) {
        throw new FastSavedException("Fast-saved files are unsupported at this time");
    }
    if ((info & 0x100) != 0) {
        throw new PasswordProtectedException("This document is password protected");
    }

    // determine the version of Word this document came from.
    int nFib = LittleEndian.getShort(header, 0x2);
    switch (nFib) {
    case 101:
    case 102:
    case 103:
    case 104:
        // this is a Word 6.0 doc send it to the extractor for that version.
        Word6Extractor oldExtractor = new Word6Extractor();
        return oldExtractor.extractText(header);
    }

    //Get the information we need from the header
    boolean useTable1 = (info & 0x200) != 0;

    //get the location of the piece table
    int complexOffset = LittleEndian.getInt(header, 0x1a2);

    // determine which table stream we must use.
    String tableName = null;
    if (useTable1) {
        tableName = "1Table";
    } else {
        tableName = "0Table";
    }

    DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName);
    byte[] tableStream = new byte[table.getSize()];

    din = fsys.createDocumentInputStream(tableName);

    din.read(tableStream);
    din.close();

    int chpOffset = LittleEndian.getInt(header, 0xfa);
    int chpSize = LittleEndian.getInt(header, 0xfe);
    int fcMin = LittleEndian.getInt(header, 0x18);
    CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin);

    // load our text pieces and our character runs
    ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin);
    TextPieceTable tpt = cft.getTextPieceTable();
    List textPieces = tpt.getTextPieces();

    // make the POIFS objects available for garbage collection
    din = null;
    fsys = null;
    table = null;
    headerProps = null;

    List textRuns = cbt.getTextRuns();
    Iterator runIt = textRuns.iterator();
    Iterator textIt = textPieces.iterator();

    TextPiece currentPiece = (TextPiece) textIt.next();
    int currentTextStart = currentPiece.getStart();
    int currentTextEnd = currentPiece.getEnd();

    WordTextBuffer finalTextBuf = new WordTextBuffer();

    // iterate through all text runs extract the text only if they haven't been
    // deleted
    while (runIt.hasNext()) {
        CHPX chpx = (CHPX) runIt.next();
        boolean deleted = isDeleted(chpx.getGrpprl());
        if (deleted) {
            continue;
        }

        int runStart = chpx.getStart();
        int runEnd = chpx.getEnd();

        while (runStart >= currentTextEnd) {
            currentPiece = (TextPiece) textIt.next();
            currentTextStart = currentPiece.getStart();
            currentTextEnd = currentPiece.getEnd();
        }

        if (runEnd < currentTextEnd) {
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else if (runEnd > currentTextEnd) {
            while (runEnd > currentTextEnd) {
                String str = currentPiece.substring(runStart - currentTextStart,
                        currentTextEnd - currentTextStart);
                finalTextBuf.append(str);
                if (textIt.hasNext()) {
                    currentPiece = (TextPiece) textIt.next();
                    currentTextStart = currentPiece.getStart();
                    runStart = currentTextStart;
                    currentTextEnd = currentPiece.getEnd();
                } else {
                    return finalTextBuf.toString();
                }
            }
            String str = currentPiece.substring(0, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else {
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            if (textIt.hasNext()) {
                currentPiece = (TextPiece) textIt.next();
                currentTextStart = currentPiece.getStart();
                currentTextEnd = currentPiece.getEnd();
            }
            finalTextBuf.append(str);
        }
    }
    return finalTextBuf.toString();
}

From source file:com.vodafone.poms.ii.helpers.ExportManager.java

/**
 * Wraps the file at {@code filePath} in an Ole10Native record, stores it in a new
 * POIFS container, writes that container as an OLE object part of the workbook
 * package and relates the part to the sheet.
 *
 * @param sh       sheet whose package part receives the relationship
 * @param filePath path of the file to embed
 * @param oleId    value mixed into the generated part name (together with a random number)
 * @return the id of the relationship created between the sheet and the OLE part
 * @throws IOException            if the file cannot be read or the part cannot be written
 * @throws InvalidFormatException if the generated part name is rejected
 */
private static String addFile(XSSFSheet sh, String filePath, double oleId)
        throws IOException, InvalidFormatException {
    File file = new File(filePath);

    // Read the complete file: available() plus a single read() is not guaranteed
    // to deliver every byte, and the stream must be closed afterwards.
    byte[] data;
    try (FileInputStream fin = new FileInputStream(file)) {
        ByteArrayOutputStream raw = new ByteArrayOutputStream();
        byte[] chunk = new byte[8192];
        for (int n = fin.read(chunk); n != -1; n = fin.read(chunk)) {
            raw.write(chunk, 0, n);
        }
        data = raw.toByteArray();
    }

    String absolutePath = file.getAbsolutePath();
    Ole10Native ole10 = new Ole10Native(absolutePath, absolutePath, absolutePath, data);

    ByteArrayOutputStream bos = new ByteArrayOutputStream(500);
    ole10.writeOut(bos);

    POIFSFileSystem poifs = new POIFSFileSystem();
    try {
        poifs.getRoot().createDocument(Ole10Native.OLE10_NATIVE, new ByteArrayInputStream(bos.toByteArray()));
        poifs.getRoot().setStorageClsid(ClassID.OLE10_PACKAGE);

        final PackagePartName pnOLE = PackagingURIHelper
                .createPartName("/xl/embeddings/oleObject" + oleId + Math.random() + ".bin");
        final PackagePart partOLE = sh.getWorkbook().getPackage().createPart(pnOLE,
                "application/vnd.openxmlformats-officedocument.oleObject");
        PackageRelationship prOLE = sh.getPackagePart().addRelationship(pnOLE, TargetMode.INTERNAL,
                POIXMLDocument.OLE_OBJECT_REL_TYPE);

        try (OutputStream os = partOLE.getOutputStream()) {
            poifs.writeFilesystem(os);
        }
        return prOLE.getId();
    } finally {
        // release the container even when writing the part fails
        poifs.close();
    }
}

From source file:edu.ku.brc.specify.tasks.subpane.wb.ConfigureXLS.java

License:Open Source License

/**
 * @param poifs/*from w ww  . j  a  v  a  2  s .  c om*/
 * @returns the DocumentSummaryInformation for poifs, or null if no DocumentSummaryInformation is found.
 */
protected DocumentSummaryInformation getDocSummary(final POIFSFileSystem poifs) {
    DirectoryEntry dir = poifs.getRoot();
    DocumentSummaryInformation result = null;
    try {
        DocumentEntry dsiEntry = (DocumentEntry) dir.getEntry(DocumentSummaryInformation.DEFAULT_STREAM_NAME);
        DocumentInputStream dis = new DocumentInputStream(dsiEntry);
        PropertySet ps = new PropertySet(dis);
        dis.close();
        result = new DocumentSummaryInformation(ps);
    } catch (FileNotFoundException ex) {
        // There is no document summary information. 
        result = null;
    }
    /*
     * just returning null if anything weird happens. If there is a problem with the xls file,
     * something else will probably blow up later. 
    */
    catch (IOException ex) {
        edu.ku.brc.af.core.UsageTracker.incrHandledUsageCount();
        edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(ConfigureXLS.class, ex);
        log.debug(ex);
        result = null;
    } catch (NoPropertySetStreamException ex) {
        //edu.ku.brc.af.core.UsageTracker.incrHandledUsageCount();
        //edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(ConfigureXLS.class, ex);
        log.debug(ex);
        result = null;
    } catch (MarkUnsupportedException ex) {
        //edu.ku.brc.af.core.UsageTracker.incrHandledUsageCount();
        //edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(ConfigureXLS.class, ex);
        log.debug(ex);
        result = null;
    } catch (UnexpectedPropertySetTypeException ex) {
        //edu.ku.brc.af.core.UsageTracker.incrHandledUsageCount();
        //edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(ConfigureXLS.class, ex);
        log.debug(ex);
        result = null;
    } catch (IllegalPropertySetDataException ex) {
        //edu.ku.brc.af.core.UsageTracker.incrHandledUsageCount();
        //edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(ConfigureXLS.class, ex);
        log.debug(ex);
        result = null;
    }
    return result;
}

From source file:edu.ku.brc.specify.tasks.subpane.wb.XLSExport.java

License:Open Source License

/**
 * Writes the given workbench data to the xls file named by the configuration.
 *
 * <p>{@code data} holds {@code WorkbenchRow} objects, optionally preceded by a
 * {@code WorkbenchTemplate} as first element. When the configuration asks for headers
 * and is not appending, a header row is written and the template mappings are stored
 * in the file's document summary information stream after the workbook is saved.
 *
 * @param data the template and/or rows to export; the code assumes it is never empty
 * @throws Exception any failure while writing the file, after it has been reported
 *                   to the usage and exception trackers
 */
public void writeData(final List<?> data) throws Exception {
    HSSFWorkbook workBook = new HSSFWorkbook();
    HSSFSheet workSheet = workBook.createSheet();
    DocumentSummaryInformation mappings = null;

    int rowNum = 0;

    if (config.getFirstRowHasHeaders() && !config.getAppendData()) {
        writeHeaders(workSheet);
        rowNum++;

        // size each column from its header text, falling back to a fixed width
        String[] headers = config.getHeaders();
        for (int i = 0; i < headers.length; i++) {
            workSheet.setColumnWidth(i,
                    StringUtils.isNotEmpty(headers[i]) ? (256 * headers[i].length()) : 2560);
        }

        WorkbenchTemplate wbTemplate = null;
        if (data.get(0) instanceof WorkbenchTemplate) {
            wbTemplate = (WorkbenchTemplate) data.get(0);
        } else {
            wbTemplate = ((WorkbenchRow) data.get(0)).getWorkbench().getWorkbenchTemplate();
        }
        mappings = writeMappings(wbTemplate);
    }
    //assuming data is never empty.
    boolean hasTemplate = data.get(0) instanceof WorkbenchTemplate;
    boolean hasRows = hasTemplate ? data.size() > 1 : data.size() > 0;
    if (hasRows) {
        int[] colTypes;

        WorkbenchRow wbRow = (WorkbenchRow) data.get(hasTemplate ? 1 : 0);
        Workbench workBench = wbRow.getWorkbench();
        WorkbenchTemplate template = workBench.getWorkbenchTemplate();
        int numCols = template.getWorkbenchTemplateMappingItems().size();
        int geoDataCol = -1;
        Vector<Integer> imgCols = new Vector<Integer>();

        colTypes = bldColTypes(template);
        for (Object rowObj : data) {
            if (rowObj instanceof WorkbenchTemplate) {
                continue;
            }

            WorkbenchRow row = (WorkbenchRow) rowObj;
            HSSFRow hssfRow = workSheet.createRow(rowNum++);
            int colNum;
            boolean rowHasGeoData = false;

            for (colNum = 0; colNum < numCols; colNum++) {
                HSSFCell cell = hssfRow.createCell(colNum);
                cell.setCellType(colTypes[colNum]);
                setCellValue(cell, row.getData(colNum));
            }

            // geo data, when present, goes in the first column after the mapped ones
            if (row.getBioGeomancerResults() != null && !row.getBioGeomancerResults().equals("")) {
                geoDataCol = colNum;
                rowHasGeoData = true;
                HSSFCell cell = hssfRow.createCell(colNum++);
                cell.setCellType(HSSFCell.CELL_TYPE_STRING);
                setCellValue(cell, row.getBioGeomancerResults());
            }

            // if (row.getCardImage() != null)
            if (row.getRowImage(0) != null) {
                // NOTE(review): presumably skips the geo-data column so image columns
                // line up across rows - confirm.
                if (!rowHasGeoData) {
                    colNum++;
                }
                int imgIdx = 0;
                WorkbenchRowImage img = row.getRowImage(imgIdx++);
                while (img != null) {
                    if (imgCols.indexOf(colNum) < 0) {
                        imgCols.add(colNum);
                    }
                    HSSFCell cell = hssfRow.createCell(colNum++);
                    cell.setCellType(HSSFCell.CELL_TYPE_STRING);
                    String cellValue = img.getCardImageFullPath();
                    String attachToTbl = img.getAttachToTableName();
                    if (attachToTbl != null) {
                        cellValue += "\t" + attachToTbl;
                    }
                    setCellValue(cell, cellValue);
                    img = row.getRowImage(imgIdx++);
                }
            }

        }
        if (imgCols.size() > 0 || geoDataCol != -1) {
            writeExtraHeaders(workSheet, imgCols, geoDataCol);
        }

    }
    try {
        // Write the workbook
        File file = new File(getConfig().getFileName());
        if (file.canWrite() || (!file.exists() && file.createNewFile())) {
            // every stream is scoped so it is closed even when writing fails
            try (FileOutputStream workbookOut = new FileOutputStream(file)) {
                workBook.write(workbookOut);
            }

            //Now write the mappings.
            //NOT (hopefully) the best way to write the mappings, but (sadly) the easiest way.
            //May need to do this another way if this slows performance for big wbs.
            if (mappings != null) {
                POIFSFileSystem poifs;
                try (InputStream is = new FileInputStream(file)) {
                    poifs = new POIFSFileSystem(is);
                }
                mappings.write(poifs.getRoot(), DocumentSummaryInformation.DEFAULT_STREAM_NAME);
                try (FileOutputStream mappingsOut = new FileOutputStream(file)) {
                    poifs.writeFilesystem(mappingsOut);
                }
            }
        } else {
            UIRegistry.displayErrorDlgLocalized("WB_EXPORT_PERM_ERR");
        }
    } catch (Exception e) {
        edu.ku.brc.af.core.UsageTracker.incrHandledUsageCount();
        edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(XLSExport.class, e);
        throw (e);
    }
}

From source file:edu.tsinghua.lumaqq.customface.EIPImporter.java

License:Open Source License

/**
 * Opens an EIP file as a POIFS file system and prepares iteration over its faces.
 *
 * <p>The root must contain a config directory and a files directory (names are
 * compared in lower case). The config file inside the config directory is handed to
 * a {@code FaceXMLParser}; the group and face iterators are then positioned on the
 * files directory. If anything fails with an {@code IOException} the input stream is
 * closed and the exception is swallowed, leaving the importer without a parser.
 *
 * @param file    path of the EIP file to open
 * @param destDir destination directory, stored for later use
 */
@SuppressWarnings("unchecked")
public EIPImporter(String file, String destDir) {
    this.destDir = destDir;
    buffer = new byte[8192];

    try {
        // open the eip file
        eipStream = new FileInputStream(file);
        POIFSFileSystem eipSystem = new POIFSFileSystem(eipStream);

        // locate the config and files directories directly under the root
        DirectoryEntry configDir = null;
        DirectoryEntry fileDir = null;
        for (Iterator<Entry> rootIt = eipSystem.getRoot().getEntries(); rootIt.hasNext();) {
            Entry child = rootIt.next();
            if (!child.isDirectoryEntry()) {
                continue;
            }
            if (CONFIG_DIRECTORY.equals(child.getName().toLowerCase())) {
                configDir = (DirectoryEntry) child;
            } else if (FILES_DIRECTORY.equals(child.getName().toLowerCase())) {
                fileDir = (DirectoryEntry) child;
            }
        }

        // both directories are mandatory
        if (configDir == null || fileDir == null) {
            throw new IOException("Can't find correct directories");
        }

        // parse the first document in the config directory that matches the config file name
        for (Iterator<Entry> configIt = configDir.getEntries(); configIt.hasNext();) {
            Entry candidate = configIt.next();
            if (candidate.isDocumentEntry() && CONFIG_FILE.equals(candidate.getName().toLowerCase())) {
                DocumentInputStream dis = new DocumentInputStream((DocumentEntry) candidate);
                parser = new FaceXMLParser(dis);
                dis.close();
                break;
            }
        }

        // without a parsed config file the import cannot continue
        if (parser == null) {
            throw new IOException("Can't find " + CONFIG_FILE);
        }

        // start both iterators on the files directory
        groupIterator = fileDir.getEntries();
        currentDir = fileDir;
        faceIterator = currentDir.getEntries();
    } catch (IOException e) {
        // failure is not reported to the caller; just release the input stream
        if (eipStream != null) {
            try {
                eipStream.close();
                eipStream = null;
            } catch (IOException ignored) {
            }
        }
    }
}

From source file:FeatureExtraction.FeatureExtractorDocStreamPaths.java

/**
 * Collects the stream paths of the OLE2 document whose file path is given by
 * {@code element}.
 *
 * <p>The element is cast to a {@code String} file path, opened as a POIFS file
 * system, and its root directory is passed to {@code GetStreamsPaths}, which fills
 * the returned map. I/O failures are reported through {@code Console.PrintException}
 * and result in whatever was collected so far (possibly an empty map).
 *
 * @param element the path of the file to inspect; must be a {@code String}
 * @return the map populated by {@code GetStreamsPaths}
 */
@Override
public Map ExtractFeaturesFrequencyFromSingleElement(T element) {
    Map<String, Integer> streamPaths = new HashMap<>();
    String filePath = (String) element;

    // try-with-resources closes the file; the original never closed it
    try (InputStream inputStream = new FileInputStream(filePath)) {
        POIFSFileSystem poiFileSystem = new POIFSFileSystem(inputStream);
        DirectoryNode directoryNode = poiFileSystem.getRoot();
        GetStreamsPaths(directoryNode, "", streamPaths);
    } catch (IOException ex) {
        // also covers FileNotFoundException, which was handled identically
        Console.PrintException(String.format("Error extracting DOC features from file: %s", filePath), ex);
    }

    return streamPaths;
}

From source file:gov.nih.nci.evs.app.neopl.XLSXMetadataUtils.java

License:Open Source License

/**
 * Reads one summary property (author, keywords, title or subject) from a file.
 *
 * <p>The file is first opened as a POIFS file system; if that fails the lookup is
 * delegated to {@code getPOISummaryData}. Any other failure is printed to standard
 * error and yields {@code null}.
 *
 * @param filename path of the file to inspect
 * @param key      one of the {@code SUMMARY_DATA_*} constants
 * @return the property value, or {@code null} if the key is not recognised, the
 *         property is absent, or reading failed
 */
public static String getSummaryData(String filename, String key) {
    String value = null;
    // try-with-resources avoids the NullPointerException the old finally block
    // raised when the file could not be opened
    try (FileInputStream stream = new FileInputStream(new File(filename))) {
        POIFSFileSystem poifs;
        try {
            poifs = new POIFSFileSystem(stream);
        } catch (Exception e) {
            // release the file before the fallback reads it again
            stream.close();
            return getPOISummaryData(filename, key);
        }
        DirectoryEntry dir = poifs.getRoot();
        DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME);
        if (siEntry != null) {
            SummaryInformation si;
            try (DocumentInputStream dis = new DocumentInputStream(siEntry)) {
                si = new SummaryInformation(new PropertySet(dis));
            }

            if (SUMMARY_DATA_AUTHOR.equals(key)) {
                value = si.getAuthor();
            } else if (SUMMARY_DATA_KEYWORDS.equals(key)) {
                value = si.getKeywords();
            } else if (SUMMARY_DATA_TITLE.equals(key)) {
                value = si.getTitle();
            } else if (SUMMARY_DATA_SUBJECT.equals(key)) {
                value = si.getSubject();
            }
        }
    } catch (Exception ex) {
        // the original called getStackTrace(), which silently discarded the failure
        ex.printStackTrace();
    }
    return value;
}

From source file:gov.nih.nci.evs.app.neopl.XLSXMetadataUtils.java

License:Open Source License

/**
 * Returns the author recorded in a file's summary information.
 *
 * <p>The file is first opened as a POIFS file system; if that fails the lookup is
 * delegated to {@code getCreator}. Any other failure is printed to standard error
 * and yields {@code null}.
 *
 * @param file the file to inspect
 * @return the author, or {@code null} if it is absent or could not be read
 */
public static String getAuthor(File file) {
    String author = null;
    // try-with-resources closes the file on every exit path, including the early returns
    try (FileInputStream stream = new FileInputStream(file)) {
        POIFSFileSystem poifs;
        try {
            poifs = new POIFSFileSystem(stream);
        } catch (Exception e) {
            // release the file before the fallback reads it again
            stream.close();
            return getCreator(file);
        }
        DirectoryEntry dir;
        try {
            dir = poifs.getRoot();
        } catch (Exception ex) {
            System.out.println("DirectoryEntry is NULL???");
            return null;
        }
        DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME);
        if (siEntry != null) {
            try (DocumentInputStream dis = new DocumentInputStream(siEntry)) {
                SummaryInformation si = new SummaryInformation(new PropertySet(dis));
                author = si.getAuthor();
            }
        }
    } catch (Exception ex) {
        // the original called getStackTrace(), which silently discarded the failure
        ex.printStackTrace();
    }
    return author;
}

From source file:gov.nih.nci.evs.app.neopl.XLSXMetadataUtils.java

License:Open Source License

/**
 * Sets the author in the summary information of the named OLE2 file and saves the
 * file in place.
 *
 * <p>The file is loaded into a POIFS file system, the summary information stream is
 * updated, and the whole file system is written back over the original file. Nothing
 * is written if the file cannot be opened or has no summary information stream; such
 * failures are printed to standard error.
 *
 * <p>NOTE(review): the file is overwritten directly, so a failure while writing can
 * leave it incomplete - consider writing to a temporary file first.
 *
 * @param filename path of the file to modify
 * @param author   the author to store
 */
public static void setAuthor(String filename, String author) {
    try {
        File target = new File(filename);

        // Load the file system and close the input before the file is reopened
        // for writing.
        POIFSFileSystem poifs;
        try (FileInputStream stream = new FileInputStream(target)) {
            poifs = new POIFSFileSystem(stream);
        }
        DirectoryEntry dir = poifs.getRoot();

        System.out.println("SummaryInformation.DEFAULT_STREAM_NAME: " + SummaryInformation.DEFAULT_STREAM_NAME);

        DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME);
        SummaryInformation si;
        try (DocumentInputStream dis = new DocumentInputStream(siEntry)) {
            si = new SummaryInformation(new PropertySet(dis));
        }

        System.out.println("SummaryInformation setAuthor: " + author);

        si.setAuthor(author);

        // Store the modified property set in the file system, then save the file
        // system. The original truncated the file and copied from an input stream
        // that had already been consumed, discarding both the change and the data.
        si.write(dir, SummaryInformation.DEFAULT_STREAM_NAME);
        try (OutputStream outStream = new FileOutputStream(target)) {
            poifs.writeFilesystem(outStream);
        }
    } catch (Exception ex) {
        // the original called getStackTrace(), which silently discarded the failure
        ex.printStackTrace();
    }
}

From source file:gov.nih.nci.evs.app.neopl.XLSXMetadataUtils.java

License:Open Source License

/**
 * Sets the author of a file and saves the file in place.
 *
 * <p>The file is first opened as a POIFS file system; if that fails the request is
 * delegated to {@code setCreator}. Otherwise the summary information stream is
 * updated and the whole file system is written back over the original file. Other
 * failures are printed to standard error.
 *
 * <p>NOTE(review): the file is overwritten directly, so a failure while writing can
 * leave it incomplete - consider writing to a temporary file first.
 *
 * @param file   the file to modify
 * @param author the author to store
 */
public static void setAuthor(File file, String author) {
    try {
        POIFSFileSystem poifs;
        // the input is closed before the file is reopened for writing
        try (FileInputStream stream = new FileInputStream(file)) {
            try {
                poifs = new POIFSFileSystem(stream);
            } catch (Exception e) {
                // release the file before the fallback touches it
                stream.close();
                setCreator(file, author);
                return;
            }
        }
        DirectoryEntry dir = poifs.getRoot();
        DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME);
        if (siEntry != null) {
            SummaryInformation si;
            try (DocumentInputStream dis = new DocumentInputStream(siEntry)) {
                si = new SummaryInformation(new PropertySet(dis));
            }
            si.setAuthor(author);

            // The original changed the author only in memory; store the property
            // set in the file system and save it so the change persists.
            si.write(dir, SummaryInformation.DEFAULT_STREAM_NAME);
            try (java.io.OutputStream out = new java.io.FileOutputStream(file)) {
                poifs.writeFilesystem(out);
            }
        }
    } catch (Exception ex) {
        // the original called getStackTrace(), which silently discarded the failure
        ex.printStackTrace();
    }
}