Example usage for org.apache.poi.poifs.filesystem POIFSFileSystem POIFSFileSystem

Introduction

On this page you can find example usages of the POIFSFileSystem(InputStream) constructor of org.apache.poi.poifs.filesystem.POIFSFileSystem.

Prototype


public POIFSFileSystem(InputStream stream) throws IOException

Source Link

Document

Create a POIFSFileSystem from an InputStream.

Usage

From source file:no.abmu.organisationregister.util.ExcelWithLibraryInformationParser.java

License:Open Source License

/**
 * Loads the Excel document named by {@code excelFileName} and stores the parsed
 * workbook in {@code workBook}.
 *
 * <p>The file input stream is closed explicitly once the POI file system has been
 * built (or has failed to build), so the file handle is released regardless of
 * whether the POI constructor closes the stream itself.
 *
 * @throws IllegalStateException    if no file name has been set, or if reading or
 *                                  parsing the file fails (the underlying
 *                                  {@link IOException} is attached as the cause)
 * @throws IllegalArgumentException if the file does not exist or cannot be read
 */
protected void loadExcelDocument() {
    if (excelFileName == null) {
        logger.error("Can't parse Excel document. No filename specified");
        throw new IllegalStateException("Can't parse Excel document. No filename specified");
    }

    logger.debug("Will load Excel document having filename '" + excelFileName + "'");

    File file = new File(excelFileName);
    if (!file.exists()) {
        logger.error(
                "Can't load Excel document having filename '" + excelFileName + "'. The file does not exist");
        throw new IllegalArgumentException(
                "Can't parse Excel document. File " + excelFileName + " does not exist");
    }

    if (!file.canRead()) {
        logger.error("Can't load Excel document having filename '" + excelFileName + "'. No read access.");
        throw new IllegalArgumentException("Can't parse Excel document. No access");
    }

    POIFSFileSystem poifsFileSystem;
    FileInputStream input = null;
    try {
        input = new FileInputStream(file);
        poifsFileSystem = new POIFSFileSystem(input);
    } catch (IOException e) {
        logger.error("Can't load Excel document having filename '" + excelFileName
                + "'. Failed when reading file : '" + e.getMessage() + "'", e);
        // Keep the original exception as the cause so callers can diagnose the failure.
        throw new IllegalStateException("Can't parse Excel document. Failed to read.", e);
    } finally {
        if (input != null) {
            try {
                input.close();
            } catch (IOException closeFailure) {
                // Best effort: a failed close must not mask the outcome of the read above.
                logger.debug("Failed to close Excel document having filename '" + excelFileName + "': "
                        + closeFailure.getMessage());
            }
        }
    }

    try {
        workBook = new HSSFWorkbook(poifsFileSystem);
    } catch (IOException e) {
        logger.error("Can't load Excel document having filename '" + excelFileName
                + "'. Failed when parsing file : '" + e.getMessage() + "'", e);
        throw new IllegalStateException("Can't parse Excel document. Failed to parse Excel data.", e);
    }

    logger.debug("Successfully loaded and parsed Excel document");
}

From source file:no.trank.openpipe.parse.ms.ExcelParser.java

License:Apache License

/**
 * Parses an Excel document read from {@code data}'s input stream.
 *
 * <p>The result always carries the extracted text and the value of the
 * {@code "title"} property; the full property map is attached only when
 * {@code data.includeProperties()} returns {@code true}.
 *
 * @param data source of the input stream and of the include-properties flag
 * @return the populated parser result
 * @throws IOException     if the POI file system or workbook cannot be read
 * @throws ParserException declared by the overridden method
 */
@Override
public ParserResult parse(ParseData data) throws IOException, ParserException {
    final POIFSFileSystem fileSystem = new POIFSFileSystem(data.getInputStream());
    final HSSFWorkbook workbook = new HSSFWorkbook(fileSystem);
    final Map<String, String> docProperties = POIUtils.getProperties(fileSystem);

    final ParserResultImpl parsed = new ParserResultImpl();
    parsed.setText(getText(workbook));
    parsed.setTitle(docProperties.get("title"));

    if (data.includeProperties()) {
        parsed.setProperties(docProperties);
    }
    return parsed;
}

From source file:no.uio.medicine.virsurveillance.parsers.XlsPopulationParser.java

/**
 * Reads the second sheet of the given XLS workbook and appends one
 * {@code CountryData} entry per existing data row to {@code countries}.
 *
 * <p>Columns are located through the header titles "Country Code", "Region",
 * "IncomeGroup" and "TableName" (case-insensitive) in the first row; a title that
 * is not found leaves its column index at 0. Blank header cells are skipped. Rows
 * lacking a region or income-group cell are built with the five-argument
 * {@code CountryData} constructor, using empty strings and {@code true}.
 *
 * <p>Any failure is printed and otherwise swallowed, so {@code countries} may be
 * left partially filled.
 *
 * @param inputFile path of the XLS file to read
 */
private void readCountries(String inputFile) {
    // try-with-resources releases the file handle even when parsing fails part-way.
    try (FileInputStream in = new FileInputStream(inputFile)) {
        HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(in));
        HSSFSheet sheet = wb.getSheetAt(1); //page with the information of the countries

        HSSFRow header = sheet.getRow(0);

        int ccInd = 0;
        int regInd = 0;
        int igInd = 0;
        int namInd = 0;
        for (int i = 0; i < header.getLastCellNum(); i++) {
            HSSFCell headerCell = header.getCell(i);
            if (headerCell == null) {
                // A gap in the header row must not abort the whole import.
                continue;
            }
            String title = headerCell.toString();
            if (title.equalsIgnoreCase("Country Code")) {
                ccInd = i;
            } else if (title.equalsIgnoreCase("Region")) {
                regInd = i;
            } else if (title.equalsIgnoreCase("IncomeGroup")) {
                igInd = i;
            } else if (title.equalsIgnoreCase("TableName")) {
                namInd = i;
            }
        }

        // getLastRowNum() is the index of the last row; the physical row count is not a
        // valid index bound when the sheet contains gaps.
        int lastRow = sheet.getLastRowNum();
        for (int r = 1; r <= lastRow; r++) {
            HSSFRow row = sheet.getRow(r);
            if (row == null) {
                continue;
            }

            CountryData cd;
            if (row.getCell(regInd) != null && row.getCell(igInd) != null) {
                cd = new CountryData(row.getCell(namInd).toString().replace("'", "`"),
                        row.getCell(ccInd).toString().replace("'", "`"),
                        row.getCell(regInd).toString().replace("'", "`"),
                        row.getCell(igInd).toString().replace("'", "`"));
            } else {
                cd = new CountryData(row.getCell(namInd).toString(), row.getCell(ccInd).toString(), "", "",
                        true);
            }
            countries.add(cd);
        }
    } catch (Exception ioe) {
        ioe.printStackTrace();
        System.out.println("##### ERROR: It looks like " + inputFile
                + " is not the appropriate type of file or it is not propperly structured");
    }
}

From source file:no.uio.medicine.virsurveillance.parsers.XlsPopulationParser.java

/**
 * Reads the first sheet of the given XLS workbook and stores every population
 * value through {@code sqlM.addPopulation}.
 *
 * <p>Row 3 is treated as the header: its cells from column 4 onwards are parsed as
 * integer years. Data rows start at row 4; column 1 supplies the country name and
 * each non-null cell from column 4 onwards is stored together with the year found
 * at the same position in the header.
 *
 * <p>A failure while storing a single value is printed and the remaining values
 * are still processed. Any other failure is printed and ends the import.
 *
 * @param inputFile path of the XLS file to read
 */
private void readAndStorePopulations(String inputFile) {
    // try-with-resources releases the file handle even when parsing fails part-way.
    try (FileInputStream in = new FileInputStream(inputFile)) {
        HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(in));
        HSSFSheet sheet = wb.getSheetAt(0); //page with the population of the countries

        //Start reading countries;
        HSSFRow header = sheet.getRow(3);
        ArrayList<Integer> years = new ArrayList<>();
        for (int i = 4; i < header.getLastCellNum(); i++) {
            years.add(Integer.parseInt(header.getCell(i).toString()));
        }

        // getLastRowNum() is the index of the last row; the physical row count is not a
        // valid index bound when the sheet contains gaps.
        int lastRow = sheet.getLastRowNum();
        for (int r = 4; r <= lastRow; r++) {
            HSSFRow row = sheet.getRow(r);
            if (row == null) {
                continue;
            }

            String countryName = row.getCell(1).toString();
            for (int i = 4; i < row.getLastCellNum(); i++) {
                HSSFCell valueCell = row.getCell(i);
                if (valueCell == null) {
                    continue;
                }
                try {
                    // Column i of a data row lines up with header column i, i.e. years[i - 4].
                    sqlM.addPopulation(countryName, years.get(i - 4), valueCell.toString());
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    } catch (Exception ioe) {
        ioe.printStackTrace();
        System.out.println("##### ERROR: It looks like " + inputFile
                + " is not the appropriate type of file or it is not propperly structured");
    }
}

From source file:npv.importer.XlsImporter.java

/**
 * Reads the numeric values that follow the tag cell on the first sheet of
 * {@code file}.
 *
 * <p>{@code findTag(sheet, tag)} is called first; the row and column are then taken
 * from {@code rPosition[0]} and {@code rPosition[1]}. Every cell of that row whose
 * column index is greater than {@code rPosition[1]} is read as a numeric value. The
 * values are stored in {@code rValues}, echoed to standard output and returned.
 *
 * @return the values read from the sheet (the same array stored in {@code rValues})
 * @throws IOException if the file cannot be opened, read or closed
 */
private Double[] parseFile() throws IOException {
    HSSFWorkbook wb;
    InputStream inputStream = new FileInputStream(file);
    try {
        wb = new HSSFWorkbook(new POIFSFileSystem(inputStream));
    } finally {
        // The workbook has been loaded (or loading failed); release the file handle.
        inputStream.close();
    }

    HSSFSheet sheet = wb.getSheetAt(0);
    System.out.println("Testing.First row num=" + sheet.getFirstRowNum());
    findTag(sheet, tag);

    //reading an array of Ri values after '#Ri' tag
    HSSFRow row = sheet.getRow(rPosition[0]);
    ArrayList<Double> cellValues = new ArrayList<Double>();

    Iterator<Cell> cellIterator = row.cellIterator();
    while (cellIterator.hasNext()) {
        Cell cell = cellIterator.next();
        if (cell.getColumnIndex() >= rPosition[1] + 1) {
            cellValues.add(cell.getNumericCellValue());
        }
    }

    rValues = cellValues.toArray(new Double[cellValues.size()]);
    System.out.println("Values from sheet:");
    for (int i = 0; i < rValues.length; i++) {
        System.out.println(rValues[i]);
    }

    return this.rValues;
}

From source file:nz.govt.natlib.adapter.works.DocAdapter.java

License:Apache License

/**
 * Adapts an MS Works file: fires the "MSWorks" start event, writes the file info,
 * reports the "Version" as "Works", walks the POI file system from its root via
 * {@code readDirectory}, and finally fires the "MSWorks" end event.
 *
 * <p>Any exception raised while opening or reading the file is rethrown wrapped in
 * a {@link RuntimeException}; the end event is not fired in that case. The input
 * stream is always handed to {@code AdapterUtils.close}.
 *
 * @param file the file to adapt
 * @param ctx  the parser context that receives the events
 * @throws IOException declared by the signature
 */
public void adapt(File file, ParserContext ctx) throws IOException {
    ctx.fireStartParseEvent("MSWorks");
    writeFileInfo(file, ctx);
    ctx.fireParseEvent("Version", "Works");

    FileInputStream input = null;
    try {
        input = new FileInputStream(file);
        POIFSFileSystem fileSystem = new POIFSFileSystem(input);
        readDirectory(fileSystem, fileSystem.getRoot(), ctx);
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        AdapterUtils.close(input);
    }

    ctx.fireEndParseEvent("MSWorks");
}

From source file:org.alinous.poi.PoiManager.java

License:GNU General Public License

/**
 * Opens the workbook stored at {@code path}.
 *
 * <p>Remembers the path, then populates the {@code inStream}, {@code fs} and
 * {@code wb} fields in that order; the workbook is created with {@code false} as
 * its second constructor argument.
 *
 * @param path location of the workbook file
 * @throws IOException if the stream, file system or workbook cannot be created
 */
public void open(String path) throws IOException {
    this.path = path;

    final AlinousFile workbookFile = new AlinousFile(this.path);
    inStream = new AlinousFileInputStream(workbookFile);
    fs = new POIFSFileSystem(inStream);
    wb = new HSSFWorkbook(fs, false);
}

From source file:org.amanzi.splash.importer.ExcelImporter.java

License:Open Source License

/**
 * Imports every sheet of the Excel workbook read from {@code fileContent} inside a
 * single transaction.
 *
 * <p>The transaction is marked successful only after all sheets have been created.
 * If the import fails, the transaction is finished without the success mark, so a
 * partial import is not flagged for commit. The progress monitor is always told
 * that the task is done.
 *
 * @param monitor progress monitor that receives task, sub-task and progress updates
 * @throws InvocationTargetException wrapping any {@link IOException} raised while
 *                                   reading the workbook
 */
@Override
public void run(IProgressMonitor monitor) throws InvocationTargetException {
    monitor.beginTask("Importing data from Excel", 100);

    Transaction tx = NeoUtils.beginTransaction();
    try {
        HSSFWorkbook workBook = new HSSFWorkbook(new POIFSFileSystem(fileContent));

        createRootSpreadsheet();

        int sheetCount = workBook.getNumberOfSheets();
        for (int i = 0; i < sheetCount; i++) {
            String sheetName = workBook.getSheetName(i);
            monitor.subTask("Sheet " + sheetName);
            createSheet(workBook.getSheetAt(i), sheetName, tx);
            monitor.worked(100 / sheetCount);
        }

        // Reached only when every sheet was imported without an exception.
        tx.success();
    } catch (IOException e) {
        throw new InvocationTargetException(e);
    } finally {
        try {
            tx.finish();
        } finally {
            monitor.done();
        }
    }
}

From source file:org.apache.jackrabbit.extractor.MsExcelTextExtractor.java

License:Apache License

/**
 * {@inheritDoc}/*from  www  .j av a 2 s  .com*/
 */
public Reader extractText(InputStream stream, String type, String encoding) throws IOException {
    CharArrayWriter writer = new CharArrayWriter();
    try {
        POIFSFileSystem fs = new POIFSFileSystem(stream);
        HSSFWorkbook workbook = new HSSFWorkbook(fs);

        for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
            HSSFSheet sheet = workbook.getSheetAt(i);

            Iterator rows = sheet.rowIterator();
            while (rows.hasNext()) {
                HSSFRow row = (HSSFRow) rows.next();

                Iterator cells = row.cellIterator();
                while (cells.hasNext()) {
                    HSSFCell cell = (HSSFCell) cells.next();
                    switch (cell.getCellType()) {
                    case HSSFCell.CELL_TYPE_NUMERIC:
                        String num = Double.toString(cell.getNumericCellValue()).trim();
                        if (num.length() > 0) {
                            writer.write(num + " ");
                        }
                        break;
                    case HSSFCell.CELL_TYPE_STRING:
                        String text = cell.getStringCellValue().trim();
                        if (text.length() > 0) {
                            writer.write(text + " ");
                        }
                        break;
                    }
                }
            }
        }

        return new CharArrayReader(writer.toCharArray());
    } catch (RuntimeException e) {
        logger.warn("Failed to extract Excel text content", e);
        return new StringReader("");
    } finally {
        stream.close();
    }
}

From source file:org.apache.nutch.parse.msword.WordExtractor.java

License:Apache License

/**
 * Gets the text from a Word document.
 *
 * <p>Reads the "WordDocument" stream as the header, rejects fast-saved and
 * password-protected files, hands Word 6.0 documents (version values 101-104) to
 * {@code Word6Extractor}, and otherwise combines the text pieces with the
 * character runs, skipping runs that {@code isDeleted} reports as deleted.
 *
 * @param in The InputStream representing the Word file.
 * @return the extracted text
 * @throws FastSavedException if bit 0x4 of the header flags is set
 * @throws PasswordProtectedException if bit 0x100 of the header flags is set
 * @throws Exception on any other failure while reading the document
 */
protected String extractText(InputStream in) throws Exception {

    ArrayList text = new ArrayList(); // NOTE(review): never used in this method
    POIFSFileSystem fsys = new POIFSFileSystem(in);

    // load our POIFS document streams.
    DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument");
    DocumentInputStream din = fsys.createDocumentInputStream("WordDocument");
    byte[] header = new byte[headerProps.getSize()];

    // NOTE(review): the return value of read() is ignored; this assumes a single
    // call fills the whole buffer - confirm for the POI version in use.
    din.read(header);
    din.close();

    // 16-bit flags word at header offset 0xa.
    int info = LittleEndian.getShort(header, 0xa);
    if ((info & 0x4) != 0) {
        throw new FastSavedException("Fast-saved files are unsupported at this time");
    }
    if ((info & 0x100) != 0) {
        throw new PasswordProtectedException("This document is password protected");
    }

    // determine the version of Word this document came from.
    int nFib = LittleEndian.getShort(header, 0x2);
    switch (nFib) {
    case 101:
    case 102:
    case 103:
    case 104:
        // this is a Word 6.0 doc send it to the extractor for that version.
        Word6Extractor oldExtractor = new Word6Extractor();
        return oldExtractor.extractText(header);
    }

    //Get the information we need from the header
    boolean useTable1 = (info & 0x200) != 0;

    //get the location of the piece table
    int complexOffset = LittleEndian.getInt(header, 0x1a2);

    // determine which table stream we must use.
    String tableName = null;
    if (useTable1) {
        tableName = "1Table";
    } else {
        tableName = "0Table";
    }

    DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName);
    byte[] tableStream = new byte[table.getSize()];

    din = fsys.createDocumentInputStream(tableName);

    // NOTE(review): as above, the number of bytes actually read is not checked.
    din.read(tableStream);
    din.close();

    // Header fields passed to the character-property table: offset (0xfa),
    // size (0xfe) and the fcMin value (0x18).
    int chpOffset = LittleEndian.getInt(header, 0xfa);
    int chpSize = LittleEndian.getInt(header, 0xfe);
    int fcMin = LittleEndian.getInt(header, 0x18);
    CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin);

    // load our text pieces and our character runs
    ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin);
    TextPieceTable tpt = cft.getTextPieceTable();
    List textPieces = tpt.getTextPieces();

    // make the POIFS objects available for garbage collection
    din = null;
    fsys = null;
    table = null;
    headerProps = null;

    List textRuns = cbt.getTextRuns();
    Iterator runIt = textRuns.iterator();
    Iterator textIt = textPieces.iterator();

    // NOTE(review): next() is called without hasNext(); presumably a document
    // always has at least one text piece - otherwise this throws.
    TextPiece currentPiece = (TextPiece) textIt.next();
    int currentTextStart = currentPiece.getStart();
    int currentTextEnd = currentPiece.getEnd();

    WordTextBuffer finalTextBuf = new WordTextBuffer();

    // iterate through all text runs extract the text only if they haven't been
    // deleted
    while (runIt.hasNext()) {
        CHPX chpx = (CHPX) runIt.next();
        boolean deleted = isDeleted(chpx.getGrpprl());
        if (deleted) {
            continue;
        }

        int runStart = chpx.getStart();
        int runEnd = chpx.getEnd();

        // Advance to the first piece that ends after the start of this run.
        // NOTE(review): no hasNext() check here either - verify that runs never
        // start beyond the last text piece.
        while (runStart >= currentTextEnd) {
            currentPiece = (TextPiece) textIt.next();
            currentTextStart = currentPiece.getStart();
            currentTextEnd = currentPiece.getEnd();
        }

        if (runEnd < currentTextEnd) {
            // The run lies entirely inside the current piece.
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else if (runEnd > currentTextEnd) {
            // The run spans several pieces: append the remainder of each piece and
            // move on until the piece containing the end of the run is reached.
            while (runEnd > currentTextEnd) {
                String str = currentPiece.substring(runStart - currentTextStart,
                        currentTextEnd - currentTextStart);
                finalTextBuf.append(str);
                if (textIt.hasNext()) {
                    currentPiece = (TextPiece) textIt.next();
                    currentTextStart = currentPiece.getStart();
                    runStart = currentTextStart;
                    currentTextEnd = currentPiece.getEnd();
                } else {
                    // No pieces left: return what has been collected so far.
                    return finalTextBuf.toString();
                }
            }
            String str = currentPiece.substring(0, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else {
            // The run ends exactly where the current piece ends: take the text,
            // then step to the next piece if there is one.
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            if (textIt.hasNext()) {
                currentPiece = (TextPiece) textIt.next();
                currentTextStart = currentPiece.getStart();
                currentTextEnd = currentPiece.getEnd();
            }
            finalTextBuf.append(str);
        }
    }
    return finalTextBuf.toString();
}