Example usage for org.apache.poi.poifs.filesystem POIFSFileSystem POIFSFileSystem

List of usage examples for org.apache.poi.poifs.filesystem POIFSFileSystem POIFSFileSystem

Introduction

This page collects usage examples for the org.apache.poi.poifs.filesystem.POIFSFileSystem constructor POIFSFileSystem(InputStream).

Prototype


public POIFSFileSystem(InputStream stream) throws IOException 

Source Link

Document

Create a POIFSFileSystem from an InputStream.

Usage

From source file:no.abmu.organisationregister.util.ExcelWithLibraryInformationParser.java

License:Open Source License

/**
 * Loads the Excel document named by {@code excelFileName} into {@code workBook}.
 *
 * <p>The file is checked for existence and readability, read into a
 * {@link POIFSFileSystem} and then parsed as an {@link HSSFWorkbook}. Every
 * failure is logged before the corresponding exception is thrown.
 *
 * @throws IllegalStateException if no file name has been set, or if reading or
 *         parsing the file fails (the underlying {@link IOException} is the cause)
 * @throws IllegalArgumentException if the file does not exist or is not readable
 */
protected void loadExcelDocument() {
    if (excelFileName == null) {
        logger.error("Can't parse Excel document. No filename specified");
        throw new IllegalStateException("Can't parse Excel document. No filename specified");
    }

    logger.debug("Will load Excel document having filename '" + excelFileName + "'");

    File file = new File(excelFileName);
    if (!file.exists()) {
        logger.error(
                "Can't load Excel document having filename '" + excelFileName + "'. The file does not exist");
        throw new IllegalArgumentException(
                "Can't parse Excel document. File " + excelFileName + " does not exist");
    }

    if (!file.canRead()) {
        logger.error("Can't load Excel document having filename '" + excelFileName + "'. No read access.");
        throw new IllegalArgumentException("Can't parse Excel document. No access");
    }

    POIFSFileSystem poifsFileSystem;
    FileInputStream input = null;
    try {
        input = new FileInputStream(file);
        poifsFileSystem = new POIFSFileSystem(input);
    } catch (IOException e) {
        logger.error("Can't load Excel document having filename '" + excelFileName
                + "'. Failed when reading file : '" + e.getMessage() + "'", e);
        // Keep the original exception as the cause so callers can diagnose the failure.
        throw new IllegalStateException("Can't parse Excel document. Failed to read.", e);
    } finally {
        // The stream was opened here, so it is released here; previously it was never closed.
        if (input != null) {
            try {
                input.close();
            } catch (IOException ignored) {
                // Either the file system was already built from the stream or the read
                // failure is being reported above; a failure to close adds nothing actionable.
            }
        }
    }

    try {
        workBook = new HSSFWorkbook(poifsFileSystem);
    } catch (IOException e) {
        logger.error("Can't load Excel document having filename '" + excelFileName
                + "'. Failed when parsing file : '" + e.getMessage() + "'", e);
        throw new IllegalStateException("Can't parse Excel document. Failed to parse Excel data.", e);
    }

    logger.debug("Successfully loaded and parsed Excel document");
}

From source file:no.trank.openpipe.parse.ms.ExcelParser.java

License:Apache License

/**
 * Parses an Excel document from the stream supplied by {@code data}.
 *
 * <p>The result carries the text extracted from the workbook and the value of the
 * {@code "title"} property; the full property map is attached only when
 * {@code data.includeProperties()} returns {@code true}.
 *
 * @param data source of the input stream and of the include-properties flag
 * @return the populated parser result
 * @throws IOException if the stream cannot be read as a POIFS/Excel document
 * @throws ParserException declared by the parser contract
 */
@Override
public ParserResult parse(ParseData data) throws IOException, ParserException {
    final POIFSFileSystem fileSystem = new POIFSFileSystem(data.getInputStream());
    final HSSFWorkbook workbook = new HSSFWorkbook(fileSystem);
    final Map<String, String> docProperties = POIUtils.getProperties(fileSystem);

    final ParserResultImpl parsed = new ParserResultImpl();
    parsed.setText(getText(workbook));
    parsed.setTitle(docProperties.get("title"));

    final boolean wantsProperties = data.includeProperties();
    if (wantsProperties) {
        parsed.setProperties(docProperties);
    }
    return parsed;
}

From source file:no.uio.medicine.virsurveillance.parsers.XlsPopulationParser.java

/**
 * Reads the country sheet (sheet index 1) of an .xls workbook and appends one
 * {@code CountryData} per non-null data row to {@code countries}.
 *
 * <p>Columns are located by matching the header row (row 0) case-insensitively
 * against "Country Code", "Region", "IncomeGroup" and "TableName"; a column whose
 * title is not found keeps index 0. Rows that have both a region and an income-group
 * cell use the four-argument constructor with apostrophes replaced by backticks;
 * other rows use the five-argument constructor with empty region/income group.
 *
 * <p>Any exception is reported on stdout/stderr and ends the read; nothing is rethrown.
 *
 * @param inputFile path of the .xls file to read
 */
private void readCountries(String inputFile) {
    try {
        HSSFWorkbook wb;
        // Release the file handle as soon as the workbook has been built from it;
        // the stream was previously never closed.
        try (FileInputStream in = new FileInputStream(inputFile)) {
            wb = new HSSFWorkbook(new POIFSFileSystem(in));
        }

        HSSFSheet sheet = wb.getSheetAt(1); //page with the information of the countries
        int rows = sheet.getPhysicalNumberOfRows(); // No of rows

        HSSFRow header = sheet.getRow(0);

        int ccInd = 0;
        int regInd = 0;
        int igInd = 0;
        int namInd = 0;
        for (int i = 0; i < header.getLastCellNum(); i++) {
            HSSFCell headerCell = header.getCell(i);
            if (headerCell == null) {
                // An undefined header cell used to raise a NullPointerException that
                // aborted the whole import; skip the gap instead.
                continue;
            }
            String title = headerCell.toString();
            if (title.equalsIgnoreCase("Country Code")) {
                ccInd = i;
            }
            if (title.equalsIgnoreCase("Region")) {
                regInd = i;
            }
            if (title.equalsIgnoreCase("IncomeGroup")) {
                igInd = i;
            }
            if (title.equalsIgnoreCase("TableName")) {
                namInd = i;
            }
        }

        for (int r = 1; r < rows; r++) {
            HSSFRow row = sheet.getRow(r);
            if (row != null) {
                CountryData cd;
                if (row.getCell(regInd) != null && row.getCell(igInd) != null) {
                    cd = new CountryData(row.getCell(namInd).toString().replace("'", "`"),
                            row.getCell(ccInd).toString().replace("'", "`"),
                            row.getCell(regInd).toString().replace("'", "`"),
                            row.getCell(igInd).toString().replace("'", "`"));
                } else {
                    cd = new CountryData(row.getCell(namInd).toString(), row.getCell(ccInd).toString(), "", "",
                            true);
                }
                countries.add(cd);
            }
        }
    } catch (Exception ioe) {
        ioe.printStackTrace();
        System.out.println("##### ERROR: It looks like " + inputFile
                + " is not the appropriate type of file or it is not propperly structured");
    }
}

From source file:no.uio.medicine.virsurveillance.parsers.XlsPopulationParser.java

/**
 * Reads the population sheet (sheet index 0) of an .xls workbook and stores every
 * population value through {@code sqlM.addPopulation}.
 *
 * <p>Row 3 is the header: its cells from column 4 onwards are parsed as years. For each
 * non-null row from row 4 on, column 1 supplies the country name and every non-null cell
 * from column 4 onwards is stored together with the year at the same offset.
 *
 * <p>A failure while storing a single value is printed and skipped; any other exception
 * is reported on stdout/stderr and ends the read. Nothing is rethrown.
 *
 * @param inputFile path of the .xls file to read
 */
private void readAndStorePopulations(String inputFile) {
    try {
        HSSFWorkbook wb;
        // Release the file handle as soon as the workbook has been built from it;
        // the stream was previously never closed.
        try (FileInputStream in = new FileInputStream(inputFile)) {
            wb = new HSSFWorkbook(new POIFSFileSystem(in));
        }

        HSSFSheet sheet = wb.getSheetAt(0); //page with the population of the countries
        int rows = sheet.getPhysicalNumberOfRows(); // No of rows

        //Start reading countries;
        HSSFRow header = sheet.getRow(3);
        ArrayList<Integer> years = new ArrayList<>();
        for (int i = 4; i < header.getLastCellNum(); i++) {
            // NOTE(review): parseInt needs the cell's toString() to be a plain integer;
            // confirm the year headers are stored as text rather than numeric cells.
            years.add(Integer.parseInt(header.getCell(i).toString()));
        }

        for (int r = 4; r < rows; r++) {
            HSSFRow row = sheet.getRow(r);
            if (row != null) {
                String countryName = row.getCell(1).toString();

                // count tracks the offset into years and advances for empty cells too,
                // so values stay aligned with their header column.
                int count = 0;
                for (int i = 4; i < row.getLastCellNum(); i++) {
                    if (row.getCell(i) != null) {
                        try {
                            sqlM.addPopulation(countryName, years.get(count), row.getCell(i).toString());
                        } catch (Exception e) {
                            // Best effort: one bad value must not stop the remaining ones.
                            e.printStackTrace();
                        }
                    }
                    count++;
                }
            }
        }
    } catch (Exception ioe) {
        ioe.printStackTrace();
        System.out.println("##### ERROR: It looks like " + inputFile
                + " is not the appropriate type of file or it is not propperly structured");
    }
}

From source file:npv.importer.XlsImporter.java

/**
 * Reads the numeric values that follow the tag cell on the first sheet of {@code file}.
 *
 * <p>{@code findTag(sheet, tag)} is called first; the row at {@code rPosition[0]} is then
 * scanned and every cell whose column index is greater than {@code rPosition[1]} contributes
 * its numeric value. The values are stored in {@code rValues}, echoed to stdout and returned.
 *
 * @return the values read, also kept in {@code rValues}
 * @throws IOException if the file cannot be opened or read as an Excel document
 */
private Double[] parseFile() throws IOException {
    HSSFWorkbook wb;
    InputStream inputStream = new FileInputStream(file);
    try {
        wb = new HSSFWorkbook(new POIFSFileSystem(inputStream));
    } finally {
        // The stream is opened by this method and was previously never closed.
        inputStream.close();
    }

    HSSFSheet sheet = wb.getSheetAt(0);
    System.out.println("Testing.First row num=" + sheet.getFirstRowNum());
    findTag(sheet, tag);

    //reading an array of Ri values after '#Ri' tag
    HSSFRow row = sheet.getRow(rPosition[0]);
    ArrayList<Double> cellValues = new ArrayList<Double>();

    Iterator<Cell> cellIterator = row.cellIterator();
    while (cellIterator.hasNext()) {
        Cell cell = cellIterator.next();
        if (cell.getColumnIndex() >= rPosition[1] + 1) {
            cellValues.add(cell.getNumericCellValue());
        }
    }

    // Single allocation; the earlier version created a throwaway array just to read its length.
    rValues = cellValues.toArray(new Double[cellValues.size()]);
    System.out.println("Values from sheet:");
    for (int i = 0; i < rValues.length; i++) {
        System.out.println(rValues[i]);
    }

    return this.rValues;
}

From source file:nz.govt.natlib.adapter.works.DocAdapter.java

License:Apache License

/**
 * Emits the parse events for a file handled by this adapter.
 *
 * <p>Fires the "MSWorks" start event, writes the file info, fires a "Version"/"Works"
 * event, walks the POIFS directory tree from its root via {@code readDirectory}, and
 * finally fires the "MSWorks" end event. The end event is not fired when reading fails.
 *
 * @param file the file to read
 * @param ctx receiver of the parse events
 * @throws IOException declared by the signature
 * @throws RuntimeException wrapping any exception raised while opening or walking the file
 */
public void adapt(File file, ParserContext ctx) throws IOException {
    ctx.fireStartParseEvent("MSWorks");
    writeFileInfo(file, ctx);
    ctx.fireParseEvent("Version", "Works");

    FileInputStream input = null;
    try {
        input = new FileInputStream(file);
        final POIFSFileSystem fileSystem = new POIFSFileSystem(input);
        final DirectoryEntry rootEntry = fileSystem.getRoot();
        readDirectory(fileSystem, rootEntry, ctx);
    } catch (Exception cause) {
        throw new RuntimeException(cause);
    } finally {
        // Runs on success and on failure; input is still null if the file could not be opened.
        AdapterUtils.close(input);
    }

    ctx.fireEndParseEvent("MSWorks");
}

From source file:org.alinous.poi.PoiManager.java

License:GNU General Public License

/**
 * Opens the workbook stored at {@code path} and keeps the path, input stream,
 * POIFS file system and workbook in the corresponding fields of this object.
 *
 * @param path location of the Excel file, resolved through {@code AlinousFile}
 * @throws IOException if the file cannot be opened or read as an Excel document
 */
public void open(String path) throws IOException {
    this.path = path;

    final AlinousFile source = new AlinousFile(this.path);
    this.inStream = new AlinousFileInputStream(source);

    // Fields are assigned step by step, so the ones set before a failure stay set.
    this.fs = new POIFSFileSystem(this.inStream);
    this.wb = new HSSFWorkbook(this.fs, false);
}

From source file:org.amanzi.splash.importer.ExcelImporter.java

License:Open Source License

@Override
public void run(IProgressMonitor monitor) throws InvocationTargetException {
    POIFSFileSystem fileSystem = null;/*from w  ww.j ava 2  s .  c o m*/

    monitor.beginTask("Importing data from Excel", 100);

    Transaction tx = NeoUtils.beginTransaction();
    try {
        fileSystem = new POIFSFileSystem(fileContent);

        HSSFWorkbook workBook = new HSSFWorkbook(fileSystem);

        createRootSpreadsheet();

        for (int i = 0; i < workBook.getNumberOfSheets(); i++) {
            monitor.subTask("Sheet " + workBook.getSheetName(i));
            createSheet(workBook.getSheetAt(i), workBook.getSheetName(i), tx);
            monitor.worked(100 / workBook.getNumberOfSheets());
        }
        monitor.done();

    } catch (IOException e) {
        throw new InvocationTargetException(e);
    } finally {
        tx.success();
        tx.finish();
    }
}

From source file:org.apache.jackrabbit.extractor.MsExcelTextExtractor.java

License:Apache License

/**
 * {@inheritDoc}/*from  www  .j av a 2 s  .com*/
 */
public Reader extractText(InputStream stream, String type, String encoding) throws IOException {
    CharArrayWriter writer = new CharArrayWriter();
    try {
        POIFSFileSystem fs = new POIFSFileSystem(stream);
        HSSFWorkbook workbook = new HSSFWorkbook(fs);

        for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
            HSSFSheet sheet = workbook.getSheetAt(i);

            Iterator rows = sheet.rowIterator();
            while (rows.hasNext()) {
                HSSFRow row = (HSSFRow) rows.next();

                Iterator cells = row.cellIterator();
                while (cells.hasNext()) {
                    HSSFCell cell = (HSSFCell) cells.next();
                    switch (cell.getCellType()) {
                    case HSSFCell.CELL_TYPE_NUMERIC:
                        String num = Double.toString(cell.getNumericCellValue()).trim();
                        if (num.length() > 0) {
                            writer.write(num + " ");
                        }
                        break;
                    case HSSFCell.CELL_TYPE_STRING:
                        String text = cell.getStringCellValue().trim();
                        if (text.length() > 0) {
                            writer.write(text + " ");
                        }
                        break;
                    }
                }
            }
        }

        return new CharArrayReader(writer.toCharArray());
    } catch (RuntimeException e) {
        logger.warn("Failed to extract Excel text content", e);
        return new StringReader("");
    } finally {
        stream.close();
    }
}

From source file:org.apache.nutch.parse.msword.WordExtractor.java

License:Apache License

/**
 * Gets the text from a Word document.
 *
 * <p>Reads the "WordDocument" stream and the table stream ("0Table" or "1Table") from the
 * POIFS container, builds the character-run table and the text-piece table from them, and
 * concatenates the text of every run that is not marked as deleted.
 *
 * @param in The InputStream representing the Word file.
 * @return the extracted text
 * @throws FastSavedException if the header flags mark the file as fast-saved
 * @throws PasswordProtectedException if the header flags mark the file as password protected
 * @throws Exception on any other failure while reading or decoding the document
 */
protected String extractText(InputStream in) throws Exception {

    // NOTE(review): 'text' is never used in this method.
    ArrayList text = new ArrayList();
    POIFSFileSystem fsys = new POIFSFileSystem(in);

    // load our POIFS document streams.
    DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument");
    DocumentInputStream din = fsys.createDocumentInputStream("WordDocument");
    byte[] header = new byte[headerProps.getSize()];

    // NOTE(review): the return value of read() is ignored; confirm that a single call
    // always fills the whole buffer for this stream type.
    din.read(header);
    din.close();

    // Flag word at header offset 0xa: bit 0x4 = fast-saved, bit 0x100 = password
    // protected, bit 0x200 = use the "1Table" stream (see below).
    int info = LittleEndian.getShort(header, 0xa);
    if ((info & 0x4) != 0) {
        throw new FastSavedException("Fast-saved files are unsupported at this time");
    }
    if ((info & 0x100) != 0) {
        throw new PasswordProtectedException("This document is password protected");
    }

    // determine the version of Word this document came from.
    int nFib = LittleEndian.getShort(header, 0x2);
    switch (nFib) {
    case 101:
    case 102:
    case 103:
    case 104:
        // this is a Word 6.0 doc send it to the extractor for that version.
        Word6Extractor oldExtractor = new Word6Extractor();
        return oldExtractor.extractText(header);
    }

    //Get the information we need from the header
    boolean useTable1 = (info & 0x200) != 0;

    //get the location of the piece table
    int complexOffset = LittleEndian.getInt(header, 0x1a2);

    // determine which table stream we must use.
    String tableName = null;
    if (useTable1) {
        tableName = "1Table";
    } else {
        tableName = "0Table";
    }

    DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName);
    byte[] tableStream = new byte[table.getSize()];

    din = fsys.createDocumentInputStream(tableName);

    // NOTE(review): same unchecked read() as for the header above.
    din.read(tableStream);
    din.close();

    // Offsets read from the header and handed to the table parsers below.
    int chpOffset = LittleEndian.getInt(header, 0xfa);
    int chpSize = LittleEndian.getInt(header, 0xfe);
    int fcMin = LittleEndian.getInt(header, 0x18);
    CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin);

    // load our text pieces and our character runs
    ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin);
    TextPieceTable tpt = cft.getTextPieceTable();
    List textPieces = tpt.getTextPieces();

    // make the POIFS objects available for garbage collection
    din = null;
    fsys = null;
    table = null;
    headerProps = null;

    List textRuns = cbt.getTextRuns();
    Iterator runIt = textRuns.iterator();
    Iterator textIt = textPieces.iterator();

    // NOTE(review): next() is called without hasNext(); an empty piece list would
    // presumably throw NoSuchElementException here.
    TextPiece currentPiece = (TextPiece) textIt.next();
    int currentTextStart = currentPiece.getStart();
    int currentTextEnd = currentPiece.getEnd();

    WordTextBuffer finalTextBuf = new WordTextBuffer();

    // iterate through all text runs extract the text only if they haven't been
    // deleted
    while (runIt.hasNext()) {
        CHPX chpx = (CHPX) runIt.next();
        boolean deleted = isDeleted(chpx.getGrpprl());
        if (deleted) {
            continue;
        }

        int runStart = chpx.getStart();
        int runEnd = chpx.getEnd();

        // Advance to the text piece that contains the start of this run.
        // NOTE(review): no hasNext() guard here either.
        while (runStart >= currentTextEnd) {
            currentPiece = (TextPiece) textIt.next();
            currentTextStart = currentPiece.getStart();
            currentTextEnd = currentPiece.getEnd();
        }

        if (runEnd < currentTextEnd) {
            // The run lies entirely inside the current piece.
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else if (runEnd > currentTextEnd) {
            // The run spans several pieces: take the rest of each piece, move to the
            // next one, and finish with the leading part of the last piece.
            while (runEnd > currentTextEnd) {
                String str = currentPiece.substring(runStart - currentTextStart,
                        currentTextEnd - currentTextStart);
                finalTextBuf.append(str);
                if (textIt.hasNext()) {
                    currentPiece = (TextPiece) textIt.next();
                    currentTextStart = currentPiece.getStart();
                    runStart = currentTextStart;
                    currentTextEnd = currentPiece.getEnd();
                } else {
                    // No pieces left: return what has been collected so far.
                    return finalTextBuf.toString();
                }
            }
            String str = currentPiece.substring(0, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else {
            // The run ends exactly at the end of the current piece: extract it, then
            // step to the next piece if there is one.
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            if (textIt.hasNext()) {
                currentPiece = (TextPiece) textIt.next();
                currentTextStart = currentPiece.getStart();
                currentTextEnd = currentPiece.getEnd();
            }
            finalTextBuf.append(str);
        }
    }
    return finalTextBuf.toString();
}