List of usage examples for the org.apache.poi.poifs.filesystem.POIFSFileSystem constructor POIFSFileSystem(InputStream)
public POIFSFileSystem(InputStream stream) throws IOException
From source file:no.abmu.organisationregister.util.ExcelWithLibraryInformationParser.java
License:Open Source License
/**
 * Loads the Excel document named by {@code excelFileName} and stores the
 * parsed workbook in {@code workBook}.
 *
 * <p>The file is checked for existence and readability before it is opened.
 * The file stream is always closed once the POIFS file system has been
 * built from it, whether or not reading succeeded.
 *
 * @throws IllegalStateException if no file name has been set, or if the
 *         file cannot be read or parsed (the underlying {@link IOException}
 *         is attached as the cause)
 * @throws IllegalArgumentException if the file does not exist or cannot be
 *         read by the current process
 */
protected void loadExcelDocument() {
    if (excelFileName == null) {
        logger.error("Can't parse Excel document. No filename specified");
        throw new IllegalStateException("Can't parse Excel document. No filename specified");
    }

    logger.debug("Will load Excel document having filename '" + excelFileName + "'");

    File file = new File(excelFileName);
    if (!file.exists()) {
        logger.error(
                "Can't load Excel document having filename '" + excelFileName + "'. The file does not exist");
        throw new IllegalArgumentException(
                "Can't parse Excel document. File " + excelFileName + " does not exist");
    }
    if (!file.canRead()) {
        logger.error("Can't load Excel document having filename '" + excelFileName + "'. No read access.");
        throw new IllegalArgumentException("Can't parse Excel document. No access");
    }

    POIFSFileSystem poifsFileSystem = null;
    FileInputStream in = null;
    try {
        in = new FileInputStream(file);
        poifsFileSystem = new POIFSFileSystem(in);
    } catch (IOException e) {
        logger.error("Can't load Excel document having filename '" + excelFileName
                + "'. Failed when reading file : '" + e.getMessage() + "'", e);
        // Keep the cause so callers can see why reading failed.
        throw new IllegalStateException("Can't parse Excel document. Failed to read.", e);
    } finally {
        // Release the file handle on every path; the original never closed it.
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing a read-only stream fails.
            }
        }
    }

    try {
        workBook = new HSSFWorkbook(poifsFileSystem);
    } catch (IOException e) {
        logger.error("Can't load Excel document having filename '" + excelFileName
                + "'. Failed when parsing file : '" + e.getMessage() + "'", e);
        throw new IllegalStateException("Can't parse Excel document. Failed to parse Excel data.", e);
    }

    logger.debug("Successfully loaded and parsed Excel document");
}
From source file:no.trank.openpipe.parse.ms.ExcelParser.java
License:Apache License
/**
 * Parses an Excel document supplied through {@code data}.
 *
 * <p>The workbook text becomes the result text, and the {@code "title"}
 * entry of the document properties becomes the result title. The full
 * property map is attached only when {@code data.includeProperties()}
 * returns {@code true}.
 *
 * @param data source of the input stream and of the include-properties flag
 * @return the populated parser result
 * @throws IOException if the document cannot be read
 * @throws ParserException declared by the overridden method
 */
@Override
public ParserResult parse(ParseData data) throws IOException, ParserException {
    final POIFSFileSystem fileSystem = new POIFSFileSystem(data.getInputStream());
    final HSSFWorkbook workbook = new HSSFWorkbook(fileSystem);
    final Map<String, String> docProperties = POIUtils.getProperties(fileSystem);

    final ParserResultImpl parsed = new ParserResultImpl();
    parsed.setText(getText(workbook));
    parsed.setTitle(docProperties.get("title"));

    final boolean wantsProperties = data.includeProperties();
    if (wantsProperties) {
        parsed.setProperties(docProperties);
    }
    return parsed;
}
From source file:no.uio.medicine.virsurveillance.parsers.XlsPopulationParser.java
private void readCountries(String inputFile) { try {/*from w w w .j a va2s . c o m*/ POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(inputFile)); HSSFWorkbook wb = new HSSFWorkbook(fs); // HSSFSheet sheet = wb.getSheetAt(1); //page with the information of the countries HSSFRow row; HSSFCell cell; int rows; // No of rows rows = sheet.getPhysicalNumberOfRows(); int cols = 0; // No of columns int tmp = 0; // This trick ensures that we get the data properly even if it doesn't start from first few rows. // taken from stack overflow for (int i = 0; i < 10 || i < rows; i++) { row = sheet.getRow(i); if (row != null) { tmp = sheet.getRow(i).getPhysicalNumberOfCells(); if (tmp > cols) { cols = tmp; } } } HSSFRow header = sheet.getRow(0); int ccInd = 0; int regInd = 0; int igInd = 0; int namInd = 0; for (int i = 0; i < header.getLastCellNum(); i++) { if (header.getCell(i).toString().equalsIgnoreCase("Country Code")) { ccInd = i; } if (header.getCell(i).toString().equalsIgnoreCase("Region")) { regInd = i; } if (header.getCell(i).toString().equalsIgnoreCase("IncomeGroup")) { igInd = i; } if (header.getCell(i).toString().equalsIgnoreCase("TableName")) { namInd = i; } } for (int r = 1; r < rows; r++) { row = sheet.getRow(r); if (row != null) { CountryData cd; if (row.getCell(regInd) != null && row.getCell(igInd) != null) { cd = new CountryData(row.getCell(namInd).toString().replace("'", "`"), row.getCell(ccInd).toString().replace("'", "`"), row.getCell(regInd).toString().replace("'", "`"), row.getCell(igInd).toString().replace("'", "`")); } else { cd = new CountryData(row.getCell(namInd).toString(), row.getCell(ccInd).toString(), "", "", true); } countries.add(cd); } } } catch (Exception ioe) { ioe.printStackTrace(); System.out.println("##### ERROR: It looks like " + inputFile + " is not the appropriate type of file or it is not propperly structured"); } }
From source file:no.uio.medicine.virsurveillance.parsers.XlsPopulationParser.java
private void readAndStorePopulations(String inputFile) { try {/*w w w. ja v a2s . c om*/ POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(inputFile)); HSSFWorkbook wb = new HSSFWorkbook(fs); // HSSFSheet sheet = wb.getSheetAt(0); //page with the population of the countries HSSFRow row; HSSFCell cell; int rows; // No of rows rows = sheet.getPhysicalNumberOfRows(); int cols = 0; // No of columns int tmp = 0; // This trick ensures that we get the data properly even if it doesn't start from first few rows. // taken from stack overflow for (int i = 0; i < 10 || i < rows; i++) { row = sheet.getRow(i); if (row != null) { tmp = sheet.getRow(i).getPhysicalNumberOfCells(); if (tmp > cols) { cols = tmp; } } } //Start reading countries; HSSFRow header = sheet.getRow(3); ArrayList<Integer> years = new ArrayList<>(); for (int i = 4; i < header.getLastCellNum(); i++) { years.add(Integer.parseInt(header.getCell(i).toString())); } for (int r = 4; r < rows; r++) { row = sheet.getRow(r); if (row != null) { String countryName = row.getCell(1).toString(); int count = 0; for (int i = 4; i < row.getLastCellNum(); i++) { if (row.getCell(i) != null) { //System.out.print(years.get(count)+": "+Float.parseFloat(row.getCell(i).toString())+ " - "); try { sqlM.addPopulation(countryName, years.get(count), row.getCell(i).toString()); } catch (Exception e) { e.printStackTrace(); } } count++; } } } } catch (Exception ioe) { ioe.printStackTrace(); System.out.println("##### ERROR: It looks like " + inputFile + " is not the appropriate type of file or it is not propperly structured"); } }
From source file:npv.importer.XlsImporter.java
private Double[] parseFile() throws IOException { InputStream inputStream = new FileInputStream(file); POIFSFileSystem fs = new POIFSFileSystem(inputStream); HSSFWorkbook wb = new HSSFWorkbook(fs); HSSFSheet sheet = wb.getSheetAt(0);//from w ww. j a v a 2 s. c o m System.out.println("Testing.First row num=" + sheet.getFirstRowNum()); findTag(sheet, tag); //reading an array of Ri values after '#Ri' tag HSSFRow row = sheet.getRow(rPosition[0]); ArrayList<Double> cellValues = new ArrayList<Double>(); Iterator<Cell> cellIterator = row.cellIterator(); while (cellIterator.hasNext()) { Cell cell = cellIterator.next(); if (cell.getColumnIndex() >= rPosition[1] + 1) { cellValues.add(cell.getNumericCellValue()); } } rValues = new Double[cellValues.size()]; rValues = cellValues.toArray(new Double[rValues.length]); System.out.println("Values from sheet:"); for (int i = 0; i < rValues.length; i++) { System.out.println(rValues[i]); } return this.rValues; }
From source file:nz.govt.natlib.adapter.works.DocAdapter.java
License:Apache License
/**
 * Walks the POIFS directory tree of a Microsoft Works file and reports its
 * contents to the parser context.
 *
 * <p>Events are fired in this order: start of "MSWorks", the file
 * information written by {@code writeFileInfo}, a "Version" event with the
 * value "Works", whatever {@code readDirectory} emits, then the end of
 * "MSWorks". The input stream is handed to {@code AdapterUtils.close}
 * whether or not reading succeeded.
 *
 * <p>Any exception raised while opening or reading the file is rethrown
 * wrapped in a {@link RuntimeException}; in that case the end event is not
 * fired.
 *
 * @param file the file to read
 * @param ctx  the context that receives the parse events
 * @throws IOException declared by the signature
 */
public void adapt(File file, ParserContext ctx) throws IOException {
    ctx.fireStartParseEvent("MSWorks");
    writeFileInfo(file, ctx);
    ctx.fireParseEvent("Version", "Works");

    FileInputStream input = null;
    try {
        input = new FileInputStream(file);
        final POIFSFileSystem poifs = new POIFSFileSystem(input);
        final DirectoryEntry rootEntry = poifs.getRoot();
        readDirectory(poifs, rootEntry, ctx);
    } catch (Exception failure) {
        throw new RuntimeException(failure);
    } finally {
        AdapterUtils.close(input);
    }

    ctx.fireEndParseEvent("MSWorks");
}
From source file:org.alinous.poi.PoiManager.java
License:GNU General Public License
/**
 * Opens the workbook stored at {@code path}.
 *
 * <p>Remembers the path, opens an input stream on it, builds the POIFS file
 * system from that stream and then creates the workbook with the second
 * constructor argument set to {@code false}. The stream is kept in
 * {@code inStream} and is not closed by this method.
 *
 * @param path location of the Excel file
 * @throws IOException if the file cannot be opened or read
 */
public void open(String path) throws IOException {
    this.path = path;

    AlinousFile source = new AlinousFile(this.path);
    this.inStream = new AlinousFileInputStream(source);

    this.fs = new POIFSFileSystem(this.inStream);
    this.wb = new HSSFWorkbook(this.fs, false);
}
From source file:org.amanzi.splash.importer.ExcelImporter.java
License:Open Source License
/**
 * Imports every sheet of the Excel workbook held in {@code fileContent}
 * inside a single transaction.
 *
 * <p>The transaction is marked successful only after all sheets have been
 * created. If anything fails, it is finished without that mark, so a
 * half-imported workbook is not flagged for commit. The progress monitor is
 * always told that the task is done.
 *
 * @param monitor progress monitor to report to
 * @throws InvocationTargetException wrapping the {@link IOException} raised
 *         when the workbook cannot be read
 */
@Override
public void run(IProgressMonitor monitor) throws InvocationTargetException {
    monitor.beginTask("Importing data from Excel", 100);
    Transaction tx = NeoUtils.beginTransaction();
    try {
        POIFSFileSystem fileSystem = new POIFSFileSystem(fileContent);
        HSSFWorkbook workBook = new HSSFWorkbook(fileSystem);
        createRootSpreadsheet();

        int sheetCount = workBook.getNumberOfSheets();
        for (int i = 0; i < sheetCount; i++) {
            String sheetName = workBook.getSheetName(i);
            monitor.subTask("Sheet " + sheetName);
            createSheet(workBook.getSheetAt(i), sheetName, tx);
            // The division runs only inside the loop, so sheetCount is at least 1 here.
            monitor.worked(100 / sheetCount);
        }

        // Reached only when every sheet was imported. Calling success() from the
        // finally block, as before, also marked failed imports as successful.
        tx.success();
    } catch (IOException e) {
        throw new InvocationTargetException(e);
    } finally {
        tx.finish();
        monitor.done();
    }
}
From source file:org.apache.jackrabbit.extractor.MsExcelTextExtractor.java
License:Apache License
/** * {@inheritDoc}/*from www .j av a 2 s .com*/ */ public Reader extractText(InputStream stream, String type, String encoding) throws IOException { CharArrayWriter writer = new CharArrayWriter(); try { POIFSFileSystem fs = new POIFSFileSystem(stream); HSSFWorkbook workbook = new HSSFWorkbook(fs); for (int i = 0; i < workbook.getNumberOfSheets(); i++) { HSSFSheet sheet = workbook.getSheetAt(i); Iterator rows = sheet.rowIterator(); while (rows.hasNext()) { HSSFRow row = (HSSFRow) rows.next(); Iterator cells = row.cellIterator(); while (cells.hasNext()) { HSSFCell cell = (HSSFCell) cells.next(); switch (cell.getCellType()) { case HSSFCell.CELL_TYPE_NUMERIC: String num = Double.toString(cell.getNumericCellValue()).trim(); if (num.length() > 0) { writer.write(num + " "); } break; case HSSFCell.CELL_TYPE_STRING: String text = cell.getStringCellValue().trim(); if (text.length() > 0) { writer.write(text + " "); } break; } } } } return new CharArrayReader(writer.toCharArray()); } catch (RuntimeException e) { logger.warn("Failed to extract Excel text content", e); return new StringReader(""); } finally { stream.close(); } }
From source file:org.apache.nutch.parse.msword.WordExtractor.java
License:Apache License
/**
 * Gets the text from a Word document.
 *
 * <p>Reads the "WordDocument" stream as the header, rejects fast-saved and
 * password-protected files, hands Word 6.0 documents to
 * {@code Word6Extractor}, and otherwise assembles the text by walking the
 * character runs against the text pieces, skipping runs marked as deleted.
 *
 * @param in The InputStream representing the Word file.
 * @return the text of all character runs that are not marked as deleted
 * @throws FastSavedException if header flag bit 0x4 is set
 * @throws PasswordProtectedException if header flag bit 0x100 is set
 * @throws Exception on any other failure while reading the document
 */
protected String extractText(InputStream in) throws Exception {
    // NOTE(review): 'text' is never used in this method.
    ArrayList text = new ArrayList();
    POIFSFileSystem fsys = new POIFSFileSystem(in);

    // load our POIFS document streams.
    DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument");
    DocumentInputStream din = fsys.createDocumentInputStream("WordDocument");
    byte[] header = new byte[headerProps.getSize()];

    // NOTE(review): the return value of read() is ignored, so a short read
    // would go unnoticed -- confirm DocumentInputStream always fills the buffer.
    din.read(header);
    din.close();

    // flag word at header offset 0xa
    int info = LittleEndian.getShort(header, 0xa);

    if ((info & 0x4) != 0) {
        throw new FastSavedException("Fast-saved files are unsupported at this time");
    }
    if ((info & 0x100) != 0) {
        throw new PasswordProtectedException("This document is password protected");
    }

    // determine the version of Word this document came from.
    int nFib = LittleEndian.getShort(header, 0x2);
    switch (nFib) {
    case 101:
    case 102:
    case 103:
    case 104:
        // this is a Word 6.0 doc send it to the extractor for that version.
        Word6Extractor oldExtractor = new Word6Extractor();
        return oldExtractor.extractText(header);
    }

    // Get the information we need from the header
    boolean useTable1 = (info & 0x200) != 0;

    // get the location of the piece table
    int complexOffset = LittleEndian.getInt(header, 0x1a2);

    // determine which table stream we must use.
    String tableName = null;
    if (useTable1) {
        tableName = "1Table";
    } else {
        tableName = "0Table";
    }

    DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName);
    byte[] tableStream = new byte[table.getSize()];

    din = fsys.createDocumentInputStream(tableName);

    // NOTE(review): same unchecked single read() as for the header above.
    din.read(tableStream);
    din.close();

    int chpOffset = LittleEndian.getInt(header, 0xfa);
    int chpSize = LittleEndian.getInt(header, 0xfe);
    int fcMin = LittleEndian.getInt(header, 0x18);
    CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin);

    // load our text pieces and our character runs
    ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin);
    TextPieceTable tpt = cft.getTextPieceTable();
    List textPieces = tpt.getTextPieces();

    // make the POIFS objects available for garbage collection
    din = null;
    fsys = null;
    table = null;
    headerProps = null;

    List textRuns = cbt.getTextRuns();
    Iterator runIt = textRuns.iterator();
    Iterator textIt = textPieces.iterator();

    // NOTE(review): next() is called without hasNext(), so an empty piece
    // list would throw NoSuchElementException here.
    TextPiece currentPiece = (TextPiece) textIt.next();
    int currentTextStart = currentPiece.getStart();
    int currentTextEnd = currentPiece.getEnd();

    WordTextBuffer finalTextBuf = new WordTextBuffer();

    // iterate through all text runs extract the text only if they haven't been
    // deleted
    while (runIt.hasNext()) {
        CHPX chpx = (CHPX) runIt.next();
        boolean deleted = isDeleted(chpx.getGrpprl());
        if (deleted) {
            continue;
        }

        int runStart = chpx.getStart();
        int runEnd = chpx.getEnd();

        // skip forward to the text piece that contains the start of this run
        while (runStart >= currentTextEnd) {
            currentPiece = (TextPiece) textIt.next();
            currentTextStart = currentPiece.getStart();
            currentTextEnd = currentPiece.getEnd();
        }

        if (runEnd < currentTextEnd) {
            // the run lies entirely inside the current piece
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else if (runEnd > currentTextEnd) {
            // the run spans several pieces: take the rest of each piece in turn
            while (runEnd > currentTextEnd) {
                String str = currentPiece.substring(runStart - currentTextStart,
                        currentTextEnd - currentTextStart);
                finalTextBuf.append(str);
                if (textIt.hasNext()) {
                    currentPiece = (TextPiece) textIt.next();
                    currentTextStart = currentPiece.getStart();
                    // the remainder of the run starts where the next piece starts
                    runStart = currentTextStart;
                    currentTextEnd = currentPiece.getEnd();
                } else {
                    // no more text pieces: return what has been collected so far
                    return finalTextBuf.toString();
                }
            }
            // the leading part of the last piece reached, up to the end of the run
            String str = currentPiece.substring(0, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else {
            // the run ends exactly where the current piece ends
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            if (textIt.hasNext()) {
                currentPiece = (TextPiece) textIt.next();
                currentTextStart = currentPiece.getStart();
                currentTextEnd = currentPiece.getEnd();
            }
            finalTextBuf.append(str);
        }
    }
    return finalTextBuf.toString();
}