List of usage examples for org.apache.poi.openxml4j.opc OPCPackage open
public static OPCPackage open(InputStream in) throws InvalidFormatException, IOException
From source file:opisiame.model.Import_animateur_excel.java
private void ouverture_fichier(String adresse) throws IOException { InputStream is = new FileInputStream(adresse); OPCPackage opc;//from ww w. ja v a 2 s.c om try { opc = OPCPackage.open(is); classeur = new XSSFWorkbook(opc); } catch (InvalidFormatException ex) { Logger.getLogger(Import_animateur_excel.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:opisiame.model.Import_eleve_excel.java
/**
 * Opens the Excel file at the given location and stores the resulting
 * workbook in {@code classeur}.
 *
 * <p>If the file is not a valid OOXML package, the error is logged at
 * SEVERE level and {@code classeur} is left untouched.
 *
 * @param adresse path of the .xlsx file to open
 * @throws IOException if the file cannot be opened or read
 */
private void ouverture_fichier(String adresse) throws IOException {
    // OPCPackage.open(InputStream) loads the whole archive into memory, so
    // the file stream can be released as soon as the workbook is built.
    try (InputStream is = new FileInputStream(adresse)) {
        OPCPackage opc = OPCPackage.open(is);
        classeur = new XSSFWorkbook(opc);
    } catch (InvalidFormatException ex) {
        Logger.getLogger(Import_eleve_excel.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:org.alfresco.repo.content.transform.OOXMLThumbnailContentTransformer.java
License:Open Source License
@Override protected void transformInternal(ContentReader reader, ContentWriter writer, TransformationOptions options) throws Exception { final String sourceMimetype = reader.getMimetype(); final String sourceExtension = getMimetypeService().getExtension(sourceMimetype); final String targetMimetype = writer.getMimetype(); if (log.isDebugEnabled()) { StringBuilder msg = new StringBuilder(); msg.append("Transforming from ").append(sourceMimetype).append(" to ").append(targetMimetype); log.debug(msg.toString());/*w ww . j a v a 2 s. c o m*/ } OPCPackage pkg = null; try { File ooxmlTempFile = TempFileProvider.createTempFile(this.getClass().getSimpleName() + "_ooxml", sourceExtension); reader.getContent(ooxmlTempFile); // Load the file pkg = OPCPackage.open(ooxmlTempFile.getPath()); // Does it have a thumbnail? PackageRelationshipCollection rels = pkg.getRelationshipsByType(PackageRelationshipTypes.THUMBNAIL); if (rels.size() > 0) { // Get the thumbnail part PackageRelationship tRel = rels.getRelationship(0); PackagePart tPart = pkg.getPart(tRel); // Write it to the target InputStream tStream = tPart.getInputStream(); writer.putContent(tStream); tStream.close(); } else { log.debug("No thumbnail present in " + reader.toString()); throw new UnimportantTransformException(NO_THUMBNAIL_PRESENT_IN_FILE + targetMimetype); } } catch (IOException e) { throw new AlfrescoRuntimeException("Unable to transform " + sourceExtension + " file.", e); } finally { if (pkg != null) { pkg.close(); } } }
From source file:org.apache.metamodel.excel.XlsxSpreadsheetReaderDelegate.java
License:Apache License
private OPCPackage openOPCPackage() throws Exception { if (_resource instanceof FileResource) { final File file = ((FileResource) _resource).getFile(); return OPCPackage.open(file); }//from ww w .ja v a 2s .com return OPCPackage.open(_resource.read()); }
From source file:org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor.java
License:Apache License
@Override public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { final FlowFile flowFile = session.get(); if (flowFile == null) { return;/* ww w . ja v a2s . c o m*/ } final String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions() .getValue(); final boolean formatValues = context.getProperty(FORMAT_VALUES).asBoolean(); final CSVFormat csvFormat = CSVUtils.createCSVFormat(context); //Switch to 0 based index final int firstRow = context.getProperty(ROWS_TO_SKIP).asInteger() - 1; final String[] sColumnsToSkip = StringUtils.split(context.getProperty(COLUMNS_TO_SKIP).getValue(), ","); final List<Integer> columnsToSkip = new ArrayList<>(); if (sColumnsToSkip != null && sColumnsToSkip.length > 0) { for (String c : sColumnsToSkip) { try { //Switch to 0 based index columnsToSkip.add(Integer.parseInt(c) - 1); } catch (NumberFormatException e) { throw new ProcessException("Invalid column in Columns to Skip list.", e); } } } try { session.read(flowFile, new InputStreamCallback() { @Override public void process(InputStream inputStream) throws IOException { try { OPCPackage pkg = OPCPackage.open(inputStream); XSSFReader r = new XSSFReader(pkg); ReadOnlySharedStringsTable sst = new ReadOnlySharedStringsTable(pkg); StylesTable styles = r.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) r.getSheetsData(); if (desiredSheetsDelimited != null) { String[] desiredSheets = StringUtils.split(desiredSheetsDelimited, DESIRED_SHEETS_DELIMITER); if (desiredSheets != null) { while (iter.hasNext()) { InputStream sheet = iter.next(); String sheetName = iter.getSheetName(); for (int i = 0; i < desiredSheets.length; i++) { //If the sheetName is a desired one parse it if (sheetName.equalsIgnoreCase(desiredSheets[i])) { ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig( columnsToSkip, firstRow, sheetName, formatValues, sst, styles); handleExcelSheet(session, flowFile, 
sheet, readConfig, csvFormat); break; } } } } else { getLogger().debug( "Excel document was parsed but no sheets with the specified desired names were found."); } } else { //Get all of the sheets in the document. while (iter.hasNext()) { InputStream sheet = iter.next(); String sheetName = iter.getSheetName(); ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow, sheetName, formatValues, sst, styles); handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat); } } } catch (InvalidFormatException ife) { getLogger().error("Only .xlsx Excel 2007 OOXML files are supported", ife); throw new UnsupportedOperationException("Only .xlsx Excel 2007 OOXML files are supported", ife); } catch (OpenXML4JException | SAXException e) { getLogger().error("Error occurred while processing Excel document metadata", e); } } }); session.transfer(flowFile, ORIGINAL); } catch (RuntimeException ex) { getLogger().error("Failed to process incoming Excel document. " + ex.getMessage(), ex); FlowFile failedFlowFile = session.putAttribute(flowFile, ConvertExcelToCSVProcessor.class.getName() + ".error", ex.getMessage()); session.transfer(failedFlowFile, FAILURE); } }
From source file:org.apache.tika.parser.microsoft.ooxml.xslf.XSLFEventBasedPowerPointExtractor.java
License:Apache License
/**
 * Creates an extractor for the file at the given path.
 *
 * <p>The path is opened with {@code OPCPackage.open(String)} and the
 * resulting package is passed to the constructor that accepts an
 * {@code OPCPackage}. This constructor does not close the package itself.
 *
 * @param path location of the document to open
 * @throws XmlException       declared by this constructor; raised by the delegated construction
 * @throws OpenXML4JException declared by this constructor; raised while opening or reading the package
 * @throws IOException        declared by this constructor; raised on I/O failure
 */
public XSLFEventBasedPowerPointExtractor(String path) throws XmlException, OpenXML4JException, IOException { this(OPCPackage.open(path)); }
From source file:org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFEventBasedWordExtractor.java
License:Apache License
/**
 * Creates an extractor for the file at the given path.
 *
 * <p>The path is opened with {@code OPCPackage.open(String)} and the
 * resulting package is passed to the constructor that accepts an
 * {@code OPCPackage}. This constructor does not close the package itself.
 *
 * @param path location of the document to open
 * @throws XmlException       declared by this constructor; raised by the delegated construction
 * @throws OpenXML4JException declared by this constructor; raised while opening or reading the package
 * @throws IOException        declared by this constructor; raised on I/O failure
 */
public XWPFEventBasedWordExtractor(String path) throws XmlException, OpenXML4JException, IOException { this(OPCPackage.open(path)); }
From source file:org.crypto.sse.TextExtractPar.java
License:Open Source License
private static TextExtractPar extractOneDoc(File[] listOfFile) throws FileNotFoundException { Multimap<String, String> lookup1 = ArrayListMultimap.create(); Multimap<String, String> lookup2 = ArrayListMultimap.create(); for (File file : listOfFile) { for (int j = 0; j < 100; j++) { if (counter == (int) ((j + 1) * listOfFile.length / 100)) { System.out.println("Number of files read equals " + j + " %"); break; }/*from w ww . j a v a 2 s. c o m*/ } List<String> lines = new ArrayList<String>(); counter++; FileInputStream fis = new FileInputStream(file); // ***********************************************************************************************// ///////////////////// .docx ///////////////////////////// // ***********************************************************************************************// if (file.getName().endsWith(".docx")) { XWPFDocument doc; try { // System.out.println("File read: "+file.getName()); doc = new XWPFDocument(fis); XWPFWordExtractor ex = new XWPFWordExtractor(doc); lines.add(ex.getText()); } catch (IOException e) { // TODO Auto-generated catch block System.out.println("File not read: " + file.getName()); } } // ***********************************************************************************************// ///////////////////// .pptx ///////////////////////////// // ***********************************************************************************************// else if (file.getName().endsWith(".pptx")) { OPCPackage ppt; try { // System.out.println("File read: "+file.getName()); ppt = OPCPackage.open(fis); XSLFPowerPointExtractor xw = new XSLFPowerPointExtractor(ppt); lines.add(xw.getText()); } catch (XmlException e) { // TODO Auto-generated catch block System.out.println("File not read: " + file.getName()); } catch (IOException e) { // TODO Auto-generated catch block System.out.println("File not read: " + file.getName()); } catch (OpenXML4JException e) { System.out.println("File not read: " + file.getName()); } } // 
***********************************************************************************************// ///////////////////// .xlsx ///////////////////////////// // ***********************************************************************************************// else if (file.getName().endsWith(".xlsx")) { OPCPackage xls; try { // System.out.println("File read: "+file.getName()); xls = OPCPackage.open(fis); XSSFExcelExtractor xe = new XSSFExcelExtractor(xls); lines.add(xe.getText()); } catch (InvalidFormatException e) { // TODO Auto-generated catch block System.out.println("File not read: " + file.getName()); } catch (IOException e) { System.out.println("File not read: " + file.getName()); } catch (XmlException e) { // TODO Auto-generated catch block System.out.println("File not read: " + file.getName()); } catch (OpenXML4JException e) { System.out.println("File not read: " + file.getName()); } } // ***********************************************************************************************// ///////////////////// .doc ///////////////////////////// // ***********************************************************************************************// else if (file.getName().endsWith(".doc")) { NPOIFSFileSystem fs; try { // System.out.println("File read: "+file.getName()); fs = new NPOIFSFileSystem(file); WordExtractor extractor = new WordExtractor(fs.getRoot()); for (String rawText : extractor.getParagraphText()) { lines.add(extractor.stripFields(rawText)); } } catch (IOException e) { // TODO Auto-generated catch block System.out.println("File not read: " + file.getName()); } } // ***********************************************************************************************// ///////////////////// .pdf ///////////////////////////// // ***********************************************************************************************// else if (file.getName().endsWith(".pdf")) { PDFParser parser; try { // System.out.println("File read: "+file.getName()); parser = new 
PDFParser(fis); parser.parse(); COSDocument cd = parser.getDocument(); PDFTextStripper stripper = new PDFTextStripper(); lines.add(stripper.getText(new PDDocument(cd))); } catch (IOException e) { // TODO Auto-generated catch block System.out.println("File not read: " + file.getName()); } } // ***********************************************************************************************// ///////////////////// Media Files such as gif, jpeg, .wmv, .mpeg, ///////////////////// .mp4 ///////////////////////////// // ***********************************************************************************************// else if (file.getName().endsWith(".gif") && file.getName().endsWith(".jpeg") && file.getName().endsWith(".wmv") && file.getName().endsWith(".mpeg") && file.getName().endsWith(".mp4")) { lines.add(file.getName()); } // ***********************************************************************************************// ///////////////////// raw text extensions ///////////////////// ///////////////////////////// // ***********************************************************************************************// else { try { // System.out.println("File read: "+file.getName()); lines = Files.readLines(file, Charsets.UTF_8); } catch (IOException e) { // TODO Auto-generated catch block System.out.println("File not read: " + file.getName()); } finally { try { fis.close(); } catch (IOException ioex) { // omitted. } } } // ***********************************************************************************************// ///////////////////// Begin word extraction ///////////////////// ///////////////////////////// // ***********************************************************************************************// int temporaryCounter = 0; // Filter threshold int counterDoc = 0; for (int i = 0; i < lines.size(); i++) { CharArraySet noise = EnglishAnalyzer.getDefaultStopSet(); // We are using a standard tokenizer that eliminates the stop // words. 
We can use Stemming tokenizer such Porter // A set of English noise keywords is used that will eliminates // words such as "the, a, etc" Analyzer analyzer = new StandardAnalyzer(noise); List<String> token = Tokenizer.tokenizeString(analyzer, lines.get(i)); temporaryCounter = temporaryCounter + token.size(); for (int j = 0; j < token.size(); j++) { // Avoid counting occurrences of words in the same file if (!lookup2.get(file.getName()).contains(token.get(j))) { lookup2.put(file.getName(), token.get(j)); } // Avoid counting occurrences of words in the same file if (!lookup1.get(token.get(j)).contains(file.getName())) { lookup1.put(token.get(j), file.getName()); } } } } // System.out.println(lookup.toString()); return new TextExtractPar(lookup1, lookup2); }
From source file:org.dhatim.fastexcel.reader.BenchmarksTest.java
License:Apache License
/**
 * Benchmark: streams the workbook through Apache POI's event API, fully
 * processing the first sheet and merely opening and closing the others.
 *
 * @return the number of sheets iterated
 */
@Benchmark
public int streamingApachePoi() throws IOException, OpenXML4JException, SAXException {
    try (OPCPackage pkg = OPCPackage.open(openResource(FILE))) {
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
        XSSFReader reader = new XSSFReader(pkg);
        StylesTable styles = reader.getStylesTable();
        XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) reader.getSheetsData();

        int sheetCount = 0;
        for (; sheets.hasNext(); sheetCount++) {
            try (InputStream sheetStream = sheets.next()) {
                // Only the first sheet is parsed and verified.
                if (sheetCount != 0) {
                    continue;
                }
                SheetContentHandler sheetHandler = new SheetContentHandler();
                processSheet(styles, strings, sheetHandler, sheetStream);
                assertEquals(RESULT, sheetHandler.result);
            }
        }
        return sheetCount;
    }
}
From source file:org.dhatim.fastexcel.reader.ReadableWorkbook.java
License:Apache License
private static OPCPackage open(InputStream in) throws IOException { try {//w w w . j a va2 s. com byte[] compressedBytes = IOUtils.toByteArray(in); ZipFile zipFile = new ZipFile(new SeekableInMemoryByteChannel(compressedBytes)); return OPCPackage.open(new ZipFileZipEntrySource(zipFile)); } catch (InvalidFormatException e) { throw new ExcelReaderException(e); } }