Example usage for org.apache.poi.openxml4j.opc OPCPackage open

List of usage examples for org.apache.poi.openxml4j.opc OPCPackage open

Introduction

On this page you can find example usages of org.apache.poi.openxml4j.opc.OPCPackage.open.

Prototype

public static OPCPackage open(InputStream in) throws InvalidFormatException, IOException 

Source Link

Document

Open a package.

Usage

From source file:opisiame.model.Import_animateur_excel.java

/**
 * Opens the OOXML workbook located at {@code adresse} and stores it in the
 * {@code classeur} field.
 *
 * <p>If the file is not a valid OOXML package the error is logged at SEVERE level
 * and {@code classeur} is left unassigned by this method.
 *
 * @param adresse path of the workbook file to open
 * @throws IOException if the file cannot be opened or read, or the stream cannot be closed
 */
private void ouverture_fichier(String adresse) throws IOException {
    // try-with-resources guarantees the file handle is released on every path,
    // including when opening the package fails part-way through.
    // NOTE(review): OPCPackage.open(InputStream) is documented to hold the whole
    // package in memory, so closing the stream here should not affect the
    // workbook - confirm against the POI version in use.
    try (InputStream is = new FileInputStream(adresse)) {
        OPCPackage opc = OPCPackage.open(is);
        classeur = new XSSFWorkbook(opc);
    } catch (InvalidFormatException ex) {
        Logger.getLogger(Import_animateur_excel.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:opisiame.model.Import_eleve_excel.java

/**
 * Opens the OOXML workbook located at {@code adresse} and stores it in the
 * {@code classeur} field.
 *
 * <p>If the file is not a valid OOXML package the error is logged at SEVERE level
 * and {@code classeur} is left unassigned by this method.
 *
 * @param adresse path of the workbook file to open
 * @throws IOException if the file cannot be opened or read, or the stream cannot be closed
 */
private void ouverture_fichier(String adresse) throws IOException {
    // try-with-resources guarantees the file handle is released on every path,
    // including when opening the package fails part-way through.
    // NOTE(review): OPCPackage.open(InputStream) is documented to hold the whole
    // package in memory, so closing the stream here should not affect the
    // workbook - confirm against the POI version in use.
    try (InputStream is = new FileInputStream(adresse)) {
        OPCPackage opc = OPCPackage.open(is);
        classeur = new XSSFWorkbook(opc);
    } catch (InvalidFormatException ex) {
        Logger.getLogger(Import_eleve_excel.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:org.alfresco.repo.content.transform.OOXMLThumbnailContentTransformer.java

License:Open Source License

/**
 * Copies the thumbnail embedded in an OOXML package to the target writer.
 *
 * <p>The source content is first written to a temporary file, which is then opened
 * as an OPC package. If the package has at least one thumbnail relationship, the
 * first one is streamed to {@code writer}; otherwise an
 * {@code UnimportantTransformException} is thrown.
 *
 * @param reader  source content
 * @param writer  destination for the thumbnail bytes
 * @param options transformation options (not read by this method)
 * @throws Exception an {@code AlfrescoRuntimeException} wrapping any {@code IOException},
 *                   an {@code UnimportantTransformException} when no thumbnail exists,
 *                   or whatever the called APIs propagate
 */
@Override
protected void transformInternal(ContentReader reader, ContentWriter writer, TransformationOptions options)
        throws Exception {
    final String sourceMimetype = reader.getMimetype();
    final String sourceExtension = getMimetypeService().getExtension(sourceMimetype);
    final String targetMimetype = writer.getMimetype();

    if (log.isDebugEnabled()) {
        log.debug("Transforming from " + sourceMimetype + " to " + targetMimetype);
    }

    OPCPackage pkg = null;
    try {
        // Stage the content in a temp file so it can be opened by path
        final String tempPrefix = this.getClass().getSimpleName() + "_ooxml";
        final File ooxmlTempFile = TempFileProvider.createTempFile(tempPrefix, sourceExtension);
        reader.getContent(ooxmlTempFile);

        pkg = OPCPackage.open(ooxmlTempFile.getPath());

        final PackageRelationshipCollection thumbnailRels = pkg
                .getRelationshipsByType(PackageRelationshipTypes.THUMBNAIL);

        // Guard: nothing to extract when the package carries no thumbnail
        if (!(thumbnailRels.size() > 0)) {
            log.debug("No thumbnail present in " + reader.toString());
            throw new UnimportantTransformException(NO_THUMBNAIL_PRESENT_IN_FILE + targetMimetype);
        }

        // Only the first thumbnail relationship is used
        final PackageRelationship firstRel = thumbnailRels.getRelationship(0);
        final PackagePart thumbnailPart = pkg.getPart(firstRel);

        final InputStream thumbnailStream = thumbnailPart.getInputStream();
        writer.putContent(thumbnailStream);
        thumbnailStream.close();
    } catch (IOException e) {
        throw new AlfrescoRuntimeException("Unable to transform " + sourceExtension + " file.", e);
    } finally {
        if (pkg != null) {
            pkg.close();
        }
    }
}

From source file:org.apache.metamodel.excel.XlsxSpreadsheetReaderDelegate.java

License:Apache License

/**
 * Opens the OPC package backing {@code _resource}.
 *
 * <p>A {@code FileResource} is opened directly from its {@link File}; any other
 * resource is opened from the stream returned by {@code _resource.read()}.
 *
 * @return the opened package
 * @throws Exception whatever the resource or {@code OPCPackage.open} propagates
 */
private OPCPackage openOPCPackage() throws Exception {
    if (!(_resource instanceof FileResource)) {
        // Generic resource: only stream access is available
        return OPCPackage.open(_resource.read());
    }

    final FileResource fileResource = (FileResource) _resource;
    final File file = fileResource.getFile();
    return OPCPackage.open(file);
}

From source file:org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor.java

License:Apache License

/**
 * Reads the incoming flow file as an OOXML package and passes each selected sheet
 * to {@code handleExcelSheet}.
 *
 * <p>When the {@code DESIRED_SHEETS} property has a value, only sheets whose name
 * matches one of the delimited entries (case-insensitively) are handled; otherwise
 * every sheet is handled. On success the incoming flow file is transferred to
 * {@code ORIGINAL}. Any {@link RuntimeException} raised while reading routes the
 * flow file to {@code FAILURE} with the message stored in an error attribute.
 *
 * @param context provides the processor property values
 * @param session session used to read and transfer the flow file
 * @throws ProcessException if an entry of the columns-to-skip list is not an integer
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions()
            .getValue();
    final boolean formatValues = context.getProperty(FORMAT_VALUES).asBoolean();

    final CSVFormat csvFormat = CSVUtils.createCSVFormat(context);

    //Switch to 0 based index
    final int firstRow = context.getProperty(ROWS_TO_SKIP).asInteger() - 1;
    final String[] sColumnsToSkip = StringUtils.split(context.getProperty(COLUMNS_TO_SKIP).getValue(), ",");

    final List<Integer> columnsToSkip = new ArrayList<>();

    if (sColumnsToSkip != null && sColumnsToSkip.length > 0) {
        for (String c : sColumnsToSkip) {
            try {
                //Switch to 0 based index
                columnsToSkip.add(Integer.parseInt(c) - 1);
            } catch (NumberFormatException e) {
                throw new ProcessException("Invalid column in Columns to Skip list.", e);
            }
        }
    }

    try {
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream inputStream) throws IOException {

                OPCPackage pkg = null;
                try {
                    pkg = OPCPackage.open(inputStream);
                    XSSFReader r = new XSSFReader(pkg);
                    ReadOnlySharedStringsTable sst = new ReadOnlySharedStringsTable(pkg);
                    StylesTable styles = r.getStylesTable();
                    XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) r.getSheetsData();

                    if (desiredSheetsDelimited != null) {
                        String[] desiredSheets = StringUtils.split(desiredSheetsDelimited,
                                DESIRED_SHEETS_DELIMITER);

                        if (desiredSheets != null) {
                            while (iter.hasNext()) {
                                InputStream sheet = iter.next();
                                String sheetName = iter.getSheetName();

                                for (int i = 0; i < desiredSheets.length; i++) {
                                    //If the sheetName is a desired one parse it
                                    if (sheetName.equalsIgnoreCase(desiredSheets[i])) {
                                        ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(
                                                columnsToSkip, firstRow, sheetName, formatValues, sst, styles);
                                        handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat);
                                        break;
                                    }
                                }
                            }
                        } else {
                            getLogger().debug(
                                    "Excel document was parsed but no sheets with the specified desired names were found.");
                        }

                    } else {
                        //Get all of the sheets in the document.
                        while (iter.hasNext()) {
                            InputStream sheet = iter.next();
                            String sheetName = iter.getSheetName();

                            ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow,
                                    sheetName, formatValues, sst, styles);
                            handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat);
                        }
                    }
                } catch (InvalidFormatException ife) {
                    getLogger().error("Only .xlsx Excel 2007 OOXML files are supported", ife);
                    // Unchecked, so it reaches the RuntimeException handler below and
                    // routes the flow file to FAILURE.
                    throw new UnsupportedOperationException("Only .xlsx Excel 2007 OOXML files are supported",
                            ife);
                } catch (OpenXML4JException | SAXException e) {
                    // Logged only: the flow file still goes to ORIGINAL in this case.
                    getLogger().error("Error occurred while processing Excel document metadata", e);
                } finally {
                    // The package is only read here, so release it without saving.
                    // revert() is used instead of close() because close() is meant
                    // for saving a package.
                    if (pkg != null) {
                        pkg.revert();
                    }
                }
            }
        });

        session.transfer(flowFile, ORIGINAL);

    } catch (RuntimeException ex) {
        getLogger().error("Failed to process incoming Excel document. " + ex.getMessage(), ex);
        FlowFile failedFlowFile = session.putAttribute(flowFile,
                ConvertExcelToCSVProcessor.class.getName() + ".error", ex.getMessage());
        session.transfer(failedFlowFile, FAILURE);
    }
}

From source file:org.apache.tika.parser.microsoft.ooxml.xslf.XSLFEventBasedPowerPointExtractor.java

License:Apache License

/**
 * Creates an extractor by opening the OPC package at {@code path} and delegating
 * to the constructor that accepts an {@link OPCPackage}.
 *
 * <p>Nothing in this constructor closes the opened package.
 *
 * @param path file system path of the package to open
 * @throws XmlException       declared; propagated from the delegated constructor
 * @throws OpenXML4JException if the package cannot be opened or processed
 * @throws IOException        declared; propagated from the delegated constructor
 */
public XSLFEventBasedPowerPointExtractor(String path) throws XmlException, OpenXML4JException, IOException {
    this(OPCPackage.open(path));
}

From source file:org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFEventBasedWordExtractor.java

License:Apache License

/**
 * Creates an extractor by opening the OPC package at {@code path} and delegating
 * to the constructor that accepts an {@link OPCPackage}.
 *
 * <p>Nothing in this constructor closes the opened package.
 *
 * @param path file system path of the package to open
 * @throws XmlException       declared; propagated from the delegated constructor
 * @throws OpenXML4JException if the package cannot be opened or processed
 * @throws IOException        declared; propagated from the delegated constructor
 */
public XWPFEventBasedWordExtractor(String path) throws XmlException, OpenXML4JException, IOException {
    this(OPCPackage.open(path));
}

From source file:org.crypto.sse.TextExtractPar.java

License:Open Source License

/**
 * Extracts the text of each given file, tokenizes it, and builds two lookups:
 * {@code lookup1} maps every token to the names of the files containing it, and
 * {@code lookup2} maps every file name to the distinct tokens found in it.
 *
 * <p>The extraction strategy is chosen from the file name suffix: {@code .docx},
 * {@code .pptx}, {@code .xlsx}, {@code .doc} and {@code .pdf} use dedicated
 * extractors; media files ({@code .gif}, {@code .jpeg}, {@code .wmv},
 * {@code .mpeg}, {@code .mp4}) contribute only their file name; everything else is
 * read as UTF-8 text lines. A file that cannot be read is reported on standard
 * output and contributes no tokens.
 *
 * @param listOfFile files to index
 * @return a {@code TextExtractPar} built from the two lookups
 * @throws FileNotFoundException if one of the files cannot be opened
 */
private static TextExtractPar extractOneDoc(File[] listOfFile) throws FileNotFoundException {

    Multimap<String, String> lookup1 = ArrayListMultimap.create();
    Multimap<String, String> lookup2 = ArrayListMultimap.create();

    for (File file : listOfFile) {

        // Progress report: print the percentage whose file-count threshold
        // equals the number of files processed so far.
        for (int j = 0; j < 100; j++) {

            if (counter == (int) ((j + 1) * listOfFile.length / 100)) {
                System.out.println("Number of files read equals " + j + " %");
                break;
            }
        }

        List<String> lines = new ArrayList<String>();
        counter++;
        final String fileName = file.getName();
        FileInputStream fis = new FileInputStream(file);

        try {

            ///////////////////// .docx /////////////////////////////
            if (fileName.endsWith(".docx")) {
                try {
                    XWPFDocument doc = new XWPFDocument(fis);
                    XWPFWordExtractor ex = new XWPFWordExtractor(doc);
                    lines.add(ex.getText());
                } catch (IOException e) {
                    System.out.println("File not read: " + fileName);
                }
            }

            ///////////////////// .pptx /////////////////////////////
            else if (fileName.endsWith(".pptx")) {
                try {
                    OPCPackage ppt = OPCPackage.open(fis);
                    XSLFPowerPointExtractor xw = new XSLFPowerPointExtractor(ppt);
                    lines.add(xw.getText());
                } catch (XmlException e) {
                    System.out.println("File not read: " + fileName);
                } catch (IOException e) {
                    System.out.println("File not read: " + fileName);
                } catch (OpenXML4JException e) {
                    System.out.println("File not read: " + fileName);
                }
            }

            ///////////////////// .xlsx /////////////////////////////
            else if (fileName.endsWith(".xlsx")) {
                try {
                    OPCPackage xls = OPCPackage.open(fis);
                    XSSFExcelExtractor xe = new XSSFExcelExtractor(xls);
                    lines.add(xe.getText());
                } catch (InvalidFormatException e) {
                    System.out.println("File not read: " + fileName);
                } catch (IOException e) {
                    System.out.println("File not read: " + fileName);
                } catch (XmlException e) {
                    System.out.println("File not read: " + fileName);
                } catch (OpenXML4JException e) {
                    System.out.println("File not read: " + fileName);
                }
            }

            ///////////////////// .doc /////////////////////////////
            else if (fileName.endsWith(".doc")) {
                try {
                    // NOTE(review): the file system is opened from the File and is
                    // never closed here - consider closing it once the text is read.
                    NPOIFSFileSystem fs = new NPOIFSFileSystem(file);
                    WordExtractor extractor = new WordExtractor(fs.getRoot());
                    for (String rawText : extractor.getParagraphText()) {
                        lines.add(extractor.stripFields(rawText));
                    }
                } catch (IOException e) {
                    System.out.println("File not read: " + fileName);
                }
            }

            ///////////////////// .pdf /////////////////////////////
            else if (fileName.endsWith(".pdf")) {
                try {
                    PDFParser parser = new PDFParser(fis);
                    parser.parse();
                    COSDocument cd = parser.getDocument();
                    PDFTextStripper stripper = new PDFTextStripper();
                    lines.add(stripper.getText(new PDDocument(cd)));
                } catch (IOException e) {
                    System.out.println("File not read: " + fileName);
                }
            }

            ///////////////////// Media files: gif, jpeg, wmv, mpeg, mp4 /////////////
            // A name can only carry one suffix, so the alternatives must be OR-ed;
            // chaining them with && made this branch unreachable.
            else if (fileName.endsWith(".gif") || fileName.endsWith(".jpeg")
                    || fileName.endsWith(".wmv") || fileName.endsWith(".mpeg")
                    || fileName.endsWith(".mp4")) {

                // Media content is not parsed; only the file name is indexed.
                lines.add(fileName);
            }

            ///////////////////// raw text extensions /////////////////////////////
            else {
                try {
                    lines = Files.readLines(file, Charsets.UTF_8);
                } catch (IOException e) {
                    System.out.println("File not read: " + fileName);
                }
            }

        } finally {
            // Release the file handle for every branch, not only the raw-text one.
            try {
                fis.close();
            } catch (IOException ignored) {
                // Best effort: a failure to close must not abort the indexing run.
            }
        }

        ///////////////////// Begin word extraction /////////////////////////////

        for (int i = 0; i < lines.size(); i++) {

            CharArraySet noise = EnglishAnalyzer.getDefaultStopSet();

            // We are using a standard tokenizer that eliminates the stop
            // words. We can use Stemming tokenizer such Porter
            // A set of English noise keywords is used that will eliminates
            // words such as "the, a, etc"

            Analyzer analyzer = new StandardAnalyzer(noise);
            List<String> token = Tokenizer.tokenizeString(analyzer, lines.get(i));
            for (int j = 0; j < token.size(); j++) {

                // Avoid counting occurrences of words in the same file
                if (!lookup2.get(fileName).contains(token.get(j))) {
                    lookup2.put(fileName, token.get(j));
                }

                // Avoid counting occurrences of words in the same file
                if (!lookup1.get(token.get(j)).contains(fileName)) {
                    lookup1.put(token.get(j), fileName);
                }

            }

        }

    }

    return new TextExtractPar(lookup1, lookup2);

}

From source file:org.dhatim.fastexcel.reader.BenchmarksTest.java

License:Apache License

/**
 * Benchmark that streams the workbook resource {@code FILE} through the Apache POI
 * event API.
 *
 * <p>Every sheet stream is opened and closed, but only the first sheet is parsed;
 * its handler result is asserted to equal {@code RESULT}.
 *
 * @return the number of sheets iterated
 */
@Benchmark
public int streamingApachePoi() throws IOException, OpenXML4JException, SAXException {
    try (OPCPackage pkg = OPCPackage.open(openResource(FILE))) {
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
        XSSFReader reader = new XSSFReader(pkg);
        StylesTable styles = reader.getStylesTable();
        XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) reader.getSheetsData();

        int sheetCount = 0;
        for (; sheets.hasNext(); sheetCount++) {
            try (InputStream sheetStream = sheets.next()) {
                if (sheetCount != 0) {
                    // Later sheets are only opened and closed, not parsed
                    continue;
                }
                SheetContentHandler firstSheetHandler = new SheetContentHandler();
                processSheet(styles, strings, firstSheetHandler, sheetStream);
                assertEquals(RESULT, firstSheetHandler.result);
            }
        }
        return sheetCount;
    }
}

From source file:org.dhatim.fastexcel.reader.ReadableWorkbook.java

License:Apache License

/**
 * Reads the whole stream into memory and opens it as an OPC package backed by an
 * in-memory zip file.
 *
 * @param in stream containing the zipped package
 * @return the opened package
 * @throws IOException          if the stream or zip content cannot be read
 * @throws ExcelReaderException if the content is not a valid package
 */
private static OPCPackage open(InputStream in) throws IOException {
    final byte[] zippedContent = IOUtils.toByteArray(in);
    final SeekableInMemoryByteChannel channel = new SeekableInMemoryByteChannel(zippedContent);
    final ZipFile archive = new ZipFile(channel);
    try {
        return OPCPackage.open(new ZipFileZipEntrySource(archive));
    } catch (InvalidFormatException e) {
        // Surface format problems as the reader's own exception type
        throw new ExcelReaderException(e);
    }
}