Example usage for org.apache.poi.openxml4j.opc OPCPackage open

List of usage examples for org.apache.poi.openxml4j.opc OPCPackage open

Introduction

This page collects usage examples for the open method of org.apache.poi.openxml4j.opc.OPCPackage.

Prototype

public static OPCPackage open(File file, PackageAccess access) throws InvalidFormatException 

Source Link

Document

Open a package.

Usage

From source file:featurescomparison.workingwithworksheets.converttocsv.java.ApacheXLSX2CSV.java

License:Apache License

/**
 * Converts the sample workbook under the project's data directory to CSV,
 * writing the result to standard output.
 *
 * @param args command-line arguments; when at least two are supplied,
 *             {@code args[1]} is parsed as an integer and handed to the
 *             converter as its {@code minColumns} argument
 * @throws Exception if the package cannot be opened, {@code args[1]} is not
 *                   a valid integer, or the conversion fails
 */
public static void main(String[] args) throws Exception {
    String dataPath = "src/featurescomparison/workingwithworksheets/converttocsv/data/";

    // NOTE(review): the file name ends in ".xls" although it is opened as an
    // OPC (OOXML) package below -- confirm the sample file really is OOXML.
    File xlsxFile = new File(dataPath + "workbook.xls");
    if (!xlsxFile.exists()) {
        System.err.println("Not found or not a file: " + xlsxFile.getPath());
        return;
    }

    int minColumns = -1;
    if (args.length >= 2) {
        minColumns = Integer.parseInt(args[1]);
    }

    // The package open is instantaneous, as it should be.
    OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);
    try {
        ApacheXLSX2CSV xlsx2csv = new ApacheXLSX2CSV(p, System.out, minColumns);
        xlsx2csv.process();
    } finally {
        // Release the underlying file handle even when processing fails;
        // revert() discards the read-only package without saving anything.
        p.revert();
    }
}

From source file:hsio.snippet.java.poi.examples.XLSX2CSV.java

License:Apache License

/**
 * Converts {@code ./samples/excel/big-number.xlsx} to CSV on standard output.
 *
 * @param args command-line arguments; when at least two are supplied,
 *             {@code args[1]} is parsed as an integer and handed to the
 *             converter as its {@code minColumns} argument
 * @throws Exception if the package cannot be opened, {@code args[1]} is not
 *                   a valid integer, or the conversion fails
 */
public static void main(String[] args) throws Exception {
    File xlsxFile = new File("./samples/excel/big-number.xlsx");

    int minColumns = -1;
    if (args.length >= 2) {
        minColumns = Integer.parseInt(args[1]);
    }

    // The package open is instantaneous, as it should be.
    OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);
    try {
        XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns);
        xlsx2csv.process();
    } finally {
        // Close in finally so the package is released even if process() throws.
        p.close();
    }
}

From source file:kiklos.tv.timetable.XLSX2CSV.java

License:Apache License

/**
 * Converts an XLSX workbook to CSV, collects the output in memory and prints
 * it to standard output.
 *
 * @param args command-line arguments; currently overwritten by hard-coded
 *             values at the top of the method (see the review note there)
 * @throws Exception if the package cannot be opened or the conversion fails
 */
public static void main(String[] args) throws Exception {

    // NOTE(review): the caller's arguments are discarded and replaced with a
    // hard-coded path and column count. This looks like a debugging leftover;
    // it also makes the usage check below unreachable. Kept as-is to preserve
    // current behaviour -- confirm whether it should be removed.
    args = new String[2];
    args[0] = "G:\\WORK\\batya\\kiklos-proxy\\PERREG.xlsx";
    args[1] = "10";

    if (args.length < 1) {
        System.err.println("Use:");
        System.err.println("  XLSX2CSV <xlsx file> [min columns]");
        return;
    }

    File xlsxFile = new File(args[0]);
    if (!xlsxFile.exists()) {
        System.err.println("Not found or not a file: " + xlsxFile.getPath());
        return;
    }

    int minColumns = -1;
    if (args.length >= 2) {
        minColumns = Integer.parseInt(args[1]);
    }

    // The package open is instantaneous, as it should be.
    OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);
    try {
        StringBuilder sb = new StringBuilder();
        XLSX2CSV xlsx2csv = new XLSX2CSV(p, sb, minColumns);
        xlsx2csv.process();
        System.out.println(sb.toString());
    } finally {
        // Release the underlying file handle even when processing fails;
        // revert() discards the read-only package without saving anything.
        p.revert();
    }
}

From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.java

License:Apache License

/**
 * Parses an Office Open XML document: detects its media type, picks a
 * matching POI text extractor, wraps it in the corresponding decorator, and
 * emits metadata and XHTML through the supplied handler.
 *
 * @param stream      the document stream; if it is a {@code TikaInputStream}
 *                    with an already opened {@code OPCPackage} or a backing
 *                    file, that is used instead of reading the stream
 * @param baseHandler receives the XHTML output
 * @param metadata    receives the content type and the extracted metadata
 * @param context     parse context; also consulted for a {@code Locale}
 * @throws IOException   if reading the document fails
 * @throws SAXException  if the handler rejects the generated events
 * @throws TikaException if no suitable extractor can be created or the
 *                       document is not a valid OOXML package
 */
public static void parse(InputStream stream, ContentHandler baseHandler, Metadata metadata,
        ParseContext context) throws IOException, SAXException, TikaException {
    Locale locale = context.get(Locale.class, Locale.getDefault());
    ExtractorFactory.setThreadPrefersEventExtractors(true);

    try {
        OOXMLExtractor extractor;
        OPCPackage pkg;

        // Locate or Open the OPCPackage for the file
        TikaInputStream tis = TikaInputStream.cast(stream);
        if (tis != null && tis.getOpenContainer() instanceof OPCPackage) {
            pkg = (OPCPackage) tis.getOpenContainer();
        } else if (tis != null && tis.hasFile()) {
            pkg = OPCPackage.open(tis.getFile().getPath(), PackageAccess.READ);
            tis.setOpenContainer(pkg);
        } else {
            // Shield the caller's stream so that opening the package cannot close it.
            InputStream shield = new CloseShieldInputStream(stream);
            pkg = OPCPackage.open(shield);
        }

        // Get the type, and ensure it's one we handle
        MediaType type = ZipContainerDetector.detectOfficeOpenXML(pkg);
        if (type == null || OOXMLParser.UNSUPPORTED_OOXML_TYPES.contains(type)) {
            // Not a supported type, delegate to Empty Parser
            EmptyParser.INSTANCE.parse(stream, baseHandler, metadata, context);
            return;
        }
        metadata.set(Metadata.CONTENT_TYPE, type.toString());

        // Have the appropriate OOXML text extractor picked
        POIXMLTextExtractor poiExtractor = ExtractorFactory.createExtractor(pkg);

        POIXMLDocument document = poiExtractor.getDocument();
        if (poiExtractor instanceof XSSFEventBasedExcelExtractor) {
            extractor = new XSSFExcelExtractorDecorator(context, (XSSFEventBasedExcelExtractor) poiExtractor,
                    locale, metadata);
        } else if (document == null) {
            throw new TikaException(
                    "Expecting UserModel based POI OOXML extractor with a document, but none found. "
                            + "The extractor returned was a " + poiExtractor);
        } else if (document instanceof XMLSlideShow) {
            extractor = new XSLFPowerPointExtractorDecorator(context, (XSLFPowerPointExtractor) poiExtractor,
                    metadata);
        } else if (document instanceof XWPFDocument) {
            extractor = new XWPFWordExtractorDecorator(context, (XWPFWordExtractor) poiExtractor, metadata);
        } else {
            extractor = new POIXMLTextExtractorDecorator(context, poiExtractor);
        }

        // Get the bulk of the metadata first, so that it's accessible during
        // parsing if desired by the client (see TIKA-1109)
        extractor.getMetadataExtractor().extract(metadata);

        // Extract the text, along with any in-document metadata
        extractor.getXHTML(baseHandler, metadata, context);
    } catch (IllegalArgumentException e) {
        // getMessage() may be null; guard so the original failure is not
        // replaced by a NullPointerException.
        String message = e.getMessage();
        if (message != null && message.startsWith("No supported documents found")) {
            throw new TikaException(
                    "TIKA-418: RuntimeException while getting content" + " for thmx and xps file types", e);
        } else {
            throw new TikaException("Error creating OOXML extractor", e);
        }
    } catch (InvalidFormatException e) {
        throw new TikaException("Error creating OOXML extractor", e);
    } catch (OpenXML4JException e) {
        throw new TikaException("Error creating OOXML extractor", e);
    } catch (XmlException e) {
        throw new TikaException("Error creating OOXML extractor", e);
    }
}

From source file:mj.ocraptor.extraction.tika.parser.pkg.ZipContainerDetector.java

License:Apache License

/**
 * Best-effort detection of an Office Open XML type for a zip container.
 * <p>
 * When the zip has either of the two OPC marker entries, the backing file of
 * {@code stream} is opened as a read-only {@code OPCPackage}, registered on
 * the stream as its open container, and passed to the package-based
 * detection overload.
 *
 * @param zip    the zip file to inspect for OPC marker entries
 * @param stream the stream whose backing file is opened as a package
 * @return the detected media type, or {@code null} when the markers are
 *         absent or any I/O, format or runtime error occurs
 */
private static MediaType detectOfficeOpenXML(ZipFile zip, TikaInputStream stream) {
    try {
        boolean hasOpcMarker = zip.getEntry("_rels/.rels") != null
                || zip.getEntry("[Content_Types].xml") != null;
        if (!hasOpcMarker) {
            return null;
        }

        // Use POI to open and investigate it for us
        String path = stream.getFile().getPath();
        OPCPackage opc = OPCPackage.open(path, PackageAccess.READ);
        stream.setOpenContainer(opc);

        // Detect based on the open OPC Package
        return detectOfficeOpenXML(opc);
    } catch (InvalidFormatException notOoxml) {
        return null;
    } catch (IOException unreadable) {
        return null;
    } catch (RuntimeException unexpected) {
        return null;
    }
}

From source file:net.sourceforge.docfetcher.model.parse.MSOffice2007Parser.java

License:Open Source License

/**
 * Opens {@code file} as an OPC package, extracts its text and core
 * properties, and bundles them into a {@code ParseResult}.
 *
 * @param file   the document to parse
 * @param access the access mode used to open the package
 * @return a result carrying the text, title, author and the description,
 *         keywords and subject as miscellaneous metadata
 * @throws ParseException wrapping any exception raised while opening or
 *                        reading the package
 */
private static ParseResult doParse(File file, PackageAccess access) throws ParseException {
    OPCPackage opcPackage = null;
    try {
        opcPackage = OPCPackage.open(file.getPath(), access);
        final String text = extractText(opcPackage);

        final PackageProperties properties = opcPackage.getPackageProperties();

        // Combine creator and last editor into one author string; either may be null.
        final String creator = properties.getCreatorProperty().getValue();
        final String lastEditor = properties.getLastModifiedByProperty().getValue();
        final String author;
        if (creator != null && lastEditor != null) {
            author = creator.equals(lastEditor)
                    ? creator
                    : creator + ", " + lastEditor; //$NON-NLS-1$
        } else if (creator != null) {
            author = creator;
        } else {
            // Only the last editor is left, and it may itself be null.
            author = lastEditor;
        }

        // Remaining core properties
        final String description = properties.getDescriptionProperty().getValue();
        final String keywords = properties.getKeywordsProperty().getValue();
        final String subject = properties.getSubjectProperty().getValue();
        final String title = properties.getTitleProperty().getValue();

        return new ParseResult(text)
                .setTitle(title)
                .addAuthor(author)
                .addMiscMetadata(description)
                .addMiscMetadata(keywords)
                .addMiscMetadata(subject);
    } catch (Exception cause) {
        throw new ParseException(cause);
    } finally {
        Closeables.closeQuietly(opcPackage);
    }
}

From source file:net.sourceforge.docfetcher.model.parse.MSOffice2007Parser.java

License:Open Source License

/**
 * Opens {@code file} as an OPC package and returns its extracted text.
 *
 * @param file   the document to read
 * @param access the access mode used to open the package
 * @return the text produced by {@code extractText}
 * @throws ParseException wrapping any exception raised while opening or
 *                        reading the package
 */
@NotNull
private static String doRenderText(File file, PackageAccess access) throws ParseException {
    OPCPackage opcPackage = null;
    try {
        final String path = file.getPath();
        opcPackage = OPCPackage.open(path, access);
        final String text = extractText(opcPackage);
        return text;
    } catch (Exception cause) {
        throw new ParseException(cause);
    } finally {
        // Runs on success and failure; opcPackage is still null if open() threw.
        Closeables.closeQuietly(opcPackage);
    }
}

From source file:net.sourceforge.docfetcher.parse.MSOffice2007Parser.java

License:Open Source License

/**
 * Extracts the text and core properties of an OOXML file and returns them
 * as a {@code Document}. Non-null metadata fields are appended to the text
 * contents, each separated by a space.
 *
 * @param file the file to parse
 * @return a document built from the file, its title, the combined contents
 *         and the author
 * @throws ParseException if anything goes wrong while reading the file
 */
public Document parse(File file) throws ParseException {
    try {
        // Extract contents
        POITextExtractor ef = ExtractorFactory.createExtractor(file);
        StringBuffer contents = new StringBuffer(ef.getText());

        // Open up properties
        OPCPackage pkg = OPCPackage.open(file.getAbsolutePath(), PackageAccess.READ);
        try {
            PackageProperties props = pkg.getPackageProperties();

            // Get author(s)
            String author = null;
            String defaultAuthor = props.getCreatorProperty().getValue();
            String lastAuthor = props.getLastModifiedByProperty().getValue();
            if (defaultAuthor == null) {
                if (lastAuthor != null) {
                    author = lastAuthor;
                }
            } else if (lastAuthor == null) {
                author = defaultAuthor;
            } else {
                if (defaultAuthor.equals(lastAuthor)) {
                    author = defaultAuthor;
                } else {
                    author = defaultAuthor + ", " + lastAuthor; //$NON-NLS-1$
                }
            }

            // Get other metadata
            String description = props.getDescriptionProperty().getValue();
            String keywords = props.getKeywordsProperty().getValue();
            String subject = props.getSubjectProperty().getValue();
            String title = props.getTitleProperty().getValue();

            // Append metadata to contents
            String[] metaData = new String[] { author, description, keywords, subject, title };
            for (String field : metaData) {
                if (field != null) {
                    contents.append(" ").append(field); //$NON-NLS-1$
                }
            }
            return new Document(file, title, contents).addAuthor(author);
        } finally {
            // Release the file handle held by the read-only package; all
            // values needed for the result have been copied out as strings.
            pkg.revert();
        }
    } catch (Exception e) {
        // NOTE(review): the cause is dropped here; consider passing it on if
        // ParseException offers a constructor that accepts one.
        throw new ParseException(file, Msg.file_not_readable.value());
    }
}

From source file:net.sourceforge.vaticanfetcher.model.parse.MSOffice2007Parser.java

License:Open Source License

/**
 * Opens {@code file} as a read-only OPC package, extracts its text and core
 * properties, and returns them as a {@code ParseResult}.
 *
 * @param file    the document to parse
 * @param context the parse context (not used by this implementation)
 * @return a result carrying the text, title, author and the description,
 *         keywords and subject as miscellaneous metadata
 * @throws ParseException wrapping any exception raised while opening or
 *                        reading the package
 */
@Override
protected ParseResult parse(File file, ParseContext context) throws ParseException {
    OPCPackage opc = null;
    try {
        opc = OPCPackage.open(file.getPath(), PackageAccess.READ);
        String bodyText = extractText(opc);

        PackageProperties coreProps = opc.getPackageProperties();

        // Work out the author from creator and last modifier; either may be null.
        String creator = coreProps.getCreatorProperty().getValue();
        String lastModifier = coreProps.getLastModifiedByProperty().getValue();
        String author;
        if (creator == null) {
            author = lastModifier;
        } else if (lastModifier == null || creator.equals(lastModifier)) {
            author = creator;
        } else {
            author = creator + ", " + lastModifier; //$NON-NLS-1$
        }

        // Other core properties
        String description = coreProps.getDescriptionProperty().getValue();
        String keywords = coreProps.getKeywordsProperty().getValue();
        String subject = coreProps.getSubjectProperty().getValue();
        String title = coreProps.getTitleProperty().getValue();

        return new ParseResult(bodyText)
                .setTitle(title)
                .addAuthor(author)
                .addMiscMetadata(description)
                .addMiscMetadata(keywords)
                .addMiscMetadata(subject);
    } catch (Exception failure) {
        throw new ParseException(failure);
    } finally {
        Closeables.closeQuietly(opc);
    }
}

From source file:net.sourceforge.vaticanfetcher.model.parse.MSOffice2007Parser.java

License:Open Source License

/**
 * Opens {@code file} as a read-only OPC package and returns its extracted text.
 *
 * @param file     the document to read
 * @param filename not used by this implementation
 * @return the text produced by {@code extractText}
 * @throws ParseException wrapping any exception raised while opening or
 *                        reading the package
 */
@Override
protected final String renderText(File file, String filename) throws ParseException {
    OPCPackage opc = null;
    try {
        String path = file.getPath();
        opc = OPCPackage.open(path, PackageAccess.READ);
        String rendered = extractText(opc);
        return rendered;
    } catch (Exception failure) {
        throw new ParseException(failure);
    } finally {
        // Runs on success and failure; opc is still null if open() threw.
        Closeables.closeQuietly(opc);
    }
}