List of usage examples for org.apache.poi.openxml4j.opc.OPCPackage.open
public static OPCPackage open(String path, PackageAccess access) throws InvalidFormatException
From source file:featurescomparison.workingwithworksheets.converttocsv.java.ApacheXLSX2CSV.java
License:Apache License
public static void main(String[] args) throws Exception { String dataPath = "src/featurescomparison/workingwithworksheets/converttocsv/data/"; File xlsxFile = new File(dataPath + "workbook.xls"); if (!xlsxFile.exists()) { System.err.println("Not found or not a file: " + xlsxFile.getPath()); return;//from w w w .ja v a 2 s . c om } int minColumns = -1; if (args.length >= 2) minColumns = Integer.parseInt(args[1]); // The package open is instantaneous, as it should be. OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ); ApacheXLSX2CSV xlsx2csv = new ApacheXLSX2CSV(p, System.out, minColumns); xlsx2csv.process(); }
From source file:hsio.snippet.java.poi.examples.XLSX2CSV.java
License:Apache License
public static void main(String[] args) throws Exception { File xlsxFile = new File("./samples/excel/big-number.xlsx"); int minColumns = -1; if (args.length >= 2) minColumns = Integer.parseInt(args[1]); // The package open is instantaneous, as it should be. OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ); XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns); xlsx2csv.process();//from www .j a v a 2 s . c o m p.close(); }
From source file:kiklos.tv.timetable.XLSX2CSV.java
License:Apache License
public static void main(String[] args) throws Exception { args = new String[2]; args[0] = "G:\\WORK\\batya\\kiklos-proxy\\PERREG.xlsx"; ;/*from w w w . java 2 s .c o m*/ args[1] = "10"; if (args.length < 1) { System.err.println("Use:"); System.err.println(" XLSX2CSV <xlsx file> [min columns]"); return; } File xlsxFile = new File(args[0]); if (!xlsxFile.exists()) { System.err.println("Not found or not a file: " + xlsxFile.getPath()); return; } int minColumns = -1; if (args.length >= 2) minColumns = Integer.parseInt(args[1]); // The package open is instantaneous, as it should be. OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ); StringBuilder sb = new StringBuilder(); XLSX2CSV xlsx2csv = new XLSX2CSV(p, sb, minColumns); xlsx2csv.process(); System.out.println(sb.toString()); /* SimpleDateFormat myFormatter = new SimpleDateFormat("HmmssSS"); String output = myFormatter.parse("6000000").toString(); System.out.println(output); return;*/ }
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.java
License:Apache License
public static void parse(InputStream stream, ContentHandler baseHandler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { Locale locale = context.get(Locale.class, Locale.getDefault()); ExtractorFactory.setThreadPrefersEventExtractors(true); try {/*from w w w. jav a 2 s . c o m*/ OOXMLExtractor extractor; OPCPackage pkg; // Locate or Open the OPCPackage for the file TikaInputStream tis = TikaInputStream.cast(stream); if (tis != null && tis.getOpenContainer() instanceof OPCPackage) { pkg = (OPCPackage) tis.getOpenContainer(); } else if (tis != null && tis.hasFile()) { pkg = OPCPackage.open(tis.getFile().getPath(), PackageAccess.READ); tis.setOpenContainer(pkg); } else { InputStream shield = new CloseShieldInputStream(stream); pkg = OPCPackage.open(shield); } // Get the type, and ensure it's one we handle MediaType type = ZipContainerDetector.detectOfficeOpenXML(pkg); if (type == null || OOXMLParser.UNSUPPORTED_OOXML_TYPES.contains(type)) { // Not a supported type, delegate to Empty Parser EmptyParser.INSTANCE.parse(stream, baseHandler, metadata, context); return; } metadata.set(Metadata.CONTENT_TYPE, type.toString()); // Have the appropriate OOXML text extractor picked POIXMLTextExtractor poiExtractor = ExtractorFactory.createExtractor(pkg); POIXMLDocument document = poiExtractor.getDocument(); if (poiExtractor instanceof XSSFEventBasedExcelExtractor) { extractor = new XSSFExcelExtractorDecorator(context, (XSSFEventBasedExcelExtractor) poiExtractor, locale, metadata); } else if (document == null) { throw new TikaException( "Expecting UserModel based POI OOXML extractor with a document, but none found. 
" + "The extractor returned was a " + poiExtractor); } else if (document instanceof XMLSlideShow) { extractor = new XSLFPowerPointExtractorDecorator(context, (XSLFPowerPointExtractor) poiExtractor, metadata); } else if (document instanceof XWPFDocument) { extractor = new XWPFWordExtractorDecorator(context, (XWPFWordExtractor) poiExtractor, metadata); } else { extractor = new POIXMLTextExtractorDecorator(context, poiExtractor); } // Get the bulk of the metadata first, so that it's accessible during // parsing if desired by the client (see TIKA-1109) extractor.getMetadataExtractor().extract(metadata); // Extract the text, along with any in-document metadata extractor.getXHTML(baseHandler, metadata, context); } catch (IllegalArgumentException e) { if (e.getMessage().startsWith("No supported documents found")) { throw new TikaException( "TIKA-418: RuntimeException while getting content" + " for thmx and xps file types", e); } else { throw new TikaException("Error creating OOXML extractor", e); } } catch (InvalidFormatException e) { throw new TikaException("Error creating OOXML extractor", e); } catch (OpenXML4JException e) { throw new TikaException("Error creating OOXML extractor", e); } catch (XmlException e) { throw new TikaException("Error creating OOXML extractor", e); } }
From source file:mj.ocraptor.extraction.tika.parser.pkg.ZipContainerDetector.java
License:Apache License
private static MediaType detectOfficeOpenXML(ZipFile zip, TikaInputStream stream) { try {//from w ww .j av a 2 s .co m if (zip.getEntry("_rels/.rels") != null || zip.getEntry("[Content_Types].xml") != null) { // Use POI to open and investigate it for us OPCPackage pkg = OPCPackage.open(stream.getFile().getPath(), PackageAccess.READ); stream.setOpenContainer(pkg); // Detect based on the open OPC Package return detectOfficeOpenXML(pkg); } else { return null; } } catch (IOException e) { return null; } catch (RuntimeException e) { return null; } catch (InvalidFormatException e) { return null; } }
From source file:net.sourceforge.docfetcher.model.parse.MSOffice2007Parser.java
License:Open Source License
private static ParseResult doParse(File file, PackageAccess access) throws ParseException { OPCPackage pkg = null;//w w w. j a v a2 s . c om try { pkg = OPCPackage.open(file.getPath(), access); String contents = extractText(pkg); // Open properties PackageProperties props = pkg.getPackageProperties(); // Get author(s) String author = null; String defaultAuthor = props.getCreatorProperty().getValue(); String lastAuthor = props.getLastModifiedByProperty().getValue(); if (defaultAuthor == null) { if (lastAuthor != null) author = lastAuthor; } else if (lastAuthor == null) { author = defaultAuthor; } else { if (defaultAuthor.equals(lastAuthor)) author = defaultAuthor; else author = defaultAuthor + ", " + lastAuthor; //$NON-NLS-1$ } // Get other metadata String description = props.getDescriptionProperty().getValue(); String keywords = props.getKeywordsProperty().getValue(); String subject = props.getSubjectProperty().getValue(); String title = props.getTitleProperty().getValue(); return new ParseResult(contents).setTitle(title).addAuthor(author).addMiscMetadata(description) .addMiscMetadata(keywords).addMiscMetadata(subject); } catch (Exception e) { throw new ParseException(e); } finally { Closeables.closeQuietly(pkg); } }
From source file:net.sourceforge.docfetcher.model.parse.MSOffice2007Parser.java
License:Open Source License
@NotNull private static String doRenderText(File file, PackageAccess access) throws ParseException { OPCPackage pkg = null;//from w w w . j ava 2s . com try { pkg = OPCPackage.open(file.getPath(), access); return extractText(pkg); } catch (Exception e) { throw new ParseException(e); } finally { Closeables.closeQuietly(pkg); } }
From source file:net.sourceforge.docfetcher.parse.MSOffice2007Parser.java
License:Open Source License
public Document parse(File file) throws ParseException { try {/*from ww w . j av a2 s . c o m*/ // Extract contents POITextExtractor ef = ExtractorFactory.createExtractor(file); StringBuffer contents = new StringBuffer(ef.getText()); // Open up properties OPCPackage pkg = OPCPackage.open(file.getAbsolutePath(), PackageAccess.READ); PackageProperties props = pkg.getPackageProperties(); // Get author(s) String author = null; String defaultAuthor = props.getCreatorProperty().getValue(); String lastAuthor = props.getLastModifiedByProperty().getValue(); if (defaultAuthor == null) { if (lastAuthor != null) author = lastAuthor; } else if (lastAuthor == null) { author = defaultAuthor; } else { if (defaultAuthor.equals(lastAuthor)) author = defaultAuthor; else author = defaultAuthor + ", " + lastAuthor; //$NON-NLS-1$ } // Get other metadata String description = props.getDescriptionProperty().getValue(); String keywords = props.getKeywordsProperty().getValue(); String subject = props.getSubjectProperty().getValue(); String title = props.getTitleProperty().getValue(); // Append metadata to contents String[] metaData = new String[] { author, description, keywords, subject, title }; for (String field : metaData) if (field != null) contents.append(" ").append(field); //$NON-NLS-1$ return new Document(file, title, contents).addAuthor(author); } catch (Exception e) { throw new ParseException(file, Msg.file_not_readable.value()); } }
From source file:net.sourceforge.vaticanfetcher.model.parse.MSOffice2007Parser.java
License:Open Source License
@Override protected ParseResult parse(File file, ParseContext context) throws ParseException { OPCPackage pkg = null;/*ww w . j av a 2 s. c o m*/ try { pkg = OPCPackage.open(file.getPath(), PackageAccess.READ); String contents = extractText(pkg); // Open properties PackageProperties props = pkg.getPackageProperties(); // Get author(s) String author = null; String defaultAuthor = props.getCreatorProperty().getValue(); String lastAuthor = props.getLastModifiedByProperty().getValue(); if (defaultAuthor == null) { if (lastAuthor != null) author = lastAuthor; } else if (lastAuthor == null) { author = defaultAuthor; } else { if (defaultAuthor.equals(lastAuthor)) author = defaultAuthor; else author = defaultAuthor + ", " + lastAuthor; //$NON-NLS-1$ } // Get other metadata String description = props.getDescriptionProperty().getValue(); String keywords = props.getKeywordsProperty().getValue(); String subject = props.getSubjectProperty().getValue(); String title = props.getTitleProperty().getValue(); return new ParseResult(contents).setTitle(title).addAuthor(author).addMiscMetadata(description) .addMiscMetadata(keywords).addMiscMetadata(subject); } catch (Exception e) { throw new ParseException(e); } finally { Closeables.closeQuietly(pkg); } }
From source file:net.sourceforge.vaticanfetcher.model.parse.MSOffice2007Parser.java
License:Open Source License
/**
 * Opens the file read-only as an OPC package and returns the text produced by
 * {@code extractText}. The package is closed quietly in all cases.
 *
 * @param file     the file to open
 * @param filename the file name (not used by this implementation)
 * @return the extracted text
 * @throws ParseException wrapping any exception raised while opening or extracting
 */
@Override
protected final String renderText(File file, String filename) throws ParseException {
    OPCPackage opcPackage = null;
    try {
        opcPackage = OPCPackage.open(file.getPath(), PackageAccess.READ);
        String text = extractText(opcPackage);
        return text;
    } catch (Exception e) {
        throw new ParseException(e);
    } finally {
        Closeables.closeQuietly(opcPackage);
    }
}