List of usage examples for org.apache.poi.openxml4j.opc.PackageRelationshipCollection.size()
public int size()
From source file:mj.ocraptor.extraction.tika.parser.pkg.ZipContainerDetector.java
License:Apache License
/** * Detects the type of an OfficeOpenXML (OOXML) file from * opened Package /*from w w w . ja v a 2s . c o m*/ */ public static MediaType detectOfficeOpenXML(OPCPackage pkg) { PackageRelationshipCollection core = pkg.getRelationshipsByType(ExtractorFactory.CORE_DOCUMENT_REL); if (core.size() != 1) { // Invalid OOXML Package received return null; } // Get the type of the core document part PackagePart corePart = pkg.getPart(core.getRelationship(0)); String coreType = corePart.getContentType(); // Turn that into the type of the overall document String docType = coreType.substring(0, coreType.lastIndexOf('.')); // The Macro Enabled formats are a little special if (docType.toLowerCase().endsWith("macroenabled")) { docType = docType.toLowerCase() + ".12"; } if (docType.toLowerCase().endsWith("macroenabledtemplate")) { docType = MACRO_TEMPLATE_PATTERN.matcher(docType).replaceAll("macroenabled.12"); } // Build the MediaType object and return return MediaType.parse(docType); }
From source file:org.alfresco.repo.content.transform.OOXMLThumbnailContentTransformer.java
License:Open Source License
@Override protected void transformInternal(ContentReader reader, ContentWriter writer, TransformationOptions options) throws Exception { final String sourceMimetype = reader.getMimetype(); final String sourceExtension = getMimetypeService().getExtension(sourceMimetype); final String targetMimetype = writer.getMimetype(); if (log.isDebugEnabled()) { StringBuilder msg = new StringBuilder(); msg.append("Transforming from ").append(sourceMimetype).append(" to ").append(targetMimetype); log.debug(msg.toString());//from w w w. j a va2s . c o m } OPCPackage pkg = null; try { File ooxmlTempFile = TempFileProvider.createTempFile(this.getClass().getSimpleName() + "_ooxml", sourceExtension); reader.getContent(ooxmlTempFile); // Load the file pkg = OPCPackage.open(ooxmlTempFile.getPath()); // Does it have a thumbnail? PackageRelationshipCollection rels = pkg.getRelationshipsByType(PackageRelationshipTypes.THUMBNAIL); if (rels.size() > 0) { // Get the thumbnail part PackageRelationship tRel = rels.getRelationship(0); PackagePart tPart = pkg.getPart(tRel); // Write it to the target InputStream tStream = tPart.getInputStream(); writer.putContent(tStream); tStream.close(); } else { log.debug("No thumbnail present in " + reader.toString()); throw new UnimportantTransformException(NO_THUMBNAIL_PRESENT_IN_FILE + targetMimetype); } } catch (IOException e) { throw new AlfrescoRuntimeException("Unable to transform " + sourceExtension + " file.", e); } finally { if (pkg != null) { pkg.close(); } } }
From source file:org.apache.tika.parser.microsoft.ooxml.SXSLFPowerPointExtractorDecorator.java
License:Apache License
/** * @see XSLFPowerPointExtractor#getText() *///from w w w. ja v a 2 s. c om protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, IOException { loadCommentAuthors(); //TODO: should check for custShowLst and order based on sldLst try { PackageRelationshipCollection prc = mainDocument .getRelationshipsByType(XSLFRelation.SLIDE.getRelation()); if (prc.size() == 0) { } for (int i = 0; i < prc.size(); i++) { handleSlidePart(mainDocument.getRelatedPart(prc.getRelationship(i)), xhtml); } } catch (InvalidFormatException e) { } handleBasicRelatedParts(XSLFRelation.SLIDE_MASTER.getRelation(), "slide-master", mainDocument, new PlaceHolderSkipper(new OOXMLWordAndPowerPointTextHandler(new OOXMLTikaBodyPartHandler(xhtml), new HashMap<String, String>()))); handleBasicRelatedParts(HANDOUT_MASTER, "slide-handout-master", mainDocument, new OOXMLWordAndPowerPointTextHandler(new OOXMLTikaBodyPartHandler(xhtml), new HashMap<String, String>())); }
From source file:org.apache.tika.parser.microsoft.ooxml.SXSLFPowerPointExtractorDecorator.java
License:Apache License
private void loadCommentAuthors() { PackageRelationshipCollection prc = null; try {/*from ww w. j a va 2 s .c o m*/ prc = mainDocument.getRelationshipsByType(XSLFRelation.COMMENT_AUTHORS.getRelation()); } catch (InvalidFormatException e) { } if (prc == null || prc.size() == 0) { return; } for (int i = 0; i < prc.size(); i++) { PackagePart commentAuthorsPart = null; try { commentAuthorsPart = commentAuthorsPart = mainDocument.getRelatedPart(prc.getRelationship(i)); } catch (InvalidFormatException e) { } if (commentAuthorsPart == null) { continue; } try (InputStream stream = commentAuthorsPart.getInputStream()) { context.getSAXParser().parse(new CloseShieldInputStream(stream), new OfflineContentHandler(new XSLFCommentAuthorHandler())); } catch (TikaException | SAXException | IOException e) { //do something with this } } }
From source file:org.apache.tika.parser.microsoft.ooxml.SXSLFPowerPointExtractorDecorator.java
License:Apache License
/** * This should handle the comments, master, notes, etc * * @param contentType/*from w ww .j av a 2 s .co m*/ * @param xhtmlClassLabel * @param parentPart * @param contentHandler */ private void handleBasicRelatedParts(String contentType, String xhtmlClassLabel, PackagePart parentPart, ContentHandler contentHandler) throws SAXException { PackageRelationshipCollection relatedPartPRC = null; try { relatedPartPRC = parentPart.getRelationshipsByType(contentType); } catch (InvalidFormatException e) { //swallow } if (relatedPartPRC != null && relatedPartPRC.size() > 0) { AttributesImpl attributes = new AttributesImpl(); attributes.addAttribute("", "class", "class", "CDATA", xhtmlClassLabel); contentHandler.startElement("", "div", "div", attributes); for (int i = 0; i < relatedPartPRC.size(); i++) { PackageRelationship relatedPartPackageRelationship = relatedPartPRC.getRelationship(i); try { PackagePart relatedPartPart = parentPart.getRelatedPart(relatedPartPackageRelationship); try (InputStream stream = relatedPartPart.getInputStream()) { context.getSAXParser().parse(stream, new OfflineContentHandler(new EmbeddedContentHandler(contentHandler))); } catch (IOException | TikaException e) { //do something with this } } catch (InvalidFormatException e) { } } contentHandler.endElement("", "div", "div"); } }
From source file:org.apache.tika.parser.microsoft.ooxml.SXSLFPowerPointExtractorDecorator.java
License:Apache License
/** * In PowerPoint files, slides have things embedded in them, * and slide drawings which have the images *///from ww w .j a v a2s . c o m @Override protected List<PackagePart> getMainDocumentParts() { List<PackagePart> parts = new ArrayList<>(); //TODO: consider: getPackage().getPartsByName(Pattern.compile("/ppt/embeddings/.*? //TODO: consider: getPackage().getPartsByName(Pattern.compile("/ppt/media/.*? try { PackageRelationshipCollection prc = mainDocument .getRelationshipsByType(XSLFRelation.SLIDE.getRelation()); for (int i = 0; i < prc.size(); i++) { PackagePart slidePart = mainDocument.getRelatedPart(prc.getRelationship(i)); addSlideParts(slidePart, parts); } } catch (InvalidFormatException e) { //log } parts.add(mainDocument); for (String rel : new String[] { XSLFRelation.SLIDE_MASTER.getRelation(), HANDOUT_MASTER }) { try { PackageRelationshipCollection prc = mainDocument.getRelationshipsByType(rel); for (int i = 0; i < prc.size(); i++) { PackagePart pp = mainDocument.getRelatedPart(prc.getRelationship(i)); if (pp != null) { parts.add(pp); } } } catch (InvalidFormatException e) { //log } } return parts; }
From source file:org.apache.tika.parser.microsoft.ooxml.SXWPFWordExtractorDecorator.java
License:Apache License
private void handleDocumentPart(PackagePart documentPart, XHTMLContentHandler xhtml) throws IOException, SAXException { //load the numbering/list manager and styles from the main document part XWPFNumbering numbering = loadNumbering(documentPart); XWPFListManager listManager = new XWPFListManager(numbering); XWPFStylesShim styles = loadStyles(documentPart); //headers/*from w w w.j a v a 2 s . c om*/ try { PackageRelationshipCollection headersPRC = documentPart .getRelationshipsByType(XWPFRelation.HEADER.getRelation()); if (headersPRC != null) { for (int i = 0; i < headersPRC.size(); i++) { PackagePart header = documentPart.getRelatedPart(headersPRC.getRelationship(i)); handlePart(header, styles, listManager, xhtml); } } } catch (InvalidFormatException e) { //swallow } //main document handlePart(documentPart, styles, listManager, xhtml); //for now, just dump other components at end for (XWPFRelation rel : new XWPFRelation[] { XWPFRelation.FOOTNOTE, XWPFRelation.COMMENT, XWPFRelation.FOOTER, XWPFRelation.ENDNOTE }) { try { PackageRelationshipCollection prc = documentPart.getRelationshipsByType(rel.getRelation()); if (prc != null) { for (int i = 0; i < prc.size(); i++) { PackagePart packagePart = documentPart.getRelatedPart(prc.getRelationship(i)); handlePart(packagePart, styles, listManager, xhtml); } } } catch (InvalidFormatException e) { //swallow } } }
From source file:org.apache.tika.parser.microsoft.ooxml.SXWPFWordExtractorDecorator.java
License:Apache License
private XWPFStylesShim loadStyles(PackagePart packagePart) { try {/*from w w w . j a v a2s.co m*/ PackageRelationshipCollection stylesParts = packagePart .getRelationshipsByType(XWPFRelation.STYLES.getRelation()); if (stylesParts.size() > 0) { PackageRelationship stylesRelationShip = stylesParts.getRelationship(0); if (stylesRelationShip == null) { return null; } PackagePart stylesPart = packagePart.getRelatedPart(stylesRelationShip); if (stylesPart == null) { return null; } return new XWPFStylesShim(stylesPart, context); } } catch (OpenXML4JException e) { //swallow } return null; }
From source file:org.apache.tika.parser.microsoft.ooxml.SXWPFWordExtractorDecorator.java
License:Apache License
private XWPFNumbering loadNumbering(PackagePart packagePart) { try {/*from w ww . j a v a 2s . c o m*/ PackageRelationshipCollection numberingParts = packagePart .getRelationshipsByType(XWPFRelation.NUMBERING.getRelation()); if (numberingParts.size() > 0) { PackageRelationship numberingRelationShip = numberingParts.getRelationship(0); if (numberingRelationShip == null) { return null; } PackagePart numberingPart = packagePart.getRelatedPart(numberingRelationShip); if (numberingPart == null) { return null; } return new XWPFNumberingShim(numberingPart); } } catch (IOException | OpenXML4JException e) { //swallow } return null; }
From source file:org.apache.tika.parser.microsoft.ooxml.SXWPFWordExtractorDecorator.java
License:Apache License
private void addRelatedParts(PackagePart documentPart, List<PackagePart> relatedParts) { for (String relation : MAIN_PART_RELATIONS) { PackageRelationshipCollection prc = null; try {//ww w . j a v a2s . co m prc = documentPart.getRelationshipsByType(relation); if (prc != null) { for (int i = 0; i < prc.size(); i++) { PackagePart packagePart = documentPart.getRelatedPart(prc.getRelationship(i)); relatedParts.add(packagePart); } } } catch (InvalidFormatException e) { } } }