Example usage for org.apache.poi.openxml4j.opc PackagePart getContentType

Usage examples for the org.apache.poi.openxml4j.opc.PackagePart method getContentType()

Introduction

On this page you can find example usages of org.apache.poi.openxml4j.opc.PackagePart.getContentType(), collected from several open-source projects.

Prototype

public String getContentType() 

Source Link

Usage

From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.java

License:Apache License

/**
 * Walks the relationships of every main document part and dispatches the
 * internal targets that carry embedded content to the matching handler.
 *
 * <p>The id passed on is the relationship id, prefixed with
 * {@code <source file name>_} when the relationship's source file name starts
 * with {@code "slide"}.
 *
 * @param handler receives the output produced for the embedded parts
 * @throws TikaException if the package relationships cannot be read
 *         (wraps {@link InvalidFormatException})
 * @throws IOException   propagated from the embedded handlers
 * @throws SAXException  propagated from the embedded handlers
 */
private void handleEmbeddedParts(ContentHandler handler) throws TikaException, IOException, SAXException {
    try {
        for (PackagePart mainPart : getMainDocumentParts()) {
            for (PackageRelationship relationship : mainPart.getRelationships()) {

                // Only sources whose file name starts with "slide" contribute a prefix.
                String idPrefix = "";
                URI origin = relationship.getSourceURI();
                if (origin != null) {
                    String fileName = getJustFileName(origin.getPath());
                    if (fileName.startsWith("slide")) {
                        idPrefix = fileName + "_";
                    }
                }

                // External targets are not part of the package; nothing to extract.
                if (relationship.getTargetMode() != TargetMode.INTERNAL) {
                    continue;
                }

                PackagePart embedded;
                try {
                    embedded = mainPart.getRelatedPart(relationship);
                } catch (IllegalArgumentException ex) {
                    // The related part could not be resolved; skip this relationship.
                    continue;
                }

                String relationType = relationship.getRelationshipType();
                boolean oleRelation = RELATION_OLE_OBJECT.equals(relationType);

                if (oleRelation && TYPE_OLE_OBJECT.equals(embedded.getContentType())) {
                    handleEmbeddedOLE(embedded, handler, idPrefix + relationship.getId());
                } else if (oleRelation
                        || RELATION_AUDIO.equals(relationType)
                        || RELATION_IMAGE.equals(relationType)
                        || RELATION_PACKAGE.equals(relationType)) {
                    handleEmbeddedFile(embedded, handler, idPrefix + relationship.getId());
                }
            }
        }
    } catch (InvalidFormatException e) {
        throw new TikaException("Broken OOXML file", e);
    }
}

From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.java

License:Apache License

/**
 * Handles an embedded file in the document.
 *
 * <p>Builds the metadata for the part (relationship id, resource name taken
 * from the last path segment of the part name, and content type) and, if the
 * embedded extractor accepts it, passes the part's content on for parsing.
 *
 * @param part    the package part holding the embedded file
 * @param handler the handler that receives the embedded content, wrapped in
 *                an {@link EmbeddedContentHandler}
 * @param rel     the id recorded as {@code Metadata.EMBEDDED_RELATIONSHIP_ID}
 * @throws SAXException propagated from the embedded extractor
 * @throws IOException  if the part's stream cannot be opened, read or closed
 */
protected void handleEmbeddedFile(PackagePart part, ContentHandler handler, String rel)
        throws SAXException, IOException {
    Metadata metadata = new Metadata();
    metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, rel);

    // Get the name: everything after the last '/' (the whole name if there is none)
    String name = part.getPartName().getName();
    metadata.set(Metadata.RESOURCE_NAME_KEY, name.substring(name.lastIndexOf('/') + 1));

    // Get the content type
    metadata.set(Metadata.CONTENT_TYPE, part.getContentType());

    // Call the recursing handler
    if (embeddedExtractor.shouldParseEmbedded(metadata)) {
        // Close the wrapper (and with it the part stream) even when parsing fails;
        // previously the stream was never closed.
        try (TikaInputStream stream = TikaInputStream.get(part.getInputStream())) {
            embeddedExtractor.parseEmbedded(stream, new EmbeddedContentHandler(handler), metadata, false);
        }
    }
}

From source file:mj.ocraptor.extraction.tika.parser.pkg.ZipContainerDetector.java

License:Apache License

/**
 * Detects the type of an OfficeOpenXML (OOXML) file from
 *  opened Package /*  w  w w  .j ava  2  s  .  c o  m*/
 */
public static MediaType detectOfficeOpenXML(OPCPackage pkg) {
    PackageRelationshipCollection core = pkg.getRelationshipsByType(ExtractorFactory.CORE_DOCUMENT_REL);
    if (core.size() != 1) {
        // Invalid OOXML Package received
        return null;
    }

    // Get the type of the core document part
    PackagePart corePart = pkg.getPart(core.getRelationship(0));
    String coreType = corePart.getContentType();

    // Turn that into the type of the overall document
    String docType = coreType.substring(0, coreType.lastIndexOf('.'));

    // The Macro Enabled formats are a little special
    if (docType.toLowerCase().endsWith("macroenabled")) {
        docType = docType.toLowerCase() + ".12";
    }

    if (docType.toLowerCase().endsWith("macroenabledtemplate")) {
        docType = MACRO_TEMPLATE_PATTERN.matcher(docType).replaceAll("macroenabled.12");
    }

    // Build the MediaType object and return
    return MediaType.parse(docType);
}

From source file:org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.java

License:Apache License

/**
 * Processes the package's thumbnail parts on a best-effort basis.
 *
 * <p>For every thumbnail relationship an empty {@code div} with class
 * {@code "embedded"} and the part name as id is emitted, and the part is
 * offered to the embedded extractor together with metadata describing it
 * (resource name, relationship id, content type and title are all derived
 * from the part).
 *
 * <p>Any failure is deliberately ignored: this method never throws.
 *
 * @param handler receives the placeholder element and any extracted content
 */
private void handleThumbnail(ContentHandler handler) {
    try {
        OPCPackage opcPackage = extractor.getPackage();
        for (PackageRelationship rel : opcPackage.getRelationshipsByType(PackageRelationshipTypes.THUMBNAIL)) {
            PackagePart tPart = opcPackage.getPart(rel);
            if (tPart == null) {
                // Dangling relationship: skip it instead of aborting the remaining thumbnails
                continue;
            }

            // try-with-resources closes the stream even if the handler or extractor throws
            try (InputStream tStream = tPart.getInputStream()) {
                Metadata thumbnailMetadata = new Metadata();
                String thumbName = tPart.getPartName().getName();
                thumbnailMetadata.set(Metadata.RESOURCE_NAME_KEY, thumbName);

                AttributesImpl attributes = new AttributesImpl();
                attributes.addAttribute(XHTML, "class", "class", "CDATA", "embedded");
                attributes.addAttribute(XHTML, "id", "id", "CDATA", thumbName);
                handler.startElement(XHTML, "div", "div", attributes);
                handler.endElement(XHTML, "div", "div");

                thumbnailMetadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, thumbName);
                thumbnailMetadata.set(Metadata.CONTENT_TYPE, tPart.getContentType());
                thumbnailMetadata.set(TikaCoreProperties.TITLE, thumbName);

                if (embeddedExtractor.shouldParseEmbedded(thumbnailMetadata)) {
                    embeddedExtractor.parseEmbedded(TikaInputStream.get(tStream),
                            new EmbeddedContentHandler(handler), thumbnailMetadata, false);
                }
            }
        }
    } catch (Exception ignored) {
        // Thumbnail extraction is optional; a failure here must not fail the caller.
    }
}

From source file:org.apache.tika.parser.pkg.ZipContainerDetector.java

License:Apache License

/**
 * Detects the type of an OfficeOpenXML (OOXML) file from
 *  opened Package /* ww w.  j a v  a  2 s  .  c o  m*/
 */
public static MediaType detectOfficeOpenXML(OPCPackage pkg) {
    // Check for the normal Office core document
    PackageRelationshipCollection core = pkg.getRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT);
    // Otherwise check for some other Office core document types
    if (core.size() == 0) {
        core = pkg.getRelationshipsByType(STRICT_CORE_DOCUMENT);
    }
    if (core.size() == 0) {
        core = pkg.getRelationshipsByType(VISIO_DOCUMENT);
    }

    // If we didn't find a single core document of any type, skip detection
    if (core.size() != 1) {
        // Invalid OOXML Package received
        return null;
    }

    // Get the type of the core document part
    PackagePart corePart = pkg.getPart(core.getRelationship(0));
    String coreType = corePart.getContentType();

    // Turn that into the type of the overall document
    String docType = coreType.substring(0, coreType.lastIndexOf('.'));

    // The Macro Enabled formats are a little special
    if (docType.toLowerCase(Locale.ROOT).endsWith("macroenabled")) {
        docType = docType.toLowerCase(Locale.ROOT) + ".12";
    }

    if (docType.toLowerCase(Locale.ROOT).endsWith("macroenabledtemplate")) {
        docType = MACRO_TEMPLATE_PATTERN.matcher(docType).replaceAll("macroenabled.12");
    }

    // Build the MediaType object and return
    return MediaType.parse(docType);
}

From source file:poi.xslf.usermodel.DataExtraction.java

License:Apache License

/**
 * Demonstrates how to pull data out of a PowerPoint (.pptx) slide show:
 * embedded parts, pictures, and the shapes on each slide.
 *
 * <p>The locals assigned inside the loops ({@code type}, {@code name},
 * {@code anchor}, {@code pageSize}) are intentionally unused; they show which
 * accessors are available.
 *
 * @param args {@code args[0]} is the path of the slide show to read
 * @throws Exception if the file cannot be opened or parsed
 */
public static void main(String[] args) throws Exception {

    if (args.length == 0) {
        System.out.println("Input file is required");
        return;
    }

    // Close the file even if the slide show constructor throws
    XMLSlideShow ppt;
    try (FileInputStream is = new FileInputStream(args[0])) {
        ppt = new XMLSlideShow(is);
    }

    // Get the document's embedded files.
    List<PackagePart> embeds = ppt.getAllEmbedds();
    for (PackagePart p : embeds) {
        String type = p.getContentType();
        String name = p.getPartName().getName(); //typically file name

        try (InputStream pIs = p.getInputStream()) {
            // make sense of the part data
        }
    }

    // Get the document's pictures.
    List<XSLFPictureData> images = ppt.getAllPictures();
    for (XSLFPictureData data : images) {
        PackagePart p = data.getPackagePart();

        String type = p.getContentType();
        String name = data.getFileName();

        try (InputStream pIs = p.getInputStream()) {
            // make sense of the image data
        }
    }

    Dimension pageSize = ppt.getPageSize(); // size of the canvas in points
    for (XSLFSlide slide : ppt.getSlides()) {
        for (XSLFShape shape : slide) {
            Rectangle2D anchor = shape.getAnchor(); // position on the canvas
            if (shape instanceof XSLFTextShape) {
                XSLFTextShape txShape = (XSLFTextShape) shape;
                System.out.println(txShape.getText());
            } else if (shape instanceof XSLFPictureShape) {
                XSLFPictureShape pShape = (XSLFPictureShape) shape;
                XSLFPictureData pData = pShape.getPictureData();
                System.out.println(pData.getFileName());
            } else {
                System.out.println("Process me: " + shape.getClass());
            }
        }
    }
}

From source file:poi.xssf.usermodel.examples.EmbeddedObjects.java

License:Apache License

/**
 * Demonstrates how to open the documents embedded in an Excel (.xlsx)
 * workbook, choosing the document class from each part's content type.
 *
 * <p>The document objects created in the branches are intentionally unused;
 * they show which class handles which content type.
 *
 * @param args {@code args[0]} is the path of the workbook to read
 * @throws Exception if the package or an embedded document cannot be read
 */
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("Input file is required");
        return;
    }

    OPCPackage pkg = OPCPackage.open(args[0]);
    try {
        XSSFWorkbook workbook = new XSSFWorkbook(pkg);
        for (PackagePart pPart : workbook.getAllEmbedds()) {
            String contentType = pPart.getContentType();
            // One stream per part, closed whichever branch is taken
            try (InputStream partStream = pPart.getInputStream()) {
                // Excel Workbook - either binary or OpenXML
                if ("application/vnd.ms-excel".equals(contentType)) {
                    HSSFWorkbook embeddedWorkbook = new HSSFWorkbook(partStream);
                }
                // Excel Workbook - OpenXML file format
                else if ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                        .equals(contentType)) {
                    XSSFWorkbook embeddedWorkbook = new XSSFWorkbook(partStream);
                }
                // Word Document - binary (OLE2CDF) file format
                else if ("application/msword".equals(contentType)) {
                    HWPFDocument document = new HWPFDocument(partStream);
                }
                // Word Document - OpenXML file format
                else if ("application/vnd.openxmlformats-officedocument.wordprocessingml.document"
                        .equals(contentType)) {
                    XWPFDocument document = new XWPFDocument(partStream);
                }
                // PowerPoint Document - binary file format
                else if ("application/vnd.ms-powerpoint".equals(contentType)) {
                    HSLFSlideShow slideShow = new HSLFSlideShow(partStream);
                }
                // PowerPoint Document - OpenXML file format
                else if ("application/vnd.openxmlformats-officedocument.presentationml.presentation"
                        .equals(contentType)) {
                    OPCPackage docPackage = OPCPackage.open(partStream);
                    XSLFSlideShow slideShow = new XSLFSlideShow(docPackage);
                }
                // Any other type of embedded object.
                else {
                    System.out.println("Unknown Embedded Document: " + contentType);
                    // partStream holds the raw bytes of the embedded object
                }
            }
        }
    } finally {
        // Release the package even when reading an embedded document fails
        pkg.close();
    }
}

From source file:test.unit.be.fedict.eid.applet.service.signer.OOXMLSignatureVerifierTest.java

License:Open Source License

/**
 * Opens a signed .docx as an OPC package, logs every part's name and content
 * type, and checks that at least one digital-signature part is present.
 *
 * @throws Exception if the test resource cannot be opened as a package
 */
@Test
public void testOPC() throws Exception {
    // setup: close the resource stream when the test finishes, pass or fail
    try (InputStream inputStream = OOXMLSignatureVerifierTest.class
            .getResourceAsStream("/hello-world-signed.docx")) {

        // operate
        OPCPackage opcPackage = OPCPackage.open(inputStream);

        ArrayList<PackagePart> parts = opcPackage.getParts();
        for (PackagePart part : parts) {
            LOG.debug("part name: " + part.getPartName().getName());
            LOG.debug("part content type: " + part.getContentType());
        }

        ArrayList<PackagePart> signatureParts = opcPackage.getPartsByContentType(
                "application/vnd.openxmlformats-package.digital-signature-xmlsignature+xml");
        assertFalse(signatureParts.isEmpty());

        PackagePart signaturePart = signatureParts.get(0);
        LOG.debug("signature part class type: " + signaturePart.getClass().getName());

        PackageDigitalSignatureManager packageDigitalSignatureManager = new PackageDigitalSignatureManager();
        // yeah... POI implementation still missing
    }
}

From source file:uk.ac.liverpool.spreadsheet.ExcelFeatureAnalysis.java

License:Apache License

/**
 * Appends a "spreadsheets" element describing the workbook held by
 * {@code efa} to {@code da}.
 *
 * <p>Workbook-wide features are written first (sheet count, pictures, fonts,
 * embedded objects, and an empty "NamedCells" marker when the workbook has
 * names), followed by one "sheet" element per sheet, which is filled in by
 * {@code analyseSheet}.
 *
 * @param da  the parent element that receives the "spreadsheets" element
 * @param efa the analysis holding the workbook ({@code wb}) and, depending on
 *            the file format, its {@code hswb} or {@code xswb} view
 */
private static void analyseSpreadsheet(Element da, ExcelFeatureAnalysis efa) {

    Element spreadsheets = new Element("spreadsheets", sn);
    da.addContent(spreadsheets);
    spreadsheets.setAttribute("numberOfSheets", String.valueOf(efa.wb.getNumberOfSheets()));

    // ---- workbook wide features ----

    List<? extends PictureData> pictures = efa.wb.getAllPictures();
    if (pictures != null && !pictures.isEmpty()) {
        Element picturesElement = new Element("Pictures", sn);
        spreadsheets.addContent(picturesElement);
        for (PictureData picture : pictures) {
            Element pictureElement = new Element("Picture", sn);
            pictureElement.setAttribute("mimeType", picture.getMimeType());
            picturesElement.addContent(pictureElement);
        }
    }

    int fontCount = efa.wb.getNumberOfFonts();
    if (fontCount > 0) {
        Element fontsElement = new Element("Fonts", sn);
        spreadsheets.addContent(fontsElement);
        for (int fontIndex = 0; fontIndex < fontCount; fontIndex++) {
            Font font = efa.wb.getFontAt((short) fontIndex);
            Element fontElement = new Element("Font", sn);
            fontElement.setAttribute("Name", font.getFontName());
            fontElement.setAttribute("Charset", String.valueOf(font.getCharSet()));
            fontsElement.addContent(fontElement);
        }
    }

    if (efa.hswb != null) {
        // Binary workbook: OS version and OLE2 embedded objects
        DocumentSummaryInformation summary = efa.hswb.getDocumentSummaryInformation();
        if (summary != null) {
            spreadsheets.setAttribute("OSVersion", String.valueOf(summary.getOSVersion()));
        }

        List<HSSFObjectData> embeddedObjects = efa.hswb.getAllEmbeddedObjects();
        if (embeddedObjects != null && !embeddedObjects.isEmpty()) {
            Element embeddedElement = new Element("EmbeddedObjects", sn);
            spreadsheets.addContent(embeddedElement);
            for (HSSFObjectData embeddedObject : embeddedObjects) {
                Element objectElement = new Element("EmbeddedObject", sn);
                objectElement.setAttribute("name", embeddedObject.getOLE2ClassName());
                embeddedElement.addContent(objectElement);
            }
        }
    } else if (efa.xswb != null) {
        // OOXML workbook: embedded package parts
        try {
            // The result is not used; the call is kept so behaviour stays unchanged.
            POIXMLProperties properties = efa.xswb.getProperties();
            List<PackagePart> embeddedParts = efa.xswb.getAllEmbedds();
            if (embeddedParts != null && !embeddedParts.isEmpty()) {
                Element embeddedElement = new Element("EmbeddedObjects", sn);
                spreadsheets.addContent(embeddedElement);

                for (PackagePart embeddedPart : embeddedParts) {
                    Element objectElement = new Element("EmbeddedObject", sn);
                    objectElement.setAttribute("mimeType", embeddedPart.getContentType());
                    objectElement.setAttribute("name", embeddedPart.getPartName().getName());
                    embeddedElement.addContent(objectElement);
                }
            }
        } catch (OpenXML4JException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    // Only a marker element is written; the names themselves are not listed.
    int nameCount = efa.wb.getNumberOfNames();
    if (nameCount > 0) {
        spreadsheets.addContent(new Element("NamedCells", sn));
    }

    // ---- sheet specific features ----
    int sheetCount = efa.wb.getNumberOfSheets();
    for (int sheetIndex = 0; sheetIndex < sheetCount; sheetIndex++) {
        Sheet sheet = efa.wb.getSheetAt(sheetIndex);
        Element sheetElement = new Element("sheet", sn);
        spreadsheets.addContent(sheetElement);
        analyseSheet(sheet, sheetElement, sn, efa);
    }
}