Example usage for org.apache.poi.xslf.usermodel XSLFPictureData getData

List of usage examples for org.apache.poi.xslf.usermodel XSLFPictureData getData

Introduction

On this page you can find example usages of the getData() method of org.apache.poi.xslf.usermodel.XSLFPictureData.

Prototype

public byte[] getData() 

Source Link

Document

Gets the picture data as a byte array.

Usage

From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XSLFPowerPointExtractorDecorator.java

License:Apache License

/**
 * Writes the content of every slide of the presentation to the given XHTML handler and,
 * when image OCR is enabled in the configuration, hands every embedded picture to a
 * {@code TikaImageHelper}.
 *
 * <p>For each slide the following sheets are passed to {@code extractContent}, in order:
 * the slide itself, its layout, the layout's master, and (if present) the notes sheet
 * together with the notes master. Slide comments, if any, are emitted as {@code <p>}
 * elements.
 *
 * <p>The OCR section is best-effort: a picture that cannot be decoded or processed is
 * skipped and the remaining pictures are still handled. The extractor and the image
 * helper are closed at the end of the OCR section.
 *
 * @param xhtml handler that receives the extracted content
 * @throws SAXException declared for the handler / extraction calls made by this method
 * @throws IOException declared for the extraction and close calls made by this method
 * @see org.apache.poi.xslf.extractor.XSLFPowerPointExtractor#getText()
 */
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, IOException {
    XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument();

    XSLFSlide[] slides = slideShow.getSlides();
    for (XSLFSlide slide : slides) {
        // Prefix derived from the slide's part name; null when the part name is unavailable.
        String slideDesc;

        if (slide.getPackagePart() != null && slide.getPackagePart().getPartName() != null) {
            slideDesc = getJustFileName(slide.getPackagePart().getPartName().toString());
            slideDesc += "_";
        } else {
            slideDesc = null;
        }

        // slide
        extractContent(slide.getShapes(), false, xhtml, slideDesc);

        // slide layout which is the master sheet for this slide
        XSLFSheet slideLayout = slide.getMasterSheet();
        extractContent(slideLayout.getShapes(), true, xhtml, null);

        // slide master which is the master sheet for all text layouts
        XSLFSheet slideMaster = slideLayout.getMasterSheet();
        extractContent(slideMaster.getShapes(), true, xhtml, null);

        // notes (if present)
        XSLFSheet slideNotes = slide.getNotes();
        if (slideNotes != null) {
            extractContent(slideNotes.getShapes(), false, xhtml, slideDesc);
            // master sheet for this notes
            XSLFSheet notesMaster = slideNotes.getMasterSheet();
            extractContent(notesMaster.getShapes(), true, xhtml, null);
        }

        // comments (if present)
        XSLFComments comments = slide.getComments();
        if (comments != null) {
            for (CTComment comment : comments.getCTCommentsList().getCmList()) {
                xhtml.element("p", comment.getText());
            }
        }
    }

    if (Config.inst().getProp(ConfigBool.ENABLE_IMAGE_OCR)) {
        TikaImageHelper helper = new TikaImageHelper(metadata);
        try {
            List<XSLFPictureData> pictures = slideShow.getAllPictures();
            for (XSLFPictureData picture : pictures) {
                // Each picture is handled in isolation so that one broken image does not
                // prevent the remaining pictures from being processed.
                try {
                    BufferedImage image = ImageIO.read(new ByteArrayInputStream(picture.getData()));
                    if (image == null) {
                        // ImageIO.read returns null when no registered reader can decode
                        // the data; there is nothing to hand to the helper in that case.
                        continue;
                    }
                    helper.addImage(image);
                    helper.addTextToHandler(xhtml);
                } catch (Exception e) {
                    // Best-effort: report and move on to the next picture.
                    e.printStackTrace();
                }
            }
        } catch (Exception e) {
            // Failure while obtaining the picture list; OCR is skipped entirely.
            e.printStackTrace();
        } finally {
            // Nested so that the helper is closed even if closing the extractor fails.
            try {
                if (extractor != null) {
                    extractor.close();
                }
            } finally {
                helper.close();
            }
        }
    }
}

From source file:org.sleuthkit.autopsy.imageExtractor.ImageExtractor.java

/**
 * Extracts the pictures embedded in a pptx file and writes each one to the output folder
 * obtained from {@code getOutputFolderPath}.
 *
 * <p>A picture whose output file cannot be opened or written is logged and skipped; the
 * remaining pictures are still processed.
 *
 * @param af the file from which images are to be extracted.
 *
 * @return list describing the pictures that were written; {@code null} if the slide show
 *         could not be opened, it contains no pictures, or no output folder path could
 *         be obtained.
 */
private List<ExtractedImage> extractImagesFromPptx(AbstractFile af) {
    List<ExtractedImage> listOfExtractedImages = new ArrayList<>();
    String parentFileName = getUniqueName(af);
    XMLSlideShow pptx;
    try {
        pptx = new XMLSlideShow(new ReadContentInputStream(af));
    } catch (IOException ex) {
        logger.log(Level.WARNING, "SlideShow container could not be instantiated while reading " + af.getName(),
                ex);
        return null;
    }
    List<XSLFPictureData> listOfAllPictures = pptx.getAllPictures();

    // if no images are extracted from the ppt, return null, else initialize
    // the output folder for image extraction.
    if (listOfAllPictures.isEmpty()) {
        return null;
    }
    String outputFolderPath = getOutputFolderPath(parentFileName);
    if (outputFolderPath == null) {
        logger.log(Level.WARNING, "Could not get path for image extraction from AbstractFile: {0}",
                af.getName());
        return null;
    }

    for (XSLFPictureData xslsPicture : listOfAllPictures) {

        // get image file name, write it to the module outputFolder, and add
        // it to the listOfExtractedImages.
        String fileName = xslsPicture.getFileName();
        // Fetch the bytes once; they are used both for writing and for the reported size.
        byte[] data = xslsPicture.getData();

        // try-with-resources closes the stream even when write() fails.
        try (FileOutputStream fos = new FileOutputStream(outputFolderPath + File.separator + fileName)) {
            fos.write(data);
        } catch (FileNotFoundException ex) {
            logger.log(Level.WARNING, "Invalid path provided for image extraction", ex);
            continue;
        } catch (IOException ex) {
            logger.log(Level.WARNING, "Could not write to the provided location", ex);
            continue;
        }

        String fileRelativePath = File.separator + moduleDirRelative + File.separator + parentFileName
                + File.separator + fileName;
        long size = data.length;
        ExtractedImage extractedimage = new ExtractedImage(fileName, fileRelativePath, size, af);
        listOfExtractedImages.add(extractedimage);

    }

    return listOfExtractedImages;

}

From source file:org.sleuthkit.autopsy.modules.embeddedfileextractor.ImageExtractor.java

License:Open Source License

/**
 * Extract images from pptx format files.
 *
 * <p>Each picture's bytes are passed to {@code writeExtractedImage} with a path built
 * from the output folder and the picture's file name.
 *
 * @param af the file from which images are to be extracted.
 *
 * @return list of extracted images. Returns null in case no images were
 *         extracted, and also when the slide show cannot be opened, the
 *         picture list or any single picture's data cannot be read, or no
 *         output folder path could be obtained. Note that when a later
 *         picture fails, null is returned even though earlier pictures have
 *         already been passed to {@code writeExtractedImage}.
 */
private List<ExtractedImage> extractImagesFromPptx(AbstractFile af) {
    List<ExtractedImage> listOfExtractedImages;
    XMLSlideShow pptx;
    try {
        pptx = new XMLSlideShow(new ReadContentInputStream(af));
    } catch (Throwable ex) {
        // instantiating POI containers throw RuntimeExceptions
        // NOTE(review): Throwable also covers Errors; kept as-is because callers may
        // rely on this method never propagating a failure from container creation.
        logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(),
                "EmbeddedFileExtractorIngestModule.ImageExtractor.pptxContainer.init.err", af.getName()), ex); //NON-NLS
        return null;
    }
    List<XSLFPictureData> listOfAllPictures = null;
    try {
        listOfAllPictures = pptx.getAllPictures();
    } catch (Exception ex) {
        // log internal Java and Apache errors as WARNING
        logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(),
                "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); //NON-NLS
        return null;
    }

    // if no images are extracted from the PPT, return null, else initialize
    // the output folder for image extraction.
    String outputFolderPath;
    if (listOfAllPictures.isEmpty()) {
        return null;
    } else {
        outputFolderPath = getOutputFolderPath(this.parentFileName);
    }
    if (outputFolderPath == null) {
        logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(),
                "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg",
                af.getName())); //NON-NLS
        return null;
    }

    listOfExtractedImages = new ArrayList<>();
    for (XSLFPictureData xslsPicture : listOfAllPictures) {

        // get image file name, write it to the module outputFolder, and add
        // it to the listOfExtractedImageAbstractFiles.
        String fileName = xslsPicture.getFileName();
        byte[] data;
        try {
            data = xslsPicture.getData();
        } catch (Exception ex) {
            // log internal Java and Apache errors as WARNING
            logger.log(Level.WARNING,
                    NbBundle.getMessage(this.getClass(),
                            "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()),
                    ex); //NON-NLS
            return null;
        }
        writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
        // Reuse the bytes already fetched: a second getData() call here would run
        // outside the try/catch above and could throw past this method's error handling.
        listOfExtractedImages.add(
                new ExtractedImage(fileName, getFileRelativePath(fileName), data.length, af));

    }

    return listOfExtractedImages;

}