Usage examples for org.apache.poi.xslf.usermodel.XSLFPictureData#getData()
public byte[] getData()
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XSLFPowerPointExtractorDecorator.java
License:Apache License
/** * @see org.apache.poi.xslf.extractor.XSLFPowerPointExtractor#getText() *//* w w w. ja v a2 s. c o m*/ protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, IOException { XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument(); XSLFSlide[] slides = slideShow.getSlides(); for (XSLFSlide slide : slides) { String slideDesc; if (slide.getPackagePart() != null && slide.getPackagePart().getPartName() != null) { slideDesc = getJustFileName(slide.getPackagePart().getPartName().toString()); slideDesc += "_"; } else { slideDesc = null; } // slide extractContent(slide.getShapes(), false, xhtml, slideDesc); // slide layout which is the master sheet for this slide XSLFSheet slideLayout = slide.getMasterSheet(); extractContent(slideLayout.getShapes(), true, xhtml, null); // slide master which is the master sheet for all text layouts XSLFSheet slideMaster = slideLayout.getMasterSheet(); extractContent(slideMaster.getShapes(), true, xhtml, null); // notes (if present) XSLFSheet slideNotes = slide.getNotes(); if (slideNotes != null) { extractContent(slideNotes.getShapes(), false, xhtml, slideDesc); // master sheet for this notes XSLFSheet notesMaster = slideNotes.getMasterSheet(); extractContent(notesMaster.getShapes(), true, xhtml, null); } // comments (if present) XSLFComments comments = slide.getComments(); if (comments != null) { for (CTComment comment : comments.getCTCommentsList().getCmList()) { xhtml.element("p", comment.getText()); } } } if (Config.inst().getProp(ConfigBool.ENABLE_IMAGE_OCR)) { TikaImageHelper helper = new TikaImageHelper(metadata); try { List<XSLFPictureData> pictures = slideShow.getAllPictures(); for (XSLFPictureData picture : pictures) { ByteArrayInputStream imageData = new ByteArrayInputStream(picture.getData()); BufferedImage image = ImageIO.read(imageData); helper.addImage(image); helper.addTextToHandler(xhtml); } } catch (Exception e) { e.printStackTrace(); } finally { if (extractor != null) { extractor.close(); } if 
(helper != null) { helper.close(); } } } }
From source file:org.sleuthkit.autopsy.imageExtractor.ImageExtractor.java
private List<ExtractedImage> extractImagesFromPptx(AbstractFile af) { List<ExtractedImage> listOfExtractedImages = new ArrayList<ExtractedImage>(); String parentFileName = getUniqueName(af); XMLSlideShow pptx;/* w w w . j a v a 2 s . c om*/ try { pptx = new XMLSlideShow(new ReadContentInputStream(af)); } catch (IOException ex) { logger.log(Level.WARNING, "SlideShow container could not be instantiated while reading " + af.getName(), ex); return null; } List<XSLFPictureData> listOfAllPictures = pptx.getAllPictures(); // if no images are extracted from the ppt, return null, else initialize // the output folder for image extraction. String outputFolderPath; if (listOfAllPictures.isEmpty()) { return null; } else { outputFolderPath = getOutputFolderPath(parentFileName); } if (outputFolderPath == null) { logger.log(Level.WARNING, "Could not get path for image extraction from AbstractFile: {0}", af.getName()); return null; } for (XSLFPictureData xslsPicture : listOfAllPictures) { // get image file name, write it to the module outputFolder, and add // it to the listOfExtractedImages. String fileName = xslsPicture.getFileName(); FileOutputStream fos = null; try { fos = new FileOutputStream(outputFolderPath + File.separator + fileName); } catch (FileNotFoundException ex) { logger.log(Level.WARNING, "Invalid path provided for image extraction", ex); continue; } try { fos.write(xslsPicture.getData()); fos.close(); } catch (IOException ex) { logger.log(Level.WARNING, "Could not write to the provided location", ex); continue; } String fileRelativePath = File.separator + moduleDirRelative + File.separator + parentFileName + File.separator + fileName; long size = xslsPicture.getData().length; ExtractedImage extractedimage = new ExtractedImage(fileName, fileRelativePath, size, af); listOfExtractedImages.add(extractedimage); } return listOfExtractedImages; }
From source file:org.sleuthkit.autopsy.modules.embeddedfileextractor.ImageExtractor.java
License:Open Source License
/** * Extract images from pptx format files. * * @param af the file from which images are to be extracted. * * @return list of extracted images. Returns null in case no images were * extracted.//from w w w .ja v a 2 s.c om */ private List<ExtractedImage> extractImagesFromPptx(AbstractFile af) { List<ExtractedImage> listOfExtractedImages; XMLSlideShow pptx; try { pptx = new XMLSlideShow(new ReadContentInputStream(af)); } catch (Throwable ex) { // instantiating POI containers throw RuntimeExceptions logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.pptxContainer.init.err", af.getName()), ex); //NON-NLS return null; } List<XSLFPictureData> listOfAllPictures = null; try { listOfAllPictures = pptx.getAllPictures(); } catch (Exception ex) { // log internal Java and Apache errors as WARNING logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); //NON-NLS return null; } // if no images are extracted from the PPT, return null, else initialize // the output folder for image extraction. String outputFolderPath; if (listOfAllPictures.isEmpty()) { return null; } else { outputFolderPath = getOutputFolderPath(this.parentFileName); } if (outputFolderPath == null) { logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName())); //NON-NLS return null; } listOfExtractedImages = new ArrayList<>(); byte[] data = null; for (XSLFPictureData xslsPicture : listOfAllPictures) { // get image file name, write it to the module outputFolder, and add // it to the listOfExtractedImageAbstractFiles. 
String fileName = xslsPicture.getFileName(); try { data = xslsPicture.getData(); } catch (Exception ex) { // log internal Java and Apache errors as WARNING logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); //NON-NLS return null; } writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data); listOfExtractedImages.add( new ExtractedImage(fileName, getFileRelativePath(fileName), xslsPicture.getData().length, af)); } return listOfExtractedImages; }