List of usage examples for org.apache.poi.xwpf.usermodel.XWPFPictureData.getFileName()
public String getFileName()
From source file:demo.poi.image.SimpleImages.java
License:Apache License
/**
 * Extracts the raw bytes of every picture embedded in a Word (.docx) document.
 *
 * @param file stream from which the .docx document is read
 * @return one byte array per picture, in the order returned by
 *         {@code XWPFDocument.getAllPictures()}
 * @throws RuntimeException wrapping any exception raised while opening, reading
 *         or closing the document
 */
public static List<byte[]> extractImagesFromWord(InputStream file) {
    // try-with-resources releases the document once the picture bytes have been
    // copied out; previously the document was never closed.
    try (XWPFDocument doc = new XWPFDocument(file)) {
        log.debug("{}", doc);
        List<byte[]> result = new ArrayList<byte[]>();
        for (XWPFPictureData picture : doc.getAllPictures()) {
            result.add(picture.getData());
            log.debug("{}", picture.getFileName());
        }
        return result;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:fr.opensagres.poi.xwpf.converter.core.XWPFDocumentVisitor.java
License:Open Source License
private void visitGraphicalObject(T parentContainer, CTGraphicalObject graphic, Float offsetX, STRelFromH.Enum relativeFromH, Float offsetY, STRelFromV.Enum relativeFromV, STWrapText.Enum wrapText) throws Exception { if (graphic != null) { CTGraphicalObjectData graphicData = graphic.getGraphicData(); if (graphicData != null) { XmlCursor c = graphicData.newCursor(); c.selectPath("./*"); while (c.toNextSelection()) { XmlObject o = c.getObject(); if (o instanceof CTPicture) { CTPicture picture = (CTPicture) o; // extract the picture if needed IImageExtractor extractor = getImageExtractor(); if (extractor != null) { XWPFPictureData pictureData = getPictureData(picture); if (pictureData != null) { try { extractor.extract(WORD_MEDIA + pictureData.getFileName(), pictureData.getData()); } catch (Throwable e) { LOGGER.log(Level.SEVERE, "Error while extracting the image " + pictureData.getFileName(), e); }/*from w ww . j a va 2 s.co m*/ } } // visit the picture. visitPicture(picture, offsetX, relativeFromH, offsetY, relativeFromV, wrapText, parentContainer); } } c.dispose(); } } }
From source file:fr.opensagres.poi.xwpf.converter.xhtml.internal.XHTMLMapper.java
License:Open Source License
@Override protected void visitPicture(CTPicture picture, Float offsetX, Enum relativeFromH, Float offsetY, org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STRelFromV.Enum relativeFromV, org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STWrapText.Enum wrapText, Object parentContainer) throws Exception { AttributesImpl attributes = null; // Src attribute XWPFPictureData pictureData = super.getPictureData(picture); if (pictureData != null) { // img/@src String src = pictureData.getFileName(); if (StringUtils.isNotEmpty(src)) { src = resolver.resolve(WORD_MEDIA + src); attributes = SAXHelper.addAttrValue(attributes, SRC_ATTR, src); }/*www .j a v a 2 s. c om*/ CTPositiveSize2D ext = picture.getSpPr().getXfrm().getExt(); CSSStyle style = new CSSStyle(IMG_ELEMENT, null); // img/@width float width = emu2points(ext.getCx()); // img/@height float height = emu2points(ext.getCy()); style.addProperty(WIDTH, getStylesDocument().getValueAsPoint(width)); style.addProperty(HEIGHT, getStylesDocument().getValueAsPoint(height)); attributes = SAXHelper.addAttrValue(attributes, STYLE_ATTR, style.getInlineStyles()); } else { // external link images inserted String link = picture.getBlipFill().getBlip().getLink(); String src = document.getPackagePart().getRelationships().getRelationshipByID(link).getTargetURI() .toString(); attributes = SAXHelper.addAttrValue(null, SRC_ATTR, src); CTPositiveSize2D ext = picture.getSpPr().getXfrm().getExt(); CSSStyle style = new CSSStyle(IMG_ELEMENT, null); // img/@width float width = emu2points(ext.getCx()); // img/@height float height = emu2points(ext.getCy()); style.addProperty(WIDTH, getStylesDocument().getValueAsPoint(width)); style.addProperty(HEIGHT, getStylesDocument().getValueAsPoint(height)); attributes = SAXHelper.addAttrValue(attributes, STYLE_ATTR, style.getInlineStyles()); } if (attributes != null) { startElement(IMG_ELEMENT, attributes); endElement(IMG_ELEMENT); } }
From source file:kz.service.DocumentReader.java
public static String readDocxFile(String fileName) { try {/*from ww w.j ava 2 s. c om*/ File file = new File(fileName); FileInputStream fis = new FileInputStream(file.getAbsolutePath()); StringBuffer content = new StringBuffer(); XWPFDocument document = new XWPFDocument(fis); XWPFStyles styles = document.getStyles(); List<XWPFParagraph> paragraphs = document.getParagraphs(); List<XWPFTable> tables = document.getTables(); List<XWPFPictureData> pictures = document.getAllPictures(); //int Picture_ID = 0; for (XWPFPictureData picture : pictures) { //XWPFPictureData picture = pictures.get(Picture_ID); System.out.println("Picture: " + picture.getFileName()); byte[] pictureData = picture.getData(); BufferedImage image = ImageIO.read(new ByteArrayInputStream(pictureData)); ImageIO.write(image, picture.getFileName(), file); content.append("<p>"); content.append("Here must be image"); content.append("</p>"); //Picture_ID++; } Iterator<IBodyElement> bodyElementIterator = document.getBodyElementsIterator(); int Table_ID = 0; int Paragraph_ID = 0; while (bodyElementIterator.hasNext()) { IBodyElement element = bodyElementIterator.next(); System.out.println(element.getElementType().name());//prints Element type name if ("TABLE".equalsIgnoreCase(element.getElementType().name())) { content.append("<table>"); XWPFTable table = tables.get(Table_ID); CTTbl cttbl = table.getCTTbl(); CTTblPr cttblPr = cttbl.getTblPr(); List<XWPFTableRow> tblRows = table.getRows(); for (XWPFTableRow tblRow : tblRows) { content.append("<tr>"); List<XWPFTableCell> tblCells = tblRow.getTableCells(); for (XWPFTableCell tblCell : tblCells) { content.append("<td>"); content.append(tblCell.getText()); content.append("</td>"); } content.append("</tr>"); } content.append("</table>"); Table_ID++; } else if ("PARAGRAPH".equalsIgnoreCase(element.getElementType().name())) { XWPFParagraph paragraph = paragraphs.get(Paragraph_ID); String styleClass = null; if (paragraph.getStyleID() != null) { content.append("<p 
class=''>"); XWPFStyle style = styles.getStyle(paragraph.getStyleID()); if (style != null && style.getName() != null) { //here will be code creation of tag with class style } } else { content.append("<p>"); } content.append(paragraph.getText()); content.append("</p>"); Paragraph_ID++; } } fis.close(); return content.toString(); } catch (Exception e) { return e.toString(); } }
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XWPFWordExtractorDecorator.java
License:Apache License
private TmpFormatting processRun(XWPFRun run, XWPFParagraph paragraph, XHTMLContentHandler xhtml, TmpFormatting tfmtg) throws SAXException, XmlException, IOException { // True if we are currently in the named style tag: if (run.isBold() != tfmtg.isBold()) { if (tfmtg.isItalic()) { xhtml.endElement("i"); tfmtg.setItalic(false);//from w ww . j a va2 s. co m } if (run.isBold()) { xhtml.startElement("b"); } else { xhtml.endElement("b"); } tfmtg.setBold(run.isBold()); } if (run.isItalic() != tfmtg.isItalic()) { if (run.isItalic()) { xhtml.startElement("i"); } else { xhtml.endElement("i"); } tfmtg.setItalic(run.isItalic()); } boolean addedHREF = false; if (run instanceof XWPFHyperlinkRun) { XWPFHyperlinkRun linkRun = (XWPFHyperlinkRun) run; XWPFHyperlink link = linkRun.getHyperlink(document); if (link != null && link.getURL() != null) { xhtml.startElement("a", "href", link.getURL()); addedHREF = true; } else if (linkRun.getAnchor() != null && linkRun.getAnchor().length() > 0) { xhtml.startElement("a", "href", "#" + linkRun.getAnchor()); addedHREF = true; } } xhtml.characters(run.toString()); // If we have any pictures, output them for (XWPFPicture picture : run.getEmbeddedPictures()) { if (paragraph.getDocument() != null) { XWPFPictureData data = picture.getPictureData(); if (data != null) { AttributesImpl attr = new AttributesImpl(); attr.addAttribute("", "src", "src", "CDATA", "embedded:" + data.getFileName()); attr.addAttribute("", "alt", "alt", "CDATA", picture.getDescription()); xhtml.startElement("img", attr); xhtml.endElement("img"); } } } if (addedHREF) { xhtml.endElement("a"); } return tfmtg; }
From source file:org.sleuthkit.autopsy.imageExtractor.ImageExtractor.java
private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) { // check for BBArtifact ENCRYPTION_DETECTED? Might be detected elsewhere...? // TODO check for BBArtifact ENCRYPTION_DETECTED? Might be detected elsewhere...? List<ExtractedImage> listOfExtractedImages = new ArrayList<ExtractedImage>(); String parentFileName = getUniqueName(af); XWPFDocument docxA = null;/*from w ww. j a v a2 s. co m*/ try { docxA = new XWPFDocument(new ReadContentInputStream(af)); } catch (IOException ex) { logger.log(Level.WARNING, "XWPFDocument container could not be instantiated while reading " + af.getName(), ex); return null; } List<XWPFPictureData> listOfAllPictures = docxA.getAllPictures(); // if no images are extracted from the ppt, return null, else initialize // the output folder for image extraction. String outputFolderPath; if (listOfAllPictures.isEmpty()) { return null; } else { outputFolderPath = getOutputFolderPath(parentFileName); } if (outputFolderPath == null) { logger.log(Level.WARNING, "Could not get path for image extraction from AbstractFile: {0}", af.getName()); return null; } for (XWPFPictureData xwpfPicture : listOfAllPictures) { String fileName = xwpfPicture.getFileName(); FileOutputStream fos = null; try { fos = new FileOutputStream(outputFolderPath + File.separator + fileName); } catch (FileNotFoundException ex) { logger.log(Level.WARNING, "Invalid path provided for image extraction", ex); continue; } try { fos.write(xwpfPicture.getData()); fos.close(); } catch (IOException ex) { logger.log(Level.WARNING, "Could not write to the provided location", ex); continue; } String fileRelativePath = File.separator + moduleDirRelative + File.separator + parentFileName + File.separator + fileName; long size = xwpfPicture.getData().length; ExtractedImage extractedimage = new ExtractedImage(fileName, fileRelativePath, size, af); listOfExtractedImages.add(extractedimage); } return listOfExtractedImages; }
From source file:org.sleuthkit.autopsy.modules.embeddedfileextractor.ImageExtractor.java
License:Open Source License
/** * Extract images from docx format files. * * @param af the file from which images are to be extracted. * * @return list of extracted images. Returns null in case no images were * extracted./*w w w . j a v a 2s.c om*/ */ private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) { List<ExtractedImage> listOfExtractedImages; XWPFDocument docx = null; try { docx = new XWPFDocument(new ReadContentInputStream(af)); } catch (Throwable ex) { // instantiating POI containers throw RuntimeExceptions logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.docxContainer.init.err", af.getName()), ex); //NON-NLS return null; } List<XWPFPictureData> listOfAllPictures = null; try { listOfAllPictures = docx.getAllPictures(); } catch (Exception ex) { // log internal Java and Apache errors as WARNING logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); //NON-NLS return null; } // if no images are extracted from the PPT, return null, else initialize // the output folder for image extraction. 
String outputFolderPath; if (listOfAllPictures.isEmpty()) { return null; } else { outputFolderPath = getOutputFolderPath(this.parentFileName); } if (outputFolderPath == null) { logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName())); //NON-NLS return null; } listOfExtractedImages = new ArrayList<>(); byte[] data = null; for (XWPFPictureData xwpfPicture : listOfAllPictures) { String fileName = xwpfPicture.getFileName(); try { data = xwpfPicture.getData(); } catch (Exception ex) { // log internal Java and Apache errors as WARNING logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); //NON-NLS return null; } writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data); listOfExtractedImages.add( new ExtractedImage(fileName, getFileRelativePath(fileName), xwpfPicture.getData().length, af)); } return listOfExtractedImages; }