List of usage examples for org.apache.poi.xwpf.usermodel XWPFPictureData getData
public byte[] getData()
From source file:com.swg.parse.docx.NewExtract.java
private void extractImages(String src, int cnt) { try {/* w w w .jav a2s . c o m*/ FileInputStream fs = new FileInputStream(src); XWPFDocument docx = new XWPFDocument(fs); List<XWPFPictureData> piclist = docx.getAllPictures(); Iterator<XWPFPictureData> iterator = piclist.iterator(); int i = 0; new File( "C:\\Users\\KXK3\\Documents\\NetBeansProjects\\ParseSuite2\\ParseDocx\\build\\test\\unit\\results\\NewPicFolder" + cnt).mkdir(); while (iterator.hasNext()) { XWPFPictureData pic = iterator.next(); byte[] bytepic = pic.getData(); BufferedImage imag = ImageIO.read(new ByteArrayInputStream(bytepic)); File CreatedImageFile = new File( "C:\\Users\\KXK3\\Documents\\NetBeansProjects\\ParseSuite2\\ParseDocx\\build\\test\\unit\\results\\NewPicFolder" + cnt + "\\imagefromword" + i + ".jpg"); ImageIO.write(imag, "jpg", new File( "C:\\Users\\KXK3\\Documents\\NetBeansProjects\\ParseSuite2\\ParseDocx\\build\\test\\unit\\results\\NewPicFolder" + cnt + "\\imagefromword" + i + ".jpg")); i++; System.out.println("path to image " + i + " = " + CreatedImageFile.getAbsolutePath()); labelBeforePOJO.add("path to image " + i); ValueBeforePOJO.add(CreatedImageFile.getAbsolutePath()); sectionBeforePOJO.add(section); } } catch (Exception e) {//I can handle the image advance conversion here? System.exit(-1); } }
From source file:demo.poi.image.SimpleImages.java
License:Apache License
public static List<byte[]> extractImagesFromWord(InputStream file) { try {//w w w . j a va2 s . co m List<byte[]> result = new ArrayList<byte[]>(); XWPFDocument doc = new XWPFDocument(file); log.debug("{}", doc); for (XWPFPictureData picture : doc.getAllPictures()) { result.add(picture.getData()); log.debug("{}", picture.getFileName()); } return result; } catch (Exception e) { throw new RuntimeException(e); } }
From source file:fr.opensagres.poi.xwpf.converter.core.XWPFDocumentVisitor.java
License:Open Source License
private void visitGraphicalObject(T parentContainer, CTGraphicalObject graphic, Float offsetX, STRelFromH.Enum relativeFromH, Float offsetY, STRelFromV.Enum relativeFromV, STWrapText.Enum wrapText) throws Exception { if (graphic != null) { CTGraphicalObjectData graphicData = graphic.getGraphicData(); if (graphicData != null) { XmlCursor c = graphicData.newCursor(); c.selectPath("./*"); while (c.toNextSelection()) { XmlObject o = c.getObject(); if (o instanceof CTPicture) { CTPicture picture = (CTPicture) o; // extract the picture if needed IImageExtractor extractor = getImageExtractor(); if (extractor != null) { XWPFPictureData pictureData = getPictureData(picture); if (pictureData != null) { try { extractor.extract(WORD_MEDIA + pictureData.getFileName(), pictureData.getData()); } catch (Throwable e) { LOGGER.log(Level.SEVERE, "Error while extracting the image " + pictureData.getFileName(), e); }//w w w.j av a 2 s . c om } } // visit the picture. visitPicture(picture, offsetX, relativeFromH, offsetY, relativeFromV, wrapText, parentContainer); } } c.dispose(); } } }
From source file:fr.opensagres.poi.xwpf.converter.pdf.internal.PdfMapper.java
License:Open Source License
@Override protected void visitPicture(CTPicture picture, Float offsetX, org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STRelFromH.Enum relativeFromH, Float offsetY,/*from w w w . j av a 2s .c om*/ org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STRelFromV.Enum relativeFromV, STWrapText.Enum wrapText, IITextContainer pdfParentContainer) throws Exception { CTPositiveSize2D ext = picture.getSpPr().getXfrm().getExt(); long x = ext.getCx(); long y = ext.getCy(); XWPFPictureData pictureData = super.getPictureData(picture); if (pictureData != null) { try { Image img = Image.getInstance(pictureData.getData()); img.scaleAbsolute(emu2points(x), emu2points(y)); IITextContainer parentOfParentContainer = pdfParentContainer.getITextContainer(); if (parentOfParentContainer != null && parentOfParentContainer instanceof PdfPCell) { parentOfParentContainer.addElement(img); } else { float chunkOffsetX = 0; if (offsetX != null) { if (STRelFromH.CHARACTER.equals(relativeFromH)) { chunkOffsetX = offsetX; } else if (STRelFromH.COLUMN.equals(relativeFromH)) { chunkOffsetX = offsetX; } else if (STRelFromH.INSIDE_MARGIN.equals(relativeFromH)) { chunkOffsetX = offsetX; } else if (STRelFromH.LEFT_MARGIN.equals(relativeFromH)) { chunkOffsetX = offsetX; } else if (STRelFromH.MARGIN.equals(relativeFromH)) { chunkOffsetX = pdfDocument.left() + offsetX; } else if (STRelFromH.OUTSIDE_MARGIN.equals(relativeFromH)) { chunkOffsetX = offsetX; } else if (STRelFromH.PAGE.equals(relativeFromH)) { chunkOffsetX = offsetX - pdfDocument.left(); } } float chunkOffsetY = 0; boolean useExtendedImage = false; if (STRelFromV.PARAGRAPH.equals(relativeFromV)) { useExtendedImage = true; } if (useExtendedImage) { ExtendedImage extImg = new ExtendedImage(img, -offsetY); if (STRelFromV.PARAGRAPH.equals(relativeFromV)) { chunkOffsetY = -extImg.getScaledHeight(); } Chunk chunk = new Chunk(extImg, chunkOffsetX, chunkOffsetY, false); pdfParentContainer.addElement(chunk); } /* * float chunkOffsetY = 0; if ( wrapText != null ) { * chunkOffsetY = -img.getScaledHeight(); } boolean * useExtendedImage = offsetY != null; // if ( * STRelFromV.PARAGRAPH.equals( relativeFromV ) ) // { // * useExtendedImage = true; // } // if ( useExtendedImage ) * { float imgY = -offsetY; if ( pdfHeader != null ) { float * headerY = pdfHeader.getY() != null ? pdfHeader.getY() : * 0; imgY += - img.getScaledHeight() + headerY; } * ExtendedImage extImg = new ExtendedImage( img, imgY ); // * if ( STRelFromV.PARAGRAPH.equals( relativeFromV ) ) // { * // chunkOffsetY = -extImg.getScaledHeight(); // } Chunk * chunk = new Chunk( extImg, chunkOffsetX, chunkOffsetY, * false ); pdfParentContainer.addElement( chunk ); } */ else { if (pdfParentContainer instanceof Paragraph) { // I don't know why but we need add some spacing // before in the paragraph // otherwise the image cut the text of the below // paragraph (see FormattingTests JUnit)? Paragraph paragraph = (Paragraph) pdfParentContainer; paragraph.setSpacingBefore(paragraph.getSpacingBefore() + 5f); } pdfParentContainer.addElement(new Chunk(img, chunkOffsetX, chunkOffsetY, false)); } } } catch (Exception e) { LOGGER.severe(e.getMessage()); } } }
From source file:kz.service.DocumentReader.java
public static String readDocxFile(String fileName) { try {// w w w . ja v a 2 s . c om File file = new File(fileName); FileInputStream fis = new FileInputStream(file.getAbsolutePath()); StringBuffer content = new StringBuffer(); XWPFDocument document = new XWPFDocument(fis); XWPFStyles styles = document.getStyles(); List<XWPFParagraph> paragraphs = document.getParagraphs(); List<XWPFTable> tables = document.getTables(); List<XWPFPictureData> pictures = document.getAllPictures(); //int Picture_ID = 0; for (XWPFPictureData picture : pictures) { //XWPFPictureData picture = pictures.get(Picture_ID); System.out.println("Picture: " + picture.getFileName()); byte[] pictureData = picture.getData(); BufferedImage image = ImageIO.read(new ByteArrayInputStream(pictureData)); ImageIO.write(image, picture.getFileName(), file); content.append("<p>"); content.append("Here must be image"); content.append("</p>"); //Picture_ID++; } Iterator<IBodyElement> bodyElementIterator = document.getBodyElementsIterator(); int Table_ID = 0; int Paragraph_ID = 0; while (bodyElementIterator.hasNext()) { IBodyElement element = bodyElementIterator.next(); System.out.println(element.getElementType().name());//prints Element type name if ("TABLE".equalsIgnoreCase(element.getElementType().name())) { content.append("<table>"); XWPFTable table = tables.get(Table_ID); CTTbl cttbl = table.getCTTbl(); CTTblPr cttblPr = cttbl.getTblPr(); List<XWPFTableRow> tblRows = table.getRows(); for (XWPFTableRow tblRow : tblRows) { content.append("<tr>"); List<XWPFTableCell> tblCells = tblRow.getTableCells(); for (XWPFTableCell tblCell : tblCells) { content.append("<td>"); content.append(tblCell.getText()); content.append("</td>"); } content.append("</tr>"); } content.append("</table>"); Table_ID++; } else if ("PARAGRAPH".equalsIgnoreCase(element.getElementType().name())) { XWPFParagraph paragraph = paragraphs.get(Paragraph_ID); String styleClass = null; if (paragraph.getStyleID() != null) { content.append("<p class=''>"); XWPFStyle style = styles.getStyle(paragraph.getStyleID()); if (style != null && style.getName() != null) { //here will be code creation of tag with class style } } else { content.append("<p>"); } content.append(paragraph.getText()); content.append("</p>"); Paragraph_ID++; } } fis.close(); return content.toString(); } catch (Exception e) { return e.toString(); } }
From source file:offishell.word.WordHeleper.java
License:MIT License
/** * <p>//from w w w .j a v a2 s. co m * Helper method to clone {@link XWPFRun}. * </p> * * @param in * @param out * @param model */ public static void copy(XWPFRun in, XWPFRun out, UnaryOperator<String> converter) { // copy out.setBold(in.isBold()); out.setCapitalized(in.isCapitalized()); out.setCharacterSpacing(in.getCharacterSpacing()); out.setColor(in.getColor()); out.setDoubleStrikethrough(in.isDoubleStrikeThrough()); out.setEmbossed(in.isEmbossed()); out.setFontFamily(in.getFontFamily()); out.setFontSize(in.getFontSize()); out.setImprinted(in.isImprinted()); out.setItalic(in.isItalic()); out.setKerning(in.getKerning()); out.setShadow(in.isShadowed()); out.setSmallCaps(in.isSmallCaps()); out.setStrikeThrough(in.isStrikeThrough()); out.setVerticalAlignment(out.getVerticalAlignment().toString()); out.setTextPosition(in.getTextPosition()); out.setUnderline(in.getUnderline()); // copy context CTR inCTR = in.getCTR(); CTRPr inPR = inCTR.getRPr(); CTR outCTR = out.getCTR(); CTRPr outPR = outCTR.isSetRPr() ? outCTR.getRPr() : outCTR.addNewRPr(); outPR.set(inCTR.getRPr()); out.setVerticalAlignment( inPR == null || inPR.getVertAlign() == null ? "baseline" : inPR.getVertAlign().toString()); // // copy tab // CTEmpty[] tabs = inCTR.getTabArray(); // // if (tabs.length != 0) { // out.addTab(); // } outCTR.setAnnotationRefArray(inCTR.getAnnotationRefList().toArray(CTEmpty[]::new)); outCTR.setBrArray(inCTR.getBrList().toArray(CTBr[]::new)); outCTR.setCommentReferenceArray(inCTR.getCommentReferenceList().toArray(CTMarkup[]::new)); outCTR.setContinuationSeparatorArray(inCTR.getContinuationSeparatorList().toArray(CTEmpty[]::new)); outCTR.setCrArray(inCTR.getCrList().toArray(CTEmpty[]::new)); outCTR.setDelInstrTextArray(inCTR.getDelInstrTextList().toArray(CTText[]::new)); outCTR.setDrawingArray(inCTR.getDrawingList().toArray(CTDrawing[]::new)); outCTR.setEndnoteRefArray(inCTR.getEndnoteRefList().toArray(CTEmpty[]::new)); outCTR.setFldCharArray(inCTR.getFldCharList().toArray(CTFldChar[]::new)); outCTR.setFootnoteRefArray(inCTR.getFootnoteRefList().toArray(CTEmpty[]::new)); outCTR.setInstrTextArray(inCTR.getInstrTextList().toArray(CTText[]::new)); outCTR.setLastRenderedPageBreakArray(inCTR.getLastRenderedPageBreakList().toArray(CTEmpty[]::new)); outCTR.setObjectArray(inCTR.getObjectList().toArray(CTObject[]::new)); outCTR.setPictArray(inCTR.getPictList().toArray(CTPicture[]::new)); outCTR.setPtabArray(inCTR.getPtabList().toArray(CTPTab[]::new)); outCTR.setSymArray(inCTR.getSymList().toArray(CTSym[]::new)); outCTR.setTabArray(inCTR.getTabList().toArray(CTEmpty[]::new)); // copy image for (XWPFPicture inPicture : in.getEmbeddedPictures()) { try { XWPFPictureData inData = inPicture.getPictureData(); String outId = out.getDocument().addPictureData(new ByteArrayInputStream(inData.getData()), inData.getPictureType()); select(CTBlip.class, outCTR).to(blip -> blip.setEmbed(outId)); } catch (Exception e) { throw I.quiet(e); } } // copy text write(out, converter.apply(in.text())); }
From source file:org.sleuthkit.autopsy.imageExtractor.ImageExtractor.java
private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) { // check for BBArtifact ENCRYPTION_DETECTED? Might be detected elsewhere...? // TODO check for BBArtifact ENCRYPTION_DETECTED? Might be detected elsewhere...? List<ExtractedImage> listOfExtractedImages = new ArrayList<ExtractedImage>(); String parentFileName = getUniqueName(af); XWPFDocument docxA = null;/*from w w w . j a va 2s . c om*/ try { docxA = new XWPFDocument(new ReadContentInputStream(af)); } catch (IOException ex) { logger.log(Level.WARNING, "XWPFDocument container could not be instantiated while reading " + af.getName(), ex); return null; } List<XWPFPictureData> listOfAllPictures = docxA.getAllPictures(); // if no images are extracted from the ppt, return null, else initialize // the output folder for image extraction. String outputFolderPath; if (listOfAllPictures.isEmpty()) { return null; } else { outputFolderPath = getOutputFolderPath(parentFileName); } if (outputFolderPath == null) { logger.log(Level.WARNING, "Could not get path for image extraction from AbstractFile: {0}", af.getName()); return null; } for (XWPFPictureData xwpfPicture : listOfAllPictures) { String fileName = xwpfPicture.getFileName(); FileOutputStream fos = null; try { fos = new FileOutputStream(outputFolderPath + File.separator + fileName); } catch (FileNotFoundException ex) { logger.log(Level.WARNING, "Invalid path provided for image extraction", ex); continue; } try { fos.write(xwpfPicture.getData()); fos.close(); } catch (IOException ex) { logger.log(Level.WARNING, "Could not write to the provided location", ex); continue; } String fileRelativePath = File.separator + moduleDirRelative + File.separator + parentFileName + File.separator + fileName; long size = xwpfPicture.getData().length; ExtractedImage extractedimage = new ExtractedImage(fileName, fileRelativePath, size, af); listOfExtractedImages.add(extractedimage); } return listOfExtractedImages; }
From source file:org.sleuthkit.autopsy.modules.embeddedfileextractor.ImageExtractor.java
License:Open Source License
/** * Extract images from docx format files. * * @param af the file from which images are to be extracted. * * @return list of extracted images. Returns null in case no images were * extracted./*from ww w .java2s . c om*/ */ private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) { List<ExtractedImage> listOfExtractedImages; XWPFDocument docx = null; try { docx = new XWPFDocument(new ReadContentInputStream(af)); } catch (Throwable ex) { // instantiating POI containers throw RuntimeExceptions logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.docxContainer.init.err", af.getName()), ex); //NON-NLS return null; } List<XWPFPictureData> listOfAllPictures = null; try { listOfAllPictures = docx.getAllPictures(); } catch (Exception ex) { // log internal Java and Apache errors as WARNING logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); //NON-NLS return null; } // if no images are extracted from the PPT, return null, else initialize // the output folder for image extraction. String outputFolderPath; if (listOfAllPictures.isEmpty()) { return null; } else { outputFolderPath = getOutputFolderPath(this.parentFileName); } if (outputFolderPath == null) { logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName())); //NON-NLS return null; } listOfExtractedImages = new ArrayList<>(); byte[] data = null; for (XWPFPictureData xwpfPicture : listOfAllPictures) { String fileName = xwpfPicture.getFileName(); try { data = xwpfPicture.getData(); } catch (Exception ex) { // log internal Java and Apache errors as WARNING logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); //NON-NLS return null; } writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data); listOfExtractedImages.add( new ExtractedImage(fileName, getFileRelativePath(fileName), xwpfPicture.getData().length, af)); } return listOfExtractedImages; }