Example usage for org.apache.poi.xwpf.usermodel XWPFPictureData getData

List of usage examples for org.apache.poi.xwpf.usermodel XWPFPictureData getData

Introduction

This page collects usage examples for the getData method of org.apache.poi.xwpf.usermodel.XWPFPictureData.

Prototype

public byte[] getData() 

Source Link

Document

Gets the picture data as a byte array.

Usage

From source file:com.swg.parse.docx.NewExtract.java

private void extractImages(String src, int cnt) {

    try {/*  w w w .jav  a2s  . c  o m*/

        FileInputStream fs = new FileInputStream(src);
        XWPFDocument docx = new XWPFDocument(fs);
        List<XWPFPictureData> piclist = docx.getAllPictures();
        Iterator<XWPFPictureData> iterator = piclist.iterator();
        int i = 0;
        new File(
                "C:\\Users\\KXK3\\Documents\\NetBeansProjects\\ParseSuite2\\ParseDocx\\build\\test\\unit\\results\\NewPicFolder"
                        + cnt).mkdir();
        while (iterator.hasNext()) {
            XWPFPictureData pic = iterator.next();
            byte[] bytepic = pic.getData();
            BufferedImage imag = ImageIO.read(new ByteArrayInputStream(bytepic));
            File CreatedImageFile = new File(
                    "C:\\Users\\KXK3\\Documents\\NetBeansProjects\\ParseSuite2\\ParseDocx\\build\\test\\unit\\results\\NewPicFolder"
                            + cnt + "\\imagefromword" + i + ".jpg");
            ImageIO.write(imag, "jpg", new File(
                    "C:\\Users\\KXK3\\Documents\\NetBeansProjects\\ParseSuite2\\ParseDocx\\build\\test\\unit\\results\\NewPicFolder"
                            + cnt + "\\imagefromword" + i + ".jpg"));
            i++;
            System.out.println("path to image " + i + " = " + CreatedImageFile.getAbsolutePath());
            labelBeforePOJO.add("path to image " + i);
            ValueBeforePOJO.add(CreatedImageFile.getAbsolutePath());
            sectionBeforePOJO.add(section);
        }
    } catch (Exception e) {//I can handle the image advance conversion here?
        System.exit(-1);
    }

}

From source file:demo.poi.image.SimpleImages.java

License:Apache License

public static List<byte[]> extractImagesFromWord(InputStream file) {
    try {//w  w  w  .  j  a va2  s  . co m
        List<byte[]> result = new ArrayList<byte[]>();
        XWPFDocument doc = new XWPFDocument(file);
        log.debug("{}", doc);

        for (XWPFPictureData picture : doc.getAllPictures()) {
            result.add(picture.getData());
            log.debug("{}", picture.getFileName());
        }

        return result;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:fr.opensagres.poi.xwpf.converter.core.XWPFDocumentVisitor.java

License:Open Source License

private void visitGraphicalObject(T parentContainer, CTGraphicalObject graphic, Float offsetX,
        STRelFromH.Enum relativeFromH, Float offsetY, STRelFromV.Enum relativeFromV, STWrapText.Enum wrapText)
        throws Exception {
    if (graphic != null) {
        CTGraphicalObjectData graphicData = graphic.getGraphicData();
        if (graphicData != null) {
            XmlCursor c = graphicData.newCursor();
            c.selectPath("./*");
            while (c.toNextSelection()) {
                XmlObject o = c.getObject();
                if (o instanceof CTPicture) {
                    CTPicture picture = (CTPicture) o;
                    // extract the picture if needed
                    IImageExtractor extractor = getImageExtractor();
                    if (extractor != null) {
                        XWPFPictureData pictureData = getPictureData(picture);
                        if (pictureData != null) {
                            try {
                                extractor.extract(WORD_MEDIA + pictureData.getFileName(),
                                        pictureData.getData());
                            } catch (Throwable e) {
                                LOGGER.log(Level.SEVERE,
                                        "Error while extracting the image " + pictureData.getFileName(), e);
                            }//w w w.j av  a 2 s .  c  om
                        }
                    }
                    // visit the picture.
                    visitPicture(picture, offsetX, relativeFromH, offsetY, relativeFromV, wrapText,
                            parentContainer);
                }
            }
            c.dispose();
        }
    }
}

From source file:fr.opensagres.poi.xwpf.converter.pdf.internal.PdfMapper.java

License:Open Source License

@Override
protected void visitPicture(CTPicture picture, Float offsetX,
        org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STRelFromH.Enum relativeFromH,
        Float offsetY,/*from w w w  . j  av  a 2s .c  om*/
        org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STRelFromV.Enum relativeFromV,
        STWrapText.Enum wrapText, IITextContainer pdfParentContainer) throws Exception {

    CTPositiveSize2D ext = picture.getSpPr().getXfrm().getExt();
    long x = ext.getCx();
    long y = ext.getCy();

    XWPFPictureData pictureData = super.getPictureData(picture);
    if (pictureData != null) {
        try {
            Image img = Image.getInstance(pictureData.getData());
            img.scaleAbsolute(emu2points(x), emu2points(y));

            IITextContainer parentOfParentContainer = pdfParentContainer.getITextContainer();
            if (parentOfParentContainer != null && parentOfParentContainer instanceof PdfPCell) {
                parentOfParentContainer.addElement(img);
            } else {
                float chunkOffsetX = 0;
                if (offsetX != null) {
                    if (STRelFromH.CHARACTER.equals(relativeFromH)) {
                        chunkOffsetX = offsetX;
                    } else if (STRelFromH.COLUMN.equals(relativeFromH)) {
                        chunkOffsetX = offsetX;
                    } else if (STRelFromH.INSIDE_MARGIN.equals(relativeFromH)) {
                        chunkOffsetX = offsetX;
                    } else if (STRelFromH.LEFT_MARGIN.equals(relativeFromH)) {
                        chunkOffsetX = offsetX;
                    } else if (STRelFromH.MARGIN.equals(relativeFromH)) {
                        chunkOffsetX = pdfDocument.left() + offsetX;
                    } else if (STRelFromH.OUTSIDE_MARGIN.equals(relativeFromH)) {
                        chunkOffsetX = offsetX;
                    } else if (STRelFromH.PAGE.equals(relativeFromH)) {
                        chunkOffsetX = offsetX - pdfDocument.left();
                    }
                }

                float chunkOffsetY = 0;
                boolean useExtendedImage = false;
                if (STRelFromV.PARAGRAPH.equals(relativeFromV)) {
                    useExtendedImage = true;
                }

                if (useExtendedImage) {
                    ExtendedImage extImg = new ExtendedImage(img, -offsetY);

                    if (STRelFromV.PARAGRAPH.equals(relativeFromV)) {
                        chunkOffsetY = -extImg.getScaledHeight();
                    }

                    Chunk chunk = new Chunk(extImg, chunkOffsetX, chunkOffsetY, false);
                    pdfParentContainer.addElement(chunk);
                }
                /*
                 * float chunkOffsetY = 0; if ( wrapText != null ) {
                 * chunkOffsetY = -img.getScaledHeight(); } boolean
                 * useExtendedImage = offsetY != null; // if (
                 * STRelFromV.PARAGRAPH.equals( relativeFromV ) ) // { //
                 * useExtendedImage = true; // } // if ( useExtendedImage )
                 * { float imgY = -offsetY; if ( pdfHeader != null ) { float
                 * headerY = pdfHeader.getY() != null ? pdfHeader.getY() :
                 * 0; imgY += - img.getScaledHeight() + headerY; }
                 * ExtendedImage extImg = new ExtendedImage( img, imgY ); //
                 * if ( STRelFromV.PARAGRAPH.equals( relativeFromV ) ) // {
                 * // chunkOffsetY = -extImg.getScaledHeight(); // } Chunk
                 * chunk = new Chunk( extImg, chunkOffsetX, chunkOffsetY,
                 * false ); pdfParentContainer.addElement( chunk ); }
                 */
                else {
                    if (pdfParentContainer instanceof Paragraph) {
                        // I don't know why but we need add some spacing
                        // before in the paragraph
                        // otherwise the image cut the text of the below
                        // paragraph (see FormattingTests JUnit)?
                        Paragraph paragraph = (Paragraph) pdfParentContainer;
                        paragraph.setSpacingBefore(paragraph.getSpacingBefore() + 5f);
                    }
                    pdfParentContainer.addElement(new Chunk(img, chunkOffsetX, chunkOffsetY, false));
                }
            }

        } catch (Exception e) {
            LOGGER.severe(e.getMessage());
        }

    }
}

From source file:kz.service.DocumentReader.java

public static String readDocxFile(String fileName) {

    try {//  w  w  w .  ja v  a  2 s .  c  om
        File file = new File(fileName);
        FileInputStream fis = new FileInputStream(file.getAbsolutePath());
        StringBuffer content = new StringBuffer();

        XWPFDocument document = new XWPFDocument(fis);
        XWPFStyles styles = document.getStyles();

        List<XWPFParagraph> paragraphs = document.getParagraphs();
        List<XWPFTable> tables = document.getTables();
        List<XWPFPictureData> pictures = document.getAllPictures();

        //int Picture_ID = 0;
        for (XWPFPictureData picture : pictures) {
            //XWPFPictureData picture = pictures.get(Picture_ID);
            System.out.println("Picture: " + picture.getFileName());
            byte[] pictureData = picture.getData();
            BufferedImage image = ImageIO.read(new ByteArrayInputStream(pictureData));
            ImageIO.write(image, picture.getFileName(), file);
            content.append("<p>");
            content.append("Here must be image");
            content.append("</p>");
            //Picture_ID++;
        }

        Iterator<IBodyElement> bodyElementIterator = document.getBodyElementsIterator();
        int Table_ID = 0;
        int Paragraph_ID = 0;
        while (bodyElementIterator.hasNext()) {

            IBodyElement element = bodyElementIterator.next();
            System.out.println(element.getElementType().name());//prints Element type name

            if ("TABLE".equalsIgnoreCase(element.getElementType().name())) {

                content.append("<table>");
                XWPFTable table = tables.get(Table_ID);
                CTTbl cttbl = table.getCTTbl();
                CTTblPr cttblPr = cttbl.getTblPr();

                List<XWPFTableRow> tblRows = table.getRows();
                for (XWPFTableRow tblRow : tblRows) {
                    content.append("<tr>");
                    List<XWPFTableCell> tblCells = tblRow.getTableCells();
                    for (XWPFTableCell tblCell : tblCells) {
                        content.append("<td>");
                        content.append(tblCell.getText());
                        content.append("</td>");
                    }
                    content.append("</tr>");
                }
                content.append("</table>");
                Table_ID++;

            } else if ("PARAGRAPH".equalsIgnoreCase(element.getElementType().name())) {

                XWPFParagraph paragraph = paragraphs.get(Paragraph_ID);

                String styleClass = null;
                if (paragraph.getStyleID() != null) {
                    content.append("<p class=''>");
                    XWPFStyle style = styles.getStyle(paragraph.getStyleID());
                    if (style != null && style.getName() != null) {
                        //here will be code creation of tag with class style
                    }
                } else {
                    content.append("<p>");
                }
                content.append(paragraph.getText());
                content.append("</p>");
                Paragraph_ID++;

            }
        }

        fis.close();
        return content.toString();
    } catch (Exception e) {
        return e.toString();
    }

}

From source file:offishell.word.WordHeleper.java

License:MIT License

/**
 * <p>//from w w  w  .j a  v  a2 s.  co  m
 * Helper method to clone {@link XWPFRun}.
 * </p>
 * 
 * @param in
 * @param out
 * @param model
 */
public static void copy(XWPFRun in, XWPFRun out, UnaryOperator<String> converter) {
    // copy
    out.setBold(in.isBold());
    out.setCapitalized(in.isCapitalized());
    out.setCharacterSpacing(in.getCharacterSpacing());
    out.setColor(in.getColor());
    out.setDoubleStrikethrough(in.isDoubleStrikeThrough());
    out.setEmbossed(in.isEmbossed());
    out.setFontFamily(in.getFontFamily());
    out.setFontSize(in.getFontSize());
    out.setImprinted(in.isImprinted());
    out.setItalic(in.isItalic());
    out.setKerning(in.getKerning());
    out.setShadow(in.isShadowed());
    out.setSmallCaps(in.isSmallCaps());
    out.setStrikeThrough(in.isStrikeThrough());
    out.setVerticalAlignment(out.getVerticalAlignment().toString());
    out.setTextPosition(in.getTextPosition());
    out.setUnderline(in.getUnderline());

    // copy context
    CTR inCTR = in.getCTR();
    CTRPr inPR = inCTR.getRPr();
    CTR outCTR = out.getCTR();
    CTRPr outPR = outCTR.isSetRPr() ? outCTR.getRPr() : outCTR.addNewRPr();
    outPR.set(inCTR.getRPr());
    out.setVerticalAlignment(
            inPR == null || inPR.getVertAlign() == null ? "baseline" : inPR.getVertAlign().toString());

    // // copy tab
    // CTEmpty[] tabs = inCTR.getTabArray();
    //
    // if (tabs.length != 0) {
    // out.addTab();
    // }
    outCTR.setAnnotationRefArray(inCTR.getAnnotationRefList().toArray(CTEmpty[]::new));
    outCTR.setBrArray(inCTR.getBrList().toArray(CTBr[]::new));
    outCTR.setCommentReferenceArray(inCTR.getCommentReferenceList().toArray(CTMarkup[]::new));
    outCTR.setContinuationSeparatorArray(inCTR.getContinuationSeparatorList().toArray(CTEmpty[]::new));
    outCTR.setCrArray(inCTR.getCrList().toArray(CTEmpty[]::new));
    outCTR.setDelInstrTextArray(inCTR.getDelInstrTextList().toArray(CTText[]::new));
    outCTR.setDrawingArray(inCTR.getDrawingList().toArray(CTDrawing[]::new));
    outCTR.setEndnoteRefArray(inCTR.getEndnoteRefList().toArray(CTEmpty[]::new));
    outCTR.setFldCharArray(inCTR.getFldCharList().toArray(CTFldChar[]::new));
    outCTR.setFootnoteRefArray(inCTR.getFootnoteRefList().toArray(CTEmpty[]::new));
    outCTR.setInstrTextArray(inCTR.getInstrTextList().toArray(CTText[]::new));
    outCTR.setLastRenderedPageBreakArray(inCTR.getLastRenderedPageBreakList().toArray(CTEmpty[]::new));
    outCTR.setObjectArray(inCTR.getObjectList().toArray(CTObject[]::new));
    outCTR.setPictArray(inCTR.getPictList().toArray(CTPicture[]::new));
    outCTR.setPtabArray(inCTR.getPtabList().toArray(CTPTab[]::new));
    outCTR.setSymArray(inCTR.getSymList().toArray(CTSym[]::new));
    outCTR.setTabArray(inCTR.getTabList().toArray(CTEmpty[]::new));

    // copy image
    for (XWPFPicture inPicture : in.getEmbeddedPictures()) {
        try {
            XWPFPictureData inData = inPicture.getPictureData();
            String outId = out.getDocument().addPictureData(new ByteArrayInputStream(inData.getData()),
                    inData.getPictureType());

            select(CTBlip.class, outCTR).to(blip -> blip.setEmbed(outId));
        } catch (Exception e) {
            throw I.quiet(e);
        }
    }

    // copy text
    write(out, converter.apply(in.text()));
}

From source file:org.sleuthkit.autopsy.imageExtractor.ImageExtractor.java

private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) {
    // check for BBArtifact ENCRYPTION_DETECTED? Might be detected elsewhere...?
    // TODO check for BBArtifact ENCRYPTION_DETECTED? Might be detected elsewhere...?
    List<ExtractedImage> listOfExtractedImages = new ArrayList<ExtractedImage>();
    String parentFileName = getUniqueName(af);
    XWPFDocument docxA = null;/*from w  w w . j a  va  2s . c om*/
    try {
        docxA = new XWPFDocument(new ReadContentInputStream(af));
    } catch (IOException ex) {
        logger.log(Level.WARNING,
                "XWPFDocument container could not be instantiated while reading " + af.getName(), ex);
        return null;
    }
    List<XWPFPictureData> listOfAllPictures = docxA.getAllPictures();

    // if no images are extracted from the ppt, return null, else initialize
    // the output folder for image extraction.
    String outputFolderPath;
    if (listOfAllPictures.isEmpty()) {
        return null;
    } else {
        outputFolderPath = getOutputFolderPath(parentFileName);
    }
    if (outputFolderPath == null) {
        logger.log(Level.WARNING, "Could not get path for image extraction from AbstractFile: {0}",
                af.getName());
        return null;
    }
    for (XWPFPictureData xwpfPicture : listOfAllPictures) {
        String fileName = xwpfPicture.getFileName();
        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(outputFolderPath + File.separator + fileName);
        } catch (FileNotFoundException ex) {
            logger.log(Level.WARNING, "Invalid path provided for image extraction", ex);
            continue;
        }
        try {
            fos.write(xwpfPicture.getData());
            fos.close();
        } catch (IOException ex) {
            logger.log(Level.WARNING, "Could not write to the provided location", ex);
            continue;
        }
        String fileRelativePath = File.separator + moduleDirRelative + File.separator + parentFileName
                + File.separator + fileName;
        long size = xwpfPicture.getData().length;
        ExtractedImage extractedimage = new ExtractedImage(fileName, fileRelativePath, size, af);
        listOfExtractedImages.add(extractedimage);
    }
    return listOfExtractedImages;
}

From source file:org.sleuthkit.autopsy.modules.embeddedfileextractor.ImageExtractor.java

License:Open Source License

/**
 * Extract images from docx format files.
 *
 * @param af the file from which images are to be extracted.
 *
 * @return list of extracted images. Returns null in case no images were
 *         extracted./*from ww  w .java2s .  c  om*/
 */
private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) {
    List<ExtractedImage> listOfExtractedImages;
    XWPFDocument docx = null;
    try {
        docx = new XWPFDocument(new ReadContentInputStream(af));
    } catch (Throwable ex) {
        // instantiating POI containers throw RuntimeExceptions
        logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(),
                "EmbeddedFileExtractorIngestModule.ImageExtractor.docxContainer.init.err", af.getName()), ex); //NON-NLS
        return null;
    }
    List<XWPFPictureData> listOfAllPictures = null;
    try {
        listOfAllPictures = docx.getAllPictures();
    } catch (Exception ex) {
        // log internal Java and Apache errors as WARNING
        logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(),
                "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); //NON-NLS
        return null;
    }

    // if no images are extracted from the PPT, return null, else initialize
    // the output folder for image extraction.
    String outputFolderPath;
    if (listOfAllPictures.isEmpty()) {
        return null;
    } else {
        outputFolderPath = getOutputFolderPath(this.parentFileName);
    }
    if (outputFolderPath == null) {
        logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(),
                "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg",
                af.getName())); //NON-NLS
        return null;
    }
    listOfExtractedImages = new ArrayList<>();
    byte[] data = null;
    for (XWPFPictureData xwpfPicture : listOfAllPictures) {
        String fileName = xwpfPicture.getFileName();
        try {
            data = xwpfPicture.getData();
        } catch (Exception ex) {
            // log internal Java and Apache errors as WARNING
            logger.log(Level.WARNING,
                    NbBundle.getMessage(this.getClass(),
                            "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()),
                    ex); //NON-NLS
            return null;
        }
        writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
        listOfExtractedImages.add(
                new ExtractedImage(fileName, getFileRelativePath(fileName), xwpfPicture.getData().length, af));
    }
    return listOfExtractedImages;
}