Example usage for org.apache.poi.xwpf.usermodel XWPFDocument getAllPictures

List of usage examples for org.apache.poi.xwpf.usermodel XWPFDocument getAllPictures

Introduction

On this page you can find example usages of org.apache.poi.xwpf.usermodel.XWPFDocument.getAllPictures().

Prototype

public List<XWPFPictureData> getAllPictures() 

Source Link

Document

Returns all pictures that are referenced from the document itself.

Usage

From source file:com.swg.parse.docx.NewExtract.java

/**
 * Extracts every picture embedded in a .docx file and re-encodes each one as a JPEG
 * inside a per-document output folder.
 *
 * <p>For each image written, a label, the image's absolute path, and the current
 * {@code section} are appended to {@code labelBeforePOJO}, {@code ValueBeforePOJO} and
 * {@code sectionBeforePOJO} respectively.
 *
 * <p>Pictures that ImageIO cannot decode, or cannot encode as JPEG, are skipped with a
 * message on {@code System.err}. Any other failure terminates the JVM with exit code -1.
 *
 * @param src path of the .docx file to read
 * @param cnt suffix appended to the output folder name
 */
private void extractImages(String src, int cnt) {

    // Single definition of the output folder; the same literal used to be repeated three times.
    File outputDir = new File(
            "C:\\Users\\KXK3\\Documents\\NetBeansProjects\\ParseSuite2\\ParseDocx\\build\\test\\unit\\results\\NewPicFolder"
                    + cnt);

    // try-with-resources closes both the document and the file stream on every exit path.
    try (FileInputStream fs = new FileInputStream(src);
            XWPFDocument docx = new XWPFDocument(fs)) {

        outputDir.mkdir();

        int i = 0;
        for (XWPFPictureData pic : docx.getAllPictures()) {
            BufferedImage imag = ImageIO.read(new ByteArrayInputStream(pic.getData()));
            if (imag == null) {
                // ImageIO.read returns null when no registered reader understands the data.
                System.err.println("Skipping picture with unsupported format: " + pic.getFileName());
                continue;
            }

            File createdImageFile = new File(outputDir, "imagefromword" + i + ".jpg");
            if (!ImageIO.write(imag, "jpg", createdImageFile)) {
                // ImageIO.write returns false when no suitable writer accepts the image.
                System.err.println("Could not encode picture as JPEG: " + pic.getFileName());
                continue;
            }

            // Labels are 1-based, file names are 0-based (unchanged from the original).
            i++;
            System.out.println("path to image " + i + " = " + createdImageFile.getAbsolutePath());
            labelBeforePOJO.add("path to image " + i);
            ValueBeforePOJO.add(createdImageFile.getAbsolutePath());
            sectionBeforePOJO.add(section);
        }
    } catch (Exception e) {
        // Report the cause before exiting; previously the failure was completely silent.
        System.err.println("Image extraction failed for " + src + ": " + e);
        // NOTE(review): terminating the JVM from a helper is drastic; kept because callers
        // may rely on it. Consider propagating the exception instead.
        System.exit(-1);
    }

}

From source file:demo.poi.image.SimpleImages.java

License:Apache License

/**
 * Reads a Word (.docx) document from the given stream and returns the raw bytes of every
 * picture it contains.
 *
 * @param file stream supplying the .docx content
 * @return one byte array per picture, in the order reported by
 *         {@code XWPFDocument.getAllPictures()}
 * @throws RuntimeException wrapping any exception raised while opening, reading or
 *         closing the document
 */
public static List<byte[]> extractImagesFromWord(InputStream file) {
    // The document is closed once the picture bytes have been collected, instead of being
    // left open for the garbage collector.
    try (XWPFDocument doc = new XWPFDocument(file)) {
        log.debug("{}", doc);

        List<byte[]> result = new ArrayList<>();
        for (XWPFPictureData picture : doc.getAllPictures()) {
            result.add(picture.getData());
            log.debug("{}", picture.getFileName());
        }

        return result;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:kz.service.DocumentReader.java

/**
 * Converts a .docx file into a minimal HTML fragment.
 *
 * <p>The output starts with one placeholder paragraph per embedded picture, followed by
 * the body elements in document order: tables become {@code <table>/<tr>/<td>} markup and
 * paragraphs become {@code <p>} elements. Text is appended as-is (it is not HTML-escaped).
 * Picture file names and body element type names are printed to {@code System.out}.
 *
 * @param fileName path of the .docx file to read
 * @return the generated HTML, or the {@code toString()} of the exception if anything fails
 */
public static String readDocxFile(String fileName) {

    // Both resources are closed on every exit path, including exceptions.
    try (FileInputStream fis = new FileInputStream(new File(fileName).getAbsolutePath());
            XWPFDocument document = new XWPFDocument(fis)) {

        StringBuilder content = new StringBuilder();

        // The previous version decoded each picture and called ImageIO.write with the
        // *input document* as the target file and the picture's file name as the format
        // name, which could delete or overwrite the source .docx. Only the placeholder
        // is emitted now.
        for (XWPFPictureData picture : document.getAllPictures()) {
            System.out.println("Picture: " + picture.getFileName());
            content.append("<p>");
            content.append("Here must be image");
            content.append("</p>");
        }

        Iterator<IBodyElement> bodyElementIterator = document.getBodyElementsIterator();
        while (bodyElementIterator.hasNext()) {

            IBodyElement element = bodyElementIterator.next();
            String typeName = element.getElementType().name();
            System.out.println(typeName);//prints Element type name

            if ("TABLE".equalsIgnoreCase(typeName)) {

                // Use the element itself rather than a parallel index into getTables().
                XWPFTable table = (XWPFTable) element;
                content.append("<table>");
                for (XWPFTableRow tblRow : table.getRows()) {
                    content.append("<tr>");
                    for (XWPFTableCell tblCell : tblRow.getTableCells()) {
                        content.append("<td>");
                        content.append(tblCell.getText());
                        content.append("</td>");
                    }
                    content.append("</tr>");
                }
                content.append("</table>");

            } else if ("PARAGRAPH".equalsIgnoreCase(typeName)) {

                // Use the element itself rather than a parallel index into getParagraphs().
                XWPFParagraph paragraph = (XWPFParagraph) element;

                if (paragraph.getStyleID() != null) {
                    // TODO: emit the paragraph's style name as the class attribute.
                    content.append("<p class=''>");
                } else {
                    content.append("<p>");
                }
                content.append(paragraph.getText());
                content.append("</p>");

            }
        }

        return content.toString();
    } catch (Exception e) {
        return e.toString();
    }

}

From source file:org.sleuthkit.autopsy.imageExtractor.ImageExtractor.java

/**
 * Extracts the pictures embedded in a docx file and writes each one to the output folder
 * obtained from {@code getOutputFolderPath}.
 *
 * @param af the file from which images are to be extracted
 * @return the images that were written successfully; {@code null} if the document cannot
 *         be opened, contains no pictures, or no output folder path is available
 */
private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) {
    // TODO check for BBArtifact ENCRYPTION_DETECTED? Might be detected elsewhere...?
    List<ExtractedImage> listOfExtractedImages = new ArrayList<ExtractedImage>();
    String parentFileName = getUniqueName(af);
    XWPFDocument docxA = null;
    try {
        docxA = new XWPFDocument(new ReadContentInputStream(af));
    } catch (IOException ex) {
        logger.log(Level.WARNING,
                "XWPFDocument container could not be instantiated while reading " + af.getName(), ex);
        return null;
    }
    List<XWPFPictureData> listOfAllPictures = docxA.getAllPictures();

    // If the docx contains no pictures, return null; otherwise resolve the output folder
    // for image extraction.
    if (listOfAllPictures.isEmpty()) {
        return null;
    }
    String outputFolderPath = getOutputFolderPath(parentFileName);
    if (outputFolderPath == null) {
        logger.log(Level.WARNING, "Could not get path for image extraction from AbstractFile: {0}",
                af.getName());
        return null;
    }
    for (XWPFPictureData xwpfPicture : listOfAllPictures) {
        String fileName = xwpfPicture.getFileName();
        // Fetch the bytes once; they are used for both the write and the recorded size.
        byte[] data = xwpfPicture.getData();
        // try-with-resources closes the stream even when write() fails; previously the
        // stream leaked on that path.
        try (FileOutputStream fos = new FileOutputStream(outputFolderPath + File.separator + fileName)) {
            fos.write(data);
        } catch (FileNotFoundException ex) {
            logger.log(Level.WARNING, "Invalid path provided for image extraction", ex);
            continue;
        } catch (IOException ex) {
            logger.log(Level.WARNING, "Could not write to the provided location", ex);
            continue;
        }
        String fileRelativePath = File.separator + moduleDirRelative + File.separator + parentFileName
                + File.separator + fileName;
        long size = data.length;
        ExtractedImage extractedimage = new ExtractedImage(fileName, fileRelativePath, size, af);
        listOfExtractedImages.add(extractedimage);
    }
    return listOfExtractedImages;
}

From source file:org.sleuthkit.autopsy.modules.embeddedfileextractor.ImageExtractor.java

License:Open Source License

/**
 * Extract images from docx format files.
 *
 * @param af the file from which images are to be extracted.
 *
 * @return list of extracted images. Returns null in case no images were
 *         extracted./*from   w w w  .  jav  a  2 s .c  om*/
 */
private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) {
    List<ExtractedImage> listOfExtractedImages;
    XWPFDocument docx = null;
    try {
        docx = new XWPFDocument(new ReadContentInputStream(af));
    } catch (Throwable ex) {
        // instantiating POI containers throw RuntimeExceptions
        logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(),
                "EmbeddedFileExtractorIngestModule.ImageExtractor.docxContainer.init.err", af.getName()), ex); //NON-NLS
        return null;
    }
    List<XWPFPictureData> listOfAllPictures = null;
    try {
        listOfAllPictures = docx.getAllPictures();
    } catch (Exception ex) {
        // log internal Java and Apache errors as WARNING
        logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(),
                "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); //NON-NLS
        return null;
    }

    // if no images are extracted from the PPT, return null, else initialize
    // the output folder for image extraction.
    String outputFolderPath;
    if (listOfAllPictures.isEmpty()) {
        return null;
    } else {
        outputFolderPath = getOutputFolderPath(this.parentFileName);
    }
    if (outputFolderPath == null) {
        logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(),
                "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg",
                af.getName())); //NON-NLS
        return null;
    }
    listOfExtractedImages = new ArrayList<>();
    byte[] data = null;
    for (XWPFPictureData xwpfPicture : listOfAllPictures) {
        String fileName = xwpfPicture.getFileName();
        try {
            data = xwpfPicture.getData();
        } catch (Exception ex) {
            // log internal Java and Apache errors as WARNING
            logger.log(Level.WARNING,
                    NbBundle.getMessage(this.getClass(),
                            "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()),
                    ex); //NON-NLS
            return null;
        }
        writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
        listOfExtractedImages.add(
                new ExtractedImage(fileName, getFileRelativePath(fileName), xwpfPicture.getData().length, af));
    }
    return listOfExtractedImages;
}