List of usage examples for org.apache.poi.xwpf.usermodel.XWPFDocument.getAllPictures()
public List<XWPFPictureData> getAllPictures()
From source file:com.swg.parse.docx.NewExtract.java
private void extractImages(String src, int cnt) { try {//from w w w. jav a 2s .c o m FileInputStream fs = new FileInputStream(src); XWPFDocument docx = new XWPFDocument(fs); List<XWPFPictureData> piclist = docx.getAllPictures(); Iterator<XWPFPictureData> iterator = piclist.iterator(); int i = 0; new File( "C:\\Users\\KXK3\\Documents\\NetBeansProjects\\ParseSuite2\\ParseDocx\\build\\test\\unit\\results\\NewPicFolder" + cnt).mkdir(); while (iterator.hasNext()) { XWPFPictureData pic = iterator.next(); byte[] bytepic = pic.getData(); BufferedImage imag = ImageIO.read(new ByteArrayInputStream(bytepic)); File CreatedImageFile = new File( "C:\\Users\\KXK3\\Documents\\NetBeansProjects\\ParseSuite2\\ParseDocx\\build\\test\\unit\\results\\NewPicFolder" + cnt + "\\imagefromword" + i + ".jpg"); ImageIO.write(imag, "jpg", new File( "C:\\Users\\KXK3\\Documents\\NetBeansProjects\\ParseSuite2\\ParseDocx\\build\\test\\unit\\results\\NewPicFolder" + cnt + "\\imagefromword" + i + ".jpg")); i++; System.out.println("path to image " + i + " = " + CreatedImageFile.getAbsolutePath()); labelBeforePOJO.add("path to image " + i); ValueBeforePOJO.add(CreatedImageFile.getAbsolutePath()); sectionBeforePOJO.add(section); } } catch (Exception e) {//I can handle the image advance conversion here? System.exit(-1); } }
From source file:demo.poi.image.SimpleImages.java
License:Apache License
public static List<byte[]> extractImagesFromWord(InputStream file) { try {/* w w w . j a v a 2 s. co m*/ List<byte[]> result = new ArrayList<byte[]>(); XWPFDocument doc = new XWPFDocument(file); log.debug("{}", doc); for (XWPFPictureData picture : doc.getAllPictures()) { result.add(picture.getData()); log.debug("{}", picture.getFileName()); } return result; } catch (Exception e) { throw new RuntimeException(e); } }
From source file:kz.service.DocumentReader.java
public static String readDocxFile(String fileName) { try {//from w w w . ja v a2s .c o m File file = new File(fileName); FileInputStream fis = new FileInputStream(file.getAbsolutePath()); StringBuffer content = new StringBuffer(); XWPFDocument document = new XWPFDocument(fis); XWPFStyles styles = document.getStyles(); List<XWPFParagraph> paragraphs = document.getParagraphs(); List<XWPFTable> tables = document.getTables(); List<XWPFPictureData> pictures = document.getAllPictures(); //int Picture_ID = 0; for (XWPFPictureData picture : pictures) { //XWPFPictureData picture = pictures.get(Picture_ID); System.out.println("Picture: " + picture.getFileName()); byte[] pictureData = picture.getData(); BufferedImage image = ImageIO.read(new ByteArrayInputStream(pictureData)); ImageIO.write(image, picture.getFileName(), file); content.append("<p>"); content.append("Here must be image"); content.append("</p>"); //Picture_ID++; } Iterator<IBodyElement> bodyElementIterator = document.getBodyElementsIterator(); int Table_ID = 0; int Paragraph_ID = 0; while (bodyElementIterator.hasNext()) { IBodyElement element = bodyElementIterator.next(); System.out.println(element.getElementType().name());//prints Element type name if ("TABLE".equalsIgnoreCase(element.getElementType().name())) { content.append("<table>"); XWPFTable table = tables.get(Table_ID); CTTbl cttbl = table.getCTTbl(); CTTblPr cttblPr = cttbl.getTblPr(); List<XWPFTableRow> tblRows = table.getRows(); for (XWPFTableRow tblRow : tblRows) { content.append("<tr>"); List<XWPFTableCell> tblCells = tblRow.getTableCells(); for (XWPFTableCell tblCell : tblCells) { content.append("<td>"); content.append(tblCell.getText()); content.append("</td>"); } content.append("</tr>"); } content.append("</table>"); Table_ID++; } else if ("PARAGRAPH".equalsIgnoreCase(element.getElementType().name())) { XWPFParagraph paragraph = paragraphs.get(Paragraph_ID); String styleClass = null; if (paragraph.getStyleID() != null) { content.append("<p 
class=''>"); XWPFStyle style = styles.getStyle(paragraph.getStyleID()); if (style != null && style.getName() != null) { //here will be code creation of tag with class style } } else { content.append("<p>"); } content.append(paragraph.getText()); content.append("</p>"); Paragraph_ID++; } } fis.close(); return content.toString(); } catch (Exception e) { return e.toString(); } }
From source file:org.sleuthkit.autopsy.imageExtractor.ImageExtractor.java
private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) { // check for BBArtifact ENCRYPTION_DETECTED? Might be detected elsewhere...? // TODO check for BBArtifact ENCRYPTION_DETECTED? Might be detected elsewhere...? List<ExtractedImage> listOfExtractedImages = new ArrayList<ExtractedImage>(); String parentFileName = getUniqueName(af); XWPFDocument docxA = null; try {// w ww. j a va2 s . c o m docxA = new XWPFDocument(new ReadContentInputStream(af)); } catch (IOException ex) { logger.log(Level.WARNING, "XWPFDocument container could not be instantiated while reading " + af.getName(), ex); return null; } List<XWPFPictureData> listOfAllPictures = docxA.getAllPictures(); // if no images are extracted from the ppt, return null, else initialize // the output folder for image extraction. String outputFolderPath; if (listOfAllPictures.isEmpty()) { return null; } else { outputFolderPath = getOutputFolderPath(parentFileName); } if (outputFolderPath == null) { logger.log(Level.WARNING, "Could not get path for image extraction from AbstractFile: {0}", af.getName()); return null; } for (XWPFPictureData xwpfPicture : listOfAllPictures) { String fileName = xwpfPicture.getFileName(); FileOutputStream fos = null; try { fos = new FileOutputStream(outputFolderPath + File.separator + fileName); } catch (FileNotFoundException ex) { logger.log(Level.WARNING, "Invalid path provided for image extraction", ex); continue; } try { fos.write(xwpfPicture.getData()); fos.close(); } catch (IOException ex) { logger.log(Level.WARNING, "Could not write to the provided location", ex); continue; } String fileRelativePath = File.separator + moduleDirRelative + File.separator + parentFileName + File.separator + fileName; long size = xwpfPicture.getData().length; ExtractedImage extractedimage = new ExtractedImage(fileName, fileRelativePath, size, af); listOfExtractedImages.add(extractedimage); } return listOfExtractedImages; }
From source file:org.sleuthkit.autopsy.modules.embeddedfileextractor.ImageExtractor.java
License:Open Source License
/** * Extract images from docx format files. * * @param af the file from which images are to be extracted. * * @return list of extracted images. Returns null in case no images were * extracted./*from w w w . jav a 2 s .c om*/ */ private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) { List<ExtractedImage> listOfExtractedImages; XWPFDocument docx = null; try { docx = new XWPFDocument(new ReadContentInputStream(af)); } catch (Throwable ex) { // instantiating POI containers throw RuntimeExceptions logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.docxContainer.init.err", af.getName()), ex); //NON-NLS return null; } List<XWPFPictureData> listOfAllPictures = null; try { listOfAllPictures = docx.getAllPictures(); } catch (Exception ex) { // log internal Java and Apache errors as WARNING logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); //NON-NLS return null; } // if no images are extracted from the PPT, return null, else initialize // the output folder for image extraction. 
String outputFolderPath; if (listOfAllPictures.isEmpty()) { return null; } else { outputFolderPath = getOutputFolderPath(this.parentFileName); } if (outputFolderPath == null) { logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName())); //NON-NLS return null; } listOfExtractedImages = new ArrayList<>(); byte[] data = null; for (XWPFPictureData xwpfPicture : listOfAllPictures) { String fileName = xwpfPicture.getFileName(); try { data = xwpfPicture.getData(); } catch (Exception ex) { // log internal Java and Apache errors as WARNING logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); //NON-NLS return null; } writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data); listOfExtractedImages.add( new ExtractedImage(fileName, getFileRelativePath(fileName), xwpfPicture.getData().length, af)); } return listOfExtractedImages; }