List of usage examples for org.apache.poi.hslf.usermodel HSLFSlideShow HSLFSlideShow
@SuppressWarnings("resource") public HSLFSlideShow(DirectoryNode root) throws IOException
From source file:com.github.codeurjc.slidesconverter.PowerPointToHTML.java
License:Apache License
public void convert() throws IOException { InputStream fis = Files.newInputStream(pptxFile); XMLSlideShow pptx = new XMLSlideShow(fis); fis.close();// w w w. j ava2s . c om InputStream is = Files.newInputStream(pptFile); HSLFSlideShow ppt = new HSLFSlideShow(is); is.close(); width = pptx.getPageSize().getWidth(); height = pptx.getPageSize().getHeight(); out = new PrintWriter(Files.newOutputStream(htmlFile)); out.println("<!DOCTYPE html>"); out.println("<html><body>"); out.println("<h1>" + this.mainTitleNumber + " " + mainTitle + "</h1>"); out.println("<h2>" + this.slidesContext + "</h2>"); List<Section> sections = calculateSections(pptx, ppt); generateTOC(sections); generateSlidesContent(pptx, ppt); pptx.close(); ppt.close(); out.close(); }
From source file:com.jaeksoft.searchlib.parser.PptParser.java
License:Open Source License
/**
 * Extracts the text of every slide in a legacy (.ppt) PowerPoint document
 * and indexes each paragraph under the parser field derived from the
 * paragraph's run type (title, note, body or other).
 *
 * @param streamLimiter supplies the input stream of the document to parse
 * @param lang          declared language hint (unused here; language is
 *                      detected from the body field instead)
 * @throws IOException if the slideshow cannot be read
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    // try-with-resources: the original never closed the slideshow,
    // leaking it and its underlying stream.
    try (HSLFSlideShow ppt = new HSLFSlideShow(streamLimiter.getNewInputStream())) {
        ParserResultItem result = getNewParserResultItem();
        for (HSLFSlide slide : ppt.getSlides()) {
            for (List<HSLFTextParagraph> paragraphs : slide.getTextParagraphs()) {
                for (HSLFTextParagraph paragraph : paragraphs) {
                    ParserFieldEnum field = fieldForRunType(paragraph.getRunType());
                    // Join the paragraph's runs, separating them by a space;
                    // consecutive spaces are collapsed afterwards.
                    StringBuilder sb = new StringBuilder();
                    for (HSLFTextRun textRun : paragraph.getTextRuns()) {
                        sb.append(textRun.getRawText());
                        sb.append(' ');
                    }
                    result.addField(field, StringUtils.replaceConsecutiveSpaces(sb.toString(), " "));
                }
            }
        }
        // Detect the language from up to 10000 characters of body text.
        result.langDetection(10000, ParserFieldEnum.body);
    }
}

/** Maps an HSLF text-header run type to the parser field it is indexed under. */
private static ParserFieldEnum fieldForRunType(int runType) {
    switch (runType) {
    case TextHeaderAtom.TITLE_TYPE:
    case TextHeaderAtom.CENTER_TITLE_TYPE:
        return ParserFieldEnum.title;
    case TextHeaderAtom.NOTES_TYPE:
        return ParserFieldEnum.note;
    case TextHeaderAtom.BODY_TYPE:
    case TextHeaderAtom.CENTRE_BODY_TYPE:
    case TextHeaderAtom.HALF_BODY_TYPE:
    case TextHeaderAtom.QUARTER_BODY_TYPE:
        return ParserFieldEnum.body;
    case TextHeaderAtom.OTHER_TYPE:
    default:
        return ParserFieldEnum.other;
    }
}
From source file:com.qwazr.extractor.parser.Ppt.java
License:Apache License
/**
 * Extracts the text of a legacy (.ppt) PowerPoint document, producing one
 * parser document per slide. Each paragraph is stored under the field
 * derived from its run type (TITLE, NOTES, BODY or OTHER), and a language
 * detection field is added per document.
 *
 * @param inputStream the document content
 * @param extension   the file extension (unused)
 * @param mimeType    the declared MIME type (unused)
 * @throws Exception if the slideshow cannot be read or parsed
 */
@Override
protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception {
    // try-with-resources: the original never closed the slideshow (leak).
    try (HSLFSlideShow ppt = new HSLFSlideShow(inputStream)) {
        for (HSLFSlide slide : ppt.getSlides()) {
            // One ParserDocument per slide.
            ParserDocument document = getNewParserDocument();
            for (List<HSLFTextParagraph> paragraphs : slide.getTextParagraphs()) {
                for (HSLFTextParagraph paragraph : paragraphs) {
                    final ParserField parserField;
                    switch (paragraph.getRunType()) {
                    case TextHeaderAtom.TITLE_TYPE:
                    case TextHeaderAtom.CENTER_TITLE_TYPE:
                        parserField = TITLE;
                        break;
                    case TextHeaderAtom.NOTES_TYPE:
                        parserField = NOTES;
                        break;
                    case TextHeaderAtom.BODY_TYPE:
                    case TextHeaderAtom.CENTRE_BODY_TYPE:
                    case TextHeaderAtom.HALF_BODY_TYPE:
                    case TextHeaderAtom.QUARTER_BODY_TYPE:
                        parserField = BODY;
                        break;
                    case TextHeaderAtom.OTHER_TYPE:
                    default:
                        parserField = OTHER;
                        break;
                    }
                    // Join the paragraph's runs with single spaces.
                    StringBuilder sb = new StringBuilder();
                    for (HSLFTextRun textRun : paragraph.getTextRuns()) {
                        sb.append(textRun.getRawText());
                        sb.append(' ');
                    }
                    document.add(parserField, sb.toString().trim());
                }
            }
            // Detect the language from up to 10000 characters of body text.
            document.add(LANG_DETECTION, languageDetection(document, BODY, 10000));
        }
    }
}
From source file:com.qwazr.library.poi.PptParser.java
License:Apache License
@Override public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream, final String extension, final String mimeType, final ParserResultBuilder resultBuilder) throws Exception { final HSLFSlideShow ppt = new HSLFSlideShow(inputStream); final ParserFieldsBuilder metas = resultBuilder.metas(); metas.set(MIME_TYPE, findMimeType(extension, mimeType, this::findMimeTypeUsingDefault)); final List<HSLFSlide> slides = ppt.getSlides(); for (HSLFSlide slide : slides) { final ParserFieldsBuilder document = resultBuilder.newDocument(); final List<List<HSLFTextParagraph>> textLevel0 = slide.getTextParagraphs(); for (List<HSLFTextParagraph> textLevel1 : textLevel0) { for (HSLFTextParagraph textPara : textLevel1) { final ParserField parserField; switch (textPara.getRunType()) { case TextHeaderAtom.TITLE_TYPE: case TextHeaderAtom.CENTER_TITLE_TYPE: parserField = TITLE; break; case TextHeaderAtom.NOTES_TYPE: parserField = NOTES; break; case TextHeaderAtom.BODY_TYPE: case TextHeaderAtom.CENTRE_BODY_TYPE: case TextHeaderAtom.HALF_BODY_TYPE: case TextHeaderAtom.QUARTER_BODY_TYPE: parserField = BODY;//w ww.j a v a2s.com break; case TextHeaderAtom.OTHER_TYPE: default: parserField = OTHER; break; } StringBuilder sb = new StringBuilder(); for (HSLFTextRun textRun : textPara.getTextRuns()) { sb.append(textRun.getRawText()); sb.append(' '); } final String text = sb.toString().trim(); document.add(parserField, text); if (parserField != TITLE) document.add(CONTENT, text); } } document.add(LANG_DETECTION, languageDetection(document, CONTENT, 10000)); } }
From source file:IO.search.SearchWordFile.java
private void search(File scrFile, String word) { //split the key word in different way //there are two way to split key word //space or hyphen //space 's meaning is any one key word contain in the file we search //hyphen 's meaning is all key word must contain in the file we seach String[] arrStr = null;//from ww w .j av a 2 s . c om String[] arrStrA = null; if (word.contains(" ")) { arrStr = word.split(" "); } else if (word.contains("-")) { arrStrA = word.split("-"); System.out.println("reach"); } //regular expression mean suffixes must contain doc. boolean is03word = scrFile.getName().matches("^.+\\.(?i)(doc)$"); if (is03word) { try { InputStream is = new FileInputStream(scrFile); WordExtractor ex = new WordExtractor(is); String text2003 = ex.getText(); if (arrStr != null && arrStr.length > 0) { //if keyword has space ,then we do spilt it //invoke the method finding(text2003, arrStr, scrFile); } else if (arrStrA != null && arrStrA.length > 0) { //if keyword has hyphen, it mean that the file we search must contain these key word. //we are using count varible to count the text of the file containing keyword whether enough or not. int count = 0; for (int i = 0; i < arrStrA.length; i++) { if (text2003.toLowerCase().contains(arrStrA[i].toLowerCase())) { count++; } } //if count varible if equal with amount of keyword that the file is we want. 
if (count == arrStrA.length) { nameList.add(scrFile.getPath()); } } else if (text2003.toLowerCase().contains(word.toLowerCase())) { System.out.println("true"); nameList.add(scrFile.getPath()); } } catch (Exception ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } } else if (scrFile.getName().matches("^.+\\.(?i)(docx)$")) { try { OPCPackage opcPackage = POIXMLDocument.openPackage(scrFile.getPath()); POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage); String text2007 = extractor.getText(); if (arrStr != null && arrStr.length > 0) { finding(text2007, arrStr, scrFile); } else if (arrStrA != null && arrStrA.length > 0) { int count = 0; for (int i = 0; i < arrStrA.length; i++) { if (text2007.toLowerCase().contains(arrStrA[i].toLowerCase())) { count++; } } if (count == arrStrA.length) { nameList.add(scrFile.getPath()); } } else if (text2007.toLowerCase().contains(word.toLowerCase())) { System.out.println("true"); nameList.add(scrFile.getPath()); } } catch (Exception ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } } else if (scrFile.getName().matches("^.+\\.(?i)(pdf)$")) { FileInputStream input = null; PDDocument pdfDocument = null; try { input = new FileInputStream(scrFile); PDFParser pdfParser = new PDFParser(input); pdfParser.parse(); pdfDocument = pdfParser.getPDDocument(); PDFTextStripper stripper = new PDFTextStripper(); String content = stripper.getText(pdfDocument); if (arrStr != null && arrStr.length > 0) { finding(content, arrStr, scrFile); } else if (arrStrA != null && arrStrA.length > 0) { int count = 0; for (int i = 0; i < arrStrA.length; i++) { if (content.toLowerCase().contains(arrStrA[i].toLowerCase())) { count++; } } if (count == arrStrA.length) { nameList.add(scrFile.getPath()); } } else if (content.toLowerCase().contains(word.toLowerCase())) { System.out.println("true"); nameList.add(scrFile.getPath()); } } catch (Exception ex) { 
Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } finally { try { input.close(); pdfDocument.close(); } catch (IOException ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } } } else if (scrFile.getName().matches("^.+\\.(?i)(txt)$")) { BufferedReader in = null; StringBuffer sb = new StringBuffer(); try { in = new BufferedReader((new InputStreamReader(new FileInputStream(scrFile), getCharset(scrFile.getAbsolutePath())))); String line = null; while ((line = in.readLine()) != null) { sb.append(line); } if (arrStr != null && arrStr.length > 0) { if (finding(sb.toString(), arrStr, scrFile)) { return; } } else if (arrStrA != null && arrStrA.length > 0) { int count = 0; for (int i = 0; i < arrStrA.length; i++) { if (sb.toString().contains(arrStrA[i])) { count++; } } if (count == arrStrA.length) { nameList.add(scrFile.getPath()); } } else if (line.toLowerCase().contains(word.toLowerCase())) { System.out.println("true"); nameList.add(scrFile.getPath()); return; } } catch (Exception ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } finally { try { in.close(); } catch (IOException ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } } } else if (scrFile.getName().matches("^.+\\.(?i)(ppt)$")) {//find the key word in ppt file InputStream is = null; try { StringBuffer content = new StringBuffer(""); is = new FileInputStream(scrFile); //get core API HSLFSlideShow ss = new HSLFSlideShow(is); //get how many page in this PPT List<HSLFSlide> slides = ss.getSlides(); System.out.println("total have " + slides.size() + " page PPT"); for (int i = 0; i < slides.size(); i++) { //get each page of ppt content, retrun is List List<List<HSLFTextParagraph>> textParagraphs = slides.get(i).getTextParagraphs(); if (textParagraphs != null) { for (int j = 0; j < textParagraphs.size(); j++) { content.append("\n"); //get each row of the page List<HSLFTextParagraph> 
hslfTextParagraph = textParagraphs.get(j); for (int f = 0; f < hslfTextParagraph.size(); f++) { //get the text of this row content.append(hslfTextParagraph.get(f).toString()); } } } } if (arrStr != null && arrStr.length > 0) { finding(content.toString(), arrStr, scrFile); } else if (arrStrA != null && arrStrA.length > 0) { int count = 0; for (int i = 0; i < arrStrA.length; i++) { if (content.toString().toLowerCase().contains(arrStrA[i].toLowerCase())) { count++; } } if (count == arrStrA.length) { nameList.add(scrFile.getPath()); } } else if (content.toString().toLowerCase().contains(word.toLowerCase())) { System.out.println("true"); nameList.add(scrFile.getPath()); } } catch (Exception ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } finally { try { is.close(); } catch (IOException ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } } } else if (scrFile.getName().matches("^.+\\.(?i)(pptx)$")) {//if powerpoint is 2007 or after we use this method String conetxt = null; try { conetxt = new XSLFPowerPointExtractor(POIXMLDocument.openPackage(scrFile.getPath())).getText(); if (arrStr != null && arrStr.length > 0) { finding(conetxt, arrStr, scrFile); } else if (arrStrA != null && arrStrA.length > 0) { int count = 0; for (int i = 0; i < arrStrA.length; i++) { if (conetxt.toLowerCase().contains(arrStrA[i].toLowerCase())) { count++; } } if (count == arrStrA.length) { nameList.add(scrFile.getPath()); } } else if (conetxt.toLowerCase().contains(word.toLowerCase())) { System.out.println("true"); nameList.add(scrFile.getPath()); } } catch (Exception ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } } }
From source file:org.apache.tika.parser.microsoft.HSLFExtractor.java
License:Apache License
/**
 * Walks a legacy PowerPoint (.ppt) document rooted at the given POIFS
 * directory and emits its content as XHTML: one div per slide (header,
 * master text, slide text, tables, footer, comments, embedded resources,
 * inline notes), followed by a trailing div that repeats each distinct
 * notes sheet once with the notes header/footer, and finally the
 * slideshow's embedded pictures.
 *
 * @param root  POIFS directory node of the slideshow being extracted
 * @param xhtml handler receiving the generated XHTML events
 * @throws IOException   if the slideshow cannot be read
 * @throws SAXException  if the content handler rejects an event
 * @throws TikaException on extraction failure
 */
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {
    HSLFSlideShow ss = new HSLFSlideShow(root);
    List<HSLFSlide> _slides = ss.getSlides();
    xhtml.startElement("div", "class", "slideShow");

    /* Iterate over slides and extract text */
    for (HSLFSlide slide : _slides) {
        xhtml.startElement("div", "class", "slide");

        // Slide header, if present
        HeadersFooters hf = slide.getHeadersFooters();
        if (hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
            xhtml.startElement("p", "class", "slide-header");
            xhtml.characters(hf.getHeaderText());
            xhtml.endElement("p");
        }

        // Slide master, if present
        extractMaster(xhtml, slide.getMasterSheet());

        // Slide text
        {
            xhtml.startElement("div", "class", "slide-content");
            textRunsToText(xhtml, slide.getTextParagraphs());
            xhtml.endElement("div");
        }

        // Table text
        for (HSLFShape shape : slide.getShapes()) {
            if (shape instanceof HSLFTable) {
                extractTableText(xhtml, (HSLFTable) shape);
            }
        }

        // Slide footer, if present
        if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
            xhtml.startElement("p", "class", "slide-footer");
            xhtml.characters(hf.getFooterText());
            xhtml.endElement("p");
        }

        // Comments, if present. The author label ("Name (Initials) - ")
        // is accumulated in a reused builder and emitted in bold before
        // the comment body.
        StringBuilder authorStringBuilder = new StringBuilder();
        for (Comment comment : slide.getComments()) {
            authorStringBuilder.setLength(0);
            xhtml.startElement("p", "class", "slide-comment");
            if (comment.getAuthor() != null) {
                authorStringBuilder.append(comment.getAuthor());
            }
            if (comment.getAuthorInitials() != null) {
                if (authorStringBuilder.length() > 0) {
                    authorStringBuilder.append(" ");
                }
                authorStringBuilder.append("(" + comment.getAuthorInitials() + ")");
            }
            if (authorStringBuilder.length() > 0) {
                // Separate the author label from the comment text, if any.
                if (comment.getText() != null) {
                    authorStringBuilder.append(" - ");
                }
                xhtml.startElement("b");
                xhtml.characters(authorStringBuilder.toString());
                xhtml.endElement("b");
            }
            if (comment.getText() != null) {
                xhtml.characters(comment.getText());
            }
            xhtml.endElement("p");
        }

        // Now any embedded resources
        handleSlideEmbeddedResources(slide, xhtml);

        // Find the Notes for this slide and extract inline
        HSLFNotes notes = slide.getNotes();
        if (notes != null) {
            xhtml.startElement("div", "class", "slide-notes");
            textRunsToText(xhtml, notes.getTextParagraphs());
            xhtml.endElement("div");
        }

        // Slide complete
        xhtml.endElement("div");
    }

    // All slides done
    xhtml.endElement("div");

    /* notes: emit each distinct notes sheet once (slides can share a
       notes sheet, so de-duplicate by sheet number) */
    xhtml.startElement("div", "class", "slide-notes");
    HashSet<Integer> seenNotes = new HashSet<>();
    HeadersFooters hf = ss.getNotesHeadersFooters();
    for (HSLFSlide slide : _slides) {
        HSLFNotes notes = slide.getNotes();
        if (notes == null) {
            continue;
        }
        Integer id = notes._getSheetNumber();
        if (seenNotes.contains(id)) {
            continue;
        }
        seenNotes.add(id);

        // Repeat the Notes header, if set
        if (hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
            xhtml.startElement("p", "class", "slide-note-header");
            xhtml.characters(hf.getHeaderText());
            xhtml.endElement("p");
        }

        // Notes text
        textRunsToText(xhtml, notes.getTextParagraphs());

        // Repeat the notes footer, if set
        if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
            xhtml.startElement("p", "class", "slide-note-footer");
            xhtml.characters(hf.getFooterText());
            xhtml.endElement("p");
        }
    }

    handleSlideEmbeddedPictures(ss, xhtml);
    xhtml.endElement("div");
}
From source file:org.quelea.data.powerpoint.PPTPresentation.java
License:Open Source License
/**
 * Creates a presentation from a legacy PowerPoint (.ppt) file.
 *
 * @param file the path of the file containing the presentation
 * @throws IOException if the file cannot be read or is not a valid slideshow
 */
public PPTPresentation(String file) throws IOException {
    // try-with-resources: the original never closed the stream.
    // HSLFSlideShow(InputStream) buffers the whole file up front, so the
    // stream can be closed as soon as construction returns.
    try (FileInputStream in = new FileInputStream(new File(file))) {
        slideshow = new HSLFSlideShow(in);
    }
    slides = makeSlides();
}
From source file:testppttopdf.TestPptToPdf.java
/** * @param args the command line arguments *//*ww w . java 2s. c om*/ public static void main(String[] args) throws FileNotFoundException, IOException, COSVisitorException { // TODO code application logic here FileInputStream is = new FileInputStream("/home/sagar/Desktop/Shareback/test.ppt"); HSLFSlideShow ppt = new HSLFSlideShow(is); is.close(); Dimension pgsize = ppt.getPageSize(); int idx = 1; for (HSLFSlide slide : ppt.getSlides()) { BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, BufferedImage.TYPE_INT_RGB); Graphics2D graphics = img.createGraphics(); // clear the drawing area graphics.setPaint(Color.white); graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height)); // render slide.draw(graphics); // save the output FileOutputStream out = new FileOutputStream("/home/sagar/Desktop/Shareback/img/slide-" + idx + ".jpg"); javax.imageio.ImageIO.write(img, "jpg", out); out.close(); idx++; } String someimg = "/home/sagar/Desktop/Shareback/img/"; PDDocument document = new PDDocument(); File file = new File(someimg); if (file.isDirectory()) { for (File f : file.listFiles()) { InputStream in = new FileInputStream(f); BufferedImage bimg = ImageIO.read(in); float width = bimg.getWidth(); float height = bimg.getHeight(); PDPage page = new PDPage(new PDRectangle(width + 10, height + 10)); document.addPage(page); PDXObjectImage img = new PDJpeg(document, new FileInputStream(f)); PDPageContentStream contentStream = new PDPageContentStream(document, page); contentStream.drawImage(img, 0, 0); contentStream.close(); in.close(); } document.save("/home/sagar/Desktop/Shareback/test-generated.pdf"); document.close(); } else { System.out.println(someimg + "is not a Directory"); } }