Example usage for org.apache.poi.hslf.usermodel HSLFSlideShow HSLFSlideShow

List of usage examples for org.apache.poi.hslf.usermodel HSLFSlideShow HSLFSlideShow

Introduction

In this page you can find the example usage for org.apache.poi.hslf.usermodel HSLFSlideShow HSLFSlideShow.

Prototype

@SuppressWarnings("resource")
public HSLFSlideShow(DirectoryNode root) throws IOException 

Source Link

Document

Constructs a PowerPoint document from a DirectoryNode.

Usage

From source file:com.github.codeurjc.slidesconverter.PowerPointToHTML.java

License:Apache License

/**
 * Converts the configured presentation pair into a single HTML file.
 *
 * <p>Loads the .pptx file ({@code pptxFile}) and the .ppt file ({@code pptFile}), records the
 * page size of the .pptx in {@code width}/{@code height}, and writes the HTML document
 * (doctype, headings, table of contents, slide content) to {@code htmlFile}.
 *
 * <p>Every stream, both slide shows and the writer are released even when a step fails;
 * previously an exception anywhere in the method leaked whatever had been opened so far.
 *
 * @throws IOException if any of the files cannot be read or written
 */
public void convert() throws IOException {

    // Resources are closed automatically in reverse declaration order, on success and on failure.
    try (InputStream fis = Files.newInputStream(pptxFile);
            XMLSlideShow pptx = new XMLSlideShow(fis);
            InputStream is = Files.newInputStream(pptFile);
            HSLFSlideShow ppt = new HSLFSlideShow(is);
            PrintWriter writer = new PrintWriter(Files.newOutputStream(htmlFile))) {

        width = pptx.getPageSize().getWidth();
        height = pptx.getPageSize().getHeight();

        // The original code assigns the writer to the 'out' field before generating content;
        // presumably generateTOC/generateSlidesContent write through it - keep that contract.
        out = writer;

        out.println("<!DOCTYPE html>");
        out.println("<html><body>");

        out.println("<h1>" + this.mainTitleNumber + " " + mainTitle + "</h1>");
        out.println("<h2>" + this.slidesContext + "</h2>");

        List<Section> sections = calculateSections(pptx, ppt);

        generateTOC(sections);

        generateSlidesContent(pptx, ppt);
    }

}

From source file:com.jaeksoft.searchlib.parser.PptParser.java

License:Open Source License

/**
 * Reads a PowerPoint (.ppt) stream and stores the text of each paragraph in a single
 * parser result item, filed under title / note / body / other according to the
 * paragraph's run type. Language detection is then run on the body field.
 *
 * @param streamLimiter supplies the input stream of the document
 * @param lang not used by this implementation
 * @throws IOException if the slide show cannot be read
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {

    final HSLFSlideShow slideShow = new HSLFSlideShow(streamLimiter.getNewInputStream());
    final List<HSLFSlide> allSlides = slideShow.getSlides();
    final ParserResultItem resultItem = getNewParserResultItem();

    for (HSLFSlide currentSlide : allSlides) {
        for (List<HSLFTextParagraph> paragraphGroup : currentSlide.getTextParagraphs()) {
            for (HSLFTextParagraph paragraph : paragraphGroup) {

                // Map the paragraph's run type onto the target field
                final int runType = paragraph.getRunType();
                final ParserFieldEnum targetField;
                if (runType == TextHeaderAtom.TITLE_TYPE || runType == TextHeaderAtom.CENTER_TITLE_TYPE) {
                    targetField = ParserFieldEnum.title;
                } else if (runType == TextHeaderAtom.NOTES_TYPE) {
                    targetField = ParserFieldEnum.note;
                } else if (runType == TextHeaderAtom.BODY_TYPE || runType == TextHeaderAtom.CENTRE_BODY_TYPE
                        || runType == TextHeaderAtom.HALF_BODY_TYPE
                        || runType == TextHeaderAtom.QUARTER_BODY_TYPE) {
                    targetField = ParserFieldEnum.body;
                } else {
                    // OTHER_TYPE and anything unrecognised
                    targetField = ParserFieldEnum.other;
                }

                // Concatenate the raw text of every run, each followed by a space
                final StringBuilder paragraphText = new StringBuilder();
                for (HSLFTextRun run : paragraph.getTextRuns()) {
                    paragraphText.append(run.getRawText()).append(' ');
                }

                final String normalized = StringUtils.replaceConsecutiveSpaces(paragraphText.toString(), " ");
                resultItem.addField(targetField, normalized);
            }
        }
    }

    resultItem.langDetection(10000, ParserFieldEnum.body);
}

From source file:com.qwazr.extractor.parser.Ppt.java

License:Apache License

/**
 * Reads a PowerPoint (.ppt) stream and creates one parser document per slide. Each
 * paragraph's text is added to the TITLE, NOTES, BODY or OTHER field according to its
 * run type, and the result of language detection on BODY is added as LANG_DETECTION.
 *
 * @param inputStream the document content
 * @param extension not used by this implementation
 * @param mimeType not used by this implementation
 * @throws Exception if the slide show cannot be read or a field cannot be added
 */
@Override
protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception {

    final HSLFSlideShow slideShow = new HSLFSlideShow(inputStream);

    for (HSLFSlide currentSlide : slideShow.getSlides()) {
        final ParserDocument slideDocument = getNewParserDocument();

        for (List<HSLFTextParagraph> paragraphGroup : currentSlide.getTextParagraphs()) {
            for (HSLFTextParagraph paragraph : paragraphGroup) {

                // Map the paragraph's run type onto the target field
                final int runType = paragraph.getRunType();
                final ParserField targetField;
                if (runType == TextHeaderAtom.TITLE_TYPE || runType == TextHeaderAtom.CENTER_TITLE_TYPE) {
                    targetField = TITLE;
                } else if (runType == TextHeaderAtom.NOTES_TYPE) {
                    targetField = NOTES;
                } else if (runType == TextHeaderAtom.BODY_TYPE || runType == TextHeaderAtom.CENTRE_BODY_TYPE
                        || runType == TextHeaderAtom.HALF_BODY_TYPE
                        || runType == TextHeaderAtom.QUARTER_BODY_TYPE) {
                    targetField = BODY;
                } else {
                    // OTHER_TYPE and anything unrecognised
                    targetField = OTHER;
                }

                // Concatenate the raw text of every run, each followed by a space
                final StringBuilder paragraphText = new StringBuilder();
                for (HSLFTextRun run : paragraph.getTextRuns()) {
                    paragraphText.append(run.getRawText()).append(' ');
                }

                slideDocument.add(targetField, paragraphText.toString().trim());
            }
        }

        slideDocument.add(LANG_DETECTION, languageDetection(slideDocument, BODY, 10000));
    }

}

From source file:com.qwazr.library.poi.PptParser.java

License:Apache License

/**
 * Reads a PowerPoint (.ppt) stream, records the MIME type in the result metas and
 * creates one result document per slide. Each paragraph's text is added to the TITLE,
 * NOTES, BODY or OTHER field according to its run type; every non-title paragraph is
 * additionally added to CONTENT, on which language detection is run.
 *
 * @param parameters not used by this implementation
 * @param inputStream the document content
 * @param extension file extension, used to resolve the MIME type
 * @param mimeType declared MIME type, used to resolve the MIME type
 * @param resultBuilder receives the metas and the per-slide documents
 * @throws Exception if the slide show cannot be read
 */
@Override
public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream,
        final String extension, final String mimeType, final ParserResultBuilder resultBuilder)
        throws Exception {

    final HSLFSlideShow slideShow = new HSLFSlideShow(inputStream);

    final ParserFieldsBuilder metaFields = resultBuilder.metas();
    metaFields.set(MIME_TYPE, findMimeType(extension, mimeType, this::findMimeTypeUsingDefault));

    for (HSLFSlide currentSlide : slideShow.getSlides()) {
        final ParserFieldsBuilder slideDocument = resultBuilder.newDocument();

        for (List<HSLFTextParagraph> paragraphGroup : currentSlide.getTextParagraphs()) {
            for (HSLFTextParagraph paragraph : paragraphGroup) {

                // Map the paragraph's run type onto the target field
                final int runType = paragraph.getRunType();
                final ParserField targetField;
                if (runType == TextHeaderAtom.TITLE_TYPE || runType == TextHeaderAtom.CENTER_TITLE_TYPE) {
                    targetField = TITLE;
                } else if (runType == TextHeaderAtom.NOTES_TYPE) {
                    targetField = NOTES;
                } else if (runType == TextHeaderAtom.BODY_TYPE || runType == TextHeaderAtom.CENTRE_BODY_TYPE
                        || runType == TextHeaderAtom.HALF_BODY_TYPE
                        || runType == TextHeaderAtom.QUARTER_BODY_TYPE) {
                    targetField = BODY;
                } else {
                    // OTHER_TYPE and anything unrecognised
                    targetField = OTHER;
                }

                // Concatenate the raw text of every run, each followed by a space
                final StringBuilder paragraphText = new StringBuilder();
                for (HSLFTextRun run : paragraph.getTextRuns()) {
                    paragraphText.append(run.getRawText()).append(' ');
                }
                final String trimmedText = paragraphText.toString().trim();

                slideDocument.add(targetField, trimmedText);
                // Titles are kept out of the aggregated content field
                if (targetField != TITLE) {
                    slideDocument.add(CONTENT, trimmedText);
                }
            }
        }

        slideDocument.add(LANG_DETECTION, languageDetection(slideDocument, CONTENT, 10000));
    }

}

From source file:IO.search.SearchWordFile.java

/**
 * Searches one file for the given keyword expression and, on a match, records the file's
 * path in {@code nameList}.
 *
 * <p>The keyword expression is interpreted as follows:
 * <ul>
 *   <li>contains a space: split on spaces and delegated to {@code finding(...)} (per the
 *       original author's note, a file matches when it contains any one of the terms);</li>
 *   <li>otherwise contains a hyphen: split on hyphens, and the file matches only when its
 *       text contains every term (case-insensitive);</li>
 *   <li>otherwise: the file matches when its text contains the keyword (case-insensitive).</li>
 * </ul>
 *
 * <p>Supported extensions (case-insensitive): doc, docx, pdf, txt, ppt, pptx. Files with any
 * other extension are ignored. Any failure while reading or matching is logged at SEVERE and
 * otherwise swallowed, so one unreadable file does not abort a wider search.
 *
 * <p>Fixes relative to the previous version: the plain-keyword search on .txt files
 * dereferenced a variable that was always {@code null} and therefore never matched; cleanup
 * code threw {@code NullPointerException} when a file could not be opened; the .doc input
 * stream was never closed; and the all-terms match on .txt files was case-sensitive unlike
 * every other format.
 *
 * @param scrFile the file to inspect
 * @param word the keyword expression
 */
private void search(File scrFile, String word) {
    String[] arrStr = null;  // space-separated terms
    String[] arrStrA = null; // hyphen-separated terms, all of which must be present
    if (word.contains(" ")) {
        arrStr = word.split(" ");
    } else if (word.contains("-")) {
        arrStrA = word.split("-");
        System.out.println("reach");
    }

    String fileName = scrFile.getName();
    try {
        // Step 1: extract the text according to the file extension
        String text;
        if (fileName.matches("^.+\\.(?i)(doc)$")) {
            text = extractDocText(scrFile);
        } else if (fileName.matches("^.+\\.(?i)(docx)$")) {
            text = extractDocxText(scrFile);
        } else if (fileName.matches("^.+\\.(?i)(pdf)$")) {
            text = extractPdfText(scrFile);
        } else if (fileName.matches("^.+\\.(?i)(txt)$")) {
            text = extractPlainText(scrFile);
        } else if (fileName.matches("^.+\\.(?i)(ppt)$")) {
            text = extractPptText(scrFile);
        } else if (fileName.matches("^.+\\.(?i)(pptx)$")) {
            text = extractPptxText(scrFile);
        } else {
            return; // unsupported file type
        }

        // Step 2: apply the same matching rules to every format
        if (arrStr != null && arrStr.length > 0) {
            finding(text, arrStr, scrFile);
        } else if (arrStrA != null && arrStrA.length > 0) {
            String lowerText = text.toLowerCase();
            boolean containsAll = true;
            for (int i = 0; i < arrStrA.length; i++) {
                if (!lowerText.contains(arrStrA[i].toLowerCase())) {
                    containsAll = false;
                    break;
                }
            }
            if (containsAll) {
                nameList.add(scrFile.getPath());
            }
        } else if (text.toLowerCase().contains(word.toLowerCase())) {
            System.out.println("true");
            nameList.add(scrFile.getPath());
        }
    } catch (Exception ex) {
        Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex);
    }
}

/** Extracts the text of a Word 97-2003 (.doc) file and closes the file stream afterwards. */
private String extractDocText(File scrFile) throws IOException {
    InputStream is = new FileInputStream(scrFile);
    try {
        WordExtractor ex = new WordExtractor(is);
        return ex.getText();
    } finally {
        is.close();
    }
}

/** Extracts the text of a Word 2007+ (.docx) file. */
private String extractDocxText(File scrFile) throws Exception {
    // NOTE(review): the package is left open, as before. Closing a package opened this way
    // may write it back to disk - confirm the access mode before adding a close()/revert().
    OPCPackage opcPackage = POIXMLDocument.openPackage(scrFile.getPath());
    POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
    return extractor.getText();
}

/** Extracts the text of a PDF file, releasing the document and the file stream afterwards. */
private String extractPdfText(File scrFile) throws IOException {
    FileInputStream input = new FileInputStream(scrFile);
    PDDocument pdfDocument = null;
    try {
        PDFParser pdfParser = new PDFParser(input);
        pdfParser.parse();
        pdfDocument = pdfParser.getPDDocument();
        PDFTextStripper stripper = new PDFTextStripper();
        return stripper.getText(pdfDocument);
    } finally {
        try {
            // null when parsing failed before a document was produced
            if (pdfDocument != null) {
                pdfDocument.close();
            }
        } finally {
            input.close();
        }
    }
}

/**
 * Reads a text file using the charset reported by {@code getCharset}. Lines are
 * concatenated without separators, as in the previous implementation.
 */
private String extractPlainText(File scrFile) throws Exception {
    FileInputStream fileIn = new FileInputStream(scrFile);
    try {
        BufferedReader in = new BufferedReader(
                new InputStreamReader(fileIn, getCharset(scrFile.getAbsolutePath())));
        StringBuilder sb = new StringBuilder();
        String line;
        while ((line = in.readLine()) != null) {
            sb.append(line);
        }
        return sb.toString();
    } finally {
        // closing the underlying stream is enough; it also covers a failure in getCharset
        fileIn.close();
    }
}

/**
 * Extracts the text of a PowerPoint 97-2003 (.ppt) file: a newline precedes each group of
 * text paragraphs and each paragraph contributes its {@code toString()} value.
 */
private String extractPptText(File scrFile) throws IOException {
    InputStream is = new FileInputStream(scrFile);
    try {
        StringBuilder content = new StringBuilder();
        HSLFSlideShow ss = new HSLFSlideShow(is);
        List<HSLFSlide> slides = ss.getSlides();
        System.out.println("total have " + slides.size() + " page PPT");
        for (int i = 0; i < slides.size(); i++) {
            List<List<HSLFTextParagraph>> textParagraphs = slides.get(i).getTextParagraphs();
            if (textParagraphs == null) {
                continue;
            }
            for (int j = 0; j < textParagraphs.size(); j++) {
                content.append("\n");
                List<HSLFTextParagraph> paragraphs = textParagraphs.get(j);
                for (int f = 0; f < paragraphs.size(); f++) {
                    content.append(paragraphs.get(f).toString());
                }
            }
        }
        return content.toString();
    } finally {
        is.close();
    }
}

/** Extracts the text of a PowerPoint 2007+ (.pptx) file. */
private String extractPptxText(File scrFile) throws Exception {
    // NOTE(review): the package is left open, as before - see extractDocxText.
    return new XSLFPowerPointExtractor(POIXMLDocument.openPackage(scrFile.getPath())).getText();
}

From source file:org.apache.tika.parser.microsoft.HSLFExtractor.java

License:Apache License

/**
 * Writes the textual content of a PowerPoint document as XHTML.
 *
 * <p>Output structure: a {@code div.slideShow} containing one {@code div.slide} per slide
 * (header, master text, slide text, table text, footer, comments, embedded resources and
 * inline notes), followed by a {@code div.slide-notes} that holds each distinct notes sheet
 * once (with the notes header/footer repeated around it) and the embedded pictures.
 *
 * @param root directory node holding the PowerPoint document
 * @param xhtml handler receiving the generated XHTML events
 */
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {
    HSLFSlideShow slideShow = new HSLFSlideShow(root);
    List<HSLFSlide> allSlides = slideShow.getSlides();

    xhtml.startElement("div", "class", "slideShow");

    // First pass: one <div class="slide"> per slide
    for (HSLFSlide currentSlide : allSlides) {
        xhtml.startElement("div", "class", "slide");

        // Header, only when visible and non-null
        HeadersFooters slideHf = currentSlide.getHeadersFooters();
        if (slideHf != null && slideHf.isHeaderVisible() && slideHf.getHeaderText() != null) {
            xhtml.startElement("p", "class", "slide-header");
            xhtml.characters(slideHf.getHeaderText());
            xhtml.endElement("p");
        }

        // Text coming from the slide master
        extractMaster(xhtml, currentSlide.getMasterSheet());

        // The slide's own text
        xhtml.startElement("div", "class", "slide-content");
        textRunsToText(xhtml, currentSlide.getTextParagraphs());
        xhtml.endElement("div");

        // Text held in tables
        for (HSLFShape candidate : currentSlide.getShapes()) {
            if (!(candidate instanceof HSLFTable)) {
                continue;
            }
            extractTableText(xhtml, (HSLFTable) candidate);
        }

        // Footer, only when visible and non-null
        if (slideHf != null && slideHf.isFooterVisible() && slideHf.getFooterText() != null) {
            xhtml.startElement("p", "class", "slide-footer");
            xhtml.characters(slideHf.getFooterText());
            xhtml.endElement("p");
        }

        // Comments: "<b>Author (Initials) - </b>text"
        for (Comment slideComment : currentSlide.getComments()) {
            StringBuilder authorLabel = new StringBuilder();
            xhtml.startElement("p", "class", "slide-comment");

            if (slideComment.getAuthor() != null) {
                authorLabel.append(slideComment.getAuthor());
            }
            if (slideComment.getAuthorInitials() != null) {
                if (authorLabel.length() > 0) {
                    authorLabel.append(" ");
                }
                authorLabel.append("(").append(slideComment.getAuthorInitials()).append(")");
            }

            boolean hasText = slideComment.getText() != null;
            if (authorLabel.length() > 0) {
                // separator only when comment text follows the author label
                if (hasText) {
                    authorLabel.append(" - ");
                }
                xhtml.startElement("b");
                xhtml.characters(authorLabel.toString());
                xhtml.endElement("b");
            }
            if (hasText) {
                xhtml.characters(slideComment.getText());
            }
            xhtml.endElement("p");
        }

        // Embedded resources of this slide
        handleSlideEmbeddedResources(currentSlide, xhtml);

        // Notes attached to this slide, written inline
        HSLFNotes inlineNotes = currentSlide.getNotes();
        if (inlineNotes != null) {
            xhtml.startElement("div", "class", "slide-notes");
            textRunsToText(xhtml, inlineNotes.getTextParagraphs());
            xhtml.endElement("div");
        }

        xhtml.endElement("div"); // end of div.slide
    }

    xhtml.endElement("div"); // end of div.slideShow

    // Second pass: every distinct notes sheet once, wrapped in the notes header/footer
    xhtml.startElement("div", "class", "slide-notes");
    HashSet<Integer> emittedNoteSheets = new HashSet<>();
    HeadersFooters notesHf = slideShow.getNotesHeadersFooters();

    for (HSLFSlide currentSlide : allSlides) {
        HSLFNotes slideNotes = currentSlide.getNotes();
        if (slideNotes == null) {
            continue;
        }
        // add() returns false when this sheet number has been written already
        if (!emittedNoteSheets.add(slideNotes._getSheetNumber())) {
            continue;
        }

        if (notesHf != null && notesHf.isHeaderVisible() && notesHf.getHeaderText() != null) {
            xhtml.startElement("p", "class", "slide-note-header");
            xhtml.characters(notesHf.getHeaderText());
            xhtml.endElement("p");
        }

        textRunsToText(xhtml, slideNotes.getTextParagraphs());

        if (notesHf != null && notesHf.isFooterVisible() && notesHf.getFooterText() != null) {
            xhtml.startElement("p", "class", "slide-note-footer");
            xhtml.characters(notesHf.getFooterText());
            xhtml.endElement("p");
        }
    }

    handleSlideEmbeddedPictures(slideShow, xhtml);

    xhtml.endElement("div");
}

From source file:org.quelea.data.powerpoint.PPTPresentation.java

License:Open Source License

/**
 * Create a presentation from a file.
 *
 * <p>The file stream is closed as soon as the slide show has been constructed, whether or
 * not construction succeeds; previously the stream was never closed.
 *
 * @param file the file containing the presentation.
 * @throws IOException if the file cannot be opened or read.
 */
public PPTPresentation(String file) throws IOException {
    FileInputStream input = new FileInputStream(new File(file));
    try {
        slideshow = new HSLFSlideShow(input);
    } finally {
        // The slide show is not expected to need the stream after construction
        // (closing it straight after the constructor is the usual POI usage pattern).
        input.close();
    }
    slides = makeSlides();
}

From source file:testppttopdf.TestPptToPdf.java

/**
 * Renders every slide of a fixed .ppt file to a JPEG image and then assembles all image
 * files found in the image directory into a single PDF, one page per image.
 *
 * <p>Streams, graphics contexts and the PDF document are released in {@code finally}
 * blocks so that a failure part-way through does not leak them. Files in the image
 * directory that cannot be decoded as images are skipped instead of causing a
 * {@code NullPointerException}.
 *
 * @param args the command line arguments (unused)
 * @throws IOException if a file cannot be read or written, or the image directory cannot be listed
 * @throws COSVisitorException if the PDF cannot be saved
 */
public static void main(String[] args) throws FileNotFoundException, IOException, COSVisitorException {
    FileInputStream is = new FileInputStream("/home/sagar/Desktop/Shareback/test.ppt");
    HSLFSlideShow ppt;
    try {
        ppt = new HSLFSlideShow(is);
    } finally {
        is.close();
    }

    Dimension pgsize = ppt.getPageSize();

    int idx = 1;
    for (HSLFSlide slide : ppt.getSlides()) {

        BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, BufferedImage.TYPE_INT_RGB);
        Graphics2D graphics = img.createGraphics();
        try {
            // clear the drawing area
            graphics.setPaint(Color.white);
            graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height));

            // render
            slide.draw(graphics);
        } finally {
            graphics.dispose();
        }

        // save the output
        FileOutputStream out = new FileOutputStream("/home/sagar/Desktop/Shareback/img/slide-" + idx + ".jpg");
        try {
            javax.imageio.ImageIO.write(img, "jpg", out);
        } finally {
            out.close();
        }

        idx++;
    }

    String someimg = "/home/sagar/Desktop/Shareback/img/";

    File file = new File(someimg);
    if (!file.isDirectory()) {
        System.out.println(someimg + " is not a Directory");
        return;
    }

    // listFiles() returns null when the directory cannot be read
    File[] imageFiles = file.listFiles();
    if (imageFiles == null) {
        throw new IOException("Unable to list files in " + someimg);
    }
    // NOTE(review): listFiles() gives no ordering guarantee, so the PDF pages may not
    // follow slide order - sort numerically by slide index if order matters.

    PDDocument document = new PDDocument();
    try {
        for (File f : imageFiles) {

            // First read: decode the image only to learn its dimensions
            BufferedImage bimg;
            InputStream in = new FileInputStream(f);
            try {
                bimg = ImageIO.read(in);
            } finally {
                in.close();
            }
            if (bimg == null) {
                // not a decodable image (or a sub-directory entry) - nothing to add
                continue;
            }

            float width = bimg.getWidth();
            float height = bimg.getHeight();
            PDPage page = new PDPage(new PDRectangle(width + 10, height + 10));
            document.addPage(page);

            // Second read: embed the raw JPEG bytes in the PDF
            InputStream jpegIn = new FileInputStream(f);
            try {
                PDXObjectImage img = new PDJpeg(document, jpegIn);
                PDPageContentStream contentStream = new PDPageContentStream(document, page);
                try {
                    contentStream.drawImage(img, 0, 0);
                } finally {
                    contentStream.close();
                }
            } finally {
                jpegIn.close();
            }
        }

        document.save("/home/sagar/Desktop/Shareback/test-generated.pdf");
    } finally {
        document.close();
    }

}