Example usage for org.apache.poi.hwpf.extractor Word6Extractor getParagraphText

List of usage examples for org.apache.poi.hwpf.extractor Word6Extractor getParagraphText

Introduction

On this page you can find example usages of org.apache.poi.hwpf.extractor.Word6Extractor.getParagraphText().

Prototype

@Deprecated
public String[] getParagraphText() 

Source Link

Document

Gets the text from the Word file as an array, with one String per paragraph.

Usage

From source file:com.opensearchserver.extractor.parser.Doc.java

License:Apache License

/**
 * Extracts metadata and paragraph text from the given stream using
 * {@link Word6Extractor}.
 *
 * <p>When summary information is available, its title, author and subject
 * are added to {@code metas}. Every paragraph returned by the extractor is
 * then added to a freshly obtained parser document under {@code CONTENT},
 * followed by the result of {@code languageDetection(CONTENT, 10000)} under
 * {@code LANG_DETECTION}.
 *
 * <p>The extractor is closed quietly whether or not extraction succeeds.
 *
 * @param inputStream stream handed to the {@code Word6Extractor} constructor
 * @throws IOException declared by this method; presumably raised while the
 *         extractor reads the stream (confirm against the POI version in use)
 */
private void oldWordExtraction(InputStream inputStream) throws IOException {
    Word6Extractor extractor = null;
    try {
        extractor = new Word6Extractor(inputStream);

        // Summary information may be absent; copy the fields only when present.
        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
        }

        final ParserDocument parsed = getNewParserDocument();
        @SuppressWarnings("deprecation")
        final String[] paragraphs = extractor.getParagraphText();
        for (final String text : paragraphs) {
            parsed.add(CONTENT, text);
        }
        parsed.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        // Accepts null, so this is safe even if the constructor threw.
        IOUtils.closeQuietly(extractor);
    }
}

From source file:com.qwazr.library.poi.DocParser.java

License:Apache License

/**
 * Extracts metadata and paragraph text from the given stream using
 * {@link Word6Extractor} and records them on the supplied result builder.
 *
 * <p>The MIME type meta is set to {@code DEFAULT_MIMETYPES[0]}. When summary
 * information is available, its title, author and subject are added to the
 * metas. Each paragraph returned by the extractor (if the array is non-null)
 * is added to a new document under {@code CONTENT}, followed by the result of
 * {@code languageDetection(document, CONTENT, 10000)} under
 * {@code LANG_DETECTION}.
 *
 * <p>The extractor is closed quietly whether or not extraction succeeds.
 *
 * @param inputStream   stream handed to the {@code Word6Extractor} constructor
 * @param resultBuilder builder that supplies the metas and the new document
 * @throws IOException declared by this method; presumably raised while the
 *         extractor reads the stream (confirm against the POI version in use)
 */
private void oldWordExtraction(final InputStream inputStream, final ParserResultBuilder resultBuilder)
        throws IOException {
    Word6Extractor extractor = null;
    try {
        extractor = new Word6Extractor(inputStream);

        final ParserFieldsBuilder metaFields = resultBuilder.metas();
        metaFields.set(MIME_TYPE, DEFAULT_MIMETYPES[0]);

        // Summary information may be absent; copy the fields only when present.
        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metaFields.add(TITLE, summary.getTitle());
            metaFields.add(AUTHOR, summary.getAuthor());
            metaFields.add(SUBJECT, summary.getSubject());
        }

        final ParserFieldsBuilder docFields = resultBuilder.newDocument();
        @SuppressWarnings("deprecation")
        final String[] paragraphs = extractor.getParagraphText();
        if (paragraphs != null) {
            for (final String text : paragraphs) {
                docFields.add(CONTENT, text);
            }
        }
        docFields.add(LANG_DETECTION, languageDetection(docFields, CONTENT, 10000));
    } finally {
        // Accepts null, so this is safe even if the constructor threw.
        IOUtils.closeQuietly(extractor);
    }
}

From source file:mj.ocraptor.extraction.tika.parser.microsoft.WordExtractor.java

License:Apache License

/**
 * Emits one {@code <p>} element per paragraph of a legacy Word document.
 *
 * <p>The document is loaded from {@code root} as an {@link HWPFOldDocument},
 * wrapped in a {@link Word6Extractor}, and each string returned by
 * {@code getParagraphText()} is written through {@code xhtml.element("p", ...)}.
 *
 * @param root  directory node the old-format document is read from
 * @param xhtml handler that receives the paragraph elements
 * @throws IOException   declared by this method
 * @throws SAXException  declared by this method
 * @throws TikaException declared by this method
 */
protected void parseWord6(DirectoryNode root, XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {
    final Word6Extractor extractor = new Word6Extractor(new HWPFOldDocument(root));

    final String[] paragraphs = extractor.getParagraphText();
    for (int i = 0; i < paragraphs.length; i++) {
        xhtml.element("p", paragraphs[i]);
    }
}