Example usage for org.apache.poi.hwpf.extractor Word6Extractor getSummaryInformation

List of usage examples for org.apache.poi.hwpf.extractor Word6Extractor getSummaryInformation

Introduction

On this page you can find example usages of org.apache.poi.hwpf.extractor.Word6Extractor.getSummaryInformation().

Prototype

public SummaryInformation getSummaryInformation() 

Source Link

Document

Returns the summary information metadata for the document.

Usage

From source file:com.jaeksoft.searchlib.parser.DocParser.java

License:Open Source License

/**
 * Runs a {@code Word6Extractor} over the given stream and copies the
 * extracted metadata and text into {@code result}.
 *
 * <p>When the extractor exposes summary information, its title, author and
 * subject are added to the matching {@code ParserFieldEnum} fields. The full
 * text is then split on newlines and every fragment is added to the content
 * field after going through
 * {@code StringUtils.replaceConsecutiveSpaces(fragment, " ")}.
 *
 * @param result      receives the metadata and content fields
 * @param inputStream stream handed to the {@code Word6Extractor} constructor
 * @throws IOException if the extraction raises one
 */
private void oldWordExtraction(ParserResultItem result, InputStream inputStream) throws IOException {
    Word6Extractor extractor = null;
    try {
        extractor = new Word6Extractor(inputStream);

        // Metadata is optional: only copy it when the extractor provides it.
        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            result.addField(ParserFieldEnum.title, summary.getTitle());
            result.addField(ParserFieldEnum.author, summary.getAuthor());
            result.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        // One content entry per line of extracted text.
        for (final String line : extractor.getText().split("\\n")) {
            final String normalized = StringUtils.replaceConsecutiveSpaces(line, " ");
            result.addField(ParserFieldEnum.content, normalized);
        }
    } finally {
        // The extractor is still null here if its constructor threw.
        IOUtils.close(extractor);
    }
}

From source file:com.opensearchserver.extractor.parser.Doc.java

License:Apache License

/**
 * Runs a {@code Word6Extractor} over the given stream, storing metadata in
 * {@code metas} and the paragraph text in a newly obtained
 * {@code ParserDocument}.
 *
 * <p>When summary information is available, its title, author and subject
 * are added under {@code TITLE}, {@code AUTHOR} and {@code SUBJECT}. Each
 * paragraph returned by the extractor is added under {@code CONTENT}, and
 * the result of {@code languageDetection(CONTENT, 10000)} is added under
 * {@code LANG_DETECTION}.
 *
 * @param inputStream stream handed to the {@code Word6Extractor} constructor
 * @throws IOException if the extraction raises one
 */
private void oldWordExtraction(InputStream inputStream) throws IOException {
    Word6Extractor extractor = null;
    try {
        extractor = new Word6Extractor(inputStream);

        // Metadata is optional: only copy it when the extractor provides it.
        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
        }

        final ParserDocument parserDocument = getNewParserDocument();
        // getParagraphText() is deprecated, hence the local suppression.
        @SuppressWarnings("deprecation")
        final String[] paragraphs = extractor.getParagraphText();
        for (int i = 0; i < paragraphs.length; i++) {
            parserDocument.add(CONTENT, paragraphs[i]);
        }

        // NOTE(review): 10000 is passed through unchanged; presumably a
        // size limit for the detection input - confirm against languageDetection.
        parserDocument.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        // The extractor is still null here if its constructor threw.
        IOUtils.closeQuietly(extractor);
    }
}

From source file:com.qwazr.library.poi.DocParser.java

License:Apache License

/**
 * Runs a {@code Word6Extractor} over the given stream and feeds the
 * extracted metadata and paragraph text into {@code resultBuilder}.
 *
 * <p>The metas builder always receives {@code DEFAULT_MIMETYPES[0]} under
 * {@code MIME_TYPE}; when summary information is available, its title,
 * author and subject are added as well. A new document is then created on
 * the builder, each paragraph (if the paragraph array is non-null) is added
 * under {@code CONTENT}, and the result of
 * {@code languageDetection(document, CONTENT, 10000)} is added under
 * {@code LANG_DETECTION}.
 *
 * @param inputStream   stream handed to the {@code Word6Extractor} constructor
 * @param resultBuilder supplies the metas builder and the new document
 * @throws IOException if the extraction raises one
 */
private void oldWordExtraction(final InputStream inputStream, final ParserResultBuilder resultBuilder)
        throws IOException {
    Word6Extractor extractor = null;
    try {
        extractor = new Word6Extractor(inputStream);

        final ParserFieldsBuilder metaFields = resultBuilder.metas();
        metaFields.set(MIME_TYPE, DEFAULT_MIMETYPES[0]);

        // Metadata is optional: only copy it when the extractor provides it.
        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metaFields.add(TITLE, summary.getTitle());
            metaFields.add(AUTHOR, summary.getAuthor());
            metaFields.add(SUBJECT, summary.getSubject());
        }

        final ParserFieldsBuilder contentDocument = resultBuilder.newDocument();
        // getParagraphText() is deprecated, hence the local suppression.
        @SuppressWarnings("deprecation")
        final String[] paragraphs = extractor.getParagraphText();
        if (paragraphs != null) {
            for (int i = 0; i < paragraphs.length; i++) {
                contentDocument.add(CONTENT, paragraphs[i]);
            }
        }

        // Language detection runs even when no paragraph was added.
        // NOTE(review): 10000 is passed through unchanged; presumably a
        // size limit for the detection input - confirm against languageDetection.
        contentDocument.add(LANG_DETECTION, languageDetection(contentDocument, CONTENT, 10000));
    } finally {
        // The extractor is still null here if its constructor threw.
        IOUtils.closeQuietly(extractor);
    }
}