Example usage for org.apache.poi.xwpf.extractor.XWPFWordExtractor.getCoreProperties()

List of usage examples for org.apache.poi.xwpf.extractor.XWPFWordExtractor.getCoreProperties()

Introduction

On this page you can find example usages of org.apache.poi.xwpf.extractor.XWPFWordExtractor.getCoreProperties().

Prototype

public CoreProperties getCoreProperties() 

Source Link

Document

Returns the core document properties

Usage

From source file: com.jaeksoft.searchlib.parser.DocxParser.java

License:Open Source License

/**
 * Reads a DOCX stream and copies its core properties and text into a new parser result item.
 *
 * <p>When the extractor exposes core properties, the title, creator, subject, description and
 * keywords are added as fields. The extracted text is then added as the content field after
 * runs of spaces are replaced through {@code StringUtils.replaceConsecutiveSpaces}, and language
 * detection is requested on that content field.
 *
 * @param streamLimiter supplies the input stream the document is read from
 * @param lang not referenced in this method body
 * @throws IOException if the document cannot be opened or read
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    final ParserResultItem item = getNewParserResultItem();
    final XWPFDocument docx = new XWPFDocument(streamLimiter.getNewInputStream());

    // Stays null until construction succeeds, so the finally block may hand null to IOUtils.close.
    XWPFWordExtractor extractor = null;
    try {
        extractor = new XWPFWordExtractor(docx);

        // Metadata is optional: copy it only when the extractor provides it.
        final CoreProperties props = extractor.getCoreProperties();
        if (props != null) {
            item.addField(ParserFieldEnum.title, props.getTitle());
            item.addField(ParserFieldEnum.creator, props.getCreator());
            item.addField(ParserFieldEnum.subject, props.getSubject());
            item.addField(ParserFieldEnum.description, props.getDescription());
            item.addField(ParserFieldEnum.keywords, props.getKeywords());
        }

        final String rawText = extractor.getText();
        final String normalizedText = StringUtils.replaceConsecutiveSpaces(rawText, " ");
        item.addField(ParserFieldEnum.content, normalizedText);

        // NOTE(review): 10000 is presumably a cap on the amount of text sampled - confirm.
        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        IOUtils.close(extractor);
    }
}

From source file:com.opensearchserver.extractor.parser.Docx.java

License:Apache License

/**
 * Reads a DOCX stream, records its core properties in {@code metas} and stores the extracted
 * text in a new parser document.
 *
 * <p>When core properties are available, the title, creator, creation date, modification date,
 * subject, description and keywords are added to {@code metas}. The text is added to a new
 * parser document under {@code CONTENT}, followed by the result of
 * {@code languageDetection(CONTENT, 10000)} under {@code LANG_DETECTION}.
 *
 * @param inputStream the stream the document is read from
 * @param extension not referenced in this method body
 * @param mimeType not referenced in this method body
 * @throws IOException if the document cannot be opened or read
 */
@Override
protected void parseContent(InputStream inputStream, String extension, String mimeType) throws IOException {
    final XWPFDocument docx = new XWPFDocument(inputStream);

    // Stays null until construction succeeds, so the finally block may receive null.
    XWPFWordExtractor extractor = null;
    try {
        extractor = new XWPFWordExtractor(docx);

        // Metadata is optional: copy it only when the extractor provides it.
        final CoreProperties props = extractor.getCoreProperties();
        if (props != null) {
            metas.add(TITLE, props.getTitle());
            metas.add(CREATOR, props.getCreator());
            metas.add(CREATION_DATE, props.getCreated());
            metas.add(MODIFICATION_DATE, props.getModified());
            metas.add(SUBJECT, props.getSubject());
            metas.add(DESCRIPTION, props.getDescription());
            metas.add(KEYWORDS, props.getKeywords());
        }

        // The parser document is created before the text is pulled, as in the original ordering.
        final ParserDocument target = getNewParserDocument();
        final String text = extractor.getText();
        target.add(CONTENT, text);
        target.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        IOUtils.closeQuietly(extractor);
    }
}

From source file:com.opensearchserver.textextractor.parser.Docx.java

License:Apache License

/**
 * Reads a DOCX stream, records its core properties in {@code metas} and stores the extracted
 * text in a new parser document.
 *
 * <p>When core properties are available, the title, creator, creation date, modification date,
 * subject, description and keywords are added to {@code metas}. The text is added to a new
 * parser document under {@code CONTENT}, followed by the result of
 * {@code languageDetection(CONTENT, 10000)} under {@code LANG_DETECTION}.
 *
 * @param inputStream the stream the document is read from
 * @throws IOException if the document cannot be opened or read
 */
@Override
protected void parseContent(InputStream inputStream) throws IOException {
    final XWPFDocument docx = new XWPFDocument(inputStream);

    // Stays null until construction succeeds, so the finally block may receive null.
    XWPFWordExtractor extractor = null;
    try {
        extractor = new XWPFWordExtractor(docx);

        // Metadata is optional: copy it only when the extractor provides it.
        final CoreProperties props = extractor.getCoreProperties();
        if (props != null) {
            metas.add(TITLE, props.getTitle());
            metas.add(CREATOR, props.getCreator());
            metas.add(CREATION_DATE, props.getCreated());
            metas.add(MODIFICATION_DATE, props.getModified());
            metas.add(SUBJECT, props.getSubject());
            metas.add(DESCRIPTION, props.getDescription());
            metas.add(KEYWORDS, props.getKeywords());
        }

        // The parser document is created before the text is pulled, as in the original ordering.
        final ParserDocument target = getNewParserDocument();
        final String text = extractor.getText();
        target.add(CONTENT, text);
        target.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        IOUtils.closeQuietly(extractor);
    }
}