Example usage for org.apache.poi.hpsf SummaryInformation getKeywords

List of usage examples for org.apache.poi.hpsf SummaryInformation getKeywords

Introduction

On this page you can find example usages of org.apache.poi.hpsf.SummaryInformation.getKeywords().

Prototype

public String getKeywords() 

Source Link

Document

Returns the keywords (or null).

Usage

From source file:com.duroty.lucene.parser.utils.POIFSListener.java

License:Apache License

/**
 * DOCUMENT ME!/* w w w .j a  v  a  2 s.  c  o  m*/
 *
 * @param arg0 DOCUMENT ME!
 */
public void processPOIFSReaderEvent(POIFSReaderEvent readerEvent) {
    org.apache.poi.hpsf.PropertySet propertySet;

    try {
        propertySet = PropertySetFactory.create(readerEvent.getStream());

        SummaryInformation info = (SummaryInformation) propertySet;
        this.author = info.getAuthor();
        this.title = info.getTitle();
        this.keywords = info.getKeywords();
        this.subject = info.getSubject();
    } catch (NoPropertySetStreamException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (MarkUnsupportedException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (UnexpectedPropertySetTypeException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}

From source file:com.flexive.extractor.FxSummaryInformation.java

License:Open Source License

/**
 * Builds this object by copying the properties of a POI summary information.
 *
 * <p>The edit time reported by {@code si} is wrapped in a {@link Date}, and
 * the encrypted flag is always initialised to {@code false}.
 *
 * @param si the summary information to copy from
 */
public FxSummaryInformation(SummaryInformation si) {
    // Descriptive properties
    this.title = si.getTitle();
    this.author = si.getAuthor();
    this.keywords = si.getKeywords();
    this.comments = si.getComments();
    this.applicationName = si.getApplicationName();
    this.revNumber = si.getRevNumber();

    // Document statistics
    this.pageCount = si.getPageCount();
    this.wordCount = si.getWordCount();
    this.charCount = si.getCharCount();

    // Editing history
    this.createdAt = si.getCreateDateTime();
    this.lastModifiedBy = si.getLastAuthor();
    this.lastModifiedAt = si.getLastSaveDateTime();
    this.lastPrintedAt = si.getLastPrinted();
    this.editTime = new Date(si.getEditTime());

    // Not derived from the summary information.
    this.encrypted = false;
}

From source file:com.openkm.util.metadata.MetadataExtractor.java

License:Open Source License

/**
 * Extract metadata from Office Word/*w w  w  . java 2s  . c om*/
 */
public static OfficeMetadata officeExtractor(InputStream is, String mimeType) throws IOException {
    POIFSFileSystem fs = new POIFSFileSystem(is);
    OfficeMetadata md = new OfficeMetadata();
    SummaryInformation si = null;

    if (MimeTypeConfig.MIME_MS_WORD.equals(mimeType)) {
        si = new WordExtractor(fs).getSummaryInformation();
    } else if (MimeTypeConfig.MIME_MS_EXCEL.equals(mimeType)) {
        si = new ExcelExtractor(fs).getSummaryInformation();
    } else if (MimeTypeConfig.MIME_MS_POWERPOINT.equals(mimeType)) {
        si = new PowerPointExtractor(fs).getSummaryInformation();
    }

    if (si != null) {
        md.setTitle(si.getTitle());
        md.setSubject(si.getSubject());
        md.setAuthor(si.getAuthor());
        md.setLastAuthor(si.getLastAuthor());
        md.setKeywords(si.getKeywords());
        md.setComments(si.getComments());
        md.setTemplate(si.getTemplate());
        md.setRevNumber(si.getRevNumber());
        md.setApplicationName(si.getApplicationName());
        md.setEditTime(si.getEditTime());
        md.setPageCount(si.getPageCount());
        md.setWordCount(si.getWordCount());
        md.setCharCount(si.getCharCount());
        md.setSecurity(si.getSecurity());

        Calendar createDateTime = Calendar.getInstance();
        createDateTime.setTime(si.getCreateDateTime());
        md.setCreateDateTime(createDateTime);

        Calendar lastSaveDateTime = Calendar.getInstance();
        lastSaveDateTime.setTime(si.getLastSaveDateTime());
        md.setLastSaveDateTime(lastSaveDateTime);

        Calendar lastPrinted = Calendar.getInstance();
        lastPrinted.setTime(si.getLastPrinted());
        md.setLastPrinted(lastPrinted);
    }

    log.info("officeExtractor: {}", md);
    return md;
}

From source file:com.opensearchserver.extractor.parser.Doc.java

License:Apache License

/**
 * Extracts metadata and paragraph text from a Word document.
 *
 * <p>When summary information is available, its title, author, subject,
 * creation date, last-save date and keywords are added to {@code metas}.
 * Every paragraph is then added to a new parser document under
 * {@code CONTENT}, followed by the language detection result.
 *
 * @param inputStream the Word document content
 * @throws IOException if the document cannot be read
 */
private void currentWordExtraction(InputStream inputStream) throws IOException {
    WordExtractor extractor = null;
    try {
        extractor = new WordExtractor(inputStream);

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            metas.add(KEYWORDS, summary.getKeywords());
        }

        final ParserDocument parsed = getNewParserDocument();
        for (final String paragraph : extractor.getParagraphText()) {
            parsed.add(CONTENT, paragraph);
        }
        parsed.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        // Always release the extractor, whether or not parsing succeeded.
        IOUtils.closeQuietly(extractor);
    }
}

From source file:com.opensearchserver.extractor.parser.Publisher.java

License:Apache License

/**
 * Extracts metadata and text from a Publisher document.
 *
 * <p>Summary properties, when available, are added to {@code metas}. If the
 * extracted text is non-empty it is added to a new parser document under
 * {@code CONTENT}, followed by the language detection result.
 *
 * @param inputStream the document content
 * @param extension not used by this implementation
 * @param mimeType not used by this implementation
 * @throws Exception if the document cannot be parsed
 */
@Override
protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception {
    PublisherTextExtractor publisher = null;
    try {
        publisher = new PublisherTextExtractor(inputStream);

        final SummaryInformation summary = publisher.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            // NOTE(review): keywords are stored under CONTENT rather than a
            // dedicated keywords field; confirm this is intentional.
            metas.add(CONTENT, summary.getKeywords());
            metas.add(COMMENTS, summary.getComments());
        }

        final String body = publisher.getText();
        if (!StringUtils.isEmpty(body)) {
            final ParserDocument parsed = getNewParserDocument();
            parsed.add(CONTENT, body);
            parsed.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
        }
    } finally {
        if (publisher != null) {
            IOUtils.closeQuietly(publisher);
        }
    }
}

From source file:com.opensearchserver.extractor.parser.Visio.java

License:Apache License

/**
 * Extracts metadata and text from a Visio document.
 *
 * <p>Summary properties, when available, are added to {@code metas}. If the
 * extractor returns a text array, each entry is added to a new parser
 * document under {@code CONTENT}, followed by the language detection result.
 *
 * @param inputStream the document content
 * @param extension not used by this implementation
 * @param mimeType not used by this implementation
 * @throws Exception if the document cannot be parsed
 */
@Override
protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception {
    VisioTextExtractor visio = null;
    try {
        visio = new VisioTextExtractor(inputStream);

        final SummaryInformation summary = visio.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            // NOTE(review): keywords are stored under CONTENT rather than a
            // dedicated keywords field; confirm this is intentional.
            metas.add(CONTENT, summary.getKeywords());
            metas.add(COMMENTS, summary.getComments());
        }

        final String[] fragments = visio.getAllText();
        if (fragments != null) {
            final ParserDocument parsed = getNewParserDocument();
            for (final String fragment : fragments) {
                parsed.add(CONTENT, fragment);
            }
            parsed.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
        }
    } finally {
        if (visio != null) {
            IOUtils.closeQuietly(visio);
        }
    }
}

From source file:com.opensearchserver.extractor.parser.Xls.java

License:Apache License

/**
 * Extracts metadata and text from an Excel workbook.
 *
 * <p>Summary properties, when available, are added to {@code metas}. The
 * workbook text is added to a new parser document under {@code CONTENT},
 * followed by the language detection result.
 *
 * @param inputStream the workbook content
 * @param extension not used by this implementation
 * @param mimeType not used by this implementation
 * @throws Exception if the workbook cannot be parsed
 */
@Override
protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception {
    final HSSFWorkbook book = new HSSFWorkbook(inputStream);
    ExcelExtractor extractor = null;
    try {
        extractor = new ExcelExtractor(book);

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            metas.add(KEYWORDS, summary.getKeywords());
        }

        final ParserDocument parsed = getNewParserDocument();
        parsed.add(CONTENT, extractor.getText());
        parsed.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        if (extractor != null) {
            IOUtils.closeQuietly(extractor);
        }
    }
}

From source file:com.opensearchserver.textextractor.parser.Visio.java

License:Open Source License

/**
 * Extracts metadata and text from a Visio document.
 *
 * <p>Summary properties, when available, are added to {@code metas}. If the
 * extractor returns a text array, each entry is added to a new parser
 * document under {@code CONTENT}, followed by the language detection result.
 *
 * @param inputStream the document content
 * @throws Exception if the document cannot be parsed
 */
@Override
protected void parseContent(InputStream inputStream) throws Exception {
    VisioTextExtractor visio = null;
    try {
        visio = new VisioTextExtractor(inputStream);

        final SummaryInformation summary = visio.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            // NOTE(review): keywords are stored under CONTENT rather than a
            // dedicated keywords field; confirm this is intentional.
            metas.add(CONTENT, summary.getKeywords());
            metas.add(COMMENTS, summary.getComments());
        }

        final String[] fragments = visio.getAllText();
        if (fragments != null) {
            final ParserDocument parsed = getNewParserDocument();
            for (final String fragment : fragments) {
                parsed.add(CONTENT, fragment);
            }
            parsed.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
        }
    } finally {
        if (visio != null) {
            IOUtils.closeQuietly(visio);
        }
    }
}

From source file:com.opensearchserver.textextractor.parser.Xls.java

License:Apache License

/**
 * Extracts metadata and text from an Excel workbook.
 *
 * <p>Summary properties, when available, are added to {@code metas}. The
 * workbook text is added to a new parser document under {@code CONTENT},
 * followed by the language detection result.
 *
 * @param inputStream the workbook content
 * @throws Exception if the workbook cannot be parsed
 */
@Override
protected void parseContent(InputStream inputStream) throws Exception {
    final HSSFWorkbook book = new HSSFWorkbook(inputStream);
    ExcelExtractor extractor = null;
    try {
        extractor = new ExcelExtractor(book);

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            metas.add(KEYWORDS, summary.getKeywords());
        }

        final ParserDocument parsed = getNewParserDocument();
        parsed.add(CONTENT, extractor.getText());
        parsed.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        if (extractor != null) {
            IOUtils.closeQuietly(extractor);
        }
    }
}

From source file:com.pnf.plugin.ole.parser.StreamReader.java

License:Apache License

/**
 * Decodes a summary information stream into a list of display nodes.
 *
 * <p>Three root entries are produced, in order: "Creation Information",
 * "Times" and "Miscellaneous". If anything goes wrong while decoding, a
 * {@code Message.CORRUPT} message is recorded and the roots collected so far
 * (possibly none) are returned.
 *
 * @param stream the raw bytes of the summary information stream; its backing
 *               array is handed to the property set parser as-is
 * @return the root nodes built from the stream, never {@code null}
 */
private List<INode> readSummaryInfoStream(ByteBuffer stream) {
    List<INode> roots = new LinkedList<>();
    String propType = "Property";

    try {
        SummaryInformation sInfo = new SummaryInformation(new PropertySet(stream.array()));

        StreamEntry cInfo = new StreamEntry("Creation Information");
        cInfo.addChild(new StreamEntry("Application Name", propType, sInfo.getApplicationName()));
        cInfo.addChild(new StreamEntry("Creation", "Time",
                sInfo.getCreateDateTime() != null ? sInfo.getCreateDateTime().toString() : null));
        cInfo.addChild(new StreamEntry("Author", propType, sInfo.getAuthor()));
        cInfo.addChild(new StreamEntry("Last Author", propType, sInfo.getLastAuthor()));
        cInfo.addChild(new StreamEntry("Template", propType, sInfo.getTemplate()));
        roots.add(cInfo);

        propType = "Time";
        StreamEntry timeInfo = new StreamEntry("Times");
        timeInfo.addChild(new StreamEntry("Total Edit Time", propType, String.valueOf(sInfo.getEditTime())));
        timeInfo.addChild(new StreamEntry("Last Saved", propType,
                sInfo.getLastSaveDateTime() != null ? sInfo.getLastSaveDateTime().toString() : null));
        timeInfo.addChild(new StreamEntry("Last Printed", propType,
                sInfo.getLastPrinted() != null ? sInfo.getLastPrinted().toString() : null));
        roots.add(timeInfo);

        propType = "Misc";
        StreamEntry misc = new StreamEntry("Miscellaneous");
        misc.addChild(new StreamEntry("OS Version", "int", String.valueOf(sInfo.getOSVersion())));
        misc.addChild(new StreamEntry("Revision Number", "int", sInfo.getRevNumber()));
        misc.addChild(new StreamEntry("Page Count", "int", String.valueOf(sInfo.getPageCount())));
        misc.addChild(new StreamEntry("Word Count", "int", String.valueOf(sInfo.getWordCount())));

        // wasNull() must be consulted right after getSecurity(), before any
        // other getter is called on sInfo.
        int secVal = sInfo.getSecurity();
        String security;

        if (!sInfo.wasNull()) { // Set description according to POI documentation
            switch (secVal) {
            case 0:
                security = "No security";
                break;
            case 1:
                security = "Password protected";
                break;
            case 2:
                security = "Read-only recommended";
                break;
            case 4:
                security = "Read-only enforced";
                break;
            case 8:
                security = "Locked for annotations";
                break;
            default:
                // Without a fallback the description stayed null and the
                // concatenation below produced the text "null (code N)".
                security = "Unknown";
                break;
            }

            security += " (code " + secVal + ")";
        } else {
            security = "Field not set";
        }

        misc.addChild(new StreamEntry("Document Security", "int", security));
        misc.addChild(new StreamEntry("Subject", propType, sInfo.getSubject()));
        misc.addChild(new StreamEntry("Keywords", propType, sInfo.getKeywords()));
        roots.add(misc);
    } catch (Throwable t) {
        // Deliberately best-effort: any failure is reported as a corrupt
        // stream instead of aborting the caller.
        addMessage("Attempted to read " + SUMM_INFO + " stream but no property sets were found.", null,
                Message.CORRUPT);
    }

    return roots;
}