Example usage for org.apache.poi.hpsf SummaryInformation getWordCount

List of usage examples for org.apache.poi.hpsf SummaryInformation getWordCount

Introduction

This page collects example usages of the getWordCount() method of org.apache.poi.hpsf.SummaryInformation.

Prototype

public int getWordCount() 

Source Link

Document

Returns the word count or 0 if the SummaryInformation does not contain a word count.

Usage

From source file:com.flexive.extractor.FxSummaryInformation.java

License:Open Source License

/**
 * Creates a new instance by copying the properties of the given summary
 * information into the fields of this object.
 *
 * @param si the summary information
 */
public FxSummaryInformation(SummaryInformation si) {
    // Descriptive text properties
    this.title = si.getTitle();
    this.author = si.getAuthor();
    this.lastModifiedBy = si.getLastAuthor();
    this.keywords = si.getKeywords();
    this.comments = si.getComments();
    this.applicationName = si.getApplicationName();
    this.revNumber = si.getRevNumber();

    // Timestamps; the edit time value is wrapped in a Date
    this.createdAt = si.getCreateDateTime();
    this.lastModifiedAt = si.getLastSaveDateTime();
    this.lastPrintedAt = si.getLastPrinted();
    this.editTime = new Date(si.getEditTime());

    // Document statistics
    this.pageCount = si.getPageCount();
    this.charCount = si.getCharCount();
    this.wordCount = si.getWordCount();

    // Objects built from a summary information are always flagged as not encrypted
    this.encrypted = false;
}

From source file:com.openkm.util.metadata.MetadataExtractor.java

License:Open Source License

/**
 * Extracts summary metadata from an MS Office (Word, Excel or PowerPoint) document.
 *
 * <p>The extractor used to read the summary information is selected from the
 * MIME type. For any other MIME type no summary information is read and the
 * metadata object is returned as constructed.</p>
 *
 * <p>Date properties (creation, last save, last print) that are absent from the
 * document are skipped instead of being converted.</p>
 *
 * @param is stream containing the document
 * @param mimeType MIME type used to select the extractor
 * @return the metadata object, populated from the summary information when available
 * @throws IOException if the underlying POIFS file system cannot be read
 */
public static OfficeMetadata officeExtractor(InputStream is, String mimeType) throws IOException {
    POIFSFileSystem fs = new POIFSFileSystem(is);
    OfficeMetadata md = new OfficeMetadata();
    SummaryInformation si = null;

    if (MimeTypeConfig.MIME_MS_WORD.equals(mimeType)) {
        si = new WordExtractor(fs).getSummaryInformation();
    } else if (MimeTypeConfig.MIME_MS_EXCEL.equals(mimeType)) {
        si = new ExcelExtractor(fs).getSummaryInformation();
    } else if (MimeTypeConfig.MIME_MS_POWERPOINT.equals(mimeType)) {
        si = new PowerPointExtractor(fs).getSummaryInformation();
    }

    if (si != null) {
        md.setTitle(si.getTitle());
        md.setSubject(si.getSubject());
        md.setAuthor(si.getAuthor());
        md.setLastAuthor(si.getLastAuthor());
        md.setKeywords(si.getKeywords());
        md.setComments(si.getComments());
        md.setTemplate(si.getTemplate());
        md.setRevNumber(si.getRevNumber());
        md.setApplicationName(si.getApplicationName());
        md.setEditTime(si.getEditTime());
        md.setPageCount(si.getPageCount());
        md.setWordCount(si.getWordCount());
        md.setCharCount(si.getCharCount());
        md.setSecurity(si.getSecurity());

        // The date properties are optional (a document that was never printed has
        // no "last printed" value, for example). Calendar.setTime(null) throws a
        // NullPointerException, so each date is only converted when present.
        if (si.getCreateDateTime() != null) {
            Calendar createDateTime = Calendar.getInstance();
            createDateTime.setTime(si.getCreateDateTime());
            md.setCreateDateTime(createDateTime);
        }

        if (si.getLastSaveDateTime() != null) {
            Calendar lastSaveDateTime = Calendar.getInstance();
            lastSaveDateTime.setTime(si.getLastSaveDateTime());
            md.setLastSaveDateTime(lastSaveDateTime);
        }

        if (si.getLastPrinted() != null) {
            Calendar lastPrinted = Calendar.getInstance();
            lastPrinted.setTime(si.getLastPrinted());
            md.setLastPrinted(lastPrinted);
        }
    }

    log.info("officeExtractor: {}", md);
    return md;
}

From source file:com.pnf.plugin.ole.parser.StreamReader.java

License:Apache License

/**
 * Parses a summary information stream and converts its properties into display nodes.
 *
 * <p>Three root entries are produced: "Creation Information", "Times" and
 * "Miscellaneous". If the stream cannot be parsed, a corruption message is
 * recorded and the roots collected so far (possibly none) are returned.</p>
 *
 * @param stream buffer holding the raw stream; its backing array is handed to the parser
 * @return the root nodes built from the stream, never {@code null}
 */
private List<INode> readSummaryInfoStream(ByteBuffer stream) {
    List<INode> roots = new LinkedList<>();
    String propType = "Property";

    try {
        SummaryInformation sInfo = new SummaryInformation(new PropertySet(stream.array()));

        StreamEntry cInfo = new StreamEntry("Creation Information");
        cInfo.addChild(new StreamEntry("Application Name", propType, sInfo.getApplicationName()));
        cInfo.addChild(new StreamEntry("Creation", "Time",
                sInfo.getCreateDateTime() != null ? sInfo.getCreateDateTime().toString() : null));
        cInfo.addChild(new StreamEntry("Author", propType, sInfo.getAuthor()));
        cInfo.addChild(new StreamEntry("Last Author", propType, sInfo.getLastAuthor()));
        cInfo.addChild(new StreamEntry("Template", propType, sInfo.getTemplate()));
        roots.add(cInfo);

        propType = "Time";
        StreamEntry timeInfo = new StreamEntry("Times");
        timeInfo.addChild(new StreamEntry("Total Edit Time", propType, String.valueOf(sInfo.getEditTime())));
        timeInfo.addChild(new StreamEntry("Last Saved", propType,
                sInfo.getLastSaveDateTime() != null ? sInfo.getLastSaveDateTime().toString() : null));
        timeInfo.addChild(new StreamEntry("Last Printed", propType,
                sInfo.getLastPrinted() != null ? sInfo.getLastPrinted().toString() : null));
        roots.add(timeInfo);

        propType = "Misc";
        StreamEntry misc = new StreamEntry("Miscellaneous");
        misc.addChild(new StreamEntry("OS Version", "int", String.valueOf(sInfo.getOSVersion())));
        misc.addChild(new StreamEntry("Revision Number", "int", sInfo.getRevNumber()));
        misc.addChild(new StreamEntry("Page Count", "int", String.valueOf(sInfo.getPageCount())));
        misc.addChild(new StreamEntry("Word Count", "int", String.valueOf(sInfo.getWordCount())));

        int secVal = sInfo.getSecurity();
        String security;

        // wasNull() refers to the getSecurity() call directly above
        if (!sInfo.wasNull()) { // Set description according to POI documentation
            switch (secVal) {
            case 0:
                security = "No security";
                break;
            case 1:
                security = "Password protected";
                break;
            case 2:
                security = "Read-only recommended";
                break;
            case 4:
                security = "Read-only enforced";
                break;
            case 8:
                security = "Locked for annotations";
                break;
            default:
                // Any other value has no known description; use an explicit label so
                // the entry does not render as "null (code N)".
                security = "Unknown";
                break;
            }

            security += " (code " + secVal + ")";
        } else {
            security = "Field not set";
        }

        misc.addChild(new StreamEntry("Document Security", "int", security));
        misc.addChild(new StreamEntry("Subject", propType, sInfo.getSubject()));
        misc.addChild(new StreamEntry("Keywords", propType, sInfo.getKeywords()));
        roots.add(misc);
    } catch (Throwable t) {
        // Best effort: any failure while parsing is reported as a corrupt stream
        addMessage("Attempted to read " + SUMM_INFO + " stream but no property sets were found.", null,
                Message.CORRUPT);
    }

    return roots;
}

From source file:mj.ocraptor.extraction.tika.parser.microsoft.SummaryExtractor.java

License:Apache License

/**
 * Copies the properties of the given summary information into the metadata.
 * Several values are stored twice: once under the current property keys and
 * once under the old style Tika 1.0 keys.
 *
 * @param summary the summary information to read from
 */
private void parse(SummaryInformation summary) {
    // Values that end up under more than one metadata key are read once up front
    final String template = summary.getTemplate();
    final String application = summary.getApplicationName();
    final String revision = summary.getRevNumber();
    final int security = summary.getSecurity();
    final int words = summary.getWordCount();
    final int characters = summary.getCharCount();
    final int pages = summary.getPageCount();

    set(TikaCoreProperties.TITLE, summary.getTitle());
    set(TikaCoreProperties.CREATOR, summary.getAuthor());
    set(TikaCoreProperties.KEYWORDS, summary.getKeywords());
    // TODO Move to OO subject in Tika 2.0
    set(TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, summary.getSubject());
    set(TikaCoreProperties.MODIFIER, summary.getLastAuthor());
    set(TikaCoreProperties.COMMENTS, summary.getComments());
    set(OfficeOpenXMLExtended.TEMPLATE, template);
    set(OfficeOpenXMLExtended.APPLICATION, application);
    set(OfficeOpenXMLCore.REVISION, revision);
    set(TikaCoreProperties.CREATED, summary.getCreateDateTime());
    set(TikaCoreProperties.MODIFIED, summary.getLastSaveDateTime());
    set(TikaCoreProperties.PRINT_DATE, summary.getLastPrinted());
    set(Metadata.EDIT_TIME, summary.getEditTime());
    set(OfficeOpenXMLExtended.DOC_SECURITY, security);

    // New style counts
    set(Office.WORD_COUNT, words);
    set(Office.CHARACTER_COUNT, characters);
    set(Office.PAGE_COUNT, pages);
    // The paged-text page number is only recorded for a positive page count
    if (pages > 0) {
        metadata.set(PagedText.N_PAGES, pages);
    }

    // Old style, Tika 1.0 properties
    // TODO Remove these in Tika 2.0
    set(Metadata.TEMPLATE, template);
    set(Metadata.APPLICATION_NAME, application);
    set(Metadata.REVISION_NUMBER, revision);
    set(Metadata.SECURITY, security);
    set(MSOffice.WORD_COUNT, words);
    set(MSOffice.CHARACTER_COUNT, characters);
    set(MSOffice.PAGE_COUNT, pages);
}

From source file:org.apache.tika.parser.microsoft.SummaryExtractor.java

License:Apache License

/**
 * Transfers the properties of the given summary information to the metadata.
 * The author is added as a multi-valued creator entry; template, application
 * name, revision, security and the counts are written under both the current
 * and the old style Tika 1.0 keys.
 *
 * @param summary the summary information to read from
 */
private void parse(SummaryInformation summary) {
    // Read every value that is stored under two keys exactly once
    final String templateName = summary.getTemplate();
    final String applicationName = summary.getApplicationName();
    final String revisionNumber = summary.getRevNumber();
    final int securityCode = summary.getSecurity();
    final int wordCount = summary.getWordCount();
    final int charCount = summary.getCharCount();
    final int pageCount = summary.getPageCount();

    set(TikaCoreProperties.TITLE, summary.getTitle());
    addMulti(metadata, TikaCoreProperties.CREATOR, summary.getAuthor());
    set(TikaCoreProperties.KEYWORDS, summary.getKeywords());
    // TODO Move to OO subject in Tika 2.0
    set(TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, summary.getSubject());
    set(TikaCoreProperties.MODIFIER, summary.getLastAuthor());
    set(TikaCoreProperties.COMMENTS, summary.getComments());
    set(OfficeOpenXMLExtended.TEMPLATE, templateName);
    set(OfficeOpenXMLExtended.APPLICATION, applicationName);
    set(OfficeOpenXMLCore.REVISION, revisionNumber);
    set(TikaCoreProperties.CREATED, summary.getCreateDateTime());
    set(TikaCoreProperties.MODIFIED, summary.getLastSaveDateTime());
    set(TikaCoreProperties.PRINT_DATE, summary.getLastPrinted());
    set(Metadata.EDIT_TIME, summary.getEditTime());
    set(OfficeOpenXMLExtended.DOC_SECURITY, securityCode);

    // New style counts
    set(Office.WORD_COUNT, wordCount);
    set(Office.CHARACTER_COUNT, charCount);
    set(Office.PAGE_COUNT, pageCount);
    // Only a positive page count is recorded as the paged-text page number
    if (pageCount > 0) {
        metadata.set(PagedText.N_PAGES, pageCount);
    }

    // Old style, Tika 1.0 properties
    // TODO Remove these in Tika 2.0
    set(Metadata.TEMPLATE, templateName);
    set(Metadata.APPLICATION_NAME, applicationName);
    set(Metadata.REVISION_NUMBER, revisionNumber);
    set(Metadata.SECURITY, securityCode);
    set(MSOffice.WORD_COUNT, wordCount);
    set(MSOffice.CHARACTER_COUNT, charCount);
    set(MSOffice.PAGE_COUNT, pageCount);
}

From source file:org.modeshape.sequencer.msoffice.MSOfficeMetadata.java

License:Apache License

/**
 * Populates the fields of this object from the given summary information.
 *
 * @param si the summary information to copy the values from
 */
public void setSummaryInformation(SummaryInformation si) {
    // Descriptive properties
    this.title = si.getTitle();
    this.subject = si.getSubject();
    this.author = si.getAuthor();
    this.keywords = si.getKeywords();
    this.comment = si.getComments();
    this.template = si.getTemplate();
    this.creatingApplication = si.getApplicationName();

    // Revision and time related properties
    this.revision = si.getRevNumber();
    this.created = si.getCreateDateTime();
    this.lastSaved = si.getLastSaveDateTime();
    this.lastPrinted = si.getLastPrinted();
    this.totalEditingTime = si.getEditTime();

    // Document statistics
    this.pages = si.getPageCount();
    this.words = si.getWordCount();
    this.characters = si.getCharCount();

    // Preview image
    this.thumbnail = si.getThumbnail();
}

From source file:org.sakaiproject.contentreview.impl.compilatio.CompilatioContentValidator.java

License:Educational Community License

/**
 * Determines the word count of a Word document from its summary information.
 *
 * <p>The fallback value 100 is returned when the check is disabled (the
 * "tii.checkWordLength" setting is false or unset), when the document has no
 * summary information, when the stored count is 0, or when the content
 * cannot be read.</p>
 *
 * @param resource the content resource holding the Word document
 * @return the word count stored in the document, or 100 as fallback
 */
private int wordDocLength(ContentResource resource) {
    if (!serverConfigurationService.getBoolean("tii.checkWordLength", false)) {
        return 100;
    }

    // try-with-resources makes sure the content stream is closed on every path
    try (java.io.InputStream content = resource.streamContent()) {
        POIFSFileSystem pfs = new POIFSFileSystem(content);
        HWPFDocument doc = new HWPFDocument(pfs);
        SummaryInformation dsi = doc.getSummaryInformation();
        if (dsi == null) {
            // No summary information stream in the document: nothing to count from
            log.debug("document has no summary information");
            return 100;
        }
        int count = dsi.getWordCount();
        log.debug("got a count of " + count);
        //if this == 0 then its likely that something went wrong -poi couldn't read it
        if (count == 0) {
            return 100;
        }
        return count;
    } catch (IOException | ServerOverloadException e) {
        // Report through the logger instead of printing the stack trace to stderr
        log.warn("Unable to determine the word count of the document", e);
    }
    //in case we can't read this lets err on the side of caution
    return 100;
}

From source file:poi.poifs.poibrowser.PropertySetDescriptorRenderer.java

License:Apache License

/**
 * Renders a tree node whose user object is a {@link PropertySetDescriptor}.
 * The result is a panel containing a text area with the general property set
 * data and, for a {@link SummaryInformation}, the values of its convenience getters.
 *
 * @param tree the tree being rendered
 * @param value the tree node to render; must be a {@link DefaultMutableTreeNode}
 * @param selected whether the node is selected; a selected node gets its text area inverted
 * @param expanded whether the node is expanded
 * @param leaf whether the node is a leaf
 * @param row the row of the node
 * @param hasFocus whether the node has the focus
 * @return the panel holding the rendered text
 */
public Component getTreeCellRendererComponent(final JTree tree, final Object value, final boolean selected,
        final boolean expanded, final boolean leaf, final int row, final boolean hasFocus) {
    final DefaultMutableTreeNode node = (DefaultMutableTreeNode) value;
    final PropertySetDescriptor descriptor = (PropertySetDescriptor) node.getUserObject();
    final PropertySet ps = descriptor.getPropertySet();

    final JTextArea text = new JTextArea();
    text.setBackground(new Color(200, 255, 200));
    text.setFont(new Font("Monospaced", Font.PLAIN, 10));

    // General data available for every property set
    text.append(renderAsString(descriptor));
    final String general = "\nByte order: " + Codec.hexEncode((short) ps.getByteOrder())
            + "\nFormat: " + Codec.hexEncode((short) ps.getFormat())
            + "\nOS version: " + Codec.hexEncode(ps.getOSVersion())
            + "\nClass ID: " + Codec.hexEncode(ps.getClassID())
            + "\nSection count: " + ps.getSectionCount();
    text.append(general);
    text.append(sectionsToString(ps.getSections()));

    final JPanel p = new JPanel();
    p.add(text);

    if (ps instanceof SummaryInformation) {
        /* Use the convenience methods. */
        final SummaryInformation si = (SummaryInformation) ps;
        final String summary = "\n"
                + "\nTitle:               " + si.getTitle()
                + "\nSubject:             " + si.getSubject()
                + "\nAuthor:              " + si.getAuthor()
                + "\nKeywords:            " + si.getKeywords()
                + "\nComments:            " + si.getComments()
                + "\nTemplate:            " + si.getTemplate()
                + "\nLast Author:         " + si.getLastAuthor()
                + "\nRev. Number:         " + si.getRevNumber()
                + "\nEdit Time:           " + si.getEditTime()
                + "\nLast Printed:        " + si.getLastPrinted()
                + "\nCreate Date/Time:    " + si.getCreateDateTime()
                + "\nLast Save Date/Time: " + si.getLastSaveDateTime()
                + "\nPage Count:          " + si.getPageCount()
                + "\nWord Count:          " + si.getWordCount()
                + "\nChar Count:          " + si.getCharCount()
                // (the thumbnail is not rendered)
                + "\nApplication Name:    " + si.getApplicationName()
                + "\nSecurity:            " + si.getSecurity();
        text.append(summary);
    }

    if (selected) {
        Util.invert(text);
    }
    return p;
}