Example usage for org.apache.poi.hpsf SummaryInformation getTitle

List of usage examples for org.apache.poi.hpsf SummaryInformation getTitle

Introduction

On this page you can find example usages of org.apache.poi.hpsf.SummaryInformation.getTitle().

Prototype

public String getTitle() 

Source Link

Usage

From source file:com.duroty.lucene.parser.utils.POIFSListener.java

License:Apache License

/**
 * DOCUMENT ME!/*from ww  w  .j a  va 2  s  .c  om*/
 *
 * @param arg0 DOCUMENT ME!
 */
public void processPOIFSReaderEvent(POIFSReaderEvent readerEvent) {
    org.apache.poi.hpsf.PropertySet propertySet;

    try {
        propertySet = PropertySetFactory.create(readerEvent.getStream());

        SummaryInformation info = (SummaryInformation) propertySet;
        this.author = info.getAuthor();
        this.title = info.getTitle();
        this.keywords = info.getKeywords();
        this.subject = info.getSubject();
    } catch (NoPropertySetStreamException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (MarkUnsupportedException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (UnexpectedPropertySetTypeException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}

From source file:com.flexive.extractor.FxSummaryInformation.java

License:Open Source License

/**
 * Creates an instance populated from the given POI summary information.
 *
 * <p>Each field is read from the corresponding getter of {@code si}; the edit
 * time is wrapped in a {@link Date}, and {@code encrypted} is always set to
 * {@code false}.
 *
 * @param si the summary information to copy from
 */
public FxSummaryInformation(SummaryInformation si) {
    // Textual properties
    this.title = si.getTitle();
    this.author = si.getAuthor();
    this.lastModifiedBy = si.getLastAuthor();
    this.keywords = si.getKeywords();
    this.comments = si.getComments();
    this.applicationName = si.getApplicationName();
    this.revNumber = si.getRevNumber();

    // Document statistics
    this.pageCount = si.getPageCount();
    this.wordCount = si.getWordCount();
    this.charCount = si.getCharCount();

    // Timestamps
    this.createdAt = si.getCreateDateTime();
    this.lastModifiedAt = si.getLastSaveDateTime();
    this.lastPrintedAt = si.getLastPrinted();
    this.editTime = new Date(si.getEditTime());

    this.encrypted = false;
}

From source file:com.jaeksoft.searchlib.parser.DocParser.java

License:Open Source License

/**
 * Reads a Word document through {@link WordExtractor} and fills the result item.
 *
 * <p>When summary information is available, its title, author and subject are
 * added. Each paragraph is then split on newline characters and every fragment
 * is added to the content field after being passed through
 * {@code StringUtils.replaceConsecutiveSpaces} with a single space.
 *
 * @param result      the item receiving the extracted fields
 * @param inputStream the stream the document is read from
 * @throws IOException if the extraction fails with an I/O error
 */
private void currentWordExtraction(ParserResultItem result, InputStream inputStream) throws IOException {
    WordExtractor extractor = null;
    try {
        extractor = new WordExtractor(inputStream);

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            result.addField(ParserFieldEnum.title, summary.getTitle());
            result.addField(ParserFieldEnum.author, summary.getAuthor());
            result.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        for (String paragraph : extractor.getParagraphText()) {
            for (String line : paragraph.split("\\n")) {
                result.addField(ParserFieldEnum.content, StringUtils.replaceConsecutiveSpaces(line, " "));
            }
        }
    } finally {
        // Always hand the extractor (possibly still null) to the closing helper.
        IOUtils.close(extractor);
    }
}

From source file:com.jaeksoft.searchlib.parser.DocParser.java

License:Open Source License

/**
 * Reads a Word document through {@link Word6Extractor} and fills the result item.
 *
 * <p>When summary information is available, its title, author and subject are
 * added. The full text is then split on newline characters and every fragment
 * is added to the content field after being passed through
 * {@code StringUtils.replaceConsecutiveSpaces} with a single space.
 *
 * @param result      the item receiving the extracted fields
 * @param inputStream the stream the document is read from
 * @throws IOException if the extraction fails with an I/O error
 */
private void oldWordExtraction(ParserResultItem result, InputStream inputStream) throws IOException {
    Word6Extractor extractor = null;
    try {
        extractor = new Word6Extractor(inputStream);

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            result.addField(ParserFieldEnum.title, summary.getTitle());
            result.addField(ParserFieldEnum.author, summary.getAuthor());
            result.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        final String fullText = extractor.getText();
        for (String line : fullText.split("\\n")) {
            result.addField(ParserFieldEnum.content, StringUtils.replaceConsecutiveSpaces(line, " "));
        }
    } finally {
        // Always hand the extractor (possibly still null) to the closing helper.
        IOUtils.close(extractor);
    }
}

From source file:com.jaeksoft.searchlib.parser.PublisherParser.java

License:Open Source License

/**
 * Parses a Publisher document obtained from the stream limiter.
 *
 * <p>Title, author and subject are added when summary information is present.
 * The extracted text is added as content after being passed through
 * {@code StringUtils.replaceConsecutiveSpaces} with a single space, and
 * language detection is then requested on the content field.
 *
 * @param streamLimiter supplies the input stream of the document
 * @param lang          not used by this implementation
 * @throws IOException if the extraction fails with an I/O error
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    PublisherTextExtractor publisher = null;
    try {
        publisher = new PublisherTextExtractor(streamLimiter.getNewInputStream());

        final SummaryInformation summary = publisher.getSummaryInformation();
        final ParserResultItem item = getNewParserResultItem();
        if (summary != null) {
            item.addField(ParserFieldEnum.title, summary.getTitle());
            item.addField(ParserFieldEnum.author, summary.getAuthor());
            item.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        final String body = StringUtils.replaceConsecutiveSpaces(publisher.getText(), " ");
        item.addField(ParserFieldEnum.content, body);
        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        // Always hand the extractor (possibly still null) to the closing helper.
        IOUtils.close(publisher);
    }
}

From source file:com.jaeksoft.searchlib.parser.VisioParser.java

License:Open Source License

/**
 * Parses a Visio document obtained from the stream limiter.
 *
 * <p>Title, author and subject are added when summary information is present.
 * If the extractor returns text, each entry is added as content after being
 * passed through {@code StringUtils.replaceConsecutiveSpaces} with a single
 * space, and language detection is requested on the content field. When the
 * extractor returns {@code null}, neither content nor language detection is
 * produced.
 *
 * @param streamLimiter supplies the input stream of the document
 * @param lang          not used by this implementation
 * @throws IOException if the extraction fails with an I/O error
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    VisioTextExtractor visio = null;
    try {
        visio = new VisioTextExtractor(streamLimiter.getNewInputStream());

        final SummaryInformation summary = visio.getSummaryInformation();
        final ParserResultItem item = getNewParserResultItem();
        if (summary != null) {
            item.addField(ParserFieldEnum.title, summary.getTitle());
            item.addField(ParserFieldEnum.author, summary.getAuthor());
            item.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        final String[] allText = visio.getAllText();
        if (allText != null) {
            for (String entry : allText) {
                item.addField(ParserFieldEnum.content, StringUtils.replaceConsecutiveSpaces(entry, " "));
            }
            item.langDetection(10000, ParserFieldEnum.content);
        }
    } finally {
        // Always hand the extractor (possibly still null) to the closing helper.
        IOUtils.close(visio);
    }
}

From source file:com.jaeksoft.searchlib.parser.XlsParser.java

License:Open Source License

/**
 * Parses an Excel workbook obtained from the stream limiter.
 *
 * <p>Title, author and subject are added when summary information is present.
 * The extracted text is added as content after being passed through
 * {@code StringUtils.replaceConsecutiveSpaces} with a single space, and
 * language detection is then requested on the content field.
 *
 * @param streamLimiter supplies the input stream of the workbook
 * @param lang          not used by this implementation
 * @throws IOException if the workbook cannot be read
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    // The workbook is built before entering the try block, as in the original flow.
    final HSSFWorkbook book = new HSSFWorkbook(streamLimiter.getNewInputStream());
    ExcelExtractor extractor = null;
    try {
        extractor = new ExcelExtractor(book);
        final ParserResultItem item = getNewParserResultItem();

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            item.addField(ParserFieldEnum.title, summary.getTitle());
            item.addField(ParserFieldEnum.author, summary.getAuthor());
            item.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        item.addField(ParserFieldEnum.content,
                StringUtils.replaceConsecutiveSpaces(extractor.getText(), " "));
        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        // Always hand the extractor (possibly still null) to the closing helper.
        IOUtils.close(extractor);
    }
}

From source file:com.openkm.util.metadata.MetadataExtractor.java

License:Open Source License

/**
 * Extracts summary metadata from a Microsoft Office document.
 *
 * <p>The Word, Excel and PowerPoint MIME types from {@code MimeTypeConfig}
 * are handled. For any other MIME type, or when the document carries no
 * summary information, a freshly constructed {@link OfficeMetadata} with no
 * properties set is returned.
 *
 * <p>The create, last-save and last-printed dates are only set when the
 * document actually provides them; a missing date no longer triggers a
 * {@link NullPointerException}.
 *
 * @param is       the stream the document is read from
 * @param mimeType the MIME type used to choose the extractor
 * @return the extracted metadata
 * @throws IOException if the stream cannot be read as a POIFS file system
 */
public static OfficeMetadata officeExtractor(InputStream is, String mimeType) throws IOException {
    POIFSFileSystem fs = new POIFSFileSystem(is);
    OfficeMetadata md = new OfficeMetadata();
    SummaryInformation si = null;

    if (MimeTypeConfig.MIME_MS_WORD.equals(mimeType)) {
        si = new WordExtractor(fs).getSummaryInformation();
    } else if (MimeTypeConfig.MIME_MS_EXCEL.equals(mimeType)) {
        si = new ExcelExtractor(fs).getSummaryInformation();
    } else if (MimeTypeConfig.MIME_MS_POWERPOINT.equals(mimeType)) {
        si = new PowerPointExtractor(fs).getSummaryInformation();
    }

    if (si != null) {
        md.setTitle(si.getTitle());
        md.setSubject(si.getSubject());
        md.setAuthor(si.getAuthor());
        md.setLastAuthor(si.getLastAuthor());
        md.setKeywords(si.getKeywords());
        md.setComments(si.getComments());
        md.setTemplate(si.getTemplate());
        md.setRevNumber(si.getRevNumber());
        md.setApplicationName(si.getApplicationName());
        md.setEditTime(si.getEditTime());
        md.setPageCount(si.getPageCount());
        md.setWordCount(si.getWordCount());
        md.setCharCount(si.getCharCount());
        md.setSecurity(si.getSecurity());

        // Date properties are optional in the summary stream (for example a
        // document that was never printed), so each one is set only if present.
        Calendar createDateTime = toCalendarOrNull(si.getCreateDateTime());
        if (createDateTime != null) {
            md.setCreateDateTime(createDateTime);
        }

        Calendar lastSaveDateTime = toCalendarOrNull(si.getLastSaveDateTime());
        if (lastSaveDateTime != null) {
            md.setLastSaveDateTime(lastSaveDateTime);
        }

        Calendar lastPrinted = toCalendarOrNull(si.getLastPrinted());
        if (lastPrinted != null) {
            md.setLastPrinted(lastPrinted);
        }
    }

    log.info("officeExtractor: {}", md);
    return md;
}

/**
 * Converts a date into a calendar set to that instant, or returns
 * {@code null} when the date itself is {@code null}.
 */
private static Calendar toCalendarOrNull(java.util.Date date) {
    if (date == null) {
        return null;
    }
    Calendar calendar = Calendar.getInstance();
    calendar.setTime(date);
    return calendar;
}

From source file:com.opensearchserver.extractor.parser.Doc.java

License:Apache License

/**
 * Reads a Word document through {@link WordExtractor}.
 *
 * <p>When summary information is available, its title, author, subject,
 * creation date, last-save date and keywords are added to {@code metas}.
 * Every paragraph is then added as content to a new parser document, followed
 * by the result of {@code languageDetection(CONTENT, 10000)}.
 *
 * @param inputStream the stream the document is read from
 * @throws IOException if the extraction fails with an I/O error
 */
private void currentWordExtraction(InputStream inputStream) throws IOException {
    WordExtractor extractor = null;
    try {
        extractor = new WordExtractor(inputStream);

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            metas.add(KEYWORDS, summary.getKeywords());
        }

        final ParserDocument parsed = getNewParserDocument();
        for (String paragraph : extractor.getParagraphText()) {
            parsed.add(CONTENT, paragraph);
        }
        parsed.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        // Always hand the extractor (possibly still null) to the closing helper.
        IOUtils.closeQuietly(extractor);
    }
}

From source file:com.opensearchserver.extractor.parser.Doc.java

License:Apache License

/**
 * Reads a Word document through {@link Word6Extractor}.
 *
 * <p>When summary information is available, its title, author and subject are
 * added to {@code metas}. Every paragraph is then added as content to a new
 * parser document, followed by the result of
 * {@code languageDetection(CONTENT, 10000)}.
 *
 * @param inputStream the stream the document is read from
 * @throws IOException if the extraction fails with an I/O error
 */
private void oldWordExtraction(InputStream inputStream) throws IOException {
    Word6Extractor extractor = null;
    try {
        extractor = new Word6Extractor(inputStream);

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
        }

        final ParserDocument parsed = getNewParserDocument();
        // The paragraph accessor is deprecated; the warning is silenced on this declaration only.
        @SuppressWarnings("deprecation")
        final String[] paragraphs = extractor.getParagraphText();
        for (String paragraph : paragraphs) {
            parsed.add(CONTENT, paragraph);
        }
        parsed.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        // Always hand the extractor (possibly still null) to the closing helper.
        IOUtils.closeQuietly(extractor);
    }
}