Example usage for org.apache.poi.hpsf SummaryInformation getAuthor

List of usage examples for org.apache.poi.hpsf SummaryInformation getAuthor

Introduction

On this page you can find example usages of the getAuthor() method of org.apache.poi.hpsf.SummaryInformation.

Prototype

public String getAuthor() 

Source Link

Document

Returns the author (or null).

Usage

From source file:com.duroty.lucene.parser.utils.POIFSListener.java

License:Apache License

/**
 * Parses the stream of a POIFS reader event as a property set and, when it
 * is a {@link SummaryInformation}, copies its author, title, keywords and
 * subject into this listener's fields.
 *
 * <p>Property sets of any other type are ignored. Parsing failures are
 * reported on standard error and do not propagate to the caller.
 *
 * @param readerEvent the event whose stream is read as a property set
 */
public void processPOIFSReaderEvent(POIFSReaderEvent readerEvent) {
    try {
        org.apache.poi.hpsf.PropertySet propertySet = PropertySetFactory.create(readerEvent.getStream());

        // The factory result is typed as the general PropertySet, so verify
        // the concrete type before downcasting instead of letting a
        // ClassCastException escape from the listener callback.
        if (!(propertySet instanceof SummaryInformation)) {
            return;
        }

        SummaryInformation info = (SummaryInformation) propertySet;
        this.author = info.getAuthor();
        this.title = info.getTitle();
        this.keywords = info.getKeywords();
        this.subject = info.getSubject();
    } catch (NoPropertySetStreamException e) {
        // Best effort: report the failure and leave the remaining fields as they were.
        e.printStackTrace();
    } catch (MarkUnsupportedException e) {
        e.printStackTrace();
    } catch (UnexpectedPropertySetTypeException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:com.flexive.extractor.FxSummaryInformation.java

License:Open Source License

/**
 * Creates a snapshot of the given POI summary information by copying each
 * of its properties into the corresponding field of this object.
 *
 * @param si the summary information to copy from
 */
public FxSummaryInformation(SummaryInformation si) {
    // Descriptive text properties.
    this.title = si.getTitle();
    this.author = si.getAuthor();
    this.lastModifiedBy = si.getLastAuthor();
    this.keywords = si.getKeywords();
    this.comments = si.getComments();
    this.applicationName = si.getApplicationName();
    this.revNumber = si.getRevNumber();

    // Timestamps; the edit time value is stored wrapped in a Date.
    this.createdAt = si.getCreateDateTime();
    this.lastModifiedAt = si.getLastSaveDateTime();
    this.lastPrintedAt = si.getLastPrinted();
    this.editTime = new Date(si.getEditTime());

    // Document statistics.
    this.pageCount = si.getPageCount();
    this.wordCount = si.getWordCount();
    this.charCount = si.getCharCount();

    // This constructor always records the document as not encrypted.
    this.encrypted = false;
}

From source file:com.jaeksoft.searchlib.parser.DocParser.java

License:Open Source License

/**
 * Reads a Word document through {@code WordExtractor} and feeds its summary
 * properties and text into the given result item.
 *
 * <p>Title, author and subject are added when summary information is
 * available. Every paragraph is split on newlines and each fragment is added
 * as content after collapsing consecutive spaces.
 *
 * @param result      the item receiving the extracted fields
 * @param inputStream the stream the document is read from
 * @throws IOException declared for failures while reading the document
 */
private void currentWordExtraction(ParserResultItem result, InputStream inputStream) throws IOException {
    WordExtractor extractor = null;
    try {
        extractor = new WordExtractor(inputStream);

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            result.addField(ParserFieldEnum.title, summary.getTitle());
            result.addField(ParserFieldEnum.author, summary.getAuthor());
            result.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        for (String paragraph : extractor.getParagraphText()) {
            for (String line : paragraph.split("\\n")) {
                result.addField(ParserFieldEnum.content, StringUtils.replaceConsecutiveSpaces(line, " "));
            }
        }
    } finally {
        // The extractor is closed on every path, including failures.
        IOUtils.close(extractor);
    }
}

From source file:com.jaeksoft.searchlib.parser.DocParser.java

License:Open Source License

/**
 * Reads a document through {@code Word6Extractor} and feeds its summary
 * properties and text into the given result item.
 *
 * <p>Title, author and subject are added when summary information is
 * available. The full text is split on newlines and each fragment is added
 * as content after collapsing consecutive spaces.
 *
 * @param result      the item receiving the extracted fields
 * @param inputStream the stream the document is read from
 * @throws IOException declared for failures while reading the document
 */
private void oldWordExtraction(ParserResultItem result, InputStream inputStream) throws IOException {
    Word6Extractor extractor = null;
    try {
        extractor = new Word6Extractor(inputStream);

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            result.addField(ParserFieldEnum.title, summary.getTitle());
            result.addField(ParserFieldEnum.author, summary.getAuthor());
            result.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        for (String line : extractor.getText().split("\\n")) {
            result.addField(ParserFieldEnum.content, StringUtils.replaceConsecutiveSpaces(line, " "));
        }
    } finally {
        // The extractor is closed on every path, including failures.
        IOUtils.close(extractor);
    }
}

From source file:com.jaeksoft.searchlib.parser.PublisherParser.java

License:Open Source License

/**
 * Extracts summary properties and text from a Publisher document into a new
 * parser result item, then runs language detection on the content field.
 *
 * @param streamLimiter supplies the input stream of the document
 * @param lang          not used by this implementation
 * @throws IOException declared for failures while reading the document
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    PublisherTextExtractor publisher = null;
    try {
        publisher = new PublisherTextExtractor(streamLimiter.getNewInputStream());

        final SummaryInformation summary = publisher.getSummaryInformation();
        final ParserResultItem item = getNewParserResultItem();
        if (summary != null) {
            item.addField(ParserFieldEnum.title, summary.getTitle());
            item.addField(ParserFieldEnum.author, summary.getAuthor());
            item.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        final String text = publisher.getText();
        item.addField(ParserFieldEnum.content, StringUtils.replaceConsecutiveSpaces(text, " "));
        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        // The extractor is closed on every path, including failures.
        IOUtils.close(publisher);
    }
}

From source file:com.jaeksoft.searchlib.parser.VisioParser.java

License:Open Source License

/**
 * Extracts summary properties and text from a Visio document into a new
 * parser result item.
 *
 * <p>When the extractor returns no text array, the method ends after the
 * summary fields and language detection is not run.
 *
 * @param streamLimiter supplies the input stream of the document
 * @param lang          not used by this implementation
 * @throws IOException declared for failures while reading the document
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    VisioTextExtractor visio = null;
    try {
        visio = new VisioTextExtractor(streamLimiter.getNewInputStream());

        final SummaryInformation summary = visio.getSummaryInformation();
        final ParserResultItem item = getNewParserResultItem();
        if (summary != null) {
            item.addField(ParserFieldEnum.title, summary.getTitle());
            item.addField(ParserFieldEnum.author, summary.getAuthor());
            item.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        final String[] chunks = visio.getAllText();
        if (chunks != null) {
            for (String chunk : chunks) {
                item.addField(ParserFieldEnum.content, StringUtils.replaceConsecutiveSpaces(chunk, " "));
            }
            item.langDetection(10000, ParserFieldEnum.content);
        }
    } finally {
        // The extractor is closed on every path, including failures.
        IOUtils.close(visio);
    }
}

From source file:com.jaeksoft.searchlib.parser.XlsParser.java

License:Open Source License

/**
 * Extracts summary properties and text from an Excel workbook into a new
 * parser result item, then runs language detection on the content field.
 *
 * @param streamLimiter supplies the input stream of the workbook
 * @param lang          not used by this implementation
 * @throws IOException declared for failures while reading the workbook
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    final HSSFWorkbook book = new HSSFWorkbook(streamLimiter.getNewInputStream());
    ExcelExtractor extractor = null;
    try {
        extractor = new ExcelExtractor(book);
        final ParserResultItem item = getNewParserResultItem();

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            item.addField(ParserFieldEnum.title, summary.getTitle());
            item.addField(ParserFieldEnum.author, summary.getAuthor());
            item.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        final String text = extractor.getText();
        item.addField(ParserFieldEnum.content, StringUtils.replaceConsecutiveSpaces(text, " "));
        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        // The extractor is closed on every path, including failures.
        IOUtils.close(extractor);
    }
}

From source file:com.opendoorlogistics.core.tables.io.PoiIO.java

License:Open Source License

/**
 * Looks up the author of a workbook.
 *
 * <p>For an {@code HSSFWorkbook} the author comes from its summary
 * information. For an {@code XSSFWorkbook} it is the creator stored in the
 * core properties.
 *
 * @param wb the workbook to inspect
 * @return the author, or {@code null} when the workbook is of neither type
 *         or the relevant properties are missing
 */
private static String getAuthor(Workbook wb) {
    if (wb instanceof HSSFWorkbook) {
        SummaryInformation summary = ((HSSFWorkbook) wb).getSummaryInformation();
        return summary == null ? null : summary.getAuthor();
    }

    if (!(wb instanceof XSSFWorkbook)) {
        return null;
    }

    POIXMLProperties properties = ((XSSFWorkbook) wb).getProperties();
    if (properties == null) {
        return null;
    }

    POIXMLProperties.CoreProperties core = properties.getCoreProperties();
    return core == null ? null : core.getCreator();
}

From source file:com.openkm.util.metadata.MetadataExtractor.java

License:Open Source License

/**
 * Extract metadata from Office Word/*from   w ww.  j ava 2 s . co  m*/
 */
public static OfficeMetadata officeExtractor(InputStream is, String mimeType) throws IOException {
    POIFSFileSystem fs = new POIFSFileSystem(is);
    OfficeMetadata md = new OfficeMetadata();
    SummaryInformation si = null;

    if (MimeTypeConfig.MIME_MS_WORD.equals(mimeType)) {
        si = new WordExtractor(fs).getSummaryInformation();
    } else if (MimeTypeConfig.MIME_MS_EXCEL.equals(mimeType)) {
        si = new ExcelExtractor(fs).getSummaryInformation();
    } else if (MimeTypeConfig.MIME_MS_POWERPOINT.equals(mimeType)) {
        si = new PowerPointExtractor(fs).getSummaryInformation();
    }

    if (si != null) {
        md.setTitle(si.getTitle());
        md.setSubject(si.getSubject());
        md.setAuthor(si.getAuthor());
        md.setLastAuthor(si.getLastAuthor());
        md.setKeywords(si.getKeywords());
        md.setComments(si.getComments());
        md.setTemplate(si.getTemplate());
        md.setRevNumber(si.getRevNumber());
        md.setApplicationName(si.getApplicationName());
        md.setEditTime(si.getEditTime());
        md.setPageCount(si.getPageCount());
        md.setWordCount(si.getWordCount());
        md.setCharCount(si.getCharCount());
        md.setSecurity(si.getSecurity());

        Calendar createDateTime = Calendar.getInstance();
        createDateTime.setTime(si.getCreateDateTime());
        md.setCreateDateTime(createDateTime);

        Calendar lastSaveDateTime = Calendar.getInstance();
        lastSaveDateTime.setTime(si.getLastSaveDateTime());
        md.setLastSaveDateTime(lastSaveDateTime);

        Calendar lastPrinted = Calendar.getInstance();
        lastPrinted.setTime(si.getLastPrinted());
        md.setLastPrinted(lastPrinted);
    }

    log.info("officeExtractor: {}", md);
    return md;
}

From source file:com.opensearchserver.extractor.parser.Doc.java

License:Apache License

/**
 * Reads a Word document through {@code WordExtractor}, records its summary
 * properties as metas and adds its paragraphs to a new parser document.
 *
 * <p>Title, author, subject, creation date, modification date and keywords
 * are added when summary information is available. Every paragraph is then
 * added as content, followed by the language detection result.
 *
 * @param inputStream the stream the document is read from
 * @throws IOException declared for failures while reading the document
 */
private void currentWordExtraction(InputStream inputStream) throws IOException {
    WordExtractor extractor = null;
    try {
        extractor = new WordExtractor(inputStream);

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            metas.add(KEYWORDS, summary.getKeywords());
        }

        final ParserDocument parserDocument = getNewParserDocument();
        final String[] paragraphs = extractor.getParagraphText();
        for (int i = 0; i < paragraphs.length; i++) {
            parserDocument.add(CONTENT, paragraphs[i]);
        }
        parserDocument.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        // The extractor is closed quietly on every path, including failures.
        IOUtils.closeQuietly(extractor);
    }
}