List of usage examples for org.apache.poi.hpsf SummaryInformation getKeywords
public String getKeywords()
From source file:com.duroty.lucene.parser.utils.POIFSListener.java
License:Apache License
/** * DOCUMENT ME!/* w w w .j a v a 2 s. c o m*/ * * @param arg0 DOCUMENT ME! */ public void processPOIFSReaderEvent(POIFSReaderEvent readerEvent) { org.apache.poi.hpsf.PropertySet propertySet; try { propertySet = PropertySetFactory.create(readerEvent.getStream()); SummaryInformation info = (SummaryInformation) propertySet; this.author = info.getAuthor(); this.title = info.getTitle(); this.keywords = info.getKeywords(); this.subject = info.getSubject(); } catch (NoPropertySetStreamException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (MarkUnsupportedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (UnexpectedPropertySetTypeException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:com.flexive.extractor.FxSummaryInformation.java
License:Open Source License
/**
 * Builds an instance by copying the values exposed by the given summary
 * information. The {@code encrypted} flag is always initialised to
 * {@code false}.
 *
 * @param si the summary information to copy from
 */
public FxSummaryInformation(SummaryInformation si) {
    // Descriptive properties
    this.title = si.getTitle();
    this.author = si.getAuthor();
    this.lastModifiedBy = si.getLastAuthor();
    this.keywords = si.getKeywords();
    this.comments = si.getComments();
    this.applicationName = si.getApplicationName();
    this.revNumber = si.getRevNumber();

    // Timestamps; the edit time is a long that is wrapped in a Date
    this.createdAt = si.getCreateDateTime();
    this.lastModifiedAt = si.getLastSaveDateTime();
    this.lastPrintedAt = si.getLastPrinted();
    this.editTime = new Date(si.getEditTime());

    // Document statistics
    this.pageCount = si.getPageCount();
    this.wordCount = si.getWordCount();
    this.charCount = si.getCharCount();

    this.encrypted = false;
}
From source file:com.openkm.util.metadata.MetadataExtractor.java
License:Open Source License
/** * Extract metadata from Office Word/*w w w . java 2s . c om*/ */ public static OfficeMetadata officeExtractor(InputStream is, String mimeType) throws IOException { POIFSFileSystem fs = new POIFSFileSystem(is); OfficeMetadata md = new OfficeMetadata(); SummaryInformation si = null; if (MimeTypeConfig.MIME_MS_WORD.equals(mimeType)) { si = new WordExtractor(fs).getSummaryInformation(); } else if (MimeTypeConfig.MIME_MS_EXCEL.equals(mimeType)) { si = new ExcelExtractor(fs).getSummaryInformation(); } else if (MimeTypeConfig.MIME_MS_POWERPOINT.equals(mimeType)) { si = new PowerPointExtractor(fs).getSummaryInformation(); } if (si != null) { md.setTitle(si.getTitle()); md.setSubject(si.getSubject()); md.setAuthor(si.getAuthor()); md.setLastAuthor(si.getLastAuthor()); md.setKeywords(si.getKeywords()); md.setComments(si.getComments()); md.setTemplate(si.getTemplate()); md.setRevNumber(si.getRevNumber()); md.setApplicationName(si.getApplicationName()); md.setEditTime(si.getEditTime()); md.setPageCount(si.getPageCount()); md.setWordCount(si.getWordCount()); md.setCharCount(si.getCharCount()); md.setSecurity(si.getSecurity()); Calendar createDateTime = Calendar.getInstance(); createDateTime.setTime(si.getCreateDateTime()); md.setCreateDateTime(createDateTime); Calendar lastSaveDateTime = Calendar.getInstance(); lastSaveDateTime.setTime(si.getLastSaveDateTime()); md.setLastSaveDateTime(lastSaveDateTime); Calendar lastPrinted = Calendar.getInstance(); lastPrinted.setTime(si.getLastPrinted()); md.setLastPrinted(lastPrinted); } log.info("officeExtractor: {}", md); return md; }
From source file:com.opensearchserver.extractor.parser.Doc.java
License:Apache License
/**
 * Parses a Word document: records its summary metadata (when present),
 * adds every paragraph as content of a new parser document and finally
 * adds the language-detection result.
 *
 * @param inputStream stream containing the Word document
 * @throws IOException if the document cannot be read
 */
private void currentWordExtraction(InputStream inputStream) throws IOException {
    // Built before the try block: if construction fails there is nothing to close.
    final WordExtractor extractor = new WordExtractor(inputStream);
    try {
        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            metas.add(KEYWORDS, summary.getKeywords());
        }

        final ParserDocument parsed = getNewParserDocument();
        for (final String paragraph : extractor.getParagraphText()) {
            parsed.add(CONTENT, paragraph);
        }
        parsed.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        IOUtils.closeQuietly(extractor);
    }
}
From source file:com.opensearchserver.extractor.parser.Publisher.java
License:Apache License
@Override protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception { PublisherTextExtractor extractor = null; try {//from w ww . ja va2 s . co m extractor = new PublisherTextExtractor(inputStream); SummaryInformation info = extractor.getSummaryInformation(); if (info != null) { metas.add(TITLE, info.getTitle()); metas.add(AUTHOR, info.getAuthor()); metas.add(SUBJECT, info.getSubject()); metas.add(CREATION_DATE, info.getCreateDateTime()); metas.add(MODIFICATION_DATE, info.getLastSaveDateTime()); metas.add(CONTENT, info.getKeywords()); metas.add(COMMENTS, info.getComments()); } String text = extractor.getText(); if (StringUtils.isEmpty(text)) return; ParserDocument result = getNewParserDocument(); result.add(CONTENT, text); result.add(LANG_DETECTION, languageDetection(CONTENT, 10000)); } finally { if (extractor != null) IOUtils.closeQuietly(extractor); } }
From source file:com.opensearchserver.extractor.parser.Visio.java
License:Apache License
/**
 * Parses a Visio document: records its summary metadata (when present)
 * and, if the extractor returns a text array, adds each entry plus the
 * language-detection result to a new parser document.
 *
 * @param inputStream stream containing the Visio document
 * @param extension   not used by this implementation
 * @param mimeType    not used by this implementation
 * @throws Exception if the document cannot be read
 */
@Override
protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception {
    // Built before the try block: if construction fails there is nothing to close.
    final VisioTextExtractor visio = new VisioTextExtractor(inputStream);
    try {
        final SummaryInformation summary = visio.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            // NOTE(review): keywords are stored under CONTENT here - confirm this is
            // intended rather than a dedicated keywords field.
            metas.add(CONTENT, summary.getKeywords());
            metas.add(COMMENTS, summary.getComments());
        }

        final String[] chunks = visio.getAllText();
        if (chunks != null) {
            final ParserDocument parsed = getNewParserDocument();
            for (final String chunk : chunks) {
                parsed.add(CONTENT, chunk);
            }
            parsed.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
        }
    } finally {
        IOUtils.closeQuietly(visio);
    }
}
From source file:com.opensearchserver.extractor.parser.Xls.java
License:Apache License
@Override protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception { HSSFWorkbook workbook = new HSSFWorkbook(inputStream); ExcelExtractor excel = null;//from www . j a va 2s .c o m try { excel = new ExcelExtractor(workbook); SummaryInformation info = excel.getSummaryInformation(); if (info != null) { metas.add(TITLE, info.getTitle()); metas.add(AUTHOR, info.getAuthor()); metas.add(SUBJECT, info.getSubject()); metas.add(CREATION_DATE, info.getCreateDateTime()); metas.add(MODIFICATION_DATE, info.getLastSaveDateTime()); metas.add(KEYWORDS, info.getKeywords()); } ParserDocument result = getNewParserDocument(); result.add(CONTENT, excel.getText()); result.add(LANG_DETECTION, languageDetection(CONTENT, 10000)); } finally { if (excel != null) IOUtils.closeQuietly(excel); } }
From source file:com.opensearchserver.textextractor.parser.Visio.java
License:Open Source License
/**
 * Parses a Visio document: records its summary metadata (when present)
 * and, if the extractor returns a text array, adds each entry plus the
 * language-detection result to a new parser document.
 *
 * @param inputStream stream containing the Visio document
 * @throws Exception if the document cannot be read
 */
@Override
protected void parseContent(InputStream inputStream) throws Exception {
    // Built before the try block: if construction fails there is nothing to close.
    final VisioTextExtractor visio = new VisioTextExtractor(inputStream);
    try {
        final SummaryInformation docInfo = visio.getSummaryInformation();
        if (docInfo != null) {
            metas.add(TITLE, docInfo.getTitle());
            metas.add(AUTHOR, docInfo.getAuthor());
            metas.add(SUBJECT, docInfo.getSubject());
            metas.add(CREATION_DATE, docInfo.getCreateDateTime());
            metas.add(MODIFICATION_DATE, docInfo.getLastSaveDateTime());
            // NOTE(review): keywords are stored under CONTENT here - confirm this is
            // intended rather than a dedicated keywords field.
            metas.add(CONTENT, docInfo.getKeywords());
            metas.add(COMMENTS, docInfo.getComments());
        }

        final String[] pieces = visio.getAllText();
        if (pieces != null) {
            final ParserDocument output = getNewParserDocument();
            for (final String piece : pieces) {
                output.add(CONTENT, piece);
            }
            output.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
        }
    } finally {
        IOUtils.closeQuietly(visio);
    }
}
From source file:com.opensearchserver.textextractor.parser.Xls.java
License:Apache License
@Override protected void parseContent(InputStream inputStream) throws Exception { HSSFWorkbook workbook = new HSSFWorkbook(inputStream); ExcelExtractor excel = null;// www .j a va 2 s . co m try { excel = new ExcelExtractor(workbook); SummaryInformation info = excel.getSummaryInformation(); if (info != null) { metas.add(TITLE, info.getTitle()); metas.add(AUTHOR, info.getAuthor()); metas.add(SUBJECT, info.getSubject()); metas.add(CREATION_DATE, info.getCreateDateTime()); metas.add(MODIFICATION_DATE, info.getLastSaveDateTime()); metas.add(KEYWORDS, info.getKeywords()); } ParserDocument result = getNewParserDocument(); result.add(CONTENT, excel.getText()); result.add(LANG_DETECTION, languageDetection(CONTENT, 10000)); } finally { if (excel != null) IOUtils.closeQuietly(excel); } }
From source file:com.pnf.plugin.ole.parser.StreamReader.java
License:Apache License
private List<INode> readSummaryInfoStream(ByteBuffer stream) { List<INode> roots = new LinkedList<>(); String propType = "Property"; try {/*from w ww .j a v a 2 s.co m*/ SummaryInformation sInfo = new SummaryInformation(new PropertySet(stream.array())); StreamEntry cInfo = new StreamEntry("Creation Information"); cInfo.addChild(new StreamEntry("Application Name", propType, sInfo.getApplicationName())); cInfo.addChild(new StreamEntry("Creation", "Time", sInfo.getCreateDateTime() != null ? sInfo.getCreateDateTime().toString() : null)); cInfo.addChild(new StreamEntry("Author", propType, sInfo.getAuthor())); cInfo.addChild(new StreamEntry("Last Author", propType, sInfo.getLastAuthor())); cInfo.addChild(new StreamEntry("Template", propType, sInfo.getTemplate())); roots.add(cInfo); propType = "Time"; StreamEntry timeInfo = new StreamEntry("Times"); timeInfo.addChild(new StreamEntry("Total Edit Time", propType, String.valueOf(sInfo.getEditTime()))); timeInfo.addChild(new StreamEntry("Last Saved", propType, sInfo.getLastSaveDateTime() != null ? sInfo.getLastSaveDateTime().toString() : null)); timeInfo.addChild(new StreamEntry("Last Printed", propType, sInfo.getLastPrinted() != null ? 
sInfo.getLastPrinted().toString() : null)); roots.add(timeInfo); propType = "Misc"; StreamEntry misc = new StreamEntry("Miscellaneous"); misc.addChild(new StreamEntry("OS Version", "int", String.valueOf(sInfo.getOSVersion()))); misc.addChild(new StreamEntry("Revision Number", "int", sInfo.getRevNumber())); misc.addChild(new StreamEntry("Page Count", "int", String.valueOf(sInfo.getPageCount()))); misc.addChild(new StreamEntry("Word Count", "int", String.valueOf(sInfo.getWordCount()))); int secVal = sInfo.getSecurity(); String security = null; if (!sInfo.wasNull()) { // Set description according to POI documentation switch (secVal) { case 0: security = "No security"; break; case 1: security = "Password protected"; break; case 2: security = "Read-only recommended"; break; case 4: security = "Read-only enforced"; break; case 8: security = "Locked for annotations"; break; default: break; } security += " (code " + secVal + ")"; } else { security = "Field not set"; } misc.addChild(new StreamEntry("Document Security", "int", security)); misc.addChild(new StreamEntry("Subject", propType, sInfo.getSubject())); misc.addChild(new StreamEntry("Keywords", propType, sInfo.getKeywords())); roots.add(misc); } catch (Throwable t) { addMessage("Attempted to read " + SUMM_INFO + " stream but no property sets were found.", null, Message.CORRUPT); } return roots; }