List of usage examples for org.apache.poi.hpsf.wellknown PropertyIDMap PID_AUTHOR
int PID_AUTHOR
To view the source code for org.apache.poi.hpsf.wellknown PropertyIDMap PID_AUTHOR, click Source Link.
From source file:org.jlibrary.core.search.extraction.MSOfficeExtractor.java
License:Open Source License
/** * Returns a map with the extracted meta information from the document.<p> * //w w w . j a v a 2s.co m * @return a map with the extracted meta information from the document */ protected HeaderMetaData extractMetaInformation() { HeaderMetaData metadata = new HeaderMetaData(); String meta; if (m_summary != null) { // can't use convenience methods on summary since they can't deal with multiple sections Section section = (Section) m_summary.getSections().get(0); meta = (String) section.getProperty(PropertyIDMap.PID_TITLE); if ((meta != null) && !meta.equals("")) { metadata.setTitle(meta); metadata.setDescription(meta); } meta = (String) section.getProperty(PropertyIDMap.PID_KEYWORDS); if ((meta != null) && !meta.equals("")) { metadata.setKeywords(meta); } meta = (String) section.getProperty(PropertyIDMap.PID_SUBJECT); if ((meta != null) && !meta.equals("")) { metadata.setDescription(meta); } meta = (String) section.getProperty(PropertyIDMap.PID_COMMENTS); if ((meta != null) && !meta.equals("")) { // Not handled } // extract other available meta information meta = (String) section.getProperty(PropertyIDMap.PID_AUTHOR); if ((meta != null) && !meta.equals("")) { metadata.setAuthor(meta); } Date date; date = (Date) section.getProperty(PropertyIDMap.PID_CREATE_DTM); if ((date != null) && (date.getTime() > 0)) { // Not handled } date = (Date) section.getProperty(PropertyIDMap.PID_LASTSAVE_DTM); if ((date != null) && (date.getTime() > 0)) { // Not handled } } if (m_documentSummary != null) { // can't use convenience methods on document since they can't deal with multiple sections Section section = (Section) m_documentSummary.getSections().get(0); // extract available meta information from document summary meta = (String) section.getProperty(PropertyIDMap.PID_COMPANY); if ((meta != null) && !meta.equals("")) { // Not handled } meta = (String) section.getProperty(PropertyIDMap.PID_MANAGER); if ((meta != null) && !meta.equals("")) { // Not handled } meta = (String) 
section.getProperty(PropertyIDMap.PID_CATEGORY); if ((meta != null) && !meta.equals("")) { // Not handled } } return metadata; }
From source file:org.opencms.search.extractors.A_CmsTextExtractorMsOfficeBase.java
License:Open Source License
/** * Creates the extraction result for this MS Office document.<p> * // w w w. ja v a 2s . co m * The extraction result contains the raw content, plus additional meta information * as content items read from the MS Office document properties.<p> * * @param rawContent the raw content extracted from the document * * @return the extraction result for this MS Office document */ protected I_CmsExtractionResult createExtractionResult(String rawContent) { Map contentItems = new HashMap(); if (CmsStringUtil.isNotEmpty(rawContent)) { contentItems.put(I_CmsExtractionResult.ITEM_RAW, rawContent); } StringBuffer content = new StringBuffer(rawContent); if (m_summary != null) { // can't use convenience methods on summary since they can't deal with multiple sections Section section = (Section) m_summary.getSections().get(0); combineContentItem((String) section.getProperty(PropertyIDMap.PID_TITLE), I_CmsExtractionResult.ITEM_TITLE, content, contentItems); combineContentItem((String) section.getProperty(PropertyIDMap.PID_KEYWORDS), I_CmsExtractionResult.ITEM_KEYWORDS, content, contentItems); combineContentItem((String) section.getProperty(PropertyIDMap.PID_SUBJECT), I_CmsExtractionResult.ITEM_SUBJECT, content, contentItems); combineContentItem((String) section.getProperty(PropertyIDMap.PID_COMMENTS), I_CmsExtractionResult.ITEM_COMMENTS, content, contentItems); combineContentItem((String) section.getProperty(PropertyIDMap.PID_AUTHOR), I_CmsExtractionResult.ITEM_AUTHOR, content, contentItems); } if (m_documentSummary != null) { // can't use convenience methods on document since they can't deal with multiple sections Section section = (Section) m_documentSummary.getSections().get(0); // extract available meta information from document summary combineContentItem((String) section.getProperty(PropertyIDMap.PID_COMPANY), I_CmsExtractionResult.ITEM_COMPANY, content, contentItems); combineContentItem((String) section.getProperty(PropertyIDMap.PID_MANAGER), 
I_CmsExtractionResult.ITEM_MANAGER, content, contentItems); combineContentItem((String) section.getProperty(PropertyIDMap.PID_CATEGORY), I_CmsExtractionResult.ITEM_CATEGORY, content, contentItems); } // free some memory cleanup(); return new CmsExtractionResult(content.toString(), contentItems); }