Example usage for org.apache.poi.xssf.extractor XSSFExcelExtractor getCoreProperties

List of usage examples for org.apache.poi.xssf.extractor XSSFExcelExtractor getCoreProperties

Introduction

On this page you can find example usages of org.apache.poi.xssf.extractor XSSFExcelExtractor getCoreProperties.

Prototype

public CoreProperties getCoreProperties() 

Source Link

Document

Returns the core document properties

Usage

From source file:com.frameworkset.platform.cms.searchmanager.extractors.CmsExtractorMsExcel.java

License:Open Source License

/**
 * Extracts the text of an Excel 2007 (.xlsx) workbook read from the given stream.
 * <p>
 * The extractor is configured to emit formulas instead of their results and to
 * leave sheet names out of the text. As a side effect, the workbook's core
 * properties are stored in {@code this.cp}.
 *
 * @param in stream supplying the .xlsx data
 * @return the text returned by the extractor
 * @throws IOException if the workbook cannot be read or the extractor cannot be closed
 */
public String readExcel2007(InputStream in) throws IOException {
    XSSFWorkbook xwb = new XSSFWorkbook(in);
    XSSFExcelExtractor extractor = new XSSFExcelExtractor(xwb);
    try {
        extractor.setFormulasNotResults(true);
        extractor.setIncludeSheetNames(false);
        String content = extractor.getText();
        // Read the properties before the extractor is closed.
        // NOTE(review): confirm the CoreProperties object stays usable after close()
        // for the POI version in use.
        this.cp = extractor.getCoreProperties();
        return content;
    } finally {
        // The extractor was previously never closed, leaking the opened package.
        extractor.close();
    }
}

From source file:com.jaeksoft.searchlib.parser.XlsxParser.java

License:Open Source License

/**
 * Parses an XLSX stream into a new parser result item.
 * <p>
 * Core properties (title, creator, subject, description, keywords) are added as
 * fields when available, followed by the extracted text with consecutive spaces
 * collapsed, and finally language detection is run on the content field.
 * The {@code lang} argument is not used by this method.
 *
 * @param streamLimiter supplies the input stream holding the workbook
 * @param lang          unused here
 * @throws IOException if the workbook cannot be read
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {

    XSSFWorkbook xlsx = new XSSFWorkbook(streamLimiter.getNewInputStream());
    XSSFExcelExtractor extractor = null;
    try {
        extractor = new XSSFExcelExtractor(xlsx);
        // Text extraction options: include comments, headers/footers and sheet names.
        extractor.setIncludeCellComments(true);
        extractor.setIncludeHeadersFooters(true);
        extractor.setIncludeSheetNames(true);

        ParserResultItem item = getNewParserResultItem();

        // Document-level metadata, when the workbook provides it.
        CoreProperties props = extractor.getCoreProperties();
        if (props != null) {
            item.addField(ParserFieldEnum.title, props.getTitle());
            item.addField(ParserFieldEnum.creator, props.getCreator());
            item.addField(ParserFieldEnum.subject, props.getSubject());
            item.addField(ParserFieldEnum.description, props.getDescription());
            item.addField(ParserFieldEnum.keywords, props.getKeywords());
        }

        String text = StringUtils.replaceConsecutiveSpaces(extractor.getText(), " ");
        item.addField(ParserFieldEnum.content, text);

        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        IOUtils.close(extractor);
    }

}

From source file:com.opensearchserver.extractor.parser.Xlsx.java

License:Apache License

/**
 * Parses an XLSX stream: records the workbook's core properties in {@code metas}
 * and adds the extracted text plus language detection to a new parser document.
 * The {@code extension} and {@code mimeType} arguments are not used by this method.
 *
 * @param inputStream stream supplying the workbook data
 * @param extension   unused here
 * @param mimeType    unused here
 * @throws Exception if reading or extraction fails
 */
@Override
protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception {

    XSSFWorkbook xlsx = null;
    XSSFExcelExtractor extractor = null;
    try {
        xlsx = new XSSFWorkbook(inputStream);
        extractor = new XSSFExcelExtractor(xlsx);
        // Text extraction options: include comments, headers/footers and sheet names.
        extractor.setIncludeCellComments(true);
        extractor.setIncludeHeadersFooters(true);
        extractor.setIncludeSheetNames(true);

        // Document-level metadata, when the workbook provides it.
        CoreProperties props = extractor.getCoreProperties();
        if (props != null) {
            metas.add(TITLE, props.getTitle());
            metas.add(CREATOR, props.getCreator());
            metas.add(CREATION_DATE, props.getCreated());
            metas.add(MODIFICATION_DATE, props.getModified());
            metas.add(SUBJECT, props.getSubject());
            metas.add(DESCRIPTION, props.getDescription());
            metas.add(KEYWORDS, props.getKeywords());
        }

        ParserDocument doc = getNewParserDocument();
        doc.add(CONTENT, extractor.getText());
        doc.add(LANG_DETECTION, languageDetection(CONTENT, 10000));

    } finally {
        // Close the extractor first, then the workbook; either may still be null
        // if construction failed.
        if (extractor != null) {
            IOUtils.closeQuietly(extractor);
        }
        if (xlsx != null) {
            IOUtils.closeQuietly(xlsx);
        }
    }

}

From source file:com.opensearchserver.textextractor.parser.Xlsx.java

License:Apache License

/**
 * Extracts metadata and text from an already-opened workbook: records the core
 * properties in {@code metas} and adds the extracted text plus language detection
 * to a new parser document. The extractor created here is closed before returning.
 *
 * @param workbook the workbook to extract from
 * @throws Exception if extraction fails
 */
private void parseContent(XSSFWorkbook workbook) throws Exception {

    XSSFExcelExtractor extractor = null;
    try {
        extractor = new XSSFExcelExtractor(workbook);
        // Text extraction options: include comments, headers/footers and sheet names.
        extractor.setIncludeCellComments(true);
        extractor.setIncludeHeadersFooters(true);
        extractor.setIncludeSheetNames(true);

        // Document-level metadata, when the workbook provides it.
        CoreProperties props = extractor.getCoreProperties();
        if (props != null) {
            metas.add(TITLE, props.getTitle());
            metas.add(CREATOR, props.getCreator());
            metas.add(CREATION_DATE, props.getCreated());
            metas.add(MODIFICATION_DATE, props.getModified());
            metas.add(SUBJECT, props.getSubject());
            metas.add(DESCRIPTION, props.getDescription());
            metas.add(KEYWORDS, props.getKeywords());
        }

        ParserDocument doc = getNewParserDocument();
        doc.add(CONTENT, extractor.getText());
        doc.add(LANG_DETECTION, languageDetection(CONTENT, 10000));

    } finally {
        // The extractor is null only if its construction failed.
        if (extractor != null) {
            IOUtils.closeQuietly(extractor);
        }
    }

}