Example usage for the org.apache.poi.hssf.extractor.ExcelExtractor constructor

List of usage examples for the org.apache.poi.hssf.extractor.ExcelExtractor constructor

Introduction

On this page you can find example usages of the org.apache.poi.hssf.extractor.ExcelExtractor constructor.

Prototype

public ExcelExtractor(DirectoryNode dir) throws IOException 

Source Link

Usage

From source file:com.bayareasoftware.chartengine.ds.util.ExcelDump.java

License:Apache License

/**
 * Extracts the text of the workbook stored in {@code fileName} with
 * {@link ExcelExtractor} and prints it to standard output twice: first
 * line by line with 1-based line numbers, then as a single block prefixed
 * with {@code "XLS: "}.
 *
 * @param fileName path of the workbook file to dump
 * @throws Exception if the file cannot be opened or parsed
 */
private static void runNew(String fileName) throws Exception {
    String text;
    // try-with-resources releases the file handle even when parsing fails.
    try (InputStream inp = new FileInputStream(fileName)) {
        HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(inp));
        ExcelExtractor xt = new ExcelExtractor(wb);

        xt.setFormulasNotResults(false);
        xt.setIncludeBlankCells(true);
        xt.setIncludeSheetNames(false);
        text = xt.getText();
    }

    String[] lines = StringUtil.splitCompletely(text, '\n');
    for (int i = 0; i < lines.length; i++) {
        System.out.println((i + 1) + ") " + lines[i]);
    }
    System.out.println("XLS: \n" + text);
}

From source file:com.docdoku.server.esindexer.ESTools.java

License:Open Source License

/**
 * Converts an Excel document to plain text.
 * <p>
 * Streams that start with a POIFS header (pre-2007 files) are handed to
 * {@link ExcelExtractor}. Anything else is opened as an
 * {@link XSSFWorkbook} and every sheet is walked cell by cell: cells are
 * separated by a space, rows by a newline, and each sheet is followed by
 * an extra newline.
 *
 * @param inputStream the document content; it is wrapped in a
 *                    {@link BufferedInputStream} and closed before this
 *                    method returns
 * @return the extracted text
 * @throws IOException        if the stream cannot be read
 * @throws OpenXML4JException declared for the underlying POI calls
 * @throws XmlException       declared for the underlying POI calls
 */
private static String microsoftExcelDocumentToString(InputStream inputStream)
        throws IOException, OpenXML4JException, XmlException {
    StringBuilder sb = new StringBuilder();
    try (InputStream excelStream = new BufferedInputStream(inputStream)) {
        if (POIFSFileSystem.hasPOIFSHeader(excelStream)) { // Before 2007 format files
            POIFSFileSystem excelFS = new POIFSFileSystem(excelStream);
            ExcelExtractor excelExtractor = new ExcelExtractor(excelFS);
            sb.append(excelExtractor.getText());
        } else { // New format
            XSSFWorkbook workBook = new XSSFWorkbook(excelStream);
            int numberOfSheets = workBook.getNumberOfSheets();
            for (int i = 0; i < numberOfSheets; i++) {
                // Read sheet i; the previous code always read sheet 0, so the
                // first sheet was repeated and all others were skipped.
                XSSFSheet sheet = workBook.getSheetAt(i);
                Iterator<Row> rowIterator = sheet.rowIterator();
                while (rowIterator.hasNext()) {
                    Row row = rowIterator.next();
                    Iterator<Cell> cellIterator = row.cellIterator();
                    while (cellIterator.hasNext()) {
                        Cell cell = cellIterator.next();
                        sb.append(cell.toString());
                        sb.append(" ");
                    }
                    sb.append("\n");
                }
                sb.append("\n");
            }
        }
    }
    return sb.toString();
}

From source file:com.frameworkset.platform.cms.searchmanager.extractors.CmsExtractorMsExcel.java

License:Open Source License

/** 
 * ?excel2003 /*from ww  w .ja v  a  2  s .c o  m*/
 * @param path 
 * @return 
 * @throws IOException 
 */
public String readExcel(InputStream in) throws IOException {

    String content = null;
    try {

        HSSFWorkbook wb = new HSSFWorkbook(in);
        ExcelExtractor extractor = new ExcelExtractor(wb);
        extractor.setFormulasNotResults(true);
        extractor.setIncludeSheetNames(false);
        content = extractor.getText();
        this.m_documentSummary = extractor.getDocSummaryInformation();
        this.m_summary = extractor.getSummaryInformation();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
    return content;
}

From source file:com.isotrol.impe3.idx.oc.extractors.ExtractorMsExcel.java

License:Open Source License

/**
 * Extracts the text of an Excel file.
 *
 * @param in stream supplying the workbook; it is passed to the
 *           {@code HSSFWorkbook} constructor
 * @return the raw text after {@code ExtractorUtil.removeControlChars}
 *         has been applied to it
 * @throws Exception if the workbook cannot be read or the text cannot be
 *                   extracted
 */
public static String extractText(InputStream in) throws Exception {
    ExcelExtractor extractor = new ExcelExtractor(new HSSFWorkbook(in));
    String rawText = extractor.getText();

    // Strip the characters that are of no use for indexing.
    return ExtractorUtil.removeControlChars(rawText);
}

From source file:com.jaeksoft.searchlib.parser.XlsParser.java

License:Open Source License

/**
 * Parses an XLS document: copies title, author and subject from the
 * summary information (when present) into a new parser result item,
 * stores the extracted text as content with consecutive spaces replaced
 * by a single space, and runs language detection on that content.
 *
 * @param streamLimiter source of the workbook input stream
 * @param lang          not used by this implementation
 * @throws IOException if the workbook cannot be read
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {

    HSSFWorkbook xlsWorkbook = new HSSFWorkbook(streamLimiter.getNewInputStream());
    ExcelExtractor extractor = null;
    try {
        extractor = new ExcelExtractor(xlsWorkbook);
        ParserResultItem item = getNewParserResultItem();

        // Metadata is optional: only copy it when the workbook carries it.
        SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            item.addField(ParserFieldEnum.title, summary.getTitle());
            item.addField(ParserFieldEnum.author, summary.getAuthor());
            item.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        String normalizedText = StringUtils.replaceConsecutiveSpaces(extractor.getText(), " ");
        item.addField(ParserFieldEnum.content, normalizedText);

        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        // extractor is still null here if its construction failed.
        IOUtils.close(extractor);
    }

}

From source file:com.mycompany.devisetty_mavenlocalrepositorydemo.POIDriver.java

/**
 * Prints the text content of {@code sample.xls} to standard output.
 *
 * @param args the command line arguments (not used)
 * @throws IOException if {@code sample.xls} cannot be opened or read
 */
public static void main(String[] args) throws IOException {
    // try-with-resources closes the file handle even if extraction fails.
    try (FileInputStream xlsInput = new FileInputStream("sample.xls")) {
        //Create POI file system object.
        POIFSFileSystem objPOIFileSystem = new POIFSFileSystem(xlsInput);

        //Create a data extractor using file system object.
        ExcelExtractor datExtractor = new ExcelExtractor(objPOIFileSystem);

        //Extract data using extractor and print
        System.out.println(datExtractor.getText());
    }
}

From source file:com.mycompany.mavenlocalrepository.POIDriver.java

/**
 * Prints the text content of {@code Sample.xls} to standard output.
 *
 * @param args the command line arguments (not used)
 * @throws FileNotFoundException if {@code Sample.xls} does not exist
 * @throws IOException           if the file cannot be read
 */
public static void main(String[] args) throws FileNotFoundException, IOException {
    // try-with-resources closes the file handle even if extraction fails.
    try (FileInputStream xlsInput = new FileInputStream("Sample.xls")) {
        //Create POI file system object.
        POIFSFileSystem objPOIFileSystem = new POIFSFileSystem(xlsInput);

        //Create a data extractor using file system object.
        ExcelExtractor datExtractor = new ExcelExtractor(objPOIFileSystem);

        //Extract data using extractor and print
        System.out.println(datExtractor.getText());
    }
}

From source file:com.openkm.extractor.MsExcelTextExtractor.java

License:Open Source License

/**
 * {@inheritDoc}/*from  w w  w . j  a  v  a 2  s  . c  o m*/
 */
public String extractText(InputStream stream, String type, String encoding) throws IOException {
    try {
        POIFSFileSystem fs = new POIFSFileSystem(stream);
        return new ExcelExtractor(fs).getText();
    } catch (RuntimeException e) {
        logger.warn("Failed to extract Excel text content", e);
        throw new IOException(e.getMessage(), e);
    } finally {
        stream.close();
    }
}

From source file:com.openkm.util.metadata.MetadataExtractor.java

License:Open Source License

/**
 * Extract metadata from Office Word/*from   w  w  w.  j  a  v  a2s . com*/
 */
public static OfficeMetadata officeExtractor(InputStream is, String mimeType) throws IOException {
    POIFSFileSystem fs = new POIFSFileSystem(is);
    OfficeMetadata md = new OfficeMetadata();
    SummaryInformation si = null;

    if (MimeTypeConfig.MIME_MS_WORD.equals(mimeType)) {
        si = new WordExtractor(fs).getSummaryInformation();
    } else if (MimeTypeConfig.MIME_MS_EXCEL.equals(mimeType)) {
        si = new ExcelExtractor(fs).getSummaryInformation();
    } else if (MimeTypeConfig.MIME_MS_POWERPOINT.equals(mimeType)) {
        si = new PowerPointExtractor(fs).getSummaryInformation();
    }

    if (si != null) {
        md.setTitle(si.getTitle());
        md.setSubject(si.getSubject());
        md.setAuthor(si.getAuthor());
        md.setLastAuthor(si.getLastAuthor());
        md.setKeywords(si.getKeywords());
        md.setComments(si.getComments());
        md.setTemplate(si.getTemplate());
        md.setRevNumber(si.getRevNumber());
        md.setApplicationName(si.getApplicationName());
        md.setEditTime(si.getEditTime());
        md.setPageCount(si.getPageCount());
        md.setWordCount(si.getWordCount());
        md.setCharCount(si.getCharCount());
        md.setSecurity(si.getSecurity());

        Calendar createDateTime = Calendar.getInstance();
        createDateTime.setTime(si.getCreateDateTime());
        md.setCreateDateTime(createDateTime);

        Calendar lastSaveDateTime = Calendar.getInstance();
        lastSaveDateTime.setTime(si.getLastSaveDateTime());
        md.setLastSaveDateTime(lastSaveDateTime);

        Calendar lastPrinted = Calendar.getInstance();
        lastPrinted.setTime(si.getLastPrinted());
        md.setLastPrinted(lastPrinted);
    }

    log.info("officeExtractor: {}", md);
    return md;
}

From source file:com.opensearchserver.extractor.parser.Xls.java

License:Apache License

/**
 * Parses an XLS document: records the summary information (title, author,
 * subject, creation and modification dates, keywords) in {@code metas}
 * when the workbook has it, then stores the extracted text and the
 * language-detection result in a new parser document.
 *
 * @param inputStream stream supplying the workbook
 * @param extension   not used by this implementation
 * @param mimeType    not used by this implementation
 * @throws Exception if the workbook cannot be read or parsed
 */
@Override
protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception {

    HSSFWorkbook xlsWorkbook = new HSSFWorkbook(inputStream);
    ExcelExtractor extractor = null;
    try {
        extractor = new ExcelExtractor(xlsWorkbook);

        // Metadata is optional: only copy it when the workbook carries it.
        SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            metas.add(KEYWORDS, summary.getKeywords());
        }

        ParserDocument document = getNewParserDocument();
        document.add(CONTENT, extractor.getText());
        document.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        // extractor is still null here if its construction failed.
        if (extractor != null) {
            IOUtils.closeQuietly(extractor);
        }
    }

}