Example usage for org.apache.poi.hssf.extractor ExcelExtractor getText

List of usage examples for org.apache.poi.hssf.extractor ExcelExtractor getText

Introduction

On this page you can find example usages of org.apache.poi.hssf.extractor ExcelExtractor getText.

Prototype

@Override
    public String getText() 

Source Link

Usage

From source file:com.bayareasoftware.chartengine.ds.util.ExcelDump.java

License:Apache License

/**
 * Extracts the text of the given Excel workbook and prints it to standard
 * output twice: first line by line with a 1-based line number in front of
 * each line, then as one block preceded by {@code "XLS: "}.
 *
 * @param fileName path of the Excel file to dump
 * @throws Exception if the file cannot be opened or read as a workbook
 */
private static void runNew(String fileName) throws Exception {
    POIFSFileSystem fileSystem = new POIFSFileSystem(new FileInputStream(fileName));
    ExcelExtractor extractor = new ExcelExtractor(new HSSFWorkbook(fileSystem));

    // Extractor options used for the dump.
    extractor.setFormulasNotResults(false);
    extractor.setIncludeBlankCells(true);
    extractor.setIncludeSheetNames(false);

    String extracted = extractor.getText();

    int lineNumber = 1;
    for (String line : StringUtil.splitCompletely(extracted, '\n')) {
        System.out.println(lineNumber + ") " + line);
        lineNumber++;
    }
    System.out.println("XLS: \n" + extracted);
}

From source file:com.docdoku.server.esindexer.ESTools.java

License:Open Source License

/**
 * Converts an Excel document to plain text.
 * <p>
 * Streams that start with a POIFS header (pre-2007 {@code .xls} files) are
 * handed to {@link ExcelExtractor}. Anything else is opened as an
 * {@link XSSFWorkbook}; every cell of every sheet is appended, cells
 * separated by a space, rows terminated by a newline, and each sheet
 * followed by an extra newline.
 *
 * @param inputStream stream supplying the Excel document; it is wrapped in a
 *                    {@link BufferedInputStream} and closed when this method returns
 * @return the text extracted from the document
 * @throws IOException        if the stream cannot be read
 * @throws OpenXML4JException declared for the OOXML branch
 * @throws XmlException       declared for the OOXML branch
 */
private static String microsoftExcelDocumentToString(InputStream inputStream)
        throws IOException, OpenXML4JException, XmlException {
    StringBuilder sb = new StringBuilder();
    try (InputStream excelStream = new BufferedInputStream(inputStream)) {
        if (POIFSFileSystem.hasPOIFSHeader(excelStream)) { // Before 2007 format files
            POIFSFileSystem excelFS = new POIFSFileSystem(excelStream);
            ExcelExtractor excelExtractor = new ExcelExtractor(excelFS);
            sb.append(excelExtractor.getText());
        } else { // New format
            XSSFWorkbook workBook = new XSSFWorkbook(excelStream);
            int numberOfSheets = workBook.getNumberOfSheets();
            for (int i = 0; i < numberOfSheets; i++) {
                // Read sheet i; the previous code always fetched sheet 0, so
                // the first sheet was emitted once per sheet and all other
                // sheets were never read.
                XSSFSheet sheet = workBook.getSheetAt(i);
                for (Row row : sheet) {
                    for (Cell cell : row) {
                        sb.append(cell.toString());
                        sb.append(" ");
                    }
                    sb.append("\n");
                }
                sb.append("\n");
            }
        }
    }
    return sb.toString();
}

From source file:com.frameworkset.platform.cms.searchmanager.extractors.CmsExtractorMsExcel.java

License:Open Source License

/**
 * Reads the text content of an Excel 2003 workbook.
 * <p>
 * Formulas are requested instead of their results and sheet names are left
 * out. As a side effect the extractor's document summary information and
 * summary information are stored in {@code m_documentSummary} and
 * {@code m_summary}.
 *
 * @param in stream supplying the workbook
 * @return the extracted text, or {@code null} if a
 *         {@link FileNotFoundException} was caught before the text was read
 * @throws IOException on any other I/O failure
 */
public String readExcel(InputStream in) throws IOException {
    String text = null;
    try {
        ExcelExtractor xlsExtractor = new ExcelExtractor(new HSSFWorkbook(in));
        xlsExtractor.setFormulasNotResults(true);
        xlsExtractor.setIncludeSheetNames(false);

        text = xlsExtractor.getText();

        // Keep the document metadata on this instance.
        this.m_documentSummary = xlsExtractor.getDocSummaryInformation();
        this.m_summary = xlsExtractor.getSummaryInformation();
    } catch (FileNotFoundException notFound) {
        notFound.printStackTrace();
    }
    return text;
}

From source file:com.isotrol.impe3.idx.oc.extractors.ExtractorMsExcel.java

License:Open Source License

/**
 * Extrae el texto de un fichero excel./*from   ww w. j a v  a2 s  .co  m*/
 * @param in
 * @return String. Devuelve el texto crudo
 * @throws Exception
 */
public static String extractText(InputStream in) throws Exception {

    String result = "";

    HSSFWorkbook wb = new HSSFWorkbook(in);

    ExcelExtractor ee = new ExcelExtractor(wb);
    result = ee.getText();

    // Eliminamos los caracteres que no nos sirven para indexar.
    result = ExtractorUtil.removeControlChars(result);

    return result;
}

From source file:com.jaeksoft.searchlib.parser.XlsParser.java

License:Open Source License

/**
 * Parses an Excel workbook obtained from the stream limiter and records its
 * title, author and subject (when summary information is present) together
 * with its text content in a new parser result item.
 *
 * @param streamLimiter source of the workbook input stream
 * @param lang          not used by this implementation
 * @throws IOException if the workbook cannot be read
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    HSSFWorkbook xlsWorkbook = new HSSFWorkbook(streamLimiter.getNewInputStream());
    ExcelExtractor extractor = null;
    try {
        extractor = new ExcelExtractor(xlsWorkbook);
        ParserResultItem item = getNewParserResultItem();

        // Metadata fields are only available when the workbook carries summary information.
        SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            item.addField(ParserFieldEnum.title, summary.getTitle());
            item.addField(ParserFieldEnum.author, summary.getAuthor());
            item.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        String normalizedText = StringUtils.replaceConsecutiveSpaces(extractor.getText(), " ");
        item.addField(ParserFieldEnum.content, normalizedText);

        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        // extractor is still null here if its constructor threw.
        IOUtils.close(extractor);
    }
}

From source file:com.mycompany.devisetty_mavenlocalrepositorydemo.POIDriver.java

/**
 * Prints the text extracted from {@code sample.xls} to standard output.
 *
 * @param args the command line arguments (not used)
 * @throws IOException if the file cannot be opened or read
 */
public static void main(String[] args) throws IOException {
    // Open the workbook file as a POI file system.
    FileInputStream xlsInput = new FileInputStream("sample.xls");
    POIFSFileSystem fileSystem = new POIFSFileSystem(xlsInput);

    // Extract the workbook text and print it.
    String extractedText = new ExcelExtractor(fileSystem).getText();
    System.out.println(extractedText);
}

From source file:com.mycompany.mavenlocalrepository.POIDriver.java

/**
 * Prints the text extracted from {@code Sample.xls} to standard output.
 *
 * @param args the command line arguments (not used)
 * @throws FileNotFoundException if {@code Sample.xls} cannot be opened
 * @throws IOException           if the file cannot be read
 */
public static void main(String[] args) throws FileNotFoundException, IOException {
    // Build the extractor directly on a POI file system opened from the file.
    ExcelExtractor extractor =
            new ExcelExtractor(new POIFSFileSystem(new FileInputStream("Sample.xls")));

    // Print everything the extractor returns.
    System.out.println(extractor.getText());
}

From source file:com.opensearchserver.extractor.parser.Xls.java

License:Apache License

/**
 * Parses an Excel workbook from the given stream, adds its summary
 * information (when present) to {@code metas}, and stores the extracted
 * text plus a language-detection entry in a new parser document.
 *
 * @param inputStream stream supplying the workbook
 * @param extension   not used by this implementation
 * @param mimeType    not used by this implementation
 * @throws Exception if the workbook cannot be read
 */
@Override
protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception {
    HSSFWorkbook xlsWorkbook = new HSSFWorkbook(inputStream);
    ExcelExtractor extractor = null;
    try {
        extractor = new ExcelExtractor(xlsWorkbook);

        // Metadata is only recorded when the workbook carries summary information.
        SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            metas.add(KEYWORDS, summary.getKeywords());
        }

        ParserDocument document = getNewParserDocument();
        document.add(CONTENT, extractor.getText());
        document.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        // extractor is still null here if its constructor threw.
        if (extractor != null) {
            IOUtils.closeQuietly(extractor);
        }
    }
}

From source file:com.opensearchserver.textextractor.parser.Xls.java

License:Apache License

/**
 * Parses an Excel workbook from the given stream, adds its summary
 * information (when present) to {@code metas}, and stores the extracted
 * text plus a language-detection entry in a new parser document.
 *
 * @param inputStream stream supplying the workbook
 * @throws Exception if the workbook cannot be read
 */
@Override
protected void parseContent(InputStream inputStream) throws Exception {
    HSSFWorkbook xlsWorkbook = new HSSFWorkbook(inputStream);
    ExcelExtractor extractor = null;
    try {
        extractor = new ExcelExtractor(xlsWorkbook);

        // Metadata is only recorded when the workbook carries summary information.
        SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            metas.add(KEYWORDS, summary.getKeywords());
        }

        ParserDocument document = getNewParserDocument();
        document.add(CONTENT, extractor.getText());
        document.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        // extractor is still null here if its constructor threw.
        if (extractor != null) {
            IOUtils.closeQuietly(extractor);
        }
    }
}

From source file:de.micromata.genome.gwiki.plugin.msotextextractor_1_0.ExcelTextExtractor.java

License:Apache License

/**
 * Extracts the text of an Excel workbook and post-processes it with
 * {@code WordTextExtractor.reworkWordText}.
 *
 * @param fileName name of the file, used only in the error message
 * @param data     stream supplying the workbook
 * @return the reworked text
 * @throws RuntimeIOException wrapping any {@link IOException} raised while extracting
 */
public String extractText(String fileName, InputStream data) {
    try {
        ExcelExtractor extractor = new ExcelExtractor(new HSSFWorkbook(data));
        return WordTextExtractor.reworkWordText(extractor.getText());
    } catch (IOException ioe) {
        // NOTE(review): the message says "word" although this extracts Excel text;
        // looks copied from the Word extractor - confirm before rewording.
        String message = "Failure to extract word from " + fileName + "; " + ioe.getMessage();
        throw new RuntimeIOException(message, ioe);
    }
}