List of usage examples for the org.apache.poi.poifs.filesystem.POIFSFileSystem constructor
public POIFSFileSystem(InputStream stream) throws IOException
From source file:org.tonguetied.datatransfer.importing.ExcelImporter.java
License:Apache License
/** * This method initializes the parser enabling the parser to handle the * excel document.//from w w w.j a va 2 s . c om * * @param input the byte code representation of the excel document * @throws ImportException if the input data fails to be parsed */ private void loadData(byte[] input) throws ImportException { ByteArrayInputStream bais = null; InputStream dis = null; try { bais = new ByteArrayInputStream(input); // create a new org.apache.poi.poifs.filesystem.Filesystem POIFSFileSystem poifs = new POIFSFileSystem(bais); // get the Workbook (excel part) stream in a InputStream dis = poifs.createDocumentInputStream("Workbook"); // construct out HSSFRequest object HSSFRequest req = new HSSFRequest(); // lazy listen for ALL records with the listener shown above req.addListenerForAllRecords(parser); // create our event factory HSSFEventFactory factory = new HSSFEventFactory(); // process our events based on the document input stream factory.processEvents(req, dis); } catch (IOException ioe) { throw new ImportException(ioe); } finally { // and our document input stream (don't want to leak these!) close(dis); // once all the events are processed close our file input stream close(bais); } }
From source file:org.tonguetied.datatransfer.importing.ExcelLanguageCentricParserTest.java
License:Apache License
/** * Test method for {@link org.tonguetied.datatransfer.importing.ExcelLanguageCentricParser#processRecord(org.apache.poi.hssf.record.Record)}. *//* ww w .j a va2 s .co m*/ @Test public final void testProcessRecord() throws Exception { ExcelLanguageCentricParser parser = new ExcelLanguageCentricParser(keywordService); InputStream is = null; try { // create a new file input stream with the input file specified // at the command line File input = new File(TEST_DATA_DIR, "LanguageCentricImportData.xls"); is = new BufferedInputStream(new FileInputStream(input)); // create a new org.apache.poi.poifs.filesystem.Filesystem POIFSFileSystem poifs = new POIFSFileSystem(is); // get the Workbook (excel part) stream in a InputStream InputStream din = poifs.createDocumentInputStream("Workbook"); // construct out HSSFRequest object HSSFRequest req = new HSSFRequest(); // lazy listen for ALL records with the listener shown above req.addListenerForAllRecords(parser); // create our event factory HSSFEventFactory factory = new HSSFEventFactory(); // process our events based on the document input stream factory.processEvents(req, din); } finally { // once all the events are processed close our file input stream if (is != null) is.close(); } List<Language> languages = parser.getLanguages(); assertEquals(4, languages.size()); assertTrue(languages.contains(defaultLanguage)); assertTrue(languages.contains(hebrew)); assertTrue(languages.contains(simplifiedChinese)); assertTrue(languages.contains(traditionalChinese)); Map<String, Keyword> keywords = parser.getKeywords(); assertEquals(8, keywords.size()); Keyword actual = keywords.get(keyword1.getKeyword()); assessKeyword(keyword1, actual); actual = keywords.get(keyword2.getKeyword()); assessKeyword(keyword2, actual); }
From source file:org.tonguetied.datatransfer.importing.KeywordExcelParserTest.java
License:Apache License
/** * Test method for {@link org.tonguetied.datatransfer.importing.ExcelLanguageCentricParser#processRecord(org.apache.poi.hssf.record.Record)}. *//*w w w. j a v a 2 s .c o m*/ @Test public final void testProcessRecord() throws Exception { ExcelParser parser = new ExcelKeywordParser(keywordService); InputStream is = null; try { // create a new file input stream with the input file specified // at the command line File input = new File(TEST_DATA_DIR, "KeywordExcelParserTest.xls"); is = new BufferedInputStream(new FileInputStream(input)); // create a new org.apache.poi.poifs.filesystem.Filesystem POIFSFileSystem poifs = new POIFSFileSystem(is); // get the Workbook (excel part) stream in a InputStream InputStream din = poifs.createDocumentInputStream("Workbook"); // construct out HSSFRequest object HSSFRequest req = new HSSFRequest(); // lazy listen for ALL records with the listener shown above req.addListenerForAllRecords(parser); // create our event factory HSSFEventFactory factory = new HSSFEventFactory(); // process our events based on the document input stream factory.processEvents(req, din); } finally { // once all the events are processed close our file input stream if (is != null) is.close(); } Map<String, Keyword> keywords = parser.getKeywords(); assertEquals(7, keywords.size()); Keyword actual = keywords.get(keyword1.getKeyword()); assessKeyword(keyword1, actual); actual = keywords.get(keyword2.getKeyword()); assessKeyword(keyword2, actual); actual = keywords.get(keyword3.getKeyword()); assessKeyword(keyword3, actual); actual = keywords.get(keyword4.getKeyword()); assessKeyword(keyword4, actual); assertTrue(actual.getTranslations().isEmpty()); actual = keywords.get(keyword5.getKeyword()); assessKeyword(keyword5, actual); final List<ImportErrorCode> errorCodes = parser.getErrorCodes(); assertEquals(6, errorCodes.size()); assertTrue(errorCodes.contains(ImportErrorCode.unknownCountry)); assertTrue(errorCodes.contains(ImportErrorCode.illegalCountry)); 
assertTrue(errorCodes.contains(ImportErrorCode.unknownLanguage)); assertTrue(errorCodes.contains(ImportErrorCode.illegalLanguage)); assertTrue(errorCodes.contains(ImportErrorCode.unknownBundle)); assertTrue(errorCodes.contains(ImportErrorCode.illegalTranslationState)); }
From source file:org.waterforpeople.mapping.app.harness.DeleteSurveyInstanceHarness.java
License:Open Source License
public void processSheet(String spreadsheetName, String serviceUrl) { InputStream inp;/*from www.ja va 2 s.co m*/ Sheet sheet1 = null; try { inp = new FileInputStream(spreadsheetName); HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(inp)); int i = 0; sheet1 = wb.getSheetAt(0); for (Row row : sheet1) { if (row.getRowNum() >= 1) { StringBuilder sb = new StringBuilder(); sb.append("?action=deleteSurveyInstance&"); for (Cell cell : row) { switch (cell.getColumnIndex()) { case 0: sb.append("instanceId=" + new Double(cell.getNumericCellValue()).intValue()); break; } } URL url = new URL(serviceUrl + sb.toString()); System.out.println(i++ + " : " + serviceUrl + sb.toString()); HttpURLConnection conn = (HttpURLConnection) url.openConnection(); conn.setRequestMethod("GET"); conn.setDoOutput(true); String line; BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream())); while ((line = reader.readLine()) != null) { System.out.println(line); } // writer.close(); reader.close(); } } } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:org.waterforpeople.mapping.dataexport.SurveySpreadsheetImporter.java
License:Open Source License
@Override public void executeImport(File file, String serverBase, Map<String, String> criteria) { InputStream inp = null;/*from w w w . j a va 2s .co m*/ Sheet sheet1 = null; Integer startRow = 1; Long beforeQuestionId = null; boolean isWholeSurvey = true; if (criteria != null) { if (criteria.get(BEFORE_QUESTION_ID_PARAM) != null) { beforeQuestionId = new Long(criteria.get(BEFORE_QUESTION_ID_PARAM)); } if (criteria.get(WHOLE_SURVEY_PARAM) != null) { if ("false".equalsIgnoreCase(criteria.get(WHOLE_SURVEY_PARAM))) { isWholeSurvey = false; } } } try { inp = new FileInputStream(file); HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(inp)); sheet1 = wb.getSheetAt(0); String apiKey = criteria != null ? criteria.get("apiKey") : null; if (!isWholeSurvey) { // even though there is a header row, we want lastRowNum since // rows are 0 indexed int questionCount = sheet1.getLastRowNum(); // figure out the starting order QuestionDto startingQuestion = BulkDataServiceClient.loadQuestionDetails(serverBase, beforeQuestionId, apiKey); startRow = startingQuestion.getOrder(); // now get all the questions List<QuestionDto> questionsInGroup = BulkDataServiceClient.fetchQuestions(serverBase, startingQuestion.getQuestionGroupId(), apiKey); if (questionsInGroup != null) { // we only need to reorder the group into which we're // importing for (QuestionDto q : questionsInGroup) { if (q.getOrder() >= startRow) { StringBuilder reorderBuffer = new StringBuilder(); reorderBuffer.append("?").append(SurveyRestRequest.ACTION_PARAM).append("=") .append(SurveyRestRequest.UPDATE_QUESTION_ORDER_ACTION).append("&") .append(SurveyRestRequest.QUESTION_ID_PARAM).append("=").append(q.getKeyId()) .append("&").append(SurveyRestRequest.QUESTION_ORDER_PARAM).append("=") .append((q.getOrder() + questionCount)); String result = BulkDataServiceClient.fetchDataFromServer(serverBase + SERVLET_URL, reorderBuffer.toString(), true, criteria.get(KEY_PARAM)); log.debug(result); } } } } for (Row row : sheet1) { if 
(row.getRowNum() >= 1) { StringBuilder sb = new StringBuilder(); sb.append("?").append(SurveyRestRequest.ACTION_PARAM).append("=") .append(SurveyRestRequest.SAVE_QUESTION_ACTION).append("&"); for (Cell cell : row) { switch (cell.getColumnIndex()) { case 0: sb.append(SurveyRestRequest.SURVEY_GROUP_NAME_PARAM).append("=") .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&"); break; case 1: sb.append(SurveyRestRequest.SURVEY_NAME_PARAM).append("=") .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&"); break; case 2: sb.append(SurveyRestRequest.QUESTION_GROUP_ORDER_PARAM).append("=") .append(new Double(cell.getNumericCellValue()).intValue()).append("&"); break; case 3: sb.append(SurveyRestRequest.QUESTION_GROUP_NAME_PARAM).append("=") .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&"); break; case 4: int order = new Double(cell.getNumericCellValue()).intValue(); if (!isWholeSurvey) { order += (startRow - 1); } sb.append(SurveyRestRequest.QUESTION_ORDER_PARAM).append("=").append(order).append("&"); break; case 5: sb.append(SurveyRestRequest.QUESTION_TEXT_PARAM).append("=") .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&"); break; case 6: sb.append(SurveyRestRequest.QUESTION_TYPE_PARAM).append("=") .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&"); break; case 7: sb.append(SurveyRestRequest.OPTIONS_PARAM).append("=") .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&"); break; case 8: String valString = parseCellAsString(cell); if (valString != null && valString.trim().length() > 0) { String[] parts = valString.split("\\|"); int depOrder = new Integer(parts[0].trim()); if (!isWholeSurvey) { depOrder += (startRow - 1); } sb.append(SurveyRestRequest.DEPEND_QUESTION_PARAM).append("=") .append(URLEncoder.encode(depOrder + "|" + parts[1], "UTF-8")).append("&"); } break; case 9: 
sb.append(SurveyRestRequest.ALLOW_OTHER_PARAM).append("=") .append(parseCellAsString(cell)).append("&"); break; case 10: sb.append(SurveyRestRequest.ALLOW_MULTIPLE_PARAM).append("=") .append(parseCellAsString(cell)).append("&"); break; case 11: sb.append(SurveyRestRequest.MANDATORY_PARAM).append("=").append(parseCellAsString(cell)) .append("&"); break; case 12: sb.append(SurveyRestRequest.SCORING_PARAM).append("=").append(parseCellAsString(cell)); break; case 13: // min val String minVal = parseCellAsString(cell); if (minVal != null && minVal.trim().length() > 0) { sb.append("&").append(SurveyRestRequest.VALIDATION_MIN_PARAM).append("=") .append(minVal); } break; case 14: // max val String maxVal = parseCellAsString(cell); if (maxVal != null && maxVal.trim().length() > 0) { sb.append("&").append(SurveyRestRequest.VALIDATION_MAX_PARAM).append("=") .append(maxVal); } break; case 15: // allow sign String signVal = parseCellAsString(cell); if (signVal != null && signVal.trim().length() > 0) { sb.append("&").append(SurveyRestRequest.VALIDATION_ALLOW_SIGN_PARAM).append("=") .append(signVal); } break; case 16: // allow decimal String decimalVal = parseCellAsString(cell); if (decimalVal != null && decimalVal.trim().length() > 0) { sb.append("&").append(SurveyRestRequest.VALIDATION_ALLOW_DECIMAL_PARAM).append("=") .append(decimalVal); } break; case 17: // is name String isNameVal = parseCellAsString(cell); if (isNameVal != null && isNameVal.trim().length() > 0) { sb.append("&").append(SurveyRestRequest.VALIDATION_IS_NAME_PARAM).append("=") .append(isNameVal); } break; case 18: String metricName = parseCellAsString(cell); if (metricName != null && metricName.trim().length() > 0) { sb.append("&").append(SurveyRestRequest.METRIC_NAME_PARAM).append("=") .append(metricName); } break; case 19: String metricGroup = parseCellAsString(cell); if (metricGroup != null && metricGroup.trim().length() > 0) { sb.append("&").append(SurveyRestRequest.METRIC_GROUP_PARAM).append("=") 
.append(metricGroup); } break; } } try { String result = BulkDataServiceClient.fetchDataFromServer(serverBase + SERVLET_URL, sb.toString(), true, criteria.get(KEY_PARAM)); log.debug(result); } catch (Throwable t) { log.error("Error: " + t.getMessage(), t); log.info("Trying again"); try { BulkDataServiceClient.fetchDataFromServer(serverBase + SERVLET_URL, sb.toString(), true, criteria.get(KEY_PARAM)); } catch (Exception e) { log.error("Error:" + e.getMessage(), e); // giving up } } } } } catch (Exception e) { e.printStackTrace(); } finally { if (inp != null) { try { inp.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:org.waterforpeople.mapping.dataexport.SurveySpreadsheetImporter.java
License:Open Source License
@Override public Map<Integer, String> validate(File file) { InputStream inp = null;// ww w. ja v a2s . c om Sheet sheet1 = null; Map<Integer, String> errorMap = new TreeMap<Integer, String>(); try { inp = new FileInputStream(file); HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(inp)); sheet1 = wb.getSheetAt(0); for (Row row : sheet1) { StringBuffer rowError = new StringBuffer(); if (row.getRowNum() >= 1) { String type = null; for (Cell cell : row) { try { switch (cell.getColumnIndex()) { case 0: if (cell.getStringCellValue().trim().length() == 0) { rowError.append("Survey Group Name is missing\n"); } break; case 1: if (cell.getStringCellValue().trim().length() == 0) { rowError.append("Survey Name is missing\n"); } break; case 2: try { if (cell.getNumericCellValue() < 0) { rowError.append("Question Group Order must be a positive integer\n"); } } catch (Exception e) { rowError.append("Question group order must be a number\n"); } break; case 3: if (cell.getStringCellValue().trim().length() == 0) { rowError.append("Question Group Name is missing\n"); } break; case 4: try { if (cell.getNumericCellValue() < 0) { rowError.append("Question Id Order must be a positive integer\n"); } } catch (Exception e) { rowError.append("Question Id order must be a number\n"); } break; case 5: if (cell.getStringCellValue().trim().length() == 0) { rowError.append("Question Text is missing\n"); } break; case 6: type = cell.getStringCellValue().trim(); if (type.length() == 0) { rowError.append("Question Type is missing\n"); } else { if (!(type.equals(QuestionDto.QuestionType.FREE_TEXT.toString()) || type.equals(QuestionDto.QuestionType.PHOTO.toString()) || type.equals(QuestionDto.QuestionType.VIDEO.toString()) || type.equals(QuestionDto.QuestionType.GEO.toString()) || type.equals(QuestionDto.QuestionType.SCAN.toString()) || type.equals(QuestionDto.QuestionType.TRACK.toString()) || type.equals(QuestionDto.QuestionType.NAME.toString()) || 
type.equals(QuestionDto.QuestionType.NUMBER.toString()) || type.equals(QuestionDto.QuestionType.OPTION.toString())) || type.equals(QuestionDto.QuestionType.STRENGTH.toString())) { rowError.append( "Invalid question type. Must be either: FREE_TEXT, PHOTO, VIDEO, GEO, NUMBER, OPTION, SCAN, TRACK, NAME, STRENGTH\n"); } } break; case 7: if (QuestionType.OPTION.toString().equals(type) || QuestionType.STRENGTH.toString().equals(type)) { if (cell.getStringCellValue().trim().length() == 0) { rowError.append("Options are missing\n"); } } // TODO: validate language codes break; case 8: // TODO: validate dependency break; case 9: if (!validateBooleanField(cell)) { rowError.append("Allow Other must be either TRUE or FALSE\n"); } break; case 10: if (!validateBooleanField(cell)) { rowError.append("Allow Multiple must be either TRUE or FALSE\n"); } break; case 11: if (!validateBooleanField(cell)) { rowError.append("Manditory must be either TRUE or FALSE\n"); } break; } } catch (Exception e) { rowError.append(e.toString()); } finally { if (rowError.toString().trim().length() > 0) { errorMap.put(row.getRowNum() + 1, rowError.toString().trim()); } } } } } } catch (Exception e) { e.printStackTrace(); } finally { if (inp != null) { try { inp.close(); } catch (IOException e) { e.printStackTrace(); } } } return errorMap; }
From source file:org.wso2.carbon.apimgt.impl.indexing.indexer.DocumentIndexer.java
License:Open Source License
/** * Write document content to document artifact as its raw content * * @param registry/*from w w w.jav a 2s . c o m*/ * @param documentResource * @return * @throws RegistryException * @throws IOException * @throws APIManagementException */ private String fetchDocumentContent(Registry registry, Resource documentResource) throws RegistryException, IOException, APIManagementException { GenericArtifactManager docArtifactManager = APIUtil.getArtifactManager(registry, APIConstants.DOCUMENTATION_KEY); GenericArtifact documentArtifact = docArtifactManager.getGenericArtifact(documentResource.getUUID()); String sourceType = documentArtifact.getAttribute(APIConstants.DOC_SOURCE_TYPE); String contentString = null; if (Documentation.DocumentSourceType.FILE.name().equals(sourceType)) { Association fileAssociations[] = registry.getAssociations(documentResource.getPath(), APIConstants.DOCUMENTATION_FILE_ASSOCIATION); Association fileAssociation; if (fileAssociations.length < 1) { String error = "No document associated to API"; log.error(error); throw new APIManagementException(error); } //a file document can have one file association fileAssociation = fileAssociations[0]; String contentPath = fileAssociation.getDestinationPath(); if (!registry.resourceExists(contentPath)) { String error = "API not found at " + contentPath; log.error(error); throw new APIManagementException(error); } Resource contentResource = registry.get(contentPath); String fileName = ((ResourceImpl) contentResource).getName(); String extension = FilenameUtils.getExtension(fileName); InputStream inputStream = null; try { inputStream = contentResource.getContentStream(); switch (extension) { case APIConstants.PDF_EXTENSION: PDFParser pdfParser = new PDFParser(inputStream); pdfParser.parse(); COSDocument cosDocument = pdfParser.getDocument(); PDFTextStripper stripper = new PDFTextStripper(); contentString = stripper.getText(new PDDocument(cosDocument)); break; case APIConstants.DOC_EXTENSION: { POIFSFileSystem 
pfs = new POIFSFileSystem(inputStream); WordExtractor msWord2003Extractor = new WordExtractor(pfs); contentString = msWord2003Extractor.getText(); break; } case APIConstants.DOCX_EXTENSION: XWPFDocument doc = new XWPFDocument(inputStream); XWPFWordExtractor msWord2007Extractor = new XWPFWordExtractor(doc); contentString = msWord2007Extractor.getText(); break; case APIConstants.XLS_EXTENSION: { POIFSFileSystem pfs = new POIFSFileSystem(inputStream); ExcelExtractor extractor = new ExcelExtractor(pfs); contentString = extractor.getText(); break; } case APIConstants.XLSX_EXTENSION: XSSFWorkbook xssfSheets = new XSSFWorkbook(inputStream); XSSFExcelExtractor xssfExcelExtractor = new XSSFExcelExtractor(xssfSheets); contentString = xssfExcelExtractor.getText(); break; case APIConstants.PPT_EXTENSION: { POIFSFileSystem fs = new POIFSFileSystem(inputStream); PowerPointExtractor extractor = new PowerPointExtractor(fs); contentString = extractor.getText(); break; } case APIConstants.PPTX_EXTENSION: XMLSlideShow xmlSlideShow = new XMLSlideShow(inputStream); XSLFPowerPointExtractor xslfPowerPointExtractor = new XSLFPowerPointExtractor(xmlSlideShow); contentString = xslfPowerPointExtractor.getText(); break; case APIConstants.TXT_EXTENSION: case APIConstants.WSDL_EXTENSION: case APIConstants.XML_DOC_EXTENSION: BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); String line; StringBuilder contentBuilder = new StringBuilder(); while ((line = reader.readLine()) != null) { contentBuilder.append(line); } contentString = contentBuilder.toString(); break; } } finally { IOUtils.closeQuietly(inputStream); } } else if (Documentation.DocumentSourceType.INLINE.name().equals(sourceType)) { Association contentAssociations[] = registry.getAssociations(documentResource.getPath(), APIConstants.DOCUMENTATION_CONTENT_ASSOCIATION); Association contentAssociation; //an inline document can have one or no content associations if (contentAssociations.length == 1) { 
contentAssociation = contentAssociations[0]; String contentPath = contentAssociation.getDestinationPath(); if (registry.resourceExists(contentPath)) { Resource contentResource = registry.get(contentPath); InputStream instream = null; BufferedReader reader = null; String line; try { instream = contentResource.getContentStream(); reader = new BufferedReader(new InputStreamReader(instream)); StringBuilder contentBuilder = new StringBuilder(); while ((line = reader.readLine()) != null) { contentBuilder.append(line); } contentString = contentBuilder.toString(); } finally { if (reader != null) { IOUtils.closeQuietly(reader); } } } } } return contentString; }
From source file:org.wso2.carbon.pc.core.DocumentIndexer.java
License:Open Source License
@Override public IndexDocument getIndexedDocument(AsyncIndexer.File2Index fileData) throws SolrException, RegistryException { try {//from w ww . j av a 2 s . co m String wordText = null; try { //Extract MSWord 2003 document files POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data)); WordExtractor msWord2003Extractor = new WordExtractor(fs); wordText = msWord2003Extractor.getText(); } catch (OfficeXmlFileException e) { //if 2003 extraction failed, try with MSWord 2007 document files extractor XWPFDocument doc = new XWPFDocument(new ByteArrayInputStream(fileData.data)); XWPFWordExtractor msWord2007Extractor = new XWPFWordExtractor(doc); wordText = msWord2007Extractor.getText(); } catch (Exception e) { //The reason for not throwing an exception is that since this is an indexer that runs in the background //throwing an exception might lead to adverse behaviors in the client side and might lead to //other files not being indexed String msg = "Failed to extract the document while indexing"; log.error(msg, e); } IndexDocument indexDoc = new IndexDocument(fileData.path, wordText, null); Map<String, List<String>> fields = new HashMap<String, List<String>>(); fields.put("path", Arrays.asList(fileData.path)); if (fileData.mediaType != null) { fields.put(IndexingConstants.FIELD_MEDIA_TYPE, Arrays.asList(fileData.mediaType)); } else { fields.put(IndexingConstants.FIELD_MEDIA_TYPE, Arrays.asList("application/pdf")); } indexDoc.setFields(fields); return indexDoc; } catch (IOException e) { String msg = "Failed to write to the index"; log.error(msg, e); throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, msg); } }
From source file:org.wso2.carbon.registry.indexing.indexer.MSExcelIndexer.java
License:Open Source License
public IndexDocument getIndexedDocument(File2Index fileData) throws SolrException { try {//from w w w . j a va 2 s . c o m POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data)); ExcelExtractor extractor = new ExcelExtractor(fs); String excelText = extractor.getText(); return new IndexDocument(fileData.path, excelText, null); } catch (IOException e) { String msg = "Failed to write to the index"; log.error(msg, e); throw new SolrException(ErrorCode.SERVER_ERROR, msg); } }
From source file:org.wso2.carbon.registry.indexing.indexer.MSPowerpointIndexer.java
License:Open Source License
/**
 * Builds the index document for a PowerPoint (.ppt) file held in memory by
 * extracting its text with POI's {@code PowerPointExtractor}.
 *
 * @param fileData path and raw bytes of the presentation to index
 * @return the index document carrying the extracted text
 * @throws SolrException if the presentation cannot be read; the underlying
 *         {@link IOException} is attached as the cause
 */
public IndexDocument getIndexedDocument(File2Index fileData) throws SolrException {
    try {
        POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data));
        PowerPointExtractor extractor = new PowerPointExtractor(fs);
        String ppText = extractor.getText();
        return new IndexDocument(fileData.path, ppText, null);
    } catch (IOException e) {
        String msg = "Failed to write to the index";
        log.error(msg, e);
        // keep the IOException as the cause so callers can see what failed
        throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
    }
}