Example usage for org.apache.poi.poifs.filesystem POIFSFileSystem POIFSFileSystem

List of usage examples for org.apache.poi.poifs.filesystem POIFSFileSystem POIFSFileSystem

Introduction

On this page you can find example usages of the org.apache.poi.poifs.filesystem.POIFSFileSystem constructor POIFSFileSystem(InputStream).

Prototype


public POIFSFileSystem(InputStream stream) throws IOException 

Source Link

Document

Create a POIFSFileSystem from an InputStream.

Usage

From source file:org.tonguetied.datatransfer.importing.ExcelImporter.java

License:Apache License

/**
 * Feeds the supplied Excel document through the HSSF event API so that the
 * {@code parser} listener receives every record of its "Workbook" stream.
 *
 * @param input the raw bytes of the Excel document
 * @throws ImportException if an I/O error occurs while reading the document
 */
private void loadData(byte[] input) throws ImportException {
    ByteArrayInputStream rawBytes = null;
    InputStream workbookStream = null;
    try {
        rawBytes = new ByteArrayInputStream(input);

        // Open the POIFS file system and pull out the "Workbook" entry.
        final POIFSFileSystem fileSystem = new POIFSFileSystem(rawBytes);
        workbookStream = fileSystem.createDocumentInputStream("Workbook");

        // Register the parser as a listener for every record type.
        final HSSFRequest request = new HSSFRequest();
        request.addListenerForAllRecords(parser);

        // Replay the workbook stream as record events.
        new HSSFEventFactory().processEvents(request, workbookStream);
    } catch (IOException ioe) {
        throw new ImportException(ioe);
    } finally {
        // Release the document stream first, then the backing byte stream.
        close(workbookStream);
        close(rawBytes);
    }
}

From source file:org.tonguetied.datatransfer.importing.ExcelLanguageCentricParserTest.java

License:Apache License

/**
 * Test method for {@link org.tonguetied.datatransfer.importing.ExcelLanguageCentricParser#processRecord(org.apache.poi.hssf.record.Record)}.
 * <p>
 * Streams the records of {@code LanguageCentricImportData.xls} through the
 * parser and verifies the languages and keywords it collected.
 */
@Test
public final void testProcessRecord() throws Exception {
    ExcelLanguageCentricParser parser = new ExcelLanguageCentricParser(keywordService);
    InputStream is = null;
    InputStream din = null;
    try {
        // open the spreadsheet from the test data directory
        File input = new File(TEST_DATA_DIR, "LanguageCentricImportData.xls");

        is = new BufferedInputStream(new FileInputStream(input));
        // create a new org.apache.poi.poifs.filesystem.POIFSFileSystem
        POIFSFileSystem poifs = new POIFSFileSystem(is);
        // get the Workbook (excel part) stream in an InputStream
        din = poifs.createDocumentInputStream("Workbook");
        // construct our HSSFRequest object
        HSSFRequest req = new HSSFRequest();
        // listen for ALL records with the parser under test
        req.addListenerForAllRecords(parser);
        // create our event factory
        HSSFEventFactory factory = new HSSFEventFactory();
        // process our events based on the document input stream
        factory.processEvents(req, din);
    } finally {
        // close the document stream first, then the file stream; the nested
        // finally guarantees the file stream is closed even if the first
        // close fails
        try {
            if (din != null) {
                din.close();
            }
        } finally {
            if (is != null) {
                is.close();
            }
        }
    }

    List<Language> languages = parser.getLanguages();
    assertEquals(4, languages.size());
    assertTrue(languages.contains(defaultLanguage));
    assertTrue(languages.contains(hebrew));
    assertTrue(languages.contains(simplifiedChinese));
    assertTrue(languages.contains(traditionalChinese));

    Map<String, Keyword> keywords = parser.getKeywords();
    assertEquals(8, keywords.size());
    Keyword actual = keywords.get(keyword1.getKeyword());
    assessKeyword(keyword1, actual);

    actual = keywords.get(keyword2.getKeyword());
    assessKeyword(keyword2, actual);
}

From source file:org.tonguetied.datatransfer.importing.KeywordExcelParserTest.java

License:Apache License

/**
 * Exercises record processing of {@link ExcelKeywordParser} by streaming the
 * records of {@code KeywordExcelParserTest.xls} through it, then verifies the
 * keywords and import error codes the parser collected.
 */
@Test
public final void testProcessRecord() throws Exception {
    ExcelParser parser = new ExcelKeywordParser(keywordService);
    InputStream is = null;
    InputStream din = null;
    try {
        // open the spreadsheet from the test data directory
        File input = new File(TEST_DATA_DIR, "KeywordExcelParserTest.xls");

        is = new BufferedInputStream(new FileInputStream(input));
        // create a new org.apache.poi.poifs.filesystem.POIFSFileSystem
        POIFSFileSystem poifs = new POIFSFileSystem(is);
        // get the Workbook (excel part) stream in an InputStream
        din = poifs.createDocumentInputStream("Workbook");
        // construct our HSSFRequest object
        HSSFRequest req = new HSSFRequest();
        // listen for ALL records with the parser under test
        req.addListenerForAllRecords(parser);
        // create our event factory
        HSSFEventFactory factory = new HSSFEventFactory();
        // process our events based on the document input stream
        factory.processEvents(req, din);
    } finally {
        // close the document stream first, then the file stream; the nested
        // finally guarantees the file stream is closed even if the first
        // close fails
        try {
            if (din != null) {
                din.close();
            }
        } finally {
            if (is != null) {
                is.close();
            }
        }
    }

    Map<String, Keyword> keywords = parser.getKeywords();
    assertEquals(7, keywords.size());
    Keyword actual = keywords.get(keyword1.getKeyword());
    assessKeyword(keyword1, actual);

    actual = keywords.get(keyword2.getKeyword());
    assessKeyword(keyword2, actual);

    actual = keywords.get(keyword3.getKeyword());
    assessKeyword(keyword3, actual);

    actual = keywords.get(keyword4.getKeyword());
    assessKeyword(keyword4, actual);
    assertTrue(actual.getTranslations().isEmpty());

    actual = keywords.get(keyword5.getKeyword());
    assessKeyword(keyword5, actual);

    final List<ImportErrorCode> errorCodes = parser.getErrorCodes();
    assertEquals(6, errorCodes.size());
    assertTrue(errorCodes.contains(ImportErrorCode.unknownCountry));
    assertTrue(errorCodes.contains(ImportErrorCode.illegalCountry));
    assertTrue(errorCodes.contains(ImportErrorCode.unknownLanguage));
    assertTrue(errorCodes.contains(ImportErrorCode.illegalLanguage));
    assertTrue(errorCodes.contains(ImportErrorCode.unknownBundle));
    assertTrue(errorCodes.contains(ImportErrorCode.illegalTranslationState));
}

From source file:org.waterforpeople.mapping.app.harness.DeleteSurveyInstanceHarness.java

License:Open Source License

/**
 * Reads the first sheet of the given Excel workbook and, for every row after
 * the header row, issues a {@code deleteSurveyInstance} request to the
 * service, using the numeric value in column 0 as the instance id. The
 * response body of each request is echoed to standard output.
 * <p>
 * I/O failures are printed to standard error and end processing; they are not
 * propagated to the caller.
 *
 * @param spreadsheetName path of the .xls file to read
 * @param serviceUrl URL to which the generated query string is appended
 */
public void processSheet(String spreadsheetName, String serviceUrl) {
    InputStream inp = null;

    try {
        inp = new FileInputStream(spreadsheetName);
        HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(inp));
        int i = 0;
        Sheet sheet1 = wb.getSheetAt(0);
        for (Row row : sheet1) {
            // row 0 is the header row
            if (row.getRowNum() < 1) {
                continue;
            }
            StringBuilder sb = new StringBuilder();
            sb.append("?action=deleteSurveyInstance&");
            for (Cell cell : row) {
                // only column 0 (the instance id) is relevant
                if (cell.getColumnIndex() == 0) {
                    sb.append("instanceId=").append((int) cell.getNumericCellValue());
                }
            }

            String request = serviceUrl + sb.toString();
            URL url = new URL(request);
            System.out.println(i++ + " : " + request);
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");
            conn.setDoOutput(true);
            BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line);
                }
            } finally {
                // release the connection's stream even when reading fails
                reader.close();
            }
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // the spreadsheet stream was previously never closed
        if (inp != null) {
            try {
                inp.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:org.waterforpeople.mapping.dataexport.SurveySpreadsheetImporter.java

License:Open Source License

/**
 * Imports survey questions from the first sheet of an Excel workbook by
 * sending one save-question request per data row (every row after row 0) to
 * the survey servlet at {@code serverBase + SERVLET_URL}.
 * <p>
 * When {@code criteria} sets {@code WHOLE_SURVEY_PARAM} to "false", the
 * questions are inserted starting at the order of the question identified by
 * {@code BEFORE_QUESTION_ID_PARAM}: existing questions in that group at or
 * after that order are first shifted by the sheet's last row number, and the
 * imported question orders are offset accordingly.
 * <p>
 * A failed save request is retried once; a second failure is logged and the
 * import continues with the next row. Any other failure is logged and ends
 * the import without propagating to the caller.
 *
 * @param file the .xls workbook to import
 * @param serverBase base URL of the server
 * @param criteria optional settings (may be {@code null}); the keys read are
 *            {@code BEFORE_QUESTION_ID_PARAM}, {@code WHOLE_SURVEY_PARAM},
 *            {@code KEY_PARAM} and "apiKey"
 */
@Override
public void executeImport(File file, String serverBase, Map<String, String> criteria) {
    InputStream inp = null;
    Sheet sheet1 = null;
    Integer startRow = 1;
    Long beforeQuestionId = null;
    boolean isWholeSurvey = true;
    // Resolved once and null-safely: criteria is optional, and dereferencing
    // it for every request made each row fail with a swallowed
    // NullPointerException when it was null.
    String key = null;
    if (criteria != null) {
        key = criteria.get(KEY_PARAM);
        if (criteria.get(BEFORE_QUESTION_ID_PARAM) != null) {
            beforeQuestionId = Long.valueOf(criteria.get(BEFORE_QUESTION_ID_PARAM));
        }
        if ("false".equalsIgnoreCase(criteria.get(WHOLE_SURVEY_PARAM))) {
            isWholeSurvey = false;
        }
    }
    try {
        inp = new FileInputStream(file);
        HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(inp));
        sheet1 = wb.getSheetAt(0);
        String apiKey = criteria != null ? criteria.get("apiKey") : null;
        if (!isWholeSurvey) {
            // even though there is a header row, we want lastRowNum since
            // rows are 0 indexed
            int questionCount = sheet1.getLastRowNum();
            // figure out the starting order
            QuestionDto startingQuestion = BulkDataServiceClient.loadQuestionDetails(serverBase,
                    beforeQuestionId, apiKey);
            startRow = startingQuestion.getOrder();
            // now get all the questions
            List<QuestionDto> questionsInGroup = BulkDataServiceClient.fetchQuestions(serverBase,
                    startingQuestion.getQuestionGroupId(), apiKey);

            if (questionsInGroup != null) {
                // we only need to reorder the group into which we're
                // importing

                for (QuestionDto q : questionsInGroup) {
                    if (q.getOrder() >= startRow) {
                        StringBuilder reorderBuffer = new StringBuilder();
                        reorderBuffer.append("?").append(SurveyRestRequest.ACTION_PARAM).append("=")
                                .append(SurveyRestRequest.UPDATE_QUESTION_ORDER_ACTION).append("&")
                                .append(SurveyRestRequest.QUESTION_ID_PARAM).append("=").append(q.getKeyId())
                                .append("&").append(SurveyRestRequest.QUESTION_ORDER_PARAM).append("=")
                                .append((q.getOrder() + questionCount));
                        String result = BulkDataServiceClient.fetchDataFromServer(serverBase + SERVLET_URL,
                                reorderBuffer.toString(), true, key);
                        log.debug(result);
                    }
                }
            }
        }

        for (Row row : sheet1) {
            if (row.getRowNum() >= 1) {
                // build the query string for this row, one parameter per column
                StringBuilder sb = new StringBuilder();
                sb.append("?").append(SurveyRestRequest.ACTION_PARAM).append("=")
                        .append(SurveyRestRequest.SAVE_QUESTION_ACTION).append("&");
                for (Cell cell : row) {
                    switch (cell.getColumnIndex()) {
                    case 0:
                        sb.append(SurveyRestRequest.SURVEY_GROUP_NAME_PARAM).append("=")
                                .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&");
                        break;
                    case 1:
                        sb.append(SurveyRestRequest.SURVEY_NAME_PARAM).append("=")
                                .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&");
                        break;
                    case 2:
                        sb.append(SurveyRestRequest.QUESTION_GROUP_ORDER_PARAM).append("=")
                                .append((int) cell.getNumericCellValue()).append("&");
                        break;

                    case 3:
                        sb.append(SurveyRestRequest.QUESTION_GROUP_NAME_PARAM).append("=")
                                .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&");
                        break;

                    case 4:
                        int order = (int) cell.getNumericCellValue();
                        if (!isWholeSurvey) {
                            // shift imported questions behind the insertion point
                            order += (startRow - 1);
                        }
                        sb.append(SurveyRestRequest.QUESTION_ORDER_PARAM).append("=").append(order).append("&");
                        break;

                    case 5:
                        sb.append(SurveyRestRequest.QUESTION_TEXT_PARAM).append("=")
                                .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&");
                        break;
                    case 6:
                        sb.append(SurveyRestRequest.QUESTION_TYPE_PARAM).append("=")
                                .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&");
                        break;
                    case 7:
                        sb.append(SurveyRestRequest.OPTIONS_PARAM).append("=")
                                .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&");
                        break;
                    case 8:
                        // dependency, formatted as "<question order>|<value>"
                        String valString = parseCellAsString(cell);
                        if (valString != null && valString.trim().length() > 0) {
                            String[] parts = valString.split("\\|");
                            int depOrder = Integer.parseInt(parts[0].trim());
                            if (!isWholeSurvey) {
                                depOrder += (startRow - 1);
                            }
                            sb.append(SurveyRestRequest.DEPEND_QUESTION_PARAM).append("=")
                                    .append(URLEncoder.encode(depOrder + "|" + parts[1], "UTF-8")).append("&");
                        }
                        break;
                    case 9:
                        sb.append(SurveyRestRequest.ALLOW_OTHER_PARAM).append("=")
                                .append(parseCellAsString(cell)).append("&");
                        break;
                    case 10:
                        sb.append(SurveyRestRequest.ALLOW_MULTIPLE_PARAM).append("=")
                                .append(parseCellAsString(cell)).append("&");
                        break;
                    case 11:
                        sb.append(SurveyRestRequest.MANDATORY_PARAM).append("=").append(parseCellAsString(cell))
                                .append("&");
                        break;
                    case 12:
                        // no trailing "&": the optional parameters below
                        // each prepend their own separator
                        sb.append(SurveyRestRequest.SCORING_PARAM).append("=").append(parseCellAsString(cell));
                        break;
                    case 13:
                        // min val
                        String minVal = parseCellAsString(cell);
                        if (minVal != null && minVal.trim().length() > 0) {
                            sb.append("&").append(SurveyRestRequest.VALIDATION_MIN_PARAM).append("=")
                                    .append(minVal);
                        }
                        break;
                    case 14:
                        // max val
                        String maxVal = parseCellAsString(cell);
                        if (maxVal != null && maxVal.trim().length() > 0) {
                            sb.append("&").append(SurveyRestRequest.VALIDATION_MAX_PARAM).append("=")
                                    .append(maxVal);
                        }
                        break;
                    case 15:
                        // allow sign
                        String signVal = parseCellAsString(cell);
                        if (signVal != null && signVal.trim().length() > 0) {
                            sb.append("&").append(SurveyRestRequest.VALIDATION_ALLOW_SIGN_PARAM).append("=")
                                    .append(signVal);
                        }
                        break;
                    case 16:
                        // allow decimal
                        String decimalVal = parseCellAsString(cell);
                        if (decimalVal != null && decimalVal.trim().length() > 0) {
                            sb.append("&").append(SurveyRestRequest.VALIDATION_ALLOW_DECIMAL_PARAM).append("=")
                                    .append(decimalVal);
                        }
                        break;
                    case 17:
                        // is name
                        String isNameVal = parseCellAsString(cell);
                        if (isNameVal != null && isNameVal.trim().length() > 0) {
                            sb.append("&").append(SurveyRestRequest.VALIDATION_IS_NAME_PARAM).append("=")
                                    .append(isNameVal);
                        }
                        break;

                    case 18:
                        String metricName = parseCellAsString(cell);
                        if (metricName != null && metricName.trim().length() > 0) {
                            sb.append("&").append(SurveyRestRequest.METRIC_NAME_PARAM).append("=")
                                    .append(metricName);
                        }
                        break;
                    case 19:
                        String metricGroup = parseCellAsString(cell);
                        if (metricGroup != null && metricGroup.trim().length() > 0) {
                            sb.append("&").append(SurveyRestRequest.METRIC_GROUP_PARAM).append("=")
                                    .append(metricGroup);
                        }
                        break;
                    }
                }
                try {
                    String result = BulkDataServiceClient.fetchDataFromServer(serverBase + SERVLET_URL,
                            sb.toString(), true, key);
                    log.debug(result);
                } catch (Throwable t) {
                    log.error("Error: " + t.getMessage(), t);
                    log.info("Trying again");
                    try {
                        BulkDataServiceClient.fetchDataFromServer(serverBase + SERVLET_URL, sb.toString(), true,
                                key);
                    } catch (Exception e) {
                        log.error("Error:" + e.getMessage(), e);
                        // giving up
                    }
                }
            }
        }
    } catch (Exception e) {
        log.error("Error importing survey spreadsheet: " + e.getMessage(), e);
    } finally {
        if (inp != null) {
            try {
                inp.close();
            } catch (IOException e) {
                log.error("Error closing survey spreadsheet: " + e.getMessage(), e);
            }
        }
    }
}

From source file:org.waterforpeople.mapping.dataexport.SurveySpreadsheetImporter.java

License:Open Source License

/**
 * Validates the first sheet of a survey spreadsheet and collects the problems
 * found in each data row (every row after row 0).
 * <p>
 * Columns 0-7 and 9-11 are checked; column 8 (dependency) is not validated
 * yet. Failures while opening or reading the workbook are printed to standard
 * error, and whatever was collected up to that point is returned.
 *
 * @param file the .xls workbook to validate
 * @return a map, sorted by key, from 1-based row number to the
 *         newline-separated error messages for that row; empty when no
 *         problems were found
 */
@Override
public Map<Integer, String> validate(File file) {
    InputStream inp = null;
    Sheet sheet1 = null;
    Map<Integer, String> errorMap = new TreeMap<Integer, String>();

    try {
        inp = new FileInputStream(file);
        HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(inp));
        sheet1 = wb.getSheetAt(0);
        for (Row row : sheet1) {
            StringBuilder rowError = new StringBuilder();
            if (row.getRowNum() >= 1) {
                // question type from column 6, needed when checking column 7
                String type = null;
                for (Cell cell : row) {
                    try {
                        switch (cell.getColumnIndex()) {
                        case 0:
                            if (cell.getStringCellValue().trim().length() == 0) {
                                rowError.append("Survey Group Name is missing\n");
                            }
                            break;
                        case 1:
                            if (cell.getStringCellValue().trim().length() == 0) {
                                rowError.append("Survey Name is missing\n");
                            }
                            break;
                        case 2:
                            try {
                                if (cell.getNumericCellValue() < 0) {
                                    rowError.append("Question Group Order must be a positive integer\n");
                                }
                            } catch (Exception e) {
                                rowError.append("Question group order must be a number\n");
                            }
                            break;
                        case 3:
                            if (cell.getStringCellValue().trim().length() == 0) {
                                rowError.append("Question Group Name is missing\n");
                            }
                            break;
                        case 4:
                            try {
                                if (cell.getNumericCellValue() < 0) {
                                    rowError.append("Question Id Order must be a positive integer\n");
                                }
                            } catch (Exception e) {
                                rowError.append("Question Id order must be a number\n");
                            }
                            break;
                        case 5:
                            if (cell.getStringCellValue().trim().length() == 0) {
                                rowError.append("Question Text is missing\n");
                            }
                            break;
                        case 6:
                            type = cell.getStringCellValue().trim();
                            if (type.length() == 0) {
                                rowError.append("Question Type is missing\n");
                            } else {
                                // STRENGTH belongs to the accepted list; it
                                // previously sat outside the negated group
                                // and was therefore always reported invalid.
                                boolean knownType = type.equals(QuestionDto.QuestionType.FREE_TEXT.toString())
                                        || type.equals(QuestionDto.QuestionType.PHOTO.toString())
                                        || type.equals(QuestionDto.QuestionType.VIDEO.toString())
                                        || type.equals(QuestionDto.QuestionType.GEO.toString())
                                        || type.equals(QuestionDto.QuestionType.SCAN.toString())
                                        || type.equals(QuestionDto.QuestionType.TRACK.toString())
                                        || type.equals(QuestionDto.QuestionType.NAME.toString())
                                        || type.equals(QuestionDto.QuestionType.NUMBER.toString())
                                        || type.equals(QuestionDto.QuestionType.OPTION.toString())
                                        || type.equals(QuestionDto.QuestionType.STRENGTH.toString());
                                if (!knownType) {
                                    rowError.append(
                                            "Invalid question type. Must be either: FREE_TEXT, PHOTO, VIDEO, GEO, NUMBER, OPTION, SCAN, TRACK, NAME, STRENGTH\n");
                                }
                            }
                            break;
                        case 7:
                            // options are only required for option-style questions
                            if (QuestionType.OPTION.toString().equals(type)
                                    || QuestionType.STRENGTH.toString().equals(type)) {
                                if (cell.getStringCellValue().trim().length() == 0) {
                                    rowError.append("Options are missing\n");
                                }
                            }
                            // TODO: validate language codes
                            break;
                        case 8:
                            // TODO: validate dependency
                            break;
                        case 9:
                            if (!validateBooleanField(cell)) {
                                rowError.append("Allow Other must be either TRUE or FALSE\n");
                            }
                            break;
                        case 10:
                            if (!validateBooleanField(cell)) {
                                rowError.append("Allow Multiple must be either TRUE or FALSE\n");
                            }
                            break;
                        case 11:
                            if (!validateBooleanField(cell)) {
                                rowError.append("Manditory must be either TRUE or FALSE\n");
                            }
                            break;
                        }
                    } catch (Exception e) {
                        // e.g. a cell of an unexpected type; record it against the row
                        rowError.append(e.toString());
                    } finally {
                        // refreshed after every cell so the entry always holds
                        // the errors accumulated so far for this row
                        if (rowError.toString().trim().length() > 0) {
                            errorMap.put(row.getRowNum() + 1, rowError.toString().trim());
                        }
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (inp != null) {
            try {
                inp.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return errorMap;
}

From source file:org.wso2.carbon.apimgt.impl.indexing.indexer.DocumentIndexer.java

License:Open Source License

/**
 * Write document content to document artifact as its raw content
 *
 * @param registry/*from w w  w.jav a  2s . c o m*/
 * @param documentResource
 * @return
 * @throws RegistryException
 * @throws IOException
 * @throws APIManagementException
 */
private String fetchDocumentContent(Registry registry, Resource documentResource)
        throws RegistryException, IOException, APIManagementException {
    GenericArtifactManager docArtifactManager = APIUtil.getArtifactManager(registry,
            APIConstants.DOCUMENTATION_KEY);
    GenericArtifact documentArtifact = docArtifactManager.getGenericArtifact(documentResource.getUUID());
    String sourceType = documentArtifact.getAttribute(APIConstants.DOC_SOURCE_TYPE);

    String contentString = null;
    if (Documentation.DocumentSourceType.FILE.name().equals(sourceType)) {
        Association fileAssociations[] = registry.getAssociations(documentResource.getPath(),
                APIConstants.DOCUMENTATION_FILE_ASSOCIATION);
        Association fileAssociation;

        if (fileAssociations.length < 1) {
            String error = "No document associated to API";
            log.error(error);
            throw new APIManagementException(error);
        }

        //a file document can have one file association
        fileAssociation = fileAssociations[0];
        String contentPath = fileAssociation.getDestinationPath();

        if (!registry.resourceExists(contentPath)) {
            String error = "API not found at " + contentPath;
            log.error(error);
            throw new APIManagementException(error);
        }

        Resource contentResource = registry.get(contentPath);

        String fileName = ((ResourceImpl) contentResource).getName();
        String extension = FilenameUtils.getExtension(fileName);
        InputStream inputStream = null;
        try {
            inputStream = contentResource.getContentStream();
            switch (extension) {
            case APIConstants.PDF_EXTENSION:
                PDFParser pdfParser = new PDFParser(inputStream);
                pdfParser.parse();
                COSDocument cosDocument = pdfParser.getDocument();
                PDFTextStripper stripper = new PDFTextStripper();
                contentString = stripper.getText(new PDDocument(cosDocument));
                break;
            case APIConstants.DOC_EXTENSION: {
                POIFSFileSystem pfs = new POIFSFileSystem(inputStream);
                WordExtractor msWord2003Extractor = new WordExtractor(pfs);
                contentString = msWord2003Extractor.getText();
                break;
            }
            case APIConstants.DOCX_EXTENSION:
                XWPFDocument doc = new XWPFDocument(inputStream);
                XWPFWordExtractor msWord2007Extractor = new XWPFWordExtractor(doc);
                contentString = msWord2007Extractor.getText();
                break;
            case APIConstants.XLS_EXTENSION: {
                POIFSFileSystem pfs = new POIFSFileSystem(inputStream);
                ExcelExtractor extractor = new ExcelExtractor(pfs);
                contentString = extractor.getText();
                break;
            }
            case APIConstants.XLSX_EXTENSION:
                XSSFWorkbook xssfSheets = new XSSFWorkbook(inputStream);
                XSSFExcelExtractor xssfExcelExtractor = new XSSFExcelExtractor(xssfSheets);
                contentString = xssfExcelExtractor.getText();
                break;
            case APIConstants.PPT_EXTENSION: {
                POIFSFileSystem fs = new POIFSFileSystem(inputStream);
                PowerPointExtractor extractor = new PowerPointExtractor(fs);
                contentString = extractor.getText();
                break;
            }
            case APIConstants.PPTX_EXTENSION:
                XMLSlideShow xmlSlideShow = new XMLSlideShow(inputStream);
                XSLFPowerPointExtractor xslfPowerPointExtractor = new XSLFPowerPointExtractor(xmlSlideShow);
                contentString = xslfPowerPointExtractor.getText();
                break;
            case APIConstants.TXT_EXTENSION:
            case APIConstants.WSDL_EXTENSION:
            case APIConstants.XML_DOC_EXTENSION:
                BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
                String line;
                StringBuilder contentBuilder = new StringBuilder();
                while ((line = reader.readLine()) != null) {
                    contentBuilder.append(line);
                }
                contentString = contentBuilder.toString();
                break;
            }
        } finally {
            IOUtils.closeQuietly(inputStream);
        }

    } else if (Documentation.DocumentSourceType.INLINE.name().equals(sourceType)) {
        Association contentAssociations[] = registry.getAssociations(documentResource.getPath(),
                APIConstants.DOCUMENTATION_CONTENT_ASSOCIATION);
        Association contentAssociation;

        //an inline document can have one or no content associations
        if (contentAssociations.length == 1) {
            contentAssociation = contentAssociations[0];
            String contentPath = contentAssociation.getDestinationPath();

            if (registry.resourceExists(contentPath)) {
                Resource contentResource = registry.get(contentPath);

                InputStream instream = null;
                BufferedReader reader = null;
                String line;
                try {
                    instream = contentResource.getContentStream();
                    reader = new BufferedReader(new InputStreamReader(instream));
                    StringBuilder contentBuilder = new StringBuilder();
                    while ((line = reader.readLine()) != null) {
                        contentBuilder.append(line);
                    }
                    contentString = contentBuilder.toString();
                } finally {
                    if (reader != null) {
                        IOUtils.closeQuietly(reader);
                    }
                }
            }
        }
    }
    return contentString;
}

From source file:org.wso2.carbon.pc.core.DocumentIndexer.java

License:Open Source License

/**
 * Builds the index document for a Word file: extracts its text and attaches
 * the "path" and media type fields.
 * <p>
 * The data is first read as a Word 2003 (OLE2) document; when POI reports it
 * as an Office Open XML file instead, the Word 2007 extractor is used. Any
 * other failure of the first attempt is logged and the document is indexed
 * with {@code null} text.
 *
 * @param fileData the file to index; its {@code data}, {@code path} and
 *            {@code mediaType} members are read
 * @return the index document for the file
 * @throws SolrException if an I/O error escapes the extraction, with that
 *             error attached as the cause
 * @throws RegistryException declared by the signature; not thrown directly here
 */
@Override
public IndexDocument getIndexedDocument(AsyncIndexer.File2Index fileData)
        throws SolrException, RegistryException {
    try {
        String wordText = null;
        try {
            //Extract MSWord 2003 document files
            POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data));

            WordExtractor msWord2003Extractor = new WordExtractor(fs);
            wordText = msWord2003Extractor.getText();

        } catch (OfficeXmlFileException e) {
            //if 2003 extraction failed, try with MSWord 2007 document files extractor
            XWPFDocument doc = new XWPFDocument(new ByteArrayInputStream(fileData.data));

            XWPFWordExtractor msWord2007Extractor = new XWPFWordExtractor(doc);
            wordText = msWord2007Extractor.getText();

        } catch (Exception e) {
            //The reason for not throwing an exception is that since this is an indexer that runs in the background
            //throwing an exception might lead to adverse behaviors in the client side and might lead to
            //other files not being indexed
            String msg = "Failed to extract the document while indexing";
            log.error(msg, e);
        }
        IndexDocument indexDoc = new IndexDocument(fileData.path, wordText, null);

        Map<String, List<String>> fields = new HashMap<String, List<String>>();
        fields.put("path", Arrays.asList(fileData.path));
        if (fileData.mediaType != null) {
            fields.put(IndexingConstants.FIELD_MEDIA_TYPE, Arrays.asList(fileData.mediaType));
        } else {
            // NOTE(review): the fallback media type is PDF although this
            // indexer extracts Word documents - confirm this is intended
            fields.put(IndexingConstants.FIELD_MEDIA_TYPE, Arrays.asList("application/pdf"));
        }

        indexDoc.setFields(fields);

        return indexDoc;

    } catch (IOException e) {
        String msg = "Failed to write to the index";
        log.error(msg, e);
        // keep the original failure attached as the cause
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, msg, e);
    }
}

From source file:org.wso2.carbon.registry.indexing.indexer.MSExcelIndexer.java

License:Open Source License

/**
 * Builds the index document for an Excel (.xls) file by extracting its text
 * with POI's {@code ExcelExtractor}.
 *
 * @param fileData the file to index; its {@code data} and {@code path}
 *            members are read
 * @return an index document holding the file's path and extracted text
 * @throws SolrException if the workbook cannot be read, with the underlying
 *             I/O error attached as the cause
 */
public IndexDocument getIndexedDocument(File2Index fileData) throws SolrException {
    try {
        POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data));
        ExcelExtractor extractor = new ExcelExtractor(fs);
        String excelText = extractor.getText();

        return new IndexDocument(fileData.path, excelText, null);
    } catch (IOException e) {
        String msg = "Failed to write to the index";
        log.error(msg, e);
        // keep the original failure attached as the cause
        throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
    }

}

From source file:org.wso2.carbon.registry.indexing.indexer.MSPowerpointIndexer.java

License:Open Source License

/**
 * Builds the index document for a PowerPoint (.ppt) file by extracting its
 * text with POI's {@code PowerPointExtractor}.
 *
 * @param fileData the file to index; its {@code data} and {@code path}
 *            members are read
 * @return an index document holding the file's path and extracted text
 * @throws SolrException if the presentation cannot be read, with the
 *             underlying I/O error attached as the cause
 */
public IndexDocument getIndexedDocument(File2Index fileData) throws SolrException {
    try {
        POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data));
        PowerPointExtractor extractor = new PowerPointExtractor(fs);
        String ppText = extractor.getText();

        return new IndexDocument(fileData.path, ppText, null);
    } catch (IOException e) {
        String msg = "Failed to write to the index";
        log.error(msg, e);
        // keep the original failure attached as the cause
        throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
    }

}