Example usage for the org.apache.poi.poifs.filesystem.POIFSFileSystem constructor POIFSFileSystem(InputStream)

Introduction

On this page you can find example usages of the org.apache.poi.poifs.filesystem.POIFSFileSystem constructor POIFSFileSystem(InputStream).

Prototype


public POIFSFileSystem(InputStream stream) throws IOException

Source Link

Document

Create a POIFSFileSystem from an InputStream.

Usage

From source file:org.tonguetied.datatransfer.importing.ExcelImporter.java

License:Apache License

/**
 * Initializes the parser so that it can handle the excel document: the
 * "Workbook" stream of the document is opened and every record found in it
 * is dispatched to the parser.
 * 
 * @param input the byte code representation of the excel document 
 * @throws ImportException if the input data fails to be parsed
 */
private void loadData(byte[] input) throws ImportException {
    ByteArrayInputStream rawStream = null;
    InputStream workbookStream = null;
    try {
        rawStream = new ByteArrayInputStream(input);

        // open the OLE2 container and pull out the excel part of it
        final POIFSFileSystem fileSystem = new POIFSFileSystem(rawStream);
        workbookStream = fileSystem.createDocumentInputStream("Workbook");

        // the parser is registered as the listener for ALL record types
        final HSSFRequest request = new HSSFRequest();
        request.addListenerForAllRecords(parser);

        // drive the event model over the workbook stream
        new HSSFEventFactory().processEvents(request, workbookStream);
    } catch (IOException ioe) {
        throw new ImportException(ioe);
    } finally {
        // release the document stream first, then the stream backing it
        close(workbookStream);
        close(rawStream);
    }
}

From source file:org.tonguetied.datatransfer.importing.ExcelLanguageCentricParserTest.java

License:Apache License

/**
 * Test method for {@link org.tonguetied.datatransfer.importing.ExcelLanguageCentricParser#processRecord(org.apache.poi.hssf.record.Record)}.
 * <p>
 * Runs the HSSF event model over the "Workbook" stream of
 * <code>LanguageCentricImportData.xls</code> with the parser registered for
 * all records, then checks the languages and keywords the parser collected.
 */
@Test
public final void testProcessRecord() throws Exception {
    ExcelLanguageCentricParser parser = new ExcelLanguageCentricParser(keywordService);
    InputStream is = null;
    InputStream din = null;
    try {
        // open the spreadsheet that lives in the test data directory
        File input = new File(TEST_DATA_DIR, "LanguageCentricImportData.xls");

        is = new BufferedInputStream(new FileInputStream(input));
        // create a new org.apache.poi.poifs.filesystem.Filesystem
        POIFSFileSystem poifs = new POIFSFileSystem(is);
        // get the Workbook (excel part) stream in a InputStream
        din = poifs.createDocumentInputStream("Workbook");
        // construct out HSSFRequest object
        HSSFRequest req = new HSSFRequest();
        // lazy listen for ALL records with the listener shown above
        req.addListenerForAllRecords(parser);
        // create our event factory
        HSSFEventFactory factory = new HSSFEventFactory();
        // process our events based on the document input stream
        factory.processEvents(req, din);
    } finally {
        // close the document stream as well as the file stream; the nested
        // finally makes sure the file stream is closed even when closing
        // the document stream fails
        try {
            if (din != null) {
                din.close();
            }
        } finally {
            if (is != null) {
                is.close();
            }
        }
    }

    List<Language> languages = parser.getLanguages();
    assertEquals(4, languages.size());
    assertTrue(languages.contains(defaultLanguage));
    assertTrue(languages.contains(hebrew));
    assertTrue(languages.contains(simplifiedChinese));
    assertTrue(languages.contains(traditionalChinese));

    Map<String, Keyword> keywords = parser.getKeywords();
    assertEquals(8, keywords.size());
    Keyword actual = keywords.get(keyword1.getKeyword());
    assessKeyword(keyword1, actual);

    actual = keywords.get(keyword2.getKeyword());
    assessKeyword(keyword2, actual);
}

From source file:org.tonguetied.datatransfer.importing.KeywordExcelParserTest.java

License:Apache License

/**
 * Test method for {@link ExcelKeywordParser#processRecord(org.apache.poi.hssf.record.Record)}.
 * <p>
 * Runs the HSSF event model over the "Workbook" stream of
 * <code>KeywordExcelParserTest.xls</code> with the parser registered for all
 * records, then checks the keywords and the import error codes the parser
 * collected.
 */
@Test
public final void testProcessRecord() throws Exception {
    ExcelParser parser = new ExcelKeywordParser(keywordService);
    InputStream is = null;
    InputStream din = null;
    try {
        // open the spreadsheet that lives in the test data directory
        File input = new File(TEST_DATA_DIR, "KeywordExcelParserTest.xls");

        is = new BufferedInputStream(new FileInputStream(input));
        // create a new org.apache.poi.poifs.filesystem.Filesystem
        POIFSFileSystem poifs = new POIFSFileSystem(is);
        // get the Workbook (excel part) stream in a InputStream
        din = poifs.createDocumentInputStream("Workbook");
        // construct out HSSFRequest object
        HSSFRequest req = new HSSFRequest();
        // lazy listen for ALL records with the listener shown above
        req.addListenerForAllRecords(parser);
        // create our event factory
        HSSFEventFactory factory = new HSSFEventFactory();
        // process our events based on the document input stream
        factory.processEvents(req, din);
    } finally {
        // close the document stream as well as the file stream; the nested
        // finally makes sure the file stream is closed even when closing
        // the document stream fails
        try {
            if (din != null) {
                din.close();
            }
        } finally {
            if (is != null) {
                is.close();
            }
        }
    }

    Map<String, Keyword> keywords = parser.getKeywords();
    assertEquals(7, keywords.size());
    Keyword actual = keywords.get(keyword1.getKeyword());
    assessKeyword(keyword1, actual);

    actual = keywords.get(keyword2.getKeyword());
    assessKeyword(keyword2, actual);

    actual = keywords.get(keyword3.getKeyword());
    assessKeyword(keyword3, actual);

    actual = keywords.get(keyword4.getKeyword());
    assessKeyword(keyword4, actual);
    assertTrue(actual.getTranslations().isEmpty());

    actual = keywords.get(keyword5.getKeyword());
    assessKeyword(keyword5, actual);

    final List<ImportErrorCode> errorCodes = parser.getErrorCodes();
    assertEquals(6, errorCodes.size());
    assertTrue(errorCodes.contains(ImportErrorCode.unknownCountry));
    assertTrue(errorCodes.contains(ImportErrorCode.illegalCountry));
    assertTrue(errorCodes.contains(ImportErrorCode.unknownLanguage));
    assertTrue(errorCodes.contains(ImportErrorCode.illegalLanguage));
    assertTrue(errorCodes.contains(ImportErrorCode.unknownBundle));
    assertTrue(errorCodes.contains(ImportErrorCode.illegalTranslationState));
}

From source file:org.waterforpeople.mapping.app.harness.DeleteSurveyInstanceHarness.java

License:Open Source License

/**
 * Reads the first sheet of an Excel (.xls) file and, for every row after
 * the first one, issues a GET request to
 * <code>serviceUrl + "?action=deleteSurveyInstance&amp;instanceId=N"</code>
 * where N is the numeric value of the row's column 0 truncated to an int.
 * Each requested URL and every line of each response is printed to
 * standard out. I/O failures are printed and end the processing.
 *
 * @param spreadsheetName path of the spreadsheet to read
 * @param serviceUrl URL the query string is appended to
 */
public void processSheet(String spreadsheetName, String serviceUrl) {
    InputStream inp = null;

    try {
        inp = new FileInputStream(spreadsheetName);
        HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(inp));
        int i = 0;
        Sheet sheet1 = wb.getSheetAt(0);
        for (Row row : sheet1) {
            // row 0 is skipped
            if (row.getRowNum() < 1) {
                continue;
            }
            StringBuilder sb = new StringBuilder();
            sb.append("?action=deleteSurveyInstance&");
            for (Cell cell : row) {
                if (cell.getColumnIndex() == 0) {
                    // the id is stored as a numeric cell; drop the fraction
                    sb.append("instanceId=").append((int) cell.getNumericCellValue());
                }
            }

            URL url = new URL(serviceUrl + sb.toString());
            System.out.println(i++ + " : " + serviceUrl + sb.toString());
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");
            // NOTE(review): doOutput is enabled although nothing is ever
            // written to the connection - confirm whether this is needed
            conn.setDoOutput(true);
            BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line);
                }
            } finally {
                // release the response stream even when reading fails
                reader.close();
            }
        }
    } catch (IOException e) {
        // also covers FileNotFoundException and MalformedURLException
        e.printStackTrace();
    } finally {
        // the spreadsheet stream was previously never closed
        if (inp != null) {
            try {
                inp.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:org.waterforpeople.mapping.dataexport.SurveySpreadsheetImporter.java

License:Open Source License

/**
 * Imports the questions listed on the first sheet of an Excel (.xls) file.
 * Row 0 is skipped; for every following row a save-question request is
 * built from the row's cells (columns 0-19) and sent to
 * <code>serverBase + SERVLET_URL</code>. A failed request is retried once
 * and then given up on.
 * <p>
 * When the WHOLE_SURVEY_PARAM criterion is "false" (ignoring case), the
 * question identified by BEFORE_QUESTION_ID_PARAM is loaded first and an
 * update-order request is sent for every question of its group whose order
 * is at or after that question's order, adding the sheet's last row number
 * to the order. The orders read from the sheet are then offset by the
 * starting order minus one.
 *
 * @param file the spreadsheet to import
 * @param serverBase base URL of the server
 * @param criteria optional settings; may be <code>null</code>
 */
@Override
public void executeImport(File file, String serverBase, Map<String, String> criteria) {
    InputStream inp = null;
    Sheet sheet1 = null;
    Integer startRow = 1;
    Long beforeQuestionId = null;
    boolean isWholeSurvey = true;
    // criteria is optional, so everything read from it is looked up once
    // here, behind the null check, instead of being dereferenced again
    // further down where a null map would raise a NullPointerException
    String apiKey = null;
    String key = null;
    if (criteria != null) {
        if (criteria.get(BEFORE_QUESTION_ID_PARAM) != null) {
            beforeQuestionId = Long.valueOf(criteria.get(BEFORE_QUESTION_ID_PARAM));
        }
        if ("false".equalsIgnoreCase(criteria.get(WHOLE_SURVEY_PARAM))) {
            isWholeSurvey = false;
        }
        apiKey = criteria.get("apiKey");
        key = criteria.get(KEY_PARAM);
    }
    try {
        inp = new FileInputStream(file);
        HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(inp));
        sheet1 = wb.getSheetAt(0);
        if (!isWholeSurvey) {
            // even though there is a header row, we want lastRowNum since
            // rows are 0 indexed
            int questionCount = sheet1.getLastRowNum();
            // figure out the starting order
            QuestionDto startingQuestion = BulkDataServiceClient.loadQuestionDetails(serverBase,
                    beforeQuestionId, apiKey);
            startRow = startingQuestion.getOrder();
            // now get all the questions
            List<QuestionDto> questionsInGroup = BulkDataServiceClient.fetchQuestions(serverBase,
                    startingQuestion.getQuestionGroupId(), apiKey);

            if (questionsInGroup != null) {
                // we only need to reorder the group into which we're
                // importing

                for (QuestionDto q : questionsInGroup) {
                    if (q.getOrder() >= startRow) {
                        StringBuilder reorderBuffer = new StringBuilder();
                        reorderBuffer.append("?").append(SurveyRestRequest.ACTION_PARAM).append("=")
                                .append(SurveyRestRequest.UPDATE_QUESTION_ORDER_ACTION).append("&")
                                .append(SurveyRestRequest.QUESTION_ID_PARAM).append("=").append(q.getKeyId())
                                .append("&").append(SurveyRestRequest.QUESTION_ORDER_PARAM).append("=")
                                .append((q.getOrder() + questionCount));
                        String result = BulkDataServiceClient.fetchDataFromServer(serverBase + SERVLET_URL,
                                reorderBuffer.toString(), true, key);
                        log.debug(result);
                    }
                }
            }
        }

        for (Row row : sheet1) {
            if (row.getRowNum() >= 1) {
                StringBuilder sb = new StringBuilder();
                sb.append("?").append(SurveyRestRequest.ACTION_PARAM).append("=")
                        .append(SurveyRestRequest.SAVE_QUESTION_ACTION).append("&");
                for (Cell cell : row) {
                    switch (cell.getColumnIndex()) {
                    case 0:
                        sb.append(SurveyRestRequest.SURVEY_GROUP_NAME_PARAM).append("=")
                                .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&");
                        break;
                    case 1:
                        sb.append(SurveyRestRequest.SURVEY_NAME_PARAM).append("=")
                                .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&");
                        break;
                    case 2:
                        sb.append(SurveyRestRequest.QUESTION_GROUP_ORDER_PARAM).append("=")
                                .append((int) cell.getNumericCellValue()).append("&");
                        break;

                    case 3:
                        sb.append(SurveyRestRequest.QUESTION_GROUP_NAME_PARAM).append("=")
                                .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&");
                        break;

                    case 4:
                        int order = (int) cell.getNumericCellValue();
                        if (!isWholeSurvey) {
                            // shift the sheet's order behind the insertion point
                            order += (startRow - 1);
                        }
                        sb.append(SurveyRestRequest.QUESTION_ORDER_PARAM).append("=").append(order).append("&");
                        break;

                    case 5:
                        sb.append(SurveyRestRequest.QUESTION_TEXT_PARAM).append("=")
                                .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&");
                        break;
                    case 6:
                        sb.append(SurveyRestRequest.QUESTION_TYPE_PARAM).append("=")
                                .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&");
                        break;
                    case 7:
                        sb.append(SurveyRestRequest.OPTIONS_PARAM).append("=")
                                .append(URLEncoder.encode(parseCellAsString(cell).trim(), "UTF-8")).append("&");
                        break;
                    case 8:
                        // dependency, written as "<question order>|<rest>"
                        String valString = parseCellAsString(cell);
                        if (valString != null && valString.trim().length() > 0) {
                            String[] parts = valString.split("\\|");
                            int depOrder = Integer.parseInt(parts[0].trim());
                            if (!isWholeSurvey) {
                                depOrder += (startRow - 1);
                            }
                            // NOTE(review): a value without '|' makes parts[1]
                            // throw, which aborts the whole import through the
                            // outer catch - confirm that this is intended
                            sb.append(SurveyRestRequest.DEPEND_QUESTION_PARAM).append("=")
                                    .append(URLEncoder.encode(depOrder + "|" + parts[1], "UTF-8")).append("&");
                        }
                        break;
                    case 9:
                        sb.append(SurveyRestRequest.ALLOW_OTHER_PARAM).append("=")
                                .append(parseCellAsString(cell)).append("&");
                        break;
                    case 10:
                        sb.append(SurveyRestRequest.ALLOW_MULTIPLE_PARAM).append("=")
                                .append(parseCellAsString(cell)).append("&");
                        break;
                    case 11:
                        sb.append(SurveyRestRequest.MANDATORY_PARAM).append("=").append(parseCellAsString(cell))
                                .append("&");
                        break;
                    case 12:
                        // no trailing "&" here: the optional parameters below
                        // each bring their own leading "&"
                        sb.append(SurveyRestRequest.SCORING_PARAM).append("=").append(parseCellAsString(cell));
                        break;
                    case 13:
                        // min val
                        String minVal = parseCellAsString(cell);
                        if (minVal != null && minVal.trim().length() > 0) {
                            sb.append("&").append(SurveyRestRequest.VALIDATION_MIN_PARAM).append("=")
                                    .append(minVal);
                        }
                        break;
                    case 14:
                        // max val
                        String maxVal = parseCellAsString(cell);
                        if (maxVal != null && maxVal.trim().length() > 0) {
                            sb.append("&").append(SurveyRestRequest.VALIDATION_MAX_PARAM).append("=")
                                    .append(maxVal);
                        }
                        break;
                    case 15:
                        // allow sign
                        String signVal = parseCellAsString(cell);
                        if (signVal != null && signVal.trim().length() > 0) {
                            sb.append("&").append(SurveyRestRequest.VALIDATION_ALLOW_SIGN_PARAM).append("=")
                                    .append(signVal);
                        }
                        break;
                    case 16:
                        // allow decimal
                        String decimalVal = parseCellAsString(cell);
                        if (decimalVal != null && decimalVal.trim().length() > 0) {
                            sb.append("&").append(SurveyRestRequest.VALIDATION_ALLOW_DECIMAL_PARAM).append("=")
                                    .append(decimalVal);
                        }
                        break;
                    case 17:
                        // is name
                        String isNameVal = parseCellAsString(cell);
                        if (isNameVal != null && isNameVal.trim().length() > 0) {
                            sb.append("&").append(SurveyRestRequest.VALIDATION_IS_NAME_PARAM).append("=")
                                    .append(isNameVal);
                        }
                        break;

                    case 18:
                        // metric name
                        String metricName = parseCellAsString(cell);
                        if (metricName != null && metricName.trim().length() > 0) {
                            sb.append("&").append(SurveyRestRequest.METRIC_NAME_PARAM).append("=")
                                    .append(metricName);
                        }
                        break;
                    case 19:
                        // metric group
                        String metricGroup = parseCellAsString(cell);
                        if (metricGroup != null && metricGroup.trim().length() > 0) {
                            sb.append("&").append(SurveyRestRequest.METRIC_GROUP_PARAM).append("=")
                                    .append(metricGroup);
                        }
                        break;
                    }
                }
                try {
                    String result = BulkDataServiceClient.fetchDataFromServer(serverBase + SERVLET_URL,
                            sb.toString(), true, key);
                    log.debug(result);
                } catch (Throwable t) {
                    log.error("Error: " + t.getMessage(), t);
                    log.info("Trying again");
                    try {
                        BulkDataServiceClient.fetchDataFromServer(serverBase + SERVLET_URL, sb.toString(), true,
                                key);
                    } catch (Exception e) {
                        log.error("Error:" + e.getMessage(), e);
                        // giving up
                    }
                }
            }
        }
    } catch (Exception e) {
        // anything thrown outside the per-row request handling ends the import
        log.error("Error: " + e.getMessage(), e);
    } finally {
        if (inp != null) {
            try {
                inp.close();
            } catch (IOException e) {
                log.error("Error: " + e.getMessage(), e);
            }
        }
    }
}

From source file:org.waterforpeople.mapping.dataexport.SurveySpreadsheetImporter.java

License:Open Source License

/**
 * Checks the first sheet of an Excel (.xls) file row by row (row 0 is
 * skipped) and collects the problems found in columns 0-11.
 *
 * @param file the spreadsheet to check
 * @return a map, sorted by key, from the 1-based row number to the error
 *         text of that row; rows without errors have no entry. The map is
 *         empty or incomplete when the file cannot be read, because the
 *         failure is only printed.
 */
@Override
public Map<Integer, String> validate(File file) {
    InputStream inp = null;
    Sheet sheet1 = null;
    Map<Integer, String> errorMap = new TreeMap<Integer, String>();

    try {
        inp = new FileInputStream(file);
        HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(inp));
        sheet1 = wb.getSheetAt(0);
        for (Row row : sheet1) {
            StringBuffer rowError = new StringBuffer();
            if (row.getRowNum() >= 1) {
                // question type of this row, captured at column 6 and needed
                // again when the options in column 7 are checked
                String type = null;
                for (Cell cell : row) {
                    try {
                        switch (cell.getColumnIndex()) {
                        case 0:
                            if (cell.getStringCellValue().trim().length() == 0) {
                                rowError.append("Survey Group Name is missing\n");
                            }
                            break;
                        case 1:
                            if (cell.getStringCellValue().trim().length() == 0) {
                                rowError.append("Survey Name is missing\n");
                            }
                            break;
                        case 2:
                            try {
                                if (cell.getNumericCellValue() < 0) {
                                    rowError.append("Question Group Order must be a positive integer\n");
                                }
                            } catch (Exception e) {
                                rowError.append("Question group order must be a number\n");
                            }
                            break;
                        case 3:
                            if (cell.getStringCellValue().trim().length() == 0) {
                                rowError.append("Question Group Name is missing\n");
                            }
                            break;
                        case 4:
                            try {
                                if (cell.getNumericCellValue() < 0) {
                                    rowError.append("Question Id Order must be a positive integer\n");
                                }
                            } catch (Exception e) {
                                rowError.append("Question Id order must be a number\n");
                            }
                            break;
                        case 5:
                            if (cell.getStringCellValue().trim().length() == 0) {
                                rowError.append("Question Text is missing\n");
                            }
                            break;
                        case 6:
                            type = cell.getStringCellValue().trim();
                            if (type.length() == 0) {
                                rowError.append("Question Type is missing\n");
                            } else {
                                // every known type, STRENGTH included, has to
                                // be inside the negated group; with STRENGTH
                                // outside of it a valid STRENGTH question was
                                // always reported as invalid
                                if (!(type.equals(QuestionDto.QuestionType.FREE_TEXT.toString())
                                        || type.equals(QuestionDto.QuestionType.PHOTO.toString())
                                        || type.equals(QuestionDto.QuestionType.VIDEO.toString())
                                        || type.equals(QuestionDto.QuestionType.GEO.toString())
                                        || type.equals(QuestionDto.QuestionType.SCAN.toString())
                                        || type.equals(QuestionDto.QuestionType.TRACK.toString())
                                        || type.equals(QuestionDto.QuestionType.NAME.toString())
                                        || type.equals(QuestionDto.QuestionType.NUMBER.toString())
                                        || type.equals(QuestionDto.QuestionType.OPTION.toString())
                                        || type.equals(QuestionDto.QuestionType.STRENGTH.toString()))) {
                                    rowError.append(
                                            "Invalid question type. Must be either: FREE_TEXT, PHOTO, VIDEO, GEO, NUMBER, OPTION, SCAN, TRACK, NAME, STRENGTH\n");
                                }
                            }
                            break;
                        case 7:
                            // options are only required for these two types
                            if (QuestionType.OPTION.toString().equals(type)
                                    || QuestionType.STRENGTH.toString().equals(type)) {
                                if (cell.getStringCellValue().trim().length() == 0) {
                                    rowError.append("Options are missing\n");
                                }
                            }
                            // TODO: validate language codes
                            break;
                        case 8:
                            // TODO: validate dependency
                            break;
                        case 9:
                            if (!validateBooleanField(cell)) {
                                rowError.append("Allow Other must be either TRUE or FALSE\n");
                            }
                            break;
                        case 10:
                            if (!validateBooleanField(cell)) {
                                rowError.append("Allow Multiple must be either TRUE or FALSE\n");
                            }
                            break;
                        case 11:
                            if (!validateBooleanField(cell)) {
                                rowError.append("Manditory must be either TRUE or FALSE\n");
                            }
                            break;
                        }
                    } catch (Exception e) {
                        // e.g. a cell of an unexpected type; record it as a
                        // row error instead of aborting the validation
                        rowError.append(e.toString());
                    } finally {
                        // refreshed after every cell so the entry always
                        // holds all errors gathered for the row so far
                        if (rowError.toString().trim().length() > 0) {
                            errorMap.put(row.getRowNum() + 1, rowError.toString().trim());
                        }
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (inp != null) {
            try {
                inp.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return errorMap;
}

From source file:org.wso2.carbon.apimgt.impl.indexing.indexer.DocumentIndexer.java

License:Open Source License

/**
 * Write document content to document artifact as its raw content
 *
 * @param registry/*from w w  w.jav a  2s . c o m*/
 * @param documentResource
 * @return
 * @throws RegistryException
 * @throws IOException
 * @throws APIManagementException
 */
private String fetchDocumentContent(Registry registry, Resource documentResource)
        throws RegistryException, IOException, APIManagementException {
    GenericArtifactManager docArtifactManager = APIUtil.getArtifactManager(registry,
            APIConstants.DOCUMENTATION_KEY);
    GenericArtifact documentArtifact = docArtifactManager.getGenericArtifact(documentResource.getUUID());
    String sourceType = documentArtifact.getAttribute(APIConstants.DOC_SOURCE_TYPE);

    String contentString = null;
    if (Documentation.DocumentSourceType.FILE.name().equals(sourceType)) {
        Association fileAssociations[] = registry.getAssociations(documentResource.getPath(),
                APIConstants.DOCUMENTATION_FILE_ASSOCIATION);
        Association fileAssociation;

        if (fileAssociations.length < 1) {
            String error = "No document associated to API";
            log.error(error);
            throw new APIManagementException(error);
        }

        //a file document can have one file association
        fileAssociation = fileAssociations[0];
        String contentPath = fileAssociation.getDestinationPath();

        if (!registry.resourceExists(contentPath)) {
            String error = "API not found at " + contentPath;
            log.error(error);
            throw new APIManagementException(error);
        }

        Resource contentResource = registry.get(contentPath);

        String fileName = ((ResourceImpl) contentResource).getName();
        String extension = FilenameUtils.getExtension(fileName);
        InputStream inputStream = null;
        try {
            inputStream = contentResource.getContentStream();
            switch (extension) {
            case APIConstants.PDF_EXTENSION:
                PDFParser pdfParser = new PDFParser(inputStream);
                pdfParser.parse();
                COSDocument cosDocument = pdfParser.getDocument();
                PDFTextStripper stripper = new PDFTextStripper();
                contentString = stripper.getText(new PDDocument(cosDocument));
                break;
            case APIConstants.DOC_EXTENSION: {
                POIFSFileSystem pfs = new POIFSFileSystem(inputStream);
                WordExtractor msWord2003Extractor = new WordExtractor(pfs);
                contentString = msWord2003Extractor.getText();
                break;
            }
            case APIConstants.DOCX_EXTENSION:
                XWPFDocument doc = new XWPFDocument(inputStream);
                XWPFWordExtractor msWord2007Extractor = new XWPFWordExtractor(doc);
                contentString = msWord2007Extractor.getText();
                break;
            case APIConstants.XLS_EXTENSION: {
                POIFSFileSystem pfs = new POIFSFileSystem(inputStream);
                ExcelExtractor extractor = new ExcelExtractor(pfs);
                contentString = extractor.getText();
                break;
            }
            case APIConstants.XLSX_EXTENSION:
                XSSFWorkbook xssfSheets = new XSSFWorkbook(inputStream);
                XSSFExcelExtractor xssfExcelExtractor = new XSSFExcelExtractor(xssfSheets);
                contentString = xssfExcelExtractor.getText();
                break;
            case APIConstants.PPT_EXTENSION: {
                POIFSFileSystem fs = new POIFSFileSystem(inputStream);
                PowerPointExtractor extractor = new PowerPointExtractor(fs);
                contentString = extractor.getText();
                break;
            }
            case APIConstants.PPTX_EXTENSION:
                XMLSlideShow xmlSlideShow = new XMLSlideShow(inputStream);
                XSLFPowerPointExtractor xslfPowerPointExtractor = new XSLFPowerPointExtractor(xmlSlideShow);
                contentString = xslfPowerPointExtractor.getText();
                break;
            case APIConstants.TXT_EXTENSION:
            case APIConstants.WSDL_EXTENSION:
            case APIConstants.XML_DOC_EXTENSION:
                BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
                String line;
                StringBuilder contentBuilder = new StringBuilder();
                while ((line = reader.readLine()) != null) {
                    contentBuilder.append(line);
                }
                contentString = contentBuilder.toString();
                break;
            }
        } finally {
            IOUtils.closeQuietly(inputStream);
        }

    } else if (Documentation.DocumentSourceType.INLINE.name().equals(sourceType)) {
        Association contentAssociations[] = registry.getAssociations(documentResource.getPath(),
                APIConstants.DOCUMENTATION_CONTENT_ASSOCIATION);
        Association contentAssociation;

        //an inline document can have one or no content associations
        if (contentAssociations.length == 1) {
            contentAssociation = contentAssociations[0];
            String contentPath = contentAssociation.getDestinationPath();

            if (registry.resourceExists(contentPath)) {
                Resource contentResource = registry.get(contentPath);

                InputStream instream = null;
                BufferedReader reader = null;
                String line;
                try {
                    instream = contentResource.getContentStream();
                    reader = new BufferedReader(new InputStreamReader(instream));
                    StringBuilder contentBuilder = new StringBuilder();
                    while ((line = reader.readLine()) != null) {
                        contentBuilder.append(line);
                    }
                    contentString = contentBuilder.toString();
                } finally {
                    if (reader != null) {
                        IOUtils.closeQuietly(reader);
                    }
                }
            }
        }
    }
    return contentString;
}

From source file:org.wso2.carbon.pc.core.DocumentIndexer.java

License:Open Source License

/**
 * Builds the index document for a Word file held in memory. The bytes are
 * first read as a MS Word 2003 (OLE2) document; when POI reports an
 * {@link OfficeXmlFileException} the MS Word 2007 extractor is used
 * instead. Any other extraction failure is logged and the document is
 * indexed without text. The fields "path" and the media type are always
 * set.
 *
 * @param fileData supplies the raw bytes, the path and the media type
 * @return the index document for the file
 * @throws SolrException with SERVER_ERROR when an IOException escapes the
 *                       extraction, i.e. from the Word 2007 fallback
 * @throws RegistryException declared by the signature; not raised by the
 *                           code of this method
 */
@Override
public IndexDocument getIndexedDocument(AsyncIndexer.File2Index fileData)
        throws SolrException, RegistryException {
    try {
        String wordText = null;
        try {
            //Extract MSWord 2003 document files
            POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data));

            WordExtractor msWord2003Extractor = new WordExtractor(fs);
            wordText = msWord2003Extractor.getText();

        } catch (OfficeXmlFileException e) {
            //if 2003 extraction failed, try with MSWord 2007 document files extractor
            XWPFDocument doc = new XWPFDocument(new ByteArrayInputStream(fileData.data));

            XWPFWordExtractor msWord2007Extractor = new XWPFWordExtractor(doc);
            wordText = msWord2007Extractor.getText();

        } catch (Exception e) {
            //The reason for not throwing an exception is that since this is an indexer that runs in the background
            //throwing an exception might lead to adverse behaviors in the client side and might lead to
            //other files not being indexed
            String msg = "Failed to extract the document while indexing";
            log.error(msg, e);
        }
        IndexDocument indexDoc = new IndexDocument(fileData.path, wordText, null);

        Map<String, List<String>> fields = new HashMap<String, List<String>>();
        fields.put("path", Arrays.asList(fileData.path));
        if (fileData.mediaType != null) {
            fields.put(IndexingConstants.FIELD_MEDIA_TYPE, Arrays.asList(fileData.mediaType));
        } else {
            // NOTE(review): a PDF media type as the default of a Word
            // indexer looks unintended - confirm
            fields.put(IndexingConstants.FIELD_MEDIA_TYPE, Arrays.asList("application/pdf"));
        }

        indexDoc.setFields(fields);

        return indexDoc;

    } catch (IOException e) {
        String msg = "Failed to write to the index";
        log.error(msg, e);
        // hand the cause on so that callers can see what actually failed
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, msg, e);
    }
}

From source file:org.wso2.carbon.registry.indexing.indexer.MSExcelIndexer.java

License:Open Source License

/**
 * Extracts the text of an Excel (OLE2) document held in memory and wraps
 * it, together with the file's path, in an {@link IndexDocument}.
 *
 * @param fileData supplies the raw bytes and the path of the file
 * @return the index document carrying the extracted text
 * @throws SolrException with SERVER_ERROR when reading the document fails
 *                       with an IOException
 */
public IndexDocument getIndexedDocument(File2Index fileData) throws SolrException {
    try {
        POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data));
        ExcelExtractor extractor = new ExcelExtractor(fs);
        String excelText = extractor.getText();

        return new IndexDocument(fileData.path, excelText, null);
    } catch (IOException e) {
        String msg = "Failed to write to the index";
        log.error(msg, e);
        // hand the cause on so that callers can see what actually failed
        throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
    }

}

From source file:org.wso2.carbon.registry.indexing.indexer.MSPowerpointIndexer.java

License:Open Source License

/**
 * Extracts the text of a PowerPoint (OLE2) document held in memory and
 * wraps it, together with the file's path, in an {@link IndexDocument}.
 *
 * @param fileData supplies the raw bytes and the path of the file
 * @return the index document carrying the extracted text
 * @throws SolrException with SERVER_ERROR when reading the document fails
 *                       with an IOException
 */
public IndexDocument getIndexedDocument(File2Index fileData) throws SolrException {
    try {
        POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data));
        PowerPointExtractor extractor = new PowerPointExtractor(fs);
        String ppText = extractor.getText();

        return new IndexDocument(fileData.path, ppText, null);
    } catch (IOException e) {
        String msg = "Failed to write to the index";
        log.error(msg, e);
        // hand the cause on so that callers can see what actually failed
        throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
    }

}