Example usage for edu.stanford.nlp.io IOUtils slurpInputStream

Introduction

On this page you can find example usage of edu.stanford.nlp.io.IOUtils.slurpInputStream.

Prototype

public static String slurpInputStream(InputStream input, String encoding) throws IOException

Source Link

Document

Read the contents of an input stream, decoding it according to the given character encoding.

Usage

From source file: org.exist.xquery.corenlp.Tokenize.java

License:Open Source License

/**
 * Extracts the plain-text content of the uploaded document.
 *
 * <p>The input stream is obtained from {@code uploadedFileBase64String} and is
 * always closed before this method returns, including when extraction fails.
 *
 * @param textDocType the format of the uploaded document; selects the extractor
 * @return the extracted text, or an empty string when {@code textDocType}
 *         matches none of the handled formats or when a Word document could
 *         not be parsed (the parse failure is logged, not rethrown)
 * @throws IOException if reading the uploaded stream fails
 */
private String readUploadedTextDocument(final TextDocType textDocType) throws IOException {
    String text = "";

    switch (textDocType) {
    case ODT:
        try (InputStream is = uploadedFileBase64String.getInputStream()) {
            TextDocument utd = ODPackage.createFromStream(is, "UserTextDocument").getTextDocument();
            text = utd.getCharacterContent(true); //ooMode?
        }
        break;
    case DOCX:
    case DOC:
        // Both Word formats are handled by the same extractor factory, so they
        // share one branch instead of two identical copies.
        try (InputStream is = uploadedFileBase64String.getInputStream()) {
            POITextExtractor extractor = ExtractorFactory.createExtractor(is);
            text = extractor.getText();
        } catch (InvalidFormatException ife) {
            // Best effort: a malformed document is logged and yields "".
            LOG.error(ife);
        } catch (OpenXML4JException ox4e) {
            LOG.error(ox4e);
        } catch (XmlException xe) {
            LOG.error(xe);
        }
        break;
    case TXT:
        // try-with-resources guarantees the stream is closed even if decoding
        // throws part-way through reading.
        try (InputStream is = uploadedFileBase64String.getInputStream()) {
            text = IOUtils.slurpInputStream(is, "UTF-8"); // Or null
        }
        break;
    }
    return text;
}