Example usage for javax.swing.text DefaultStyledDocument getText

List of usage examples for javax.swing.text DefaultStyledDocument getText

Introduction

On this page you can find usage examples for the getText method of javax.swing.text.DefaultStyledDocument.

Prototype

public String getText(int offset, int length) throws BadLocationException 

Source Link

Document

Gets a sequence of text from the document.

Usage

From source file:com.liferay.portal.util.LuceneFields.java

/**
 * Builds a field named {@code field} from the content of {@code file}.
 *
 * <p>For the extensions {@code .doc}, {@code .htm}/{@code .html}, {@code .pdf},
 * {@code .rtf} and {@code .xls} the text is extracted with a format-specific
 * parser and passed to the field as a {@code String}. For any other extension,
 * or when extraction fails (the failure is logged), the field is created from a
 * reader over the file instead.
 *
 * @param field name of the field to create
 * @param file file whose content is read
 * @param fileExt file extension including the leading dot; it is lower-cased
 *        before being compared
 * @return a field holding either the extracted text or a reader over the file
 * @throws IOException if the file cannot be opened
 */
public static Field getFile(String field, File file, String fileExt) throws IOException {

    fileExt = fileExt.toLowerCase();

    FileInputStream fis = new FileInputStream(file);
    Reader reader = new BufferedReader(new InputStreamReader(fis));

    String text = null;

    if (fileExt.equals(".doc")) {
        try {
            WordDocument wordDocument = new WordDocument(fis);

            StringWriter stringWriter = new StringWriter();

            wordDocument.writeAllText(stringWriter);

            text = stringWriter.toString();
        } catch (Exception e) {
            _log.error(e.getMessage());
        }
    } else if (fileExt.equals(".htm") || fileExt.equals(".html")) {
        try {
            DefaultStyledDocument dsd = new DefaultStyledDocument();

            HTMLEditorKit htmlEditorKit = new HTMLEditorKit();
            htmlEditorKit.read(reader, dsd, 0);

            text = dsd.getText(0, dsd.getLength());
        } catch (Exception e) {
            _log.error(e.getMessage());
        }
    } else if (fileExt.equals(".pdf")) {
        try {
            PDFParser parser = new PDFParser(fis);
            parser.parse();

            PDDocument pdDoc = parser.getPDDocument();

            try {
                StringWriter stringWriter = new StringWriter();

                PDFTextStripper stripper = new PDFTextStripper();
                stripper.setLineSeparator("\n");
                stripper.writeText(pdDoc, stringWriter);

                text = stringWriter.toString();
            } finally {
                // Release the parsed document even when text stripping fails.
                pdDoc.close();
            }
        } catch (Exception e) {
            _log.error(e.getMessage());
        }
    } else if (fileExt.equals(".rtf")) {
        try {
            DefaultStyledDocument dsd = new DefaultStyledDocument();

            RTFEditorKit rtfEditorKit = new RTFEditorKit();
            rtfEditorKit.read(reader, dsd, 0);

            text = dsd.getText(0, dsd.getLength());
        } catch (Exception e) {
            _log.error(e.getMessage());
        }
    } else if (fileExt.equals(".xls")) {
        try {
            XLSTextStripper stripper = new XLSTextStripper(fis);

            text = stripper.getText();
        } catch (Exception e) {
            _log.error(e.getMessage());
        }
    }

    if (text == null) {
        // The reader is handed over to the field, so it must stay open here.
        // NOTE(review): after a failed extraction the underlying stream may
        // already be partially consumed - confirm this fallback is intended.
        return new Field(field, reader);
    }

    // The text is now held as a String; the file stream is no longer needed.
    // Closing the reader also closes the wrapped FileInputStream.
    try {
        reader.close();
    } catch (IOException ioe) {
        _log.error(ioe.getMessage());
    }

    return new Field(field, text, Field.Store.YES, Field.Index.NOT_ANALYZED);
}

From source file:com.stimulus.archiva.extraction.RTFExtractor.java

/**
 * Parses an RTF stream, writes its plain text to a temporary file and returns
 * a reader over that file.
 *
 * <p>The temporary file and the returned reader are both registered with
 * {@code indexInfo} (via {@code addDeleteFile} and {@code addReader}).
 *
 * @param is stream containing the RTF document; it is closed (through the
 *        wrapping reader) before this method returns
 * @param charset not used by this implementation; the stream is decoded with
 *        the platform default charset
 * @param indexInfo receives the temporary file and the returned reader
 * @return a reader over the extracted plain text
 * @throws ExtractionException if parsing the document or opening the
 *         extracted text fails
 */
public Reader getText(InputStream is, Charset charset, IndexInfo indexInfo) throws ExtractionException {

    Reader reader = null;
    FileWriter writer = null;
    File file = null;
    try {
        reader = new InputStreamReader(is);
        file = File.createTempFile("extract_rtf", ".tmp");
        indexInfo.addDeleteFile(file);
        writer = new FileWriter(file);
        DefaultStyledDocument doc = new DefaultStyledDocument();
        new RTFEditorKit().read(reader, doc, 0);
        writer.write(doc.getText(0, doc.getLength()));
    } catch (Throwable t) {
        // Throwable is kept on purpose so callers continue to see every
        // parser failure wrapped as an ExtractionException.
        throw new ExtractionException("failed to parse rtf document", t, logger);
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException ignored) {
                // best effort: the input has already been fully processed
            }
        }

        if (writer != null) {
            try {
                writer.close();
            } catch (IOException ignored) {
                // best effort: a failure here is deliberately not reported
            }
        }
    }
    try {
        Reader outReader = new FileReader(file);
        indexInfo.addReader(outReader);
        return outReader;
    } catch (Exception ex) {
        // The message previously named "powerpoint", a copy/paste slip.
        throw new ExtractionException("failed to extract text from rtf document", ex, logger,
                ChainedException.Level.DEBUG);
    }

}

From source file:framework.retrieval.engine.index.create.impl.file.parse.RTFFileContentParser.java

/**
 * Reads the file behind {@code document} as RTF and returns its plain text.
 *
 * <p>Failures while reading or parsing are logged through
 * {@code RetrievalUtil.errorLog} and result in an empty string.
 *
 * @param document wrapper providing the file to parse
 * @param charsetName not used by this implementation
 * @return the extracted text, or {@code ""} when extraction failed
 */
public String getContent(RFileDocument document, String charsetName) {
    InputStream rtfStream = null;
    String extracted = "";
    try {
        rtfStream = new FileInputStream(document.getFile());
        DefaultStyledDocument parsed = new DefaultStyledDocument();
        RTFEditorKit kit = new RTFEditorKit();
        kit.read(rtfStream, parsed, 0);
        extracted = parsed.getText(0, parsed.getLength());
    } catch (Exception e) {
        RetrievalUtil.errorLog(log, document.getFile().getAbsolutePath(), e);
    } finally {
        if (rtfStream != null) {
            try {
                rtfStream.close();
            } catch (Exception e) {
                RetrievalUtil.errorLog(log, e);
            }
        }
    }
    return extracted;
}

From source file:com.aurel.track.lucene.index.associatedFields.textExctractor.HTMLExtractor.java

/**
 * Gets the text from file content /*from ww w . jav a2  s  .  c om*/
 * @param file
 * @param fileExtension
 * @return
 */
@Override
public String getText(File file, String fileExtension) {
    FileInputStream fis = null;
    Reader reader = null;
    try {
        try {
            fis = new FileInputStream(file);
        } catch (FileNotFoundException e) {
            LOGGER.info("File " + file.getName() + " not found. " + e.getMessage());
            LOGGER.debug(ExceptionUtils.getStackTrace(e));
            return null;
        }
        reader = new BufferedReader(new InputStreamReader(fis));
        DefaultStyledDocument dsd = new DefaultStyledDocument();
        HTMLEditorKit htmlEditorKit = new HTMLEditorKit();
        htmlEditorKit.read(reader, dsd, 0);
        return dsd.getText(0, dsd.getLength());
    } catch (Exception e) {
        LOGGER.debug("Extracting text from the .htm or .html  file " + file.getName() + " failed with "
                + e.getMessage());
        LOGGER.error(ExceptionUtils.getStackTrace(e));
    } finally {
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (Exception e) {
            LOGGER.debug("Closing the reader for file " + file.getName() + " failed with " + e.getMessage());
        }
        try {
            if (fis != null) {
                fis.close();
            }
        } catch (Exception e) {
            LOGGER.debug("Closing the FileInputStream for file " + file.getName() + " failed with "
                    + e.getMessage());
        }
    }
    return null;
}

From source file:com.croer.javaorange.diviner.SimpleOrangeTextPane.java

/**
 * Queues a task on the AWT event queue that applies the entries of
 * {@code styles} to the tokens of {@code document}, cycling through the
 * styles in order (token 0 gets style 0, token 1 gets style 1, ...).
 *
 * <p>If the document text cannot be read, the failure is logged and no
 * attributes are changed.
 *
 * @param document the document whose tokens are re-styled
 */
protected void colorStyledDocument(final DefaultStyledDocument document) {
    EventQueue.invokeLater(new Runnable() {

        @Override
        public void run() {
            String input = "";
            try {
                input = document.getText(0, document.getLength());
            } catch (BadLocationException ex) {
                Logger.getLogger(SimpleOrangeTextPane.class.getName()).log(Level.SEVERE, null, ex);
            }

            // StringUtils.split (presumably Apache Commons Lang - verify) yields
            // the whitespace-separated tokens in document order, so each token
            // can be located by searching forward from the end of the previous
            // one. This gives the same offsets as rescanning from 0 and masking
            // consumed tokens, without the repeated scans.
            int searchFrom = 0;
            int styleIndex = 0;
            for (String token : StringUtils.split(input)) {
                int start = input.indexOf(token, searchFrom);
                document.setCharacterAttributes(start, token.length(),
                        styles[styleIndex++ % styles.length], true);
                searchFrom = start + token.length();
            }
        }
    });
}

From source file:edu.ur.ir.index.DefaultRtfTextExtractor.java

/**
 * Extracts the plain text from a Rich Text Format file.
 *
 * @param f the RTF file to read
 * @return the extracted text, or {@code null} when the file is empty, is
 *         rejected by {@code isFileTooLarge}, or contains only whitespace
 * @throws Exception if reading or parsing fails; the failure is logged
 *         before it is rethrown
 *
 * @see edu.ur.ir.index.FileTextExtractor#getText(java.io.File)
 */
public String getText(File f) throws Exception {
    // skip files that are too large or have no content at all
    if (isFileTooLarge(f) || f.length() <= 0L) {
        return null;
    }

    DefaultStyledDocument rtfDocument = new DefaultStyledDocument();
    RTFEditorKit rtfKit = new RTFEditorKit();
    FileInputStream in = null;

    try {
        in = new FileInputStream(f);
        rtfKit.read(in, rtfDocument, 0);

        String extracted = rtfDocument.getText(0, rtfDocument.getLength());
        boolean hasContent = extracted != null && !extracted.trim().equals("");
        return hasContent ? extracted : null;
    } catch (OutOfMemoryError oome) {
        log.error("could not extract text", oome);
        throw oome;
    } catch (Exception e) {
        log.error("could not get text for rich text document " + f.getAbsolutePath(), e);
        throw e;
    } finally {
        closeInputStream(in);
    }
}

From source file:com.aurel.track.lucene.index.associatedFields.textExctractor.RTFExtractor.java

/**
 * Gets the text from file content /*from   w ww.j a  va 2s  .c  o m*/
 * @param file
 * @param fileExtension
 * @return
 */
@Override
public String getText(File file, String fileExtension) {
    FileInputStream fis = null;
    Reader reader = null;
    try {
        DefaultStyledDocument dsd = new DefaultStyledDocument();
        RTFEditorKit rtfEditorKit = new RTFEditorKit();

        try {
            fis = new FileInputStream(file);
        } catch (FileNotFoundException e) {
            LOGGER.info("File " + file.getName() + " not found. " + e.getMessage());
            LOGGER.debug(ExceptionUtils.getStackTrace(e));
            return null;
        }
        reader = new BufferedReader(new InputStreamReader(fis));
        rtfEditorKit.read(reader, dsd, 0);
        return dsd.getText(0, dsd.getLength());
    } catch (Exception e) {
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug(
                    "Extracting text from the .rtf  file " + file.getName() + " failed with " + e.getMessage());
            LOGGER.debug(ExceptionUtils.getStackTrace(e));
        }
        return null;
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.debug(
                        "Closing the reader for file " + file.getName() + "  failed with " + e.getMessage());
                LOGGER.debug(ExceptionUtils.getStackTrace(e));
            }
        }
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException e) {
                LOGGER.debug("Closing the FileInputStream for file " + file.getName() + " failed with "
                        + e.getMessage());
                LOGGER.debug(ExceptionUtils.getStackTrace(e));
            }
        }
    }
}

From source file:org.alder.fotobuchconvert.scribus.RtfToScribusConverter.java

/**
 * Walks the element tree of {@code doc} (section, then paragraphs, then
 * content runs) and appends one {@code C.EL_ITEXT} element per content run
 * and one {@code C.EL_PARA} element per paragraph to {@code xml}.
 *
 * <p>For every run the text, a font name derived from family and bold/italic
 * flags, and (when present) the font size are set. A trailing run of a
 * paragraph that is blank and shorter than three characters is skipped, and
 * the last paragraph is not emitted when it produced no runs.
 *
 * @param xml builder that receives the generated elements
 * @param doc styled document to convert
 * @param scribus used to look up colour names; may be {@code null}, in which
 *        case no colour attribute is written
 * @throws RuntimeException if the document reports a bad location for one of
 *         its own element ranges
 */
void output(XmlBuilder xml, DefaultStyledDocument doc, ScribusWriter scribus) {
    log.debug("Starting conversion of RTF data");
    if (log.isTraceEnabled()) {
        doc.dump(System.err);
    }

    try {
        Element section = doc.getDefaultRootElement();
        log.trace(section);
        assert section.getName().equals("section");

        final int nj = section.getElementCount();
        for (int j = 0; j < nj; j++) {
            Element paragraph = section.getElement(j);
            log.trace(paragraph);
            // Check the paragraph itself (the original re-checked the section).
            assert paragraph.getName().equals("paragraph");

            AttributeSet attr = paragraph.getAttributes();
            Integer alignment = (Integer) attr.getAttribute(StyleConstants.Alignment);

            boolean elementsInThisLine = false;
            final int ni = paragraph.getElementCount();
            for (int i = 0; i < ni; i++) {
                Element content = paragraph.getElement(i);
                // Check the content run itself (the original re-checked the section).
                assert content.getName().equals("content");

                int start = content.getStartOffset();
                int end = content.getEndOffset();

                attr = content.getAttributes();
                Boolean italic = (Boolean) attr.getAttribute(StyleConstants.Italic);
                Boolean bold = (Boolean) attr.getAttribute(StyleConstants.Bold);
                Boolean underline = (Boolean) attr.getAttribute(StyleConstants.Underline);
                String family = (String) attr.getAttribute(StyleConstants.Family);
                Integer fontSize = (Integer) attr.getAttribute(StyleConstants.Size);
                Color color = (Color) attr.getAttribute(StyleConstants.ColorConstants.Foreground);

                String text = doc.getText(start, end - start);

                // Skip a short, blank trailing run of the paragraph.
                if (i == ni - 1 && text.trim().isEmpty() && text.length() < 3) {
                    continue;
                }
                elementsInThisLine = true;

                // Strip trailing line terminators; paragraphs are emitted separately.
                while (text.endsWith("\n") || text.endsWith("\r")) {
                    text = text.substring(0, text.length() - 1);
                }

                log.debug(italic + " " + bold + " " + underline + " " + family + " " + fontSize + " " + color
                        + "\t\"" + text + "\"");

                XmlBuilder el = xml.add(C.EL_ITEXT).set(C.CH, text);

                // Compare by value: the attributes are boxed and may be null.
                boolean isBold = Boolean.TRUE.equals(bold);
                boolean isItalic = Boolean.TRUE.equals(italic);
                if (isBold && isItalic) {
                    el.set(C.FONT, family + " Bold Italic");
                } else if (isBold) {
                    el.set(C.FONT, family + " Bold");
                } else if (isItalic) {
                    el.set(C.FONT, family + " Italic");
                } else {
                    el.set(C.FONT, family + " Regular");
                }

                if (fontSize != null) {
                    el.set(C.FONTSIZE, fontSize);
                }

                // NOTE(review): the colour is only written when it equals BLACK;
                // this looks inverted (non-black colours are dropped) - confirm
                // the intent before changing it.
                if (color != null && color.equals(Color.BLACK) && scribus != null) {
                    String colname = scribus.colorManager.getColorName(color);
                    el.set(C.FCOLOR, colname);
                }
            }

            if (!elementsInThisLine && j == nj - 1) {
                break; // don't convert last line if empty
            }

            XmlBuilder el = xml.add(C.EL_PARA);
            if (alignment != null) {
                switch (alignment) {
                case StyleConstants.ALIGN_LEFT:
                    el.set(C.ALIGN, 0);
                    break;
                case StyleConstants.ALIGN_CENTER:
                    el.set(C.ALIGN, 1);
                    break;
                case StyleConstants.ALIGN_RIGHT:
                    el.set(C.ALIGN, 2);
                    break;
                case StyleConstants.ALIGN_JUSTIFIED:
                    el.set(C.ALIGN, 3);
                    break;
                }
            }
        }
    } catch (BadLocationException e) {
        throw new RuntimeException("This error should not occour", e);
    }

}

From source file:simplealbum.mvc.autocomplete.JTextPaneX.java

/**
 * Queues a task on the AWT event queue that assigns the entries of
 * {@code styles}, in rotation, to the successive tokens of {@code document}.
 *
 * <p>If the document text cannot be read, the failure is logged and no
 * attributes are changed.
 *
 * @param document the document whose tokens are re-styled
 */
protected void colorStyledDocument(final DefaultStyledDocument document) {
    EventQueue.invokeLater(new Runnable() {

        @Override
        public void run() {
            String input = "";
            try {
                input = document.getText(0, document.getLength());
            } catch (BadLocationException ex) {
                Logger.getLogger(JTextPaneX.class.getName()).log(Level.SEVERE, null, ex);
            }

            String[] tokens = StringUtils.split(input);

            // The tokens are returned in document order (StringUtils is
            // presumably Apache Commons Lang - verify), so a forward search
            // starting after the previous token finds the same offset as the
            // former "mask and rescan from 0" approach, in a single pass.
            int cursor = 0;
            for (int n = 0; n < tokens.length; n++) {
                String token = tokens[n];
                int start = input.indexOf(token, cursor);
                document.setCharacterAttributes(start, token.length(), styles[n % styles.length], true);
                cursor = start + token.length();
            }
        }
    });
}