Example usage for javax.swing.text.html HTMLEditorKit read

List of usage examples for javax.swing.text.html HTMLEditorKit read

Introduction

On this page you can find usage examples for the read method of javax.swing.text.html.HTMLEditorKit.

Prototype

public void read(Reader in, Document doc, int pos) throws IOException, BadLocationException 

Source Link

Document

Inserts content from the given stream.

Usage

From source file:ReplaceReader.java

/**
 * Opens a frame containing a {@code JEditorPane} and loads the HTML file named
 * by the first command-line argument into it through a customised
 * {@code HTMLEditorKit} that creates {@code CustomHTMLDocument} instances.
 *
 * <p>Any failure while reading (including a missing argument) is reported on
 * standard output; the frame stays open either way.
 *
 * @param args command-line arguments; {@code args[0]} is the path of the HTML file to load
 */
public static void main(String[] args) {
    try {
        UIManager.setLookAndFeel("com.sun.java.swing.plaf.windows.WindowsLookAndFeel");
    } catch (Exception ignored) {
        // Best effort only: if this look and feel cannot be installed,
        // continue with whatever look and feel is already active.
    }

    JFrame f = new JFrame("JEditorPane with Custom Reader");
    JEditorPane ep = new JEditorPane();
    f.getContentPane().add(new JScrollPane(ep));
    f.setSize(400, 300);
    f.setVisible(true);

    // Editor kit whose documents are CustomHTMLDocument instances sharing the kit's style sheet.
    HTMLEditorKit kit = new HTMLEditorKit() {
        @Override
        public Document createDefaultDocument() {
            HTMLDocument doc = new CustomHTMLDocument(getStyleSheet());
            doc.setAsynchronousLoadPriority(4);
            doc.setTokenThreshold(100);
            return doc;
        }
    };
    ep.setEditorKit(kit);

    try {
        Document doc = ep.getDocument();
        // Boolean.TRUE replaces the deprecated Boolean(boolean) constructor.
        doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);

        Reader in = new FileReader(args[0]);
        try {
            kit.read(in, doc, 0);
        } finally {
            // Release the file handle whether or not parsing succeeded.
            in.close();
        }
    } catch (Exception e) {
        System.out.println("Exception while reading HTML " + e);
    }
}

From source file:com.liferay.portal.util.LuceneFields.java

/**
 * Builds an index field from the contents of a file.
 *
 * <p>For the extensions {@code .doc}, {@code .htm}/{@code .html}, {@code .pdf},
 * {@code .rtf} and {@code .xls} the text is extracted up front and returned in a
 * stored field; the file is closed before returning. For any other extension,
 * or when extraction fails (the failure is logged), the field is built from an
 * open reader over the file, which therefore must stay open after this method
 * returns.
 *
 * @param field   name of the field to create
 * @param file    file whose contents are indexed
 * @param fileExt file extension including the leading dot; compared case-insensitively
 * @return a field holding the extracted text, or a reader-backed field when no text was extracted
 * @throws IOException if the file cannot be opened
 */
public static Field getFile(String field, File file, String fileExt) throws IOException {

    fileExt = fileExt.toLowerCase();

    FileInputStream fis = new FileInputStream(file);
    Reader reader = new BufferedReader(new InputStreamReader(fis));

    String text = null;

    if (fileExt.equals(".doc")) {
        try {
            WordDocument wordDocument = new WordDocument(fis);

            StringWriter stringWriter = new StringWriter();

            wordDocument.writeAllText(stringWriter);

            text = stringWriter.toString();
        } catch (Exception e) {
            _log.error(e.getMessage());
        }
    } else if (fileExt.equals(".htm") || fileExt.equals(".html")) {
        try {
            DefaultStyledDocument dsd = new DefaultStyledDocument();

            HTMLEditorKit htmlEditorKit = new HTMLEditorKit();
            htmlEditorKit.read(reader, dsd, 0);

            text = dsd.getText(0, dsd.getLength());
        } catch (Exception e) {
            _log.error(e.getMessage());
        }
    } else if (fileExt.equals(".pdf")) {
        try {
            PDFParser parser = new PDFParser(fis);
            parser.parse();

            PDDocument pdDoc = parser.getPDDocument();

            try {
                StringWriter stringWriter = new StringWriter();

                PDFTextStripper stripper = new PDFTextStripper();
                stripper.setLineSeparator("\n");
                stripper.writeText(pdDoc, stringWriter);

                text = stringWriter.toString();
            } finally {
                // Close the parsed document even when text stripping fails.
                pdDoc.close();
            }
        } catch (Exception e) {
            _log.error(e.getMessage());
        }
    } else if (fileExt.equals(".rtf")) {
        try {
            DefaultStyledDocument dsd = new DefaultStyledDocument();

            RTFEditorKit rtfEditorKit = new RTFEditorKit();
            rtfEditorKit.read(reader, dsd, 0);

            text = dsd.getText(0, dsd.getLength());
        } catch (Exception e) {
            _log.error(e.getMessage());
        }
    } else if (fileExt.equals(".xls")) {
        try {
            XLSTextStripper stripper = new XLSTextStripper(fis);

            text = stripper.getText();
        } catch (Exception e) {
            _log.error(e.getMessage());
        }
    }

    if (text != null) {
        // The text is fully in memory, so the file is no longer needed.
        // Closing the reader also closes the underlying FileInputStream.
        try {
            reader.close();
        } catch (IOException e) {
            // A failed close must not discard text that was extracted successfully.
            _log.error(e.getMessage());
        }

        return new Field(field, text, Field.Store.YES, Field.Index.NOT_ANALYZED);
    }

    // The reader is handed to the field and must remain open for its consumer.
    // NOTE(review): if an extractor above failed after consuming part of fis,
    // this reader starts mid-stream - confirm whether that fallback is acceptable.
    return new Field(field, reader);
}

From source file:com.aurel.track.lucene.index.associatedFields.textExctractor.HTMLExtractor.java

/**
 * Extracts the plain text of an HTML file by parsing it with an
 * {@code HTMLEditorKit} into a {@code DefaultStyledDocument}.
 *
 * @param file the HTML file to read
 * @param fileExtension the file's extension (not used by this implementation)
 * @return the document text, or {@code null} when the file is not found or
 *         the extraction fails
 */
@Override
public String getText(File file, String fileExtension) {
    String extracted = null;
    FileInputStream input = null;
    Reader htmlReader = null;
    try {
        try {
            input = new FileInputStream(file);
        } catch (FileNotFoundException notFound) {
            LOGGER.info("File " + file.getName() + " not found. " + notFound.getMessage());
            LOGGER.debug(ExceptionUtils.getStackTrace(notFound));
            return null;
        }
        htmlReader = new BufferedReader(new InputStreamReader(input));
        DefaultStyledDocument document = new DefaultStyledDocument();
        new HTMLEditorKit().read(htmlReader, document, 0);
        extracted = document.getText(0, document.getLength());
    } catch (Exception failure) {
        String reason = failure.getMessage();
        LOGGER.debug("Extracting text from the .htm or .html  file " + file.getName() + " failed with " + reason);
        LOGGER.error(ExceptionUtils.getStackTrace(failure));
    } finally {
        // Close the reader first, then the raw stream; a failure of either is only logged.
        if (htmlReader != null) {
            try {
                htmlReader.close();
            } catch (Exception closeFailure) {
                LOGGER.debug("Closing the reader for file " + file.getName() + " failed with "
                        + closeFailure.getMessage());
            }
        }
        if (input != null) {
            try {
                input.close();
            } catch (Exception closeFailure) {
                LOGGER.debug("Closing the FileInputStream for file " + file.getName() + " failed with "
                        + closeFailure.getMessage());
            }
        }
    }
    return extracted;
}