Example usage for org.apache.poi.hwpf.converter WordToHtmlConverter WordToHtmlConverter

List of usage examples for org.apache.poi.hwpf.converter WordToHtmlConverter WordToHtmlConverter

Introduction

On this page you can find example usages of the org.apache.poi.hwpf.converter.WordToHtmlConverter constructor.

Prototype

public WordToHtmlConverter(HtmlDocumentFacade htmlDocumentFacade) 

Source Link

Usage

From source file:de.fionera.javamailer.dataProcessors.parseFilesForImport.java

/**
 * Gets a DOC file and parse it to HTML/*from  ww w.j av a  2  s. c o m*/
 * @param file The DOC File that you want to get parsed
 * @return A String congaing the DOC Document converted to HTML
 */
public String parseDOCFile(File file) {

    HWPFDocumentCore wordDocument;
    String result = "";
    DOMSource domSource;
    StreamResult streamResult;
    ByteArrayOutputStream out;
    Document htmlDocument;
    WordToHtmlConverter wordToHtmlConverter;

    try {
        wordDocument = WordToHtmlUtils.loadDoc(file);

        wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.processDocument(wordDocument);
        htmlDocument = wordToHtmlConverter.getDocument();
        domSource = new DOMSource(htmlDocument);
        out = new ByteArrayOutputStream();
        streamResult = new StreamResult(out);

        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);

        out.close();

        result = new String(out.toByteArray());

    } catch (IOException | ParserConfigurationException | TransformerException e) {
        e.printStackTrace();
    }

    return result;
}

From source file:javaapplication1.utils.MyWordToHtml.java

/**
 * Converts the Word document at {@code path + file} to HTML and writes the result to
 * {@code path + "result.html"}; embedded pictures are extracted into {@code path + "pics/"}.
 *
 * @param path directory prefix (expected to end with a path separator) that holds the input
 *             file and receives the {@code pics} directory and {@code result.html}
 * @param file name of the Word document, appended to {@code path}
 * @throws Throwable if reading, converting, serializing or writing the document fails
 */
public static void convert(String path, String file) throws Throwable {
    HWPFDocument wordDocument;
    // Close the file handle as soon as the document has been parsed from the stream.
    try (InputStream input = new FileInputStream(path + file)) {
        wordDocument = new HWPFDocument(input);
    }

    WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
            DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
    MyPictureManager pictureManager = new MyPictureManager();
    wordToHtmlConverter.setPicturesManager(pictureManager);

    wordToHtmlConverter.processDocument(wordDocument);

    List<?> pics = wordDocument.getPicturesTable().getAllPictures();
    // Create the directory the pictures are actually written to (path + "pics/"),
    // rather than an unrelated hard-coded location.
    File picsDir = new File(path + "pics");
    picsDir.mkdirs();
    if (pics != null) {
        for (Object item : pics) {
            Picture pic = (Picture) item;
            // try-with-resources so each image stream is flushed and closed.
            try (FileOutputStream imageOut =
                    new FileOutputStream(path + "pics/" + pic.suggestFullFileName())) {
                pic.writeImageContent(imageOut);
            } catch (FileNotFoundException e) {
                // Best effort: skip a picture whose target file cannot be opened.
                e.printStackTrace();
            }
        }
    }

    Document htmlDocument = wordToHtmlConverter.getDocument();
    ByteArrayOutputStream outStream = new ByteArrayOutputStream();

    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setOutputProperty(OutputKeys.METHOD, "html");
    serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));

    // Decode with the charset the serializer emitted, not the platform default,
    // so the text handed to writeFile matches the "UTF-8" it is written with.
    String content = outStream.toString("UTF-8");

    writeFile(content, path + "result.html", "UTF-8");
}