Example usage for org.apache.poi.hwpf.converter WordToHtmlConverter getDocument

List of usage examples for org.apache.poi.hwpf.converter WordToHtmlConverter getDocument

Introduction

On this page you can find example usages of the getDocument method of org.apache.poi.hwpf.converter.WordToHtmlConverter.

Prototype

@Override
    public Document getDocument() 

Source Link

Usage

From source file:de.fionera.javamailer.dataProcessors.parseFilesForImport.java

/**
 * Gets a DOC file and parse it to HTML/*w w w  . j  a  v a  2 s  .  com*/
 * @param file The DOC File that you want to get parsed
 * @return A String congaing the DOC Document converted to HTML
 */
public String parseDOCFile(File file) {

    HWPFDocumentCore wordDocument;
    String result = "";
    DOMSource domSource;
    StreamResult streamResult;
    ByteArrayOutputStream out;
    Document htmlDocument;
    WordToHtmlConverter wordToHtmlConverter;

    try {
        wordDocument = WordToHtmlUtils.loadDoc(file);

        wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.processDocument(wordDocument);
        htmlDocument = wordToHtmlConverter.getDocument();
        domSource = new DOMSource(htmlDocument);
        out = new ByteArrayOutputStream();
        streamResult = new StreamResult(out);

        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);

        out.close();

        result = new String(out.toByteArray());

    } catch (IOException | ParserConfigurationException | TransformerException e) {
        e.printStackTrace();
    }

    return result;
}

From source file:javaapplication1.utils.MyWordToHtml.java

/**
 * Converts the Word document at {@code path + file} to HTML and hands the result to
 * {@code writeFile} with the target {@code path + "result.html"}.
 *
 * <p>Every picture from the document's pictures table is additionally written to
 * {@code path + "pics/"} under the file name suggested by the picture itself.
 *
 * @param path directory prefix that is concatenated directly with {@code file},
 *             {@code "pics/"} and {@code "result.html"} (so it presumably has to end with a
 *             path separator)
 * @param file name of the Word document, appended to {@code path}
 * @throws Throwable if reading the document, converting it, or writing any output fails
 */
public static void convert(String path, String file) throws Throwable {
    HWPFDocument wordDocument;
    // Close the input stream as soon as the document has been constructed from it.
    try (InputStream input = new FileInputStream(path + file)) {
        wordDocument = new HWPFDocument(input);
    }

    WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
            DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
    MyPictureManager pictureManager = new MyPictureManager();
    wordToHtmlConverter.setPicturesManager(pictureManager);

    wordToHtmlConverter.processDocument(wordDocument);
    List<?> pics = wordDocument.getPicturesTable().getAllPictures();

    // NOTE(review): kept from the original; MyPictureManager may rely on this hard-coded
    // directory existing - confirm and remove if it does not.
    File dir = new File("D:\\pics");
    dir.mkdir();

    // Make sure the directory the pictures are actually written to exists; otherwise every
    // FileOutputStream below fails with FileNotFoundException.
    new File(path + "pics").mkdirs();

    if (pics != null) {
        for (Object entry : pics) {
            Picture pic = (Picture) entry;
            // try-with-resources so each picture's file handle is released after writing.
            try (OutputStream pictureOut =
                    new FileOutputStream(path + "pics/" + pic.suggestFullFileName())) {
                pic.writeImageContent(pictureOut);
            } catch (FileNotFoundException e) {
                // Best effort: skip a picture whose target file cannot be opened.
                e.printStackTrace();
            }
        }
    }

    Document htmlDocument = wordToHtmlConverter.getDocument();
    ByteArrayOutputStream outStream = new ByteArrayOutputStream();
    DOMSource domSource = new DOMSource(htmlDocument);
    StreamResult streamResult = new StreamResult(outStream);

    TransformerFactory tf = TransformerFactory.newInstance();
    Transformer serializer = tf.newTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setOutputProperty(OutputKeys.METHOD, "html");
    serializer.transform(domSource, streamResult);

    // Decode with the charset the serializer wrote, not the platform default, so non-ASCII
    // text survives on non-UTF-8 systems.
    String content = outStream.toString("UTF-8");

    writeFile(content, path + "result.html", "UTF-8");
}