Parsing Html : HTML Parser « PDF « Java Tutorial






import java.io.FileOutputStream;
import java.io.FileReader;
import java.util.ArrayList;

import com.lowagie.text.Document;
import com.lowagie.text.Element;
import com.lowagie.text.html.simpleparser.HTMLWorker;
import com.lowagie.text.html.simpleparser.StyleSheet;
import com.lowagie.text.pdf.PdfWriter;

public class MainClass {
  public static void main(String[] args) throws Exception {
    Document document = new Document();
    StyleSheet st = new StyleSheet();
    st.loadTagStyle("body", "leading", "16,0");
    PdfWriter.getInstance(document, new FileOutputStream("html2.pdf"));
    document.open();
    ArrayList p = HTMLWorker.parseToList(new FileReader("example.html"), st);
    for (int k = 0; k < p.size(); ++k)
      document.add((Element) p.get(k));
    document.close();
  }
}








29.66.HTML Parser
29.66.1.HtmlParser from iText
29.66.2.Parsing Html
29.66.3.Parsing Html Snippets