List of usage examples for org.jsoup.nodes Document createShell
public static Document createShell(String baseUri)
From source file:uk.co.blackpepper.support.retrofit.jsoup.spring.AbstractBeanHtmlConverterTest.java
/**
 * Test fixture: installs a new {@code FakeBeanHtmlConverter} and an empty shell
 * document (created with an empty base URI) before each test runs.
 */
@Before
public void setUp() {
    converter = new FakeBeanHtmlConverter();
    document = Document.createShell("");
}
From source file:me.vertretungsplan.parser.UntisCommonParser.java
/**
 * Splits a monitor page that may hold several days into one shell document per day,
 * parses each of them with {@code parseMonitorDay} and adds the result to {@code v}.
 *
 * <p>Three layouts are handled, checked in this order:
 * <ol>
 *   <li>more than one {@code .mon_head} element: one day per head;</li>
 *   <li>otherwise more than one {@code .mon_title} element: one day per title;</li>
 *   <li>otherwise the whole document is parsed as a single day.</li>
 * </ol>
 *
 * <p>Note that elements appended without {@code clone()} are moved out of {@code doc}
 * (jsoup re-parents a node on {@code appendChild}), so {@code doc} is modified and the
 * order of the operations below matters.
 *
 * @param v    schedule that receives the parsed days
 * @param doc  the monitor page; may be mutated by this method
 * @param data passed through unchanged to {@code parseMonitorDay}
 * @throws JSONException              declared by this method; not raised directly in this body
 * @throws CredentialInvalidException declared by this method; not raised directly in this body
 */
void parseMultipleMonitorDays(SubstitutionSchedule v, Document doc, JSONObject data)
        throws JSONException, CredentialInvalidException {

    // Layout 1: several ".mon_head" blocks, one per day.
    if (doc.select(".mon_head").size() > 1) {
        // The selection is re-evaluated on every pass because doc is mutated inside the loop.
        for (int i = 0; i < doc.select(".mon_head").size(); i++) {
            Document dayDoc = Document.createShell(doc.baseUri());
            Element dayBody = dayDoc.body();

            Element head = doc.select(".mon_head").get(i);
            dayBody.appendChild(head.clone());

            Element sibling = head.nextElementSibling();
            boolean followedByCenter = sibling != null && "center".equals(sibling.tagName());

            if (followedByCenter) {
                // The day's content lives inside the <center> right after the head.
                dayBody.appendChild(sibling.select(".mon_title").first().clone());

                // These tables are moved (not cloned) out of the original document.
                Element listTable = sibling.select("table:has(tr.list)").first();
                if (listTable != null) {
                    dayBody.appendChild(listTable);
                }
                Element infoTable = sibling.select("table.info").first();
                if (infoTable != null) {
                    dayBody.appendChild(infoTable);
                }
            } else if (i < doc.select(".mon_title").size()) {
                // No <center> wrapper: pair the i-th title with the i-th list table of the document.
                dayBody.appendChild(doc.select(".mon_title").get(i).clone());
                dayBody.appendChild(doc.select("table:has(tr.list)").get(i).clone());
            } else {
                // Nothing that belongs to this head; skip it without adding a day.
                continue;
            }

            v.addDay(parseMonitorDay(dayDoc, data));
        }
        return;
    }

    // Layout 2: several ".mon_title" elements, one per day.
    if (doc.select(".mon_title").size() > 1) {
        for (int i = 0; i < doc.select(".mon_title").size(); i++) {
            Document dayDoc = Document.createShell(doc.baseUri());
            Element dayBody = dayDoc.body();

            Element title = doc.select(".mon_title").get(i);
            dayBody.appendChild(title.clone());

            // Each appended sibling is moved into dayDoc, so asking the title for its next
            // sibling again yields the following element. Stops at the end or at a <center>.
            for (Element sibling = title.nextElementSibling();
                    sibling != null && !"center".equals(sibling.tagName());
                    sibling = title.nextElementSibling()) {
                dayBody.appendChild(sibling);
            }

            v.addDay(parseMonitorDay(dayDoc, data));
        }
        return;
    }

    // Layout 3: a single day, parse the document as it is.
    v.addDay(parseMonitorDay(doc, data));
}
From source file:no.kantega.publishing.admin.content.htmlfilter.RemoveNestedSpanTagsFilter.java
@Override public Document runFilter(Document document) { final Document clean = Document.createShell(document.baseUri()); if (document.body() != null) // frameset documents won't have a body. the clean doc will have empty body. copySafeNodes(document.body(), clean.body()); return clean; }
From source file:org.structr.web.importer.Importer.java
/** * Parse the code previously read by {@link Importer#readPage()} and treat it as page fragment. * * @param fragment/*from ww w.j a v a 2s . c o m*/ * @return * @throws FrameworkException */ public boolean parse(final boolean fragment) throws FrameworkException { init(); if (StringUtils.isNotBlank(code)) { if (!isDeployment) { logger.info("##### Start parsing code for page {} #####", new Object[] { name }); } else { // a trailing slash to all void/self-closing tags so the XML parser can parse it correctly code = code.replaceAll( "<(area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)([^>]*)>", "<$1$2/>"); } if (fragment) { if (isDeployment) { final List<Node> nodeList = Parser.parseXmlFragment(code, ""); parsedDocument = Document.createShell(""); final Element body = parsedDocument.body(); final Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); for (int i = nodes.length - 1; i > 0; i--) { nodes[i].remove(); } for (Node node : nodes) { body.appendChild(node); } } else { parsedDocument = Jsoup.parseBodyFragment(code); } } else { if (isDeployment) { parsedDocument = Jsoup.parse(code, "", Parser.xmlParser()); } else { parsedDocument = Jsoup.parse(code); } } } else { if (!isDeployment) { logger.info("##### Start fetching {} for page {} #####", new Object[] { address, name }); } code = HttpHelper.get(address); parsedDocument = Jsoup.parse(code); } return true; }