Usage examples for org.jsoup.parser.Parser.parseXmlFragment
public static List<Node> parseXmlFragment(String fragmentXml, String baseUri)
From source file:com.ufukuzun.myth.dialect.bean.Myth.java
/**
 * Parses the given markup as an XML fragment and places its first top-level node
 * into a newly created document.
 *
 * @param html markup to parse
 * @return a new document; it contains the first parsed node when the fragment
 *         yielded at least one node, and has nothing appended otherwise
 */
private Document parse(String html) {
    final List<Node> parsed = Parser.parseXmlFragment(html, "");
    final Document result = new Document("");

    if (parsed.isEmpty()) {
        return result;
    }

    // Only the first top-level node is kept; any further siblings are dropped.
    result.appendChild(parsed.get(0));
    return result;
}
From source file:org.dswarm.xmlenhancer.XMLEnhancer.java
/**
 * Reads a file as UTF-8, parses its content as an XML fragment, passes the parsed
 * nodes through {@code enhanceNodes} and writes each node to the output file via
 * {@code unescapeEntity}.
 *
 * @param inputFileName  path of the file to read
 * @param outputFileName path of the file to write
 * @throws IOException if the input file cannot be read or the output file cannot be opened
 */
public static void enhanceXML(final String inputFileName, final String outputFileName) throws IOException {
    final Path inputFilePath = Paths.get(inputFileName);
    final byte[] inputBytes = Files.readAllBytes(inputFilePath);
    final String inputString = new String(inputBytes, StandardCharsets.UTF_8);

    final List<Node> nodes = Parser.parseXmlFragment(inputString, DUMMY_BASE_URI);
    enhanceNodes(nodes);

    final Path outputFilePath = Paths.get(outputFileName);
    final String outResource = outputFilePath.toString();

    // try-with-resources closes (and thereby flushes) the writer even when writing
    // a node throws, so the file handle is never leaked.
    try (PrintWriter out = new PrintWriter(outResource, UTF_8)) {
        for (final Node node : nodes) {
            unescapeEntity(out, node);
        }
    }
}
From source file:org.structr.web.importer.Importer.java
/** * Parse the code previously read by {@link Importer#readPage()} and treat it as page fragment. * * @param fragment/*from w w w . ja v a 2s. c o m*/ * @return * @throws FrameworkException */ public boolean parse(final boolean fragment) throws FrameworkException { init(); if (StringUtils.isNotBlank(code)) { if (!isDeployment) { logger.info("##### Start parsing code for page {} #####", new Object[] { name }); } else { // a trailing slash to all void/self-closing tags so the XML parser can parse it correctly code = code.replaceAll( "<(area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)([^>]*)>", "<$1$2/>"); } if (fragment) { if (isDeployment) { final List<Node> nodeList = Parser.parseXmlFragment(code, ""); parsedDocument = Document.createShell(""); final Element body = parsedDocument.body(); final Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); for (int i = nodes.length - 1; i > 0; i--) { nodes[i].remove(); } for (Node node : nodes) { body.appendChild(node); } } else { parsedDocument = Jsoup.parseBodyFragment(code); } } else { if (isDeployment) { parsedDocument = Jsoup.parse(code, "", Parser.xmlParser()); } else { parsedDocument = Jsoup.parse(code); } } } else { if (!isDeployment) { logger.info("##### Start fetching {} for page {} #####", new Object[] { address, name }); } code = HttpHelper.get(address); parsedDocument = Jsoup.parse(code); } return true; }