List of usage examples for the org.jsoup.nodes.Node method attributes()
/**
 * Returns the attributes attached to this node.
 *
 * <p>The usage examples on this page iterate over the returned object directly, call
 * {@code asList()} on it, query it with {@code hasKey(...)} and add entries with {@code put(...)}.
 *
 * @return the attributes of this node (presumably never {@code null}, since every example
 *         dereferences the result without a check; verify against the implementing classes)
 */
public abstract Attributes attributes();
From source file:com.screenslicer.core.util.Util.java
public static String urlFromAttr(Node node) { for (Attribute attr : node.attributes().asList()) { if (attr.getValue().contains("://")) { return attr.getValue(); }// w ww. ja va2 s . c o m } return null; }
From source file:org.structr.web.importer.Importer.java
/**
 * Recursively converts the children of a parsed jsoup node into DOM nodes below {@code parent}.
 *
 * <p>For every child of {@code startNode} the method, in this order:
 * <ol>
 *   <li>cleans the node name and skips the child when the derived type is contained in
 *       {@code ignoreElementNames};</li>
 *   <li>for elements, collects the CSS class names and the id, calls {@code downloadFile} for the
 *       {@code src}/{@code href} attribute when this is not a deployment and {@code originalUrl}
 *       is set (otherwise resets {@code linkable} to {@code null}), and removes the
 *       {@code data-structr-hash} attribute when {@code removeHashAttribute} is set;</li>
 *   <li>handles {@code #comment}, {@code #data} and {@code #text} nodes, skipping blank ones;
 *       a comment that the {@code commentHandler} recognizes as instructions is remembered and
 *       applied to the next created node instead of being imported itself;</li>
 *   <li>creates the new node: a comment or text node when the tag is blank, a template for
 *       {@code structr:template}, a clone of a shared component for {@code structr:component},
 *       and a plain element via {@code page.createElement} otherwise;</li>
 *   <li>collects visibility flags, id, class and all attributes into a {@code PropertyMap} and
 *       sets them in one {@code setProperties} call; {@code data-structr-meta-*} keys are
 *       converted to camel case property names, local {@code href}/{@code src} values are
 *       replaced by {@code ${link.path}} expressions when a linkable exists;</li>
 *   <li>applies special handling to {@code script} and {@code style} elements (default script
 *       type, schema JSON import, script execution, CSS resource processing);</li>
 *   <li>attaches the new node to {@code parent} (a {@code Head} found below a {@code Body} is
 *       inserted before the body instead) and recurses into the child with {@code depth + 1},
 *       except for newly created templates or components.</li>
 * </ol>
 *
 * <p>NOTE(review): {@code type} is dereferenced without a null check although {@code tag} is
 * null-checked just before; presumably {@code nodeName()} never returns {@code null} - confirm.
 * NOTE(review): missing templates are reported via {@code System.out.println} while the rest of
 * the method uses {@code logger}.
 *
 * @param startNode           the parsed node whose children are imported
 * @param parent              the DOM node that receives the created children; may be {@code null},
 *                            in which case the created nodes are not attached
 * @param page                the page used as node factory and owner document; no element, text
 *                            or comment node is created when it is {@code null}
 * @param removeHashAttribute whether the {@code data-structr-hash} attribute is removed from elements
 * @param depth               the current recursion depth; in this method it is only incremented
 *                            and passed on to the recursive call
 * @return the first created node that is not a comment, or {@code null} if none was created
 * @throws FrameworkException declared by the method; raised by the framework calls used here
 */
private DOMNode createChildNodes(final Node startNode, final DOMNode parent, final Page page, final boolean removeHashAttribute, final int depth) throws FrameworkException { DOMNode rootElement = null; Linkable linkable = null; String instructions = null; final List<Node> children = startNode.childNodes(); for (Node node : children) { String tag = node.nodeName(); // clean tag, remove non-word characters except : and # if (tag != null) { tag = tag.replaceAll("[^a-zA-Z0-9#:.\\-_]+", ""); } final StringBuilder classString = new StringBuilder(); final String type = CaseHelper.toUpperCamelCase(tag); String comment = null; String content = null; String id = null; boolean isNewTemplateOrComponent = false; if (ignoreElementNames.contains(type)) { continue; } if (node instanceof Element) { final Element el = ((Element) node); final Set<String> classes = el.classNames(); for (String cls : classes) { classString.append(cls).append(" "); } id = el.id(); // do not download files when called from DeployCommand! if (!isDeployment) { String downloadAddressAttr = srcElements.contains(tag) ? "src" : hrefElements.contains(tag) ? 
"href" : null; if (originalUrl != null && downloadAddressAttr != null && StringUtils.isNotBlank(node.attr(downloadAddressAttr))) { String downloadAddress = node.attr(downloadAddressAttr); linkable = downloadFile(downloadAddress, originalUrl); } else { linkable = null; } } if (removeHashAttribute) { // Remove data-structr-hash attribute node.removeAttr("data-structr-hash"); } } // Data and comment nodes: Trim the text and put it into the "content" field without changes if (type.equals("#comment")) { comment = ((Comment) node).getData(); tag = ""; // Don't add content node for whitespace if (StringUtils.isBlank(comment)) { continue; } // store for later use commentSource.append(comment).append("\n"); // check if comment contains instructions if (commentHandler != null && commentHandler.containsInstructions(comment)) { if (instructions != null) { // unhandled instructions from previous iteration => empty content element createEmptyContentNode(page, parent, commentHandler, instructions); } instructions = comment; continue; } } else if (type.equals("#data")) { tag = ""; content = ((DataNode) node).getWholeData(); // Don't add content node for whitespace if (StringUtils.isBlank(content)) { continue; } } else // Text-only nodes: Trim the text and put it into the "content" field { if (type.equals("#text")) { tag = ""; if (isDeployment) { content = trimTrailingNewline(((TextNode) node).getWholeText()); if (content == null || content.length() == 0) { continue; } } else { content = trimTrailingNewline(((TextNode) node).text()); if (StringUtils.isBlank(content)) { continue; } } } } org.structr.web.entity.dom.DOMNode newNode = null; // create node if (StringUtils.isBlank(tag)) { if (page != null) { // create comment or content node if (!StringUtils.isBlank(comment)) { final PropertyKey<String> contentTypeKey = StructrApp.key(Content.class, "contentType"); newNode = (DOMNode) page.createComment(comment); newNode.setProperty(contentTypeKey, "text/html"); } else { newNode = 
(Content) page.createTextNode(content); } } } else if ("structr:template".equals(tag)) { final String src = node.attr("src"); if (src != null) { DOMNode template = null; if (DeployCommand.isUuid(src)) { template = (DOMNode) StructrApp.getInstance().nodeQuery(NodeInterface.class) .and(GraphObject.id, src).getFirst(); if (template == null) { System.out.println("##################################### template with UUID " + src + " not found, this is a known bug"); } } else if (DeployCommand.endsWithUuid(src)) { final String uuid = src.substring(src.length() - 32); template = (DOMNode) StructrApp.getInstance().nodeQuery(NodeInterface.class) .and(GraphObject.id, uuid).getFirst(); if (template == null) { System.out.println("##################################### template with UUID " + uuid + " not found, this is a known bug"); } } else { template = Importer.findSharedComponentByName(src); if (template == null) { template = Importer.findTemplateByName(src); if (template == null) { template = createNewTemplateNode(parent, node.childNodes()); isNewTemplateOrComponent = true; } } } if (template != null) { newNode = template; if (template.isSharedComponent()) { newNode = (DOMNode) template.cloneNode(false); newNode.setSharedComponent(template); newNode.setOwnerDocument(page); } else if (page != null) { newNode.setOwnerDocument(page); } } else { logger.warn("Unable to find template or shared component {}, template ignored!", src); } } else { logger.warn("Invalid template definition, missing src attribute!"); } } else if ("structr:component".equals(tag)) { final String src = node.attr("src"); if (src != null) { DOMNode component = null; if (DeployCommand.isUuid(src)) { component = app.nodeQuery(DOMNode.class).and(GraphObject.id, src).getFirst(); } else if (DeployCommand.endsWithUuid(src)) { final String uuid = src.substring(src.length() - 32); component = app.nodeQuery(DOMNode.class).and(GraphObject.id, uuid).getFirst(); } else { component = 
Importer.findSharedComponentByName(src); } if (component == null) { component = createSharedComponent(node); } isNewTemplateOrComponent = true; if (component != null) { newNode = (DOMNode) component.cloneNode(false); final String _html_src = newNode.getProperty(new StringProperty("_html_src")); if (!StringUtils.isEmpty(_html_src)) { node.attr("src", _html_src); } else { node.removeAttr("src"); } newNode.setSharedComponent(component); newNode.setOwnerDocument(page); } else { logger.warn("Unable to find shared component {} - ignored!", src); } } else { logger.warn("Invalid component definition, missing src attribute!"); } } else { if (page != null) { newNode = (org.structr.web.entity.dom.DOMElement) page.createElement(tag, true); } } if (newNode != null) { // save root element for later use if (rootElement == null && !(newNode instanceof org.structr.web.entity.dom.Comment)) { rootElement = newNode; } // set linkable if (linkable != null && newNode instanceof LinkSource) { ((LinkSource) newNode).setLinkable(linkable); } // container for bulk setProperties() final PropertyMap newNodeProperties = new PropertyMap(); final Class newNodeType = newNode.getClass(); newNodeProperties.put(AbstractNode.visibleToPublicUsers, publicVisible); newNodeProperties.put(AbstractNode.visibleToAuthenticatedUsers, authVisible); // "id" attribute: Put it into the "_html_id" field if (StringUtils.isNotBlank(id)) { newNodeProperties.put(StructrApp.key(DOMElement.class, "_html_id"), id); } if (StringUtils.isNotBlank(classString.toString())) { newNodeProperties.put(StructrApp.key(DOMElement.class, "_html_class"), StringUtils.trim(classString.toString())); } for (Attribute nodeAttr : node.attributes()) { final String key = nodeAttr.getKey(); if (!key.equals("text")) { // Don't add text attribute as _html_text because the text is already contained in the 'content' attribute final String value = nodeAttr.getValue(); if (key.startsWith("data-")) { if (key.startsWith(DATA_META_PREFIX)) { // convert 
data-structr-meta-* attributes to local camel case properties on the node, int l = DATA_META_PREFIX.length(); String upperCaseKey = WordUtils.capitalize(key.substring(l), new char[] { '-' }) .replaceAll("-", ""); String camelCaseKey = key.substring(l, l + 1).concat(upperCaseKey.substring(1)); if (value != null) { // store value using actual input converter final PropertyKey actualKey = StructrApp.getConfiguration() .getPropertyKeyForJSONName(newNodeType, camelCaseKey, false); if (actualKey != null) { final PropertyConverter converter = actualKey .inputConverter(securityContext); if (converter != null) { final Object convertedValue = converter.convert(value); newNodeProperties.put(actualKey, convertedValue); } else { newNodeProperties.put(actualKey, value); } } else { logger.warn("Unknown meta property key {}, ignoring.", camelCaseKey); } } } else if (key.startsWith(DATA_STRUCTR_PREFIX)) { // don't convert data-structr-* attributes as they are internal final PropertyKey propertyKey = StructrApp.getConfiguration() .getPropertyKeyForJSONName(newNodeType, key); if (propertyKey != null) { final PropertyConverter inputConverter = propertyKey .inputConverter(securityContext); if (value != null && inputConverter != null) { newNodeProperties.put(propertyKey, propertyKey.inputConverter(securityContext).convert(value)); } else { newNodeProperties.put(propertyKey, value); } } } else { // store data-* attributes in node final PropertyKey propertyKey = new StringProperty(key); if (value != null) { newNodeProperties.put(propertyKey, value); } } } else { boolean notBlank = StringUtils.isNotBlank(value); boolean isAnchor = notBlank && value.startsWith("#"); boolean isLocal = notBlank && !value.startsWith("http"); boolean isActive = notBlank && value.contains("${"); boolean isStructrLib = notBlank && value.startsWith("/structr/js/"); if (linkable != null && "link".equals(tag) && "href".equals(key) && isLocal && !isActive && !isDeployment) { newNodeProperties.put(new 
StringProperty(PropertyView.Html + key), "${link.path}?${link.version}"); } else if (linkable != null && ("href".equals(key) || "src".equals(key)) && isLocal && !isActive && !isAnchor && !isStructrLib && !isDeployment) { newNodeProperties.put(new StringProperty(PropertyView.Html + key), "${link.path}"); } else { if (key.startsWith("aria-")) { // use custom key newNodeProperties.put( new StringProperty( CustomHtmlAttributeProperty.CUSTOM_HTML_ATTRIBUTE_PREFIX + key), value); } else { newNodeProperties.put(new StringProperty(PropertyView.Html + key), value); } } } } } // bulk set properties on new node newNode.setProperties(securityContext, newNodeProperties); if ("script".equals(tag)) { final PropertyKey<String> typeKey = StructrApp.key(Input.class, "_html_type"); final String contentType = newNode.getProperty(typeKey); if (contentType == null) { // Set default type of script tag to "text/javascript" to ensure inline JS gets imported properly newNode.setProperty(typeKey, "text/javascript"); } else if (contentType.equals("application/schema+json")) { for (final Node scriptContentNode : node.childNodes()) { final String source = scriptContentNode.toString(); // Import schema JSON SchemaJsonImporter.importSchemaJson(source); } } else if (contentType.equals("application/x-structr-script")) { for (final Node scriptContentNode : node.childNodes()) { final String source = scriptContentNode.toString(); try { Actions.execute(securityContext, null, source, null); } catch (UnlicensedScriptException ex) { ex.log(logger); } } continue; } else if (contentType.equals("application/x-structr-javascript")) { for (final Node scriptContentNode : node.childNodes()) { final String source = scriptContentNode.toString(); try { Actions.execute(securityContext, null, source, null); } catch (UnlicensedScriptException ex) { ex.log(logger); } } continue; } } else if ("style".equals(tag)) { final PropertyKey<String> typeKey = StructrApp.key(Input.class, "_html_type"); final String contentType = 
newNode.getProperty(typeKey); if ("text/css".equals(contentType)) { // parse content of style elements and add referenced files to list of resources to be downloaded for (final Node styleContentNode : node.childNodes()) { final String source = styleContentNode.toString(); try { // Import referenced resources processCss(source, originalUrl); } catch (IOException ex) { logger.warn("Couldn't process CSS source", ex); } } } } if (instructions != null) { if (instructions.contains("@structr:content") && !(newNode instanceof Content)) { // unhandled instructions from previous iteration => empty content element createEmptyContentNode(page, parent, commentHandler, instructions); } else { // apply instructions to new DOM element if (commentHandler != null) { commentHandler.handleComment(page, newNode, instructions, true); } } instructions = null; } // allow parent to be null to prevent direct child relationship if (parent != null) { // special handling for <head> elements if (newNode instanceof Head && parent instanceof Body) { final org.w3c.dom.Node html = parent.getParentNode(); html.insertBefore(newNode, parent); } else { parent.appendChild(newNode); } } // Link new node to its parent node // linkNodes(parent, newNode, page, localIndex); // Step down and process child nodes except for newly created templates if (!isNewTemplateOrComponent) { createChildNodes(node, newNode, page, removeHashAttribute, depth + 1); } } } // reset instructions when leaving a level if (instructions != null) { createEmptyContentNode(page, parent, commentHandler, instructions); instructions = null; } return rootElement; }
From source file:org.structr.web.Importer.java
private void createChildNodes(final Node startNode, final DOMNode parent, final Page page, final boolean removeHashAttribute) throws FrameworkException { Linkable res = null;/* w w w . j a v a 2s .c o m*/ final List<Node> children = startNode.childNodes(); for (Node node : children) { String tag = node.nodeName(); // clean tag, remove non-word characters if (tag != null) { tag = tag.replaceAll("[^a-zA-Z0-9#]+", ""); } String type = CaseHelper.toUpperCamelCase(tag); String comment = null; String content = null; String id = null; StringBuilder classString = new StringBuilder(); if (ArrayUtils.contains(ignoreElementNames, type)) { continue; } if (node instanceof Element) { Element el = ((Element) node); Set<String> classes = el.classNames(); for (String cls : classes) { classString.append(cls).append(" "); } id = el.id(); String downloadAddressAttr = (ArrayUtils.contains(srcElements, tag) ? "src" : ArrayUtils.contains(hrefElements, tag) ? "href" : null); if (downloadAddressAttr != null && StringUtils.isNotBlank(node.attr(downloadAddressAttr))) { String downloadAddress = node.attr(downloadAddressAttr); res = downloadFile(downloadAddress, originalUrl); } if (removeHashAttribute) { // Remove data-structr-hash attribute node.removeAttr(DOMNode.dataHashProperty.jsonName()); } } // Data and comment nodes: Trim the text and put it into the "content" field without changes if (/*type.equals("#data") || */type.equals("#comment")) { tag = ""; comment = ((Comment) node).getData(); // Don't add content node for whitespace if (StringUtils.isBlank(comment)) { continue; } // store for later use commentSource.append(comment).append("\n"); } else if (type.equals("#data")) { tag = ""; content = ((DataNode) node).getWholeData(); // Don't add content node for whitespace if (StringUtils.isBlank(content)) { continue; } } else // Text-only nodes: Trim the text and put it into the "content" field { if (type.equals("#text")) { // type = "Content"; tag = ""; //content = ((TextNode) 
node).getWholeText(); content = ((TextNode) node).text(); // Add content node for whitespace within <p> elements only if (!("p".equals(startNode.nodeName().toLowerCase())) && StringUtils.isWhitespace(content)) { continue; } } } org.structr.web.entity.dom.DOMNode newNode; // create node if (StringUtils.isBlank(tag)) { // create comment or content node if (!StringUtils.isBlank(comment)) { newNode = (DOMNode) page.createComment(comment); newNode.setProperty(org.structr.web.entity.dom.Comment.contentType, "text/html"); } else { newNode = (Content) page.createTextNode(content); } } else { newNode = (org.structr.web.entity.dom.DOMElement) page.createElement(tag); } if (newNode != null) { newNode.setProperty(AbstractNode.visibleToPublicUsers, publicVisible); newNode.setProperty(AbstractNode.visibleToAuthenticatedUsers, authVisible); if (res != null) { newNode.setProperty(LinkSource.linkable, res); } // "id" attribute: Put it into the "_html_id" field if (StringUtils.isNotBlank(id)) { newNode.setProperty(DOMElement._id, id); } if (StringUtils.isNotBlank(classString.toString())) { newNode.setProperty(DOMElement._class, StringUtils.trim(classString.toString())); } for (Attribute nodeAttr : node.attributes()) { final String key = nodeAttr.getKey(); if (!key.equals("text")) { // Don't add text attribute as _html_text because the text is already contained in the 'content' attribute final String value = nodeAttr.getValue(); if (key.startsWith("data-")) { if (key.startsWith(DATA_META_PREFIX)) { // convert data-structr-meta-* attributes to local camel case properties on the node, int l = DATA_META_PREFIX.length(); String upperCaseKey = WordUtils.capitalize(key.substring(l), new char[] { '-' }) .replaceAll("-", ""); String camelCaseKey = key.substring(l, l + 1).concat(upperCaseKey.substring(1)); if (value != null) { if (value.equalsIgnoreCase("true")) { newNode.setProperty(new BooleanProperty(camelCaseKey), true); } else if (value.equalsIgnoreCase("false")) { 
newNode.setProperty(new BooleanProperty(camelCaseKey), false); } else { newNode.setProperty(new StringProperty(camelCaseKey), nodeAttr.getValue()); } } } else if (key.startsWith(DATA_STRUCTR_PREFIX)) { // don't convert data-structr-* attributes as they are internal PropertyKey propertyKey = config.getPropertyKeyForJSONName(newNode.getClass(), key); if (propertyKey != null) { final PropertyConverter inputConverter = propertyKey .inputConverter(securityContext); if (value != null && inputConverter != null) { newNode.setProperty(propertyKey, propertyKey.inputConverter(securityContext).convert(value)); } else { newNode.setProperty(propertyKey, value); } } } } else { boolean notBlank = StringUtils.isNotBlank(value); boolean isAnchor = notBlank && value.startsWith("#"); boolean isLocal = notBlank && !value.startsWith("http"); boolean isActive = notBlank && value.contains("${"); boolean isStructrLib = notBlank && value.startsWith("/structr/js/"); if ("link".equals(tag) && "href".equals(key) && isLocal && !isActive) { newNode.setProperty(new StringProperty(PropertyView.Html.concat(key)), "${link.path}?${link.version}"); } else if (("href".equals(key) || "src".equals(key)) && isLocal && !isActive && !isAnchor && !isStructrLib) { newNode.setProperty(new StringProperty(PropertyView.Html.concat(key)), "${link.path}"); } else { newNode.setProperty(new StringProperty(PropertyView.Html.concat(key)), value); } } } } final StringProperty typeKey = new StringProperty(PropertyView.Html.concat("type")); if ("script".equals(tag) && newNode.getProperty(typeKey) == null) { // Set default type of script tag to "text/javascript" to ensure inline JS gets imported properly newNode.setProperty(typeKey, "text/javascript"); } parent.appendChild(newNode); // Link new node to its parent node // linkNodes(parent, newNode, page, localIndex); // Step down and process child nodes createChildNodes(node, newNode, page, removeHashAttribute); } } }
From source file:org.symphonyoss.client.util.MlMessageParser.java
private void stripTags(StringBuilder builder, List<Node> nodesList) { for (Node node : nodesList) { String nodeName = node.nodeName(); if (nodeName.equalsIgnoreCase("#text")) { builder.append(node.toString().trim()).append(" "); } else {//from w ww . j a v a 2 s . c o m if (nodeName.equalsIgnoreCase(NodeTypes.ANCHOR.toString())) { if (node.attributes().hasKey(AttribTypes.HREF.toString())) builder.append(node.attr(AttribTypes.HREF.toString())); } else if (nodeName.equalsIgnoreCase(NodeTypes.HASHTAG.toString())) { if (node.attributes().hasKey(AttribTypes.TAG.toString())) builder.append("#").append(node.attr(AttribTypes.TAG.toString())).append(" "); } else if (nodeName.equalsIgnoreCase(NodeTypes.MENTION.toString())) { SymUser user = new SymUser(); user.setEmailAddress("UID:" + node.attr(AttribTypes.UID.toString())); user.setId(Long.valueOf(node.attr(AttribTypes.UID.toString()))); if (node.attributes().hasKey(AttribTypes.UID.toString())) { if (symClient != null) try { user = symClient.getUsersClient() .getUserFromId(Long.valueOf(node.attr(AttribTypes.UID.toString()))); } catch (UsersClientException e) { logger.error("Could not identify user from userID", e); } } else if (node.attributes().hasKey(AttribTypes.EMAIL.toString())) { user.setEmailAddress(node.attr(AttribTypes.EMAIL.toString())); } builder.append(user.getEmailAddress()); } else if (nodeName.equalsIgnoreCase(NodeTypes.CASHTAG.toString())) { if (node.attributes().hasKey(AttribTypes.TAG.toString())) builder.append("$").append(node.attr(AttribTypes.TAG.toString())).append(" "); } else { // recurse stripTags(builder, node.childNodes()); } } } }
From source file:org.symphonyoss.client.util.MlMessageParser.java
/**
 * Collects the outer HTML of all nodes that follow a marker node within the same sibling list.
 *
 * <p>The marker is the first node named {@code nodeType} (case-insensitive) whose attribute
 * {@code attrib} equals {@code attribValue} (case-insensitive). Once it is found, its children
 * and its following siblings are appended to {@code builder}. Text nodes are separated from the
 * preceding output by a space unless they already start with one, other nodes are followed by
 * a space.
 *
 * <p>An empty text node no longer raises {@link StringIndexOutOfBoundsException}; it is treated
 * like any other text that does not start with a space.
 *
 * @param nodeType    name of the marker node
 * @param attrib      attribute key the marker node must carry
 * @param attribValue expected value of that attribute
 * @param builder     receives the collected HTML
 * @param nodesList   the nodes to scan
 * @param append      {@code true} when the marker has already been found by the caller
 */
private void getHtmlStartingFromNode(String nodeType, String attrib, String attribValue, StringBuilder builder,
        List<Node> nodesList, boolean append) {

    for (Node node : nodesList) {
        final String nodeName = node.nodeName();

        if (append) {
            final boolean isText = nodeName.equalsIgnoreCase("#text");
            final String html = node.outerHtml();

            // startsWith is safe for the empty string, unlike charAt(0)
            if (isText && !html.startsWith(" ")) {
                builder.append(" ");
            }

            builder.append(html);

            if (!isText) {
                builder.append(" ");
            }
            continue;
        }

        if (nodeName.equalsIgnoreCase(nodeType)
                && node.attributes().hasKey(attrib)
                && node.attr(attrib).equalsIgnoreCase(attribValue)) {
            // From here on, the children of this node and its following siblings are collected.
            append = true;
        }

        getHtmlStartingFromNode(nodeType, attrib, attribValue, builder, node.childNodes(), append);
    }
}
From source file:org.symphonyoss.client.util.MlMessageParser.java
private void updateMentionUidToEmail(SymphonyClient symClient, List<Node> nodesList) { for (Node node : nodesList) { String nodeName = node.nodeName(); if (nodeName.equalsIgnoreCase(NodeTypes.MENTION.toString())) { if (node.attributes().hasKey(AttribTypes.UID.toString())) { String uid = node.attr(AttribTypes.UID.toString()); SymUser user = null;//from www . j a va 2s . c o m try { user = symClient.getUsersClient().getUserFromId(Long.parseLong(uid)); logger.info("Translated mention uid {} to email {}", uid, user.getEmailAddress()); } catch (UsersClientException e) { logger.error("Could not identify user email from id", e); } if (user != null && user.getEmailAddress() != null) { uid = user.getEmailAddress(); } Attribute emailAttribute = new Attribute(AttribTypes.EMAIL.toString(), uid); node.attributes().put(emailAttribute); node.removeAttr(AttribTypes.UID.toString()); } } updateMentionUidToEmail(symClient, node.childNodes()); } }
From source file:sk.svec.jan.acb.extraction.DiscussionFinder.java
private boolean analyze(Node node) { // System.out.println(node.nodeName()); for (Attribute attribute : node.attributes().asList()) { String key = attribute.getKey(); String value = attribute.getValue(); // System.out.println(" attr:" + key + " value:" + value); if (!foundDateStringSwitch) { foundDateStringSwitch = findDate(node, value); }//from w w w.j av a 2s .c o m if (foundDateStringSwitch) { boolean foundDateString = findDate(node, value); if (foundDateString) { String child = node.childNode(0).toString(); foundDate = findDateValue(node, child); dateScore = 10; } } else { foundDate = findDateValue(node, value); dateScore = 5; } } return false; // return foundDate && foundAuthor && foundText; }
From source file:sk.svec.jan.acb.extraction.Finder.java
public void traversePageFindAuthor(Node root) { Node node = root; int depth = 0; while (node != null) { // System.out.println(depth + " " + node.nodeName() + " " + node.childNodeSize()); // System.out.println(node.attributes()); for (Attribute attribute : node.attributes().asList()) { String value = attribute.getValue(); if (!foundAuthor) { foundAuthor = findAuthorInText(node, value); break; }/*ww w . ja v a 2s .com*/ } if (node.childNodeSize() > 0) { node = node.childNode(0); depth++; } else { while (node.nextSibling() == null && depth > 0) { node = node.parentNode(); depth--; } if (node == root) { break; } node = node.nextSibling(); } } }
From source file:sk.svec.jan.acb.extraction.Finder.java
private boolean analyze(Node node) { // System.out.println(node.nodeName()); for (Attribute attribute : node.attributes().asList()) { String key = attribute.getKey(); String value = attribute.getValue(); // System.out.println(" attr:" + key + " value:" + value); if (!foundDate) { boolean foundDateString = findDate(node, value); if (foundDateString) { if (node.childNodeSize() != 0) { String child = node.childNode(0).toString(); foundDate = findDateValue(node, child); dateScore = 10;/*from w ww . j a v a 2s . c om*/ } else { } } else { // nodesToRemove.add(node); foundDate = findDateValue(node, value); dateScore = 5; } if (foundDate) { nodesToRemove.add(node); } } if (!foundAuthor) { foundAuthor = findAuthor(node, value); } } if (!foundTitle) { foundTitle = findTitle(node, node.nodeName()); } return foundDate && foundAuthor && foundTitle; }