Example usage for org.jsoup.nodes.Attribute.getKey()

List of usage examples for org.jsoup.nodes.Attribute.getKey()

Introduction

On this page you can find example usages of org.jsoup.nodes.Attribute.getKey().

Prototype

public String getKey() 

Source Link

Document

Get the attribute key.

Usage

From source file:com.astamuse.asta4d.render.RenderUtil.java

/**
 * Collects the attributes of an element into a parameter map for message formatting.
 *
 * <p>Attributes whose name is listed in {@code EXCLUDE_ATTR_NAME_LIST} are skipped. For the
 * remaining ones:
 * <ul>
 * <li>a plain attribute is stored as {@code name -> value};</li>
 * <li>an attribute named {@code @name} is stored under {@code name}; its value is used as a
 * message key;</li>
 * <li>an attribute named {@code #name} is stored under {@code name}; its value is used as a
 * sub key of {@code key} ({@code key + "." + value}), or as the message key itself when
 * {@code key} is empty.</li>
 * </ul>
 * For the two prefixed forms the stored object resolves the message lazily: the lookup only
 * happens when {@code toString()} is called on it.
 *
 * @param attributes the attributes to read
 * @param locale the locale passed to the message helper on lazy resolution
 * @param key the current message key, used as parent of {@code #}-prefixed sub keys
 * @return a new mutable map from parameter name to value (or lazily resolved message)
 */
private static Map<String, Object> getMessageParams(final Attributes attributes, final Locale locale,
        final String key) {
    final Map<String, Object> paramMap = new HashMap<>();
    for (final Attribute attribute : attributes) {
        final String rawName = attribute.getKey();
        if (EXCLUDE_ATTR_NAME_LIST.contains(rawName)) {
            continue;
        }
        final String value = attribute.getValue();

        final boolean absoluteReference = rawName.startsWith("@");
        final boolean subKeyReference = !absoluteReference && rawName.startsWith("#");

        // plain attribute: the value is the parameter
        if (!absoluteReference && !subKeyReference) {
            paramMap.put(rawName, value);
            continue;
        }

        // "@" values are complete message keys; "#" values are appended to the current key
        // unless there is no current key to append to
        final String recursiveKey;
        if (absoluteReference || StringUtils.isEmpty(key)) {
            recursiveKey = value;
        } else {
            recursiveKey = key + "." + value;
        }

        // strip the one-character prefix to obtain the parameter name
        paramMap.put(rawName.substring(1), new Object() {
            @Override
            public String toString() {
                switch (I18nMessageHelperTypeAssistant.configuredHelperType()) {
                case Mapped:
                    // the mapped helper accepts the parameter map, so nested messages see the same params
                    return I18nMessageHelperTypeAssistant.getConfiguredMappedHelper().getMessage(locale,
                            recursiveKey, paramMap);
                case Ordered:
                default:
                    return I18nMessageHelperTypeAssistant.getConfiguredOrderedHelper().getMessage(locale,
                            recursiveKey);
                }
            }
        });
    }
    return paramMap;
}

From source file:io.knotx.knot.service.service.ServiceEntry.java

public ServiceEntry(Attribute serviceAttribute, Attribute paramsAttribute) {
    this.namespace = ServiceAttributeUtil.extractNamespace(serviceAttribute.getKey());
    this.name = serviceAttribute.getValue();
    this.params = getParams(paramsAttribute);
    this.cacheKey = String.format("%s|%s", getName(), getParams());
}

From source file:com.iorga.iraj.servlet.AgglomeratorServlet.java

/**
 * Recursively walks the web application resources below {@code scriptSrc} and, for every
 * resource whose path ends with {@code pathSuffix}, inserts a copy of {@code agglomerateElement}
 * right after it that points to that resource.
 *
 * <p>The copy carries all attributes of the original element except the agglomeration marker
 * attributes and the URL attribute, which is replaced by the resource path with
 * {@code pathPrefix} removed. In development mode the directory is additionally registered
 * with the watch service when it can be mapped to a real file system path.
 *
 * @param config servlet configuration giving access to the servlet context
 * @param agglomerateElement element after which the generated elements are inserted
 * @param scriptSrc resource directory to scan
 * @param pathPrefix prefix removed from each resource path to build the URL
 * @param pathSuffix suffix a resource path must have to be included
 * @param urlAttribute name of the attribute receiving the resource URL
 * @param lastModified most recent modification time seen so far
 * @return the maximum of {@code lastModified} and the modification times of all appended resources
 * @throws MalformedURLException if a resource path cannot be turned into a URL
 * @throws IOException if the watch registration or a resource connection fails
 * @throws URISyntaxException declared for callers; not raised by the code visible here
 */
private long searchAndAppendAfter(final ServletConfig config, final Element agglomerateElement,
        final String scriptSrc, final String pathPrefix, final String pathSuffix, final String urlAttribute,
        long lastModified) throws MalformedURLException, IOException, URISyntaxException {
    if (mode == Mode.DEVELOPMENT) {
        // add a watch for that directory; getRealPath returns null when the container cannot
        // map the path to the file system (e.g. resources served from a packed archive)
        final String realPath = config.getServletContext().getRealPath(scriptSrc);
        if (realPath != null) {
            Paths.get(realPath).register(watchService, StandardWatchEventKinds.ENTRY_CREATE,
                    StandardWatchEventKinds.ENTRY_DELETE);
        }
    }
    // getResourcePaths returns null (not an empty set) when nothing lives below the given path
    final Set<String> childrenPaths = config.getServletContext().getResourcePaths(scriptSrc);
    if (childrenPaths == null) {
        return lastModified;
    }
    for (final String path : childrenPaths) {
        if (path.endsWith(pathSuffix)) {
            // add that JS
            final StringBuilder targetScript = new StringBuilder("<");
            targetScript.append(agglomerateElement.tagName());
            // copy all the origin attributes
            for (final Attribute attribute : agglomerateElement.attributes()) {
                final String key = attribute.getKey();
                if (!ATTRIBUTE_NAME.equalsIgnoreCase(key) && !urlAttribute.equalsIgnoreCase(key)
                        && !URL_ATTRIBUTE_ATTRIBUTE_NAME.equalsIgnoreCase(key)) {
                    targetScript.append(" ").append(attribute.html());
                }
            }
            // specify the src path
            final String childUrl = StringUtils.removeStart(path, pathPrefix);
            targetScript.append(" ").append(new Attribute(urlAttribute, childUrl).html()).append(" />");
            agglomerateElement.after(targetScript.toString());
            // NOTE(review): the resource is looked up by childUrl (prefix removed), not by path —
            // confirm this is intended when pathPrefix is not empty
            lastModified = Math.max(
                    config.getServletContext().getResource(childUrl).openConnection().getLastModified(),
                    lastModified);
        } else if (path.endsWith("/")) {
            // it's a directory, recurse search & append
            lastModified = Math.max(searchAndAppendAfter(config, agglomerateElement, path, pathPrefix,
                    pathSuffix, urlAttribute, lastModified), lastModified);
        }
    }
    return lastModified;
}

From source file:com.kingfong.webcrawler.util.DOMContentUtils.java

/**
 * This method finds all anchors below the supplied DOM
 * <code>node</code>, and creates appropriate {@link Outlink}
 * records for each (relative to the supplied <code>base</code>
 * URL), and adds them to the <code>outlinks</code> {@link
 * ArrayList}./*from   w w  w  . jav a  2 s  .c  om*/
 *
 * <p>
 *
 * Links without inner structure (tags, text, etc) are discarded, as
 * are links which contain only single nested links and empty text
 * nodes (this is a common DOM-fixup artifact, at least with
 * nekohtml).
 */
public void getOutlinks(String html, URL url, HashSet<String> outlinks) {

    Document document = Jsoup.parse(html);
    Elements elements = document.getAllElements();
    for (Element currentNode : elements) {
        String nodeName = currentNode.tagName();
        // short nodeType = currentNode.;
        Elements children = currentNode.children();
        nodeName = nodeName.toLowerCase();
        LinkParams params = linkParams.get(nodeName);
        if (params != null) {
            // if (!shouldThrowAwayLink(currentNode, children, childLen,
            // params)) {

            // StringBuilder linkText = new StringBuilder();
            // getText(linkText, currentNode, true);

            Attributes attrs = currentNode.attributes();
            String target = null;
            boolean noFollow = false;
            boolean post = false;
            Iterator<Attribute> iterator = attrs.iterator();
            while (iterator.hasNext()) {
                Attribute attr = iterator.next();
                String attrName = attr.getKey();
                if (params.attrName.equalsIgnoreCase(attrName)) {
                    target = attr.getValue();
                } else if ("rel".equalsIgnoreCase(attrName) && "nofollow".equalsIgnoreCase(attr.getValue())) {
                    noFollow = true;
                } else if ("method".equalsIgnoreCase(attrName) && "post".equalsIgnoreCase(attr.getValue())) {
                    post = true;
                }
            }
            if (StringUtils.startsWith(target, "/")) {
                target = url.getProtocol() + "://" + url.getHost() + target;
            }
            if (target != null && URLFilter.filt(target)) {
                outlinks.add(target);
            }
            // }
            // this should not have any children, skip them
            if (params.childLen == 0)
                continue;
        }
    }
}

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * Collects the {@code embed} and {@code object} elements below the parent of {@code node}
 * that reference a YouTube or Vimeo video.
 *
 * <p>An element qualifies when its {@code src} attribute value contains {@code "youtube"} or
 * {@code "vimeo"}. Extraction is best effort: unexpected failures are logged and whatever was
 * collected so far is returned.
 *
 * @param node the element whose parent is searched; may be {@code null} or parentless, in
 *        which case an empty list is returned
 * @return the matching elements, never {@code null}
 */
private ArrayList<Element> extractVideos(Element node) {
    ArrayList<Element> candidates = new ArrayList<Element>();
    ArrayList<Element> goodMovies = new ArrayList<Element>();

    // check explicitly instead of relying on a caught NullPointerException
    Element parent = (node == null) ? null : node.parent();
    if (parent == null) {
        if (logger.isDebugEnabled()) {
            logger.debug("extractVideos: node has no parent, nothing to search");
        }
        return goodMovies;
    }

    try {
        candidates.addAll(parent.getElementsByTag("embed"));
        candidates.addAll(parent.getElementsByTag("object"));
        if (logger.isDebugEnabled()) {
            logger.debug("extractVideos: Starting to extract videos. Found: " + candidates.size());
        }

        for (Element el : candidates) {
            for (Attribute a : el.attributes()) {
                String attrValue = a.getValue();
                if (logger.isDebugEnabled()) {
                    logger.debug(a.getKey() + " : " + attrValue);
                }
                boolean knownHost = attrValue != null
                        && (attrValue.contains("youtube") || attrValue.contains("vimeo"));
                if (knownHost && "src".equals(a.getKey())) {
                    if (logger.isDebugEnabled()) {
                        logger.debug("Found video... setting");
                        logger.debug("This page has a video!: " + attrValue);
                    }
                    goodMovies.add(el);
                }
            }
        }
    } catch (Exception e) {
        // best effort: keep what was found so far, but record the failure with its stack trace
        logger.error(e.toString(), e);
    }
    if (logger.isDebugEnabled()) {
        logger.debug("extractVideos:  done looking videos");
    }
    return goodMovies;
}

From source file:org.norvelle.addressdiscoverer.parse.unstructured.ForwardsFlattenedDocumentIterator.java

/**
 * Renders an element as text: one {@code "key: value"} line per attribute, followed by the
 * element's own text (text of child elements is not included).
 *
 * @param currElement the element to render
 * @return the attribute lines followed by the element's own text
 */
private String extractText(Element currElement) {
    final StringBuilder text = new StringBuilder();
    for (Attribute attribute : currElement.attributes().asList()) {
        text.append(attribute.getKey());
        text.append(": ");
        text.append(attribute.getValue());
        text.append("\n");
    }
    return text.append(currElement.ownText()).toString();
}

From source file:org.structr.web.importer.Importer.java

/**
 * Converts the child nodes of a parsed (jsoup) node into Structr DOM nodes, recursing into
 * each child's own children.
 *
 * <p>For every child of {@code startNode} this method:
 * <ol>
 * <li>derives a cleaned tag name and skips the child if its type is in {@code ignoreElementNames};</li>
 * <li>for elements, collects id and class names, optionally downloads a referenced file
 * (not during deployment) and optionally removes the {@code data-structr-hash} attribute;</li>
 * <li>creates a comment, content, template, shared component or element node depending on the tag;</li>
 * <li>copies the attributes of the source node into properties of the new node;</li>
 * <li>applies special handling for {@code script} and {@code style} elements;</li>
 * <li>applies pending comment instructions, attaches the new node to {@code parent} and recurses.</li>
 * </ol>
 *
 * @param startNode the source node whose children are processed
 * @param parent the node new nodes are appended to; may be null to avoid creating a direct child relationship
 * @param page the page used to create nodes and set as their owner document; comment, content
 *        and plain element nodes are only created when it is not null
 * @param removeHashAttribute whether to remove the {@code data-structr-hash} attribute from source elements
 * @param depth current recursion depth; only passed on (incremented) to the recursive call
 * @return the first node created at this level that is not a comment, or null if there is none
 * @throws FrameworkException declared for the underlying node and property operations
 *         (assumption — the throwing calls cannot be identified from this method alone)
 */
private DOMNode createChildNodes(final Node startNode, final DOMNode parent, final Page page,
        final boolean removeHashAttribute, final int depth) throws FrameworkException {

    DOMNode rootElement = null;
    // linkable and instructions carry state from one child to the following ones
    Linkable linkable = null;
    String instructions = null;

    final List<Node> children = startNode.childNodes();
    for (Node node : children) {

        String tag = node.nodeName();

        // clean tag, remove non-word characters except : and #
        if (tag != null) {
            tag = tag.replaceAll("[^a-zA-Z0-9#:.\\-_]+", "");
        }

        final StringBuilder classString = new StringBuilder();
        final String type = CaseHelper.toUpperCamelCase(tag);
        String comment = null;
        String content = null;
        String id = null;
        // when set, the children of this node are not processed recursively (see end of loop body)
        boolean isNewTemplateOrComponent = false;

        if (ignoreElementNames.contains(type)) {

            continue;
        }

        if (node instanceof Element) {

            final Element el = ((Element) node);
            final Set<String> classes = el.classNames();

            // join the class names with single spaces (trailing space is trimmed when stored)
            for (String cls : classes) {

                classString.append(cls).append(" ");
            }

            id = el.id();

            // do not download files when called from DeployCommand!
            if (!isDeployment) {

                // attribute holding the address of a downloadable resource, if this tag has one
                String downloadAddressAttr = srcElements.contains(tag) ? "src"
                        : hrefElements.contains(tag) ? "href" : null;

                if (originalUrl != null && downloadAddressAttr != null
                        && StringUtils.isNotBlank(node.attr(downloadAddressAttr))) {

                    String downloadAddress = node.attr(downloadAddressAttr);
                    linkable = downloadFile(downloadAddress, originalUrl);
                } else {
                    // forget the linkable of a previous element
                    linkable = null;
                }
            }

            if (removeHashAttribute) {

                // Remove data-structr-hash attribute
                node.removeAttr("data-structr-hash");
            }
        }

        // Data and comment nodes: Trim the text and put it into the "content" field without changes
        if (type.equals("#comment")) {

            comment = ((Comment) node).getData();
            tag = "";

            // Don't add content node for whitespace
            if (StringUtils.isBlank(comment)) {

                continue;
            }

            // store for later use
            commentSource.append(comment).append("\n");

            // check if comment contains instructions
            if (commentHandler != null && commentHandler.containsInstructions(comment)) {

                if (instructions != null) {

                    // unhandled instructions from previous iteration => empty content element
                    createEmptyContentNode(page, parent, commentHandler, instructions);
                }

                // remember the instructions for the next node; no node is created for this comment
                instructions = comment;
                continue;
            }

        } else if (type.equals("#data")) {

            tag = "";
            content = ((DataNode) node).getWholeData();

            // Don't add content node for whitespace
            if (StringUtils.isBlank(content)) {

                continue;
            }

        } else // Text-only nodes: Trim the text and put it into the "content" field
        {
            if (type.equals("#text")) {

                tag = "";

                if (isDeployment) {

                    // deployment uses the whole text and only skips empty content
                    content = trimTrailingNewline(((TextNode) node).getWholeText());

                    if (content == null || content.length() == 0) {
                        continue;
                    }

                } else {

                    // import uses text() and skips blank (whitespace-only) content
                    content = trimTrailingNewline(((TextNode) node).text());

                    if (StringUtils.isBlank(content)) {
                        continue;
                    }
                }
            }
        }

        org.structr.web.entity.dom.DOMNode newNode = null;

        // create node
        if (StringUtils.isBlank(tag)) {

            // blank tag: comment, data or text node (tag was cleared above)
            if (page != null) {

                // create comment or content node
                if (!StringUtils.isBlank(comment)) {

                    final PropertyKey<String> contentTypeKey = StructrApp.key(Content.class, "contentType");

                    newNode = (DOMNode) page.createComment(comment);
                    newNode.setProperty(contentTypeKey, "text/html");

                } else {

                    newNode = (Content) page.createTextNode(content);
                }
            }

        } else if ("structr:template".equals(tag)) {

            // the src attribute identifies the template: a UUID, a string ending with a UUID, or a name
            final String src = node.attr("src");
            // NOTE(review): jsoup's Node.attr presumably returns an empty string rather than null for a
            // missing attribute — confirm whether the else branch below can ever be reached
            if (src != null) {

                DOMNode template = null;

                if (DeployCommand.isUuid(src)) {

                    template = (DOMNode) StructrApp.getInstance().nodeQuery(NodeInterface.class)
                            .and(GraphObject.id, src).getFirst();

                    if (template == null) {

                        // NOTE(review): reported on stdout rather than through the logger
                        System.out.println("##################################### template with UUID " + src
                                + " not found, this is a known bug");

                    }

                } else if (DeployCommand.endsWithUuid(src)) {
                    // the last 32 characters are taken as the UUID
                    final String uuid = src.substring(src.length() - 32);
                    template = (DOMNode) StructrApp.getInstance().nodeQuery(NodeInterface.class)
                            .and(GraphObject.id, uuid).getFirst();

                    if (template == null) {

                        // NOTE(review): reported on stdout rather than through the logger
                        System.out.println("##################################### template with UUID " + uuid
                                + " not found, this is a known bug");

                    }

                } else {

                    // lookup by name: shared component first, then template, otherwise create a new template
                    template = Importer.findSharedComponentByName(src);
                    if (template == null) {

                        template = Importer.findTemplateByName(src);

                        if (template == null) {

                            template = createNewTemplateNode(parent, node.childNodes());
                            isNewTemplateOrComponent = true;

                        }
                    }
                }

                if (template != null) {

                    newNode = template;

                    if (template.isSharedComponent()) {

                        // shared components are not used directly: a shallow clone linked to the component is inserted
                        newNode = (DOMNode) template.cloneNode(false);

                        newNode.setSharedComponent(template);
                        newNode.setOwnerDocument(page);

                    } else if (page != null) {

                        newNode.setOwnerDocument(page);
                    }

                } else {

                    logger.warn("Unable to find template or shared component {}, template ignored!", src);
                }

            } else {

                logger.warn("Invalid template definition, missing src attribute!");
            }

        } else if ("structr:component".equals(tag)) {

            // the src attribute identifies the shared component: a UUID, a string ending with a UUID, or a name
            final String src = node.attr("src");
            // NOTE(review): same null-check caveat as for structr:template above — confirm
            if (src != null) {

                DOMNode component = null;
                if (DeployCommand.isUuid(src)) {

                    component = app.nodeQuery(DOMNode.class).and(GraphObject.id, src).getFirst();

                } else if (DeployCommand.endsWithUuid(src)) {

                    // the last 32 characters are taken as the UUID
                    final String uuid = src.substring(src.length() - 32);
                    component = app.nodeQuery(DOMNode.class).and(GraphObject.id, uuid).getFirst();

                } else {

                    component = Importer.findSharedComponentByName(src);
                }

                if (component == null) {

                    // nothing found: create the shared component from the source node
                    component = createSharedComponent(node);
                }

                // set unconditionally: children of a component node are never processed recursively here
                isNewTemplateOrComponent = true;

                if (component != null) {

                    newNode = (DOMNode) component.cloneNode(false);

                    // replace the component reference in src with the clone's _html_src value, or drop src,
                    // before the attributes of the source node are copied further below
                    final String _html_src = newNode.getProperty(new StringProperty("_html_src"));
                    if (!StringUtils.isEmpty(_html_src)) {
                        node.attr("src", _html_src);
                    } else {
                        node.removeAttr("src");
                    }

                    newNode.setSharedComponent(component);
                    newNode.setOwnerDocument(page);

                } else {

                    logger.warn("Unable to find shared component {} - ignored!", src);
                }

            } else {

                logger.warn("Invalid component definition, missing src attribute!");
            }

        } else {

            // any other tag: plain element node
            if (page != null) {

                newNode = (org.structr.web.entity.dom.DOMElement) page.createElement(tag, true);
            }

        }

        if (newNode != null) {

            // save root element for later use
            if (rootElement == null && !(newNode instanceof org.structr.web.entity.dom.Comment)) {
                rootElement = newNode;
            }

            // set linkable
            if (linkable != null && newNode instanceof LinkSource) {
                ((LinkSource) newNode).setLinkable(linkable);
            }

            // container for bulk setProperties()
            final PropertyMap newNodeProperties = new PropertyMap();
            final Class newNodeType = newNode.getClass();

            newNodeProperties.put(AbstractNode.visibleToPublicUsers, publicVisible);
            newNodeProperties.put(AbstractNode.visibleToAuthenticatedUsers, authVisible);

            // "id" attribute: Put it into the "_html_id" field
            if (StringUtils.isNotBlank(id)) {

                newNodeProperties.put(StructrApp.key(DOMElement.class, "_html_id"), id);
            }

            // class names: put them into the "_html_class" field
            if (StringUtils.isNotBlank(classString.toString())) {

                newNodeProperties.put(StructrApp.key(DOMElement.class, "_html_class"),
                        StringUtils.trim(classString.toString()));
            }

            // copy the attributes of the source node into properties of the new node
            for (Attribute nodeAttr : node.attributes()) {

                final String key = nodeAttr.getKey();

                if (!key.equals("text")) { // Don't add text attribute as _html_text because the text is already contained in the 'content' attribute

                    final String value = nodeAttr.getValue();

                    if (key.startsWith("data-")) {

                        if (key.startsWith(DATA_META_PREFIX)) { // convert data-structr-meta-* attributes to local camel case properties on the node,

                            int l = DATA_META_PREFIX.length();

                            // capitalize each dash-separated word of the suffix and remove the dashes, then
                            // restore the original first character so the result starts as the suffix does
                            String upperCaseKey = WordUtils.capitalize(key.substring(l), new char[] { '-' })
                                    .replaceAll("-", "");
                            String camelCaseKey = key.substring(l, l + 1).concat(upperCaseKey.substring(1));

                            if (value != null) {

                                // store value using actual input converter
                                final PropertyKey actualKey = StructrApp.getConfiguration()
                                        .getPropertyKeyForJSONName(newNodeType, camelCaseKey, false);
                                if (actualKey != null) {

                                    final PropertyConverter converter = actualKey
                                            .inputConverter(securityContext);
                                    if (converter != null) {

                                        final Object convertedValue = converter.convert(value);
                                        newNodeProperties.put(actualKey, convertedValue);

                                    } else {

                                        newNodeProperties.put(actualKey, value);
                                    }

                                } else {

                                    logger.warn("Unknown meta property key {}, ignoring.", camelCaseKey);
                                }
                            }

                        } else if (key.startsWith(DATA_STRUCTR_PREFIX)) { // don't convert data-structr-* attributes as they are internal

                            // the attribute name itself is used to look up the property key
                            final PropertyKey propertyKey = StructrApp.getConfiguration()
                                    .getPropertyKeyForJSONName(newNodeType, key);
                            if (propertyKey != null) {

                                final PropertyConverter inputConverter = propertyKey
                                        .inputConverter(securityContext);
                                if (value != null && inputConverter != null) {

                                    newNodeProperties.put(propertyKey,
                                            propertyKey.inputConverter(securityContext).convert(value));

                                } else {

                                    newNodeProperties.put(propertyKey, value);
                                }
                            }

                        } else {

                            // store data-* attributes in node
                            final PropertyKey propertyKey = new StringProperty(key);
                            if (value != null) {

                                newNodeProperties.put(propertyKey, value);
                            }
                        }

                    } else {

                        // classify the value to decide whether it is replaced by a link expression
                        boolean notBlank = StringUtils.isNotBlank(value);
                        boolean isAnchor = notBlank && value.startsWith("#");
                        boolean isLocal = notBlank && !value.startsWith("http");
                        boolean isActive = notBlank && value.contains("${");
                        boolean isStructrLib = notBlank && value.startsWith("/structr/js/");

                        if (linkable != null && "link".equals(tag) && "href".equals(key) && isLocal && !isActive
                                && !isDeployment) {

                            // local href of a <link> element with a linkable: path plus version expression
                            newNodeProperties.put(new StringProperty(PropertyView.Html + key),
                                    "${link.path}?${link.version}");

                        } else if (linkable != null && ("href".equals(key) || "src".equals(key)) && isLocal
                                && !isActive && !isAnchor && !isStructrLib && !isDeployment) {

                            // other local href/src with a linkable: path expression
                            newNodeProperties.put(new StringProperty(PropertyView.Html + key), "${link.path}");

                        } else {

                            if (key.startsWith("aria-")) {

                                // use custom key
                                newNodeProperties.put(
                                        new StringProperty(
                                                CustomHtmlAttributeProperty.CUSTOM_HTML_ATTRIBUTE_PREFIX + key),
                                        value);

                            } else {

                                // everything else is stored under the HTML view prefix plus attribute name
                                newNodeProperties.put(new StringProperty(PropertyView.Html + key), value);
                            }
                        }
                    }
                }
            }

            // bulk set properties on new node
            newNode.setProperties(securityContext, newNodeProperties);

            if ("script".equals(tag)) {

                final PropertyKey<String> typeKey = StructrApp.key(Input.class, "_html_type");
                final String contentType = newNode.getProperty(typeKey);

                if (contentType == null) {

                    // Set default type of script tag to "text/javascript" to ensure inline JS gets imported properly
                    newNode.setProperty(typeKey, "text/javascript");

                } else if (contentType.equals("application/schema+json")) {

                    for (final Node scriptContentNode : node.childNodes()) {

                        final String source = scriptContentNode.toString();

                        // Import schema JSON
                        SchemaJsonImporter.importSchemaJson(source);
                    }

                } else if (contentType.equals("application/x-structr-script")) {

                    // execute the script content during import
                    for (final Node scriptContentNode : node.childNodes()) {

                        final String source = scriptContentNode.toString();

                        try {

                            Actions.execute(securityContext, null, source, null);

                        } catch (UnlicensedScriptException ex) {
                            ex.log(logger);
                        }
                    }

                    // skip the rest of the loop body: the node is neither attached to the parent nor recursed into
                    continue;

                } else if (contentType.equals("application/x-structr-javascript")) {

                    // execute the script content during import
                    for (final Node scriptContentNode : node.childNodes()) {

                        final String source = scriptContentNode.toString();

                        try {

                            Actions.execute(securityContext, null, source, null);

                        } catch (UnlicensedScriptException ex) {
                            ex.log(logger);
                        }
                    }

                    // skip the rest of the loop body: the node is neither attached to the parent nor recursed into
                    continue;

                }

            } else if ("style".equals(tag)) {

                final PropertyKey<String> typeKey = StructrApp.key(Input.class, "_html_type");
                final String contentType = newNode.getProperty(typeKey);

                if ("text/css".equals(contentType)) {

                    // parse content of style elements and add referenced files to list of resources to be downloaded
                    for (final Node styleContentNode : node.childNodes()) {

                        final String source = styleContentNode.toString();

                        try {
                            // Import referenced resources
                            processCss(source, originalUrl);

                        } catch (IOException ex) {
                            logger.warn("Couldn't process CSS source", ex);
                        }
                    }
                }

            }

            // instructions remembered from a preceding comment apply to this node
            if (instructions != null) {

                if (instructions.contains("@structr:content") && !(newNode instanceof Content)) {

                    // unhandled instructions from previous iteration => empty content element
                    createEmptyContentNode(page, parent, commentHandler, instructions);

                } else {

                    // apply instructions to new DOM element
                    if (commentHandler != null) {

                        commentHandler.handleComment(page, newNode, instructions, true);
                    }
                }

                instructions = null;
            }

            // allow parent to be null to prevent direct child relationship
            if (parent != null) {

                // special handling for <head> elements
                if (newNode instanceof Head && parent instanceof Body) {

                    // a head found inside a body is inserted before that body in the body's parent node
                    final org.w3c.dom.Node html = parent.getParentNode();
                    html.insertBefore(newNode, parent);

                } else {

                    parent.appendChild(newNode);
                }
            }

            // Link new node to its parent node
            // linkNodes(parent, newNode, page, localIndex);
            // Step down and process child nodes except for newly created templates
            if (!isNewTemplateOrComponent) {

                createChildNodes(node, newNode, page, removeHashAttribute, depth + 1);

            }

        }
    }

    // reset instructions when leaving a level
    if (instructions != null) {

        // instructions that were not followed by any node at this level => empty content element
        createEmptyContentNode(page, parent, commentHandler, instructions);

        instructions = null;
    }

    return rootElement;
}

From source file:org.structr.web.Importer.java

/**
 * Recursively converts the children of a parsed jsoup node into Structr DOM nodes and
 * appends them to the given parent.
 *
 * <p>For every child this method
 * <ul>
 *   <li>skips nodes whose type is listed in {@code ignoreElementNames},</li>
 *   <li>downloads the resource referenced by a {@code src}/{@code href} attribute for the
 *       tags listed in {@code srcElements}/{@code hrefElements},</li>
 *   <li>creates a comment, text or element node via {@code page},</li>
 *   <li>copies id, classes and the remaining attributes onto the new node, and</li>
 *   <li>descends into the child's own children.</li>
 * </ul>
 *
 * @param startNode           parsed node whose children are imported
 * @param parent              DOM node the newly created nodes are appended to
 * @param page                page used as factory for comment, text and element nodes
 * @param removeHashAttribute if {@code true}, the attribute named by
 *                            {@code DOMNode.dataHashProperty.jsonName()} is removed from
 *                            each source element before its attributes are copied
 * @throws FrameworkException declared by this method; presumably raised by node creation,
 *                            property assignment or value conversion (confirm against callees)
 */
private void createChildNodes(final Node startNode, final DOMNode parent, final Page page,
        final boolean removeHashAttribute) throws FrameworkException {

    final List<Node> children = startNode.childNodes();
    for (Node node : children) {

        // Scoped to a single child on purpose: when this was declared outside the loop, the
        // Linkable downloaded for one element was also attached to all following siblings.
        Linkable res = null;

        String tag = node.nodeName();

        // clean tag, remove non-word characters
        if (tag != null) {
            tag = tag.replaceAll("[^a-zA-Z0-9#]+", "");
        }

        String type = CaseHelper.toUpperCamelCase(tag);
        String comment = null;
        String content = null;
        String id = null;
        StringBuilder classString = new StringBuilder();

        if (ArrayUtils.contains(ignoreElementNames, type)) {

            continue;
        }

        if (node instanceof Element) {

            Element el = ((Element) node);
            Set<String> classes = el.classNames();

            for (String cls : classes) {

                classString.append(cls).append(" ");
            }

            id = el.id();

            // Attribute that carries the download address for this tag, if any
            String downloadAddressAttr = (ArrayUtils.contains(srcElements, tag) ? "src"
                    : ArrayUtils.contains(hrefElements, tag) ? "href" : null);

            if (downloadAddressAttr != null && StringUtils.isNotBlank(node.attr(downloadAddressAttr))) {

                String downloadAddress = node.attr(downloadAddressAttr);
                res = downloadFile(downloadAddress, originalUrl);

            }

            if (removeHashAttribute) {

                // Remove data-structr-hash attribute
                node.removeAttr(DOMNode.dataHashProperty.jsonName());

            }

        }

        // Comment nodes: keep the comment text unchanged
        if (type.equals("#comment")) {

            tag = "";
            comment = ((Comment) node).getData();

            // Don't add content node for whitespace
            if (StringUtils.isBlank(comment)) {

                continue;
            }

            // store for later use
            commentSource.append(comment).append("\n");

        } else if (type.equals("#data")) {

            // Data nodes: put the data into the "content" field without changes
            tag = "";
            content = ((DataNode) node).getWholeData();

            // Don't add content node for whitespace
            if (StringUtils.isBlank(content)) {

                continue;
            }

        } else if (type.equals("#text")) {

            // Text-only nodes: put the text into the "content" field
            tag = "";
            content = ((TextNode) node).text();

            // Add content node for whitespace within <p> elements only
            if (!"p".equalsIgnoreCase(startNode.nodeName()) && StringUtils.isWhitespace(content)) {

                continue;
            }
        }

        org.structr.web.entity.dom.DOMNode newNode;

        // create node
        if (StringUtils.isBlank(tag)) {

            // create comment or content node
            if (!StringUtils.isBlank(comment)) {

                newNode = (DOMNode) page.createComment(comment);
                newNode.setProperty(org.structr.web.entity.dom.Comment.contentType, "text/html");

            } else {

                newNode = (Content) page.createTextNode(content);
            }

        } else {

            newNode = (org.structr.web.entity.dom.DOMElement) page.createElement(tag);
        }

        if (newNode != null) {

            newNode.setProperty(AbstractNode.visibleToPublicUsers, publicVisible);
            newNode.setProperty(AbstractNode.visibleToAuthenticatedUsers, authVisible);

            if (res != null) {

                newNode.setProperty(LinkSource.linkable, res);

            }

            // "id" attribute: Put it into the "_html_id" field
            if (StringUtils.isNotBlank(id)) {

                newNode.setProperty(DOMElement._id, id);
            }

            if (StringUtils.isNotBlank(classString.toString())) {

                newNode.setProperty(DOMElement._class, StringUtils.trim(classString.toString()));
            }

            for (Attribute nodeAttr : node.attributes()) {

                final String key = nodeAttr.getKey();

                if (!key.equals("text")) { // Don't add text attribute as _html_text because the text is already contained in the 'content' attribute

                    final String value = nodeAttr.getValue();

                    if (key.startsWith("data-")) {

                        if (key.startsWith(DATA_META_PREFIX)) { // convert data-structr-meta-* attributes to local camel case properties on the node,

                            final String suffix = key.substring(DATA_META_PREFIX.length());
                            final String upperCaseKey = WordUtils.capitalize(suffix, new char[] { '-' })
                                    .replaceAll("-", "");

                            // A bare prefix (or a suffix made of hyphens only) leaves no property
                            // name; the substring calls below would throw, so skip the attribute.
                            if (value != null && !upperCaseKey.isEmpty()) {

                                // keep the original first character, camel-case the rest
                                final String camelCaseKey = suffix.substring(0, 1)
                                        .concat(upperCaseKey.substring(1));

                                if (value.equalsIgnoreCase("true")) {
                                    newNode.setProperty(new BooleanProperty(camelCaseKey), true);
                                } else if (value.equalsIgnoreCase("false")) {
                                    newNode.setProperty(new BooleanProperty(camelCaseKey), false);
                                } else {
                                    newNode.setProperty(new StringProperty(camelCaseKey), value);
                                }
                            }

                        } else if (key.startsWith(DATA_STRUCTR_PREFIX)) { // don't convert data-structr-* attributes as they are internal

                            PropertyKey propertyKey = config.getPropertyKeyForJSONName(newNode.getClass(), key);

                            if (propertyKey != null) {

                                final PropertyConverter inputConverter = propertyKey
                                        .inputConverter(securityContext);
                                if (value != null && inputConverter != null) {

                                    // reuse the converter obtained above instead of looking it up again
                                    newNode.setProperty(propertyKey, inputConverter.convert(value));
                                } else {

                                    newNode.setProperty(propertyKey, value);
                                }
                            }
                        }

                    } else {

                        boolean notBlank = StringUtils.isNotBlank(value);
                        boolean isAnchor = notBlank && value.startsWith("#");
                        boolean isLocal = notBlank && !value.startsWith("http");
                        boolean isActive = notBlank && value.contains("${");
                        boolean isStructrLib = notBlank && value.startsWith("/structr/js/");

                        if ("link".equals(tag) && "href".equals(key) && isLocal && !isActive) {

                            // local stylesheet-like link: reference the linked file incl. version
                            newNode.setProperty(new StringProperty(PropertyView.Html.concat(key)),
                                    "${link.path}?${link.version}");

                        } else if (("href".equals(key) || "src".equals(key)) && isLocal && !isActive
                                && !isAnchor && !isStructrLib) {

                            // other local reference: point to the path of the linked file
                            newNode.setProperty(new StringProperty(PropertyView.Html.concat(key)),
                                    "${link.path}");

                        } else {

                            newNode.setProperty(new StringProperty(PropertyView.Html.concat(key)), value);
                        }

                    }
                }

            }

            final StringProperty typeKey = new StringProperty(PropertyView.Html.concat("type"));

            if ("script".equals(tag) && newNode.getProperty(typeKey) == null) {

                // Set default type of script tag to "text/javascript" to ensure inline JS gets imported properly
                newNode.setProperty(typeKey, "text/javascript");
            }

            parent.appendChild(newNode);

            // Step down and process child nodes
            createChildNodes(node, newNode, page, removeHashAttribute);

        }
    }
}

From source file:sk.svec.jan.acb.extraction.DiscussionFinder.java

/**
 * Scans the attribute values of the given node for date information and records the outcome
 * in the {@code foundDateStringSwitch}, {@code foundDate} and {@code dateScore} fields.
 *
 * <p>Until {@code findDate} has matched once, every attribute value is passed to
 * {@code findDateValue} directly and {@code dateScore} is set to 5. Once it has matched,
 * only attributes for which {@code findDate} matches are considered; for those the string
 * form of the node's first child is passed to {@code findDateValue} and {@code dateScore}
 * is set to 10.
 *
 * @param node the node whose attributes are inspected
 * @return always {@code false}; results are reported through the fields named above
 *         (NOTE(review): a combined {@code foundDate && foundAuthor && foundText} result
 *         was disabled in the original code)
 */
private boolean analyze(Node node) {

    for (Attribute attribute : node.attributes().asList()) {
        String value = attribute.getValue();

        if (!foundDateStringSwitch) {
            foundDateStringSwitch = findDate(node, value);
        }

        if (foundDateStringSwitch) {
            boolean foundDateString = findDate(node, value);
            // childNode(0) throws for a node without children (e.g. an empty or void
            // element), so the first child is only read when there is one.
            if (foundDateString && node.childNodeSize() > 0) {
                String child = node.childNode(0).toString();
                foundDate = findDateValue(node, child);
                dateScore = 10;
            }
        } else {
            foundDate = findDateValue(node, value);
            dateScore = 5;
        }
    }

    return false;
}