Usage examples for com.google.gwt.dom.client.Element#hasTagName
@Override
public boolean hasTagName(String tagName)
From source file:com.dom_distiller.client.FilteringDomVisitor.java
License:Open Source License
@Override public boolean visit(Node n) { if (n.getNodeType() == Node.ELEMENT_NODE) { Element e = Element.as(n); // Skip invisible or uninteresting elements. boolean visible = DomUtil.isVisible(e); logVisibilityInfo(e, visible);/*from ww w . ja v a 2 s. c o m*/ if (!visible) { hiddenElements.add(e); return false; } // Skip data tables, keep track of them to be extracted by RelevantElementsFinder later. if (e.hasTagName("TABLE")) { TableClassifier.Type type = TableClassifier.table(TableElement.as(e)); logTableInfo(e, type); if (type == TableClassifier.Type.DATA) { dataTables.add(e); return false; } } // Some components are revisited later in context as they break text-flow of a document. // e.g. <video> can contain text if format is unsupported. if (sTagsProcessedAsABlock.contains(e.getTagName())) { if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) { LogUtil.logToConsole( "SKIP " + e.getTagName() + " from processing. " + "It may be restored later."); } return false; } } return domVisitor.visit(n); }
From source file:com.dom_distiller.client.IEReadingViewParser.java
License:Open Source License
private static String getCaption(ImageElement image) { // If |image| is a child of <figure>, then get the <figcaption> elements. Element parent = image.getParentElement(); if (!parent.hasTagName("FIGURE")) return ""; NodeList<Element> captions = parent.getElementsByTagName("FIGCAPTION"); int numCaptions = captions.getLength(); String caption = ""; if (numCaptions > 0 && numCaptions <= 2) { // Use javascript innerText (instead of javascript textContent) to get only visible // captions. for (int i = 0; i < numCaptions && caption.isEmpty(); i++) { caption = DomUtil.getInnerText(captions.getItem(i)); }/* w w w. j a v a2s .co m*/ } return caption; }
From source file:com.dom_distiller.client.OpenGraphProtocolParser.java
License:Open Source License
private void findPrefixes(Element root) { String prefixes = ""; // See if HTML tag has "prefix" attribute. if (root.hasTagName("HTML")) prefixes = root.getAttribute("prefix"); // Otherwise, see if HEAD tag has "prefix" attribute. if (prefixes.isEmpty()) { NodeList<Element> heads = root.getElementsByTagName("HEAD"); if (heads.getLength() == 1) prefixes = heads.getItem(0).getAttribute("prefix"); }/* ww w . j a va2 s .c o m*/ // If there's "prefix" attribute, its value is something like // "og: http://ogp.me/ns# profile: http://og.me/ns/profile# article: http://ogp.me/ns/article#". if (!prefixes.isEmpty()) { Matcher matcher = sOgpNsPrefixPattern.matcher(prefixes); while (matcher.find()) { // There could be multiple prefixes. setPrefixForObjectType(matcher.group(2), matcher.group(4)); } } else { // Still no "prefix" attribute, see if HTMl tag has "xmlns" attributes e.g.: // - "xmlns:og="http://ogp.me/ns#" // - "xmlns:profile="http://ogp.me/ns/profile#" // - "xmlns:article="http://ogp.me/ns/article#". final JsArray<Node> attributes = DomUtil.getAttributes(root); for (int i = 0; i < attributes.length(); i++) { final Node node = attributes.get(i); // Look for attribute name that starts with "xmlns:". String attributeName = node.getNodeName().toLowerCase(); Matcher nameMatcher = sOgpNsNonPrefixNamePattern.matcher(attributeName); if (!nameMatcher.find()) continue; // Extract OGP namespace URI from attribute value, if available. String attributeValue = node.getNodeValue(); Matcher valueMatcher = sOgpNsNonPrefixValuePattern.matcher(attributeValue); if (valueMatcher.find()) { setPrefixForObjectType(nameMatcher.group(1), valueMatcher.group(1)); } } } setDefaultPrefixes(); }
From source file:com.dom_distiller.client.TableClassifier.java
License:Open Source License
public static Type table(TableElement t) { sReason = Reason.UNKNOWN;//from ww w. j ava 2 s . c om // The following heuristics are dropped from said url: // - table created by CSS display style is layout table, because we only handle actual // <table> elements. // 1) Table inside editable area is layout table, different from said url because we ignore // editable areas during distillation. Element parent = t.getParentElement(); while (parent != null) { if (parent.hasTagName("INPUT") || parent.getAttribute("contenteditable").equalsIgnoreCase("true")) { return logAndReturn(Reason.INSIDE_EDITABLE_AREA, "", Type.LAYOUT); } parent = parent.getParentElement(); } // 2) Table having role="presentation" is layout table. String tableRole = t.getAttribute("role").toLowerCase(); if (tableRole.equals("presentation")) { return logAndReturn(Reason.ROLE_TABLE, "_" + tableRole, Type.LAYOUT); } // 3) Table having ARIA table-related roles is data table. if (sARIATableRoles.contains(tableRole) || sARIARoles.contains(tableRole)) { return logAndReturn(Reason.ROLE_TABLE, "_" + tableRole, Type.DATA); } // 4) Table having ARIA table-related roles in its descendants is data table. // This may have deviated from said url if it only checks for <table> element but not its // descendants. List<Element> directDescendants = getDirectDescendants(t); for (Element e : directDescendants) { String role = e.getAttribute("role").toLowerCase(); if (sARIATableDescendantRoles.contains(role) || sARIARoles.contains(role)) { return logAndReturn(Reason.ROLE_DESCENDANT, "_" + role, Type.DATA); } } // 5) Table having datatable="0" attribute is layout table. if (t.getAttribute("datatable").equals("0")) { return logAndReturn(Reason.DATATABLE_0, "", Type.LAYOUT); } // 6) Table having nested table(s) is layout table. 
// The order here and #7 (table having <=1 row/col is layout table) is different from said // url: the latter has these heuristics after #10 (table having "summary" attribute is // data table), but our eval sets indicate the need to bump these way up to here, because // many (old) pages have layout tables that are nested or with <TH>/<CAPTION>s but only 1 // row or col. if (hasNestedTables(t)) return logAndReturn(Reason.NESTED_TABLE, "", Type.LAYOUT); // 7) Table having only one row or column is layout table. // See comments for #6 about deviation from said url. NodeList<TableRowElement> rows = t.getRows(); if (rows.getLength() <= 1) return logAndReturn(Reason.LESS_EQ_1_ROW, "", Type.LAYOUT); NodeList<TableCellElement> cols = getMaxColsAmongRows(rows); if (cols == null || cols.getLength() <= 1) { return logAndReturn(Reason.LESS_EQ_1_COL, "", Type.LAYOUT); } // 8) Table having legitimate data table structures is data table: // a) table has <caption>, <thead>, <tfoot>, <colgroup>, <col>, or <th> elements Element caption = t.getCaption(); if ((caption != null && hasValidText(caption)) || t.getTHead() != null || t.getTFoot() != null || hasOneOfElements(directDescendants, sHeaderTags)) { return logAndReturn(Reason.CAPTION_THEAD_TFOOT_COLGROUP_COL_TH, "", Type.DATA); } // Extract all <td> elements from direct descendants, for easier/faster multiple access. List<Element> directTDs = new ArrayList<Element>(); for (Element e : directDescendants) { if (e.hasTagName("TD")) directTDs.add(e); } for (Element e : directTDs) { // b) table cell has abbr, headers, or scope attributes if (e.hasAttribute("abbr") || e.hasAttribute("headers") || e.hasAttribute("scope")) { return logAndReturn(Reason.ABBR_HEADERS_SCOPE, "", Type.DATA); } // c) table cell has <abbr> element as a single child element. 
NodeList<Element> children = e.getElementsByTagName("*"); if (children.getLength() == 1 && children.getItem(0).hasTagName("ABBR")) { return logAndReturn(Reason.ONLY_HAS_ABBR, "", Type.DATA); } } // 9) Table occupying > 95% of document width without viewport meta is layout table; // viewport condition is not in said url, added here for typical mobile-optimized sites. // The order here is different from said url: the latter has it after #14 (>=20 rows is // data table), but our eval sets indicate the need to bump this way up to here, because // many (old) pages have layout tables with the "summary" attribute (#10). Element docElement = t.getOwnerDocument().getDocumentElement(); int docWidth = docElement.getOffsetWidth(); if (docWidth > 0 && (double) t.getOffsetWidth() > 0.95 * (double) docWidth) { boolean viewportFound = false; NodeList<Element> allMeta = docElement.getElementsByTagName("META"); for (int i = 0; i < allMeta.getLength() && !viewportFound; i++) { MetaElement meta = MetaElement.as(allMeta.getItem(i)); viewportFound = meta.getName().equalsIgnoreCase("viewport"); } if (!viewportFound) { return logAndReturn(Reason.MORE_95_PERCENT_DOC_WIDTH, "", Type.LAYOUT); } } // 10) Table having summary attribute is data table. // This is different from said url: the latter lumps "summary" attribute with #8, but we // split it so as to insert #9 in between. Many (old) pages have tables that are clearly // layout: their "summary" attributes say they're for layout. They also occupy > 95% of // document width, so #9 coming before #10 will correctly classify them as layout. if (t.hasAttribute("summary")) return logAndReturn(Reason.SUMMARY, "", Type.DATA); // 11) Table having >=5 columns is data table. if (cols.getLength() >= 5) return logAndReturn(Reason.MORE_EQ_5_COLS, "", Type.DATA); // 12) Table having borders around cells is data table. 
for (Element e : directTDs) { String border = DomUtil.getComputedStyle(e).getBorderStyle(); if (!border.isEmpty() && !border.equals("none") && !border.equals("hidden")) { return logAndReturn(Reason.CELLS_HAVE_BORDER, "_" + border, Type.DATA); } } // 13) Table having differently-colored rows is data table. String prevBackgroundColor = null; for (int i = 0; i < rows.getLength(); i++) { String color = DomUtil.getComputedStyle(rows.getItem(i)).getBackgroundColor(); if (prevBackgroundColor == null) { prevBackgroundColor = color; continue; } if (!prevBackgroundColor.equalsIgnoreCase(color)) { return logAndReturn(Reason.DIFFERENTLY_COLORED_ROWS, "", Type.DATA); } } // 14) Table having >=20 rows is data table. if (rows.getLength() >= 20) return logAndReturn(Reason.MORE_EQ_20_ROWS, "", Type.DATA); // 15) Table having <=10 cells is layout table. if (directTDs.size() <= 10) return logAndReturn(Reason.LESS_EQ_10_CELLS, "", Type.LAYOUT); // 16) Table containing <embed>, <object>, <applet> or <iframe> elements (typical // advertisement elements) is layout table. if (hasOneOfElements(directDescendants, sObjectTags)) { return logAndReturn(Reason.EMBED_OBJECT_APPLET_IFRAME, "", Type.LAYOUT); } // 17) Table occupying > 90% of document height is layout table. // This is not in said url, added here because many (old) pages have tables that don't fall // into any of the above heuristics but are for layout, and hence shouldn't default to data // by #18. int docHeight = docElement.getOffsetHeight(); if (docHeight > 0 && (double) t.getOffsetHeight() > 0.9 * (double) docHeight) { return logAndReturn(Reason.MORE_90_PERCENT_DOC_HEIGHT, "", Type.LAYOUT); } // 18) Otherwise, it's data table. return logAndReturn(Reason.DEFAULT, "", Type.DATA); }
From source file:com.dom_distiller.client.TableClassifier.java
License:Open Source License
private static List<Element> getDirectDescendants(Element t) { List<Element> directDescendants = new ArrayList<Element>(); NodeList<Element> allDescendants = t.getElementsByTagName("*"); if (!hasNestedTables(t)) { for (int i = 0; i < allDescendants.getLength(); i++) { directDescendants.add(allDescendants.getItem(i)); }//from w w w . j a v a 2 s. c o m } else { for (int i = 0; i < allDescendants.getLength(); i++) { // Check if the current element is a direct descendent of the |t| table element in // question, as opposed to being a descendent of a nested table in |t|. Element e = allDescendants.getItem(i); Element parent = e.getParentElement(); while (parent != null) { if (parent.hasTagName("TABLE")) { if (parent == t) directDescendants.add(e); break; } parent = parent.getParentElement(); } } } return directDescendants; }
From source file:com.sencha.gxt.widget.core.client.menu.Menu.java
License:sencha.com license
@Override protected void onWindowResize(int width, int height) { boolean doHide = true; if (GXT.isTouch()) { // should we prevent hide? // If resize occurs due to focusing an input and bringing up // virtual Keyboard, let's not hide Element activeElement = XDOM.getActiveElement(); if (activeElement.hasTagName("INPUT")) { doHide = false;// w w w . j a v a2 s. c o m } /* on touch devices, a window resize can mean only one of two things: either there was a change in orientation or * the virtual keyboard was displayed/hidden. If we determine that the last resize was not an orientation change, * we can assume it was the virtual keyboard - in which case, we do not want to hide the menu. */ if (!XWindow.isLastResizeOrientationChange()) { doHide = false; } } super.onWindowResize(width, height); if (doHide) { hide(true); } }
From source file:org.chromium.distiller.OpenGraphProtocolParser.java
License:Open Source License
private void findPrefixes(Element root) { String prefixes = ""; // See if HTML tag has "prefix" attribute. if (root.hasTagName("HTML")) prefixes = root.getAttribute("prefix"); // Otherwise, see if HEAD tag has "prefix" attribute. if (prefixes.isEmpty()) { NodeList<Element> heads = root.getElementsByTagName("HEAD"); if (heads.getLength() == 1) prefixes = heads.getItem(0).getAttribute("prefix"); }/*from www. j av a 2 s .co m*/ // If there's "prefix" attribute, its value is something like // "og: http://ogp.me/ns# profile: http://og.me/ns/profile# article: // http://ogp.me/ns/article#". if (!prefixes.isEmpty()) { sOgpNsPrefixRegExp.setLastIndex(0); while (true) { MatchResult match = sOgpNsPrefixRegExp.exec(prefixes); if (match == null) break; setPrefixForObjectType(match.getGroup(2), match.getGroup(4)); } } else { // Still no "prefix" attribute, see if HTMl tag has "xmlns" attributes e.g.: // - "xmlns:og="http://ogp.me/ns#" // - "xmlns:profile="http://ogp.me/ns/profile#" // - "xmlns:article="http://ogp.me/ns/article#". final JsArray<Node> attributes = DomUtil.getAttributes(root); for (int i = 0; i < attributes.length(); i++) { final Node node = attributes.get(i); // Look for attribute name that starts with "xmlns:". String attributeName = node.getNodeName().toLowerCase(); MatchResult nameMatch = sOgpNsNonPrefixNameRegExp.exec(attributeName); if (nameMatch == null) continue; // Extract OGP namespace URI from attribute value, if available. String attributeValue = node.getNodeValue(); MatchResult valueMatch = sOgpNsNonPrefixValueRegExp.exec(attributeValue); if (valueMatch != null) { setPrefixForObjectType(nameMatch.getGroup(1), valueMatch.getGroup(1)); } } } setDefaultPrefixes(); }
From source file:org.chromium.distiller.PageParameterParser.java
License:Open Source License
/** * Finds and adds the leaf node(s) closest to the given start node. * This recurses and keeps finding and, if necessary, adding the numeric text of valid nodes, * collecting the PageParamInfo.PageInfo's for the current adjacency group. * For backward search, i.e. nodes before start node, search terminates (i.e. recursion stops) * once a text node or anchor is encountered. If the text node contains numeric text, it's * added to the current adjacency group. Otherwise, a new group is created to break the * adjacency.//from w w w .j a v a2 s. c o m * For forward search, i.e. nodes after start node, search continues (i.e. recursion continues) * until a text node or anchor with non-numeric text is encountered. In the process, text nodes * and anchors with numeric text are added to the current adjacency group. When a non-numeric * text node or anchor is encountered, a new group is started to break the adjacency, and search * ends. * * @return true to continue search, false to stop. * * @param start node to work on. * @param checkStart true to check start node. Otherwise, the previous or next sibling of the * start node is checked. * @param backward true to search backward (i.e. nodes before start node), false to search * forward (i.e. nodes after start node). * @param baseAnchor created for the current document, only needed for forward search. */ private boolean findAndAddClosestValidLeafNodes(Node start, boolean checkStart, boolean backward, AnchorElement baseAnchor) { Node node = checkStart ? start : (backward ? start.getPreviousSibling() : start.getNextSibling()); if (node == null) { // No sibling, try parent. 
node = start.getParentNode(); if (sInvalidParentWrapper == null) { sInvalidParentWrapper = RegExp.compile("(BODY)|(HTML)"); } if (sInvalidParentWrapper.test(node.getNodeName())) return false; return findAndAddClosestValidLeafNodes(node, false, backward, baseAnchor); } checkStart = false; switch (node.getNodeType()) { case Node.TEXT_NODE: String text = node.getNodeValue(); // Text must contain words. if (text.isEmpty() || StringUtil.countWords(text) == 0) break; boolean added = addNonLinkTextIfValid(node.getNodeValue()); // For backward search, we're done regardless if text was added. // For forward search, we're done only if text was invalid, otherwise continue. if (backward || !added) return false; break; case Node.ELEMENT_NODE: Element e = Element.as(node); if (e.hasTagName("A")) { // For backward search, we're done because we've already processed the anchor. if (backward) return false; // For forward search, we're done only if link was invalid, otherwise continue. mNumForwardLinksProcessed++; if (!addLinkIfValid(AnchorElement.as(e), baseAnchor)) return false; break; } // Intentionally fall through. default: // Check children nodes. if (!node.hasChildNodes()) break; checkStart = true; // We want to check the child node. if (backward) { // Start the backward search with the rightmost child i.e. last and closest to // given node. node = node.getLastChild(); } else { // Start the forward search with the leftmost child i.e. first and closest to // given node. node = node.getFirstChild(); } break; } return findAndAddClosestValidLeafNodes(node, checkStart, backward, baseAnchor); }
From source file:org.chromium.distiller.TableClassifier.java
License:Open Source License
private static List<Element> getDirectDescendants(Element t) { List<Element> directDescendants = new ArrayList<Element>(); NodeList<Element> allDescendants = t.getElementsByTagName("*"); if (!hasNestedTables(t)) { for (int i = 0; i < allDescendants.getLength(); i++) { directDescendants.add(allDescendants.getItem(i)); }// w ww . j av a 2 s. co m } else { for (int i = 0; i < allDescendants.getLength(); i++) { // Check if the current element is a direct descendant of the |t| table element in // question, as opposed to being a descendant of a nested table in |t|. Element e = allDescendants.getItem(i); Element parent = e.getParentElement(); while (parent != null) { if (parent.hasTagName("TABLE")) { if (parent == t) directDescendants.add(e); break; } parent = parent.getParentElement(); } } } return directDescendants; }
From source file:org.openremote.app.client.widget.FileUploadLabelled.java
License:Open Source License
/**
 * Looks up the closest ancestor of {@code wrapper} whose tag name is "form".
 *
 * @return the enclosing form element, or {@code null} when no ancestor is a form.
 */
protected FormElement getParentForm() {
    Element ancestor = wrapper.getParentElement();
    for (; ancestor != null; ancestor = ancestor.getParentElement()) {
        if (ancestor.hasTagName("form")) {
            // Nearest enclosing form found; stop climbing.
            break;
        }
    }
    // |ancestor| is null here when the top of the tree was reached without finding a form.
    return (FormElement) ancestor;
}