List of usage examples for com.google.gwt.dom.client TableElement hasAttribute
@Override
public boolean hasAttribute(String name)
From source file:com.dom_distiller.client.TableClassifier.java
License:Open Source License
public static Type table(TableElement t) { sReason = Reason.UNKNOWN;/*from w w w. ja va2 s. c om*/ // The following heuristics are dropped from said url: // - table created by CSS display style is layout table, because we only handle actual // <table> elements. // 1) Table inside editable area is layout table, different from said url because we ignore // editable areas during distillation. Element parent = t.getParentElement(); while (parent != null) { if (parent.hasTagName("INPUT") || parent.getAttribute("contenteditable").equalsIgnoreCase("true")) { return logAndReturn(Reason.INSIDE_EDITABLE_AREA, "", Type.LAYOUT); } parent = parent.getParentElement(); } // 2) Table having role="presentation" is layout table. String tableRole = t.getAttribute("role").toLowerCase(); if (tableRole.equals("presentation")) { return logAndReturn(Reason.ROLE_TABLE, "_" + tableRole, Type.LAYOUT); } // 3) Table having ARIA table-related roles is data table. if (sARIATableRoles.contains(tableRole) || sARIARoles.contains(tableRole)) { return logAndReturn(Reason.ROLE_TABLE, "_" + tableRole, Type.DATA); } // 4) Table having ARIA table-related roles in its descendants is data table. // This may have deviated from said url if it only checks for <table> element but not its // descendants. List<Element> directDescendants = getDirectDescendants(t); for (Element e : directDescendants) { String role = e.getAttribute("role").toLowerCase(); if (sARIATableDescendantRoles.contains(role) || sARIARoles.contains(role)) { return logAndReturn(Reason.ROLE_DESCENDANT, "_" + role, Type.DATA); } } // 5) Table having datatable="0" attribute is layout table. if (t.getAttribute("datatable").equals("0")) { return logAndReturn(Reason.DATATABLE_0, "", Type.LAYOUT); } // 6) Table having nested table(s) is layout table. // The order here and #7 (table having <=1 row/col is layout table) is different from said // url: the latter has these heuristics after #10 (table having "summary" attribute is // data table), but our eval sets indicate the need to bump these way up to here, because // many (old) pages have layout tables that are nested or with <TH>/<CAPTION>s but only 1 // row or col. if (hasNestedTables(t)) return logAndReturn(Reason.NESTED_TABLE, "", Type.LAYOUT); // 7) Table having only one row or column is layout table. // See comments for #6 about deviation from said url. NodeList<TableRowElement> rows = t.getRows(); if (rows.getLength() <= 1) return logAndReturn(Reason.LESS_EQ_1_ROW, "", Type.LAYOUT); NodeList<TableCellElement> cols = getMaxColsAmongRows(rows); if (cols == null || cols.getLength() <= 1) { return logAndReturn(Reason.LESS_EQ_1_COL, "", Type.LAYOUT); } // 8) Table having legitimate data table structures is data table: // a) table has <caption>, <thead>, <tfoot>, <colgroup>, <col>, or <th> elements Element caption = t.getCaption(); if ((caption != null && hasValidText(caption)) || t.getTHead() != null || t.getTFoot() != null || hasOneOfElements(directDescendants, sHeaderTags)) { return logAndReturn(Reason.CAPTION_THEAD_TFOOT_COLGROUP_COL_TH, "", Type.DATA); } // Extract all <td> elements from direct descendants, for easier/faster multiple access. List<Element> directTDs = new ArrayList<Element>(); for (Element e : directDescendants) { if (e.hasTagName("TD")) directTDs.add(e); } for (Element e : directTDs) { // b) table cell has abbr, headers, or scope attributes if (e.hasAttribute("abbr") || e.hasAttribute("headers") || e.hasAttribute("scope")) { return logAndReturn(Reason.ABBR_HEADERS_SCOPE, "", Type.DATA); } // c) table cell has <abbr> element as a single child element. NodeList<Element> children = e.getElementsByTagName("*"); if (children.getLength() == 1 && children.getItem(0).hasTagName("ABBR")) { return logAndReturn(Reason.ONLY_HAS_ABBR, "", Type.DATA); } } // 9) Table occupying > 95% of document width without viewport meta is layout table; // viewport condition is not in said url, added here for typical mobile-optimized sites. // The order here is different from said url: the latter has it after #14 (>=20 rows is // data table), but our eval sets indicate the need to bump this way up to here, because // many (old) pages have layout tables with the "summary" attribute (#10). Element docElement = t.getOwnerDocument().getDocumentElement(); int docWidth = docElement.getOffsetWidth(); if (docWidth > 0 && (double) t.getOffsetWidth() > 0.95 * (double) docWidth) { boolean viewportFound = false; NodeList<Element> allMeta = docElement.getElementsByTagName("META"); for (int i = 0; i < allMeta.getLength() && !viewportFound; i++) { MetaElement meta = MetaElement.as(allMeta.getItem(i)); viewportFound = meta.getName().equalsIgnoreCase("viewport"); } if (!viewportFound) { return logAndReturn(Reason.MORE_95_PERCENT_DOC_WIDTH, "", Type.LAYOUT); } } // 10) Table having summary attribute is data table. // This is different from said url: the latter lumps "summary" attribute with #8, but we // split it so as to insert #9 in between. Many (old) pages have tables that are clearly // layout: their "summary" attributes say they're for layout. They also occupy > 95% of // document width, so #9 coming before #10 will correctly classify them as layout. if (t.hasAttribute("summary")) return logAndReturn(Reason.SUMMARY, "", Type.DATA); // 11) Table having >=5 columns is data table. if (cols.getLength() >= 5) return logAndReturn(Reason.MORE_EQ_5_COLS, "", Type.DATA); // 12) Table having borders around cells is data table. for (Element e : directTDs) { String border = DomUtil.getComputedStyle(e).getBorderStyle(); if (!border.isEmpty() && !border.equals("none") && !border.equals("hidden")) { return logAndReturn(Reason.CELLS_HAVE_BORDER, "_" + border, Type.DATA); } } // 13) Table having differently-colored rows is data table. String prevBackgroundColor = null; for (int i = 0; i < rows.getLength(); i++) { String color = DomUtil.getComputedStyle(rows.getItem(i)).getBackgroundColor(); if (prevBackgroundColor == null) { prevBackgroundColor = color; continue; } if (!prevBackgroundColor.equalsIgnoreCase(color)) { return logAndReturn(Reason.DIFFERENTLY_COLORED_ROWS, "", Type.DATA); } } // 14) Table having >=20 rows is data table. if (rows.getLength() >= 20) return logAndReturn(Reason.MORE_EQ_20_ROWS, "", Type.DATA); // 15) Table having <=10 cells is layout table. if (directTDs.size() <= 10) return logAndReturn(Reason.LESS_EQ_10_CELLS, "", Type.LAYOUT); // 16) Table containing <embed>, <object>, <applet> or <iframe> elements (typical // advertisement elements) is layout table. if (hasOneOfElements(directDescendants, sObjectTags)) { return logAndReturn(Reason.EMBED_OBJECT_APPLET_IFRAME, "", Type.LAYOUT); } // 17) Table occupying > 90% of document height is layout table. // This is not in said url, added here because many (old) pages have tables that don't fall // into any of the above heuristics but are for layout, and hence shouldn't default to data // by #18. int docHeight = docElement.getOffsetHeight(); if (docHeight > 0 && (double) t.getOffsetHeight() > 0.9 * (double) docHeight) { return logAndReturn(Reason.MORE_90_PERCENT_DOC_HEIGHT, "", Type.LAYOUT); } // 18) Otherwise, it's data table. return logAndReturn(Reason.DEFAULT, "", Type.DATA); }