List of usage examples for com.google.common.collect Table put
@Nullable V put(R rowKey, C columnKey, V value);
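As the @Nullable return type hints, put associates value with the (rowKey, columnKey) pair and returns whatever was previously stored in that cell, or null if the cell was empty. A minimal sketch with HashBasedTable before the real-world examples below (names are illustrative):

import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;

public class TablePutBasics {
    public static void main(String[] args) {
        Table<String, String, Integer> t = HashBasedTable.create();
        // No prior mapping for this (row, column) pair, so put returns null
        Integer before = t.put("r1", "c1", 10);
        System.out.println(before);                // null
        // Putting again at the same cell replaces the value and returns the old one
        System.out.println(t.put("r1", "c1", 20)); // 10
        System.out.println(t.get("r1", "c1"));     // 20
        // Note: HashBasedTable rejects null keys and null values with a NullPointerException
    }
}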
From source file: carskit.data.processor.DataDAO.java
/**
 * Read data from the data file. Note that duplicate lines are not handled.
 *
 * @param binThold
 *            the threshold used to binarize a rating. If a rating is greater than the threshold, the value
 *            becomes 1; otherwise 0. To disable this feature (i.e., keep the original rating value), set the
 *            threshold to a negative value.
 * @return a sparse matrix storing all the relevant data
 */
public SparseMatrix readData(double binThold) throws Exception {
    if (CARSKit.isMeasuresOnly)
        Logs.debug(String.format("Dataset: %s", Strings.last(dataPath, 38)));
    else
        Logs.info(String.format("Dataset: %s", Strings.last(dataPath, 38)));

    // Table {row-id, col-id, rate}
    Table<Integer, Integer, Double> dataTable = HashBasedTable.create();
    // Map {col-id, multiple row-id}: used to quickly build the rating matrix
    Multimap<Integer, Integer> colMap = HashMultimap.create();

    EmptyContextConditions = new ArrayList<>();
    Logs.info("DataPath: " + dataPath);

    BufferedReader br = FileIO.getReader(dataPath);
    String line = br.readLine(); // 1st line is a header of the shape: user, item, rating, dim1:c1, dim1:c2, ...
    String[] data = line.trim().split("[\t,]+");

    // index context dimensions and conditions
    for (int i = 3; i < data.length; ++i) {
        String context = data[i].trim();
        String[] cs = context.split(":");
        String dim = cs[0].trim();

        int dimc = dimIds.containsKey(dim) ? dimIds.get(dim) : dimIds.size(); // hash dimension ids, from 0 to N
        dimIds.put(dim, dimc);

        condIds.put(context, i - 3);
        dimConditionsList.put(dimc, i - 3);
        condDimensionMap.put(i - 3, dimc); // key = condId, value = dimId

        // record which conditions are the empty contexts, i.e., the condition value = NA
        if (context.endsWith(":na"))
            EmptyContextConditions.add(i - 3);
    }

    while ((line = br.readLine()) != null) {
        //data = line.trim().split("[\t,]+");
        data = line.trim().split(",", -1);

        String user = data[0];
        String item = data[1];
        Double rate = Double.valueOf(data[2]);

        // binarize the rating for the item recommendation task
        if (binThold >= 0)
            rate = rate > binThold ? 1.0 : 0.0;

        scaleDist.add(rate);

        // inner ids start from 0
        int row = userIds.containsKey(user) ? userIds.get(user) : userIds.size();
        userIds.put(user, row);

        int col = itemIds.containsKey(item) ? itemIds.get(item) : itemIds.size();
        itemIds.put(item, col);

        // also index (user, item); note: the user inner id is part of the key
        String useritem = row + "," + col;
        int uic = uiIds.containsKey(useritem) ? uiIds.get(useritem) : uiIds.size();
        uiIds.put(useritem, uic);

        // add ui to uList and iList; multiple non-duplicate values can be added under the same key
        uRatedList.put(row, uic);
        iRatedList.put(col, uic);
        uiUserIds.put(uic, row);
        uiItemIds.put(uic, col);

        // index the context; only record ids that correlate with the header
        StringBuilder sb_ctx = new StringBuilder();
        ArrayList<Integer> condList = new ArrayList<>();
        for (int i = 3; i < data.length; ++i) {
            if (data[i].trim().equals(""))
                Logs.error(line + "; " + data[i]);
            int value = Integer.valueOf(data[i].trim());
            if (value == 1) {
                if (sb_ctx.length() > 0)
                    sb_ctx.append(",");
                sb_ctx.append(i - 3);
                condList.add(i - 3);
            }
        }
        String ctx = sb_ctx.toString();

        // inner ids start from 0
        int cc = ctxIds.containsKey(ctx) ? ctxIds.get(ctx) : ctxIds.size();
        ctxIds.put(ctx, cc);

        contextConditionsList.put(cc, condList);
        for (Integer cond : condList) {
            //contextConditionsList.put(cc, cond);
            this.condContextsList.put(cond, cc);
        }

        //System.out.println(useritem + "; " + ctx + "; " + rate);
        dataTable.put(uic, cc, rate); // useritem, ctx, rating
        colMap.put(cc, uic);
    }
    br.close();

    numRatings = scaleDist.size();
    ratingScale = new ArrayList<>(scaleDist.elementSet());
    Collections.sort(ratingScale);

    // build the rating matrix
    rateMatrix = new SparseMatrix(numUserItems(), numContexts(), dataTable, colMap);

    // release the memory of the data table
    dataTable = null;

    Logs.info("Rating data set has been successfully loaded.");
    return rateMatrix;
}
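One consequence worth noting, and the reason for the javadoc's caveat about duplicate lines: Table.put overwrites silently, so if the file contains the same (user-item id, context id) pair twice, only the last rating read survives. A minimal illustration (not CARSKit code):

import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;

public class DuplicateLineSketch {
    public static void main(String[] args) {
        Table<Integer, Integer, Double> dataTable = HashBasedTable.create();
        // two "file lines" that map to the same (user-item id, context id) cell
        dataTable.put(7, 3, 4.0);
        dataTable.put(7, 3, 2.0); // silently replaces the 4.0 read earlier
        System.out.println(dataTable.get(7, 3)); // 2.0 -- only the last rating survives
    }
}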
From source file: lcmc.crm.domain.CrmXml.java
private void parseTransientAttributes(final String uname,
                                      final Node transientAttrNode,
                                      final Table<String, String, String> failedMap,
                                      final Table<String, String, Set<String>> failedClonesMap,
                                      final Map<String, String> pingCountMap) {
    /* <instance_attributes> */
    final Node instanceAttrNode = XMLTools.getChildNode(transientAttrNode, "instance_attributes");
    /* <nvpair...> */
    if (instanceAttrNode != null) {
        final NodeList nvpairsRes;
        if (Tools.versionBeforePacemaker(host)) {
            /* <attributes> only until 2.1.4 */
            final Node attrNode = XMLTools.getChildNode(instanceAttrNode, "attributes");
            nvpairsRes = attrNode.getChildNodes();
        } else {
            nvpairsRes = instanceAttrNode.getChildNodes();
        }
        for (int j = 0; j < nvpairsRes.getLength(); j++) {
            final Node optionNode = nvpairsRes.item(j);
            if (optionNode.getNodeName().equals("nvpair")) {
                final String name = XMLTools.getAttribute(optionNode, "name");
                final String value = XMLTools.getAttribute(optionNode, "value");
                /* TODO: last-failure-" */
                if ("pingd".equals(name)) {
                    pingCountMap.put(uname, value);
                } else if (name.indexOf(FAIL_COUNT_PREFIX) == 0) {
                    final String resId = name.substring(FAIL_COUNT_PREFIX.length());
                    final String unameLowerCase = uname.toLowerCase(Locale.US);
                    failedMap.put(unameLowerCase, resId, value);
                    final Pattern p = Pattern.compile("(.*):(\\d+)$");
                    final Matcher m = p.matcher(resId);
                    if (m.matches()) {
                        final String crmId = m.group(1);
                        Set<String> clones = failedClonesMap.get(unameLowerCase, crmId);
                        if (clones == null) {
                            clones = new LinkedHashSet<String>();
                            failedClonesMap.put(unameLowerCase, crmId, clones);
                        }
                        clones.add(m.group(2));
                        failedMap.put(uname.toLowerCase(Locale.US), crmId, value);
                    }
                }
            }
        }
    }
}
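A pattern this example leans on: when the table's value type is a collection, put is only called once per cell to install the container, and later hits mutate the Set in place via get. A standalone sketch of that get-or-create idiom (node and resource names are made up):

import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;
import java.util.LinkedHashSet;
import java.util.Set;

public class SetValuedTableSketch {
    public static void main(String[] args) {
        Table<String, String, Set<String>> failedClonesMap = HashBasedTable.create();
        String node = "node1", crmId = "res_apache";
        // get-or-create: put the set once, then mutate it in place on later hits
        Set<String> clones = failedClonesMap.get(node, crmId);
        if (clones == null) {
            clones = new LinkedHashSet<String>();
            failedClonesMap.put(node, crmId, clones);
        }
        clones.add("0");
        clones.add("1");
        System.out.println(failedClonesMap); // {node1={res_apache=[0, 1]}}
    }
}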
From source file: com.ggvaidya.scinames.summary.NameStabilityView.java
public void init() {
    Project project = projectView.getProject();

    // What do we actually need to do?
    boolean flag_calculateNameSimilarity = (toCalculate & NAME_SIMILARITY) == NAME_SIMILARITY;
    boolean flag_calculateClusterSimilarity = (toCalculate & CLUSTER_SIMILARITY) == CLUSTER_SIMILARITY;
    boolean flag_calculateCircumscriptionSimilarity = (toCalculate & CIRCUMSCRIPTIONAL_SIMILARITY) == CIRCUMSCRIPTIONAL_SIMILARITY;

    // Setup stage.
    stage.setTitle("Name stability between " + project.getDatasets().size() + " datasets");

    // Setup table.
    controller.getTableEditableProperty().set(false);
    //controller.setTableColumnResizeProperty(TableView.CONSTRAINED_RESIZE_POLICY);
    ObservableList<TableColumn> cols = controller.getTableColumnsProperty();
    cols.clear();

    // Precalculating.
    Table<Dataset, String, String> precalc = HashBasedTable.create();

    // Set up columns.
    cols.add(createTableColumnFromPrecalc(precalc, "dataset"));
    cols.add(createTableColumnFromPrecalc(precalc, "date"));
    cols.add(createTableColumnFromPrecalc(precalc, "year"));
    cols.add(createTableColumnFromPrecalc(precalc, "count_binomial"));
    cols.add(createTableColumnFromPrecalc(precalc, "count_genera"));
    cols.add(createTableColumnFromPrecalc(precalc, "count_monotypic_genera"));
    cols.add(createTableColumnFromPrecalc(precalc, "names_added"));
    //cols.add(createTableColumnFromPrecalc(precalc, "names_added_list"));
    cols.add(createTableColumnFromPrecalc(precalc, "names_deleted"));
    //cols.add(createTableColumnFromPrecalc(precalc, "names_deleted_list"));
    cols.add(createTableColumnFromPrecalc(precalc, "species_added"));
    //cols.add(createTableColumnFromPrecalc(precalc, "species_added_list"));
    cols.add(createTableColumnFromPrecalc(precalc, "species_deleted"));
    //cols.add(createTableColumnFromPrecalc(precalc, "species_deleted_list"));
    cols.add(createTableColumnFromPrecalc(precalc, "mean_binomials_per_genera"));
    cols.add(createTableColumnFromPrecalc(precalc, "median_binomials_per_genera"));
    cols.add(createTableColumnFromPrecalc(precalc, "mode_binomials_per_genera_list"));

    /* All them stability calculations */
    if (flag_calculateNameSimilarity) {
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev_pc_prev"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next_pc_next"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first_pc_first"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last_pc_last"));
    }

    if (flag_calculateClusterSimilarity) {
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_prev"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_prev_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_prev_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_prev_pc_prev"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_next"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_next_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_next_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_next_pc_next"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first_pc_first"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last_pc_last"));
    }

    if (flag_calculateCircumscriptionSimilarity) {
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev_pc_prev"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next_pc_next"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first_pc_first"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last_pc_last"));
    }

    Set<String> recognitionColumns = new HashSet<>();

    // Calculate binomials per dataset.
    Map<Name, Set<Dataset>> datasetsPerName = new HashMap<>();

    // Prepare to loop!
    List<Dataset> checklists = project.getChecklists();

    // BIRD HACK! Include all datasets!
    // checklists = project.getDatasets();

    // Set table items. We're only interested in checklists, because
    // there's no such thing as "name stability" between non-checklist datasets.
    controller.getTableItemsProperty().set(FXCollections.observableArrayList(checklists));

    List<Dataset> prevChecklists = new LinkedList<>();
    Dataset firstChecklist = checklists.get(0);
    Dataset lastChecklist = checklists.get(checklists.size() - 1);

    // TODO: This used to be prevDataset, but prevChecklist makes a lot more sense, since we
    // want to compare checklists with each other, ignoring datasets. Would be nice if someone
    // with copious free time could look over the calculations and make sure they don't assume
    // that the previous checklist is also the previous dataset?
    Dataset prevChecklist = null;

    int index = -1;
    for (Dataset ds : checklists) {
        index++;
        Dataset nextChecklist = (index < (checklists.size() - 1) ? checklists.get(index + 1) : null);

        precalc.put(ds, "dataset", ds.getName());
        precalc.put(ds, "date", ds.getDate().asYYYYmmDD("-"));
        precalc.put(ds, "year", ds.getDate().getYearAsString());

        Set<Name> recognizedBinomials = project.getRecognizedNames(ds).stream().flatMap(n -> n.asBinomial())
                .collect(Collectors.toSet());
        precalc.put(ds, "count_binomial", String.valueOf(recognizedBinomials.size()));

        Set<Name> recognizedGenera = recognizedBinomials.stream().flatMap(n -> n.asGenus())
                .collect(Collectors.toSet());
        precalc.put(ds, "count_genera", String.valueOf(recognizedGenera.size()));
        precalc.put(ds, "mean_binomials_per_genera",
                new BigDecimal(((double) recognizedBinomials.size()) / recognizedGenera.size())
                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());

        Map<Name, List<Name>> countBinomialsPerGenus = recognizedBinomials.stream()
                // Eliminate names that have zero (or more than one?!) genus name.
                .filter(n -> (n.asGenus().count() == 1))
                .collect(Collectors.groupingBy(n -> n.asGenus().findAny().get()));

        /*
        LOGGER.info("Debugging: list of " + recognizedGenera.size() + " genera: "
                + recognizedGenera.stream().map(n -> n.getFullName()).collect(Collectors.joining(", ")));
        */

        precalc.put(ds, "count_monotypic_genera", String.valueOf(countBinomialsPerGenus.entrySet().stream()
                .filter(entry -> new HashSet<>(entry.getValue()).size() == 1).count()));

        /*
        LOGGER.info("Debugging: list of monotypic genera: "
                + countBinomialsPerGenus.entrySet().stream()
                        .filter(entry -> new HashSet<>(entry.getValue()).size() == 1)
                        .map(entry -> entry.getKey().getFullName())
                        .collect(Collectors.joining(", ")));
        */

        // Species added and deleted
        Set<Name> namesAdded = ds.getChanges(project).filter(ch -> ch.getType().equals(ChangeType.ADDITION))
                .flatMap(ch -> ch.getToStream()).collect(Collectors.toSet());
        Set<Name> namesDeleted = ds.getChanges(project).filter(ch -> ch.getType().equals(ChangeType.DELETION))
                .flatMap(ch -> ch.getFromStream()).collect(Collectors.toSet());

        // TODO: This isn't so useful -- the more useful measure would be the number of all species added
        // and all species deleted, making sure there isn't a cluster-al overlap.
        precalc.put(ds, "names_added", String.valueOf(namesAdded.size()));
        //precalc.put(ds, "names_added_list", namesAdded.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", ")));
        precalc.put(ds, "names_deleted", String.valueOf(namesDeleted.size()));
        //precalc.put(ds, "names_deleted_list", namesDeleted.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", ")));

        // Eliminate names that have been added, but were previously recognized at the species level.
        Set<Name> speciesAdded = namesAdded;
        if (prevChecklist != null) {
            Set<Name> prevRecognizedNames = project.getNameClusterManager()
                    .getClusters(project.getRecognizedNames(prevChecklist)).stream()
                    .flatMap(nc -> nc.getNames().stream()).collect(Collectors.toSet());
            speciesAdded = namesAdded.stream().filter(n -> !prevRecognizedNames.contains(n))
                    .collect(Collectors.toSet());
        }

        // Eliminate names that are still represented in the checklist by a species cluster.
        // (Note that this includes cases where a subspecies is removed, but another subspecies
        // or the nominal species is still recognized!)
        Set<Name> currentlyRecognizedBinomialNames = project.getNameClusterManager()
                .getClusters(project.getRecognizedNames(ds)).stream().flatMap(nc -> nc.getNames().stream())
                .flatMap(n -> n.asBinomial()).collect(Collectors.toSet());
        Set<Name> speciesDeleted = namesDeleted.stream()
                .filter(n -> !n.asBinomial().anyMatch(bn -> currentlyRecognizedBinomialNames.contains(bn)))
                .collect(Collectors.toSet());

        precalc.put(ds, "species_added", String.valueOf(speciesAdded.size()));
        precalc.put(ds, "species_added_list",
                speciesAdded.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", ")));
        precalc.put(ds, "species_deleted", String.valueOf(speciesDeleted.size()));
        precalc.put(ds, "species_deleted_list",
                speciesDeleted.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", ")));

        // Measures of species per genera
        java.util.Map<String, Set<Name>> binomialsPerGenera = recognizedBinomials.stream()
                .collect(Collectors.toMap(n -> n.getGenus(), n -> {
                    Set<Name> set = new HashSet<Name>();
                    set.add(n);
                    return set;
                }, (a, b) -> {
                    a.addAll(b);
                    return a;
                }));

        List<Integer> binomialsPerGeneraCounts = binomialsPerGenera.values().stream().map(set -> set.size())
                .sorted().collect(Collectors.toList());

        Frequency freq = new Frequency();
        for (String genus : binomialsPerGenera.keySet()) {
            // Blech.
            for (Name binom : binomialsPerGenera.get(genus)) {
                freq.addValue(genus);
            }
        }
        List<Comparable<?>> modeGenera = freq.getMode();
        precalc.put(ds, "mode_binomials_per_genera_list",
                modeGenera.stream().map(o -> o.toString() + ": " + freq.getCount(o) + " binomials")
                        .collect(Collectors.joining("; ")));

        double[] binomialsPerGeneraCountsAsDouble = binomialsPerGeneraCounts.stream()
                .mapToDouble(Integer::doubleValue).toArray();
        Median median = new Median();
        precalc.put(ds, "median_binomials_per_genera",
                String.valueOf(median.evaluate(binomialsPerGeneraCountsAsDouble)));

        if (firstChecklist == null) {
            // precalc.put(ds, "names_identical_to_first", "NA");
            // precalc.put(ds, "names_identical_to_first_pc", "NA");
        } else {
            if (flag_calculateNameSimilarity) {
                precalc.put(ds, "names_identical_to_first",
                        String.valueOf(getBinomialNamesIntersection(project, ds, firstChecklist).size()));
                precalc.put(ds, "names_identical_to_first_pc_this",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, firstChecklist).size()
                                / recognizedBinomials.size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "names_identical_to_first_pc_union",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, firstChecklist).size()
                                / getBinomialNamesUnion(project, ds, firstChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "names_identical_to_first_pc_first",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, firstChecklist).size()
                                / getBinomialNamesUnion(project, firstChecklist, firstChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateClusterSimilarity) {
                int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials).size();
                if (clustersForDataset != recognizedBinomials.size()) {
                    throw new RuntimeException("We have " + clustersForDataset + " clusters for this dataset, but "
                            + recognizedBinomials.size() + " recognized binomials. What?");
                }
                precalc.put(ds, "clusters_identical_to_first",
                        String.valueOf(getBinomialClustersIntersection(project, ds, firstChecklist).size()));
                precalc.put(ds, "clusters_identical_to_first_pc_this",
                        new BigDecimal((double) getBinomialClustersIntersection(project, ds, firstChecklist).size()
                                / getBinomialClustersUnion(project, ds, ds).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_first_pc_union",
                        new BigDecimal((double) getBinomialClustersIntersection(project, ds, firstChecklist).size()
                                / getBinomialClustersUnion(project, ds, firstChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_first_pc_first",
                        new BigDecimal((double) getBinomialClustersIntersection(project, ds, firstChecklist).size()
                                / getBinomialClustersUnion(project, firstChecklist, firstChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateCircumscriptionSimilarity) {
                precalc.put(ds, "circumscriptions_identical_to_first",
                        String.valueOf(getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size()));
                precalc.put(ds, "circumscriptions_identical_to_first_pc_this",
                        new BigDecimal((double) getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_first_pc_union",
                        new BigDecimal((double) getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, ds, firstChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_first_pc_first",
                        new BigDecimal((double) getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, firstChecklist, firstChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }
        }

        if (lastChecklist == null) {
            // precalc.put(ds, "names_identical_to_first", "NA");
            // precalc.put(ds, "names_identical_to_first_pc", "NA");
        } else {
            if (flag_calculateNameSimilarity) {
                precalc.put(ds, "names_identical_to_last",
                        String.valueOf(getBinomialNamesIntersection(project, ds, lastChecklist).size()));
                precalc.put(ds, "names_identical_to_last_pc_this",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, lastChecklist).size()
                                / recognizedBinomials.size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "names_identical_to_last_pc_union",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, lastChecklist).size()
                                / getBinomialNamesUnion(project, ds, lastChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "names_identical_to_last_pc_last",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, lastChecklist).size()
                                / getBinomialNamesUnion(project, lastChecklist, lastChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateClusterSimilarity) {
                int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials).size();
                if (clustersForDataset != recognizedBinomials.size()) {
                    throw new RuntimeException("We have " + clustersForDataset + " clusters for this dataset, but "
                            + recognizedBinomials.size() + " recognized binomials. What?");
                }
                precalc.put(ds, "clusters_identical_to_last",
                        String.valueOf(getBinomialClustersIntersection(project, ds, lastChecklist).size()));
                precalc.put(ds, "clusters_identical_to_last_pc_this",
                        new BigDecimal((double) getBinomialClustersIntersection(project, ds, lastChecklist).size()
                                / getBinomialClustersUnion(project, ds, ds).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_last_pc_union",
                        new BigDecimal((double) getBinomialClustersIntersection(project, ds, lastChecklist).size()
                                / getBinomialClustersUnion(project, ds, lastChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_last_pc_last",
                        new BigDecimal((double) getBinomialClustersIntersection(project, ds, lastChecklist).size()
                                / getBinomialClustersUnion(project, lastChecklist, lastChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateCircumscriptionSimilarity) {
                precalc.put(ds, "circumscriptions_identical_to_last",
                        String.valueOf(getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size()));
                precalc.put(ds, "circumscriptions_identical_to_last_pc_this",
                        new BigDecimal((double) getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_last_pc_union",
                        new BigDecimal((double) getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, ds, lastChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_last_pc_last",
                        new BigDecimal((double) getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, lastChecklist, lastChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }
        }

        if (prevChecklist == null) {
            // precalc.put(ds, "names_identical_to_prev", "NA");
            // precalc.put(ds, "names_identical_to_prev_pc", "NA");
        } else {
            if (flag_calculateNameSimilarity) {
                precalc.put(ds, "names_identical_to_prev",
                        String.valueOf(getBinomialNamesIntersection(project, ds, prevChecklist).size()));
                precalc.put(ds, "names_identical_to_prev_pc_this",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, prevChecklist).size()
                                / recognizedBinomials.size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "names_identical_to_prev_pc_union",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, prevChecklist).size()
                                / getBinomialNamesUnion(project, ds, prevChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "names_identical_to_prev_pc_prev",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, prevChecklist).size()
                                / getBinomialNamesUnion(project, prevChecklist, prevChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateClusterSimilarity) {
                int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials).size();
                if (clustersForDataset != recognizedBinomials.size()) {
                    throw new RuntimeException("We have " + clustersForDataset + " clusters for this dataset, but "
                            + recognizedBinomials.size() + " recognized binomials. What?");
                }
                precalc.put(ds, "clusters_identical_to_prev",
                        String.valueOf(getBinomialClustersIntersection(project, ds, prevChecklist).size()));
                precalc.put(ds, "clusters_identical_to_prev_pc_this",
                        new BigDecimal((double) getBinomialClustersIntersection(project, ds, prevChecklist).size()
                                / getBinomialClustersUnion(project, ds, ds).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_prev_pc_union",
                        new BigDecimal((double) getBinomialClustersIntersection(project, ds, prevChecklist).size()
                                / getBinomialClustersUnion(project, ds, prevChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_prev_pc_prev",
                        new BigDecimal((double) getBinomialClustersIntersection(project, ds, prevChecklist).size()
                                / getBinomialClustersUnion(project, prevChecklist, prevChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateCircumscriptionSimilarity) {
                precalc.put(ds, "circumscriptions_identical_to_prev",
                        String.valueOf(getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size()));
                precalc.put(ds, "circumscriptions_identical_to_prev_pc_this",
                        new BigDecimal((double) getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_prev_pc_union",
                        new BigDecimal((double) getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, ds, prevChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_prev_pc_prev",
                        new BigDecimal((double) getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, prevChecklist, prevChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            // FYI, getBinomialTaxonConceptsUnion(project, ds, prevChecklist).size() should always be equal
            // to the number of species in the dataset.
        }

        if (nextChecklist == null) {
            // precalc.put(ds, "names_identical_to_prev", "NA");
            // precalc.put(ds, "names_identical_to_prev_pc", "NA");
        } else {
            if (flag_calculateNameSimilarity) {
                precalc.put(ds, "names_identical_to_next",
                        String.valueOf(getBinomialNamesIntersection(project, ds, nextChecklist).size()));
                precalc.put(ds, "names_identical_to_next_pc_this",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, nextChecklist).size()
                                / recognizedBinomials.size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "names_identical_to_next_pc_union",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, nextChecklist).size()
                                / getBinomialNamesUnion(project, ds, nextChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "names_identical_to_next_pc_next",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, nextChecklist).size()
                                / getBinomialNamesUnion(project, nextChecklist, nextChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateClusterSimilarity) {
                int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials).size();
                if (clustersForDataset != recognizedBinomials.size()) {
                    throw new RuntimeException("We have " + clustersForDataset + " clusters for this dataset, but "
                            + recognizedBinomials.size() + " recognized binomials. What?");
                }
                precalc.put(ds, "clusters_identical_to_next",
                        String.valueOf(getBinomialClustersIntersection(project, ds, nextChecklist).size()));
                precalc.put(ds, "clusters_identical_to_next_pc_this",
                        new BigDecimal((double) getBinomialClustersIntersection(project, ds, nextChecklist).size()
                                / getBinomialClustersUnion(project, ds, ds).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_next_pc_union",
                        new BigDecimal((double) getBinomialClustersIntersection(project, ds, nextChecklist).size()
                                / getBinomialClustersUnion(project, ds, nextChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_next_pc_next",
                        new BigDecimal((double) getBinomialClustersIntersection(project, ds, nextChecklist).size()
                                / getBinomialClustersUnion(project, nextChecklist, nextChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateCircumscriptionSimilarity) {
                precalc.put(ds, "circumscriptions_identical_to_next",
                        String.valueOf(getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size()));
                precalc.put(ds, "circumscriptions_identical_to_next_pc_this",
                        new BigDecimal((double) getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_next_pc_union",
                        new BigDecimal((double) getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, ds, nextChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_next_pc_next",
                        new BigDecimal((double) getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, nextChecklist, nextChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }
        }

        /*
        // For the visualization thingie.
        int total = prevChecklists.size();
        List<Integer> counts = new LinkedList<>();
        for (Name name : recognizedBinomials) {
            int prevRecognized = 0;
            if (!datasetsPerName.containsKey(name)) {
                datasetsPerName.put(name, new HashSet<>());
            } else {
                prevRecognized = datasetsPerName.get(name).size();
            }
            datasetsPerName.get(name).add(ds);
            counts.add((int) (((double) prevRecognized) / total * 100));
        }
        Map<Integer, List<Integer>> countsByPercentage = counts.stream().sorted()
                .collect(Collectors.groupingBy(n -> (int) (n / 10) * 10));
        for (int percentage : countsByPercentage.keySet()) {
            precalc.put(ds, "previously_recognized_" + percentage + "pc",
                    String.valueOf(countsByPercentage.get(percentage).size()));
            recognitionColumns.add("previously_recognized_" + percentage + "pc");
        }
        prevChecklists.add(ds);
        */

        // Set up the previous checklist for the next loop.
        prevChecklist = ds;
    }

    /*
    LinkedList<String> recognitionColumnsList = new LinkedList<>(recognitionColumns);
    recognitionColumnsList.sort(null);
    for (String colName : recognitionColumnsList) {
        cols.add(createTableColumnFromPrecalc(precalc, colName));
    }
    */
}
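The idiom throughout init() is to treat the Table as a precalculated grid: the row key is a domain object, the column key is a column name, and each precalc.put fills one cell that a table-column renderer later reads back. A minimal sketch of the same shape, with a String row key standing in for the Dataset object:

import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;
import java.util.Map;

public class PrecalcGridSketch {
    public static void main(String[] args) {
        // row key = dataset name (stand-in for Dataset), column key = column name
        Table<String, String, String> precalc = HashBasedTable.create();
        precalc.put("checklist-1960", "count_binomial", "412");
        precalc.put("checklist-1960", "count_genera", "88");
        precalc.put("checklist-1973", "count_binomial", "437");
        // a cell renderer can look up a single cell...
        System.out.println(precalc.get("checklist-1960", "count_genera")); // 88
        // ...or pull a whole row at once
        Map<String, String> row = precalc.row("checklist-1960");
        System.out.println(row.keySet()); // [count_binomial, count_genera]
    }
}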
From source file: lcmc.crm.domain.CrmXml.java
/** Returns a CibQuery object with information from the cib node. */
CibQuery parseCibQuery(final String query) {
    final Document document = XMLTools.getXMLDocument(query);
    final CibQuery cibQueryData = new CibQuery();
    if (document == null) {
        LOG.appWarning("parseCibQuery: cib error: " + query);
        return cibQueryData;
    }

    /* get root <pacemaker> */
    final Node pcmkNode = XMLTools.getChildNode(document, "pcmk");
    if (pcmkNode == null) {
        LOG.appWarning("parseCibQuery: there is no pcmk node");
        return cibQueryData;
    }

    /* get fenced nodes */
    final Set<String> fencedNodes = new HashSet<String>();
    final Node fencedNode = XMLTools.getChildNode(pcmkNode, "fenced");
    if (fencedNode != null) {
        final NodeList nodes = fencedNode.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            final Node hostNode = nodes.item(i);
            if (hostNode.getNodeName().equals("node")) {
                final String h = XMLTools.getText(hostNode);
                if (h != null) {
                    fencedNodes.add(h.toLowerCase(Locale.US));
                }
            }
        }
    }

    /* get <cib> */
    final Node cibNode = XMLTools.getChildNode(pcmkNode, "cib");
    if (cibNode == null) {
        LOG.appWarning("parseCibQuery: there is no cib node");
        return cibQueryData;
    }
    /* Designated Co-ordinator */
    final String dcUuid = XMLTools.getAttribute(cibNode, "dc-uuid");
    //TODO: more attributes are here

    /* <configuration> */
    final Node confNode = XMLTools.getChildNode(cibNode, "configuration");
    if (confNode == null) {
        LOG.appWarning("parseCibQuery: there is no configuration node");
        return cibQueryData;
    }

    /* <rsc_defaults> */
    final Node rscDefaultsNode = XMLTools.getChildNode(confNode, "rsc_defaults");
    String rscDefaultsId = null;
    final Map<String, String> rscDefaultsParams = new HashMap<String, String>();
    final Map<String, String> rscDefaultsParamsNvpairIds = new HashMap<String, String>();
    if (rscDefaultsNode != null) {
        rscDefaultsId = parseResourceDefaults(rscDefaultsNode, rscDefaultsParams, rscDefaultsParamsNvpairIds);
    }

    /* <op_defaults> */
    final Node opDefaultsNode = XMLTools.getChildNode(confNode, "op_defaults");
    final Map<String, Value> opDefaultsParams = new HashMap<String, Value>();
    if (opDefaultsNode != null) {
        parseOpDefaults(opDefaultsNode, opDefaultsParams);
    }

    /* <crm_config> */
    final Node crmConfNode = XMLTools.getChildNode(confNode, "crm_config");
    if (crmConfNode == null) {
        LOG.appWarning("parseCibQuery: there is no crm_config node");
        return cibQueryData;
    }

    /* <cluster_property_set> */
    final Node cpsNode = XMLTools.getChildNode(crmConfNode, "cluster_property_set");
    if (cpsNode == null) {
        LOG.appWarning("parseCibQuery: there is no cluster_property_set node");
    } else {
        final NodeList nvpairs;
        if (Tools.versionBeforePacemaker(host)) {
            /* <attributes> only until 2.1.4 */
            final Node attrNode = XMLTools.getChildNode(cpsNode, "attributes");
            nvpairs = attrNode.getChildNodes();
        } else {
            nvpairs = cpsNode.getChildNodes();
        }
        final Map<String, String> crmConfMap = new HashMap<String, String>();
        /* <nvpair...> */
        for (int i = 0; i < nvpairs.getLength(); i++) {
            final Node optionNode = nvpairs.item(i);
            if (optionNode.getNodeName().equals("nvpair")) {
                final String name = XMLTools.getAttribute(optionNode, "name");
                final String value = XMLTools.getAttribute(optionNode, "value");
                crmConfMap.put(name, value);
            }
        }
        cibQueryData.setCrmConfig(crmConfMap);
    }

    /* <nodes> */
    /* the XML node named "node" inside the cluster node makes for clumsy variable names,
     * but let's keep the convention. */
    String dc = null;
    final Table<String, String, String> nodeParametersMap = HashBasedTable.create();
    final Node nodesNode = XMLTools.getChildNode(confNode, "nodes");
    final Map<String, String> nodeOnline = new HashMap<String, String>();
    final Map<String, String> nodeID = new HashMap<String, String>();
    if (nodesNode != null) {
        final NodeList nodes = nodesNode.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            final Node nodeNode = nodes.item(i);
            if (nodeNode.getNodeName().equals("node")) {
                /* TODO: doing nothing with the info, just getting the dc, for now. */
                final String id = XMLTools.getAttribute(nodeNode, "id");
                final String uname = XMLTools.getAttribute(nodeNode, "uname");
                if (!nodeID.containsKey(uname)) {
                    nodeID.put(uname, id);
                }
                if (dcUuid != null && dcUuid.equals(id)) {
                    dc = uname;
                }
                parseNode(uname, nodeNode, nodeParametersMap);
                if (!nodeOnline.containsKey(uname.toLowerCase(Locale.US))) {
                    nodeOnline.put(uname.toLowerCase(Locale.US), "no");
                }
            }
        }
    }

    /* <resources> */
    final Node resourcesNode = XMLTools.getChildNode(confNode, "resources");
    if (resourcesNode == null) {
        LOG.appWarning("parseCibQuery: there is no resources node");
        return cibQueryData;
    }

    /* <primitive> */
    final Map<String, Map<String, String>> parametersMap = new HashMap<String, Map<String, String>>();
    final Map<String, Map<String, String>> parametersNvpairsIdsMap = new HashMap<String, Map<String, String>>();
    final Map<String, ResourceAgent> resourceTypeMap = new HashMap<String, ResourceAgent>();
    final Set<String> orphanedList = new HashSet<String>();
    /* host -> inLRMList list */
    final Map<String, Set<String>> inLRMList = new HashMap<String, Set<String>>();
    final Map<String, String> resourceInstanceAttrIdMap = new HashMap<String, String>();
    final MultiKeyMap<String, Value> operationsMap = new MultiKeyMap<String, Value>();
    final Map<String, String> metaAttrsIdMap = new HashMap<String, String>();
    final Map<String, String> operationsIdMap = new HashMap<String, String>();
    final Map<String, Map<String, String>> resOpIdsMap = new HashMap<String, Map<String, String>>();
    /* must be linked, so that a clone from a group comes before the group itself. */
    final Map<String, List<String>> groupsToResourcesMap = new LinkedHashMap<String, List<String>>();
    final Map<String, String> cloneToResourceMap = new HashMap<String, String>();
    final List<String> masterList = new ArrayList<String>();
    final Table<String, String, String> failedMap = HashBasedTable.create();
    final Table<String, String, Set<String>> failedClonesMap = HashBasedTable.create();
    final Map<String, String> pingCountMap = new HashMap<String, String>();
    groupsToResourcesMap.put("none", new ArrayList<String>());

    final NodeList primitivesGroups = resourcesNode.getChildNodes();
    final Map<String, String> operationsIdRefs = new HashMap<String, String>();
    final Map<String, String> operationsIdtoCRMId = new HashMap<String, String>();
    final Map<String, String> metaAttrsIdRefs = new HashMap<String, String>();
    final Map<String, String> metaAttrsIdToCRMId = new HashMap<String, String>();
    for (int i = 0; i < primitivesGroups.getLength(); i++) {
        final Node primitiveGroupNode = primitivesGroups.item(i);
        final String nodeName = primitiveGroupNode.getNodeName();
        if ("primitive".equals(nodeName)) {
            final List<String> resList = groupsToResourcesMap.get("none");
            parsePrimitiveNode(primitiveGroupNode, resList, resourceTypeMap, parametersMap,
                    parametersNvpairsIdsMap, resourceInstanceAttrIdMap, operationsMap, metaAttrsIdMap,
                    operationsIdMap, resOpIdsMap, operationsIdRefs, operationsIdtoCRMId, metaAttrsIdRefs,
                    metaAttrsIdToCRMId);
        } else if ("group".equals(nodeName)) {
            parseGroupNode(primitiveGroupNode, null, groupsToResourcesMap, parametersMap, resourceTypeMap,
                    parametersNvpairsIdsMap, resourceInstanceAttrIdMap, operationsMap, metaAttrsIdMap,
                    operationsIdMap, resOpIdsMap, operationsIdRefs, operationsIdtoCRMId, metaAttrsIdRefs,
                    metaAttrsIdToCRMId);
        } else if ("master".equals(nodeName) || "master_slave".equals(nodeName) || "clone".equals(nodeName)) {
            final NodeList primitives = primitiveGroupNode.getChildNodes();
            final String cloneId = XMLTools.getAttribute(primitiveGroupNode, "id");
            List<String> resList = groupsToResourcesMap.get(cloneId);
            if (resList == null) {
                resList = new ArrayList<String>();
                groupsToResourcesMap.put(cloneId, resList);
            }
            parseAttributes(primitiveGroupNode, cloneId, parametersMap, parametersNvpairsIdsMap,
                    resourceInstanceAttrIdMap, operationsMap, metaAttrsIdMap, operationsIdMap, resOpIdsMap,
                    operationsIdRefs, operationsIdtoCRMId, metaAttrsIdRefs, metaAttrsIdToCRMId, false);
            for (int j = 0; j < primitives.getLength(); j++) {
                final Node primitiveNode = primitives.item(j);
                if (primitiveNode.getNodeName().equals("primitive")) {
                    parsePrimitiveNode(primitiveNode, resList, resourceTypeMap, parametersMap,
                            parametersNvpairsIdsMap, resourceInstanceAttrIdMap, operationsMap, metaAttrsIdMap,
                            operationsIdMap, resOpIdsMap, operationsIdRefs, operationsIdtoCRMId,
                            metaAttrsIdRefs, metaAttrsIdToCRMId);
                } else if (primitiveNode.getNodeName().equals("group")) {
                    parseGroupNode(primitiveNode, resList, groupsToResourcesMap, parametersMap, resourceTypeMap,
                            parametersNvpairsIdsMap, resourceInstanceAttrIdMap, operationsMap, metaAttrsIdMap,
                            operationsIdMap, resOpIdsMap, operationsIdRefs, operationsIdtoCRMId,
                            metaAttrsIdRefs, metaAttrsIdToCRMId);
                }
            }
            if (!resList.isEmpty()) {
                cloneToResourceMap.put(cloneId, resList.get(0));
                if ("master".equals(nodeName) || "master_slave".equals(nodeName)) {
                    masterList.add(cloneId);
                }
            }
        }
    }

    /* operationsRefs crm id -> crm id */
    final Map<String, String> operationsRefs = new HashMap<String, String>();
    for (final String crmId : operationsIdRefs.keySet()) {
        final String idRef = operationsIdRefs.get(crmId);
        operationsRefs.put(crmId, operationsIdtoCRMId.get(idRef));
    }

    /* metaAttrsRefs crm id -> crm id */
    final Map<String, String> metaAttrsRefs = new HashMap<String, String>();
    for (final String crmId : metaAttrsIdRefs.keySet()) {
        final String idRef = metaAttrsIdRefs.get(crmId);
        metaAttrsRefs.put(crmId, metaAttrsIdToCRMId.get(idRef));
    }

    /* <constraints> */
    final Map<String, ColocationData> colocationIdMap = new LinkedHashMap<String, ColocationData>();
    final Map<String, List<ColocationData>> colocationRscMap = new HashMap<String, List<ColocationData>>();
    final Map<String, OrderData> orderIdMap = new LinkedHashMap<String, OrderData>();
    final Map<String, List<RscSet>> orderIdRscSetsMap = new HashMap<String, List<RscSet>>();
    final Map<String, List<RscSet>> colocationIdRscSetsMap = new HashMap<String, List<RscSet>>();
    final List<RscSetConnectionData> rscSetConnections = new ArrayList<RscSetConnectionData>();
    final Map<String, List<OrderData>> orderRscMap = new HashMap<String, List<OrderData>>();
    final Map<String, Map<String, HostLocation>> locationMap = new HashMap<String, Map<String, HostLocation>>();
    final Map<String, HostLocation> pingLocationMap = new HashMap<String, HostLocation>();
    final Map<String, List<String>> locationsIdMap = new HashMap<String, List<String>>();
    final Table<String, String, String> resHostToLocIdMap = HashBasedTable.create();
    final Map<String, String> resPingToLocIdMap = new HashMap<String, String>();
    final Node constraintsNode = XMLTools.getChildNode(confNode, "constraints");
    if (constraintsNode != null) {
        final NodeList constraints = constraintsNode.getChildNodes();
        String rscString = "rsc";
        String rscRoleString = "rsc-role";
        String withRscString = "with-rsc";
        String withRscRoleString = "with-rsc-role";
        String firstString = "first";
        String thenString = "then";
        String firstActionString = "first-action";
        String thenActionString = "then-action";
        if (Tools.versionBeforePacemaker(host)) {
            rscString = "from";
            rscRoleString = "from_role";
            withRscString = "to";
            withRscRoleString = "to_role";
            firstString = "to";
            thenString = "from";
            firstActionString = "to_action";
            thenActionString = "action";
        }
        for (int i = 0; i < constraints.getLength(); i++) {
            final Node constraintNode = constraints.item(i);
            if (constraintNode.getNodeName().equals("rsc_colocation")) {
                final String colId = XMLTools.getAttribute(constraintNode, "id");
                final String rsc = XMLTools.getAttribute(constraintNode, rscString);
                final String withRsc = XMLTools.getAttribute(constraintNode, withRscString);
                if (rsc == null || withRsc == null) {
                    final List<RscSet> rscSets = new ArrayList<RscSet>();
                    parseResourceSets(constraintNode, colId, null, rscSets, rscSetConnections);
                    colocationIdRscSetsMap.put(colId, rscSets);
                }
                final String rscRole = XMLTools.getAttribute(constraintNode, rscRoleString);
                final String withRscRole = XMLTools.getAttribute(constraintNode, withRscRoleString);
                final String score = XMLTools.getAttribute(constraintNode, SCORE_CONSTRAINT_PARAM);
                final ColocationData colocationData = new ColocationData(colId, rsc, withRsc, rscRole,
                        withRscRole, score);
                colocationIdMap.put(colId, colocationData);
                List<ColocationData> withs = colocationRscMap.get(rsc);
                if (withs == null) {
                    withs = new ArrayList<ColocationData>();
                }
                withs.add(colocationData);
                colocationRscMap.put(rsc, withs);
            } else if (constraintNode.getNodeName().equals("rsc_order")) {
                String rscFirst = XMLTools.getAttribute(constraintNode, firstString);
                String rscThen = XMLTools.getAttribute(constraintNode, thenString);
                final String ordId = XMLTools.getAttribute(constraintNode, "id");
                if (rscFirst == null || rscThen == null) {
                    final List<RscSet> rscSets = new ArrayList<RscSet>();
                    parseResourceSets(constraintNode, null, ordId, rscSets, rscSetConnections);
                    orderIdRscSetsMap.put(ordId, rscSets);
                }
                final String score = XMLTools.getAttribute(constraintNode, SCORE_CONSTRAINT_PARAM);
                final String symmetrical = XMLTools.getAttribute(constraintNode, "symmetrical");
                String firstAction = XMLTools.getAttribute(constraintNode, firstActionString);
                String thenAction = XMLTools.getAttribute(constraintNode, thenActionString);
                final String type = XMLTools.getAttribute(constraintNode, "type");
                if (type != null && "before".equals(type)) {
                    /* exchange resources */
                    final String rsc = rscFirst;
                    rscFirst = rscThen;
                    rscThen = rsc;
                    final String act = firstAction;
                    firstAction = thenAction;
                    thenAction = act;
                }
                final OrderData orderData = new OrderData(ordId, rscFirst, rscThen, score, symmetrical,
                        firstAction, thenAction);
                orderIdMap.put(ordId, orderData);
                List<OrderData> thens = orderRscMap.get(rscFirst);
                if (thens == null) {
                    thens = new ArrayList<OrderData>();
                }
                thens.add(orderData);
                orderRscMap.put(rscFirst, thens);
            } else if ("rsc_location".equals(constraintNode.getNodeName())) {
                final String locId = XMLTools.getAttribute(constraintNode, "id");
                final String node = XMLTools.getAttribute(constraintNode, "node");
                final String rsc = XMLTools.getAttribute(constraintNode, "rsc");
                final String score = XMLTools.getAttribute(constraintNode, SCORE_CONSTRAINT_PARAM);
                List<String> locs = locationsIdMap.get(rsc);
                if (locs == null) {
                    locs = new ArrayList<String>();
                    locationsIdMap.put(rsc, locs);
                }
                Map<String, HostLocation> hostScoreMap = locationMap.get(rsc);
                if (hostScoreMap == null) {
                    hostScoreMap = new HashMap<String, HostLocation>();
                    locationMap.put(rsc, hostScoreMap);
                }
                final String role = null; // TODO
                if (node != null) {
                    resHostToLocIdMap.put(rsc, node.toLowerCase(Locale.US), locId);
                    if (score != null) {
                        hostScoreMap.put(node.toLowerCase(Locale.US), new HostLocation(score, "eq", null, role));
                    }
                }
                locs.add(locId);
                final Node ruleNode = XMLTools.getChildNode(constraintNode, "rule");
                if (ruleNode != null) {
                    final String score2 = XMLTools.getAttribute(ruleNode, SCORE_CONSTRAINT_PARAM);
                    final String booleanOp = XMLTools.getAttribute(ruleNode, "boolean-op");
                    // TODO: only "and" is known; ignoring everything we don't know.
                    final Node expNode = XMLTools.getChildNode(ruleNode, "expression");
                    if (expNode != null && "expression".equals(expNode.getNodeName())) {
                        final String attr = XMLTools.getAttribute(expNode, "attribute");
                        final String op = XMLTools.getAttribute(expNode, "operation");
                        final String value = XMLTools.getAttribute(expNode, "value");
                        if ((booleanOp == null || "and".equals(booleanOp)) && "#uname".equals(attr)
                                && value != null) {
                            hostScoreMap.put(value.toLowerCase(Locale.US),
                                    new HostLocation(score2, op, null, role));
                            resHostToLocIdMap.put(rsc, value.toLowerCase(Locale.US), locId);
                        } else if ((booleanOp == null || "and".equals(booleanOp)) && "pingd".equals(attr)) {
                            pingLocationMap.put(rsc, new HostLocation(score2, op, value, null));
                            resPingToLocIdMap.put(rsc, locId);
                        } else {
                            LOG.appWarning("parseCibQuery: could not parse rsc_location: " + locId);
                        }
                    }
                }
            }
        }
    }

    /* <status> */
    final Node statusNode = XMLTools.getChildNode(cibNode, "status");
    final Set<String> nodePending = new HashSet<String>();
    if (statusNode != null) {
        /* <node_state ...> */
        final NodeList nodes = statusNode.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            final Node nodeStateNode = nodes.item(i);
            if ("node_state".equals(nodeStateNode.getNodeName())) {
                final String uname = XMLTools.getAttribute(nodeStateNode, "uname");
                final String id = XMLTools.getAttribute(nodeStateNode, "id");
                if (uname == null || !id.equals(nodeID.get(uname))) {
                    LOG.appWarning("parseCibQuery: skipping " + uname + ' ' + id);
                    continue;
                }
                final String join = XMLTools.getAttribute(nodeStateNode, "join");
                final String inCCM = XMLTools.getAttribute(nodeStateNode, "in_ccm");
                final String crmd = XMLTools.getAttribute(nodeStateNode, "crmd");
                if ("member".equals(join) && "true".equals(inCCM) && !"offline".equals(crmd)) {
                    nodeOnline.put(uname.toLowerCase(Locale.US), "yes");
                } else {
                    nodeOnline.put(uname.toLowerCase(Locale.US), "no");
                }
                if ("pending".equals(join)) {
                    nodePending.add(uname.toLowerCase(Locale.US));
                }
                final NodeList nodeStates = nodeStateNode.getChildNodes();
                /* transient attributes */
                for (int j = 0; j < nodeStates.getLength(); j++) {
                    final Node nodeStateChild = nodeStates.item(j);
                    if ("transient_attributes".equals(nodeStateChild.getNodeName())) {
                        parseTransientAttributes(uname, nodeStateChild, failedMap, failedClonesMap,
                                pingCountMap);
                    }
                }
                final List<String> resList = groupsToResourcesMap.get("none");
                for (int j = 0; j < nodeStates.getLength(); j++) {
                    final Node nodeStateChild = nodeStates.item(j);
                    if ("lrm".equals(nodeStateChild.getNodeName())) {
                        parseLrmResources(uname.toLowerCase(Locale.US), nodeStateChild, resList,
                                resourceTypeMap, parametersMap, inLRMList, orphanedList, failedClonesMap);
                    }
                }
            }
        }
    }

    cibQueryData.setDC(dc);
    cibQueryData.setNodeParameters(nodeParametersMap);
    cibQueryData.setResourceParameters(parametersMap);
    cibQueryData.setResourceParametersNvpairsIds(parametersNvpairsIdsMap);
    cibQueryData.setResourceType(resourceTypeMap);
    cibQueryData.setInLRM(inLRMList);
    cibQueryData.setOrphaned(orphanedList);
    cibQueryData.setResourceInstanceAttrId(resourceInstanceAttrIdMap);
    cibQueryData.setColocationRsc(colocationRscMap);
    cibQueryData.setColocationId(colocationIdMap);
    cibQueryData.setOrderId(orderIdMap);
    cibQueryData.setOrderIdRscSets(orderIdRscSetsMap);
    cibQueryData.setColocationIdRscSets(colocationIdRscSetsMap);
    cibQueryData.setRscSetConnections(rscSetConnections);
    cibQueryData.setOrderRsc(orderRscMap);
    cibQueryData.setLocations(locationMap);
    cibQueryData.setPingLocations(pingLocationMap);
    cibQueryData.setLocationsId(locationsIdMap);
    cibQueryData.setResHostToLocId(resHostToLocIdMap);
    cibQueryData.setResPingToLocId(resPingToLocIdMap);
    cibQueryData.setOperations(operationsMap);
    cibQueryData.setOperationsId(operationsIdMap);
    cibQueryData.setOperationsRefs(operationsRefs);
    cibQueryData.setMetaAttrsId(metaAttrsIdMap);
    cibQueryData.setMetaAttrsRefs(metaAttrsRefs);
    cibQueryData.setResOpIds(resOpIdsMap);
    cibQueryData.setNodeOnline(nodeOnline);
    cibQueryData.setNodePending(nodePending);
    cibQueryData.setGroupsToResources(groupsToResourcesMap);
    cibQueryData.setCloneToResource(cloneToResourceMap);
    cibQueryData.setMasterList(masterList);
    cibQueryData.setNodeFailedCount(failedMap);
    cibQueryData.setResourceFailedCloneIds(failedClonesMap);
    cibQueryData.setNodePingCount(pingCountMap);
    cibQueryData.setRscDefaultsId(rscDefaultsId);
    cibQueryData.setRscDefaultsParams(rscDefaultsParams);
    cibQueryData.setRscDefaultsParamsNvpairIds(rscDefaultsParamsNvpairIds);
    cibQueryData.setOpDefaultsParams(opDefaultsParams);
    cibQueryData.setFencedNodes(fencedNodes);
    return cibQueryData;
}
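Besides cell-level get, the Tables filled here (nodeParametersMap, failedMap, resHostToLocIdMap) can be consumed through their row and column views, which is the payoff for choosing Table over nested maps. A small sketch with invented node and parameter names:

import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;

public class NodeParamsSketch {
    public static void main(String[] args) {
        // (node name, parameter name) -> value, as parseNode fills nodeParametersMap above
        Table<String, String, String> nodeParams = HashBasedTable.create();
        nodeParams.put("alpha", "standby", "off");
        nodeParams.put("alpha", "maintenance", "off");
        nodeParams.put("beta", "standby", "on");
        // row() gives every parameter of one node; column() gives one parameter across nodes
        System.out.println(nodeParams.row("alpha"));      // {standby=off, maintenance=off}
        System.out.println(nodeParams.column("standby")); // {alpha=off, beta=on}
    }
}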
From source file: com.ggvaidya.scinames.ui.DataReconciliatorController.java
private void reconcileDataFromOneDataset() { Project project = dataReconciliatorView.getProjectView().getProject(); String reconciliationMethod = reconcileUsingComboBox.getValue(); Table<String, String, Set<String>> precalc = HashBasedTable.create(); Dataset namesDataset = useNamesFromComboBox.getSelectionModel().getSelectedItem(); List<NameCluster> nameClusters = null; List<Name> namesInDataset = null; // Set up namesInDataset. switch (namesToUseComboBox.getValue()) { case USE_NAMES_IN_DATASET_ROWS: if (namesDataset == ALL) { namesInDataset = project.getDatasets().stream().flatMap(ds -> ds.getNamesInAllRows().stream()) .distinct().sorted().collect(Collectors.toList()); } else {/*from w w w. jav a2s . co m*/ namesInDataset = namesDataset.getNamesInAllRows().stream().sorted().distinct() .collect(Collectors.toList()); } break; case USE_ALL_REFERENCED_NAMES: if (namesDataset == ALL) { namesInDataset = project.getDatasets().stream().flatMap(ds -> ds.getReferencedNames()).distinct() .sorted().collect(Collectors.toList()); } else { namesInDataset = namesDataset.getReferencedNames().sorted().collect(Collectors.toList()); } break; case USE_ALL_RECOGNIZED_NAMES: if (namesDataset == ALL) { namesInDataset = project.getDatasets().stream() .flatMap(ds -> project.getRecognizedNames(ds).stream()).distinct().sorted() .collect(Collectors.toList()); } else { namesInDataset = project.getRecognizedNames(namesDataset).stream().sorted() .collect(Collectors.toList()); } break; } // IMPORTANT NOTE // This algorithm now relies on nameClusters and namesInDataset // having EXACTLY the same size. So please make sure every combination // of logic here lines up exactly. boolean flag_nameClustersAreTaxonConcepts = false; switch (reconciliationMethod) { case RECONCILE_BY_NAME: // namesInDataset already has all the names we want. nameClusters = createSingleNameClusters(namesDataset, namesInDataset); break; case RECONCILE_BY_SPECIES_NAME: namesInDataset = namesInDataset.stream().filter(n -> n.hasSpecificEpithet()) .flatMap(n -> n.asBinomial()).distinct().sorted().collect(Collectors.toList()); nameClusters = createSingleNameClusters(namesDataset, namesInDataset); break; case RECONCILE_BY_SPECIES_NAME_CLUSTER: // nameClusters = project.getNameClusterManager().getSpeciesClustersAfterFiltering(project).collect(Collectors.toList()); namesInDataset = namesInDataset.stream().filter(n -> n.hasSpecificEpithet()) .flatMap(n -> n.asBinomial()).distinct().sorted().collect(Collectors.toList()); nameClusters = project.getNameClusterManager().getClusters(namesInDataset); break; case RECONCILE_BY_NAME_CLUSTER: // Note that this includes genus name clusters! nameClusters = project.getNameClusterManager().getClusters(namesInDataset); break; case RECONCILE_BY_SPECIES_TAXON_CONCEPT: /* * WARNING: untested! Please test before using! */ List<NameCluster> nameClustersByName = project.getNameClusterManager().getClusters(namesInDataset); List<Name> namesInDatasetCorresponding = new LinkedList<>(); List<NameCluster> nameClustersCorresponding = new LinkedList<>(); for (int x = 0; x < namesInDataset.size(); x++) { Name name = namesInDataset.get(0); NameCluster nameCluster = nameClustersByName.get(0); List<TaxonConcept> taxonConcepts; if (nameCluster == null) { taxonConcepts = new ArrayList<>(); } else { taxonConcepts = nameCluster.getTaxonConcepts(project); } // Now we need to unwind this data structure: each entry in nameClusters // should have a corresponding entry in namesInDataset. 
            for (TaxonConcept tc : taxonConcepts) {
                namesInDatasetCorresponding.add(name);
                nameClustersCorresponding.add((NameCluster) tc);
            }
        }

        // All good? Let's swap in those variables to replace their actual counterparts.
        namesInDataset = namesInDatasetCorresponding;
        nameClusters = nameClustersCorresponding;

        // This is special, at least for now. Maybe some day it won't be?
        flag_nameClustersAreTaxonConcepts = true;
        break;

    default:
        LOGGER.log(Level.SEVERE, "Reconciliation method ''{0}'' has not yet been implemented!",
                reconciliationMethod);
        return;
    }

    if (nameClusters == null) {
        dataTableView.setItems(FXCollections.emptyObservableList());
        return;
    }

    LOGGER.info("Name clusters ready to display: " + nameClusters.size() + " clusters");
    LOGGER.info("Based on " + namesInDataset.size() + " names from " + namesDataset + ": " + namesInDataset);

    // What columns do we have from the other dataset?
    Dataset dataDataset = includeDataFromComboBox.getSelectionModel().getSelectedItem();
    List<Dataset> datasets = null;
    if (dataDataset == ALL)
        datasets = project.getDatasets();
    else if (dataDataset == NONE)
        datasets = new ArrayList<>();
    else
        datasets = Arrays.asList(dataDataset);

    // Precalculate the fixed columns every reconciliation produces.
    List<String> existingColNames = new ArrayList<>();
    existingColNames.add("id");
    existingColNames.add("name");
    existingColNames.add("names_in_dataset");
    existingColNames.add("all_names_in_cluster");
    existingColNames.add("dataset_rows_for_name");
    existingColNames.add("name_cluster_id");
    // existingColNames.add("distinct_dataset_rows_for_name");

    // If these are taxon concepts, there are three other columns we want to emit.
    if (flag_nameClustersAreTaxonConcepts) {
        existingColNames.add("starts_with");
        existingColNames.add("ends_with");
        existingColNames.add("is_ongoing");
    } else {
        existingColNames.add("taxon_concept_count");
        existingColNames.add("taxon_concepts");
        existingColNames.add("trajectory");
        existingColNames.add("trajectory_without_renames");
        existingColNames.add("trajectory_lumps_splits");
    }

    existingColNames.add("first_added_dataset");
    existingColNames.add("first_added_year");
    existingColNames.add("reconciliation_duplicate_of");

    // Precalculate all dataset rows.
    Map<Name, Set<DatasetRow>> datasetRowsByName = new HashMap<>();
    for (Dataset ds : datasets) {
        Map<Name, Set<DatasetRow>> rowsByName = ds.getRowsByName();

        // Merge into the main list.
        for (Name n : rowsByName.keySet()) {
            Set<DatasetRow> rows = rowsByName.get(n);

            if (!reconciliationMethod.equals(RECONCILE_BY_NAME)) {
                // If we're reconciling by binomial names, then
                // we should include binomial names for each row, too.
                Optional<Name> binomialName = n.asBinomial().findAny();
                if (binomialName.isPresent()) {
                    Set<DatasetRow> rowsForBinomial = rowsByName.get(binomialName.get());
                    if (rowsForBinomial != null)
                        rows.addAll(rowsForBinomial);

                    // Don't write this to the sub-binomial name,
                    // just write to the binomial name.
                    n = binomialName.get();
                }
            }

            if (!datasetRowsByName.containsKey(n))
                datasetRowsByName.put(n, new HashSet<>());

            datasetRowsByName.get(n).addAll(rows);
        }
    }

    LOGGER.info("Precalculating all dataset rows");

    // Finally, come up with unique names for every dataset column we might have.
    Map<DatasetColumn, String> datasetColumnMap = new HashMap<>();
    existingColNames.addAll(datasets.stream().flatMap(ds -> ds.getColumns().stream()).distinct().map(col -> {
        String colName = col.getName();
        String baseName = colName;

        int uniqueCounter = 0;
        while (existingColNames.contains(colName)) {
            // Duplicate column name! Map it elsewhere.
            uniqueCounter++;
            colName = baseName + "."
                    + uniqueCounter;
        }

        // Where did we map it to?
        datasetColumnMap.put(col, colName);

        // Okay, now return the new column name we need to create.
        return colName;
    }).collect(Collectors.toList()));

    LOGGER.info("Precalculating " + nameClusters.size() + " name clusters");

    // Make sure names and name clusters are unique, otherwise bail.
    // Earlier this was being ensured by keeping namesInDataset as a
    // Set, but since it's a List now, duplicates might sneak in.
    assert (namesInDataset.size() == new HashSet<>(namesInDataset).size());

    // Since it's a list, we can set it up so that it always corresponds to
    // the correct name cluster.
    assert (namesInDataset.size() == nameClusters.size());

    // Now, nameClusters should NOT be de-duplicated: we might have the same
    // cluster appear multiple times! If so, we'll set
    // "reconciliation_duplicate_of" to point to the first reconciliation,
    // so we don't duplicate reconciliations.

    // Let's track which IDs we use for duplicated name clusters.
    Map<NameCluster, List<String>> idsForNameClusters = new HashMap<>();

    if (nameClusters.size() != new HashSet<>(nameClusters).size()) {
        LOGGER.warning("Clusters not unique: " + nameClusters.size() + " clusters found, but only "
                + new HashSet<>(nameClusters).size() + " are unique.");
    }

    // Track duplicates.
    Map<NameCluster, List<String>> clusterIDsPerNameCluster = new HashMap<>();

    int totalClusterCount = nameClusters.size();
    int currentClusterCount = 0;
    List<String> nameClusterIDs = new LinkedList<>();
    for (NameCluster cluster : nameClusters) {
        currentClusterCount++;

        // Probably don't need GUIDs here, right?
        String clusterID = String.valueOf(currentClusterCount);
        nameClusterIDs.add(clusterID);

        LOGGER.info("(" + currentClusterCount + "/" + totalClusterCount + ") Precalculating name cluster: "
                + cluster);

        precalc.put(clusterID, "id", getOneElementSet(clusterID));
        precalc.put(clusterID, "name_cluster_id", getOneElementSet(cluster.getId().toString()));

        // The 'name' should come from namesInDataset.
        precalc.put(clusterID, "name",
                getOneElementSet(namesInDataset.get(currentClusterCount - 1).getFullName()));

        // Okay, here's what we need to do:
        // - If names is ALL, then we can't do better than cluster.getName().
        if (namesDataset == ALL) {
            precalc.put(clusterID, "names_in_dataset",
                    cluster.getNames().stream().map(n -> n.getFullName()).collect(Collectors.toSet()));
        } else {
            // Hey, here's something cool we can do: figure out which name(s)
            // this dataset uses from this cluster!
            Set<Name> namesToFilterTo = new HashSet<>(namesInDataset);

            List<String> namesInCluster = cluster.getNames().stream()
                    .filter(n -> namesToFilterTo.contains(n))
                    .map(n -> n.getFullName()).collect(Collectors.toList());

            precalc.put(clusterID, "names_in_dataset", new HashSet<>(namesInCluster));
        }

        precalc.put(clusterID, "all_names_in_cluster",
                cluster.getNames().stream().map(n -> n.getFullName()).collect(Collectors.toSet()));

        // Is this a duplicate?
        if (clusterIDsPerNameCluster.containsKey(cluster)) {
            List<String> duplicatedRows = clusterIDsPerNameCluster.get(cluster);

            // Only the first one should have the actual data.
            precalc.put(clusterID, "reconciliation_duplicate_of", getOneElementSet(duplicatedRows.get(0)));
            duplicatedRows.add(clusterID);

            // Okay, do no other work on this cluster, since all the actual information is
            // in the other entry.
            continue;
        } else {
            precalc.put(clusterID, "reconciliation_duplicate_of", getOneElementSet("NA"));

            List<String> clusterIds = new LinkedList<>();
            clusterIds.add(clusterID);
            clusterIDsPerNameCluster.put(cluster, clusterIds);
        }

        LOGGER.fine("Cluster calculation began for " + cluster);

        // If it's a taxon concept, precalculate a few more columns.
        if (flag_nameClustersAreTaxonConcepts) {
            TaxonConcept tc = (TaxonConcept) cluster;

            precalc.put(clusterID, "starts_with",
                    tc.getStartsWith().stream().map(ch -> ch.toString()).collect(Collectors.toSet()));
            precalc.put(clusterID, "ends_with",
                    tc.getEndsWith().stream().map(ch -> ch.toString()).collect(Collectors.toSet()));
            precalc.put(clusterID, "is_ongoing", getOneElementSet(tc.isOngoing(project) ? "yes" : "no"));
        } else {
            // If it's a true name cluster, then perhaps people will want
            // to know what taxon concepts are in here? Maybe for some sort
            // of PhD?
            List<TaxonConcept> tcs = cluster.getTaxonConcepts(project);

            precalc.put(clusterID, "taxon_concept_count", getOneElementSet(String.valueOf(tcs.size())));
            precalc.put(clusterID, "taxon_concepts",
                    tcs.stream().map(tc -> tc.toString()).collect(Collectors.toSet()));
        }

        LOGGER.fine("Cluster calculation ended for " + cluster);

        // When was this first added?
        List<Dataset> foundInSorted = cluster.getFoundInSortedWithDates();
        if (!foundInSorted.isEmpty()) {
            precalc.put(clusterID, "first_added_dataset", getOneElementSet(foundInSorted.get(0).getCitation()));
            precalc.put(clusterID, "first_added_year",
                    getOneElementSet(foundInSorted.get(0).getDate().getYearAsString()));
        }

        LOGGER.fine("Trajectory began for " + cluster);

        // For name clusters we can also figure out trajectories!
        if (!flag_nameClustersAreTaxonConcepts) {
            List<String> trajectorySteps = cluster.getFoundInSortedWithDates().stream().map(dataset -> {
                String changes = dataset.getChanges(project)
                        .filter(ch -> cluster.containsAny(ch.getAllNames()))
                        .map(ch -> ch.getType().toString())
                        .collect(Collectors.joining("|"));
                if (!changes.isEmpty())
                    return changes;

                // This can happen when a change is referenced without an explicit addition.
                if (cluster.containsAny(dataset.getReferencedNames().collect(Collectors.toList())))
                    return "referenced";
                else
                    return "missing";
            }).collect(Collectors.toList());

            precalc.put(clusterID, "trajectory", getOneElementSet(String.join(" -> ", trajectorySteps)));

            precalc.put(clusterID, "trajectory_without_renames", getOneElementSet(trajectorySteps.stream()
                    .filter(ch -> !ch.contains("rename")).collect(Collectors.joining(" -> "))));

            precalc.put(clusterID, "trajectory_lumps_splits", getOneElementSet(
                    trajectorySteps.stream().filter(ch -> ch.contains("split") || ch.contains("lump"))
                            .collect(Collectors.joining(" -> "))));
        }

        LOGGER.fine("Trajectory ended for " + cluster);

        // Okay, here's where we reconcile!
        LOGGER.fine("Reconciliation began for " + cluster);

        // Now we need to actually reconcile the data from these unique row objects.
        Set<DatasetRow> allDatasetRowsCombined = new HashSet<>();
        for (Name name : cluster.getNames()) {
            // We don't have to convert cluster names to binomial,
            // because the cluster formation -- or the hacky thing we do
            // for RECONCILE_SPECIES_NAME -- should already have done that!
            //
            // Where necessary, the previous code will automatically
            // set up datasetRowsByName so it matches binomial names.
            Set<DatasetRow> rowsToReconcile = datasetRowsByName.get(name);
            if (rowsToReconcile == null)
                continue;

            allDatasetRowsCombined.addAll(rowsToReconcile);

            Set<DatasetColumn> columns = rowsToReconcile.stream()
                    .flatMap(row -> row.getColumns().stream())
                    .collect(Collectors.toSet());

            for (DatasetColumn col : columns) {
                // We've precalculated column names.
                String colName = datasetColumnMap.get(col);

                // Make sure we get this column down into 'precalc'.
                if (!precalc.contains(clusterID, colName))
                    precalc.put(clusterID, colName, new HashSet<>());

                // Add all values for all rows in this column.
                Set<String> vals = rowsToReconcile.stream().flatMap(row -> {
                    if (!row.hasColumn(col))
                        return Stream.empty();
                    else
                        return Stream.of(row.get(col));
                }).collect(Collectors.toSet());

                precalc.get(clusterID, colName).addAll(vals);

                LOGGER.fine("Added " + vals.size() + " values under name cluster '" + cluster + "'");
            }
        }

        LOGGER.info("(" + currentClusterCount + "/" + totalClusterCount + ") Reconciliation completed for "
                + cluster);

        // Stringify the count so it fits the Set<String> cell type, matching
        // the String.valueOf(tcs.size()) usage earlier in this method.
        precalc.put(clusterID, "dataset_rows_for_name",
                getOneElementSet(String.valueOf(allDatasetRowsCombined.size())));
    }

    // Set up table items.
    dataTableView.setItems(FXCollections.observableList(nameClusterIDs));

    LOGGER.info("Setting up columns: " + existingColNames);

    dataTableView.getColumns().clear();
    for (String colName : existingColNames) {
        dataTableView.getColumns().add(createColumnFromPrecalc(colName, precalc));
    }

    // Get distinct column names.
    Stream<String> colNames = precalc.cellSet().stream().map(set -> set.getColumnKey());

    // Eliminate columns that are in the existingColNames.
    colNames = colNames.filter(colName -> !existingColNames.contains(colName));

    // And add table columns for the rest.
    List<TableColumn<String, String>> cols = colNames.distinct().sorted()
            .map(colName -> createColumnFromPrecalc(colName, precalc)).collect(Collectors.toList());
    dataTableView.getColumns().addAll(cols);
    dataTableView.refresh();

    // Fill in the status text field. Look up each row's "name" cell by its
    // row key; cellSet() entries are Cells, not row keys, and would miss.
    long distinctNameCount = precalc.rowKeySet().stream()
            .map(clusterID -> precalc.get(clusterID, "name"))
            .distinct().count();
    String str_duplicates = "";
    if (distinctNameCount != dataTableView.getItems().size()) {
        str_duplicates = " for " + distinctNameCount + " distinct names";
    }
    statusTextField.setText(dataTableView.getItems().size() + " rows across " + cols.size()
            + " reconciled columns" + str_duplicates);

    LOGGER.info("All done!");
}
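The controller above leans on one Table idiom worth isolating: use put() to lazily create a mutable cell value (here a Set<String>), then mutate that cell in place through get(). Below is a minimal, self-contained sketch of the pattern, assuming nothing beyond Guava itself; the class name and the addValues helper are illustrative, not part of the project above.

// A minimal sketch of the put-then-merge idiom; only the Guava Table calls are real.
import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class TablePutAccumulateSketch {
    public static void main(String[] args) {
        // Rows are cluster IDs, columns are column names, values are sets of cell values.
        Table<String, String, Set<String>> precalc = HashBasedTable.create();

        addValues(precalc, "1", "locality", Arrays.asList("Oahu", "Maui"));
        addValues(precalc, "1", "locality", Arrays.asList("Maui", "Kauai"));

        // The Set cell de-duplicates across merges: prints [Oahu, Maui, Kauai] in some order.
        System.out.println(precalc.get("1", "locality"));
    }

    // Create the (row, column) cell on first touch, then merge new values into it.
    static void addValues(Table<String, String, Set<String>> table,
            String row, String col, List<String> values) {
        if (!table.contains(row, col))
            table.put(row, col, new HashSet<>());
        table.get(row, col).addAll(values);
    }
}

The contains() guard matters because put() replaces any existing cell value, which would silently drop previously merged values. An equivalent formulation goes through the row map view, e.g. table.row(row).computeIfAbsent(col, k -> new HashSet<>()).addAll(values).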