Usage examples for the Guava `com.google.common.collect.Table.put` method, collected from open-source projects. Method signature:
@Nullable V put(R rowKey, C columnKey, V value);
From source file:com.google.errorprone.bugpatterns.DeduplicateConstants.java
// Scans one compilation unit for duplicated constants: first records every
// effectively-final variable whose initializer is a "interesting" string
// constant, then proposes replacing later literal occurrences of the same
// source text with a reference to that variable.
@Override
public Description matchCompilationUnit(CompilationUnitTree tree, VisitorState state) {
    // (variable symbol, literal occurrence) -> fix that replaces the literal with the variable name
    Table<VarSymbol, Tree, SuggestedFix> fixes = HashBasedTable.create();
    new TreeScanner<Void, Scope>() {
        @Override
        public Void visitBlock(BlockTree tree, Scope scope) {
            // enter a new block scope (includes block trees for method and class bodies)
            return super.visitBlock(tree, scope.enter());
        }

        @Override
        public Void visitVariable(VariableTree tree, Scope scope) {
            // record that this variable hides previous declarations before entering its initializer
            scope.remove(ASTHelpers.getSymbol(tree));
            scan(tree.getInitializer(), scope);
            saveConstValue(tree, scope);
            return null;
        }

        @Override
        public Void visitLiteral(LiteralTree tree, Scope scope) {
            replaceLiteral(tree, scope, state);
            return super.visitLiteral(tree, scope);
        }

        // If this literal's source text matches a previously recorded constant
        // variable, queue a fix replacing the literal with that variable.
        private void replaceLiteral(LiteralTree tree, Scope scope, VisitorState state) {
            Object value = ASTHelpers.constValue(tree);
            if (value == null) {
                return;
            }
            // scope is keyed by the literal's source text (see saveConstValue)
            VarSymbol sym = scope.get(state.getSourceForNode(tree));
            if (sym == null) {
                return;
            }
            SuggestedFix fix = SuggestedFix.replace(tree, sym.getSimpleName().toString());
            fixes.put(sym, tree, fix);
        }

        // Record a (final or effectively-final) variable whose initializer is a
        // string constant longer than one character, keyed by initializer source text.
        private void saveConstValue(VariableTree tree, Scope scope) {
            VarSymbol sym = ASTHelpers.getSymbol(tree);
            if (sym == null) {
                return;
            }
            if ((sym.flags() & (Flags.EFFECTIVELY_FINAL | Flags.FINAL)) == 0) {
                return;
            }
            // heuristic: long string constants are generally more interesting than short ones, or
            // than non-string constants (e.g. `""`, `0`, or `false`).
            String constValue = ASTHelpers.constValue(tree.getInitializer(), String.class);
            if (constValue == null || constValue.length() <= 1) {
                return;
            }
            scope.put(state.getSourceForNode(tree.getInitializer()), sym);
        }
    }.scan(tree, new Scope(null));
    for (Map.Entry<VarSymbol, Map<Tree, SuggestedFix>> entries : fixes.rowMap().entrySet()) {
        Map<Tree, SuggestedFix> occurrences = entries.getValue();
        if (occurrences.size() < 2) {
            // heuristic: only de-duplicate when there are two or more occurrences
            continue;
        }
        // report the finding on each occurrence, but provide a fix for all related occurrences,
        // so it works better on changed-lines only
        SuggestedFix fix = mergeFix(occurrences.values());
        occurrences.keySet().forEach(t -> state.reportMatch(describeMatch(t, fix)));
    }
    return Description.NO_MATCH;
}
From source file:org.broad.igv.sam.SpliceJunctionHelper.java
// Accumulates splice-junction evidence from one aligned read: every pair of
// adjacent alignment blocks separated by a skipped region (an 'N' gap) either
// creates a new SpliceJunctionFeature or adds a read to an existing one, on
// the strand-specific junction table.
public void addAlignment(Alignment alignment) {
    AlignmentBlock[] blocks = alignment.getAlignmentBlocks();
    // a junction needs at least two blocks (one gap between them)
    if (blocks == null || blocks.length < 2) {
        return;
    }
    //there may be other ways in which this is indicated. May have to code for them later
    boolean isNegativeStrand;
    Object strandAttr = alignment.getAttribute("XS");
    if (strandAttr != null) {
        isNegativeStrand = strandAttr.toString().charAt(0) == '-';
    } else {
        isNegativeStrand = alignment.isNegativeStrand(); // <= TODO -- this isn't correct for all libraries.
    }
    // (junctionStart, junctionEnd) -> feature, per strand
    Table<Integer, Integer, SpliceJunctionFeature> startEndJunctionsTableThisStrand = isNegativeStrand
            ? negStartEndJunctionsMap : posStartEndJunctionsMap;
    // state carried from the previous block: start of its flanking region and
    // of the candidate junction; -1 means "no previous block yet"
    int flankingStart = -1;
    int junctionStart = -1;
    // index into gapTypes for the gap preceding the current block; starts at -1
    // so the first iteration (which has no preceding gap) never reads it
    int gapCount = -1;
    // NOTE(review): assumes getGapTypes() is non-null whenever the read has
    // multiple blocks — TODO confirm, otherwise gapTypes.length would NPE here
    char[] gapTypes = alignment.getGapTypes();
    //for each pair of blocks, create or add evidence to a splice junction
    for (AlignmentBlock block : blocks) {
        int flankingEnd = block.getEnd();
        int junctionEnd = block.getStart();
        // only gaps flagged as skipped regions ('N' in CIGAR) are splice junctions
        if (junctionStart != -1 && gapCount < gapTypes.length
                && gapTypes[gapCount] == SamAlignment.SKIPPED_REGION) {
            //only proceed if the flanking regions are both bigger than the minimum
            if (loadOptions.minReadFlankingWidth == 0
                    || ((junctionStart - flankingStart >= loadOptions.minReadFlankingWidth)
                            && (flankingEnd - junctionEnd >= loadOptions.minReadFlankingWidth))) {
                SpliceJunctionFeature junction = startEndJunctionsTableThisStrand.get(junctionStart, junctionEnd);
                if (junction == null) {
                    junction = new SpliceJunctionFeature(alignment.getChr(), junctionStart, junctionEnd,
                            isNegativeStrand ? Strand.NEGATIVE : Strand.POSITIVE);
                    startEndJunctionsTableThisStrand.put(junctionStart, junctionEnd, junction);
                    allSpliceJunctionFeatures.add(junction);
                }
                junction.addRead(flankingStart, flankingEnd);
            }
        }
        // the current block becomes the "previous" block for the next iteration
        flankingStart = junctionEnd;
        junctionStart = flankingEnd;
        gapCount += 1;
    }
}
From source file:de.tudarmstadt.ukp.dkpro.keyphrases.bookindexing.aggregation.RankedPhraseAggregationAnnotator.java
/** * Iterates through {@link Segment}s and aggregates all {@link Keyphrase}s as * specified in the concrete {@link AggregationStrategy}. * * @param jcas/* w w w . j a va 2 s .c o m*/ * @return table representing phrases contained in segments * @throws AnalysisEngineProcessException */ private Table<String, Integer, Double> getTable(JCas jcas) throws AnalysisEngineProcessException { Table<String, Integer, Double> phraseSegmentTable = TreeBasedTable.create(new Comparator<String>() { @Override public int compare(String o1, String o2) { return o1.compareTo(o2); } }, new Comparator<Integer>() { @Override public int compare(Integer o1, Integer o2) { return o1.compareTo(o2); } }); // there have to be segments in the JCas assert jcas.getAnnotationIndex(Segment.type).size() > 0; if (getContext().getLogger().isLoggable(Level.INFO)) { getContext().getLogger().log(Level.INFO, String.format("Found %d segments", jcas.getAnnotationIndex(Segment.type).size())); } // iterate through all segments and add all phrases with their score to // the table int segmentNr = 0; for (Segment segment : JCasUtil.select(jcas, Segment.class)) { // aggregate all (keyphrase, segment) -> score mappings in a table for (Keyphrase keyphrase : JCasUtil.selectCovered(Keyphrase.class, segment)) { String phrase = convertToLowercase ? keyphrase.getKeyphrase().toLowerCase() : keyphrase.getKeyphrase(); if (getContext().getLogger().isLoggable(Level.FINEST)) { getContext().getLogger().log(Level.FINEST, String.format(Locale.US, "(Phrase=[%s], SegNr=[%d]) -> Score=[%.3f]", phrase, segmentNr, keyphrase.getScore())); } phraseSegmentTable.put(phrase, segmentNr, keyphrase.getScore()); } segmentNr++; } return phraseSegmentTable; }
From source file:i5.las2peer.services.recommender.librec.data.DataSplitter.java
/** * Split ratings into two parts where one rating per user is preserved as the test set and the remaining data as the * training set/*from w w w . j a v a 2s.c o m*/ * */ public SparseMatrix[] getLOOByUser(boolean isByDate, SparseMatrix timestamps) throws Exception { SparseMatrix trainMatrix = new SparseMatrix(rateMatrix); // for building test matrix Table<Integer, Integer, Double> dataTable = HashBasedTable.create(); Multimap<Integer, Integer> colMap = HashMultimap.create(); for (int u = 0, um = rateMatrix.numRows(); u < um; u++) { List<Integer> items = rateMatrix.getColumns(u); int i = -1; if (!isByDate) { // by random int randIdx = (int) (items.size() * Math.random()); i = items.get(randIdx); } else { // by date List<RatingContext> rcs = new ArrayList<>(); for (int j : items) { rcs.add(new RatingContext(u, j, (long) timestamps.get(u, j))); } Collections.sort(rcs); i = rcs.get(rcs.size() - 1).getItem(); // most recent item } trainMatrix.set(u, i, 0); // remove from training dataTable.put(u, i, rateMatrix.get(u, i)); colMap.put(i, u); } // remove zero entries SparseMatrix.reshape(trainMatrix); // build test matrix SparseMatrix testMatrix = new SparseMatrix(rateMatrix.numRows, rateMatrix.numColumns, dataTable, colMap); debugInfo(trainMatrix, testMatrix, -1); return new SparseMatrix[] { trainMatrix, testMatrix }; }
From source file:i5.las2peer.services.recommender.librec.data.DataSplitter.java
/** * Split ratings into two parts where one rating per item is preserved as the test set and the remaining data as the * training set// www . j a va2s . c o m * */ public SparseMatrix[] getLOOByItem(boolean isByDate, SparseMatrix timestamps) throws Exception { SparseMatrix trainMatrix = new SparseMatrix(rateMatrix); // for building test matrix Table<Integer, Integer, Double> dataTable = HashBasedTable.create(); Multimap<Integer, Integer> colMap = HashMultimap.create(); for (int i = 0, im = rateMatrix.numColumns(); i < im; i++) { List<Integer> users = rateMatrix.getRows(i); int u = -1; if (!isByDate) { // by random int randIdx = (int) (users.size() * Math.random()); u = users.get(randIdx); } else { // by date List<RatingContext> rcs = new ArrayList<>(); for (int v : users) { rcs.add(new RatingContext(v, i, (long) timestamps.get(v, i))); } Collections.sort(rcs); u = rcs.get(rcs.size() - 1).getUser(); // most recent rating user } trainMatrix.set(u, i, 0); // remove from training dataTable.put(u, i, rateMatrix.get(u, i)); colMap.put(i, u); } // remove zero entries SparseMatrix.reshape(trainMatrix); // build test matrix SparseMatrix testMatrix = new SparseMatrix(rateMatrix.numRows, rateMatrix.numColumns, dataTable, colMap); debugInfo(trainMatrix, testMatrix, -1); return new SparseMatrix[] { trainMatrix, testMatrix }; }
From source file:i5.las2peer.services.recommender.librec.data.CSVDataDAO.java
/**
 * Read data from the data file. Note that we didn't take care of the duplicated lines.
 *
 * @param cols
 *            the indexes of the relevant columns in the data file: {user, item, [rating, timestamp] (optional)}
 * @param binThold
 *            the threshold to binarize a rating. If a rating is greater than the threshold, the value will be 1;
 *            otherwise 0. To disable this feature, i.e., keep the original rating value, set the threshold a
 *            negative value
 * @return a sparse matrix storing all the relevant data
 */
public SparseMatrix[] readData(int[] cols, double binThold) throws Exception {

    Logs.info(String.format("Dataset: %s", Strings.last(dataPath, 38)));

    // Table {row-id, col-id, rate}
    Table<Integer, Integer, Double> dataTable = HashBasedTable.create();
    // Table {row-id, col-id, timestamp}; lazily created only if timestamps are present
    Table<Integer, Integer, Long> timeTable = null;
    // Map {col-id, multiple row-id}: used to fast build a rating matrix
    Multimap<Integer, Integer> colMap = HashMultimap.create();

    BufferedReader br = FileIO.getReader(dataPath);
    String line = null;
    minTimestamp = Long.MAX_VALUE;
    maxTimestamp = Long.MIN_VALUE;
    while ((line = br.readLine()) != null) {
        // skip the (single) header line, if configured
        if (isHeadline()) {
            setHeadline(false);
            continue;
        }

        // fields may be separated by spaces, tabs, or commas
        String[] data = line.trim().split("[ \t,]+");

        if (data.length < 2) {
            Logs.error(String.format("Dataset: Cannot read line \"%s\"", line));
            continue;
        }

        String user = data[cols[0]];
        String item = data[cols[1]];
        // default to an implicit-feedback rating of 1.0 when no rating column is configured
        Double rate = (cols.length >= 3 && data.length >= 3) ? Double.valueOf(data[cols[2]]) : 1.0;

        // binarize the rating for item recommendation task
        if (binThold >= 0)
            rate = rate > binThold ? 1.0 : 0.0;

        scaleDist.add(rate);

        // inner id starting from 0
        int row = userIds.containsKey(user) ? userIds.get(user) : userIds.size();
        userIds.put(user, row);

        int col = itemIds.containsKey(item) ? itemIds.get(item) : itemIds.size();
        itemIds.put(item, col);

        dataTable.put(row, col, rate);
        colMap.put(col, row);

        // record rating's issuing time
        if (cols.length >= 4 && data.length >= 4) {
            if (timeTable == null)
                timeTable = HashBasedTable.create();

            // raw timestamp value; may be in seconds or milliseconds depending on timeUnit
            long mms = 0L;
            try {
                mms = Long.parseLong(data[cols[3]]); // cannot format "9.7323480e+008"
            } catch (NumberFormatException e) {
                // fall back to scientific-notation values, truncated to a long
                mms = (long) Double.parseDouble(data[cols[3]]);
            }
            long timestamp = timeUnit.toMillis(mms);

            if (minTimestamp > timestamp)
                minTimestamp = timestamp;

            if (maxTimestamp < timestamp)
                maxTimestamp = timestamp;

            timeTable.put(row, col, timestamp);
        }
    }
    br.close();

    numRatings = scaleDist.size();
    ratingScale = new ArrayList<>(scaleDist.elementSet());
    Collections.sort(ratingScale);

    int numRows = numUsers(), numCols = numItems();

    // if min-rate = 0.0, shift upper a scale
    // NOTE(review): assumes the scale has at least two distinct values when the
    // minimum is 0.0 — ratingScale.get(1) would throw otherwise; TODO confirm
    double minRate = ratingScale.get(0).doubleValue();
    double epsilon = minRate == 0.0 ? ratingScale.get(1).doubleValue() - minRate : 0;
    if (epsilon > 0) {
        // shift upper a scale
        for (int i = 0, im = ratingScale.size(); i < im; i++) {
            double val = ratingScale.get(i);
            ratingScale.set(i, val + epsilon);
        }
        // update data table
        for (int row = 0; row < numRows; row++) {
            for (int col = 0; col < numCols; col++) {
                if (dataTable.contains(row, col))
                    dataTable.put(row, col, dataTable.get(row, col) + epsilon);
            }
        }
    }

    String dateRange = "";
    if (cols.length >= 4)
        dateRange = String.format(", Timestamps = {%s, %s}", Dates.toString(minTimestamp),
                Dates.toString(maxTimestamp));

    Logs.debug("With Specs: {Users, {}} = {{}, {}, {}}, Scale = {{}}{}",
            (isItemAsUser ? "Users, Links" : "Items, Ratings"), numRows, numCols, numRatings,
            Strings.toString(ratingScale), dateRange);

    // build rating matrix
    rateMatrix = new SparseMatrix(numRows, numCols, dataTable, colMap);

    if (timeTable != null)
        timeMatrix = new SparseMatrix(numRows, numCols, timeTable, colMap);

    // release memory of data table
    dataTable = null;
    timeTable = null;

    return new SparseMatrix[] { rateMatrix, timeMatrix };
}
From source file:i5.las2peer.services.recommender.librec.data.NetflixDataDAO.java
/** * Read data from the data file. Note that we didn't take care of the duplicated lines. * /* www . j av a 2 s . co m*/ * @param cols * the indexes of the relevant columns in the data file: {user, item, [rating, timestamp] (optional)}, * not used for the Netflix dataset. * @param binThold * the threshold to binarize a rating. If a rating is greater than the threshold, the value will be 1; * otherwise 0. To disable this feature, i.e., keep the original rating value, set the threshold a * negative value * @return a sparse matrix storing all the relevant data */ public SparseMatrix[] readData(int[] cols, double binThold) throws Exception { Logs.info(String.format("Dataset: %s", Strings.last(dataPath, 38))); // Table {row-id, col-id, rate} Table<Integer, Integer, Double> dataTable = HashBasedTable.create(); // Table {row-id, col-id, timestamp} Table<Integer, Integer, Long> timeTable = HashBasedTable.create(); // Map {col-id, multiple row-id}: used to fast build a rating matrix Multimap<Integer, Integer> colMap = HashMultimap.create(); File[] fileList = new File(dataPath).listFiles(); setHeadline(true); for (File file : fileList) { BufferedReader br = FileIO.getReader(file); String line = null; minTimestamp = Long.MAX_VALUE; maxTimestamp = Long.MIN_VALUE; line = br.readLine(); if (line == null) { continue; } String item = line.trim().split(":")[0]; while ((line = br.readLine()) != null) { String[] data = line.trim().split("[ \t,]+"); String user = data[0]; Double rate = Double.valueOf(data[1]); // binarize the rating for item recommendation task if (binThold >= 0) { rate = rate > binThold ? 1.0 : 0.0; } scaleDist.add(rate); // inner id starting from 0 int row = userIds.containsKey(user) ? userIds.get(user) : userIds.size(); userIds.put(user, row); int col = itemIds.containsKey(item) ? 
itemIds.get(item) : itemIds.size(); itemIds.put(item, col); dataTable.put(row, col, rate); colMap.put(col, row); // record rating's issuing time // convert to timestamp (milliseconds since 1970-01-01 String dateStr = data[2]; // format e.g. 2005-09-06 long timestamp = new SimpleDateFormat("yyyy-MM-dd").parse(dateStr).getTime(); if (minTimestamp > timestamp) minTimestamp = timestamp; if (maxTimestamp < timestamp) maxTimestamp = timestamp; timeTable.put(row, col, timestamp); } br.close(); } numRatings = scaleDist.size(); ratingScale = new ArrayList<>(scaleDist.elementSet()); Collections.sort(ratingScale); int numRows = numUsers(), numCols = numItems(); // if min-rate = 0.0, shift upper a scale double minRate = ratingScale.get(0).doubleValue(); double epsilon = minRate == 0.0 ? ratingScale.get(1).doubleValue() - minRate : 0; if (epsilon > 0) { // shift upper a scale for (int i = 0, im = ratingScale.size(); i < im; i++) { double val = ratingScale.get(i); ratingScale.set(i, val + epsilon); } // update data table for (int row = 0; row < numRows; row++) { for (int col = 0; col < numCols; col++) { if (dataTable.contains(row, col)) dataTable.put(row, col, dataTable.get(row, col) + epsilon); } } } String dateRange = ""; if (cols.length >= 4) dateRange = String.format(", Timestamps = {%s, %s}", Dates.toString(minTimestamp), Dates.toString(maxTimestamp)); Logs.debug("With Specs: {Users, {}} = {{}, {}, {}}, Scale = {{}}{}", (isItemAsUser ? "Users, Links" : "Items, Ratings"), numRows, numCols, numRatings, Strings.toString(ratingScale), dateRange); // build rating matrix rateMatrix = new SparseMatrix(numRows, numCols, dataTable, colMap); if (timeTable != null) timeMatrix = new SparseMatrix(numRows, numCols, timeTable, colMap); // release memory of data table dataTable = null; timeTable = null; return new SparseMatrix[] { rateMatrix, timeMatrix }; }
From source file:net.librec.math.structure.SparseTensor.java
/**
 * Slice is a two-dimensional sub-array of a tensor, defined by fixing all but two indices.
 *
 * @param rowDim row dimension
 * @param colDim column dimension
 * @param otherKeys keys of the other (fixed) dimensions, in dimension order
 * @return a sparse matrix over (rowDim, colDim), or null if no entry matches the fixed keys
 */
public SparseMatrix slice(int rowDim, int colDim, int... otherKeys) {

    // one key is required for every dimension except the two kept free
    if (otherKeys.length != numDimensions - 2)
        throw new Error("The input dimensions do not match the tensor specification!");

    // find an indexed array to search: pick a fixed dimension d whose
    // key -> entry-indices index either exists or can be built
    int d = -1;
    boolean cond1 = indexedDimensions.size() == 0;
    boolean cond2 = (indexedDimensions.contains(rowDim) || indexedDimensions.contains(colDim))
            && indexedDimensions.size() == 1;
    boolean cond3 = indexedDimensions.contains(rowDim) && indexedDimensions.contains(colDim)
            && indexedDimensions.size() == 2;
    if (cond1 || cond2 || cond3) {
        // no usable index yet: pick the first dimension that is neither
        // rowDim nor colDim and build an index for it
        for (d = 0; d < numDimensions; d++) {
            if (d != rowDim && d != colDim)
                break;
        }
        buildIndex(d);
    } else {
        // reuse an already-indexed fixed dimension
        for (int dd : indexedDimensions) {
            if (dd != rowDim && dd != colDim) {
                d = dd;
                break;
            }
        }
    }

    // get search key: the entry of otherKeys that corresponds to dimension d
    // (i walks over fixed dimensions only, in the same order as otherKeys)
    int key = -1;
    for (int dim = 0, i = 0; dim < numDimensions; dim++) {
        if (dim == rowDim || dim == colDim)
            continue;

        if (dim == d) {
            key = otherKeys[i];
            break;
        }
        i++;
    }

    // all candidate entry positions having this key in dimension d
    Collection<Integer> indices = keyIndices[d].get(key);
    if (indices == null || indices.size() == 0)
        return null;

    Table<Integer, Integer, Double> dataTable = HashBasedTable.create();
    Multimap<Integer, Integer> colMap = HashMultimap.create();

    // for each possible position, verify ALL fixed-dimension keys match
    for (int index : indices) {
        boolean found = true;
        for (int dd = 0, j = 0; dd < numDimensions; dd++) {
            if (dd == rowDim || dd == colDim)
                continue;

            if (otherKeys[j++] != key(dd, index)) {
                found = false;
                break;
            }
        }

        if (found) {
            int row = ndKeys[rowDim].get(index);
            int col = ndKeys[colDim].get(index);
            double val = values.get(index);

            dataTable.put(row, col, val);
            colMap.put(col, row);
        }
    }

    return new SparseMatrix(dimensions[rowDim], dimensions[colDim], dataTable, colMap);
}
From source file:net.librec.data.convertor.ArffDataConvertor.java
/** * Build the {@link #oneHotFeatureMatrix} * and {@link #oneHotRatingVector}//w w w. j av a2 s . c o m */ public void oneHotEncoding() { Table<Integer, Integer, Double> dataTable = HashBasedTable.create(); Multimap<Integer, Integer> colMap = HashMultimap.create(); int numRows = instances.size(); int numCols = 0; int numAttrs = attributes.size(); double[] ratings = new double[numRows]; // set numCols for (int i = 0; i < attributes.size(); i++) { // skip rating column if (i == ratingCol) continue; ArffAttribute attr = attributes.get(i); numCols += attr.getColumnSet().size() == 0 ? 1 : attr.getColumnSet().size(); } // build one-hot encoding matrix for (int row = 0; row < numRows; row++) { ArffInstance instance = instances.get(row); int colPrefix = 0; int col = 0; for (int i = 0; i < numAttrs; i++) { String type = attrTypes.get(i); Object val = instance.getValueByIndex(i); // rating column if (i == ratingCol) { ratings[row] = (double) val; continue; } // appender column switch (type) { case "NUMERIC": case "REAL": case "INTEGER": col = colPrefix; dataTable.put(row, col, (double) val); colMap.put(col, row); colPrefix += 1; break; case "STRING": col = colPrefix + columnIds.get(i).get(val); dataTable.put(row, col, 1d); colMap.put(col, row); colPrefix += columnIds.get(i).size(); break; case "NOMINAL": for (String v : (ArrayList<String>) val) { col = colPrefix + columnIds.get(i).get(v); colMap.put(col, row); dataTable.put(row, col, 1d); } colPrefix += columnIds.get(i).size(); break; } } } oneHotFeatureMatrix = new SparseMatrix(numRows, numCols, dataTable, colMap); oneHotRatingVector = new DenseVector(ratings); // release memory dataTable = null; colMap = null; }
From source file:carskit.alg.cars.transformation.prefiltering.splitting.ItemSplitting.java
public Table<Integer, Integer, Integer> split(SparseMatrix sm, int min) { Table<Integer, Integer, Integer> datatable = HashBasedTable.create(); for (Integer j : itemRatingList.keySet()) { Collection<Integer> uis = itemRatingList.get(j); double maxt = Double.MIN_VALUE; int splitcond = -1; for (Integer cond : condContextsList.keySet()) { Collection<Integer> ctx = condContextsList.get(cond); // start to extract two rating list HashMultiset<Double> rate1 = HashMultiset.create(); HashMultiset<Double> rate2 = HashMultiset.create(); for (Integer ui : uis) { List<Integer> uctx = sm.getColumns(ui); for (Integer c : uctx) { double rate = sm.get(ui, c); if (ctx.contains(c)) rate1.add(rate); else rate2.add(rate); }/*from w w w .j av a 2 s .com*/ } double[] drate1 = Doubles.toArray(rate1); double[] drate2 = Doubles.toArray(rate2); if (drate1.length >= min && drate2.length >= min) { TTest tt = new TTest(); double p = tt.tTest(drate1, drate2); if (p < 0.05) { double t = tt.t(drate1, drate2); if (t > maxt) { // update the split splitcond = cond; maxt = t; } } } } if (splitcond != -1) { // put u, ctx, new uid into datatable int newid = startId++; Collection<Integer> ctx = condContextsList.get(splitcond); for (Integer c : ctx) datatable.put(j, c, newid); } } Logs.info(datatable.rowKeySet().size() + " items have been splitted."); return datatable; }