List of usage examples for org.apache.hadoop.io Text equals
@Override public boolean equals(Object o)
Returns true iff o is a Text with the same contents.
From source file:joshelser.as2015.query.Query.java
License:Apache License
public static void main(String[] args) throws Exception { JCommander commander = new JCommander(); final Opts options = new Opts(); commander.addObject(options);//w ww . j a va 2s .c om commander.setProgramName("Query"); try { commander.parse(args); } catch (ParameterException ex) { commander.usage(); System.err.println(ex.getMessage()); System.exit(1); } ClientConfiguration conf = ClientConfiguration.loadDefault(); if (null != options.clientConfFile) { conf = new ClientConfiguration(new PropertiesConfiguration(options.clientConfFile)); } conf.withInstance(options.instanceName).withZkHosts(options.zookeepers); ZooKeeperInstance inst = new ZooKeeperInstance(conf); Connector conn = inst.getConnector(options.user, new PasswordToken(options.password)); BatchScanner bs = conn.createBatchScanner(options.table, Authorizations.EMPTY, 16); try { bs.setRanges(Collections.singleton(new Range())); final Text categoryText = new Text("category"); bs.fetchColumn(categoryText, new Text("name")); bs.fetchColumn(new Text("review"), new Text("score")); bs.fetchColumn(new Text("review"), new Text("userId")); bs.addScanIterator(new IteratorSetting(50, "wri", WholeRowIterator.class)); final Text colf = new Text(); Map<String, List<Integer>> scoresByUser = new HashMap<>(); for (Entry<Key, Value> entry : bs) { SortedMap<Key, Value> row = WholeRowIterator.decodeRow(entry.getKey(), entry.getValue()); Iterator<Entry<Key, Value>> iter = row.entrySet().iterator(); if (!iter.hasNext()) { // row was empty continue; } Entry<Key, Value> categoryEntry = iter.next(); categoryEntry.getKey().getColumnFamily(colf); if (!colf.equals(categoryText)) { throw new IllegalArgumentException("Unknown!"); } if (!categoryEntry.getValue().toString().equals("books")) { // not a book review continue; } if (!iter.hasNext()) { continue; } Entry<Key, Value> reviewScore = iter.next(); if (!iter.hasNext()) { continue; } Entry<Key, Value> reviewUserId = iter.next(); String userId = reviewUserId.getValue().toString(); if 
(userId.equals("unknown")) { // filter unknow user id continue; } List<Integer> scores = scoresByUser.get(userId); if (null == scores) { scores = new ArrayList<>(); scoresByUser.put(userId, scores); } scores.add(Float.valueOf(reviewScore.getValue().toString()).intValue()); } for (Entry<String, List<Integer>> entry : scoresByUser.entrySet()) { int sum = 0; for (Integer val : entry.getValue()) { sum += val; } System.out.println(entry.getKey() + " => " + new Float(sum) / entry.getValue().size()); } } finally { bs.close(); } }
From source file:microbench.WordCountOnHDFSDataLocal.java
License:Apache License
public static void main(String[] args) throws IOException, InterruptedException { try {/* w w w .ja v a 2 s. co m*/ parseArgs(args); HashMap<String, String> conf = new HashMap<String, String>(); initConf(conf); MPI_D.Init(args, MPI_D.Mode.Common, conf); JobConf jobConf = new JobConf(confPath); if (MPI_D.COMM_BIPARTITE_O != null) { // O communicator int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O); if (rank == 0) { DataMPIUtil.printArgs(args); } System.out.println("The O task " + rank + " of " + size + " is working..."); HadoopReader<LongWritable, Text> reader = HadoopIOUtil.getReader(jobConf, inDir, TextInputFormat.class, rank, MPI_D.COMM_BIPARTITE_O); Text word = new Text(); IntWritable one = new IntWritable(1); LongWritable khead = reader.createKey(); Text vhead = reader.createValue(); while (reader.next(khead, vhead)) { StringTokenizer itr = new StringTokenizer(vhead.toString()); while (itr.hasMoreTokens()) { word.set(itr.nextToken()); // send key-value MPI_D.Send(word, one); } } reader.close(); } else if (MPI_D.COMM_BIPARTITE_A != null) { // A communicator int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A); int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_A); System.out.println("The A task " + rank + " of " + size + " is working..."); HadoopWriter<Text, IntWritable> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir, Text.class, IntWritable.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A); Text oldKey = null; IntWritable valueData = new IntWritable(); int sum = 0; Object[] keyValue = MPI_D.Recv(); while (keyValue != null) { Text key = (Text) keyValue[0]; IntWritable value = (IntWritable) keyValue[1]; if (oldKey == null) { oldKey = key; sum = value.get(); } else { if (key.equals(oldKey)) { sum += value.get(); } else { valueData.set(sum); outrw.write(oldKey, valueData); oldKey = key; sum = value.get(); } } keyValue = MPI_D.Recv(); } if (oldKey != null) { valueData.set(sum); outrw.write(oldKey, 
valueData); } outrw.close(); } MPI_D.Finalize(); } catch (MPI_D_Exception e) { e.printStackTrace(); } }
From source file:mlbench.bayes.train.WeightSummer.java
License:Apache License
@SuppressWarnings("deprecation") public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException { parseArgs(args);//from w w w . jav a 2s .c o m HashMap<String, String> conf = new HashMap<String, String>(); initConf(conf); MPI_D.Init(args, MPI_D.Mode.Common, conf); if (MPI_D.COMM_BIPARTITE_O != null) { int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O); FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, (JobConf) config, inDir, rank); Vector weightsPerFeature = null; Vector weightsPerLabel = new DenseVector(labNum); for (int i = 0; i < inputs.length; i++) { FileSplit fsplit = inputs[i]; SequenceFileRecordReader<IntWritable, VectorWritable> kvrr = new SequenceFileRecordReader<>(config, fsplit); IntWritable index = kvrr.createKey(); VectorWritable value = kvrr.createValue(); while (kvrr.next(index, value)) { Vector instance = value.get(); if (weightsPerFeature == null) { weightsPerFeature = new RandomAccessSparseVector(instance.size(), instance.getNumNondefaultElements()); } int label = index.get(); weightsPerFeature.assign(instance, Functions.PLUS); weightsPerLabel.set(label, weightsPerLabel.get(label) + instance.zSum()); } } if (weightsPerFeature != null) { MPI_D.Send(new Text(WEIGHTS_PER_FEATURE), new VectorWritable(weightsPerFeature)); MPI_D.Send(new Text(WEIGHTS_PER_LABEL), new VectorWritable(weightsPerLabel)); } } else if (MPI_D.COMM_BIPARTITE_A != null) { int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A); config.set(MAPRED_OUTPUT_DIR, outDirW); config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString()); ((JobConf) config).setOutputKeyClass(Text.class); ((JobConf) config).setOutputValueClass(VectorWritable.class); TaskAttemptContext taskContext = new TaskAttemptContextImpl(config, DataMPIUtil.getHadoopTaskAttemptID()); SequenceFileOutputFormat<Text, VectorWritable> outfile = new SequenceFileOutputFormat<>(); 
FileSystem fs = FileSystem.get(config); Path output = new Path(config.get(MAPRED_OUTPUT_DIR)); FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext); RecordWriter<Text, VectorWritable> outrw = null; try { fcommitter.setupJob(taskContext); outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null); } catch (IOException e) { e.printStackTrace(); System.err.println("ERROR: Please set the HDFS configuration properly\n"); System.exit(-1); } Text key = null, newKey = null; VectorWritable point = null, newPoint = null; Vector vector = null; Object[] vals = MPI_D.Recv(); while (vals != null) { newKey = (Text) vals[0]; newPoint = (VectorWritable) vals[1]; if (key == null && point == null) { } else if (!key.equals(newKey)) { outrw.write(key, new VectorWritable(vector)); vector = null; } if (vector == null) { vector = newPoint.get(); } else { vector.assign(newPoint.get(), Functions.PLUS); } key = newKey; point = newPoint; vals = MPI_D.Recv(); } if (newKey != null && newPoint != null) { outrw.write(key, new VectorWritable(vector)); } outrw.close(null); if (fcommitter.needsTaskCommit(taskContext)) { fcommitter.commitTask(taskContext); } MPI_D.COMM_BIPARTITE_A.Barrier(); if (rank == 0) { Path resOut = new Path(outDir); NaiveBayesModel naiveBayesModel = BayesUtils.readModelFromDir(new Path(outDir), config); naiveBayesModel.serialize(resOut, config); } } MPI_D.Finalize(); }
From source file:org.apache.accumulo.core.client.admin.FindMax.java
License:Apache License
private static Text _findMax(Scanner scanner, Text start, boolean inclStart, Text end, boolean inclEnd) { // System.out.printf("findMax(%s, %s, %s, %s)%n", Key.toPrintableString(start.getBytes(), 0, start.getLength(), 1000), inclStart, // Key.toPrintableString(end.getBytes(), 0, end.getLength(), 1000), inclEnd); int cmp = start.compareTo(end); if (cmp >= 0) { if (inclStart && inclEnd && cmp == 0) { scanner.setRange(new Range(start, true, end, true)); Iterator<Entry<Key, Value>> iter = scanner.iterator(); if (iter.hasNext()) return iter.next().getKey().getRow(); }//from ww w .j a va 2 s.c o m return null; } Text mid = findMidPoint(start, end); // System.out.println("mid = :"+Key.toPrintableString(mid.getBytes(), 0, mid.getLength(), 1000)+":"); scanner.setRange(new Range(mid, mid.equals(start) ? inclStart : true, end, inclEnd)); Iterator<Entry<Key, Value>> iter = scanner.iterator(); if (iter.hasNext()) { Key next = iter.next().getKey(); int count = 0; while (count < 10 && iter.hasNext()) { next = iter.next().getKey(); count++; } if (!iter.hasNext()) return next.getRow(); Text ret = _findMax(scanner, next.followingKey(PartialKey.ROW).getRow(), true, end, inclEnd); if (ret == null) return next.getRow(); else return ret; } else { return _findMax(scanner, start, inclStart, mid, mid.equals(start) ? inclStart : false); } }
From source file:org.apache.accumulo.core.data.impl.KeyExtent.java
License:Apache License
/**
 * Null-tolerant comparison of two {@code Text} values.
 *
 * @return {@code true} when both arguments are {@code null}, or when neither is {@code null} and
 *         {@code t1.equals(t2)}; {@code false} when exactly one of them is {@code null}
 */
private boolean equals(Text t1, Text t2) {
    // If either side is null, the two are equal only when both are null (reference comparison).
    return (t1 == null || t2 == null) ? t1 == t2 : t1.equals(t2);
}
From source file:org.apache.accumulo.core.metadata.MetadataLocationObtainer.java
License:Apache License
public static TabletLocations getMetadataLocationEntries(SortedMap<Key, Value> entries) { Key key;/*from w w w.j a v a2s. c om*/ Value val; Text location = null; Text session = null; Value prevRow = null; KeyExtent ke; List<TabletLocation> results = new ArrayList<>(); ArrayList<KeyExtent> locationless = new ArrayList<>(); Text lastRowFromKey = new Text(); // text obj below is meant to be reused in loop for efficiency Text colf = new Text(); Text colq = new Text(); for (Entry<Key, Value> entry : entries.entrySet()) { key = entry.getKey(); val = entry.getValue(); if (key.compareRow(lastRowFromKey) != 0) { prevRow = null; location = null; session = null; key.getRow(lastRowFromKey); } colf = key.getColumnFamily(colf); colq = key.getColumnQualifier(colq); // interpret the row id as a key extent if (colf.equals(TabletsSection.CurrentLocationColumnFamily.NAME) || colf.equals(TabletsSection.FutureLocationColumnFamily.NAME)) { if (location != null) { throw new IllegalStateException("Tablet has multiple locations : " + lastRowFromKey); } location = new Text(val.toString()); session = new Text(colq); } else if (TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.equals(colf, colq)) { prevRow = new Value(val); } if (prevRow != null) { ke = new KeyExtent(key.getRow(), prevRow); if (location != null) results.add(new TabletLocation(ke, location.toString(), session.toString())); else locationless.add(ke); location = null; prevRow = null; } } return new TabletLocations(results, locationless); }
From source file:org.apache.accumulo.core.metadata.schema.TabletMetadata.java
License:Apache License
/**
 * Builds a {@code TabletMetadata} from the key/value pairs of a single metadata row.
 *
 * <p>The table id and end row are derived from the first key's row. The prev-row column sets the
 * previous end row, data-file entries contribute their column qualifier to the file list, and
 * current/future/last location entries populate the corresponding location fields.
 *
 * @param rowIter iterator over the entries of exactly one row; must not be {@code null}
 * @param fetchedColumns stored on the result as-is
 * @return the populated tablet metadata
 * @throws NullPointerException if {@code rowIter} is {@code null}
 * @throws IllegalArgumentException if the entries span more than one row, or if more than one
 *         current/future location is present
 */
public static TabletMetadata convertRow(Iterator<Entry<Key, Value>> rowIter,
        EnumSet<FetchedColumns> fetchedColumns) {
    Objects.requireNonNull(rowIter);

    TabletMetadata te = new TabletMetadata();
    Builder<String> filesBuilder = ImmutableList.builder();
    ByteSequence row = null;

    while (rowIter.hasNext()) {
        Entry<Key, Value> kv = rowIter.next();
        Key k = kv.getKey();
        Value v = kv.getValue();
        Text fam = k.getColumnFamily();

        if (row == null) {
            // First entry: remember the row and derive the extent-level fields from it.
            row = k.getRowData();
            KeyExtent ke = new KeyExtent(k.getRow(), (Text) null);
            te.endRow = ke.getEndRow();
            te.tableId = ke.getTableId();
        } else if (!row.equals(k.getRowData())) {
            throw new IllegalArgumentException(
                    "Input contains more than one row : " + row + " " + k.getRowData());
        }

        if (PREV_ROW_COLUMN.hasColumns(k)) {
            te.prevEndRow = KeyExtent.decodePrevEndRow(v);
        }

        if (fam.equals(DataFileColumnFamily.NAME)) {
            filesBuilder.add(k.getColumnQualifier().toString());
        } else if (fam.equals(CurrentLocationColumnFamily.NAME)
                || fam.equals(FutureLocationColumnFamily.NAME)) {
            // Current and future locations share one slot; a tablet may have only one of them.
            if (te.location != null) {
                throw new IllegalArgumentException(
                        "Input contains more than one location " + te.location + " " + v);
            }
            LocationType kind = fam.equals(CurrentLocationColumnFamily.NAME)
                    ? LocationType.CURRENT
                    : LocationType.FUTURE;
            te.location = new Location(v.toString(), k.getColumnQualifierData().toString(), kind);
        } else if (fam.equals(LastLocationColumnFamily.NAME)) {
            te.last = new Location(v.toString(), k.getColumnQualifierData().toString(),
                    LocationType.LAST);
        }
    }

    te.files = filesBuilder.build();
    te.fetchedColumns = fetchedColumns;
    return te;
}
From source file:org.apache.accumulo.core.metadata.TableMetadataServicer.java
License:Apache License
@Override public void getTabletLocations(SortedMap<KeyExtent, String> tablets) throws AccumuloException, AccumuloSecurityException, TableNotFoundException { Scanner scanner = context.getConnector().createScanner(getServicingTableName(), Authorizations.EMPTY); TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.fetch(scanner); scanner.fetchColumnFamily(TabletsSection.CurrentLocationColumnFamily.NAME); // position at first entry in metadata table for given table scanner.setRange(TabletsSection.getRange(getServicedTableId())); Text colf = new Text(); Text colq = new Text(); KeyExtent currentKeyExtent = null;/*from w w w.j a va 2s. c o m*/ String location = null; Text row = null; // acquire this table's tablets from the metadata table which services it for (Entry<Key, Value> entry : scanner) { if (row != null) { if (!row.equals(entry.getKey().getRow())) { currentKeyExtent = null; location = null; row = entry.getKey().getRow(); } } else { row = entry.getKey().getRow(); } colf = entry.getKey().getColumnFamily(colf); colq = entry.getKey().getColumnQualifier(colq); if (TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.equals(colf, colq)) { currentKeyExtent = new KeyExtent(entry.getKey().getRow(), entry.getValue()); tablets.put(currentKeyExtent, location); currentKeyExtent = null; location = null; } else if (colf.equals(TabletsSection.CurrentLocationColumnFamily.NAME)) { location = entry.getValue().toString(); } } validateEntries(tablets); }
From source file:org.apache.accumulo.core.util.MetadataTable.java
License:Apache License
public static Pair<SortedMap<KeyExtent, Text>, List<KeyExtent>> getMetadataLocationEntries( SortedMap<Key, Value> entries) { Key key;/*from www . j a va 2s .c om*/ Value val; Text location = null; Value prevRow = null; KeyExtent ke; SortedMap<KeyExtent, Text> results = new TreeMap<KeyExtent, Text>(); ArrayList<KeyExtent> locationless = new ArrayList<KeyExtent>(); Text lastRowFromKey = new Text(); // text obj below is meant to be reused in loop for efficiency Text colf = new Text(); Text colq = new Text(); for (Entry<Key, Value> entry : entries.entrySet()) { key = entry.getKey(); val = entry.getValue(); if (key.compareRow(lastRowFromKey) != 0) { prevRow = null; location = null; key.getRow(lastRowFromKey); } colf = key.getColumnFamily(colf); colq = key.getColumnQualifier(colq); // interpret the row id as a key extent if (colf.equals(Constants.METADATA_CURRENT_LOCATION_COLUMN_FAMILY) || colf.equals(Constants.METADATA_FUTURE_LOCATION_COLUMN_FAMILY)) { if (location != null) { throw new IllegalStateException("Tablet has multiple locations : " + lastRowFromKey); } location = new Text(val.toString()); } else if (Constants.METADATA_PREV_ROW_COLUMN.equals(colf, colq)) { prevRow = new Value(val); } if (prevRow != null) { ke = new KeyExtent(key.getRow(), prevRow); if (location != null) results.put(ke, location); else locationless.add(ke); location = null; prevRow = null; } } return new Pair<SortedMap<KeyExtent, Text>, List<KeyExtent>>(results, locationless); }
From source file:org.apache.accumulo.core.util.MetadataTable.java
License:Apache License
public static void getEntries(Instance instance, TCredentials credentials, String table, boolean isTid, Map<KeyExtent, String> locations, SortedSet<KeyExtent> tablets) throws AccumuloException, AccumuloSecurityException, TableNotFoundException { String tableId = isTid ? table : Tables.getNameToIdMap(instance).get(table); Scanner scanner = instance/*from www .j a va 2 s .c o m*/ .getConnector(credentials.getPrincipal(), CredentialHelper.extractToken(credentials)) .createScanner(Constants.METADATA_TABLE_NAME, Constants.NO_AUTHS); Constants.METADATA_PREV_ROW_COLUMN.fetch(scanner); scanner.fetchColumnFamily(Constants.METADATA_CURRENT_LOCATION_COLUMN_FAMILY); // position at first entry in metadata table for given table KeyExtent ke = new KeyExtent(new Text(tableId), new Text(), null); Key startKey = new Key(ke.getMetadataEntry()); ke = new KeyExtent(new Text(tableId), null, null); Key endKey = new Key(ke.getMetadataEntry()).followingKey(PartialKey.ROW); scanner.setRange(new Range(startKey, endKey)); Text colf = new Text(); Text colq = new Text(); KeyExtent currentKeyExtent = null; String location = null; Text row = null; // acquire this tables METADATA table entries boolean haveExtent = false; boolean haveLocation = false; for (Entry<Key, Value> entry : scanner) { if (row != null) { if (!row.equals(entry.getKey().getRow())) { currentKeyExtent = null; haveExtent = false; haveLocation = false; row = entry.getKey().getRow(); } } else row = entry.getKey().getRow(); colf = entry.getKey().getColumnFamily(colf); colq = entry.getKey().getColumnQualifier(colq); // stop scanning metadata table when another table is reached if (!(new KeyExtent(entry.getKey().getRow(), (Text) null)).getTableId().toString().equals(tableId)) break; if (Constants.METADATA_PREV_ROW_COLUMN.equals(colf, colq)) { currentKeyExtent = new KeyExtent(entry.getKey().getRow(), entry.getValue()); tablets.add(currentKeyExtent); haveExtent = true; } else if 
(colf.equals(Constants.METADATA_CURRENT_LOCATION_COLUMN_FAMILY)) { location = entry.getValue().toString(); haveLocation = true; } if (haveExtent && haveLocation) { locations.put(currentKeyExtent, location); haveExtent = false; haveLocation = false; currentKeyExtent = null; } } validateEntries(tableId, tablets); }