List of usage examples for org.apache.hadoop.io IntWritable equals
@Override public boolean equals(Object o)
Returns true if o is an IntWritable with the same value. From source file:com.axiomine.largecollections.turboutil.IntWritableList.java
License:Apache License
@Override public int indexOf(Object o) { int index = -1; int myIndex = -1; Iterator<IntWritable> iter = this.iterator(); while (iter.hasNext()) { index++;//from w w w .j a v a2s. c om IntWritable e = iter.next(); if (e.equals(o)) { myIndex = index; break; } } return myIndex; }
From source file:com.axiomine.largecollections.turboutil.IntWritableList.java
License:Apache License
/**
 * Returns the index of the last element equal to {@code o}, or -1 if no
 * element matches. Scans the whole list, remembering the most recent match.
 */
@Override
public int lastIndexOf(Object o) {
    int lastMatch = -1;
    Iterator<IntWritable> it = this.iterator();
    for (int position = 0; it.hasNext(); position++) {
        if (it.next().equals(o)) {
            lastMatch = position;
        }
    }
    return lastMatch;
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    // Looks up all records with the given integer key in a partitioned MapFile
    // output: <path> is the MapFile output directory, <key> the lookup key.
    if (args.length != 2) {
        JobBuilder.printUsage(this, "<path> <key>");
        return -1;
    }
    Path path = new Path(args[0]);
    IntWritable key = new IntWritable(Integer.parseInt(args[1]));

    // One reader per map-file partition under 'path'.
    Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
    try {
        Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
        Text val = new Text();
        // vv LookupRecordsByTemperature-ReaderFragment
        // Use the same partitioner the job used, so the key maps to the
        // partition (and therefore the map file) that holds it.
        Reader reader = readers[partitioner.getPartition(key, val, readers.length)];
        // ^^ LookupRecordsByTemperature-ReaderFragment
        Writable entry = reader.get(key, val);
        if (entry == null) {
            System.err.println("Key not found: " + key);
            return -1;
        }
        NcdcRecordParser parser = new NcdcRecordParser();
        IntWritable nextKey = new IntWritable();
        // reader.get() positioned us on the first match and filled 'val';
        // keep streaming entries while the key stays the same.
        do {
            parser.parse(val.toString());
            System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear());
        } while (reader.next(nextKey, val) && key.equals(nextKey));
        return 0;
    } finally {
        // FIX: the original never closed the MapFile readers, leaking one
        // open file handle per partition; close them all on every exit path.
        for (Reader r : readers) {
            r.close();
        }
    }
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    // Variant of the MapFile lookup that resolves the FileSystem explicitly
    // and uses the (deprecated) getReaders(fs, path, conf) overload.
    if (args.length != 2) {
        JobBuilder.printUsage(this, "<path> <key>");
        return -1;
    }
    Path path = new Path(args[0]);
    IntWritable key = new IntWritable(Integer.parseInt(args[1]));
    FileSystem fs = path.getFileSystem(getConf());

    // One reader per map-file partition under 'path'.
    Reader[] readers = MapFileOutputFormat.getReaders(fs, path, getConf());
    try {
        Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
        Text val = new Text();
        // Same partitioner as the producing job, so the key maps to the
        // map file that holds it.
        Reader reader = readers[partitioner.getPartition(key, val, readers.length)];
        Writable entry = reader.get(key, val);
        if (entry == null) {
            System.err.println("Key not found: " + key);
            return -1;
        }
        NcdcRecordParser parser = new NcdcRecordParser();
        IntWritable nextKey = new IntWritable();
        // Stream all consecutive entries that share the lookup key.
        do {
            parser.parse(val.toString());
            System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear());
        } while (reader.next(nextKey, val) && key.equals(nextKey));
        return 0;
    } finally {
        // FIX: the original never closed the MapFile readers, leaking one
        // open file handle per partition; close them all on every exit path.
        for (Reader r : readers) {
            r.close();
        }
    }
}
From source file:mlbench.bayes.train.IndexInstances.java
License:Apache License
/**
 * DataMPI job that re-keys Bayes training instances by label index.
 *
 * O (sender) side: reads (label-text, vector) records from sequence files,
 * maps each label string to its integer index, and sends (index, vector)
 * pairs. A (receiver) side: receives pairs grouped by key, sums all vectors
 * that share a label index, and writes one (index, summed-vector) record per
 * label to a sequence file.
 *
 * NOTE(review): relies on MPI_D.Recv() delivering records grouped by key —
 * the key-change test below assumes equal keys arrive consecutively; confirm
 * against the DataMPI contract.
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException {
    parseArgs(args);
    HashMap<String, String> conf = new HashMap<String, String>();
    initConf(conf);
    MPI_D.Init(args, MPI_D.Mode.Common, conf);
    if (MPI_D.COMM_BIPARTITE_O != null) {
        rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
        if (rank == 0) {
            System.out.println(IndexInstances.class.getSimpleName() + " O start.");
            createLabelIndex(labPath);
        }
        HadoopUtil.cacheFiles(labPath, config);
        // Wait until rank 0 has written the label index before anyone reads it.
        MPI_D.COMM_BIPARTITE_O.Barrier();
        OpenObjectIntHashMap<String> labelIndex = BayesUtils.readIndexFromCache(config);
        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator: read this rank's input splits and emit pairs.
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
            FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                    (JobConf) config, inDir, rank);
            for (int i = 0; i < inputs.length; i++) {
                FileSplit fsplit = inputs[i];
                SequenceFileRecordReader<Text, VectorWritable> kvrr = new SequenceFileRecordReader<>(config,
                        fsplit);
                Text labelText = kvrr.createKey();
                VectorWritable instance = kvrr.createValue();
                while (kvrr.next(labelText, instance)) {
                    // Keys look like "/label/..."; element [1] is the label itself.
                    String label = SLASH.split(labelText.toString())[1];
                    if (labelIndex.containsKey(label)) {
                        // Unknown labels are silently skipped (original behavior).
                        MPI_D.Send(new IntWritable(labelIndex.get(label)), instance);
                    }
                }
            }
        }
    } else if (MPI_D.COMM_BIPARTITE_A != null) {
        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
        config.set(MAPRED_OUTPUT_DIR, outDir);
        // FIX: was getHadoopTaskAttemptID().toString().toString() — the second
        // toString() was a redundant no-op.
        config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString());
        ((JobConf) config).setOutputKeyClass(IntWritable.class);
        ((JobConf) config).setOutputValueClass(VectorWritable.class);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(config,
                DataMPIUtil.getHadoopTaskAttemptID());
        SequenceFileOutputFormat<IntWritable, VectorWritable> outfile = new SequenceFileOutputFormat<>();
        FileSystem fs = FileSystem.get(config);
        Path output = new Path(config.get(MAPRED_OUTPUT_DIR));
        FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext);
        RecordWriter<IntWritable, VectorWritable> outrw = null;
        try {
            fcommitter.setupJob(taskContext);
            outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: Please set the HDFS configuration properly\n");
            System.exit(-1);
        }
        // Streaming group-by-key reduction: sum all vectors sharing a key,
        // flushing the accumulated vector whenever the key changes.
        IntWritable key = null, newKey = null;
        VectorWritable point = null, newPoint = null;
        Vector vector = null;
        Object[] vals = MPI_D.Recv();
        while (vals != null) {
            newKey = (IntWritable) vals[0];
            newPoint = (VectorWritable) vals[1];
            if (key == null && point == null) {
                // First record: nothing accumulated yet, nothing to flush.
            } else if (!key.equals(newKey)) {
                // Key changed: write out the finished group.
                outrw.write(key, new VectorWritable(vector));
                vector = null;
            }
            if (vector == null) {
                vector = newPoint.get();
            } else {
                vector.assign(newPoint.get(), Functions.PLUS);
            }
            key = newKey;
            point = newPoint;
            vals = MPI_D.Recv();
        }
        // Flush the final group (if any records were received at all).
        if (newKey != null && newPoint != null) {
            outrw.write(key, new VectorWritable(vector));
        }
        outrw.close(null);
        if (fcommitter.needsTaskCommit(taskContext)) {
            fcommitter.commitTask(taskContext);
        }
    }
    MPI_D.Finalize();
}
From source file:mlbench.kmeans.KmeansInit.java
License:Apache License
/** * get the input values and choose the K clusters' centers * * @param dataPath/* w w w . ja v a 2 s .c o m*/ * @throws MPI_D_Exception * @throws IOException * @throws MPIException */ @SuppressWarnings("deprecation") private static void init(String args[], String dataPath, int kCluster, HashMap<String, String> conf) throws MPI_D_Exception, IOException, MPIException { MPI_D.Init(args, MPI_D.Mode.Common, conf); if (MPI_D.COMM_BIPARTITE_O != null) { rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O); FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, config, dataPath, rank); // for record the initialized state for (FileSplit path : inputs) { SequenceFileInputFormat f = new SequenceFileInputFormat(); JobConf jobConf = new JobConf(confPath); Reporter r = new KmeansUtils.EmptyReport(); RecordReader<LongWritable, VectorWritable> reader = f.getRecordReader(path, jobConf, r); Random random = new Random(1000); LongWritable k = reader.createKey(); VectorWritable v = reader.createValue(); IntWritable cluster = new IntWritable(); while (reader.next(k, v)) { cluster.set(random.nextInt(kCluster)); MPI_D.Send(cluster, v); } reader.close(); } } else { IntWritable key = null, newKey = null; VectorWritable point = null, newPoint = null; double sum[] = null; int count = 0; Object[] vals = MPI_D.Recv(); while (vals != null) { newKey = (IntWritable) vals[0]; newPoint = (VectorWritable) vals[1]; if (key == null && point == null) { sum = new double[newPoint.get().size()]; } else if (!key.equals(newKey)) { double[] centerVals = new double[sum.length]; for (int i = 0; i < centerVals.length; i++) { centerVals[i] = sum[i] / count; } PointVector oneCenter = new PointVector(Integer.valueOf(key.toString()), centerVals); centers.add(oneCenter); sum = new double[point.get().size()]; count = 0; } key = newKey; point = newPoint; KmeansUtils.accumulate(sum, newPoint.get()); count++; vals = MPI_D.Recv(); } if 
(newKey != null && newPoint != null) { double[] centerVals = new double[sum.length]; for (int i = 0; i < centerVals.length; i++) { centerVals[i] = sum[i] / count; } PointVector oneCenter = new PointVector(key.get(), centerVals); centers.add(oneCenter); } transfer = new KmeansUtils.CenterTransfer(config, rank, size); transfer.gatherCentersByP2P(centers); if (rank == 0) { OutputStream resOut = KmeansUtils.getOutputStream(outPath, config); DataOutput os = new DataOutputStream(resOut); for (PointVector centerPoint : centers) { os.write((centerPoint.toString() + "\n").getBytes()); } resOut.flush(); resOut.close(); } System.out.println("rank " + rank + " finish"); } MPI_D.Finalize(); }
From source file:mlbench.kmeans.KmeansIter.java
License:Apache License
/** * Calculate the new center iteratively/* w ww .j av a 2 s. co m*/ * * @return true: finish; false: continue * @throws MPI_D_Exception * @throws MPIException * @throws IOException */ @SuppressWarnings("deprecation") private static void iterBody(String args[], HashMap<String, String> conf) throws MPI_D_Exception, MPIException, IOException { MPI_D.Init(args, MPI_D.Mode.Common, conf); if (MPI_D.COMM_BIPARTITE_O != null) { rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O); if (rank == 0) { System.out.println(centerPath); DataInputStream in = KmeansUtils.readFromHDFSF(new Path(centerPath), config); String lineVal; try { while ((lineVal = in.readLine()) != null) { String lineSeq[] = lineVal.split(":"); PointVector p = new PointVector(Integer.valueOf(lineSeq[0]), format(lineSeq[1])); centers.add(p); } } catch (IOException e) { e.printStackTrace(); } finally { try { in.close(); } catch (IOException e) { e.printStackTrace(); } } } MPI_D.COMM_BIPARTITE_O.Barrier(); KmeansUtils.CenterTransfer transfer = new KmeansUtils.CenterTransfer(config, rank, size); transfer.broadcastCenters(centers); FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, config, dataPath, rank); double centerSum[][] = new double[kCluster][]; long centerPNum[] = new long[kCluster]; // for record the initialized state for (FileSplit path : inputs) { SequenceFileInputFormat f = new SequenceFileInputFormat(); JobConf jobConf = new JobConf(confPath); Reporter r = new KmeansUtils.EmptyReport(); RecordReader<LongWritable, VectorWritable> reader = f.getRecordReader(path, jobConf, r); LongWritable k = reader.createKey(); VectorWritable v = reader.createValue(); while (reader.next(k, v)) { int centerBelong = (int) getBelongPoint(v); // int i = (int) p.getStrClusterClass(); // double[] vals = p.getDoubleValue(); int len = v.get().size(); if (centerSum[centerBelong] == null) { centerSum[centerBelong] = new double[len]; } for (int j 
= 0; j < len; j++) { centerSum[centerBelong][j] += v.get().get(j); } centerPNum[centerBelong]++; } reader.close(); } for (int i = 0; i < centerPNum.length; i++) { if (centerSum[i] == null && centerPNum[i] == 0) { continue; } MPI_D.Send(new IntWritable(i), new KmeansCenters(centerPNum[i], centerSum[i])); } } else { centers.clear(); IntWritable key = null, newKey = null; KmeansCenters value = null, newValue = null; double sum[] = null; long count = 0; Object[] vals = MPI_D.Recv(); while (vals != null) { newKey = (IntWritable) vals[0]; newValue = (KmeansCenters) vals[1]; if (key == null && value == null) { sum = new double[newValue.getVector().length]; } else if (!key.equals(newKey)) { double[] centerVals = new double[sum.length]; for (int i = 0; i < centerVals.length; i++) { centerVals[i] = (double) sum[i] / count; } PointVector oneCenter = new PointVector(Integer.valueOf(key.toString()), centerVals); centers.add(oneCenter); sum = new double[value.getVector().length]; count = 0; } key = newKey; value = newValue; KmeansUtils.accumulate(sum, newValue.getVector()); count += Long.valueOf(newValue.getPointSize()); vals = MPI_D.Recv(); } if (newKey != null && newValue != null) { double[] centerVals = new double[sum.length]; for (int i = 0; i < centerVals.length; i++) { centerVals[i] = sum[i] / count; } PointVector oneCenter = new PointVector(key.get(), centerVals); centers.add(oneCenter); } KmeansUtils.CenterTransfer transfer = new KmeansUtils.CenterTransfer(config, rank, size); transfer.gatherCentersByP2P(centers); if (rank == 0) { OutputStream resOut = KmeansUtils.getOutputStream(outPath, config); DataOutput os = new DataOutputStream(resOut); for (PointVector centerPoint : centers) { os.write((centerPoint.toString() + "\n").getBytes()); } resOut.flush(); resOut.close(); } } MPI_D.Finalize(); }
From source file:org.apache.giraph.block_app.library.algo.DistributedIndependentSet.java
License:Apache License
/** * Piece to confirm selection of some vertices for the independent set. Also, * changes the state of neighboring vertices of newly assigned vertices to * NOT_IN_SET, so not to consider them for the discovery of the current * independent set./*from w w w .j a v a 2s . com*/ * * @param foundMIS Specifies the end of discovery for current independent set. * @param done Specifies the end of whole computation of decomposing to * independent sets. */ private static <I extends WritableComparable, V extends Writable> Block createSelectAndRefinePiece( SupplierFromVertex<I, V, Writable, IntWritable> getIndependentSet, ConsumerWithVertex<I, V, Writable, IntWritable> setIndependentSet, IntRef iteration, Consumer<Boolean> foundMIS, Consumer<Boolean> done) { return new Piece<I, V, Writable, BooleanWritable, Object>() { private ReducerHandle<IntWritable, IntWritable> numVerticesUnknown; private ReducerHandle<IntWritable, IntWritable> numVerticesNotAssigned; @Override public void registerReducers(CreateReducersApi reduceApi, Object executionStage) { numVerticesUnknown = reduceApi.createLocalReducer(SumReduce.INT); numVerticesNotAssigned = reduceApi.createLocalReducer(SumReduce.INT); } @Override public VertexSender<I, V, Writable> getVertexSender( final BlockWorkerSendApi<I, V, Writable, BooleanWritable> workerApi, Object executionStage) { BooleanWritable ack = new BooleanWritable(true); IntWritable one = new IntWritable(1); return (vertex) -> { IntWritable vertexState = getIndependentSet.get(vertex); if (vertexState.equals(IN_SET)) { setIndependentSet.apply(vertex, new IntWritable(iteration.value)); workerApi.sendMessageToAllEdges(vertex, ack); } else if (vertexState.equals(UNKNOWN)) { numVerticesUnknown.reduce(one); numVerticesNotAssigned.reduce(one); } else if (vertexState.equals(NOT_IN_SET)) { numVerticesNotAssigned.reduce(one); } }; } @Override public void masterCompute(BlockMasterApi master, Object executionStage) { 
done.apply(numVerticesNotAssigned.getReducedValue(master).get() == 0); foundMIS.apply(numVerticesUnknown.getReducedValue(master).get() == 0); } @Override public VertexReceiver<I, V, Writable, BooleanWritable> getVertexReceiver( final BlockWorkerReceiveApi<I> workerApi, Object executionStage) { return (vertex, messages) -> { if (getIndependentSet.get(vertex).equals(UNKNOWN) && Iterables.size(messages) > 0) { setIndependentSet.apply(vertex, NOT_IN_SET); } }; } @Override public Class<BooleanWritable> getMessageClass() { return BooleanWritable.class; } @Override public String toString() { return "SelectAndRefinePiece"; } }; }