List of usage examples for org.apache.hadoop.io.IntWritable
public IntWritable()
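Before the project-specific examples, here is a minimal self-contained sketch of the no-arg constructor in use: an empty IntWritable is created, filled via set(), and round-tripped through Hadoop's Writable serialization. The surrounding class name and the value 42 are invented for illustration; the IntWritable API calls themselves are standard Hadoop.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.io.IntWritable;

public class IntWritableBasics {
    public static void main(String[] args) throws Exception {
        // Create an empty IntWritable with the no-arg constructor, then
        // fill it via set() -- the usual pattern when one instance is
        // reused across many records.
        IntWritable writable = new IntWritable();
        writable.set(42);

        // Round-trip through Hadoop's Writable serialization.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        writable.write(new DataOutputStream(bytes));

        IntWritable copy = new IntWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy.get()); // prints 42
    }
}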
From source file:RunPageRankSchimmy.java
License:Apache License
private float phase1(String path, int i, int j, int n, boolean useCombiner,
    boolean useInmapCombiner, boolean useRange) throws Exception {
  Configuration conf = getConf();

  String in = path + "/iter" + FORMAT.format(i);
  String out = path + "/iter" + FORMAT.format(j) + "t";
  String outm = out + "-mass";

  FileSystem fs = FileSystem.get(conf);

  // We need to actually count the number of part files to get the number
  // of partitions (because the directory might contain _log).
  int numPartitions = 0;
  for (FileStatus s : FileSystem.get(conf).listStatus(new Path(in))) {
    if (s.getPath().getName().contains("part-")) {
      numPartitions++;
    }
  }

  conf.setInt("NodeCount", n);

  Partitioner<IntWritable, Writable> p = null;
  if (useRange) {
    p = new RangePartitioner();
    ((Configurable) p).setConf(conf);
  } else {
    p = new HashPartitioner<IntWritable, Writable>();
  }

  // This is really annoying: the mapping between the partition numbers on
  // disk (i.e., part-XXXX) and what partition the file contains (i.e.,
  // key.hash % #reducer) is arbitrary... so this means that we need to
  // open up each partition, peek inside to find out.
  IntWritable key = new IntWritable();
  PageRankNode value = new PageRankNode();
  FileStatus[] status = fs.listStatus(new Path(in));

  StringBuilder sb = new StringBuilder();

  for (FileStatus f : status) {
    if (!f.getPath().getName().contains("part-")) {
      continue;
    }

    SequenceFile.Reader reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.file(f.getPath()));
    reader.next(key, value);
    int np = p.getPartition(key, value, numPartitions);
    reader.close();

    LOG.info(f.getPath() + "\t" + np);
    sb.append(np + "=" + f.getPath() + ";");
  }

  LOG.info(sb.toString().trim());

  LOG.info("PageRankSchimmy: iteration " + j + ": Phase1");
  LOG.info(" - input: " + in);
  LOG.info(" - output: " + out);
  LOG.info(" - nodeCnt: " + n);
  LOG.info(" - useCombiner: " + useCombiner);
  LOG.info(" - useInmapCombiner: " + useInmapCombiner);
  LOG.info(" - numPartitions: " + numPartitions);
  LOG.info(" - useRange: " + useRange);
  LOG.info("computed number of partitions: " + numPartitions);

  int numReduceTasks = numPartitions;

  conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
  //conf.set("mapred.child.java.opts", "-Xmx2048m");
  conf.set("PageRankMassPath", outm);
  conf.set("BasePath", in);
  conf.set("PartitionMapping", sb.toString().trim());

  conf.setBoolean("mapred.map.tasks.speculative.execution", false);
  conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

  Job job = Job.getInstance(conf);
  job.setJobName("PageRankSchimmy:iteration" + j + ":Phase1");
  job.setJarByClass(RunPageRankSchimmy.class);

  job.setNumReduceTasks(numReduceTasks);

  FileInputFormat.setInputPaths(job, new Path(in));
  FileOutputFormat.setOutputPath(job, new Path(out));

  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(FloatWritable.class);

  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(PageRankNode.class);

  if (useInmapCombiner) {
    job.setMapperClass(MapWithInMapperCombiningClass.class);
  } else {
    job.setMapperClass(MapClass.class);
  }

  if (useCombiner) {
    job.setCombinerClass(CombineClass.class);
  }

  if (useRange) {
    job.setPartitionerClass(RangePartitioner.class);
  }

  job.setReducerClass(ReduceClass.class);

  FileSystem.get(conf).delete(new Path(out), true);
  FileSystem.get(conf).delete(new Path(outm), true);

  long startTime = System.currentTimeMillis();
  job.waitForCompletion(true);
  System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

  float mass = Float.NEGATIVE_INFINITY;
  for (FileStatus f : fs.listStatus(new Path(outm))) {
    FSDataInputStream fin = fs.open(f.getPath());
    mass = sumLogProbs(mass, fin.readFloat());
    fin.close();
  }

  return mass;
}
From source file:ac.keio.sslab.nlp.lda.RowIdJob.java
License:Apache License
@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {
  addInputOption();
  addOutputOption();

  Map<String, List<String>> parsedArgs = parseArguments(args);
  if (parsedArgs == null) {
    return -1;
  }

  Configuration conf = getConf();
  FileSystem fs = FileSystem.get(conf);

  Path outputPath = getOutputPath();
  Path indexPath = new Path(outputPath, "docIndex");
  Path matrixPath = new Path(outputPath, "matrix");

  try (SequenceFile.Writer indexWriter = SequenceFile.createWriter(fs, conf, indexPath,
      IntWritable.class, Text.class);
      SequenceFile.Writer matrixWriter = SequenceFile.createWriter(fs, conf, matrixPath,
          IntWritable.class, VectorWritable.class)) {
    IntWritable docId = new IntWritable();
    int i = 0;
    int numCols = 0;
    for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>(
        getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(), null, true, conf)) {
      VectorWritable value = record.getSecond();
      docId.set(i);
      indexWriter.append(docId, record.getFirst());
      matrixWriter.append(docId, value);
      i++;
      numCols = value.get().size();
    }

    log.info("Wrote out matrix with {} rows and {} columns to {}", i, numCols, matrixPath);
    return 0;
  }
}
From source file:Analysis.A4_High_Traffic_Countries.Top_10_Countries_by_User_Traffic_Reducer.java
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  for (Map.Entry<Integer, String> entry : top10.entrySet()) {
    IntWritable result = new IntWritable();
    //Integer key = entry.getKey();
    String value = entry.getValue().substring(0, 1).toUpperCase() + entry.getValue().substring(1);
    result.set(entry.getKey());

    // print top 10 countries
    context.write(new Text(value), result);
  }
}
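The cleanup() above drains a top10 field that must have been maintained during the reduce calls, which are not shown on this page. A hypothetical sketch of how such a field is commonly filled — only the field name and its entry types come from the example; the TreeMap trimming logic is an assumption:

// Hypothetical sketch -- the actual reduce() is not shown in this example.
// Assumes a field: private TreeMap<Integer, String> top10 = new TreeMap<>();
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable value : values) {
        sum += value.get();
    }
    // Keyed by count so the map stays sorted; keep only the 10 largest.
    top10.put(sum, key.toString());
    if (top10.size() > 10) {
        top10.remove(top10.firstKey());
    }
}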
From source file:ar.edu.ungs.garules.CensusJob.java
License:Apache License
/**
 * Takes the reducer output from the distributed file system and loads it
 * into the in-memory "ocurrencias" map.
 * @param conf
 * @param path
 * @throws IOException
 */
@SuppressWarnings("deprecation")
private static void llenarOcurrencias(Configuration conf, String path) throws IOException {
  FileSystem fs = new DistributedFileSystem(
      new InetSocketAddress(DEFAULT_FILE_SYSTEM_HOST, DEFAULT_FILE_SYSTEM_PORT), conf);
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(path + "/part-r-00000"), conf);

  Text key = new Text();
  IntWritable value = new IntWritable();
  while (reader.next(key, value))
    ocurrencias.put(key.toString(), value.get());
  reader.close();
}
From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java
License:Apache License
@Override
public void bsp(BSPPeer<IntWritable, NullWritable, IntWritable, NullWritable, NullWritable> peer)
    throws IOException, SyncException, InterruptedException {

  BSPJob job = new BSPJob((HamaConfiguration) peer.getConfiguration());
  FileSystem fs = FileSystem.get(peer.getConfiguration());
  FSDataOutputStream outStream = fs
      .create(new Path(FileOutputFormat.getOutputPath(job), peer.getTaskId() + ".log"));

  outStream.writeChars("HelloHybrid.bsp executed on CPU!\n");

  ArrayList<Integer> summation = new ArrayList<Integer>();

  // test input
  IntWritable key = new IntWritable();
  NullWritable nullValue = NullWritable.get();

  while (peer.readNext(key, nullValue)) {
    outStream.writeChars("input: key: '" + key.get() + "'\n");
    summation.add(key.get());
  }

  // test sequenceFileReader
  Path example = new Path(peer.getConfiguration().get(CONF_EXAMPLE_PATH));
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, example, peer.getConfiguration());

    int i = 0;
    while (reader.next(key, nullValue)) {
      outStream.writeChars("sequenceFileReader: key: '" + key.get() + "'\n");
      if (i < summation.size()) {
        summation.set(i, summation.get(i) + key.get());
      }
      i++;
    }

  } catch (IOException e) {
    throw new RuntimeException(e);
  } finally {
    if (reader != null) {
      reader.close();
    }
  }

  // test output
  for (Integer i : summation) {
    key.set(i);
    outStream.writeChars("output: key: '" + key.get() + "'\n");
    peer.write(key, nullValue);
  }

  // test getAllPeerNames
  outStream.writeChars("getAllPeerNames: '" + Arrays.toString(peer.getAllPeerNames()) + "'\n");

  // test String.split
  String splitString = "boo:and:foo";
  String[] splits;

  outStream.writeChars("splitString: '" + splitString + "'\n");

  splits = splitString.split(":");
  outStream.writeChars("split(\":\") len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n");

  splits = splitString.split(":", 2);
  outStream.writeChars(
      "split(\":\",2) len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n");

  splits = splitString.split(":", 5);
  outStream.writeChars(
      "split(\":\",5) len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n");

  splits = splitString.split(":", -2);
  outStream.writeChars(
      "split(\":\",-2) len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n");

  splits = splitString.split(";");
  outStream.writeChars("split(\";\") len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n");

  outStream.close();
}
From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java
License:Apache License
private static void prepareInput(Configuration conf, Path inputPath, Path exampleFile, int n)
    throws IOException {
  FileSystem fs = inputPath.getFileSystem(conf);

  // Create input file writers depending on bspTaskNum
  int bspTaskNum = conf.getInt("bsp.peers.num", 1);
  SequenceFile.Writer[] inputWriters = new SequenceFile.Writer[bspTaskNum];
  for (int i = 0; i < bspTaskNum; i++) {
    Path inputFile = new Path(inputPath, "input" + i + ".seq");
    LOG.info("inputFile: " + inputFile.toString());
    inputWriters[i] = SequenceFile.createWriter(fs, conf, inputFile, IntWritable.class,
        NullWritable.class, CompressionType.NONE);
  }

  // Create example file writer
  SequenceFile.Writer exampleWriter = SequenceFile.createWriter(fs, conf, exampleFile,
      IntWritable.class, NullWritable.class, CompressionType.NONE);

  // Write random values to input files and example
  IntWritable inputKey = new IntWritable();
  NullWritable nullValue = NullWritable.get();
  Random r = new Random();
  for (long i = 0; i < n; i++) {
    inputKey.set(r.nextInt(n));
    for (int j = 0; j < inputWriters.length; j++) {
      inputWriters[j].append(inputKey, nullValue);
    }
    inputKey.set(r.nextInt(n));
    exampleWriter.append(inputKey, nullValue);
  }

  // Close file writers
  for (int j = 0; j < inputWriters.length; j++) {
    inputWriters[j].close();
  }
  exampleWriter.close();
}
From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java
License:Apache License
static void printOutput(BSPJob job, Path path) throws IOException {
  FileSystem fs = path.getFileSystem(job.getConfiguration());

  FileStatus[] files = fs.listStatus(path);
  for (int i = 0; i < files.length; i++) {
    if (files[i].getLen() > 0) {
      System.out.println("File " + files[i].getPath());

      SequenceFile.Reader reader = null;
      try {
        reader = new SequenceFile.Reader(fs, files[i].getPath(), job.getConfiguration());

        IntWritable key = new IntWritable();
        NullWritable value = NullWritable.get();
        while (reader.next(key, value)) {
          System.out.println("key: '" + key.get() + "' value: '" + value + "'\n");
        }
      } catch (IOException e) {
        FSDataInputStream in = fs.open(files[i].getPath());
        IOUtils.copyBytes(in, System.out, job.getConfiguration(), false);
        in.close();
      } finally {
        if (reader != null) {
          reader.close();
        }
      }
    }
  }
  // fs.delete(FileOutputFormat.getOutputPath(job), true);
}
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
private void recalculateAssignmentsAndWrite(
    BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer)
    throws IOException {

  IntWritable keyWrite = new IntWritable();
  for (DoubleVector v : m_cache) {
    final int lowestDistantCenter = getNearestCenter(v);
    keyWrite.set(lowestDistantCenter);
    peer.write(keyWrite, new PipesVectorWritable(v));
  }

  // just on the first task write the centers to filesystem to prevent
  // collisions
  if (peer.getPeerName().equals(peer.getPeerName(0))) {
    String pathString = m_conf.get(CONF_CENTER_OUT_PATH);
    if (pathString != null) {
      final SequenceFile.Writer dataWriter = SequenceFile.createWriter(FileSystem.get(m_conf),
          m_conf, new Path(pathString), PipesVectorWritable.class, NullWritable.class,
          CompressionType.NONE);
      final NullWritable value = NullWritable.get();
      for (DoubleVector center : m_centers_cpu) {
        dataWriter.append(new PipesVectorWritable(center), value);
      }
      dataWriter.close();
    }
  }
}
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
public static void main(String[] args) throws Exception {
  // Defaults
  int numBspTask = 1;
  int numGpuBspTask = 1;
  int blockSize = BLOCK_SIZE;
  int gridSize = GRID_SIZE;

  long n = 10; // input vectors
  int k = 3; // start vectors
  int vectorDimension = 2;
  int maxIteration = 10;

  boolean useTestExampleInput = false;
  boolean isDebugging = false;
  boolean timeMeasurement = false;
  int GPUPercentage = 80;

  Configuration conf = new HamaConfiguration();
  FileSystem fs = FileSystem.get(conf);

  // Set numBspTask to maxTasks
  // BSPJobClient jobClient = new BSPJobClient(conf);
  // ClusterStatus cluster = jobClient.getClusterStatus(true);
  // numBspTask = cluster.getMaxTasks();

  if (args.length > 0) {
    if (args.length == 12) {
      numBspTask = Integer.parseInt(args[0]);
      numGpuBspTask = Integer.parseInt(args[1]);
      blockSize = Integer.parseInt(args[2]);
      gridSize = Integer.parseInt(args[3]);

      n = Long.parseLong(args[4]);
      k = Integer.parseInt(args[5]);
      vectorDimension = Integer.parseInt(args[6]);
      maxIteration = Integer.parseInt(args[7]);

      useTestExampleInput = Boolean.parseBoolean(args[8]);
      GPUPercentage = Integer.parseInt(args[9]);
      isDebugging = Boolean.parseBoolean(args[10]);
      timeMeasurement = Boolean.parseBoolean(args[11]);

    } else {
      System.out.println("Wrong number of arguments!");
      System.out.println(" Argument1=numBspTask");
      System.out.println(" Argument2=numGpuBspTask");
      System.out.println(" Argument3=blockSize");
      System.out.println(" Argument4=gridSize");
      System.out.println(" Argument5=n | Number of input vectors (" + n + ")");
      System.out.println(" Argument6=k | Number of start vectors (" + k + ")");
      System.out.println(
          " Argument7=vectorDimension | Dimension of each vector (" + vectorDimension + ")");
      System.out.println(
          " Argument8=maxIterations | Number of maximal iterations (" + maxIteration + ")");
      System.out.println(" Argument9=testExample | Use testExample input (true|false=default)");
      System.out.println(" Argument10=GPUPercentage (percentage of input)");
      System.out.println(" Argument11=isDebugging (true|false=default)");
      System.out.println(" Argument12=timeMeasurement (true|false=default)");
      return;
    }
  }

  // Set config variables
  conf.setBoolean(CONF_DEBUG, isDebugging);
  conf.setBoolean("hama.pipes.logging", false);
  conf.setBoolean(CONF_TIME, timeMeasurement);

  // Set CPU tasks
  conf.setInt("bsp.peers.num", numBspTask);
  // Set GPU tasks
  conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
  // Set GPU blockSize and gridSize
  conf.set(CONF_BLOCKSIZE, "" + blockSize);
  conf.set(CONF_GRIDSIZE, "" + gridSize);
  // Set maxIterations for KMeans
  conf.setInt(CONF_MAX_ITERATIONS, maxIteration);
  // Set n for KMeans
  conf.setLong(CONF_N, n);
  // Set GPU workload
  conf.setInt(CONF_GPU_PERCENTAGE, GPUPercentage);

  LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
  LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
  LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
  LOG.info("GPUPercentage: " + conf.get(CONF_GPU_PERCENTAGE));
  LOG.info("BlockSize: " + conf.get(CONF_BLOCKSIZE));
  LOG.info("GridSize: " + conf.get(CONF_GRIDSIZE));
  LOG.info("isDebugging: " + conf.get(CONF_DEBUG));
  LOG.info("timeMeasurement: " + conf.get(CONF_TIME));
  LOG.info("useTestExampleInput: " + useTestExampleInput);
  LOG.info("inputPath: " + CONF_INPUT_DIR);
  LOG.info("centersPath: " + CONF_CENTER_DIR);
  LOG.info("outputPath: " + CONF_OUTPUT_DIR);
  LOG.info("n: " + n);
  LOG.info("k: " + k);
  LOG.info("vectorDimension: " + vectorDimension);
  LOG.info("maxIteration: " + maxIteration);

  Path centerIn = new Path(CONF_CENTER_DIR, "center_in.seq");
  Path centerOut = new Path(CONF_CENTER_DIR, "center_out.seq");
  conf.set(CONF_CENTER_IN_PATH, centerIn.toString());
  conf.set(CONF_CENTER_OUT_PATH, centerOut.toString());

  // prepare Input
  if (useTestExampleInput) {
    // prepareTestInput(conf, fs, input, centerIn);
    prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k,
        vectorDimension, null, GPUPercentage);
  } else {
    prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k,
        vectorDimension, new Random(3337L), GPUPercentage);
  }

  BSPJob job = createKMeansHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

  long startTime = System.currentTimeMillis();
  if (job.waitForCompletion(true)) {
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    if (isDebugging) {
      printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
      printOutput(conf, fs, ".log", new IntWritable(), new PipesVectorWritable());
    }

    if (k < 50) {
      printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
    }
  }
}
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBSP.java
License:Apache License
/********************************* CPU *********************************/
@Override
public void setup(
    BSPPeer<IntWritable, PipesVectorWritable, IntWritable, PipesVectorWritable, MatrixRowMessage> peer)
    throws IOException {

  HamaConfiguration conf = peer.getConfiguration();
  this.m_isDebuggingEnabled = conf.getBoolean(CONF_DEBUG, false);

  // Choose one as a master, who sorts the matrix rows at the end
  // m_masterTask = peer.getPeerName(peer.getNumPeers() / 2);

  // TODO
  // task must be 0 otherwise write out does NOT work!
  this.m_masterTask = peer.getPeerName(0);

  // Init logging
  if (m_isDebuggingEnabled) {
    try {
      FileSystem fs = FileSystem.get(conf);
      m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) conf))
          + "/BSP_" + peer.getTaskId() + ".log"));
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  // Load transposed Matrix B
  SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf),
      new Path(conf.get(CONF_MATRIX_MULT_B_PATH)), conf);

  IntWritable bKey = new IntWritable();
  PipesVectorWritable bVector = new PipesVectorWritable();
  // for each column of matrix B (caused by transposed B)
  while (reader.next(bKey, bVector)) {
    m_bColumns.add(new KeyValuePair<Integer, DoubleVector>(bKey.get(), bVector.getVector()));
    if (m_isDebuggingEnabled) {
      m_logger.writeChars("setup,read,transposedMatrixB,key=" + bKey.get() + ",value="
          + bVector.getVector().toString() + "\n");
    }
  }
  reader.close();
}