List of usage examples for org.apache.hadoop.io LongWritable get
public long get()
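get() returns the primitive long wrapped by the Writable; set(long) stores a new one into the same mutable instance. A minimal standalone sketch of the round trip (illustrative only, not taken from the source files below):

import org.apache.hadoop.io.LongWritable;

public class LongWritableGetDemo {
    public static void main(String[] args) {
        LongWritable w = new LongWritable(42L); // wrap a primitive long
        long v = w.get();                       // unwrap it again
        w.set(v + 1);                           // Writables are mutable, so instances can be reused
        System.out.println(w.get());            // prints 43
    }
}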
From source file:TorrentWebExtracter.java
License:Apache License
@Override
public void map(LongWritable key, WarcRecord value, Context context)
        throws IOException, InterruptedException {
    context.setStatus(Counters.CURRENT_RECORD + ": " + key.get());
    //Record2Hashcode r1 = new Record2Hashcode();
    Record2Torrent rt = new Record2Torrent(value);
    String hex = rt.getHEXhash();
    if (hex != null && !hex.isEmpty()) {
        context.getCounter(Counters.NUM_HTTP_RESPONSE_RECORDS).increment(1);
        context.write(new Text(hex), new Text(rt.getContent()));
    }
}
From source file:PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // set up job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);
    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
                + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20)
                .multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps))
                .divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
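A detail worth calling out in the "read outputs" step above: the old-API SequenceFile.Reader fills the Writable arguments of next() in place, which is why fresh LongWritable instances are allocated before the read and get() is what finally unwraps the primitives. A minimal standalone sketch of that read idiom (the path is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;

public class ReadLongPair {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path file = new Path("/tmp/reduce-out"); // hypothetical path
        LongWritable key = new LongWritable();   // reused and filled in place by next()
        LongWritable value = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
        try {
            while (reader.next(key, value)) {    // returns false at end of file
                System.out.println(key.get() + "\t" + value.get());
            }
        } finally {
            reader.close();
        }
    }
}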
From source file:DeprecatedBAMBaseRecordReader.java
License:Open Source License
private boolean nextFromOldCurrent(LongWritable key, SAMBaseRecord base) {
    assert base.getParent() == current;

    if (!base.gotoNextBase())
        return false;

    // keep the upper 32 bits of the key and replace the lower 32 bits
    // with the position of the next base
    key.set((key.get() & ~0xffffffffL) | base.getPos());
    return true;
}
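The mask arithmetic packs two 32-bit fields into a single long key. A hypothetical illustration of the packing (the field meanings are assumptions inferred from the masking, not taken from the source):

public class KeyPacking {
    public static void main(String[] args) {
        long key = 0x00000005_00000000L;  // upper 32 bits: record index 5 (assumed meaning)
        long pos = 42L;                   // lower 32 bits: base position
        key = (key & ~0xffffffffL) | pos; // same masking as in the record reader above
        System.out.println(Long.toHexString(key)); // prints 50000002a
    }
}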
From source file:TestHashMap.java
License:Apache License
@Test
public void testHashSetLong() throws Exception {
    final Set<Long> hashSet = new HashSet<>();
    final Random random = new Random(0xDEADBEEF);
    int matched = 0;
    LongWritable num = new LongWritable();

    long startTime = System.nanoTime();
    for (int i = 0; i < SET_SIZE; i++) {
        // input data is String
        String input = Long.toString(random.nextLong());
        // disable optimizer
        if (input.length() > 5) {
            hashSet.add(Long.parseLong(input));
        }
    }
    random.setSeed(0xDEADBEEF);
    for (int i = 0; i < DATA_SIZE; i++) {
        // query data is LongWritable
        num.set(random.nextLong());
        if (hashSet.contains(num.get())) {
            matched++;
        }
    }
    long endTime = System.nanoTime();

    System.out.println("  HashSet<Long>");
    System.out.println("    Elapsed time: " + (endTime - startTime) / 1000000f + " ms");
    System.out.println("    Matched " + matched + " times");
}
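The call hashSet.contains(num.get()) is the important detail here: get() yields a primitive long, which autoboxes to Long and therefore matches the set's element type. Passing the LongWritable itself would compile (contains accepts Object) but never match. A minimal sketch of the pitfall:

import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.io.LongWritable;

public class ContainsPitfall {
    public static void main(String[] args) {
        Set<Long> set = new HashSet<>();
        set.add(42L);
        LongWritable num = new LongWritable(42L);
        System.out.println(set.contains(num));       // false: a LongWritable is not a Long
        System.out.println(set.contains(num.get())); // true: long autoboxes to Long
    }
}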
From source file:alluxio.client.hadoop.AbstractIOMapper.java
License:Apache License
/**
 * Map file name and offset into statistical data.
 * <p>
 * The map task is to get the <tt>key</tt>, which contains the file name, and the <tt>value</tt>,
 * which is the offset within the file.
 *
 * The parameters are passed to the abstract method
 * {@link #doIO(Reporter, String, long)}, which performs the io operation,
 * usually read or write data, and then
 * {@link #collectStats(OutputCollector, String, long, Object)} is called
 * to prepare stat data for a subsequent reducer.
 */
@Override
public void map(Text key, LongWritable value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String name = key.toString();
    long longValue = value.get();

    reporter.setStatus("starting " + name + " ::host = " + mHostname);

    mStream = getIOStream(name);
    T statValue = null;
    long tStart = System.currentTimeMillis();
    try {
        statValue = doIO(reporter, name, longValue);
    } finally {
        if (mStream != null) {
            mStream.close();
        }
    }
    long tEnd = System.currentTimeMillis();
    long execTime = tEnd - tStart;
    collectStats(output, name, execTime, statValue);

    reporter.setStatus("finished " + name + " ::host = " + mHostname);
}
From source file:Assignment4_P4_MemoryConscious.MovieRatingMemConscious_Reducer.java
public void reduce(IntWritable key, Iterable<SortedMapWritable> values, Context context)
        throws IOException, InterruptedException {
    int running_count = 0;
    long running_sum = 0;
    float median = 0;
    list = new TreeMap<>();

    // loop through all ratings in the received map for this movieID
    for (SortedMapWritable val : values) {
        // iterate through every entry consisting of (movieRating, countOfRating),
        // e.g. (4.5, 10) means 10 people rated it 4.5
        for (Map.Entry<WritableComparable, Writable> entry : val.entrySet()) {
            // extract movieRating, e.g. 4.5
            FloatWritable number = (FloatWritable) entry.getKey();
            float movieRating = number.get();

            // extract countOfRating, e.g. 10
            LongWritable counter = (LongWritable) entry.getValue();
            long count = counter.get();

            // running sum adds movieRating times countOfRating, e.g. 4.5 * 10 = 45
            running_sum += (movieRating * count);

            // running count is used for the average later, e.g. 10
            running_count += count;

            // accumulate <movieRating, count> entries in the new map, e.g. (4.5, 10)
            if (list.containsKey(movieRating)) {
                list.put(movieRating, list.get(movieRating) + count);
            } else {
                list.put(movieRating, count);
            }
        }
    }

    System.out.println("Running count for movieID " + key + " is :- " + running_count);
    System.out.println("Rating List size for movieID " + key + " is :- " + list.size());

    // calculating mean; cast before dividing, otherwise integer division
    // truncates the fractional part
    float mean = (float) running_sum / running_count;
    System.out.println("Mean for movieID " + key + " is :- " + mean);

    // calculating standard deviation
    float sumSquare = 0;
    float stdDev = 0;
    for (Map.Entry<Float, Long> entry : list.entrySet()) {
        sumSquare += (entry.getKey() - mean) * (entry.getKey() - mean) * (entry.getValue());
    }
    // finally, the sample standard deviation
    stdDev = (float) Math.sqrt(sumSquare / (running_count - 1));
    System.out.println("Standard deviation for movieID " + key + " is :- " + stdDev);

    //.append(median)
    String outcome = new StringBuilder().append("\t").append(stdDev).append("\t").append(running_sum)
            .append("\t").append(running_count).toString();
    result = new Text(outcome);
    context.write(key, result);
}
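As a quick sanity check of the count-weighted mean and sample standard deviation computed above, consider a hypothetical movie with rating counts (4.5 x 10) and (3.0 x 5):

// running_sum   = 4.5*10 + 3.0*5                 = 60.0
// running_count = 10 + 5                         = 15
// mean          = 60.0 / 15                      = 4.0
// sumSquare     = 10*(4.5-4.0)^2 + 5*(3.0-4.0)^2 = 2.5 + 5.0 = 7.5
// stdDev        = sqrt(7.5 / (15 - 1))           ~ 0.73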
From source file:Assignment4_P4_MemoryConscious.MovingRatingMemConscious_Combiner.java
public void reduce(IntWritable key, Iterable<SortedMapWritable> values, Context context)
        throws IOException, InterruptedException {
    // loop through each map for this movie id
    for (SortedMapWritable val : values) {
        // inside each map, loop over every entry
        for (Map.Entry<WritableComparable, Writable> entry : val.entrySet()) {
            // check if the current entry's key is already present in the result map
            if (result.containsKey(entry.getKey())) {
                // if yes, extract the current value from the result map for this key
                LongWritable existingValue = (LongWritable) result.get(entry.getKey());
                // add this entry's count; a combiner may run more than once, so
                // incoming values greater than 1 must be preserved
                existingValue.set(existingValue.get() + ((LongWritable) entry.getValue()).get());
                // update the result map with the new value
                result.put(entry.getKey(), existingValue);
            } else {
                // if not, start a new entry from this entry's count
                result.put(entry.getKey(), new LongWritable(((LongWritable) entry.getValue()).get()));
            }
        }
        val.clear();
    }
    context.write(key, result);
}
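A note on the merge above: Hadoop treats a combiner as an optional optimization that may be applied zero, one, or several times (for example on merged spills), so a correct merge adds the incoming count rather than assuming every incoming value is 1. A minimal standalone sketch of an associative count merge (hypothetical names, plain Java maps instead of Writables):

import java.util.HashMap;
import java.util.Map;

public class CountMerge {
    // merging partial counts must be associative: merge(merge(a,b),c) == merge(a,merge(b,c))
    static void merge(Map<Float, Long> into, Map<Float, Long> from) {
        for (Map.Entry<Float, Long> e : from.entrySet()) {
            into.merge(e.getKey(), e.getValue(), Long::sum); // add counts, never "+1"
        }
    }

    public static void main(String[] args) {
        Map<Float, Long> a = new HashMap<>();
        a.put(4.5f, 10L);
        Map<Float, Long> b = new HashMap<>();
        b.put(4.5f, 5L);
        merge(a, b);
        System.out.println(a); // {4.5=15}
    }
}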
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java
License:Apache License
private void collectInput(BSPPeer<LongWritable, PipesVectorWritable, Text, PipesVectorWritable, ItemMessage> peer)
        throws IOException {
    LongWritable key = new LongWritable();
    PipesVectorWritable value = new PipesVectorWritable();
    int counter = 0;

    while (peer.readNext(key, value)) {
        long actualId = key.get();

        // parse as <k:userId, v:(itemId, score)>
        long itemId = (long) value.getVector().get(0);
        double score = value.getVector().get(1);

        if (m_usersMatrix.containsKey(actualId) == false) {
            DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank);
            for (int i = 0; i < m_matrixRank; i++) {
                vals.set(i, m_rand.nextDouble());
            }
            m_usersMatrix.put(actualId, new PipesVectorWritable(vals));
        }

        if (m_itemsMatrix.containsKey(itemId) == false) {
            DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank);
            for (int i = 0; i < m_matrixRank; i++) {
                vals.set(i, m_rand.nextDouble());
            }
            m_itemsMatrix.put(itemId, new PipesVectorWritable(vals));
        }

        m_preferences.add(new Preference<Long, Long>(actualId, itemId, score));
        m_indexes.add(counter);
        counter++;
    }
}
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java
License:Apache License
@Override
public void bspGpu(BSPPeer<LongWritable, PipesVectorWritable, Text, PipesVectorWritable, ItemMessage> peer,
        Rootbeer rootbeer) throws IOException, SyncException, InterruptedException {
    long startTime = System.currentTimeMillis();

    // **********************************************************************
    // Collect inputs
    // **********************************************************************
    Map<Long, HashMap<Long, Double>> preferencesMap = new HashMap<Long, HashMap<Long, Double>>();
    Map<Long, Long> userRatingCount = new HashMap<Long, Long>();
    Map<Long, Long> itemRatingCount = new HashMap<Long, Long>();

    LongWritable key = new LongWritable();
    PipesVectorWritable value = new PipesVectorWritable();
    int counter = 0;

    while (peer.readNext(key, value)) {
        // parse as <k:userId, v:(itemId, score)>
        long userId = key.get();
        long itemId = (long) value.getVector().get(0);
        double score = value.getVector().get(1);

        // Add user vector
        if (m_usersMatrix.containsKey(userId) == false) {
            DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank);
            for (int i = 0; i < m_matrixRank; i++) {
                vals.set(i, m_rand.nextDouble());
            }
            m_usersMatrix.put(userId, new PipesVectorWritable(vals));
            userRatingCount.put(userId, 1L);
        } else {
            userRatingCount.put(userId, userRatingCount.get(userId) + 1);
        }

        // Add item vector
        if (m_itemsMatrix.containsKey(itemId) == false) {
            DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank);
            for (int i = 0; i < m_matrixRank; i++) {
                vals.set(i, m_rand.nextDouble());
            }
            m_itemsMatrix.put(itemId, new PipesVectorWritable(vals));
            itemRatingCount.put(itemId, 1L);
        } else {
            itemRatingCount.put(itemId, itemRatingCount.get(itemId) + 1);
        }

        // Add preference
        m_preferences.add(new Preference<Long, Long>(userId, itemId, score));
        if (preferencesMap.containsKey(userId) == false) {
            HashMap<Long, Double> map = new HashMap<Long, Double>();
            map.put(itemId, score);
            preferencesMap.put(userId, map);
        } else {
            preferencesMap.get(userId).put(itemId, score);
        }

        // Add counter
        m_indexes.add(counter);
        counter++;
    }

    // DEBUG
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("collected: " + m_usersMatrix.size() + " users, " + m_itemsMatrix.size()
                + " items, " + m_preferences.size() + " preferences\n");
    }

    // **********************************************************************
    // Prepare input for GPU
    // **********************************************************************
    Map<Long, Long> sortedUserRatingCount = sortByValues(userRatingCount);
    Map<Long, Long> sortedItemRatingCount = sortByValues(itemRatingCount);

    // Convert preferences to userItemMatrix double[][]
    // sortedUserRatingCount.size() x sortedItemRatingCount.size()
    double[][] userItemMatrix = new double[m_usersMatrix.size()][m_itemsMatrix.size()];

    // Mappers
    Map<Long, Integer> userItemMatrixUserRowMap = new HashMap<Long, Integer>();
    GpuIntegerMap userItemMatrixItemColMap = new GpuIntegerMap(m_itemsMatrix.size() + 1); // +1 because we are starting with 0
    GpuIntegerMap userItemMatrixColItemMap = new GpuIntegerMap(m_itemsMatrix.size() + 1); // +1 because we are starting with 0

    // Create userHelper as int[][]
    // userHelper[userId][0] = userRatingCount
    // userHelper[userId][1] = colId of userItemMatrix
    int[][] userHelper = null;

    // Create itemHelper as int[][]
    // itemHelper[itemId][0] = itemRatingCount
    // itemHelper[itemId][1] = rowId of userItemMatrix
    int[][] itemHelper = null;
    Map<Long, Integer> itemHelperId = new HashMap<Long, Integer>();

    // Debug
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("userItemMatrix: (m x n): " + m_usersMatrix.size() + " x "
                + m_itemsMatrix.size() + "\n");
    }

    int rowId = 0;
    for (Long userId : sortedUserRatingCount.keySet()) {
        // Map userId to rowId in userItemMatrixUserRowMap
        userItemMatrixUserRowMap.put(userId, rowId);

        // Setup userHelper
        if (userHelper == null) { // TODO sortedUserRatingCount.size()
            userHelper = new int[m_usersMatrix.size()][sortedUserRatingCount.get(userId).intValue() + 1];
        }
        userHelper[rowId][0] = sortedUserRatingCount.get(userId).intValue();

        int colId = 0;
        int userHelperId = 1;
        for (Long itemId : sortedItemRatingCount.keySet()) {
            // Map itemId to colId in userItemMatrixItemColMap
            if (rowId == 0) {
                userItemMatrixItemColMap.put(itemId.intValue(), colId);
                userItemMatrixColItemMap.put(colId, itemId.intValue());
            }

            // Setup itemHelper
            if (itemHelper == null) { // TODO sortedItemRatingCount.size()
                itemHelper = new int[m_itemsMatrix.size()][sortedItemRatingCount.get(itemId).intValue() + 1];
            }
            itemHelper[colId][0] = sortedItemRatingCount.get(itemId).intValue();

            if (preferencesMap.get(userId).containsKey(itemId)) {
                // Add userItemMatrix
                userItemMatrix[rowId][colId] = preferencesMap.get(userId).get(itemId);

                // Add userHelper
                userHelper[rowId][userHelperId] = colId;
                userHelperId++;

                // Add itemHelper
                if (itemHelperId.containsKey(itemId)) {
                    int idx = itemHelperId.get(itemId);
                    itemHelper[colId][idx] = rowId;
                    itemHelperId.put(itemId, idx + 1);
                } else {
                    itemHelper[colId][1] = rowId;
                    itemHelperId.put(itemId, 2);
                }
            }
            colId++;
        }

        // Debug userItemMatrix
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("userItemMatrix userId: " + userId + " row[" + rowId + "]: "
                    + Arrays.toString(userItemMatrix[rowId]) + " userRatings: "
                    + sortedUserRatingCount.get(userId) + "\n");
        }
        rowId++;
    }

    // Debug userHelper and itemHelper
    if (m_isDebuggingEnabled) {
        // TODO sortedUserRatingCount.size()
        for (int i = 0; i < m_usersMatrix.size(); i++) {
            m_logger.writeChars("userHelper row " + i + ": " + Arrays.toString(userHelper[i]) + "\n");
        }
        // TODO sortedItemRatingCount.size()
        for (int i = 0; i < m_itemsMatrix.size(); i++) {
            m_logger.writeChars("itemHelper row " + i + ": " + Arrays.toString(itemHelper[i]) + "\n");
        }
    }

    // Convert usersMatrix to double[][]
    double[][] userMatrix = new double[m_usersMatrix.size()][m_matrixRank];
    rowId = 0;
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("userMatrix: length: " + m_usersMatrix.size() + "\n");
    }
    for (Long userId : sortedUserRatingCount.keySet()) {
        DoubleVector vector = m_usersMatrix.get(userId).getVector();
        for (int i = 0; i < m_matrixRank; i++) {
            userMatrix[rowId][i] = vector.get(i);
        }
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("userId: " + userId + " " + Arrays.toString(vector.toArray()) + "\n");
        }
        rowId++;
    }

    // Convert itemsMatrix to double[][]
    double[][] itemMatrix = new double[m_itemsMatrix.size()][m_matrixRank];
    rowId = 0;
    GpuIntegerMap counterMap = new GpuIntegerMap(m_itemsMatrix.size());
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("itemMatrix: length: " + m_itemsMatrix.size() + "\n");
    }
    for (Long itemId : sortedItemRatingCount.keySet()) {
        counterMap.put(itemId.intValue(), 0);
        DoubleVector vector = m_itemsMatrix.get(itemId).getVector();
        for (int i = 0; i < m_matrixRank; i++) {
            itemMatrix[rowId][i] = vector.get(i);
        }
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("itemId: " + itemId + " " + Arrays.toString(vector.toArray()) + "\n");
        }
        rowId++;
    }

    // Sync tasks after input has been collected
    peer.sync();

    // **********************************************************************
    // Run GPU Kernels
    // **********************************************************************
    OnlineCFTrainHybridKernel kernel = new OnlineCFTrainHybridKernel(userItemMatrix, userHelper, itemHelper,
            userItemMatrixItemColMap, userItemMatrixColItemMap, userMatrix, itemMatrix, m_usersMatrix.size(),
            m_itemsMatrix.size(), ALPHA, m_matrixRank, m_maxIterations, counterMap, m_skipCount,
            peer.getNumPeers(), peer.getPeerIndex(), peer.getAllPeerNames());

    Context context = rootbeer.createDefaultContext();
    Stopwatch watch = new Stopwatch();
    watch.start();
    rootbeer.run(kernel, new ThreadConfig(m_blockSize, m_gridSize, m_blockSize * m_gridSize), context);
    watch.stop();

    // **********************************************************************
    // Save Model
    // **********************************************************************
    // save users
    for (Entry<Long, Integer> userMap : userItemMatrixUserRowMap.entrySet()) {
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("user: " + userMap.getKey() + " vector: "
                    + Arrays.toString(kernel.m_usersMatrix[userMap.getValue()]) + "\n");
        }
        peer.write(new Text("u" + userMap.getKey()),
                new PipesVectorWritable(new DenseDoubleVector(kernel.m_usersMatrix[userMap.getValue()])));
    }

    // TODO duplicated item saves, but one item may belong to one task only
    // save items
    for (GpuIntIntPair itemMap : userItemMatrixItemColMap.getList()) {
        if (itemMap != null) {
            if (m_isDebuggingEnabled) {
                m_logger.writeChars("item: " + itemMap.getKey() + " vector: "
                        + Arrays.toString(kernel.m_itemsMatrix[itemMap.getValue()]) + "\n");
            }
            peer.write(new Text("i" + itemMap.getKey()),
                    new PipesVectorWritable(new DenseDoubleVector(kernel.m_itemsMatrix[itemMap.getValue()])));
        }
    }

    this.m_bspTimeGpu = System.currentTimeMillis() - startTime;

    // **********************************************************************
    // Logging
    // **********************************************************************
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("OnlineCFTrainHybridBSP.bspGpu executed on GPU!\n");
        m_logger.writeChars("OnlineCFTrainHybridBSP.bspGpu blockSize: " + m_blockSize
                + " gridSize: " + m_gridSize + "\n");
        m_logger.writeChars("OnlineCFTrainHybridBSP,setupTimeGpu=" + this.m_setupTimeGpu + " ms\n");
        m_logger.writeChars("OnlineCFTrainHybridBSP,setupTimeGpu=" + (this.m_setupTimeGpu / 1000.0) + " seconds\n");
        m_logger.writeChars("OnlineCFTrainHybridBSP,bspTimeGpu=" + this.m_bspTimeGpu + " ms\n");
        m_logger.writeChars("OnlineCFTrainHybridBSP,bspTimeGpu=" + (this.m_bspTimeGpu / 1000.0) + " seconds\n");

        List<StatsRow> stats = context.getStats();
        for (StatsRow row : stats) {
            m_logger.writeChars("  StatsRow:\n");
            m_logger.writeChars("    serial time: " + row.getSerializationTime() + "\n");
            m_logger.writeChars("    exec time: " + row.getExecutionTime() + "\n");
            m_logger.writeChars("    deserial time: " + row.getDeserializationTime() + "\n");
            m_logger.writeChars("    num blocks: " + row.getNumBlocks() + "\n");
            m_logger.writeChars("    num threads: " + row.getNumThreads() + "\n");
            m_logger.writeChars("GPUTime: " + watch.elapsedTimeMillis() + " ms" + "\n");
        }
        m_logger.close();
    }

    // Logging
    List<StatsRow> stats = context.getStats();
    for (StatsRow row : stats) {
        LOG.info("  StatsRow:");
        LOG.info("    serial time: " + row.getSerializationTime());
        LOG.info("    exec time: " + row.getExecutionTime());
        LOG.info("    deserial time: " + row.getDeserializationTime());
        LOG.info("    num blocks: " + row.getNumBlocks());
        LOG.info("    num threads: " + row.getNumThreads());
        LOG.info("GPUTime: " + watch.elapsedTimeMillis() + " ms");
    }

    LOG.info("OnlineCFTrainHybridBSP.bspGpu executed on GPU!");
    LOG.info("OnlineCFTrainHybridBSP.bspGpu blockSize: " + m_blockSize + " gridSize: " + m_gridSize);
    LOG.info("OnlineCFTrainHybridBSP,setupTimeGpu=" + this.m_setupTimeGpu + " ms");
    LOG.info("OnlineCFTrainHybridBSP,setupTimeGpu=" + (this.m_setupTimeGpu / 1000.0) + " seconds");
    LOG.info("OnlineCFTrainHybridBSP,bspTimeGpu=" + this.m_bspTimeGpu + " ms");
    LOG.info("OnlineCFTrainHybridBSP,bspTimeGpu=" + (this.m_bspTimeGpu / 1000.0) + " seconds");
}
From source file:at.illecker.hama.hybrid.examples.piestimator.PiEstimatorHybridBSP.java
License:Apache License
@Override
public void cleanup(BSPPeer<NullWritable, NullWritable, Text, DoubleWritable, LongWritable> peer)
        throws IOException {
    long startTime = 0;
    if (m_timeMeasurement) {
        startTime = System.currentTimeMillis();
    }

    // MasterTask writes out results
    if (peer.getPeerName().equals(m_masterTask)) {
        long totalHits = 0;
        LongWritable received;
        while ((received = peer.getCurrentMessage()) != null) {
            totalHits += received.get();
        }

        double pi = 4.0 * totalHits / m_iterations;

        // DEBUG
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("PiEstimatorHybrid,Iterations=" + m_iterations + "\n");
            m_logger.writeChars("PiEstimatorHybrid,numMessages: " + peer.getNumCurrentMessages() + "\n");
            m_logger.writeChars("PiEstimatorHybrid,totalHits: " + totalHits + "\n");
        }

        peer.write(new Text("Estimated value of PI(3,14159265) using " + m_iterations + " iterations is"),
                new DoubleWritable(pi));
    }

    long stopTime = 0;
    if (m_timeMeasurement) {
        stopTime = System.currentTimeMillis();
        LOG.info("# cleanupTime: " + ((stopTime - startTime) / 1000.0) + " sec");
    }
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("PiEstimatorHybrid,cleanupTime: " + ((stopTime - startTime) / 1000.0) + " sec\n");
        m_logger.close();
    }
}
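The master's estimate follows the standard Monte Carlo argument: the fraction of uniform random points that land inside the unit circle tends to pi/4, hence pi is approximated as 4.0 * totalHits / m_iterations, with get() unwrapping each peer's hit count from its LongWritable message. A hypothetical check of the arithmetic:

// Suppose three peers each report 785,400 hits from 1,000,000 samples apiece:
// totalHits    = 3 * 785400  = 2356200
// m_iterations = 3 * 1000000 = 3000000
// pi           = 4.0 * 2356200 / 3000000 = 3.1416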