List of usage examples for org.apache.hadoop.io.LongWritable#get()
public long get()
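Before the full examples below, a minimal self-contained sketch of the get()/set() round trip (the class name LongWritableGetExample and the sample values are invented for illustration; LongWritable itself is the standard Hadoop type):

import org.apache.hadoop.io.LongWritable;

public class LongWritableGetExample {
    public static void main(String[] args) {
        LongWritable writable = new LongWritable(42L);
        long raw = writable.get();          // get() unwraps the primitive long
        writable.set(raw + 1);              // writables are mutable and reusable
        System.out.println(writable.get()); // prints 43
    }
}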
From source file: com.moz.fiji.mapreduce.lib.reduce.LongSumReducer.java
License: Apache License

/** {@inheritDoc} */
@Override
protected void reduce(K key, Iterable<LongWritable> values, Context context)
        throws IOException, InterruptedException {
    long sum = 0;
    for (LongWritable value : values) {
        sum += value.get();
    }
    mValue.set(sum);
    context.write(key, mValue);
}
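Note that the reducer reuses a single mValue field (a LongWritable) across calls instead of allocating a new writable per key, a common Hadoop pattern to reduce garbage-collection pressure; compare CountPagesReducer below, which allocates a fresh LongWritable for every output record.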
From source file: com.moz.fiji.mapreduce.testlib.SimpleBulkImporter.java
License: Apache License

/** {@inheritDoc} */
@Override
public void produce(LongWritable filePos, Text value, FijiTableContext context) throws IOException {
    final String line = value.toString();
    final String[] split = line.split(":");
    Preconditions.checkState(split.length == 2,
            String.format("Unable to parse bulk-import test input line: '%s'.", line));
    final String rowKey = split[0];
    final int integerValue = Integer.parseInt(split[1]);
    final EntityId eid = context.getEntityId(rowKey);
    context.put(eid, "primitives", "int", integerValue);
    context.put(eid, "primitives", "long", filePos.get());
    context.put(eid, "primitives", "string", String.format("%s-%d", rowKey, integerValue));
}
From source file: com.phantom.hadoop.examples.QuasiMonteCarlo.java
License: Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    // setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException("Tmp directory " + fs.makeQualified(tmpDir)
                + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        job.waitForCompletion(true);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(numTotal, RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}
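In the final step, numInside.get() unwraps the reducer's count of sample points classified as inside the circle, and the expression computes the estimate 4 * numInside / (numMaps * numPoints) at scale 20 with HALF_UP rounding.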
From source file: com.pinterest.secor.tools.LogFilePrinter.java
License: Apache License

public void printFile(String path) throws Exception {
    FileSystem fileSystem = FileUtil.getFileSystem(path);
    Path fsPath = new Path(path);
    SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, fsPath, new Configuration());
    LongWritable key = (LongWritable) reader.getKeyClass().newInstance();
    BytesWritable value = (BytesWritable) reader.getValueClass().newInstance();
    System.out.println("reading file " + path);
    while (reader.next(key, value)) {
        if (mPrintOffsetsOnly) {
            System.out.println(Long.toString(key.get()));
        } else {
            // Use getLength(): the backing array returned by getBytes() may be
            // padded beyond the valid data.
            System.out.println(Long.toString(key.get()) + ": "
                    + new String(value.getBytes(), 0, value.getLength()));
        }
    }
}
From source file: com.pinterest.secor.tools.LogFileVerifier.java
License: Apache License

private void getOffsets(LogFilePath logFilePath, Set<Long> offsets) throws Exception {
    String path = logFilePath.getLogFilePath();
    Path fsPath = new Path(path);
    FileSystem fileSystem = FileUtil.getFileSystem(path);
    SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, fsPath, new Configuration());
    LongWritable key = (LongWritable) reader.getKeyClass().newInstance();
    BytesWritable value = (BytesWritable) reader.getValueClass().newInstance();
    while (reader.next(key, value)) {
        if (!offsets.add(key.get())) {
            throw new RuntimeException("duplicate key " + key.get() + " found in file "
                    + logFilePath.getLogFilePath());
        }
    }
    reader.close();
}
From source file: com.pinterest.secor.uploader.Uploader.java
License: Apache License

private void trim(LogFilePath srcPath, long startOffset) throws Exception {
    if (startOffset == srcPath.getOffset()) {
        return;
    }
    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    String srcFilename = srcPath.getLogFilePath();
    Path srcFsPath = new Path(srcFilename);
    SequenceFile.Reader reader = null;
    SequenceFile.Writer writer = null;
    LogFilePath dstPath = null;
    int copiedMessages = 0;
    // Deleting the writer closes its stream, flushing all pending data to the disk.
    mFileRegistry.deleteWriter(srcPath);
    try {
        reader = createReader(fs, srcFsPath, config);
        LongWritable key = (LongWritable) reader.getKeyClass().newInstance();
        BytesWritable value = (BytesWritable) reader.getValueClass().newInstance();
        CompressionCodec codec = null;
        String extension = "";
        if (mConfig.getCompressionCodec() != null && !mConfig.getCompressionCodec().isEmpty()) {
            codec = (CompressionCodec) ReflectionUtil.createCompressionCodec(mConfig.getCompressionCodec());
            extension = codec.getDefaultExtension();
        }
        while (reader.next(key, value)) {
            if (key.get() >= startOffset) {
                if (writer == null) {
                    String localPrefix = mConfig.getLocalPath() + '/' + IdUtil.getLocalMessageDir();
                    dstPath = new LogFilePath(localPrefix, srcPath.getTopic(), srcPath.getPartitions(),
                            srcPath.getGeneration(), srcPath.getKafkaPartition(), startOffset, extension);
                    writer = mFileRegistry.getOrCreateWriter(dstPath, codec);
                }
                writer.append(key, value);
                copiedMessages++;
            }
        }
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
    mFileRegistry.deletePath(srcPath);
    if (dstPath == null) {
        LOG.info("removed file " + srcPath.getLogFilePath());
    } else {
        LOG.info("trimmed " + copiedMessages + " messages from " + srcFilename + " to "
                + dstPath.getLogFilePath() + " with start offset " + startOffset);
    }
}
From source file: com.ricemap.spateDB.operations.Sampler.java
License: Apache License

/**
 * Samples records until the total size of the text serialization of the
 * sampled records exceeds the given limit.
 *
 * @param fs
 * @param files
 * @param total_size
 * @param seed
 * @param output
 * @param inObj
 * @param outObj
 * @return
 * @throws IOException
 */
public static <T extends TextSerializable, O extends TextSerializable> int sampleLocalWithSize(
        FileSystem fs, Path[] files, long total_size, long seed, final ResultCollector<O> output,
        final T inObj, final O outObj) throws IOException {
    int average_record_size = 1024; // A wild guess for record size
    final LongWritable current_sample_size = new LongWritable();
    int sample_count = 0;
    final ResultCollector<T> converter = createConverter(output, inObj, outObj);
    final ResultCollector<Text2> counter = new ResultCollector<Text2>() {
        @Override
        public void collect(Text2 r) {
            current_sample_size.set(current_sample_size.get() + r.getLength());
            inObj.fromText(r);
            converter.collect(inObj);
        }
    };
    while (current_sample_size.get() < total_size) {
        int count = (int) ((total_size - current_sample_size.get()) / average_record_size);
        if (count < 10)
            count = 10;
        sample_count += sampleLocalByCount(fs, files, count, seed, counter, new Text2(), new Text2());
        // Change the seed to get a different sample next time.
        // Still we need to ensure that repeating the program will generate
        // the same value.
        seed += sample_count;
        // Update average_record_size
        average_record_size = (int) (current_sample_size.get() / sample_count);
    }
    return sample_count;
}
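The LongWritable here serves as a mutable long cell that the anonymous ResultCollector can update from inside its collect() callback (a plain local long could not be reassigned from an anonymous class); each pass of the loop then reads it via get() to re-estimate average_record_size and decide how many more records to request.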
From source file: com.talis.labs.pagerank.mapreduce.CountPagesReducer.java
License: Apache License

@Override
public void reduce(Text key, Iterable<LongWritable> values, Context context)
        throws IOException, InterruptedException {
    long sum = 0;
    for (LongWritable value : values) {
        sum += value.get();
    }
    context.write(key, new LongWritable(sum));
}
From source file: com.test.PiEstimatorKrb.java
License: Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimatorKrb.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
                + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            sLogger.info("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        sLogger.info("Starting Job");
        final long startTime = System.currentTimeMillis();

        if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
            jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
        }

        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        sLogger.info("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
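Unlike the QuasiMonteCarlo variant above, the final divide calls here pass no rounding mode, so BigDecimal.divide will throw ArithmeticException whenever the exact quotient has a non-terminating decimal expansion (for example, numMaps = 3 with a count not divisible by 3).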
From source file: com.twitter.algebra.nmf.NMFCommon.java
License: Apache License

public static HashMap<Long, Integer> readHashMap(String inputStr) throws IOException {
    HashMap<Long, Integer> hashMap = new HashMap<Long, Integer>();

    Configuration conf = new Configuration();
    Path finalNumberFile = new Path(inputStr + "/part-r-00000");
    @SuppressWarnings("deprecation")
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), finalNumberFile, conf);
    LongWritable key = new LongWritable();
    IntWritable value = new IntWritable();
    while (reader.next(key, value)) {
        // key.get() autoboxes the long offset into the map's Long key
        hashMap.put(key.get(), value.get());
    }
    reader.close();
    return hashMap;
}