List of usage examples for org.apache.hadoop.io.LongWritable#get()
public long get()
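Before the full examples below, a minimal self-contained sketch of the get()/set() round trip (the class name LongWritableGetExample and the sample values are invented for illustration; LongWritable itself is the standard Hadoop type):

import org.apache.hadoop.io.LongWritable;

public class LongWritableGetExample {
    public static void main(String[] args) {
        LongWritable writable = new LongWritable(42L);
        long raw = writable.get();          // get() unwraps the primitive long
        writable.set(raw + 1);              // writables are mutable and reusable
        System.out.println(writable.get()); // prints 43
    }
}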
From source file: com.moz.fiji.mapreduce.lib.reduce.LongSumReducer.java
License: Apache License

/** {@inheritDoc} */
@Override
protected void reduce(K key, Iterable<LongWritable> values, Context context)
        throws IOException, InterruptedException {
    long sum = 0;
    for (LongWritable value : values) {
        sum += value.get();
    }
    mValue.set(sum);
    context.write(key, mValue);
}
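Note that the reducer reuses a single mValue field (a LongWritable) across calls instead of allocating a new writable per key, a common Hadoop pattern to reduce garbage-collection pressure; compare CountPagesReducer below, which allocates a fresh LongWritable for every output record.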
From source file: com.moz.fiji.mapreduce.testlib.SimpleBulkImporter.java
License: Apache License

/** {@inheritDoc} */
@Override
public void produce(LongWritable filePos, Text value, FijiTableContext context) throws IOException {
    final String line = value.toString();
    final String[] split = line.split(":");
    Preconditions.checkState(split.length == 2,
            String.format("Unable to parse bulk-import test input line: '%s'.", line));
    final String rowKey = split[0];
    final int integerValue = Integer.parseInt(split[1]);
    final EntityId eid = context.getEntityId(rowKey);
    context.put(eid, "primitives", "int", integerValue);
    context.put(eid, "primitives", "long", filePos.get());
    context.put(eid, "primitives", "string", String.format("%s-%d", rowKey, integerValue));
}
From source file: com.phantom.hadoop.examples.QuasiMonteCarlo.java
License: Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    // setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException("Tmp directory " + fs.makeQualified(tmpDir)
                + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        job.waitForCompletion(true);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(numTotal, RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}
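In the final step, numInside.get() unwraps the reducer's count of sample points classified as inside the circle, and the expression computes the estimate 4 * numInside / (numMaps * numPoints) at scale 20 with HALF_UP rounding.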
From source file: com.pinterest.secor.tools.LogFilePrinter.java
License: Apache License

public void printFile(String path) throws Exception {
    FileSystem fileSystem = FileUtil.getFileSystem(path);
    Path fsPath = new Path(path);
    SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, fsPath, new Configuration());
    LongWritable key = (LongWritable) reader.getKeyClass().newInstance();
    BytesWritable value = (BytesWritable) reader.getValueClass().newInstance();
    System.out.println("reading file " + path);
    while (reader.next(key, value)) {
        if (mPrintOffsetsOnly) {
            System.out.println(Long.toString(key.get()));
        } else {
            // Use getLength(): the backing array returned by getBytes() may be
            // padded beyond the valid data.
            System.out.println(Long.toString(key.get()) + ": "
                    + new String(value.getBytes(), 0, value.getLength()));
        }
    }
}
From source file: com.pinterest.secor.tools.LogFileVerifier.java
License: Apache License

private void getOffsets(LogFilePath logFilePath, Set<Long> offsets) throws Exception {
    String path = logFilePath.getLogFilePath();
    Path fsPath = new Path(path);
    FileSystem fileSystem = FileUtil.getFileSystem(path);
    SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, fsPath, new Configuration());
    LongWritable key = (LongWritable) reader.getKeyClass().newInstance();
    BytesWritable value = (BytesWritable) reader.getValueClass().newInstance();
    while (reader.next(key, value)) {
        if (!offsets.add(key.get())) {
            throw new RuntimeException("duplicate key " + key.get() + " found in file "
                    + logFilePath.getLogFilePath());
        }
    }
    reader.close();
}
From source file: com.pinterest.secor.uploader.Uploader.java
License: Apache License

private void trim(LogFilePath srcPath, long startOffset) throws Exception {
    if (startOffset == srcPath.getOffset()) {
        return;
    }
    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    String srcFilename = srcPath.getLogFilePath();
    Path srcFsPath = new Path(srcFilename);
    SequenceFile.Reader reader = null;
    SequenceFile.Writer writer = null;
    LogFilePath dstPath = null;
    int copiedMessages = 0;
    // Deleting the writer closes its stream, flushing all pending data to the disk.
    mFileRegistry.deleteWriter(srcPath);
    try {
        reader = createReader(fs, srcFsPath, config);
        LongWritable key = (LongWritable) reader.getKeyClass().newInstance();
        BytesWritable value = (BytesWritable) reader.getValueClass().newInstance();
        CompressionCodec codec = null;
        String extension = "";
        if (mConfig.getCompressionCodec() != null && !mConfig.getCompressionCodec().isEmpty()) {
            codec = (CompressionCodec) ReflectionUtil.createCompressionCodec(mConfig.getCompressionCodec());
            extension = codec.getDefaultExtension();
        }
        while (reader.next(key, value)) {
            if (key.get() >= startOffset) {
                if (writer == null) {
                    String localPrefix = mConfig.getLocalPath() + '/' + IdUtil.getLocalMessageDir();
                    dstPath = new LogFilePath(localPrefix, srcPath.getTopic(), srcPath.getPartitions(),
                            srcPath.getGeneration(), srcPath.getKafkaPartition(), startOffset, extension);
                    writer = mFileRegistry.getOrCreateWriter(dstPath, codec);
                }
                writer.append(key, value);
                copiedMessages++;
            }
        }
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
    mFileRegistry.deletePath(srcPath);
    if (dstPath == null) {
        LOG.info("removed file " + srcPath.getLogFilePath());
    } else {
        LOG.info("trimmed " + copiedMessages + " messages from " + srcFilename + " to "
                + dstPath.getLogFilePath() + " with start offset " + startOffset);
    }
}
From source file: com.ricemap.spateDB.operations.Sampler.java
License: Apache License

/**
 * Samples records until the total size of the text serialization of the
 * sampled records exceeds the given limit.
 *
 * @param fs
 * @param files
 * @param total_size
 * @param seed
 * @param output
 * @param inObj
 * @param outObj
 * @return
 * @throws IOException
 */
public static <T extends TextSerializable, O extends TextSerializable> int sampleLocalWithSize(
        FileSystem fs, Path[] files, long total_size, long seed, final ResultCollector<O> output,
        final T inObj, final O outObj) throws IOException {
    int average_record_size = 1024; // A wild guess for record size
    final LongWritable current_sample_size = new LongWritable();
    int sample_count = 0;
    final ResultCollector<T> converter = createConverter(output, inObj, outObj);
    final ResultCollector<Text2> counter = new ResultCollector<Text2>() {
        @Override
        public void collect(Text2 r) {
            current_sample_size.set(current_sample_size.get() + r.getLength());
            inObj.fromText(r);
            converter.collect(inObj);
        }
    };
    while (current_sample_size.get() < total_size) {
        int count = (int) ((total_size - current_sample_size.get()) / average_record_size);
        if (count < 10)
            count = 10;
        sample_count += sampleLocalByCount(fs, files, count, seed, counter, new Text2(), new Text2());
        // Change the seed to get a different sample next time.
        // Still we need to ensure that repeating the program will generate
        // the same value.
        seed += sample_count;
        // Update average_record_size
        average_record_size = (int) (current_sample_size.get() / sample_count);
    }
    return sample_count;
}
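The LongWritable here serves as a mutable long cell that the anonymous ResultCollector can update from inside its collect() callback (a plain local long could not be reassigned from an anonymous class); each pass of the loop then reads it via get() to re-estimate average_record_size and decide how many more records to request.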
From source file: com.talis.labs.pagerank.mapreduce.CountPagesReducer.java
License: Apache License

@Override
public void reduce(Text key, Iterable<LongWritable> values, Context context)
        throws IOException, InterruptedException {
    long sum = 0;
    for (LongWritable value : values) {
        sum += value.get();
    }
    context.write(key, new LongWritable(sum));
}
From source file: com.test.PiEstimatorKrb.java
License: Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimatorKrb.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
                + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            sLogger.info("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        sLogger.info("Starting Job");
        final long startTime = System.currentTimeMillis();

        if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
            jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
        }

        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        sLogger.info("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
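Unlike the QuasiMonteCarlo variant above, the final divide calls here pass no rounding mode, so BigDecimal.divide will throw ArithmeticException whenever the exact quotient has a non-terminating decimal expansion (for example, numMaps = 3 with a count not divisible by 3).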
From source file: com.twitter.algebra.nmf.NMFCommon.java
License: Apache License

public static HashMap<Long, Integer> readHashMap(String inputStr) throws IOException {
    HashMap<Long, Integer> hashMap = new HashMap<Long, Integer>();

    Configuration conf = new Configuration();
    Path finalNumberFile = new Path(inputStr + "/part-r-00000");
    @SuppressWarnings("deprecation")
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), finalNumberFile, conf);
    LongWritable key = new LongWritable();
    IntWritable value = new IntWritable();
    while (reader.next(key, value)) {
        // key.get() autoboxes the long offset into the map's Long key
        hashMap.put(key.get(), value.get());
    }
    reader.close();
    return hashMap;
}