List of usage examples for the org.apache.hadoop.io.LongWritable constructor
public LongWritable(long value)
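Before the project examples, here is a minimal, self-contained sketch (not taken from any of the source files listed below) of how the LongWritable(long value) constructor, the get()/set() accessors, and the Writable serialization methods fit together. The class name LongWritableBasics and the values used are illustrative assumptions only.

// Minimal sketch (illustrative): constructing, mutating, and serializing a LongWritable.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;

public class LongWritableBasics {
  public static void main(String[] args) throws IOException {
    // Construct with an initial value, then read and update it.
    LongWritable counter = new LongWritable(42L);
    System.out.println("initial value: " + counter.get());
    counter.set(counter.get() + 1);

    // Writable round trip: serialize to bytes, then read back into an
    // empty instance created with the no-argument constructor.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    counter.write(new DataOutputStream(bytes));

    LongWritable copy = new LongWritable();
    copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    System.out.println("deserialized value: " + copy.get()); // prints 43
  }
}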
From source file:PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
  // setup job conf
  jobConf.setJobName(PiEstimator.class.getSimpleName());
  jobConf.setInputFormat(SequenceFileInputFormat.class);
  jobConf.setOutputKeyClass(BooleanWritable.class);
  jobConf.setOutputValueClass(LongWritable.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  jobConf.setMapperClass(PiMapper.class);
  jobConf.setNumMapTasks(numMaps);
  jobConf.setReducerClass(PiReducer.class);
  jobConf.setNumReduceTasks(1);
  // turn off speculative execution, because DFS doesn't handle
  // multiple writers to the same file.
  jobConf.setSpeculativeExecution(false);

  // setup input/output directories
  final Path inDir = new Path(TMP_DIR, "in");
  final Path outDir = new Path(TMP_DIR, "out");
  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outDir);

  final FileSystem fs = FileSystem.get(jobConf);
  if (fs.exists(TMP_DIR)) {
    throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
        + " already exists. Please remove it first.");
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Cannot create input directory " + inDir);
  }

  try {
    // generate an input file for each map task
    for (int i = 0; i < numMaps; ++i) {
      final Path file = new Path(inDir, "part" + i);
      final LongWritable offset = new LongWritable(i * numPoints);
      final LongWritable size = new LongWritable(numPoints);
      final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file,
          LongWritable.class, LongWritable.class, CompressionType.NONE);
      try {
        writer.append(offset, size);
      } finally {
        writer.close();
      }
      System.out.println("Wrote input for Map #" + i);
    }

    // start a map/reduce job
    System.out.println("Starting Job");
    final long startTime = System.currentTimeMillis();
    JobClient.runJob(jobConf);
    final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Job Finished in " + duration + " seconds");

    // read outputs
    Path inFile = new Path(outDir, "reduce-out");
    LongWritable numInside = new LongWritable();
    LongWritable numOutside = new LongWritable();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
    try {
      reader.next(numInside, numOutside);
    } finally {
      reader.close();
    }

    // compute estimated value
    return BigDecimal.valueOf(4).setScale(20)
        .multiply(BigDecimal.valueOf(numInside.get()))
        .divide(BigDecimal.valueOf(numMaps))
        .divide(BigDecimal.valueOf(numPoints));
  } finally {
    fs.delete(TMP_DIR, true);
  }
}
From source file:HistogramBucket.java
License:Apache License
@Override
public void write(DataOutput d) throws IOException {
  attribute.write(d);
  LongWritable arraySize = new LongWritable(splits.size());
  arraySize.write(d);
  for (DoubleWritable w : splits) {
    w.write(d);
  }
}
From source file:FlintHadoopTest.java
License:Apache License
/**
 * Test the Map class
 * @throws IOException
 * @throws InstantiationException
 * @throws IllegalAccessException
 */
@Test
public void testMap() throws IOException, InstantiationException, IllegalAccessException {
  mapDriver.withInput(new LongWritable(0), new Text(testPdf1Path));
  assertOutputMatchesRecord(mapDriver.run().get(0), testPdf1CheckResult, testPdf1Name);
}
From source file:MRDriver.java
License:Apache License
public int run(String args[]) throws Exception {
  FileSystem fs = null;
  Path samplesMapPath = null;

  float epsilon = Float.parseFloat(args[0]);
  double delta = Double.parseDouble(args[1]);
  int minFreqPercent = Integer.parseInt(args[2]);
  int d = Integer.parseInt(args[3]);
  int datasetSize = Integer.parseInt(args[4]);
  int numSamples = Integer.parseInt(args[5]);
  double phi = Double.parseDouble(args[6]);
  Random rand;

  /************************ Job 1 (local FIM) Configuration ************************/
  JobConf conf = new JobConf(getConf());

  /*
   * Compute the number of required "votes" for an itemset to be
   * declared frequent
   */
  // The +1 at the end is needed to ensure reqApproxNum > numSamples / 2.
  int reqApproxNum = (int) Math
      .floor((numSamples * (1 - phi)) - Math.sqrt(numSamples * (1 - phi) * 2 * Math.log(1 / delta))) + 1;
  int sampleSize = (int) Math.ceil((2 / Math.pow(epsilon, 2)) * (d + Math.log(1 / phi)));
  //System.out.println("reducersNum: " + numSamples + " reqApproxNum: " + reqApproxNum);

  conf.setInt("PARMM.reducersNum", numSamples);
  conf.setInt("PARMM.datasetSize", datasetSize);
  conf.setInt("PARMM.minFreqPercent", minFreqPercent);
  conf.setInt("PARMM.sampleSize", sampleSize);
  conf.setFloat("PARMM.epsilon", epsilon);

  // Set the number of reducers equal to the number of samples, to
  // maximize parallelism. Required by our Partitioner.
  conf.setNumReduceTasks(numSamples);

  // XXX: why do we disable the speculative execution? MR
  conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
  conf.setInt("mapred.task.timeout", MR_TIMEOUT_MILLI);

  /*
   * Enable compression of map output.
   *
   * We do it for this job and not for the aggregation one because
   * each mapper there only prints out one record for each itemset,
   * so there isn't much to compress, I'd say. MR
   *
   * In Amazon MapReduce compression of the map output seems to
   * happen by default and the Snappy codec is used, which is
   * extremely fast.
   */
  conf.setBoolean("mapred.compress.map.output", true);
  //conf.setMapOutputCompressorClass(com.hadoop.compression.lzo.LzoCodec.class);

  conf.setJarByClass(MRDriver.class);
  conf.setMapOutputKeyClass(IntWritable.class);
  conf.setMapOutputValueClass(Text.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(DoubleWritable.class);
  conf.setInputFormat(SequenceFileInputFormat.class);
  // We write the collections found in a reducer as a SequenceFile
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(conf, new Path(args[9]));

  // set the mapper class based on command line option
  switch (Integer.parseInt(args[7])) {
  case 1:
    System.out.println("running partition mapper...");
    SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
    conf.setMapperClass(PartitionMapper.class);
    break;
  case 2:
    System.out.println("running binomial mapper...");
    SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
    conf.setMapperClass(BinomialSamplerMapper.class);
    break;
  case 3:
    System.out.println("running coin mapper...");
    SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
    conf.setMapperClass(CoinFlipSamplerMapper.class);
  case 4:
    System.out.println("running sampler mapper...");
    SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
    conf.setMapperClass(InputSamplerMapper.class);

    // create a random sample of size T*m
    rand = new Random();
    long sampling_start_time = System.nanoTime();
    int[] samples = new int[numSamples * sampleSize];
    for (int i = 0; i < numSamples * sampleSize; i++) {
      samples[i] = rand.nextInt(datasetSize);
    }

    // for each key in the sample, create a list of all T samples to which this key belongs
    Hashtable<LongWritable, ArrayList<IntWritable>> hashTable = new Hashtable<LongWritable, ArrayList<IntWritable>>();
    for (int i = 0; i < numSamples * sampleSize; i++) {
      ArrayList<IntWritable> sampleIDs = null;
      LongWritable key = new LongWritable(samples[i]);
      if (hashTable.containsKey(key))
        sampleIDs = hashTable.get(key);
      else
        sampleIDs = new ArrayList<IntWritable>();
      sampleIDs.add(new IntWritable(i % numSamples));
      hashTable.put(key, sampleIDs);
    }

    /*
     * Convert the Hashtable to a MapWritable which we will
     * write to HDFS and distribute to all Mappers using
     * DistributedCache
     */
    MapWritable map = new MapWritable();
    for (LongWritable key : hashTable.keySet()) {
      ArrayList<IntWritable> sampleIDs = hashTable.get(key);
      IntArrayWritable sampleIDsIAW = new IntArrayWritable();
      sampleIDsIAW.set(sampleIDs.toArray(new IntWritable[sampleIDs.size()]));
      map.put(key, sampleIDsIAW);
    }

    fs = FileSystem.get(URI.create("samplesMap.ser"), conf);
    samplesMapPath = new Path("samplesMap.ser");
    FSDataOutputStream out = fs.create(samplesMapPath, true);
    map.write(out);
    out.sync();
    out.close();
    DistributedCache.addCacheFile(new URI(fs.getWorkingDirectory() + "/samplesMap.ser#samplesMap.ser"), conf);

    // stop the sampling timer
    long sampling_end_time = System.nanoTime();
    long sampling_runtime = (sampling_end_time - sampling_start_time) / 1000000;
    System.out.println("sampling runtime (milliseconds): " + sampling_runtime);
    break; // end switch case
  case 5:
    System.out.println("running random integer partition mapper...");
    conf.setInputFormat(WholeSplitInputFormat.class);
    Path inputFilePath = new Path(args[8]);
    WholeSplitInputFormat.addInputPath(conf, inputFilePath);
    conf.setMapperClass(RandIntPartSamplerMapper.class);

    // Compute number of map tasks.
    fs = inputFilePath.getFileSystem(conf);
    FileStatus inputFileStatus = fs.getFileStatus(inputFilePath);
    long len = inputFileStatus.getLen();
    long blockSize = inputFileStatus.getBlockSize();
    conf.setLong("mapred.min.split.size", blockSize);
    conf.setLong("mapred.max.split.size", blockSize);
    int mapTasksNum = ((int) (len / blockSize)) + 1;
    conf.setNumMapTasks(mapTasksNum);
    //System.out.println("len: " + len + " blockSize: "
    //    + blockSize + " mapTasksNum: " + mapTasksNum);

    // Extract random integer partition of total sample
    // size into up to mapTasksNum partitions.
    // XXX I'm not sure this is a correct way to do it.
    rand = new Random();
    IntWritable[][] toSampleArr = new IntWritable[mapTasksNum][numSamples];
    for (int j = 0; j < numSamples; j++) {
      IntWritable[] tempToSampleArr = new IntWritable[mapTasksNum];
      int sum = 0;
      int i;
      for (i = 0; i < mapTasksNum - 1; i++) {
        int size = rand.nextInt(sampleSize - sum);
        tempToSampleArr[i] = new IntWritable(size);
        sum += size;
        if (sum > numSamples * sampleSize) {
          System.out.println("Something went wrong generating the sample Sizes");
          System.exit(1);
        }
        if (sum == sampleSize) {
          break;
        }
      }
      if (i == mapTasksNum - 1) {
        tempToSampleArr[i] = new IntWritable(sampleSize - sum);
      } else {
        for (; i < mapTasksNum; i++) {
          tempToSampleArr[i] = new IntWritable(0);
        }
      }
      Collections.shuffle(Arrays.asList(tempToSampleArr));
      for (i = 0; i < mapTasksNum; i++) {
        toSampleArr[i][j] = tempToSampleArr[i];
      }
    }
    for (int i = 0; i < mapTasksNum; i++) {
      DefaultStringifier.storeArray(conf, toSampleArr[i], "PARMM.toSampleArr_" + i);
    }
    break;
  default:
    System.err.println("Wrong Mapper ID. Can only be in [1,5]");
    System.exit(1);
    break;
  }

  /*
   * We don't use the default hash partitioner because we want to
   * maximize the parallelism. That's why we also fix the number
   * of reducers.
   */
  conf.setPartitionerClass(FIMPartitioner.class);
  conf.setReducerClass(FIMReducer.class);

  /************************ Job 2 (aggregation) Configuration ************************/
  JobConf confAggr = new JobConf(getConf());

  confAggr.setInt("PARMM.reducersNum", numSamples);
  confAggr.setInt("PARMM.reqApproxNum", reqApproxNum);
  confAggr.setInt("PARMM.sampleSize", sampleSize);
  confAggr.setFloat("PARMM.epsilon", epsilon);

  // XXX: Why do we disable speculative execution? MR
  confAggr.setBoolean("mapred.reduce.tasks.speculative.execution", false);
  confAggr.setInt("mapred.task.timeout", MR_TIMEOUT_MILLI);

  confAggr.setJarByClass(MRDriver.class);
  confAggr.setMapOutputKeyClass(Text.class);
  confAggr.setMapOutputValueClass(DoubleWritable.class);
  confAggr.setOutputKeyClass(Text.class);
  confAggr.setOutputValueClass(Text.class);

  confAggr.setMapperClass(AggregateMapper.class);
  confAggr.setReducerClass(AggregateReducer.class);

  confAggr.setInputFormat(CombineSequenceFileInputFormat.class);
  SequenceFileInputFormat.addInputPath(confAggr, new Path(args[9]));
  FileOutputFormat.setOutputPath(confAggr, new Path(args[10]));

  long FIMjob_start_time = System.currentTimeMillis();
  RunningJob FIMjob = JobClient.runJob(conf);
  long FIMjob_end_time = System.currentTimeMillis();

  RunningJob aggregateJob = JobClient.runJob(confAggr);
  long aggrJob_end_time = System.currentTimeMillis();

  long FIMjob_runtime = FIMjob_end_time - FIMjob_start_time;
  long aggrJob_runtime = aggrJob_end_time - FIMjob_end_time;

  if (args[7].equals("4")) {
    // Remove samplesMap file
    fs.delete(samplesMapPath, false);
  }

  Counters counters = FIMjob.getCounters();

  Counters.Group FIMMapperStartTimesCounters = counters.getGroup("FIMMapperStart");
  long[] FIMMapperStartTimes = new long[FIMMapperStartTimesCounters.size()];
  int i = 0;
  for (Counters.Counter counter : FIMMapperStartTimesCounters) {
    FIMMapperStartTimes[i++] = counter.getCounter();
  }

  Counters.Group FIMMapperEndTimesCounters = counters.getGroup("FIMMapperEnd");
  long[] FIMMapperEndTimes = new long[FIMMapperEndTimesCounters.size()];
  i = 0;
  for (Counters.Counter counter : FIMMapperEndTimesCounters) {
    FIMMapperEndTimes[i++] = counter.getCounter();
  }

  Counters.Group FIMReducerStartTimesCounters = counters.getGroup("FIMReducerStart");
  long[] FIMReducerStartTimes = new long[FIMReducerStartTimesCounters.size()];
  i = 0;
  for (Counters.Counter counter : FIMReducerStartTimesCounters) {
    FIMReducerStartTimes[i++] = counter.getCounter();
  }

  Counters.Group FIMReducerEndTimesCounters = counters.getGroup("FIMReducerEnd");
  long[] FIMReducerEndTimes = new long[FIMReducerEndTimesCounters.size()];
  i = 0;
  for (Counters.Counter counter : FIMReducerEndTimesCounters) {
    FIMReducerEndTimes[i++] = counter.getCounter();
  }

  Counters countersAggr = aggregateJob.getCounters();

  Counters.Group AggregateMapperStartTimesCounters = countersAggr.getGroup("AggregateMapperStart");
  long[] AggregateMapperStartTimes = new long[AggregateMapperStartTimesCounters.size()];
  i = 0;
  for (Counters.Counter counter : AggregateMapperStartTimesCounters) {
    AggregateMapperStartTimes[i++] = counter.getCounter();
  }

  Counters.Group AggregateMapperEndTimesCounters = countersAggr.getGroup("AggregateMapperEnd");
  long[] AggregateMapperEndTimes = new long[AggregateMapperEndTimesCounters.size()];
  i = 0;
  for (Counters.Counter counter : AggregateMapperEndTimesCounters) {
    AggregateMapperEndTimes[i++] = counter.getCounter();
  }

  Counters.Group AggregateReducerStartTimesCounters = countersAggr.getGroup("AggregateReducerStart");
  long[] AggregateReducerStartTimes = new long[AggregateReducerStartTimesCounters.size()];
  i = 0;
  for (Counters.Counter counter : AggregateReducerStartTimesCounters) {
    AggregateReducerStartTimes[i++] = counter.getCounter();
  }

  Counters.Group AggregateReducerEndTimesCounters = countersAggr.getGroup("AggregateReducerEnd");
  long[] AggregateReducerEndTimes = new long[AggregateReducerEndTimesCounters.size()];
  i = 0;
  for (Counters.Counter counter : AggregateReducerEndTimesCounters) {
    AggregateReducerEndTimes[i++] = counter.getCounter();
  }

  long FIMMapperStartMin = FIMMapperStartTimes[0];
  for (long l : FIMMapperStartTimes) {
    if (l < FIMMapperStartMin) {
      FIMMapperStartMin = l;
    }
  }
  long FIMMapperEndMax = FIMMapperEndTimes[0];
  for (long l : FIMMapperEndTimes) {
    if (l > FIMMapperEndMax) {
      FIMMapperEndMax = l;
    }
  }
  System.out.println("FIM job setup time (milliseconds): " + (FIMMapperStartMin - FIMjob_start_time));
  System.out.println("FIMMapper total runtime (milliseconds): " + (FIMMapperEndMax - FIMMapperStartMin));

  long[] FIMMapperRunTimes = new long[FIMMapperStartTimes.length];
  long FIMMapperRunTimesSum = 0;
  for (int l = 0; l < FIMMapperStartTimes.length; l++) {
    FIMMapperRunTimes[l] = FIMMapperEndTimes[l] - FIMMapperStartTimes[l];
    FIMMapperRunTimesSum += FIMMapperRunTimes[l];
  }
  System.out.println("FIMMapper average task runtime (milliseconds): "
      + FIMMapperRunTimesSum / FIMMapperStartTimes.length);
  long FIMMapperRunTimesMin = FIMMapperRunTimes[0];
  long FIMMapperRunTimesMax = FIMMapperRunTimes[0];
  for (long l : FIMMapperRunTimes) {
    if (l < FIMMapperRunTimesMin) {
      FIMMapperRunTimesMin = l;
    }
    if (l > FIMMapperRunTimesMax) {
      FIMMapperRunTimesMax = l;
    }
  }
  System.out.println("FIMMapper minimum task runtime (milliseconds): " + FIMMapperRunTimesMin);
  System.out.println("FIMMapper maximum task runtime (milliseconds): " + FIMMapperRunTimesMax);

  long FIMReducerStartMin = FIMReducerStartTimes[0];
  for (long l : FIMReducerStartTimes) {
    if (l < FIMReducerStartMin) {
      FIMReducerStartMin = l;
    }
  }
  long FIMReducerEndMax = FIMReducerEndTimes[0];
  for (long l : FIMReducerEndTimes) {
    if (l > FIMReducerEndMax) {
      FIMReducerEndMax = l;
    }
  }
  System.out.println("FIM job shuffle phase runtime (milliseconds): " + (FIMReducerStartMin - FIMMapperEndMax));
  System.out.println("FIMReducer total runtime (milliseconds): " + (FIMReducerEndMax - FIMReducerStartMin));

  long[] FIMReducerRunTimes = new long[FIMReducerStartTimes.length];
  long FIMReducerRunTimesSum = 0;
  for (int l = 0; l < FIMReducerStartTimes.length; l++) {
    FIMReducerRunTimes[l] = FIMReducerEndTimes[l] - FIMReducerStartTimes[l];
    FIMReducerRunTimesSum += FIMReducerRunTimes[l];
  }
  System.out.println("FIMReducer average task runtime (milliseconds): "
      + FIMReducerRunTimesSum / FIMReducerStartTimes.length);
  long FIMReducerRunTimesMin = FIMReducerRunTimes[0];
  long FIMReducerRunTimesMax = FIMReducerRunTimes[0];
  for (long l : FIMReducerRunTimes) {
    if (l < FIMReducerRunTimesMin) {
      FIMReducerRunTimesMin = l;
    }
    if (l > FIMReducerRunTimesMax) {
      FIMReducerRunTimesMax = l;
    }
  }
  System.out.println("FIMReducer minimum task runtime (milliseconds): " + FIMReducerRunTimesMin);
  System.out.println("FIMReducer maximum task runtime (milliseconds): " + FIMReducerRunTimesMax);
  System.out.println("FIM job cooldown time (milliseconds): " + (FIMjob_end_time - FIMReducerEndMax));

  long AggregateMapperStartMin = AggregateMapperStartTimes[0];
  for (long l : AggregateMapperStartTimes) {
    if (l < AggregateMapperStartMin) {
      AggregateMapperStartMin = l;
    }
  }
  long AggregateMapperEndMax = AggregateMapperEndTimes[0];
  for (long l : AggregateMapperEndTimes) {
    if (l > AggregateMapperEndMax) {
      AggregateMapperEndMax = l;
    }
  }
  System.out.println("Aggregation job setup time (milliseconds): "
      + (AggregateMapperStartMin - FIMjob_end_time));
  System.out.println("AggregateMapper total runtime (milliseconds): "
      + (AggregateMapperEndMax - AggregateMapperStartMin));

  long[] AggregateMapperRunTimes = new long[AggregateMapperStartTimes.length];
  long AggregateMapperRunTimesSum = 0;
  for (int l = 0; l < AggregateMapperStartTimes.length; l++) {
    AggregateMapperRunTimes[l] = AggregateMapperEndTimes[l] - AggregateMapperStartTimes[l];
    AggregateMapperRunTimesSum += AggregateMapperRunTimes[l];
  }
  System.out.println("AggregateMapper average task runtime (milliseconds): "
      + AggregateMapperRunTimesSum / AggregateMapperStartTimes.length);
  long AggregateMapperRunTimesMin = AggregateMapperRunTimes[0];
  long AggregateMapperRunTimesMax = AggregateMapperRunTimes[0];
  for (long l : AggregateMapperRunTimes) {
    if (l < AggregateMapperRunTimesMin) {
      AggregateMapperRunTimesMin = l;
    }
    if (l > AggregateMapperRunTimesMax) {
      AggregateMapperRunTimesMax = l;
    }
  }
  System.out.println("AggregateMapper minimum task runtime (milliseconds): " + AggregateMapperRunTimesMin);
  System.out.println("AggregateMapper maximum task runtime (milliseconds): " + AggregateMapperRunTimesMax);

  long AggregateReducerStartMin = AggregateReducerStartTimes[0];
  for (long l : AggregateReducerStartTimes) {
    if (l < AggregateReducerStartMin) {
      AggregateReducerStartMin = l;
    }
  }
  long AggregateReducerEndMax = AggregateReducerEndTimes[0];
  for (long l : AggregateReducerEndTimes) {
    if (l > AggregateReducerEndMax) {
      AggregateReducerEndMax = l;
    }
  }
  System.out.println("Aggregate job round shuffle phase runtime (milliseconds): "
      + (AggregateReducerStartMin - AggregateMapperEndMax));
  System.out.println("AggregateReducer total runtime (milliseconds): "
      + (AggregateReducerEndMax - AggregateReducerStartMin));

  long[] AggregateReducerRunTimes = new long[AggregateReducerStartTimes.length];
  long AggregateReducerRunTimesSum = 0;
  for (int l = 0; l < AggregateReducerStartTimes.length; l++) {
    AggregateReducerRunTimes[l] = AggregateReducerEndTimes[l] - AggregateReducerStartTimes[l];
    AggregateReducerRunTimesSum += AggregateReducerRunTimes[l];
  }
  System.out.println("AggregateReducer average task runtime (milliseconds): "
      + AggregateReducerRunTimesSum / AggregateReducerStartTimes.length);
  long AggregateReducerRunTimesMin = AggregateReducerRunTimes[0];
  long AggregateReducerRunTimesMax = AggregateReducerRunTimes[0];
  for (long l : AggregateReducerRunTimes) {
    if (l < AggregateReducerRunTimesMin) {
      AggregateReducerRunTimesMin = l;
    }
    if (l > AggregateReducerRunTimesMax) {
      AggregateReducerRunTimesMax = l;
    }
  }
  System.out.println("AggregateReducer minimum task runtime (milliseconds): " + AggregateReducerRunTimesMin);
  System.out.println("AggregateReducer maximum task runtime (milliseconds): " + AggregateReducerRunTimesMax);
  System.out.println("Aggregation job cooldown time (milliseconds): "
      + (aggrJob_end_time - AggregateReducerEndMax));

  System.out.println("total runtime (all inclusive) (milliseconds): "
      + (aggrJob_end_time - FIMjob_start_time));
  System.out.println("total runtime (no FIM job setup, no aggregation job cooldown) (milliseconds): "
      + (AggregateReducerEndMax - FIMMapperStartMin));
  System.out.println("total runtime (no setups, no cooldowns) (milliseconds): "
      + (FIMReducerEndMax - FIMMapperStartMin + AggregateReducerEndMax - AggregateMapperStartMin));
  System.out.println("FIM job runtime (including setup and cooldown) (milliseconds): " + FIMjob_runtime);
  System.out.println("FIM job runtime (no setup, no cooldown) (milliseconds): "
      + (FIMReducerEndMax - FIMMapperStartMin));
  System.out.println("Aggregation job runtime (including setup and cooldown) (milliseconds): " + aggrJob_runtime);
  System.out.println("Aggregation job runtime (no setup, no cooldown) (milliseconds): "
      + (AggregateReducerEndMax - AggregateMapperStartMin));

  return 0;
}
From source file:alluxio.client.hadoop.DFSIOIntegrationTest.java
License:Apache License
@SuppressWarnings("deprecation")
private void createControlFile(org.apache.hadoop.fs.FileSystem fs, long nrBytes, // in bytes
    int nrFiles) throws IOException {
  LOG.info("creating control file: " + nrBytes + " bytes, " + nrFiles + " files");

  Path controlDir = getControlDir(mConfig);

  if (!fs.exists(controlDir)) {
    fs.delete(controlDir, true);

    for (int i = 0; i < nrFiles; i++) {
      String name = getFileName(i);
      Path controlFile = new Path(controlDir, "in_file_" + name);
      SequenceFile.Writer writer = null;
      try {
        writer = SequenceFile.createWriter(fs, mConfig, controlFile, Text.class, LongWritable.class,
            CompressionType.NONE);
        writer.append(new Text(name), new LongWritable(nrBytes));
      } catch (Exception e) {
        throw new IOException(e.getLocalizedMessage());
      } finally {
        if (writer != null) {
          writer.close();
        }
        writer = null;
      }
    }
  }
  LOG.info("created control files for: " + nrFiles + " files");
}
From source file:Assignment4_P4_MemoryConscious.MovingRatingMemConscious_Combiner.java
public void reduce(IntWritable key, Iterable<SortedMapWritable> values, Context context)
    throws IOException, InterruptedException {
  // loop through each hashmap for this movie id
  for (SortedMapWritable val : values) {
    // inside each hashmap, loop for every entry
    for (Map.Entry<WritableComparable, Writable> entry : val.entrySet()) {
      // check if current entry's key is already present in new hashmap
      if (result.containsKey(entry.getKey())) {
        // if yes, extract current value from result hashmap for this key
        LongWritable existingValue = (LongWritable) result.get(entry.getKey());
        // increment existing value by 1
        existingValue.set(existingValue.get() + 1);
        // update result hashmap with new value
        result.put(entry.getKey(), existingValue);
      } else {
        // if not, create new entry with init value 1
        result.put(entry.getKey(), new LongWritable(1));
      }
    }
    val.clear();
  }
  context.write(key, result);
}
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFHybridBenchmark.java
License:Apache License
public static List<double[]> generateRandomInputData(Configuration conf, FileSystem fs, Path in,
    int numBspTask, int numGPUBspTask, int userCount, int itemCount, int percentNonZeroValues,
    int GPUPercentage, int maxTestPrefs) throws IOException {

  // Delete input directory if already exist
  if (fs.exists(in)) {
    fs.delete(in, true);
  }

  Random rand = new Random(32L);
  Set<Map.Entry<Long, Long>> userItemPairs = new HashSet<Map.Entry<Long, Long>>();
  List<double[]> testItems = new ArrayList<double[]>();

  int possibleUserItemRatings = userCount * itemCount;
  int userItemRatings = possibleUserItemRatings * percentNonZeroValues / 100;
  System.out.println("generateRandomInputData possibleRatings: " + possibleUserItemRatings
      + " ratings: " + userItemRatings);

  // Compute work distributions
  int cpuTaskNum = numBspTask - numGPUBspTask;
  long ratingsPerGPUTask = 0;
  long ratingsPerCPU = 0;
  long ratingsPerCPUTask = 0;
  if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) {
    ratingsPerGPUTask = (userItemRatings * GPUPercentage) / 100;
    ratingsPerCPU = userItemRatings - ratingsPerGPUTask;
  } else {
    ratingsPerCPU = userItemRatings;
  }
  if (cpuTaskNum > 0) {
    ratingsPerCPUTask = ratingsPerCPU / cpuTaskNum;
  }
  System.out.println("generateRandomInputData ratingsPerGPUTask: " + ratingsPerGPUTask
      + " ratingsPerCPU: " + ratingsPerCPU + " ratingsPerCPUTask: " + ratingsPerCPUTask);

  for (int part = 0; part < numBspTask; part++) {
    Path partIn = new Path(in, "part" + part + ".seq");
    final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn,
        LongWritable.class, PipesVectorWritable.class, CompressionType.NONE);

    long interval = 0;
    if (part > cpuTaskNum) {
      interval = ratingsPerGPUTask;
    } else {
      interval = ratingsPerCPUTask;
    }
    long start = interval * part;
    long end = start + interval - 1;
    if ((numBspTask - 1) == part) {
      end = userItemRatings;
    }
    System.out.println("Partition " + part + ": from " + start + " to " + end);

    for (long i = start; i <= end; i++) {
      // Find new user item rating which was not used before
      Map.Entry<Long, Long> userItemPair;
      do {
        long userId = rand.nextInt(userCount);
        long itemId = rand.nextInt(itemCount);
        userItemPair = new AbstractMap.SimpleImmutableEntry<Long, Long>(userId, itemId);
      } while (userItemPairs.contains(userItemPair));

      // Add user item rating
      userItemPairs.add(userItemPair);

      // Generate rating
      int rating = rand.nextInt(5) + 1; // values between 1 and 5

      // Add user item rating to test data
      if (i < maxTestPrefs) {
        testItems.add(new double[] { userItemPair.getKey(), userItemPair.getValue(), rating });
      }

      // Write out user item rating
      dataWriter.append(new LongWritable(userItemPair.getKey()), new PipesVectorWritable(
          new DenseDoubleVector(new double[] { userItemPair.getValue(), rating })));
    }
    dataWriter.close();
  }

  return testItems;
}
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFHybridBenchmark.java
License:Apache License
public static List<double[]> convertInputData(Configuration conf, FileSystem fs, Path in,
    Path preferencesIn, String inputFile, String separator, int maxTestPrefs) throws IOException {

  List<double[]> testItems = new ArrayList<double[]>();

  // Delete input files if already exist
  if (fs.exists(in)) {
    fs.delete(in, true);
  }
  if (fs.exists(preferencesIn)) {
    fs.delete(preferencesIn, true);
  }

  final SequenceFile.Writer prefWriter = SequenceFile.createWriter(fs, conf, preferencesIn,
      LongWritable.class, PipesVectorWritable.class, CompressionType.NONE);

  BufferedReader br = new BufferedReader(new FileReader(inputFile));
  String line;
  while ((line = br.readLine()) != null) {
    String[] values = line.split(separator);
    long userId = Long.parseLong(values[0]);
    long itemId = Long.parseLong(values[1]);
    double rating = Double.parseDouble(values[2]);
    // System.out.println("userId: " + userId + " itemId: " + itemId
    //     + " rating: " + rating);

    double vector[] = new double[2];
    vector[0] = itemId;
    vector[1] = rating;
    prefWriter.append(new LongWritable(userId), new PipesVectorWritable(new DenseDoubleVector(vector)));

    // Add test preferences
    maxTestPrefs--;
    if (maxTestPrefs > 0) {
      testItems.add(new double[] { userId, itemId, rating });
    }
  }
  br.close();
  prefWriter.close();

  return testItems;
}
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java
License:Apache License
public static List<Preference<Long, Long>> prepareTestInputData(Configuration conf, FileSystem fs,
    Path in, Path preferencesIn) throws IOException {

  Preference[] train_prefs = { new Preference<Integer, Integer>(1, 0, 4),
      new Preference<Integer, Integer>(1, 1, 2.5), new Preference<Integer, Integer>(1, 2, 3.5),
      new Preference<Integer, Integer>(2, 0, 4), new Preference<Integer, Integer>(2, 1, 2.5),
      new Preference<Integer, Integer>(2, 2, 3.5), new Preference<Integer, Integer>(2, 3, 1),
      new Preference<Integer, Integer>(2, 4, 3.5), new Preference<Integer, Integer>(3, 0, 4),
      new Preference<Integer, Integer>(3, 1, 2.5), new Preference<Integer, Integer>(3, 2, 3.5),
      new Preference<Integer, Integer>(3, 3, 1), new Preference<Integer, Integer>(3, 4, 3.5) };

  List<Preference<Long, Long>> test_prefs = new ArrayList<Preference<Long, Long>>();
  test_prefs.add(new Preference<Long, Long>(1l, 0l, 4));
  test_prefs.add(new Preference<Long, Long>(1l, 1l, 2.5));
  test_prefs.add(new Preference<Long, Long>(1l, 2l, 3.5));
  test_prefs.add(new Preference<Long, Long>(1l, 3l, 1));
  test_prefs.add(new Preference<Long, Long>(1l, 4l, 3.5));

  // Delete input files if already exist
  if (fs.exists(in)) {
    fs.delete(in, true);
  }
  if (fs.exists(preferencesIn)) {
    fs.delete(preferencesIn, true);
  }

  final SequenceFile.Writer prefWriter = SequenceFile.createWriter(fs, conf, preferencesIn,
      LongWritable.class, PipesVectorWritable.class, CompressionType.NONE);

  for (Preference<Integer, Integer> taste : train_prefs) {
    double values[] = new double[2];
    values[0] = taste.getItemId();
    values[1] = taste.getValue().get();

    prefWriter.append(new LongWritable(taste.getUserId()),
        new PipesVectorWritable(new DenseDoubleVector(values)));
  }
  prefWriter.close();

  return test_prefs;
}
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java
License:Apache License
public static List<Preference<Long, Long>> generateRandomInputData(Configuration conf, FileSystem fs,
    Path in, int numBspTask, int numGPUBspTask, int userCount, int itemCount,
    int percentNonZeroValues, int GPUPercentage, int maxTestPrefs) throws IOException {

  // Delete input directory if already exist
  if (fs.exists(in)) {
    fs.delete(in, true);
  }

  Random rand = new Random(32L);
  Set<Map.Entry<Long, Long>> userItemPairs = new HashSet<Map.Entry<Long, Long>>();
  List<Preference<Long, Long>> testItems = new ArrayList<Preference<Long, Long>>();

  int possibleUserItemRatings = userCount * itemCount;
  int userItemRatings = possibleUserItemRatings * percentNonZeroValues / 100;
  System.out.println("generateRandomInputData possibleRatings: " + possibleUserItemRatings
      + " ratings: " + userItemRatings);

  // Compute work distributions
  int cpuTaskNum = numBspTask - numGPUBspTask;
  long ratingsPerGPUTask = 0;
  long ratingsPerCPU = 0;
  long ratingsPerCPUTask = 0;
  if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) {
    ratingsPerGPUTask = (userItemRatings * GPUPercentage) / 100;
    ratingsPerCPU = userItemRatings - ratingsPerGPUTask;
  } else {
    ratingsPerCPU = userItemRatings;
  }
  if (cpuTaskNum > 0) {
    ratingsPerCPUTask = ratingsPerCPU / cpuTaskNum;
  }
  System.out.println("generateRandomInputData ratingsPerGPUTask: " + ratingsPerGPUTask
      + " ratingsPerCPU: " + ratingsPerCPU + " ratingsPerCPUTask: " + ratingsPerCPUTask);

  for (int part = 0; part < numBspTask; part++) {
    Path partIn = new Path(in, "part" + part + ".seq");
    final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn,
        LongWritable.class, PipesVectorWritable.class, CompressionType.NONE);

    long interval = 0;
    if (part > cpuTaskNum) {
      interval = ratingsPerGPUTask;
    } else {
      interval = ratingsPerCPUTask;
    }
    long start = interval * part;
    long end = start + interval - 1;
    if ((numBspTask - 1) == part) {
      end = userItemRatings;
    }
    LOG.info("Partition " + part + ": from " + start + " to " + end);

    for (long i = start; i <= end; i++) {
      // Find new user item rating which was not used before
      Map.Entry<Long, Long> userItemPair;
      do {
        long userId = rand.nextInt(userCount);
        long itemId = rand.nextInt(itemCount);
        userItemPair = new AbstractMap.SimpleImmutableEntry<Long, Long>(userId, itemId);
      } while (userItemPairs.contains(userItemPair));

      // Add user item rating
      userItemPairs.add(userItemPair);

      // Generate rating
      int rating = rand.nextInt(5) + 1; // values between 1 and 5

      // Add user item rating to test data
      if (i < maxTestPrefs) {
        testItems.add(new Preference<Long, Long>(userItemPair.getKey(), userItemPair.getValue(), rating));
      }

      // Write out user item rating
      dataWriter.append(new LongWritable(userItemPair.getKey()), new PipesVectorWritable(
          new DenseDoubleVector(new double[] { userItemPair.getValue(), rating })));
    }
    dataWriter.close();
  }

  return testItems;
}