Example usage for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.getFileStatus.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Source Link

Document

Return a file status object that represents the path.

Usage

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Writes one seven-field record to a new file, then checks the resulting file length and
 * the record/segment counts reported when the file is reopened.
 *
 * Any exception raised along the way is printed and turned into a test failure.
 */
public void testNoFillLastSegment() {
    // Declared outside the try so the reader can be released in the finally block.
    FormatDataFile fd2 = null;
    try {
        String fileName = prefix + "testNoFillLastSegment";
        Head head = new Head();

        // One field of each type; the last constructor argument is the field index.
        FieldMap fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));
        fieldMap.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) 1));
        fieldMap.addField(new Field(ConstVar.FieldType_Int, ConstVar.Sizeof_Int, (short) 2));
        fieldMap.addField(new Field(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, (short) 3));
        fieldMap.addField(new Field(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, (short) 4));
        fieldMap.addField(new Field(ConstVar.FieldType_Double, ConstVar.Sizeof_Double, (short) 5));
        fieldMap.addField(new Field(ConstVar.FieldType_String, 0, (short) 6));

        head.setFieldMap(fieldMap);

        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        fd.create(fileName, head);

        Record record = new Record(7);
        record.addValue(new FieldValue((byte) 1, (short) 0));
        record.addValue(new FieldValue((short) 2, (short) 1));
        record.addValue(new FieldValue((int) 3, (short) 2));
        record.addValue(new FieldValue((long) 4, (short) 3));
        record.addValue(new FieldValue((float) 5.5, (short) 4));
        record.addValue(new FieldValue((double) 6.6, (short) 5));
        record.addValue(new FieldValue("hello konten", (short) 6));

        fd.addRecord(record);

        fd.close();

        FileSystem fs = FileSystem.get(conf);
        long fileLen = fs.getFileStatus(new Path(fileName)).getLen();

        // Expected length: header, one seven-field chunk, plus the chunk/index constants below.
        int tlen = head.len() + full7chunkLen + 8 + ConstVar.DataChunkMetaOffset + ConstVar.LineIndexRecordLen
                + ConstVar.IndexMetaOffset + ConstVar.LineIndexRecordLen + ConstVar.IndexMetaOffset;
        if (fileLen != tlen) {
            fail("error file len:" + fileLen);
        }

        fd2 = new FormatDataFile(new Configuration());
        fd2.open(fileName);
        if (fd2.recordNum() != 1) {
            fail("error record num:" + fd2.recordNum());
        }
        if (fd2.segmentNum() != 1) {
            fail("error segment num:" + fd2.segmentNum());
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    } finally {
        // The reader used to be left open; release it even when an assertion fails.
        if (fd2 != null) {
            try {
                fd2.close();
            } catch (Exception closeFailure) {
                // A failure to close must not change the outcome of the test itself.
                closeFailure.printStackTrace();
            }
        }
    }
}

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Creates a file that contains only a header, then checks the file length and that
 * reopening it reports zero records and zero segments.
 *
 * Any exception raised along the way is printed and turned into a test failure.
 */
public void testOpenNoRecord() {
    // Declared outside the try so the reader can be released in the finally block.
    FormatDataFile fd2 = null;
    try {
        String fileName = prefix + "testOpenNoRecord";
        Head head = new Head();
        FormatDataFile fd = new FormatDataFile(new Configuration());
        fd.create(fileName, head);
        fd.close();

        FileSystem fs = FileSystem.get(new Configuration());
        long fileLen = fs.getFileStatus(new Path(fileName)).getLen();
        if (fileLen != head.len() + ConstVar.IndexMetaOffset) {
            fail("error file len:" + fileLen);
        }

        fd2 = new FormatDataFile(new Configuration());
        fd2.open(fileName);
        if (fd2.recordNum() != 0) {
            fail("error record num:" + fd2.recordNum());
        }
        if (fd2.segmentNum() != 0) {
            fail("error segment num:" + fd2.segmentNum());
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    } finally {
        // The reader used to be left open; release it even when an assertion fails.
        if (fd2 != null) {
            try {
                fd2.close();
            } catch (Exception closeFailure) {
                // A failure to close must not change the outcome of the test itself.
                closeFailure.printStackTrace();
            }
        }
    }
}

From source file:RunText.java

License:Apache License

/**
 * Command-line entry point.
 *
 * Parses the options, divides the input file into {@code numThreads} byte ranges, starts one
 * {@code RunText} worker thread per range and prints a records/sec figure every five seconds
 * (derived from {@code totalCount}) until {@code runningThreads} drops to zero.
 *
 * @param args command-line arguments handed to JCommander
 * @throws Exception on any file system failure, on interruption while waiting, or when the
 *         configured thread count is not positive
 */
public static void main(String[] args) throws Exception {
    o = new Options();
    JCommander jc = null;
    try {
        jc = new JCommander(o, args);
        jc.setProgramName("./runText");
    } catch (ParameterException e) {
        System.out.println(e.getMessage());
        // Print the usage without parsing anything. The previous placeholder argv contained a
        // bare "v" and non-numeric placeholder values, so parsing it could itself throw here
        // and hide the usage text.
        JCommander usagePrinter = new JCommander(o);
        usagePrinter.setProgramName("./runText");
        usagePrinter.usage();
        System.exit(-1);
    }
    if (o.help) {
        jc.usage();
        System.exit(0);
    }
    path = new Path(o.path);
    delim = o.delimiter.getBytes()[0];
    toFind = o.value;
    index = o.index;
    numThreads = o.threads;
    if (numThreads <= 0) {
        // Guards the division below and the otherwise silent "no work done" case.
        throw new IllegalArgumentException("Number of threads must be positive: " + numThreads);
    }
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    TextInputFormat format = new TextInputFormat();
    long fileLen = fs.getFileStatus(path).getLen();
    long len = fileLen / numThreads;

    List<Thread> threads = Lists.newArrayList();

    for (int i = 0; i < numThreads; i++) {
        long start = i * len;
        // The integer division above truncates; give the remainder to the last split so the
        // tail of the file is not left out of every range.
        long splitLen = (i == numThreads - 1) ? fileLen - start : len;
        FileSplit split = new FileSplit(path, start, splitLen, new String[] { "" });
        threads.add(new Thread(new RunText(split, format)));
    }

    runningThreads = new AtomicInteger(numThreads);

    for (Thread t : threads) {
        t.start();
    }

    // Throughput is reported per interval: records counted since the previous sample,
    // divided by the nanoseconds elapsed since that sample.
    int prev = 0;
    int current;
    long t1 = System.nanoTime();
    long t2;
    while (runningThreads.get() > 0) {
        Thread.sleep(5000);
        current = totalCount.get();
        t2 = System.nanoTime();
        System.out.println(String.format("%f records/sec", (current - prev) * 1e9 / (t2 - t1)));
        t1 = t2;
        prev = current;
    }

    for (Thread t : threads) {
        t.join();
    }

    fs.close();
}

From source file:HDFSRandomAccessFile.java

License:Apache License

public HDFSRandomAccessFile(String fileSystemURI, String location, int bufferSize) throws IOException {
    super(bufferSize);
    fsURI = URI.create(fileSystemURI);
    filePath = new Path(location);
    this.location = location;
    if (debugLeaks) {
        openFiles.add(location);/* w w  w  . j a  v  a 2  s . co  m*/
    }

    FileSystem fs = FileSystem.get(fsURI, new Configuration());
    hfile = fs.open(filePath);

    fileStatus = fs.getFileStatus(filePath);
}

From source file:MRDriver.java

License:Apache License

/**
 * Configures and runs two MapReduce jobs back to back (the "local FIM" job and the
 * aggregation job) and prints timing statistics derived from the jobs' counters.
 *
 * Positional arguments, as read by this method:
 * <ol start="0">
 * <li>epsilon (float)</li>
 * <li>delta (double)</li>
 * <li>minimum frequency percentage (int)</li>
 * <li>d (int)</li>
 * <li>dataset size (int)</li>
 * <li>number of samples, also used as the number of reducers (int)</li>
 * <li>phi (double)</li>
 * <li>mapper id in [1,5] selecting the mapper class of the first job</li>
 * <li>input path of the first job</li>
 * <li>output path of the first job, which is the input path of the aggregation job</li>
 * <li>output path of the aggregation job</li>
 * </ol>
 *
 * An unknown mapper id terminates the JVM through {@code System.exit(1)}.
 *
 * @param args positional arguments described above
 * @return 0 once both jobs have run and the statistics have been printed
 * @throws Exception on argument parsing, file system or job failures
 */
public int run(String args[]) throws Exception {
    FileSystem fs = null;
    Path samplesMapPath = null;

    float epsilon = Float.parseFloat(args[0]);
    double delta = Double.parseDouble(args[1]);
    int minFreqPercent = Integer.parseInt(args[2]);
    int d = Integer.parseInt(args[3]);
    int datasetSize = Integer.parseInt(args[4]);
    int numSamples = Integer.parseInt(args[5]);
    double phi = Double.parseDouble(args[6]);
    Random rand;

    /************************ Job 1 (local FIM) Configuration ************************/

    JobConf conf = new JobConf(getConf());

    /*
     * Compute the number of required "votes" for an itemset to be
     * declared frequent.
     */
    // The +1 at the end is needed to ensure reqApproxNum > numsamples / 2.
    int reqApproxNum = (int) Math
            .floor((numSamples * (1 - phi)) - Math.sqrt(numSamples * (1 - phi) * 2 * Math.log(1 / delta))) + 1;
    int sampleSize = (int) Math.ceil((2 / Math.pow(epsilon, 2)) * (d + Math.log(1 / phi)));

    conf.setInt("PARMM.reducersNum", numSamples);
    conf.setInt("PARMM.datasetSize", datasetSize);
    conf.setInt("PARMM.minFreqPercent", minFreqPercent);
    conf.setInt("PARMM.sampleSize", sampleSize);
    conf.setFloat("PARMM.epsilon", epsilon);

    // Set the number of reducers equal to the number of samples, to
    // maximize parallelism. Required by our Partitioner.
    conf.setNumReduceTasks(numSamples);

    // XXX: why do we disable the speculative execution? MR
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    conf.setInt("mapred.task.timeout", MR_TIMEOUT_MILLI);

    /*
     * Enable compression of map output.
     *
     * We do it for this job and not for the aggregation one because
     * each mapper there only prints out one record for each itemset,
     * so there isn't much to compress, I'd say. MR
     *
     * In Amazon MapReduce compression of the map output seems to
     * happen by default and the Snappy codec is used, which is
     * extremely fast.
     */
    conf.setBoolean("mapred.compress.map.output", true);

    conf.setJarByClass(MRDriver.class);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setInputFormat(SequenceFileInputFormat.class);
    // We write the collections found in a reducer as a SequenceFile
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(conf, new Path(args[9]));

    // set the mapper class based on command line option
    switch (Integer.parseInt(args[7])) {
    case 1:
        System.out.println("running partition mapper...");
        SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
        conf.setMapperClass(PartitionMapper.class);
        break;
    case 2:
        System.out.println("running binomial mapper...");
        SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
        conf.setMapperClass(BinomialSamplerMapper.class);
        break;
    case 3:
        System.out.println("running coin mapper...");
        SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
        conf.setMapperClass(CoinFlipSamplerMapper.class);
        // This break was missing: execution fell through into case 4, which replaced the
        // mapper class with InputSamplerMapper and added the input path a second time.
        break;
    case 4:
        System.out.println("running sampler mapper...");
        SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
        conf.setMapperClass(InputSamplerMapper.class);

        // create a random sample of size T*m
        rand = new Random();
        long sampling_start_time = System.nanoTime();
        int[] samples = new int[numSamples * sampleSize];
        for (int i = 0; i < numSamples * sampleSize; i++) {
            samples[i] = rand.nextInt(datasetSize);
        }

        // for each key in the sample, create a list of all T samples to which this key belongs
        Hashtable<LongWritable, ArrayList<IntWritable>> hashTable = new Hashtable<LongWritable, ArrayList<IntWritable>>();
        for (int i = 0; i < numSamples * sampleSize; i++) {
            ArrayList<IntWritable> sampleIDs = null;
            LongWritable key = new LongWritable(samples[i]);
            if (hashTable.containsKey(key))
                sampleIDs = hashTable.get(key);
            else
                sampleIDs = new ArrayList<IntWritable>();
            sampleIDs.add(new IntWritable(i % numSamples));
            hashTable.put(key, sampleIDs);
        }

        /*
         * Convert the Hashtable to a MapWritable which we will
         * write to HDFS and distribute to all Mappers using
         * DistributedCache
         */
        MapWritable map = new MapWritable();
        for (LongWritable key : hashTable.keySet()) {
            ArrayList<IntWritable> sampleIDs = hashTable.get(key);
            IntArrayWritable sampleIDsIAW = new IntArrayWritable();
            sampleIDsIAW.set(sampleIDs.toArray(new IntWritable[sampleIDs.size()]));
            map.put(key, sampleIDsIAW);
        }

        fs = FileSystem.get(URI.create("samplesMap.ser"), conf);
        samplesMapPath = new Path("samplesMap.ser");
        FSDataOutputStream out = fs.create(samplesMapPath, true);
        map.write(out);
        out.sync();
        out.close();
        DistributedCache.addCacheFile(new URI(fs.getWorkingDirectory() + "/samplesMap.ser#samplesMap.ser"),
                conf);
        // stop the sampling timer
        long sampling_end_time = System.nanoTime();
        long sampling_runtime = (sampling_end_time - sampling_start_time) / 1000000;
        System.out.println("sampling runtime (milliseconds): " + sampling_runtime);
        break; // end switch case
    case 5:
        System.out.println("running random integer partition mapper...");
        conf.setInputFormat(WholeSplitInputFormat.class);
        Path inputFilePath = new Path(args[8]);
        WholeSplitInputFormat.addInputPath(conf, inputFilePath);
        conf.setMapperClass(RandIntPartSamplerMapper.class);
        // Compute number of map tasks.
        fs = inputFilePath.getFileSystem(conf);
        FileStatus inputFileStatus = fs.getFileStatus(inputFilePath);
        long len = inputFileStatus.getLen();
        long blockSize = inputFileStatus.getBlockSize();
        conf.setLong("mapred.min.split.size", blockSize);
        conf.setLong("mapred.max.split.size", blockSize);
        int mapTasksNum = ((int) (len / blockSize)) + 1;
        conf.setNumMapTasks(mapTasksNum);
        // Extract random integer partition of total sample
        // size into up to mapTasksNum partitions.
        // XXX I'm not sure this is a correct way to do
        // it.
        rand = new Random();
        IntWritable[][] toSampleArr = new IntWritable[mapTasksNum][numSamples];
        for (int j = 0; j < numSamples; j++) {
            IntWritable[] tempToSampleArr = new IntWritable[mapTasksNum];
            int sum = 0;
            int i;
            for (i = 0; i < mapTasksNum - 1; i++) {
                int size = rand.nextInt(sampleSize - sum);
                tempToSampleArr[i] = new IntWritable(size);
                sum += size;
                if (sum > numSamples * sampleSize) {
                    System.out.println("Something went wrong generating the sample Sizes");
                    System.exit(1);
                }
                if (sum == sampleSize) {
                    break;
                }
            }
            if (i == mapTasksNum - 1) {
                tempToSampleArr[i] = new IntWritable(sampleSize - sum);
            } else {
                for (; i < mapTasksNum; i++) {
                    tempToSampleArr[i] = new IntWritable(0);
                }
            }
            Collections.shuffle(Arrays.asList(tempToSampleArr));
            for (i = 0; i < mapTasksNum; i++) {
                toSampleArr[i][j] = tempToSampleArr[i];
            }
        }

        for (int i = 0; i < mapTasksNum; i++) {
            DefaultStringifier.storeArray(conf, toSampleArr[i], "PARMM.toSampleArr_" + i);
        }
        break;
    default:
        System.err.println("Wrong Mapper ID. Can only be in [1,5]");
        System.exit(1);
        break;
    }

    /*
     * We don't use the default hash partitioner because we want to
     * maximize the parallelism. That's why we also fix the number
     * of reducers.
     */
    conf.setPartitionerClass(FIMPartitioner.class);

    conf.setReducerClass(FIMReducer.class);

    /************************ Job 2 (aggregation) Configuration ************************/

    JobConf confAggr = new JobConf(getConf());

    confAggr.setInt("PARMM.reducersNum", numSamples);
    confAggr.setInt("PARMM.reqApproxNum", reqApproxNum);
    confAggr.setInt("PARMM.sampleSize", sampleSize);
    confAggr.setFloat("PARMM.epsilon", epsilon);

    // XXX: Why do we disable speculative execution? MR
    confAggr.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    confAggr.setInt("mapred.task.timeout", MR_TIMEOUT_MILLI);

    confAggr.setJarByClass(MRDriver.class);

    confAggr.setMapOutputKeyClass(Text.class);
    confAggr.setMapOutputValueClass(DoubleWritable.class);

    confAggr.setOutputKeyClass(Text.class);
    confAggr.setOutputValueClass(Text.class);

    confAggr.setMapperClass(AggregateMapper.class);
    confAggr.setReducerClass(AggregateReducer.class);

    confAggr.setInputFormat(CombineSequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(confAggr, new Path(args[9]));

    FileOutputFormat.setOutputPath(confAggr, new Path(args[10]));

    /************************ Run both jobs ************************/

    long fimJobStartTime = System.currentTimeMillis();
    RunningJob fimJob = JobClient.runJob(conf);
    long fimJobEndTime = System.currentTimeMillis();

    RunningJob aggregateJob = JobClient.runJob(confAggr);
    long aggrJobEndTime = System.currentTimeMillis();

    long fimJobRuntime = fimJobEndTime - fimJobStartTime;
    long aggrJobRuntime = aggrJobEndTime - fimJobEndTime;

    // samplesMapPath is only set by the sampler mapper branch (case 4); keying the cleanup on
    // it rather than on the raw argument string also covers inputs such as "04".
    if (samplesMapPath != null) {
        // Remove samplesMap file
        fs.delete(samplesMapPath, false);
    }

    /************************ Collect per-task start/end counters ************************/

    Counters counters = fimJob.getCounters();
    long[] fimMapperStartTimes = readCounterGroup(counters, "FIMMapperStart");
    long[] fimMapperEndTimes = readCounterGroup(counters, "FIMMapperEnd");
    long[] fimReducerStartTimes = readCounterGroup(counters, "FIMReducerStart");
    long[] fimReducerEndTimes = readCounterGroup(counters, "FIMReducerEnd");

    Counters countersAggr = aggregateJob.getCounters();
    long[] aggrMapperStartTimes = readCounterGroup(countersAggr, "AggregateMapperStart");
    long[] aggrMapperEndTimes = readCounterGroup(countersAggr, "AggregateMapperEnd");
    long[] aggrReducerStartTimes = readCounterGroup(countersAggr, "AggregateReducerStart");
    long[] aggrReducerEndTimes = readCounterGroup(countersAggr, "AggregateReducerEnd");

    /************************ Report timings ************************/

    long fimMapperStartMin = minOf(fimMapperStartTimes);
    long fimMapperEndMax = maxOf(fimMapperEndTimes);
    System.out.println("FIM job setup time (milliseconds): " + (fimMapperStartMin - fimJobStartTime));
    System.out.println("FIMMapper total runtime (milliseconds): " + (fimMapperEndMax - fimMapperStartMin));
    printTaskRuntimeStats("FIMMapper", fimMapperStartTimes, fimMapperEndTimes);

    long fimReducerStartMin = minOf(fimReducerStartTimes);
    long fimReducerEndMax = maxOf(fimReducerEndTimes);
    System.out
            .println("FIM job shuffle phase runtime (milliseconds): " + (fimReducerStartMin - fimMapperEndMax));
    System.out.println("FIMReducer total runtime (milliseconds): " + (fimReducerEndMax - fimReducerStartMin));
    printTaskRuntimeStats("FIMReducer", fimReducerStartTimes, fimReducerEndTimes);
    System.out.println("FIM job cooldown time (milliseconds): " + (fimJobEndTime - fimReducerEndMax));

    long aggrMapperStartMin = minOf(aggrMapperStartTimes);
    long aggrMapperEndMax = maxOf(aggrMapperEndTimes);
    System.out.println(
            "Aggregation job setup time (milliseconds): " + (aggrMapperStartMin - fimJobEndTime));
    System.out.println("AggregateMapper total runtime (milliseconds): "
            + (aggrMapperEndMax - aggrMapperStartMin));
    printTaskRuntimeStats("AggregateMapper", aggrMapperStartTimes, aggrMapperEndTimes);

    long aggrReducerStartMin = minOf(aggrReducerStartTimes);
    long aggrReducerEndMax = maxOf(aggrReducerEndTimes);
    System.out.println("Aggregate job round shuffle phase runtime (milliseconds): "
            + (aggrReducerStartMin - aggrMapperEndMax));
    System.out.println("AggregateReducer total runtime (milliseconds): "
            + (aggrReducerEndMax - aggrReducerStartMin));
    printTaskRuntimeStats("AggregateReducer", aggrReducerStartTimes, aggrReducerEndTimes);

    System.out.println(
            "Aggregation job cooldown time (milliseconds): " + (aggrJobEndTime - aggrReducerEndMax));

    System.out
            .println("total runtime (all inclusive) (milliseconds): " + (aggrJobEndTime - fimJobStartTime));
    System.out.println("total runtime (no FIM job setup, no aggregation job cooldown) (milliseconds): "
            + (aggrReducerEndMax - fimMapperStartMin));
    System.out.println("total runtime (no setups, no cooldowns) (milliseconds): "
            + (fimReducerEndMax - fimMapperStartMin + aggrReducerEndMax - aggrMapperStartMin));
    System.out.println("FIM job runtime (including setup and cooldown) (milliseconds): " + fimJobRuntime);
    System.out.println("FIM job runtime (no setup, no cooldown) (milliseconds): "
            + (fimReducerEndMax - fimMapperStartMin));
    System.out.println(
            "Aggregation job runtime (including setup and cooldown) (milliseconds): " + aggrJobRuntime);
    System.out.println("Aggregation job runtime (no setup, no cooldown) (milliseconds): "
            + (aggrReducerEndMax - aggrMapperStartMin));

    return 0;
}

/** Copies the value of every counter in the named group into an array, in iteration order. */
private static long[] readCounterGroup(Counters counters, String groupName) {
    Counters.Group group = counters.getGroup(groupName);
    long[] values = new long[group.size()];
    int next = 0;
    for (Counters.Counter counter : group) {
        values[next++] = counter.getCounter();
    }
    return values;
}

/** Returns the smallest element; an empty array raises ArrayIndexOutOfBoundsException. */
private static long minOf(long[] values) {
    long smallest = values[0];
    for (long value : values) {
        if (value < smallest) {
            smallest = value;
        }
    }
    return smallest;
}

/** Returns the largest element; an empty array raises ArrayIndexOutOfBoundsException. */
private static long maxOf(long[] values) {
    long largest = values[0];
    for (long value : values) {
        if (value > largest) {
            largest = value;
        }
    }
    return largest;
}

/** Prints the average, minimum and maximum of the per-task runtimes (end minus start). */
private static void printTaskRuntimeStats(String taskName, long[] startTimes, long[] endTimes) {
    long[] runTimes = new long[startTimes.length];
    long runTimesSum = 0;
    for (int t = 0; t < startTimes.length; t++) {
        runTimes[t] = endTimes[t] - startTimes[t];
        runTimesSum += runTimes[t];
    }
    long runTimesMin = minOf(runTimes);
    long runTimesMax = maxOf(runTimes);
    System.out.println(taskName + " average task runtime (milliseconds): "
            + runTimesSum / startTimes.length);
    System.out.println(taskName + " minimum task runtime (milliseconds): " + runTimesMin);
    System.out.println(taskName + " maximum task runtime (milliseconds): " + runTimesMax);
}

From source file:HDFSFileFinder.java

License:Apache License

/**
 * Looks up the block locations of {@code fileName} and writes the hosts of all blocks to
 * {@code writer} as one comma-separated line.
 *
 * Each host name is resolved through {@code InetAddress} and cut at its first dot. The
 * file status and block locations are also stored in the {@code status} and
 * {@code bLocations} fields. On an {@code IOException} an error line is written and whatever
 * had been collected so far is still printed, without the trailing newline.
 */
private static void getBlockLocationsFromHdfs() {
    final StringBuilder hostList = new StringBuilder();
    final Configuration conf = new Configuration();

    // make connection to hdfs
    try {
        if (verbose) {
            writer.println("DEBUG: Trying to connect to " + fsName);
        }
        final FileSystem fs = FileSystem.get(conf);
        status = fs.getFileStatus(new Path(fileName));
        bLocations = fs.getFileBlockLocations(status, 0, status.getLen());

        // Empty before the first host, a comma before every later one.
        String separator = "";
        for (BlockLocation location : bLocations) {
            for (String rawHost : location.getHosts()) {
                final String resolved = InetAddress.getByName(rawHost).getHostName();
                final int firstDot = resolved.indexOf('.');
                // Keep only the part before the first dot, when there is one past index 0.
                final String shortName = firstDot > 0 ? resolved.substring(0, firstDot) : resolved;
                hostList.append(separator).append(shortName);
                separator = ",";
            }
        }
        hostList.append(NEWLINE);
    } catch (IOException e) {
        writer.println("Error getting block location data from namenode");
        e.printStackTrace();
    }

    writer.print(hostList.toString());
    writer.flush();
}

From source file:a.TestConcatExample.java

License:Apache License

/**
 * Creates (overwriting) the file {@code p} and fills it with repeated copies of one random
 * 4096-byte buffer until at least {@code blockSize * numBlocks} bytes have been written,
 * then asserts that the reported file length equals the number of bytes written.
 *
 * @param fs file system to write to
 * @param p path of the file to create
 * @param blockSize block size passed to {@code create}
 * @param replication replication factor passed to {@code create}
 * @param numBlocks number of blocks' worth of data to write
 * @return the file length reported by {@code getFileStatus}
 * @throws IOException if creating, writing, closing or inspecting the file fails
 */
private long writeFile(FileSystem fs, Path p, int blockSize, short replication, int numBlocks)
        throws IOException {

    final int bufferSize = 4096;
    // Computed in long arithmetic: an int product overflows for large block counts.
    final long targetLen = (long) blockSize * numBlocks;

    long written = 0;
    FSDataOutputStream os = fs.create(p, true, bufferSize, replication, blockSize);
    try {
        byte[] data = new byte[bufferSize];
        r.nextBytes(data);
        while (written < targetLen) {
            os.write(data);
            written += data.length;
        }
    } finally {
        // Close even when a write fails so the stream is not leaked.
        os.close();
    }

    FileStatus fileStatus = fs.getFileStatus(p);
    long f1Len = fileStatus.getLen();

    assertEquals(written, f1Len);

    return f1Len;
}

From source file:azkaban.crypto.Decryptions.java

License:Open Source License

/**
 * Decrypts {@code cipheredText} with the passphrase stored on the first line of the file at
 * {@code passphrasePath}.
 *
 * The passphrase path must exist, be a regular file and carry exactly the permission
 * {@code USER_READ_PERMISSION_ONLY}; otherwise the argument checks below fail.
 *
 * @param cipheredText text to decrypt; must not be null
 * @param passphrasePath location of the passphrase file on {@code fs}; must not be null
 * @param fs file system holding the passphrase file
 * @return the decrypted text, never null
 * @throws IOException if the passphrase file cannot be inspected or read
 */
public String decrypt(final String cipheredText, final String passphrasePath, final FileSystem fs)
        throws IOException {
    Preconditions.checkNotNull(cipheredText);
    Preconditions.checkNotNull(passphrasePath);

    final Path passphraseFile = new Path(passphrasePath);

    final boolean present = fs.exists(passphraseFile);
    Preconditions.checkArgument(present, "File does not exist at " + passphrasePath);

    final boolean regularFile = fs.isFile(passphraseFile);
    Preconditions.checkArgument(regularFile, "Passphrase path is not a file. " + passphrasePath);

    final FileStatus passphraseStatus = fs.getFileStatus(passphraseFile);
    final boolean ownerReadOnly = USER_READ_PERMISSION_ONLY.equals(passphraseStatus.getPermission());
    Preconditions.checkArgument(ownerReadOnly,
            "Passphrase file should only have read only permission on only user. " + passphrasePath);

    final Crypto decryptor = new Crypto();
    try (BufferedReader lineReader = new BufferedReader(
            new InputStreamReader(fs.open(passphraseFile), Charset.defaultCharset()))) {
        // Only the first line of the file is used as the passphrase.
        final String secret = lineReader.readLine();
        final String plainText = decryptor.decrypt(cipheredText, secret);
        Preconditions.checkNotNull(plainText, "Was not able to decrypt");
        return plainText;
    }
}

From source file:azkaban.viewer.hdfs.HdfsBrowserServlet.java

License:Apache License

/**
 * Renders the page for the path named in the request: a file page for files, a directory
 * page for directories, and an error page when the path does not exist, cannot be checked,
 * or is neither a file nor a directory.
 *
 * The file system obtained for {@code user} is closed before this method returns or throws.
 *
 * @param user user on whose behalf the file system is obtained
 * @param req current request; the path is taken from it
 * @param resp current response
 * @param session current session
 * @throws IOException if a file system call outside the existence check fails
 * @throws ServletException declared for the page-rendering helpers
 */
private void handleFsDisplay(String user, HttpServletRequest req, HttpServletResponse resp, Session session)
        throws IOException, ServletException, IllegalArgumentException, IllegalStateException {
    FileSystem fs = null;
    try {
        fs = getFileSystem(user);
    } catch (HadoopSecurityManagerException e) {
        errorPage(user, req, resp, session, "Cannot get FileSystem.");
        return;
    }

    // Everything from here on runs under try/finally: previously an exception from
    // getPath, isFile, getFileStatus or the display methods skipped fs.close().
    try {
        Path path = getPath(req);
        if (logger.isDebugEnabled()) {
            logger.debug("path: '" + path.toString() + "'");
        }

        try {
            if (!fs.exists(path)) {
                errorPage(user, req, resp, session, path.toUri().getPath() + " does not exist.");
                return;
            }
        } catch (IOException ioe) {
            logger.error("Got exception while checking for existence of path '" + path + "'", ioe);
            errorPage(user, req, resp, session,
                    path.toUri().getPath() + " Encountered error while trying to detect if path '" + path
                            + "' exists. Reason: " + ioe.getMessage());
            return;
        }

        if (fs.isFile(path)) {
            displayFilePage(fs, user, req, resp, session, path);
        } else if (fs.getFileStatus(path).isDir()) {
            displayDirPage(fs, user, req, resp, session, path);
        } else {
            errorPage(user, req, resp, session,
                    "It exists, it is not a file, and it is not a directory, what " + "is it precious?");
        }
    } finally {
        fs.close();
    }
}

From source file:azkaban.viewer.hdfs.HdfsBrowserServlet.java

License:Apache License

/**
 * Renders the file page for {@code path}.
 *
 * Besides the navigation data, the page receives the index of the first viewer that reports
 * the READ capability for the file ({@code -1} when none does), whether that viewer also
 * reports SCHEMA, and the file's status. If gathering that data fails, the failure is logged
 * and the page is rendered with an error message instead.
 *
 * @param fs file system holding the file
 * @param user user name placed on the page
 * @param req current request
 * @param resp current response
 * @param session current session
 * @param path file to display
 */
private void displayFilePage(FileSystem fs, String user, HttpServletRequest req, HttpServletResponse resp,
        Session session, Path path) {

    Page page = newPage(req, resp, session, "azkaban/viewer/hdfs/velocity/hdfs-file.vm");

    List<Path> paths = new ArrayList<Path>();
    List<String> segments = new ArrayList<String>();
    getPathSegments(path, paths, segments);

    page.add("allowproxy", allowGroupProxy);
    page.add("viewerPath", viewerPath);
    page.add("viewerName", viewerName);

    page.add("paths", paths);
    page.add("segments", segments);
    page.add("user", user);
    page.add("path", path.toString());
    page.add("homedir", getHomeDir(fs));

    try {
        boolean hasSchema = false;
        int viewerId = -1;
        // The first viewer able to read the file wins; later viewers are not consulted.
        for (int i = 0; i < viewers.size(); ++i) {
            HdfsFileViewer viewer = viewers.get(i);
            Set<Capability> capabilities = viewer.getCapabilities(fs, path);
            if (capabilities.contains(Capability.READ)) {
                if (capabilities.contains(Capability.SCHEMA)) {
                    hasSchema = true;
                }
                viewerId = i;
                break;
            }
        }
        page.add("viewerId", viewerId);
        page.add("hasSchema", hasSchema);

        FileStatus status = fs.getFileStatus(path);
        page.add("status", status);

    } catch (Exception ex) {
        // Keep the stack trace in the log; the page itself only shows the message.
        logger.error("Error while preparing file page for path '" + path + "'", ex);
        page.add("no_fs", "true");
        page.add("error_message", "Error: " + ex.getMessage());
    }
    page.render();
}