Example usage for org.apache.hadoop.fs FileSystem exists

List of usage examples for org.apache.hadoop.fs FileSystem exists

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileSystem exists.

Prototype

public boolean exists(Path f) throws IOException 

Document

Check if a path exists.
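
Before the project examples below, here is a minimal, self-contained sketch of a typical call. The path and class name are placeholders chosen for illustration, not taken from any of the projects that follow.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemExistsSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Resolve the FileSystem behind the default URI in the
        // configuration (HDFS on a cluster, the local FS otherwise).
        FileSystem fs = FileSystem.get(conf);

        // Placeholder path; substitute one meaningful for your setup.
        Path path = new Path("/tmp/exists-check");

        // exists() returns true for files and directories alike, and
        // false (rather than throwing) when the path is absent.
        if (fs.exists(path)) {
            System.out.println(path + " exists");
        } else {
            System.out.println(path + " does not exist");
        }
    }
}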

Usage

From source file:ExportStressTest.java

License:Apache License

/** Create a set of data files to export. */
public void createData() throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("ExportStressTest");
    if (fs.exists(dirPath)) {
        System.out.println("Export directory appears to already exist. Skipping data-gen.");
        return;
    }

    for (int i = 0; i < NUM_FILES; i++) {
        createFile(i);
    }
}
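
In this example exists() serves as an idempotency guard: if the target directory is already present, data generation is skipped entirely, so the stress test can be re-run without regenerating or clobbering earlier files.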

From source file:CountJob.java

License:Apache License

public static void doJob(String param, String args[], String msgs)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    conf.set(TokenizerMapper.PATTERN, args[2]);
    FileSystem hdfs = FileSystem.get(conf);
    Path tempOutput1 = new Path("/data/output/temp/" + param + "1");
    Path tempOutput2 = new Path("/data/output/temp/" + param + "2");
    if (hdfs.exists(tempOutput1) || hdfs.exists(tempOutput2)) {
        // Clear temporary output left over from a previous run; delete()
        // on a path that does not exist is a harmless no-op.
        hdfs.delete(tempOutput1, true);
        hdfs.delete(tempOutput2, true);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(CountJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, tempOutput1);
    job.waitForCompletion(true);

    Job sortJob1 = new Job(conf);
    sortJob1.setJobName("grep-sort");
    FileInputFormat.setInputPaths(sortJob1, tempOutput1);
    sortJob1.setInputFormatClass(SequenceFileInputFormat.class);
    sortJob1.setMapperClass(InverseMapper.class);
    sortJob1.setNumReduceTasks(1); // write a single file
    FileOutputFormat.setOutputPath(sortJob1, tempOutput2);
    sortJob1.setSortComparatorClass( // sort by decreasing freq
            LongWritable.DecreasingComparator.class);
    sortJob1.waitForCompletion(true);
    hdfs.delete(tempOutput1, true);

}
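
A note on the guard above: FileSystem.delete(path, recursive) returns false for a missing path instead of throwing, so deleting both temp outputs inside a single exists() check is safe even when only one of them actually exists.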

From source file:Hw2Part1.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: <input file> <output directory>");
        System.exit(2);
    }

    //    FileSystem hdfs = FileSystem.get(conf);
    String target = "hdfs://localhost:9000/";
    FileSystem fs = FileSystem.get(URI.create(target), conf); // differs from FileSystem.get(conf): binds explicitly to hdfs://localhost:9000/
    Path outputpath = new Path(otherArgs[otherArgs.length - 1]);
    if (fs.exists(outputpath)) {
        fs.delete(outputpath, true);
    }

    Job job = Job.getInstance(conf, "Hw2Part1");

    job.setJarByClass(Hw2Part1.class);

    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumCombiner.class);
    job.setReducerClass(IntSumReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(InfoWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(InfoWritable.class);

    // add the input paths as given by command line
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }

    // add the output path as given by the command line
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
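
Deleting the output path up front matters here because FileOutputFormat refuses to start a job whose output directory already exists (output-spec checking fails with a FileAlreadyExistsException), so the exists()/delete() pair is what makes the job re-runnable.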

From source file:DataHBase.java

License:Open Source License

public void run(HashMap<String, String> config) throws Exception {

    //clean the former output if it exists
    Path p = new Path(config.get("hdfs_output_dir"));
    FileSystem fs = FileSystem.get(new Configuration());
    if (fs.exists(p)) {
        fs.delete(p, true);
    }

    String junction = config.get("what_to_find"); // the name of the junction
    String date1 = config.get("date1");
    String date2 = config.get("date2");
    //date1 and date2 can be of a format YYYY-MM-DD
    if (date1.length() == 10)
        date1 = date1 + " 00:00:00";
    if (date2.length() == 10)
        date2 = date2 + " 23:59:59";
    System.out.println("Looking for data of " + junction + ": " + date1 + " - " + date2);

    //create timestamps (considering time zone!) to limit data
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    sdf.setTimeZone(TimeZone.getDefault());
    Long time1 = sdf.parse(date1).getTime();
    Long time2 = sdf.parse(date2).getTime();

    //run a job
    Configuration conf = HBaseConfiguration.create();
    conf.set("mapreduce.output.textoutputformat.separator", ","); //set comma as a delimiter

    Job job = new Job(conf, "Retrieve data from hbase");
    job.setJarByClass(DataHBase.class);

    Scan scan = new Scan();
    scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs
    scan.setMaxVersions(1);
    scan.setTimeRange(time1, time2); //take a day we are interested in
    //set a filter for a junction name
    if (!junction.equals("")) {
        SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("data"),
                Bytes.toBytes("location_name"), CompareOp.EQUAL, Bytes.toBytes(junction));
        scan.setFilter(filter);
    }
    //add the specific columns to the output to limit the amount of data
    scan.addFamily(Bytes.toBytes("data"));

    TableMapReduceUtil.initTableMapperJob(config.get("hbase_table"), // input HBase table name
            scan, // Scan instance to control CF and attribute selection
            TableMap.class, // mapper
            Text.class, // mapper output key
            Text.class, // mapper output value
            job);

    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, new Path(config.get("hdfs_output_dir")));

    job.waitForCompletion(true);

}

From source file:PiEstimator.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
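
Unlike the delete-and-rerun examples above, this one treats a pre-existing directory as an error: exists() acts as a fail-fast safety check so that data left by a previous run is never silently destroyed.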

From source file:ComRoughSetApproInputSampler.java

License:Apache License

/**
 * Write a partition file for the given job, using the Sampler provided.
 * Queries the sampler for a sample keyset, sorts by the output key
 * comparator, selects the keys for each rank, and writes to the destination
 * returned from {@link TotalOrderPartitioner#getPartitionFile}.
 */
@SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = job.getConfiguration();
    final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    int numPartitions = job.getNumReduceTasks();
    K[] samples = (K[]) sampler.getSample(inf, job);
    LOG.info("Using " + samples.length + " samples");
    RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
    Arrays.sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
    FileSystem fs = dst.getFileSystem(conf);
    if (fs.exists(dst)) {
        fs.delete(dst, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(),
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    float stepSize = samples.length / (float) numPartitions;
    int last = -1;
    for (int i = 1; i < numPartitions; ++i) {
        int k = Math.round(stepSize * i);
        while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
            ++k;
        }
        writer.append(samples[k], nullValue);
        last = k;
    }
    writer.close();
}
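
Note the non-recursive delete: the partition file is a single plain file, so recursive = false suffices, and the call would fail rather than wipe out a non-empty directory that happened to sit at that path.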

From source file:CouchbaseExportStressTest.java

License:Apache License

/** Create a set of data files to export. */
public void createData() throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("CouchbaseExportStressTest");
    if (fs.exists(dirPath)) {
        System.out.println("Export directory appears to already exist. Skipping data-gen.");
        return;
    }

    for (int i = 0; i < NUM_FILES; i++) {
        createFile(i);
    }
}

From source file:PerformanceEvaluation.java

License:Apache License

private Path writeInputFile(final Configuration c) throws IOException {
    FileSystem fs = FileSystem.get(c);
    if (!fs.exists(PERF_EVAL_DIR)) {
        fs.mkdirs(PERF_EVAL_DIR);
    }
    SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss");
    Path subdir = new Path(PERF_EVAL_DIR, formatter.format(new Date()));
    fs.mkdirs(subdir);
    Path inputFile = new Path(subdir, "input.txt");
    PrintStream out = new PrintStream(fs.create(inputFile));
    // Make input random.
    Map<Integer, String> m = new TreeMap<Integer, String>();
    Hash h = MurmurHash.getInstance();
    int perClientRows = (this.R / this.N);
    try {
        for (int i = 0; i < 10; i++) {
            for (int j = 0; j < N; j++) {
                String s = "startRow=" + ((j * perClientRows) + (i * (perClientRows / 10)))
                        + ", perClientRunRows=" + (perClientRows / 10) + ", totalRows=" + this.R + ", clients="
                        + this.N + ", flushCommits=" + this.flushCommits + ", writeToWAL=" + this.writeToWAL
                        + ", scanCache=" + this.S;
                int hash = h.hash(Bytes.toBytes(s));
                m.put(hash, s);
            }
        }
        for (Map.Entry<Integer, String> e : m.entrySet()) {
            out.println(e.getValue());
        }
    } finally {
        out.close();
    }
    return subdir;
}
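
This example uses the inverse check, !fs.exists(...), to create the base directory only when needed. The guard is arguably optional, since mkdirs() on an existing directory simply returns true, but it makes the intent explicit.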

From source file:Eggshell.java

License:Open Source License

/** In charge of setting up and submitting the Hadoop job.
 *  The method receives the remaining command-line arguments
 *  after they have first been processed by Hadoop.
 *
 * @param args        Hadoop-processed command-line arguments
 * @return            Returns 0 for success
 */
public int run(String[] args) throws Exception {
    String name = args[0];
    String[] params = Arrays.copyOfRange(args, 1, args.length);
    Object[] arguments = Arrays.copyOf(params, params.length, Object[].class);

    script = new Script(); // start the Javascript interpreter
    script.putProperty("arguments", script.newArray(arguments));

    EggGlobal.script = script;
    Egg.script = script;
    Egg.name = name;
    Egg.conf = getConf();

    Scriptable global = script.newObject("EggGlobal", null);
    script.setGlobalScope(global);

    script.evalLibrary(); // load the Eggshell Javascript library
    script.evalFile(name); // load the javascript job file

    /* create a temporary directory in HDFS to hold the serialized functions */
    FileSystem fs = FileSystem.get(getConf());
    Path dir = new Path(SCRIPT_DIR);
    if (fs.exists(dir))
        fs.delete(dir, true);
    fs.mkdirs(dir);

    /* call the 'eggshell' function */
    Object o = script.getProperty("eggshell");
    if (o instanceof Function) {
        Scriptable thisObj = script.newObject("Egg", null);
        Function f = (Function) o;
        o = script.callFunction(f, thisObj, params);
        script.exit();

        /* return the result of the 'eggshell' function */
        if (o instanceof NativeJavaObject)
            o = ((NativeJavaObject) o).unwrap();
        if (o instanceof Boolean)
            return (Boolean) o ? 0 : 1;
        if (o instanceof Integer)
            return (Integer) o;
        if (o instanceof Double)
            return ((Double) o).intValue();
    }
    return 0;
}

From source file:BwaInterpreter.java

License:Open Source License

private void createOutputFolder() {
    try {
        FileSystem fs = FileSystem.get(this.conf);

        // Path variable
        Path outputDir = new Path(options.getOutputPath());

        // Directory creation
        if (!fs.exists(outputDir)) {
            fs.mkdirs(outputDir);
        } else {
            fs.delete(outputDir, true);
            fs.mkdirs(outputDir);
        }

        fs.close(); // note: FileSystem.get() typically returns a cached, shared instance, so closing it can affect other code using the same FileSystem
    } catch (IOException e) {
        LOG.error(e.toString());
        e.printStackTrace();
    }
}
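
The exists()/delete()/mkdirs() sequence here resets the output directory on every run. Keep in mind that the check and the mutations are separate filesystem calls, so the sequence is not atomic; a concurrent writer could create the path between calls.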