Example usage for org.apache.hadoop.fs FileSystem delete

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem#delete(Path) from open-source projects.

Prototype

@Deprecated
public boolean delete(Path f) throws IOException 

Document

Delete a file/directory. This single-argument overload is deprecated; use delete(Path f, boolean recursive) instead.
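
Before the per-project examples, here is a minimal, self-contained sketch of a typical call. The path name is a hypothetical placeholder, and the sketch uses the non-deprecated two-argument overload; pass true as the second argument to remove a non-empty directory recursively. In most implementations, delete returns false rather than throwing when the path does not exist, so the fs.exists guards seen in several examples below are defensive rather than strictly required.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // "output" is a hypothetical path used only for illustration
        Path output = new Path("output");

        // Pass true to remove a non-empty directory and its contents;
        // the call returns false (rather than throwing) if the path is absent.
        boolean deleted = fs.delete(output, true);
        System.out.println("deleted: " + deleted);
    }
}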

Usage

From source file:DataJoinJob.java

License:Apache License

public static JobConf createDataJoinJob(String args[]) throws IOException {

    String inputDir = args[0];
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, DataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    // clear any previous output so the job can write fresh results
    fs.delete(new Path(outputDir));
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}

From source file:bb.BranchAndBound.java

License:Apache License

static Job getJob(String input, String output, String dataDir, int iteration) throws Exception {
    Configuration conf = new Configuration();

    FileSystem hdfs = FileSystem.get(conf);
    FileStatus[] fileStatus = hdfs.listStatus(new Path(input));
    for (int i = 0; i < fileStatus.length; ++i) {
        // delete zero-length input files before the job runs
        if (fileStatus[i].getLen() == 0) {
            hdfs.delete(fileStatus[i].getPath());
        }
    }
    DistributedCache.addCacheFile(new URI(dataDir + "/data"), conf);
    Job ret = new Job(conf, dataDir + "_iteration_" + iteration);
    ret.setJarByClass(BranchAndBound.class);
    ret.setMapperClass(BBMapper1.class);
    ret.setReducerClass(BBReducer.class);
    //ret.setReducerClass(MergeReducer.class);
    FileInputFormat.setInputPaths(ret, new Path(input));
    //if( iteration > 7 ) FileInputFormat.setMinInputSplitSize(ret, 67108864);
    FileOutputFormat.setOutputPath(ret, new Path(output));
    ret.setOutputKeyClass(NullWritable.class);
    ret.setOutputValueClass(Text.class);
    return ret;
}

From source file:cn.edu.hfut.dmic.webcollector.crawldb.DBReader.java

public static void main(String[] args) throws Exception {
    Path crawlPath = new Path("task2");
    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path output = new Path("output");

    Configuration config = CrawlerConfiguration.create();
    FileSystem fs = FileSystem.get(config);

    if (fs.exists(output)) {
        fs.delete(output); // remove stale output from a previous run
    }

    Job job = new Job(config);
    job.setJobName("dbreader " + crawlPath.toString());
    job.setMapperClass(DBReaderMapper.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, output);

    job.waitForCompletion(true);

}

From source file:cn.edu.hfut.dmic.webcollector.crawldb.Injector.java

public static void inject(Path crawlPath, CrawlDatums datums, Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException, Exception {
    Path crawldbPath = new Path(crawlPath, "crawldb");
    FileSystem fs = FileSystem.get(conf);
    Path tempdb = new Path(crawldbPath, "temp");
    if (fs.exists(tempdb)) {
        fs.delete(tempdb); // clear any leftover temporary db from a previous run
    }

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(tempdb, "info"), Text.class,
            CrawlDatum.class);

    for (CrawlDatum datum : datums) {

        String key = datum.getKey();
        writer.append(new Text(key), datum);
        LOG.info("inject:" + key);
    }
    writer.close();

    Path[] mergePaths = new Path[] { tempdb };

    Merge.merge(crawlPath, mergePaths, conf, "inject");
    Merge.install(crawlPath, conf);

    if (fs.exists(tempdb)) {
        fs.delete(tempdb);
    }

}

From source file:cn.edu.hfut.dmic.webcollector.crawldb.Merge.java

public static void merge(Path crawlPath, Path[] mergePaths, Configuration conf, String jobName)
        throws Exception {

    Job job = new Job(conf);
    job.setJobName(jobName + "  " + crawlPath.toString());
    job.setJarByClass(Merge.class);
    // job.getConfiguration().set("mapred", "/home/hu/mygit/WebCollector2/WebCollectorCluster/target/WebCollectorCluster-2.0.jar");
    Path crawldbPath = new Path(crawlPath, "crawldb");
    Path newdb = new Path(crawldbPath, "new");
    Path currentdb = new Path(crawldbPath, "current");

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(currentdb)) {
        FileInputFormat.addInputPath(job, currentdb);
    }

    if (fs.exists(newdb)) {
        fs.delete(newdb); // clear the previous "new" db before the job writes a fresh one
    }
    for (Path mergePath : mergePaths) {
        FileInputFormat.addInputPath(job, mergePath);
    }
    FileOutputFormat.setOutputPath(job, newdb);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);

    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.waitForCompletion(true);

}

From source file:cn.edu.hfut.dmic.webcollector.crawldb.Merge.java

public static void install(Path crawlPath, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path crawldbPath = new Path(crawlPath, "crawldb");
    Path newdb = new Path(crawldbPath, "new");
    Path currentdb = new Path(crawldbPath, "current");
    Path olddb = new Path(crawldbPath, "old");
    if (fs.exists(currentdb)) {
        if (fs.exists(olddb)) {
            fs.delete(olddb); // drop the previous backup
        }
        fs.rename(currentdb, olddb); // the current db becomes the backup
    }
    fs.rename(newdb, currentdb);
}

From source file:cn.edu.hfut.dmic.webcollectorcluster.crawler.Crawler.java

public void start(int depth) throws Exception {

    Configuration conf = CrawlerConfiguration.create();
    FileSystem fs = crawlDir.getFileSystem(conf);

    if (!resumable) {
        if (fs.exists(crawlDir)) {
            fs.delete(crawlDir); // not resuming: start with a clean crawl directory
        }
    }

    inject();

    for (int i = 0; i < depth; i++) {
        LogUtils.getLogger().info("starting depth " + (i + 1));
        String segmentName = SegmentUtils.createSegmengName();
        Path segmentPath = new Path(segments, segmentName);

        String[] args = new String[] { crawldb.toString(), segmentPath.toString() };
        ToolRunner.run(CrawlerConfiguration.create(), new Fetcher(), args);
        ToolRunner.run(CrawlerConfiguration.create(), new DbUpdater(), args);
    }

}

From source file:cn.edu.hfut.dmic.webcollectorcluster.fetcher.Fetcher.java

@Override
public int run(String[] args) throws Exception {
    JobConf jc = new JobConf(getConf());
    jc.setJarByClass(Fetcher.class);
    jc.setInputFormat(SequenceFileInputFormat.class);
    Path input = new Path(args[0], "current");
    Path output = new Path(args[1]);
    Configuration conf = CrawlerConfiguration.create();
    FileSystem fs = output.getFileSystem(conf);
    if (fs.exists(output)) {
        fs.delete(output); // remove stale fetcher output
    }
    FileInputFormat.addInputPath(jc, input);
    FileOutputFormat.setOutputPath(jc, output);

    jc.setMapOutputKeyClass(Text.class);
    jc.setMapOutputValueClass(WebWritable.class);

    jc.setMapRunnerClass(Fetcher.class);
    jc.setOutputFormat(FetcherOutputFormat.class);

    JobClient.runJob(jc);
    return 0;
}

From source file:cn.edu.hfut.dmic.webcollectorcluster.generator.Injector.java

public void inject(Path crawlDir, ArrayList<String> urls)
        throws IOException, InterruptedException, ClassNotFoundException, Exception {
    Path crawldb = new Path(crawlDir, "crawldb");
    Configuration config = CrawlerConfiguration.create();
    System.out.println(config.get("mapred.jar"));
    FileSystem fs = crawldb.getFileSystem(config);
    Path tempdb = new Path(crawldb, "temp");
    if (fs.exists(tempdb)) {
        fs.delete(tempdb);
    }

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, config, new Path(tempdb, "info.avro"), Text.class,
            CrawlDatum.class);
    for (String url : urls) {
        CrawlDatum crawldatum = new CrawlDatum();
        crawldatum.setUrl(url);
        crawldatum.setStatus(CrawlDatum.STATUS_DB_INJECTED);
        writer.append(new Text(url), crawldatum);
        System.out.println("inject:" + url);
    }
    writer.close();

    String[] args = new String[] { crawldb.toString(), tempdb.toString() };

    ToolRunner.run(CrawlerConfiguration.create(), new Merge(), args);
    Merge.install(crawldb);

    if (fs.exists(tempdb)) {
        fs.delete(tempdb);
    }

}

From source file:cn.edu.hfut.dmic.webcollectorcluster.generator.Merge.java

public static Job createJob(Configuration conf, Path crawldb) throws IOException {

    Job job = new Job(conf);
    //job.setJarByClass(Merge.class);
    job.getConfiguration().set("mapred",
            "/home/hu/mygit/WebCollector2/WebCollectorCluster/target/WebCollectorCluster-2.0.jar");
    Path newdb = new Path(crawldb, "new");
    Path currentdb = new Path(crawldb, "current");

    FileSystem fs = crawldb.getFileSystem(CrawlerConfiguration.create());
    if (fs.exists(currentdb)) {
        FileInputFormat.addInputPath(job, currentdb);
    }

    if (fs.exists(newdb)) {
        fs.delete(newdb);
    }

    FileOutputFormat.setOutputPath(job, newdb);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);

    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    return job;
}