Usage examples for org.apache.hadoop.fs.FileSystem.exists
public boolean exists(Path f) throws IOException
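Before the real-world examples, here is a minimal, self-contained sketch of the check-before-use pattern they all share. The path and configuration are placeholders for illustration, not values from any of the listed sources.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example"); // placeholder path, not from the sources below
        // Resolve the FileSystem that owns this path (local, HDFS, etc.).
        FileSystem fs = path.getFileSystem(conf);
        if (fs.exists(path)) {
            System.out.println(path + " exists");
        } else {
            System.out.println(path + " does not exist");
        }
    }
}

The examples below follow this shape: call exists() first, then open, append to, delete, or create the path accordingly.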
From source file:cc.solr.lucene.store.hdfs.ConvertDirectory.java
License:Apache License
public static void convert(Path path) throws IOException {
    FileSystem fileSystem = FileSystem.get(path.toUri(), new Configuration());
    if (!fileSystem.exists(path)) {
        System.out.println(path + " does not exist.");
        return;
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (fileStatus.isDir()) {
        // Recurse into directories and convert each contained file.
        FileStatus[] listStatus = fileSystem.listStatus(path);
        for (FileStatus status : listStatus) {
            convert(status.getPath());
        }
    } else {
        System.out.println("Converting file [" + path + "]");
        HdfsMetaBlock block = new HdfsMetaBlock();
        block.realPosition = 0;
        block.logicalPosition = 0;
        block.length = fileStatus.getLen();
        FSDataOutputStream outputStream = fileSystem.append(path);
        block.write(outputStream);
        outputStream.writeInt(1);
        outputStream.writeLong(fileStatus.getLen());
        outputStream.writeInt(HdfsFileWriter.VERSION);
        outputStream.close();
    }
}
From source file:cc.solr.lucene.store.hdfs.HdfsFileReader.java
License:Apache License
public HdfsFileReader(FileSystem fileSystem, Path path, int bufferSize) throws IOException {
    if (!fileSystem.exists(path)) {
        throw new FileNotFoundException(path.toString());
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    _hdfsLength = fileStatus.getLen();
    _inputStream = fileSystem.open(path, bufferSize);

    // read meta blocks
    _inputStream.seek(_hdfsLength - 16);
    int numberOfBlocks = _inputStream.readInt();
    _length = _inputStream.readLong();
    int version = _inputStream.readInt();
    if (version != VERSION) {
        throw new RuntimeException("Version of file [" + version + "] does not match reader [" + VERSION + "]");
    }
    _inputStream.seek(_hdfsLength - 16 - (numberOfBlocks * 24)); // 3 longs per block
    _metaBlocks = new ArrayList<HdfsMetaBlock>(numberOfBlocks);
    for (int i = 0; i < numberOfBlocks; i++) {
        HdfsMetaBlock hdfsMetaBlock = new HdfsMetaBlock();
        hdfsMetaBlock.readFields(_inputStream);
        _metaBlocks.add(hdfsMetaBlock);
    }
    seek(0);
}
From source file:chapter5.KMeanSample.java
License:Apache License
/**
 * Return the path to the final iteration's clusters.
 */
private static Path finalClusterPath(Configuration conf, Path output, int maxIterations) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    // Walk backwards from the last possible iteration to the first
    // and return the first clusters directory that actually exists.
    for (int i = maxIterations; i >= 0; i--) {
        Path clusters = new Path(output, "clusters-" + i);
        if (fs.exists(clusters)) {
            return clusters;
        }
    }
    return null;
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.DBReader.java
public static void main(String[] args) throws Exception {
    Path crawlPath = new Path("task2");
    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path output = new Path("output");
    Configuration config = CrawlerConfiguration.create();
    FileSystem fs = FileSystem.get(config);
    // Remove any previous output so the job can write a fresh directory.
    if (fs.exists(output)) {
        fs.delete(output, true);
    }
    Job job = new Job(config);
    job.setJobName("dbreader " + crawlPath.toString());
    job.setMapperClass(DBReaderMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, output);
    job.waitForCompletion(true);
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.Injector.java
public static void inject(Path crawlPath, CrawlDatums datums, Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException, Exception {
    Path crawldbPath = new Path(crawlPath, "crawldb");
    FileSystem fs = FileSystem.get(conf);
    Path tempdb = new Path(crawldbPath, "temp");
    if (fs.exists(tempdb)) {
        fs.delete(tempdb, true);
    }
    // Write the injected datums into a temporary database, then merge it in.
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(tempdb, "info"), Text.class,
            CrawlDatum.class);
    for (CrawlDatum datum : datums) {
        String key = datum.getKey();
        writer.append(new Text(key), datum);
        LOG.info("inject:" + key);
    }
    writer.close();
    Path[] mergePaths = new Path[] { tempdb };
    Merge.merge(crawlPath, mergePaths, conf, "inject");
    Merge.install(crawlPath, conf);
    // Clean up the temporary database once the merge has been installed.
    if (fs.exists(tempdb)) {
        fs.delete(tempdb, true);
    }
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.Merge.java
public static void merge(Path crawlPath, Path[] mergePaths, Configuration conf, String jobName) throws Exception {
    Job job = new Job(conf);
    job.setJobName(jobName + " " + crawlPath.toString());
    job.setJarByClass(Merge.class);
    // job.getConfiguration().set("mapred", "/home/hu/mygit/WebCollector2/WebCollectorCluster/target/WebCollectorCluster-2.0.jar");
    Path crawldbPath = new Path(crawlPath, "crawldb");
    Path newdb = new Path(crawldbPath, "new");
    Path currentdb = new Path(crawldbPath, "current");
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(currentdb)) {
        FileInputFormat.addInputPath(job, currentdb);
    }
    if (fs.exists(newdb)) {
        fs.delete(newdb, true);
    }
    for (Path mergePath : mergePaths) {
        FileInputFormat.addInputPath(job, mergePath);
    }
    FileOutputFormat.setOutputPath(job, newdb);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.waitForCompletion(true);
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.Merge.java
public static void install(Path crawlPath, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path crawldbPath = new Path(crawlPath, "crawldb");
    Path newdb = new Path(crawldbPath, "new");
    Path currentdb = new Path(crawldbPath, "current");
    Path olddb = new Path(crawldbPath, "old");
    // Rotate the databases: current becomes old, new becomes current.
    if (fs.exists(currentdb)) {
        if (fs.exists(olddb)) {
            fs.delete(olddb, true);
        }
        fs.rename(currentdb, olddb);
    }
    fs.rename(newdb, currentdb);
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.SegmentUtil.java
public static void initSegments(Path crawlPath, Configuration conf) throws IOException {
    Path segmentsPath = new Path(crawlPath, "segments");
    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(segmentsPath)) {
        fs.mkdirs(segmentsPath);
    }
}
From source file:cn.edu.hfut.dmic.webcollectorcluster.crawler.Crawler.java
public void start(int depth) throws Exception {
    Configuration conf = CrawlerConfiguration.create();
    FileSystem fs = crawlDir.getFileSystem(conf);
    // Unless resuming, start from scratch by removing any previous crawl directory.
    if (!resumable) {
        if (fs.exists(crawlDir)) {
            fs.delete(crawlDir, true);
        }
    }
    inject();
    for (int i = 0; i < depth; i++) {
        LogUtils.getLogger().info("starting depth " + (i + 1));
        String segmentName = SegmentUtils.createSegmengName();
        Path segmentPath = new Path(segments, segmentName);
        String[] args = new String[] { crawldb.toString(), segmentPath.toString() };
        ToolRunner.run(CrawlerConfiguration.create(), new Fetcher(), args);
        ToolRunner.run(CrawlerConfiguration.create(), new DbUpdater(), args);
    }
}
From source file:cn.edu.hfut.dmic.webcollectorcluster.fetcher.Fetcher.java
@Override
public int run(String[] args) throws Exception {
    JobConf jc = new JobConf(getConf());
    jc.setJarByClass(Fetcher.class);
    jc.setInputFormat(SequenceFileInputFormat.class);
    Path input = new Path(args[0], "current");
    Path output = new Path(args[1]);
    Configuration conf = CrawlerConfiguration.create();
    FileSystem fs = output.getFileSystem(conf);
    if (fs.exists(output)) {
        fs.delete(output, true);
    }
    FileInputFormat.addInputPath(jc, input);
    FileOutputFormat.setOutputPath(jc, output);
    jc.setMapOutputKeyClass(Text.class);
    jc.setMapOutputValueClass(WebWritable.class);
    jc.setMapRunnerClass(Fetcher.class);
    jc.setOutputFormat(FetcherOutputFormat.class);
    JobClient.runJob(jc);
    return 0;
}