Example usage for org.apache.hadoop.fs Path toString

Introduction

This page collects example usages of org.apache.hadoop.fs.Path#toString() from open-source projects.

Prototype

@Override
public String toString()
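
Path.toString() renders the path as a URI-style string, keeping the scheme and authority when they are present. A minimal sketch of the behavior (the namenode address and file names below are invented for illustration):

import org.apache.hadoop.fs.Path;

public class PathToStringDemo {
    public static void main(String[] args) {
        // A fully qualified path keeps its scheme and authority.
        Path qualified = new Path("hdfs://namenode:8020/user/data/part-00000");
        System.out.println(qualified.toString()); // hdfs://namenode:8020/user/data/part-00000

        // The parent/child constructor renders with a single separator.
        Path relative = new Path("task2", "crawldb/current");
        System.out.println(relative.toString()); // task2/crawldb/current
    }
}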

Usage

From source file: cc.solr.lucene.store.hdfs.HdfsFileReader.java

License: Apache License

public HdfsFileReader(FileSystem fileSystem, Path path, int bufferSize) throws IOException {
    if (!fileSystem.exists(path)) {
        throw new FileNotFoundException(path.toString());
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    _hdfsLength = fileStatus.getLen();
    _inputStream = fileSystem.open(path, bufferSize);

    // read meta blocks: the footer (last 16 bytes) holds an int block count,
    // a long logical length and an int format version
    _inputStream.seek(_hdfsLength - 16);
    int numberOfBlocks = _inputStream.readInt();
    _length = _inputStream.readLong();
    int version = _inputStream.readInt();
    if (version != VERSION) {
        throw new RuntimeException("Version of file [" + version + "] does not match reader [" + VERSION + "]");
    }
    _inputStream.seek(_hdfsLength - 16 - (numberOfBlocks * 24)); // 3 longs per block
    _metaBlocks = new ArrayList<HdfsMetaBlock>(numberOfBlocks);
    for (int i = 0; i < numberOfBlocks; i++) {
        HdfsMetaBlock hdfsMetaBlock = new HdfsMetaBlock();
        hdfsMetaBlock.readFields(_inputStream);
        _metaBlocks.add(hdfsMetaBlock);
    }
    seek(0);
}
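
The only use of toString() here is to turn the missing path into a readable FileNotFoundException message; the rest of the constructor reads the file footer and its meta blocks.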

From source file: ch.sentric.hbase.coprocessor.LoadWithTableDescriptorExample.java

License: Apache License

public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();

    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(fs.getUri() + Path.SEPARATOR + "coprocessor-1.0-SNAPSHOT.jar");

    HTableDescriptor htd = new HTableDescriptor("testtable");
    htd.addFamily(new HColumnDescriptor("colfam1"));
    htd.setValue("COPROCESSOR$1", path.toString() + "|"
            + ProspectiveSearchRegionObserver.class.getCanonicalName() + "|" + Coprocessor.PRIORITY_USER);

    HBaseAdmin admin = new HBaseAdmin(conf);
    admin.createTable(htd);

    System.out.println(admin.getTableDescriptor(Bytes.toBytes("testtable")));
}
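
Here toString() yields the fully qualified URI of the coprocessor jar on HDFS; it is joined with the observer class name and the priority by "|" to form the pipe-separated COPROCESSOR$1 table attribute that HBase parses when loading the coprocessor.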

From source file: clustering.link_back.step1.SetKeyMapper.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    Path filePath = fileSplit.getPath();
    this.joinOrder = filePath.toString().contains("mst") ? 1 : 2;
}

From source file: clustering.link_back.step2.SetKeyMapper.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    Path filePath = fileSplit.getPath();
    this.joinOrder = filePath.toString().contains("step1") ? 1 : 2;
}
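
Both mappers (step 1 above, step 2 here) rely on the same trick: because toString() returns the input split's full path, checking it for a marker substring ("mst" or "step1") reveals which input directory a record came from, and therefore which side of the join it belongs to.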

From source file: cn.edu.hfut.dmic.webcollector.crawldb.DBReader.java

public static void main(String[] args) throws Exception {
    Path crawlPath = new Path("task2");
    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path output = new Path("output");

    Configuration config = CrawlerConfiguration.create();
    FileSystem fs = FileSystem.get(config);

    if (fs.exists(output)) {
        fs.delete(output);
    }

    Job job = new Job(config);
    job.setJobName("dbreader " + crawlPath.toString());
    job.setMapperClass(DBReaderMapper.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, output);

    job.waitForCompletion(true);

}
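
Here the path string only makes the job name readable ("dbreader task2"); the actual input and output locations are still passed as Path objects to FileInputFormat and FileOutputFormat.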

From source file: cn.edu.hfut.dmic.webcollector.crawldb.Generator.java

public static String generate(Path crawlPath, Configuration conf) throws Exception {
    SegmentUtil.initSegments(crawlPath, conf);
    String segmentName = SegmentUtil.createSegment(crawlPath, conf);

    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path generatePath = new Path(crawlPath, "segments/" + segmentName + "/generate");

    Job job = new Job(conf);
    job.setJobName("generate " + crawlPath.toString());
    job.setJarByClass(Generator.class);

    job.setReducerClass(GeneratorReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, generatePath);
    job.waitForCompletion(true);
    long count = job.getCounters().findCounter("generator", "count").getValue();
    System.out.println("total generate:" + count);
    if (count == 0) {
        return null;
    } else {
        return segmentName;
    }

}
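
The same naming pattern as in DBReader; in addition, the method returns null when the "generator" counter reports zero records, signalling that the new segment is empty.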

From source file: cn.edu.hfut.dmic.webcollector.crawldb.Merge.java

public static void merge(Path crawlPath, Path[] mergePaths, Configuration conf, String jobName)
        throws Exception {

    Job job = new Job(conf);
    job.setJobName(jobName + "  " + crawlPath.toString());
    job.setJarByClass(Merge.class);
    // job.getConfiguration().set("mapred", "/home/hu/mygit/WebCollector2/WebCollectorCluster/target/WebCollectorCluster-2.0.jar");
    Path crawldbPath = new Path(crawlPath, "crawldb");
    Path newdb = new Path(crawldbPath, "new");
    Path currentdb = new Path(crawldbPath, "current");

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(currentdb)) {
        FileInputFormat.addInputPath(job, currentdb);
    }

    if (fs.exists(newdb)) {
        fs.delete(newdb);
    }
    for (Path mergePath : mergePaths) {
        FileInputFormat.addInputPath(job, mergePath);
    }
    FileOutputFormat.setOutputPath(job, newdb);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);

    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.waitForCompletion(true);

}
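
Again toString() only labels the job, so merges running against different crawl directories can be told apart by name.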

From source file: cn.edu.hfut.dmic.webcollector.fetcher.Fetcher.java

public static void fetch(Path crawlPath, String segmentName, Configuration conf) throws Exception {
    Path segmentPath = new Path(crawlPath, "segments/" + segmentName);
    Path generatePath = new Path(segmentPath, "generate");

    Job job = new Job(conf);
    job.setJobName("fetch " + crawlPath.toString());
    job.setJarByClass(Fetcher.class);

    job.setReducerClass(FetcherReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(FetcherOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);

    FileInputFormat.addInputPath(job, generatePath);
    FileOutputFormat.setOutputPath(job, segmentPath);

    job.waitForCompletion(true);
}
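
The fetch job follows the same convention, named after the crawl directory being processed.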

From source file: cn.edu.hfut.dmic.webcollectorcluster.crawler.Crawler.java

public void start(int depth) throws Exception {

    Configuration conf = CrawlerConfiguration.create();
    FileSystem fs = crawlDir.getFileSystem(conf);

    if (!resumable) {
        if (fs.exists(crawlDir)) {
            fs.delete(crawlDir);
        }
    }

    inject();

    for (int i = 0; i < depth; i++) {
        LogUtils.getLogger().info("starting depth " + (i + 1));
        String segmentName = SegmentUtils.createSegmengName();
        Path segmentPath = new Path(segments, segmentName);

        String[] args = new String[] { crawldb.toString(), segmentPath.toString() };
        ToolRunner.run(CrawlerConfiguration.create(), new Fetcher(), args);
        ToolRunner.run(CrawlerConfiguration.create(), new DbUpdater(), args);
    }

}
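
Besides job naming, this shows the other recurring use of toString(): flattening Path objects into plain strings so they can be handed to ToolRunner as command-line arguments for the Fetcher and DbUpdater tools.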

From source file: cn.edu.hfut.dmic.webcollectorcluster.generator.Injector.java

public void inject(Path crawlDir, ArrayList<String> urls)
        throws IOException, InterruptedException, ClassNotFoundException, Exception {
    Path crawldb = new Path(crawlDir, "crawldb");
    Configuration config = CrawlerConfiguration.create();
    System.out.println(config.get("mapred.jar"));
    FileSystem fs = crawldb.getFileSystem(config);
    Path tempdb = new Path(crawldb, "temp");
    if (fs.exists(tempdb)) {
        fs.delete(tempdb);
    }

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, config, new Path(tempdb, "info.avro"), Text.class,
            CrawlDatum.class);
    for (String url : urls) {
        CrawlDatum crawldatum = new CrawlDatum();
        crawldatum.setUrl(url);
        crawldatum.setStatus(CrawlDatum.STATUS_DB_INJECTED);
        writer.append(new Text(url), crawldatum);
        System.out.println("inject:" + url);
    }
    writer.close();

    String[] args = new String[] { crawldb.toString(), tempdb.toString() };

    ToolRunner.run(CrawlerConfiguration.create(), new Merge(), args);
    Merge.install(crawldb);

    if (fs.exists(tempdb)) {
        fs.delete(tempdb);
    }

}
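
As in Crawler.start() above, crawldb.toString() and tempdb.toString() convert the paths into the String arguments that the Merge tool receives through ToolRunner.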