List of usage examples for org.apache.hadoop.fs.Path toString()
@Override
public String toString()
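The signature above is the standard Object.toString() override: Path.toString() renders the path as its full string form, including any scheme and authority supplied when the Path was built. A minimal sketch of this behavior (the HDFS URI and file names here are illustrative only, not taken from the examples below):

import org.apache.hadoop.fs.Path;

public class PathToStringDemo {
    public static void main(String[] args) {
        // Parent/child constructor; toString() returns the combined path string,
        // keeping the scheme and authority of the parent.
        Path child = new Path("hdfs://namenode:8020/user/data", "part-00000");
        System.out.println(child.toString()); // hdfs://namenode:8020/user/data/part-00000

        // A relative path has no scheme or authority to print.
        Path relative = new Path("output");
        System.out.println(relative.toString()); // output
    }
}

The examples below use toString() mainly to build error messages, job names, coprocessor specifications, and command-line arguments from Path objects.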
From source file:cc.solr.lucene.store.hdfs.HdfsFileReader.java
License:Apache License
public HdfsFileReader(FileSystem fileSystem, Path path, int bufferSize) throws IOException {
    if (!fileSystem.exists(path)) {
        throw new FileNotFoundException(path.toString());
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    _hdfsLength = fileStatus.getLen();
    _inputStream = fileSystem.open(path, bufferSize);

    // read meta blocks
    _inputStream.seek(_hdfsLength - 16);
    int numberOfBlocks = _inputStream.readInt();
    _length = _inputStream.readLong();
    int version = _inputStream.readInt();
    if (version != VERSION) {
        throw new RuntimeException("Version of file [" + version + "] does not match reader [" + VERSION + "]");
    }
    _inputStream.seek(_hdfsLength - 16 - (numberOfBlocks * 24)); // 3 longs per block
    _metaBlocks = new ArrayList<HdfsMetaBlock>(numberOfBlocks);
    for (int i = 0; i < numberOfBlocks; i++) {
        HdfsMetaBlock hdfsMetaBlock = new HdfsMetaBlock();
        hdfsMetaBlock.readFields(_inputStream);
        _metaBlocks.add(hdfsMetaBlock);
    }
    seek(0);
}
From source file:ch.sentric.hbase.coprocessor.LoadWithTableDescriptorExample.java
License:Apache License
public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(fs.getUri() + Path.SEPARATOR + "coprocessor-1.0-SNAPSHOT.jar");

    HTableDescriptor htd = new HTableDescriptor("testtable");
    htd.addFamily(new HColumnDescriptor("colfam1"));
    // Coprocessor attribute value format: <jar path>|<class name>|<priority>
    htd.setValue("COPROCESSOR$1", path.toString() + "|"
            + ProspectiveSearchRegionObserver.class.getCanonicalName() + "|" + Coprocessor.PRIORITY_USER);

    HBaseAdmin admin = new HBaseAdmin(conf);
    admin.createTable(htd);
    System.out.println(admin.getTableDescriptor(Bytes.toBytes("testtable")));
}
From source file:clustering.link_back.step1.SetKeyMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    Path filePath = fileSplit.getPath();
    this.joinOrder = filePath.toString().contains("mst") ? 1 : 2;
}
From source file:clustering.link_back.step2.SetKeyMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    Path filePath = fileSplit.getPath();
    this.joinOrder = filePath.toString().contains("step1") ? 1 : 2;
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.DBReader.java
public static void main(String[] args) throws Exception {
    Path crawlPath = new Path("task2");
    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path output = new Path("output");

    Configuration config = CrawlerConfiguration.create();
    FileSystem fs = FileSystem.get(config);
    if (fs.exists(output)) {
        fs.delete(output);
    }

    Job job = new Job(config);
    job.setJobName("dbreader " + crawlPath.toString());
    job.setMapperClass(DBReaderMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, output);
    job.waitForCompletion(true);
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.Generator.java
public static String generate(Path crawlPath, Configuration conf) throws Exception {
    SegmentUtil.initSegments(crawlPath, conf);
    String segmentName = SegmentUtil.createSegment(crawlPath, conf);
    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path generatePath = new Path(crawlPath, "segments/" + segmentName + "/generate");

    Job job = new Job(conf);
    job.setJobName("generate " + crawlPath.toString());
    job.setJarByClass(Generator.class);
    job.setReducerClass(GeneratorReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, generatePath);
    job.waitForCompletion(true);

    long count = job.getCounters().findCounter("generator", "count").getValue();
    System.out.println("total generate:" + count);
    if (count == 0) {
        return null;
    } else {
        return segmentName;
    }
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.Merge.java
public static void merge(Path crawlPath, Path[] mergePaths, Configuration conf, String jobName) throws Exception {
    Job job = new Job(conf);
    job.setJobName(jobName + " " + crawlPath.toString());
    job.setJarByClass(Merge.class);
    // job.getConfiguration().set("mapred", "/home/hu/mygit/WebCollector2/WebCollectorCluster/target/WebCollectorCluster-2.0.jar");
    Path crawldbPath = new Path(crawlPath, "crawldb");
    Path newdb = new Path(crawldbPath, "new");
    Path currentdb = new Path(crawldbPath, "current");

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(currentdb)) {
        FileInputFormat.addInputPath(job, currentdb);
    }
    if (fs.exists(newdb)) {
        fs.delete(newdb);
    }
    for (Path mergePath : mergePaths) {
        FileInputFormat.addInputPath(job, mergePath);
    }
    FileOutputFormat.setOutputPath(job, newdb);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.waitForCompletion(true);
}
From source file:cn.edu.hfut.dmic.webcollector.fetcher.Fetcher.java
public static void fetch(Path crawlPath, String segmentName, Configuration conf) throws Exception {
    Path segmentPath = new Path(crawlPath, "segments/" + segmentName);
    Path generatePath = new Path(segmentPath, "generate");

    Job job = new Job(conf);
    job.setJobName("fetch " + crawlPath.toString());
    job.setJarByClass(Fetcher.class);
    job.setReducerClass(FetcherReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(FetcherOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);
    FileInputFormat.addInputPath(job, generatePath);
    FileOutputFormat.setOutputPath(job, segmentPath);
    job.waitForCompletion(true);
}
From source file:cn.edu.hfut.dmic.webcollectorcluster.crawler.Crawler.java
public void start(int depth) throws Exception {
    Configuration conf = CrawlerConfiguration.create();
    FileSystem fs = crawlDir.getFileSystem(conf);
    if (!resumable) {
        if (fs.exists(crawlDir)) {
            fs.delete(crawlDir);
        }
    }
    inject();

    for (int i = 0; i < depth; i++) {
        LogUtils.getLogger().info("starting depth " + (i + 1));
        String segmentName = SegmentUtils.createSegmengName();
        Path segmentPath = new Path(segments, segmentName);
        // The crawl DB and segment locations are passed to the MapReduce tools as path strings.
        String[] args = new String[] { crawldb.toString(), segmentPath.toString() };
        ToolRunner.run(CrawlerConfiguration.create(), new Fetcher(), args);
        ToolRunner.run(CrawlerConfiguration.create(), new DbUpdater(), args);
    }
}
From source file:cn.edu.hfut.dmic.webcollectorcluster.generator.Injector.java
public void inject(Path crawlDir, ArrayList<String> urls)
        throws IOException, InterruptedException, ClassNotFoundException, Exception {
    Path crawldb = new Path(crawlDir, "crawldb");
    Configuration config = CrawlerConfiguration.create();
    System.out.println(config.get("mapred.jar"));
    FileSystem fs = crawldb.getFileSystem(config);
    Path tempdb = new Path(crawldb, "temp");
    if (fs.exists(tempdb)) {
        fs.delete(tempdb);
    }

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, config, new Path(tempdb, "info.avro"),
            Text.class, CrawlDatum.class);
    for (String url : urls) {
        CrawlDatum crawldatum = new CrawlDatum();
        crawldatum.setUrl(url);
        crawldatum.setStatus(CrawlDatum.STATUS_DB_INJECTED);
        writer.append(new Text(url), crawldatum);
        System.out.println("inject:" + url);
    }
    writer.close();

    String[] args = new String[] { crawldb.toString(), tempdb.toString() };
    ToolRunner.run(CrawlerConfiguration.create(), new Merge(), args);
    Merge.install(crawldb);

    if (fs.exists(tempdb)) {
        fs.delete(tempdb);
    }
}