List of usage examples for org.apache.hadoop.fs FileSystem get
public static FileSystem get(Configuration conf) throws IOException
From source file:backup.namenode.NameNodeBackupBlockCheckProcessor.java
License:Apache License
public NameNodeBackupBlockCheckProcessor(Configuration conf, NameNodeRestoreProcessor processor, NameNode namenode, UserGroupInformation ugi) throws Exception { String[] nnStorageLocations = conf.getStrings(DFS_NAMENODE_NAME_DIR); URI uri = new URI(nnStorageLocations[0]); _reportPath = new File(new File(uri.getPath()).getParent(), "backup-reports"); _reportPath.mkdirs();/*from w w w . ja va 2s . c o m*/ if (!_reportPath.exists()) { throw new IOException("Report path " + _reportPath + " does not exist"); } this.ugi = ugi; this.namenode = namenode; this.conf = conf; this.processor = processor; backupStore = BackupStore.create(BackupUtil.convert(conf)); this.fileSystem = (DistributedFileSystem) FileSystem.get(conf); this.ignorePath = conf.get(DFS_BACKUP_IGNORE_PATH_FILE_KEY, DFS_BACKUP_IGNORE_PATH_FILE_DEFAULT); this.batchSize = conf.getInt(DFS_BACKUP_REMOTE_BACKUP_BATCH_KEY, DFS_BACKUP_REMOTE_BACKUP_BATCH_DEFAULT); this.checkInterval = conf.getLong(DFS_BACKUP_NAMENODE_BLOCK_CHECK_INTERVAL_KEY, DFS_BACKUP_NAMENODE_BLOCK_CHECK_INTERVAL_DEFAULT); this.initInterval = conf.getLong(DFS_BACKUP_NAMENODE_BLOCK_CHECK_INTERVAL_DELAY_KEY, DFS_BACKUP_NAMENODE_BLOCK_CHECK_INTERVAL_DELAY_DEFAULT); start(); }
From source file:basic.PartitionGraph.java
License:Apache License
/** * Runs this tool.//from w w w . j a va 2 s. co m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(RANGE, "use range partitioner")); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption( OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions") .create(NUM_PARTITIONS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES) || !cmdline.hasOption(NUM_PARTITIONS)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inPath = cmdline.getOptionValue(INPUT); String outPath = cmdline.getOptionValue(OUTPUT); int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES)); int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS)); boolean useRange = cmdline.hasOption(RANGE); LOG.info("Tool name: " + PartitionGraph.class.getSimpleName()); LOG.info(" - input dir: " + inPath); LOG.info(" - output dir: " + outPath); LOG.info(" - num partitions: " + numParts); LOG.info(" - node cnt: " + nodeCount); LOG.info(" - use range partitioner: " + useRange); Configuration conf = getConf(); conf.setInt("NodeCount", nodeCount); Job job = Job.getInstance(conf); job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath); job.setJarByClass(PartitionGraph.class); job.setNumReduceTasks(numParts); FileInputFormat.setInputPaths(job, new Path(inPath)); FileOutputFormat.setOutputPath(job, new Path(outPath)); job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNode.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PageRankNode.class); if (useRange) { job.setPartitionerClass(RangePartitioner.class); } FileSystem.get(conf).delete(new Path(outPath), true); job.waitForCompletion(true); return 0; }
From source file:bb.BranchAndBound.java
License:Apache License
static Job getJob(String input, String output, String dataDir, int iteration) throws Exception { Configuration conf = new Configuration(); FileSystem hdfs = FileSystem.get(conf); FileStatus[] fileStatus = hdfs.listStatus(new Path(input)); for (int i = 0; i < fileStatus.length; ++i) { if (fileStatus[i].getLen() == 0) { hdfs.delete(fileStatus[i].getPath()); }//from w w w. j av a2 s . c o m } DistributedCache.addCacheFile(new URI(dataDir + "/data"), conf); Job ret = new Job(conf, dataDir + "_iteration_" + iteration); ret.setJarByClass(BranchAndBound.class); ret.setMapperClass(BBMapper1.class); ret.setReducerClass(BBReducer.class); //ret.setReducerClass(MergeReducer.class); FileInputFormat.setInputPaths(ret, new Path(input)); //if( iteration > 7 ) FileInputFormat.setMinInputSplitSize(ret, 67108864); FileOutputFormat.setOutputPath(ret, new Path(output)); ret.setOutputKeyClass(NullWritable.class); ret.setOutputValueClass(Text.class); return ret; }
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private void startMining(FIMOptions opt) throws IOException, ClassNotFoundException, InterruptedException { String inputFilesDir = opt.outputDir + separator + "pg" + separator; String outputFile = opt.outputDir + separator + OFis; System.out.println("[StartMining]: input: " + inputFilesDir + ", output: " + outputFile); Job job = prepareJob(new Path(inputFilesDir), new Path(outputFile), NoSplitSequenceFileInputFormat.class, EclatMinerMapper.class, Text.class, Text.class, EclatMinerReducer.class, Text.class, Text.class, TextOutputFormat.class); job.setJobName("Start Mining"); job.setJarByClass(BigFIMDriver.class); job.setNumReduceTasks(1);/*from www . j a v a 2 s. c o m*/ Configuration conf = job.getConfiguration(); setConfigurationValues(conf, opt); List<Path> inputPaths = new ArrayList<Path>(); FileStatus[] listStatus = FileSystem.get(conf).globStatus(new Path(inputFilesDir + "bucket*")); for (FileStatus fstat : listStatus) { inputPaths.add(fstat.getPath()); } setInputPaths(job, inputPaths.toArray(new Path[inputPaths.size()])); runJob(job, "Mining"); }
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/** * Starts the third MapReduce cycle. Each mapper reads the prefix groups assigned to it and computes the collection of * closed sets. All information is reported to the reducer which finally writes the output to disk. * /* www. j ava2s . c om*/ * * @param inputDir * @param config * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException * @throws URISyntaxException */ private void startMining(String inputDir, FIMOptions opt) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException { String inputFilesDir = inputDir; String outputFile = opt.outputDir + separator + OFis; System.out.println("[StartMining]: input: " + inputFilesDir + ", output: " + outputFile); Job job = prepareJob(new Path(inputFilesDir), new Path(outputFile), NoSplitSequenceFileInputFormat.class, EclatMinerMapper.class, Text.class, Text.class, EclatMinerReducer.class, Text.class, Text.class, TextOutputFormat.class); job.setJobName("Start Mining"); job.setJarByClass(DistEclatDriver.class); job.setNumReduceTasks(1); Configuration conf = job.getConfiguration(); setConfigurationValues(conf, opt); List<Path> inputPaths = new ArrayList<Path>(); FileStatus[] listStatus = FileSystem.get(conf).globStatus(new Path(inputFilesDir + "bucket*")); for (FileStatus fstat : listStatus) { inputPaths.add(fstat.getPath()); } if (inputPaths.isEmpty()) { System.out.println("[StartMining]: No prefixes to extend further"); return; } setInputPaths(job, inputPaths.toArray(new Path[inputPaths.size()])); runJob(job, "Mining"); }
From source file:bigfat.hadoop.HDFSDirInputStream.java
License:Apache License
public HDFSDirInputStream(String dir) throws IOException { this(FileSystem.get(new Configuration()), dir, null); }
From source file:bigfat.hadoop.HDFSDirInputStream.java
License:Apache License
/** * Test case, input int dir wants to read and the file to output * //from w ww. j a v a 2 s . c o m * @param args * @throws IOException */ public static void main(String args[]) throws IOException { Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); HDFSDirInputStream inp = new HDFSDirInputStream(fs, args[0]); FileOutputStream ops = new FileOutputStream(args[1]); int r; while ((r = inp.read()) != -1) { ops.write(r); } ops.close(); }
From source file:biglayer.AutoCoder.java
License:Apache License
/** * Runs this tool./*from ww w. j ava2 s.c o m*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } /*if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; }*/ //String inputPath = cmdline.getOptionValue(INPUT); //String outputPath = cmdline.getOptionValue(OUTPUT); String inputPath = "qiwang321/MNIST-mingled-key/part*"; String outputPath = "shangfu/layeroutput"; int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool: " + AutoCoder.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - number of reducers: " + reduceTasks); Configuration conf = getConf(); conf.setInt("num_reduce_task", reduceTasks); conf.set("sidepath", outputPath + "_side/"); Job job0 = Job.getInstance(conf); job0.setJobName(AutoCoder.class.getSimpleName()); job0.setJarByClass(AutoCoder.class); job0.setNumReduceTasks(reduceTasks); job0.getConfiguration().setInt("layer_ind", 0); FileInputFormat.setInputPaths(job0, new Path(inputPath)); FileOutputFormat.setOutputPath(job0, new Path(outputPath + "_0")); job0.setInputFormatClass(KeyValueTextInputFormat.class); job0.setOutputFormatClass(SequenceFileOutputFormat.class); job0.setMapOutputKeyClass(PairOfInts.class); job0.setMapOutputValueClass(ModelNode.class); job0.setOutputKeyClass(PairOfInts.class); job0.setOutputValueClass(ModelNode.class); job0.setMapperClass(MyMapper0.class); job0.setReducerClass(MyReducer0.class); job0.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath + "_0"); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); long codeStart = System.currentTimeMillis(); double codeTimeSum = 0; job0.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; for (int iterations = 1; iterations < GlobalUtil.NUM_LAYER + 1; iterations++) { Job job1 = Job.getInstance(conf); job1.setJobName(AutoCoder.class.getSimpleName()); job1.setJarByClass(AutoCoder.class); job1.setNumReduceTasks(reduceTasks); job1.getConfiguration().setInt("layer_ind", iterations); FileInputFormat.setInputPaths(job1, new Path(outputPath + "_" + (iterations - 1))); FileOutputFormat.setOutputPath(job1, new Path(outputPath + "_" + iterations + "_train")); LOG.info("Tool: " + AutoCoder.class.getSimpleName()); LOG.info(" - input path: " + outputPath + "_" + (iterations - 1)); LOG.info(" - output path: " + outputPath + "_" + iterations + "_train"); LOG.info(" - number of reducers: " + reduceTasks); job1.setInputFormatClass(SequenceFileInputFormat.class); job1.setOutputFormatClass(SequenceFileOutputFormat.class); job1.setMapOutputKeyClass(PairOfInts.class); job1.setMapOutputValueClass(ModelNode.class); job1.setOutputKeyClass(PairOfInts.class); job1.setOutputValueClass(ModelNode.class); job1.setMapperClass(MyMapper.class); job1.setReducerClass(MyReducer_Train.class); job1.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. outputDir = new Path(outputPath + "_" + iterations + "_train"); FileSystem.get(getConf()).delete(outputDir, true); startTime = System.currentTimeMillis(); job1.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; Job job2 = Job.getInstance(conf); job2.setJobName(AutoCoder.class.getSimpleName()); job2.setJarByClass(AutoCoder.class); job2.setNumReduceTasks(reduceTasks); job2.getConfiguration().setInt("layer_ind", iterations); FileInputFormat.setInputPaths(job2, new Path(outputPath + "_" + (iterations + "_train"))); FileOutputFormat.setOutputPath(job2, new Path(outputPath + "_" + iterations)); LOG.info("Tool: " + AutoCoder.class.getSimpleName()); LOG.info(" - input path: " + outputPath + "_" + iterations + "_train"); LOG.info(" - output path: " + outputPath + "_" + iterations); LOG.info(" - number of reducers: " + reduceTasks); job2.setInputFormatClass(SequenceFileInputFormat.class); job2.setOutputFormatClass(SequenceFileOutputFormat.class); job2.setMapOutputKeyClass(PairOfInts.class); job2.setMapOutputValueClass(ModelNode.class); job2.setOutputKeyClass(PairOfInts.class); job2.setOutputValueClass(ModelNode.class); job2.setMapperClass(MyMapper.class); job2.setReducerClass(MyReducer_GenData.class); job2.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. outputDir = new Path(outputPath + "_" + iterations); FileSystem.get(getConf()).delete(outputDir, true); startTime = System.currentTimeMillis(); job2.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; } LOG.info(" - input path: " + outputPath + "_" + GlobalUtil.NUM_LAYER); LOG.info(" - output path: " + outputPath); reduceTasks = 1; LOG.info(" - number of reducers: " + reduceTasks); Job job_super = Job.getInstance(conf); job_super.setJobName(AutoCoder.class.getSimpleName()); job_super.setJarByClass(AutoCoder.class); job_super.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job_super, new Path(outputPath + "_" + GlobalUtil.NUM_LAYER)); FileOutputFormat.setOutputPath(job_super, new Path(outputPath)); job_super.setInputFormatClass(SequenceFileInputFormat.class); job_super.setOutputFormatClass(SequenceFileOutputFormat.class); job_super.setMapOutputKeyClass(PairOfInts.class); job_super.setMapOutputValueClass(ModelNode.class); job_super.setOutputKeyClass(NullWritable.class); job_super.setOutputValueClass(NullWritable.class); job_super.setMapperClass(MyMapper_Super.class); job_super.setReducerClass(MyReducer_Super.class); job_super.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); startTime = System.currentTimeMillis(); job_super.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; Log.info("Final Time: " + ((System.currentTimeMillis() - codeStart) / 1000.0) + " seconds, " + codeTimeSum + " seconds."); //prepareNextIteration(inputPath0, outputPath,iterations,conf,reduceTasks); return 0; }
From source file:bigmodel.AutoCoderLocal.java
License:Apache License
/** * Runs this tool.//from www . j a v a2 s . co m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT) + "/part-r-00000"; String outputPath = cmdline.getOptionValue(OUTPUT); String dataPath = cmdline.getOptionValue(INPUT) + "/common"; //String inputPath = "/home/qiwang321/mapreduce-data/data/in-mingled1-5/part*"; //String outputPath = "output"; //String dataPath = "/home/qiwang321/mapreduce-data/data/in-mingled1-5/common"; int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool: " + AutoCoderLocal.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - number of reducers: " + reduceTasks); Configuration conf = getConf(); initialParameters(conf); conf.set("dataPath", dataPath); Job job = Job.getInstance(conf); job.setJobName(AutoCoderLocal.class.getSimpleName()); job.setJarByClass(AutoCoderLocal.class); // set the path of the information of k clusters in this iteration job.getConfiguration().set("sidepath", inputPath + "/side_output"); job.setNumReduceTasks(reduceTasks); dataShuffle(); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); FileInputFormat.setMinInputSplitSize(job, 1000 * 1024 * 1024); FileInputFormat.setMaxInputSplitSize(job, 1000 * 1024 * 1024); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(ModelNode.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(SuperModel.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); job.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); //prepareNextIteration(inputPath0, outputPath,iterations,conf,reduceTasks); return 0; }
From source file:bigsatgps.BigDataHandler.java
License:Open Source License
/** * * @param infile//from www. j av a2s . c o m * @return * @throws Exception */ public String ImageToSequence(String infile) throws Exception { String log4jConfPath = "lib/log4j.properties"; PropertyConfigurator.configure(log4jConfPath); confHadoop = new Configuration(); confHadoop.addResource(new Path("/hadoop/projects/hadoop-1.0.4/conf/core-site.xml")); confHadoop.addResource(new Path("/hadoop/projects/hadoop-1.0.4/conf/hdfs-site.xml")); FileSystem fs = FileSystem.get(confHadoop); Path inPath = new Path(infile); String outfile = infile.substring(0, infile.indexOf(".")) + ".seq"; Path outPath = new Path(outfile); System.out.println(); System.out.println("Successfully created the sequencefile " + outfile); FSDataInputStream in = null; Text key = new Text(); BytesWritable value = new BytesWritable(); SequenceFile.Writer writer = null; try { in = fs.open(inPath); byte buffer[] = new byte[in.available()]; in.read(buffer); writer = SequenceFile.createWriter(fs, confHadoop, outPath, key.getClass(), value.getClass()); writer.append(new Text(inPath.getName()), new BytesWritable(buffer)); IOUtils.closeStream(writer); return outfile; } catch (IOException e) { System.err.println("Exception MESSAGES = " + e.getMessage()); IOUtils.closeStream(writer); return null; } }