List of usage examples for org.apache.hadoop.fs.FileSystem.get
public static FileSystem get(Configuration conf) throws IOException
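Before the per-project examples below, here is a minimal, self-contained sketch of the typical call pattern. The class name and the "/tmp" listing path are illustrative only, not taken from any of the source files below. FileSystem.get(conf) resolves the filesystem named by fs.defaultFS in the supplied Configuration: the local filesystem by default, HDFS when cluster configuration files are on the classpath.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws IOException {
        // Reads core-site.xml/hdfs-site.xml from the classpath, if present.
        Configuration conf = new Configuration();

        // Returns the FileSystem for the fs.defaultFS scheme. FileSystem.get()
        // returns a cached, shared instance, which is why the examples below
        // generally do not close it explicitly.
        FileSystem fs = FileSystem.get(conf);

        // "/tmp" is an arbitrary example path.
        for (FileStatus status : fs.listStatus(new Path("/tmp"))) {
            System.out.println(status.getPath());
        }
    }
}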
From source file:BooleanRetrievalCompressed.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws IOException {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostingsCompressed.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    FileSystem fs = FileSystem.get(new Configuration());
    initialize(indexPath, collectionPath, fs);

    String[] queries = { "outrageous fortune AND", "white rose AND", "means deceit AND",
            "white red OR rose AND pluck AND", "unhappy outrageous OR good your AND OR fortune AND" };

    for (String q : queries) {
        System.out.println("Query: " + q);
        runQuery(q);
        System.out.println("");
    }

    return 1;
}
From source file:HdfsReader.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("HdfsReader [FileSize i.e. 1g/10g/100g/200g]");
        return 1;
    }

    double fileSize;
    double fileSizeInMB;
    if (args[0].equals("1g")) {
        fileSize = 1073741824.0;
        fileSizeInMB = 1024.0;
    } else if (args[0].equals("10g")) {
        fileSize = 10737418240.0;
        fileSizeInMB = 10240.0;
    } else if (args[0].equals("100g")) {
        fileSize = 107374182400.0;
        fileSizeInMB = 102400.0;
    } else if (args[0].equals("200g")) {
        fileSize = 214748364800.0;
        fileSizeInMB = 204800.0;
    } else {
        throw new IllegalArgumentException("Invalid arg: " + args[0]);
    }

    String fileName = "read-" + args[0] + "-avg.txt";
    File avgFile = new File(fileName);
    PrintWriter avgPW = new PrintWriter(avgFile);
    fileName = "read-" + args[0] + "-min.txt";
    File minFile = new File(fileName);
    PrintWriter minPW = new PrintWriter(minFile);
    fileName = "read-" + args[0] + "-max.txt";
    File maxFile = new File(fileName);
    PrintWriter maxPW = new PrintWriter(maxFile);

    int numIters = 10;
    int bufferSize = 4096;
    long blockSize[] = new long[] { 67108864, 134217728, 268435456, 536870912, 1073741824 };
    short replication[] = new short[] { 1, 4 };
    String hdfsFile = "/hdfs_test/" + args[0] + "/1.in";

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path hdfsFilePath = new Path(hdfsFile);

    for (int i = 0; i < 5; i++) { // blockSize
        for (int j = 0; j < 2; j++) { // replication
            OutputStream os = fs.create(hdfsFilePath, true, bufferSize, replication[j], blockSize[i]);
            byte[] buf = new byte[bufferSize];
            for (int m = 0; m < bufferSize; m += 4) {
                buf[m] = (byte) m;
            }
            double numBufPerFile = fileSize / (double) bufferSize;
            for (double m = 0.0; m < numBufPerFile; m++) {
                os.write(buf);
            }
            os.close();

            long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE;
            for (int k = 0; k < numIters; k++) {
                InputStream is = fs.open(hdfsFilePath);
                long startTime = System.currentTimeMillis();
                int bytesRead = is.read(buf);
                while (bytesRead != -1) {
                    bytesRead = is.read(buf);
                }
                is.close();
                long endTime = System.currentTimeMillis();
                long duration = (endTime - startTime);
                avg += duration;
                if (duration < min) {
                    min = duration;
                }
                if (duration > max) {
                    max = duration;
                }
            }

            // write result to output
            double avgBW = fileSizeInMB * 1000.0 * (double) numIters / (double) avg;
            avgPW.print(avgBW);
            avgPW.print("\t");
            double minBW = fileSizeInMB * 1000.0 / (double) max;
            minPW.print(minBW);
            minPW.print("\t");
            double maxBW = fileSizeInMB * 1000.0 / (double) min;
            maxPW.print(maxBW);
            maxPW.print("\t");
        }
        avgPW.println();
        minPW.println();
        maxPW.println();
    }
    avgPW.close();
    minPW.close();
    maxPW.close();
    return 0;
}
From source file:Relevance.java
License:Apache License
/**
 * Exact relevance is slower; non-exact relevance will have false positives.
 */
protected void batch_query(Tap source, Tap output, Fields wantedFields, RelevanceFunction func, Tap keysTap,
        String keyField, boolean useBloom, int bloom_bits, int bloom_hashes, boolean exact) throws IOException {
    if (!useBloom && !exact)
        throw new IllegalArgumentException("Must either use bloom filter or be exact, or both!");

    FileSystem fs = FileSystem.get(new Configuration());
    Pipe finalPipe = new Pipe("data");
    finalPipe = new Each(finalPipe, wantedFields, new Identity());

    Map<String, Tap> sources = new HashMap<String, Tap>();
    sources.put("data", source);

    Map properties = new HashMap();
    String bloomFilterPath = "/tmp/" + UUID.randomUUID().toString() + ".bloomfilter";

    if (useBloom) {
        String jobId = UUID.randomUUID().toString();
        LOG.info("Creating bloom filter");
        writeOutBloomFilter(keysTap, keyField, fs, bloomFilterPath, bloom_bits, bloom_hashes);
        properties.put("mapred.job.reuse.jvm.num.tasks", -1);
        if (!TEST_MODE) {
            properties.put("mapred.cache.files", "hdfs://" + bloomFilterPath);
        } else {
            properties.put("batch_query.relevance.file", bloomFilterPath);
        }
        LOG.info("Done creating bloom filter");
        finalPipe = new Each(finalPipe, wantedFields, getRelevanceFilter(func, jobId));
    }

    if (exact) {
        sources.put("relevant", keysTap);
        Pipe relevantRecords = new Pipe("relevant");
        relevantRecords = new Each(relevantRecords, new Fields(keyField), new Identity());
        finalPipe = new Each(finalPipe, wantedFields, getExactFilter(func),
                Fields.join(wantedFields, new Fields(ID, RELEVANT_OBJECT)));
        finalPipe = new CoGroup(finalPipe, new Fields(RELEVANT_OBJECT), relevantRecords, new Fields(keyField),
                Fields.join(wantedFields, new Fields(ID, RELEVANT_OBJECT), new Fields("__ignored")));
        finalPipe = new Each(finalPipe, Fields.join(wantedFields, new Fields(ID)), new Identity());
        if (func.canHaveMultipleMatches()) {
            finalPipe = new Distinct(finalPipe, new Fields(ID));
        }
        finalPipe = new Each(finalPipe, wantedFields, new Identity());
    }

    Flow flow = new FlowConnector(properties).connect("Relevance: " + func.getClass().getSimpleName(), sources,
            output, finalPipe);
    flow.complete();

    if (useBloom)
        fs.delete(new Path(bloomFilterPath), false);
}
From source file:FlinkBootstrap.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        throw new IllegalArgumentException("Provide `TaskManager` or `JobManager` parameter with config folder");
    }

    // Load the Hadoop S3 wrapper classes explicitly; without this a
    // ClassNotFoundException is thrown.
    Class.forName("org.apache.flink.runtime.fs.hdfs.HadoopFileSystem");
    Class.forName("org.apache.hadoop.fs.s3a.S3AFileSystem");

    // Verify S3 is accessible.
    Configuration conf = new Configuration();
    conf.addResource(new Path("config/hadoop/core-site.xml"));
    conf.addResource(new Path("config/hadoop/hdfs-site.xml"));
    FileSystem fs = FileSystem.get(conf);
    fs.listStatus(new Path("s3://dir"));

    if (args[0].equals("TaskManager")) {
        TaskManager.main(new String[] { "--configDir", args[1], });
    } else if (args[0].equals("JobManager")) {
        JobManager.main(new String[] { "--configDir", args[1], "--executionMode", "cluster", });
    } else {
        throw new IllegalArgumentException("Unknown parameter `" + args[0] + "`");
    }
}
From source file:SBP.java
License:Apache License
protected static void copyToLocalFile(Configuration conf, Path hdfs_path, Path local_path) throws Exception {
    FileSystem fs = FileSystem.get(conf);

    // read the result
    fs.copyToLocalFile(hdfs_path, local_path);
}
From source file:SBP.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 11) {
        for (int i = 0; i < args.length; i++) {
            System.out.println("Args: " + i + " " + args[i]);
        }
        System.out.println(args.length);
        return printUsage();
    }

    lambda = Double.parseDouble(args[10]);
    edge_path = new Path(args[0]);
    prior_path = new Path(args[1]);
    output_path = new Path(args[2]);
    Path prev_local_path = new Path("run_tmp/prev_local/");
    Path new_local_path = new Path("run_tmp/new_local/");
    Path tmp_output_path = new Path(output_path.toString());
    number_msg = Long.parseLong(args[3]);
    nreducer = Integer.parseInt(args[4]);
    nreducer = 1;
    max_iter = Integer.parseInt(args[5]);
    nstate = Integer.parseInt(args[7]);
    edge_potential_str = read_edge_potential(args[8]);

    int cur_iter = 1;
    if (args[9].startsWith("new") == false) {
        cur_iter = Integer.parseInt(args[9].substring(4));
    }

    System.out.println("edge_path=" + edge_path.toString() + ", prior_path=" + prior_path.toString()
            + ", output_path=" + output_path.toString() + ", |E|=" + number_msg + ", nreducer=" + nreducer
            + ", maxiter=" + max_iter + ", nstate=" + nstate + ", edge_potential_str=" + edge_potential_str
            + ", cur_iter=" + cur_iter + ", lambda=" + lambda);

    fs = FileSystem.get(getConf());

    // Run Stage1 and Stage2.
    if (cur_iter == 1) {
        System.out.println("BP: Initializing messages...");
        JobClient.runJob(configInitMessage());
    }

    double converge_threshold = number_msg * EPS * nstate;

    for (int i = cur_iter; i <= max_iter; i++) {
        System.out.println(" *** ITERATION " + (i) + "/" + max_iter + " ***");

        JobClient.runJob(configUpdateMessage());
        JobClient.runJob(configSmoothMessage());
        JobClient.runJob(configCheckErr());
        JobClient.runJob(configSumErr());

        String line = readLocaldirOneline(sum_error_path.toString());
        fs.delete(check_error_path, true);
        fs.delete(sum_error_path, true);
        String[] parts = line.split("\t");
        int n = Integer.parseInt(parts[0]);
        double sum = Double.parseDouble(parts[1]);
        System.out.println("Converged Msg: " + (number_msg - n));
        System.out.println("Sum Error: " + sum);
        if (sum < converge_threshold) {
            break;
        }

        // rotate directory
        fs.delete(message_cur_path);
        fs.delete(message_next_path);
        fs.rename(message_smooth_path, message_cur_path);
    }

    System.out.println("BP: Computing beliefs...");
    JobClient.runJob(configComputeBelief());

    System.out.println("BP finished. The belief vector is in the HDFS " + args[2]);

    return 0;
}
From source file:ExportStressTest.java
License:Apache License
public void createFile(int fileId) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("ExportStressTest");
    fs.mkdirs(dirPath);
    Path filePath = new Path(dirPath, "input-" + fileId);
    OutputStream os = fs.create(filePath);
    Writer w = new BufferedWriter(new OutputStreamWriter(os));
    for (int i = 0; i < RECORDS_PER_FILE; i++) {
        long v = (long) i + ((long) RECORDS_PER_FILE * (long) fileId);
        w.write("" + v + "," + ALPHABET + ALPHABET + ALPHABET + ALPHABET + "\n");
    }
    w.close();
    os.close();
}
From source file:ExportStressTest.java
License:Apache License
/** Create a set of data files to export. */
public void createData() throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("ExportStressTest");
    if (fs.exists(dirPath)) {
        System.out.println("Export directory appears to already exist. Skipping data-gen.");
        return;
    }

    for (int i = 0; i < NUM_FILES; i++) {
        createFile(i);
    }
}
From source file:DumpRecordsExtended.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);

    LOG.info("Tool name: " + DumpRecordsExtended.class.getSimpleName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);

    Configuration conf = new Configuration();
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(DumpRecordsExtended.class.getSimpleName());
    job.setJarByClass(DumpRecordsExtended.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}
From source file:LookupQuery.java
License:Apache License
public static void initQuery(String[] args) throws IOException {
    indexPath = args[0];
    collectionPath = args[1];

    config = new Configuration();
    fs = FileSystem.get(config);
    reader = new MapFile.Reader(fs, indexPath, config);

    key = new Text();
    value = new ArrayListWritable<PairOfInts>();

    areThereMoreLookups = true;
    query = "";
    Qvalue = 0;
}