List of usage examples for org.apache.hadoop.fs FileSystem get
public static FileSystem get(Configuration conf) throws IOException
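Before the project-specific examples below, here is a minimal, self-contained sketch of the basic pattern: build a Configuration, obtain the FileSystem it resolves to (HDFS, the local file system, etc., depending on fs.defaultFS), and read or write a Path. The class name, path, and message used here are placeholders for illustration only, not taken from any of the examples that follow. Note that FileSystem.get normally returns a cached instance shared within the JVM for the same URI and user, which is why most of the examples below do not close it explicitly.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws IOException {
        // fs.defaultFS (e.g. from core-site.xml on the classpath) decides whether this
        // resolves to HDFS, the local file system, or another implementation.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical path, used only for illustration.
        Path path = new Path("/tmp/filesystem-get-example.txt");

        // Write a small record, overwriting the file if it already exists.
        FSDataOutputStream out = fs.create(path, true);
        out.writeUTF("hello from FileSystem.get");
        out.close();

        // Read it back and report the file length.
        FSDataInputStream in = fs.open(path);
        System.out.println(in.readUTF() + " (" + fs.getFileStatus(path).getLen() + " bytes)");
        in.close();
    }
}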
From source file: FormatStoragePerformanceTest.java
License: Open Source License

static void doTextReadRand(int count) {
    try {
        String textFile = "MR_input_text/testPerformanceReadText";
        Path path = new Path(textFile);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataInputStream in = fs.open(path);
        InputStream stream = new BufferedInputStream(in);
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream));

        long begin = System.currentTimeMillis();

        // Note: the count parameter is overridden here.
        count = 1 * 1000;
        for (int i = 0; i < count; i++) {
            // Skip a random number of lines to simulate random access over the text file.
            int line = (int) (Math.random() * count);
            for (int j = 0; j < line; j++) {
                String value = reader.readLine();
                value = null;
            }
        }
        reader.close();

        long end = System.currentTimeMillis();

        String string = "text read rand, count:" + count + ", delay:" + (long) ((end - begin) / 1000) + " s";
        output.write(string.getBytes());
        System.out.println(string);
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println(e.getMessage());
    }
}
From source file: FormatStoragePerformanceTest.java
License: Open Source License

static void doTextReadSeq(int count, boolean var) {
    try {
        ArrayList<Integer> meta = new ArrayList<Integer>(10);
        for (int i = 0; i < 7; i++) {
            meta.add(i);
        }

        String textFile = "MR_input_text/testPerformanceReadText";
        if (var) {
            textFile += "_var";
        }
        Path path = new Path(textFile);

        FileSystem fs = FileSystem.get(new Configuration());
        FSDataInputStream in = fs.open(path);
        InputStream stream = new BufferedInputStream(in);
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream));

        long begin = System.currentTimeMillis();

        for (int i = 0; i < count; i++) {
            // Parse each comma-separated line field by field.
            String value = reader.readLine();
            String[] fields = value.split(",");

            Byte.valueOf(fields[0]);
            Short.valueOf(fields[1]);
            Integer.valueOf(fields[2]);
            Long.valueOf(fields[3]);
            Float.valueOf(fields[4]);
            Double.valueOf(fields[5]);
            if (var) {
                String.valueOf(fields[6]);
            }
        }
        reader.close();

        long end = System.currentTimeMillis();

        String string = "text read seq " + count + " record over, delay: " + ((end - begin) / 1000) + " s \n";
        System.out.println(string);
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println(e.getMessage());
    }
}
From source file: ColumnStoragePerformanceTest.java
License: Open Source License

static void doInitTextFile() {
    try {
        Path path = new Path(textFilename);
        FileSystem fs = FileSystem.get(conf);
        FSDataOutputStream out = fs.create(path);
        OutputStream stream = new BufferedOutputStream(out);
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(stream));

        String value = "111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten,111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten,111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten,111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten,111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten\n";

        long begin = System.currentTimeMillis();

        for (int i = 0; i < count; i++) {
            writer.write(value);
            if (i % 1000000 == 0) {
                String string = "write " + i + " record, delay: "
                        + ((System.currentTimeMillis() - begin) / 1000) + " s \n";
                output.write(string.getBytes());
            }
        }
        writer.close();
        out.close();

        long end = System.currentTimeMillis();

        String string = "write " + count + " record over(text), delay: " + ((end - begin) / 1000) + " s \n";
        output.write(string.getBytes());
        System.out.println(string);
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println(e.getMessage());
    }
}
From source file: ColumnStoragePerformanceTest.java
License: Open Source License

static void doTextReadSeq(int count) {
    try {
        ArrayList<Integer> meta = new ArrayList<Integer>(10);
        for (int i = 0; i < 7; i++) {
            meta.add(i);
        }

        Path path = new Path(textFilename);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataInputStream in = fs.open(path);
        InputStream stream = new BufferedInputStream(in);
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream));

        long begin = System.currentTimeMillis();

        for (int i = 0; i < count; i++) {
            String value = reader.readLine();
            String[] fields = value.split(",");

            // Re-read the same line as fixed-width byte chunks.
            ByteArrayInputStream bin = new ByteArrayInputStream(value.getBytes());

            meta.get(0);
            byte[] bb = new byte[4];
            bin.read(bb);

            meta.get(1);
            byte[] sb = new byte[6];
            bin.read(sb);

            meta.get(2);
            byte[] ib = new byte[9];
            bin.read(ib);

            meta.get(3);
            byte[] lb = new byte[13];
            bin.read(lb);

            meta.get(4);
            byte[] fb = new byte[13];
            bin.read(fb);

            meta.get(5);
            byte[] db = new byte[18];
            bin.read(db);

            meta.get(6);

            value = null;
        }
        reader.close();

        long end = System.currentTimeMillis();

        String string = "text read seq " + count + " record over, delay: " + ((end - begin) / 1000) + " s \n";
        System.out.println(string);
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println(e.getMessage());
    }
}
From source file: HdfsCacheReader.java
License: Apache License

public int run(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("HdfsReader [FileSize i.e. 1g/10g/100g/200g]");
        return 1;
    }

    double fileSize;
    double fileSizeInMB;
    if (args[0].equals("1g")) {
        fileSize = 1073741824.0;
        fileSizeInMB = 1024.0;
    } else if (args[0].equals("10g")) {
        fileSize = 10737418240.0;
        fileSizeInMB = 10240.0;
    } else if (args[0].equals("100g")) {
        fileSize = 107374182400.0;
        fileSizeInMB = 102400.0;
    } else if (args[0].equals("200g")) {
        fileSize = 214748364800.0;
        fileSizeInMB = 204800.0;
    } else {
        throw new IllegalArgumentException("Invalid arg: " + args[0]);
    }

    String fileName = "cacheRead-" + args[0] + "-avg.txt";
    File avgFile = new File(fileName);
    PrintWriter avgPW = new PrintWriter(avgFile);
    fileName = "cacheRead-" + args[0] + "-min.txt";
    File minFile = new File(fileName);
    PrintWriter minPW = new PrintWriter(minFile);
    fileName = "cacheRead-" + args[0] + "-max.txt";
    File maxFile = new File(fileName);
    PrintWriter maxPW = new PrintWriter(maxFile);

    int numIters = 10;
    int bufferSize = 65536;
    long blockSize[] = new long[] { 67108864, 134217728, 268435456, 536870912, 1073741824 };
    short replication[] = new short[] { 1, 4 };
    String hdfsFile = "/hdfs_test/" + args[0] + "/1.in";
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path hdfsFilePath = new Path(hdfsFile);

    for (int i = 0; i < 5; i++) { // blockSize
        for (int j = 0; j < 2; j++) { // replication
            // Write the test file with the current block size and replication factor.
            OutputStream os = fs.create(hdfsFilePath, true, bufferSize, replication[j], blockSize[i]);
            byte[] buf = new byte[bufferSize];
            for (int m = 0; m < bufferSize; m += 4) {
                buf[m] = (byte) m;
            }
            double numBufPerFile = fileSize / (double) bufferSize;
            for (double m = 0.0; m < numBufPerFile; m++) {
                os.write(buf);
            }
            os.close();

            // Add the file to the HDFS cache pool before reading.
            String cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -addDirective -path " + hdfsFile
                    + " -pool hdfs_test";
            Process p = Runtime.getRuntime().exec(cmdStr);
            p.waitFor();
            String cmdOutLine = "";
            StringBuffer cmdOut = new StringBuffer();
            BufferedReader cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream()));
            while ((cmdOutLine = cmdOutReader.readLine()) != null) {
                cmdOut.append(cmdOutLine + "\n");
            }
            // System.out.println(cmdOut.toString());

            long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE;
            for (int k = 0; k < numIters; k++) {
                // Zero-copy read of the whole file, skipping checksums.
                FSDataInputStream in = fs.open(hdfsFilePath);
                ByteBuffer bbuf = null;
                ElasticByteBufferPool ebbp = new ElasticByteBufferPool();
                long startTime = System.currentTimeMillis();
                while ((bbuf = in.read(ebbp, bufferSize, EnumSet.of(ReadOption.SKIP_CHECKSUMS))) != null) {
                    in.releaseBuffer(bbuf);
                }
                long endTime = System.currentTimeMillis();
                in.close();
                long duration = (endTime - startTime);
                avg += duration;
                if (duration < min) {
                    min = duration;
                }
                if (duration > max) {
                    max = duration;
                }
            }

            // Write bandwidth results to the output files.
            double avgBW = fileSizeInMB * 1000.0 * (double) numIters / (double) avg;
            avgPW.print(avgBW);
            avgPW.print("\t");
            double minBW = fileSizeInMB * 1000.0 / (double) max;
            minPW.print(minBW);
            minPW.print("\t");
            double maxBW = fileSizeInMB * 1000.0 / (double) min;
            maxPW.print(maxBW);
            maxPW.print("\t");

            cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -removeDirectives -path " + hdfsFile;
            p = Runtime.getRuntime().exec(cmdStr);
            p.waitFor();
            cmdOutLine = "";
            cmdOut.setLength(0);
            cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream()));
            while ((cmdOutLine = cmdOutReader.readLine()) != null) {
                cmdOut.append(cmdOutLine + "\n");
            }
            // System.out.println(cmdOut.toString());

            fs.delete(hdfsFilePath, true);
        }
        avgPW.println();
        minPW.println();
        maxPW.println();
    }
    avgPW.close();
    minPW.close();
    maxPW.close();
    return 0;
}
From source file: RunPersonalizedPageRankBasic.java
License: Apache License

private void phase1(int i, int j, String basePath, int numNodes, boolean useCombiner,
        boolean useInMapperCombiner) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("PageRank:Basic:iteration" + j + ":Phase1");
    job.setJarByClass(RunPersonalizedPageRankBasic.class);

    String in = basePath + "/iter" + formatter.format(i);
    String out = basePath + "/iter" + formatter.format(j);
    //String outm = out + "-mass";

    // We need to actually count the number of part files to get the number of partitions
    // (because the directory might contain _log).
    int numPartitions = 0;
    for (FileStatus s : FileSystem.get(getConf()).listStatus(new Path(in))) {
        if (s.getPath().getName().contains("part-"))
            numPartitions++;
    }

    LOG.info("PageRank: iteration " + j + ": Phase1");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);
    LOG.info(" - nodeCnt: " + numNodes);
    LOG.info(" - useCombiner: " + useCombiner);
    LOG.info(" - useInmapCombiner: " + useInMapperCombiner);
    LOG.info("computed number of partitions: " + numPartitions);

    int numReduceTasks = numPartitions;

    job.getConfiguration().setInt("NodeCount", numNodes);
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    //job.getConfiguration().set("mapred.child.java.opts", "-Xmx2048m");
    //job.getConfiguration().set("PageRankMassPath", outm);

    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNodeMultiSrc.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNodeMultiSrc.class);

    job.setMapperClass(/*useInMapperCombiner ? MapWithInMapperCombiningClass.class : */MapClass.class);
    if (useCombiner) {
        job.setCombinerClass(CombineClass.class);
    }
    job.setReducerClass(ReduceClass.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(new Path(out), true);
    //FileSystem.get(getConf()).delete(new Path(outm), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    //ArrayList<Float> mass;
    /*
    float[] masslist;
    FileSystem fs = FileSystem.get(getConf());
    int flag = 0;
    for (FileStatus f : fs.listStatus(new Path(outm))) {
        FSDataInputStream fin = fs.open(f.getPath());
        while (fin.available() > 0) {
            if (flag == 0) {
                mass.add(fin.readFloat());
                flag++;
            }
        }
        fin.close();
    }
    return mass;
    */
}
From source file: inMapperStripes.java
License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("window size").create(WINDOW));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;
    int window = cmdline.hasOption(WINDOW) ? Integer.parseInt(cmdline.getOptionValue(WINDOW)) : 2;

    LOG.info("Tool: " + inMapperStripes.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - window: " + window);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(inMapperStripes.class.getSimpleName());
    job.setJarByClass(inMapperStripes.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.getConfiguration().setInt("window", window);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(HMapSIW.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file: RunText.java
License: Apache License

public static void main(String[] args) throws Exception {
    o = new Options();
    JCommander jc = null;
    try {
        jc = new JCommander(o, args);
        jc.setProgramName("./runText");
    } catch (ParameterException e) {
        System.out.println(e.getMessage());
        String[] valid = { "-p", "path", "-d", "delimiter", "v", "value", "-i", "index" };
        new JCommander(o, valid).usage();
        System.exit(-1);
    }
    if (o.help) {
        jc.usage();
        System.exit(0);
    }

    path = new Path(o.path);
    delim = o.delimiter.getBytes()[0];
    toFind = o.value;
    index = o.index;
    numThreads = o.threads;

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    TextInputFormat format = new TextInputFormat();

    // Divide the file into one split per thread.
    long len = fs.getFileStatus(path).getLen() / numThreads;
    List<Thread> threads = Lists.newArrayList();
    for (int i = 0; i < numThreads; i++) {
        FileSplit split = new FileSplit(path, i * len, len, new String[] { "" });
        threads.add(new Thread(new RunText(split, format)));
    }
    runningThreads = new AtomicInteger(numThreads);
    for (Thread t : threads) {
        t.start();
    }

    // Report throughput every five seconds until all threads finish.
    int prev = 0;
    int current;
    long t1 = System.nanoTime();
    long t2;
    while (runningThreads.get() > 0) {
        Thread.sleep(5000);
        current = totalCount.get();
        t2 = System.nanoTime();
        System.out.println(String.format("%f records/sec", (current - prev) * 1e9 / (t2 - t1)));
        t1 = t2;
        prev = current;
    }
    for (Thread t : threads) {
        t.join();
    }
    fs.close();
}
From source file: RecordExtracting.java
License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("src").hasArg().withDescription("spamming users").create(SOURCES));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    // Input and output paths are hard-coded rather than taken from the command line.
    String inputPath = "xzzqskfinal/reviewsNew.txt";
    String outputPath = "xzzqskfinal/SpammingRecord";
    String src = cmdline.getOptionValue(SOURCES);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    LOG.info("Tool name: " + RecordExtracting.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);

    Configuration conf = getConf();
    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");
    conf.set(NODE_SRC, src);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(RatingSpamming.class.getSimpleName());
    job.setJarByClass(RecordExtracting.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file: DataHBase.java
License: Open Source License

public void run(HashMap<String, String> config) throws Exception {
    // Clean the former output if it exists.
    Path p = new Path(config.get("hdfs_output_dir"));
    FileSystem fs = FileSystem.get(new Configuration());
    if (fs.exists(p)) {
        fs.delete(p, true);
    }

    String junction = config.get("what_to_find"); // the name of the junction
    String date1 = config.get("date1");
    String date2 = config.get("date2");
    // date1 and date2 can be of a format YYYY-MM-DD
    if (date1.length() == 10)
        date1 = date1 + " 00:00:00";
    if (date2.length() == 10)
        date2 = date2 + " 23:59:59";
    System.out.println("Looking for data of " + junction + ": " + date1 + " - " + date2);

    // Create timestamps (considering time zone!) to limit data.
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    sdf.setTimeZone(TimeZone.getDefault());
    Long time1 = sdf.parse(date1).getTime();
    Long time2 = sdf.parse(date2).getTime();

    // Run a job.
    Configuration conf = HBaseConfiguration.create();
    conf.set("mapreduce.output.textoutputformat.separator", ","); // set comma as a delimiter

    Job job = new Job(conf, "Retrieve data from hbase");
    job.setJarByClass(DataHBase.class);

    Scan scan = new Scan();
    scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs
    scan.setMaxVersions(1);
    scan.setTimeRange(time1, time2); // take a day we are interested in

    // Set a filter for a junction name.
    if (!junction.equals("")) {
        SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("data"),
                Bytes.toBytes("location_name"), CompareOp.EQUAL, Bytes.toBytes(junction));
        scan.setFilter(filter);
    }
    // Add the specific columns to the output to limit the amount of data.
    scan.addFamily(Bytes.toBytes("data"));

    TableMapReduceUtil.initTableMapperJob(config.get("hbase_table"), // input HBase table name
            scan, // Scan instance to control CF and attribute selection
            TableMap.class, // mapper
            Text.class, // mapper output key
            Text.class, // mapper output value
            job);

    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(config.get("hdfs_output_dir")));

    job.waitForCompletion(true);
}