List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(URI aUri)
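All of the examples below build paths with the Path(String) convenience constructor, which parses the string into a URI internally; the Path(URI aUri) form shown above takes an already-constructed URI. A minimal sketch of both forms (not taken from the sources below; the namenode host and file name are illustrative):

import java.net.URI;

import org.apache.hadoop.fs.Path;

public class PathConstructorExample {
    public static void main(String[] args) {
        // Construct a Path from an explicit URI (scheme + authority + path).
        Path fromUri = new Path(URI.create("hdfs://namenode:8020/user/test/data.txt"));

        // Equivalent convenience form: the string is parsed into a URI internally.
        Path fromString = new Path("hdfs://namenode:8020/user/test/data.txt");

        System.out.println(fromUri.equals(fromString)); // should print true
    }
}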
From source file: FormatStoragePerformanceTest.java
License: Open Source License

static void doTextReadSeq(int count, boolean var) {
    try {
        ArrayList<Integer> meta = new ArrayList<Integer>(10);
        for (int i = 0; i < 7; i++) {
            meta.add(i);
        }

        String textFile = "MR_input_text/testPerformanceReadText";
        if (var) {
            textFile += "_var";
        }

        Path path = new Path(textFile);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataInputStream in = fs.open(path);
        InputStream stream = new BufferedInputStream(in);
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream));

        long begin = System.currentTimeMillis();
        for (int i = 0; i < count; i++) {
            String value = reader.readLine();
            String[] fields = value.split(",");

            // Parse every field so the benchmark measures the full decode cost.
            Byte.valueOf(fields[0]);
            Short.valueOf(fields[1]);
            Integer.valueOf(fields[2]);
            Long.valueOf(fields[3]);
            Float.valueOf(fields[4]);
            Double.valueOf(fields[5]);
            if (var) {
                String.valueOf(fields[6]);
            }
        }
        reader.close();

        long end = System.currentTimeMillis();
        String string = "text read seq " + count + " record over, delay: "
                + ((end - begin) / 1000) + " s \n";
        System.out.println(string);
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println(e.getMessage());
    }
}
From source file: ColumnStoragePerformanceTest.java
License: Open Source License

static void doInitTextFile() {
    try {
        Path path = new Path(textFilename);
        FileSystem fs = FileSystem.get(conf);
        FSDataOutputStream out = fs.create(path);
        OutputStream stream = new BufferedOutputStream(out);
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(stream));

        String value = "111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten,111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten,111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten,111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten,111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten\n";

        long begin = System.currentTimeMillis();
        for (int i = 0; i < count; i++) {
            writer.write(value);
            if (i % 1000000 == 0) {
                String string = "write " + i + " record, delay: "
                        + ((System.currentTimeMillis() - begin) / 1000) + " s \n";
                output.write(string.getBytes());
            }
        }
        writer.close();
        out.close(); // redundant: writer.close() already closed the underlying stream

        long end = System.currentTimeMillis();
        String string = "write " + count + " record over(text), delay: "
                + ((end - begin) / 1000) + " s \n";
        output.write(string.getBytes());
        System.out.println(string);
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println(e.getMessage());
    }
}
From source file: ColumnStoragePerformanceTest.java
License: Open Source License

static void doColumnReadSeq(ArrayList<Short> idx, int count, boolean compress) throws Exception {
    Path path = new Path(columnPrefix);
    ColumnStorageClient client = new ColumnStorageClient(path, idx, conf);

    for (int i = 0; i < count; i++) {
        try {
            // Sequential read: by line number when uncompressed, by cursor when compressed.
            Record record = compress ? client.getNextRecord() : client.getRecordByLine(i);
        } catch (Exception e) {
            System.out.println("get exception, line:" + i);
            System.exit(i);
        }
    }
    client.close();
}
From source file: ColumnStoragePerformanceTest.java
License: Open Source License

static void doTextReadSeq(int count) {
    try {
        ArrayList<Integer> meta = new ArrayList<Integer>(10);
        for (int i = 0; i < 7; i++) {
            meta.add(i);
        }

        Path path = new Path(textFilename);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataInputStream in = fs.open(path);
        InputStream stream = new BufferedInputStream(in);
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream));

        long begin = System.currentTimeMillis();
        for (int i = 0; i < count; i++) {
            String value = reader.readLine();
            String[] fields = value.split(",");

            ByteArrayInputStream bin = new ByteArrayInputStream(value.getBytes());

            meta.get(0);
            byte[] bb = new byte[4];
            bin.read(bb);

            meta.get(1);
            byte[] sb = new byte[6];
            bin.read(sb);

            meta.get(2);
            byte[] ib = new byte[9];
            bin.read(ib);

            meta.get(3);
            byte[] lb = new byte[13];
            bin.read(lb);

            meta.get(4);
            byte[] fb = new byte[13];
            bin.read(fb);

            meta.get(5);
            byte[] db = new byte[18];
            bin.read(db);

            meta.get(6);
            value = null;
        }
        reader.close();

        long end = System.currentTimeMillis();
        String string = "text read seq " + count + " record over, delay: "
                + ((end - begin) / 1000) + " s \n";
        System.out.println(string);
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println(e.getMessage());
    }
}
From source file: TestUtil.java
License: Open Source License

public static void genfdfseq(String datadir, int filenum, int recnum, boolean var, boolean overwrite)
        throws Exception {
    Head head = new Head();
    FieldMap fieldMap = new FieldMap();
    fieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));
    fieldMap.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) 1));
    fieldMap.addField(new Field(ConstVar.FieldType_Int, ConstVar.Sizeof_Int, (short) 2));
    fieldMap.addField(new Field(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, (short) 3));
    fieldMap.addField(new Field(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, (short) 4));
    fieldMap.addField(new Field(ConstVar.FieldType_Double, ConstVar.Sizeof_Double, (short) 5));
    if (var) {
        fieldMap.addField(new Field(ConstVar.FieldType_String, 0, (short) 6));
    }
    head.setFieldMap(fieldMap);

    datadir = datadir.endsWith("/") ? datadir : (datadir + "/");
    if (overwrite && fs.exists(new Path(datadir))) {
        fs.delete(new Path(datadir), true);
        fs.mkdirs(new Path(datadir));
    } else if (overwrite) {
        fs.mkdirs(new Path(datadir));
    }

    int x = fs.listStatus(new Path(datadir)).length;
    for (int i = 0; i < filenum; i++) {
        FormatDataFile fdf = new FormatDataFile(conf);
        fdf.create(datadir + "datafile" + (x + i), head);

        int fieldnum = var ? 7 : 6;
        for (int j = 0; j < recnum; j++) {
            Record record = new Record(fieldnum);
            record.addValue(new FieldValue((byte) j, (short) 0));
            record.addValue(new FieldValue((short) j, (short) 1));
            record.addValue(new FieldValue((int) j, (short) 2));
            record.addValue(new FieldValue((long) j, (short) 3));
            record.addValue(new FieldValue((float) j, (short) 4));
            record.addValue(new FieldValue((double) j, (short) 5));
            if (var) {
                record.addValue(new FieldValue("test", (short) 6));
            }
            fdf.addRecord(record);
        }
        fdf.close();
    }
}
From source file: TestUtil.java
License: Open Source License

public static void genfdfrandom(String datadir, int filenum, int recnum, boolean var, boolean overwrite)
        throws Exception {
    Head head = new Head();
    FieldMap fieldMap = new FieldMap();
    fieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));
    fieldMap.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) 1));
    fieldMap.addField(new Field(ConstVar.FieldType_Int, ConstVar.Sizeof_Int, (short) 2));
    fieldMap.addField(new Field(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, (short) 3));
    fieldMap.addField(new Field(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, (short) 4));
    fieldMap.addField(new Field(ConstVar.FieldType_Double, ConstVar.Sizeof_Double, (short) 5));
    if (var) {
        fieldMap.addField(new Field(ConstVar.FieldType_String, 0, (short) 6));
    }
    head.setFieldMap(fieldMap);

    datadir = datadir.endsWith("/") ? datadir : (datadir + "/");
    if (overwrite && fs.exists(new Path(datadir))) {
        fs.delete(new Path(datadir), true);
        fs.mkdirs(new Path(datadir));
    } else if (overwrite) {
        fs.mkdirs(new Path(datadir));
    }

    int x = fs.listStatus(new Path(datadir)).length;
    Random r = new Random();
    for (int i = 0; i < filenum; i++) {
        FormatDataFile fdf = new FormatDataFile(conf);
        fdf.create(datadir + "datafile" + (x + i), head);

        int fieldnum = var ? 7 : 6;
        for (int j = 0; j < recnum; j++) {
            Record record = new Record(fieldnum);
            record.addValue(new FieldValue((byte) r.nextInt(), (short) 0));
            record.addValue(new FieldValue((short) r.nextInt(), (short) 1));
            record.addValue(new FieldValue(r.nextInt(), (short) 2));
            record.addValue(new FieldValue(r.nextLong(), (short) 3));
            record.addValue(new FieldValue(r.nextFloat(), (short) 4));
            record.addValue(new FieldValue(r.nextDouble(), (short) 5));
            if (var) {
                record.addValue(new FieldValue("test", (short) 6));
            }
            fdf.addRecord(record); // without this call the generated files contain no records
        }
        fdf.close();
    }
}
From source file: TestUtil.java
License: Open Source License

public static void genifdfindex(String indexdir, int filenum, int recnum, short idx, boolean overwrite)
        throws IOException {
    indexdir = indexdir.endsWith("/") ? indexdir : (indexdir + "/");
    if (overwrite && fs.exists(new Path(indexdir))) {
        fs.delete(new Path(indexdir), true);
        fs.mkdirs(new Path(indexdir));
    }

    IHead head = new IHead();
    head.setPrimaryIndex(idx);

    IFieldMap map = new IFieldMap();
    map.addFieldType(new IFieldType.IFieldIntType());
    map.addFieldType(new IFieldType.IFieldShortType());
    map.addFieldType(new IFieldType.IFieldIntType());
    head.setFieldMap(map);

    IUserDefinedHeadInfo udi = new IUserDefinedHeadInfo();
    udi.addInfo(0, "testdata1");
    udi.addInfo(1, "testdata2");
    udi.addInfo(2, "testdata3");
    udi.addInfo(3, "testdata4");
    udi.addInfo(4, "testdata5");
    head.setUdi(udi);

    Random r = new Random();
    int x = fs.listStatus(new Path(indexdir)).length;
    for (int i = 0; i < filenum; i++) {
        IFormatDataFile ifdf = new IFormatDataFile(conf);
        ifdf.create(indexdir + "indexfile" + (x + i), head);

        int start = r.nextInt(5);
        for (int j = 0; j < recnum; j++) {
            IRecord rec = new IRecord();
            rec.addFieldValue(new IFieldValue(start));
            start += r.nextInt(5);
            rec.addFieldValue(new IFieldValue((short) r.nextInt(5)));
            rec.addFieldValue(new IFieldValue(r.nextInt(100000)));
            ifdf.addRecord(rec);
        }
        ifdf.close();
    }
}
From source file: TestUtil.java
License: Open Source License

public static void main(String[] args) throws Exception {
    String indexdir = "indexdir";
    StringBuffer sb = new StringBuffer();
    FileStatus[] ss = fs.listStatus(new Path(indexdir));
    for (FileStatus fileStatus : ss) {
        sb.append(fileStatus.getPath().toString()).append(",");
    }
    IndexMergeMR.run(sb.substring(0, sb.length() - 1), "indexdir1", conf);
}
From source file: HdfsCacheReader.java
License: Apache License

public int run(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("HdfsCacheReader [FileSize i.e. 1g/10g/100g/200g]");
        return 1;
    }

    double fileSize;
    double fileSizeInMB;
    if (args[0].equals("1g")) {
        fileSize = 1073741824.0;
        fileSizeInMB = 1024.0;
    } else if (args[0].equals("10g")) {
        fileSize = 10737418240.0;
        fileSizeInMB = 10240.0;
    } else if (args[0].equals("100g")) {
        fileSize = 107374182400.0;
        fileSizeInMB = 102400.0;
    } else if (args[0].equals("200g")) {
        fileSize = 214748364800.0;
        fileSizeInMB = 204800.0;
    } else {
        throw new IllegalArgumentException("Invalid arg: " + args[0]);
    }

    String fileName = "cacheRead-" + args[0] + "-avg.txt";
    File avgFile = new File(fileName);
    PrintWriter avgPW = new PrintWriter(avgFile);
    fileName = "cacheRead-" + args[0] + "-min.txt";
    File minFile = new File(fileName);
    PrintWriter minPW = new PrintWriter(minFile);
    fileName = "cacheRead-" + args[0] + "-max.txt";
    File maxFile = new File(fileName);
    PrintWriter maxPW = new PrintWriter(maxFile);

    int numIters = 10;
    int bufferSize = 65536;
    long[] blockSize = new long[] { 67108864, 134217728, 268435456, 536870912, 1073741824 };
    short[] replication = new short[] { 1, 4 };
    String hdfsFile = "/hdfs_test/" + args[0] + "/1.in";
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path hdfsFilePath = new Path(hdfsFile);

    for (int i = 0; i < 5; i++) { // blockSize
        for (int j = 0; j < 2; j++) { // replication
            // Write the test file with the current block size and replication.
            OutputStream os = fs.create(hdfsFilePath, true, bufferSize, replication[j], blockSize[i]);
            byte[] buf = new byte[bufferSize];
            for (int m = 0; m < bufferSize; m += 4) {
                buf[m] = (byte) m;
            }
            double numBufPerFile = fileSize / (double) bufferSize;
            for (double m = 0.0; m < numBufPerFile; m++) {
                os.write(buf);
            }
            os.close();

            // Pin the file into the HDFS centralized cache.
            String cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -addDirective -path " + hdfsFile
                    + " -pool hdfs_test";
            Process p = Runtime.getRuntime().exec(cmdStr);
            p.waitFor();
            String cmdOutLine = "";
            StringBuffer cmdOut = new StringBuffer();
            BufferedReader cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream()));
            while ((cmdOutLine = cmdOutReader.readLine()) != null) {
                cmdOut.append(cmdOutLine + "\n");
            }

            long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE;
            for (int k = 0; k < numIters; k++) {
                FSDataInputStream in = fs.open(hdfsFilePath);
                ByteBuffer bbuf = null;
                ElasticByteBufferPool ebbp = new ElasticByteBufferPool();
                long startTime = System.currentTimeMillis();
                // Zero-copy read path; checksums are skipped for cached data.
                while ((bbuf = in.read(ebbp, bufferSize, EnumSet.of(ReadOption.SKIP_CHECKSUMS))) != null) {
                    in.releaseBuffer(bbuf);
                }
                long endTime = System.currentTimeMillis();
                in.close();
                long duration = (endTime - startTime);
                avg += duration;
                if (duration < min) {
                    min = duration;
                }
                if (duration > max) {
                    max = duration;
                }
            }

            // Write bandwidth results (MB/s) to the output files.
            double avgBW = fileSizeInMB * 1000.0 * (double) numIters / (double) avg;
            avgPW.print(avgBW);
            avgPW.print("\t");
            double minBW = fileSizeInMB * 1000.0 / (double) max;
            minPW.print(minBW);
            minPW.print("\t");
            double maxBW = fileSizeInMB * 1000.0 / (double) min;
            maxPW.print(maxBW);
            maxPW.print("\t");

            // Drop the cache directive and remove the test file.
            cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -removeDirectives -path " + hdfsFile;
            p = Runtime.getRuntime().exec(cmdStr);
            p.waitFor();
            cmdOutLine = "";
            cmdOut.setLength(0);
            cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream()));
            while ((cmdOutLine = cmdOutReader.readLine()) != null) {
                cmdOut.append(cmdOutLine + "\n");
            }

            fs.delete(hdfsFilePath, true);
        }
        avgPW.println();
        minPW.println();
        maxPW.println();
    }
    avgPW.close();
    minPW.close();
    maxPW.close();
    return 0;
}
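The benchmark above drives the HDFS centralized cache by shelling out to the hdfs cacheadmin CLI. The same directives can be issued in-process through the DistributedFileSystem API; a minimal sketch (not part of the original source, assuming fs.defaultFS points at an HDFS cluster and the hdfs_test cache pool already exists):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;

public class CacheDirectiveExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // The cast is safe only when fs.defaultFS is an hdfs:// URI.
        DistributedFileSystem dfs = (DistributedFileSystem) FileSystem.get(conf);

        // Equivalent of "hdfs cacheadmin -addDirective -path ... -pool hdfs_test".
        long directiveId = dfs.addCacheDirective(new CacheDirectiveInfo.Builder()
                .setPath(new Path("/hdfs_test/1g/1.in"))
                .setPool("hdfs_test")
                .build());

        // ... timed reads would run here ...

        // Equivalent of "hdfs cacheadmin -removeDirectives -path ...".
        dfs.removeCacheDirective(directiveId);
        dfs.close();
    }
}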
From source file: RunPersonalizedPageRankBasic.java
License: Apache License

private void phase1(int i, int j, String basePath, int numNodes, boolean useCombiner,
        boolean useInMapperCombiner) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("PageRank:Basic:iteration" + j + ":Phase1");
    job.setJarByClass(RunPersonalizedPageRankBasic.class);

    String in = basePath + "/iter" + formatter.format(i);
    String out = basePath + "/iter" + formatter.format(j);

    // Count the part files to determine the number of partitions, because the
    // input directory may also contain entries such as _logs.
    int numPartitions = 0;
    for (FileStatus s : FileSystem.get(getConf()).listStatus(new Path(in))) {
        if (s.getPath().getName().contains("part-")) {
            numPartitions++;
        }
    }

    LOG.info("PageRank: iteration " + j + ": Phase1");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);
    LOG.info(" - nodeCnt: " + numNodes);
    LOG.info(" - useCombiner: " + useCombiner);
    LOG.info(" - useInmapCombiner: " + useInMapperCombiner);
    LOG.info("computed number of partitions: " + numPartitions);

    int numReduceTasks = numPartitions;

    job.getConfiguration().setInt("NodeCount", numNodes);
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);

    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNodeMultiSrc.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNodeMultiSrc.class);

    job.setMapperClass(MapClass.class);
    if (useCombiner) {
        job.setCombinerClass(CombineClass.class);
    }
    job.setReducerClass(ReduceClass.class);

    // Delete the output directory if it already exists.
    FileSystem.get(getConf()).delete(new Path(out), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
}
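The hand-rolled loop above that counts part files can also be written with a PathFilter passed to FileSystem.listStatus; a minimal sketch (not from the original source; the directory name is illustrative):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PartFileCounter {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());

        // listStatus applies the filter to each directory entry, so _logs,
        // _SUCCESS, and other non-part files are never counted.
        PathFilter partFilter = new PathFilter() {
            @Override
            public boolean accept(Path p) {
                return p.getName().contains("part-");
            }
        };

        int numPartitions = fs.listStatus(new Path("pagerank/iter0000"), partFilter).length;
        System.out.println("computed number of partitions: " + numPartitions);
    }
}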