List of usage examples for the org.apache.hadoop.fs.Path constructor Path
public Path(URI aUri)
From source file:StringRelevance.java
License:Apache License
/**
 * Builds a {@code BytesBloomFilter} from the string value of {@code keyField} in every tuple
 * read from {@code keysTap} and writes the filter to {@code path} on {@code fs}.
 *
 * @param keysTap      tap whose tuples supply the keys
 * @param keyField     field read from each tuple via {@code getString}
 * @param fs           file system the filter is written to
 * @param path         destination path of the written filter
 * @param bloom_bits   first argument of the {@code BytesBloomFilter} constructor
 * @param bloom_hashes second argument of the {@code BytesBloomFilter} constructor
 * @throws IOException propagated from the tap iterator or from writing the filter
 */
@Override
protected void writeOutBloomFilter(Tap keysTap, String keyField, FileSystem fs, String path, int bloom_bits,
        int bloom_hashes) throws IOException {
    BytesBloomFilter filter = new BytesBloomFilter(bloom_bits, bloom_hashes);
    TupleEntryIterator it = new TupleEntryIterator(keysTap.getSourceFields(),
            new TapIterator(keysTap, new JobConf()));
    try {
        while (it.hasNext()) {
            TupleEntry t = it.next();
            String s = t.getString(keyField);
            // NOTE(review): getBytes() uses the platform default charset; it is left unchanged so the
            // bytes keep matching whatever code queries this filter - confirm before pinning a charset.
            filter.add(s.getBytes());
        }
    } finally {
        // Release the iterator even when reading a tuple fails.
        it.close();
    }
    filter.writeToFileSystem(fs, new Path(path));
}
From source file:PT.java
License:Open Source License
public static void testgenrawfile(String filename, int recordnum) throws IOException { Random r = new Random(); FSDataOutputStream fos = fs.create(new Path(filename)); StringBuffer sb = new StringBuffer(); for (int i = 0; i < recordnum; i++) { fos.writeByte(i / 1000);//from w w w .j a v a2 s .com fos.writeShort(i / 1000); fos.writeInt(i / 1000); fos.writeLong(i / 1000); fos.writeFloat(i / 1000); fos.writeDouble(i / 1000); int strnum = r.nextInt(12) + 7; sb.setLength(0); for (int j = 0; j < strnum; j++) { sb.append((char) ('a' + j)); } fos.writeUTF(sb.toString()); if (i % 1000000 == 0) { } } fos.close(); }
From source file:PT.java
License:Open Source License
/**
 * Writes {@code recordnum} records of random data to a newly created file {@code filename}.
 *
 * Each record is a random byte, short, int, long, float and double followed by a UTF string
 * of 7 to 18 random lowercase letters.
 *
 * @param filename  path of the file to create
 * @param recordnum number of records to write
 * @throws IOException if creating or writing the file fails
 */
public static void testgenrawfiler(String filename, int recordnum) throws IOException {
    Random r = new Random();
    FSDataOutputStream fos = fs.create(new Path(filename));
    try {
        // Only used by this thread, so the unsynchronized builder is sufficient.
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < recordnum; i++) {
            fos.writeByte(r.nextInt());
            fos.writeShort(r.nextInt());
            fos.writeInt(r.nextInt());
            fos.writeLong(r.nextLong());
            fos.writeFloat(r.nextFloat());
            fos.writeDouble(r.nextDouble());

            int strnum = r.nextInt(12) + 7;
            sb.setLength(0);
            for (int j = 0; j < strnum; j++) {
                sb.append((char) ('a' + r.nextInt(26)));
            }
            fos.writeUTF(sb.toString());
        }
    } finally {
        // Close the stream even when a write fails part-way through.
        fos.close();
    }
}
From source file:PT.java
License:Open Source License
public static void testwritefdf(String filename, String fromfile, boolean compress, short keyindex) throws Exception { FSDataInputStream fis = fs.open(new Path(fromfile)); FormatDataFile fdf = createfdf(filename, compress, keyindex); for (int i = 0; i < 10000 * 10000; i++) { Record record = readrecord(fis); if (record == null) { break; }//www.j av a 2 s . com fdf.addRecord(record); if (i % 1000000 == 0) { System.out.println(i + "\trecords"); } } fdf.close(); fis.close(); }
From source file:PT.java
License:Open Source License
public static void testwritecolumn(String filename, String fromfile, boolean compress) throws Exception { FormatDataFile[] fdfs = createfdfs(filename, compress); FSDataInputStream fis = fs.open(new Path(fromfile)); for (int i = 0; i < 10000 * 10000; i++) { Record record = readrecord(fis); if (record == null) { break; }//w ww . j av a2s . com ArrayList<FieldValue> fvs = record.fieldValues(); for (int j = 0; j < fvs.size(); j++) { Record re = new Record(1); re.addValue(fvs.get(j)); fdfs[j].addRecord(re); } if (i % 1000000 == 0) { } } for (int i = 0; i < fdfs.length; i++) { fdfs[i].close(); } fis.close(); }
From source file:PT.java
License:Open Source License
public static void testreadcolumnseq(String filename, int num, boolean compress, String mode) throws Exception { Path path = new Path(filename); ArrayList<Short> vector = new ArrayList<Short>(10); if (mode == null || mode.equals("-1")) { for (short i = 0; i < 7; i++) { vector.add(i);//from w w w . jav a 2 s . co m } } else if (mode.equals("half")) { short x = 0; vector.add(x); x = 1; vector.add(x); x = 2; vector.add(x); x = 3; vector.add(x); } else { vector.add(Short.valueOf(mode)); } Configuration conf = new Configuration(); ColumnStorageClient client = new ColumnStorageClient(path, vector, conf); if (compress) { for (int i = 0; i < num; i++) { client.getNextRecord(); } } else { for (int i = 0; i < num; i++) { client.getRecordByLine(i); } } client.close(); }
From source file:PT.java
License:Open Source License
/**
 * Performs {@code num} random-access reads through a {@code ColumnStorageClient} opened on
 * {@code filename}; each read requests a line chosen uniformly from [0, size).
 *
 * The list of shorts handed to the client (presumably column indexes - confirm against
 * {@code ColumnStorageClient}) depends on {@code mode}:
 * {@code null} or {@code "-1"} selects 0..6, {@code "half"} selects 0..3, and any other value
 * is parsed with {@code Short.valueOf} and used as the single entry.
 *
 * @param filename path handed to the client
 * @param num      number of reads to perform
 * @param size     exclusive upper bound of the random line number
 * @param mode     selection mode described above
 * @throws Exception propagated from the client; {@code NumberFormatException} if {@code mode}
 *                   is neither a known keyword nor a valid short
 */
public static void testreadcolumnrand(String filename, int num, int size, String mode) throws Exception {
    Path path = new Path(filename);
    ArrayList<Short> vector = new ArrayList<Short>();
    if (mode == null || mode.equals("-1")) {
        for (short i = 0; i < 7; i++) {
            vector.add(i);
        }
    } else if (mode.equals("half")) {
        for (short i = 0; i < 4; i++) {
            vector.add(i);
        }
    } else {
        vector.add(Short.valueOf(mode));
    }

    Configuration conf = new Configuration();
    ColumnStorageClient client = new ColumnStorageClient(path, vector, conf);
    try {
        Random r = new Random();
        for (int i = 0; i < num; i++) {
            client.getRecordByLine(r.nextInt(size));
        }
    } finally {
        // Release the client even when a read fails.
        client.close();
    }
}
From source file:PT.java
License:Open Source License
private static FormatDataFile[] createfdfs(String filename, boolean compress) throws Exception { if (fs.exists(new Path(filename))) fs.delete(new Path(filename), true); String fn = filename.endsWith("/") ? filename : (filename + "/"); String byteFileName = fn + "Column_Byte"; String shortFileName = fn + "Column_Short"; String intFileName = fn + "Column_Int"; String longFileName = fn + "Column_Long"; String floatFileName = fn + "Column_Float"; String doubleFileName = fn + "Column_Double"; String stringFileName = fn + "Column_String"; FormatDataFile[] fdfs = new FormatDataFile[7]; Configuration conf = new Configuration(); FieldMap byteFieldMap = new FieldMap(); byteFieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0)); Head byteHead = new Head(); byteHead.setCompress(compress ? (byte) 1 : (byte) 0); byteHead.setCompressStyle(ConstVar.LZOCompress); byteHead.setFieldMap(byteFieldMap);// www.j ava2 s. c o m FormatDataFile byteFD = new FormatDataFile(conf); byteFD.create(byteFileName, byteHead); FieldMap shortFieldMap = new FieldMap(); shortFieldMap.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) 1)); Head shortHead = new Head(); shortHead.setCompress(compress ? (byte) 1 : (byte) 0); shortHead.setCompressStyle(ConstVar.LZOCompress); shortHead.setFieldMap(shortFieldMap); FormatDataFile shortFD = new FormatDataFile(conf); shortFD.create(shortFileName, shortHead); FieldMap intFieldMap = new FieldMap(); intFieldMap.addField(new Field(ConstVar.FieldType_Int, ConstVar.Sizeof_Int, (short) 2)); Head intHead = new Head(); intHead.setCompress(compress ? 
(byte) 1 : (byte) 0); intHead.setCompressStyle(ConstVar.LZOCompress); intHead.setFieldMap(intFieldMap); FormatDataFile intFD = new FormatDataFile(conf); intFD.create(intFileName, intHead); FieldMap longFieldMap = new FieldMap(); longFieldMap.addField(new Field(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, (short) 3)); Head longHead = new Head(); longHead.setFieldMap(longFieldMap); longHead.setCompress(compress ? (byte) 1 : (byte) 0); longHead.setCompressStyle(ConstVar.LZOCompress); FormatDataFile longFD = new FormatDataFile(conf); longFD.create(longFileName, longHead); FieldMap floatFieldMap = new FieldMap(); floatFieldMap.addField(new Field(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, (short) 4)); Head floatHead = new Head(); floatHead.setCompress(compress ? (byte) 1 : (byte) 0); floatHead.setCompressStyle(ConstVar.LZOCompress); floatHead.setFieldMap(floatFieldMap); FormatDataFile floatFD = new FormatDataFile(conf); floatFD.create(floatFileName, floatHead); FieldMap doubleFieldMap = new FieldMap(); doubleFieldMap.addField(new Field(ConstVar.FieldType_Double, ConstVar.Sizeof_Double, (short) 5)); Head doubleHead = new Head(); doubleHead.setCompress(compress ? (byte) 1 : (byte) 0); doubleHead.setCompressStyle(ConstVar.LZOCompress); doubleHead.setFieldMap(doubleFieldMap); FormatDataFile doubleFD = new FormatDataFile(conf); doubleFD.create(doubleFileName, doubleHead); FieldMap strFieldMap = new FieldMap(); strFieldMap.addField(new Field(ConstVar.FieldType_String, 0, (short) 6)); Head strHead = new Head(); strHead.setCompress(compress ? (byte) 1 : (byte) 0); strHead.setCompressStyle(ConstVar.LZOCompress); strHead.setFieldMap(strFieldMap); FormatDataFile strFD = new FormatDataFile(conf); strFD.create(stringFileName, strHead); fdfs[0] = byteFD; fdfs[1] = shortFD; fdfs[2] = intFD; fdfs[3] = longFD; fdfs[4] = floatFD; fdfs[5] = doubleFD; fdfs[6] = strFD; return fdfs; }
From source file:removeDup.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: removeDup <in> [<in>...] <out>"); System.exit(2);/*from w ww . j a v a2 s. c o m*/ } Job job = new Job(conf, "removeDup"); job.setJarByClass(removeDup.class); job.setMapperClass(Map.class); //job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:HiddenMarkovModelBuilder.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String jobName = "HMM model builder"; job.setJobName(jobName);//from w w w . j a v a2s . co m job.setJarByClass(HiddenMarkovModelBuilder.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); Utility.setConfiguration(job.getConfiguration(), "avenir"); job.setMapperClass(HiddenMarkovModelBuilder.StateTransitionMapper.class); job.setReducerClass(HiddenMarkovModelBuilder.StateTransitionReducer.class); job.setCombinerClass(MarkovStateTransitionModel.StateTransitionCombiner.class); job.setMapOutputKeyClass(Tuple.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1)); int status = job.waitForCompletion(true) ? 0 : 1; return status; }