List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(URI aUri)
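The examples below all build a Path from a String; the URI-based constructor behaves the same way once the string is parsed. A minimal, self-contained sketch of Path(URI) itself, using a placeholder HDFS authority and directory (hdfs://namenode:8020/user/data/input is illustrative, not taken from the examples; the exists() call needs a reachable cluster, the rest runs offline):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathFromUriExample {
    public static void main(String[] args) throws Exception {
        // Placeholder HDFS location; substitute a real namenode and path.
        URI uri = URI.create("hdfs://namenode:8020/user/data/input");
        Path fromUri = new Path(uri); // public Path(URI aUri)
        Path fromString = new Path("hdfs://namenode:8020/user/data/input");
        System.out.println(fromUri.equals(fromString)); // typically true: both wrap the same URI
        // The URI scheme and authority also select the FileSystem implementation:
        FileSystem fs = FileSystem.get(uri, new Configuration());
        System.out.println("exists: " + fs.exists(fromUri));
    }
}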
From source file:Top20AZRestaurants.java
@Override
public int run(String[] args) throws Exception {
    Job job1 = new Job(getConf());
    job1.setSortComparatorClass(MyDecreasingDoubleComparator.class);
    job1.setJobName("Top20 AZ Restaurants ChainJob");
    job1.setJarByClass(Top20AZRestaurants.class);

    JobConf map1Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20Mapper.class, LongWritable.class, Text.class, Text.class, Text.class,
            map1Conf);
    JobConf map2Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20MapperRedo.class, Text.class, Text.class, DoubleWritable.class,
            Text.class, map2Conf);
    JobConf reduceConf = new JobConf(false);
    ChainReducer.setReducer(job1, Top20ReducerRedo.class, DoubleWritable.class, Text.class, Text.class,
            DoubleWritable.class, reduceConf);

    FileInputFormat.setInputPaths(job1, new Path(args[0]));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));

    boolean success = job1.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:WordCountB.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCountB.class);
    job.setMapperClass(TokenizerMapper.class);
    // Setup the Combiner
    job.setCombinerClass(IntSumReducer.class);
    // Setup the Partitioner
    job.setPartitionerClass(Letterpartitioner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // All arguments except the last are input paths; the last is the output path.
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:BytesRelevance.java
License:Apache License
@Override
protected void writeOutBloomFilter(Tap keysTap, String keyField, FileSystem fs, String path, int bloom_bits,
        int bloom_hashes) throws IOException {
    BytesBloomFilter filter = new BytesBloomFilter(bloom_bits, bloom_hashes);
    TupleEntryIterator it = new TupleEntryIterator(keysTap.getSourceFields(),
            new TapIterator(keysTap, new JobConf()));
    while (it.hasNext()) {
        TupleEntry t = it.next();
        // Look up the key column named by the keyField parameter
        // (the original passed the literal string "keyField" here).
        byte[] b = getBytes((BytesWritable) t.get(keyField));
        filter.add(b);
    }
    it.close();
    filter.writeToFileSystem(fs, new Path(path));
}
From source file:TestColumnStorageInputFormat.java
License:Open Source License
public static void main(String[] argv) throws IOException, SerDeException {
    try {
        if (argv.length != 2) {
            System.out.println("TestColumnStorageInputFormat <input> idx");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestColumnStorageInputFormat.class);
        conf.setJobName("TestColumnStorageInputFormat");
        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.set("mapred.output.compress", "false"); // the original had the typo "flase"
        conf.set("mapred.input.dir", argv[0]);
        conf.set("hive.io.file.readcolumn.ids", argv[1]);

        FormatStorageSerDe serDe = initSerDe(conf);
        StandardStructObjectInspector oi = (StandardStructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();

        FileInputFormat.setInputPaths(conf, argv[0]);
        Path outputPath = new Path(argv[1]);
        FileOutputFormat.setOutputPath(conf, outputPath);

        InputFormat inputFormat = new ColumnStorageInputFormat();
        long begin = System.currentTimeMillis();
        InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
        long end = System.currentTimeMillis();
        System.out.println("getsplit delay " + (end - begin) + " ms");

        if (inputSplits.length == 0) {
            System.out.println("inputSplits is empty");
            return;
        } else {
            System.out.println("get Splits:" + inputSplits.length);
        }

        int size = inputSplits.length;
        System.out.println("getSplits return size:" + size);
        for (int i = 0; i < size; i++) {
            ColumnStorageSplit split = (ColumnStorageSplit) inputSplits[i];
            System.out.println("split:" + i + " offset:" + split.getStart() + " len:" + split.getLength()
                    + " path:" + split.getPath().toString() + " beginLine:" + split.getBeginLine()
                    + " endLine:" + split.getEndLine());
            if (split.getFileName() != null) {
                System.out.println("fileName:" + split.getFileName());
            } else {
                System.out.println("fileName null");
            }
            if (split.fileList() != null) {
                System.out.println("fileList.num:" + split.fileList().size());
                for (int j = 0; j < split.fileList().size(); j++) {
                    System.out.println("filelist " + j + ":" + split.fileList().get(j));
                }
            }
        }

        // Read every split repeatedly, forever, to measure read throughput.
        while (true) {
            int totalDelay = 0;
            RecordReader<WritableComparable, Writable> currRecReader = null;
            for (int i = 0; i < inputSplits.length; i++) {
                currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);
                WritableComparable key = currRecReader.createKey();
                Writable value = currRecReader.createValue();
                begin = System.currentTimeMillis();
                int count = 0;
                while (currRecReader.next(key, value)) {
                    Record record = (Record) value;
                    Object row = serDe.deserialize(record);
                    count++;
                }
                end = System.currentTimeMillis();
                long delay = (end - begin) / 1000;
                totalDelay += delay;
                System.out.println(count + " record read over, delay " + delay + " s");
            }
            System.out.println("total delay:" + totalDelay + "\n");
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("get exception:" + e.getMessage());
    }
}
From source file:HoodieClientExample.java
License:Apache License
public void run() throws Exception {
    SparkConf sparkConf = new SparkConf().setAppName("hoodie-client-example");
    sparkConf.setMaster("local[1]");
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    sparkConf.set("spark.kryoserializer.buffer.max", "512m");
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);

    // Generator of some records to be loaded in.
    HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();

    // Initialize the table, if not done already.
    Path path = new Path(tablePath);
    FileSystem fs = FSUtils.getFs(tablePath, jsc.hadoopConfiguration());
    if (!fs.exists(path)) {
        HoodieTableMetaClient.initTableType(jsc.hadoopConfiguration(), tablePath,
                HoodieTableType.valueOf(tableType), tableName, HoodieAvroPayload.class.getName());
    }

    // Create the write client to write some records in.
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath)
            .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable(tableName)
            .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(IndexType.BLOOM).build())
            .withCompactionConfig(HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 3).build())
            .build();
    HoodieWriteClient client = new HoodieWriteClient(jsc, cfg);

    /**
     * Write 1 (only inserts)
     */
    String newCommitTime = client.startCommit();
    logger.info("Starting commit " + newCommitTime);
    List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 100);
    JavaRDD<HoodieRecord> writeRecords = jsc.<HoodieRecord>parallelize(records, 1);
    client.upsert(writeRecords, newCommitTime);

    /**
     * Write 2 (updates)
     */
    newCommitTime = client.startCommit();
    logger.info("Starting commit " + newCommitTime);
    records.addAll(dataGen.generateUpdates(newCommitTime, 100));
    writeRecords = jsc.<HoodieRecord>parallelize(records, 1);
    client.upsert(writeRecords, newCommitTime);

    /**
     * Schedule a compaction and also perform it, on a MERGE_ON_READ dataset.
     */
    if (HoodieTableType.valueOf(tableType) == HoodieTableType.MERGE_ON_READ) {
        Optional<String> instant = client.scheduleCompaction(Optional.empty());
        JavaRDD<WriteStatus> writeStatuses = client.compact(instant.get());
        client.commitCompaction(instant.get(), writeStatuses, Optional.empty());
    }
}
From source file:PT1.java
License:Open Source License
static void testgenrawfile(String filename, int recordnum) throws IOException {
    Random r = new Random();
    FSDataOutputStream fos = fs.create(new Path(filename));
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < recordnum; i++) {
        fos.writeByte(i / 1000);
        fos.writeShort(i / 1000);
        fos.writeInt(i / 1000);
        fos.writeLong(i / 1000);
        fos.writeFloat(i / 1000);
        fos.writeDouble(i / 1000);
        int strnum = r.nextInt(12) + 7;
        sb.setLength(0);
        for (int j = 0; j < strnum; j++) {
            sb.append((char) ('a' + j));
        }
        fos.writeUTF(sb.toString());
        if (i % 1000000 == 0) {
            // empty in the original source
        }
    }
    fos.close();
}
From source file:PT1.java
License:Open Source License
static void testgenrawfiler(String filename, int recordnum) throws IOException {
    Random r = new Random();
    FSDataOutputStream fos = fs.create(new Path(filename));
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < recordnum; i++) {
        fos.writeByte(r.nextInt());
        fos.writeShort(r.nextInt());
        fos.writeInt(r.nextInt());
        fos.writeLong(r.nextLong());
        fos.writeFloat(r.nextFloat());
        fos.writeDouble(r.nextDouble());
        int strnum = r.nextInt(12) + 7;
        sb.setLength(0);
        for (int j = 0; j < strnum; j++) {
            sb.append((char) ('a' + r.nextInt(26)));
        }
        fos.writeUTF(sb.toString());
        if (i % 1000000 == 0) {
            // empty in the original source
        }
    }
    fos.close();
}
From source file:PT1.java
License:Open Source License
static void testwritefdf(String filename, String fromfile, boolean compress) throws Exception {
    FSDataInputStream fis = fs.open(new Path(fromfile));
    FormatDataFile fdf = createfdf(filename, compress);
    for (int i = 0; i < 10000 * 10000; i++) {
        Record record = readrecord(fis);
        if (record == null) {
            break;
        }
        fdf.addRecord(record);
        if (i % 1000000 == 0) {
            // empty in the original source
        }
    }
    fdf.close();
    fis.close();
}
From source file:PT1.java
License:Open Source License
static void testwritecolumn(String filename, String fromfile, boolean compress) throws Exception {
    FormatDataFile[] fdfs = createfdfs(filename, compress);
    FSDataInputStream fis = fs.open(new Path(fromfile));
    for (int i = 0; i < 10000 * 10000; i++) {
        Record record = readrecord(fis);
        if (record == null) {
            break;
        }
        // Split each record field-by-field, writing every field to its own column file.
        ArrayList<FieldValue> fvs = record.fieldValues();
        for (int j = 0; j < fvs.size(); j++) {
            Record re = new Record(1);
            re.addValue(fvs.get(j));
            fdfs[j].addRecord(re);
        }
        if (i % 1000000 == 0) {
            // empty in the original source
        }
    }
    for (int i = 0; i < fdfs.length; i++) {
        fdfs[i].close();
    }
    fis.close();
}
From source file:PT1.java
License:Open Source License
static void testreadcolumnseq(String filename, int num, boolean compress, String mode) throws Exception {
    Path path = new Path(filename);
    ArrayList<Short> vector = new ArrayList<Short>(10);
    if (mode == null || mode.equals("-1")) {
        // Read all seven columns.
        for (short i = 0; i < 7; i++) {
            vector.add(i);
        }
    } else if (mode.equals("half")) {
        // Read only the first four columns.
        for (short i = 0; i < 4; i++) {
            vector.add(i);
        }
    } else {
        // Read the single column whose index is given by mode.
        vector.add(Short.valueOf(mode));
    }

    Configuration conf = new Configuration();
    ColumnStorageClient client = new ColumnStorageClient(path, vector, conf);
    if (compress) {
        for (int i = 0; i < num; i++) {
            client.getNextRecord();
        }
    } else {
        for (int i = 0; i < num; i++) {
            client.getRecordByLine(i);
        }
    }
    client.close();
}