List of usage examples for org.apache.hadoop.mapreduce.Job setJobName
public void setJobName(String name) throws IllegalStateException
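setJobName assigns the user-visible name shown in the JobTracker/ResourceManager UI and job history; it must be called before the job is submitted, otherwise it throws IllegalStateException. Before the project examples below, here is a minimal, self-contained sketch of typical usage. The class name SetJobNameExample and the identity pass-through mapper/reducer setup are illustrative assumptions, not taken from any of the projects listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(conf) is the non-deprecated replacement for new Job(conf) used in several examples below.
        Job job = Job.getInstance(conf);
        // Set the job name before submission; after submission setJobName throws IllegalStateException.
        job.setJobName("identity pass-through");
        job.setJarByClass(SetJobNameExample.class);

        // Identity mapper/reducer: TextInputFormat emits <LongWritable offset, Text line> pairs unchanged.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));   // input directory (command-line argument)
        FileOutputFormat.setOutputPath(job, new Path(args[1]));  // output directory (must not exist yet)

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Alternatively, the name can be supplied at construction time via new Job(conf, "name") or Job.getInstance(conf, "name"), as some of the examples below do.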
From source file:goraci.Verify.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("Usage : " + Verify.class.getSimpleName() + " <output dir> <num reducers>");
        return 0;
    }

    DataStore<Long, CINode> store = DataStoreFactory.getDataStore(Long.class, CINode.class,
            new Configuration());

    String outputDir = args[0];
    int numReducers = Integer.parseInt(args[1]);

    Job job = new Job(getConf());
    if (!job.getConfiguration().get("io.serializations")
            .contains("org.apache.hadoop.io.serializer.JavaSerialization")) {
        job.getConfiguration().set("io.serializations", job.getConfiguration().get("io.serializations")
                + ",org.apache.hadoop.io.serializer.JavaSerialization");
    }

    job.setJobName("Link Verifier");
    job.setNumReduceTasks(numReducers);
    job.setJarByClass(getClass());

    Query<Long, CINode> query = store.newQuery();
    query.setFields("prev");

    GoraMapper.initMapperJob(job, query, store, LongWritable.class, VLongWritable.class, VerifyMapper.class,
            true);

    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);

    job.setReducerClass(VerifyReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(outputDir));

    boolean success = job.waitForCompletion(true);

    store.close();

    return success ? 0 : 1;
}
From source file:gov.nasa.jpl.memex.pooledtimeseries.MeanChiSquareDistanceCalculation.java
License:Apache License
public static void main(String[] args) throws Exception {
    System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");

    JobConf conf = new JobConf();
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(96);
    System.out.println("After Map:" + conf.getNumMapTasks());

    Job job = Job.getInstance(baseConf);
    job.setJarByClass(MeanChiSquareDistanceCalculation.class);
    job.setJobName("mean_chi_square_calculation");

    System.out.println("Job ID" + job.getJobID());
    System.out.println("Track:" + baseConf.get("mapred.job.tracker"));
    System.out.println("Job Name" + job.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));

    System.out.println("Caching video-metric-bak.tgz");
    job.addCacheArchive(new URI("/user/pts/video-metric-bak.tgz"));
    URI[] cacheFiles = job.getCacheFiles();
    if (cacheFiles != null && cacheFiles.length > 0) {
        System.out.println("Cache file ->" + cacheFiles[0]);
    }
    System.out.println("Cached video-metric-bak.tgz");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.waitForCompletion(true);
}
From source file:gov.nasa.jpl.memex.pooledtimeseries.SimilarityCalculation.java
License:Apache License
public static void main(String[] args) throws Exception {
    System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapreduce.job.reduces", "0");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");
    baseConf.set("meanDistsFilePath", args[2]);

    JobConf conf = new JobConf();
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(196);
    System.out.println("After Map:" + conf.getNumMapTasks());

    Job job = Job.getInstance(baseConf);
    System.out.println("Track: " + baseConf.get("mapred.job.tracker"));
    System.out.println("Job ID" + job.getJobID());
    System.out.println("Job Name" + job.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));

    job.setJarByClass(SimilarityCalculation.class);
    job.setJobName("similarity_calc");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);

    job.waitForCompletion(true);
}
From source file:gr.ntua.h2rdf.inputFormat.Example.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Job job = new Job();
    job.setJobName("nikos");
    // disable speculative execution
    job.setJarByClass(Example.class);

    // Set the table name to separate index rows based on where content is stored
    job.getConfiguration().set("TextIndexer.library", "spo");

    // Set the number of reducers for the job
    //job.setNumReduceTasks(numReducers);

    // important! without this setting the reducers get stuck!!!!!
    //job.getConfiguration().setInt("io.sort.mb", 20);

    // space delimited string of column families to scan
    job.setReducerClass(SimpleReducer.class);
    // job.setSortComparatorClass(KeyValue.KeyComparator.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setInputFormatClass(FileTableInputFormat.class);
    //job.setInputFormatClass(HFileInputFormat.class);

    FileOutputFormat.setOutputPath(job, new Path("output3"));

    Scan scan = new Scan();
    scan.setStartRow(Bytes.toBytes("873847660^^"));
    scan.setStopRow(Bytes.toBytes("873847660^^999999999"));
    scan.addFamily(Bytes.toBytes("A"));

    HBaseConfiguration HBconf = new HBaseConfiguration();
    HTable table = new HTable(HBconf, "osp");
    ResultScanner resultScanner = table.getScanner(scan);
    Result result;
    while ((result = resultScanner.next()) != null) {
        System.out.println(result.toString());
    }

    // System.out.println("scan is: " + TableMapReduceUtil.convertScanToString(scan));
    //MyTableMapReduceUtil.addCol("?x", "P0", "spo", "-1496159132", "A", "huihui", job);

    TableMapReduceUtil.newJob();

    //MyTableMapReduceUtil.addRow("?w ?z", "P2", "osp", "982", "982", "A", job);
    //TableMapReduceUtil.addCol("?x", "P0", "spo", "561203963^^", "561203963^^999999999", "A:2086497232", job);
    //TableMapReduceUtil.addRow("?x ?y", "P1", "spo", "947805029^^", "947805029^^999999999", "A", job);
    //TableMapReduceUtil.addRow("?w ?z", "P2", "osp", "893972985^^", "893972985^^999999999", "A", job);
    //TableMapReduceUtil.addRow("?w ?z", "P24", "osp", "9947^^", "9947^^999999999", "A", job);

    MyFileInputFormat.addInputPath(job, new Path("output/BGP1"));
    //MyFileInputFormat.addInputPath(job, new Path("output/BGP0"));

    job.waitForCompletion(true);
}
From source file:gr.ntua.h2rdf.LoadTriples.DistinctIds.java
License:Open Source License
public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    //io.compression.codecs
    Job job = new Job();
    job.setInputFormatClass(TextInputFormat.class);
    Configuration conf = new Configuration();
    Path blockProjection = new Path("blockIds/");
    Path translations = new Path("translations/");
    Path sample = new Path("sample/");
    Path temp = new Path("temp/");
    Path uniqueIds = new Path("uniqueIds/");
    FileSystem fs;
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(uniqueIds)) {
            fs.delete(uniqueIds, true);
        }
        if (fs.exists(translations)) {
            fs.delete(translations, true);
        }
        if (fs.exists(blockProjection)) {
            fs.delete(blockProjection, true);
        }
        if (fs.exists(sample)) {
            fs.delete(sample, true);
        }
        if (fs.exists(temp)) {
            fs.delete(temp, true);
        }

        FileOutputFormat.setOutputPath(job, uniqueIds);
        Path inp = new Path(args[0]);
        FileInputFormat.setInputPaths(job, inp);

        double type = 1;
        double datasetSize = 0;
        if (fs.isFile(inp)) {
            datasetSize = fs.getFileStatus(inp).getLen();
        } else if (fs.isDirectory(inp)) {
            FileStatus[] s = fs.listStatus(inp);
            for (int i = 0; i < s.length; i++) {
                if (s[i].getPath().getName().toString().endsWith(".gz"))
                    type = 27;
                if (s[i].getPath().getName().toString().endsWith(".snappy"))
                    type = 10;
                datasetSize += s[i].getLen();
            }
        } else {
            FileStatus[] s = fs.globStatus(inp);
            for (int i = 0; i < s.length; i++) {
                if (s[i].getPath().getName().toString().endsWith(".gz"))
                    type = 27;
                if (s[i].getPath().getName().toString().endsWith(".snappy"))
                    type = 10;
                datasetSize += s[i].getLen();
            }
        }
        datasetSize = datasetSize * type;
        System.out.println("type: " + type);
        System.out.println("datasetSize: " + datasetSize);

        samplingRate = (double) sampleChunk / (double) datasetSize;
        if (samplingRate >= 0.1) {
            samplingRate = 0.1;
        }
        if (samplingRate <= 0.001) {
            samplingRate = 0.001;
        }
        numReducers = (int) (datasetSize / ReducerChunk);
        if (numReducers == 0)
            numReducers = 1;
        numReducers++;
    } catch (IOException e) {
        e.printStackTrace();
    }

    HBaseAdmin hadmin = new HBaseAdmin(conf);
    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
    HColumnDescriptor family = new HColumnDescriptor("counter");
    desc.addFamily(family);
    if (!hadmin.tableExists(TABLE_NAME)) {
        hadmin.createTable(desc);
    }

    job.setNumReduceTasks(numReducers);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(ImmutableBytesWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setJarByClass(DistinctIds.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setPartitionerClass(SamplingPartitioner.class);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    job.getConfiguration().set("mapred.compress.map.output", "true");
    job.getConfiguration().set("mapred.map.output.compression.codec",
            "org.apache.hadoop.io.compress.SnappyCodec");
    //job.setCombinerClass(Combiner.class);
    job.setJobName("Distinct Id Wordcount");

    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setInt("io.sort.mb", 100);
    job.getConfiguration().setInt("io.file.buffer.size", 131072);
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);

    return job;
}
From source file:gr.ntua.h2rdf.loadTriples.SortIds.java
License:Apache License
public static Job createSubmittableJob(String[] args, Counters counters, int numReducers) throws IOException {
    //numReducers=52;
    Job job = new Job();
    TABLE_NAME = args[1];
    Configuration conf = job.getConfiguration();
    long sum = 0, maxCommon = Integer.MAX_VALUE;
    try {
        HTable table = new HTable(HBaseConfiguration.create(), "Counters");
        for (int i = 1; i < numReducers; i++) {
            Get get = new Get(Bytes.toBytes("count." + i));
            get.addColumn(Bytes.toBytes("counter"), new byte[0]);
            Result res = table.get(get);
            if (!res.isEmpty()) {
                long v = Bytes.toLong(res.raw()[0].getValue());
                //long v = counters.findCounter("Countergroup", "count."+i).getValue();
                if (v < maxCommon) {
                    maxCommon = v;
                }
                //conf.setLong("count."+i, v);
                //System.out.println(v);
                sum += v;
            }
        }
        System.out.println("maxCommon: " + maxCommon);
        job.getConfiguration().setLong("count.MaxCommon", maxCommon);
        job.getConfiguration().setInt("count.numReducers", numReducers - 1);
        job.getConfiguration().setInt("count.sum", (int) sum);

        Get get = new Get(Bytes.toBytes("count.chunks"));
        get.addColumn(Bytes.toBytes("counter"), new byte[0]);
        Result res = table.get(get);
        int stringReducers = 0;
        if (!res.isEmpty()) {
            stringReducers = (int) Bytes.toLong(res.raw()[0].getValue());
        }
        //int stringReducers = (int) counters.findCounter("Countergroup", "count.chunks").getValue();
        int intReducers = (int) Math.ceil((double) sum / (double) bucket);

        sum = maxCommon * (numReducers - 1);
        for (int i = 1; i < numReducers; i++) {
            get = new Get(Bytes.toBytes("count." + i));
            get.addColumn(Bytes.toBytes("counter"), new byte[0]);
            res = table.get(get);
            if (!res.isEmpty()) {
                long v = Bytes.toLong(res.raw()[0].getValue());
                //long v = counters.findCounter("Countergroup", "count."+i).getValue();
                job.getConfiguration().setLong("count." + (i - 1), sum);
                //System.out.println("count."+i+" "+sum);
                sum += v - maxCommon;
            }
        }
        System.out.println(
                "stringReducers: " + stringReducers + " sum: " + sum + " intReducers: " + intReducers);
        job.getConfiguration().setInt("count.stringReducers", stringReducers);
        job.getConfiguration().setInt("count.intReducers", intReducers);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setNumReduceTasks(stringReducers + intReducers);

        Path out = new Path(args[1]);
        FileSystem fs;
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
            if (fs.exists(new Path("temp")))
                fs.delete(new Path("temp"), true);
        } catch (IOException e) {
            e.printStackTrace();
        }
        FileOutputFormat.setOutputPath(job, out);
        FileInputFormat.addInputPath(job, new Path("uniqueIds"));
        FileInputFormat.addInputPath(job, new Path("blockIds"));

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        job.setJarByClass(SortIds.class);

        //configure compression
        StringBuilder compressionConfigValue = new StringBuilder();
        compressionConfigValue.append(URLEncoder.encode("1", "UTF-8"));
        compressionConfigValue.append('=');
        compressionConfigValue.append(URLEncoder.encode(Algorithm.GZ.getName(), "UTF-8"));
        compressionConfigValue.append('&');
        compressionConfigValue.append(URLEncoder.encode("2", "UTF-8"));
        compressionConfigValue.append('=');
        compressionConfigValue.append(URLEncoder.encode(Algorithm.GZ.getName(), "UTF-8"));
        job.getConfiguration().set("hbase.hfileoutputformat.families.compression",
                compressionConfigValue.toString());
        job.getConfiguration().set("mapred.compress.map.output", "true");
        job.getConfiguration().set("mapred.map.output.compression.codec",
                "org.apache.hadoop.io.compress.SnappyCodec");

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setPartitionerClass(TwoTotalOrderPartitioner.class);
        TwoTotalOrderPartitioner.setPartitionFile(job.getConfiguration(),
                new Path("partition/stringIdPartition"));
        //job.setCombinerClass(Combiner.class);
        job.setJobName("SortIds");

        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 268435456);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return job;
}
From source file:gr.ntua.h2rdf.loadTriples.Translate.java
License:Apache License
public static Job createSubmittableJob(String[] args) throws IOException {
    Job job = new Job();
    Configuration conf = job.getConfiguration();
    FileSystem fs;
    int reducers = 0;
    try {
        fs = FileSystem.get(conf);
        FileStatus[] p = fs.listStatus(new Path("blockIds/"));
        reducers = p.length;
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setNumReduceTasks(reducers);

        Path out = new Path("translations");
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
        FileOutputFormat.setOutputPath(job, out);
        FileInputFormat.addInputPath(job, new Path("temp"));
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(ImmutableBytesWritable.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setJarByClass(Translate.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setPartitionerClass(IdPartitioner.class);
        job.setJobName("Translate");

        job.getConfiguration().set("mapred.compress.map.output", "true");
        job.getConfiguration().set("mapred.map.output.compression.codec",
                "org.apache.hadoop.io.compress.SnappyCodec");
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return job;
}
From source file:gr.ntua.h2rdf.loadTriples.TranslateAndImport.java
License:Apache License
public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    //compute sample partitions
    FileSystem fs;
    Configuration conf = new Configuration();
    int collected = 0, chunks = 0;
    try {
        fs = FileSystem.get(conf);
        Path sampleDir = new Path("sample");
        FileStatus[] samples = fs.listStatus(sampleDir);
        TreeSet<String> set = new TreeSet<String>();
        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                //System.out.println(tr[0].toN3());
                set.add(tr[0].toN3());
                set.add(tr[1].toN3());
                set.add(tr[2].toN3());
            }
            in1.close();
            in.close();
        }

        IndexTranslator translator = new IndexTranslator(TABLE_NAME + "_Index");
        HashMap<String, Long> index = translator.translate(set);
        set.clear();

        TreeSet<ImmutableBytesWritable> set1 = new TreeSet<ImmutableBytesWritable>(
                new ImmutableBytesWritable.Comparator());
        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                ByteTriple btr = new ByteTriple(index.get(tr[0].toN3()), index.get(tr[1].toN3()),
                        index.get(tr[2].toN3()));
                set1.add(new ImmutableBytesWritable(btr.getSPOByte()));
                set1.add(new ImmutableBytesWritable(btr.getSOPByte()));
                set1.add(new ImmutableBytesWritable(btr.getOPSByte()));
                set1.add(new ImmutableBytesWritable(btr.getOSPByte()));
                set1.add(new ImmutableBytesWritable(btr.getPOSByte()));
                set1.add(new ImmutableBytesWritable(btr.getPSOByte()));
            }
            in1.close();
            in.close();
        }
        index.clear();

        Path p = new Path("hexastorePartition");
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        SequenceFile.Writer partitionWriter = SequenceFile.createWriter(fs, conf, p,
                ImmutableBytesWritable.class, NullWritable.class);

        double chunkSize = bucketSampledTriples * DistinctIds.samplingRate;
        System.out.println("chunkSize: " + chunkSize);
        Iterator<ImmutableBytesWritable> it = set1.iterator();
        while (it.hasNext()) {
            ImmutableBytesWritable key = it.next();
            if (collected > chunkSize) {
                partitionWriter.append(key, NullWritable.get());
                //System.out.println(Bytes.toStringBinary(key.get()));
                collected = 0;
                chunks++;
            } else {
                collected++;
            }
        }
        System.out.println("chunks: " + chunks);
        partitionWriter.close();
    } catch (IOException e) {
        e.printStackTrace();
    }

    Job job = new Job();
    job = new Job(conf, "Import Hexastore");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    Path out = new Path("out");
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(job, out);

    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("hexastorePartition"));
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    StringBuilder compressionConfigValue = new StringBuilder();
    compressionConfigValue.append(URLEncoder.encode("I", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("S", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("T", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    job.getConfiguration().set("hbase.hfileoutputformat.families.compression",
            compressionConfigValue.toString());
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize", 262144);
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize", 16384);

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setJarByClass(TranslateAndImport.class);
    job.setMapperClass(Map.class);
    //job.setReducerClass(HexaStoreHistogramsReduce.class);
    job.setReducerClass(HexaStoreReduce.class);

    job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
    job.getConfiguration().setInt("mapred.reduce.tasks", chunks + 1);
    //job.setCombinerClass(Combiner.class);
    job.setJobName("Translate Projections");

    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setInt("io.sort.mb", 100);
    job.getConfiguration().setInt("io.file.buffer.size", 131072);
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
    job.getConfiguration().set("mapred.compress.map.output", "true");
    job.getConfiguration().set("mapred.map.output.compression.codec",
            "org.apache.hadoop.io.compress.SnappyCodec");
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 268435456);
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
    job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);

    return job;
}
From source file:gr.ntua.h2rdf.sampler.TotalOrderPrep.java
License:Open Source License
public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    Job sample_job = new Job();

    // Remember the real input format so the sampling input format can use it under the hood
    sample_job.getConfiguration().setBoolean(ARG_INPUTFORMAT, true);
    sample_job.setInputFormatClass(TextInputFormat.class);
    //sample_job.getConfiguration().set("mapred.fairscheduler.pool", "pool9");

    // Base the sample size on the number of reduce tasks that will be used by the real job,
    // but only use 1 reducer for this job (maps output very little)
    sample_job.setNumReduceTasks(1);

    // Make this job's output a temporary file: the input file for the real job's TotalOrderPartitioner
    Path partition = new Path("partitions/");
    //partition.getFileSystem(job.getConfiguration()).deleteOnExit(partition);

    conf = new Configuration();
    FileSystem fs;
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(partition)) {
            fs.delete(partition, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(sample_job, partition);
    FileInputFormat.setInputPaths(sample_job, new Path(args[0]));

    //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path(partition, "part-r-00000"));
    //job.setPartitionerClass(TotalOrderPartitioner.class);

    // If there's a combiner, turn it into an identity reducer to prevent destruction of keys.
    sample_job.setCombinerClass(Combiner.class);

    sample_job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setMapOutputValueClass(ImmutableBytesWritable.class);
    sample_job.setOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setOutputValueClass(NullWritable.class);
    sample_job.setPartitionerClass(HashPartitioner.class);
    sample_job.setOutputFormatClass(SequenceFileOutputFormat.class);
    sample_job.setJarByClass(TotalOrderPrep.class);
    sample_job.setMapperClass(Map.class);
    sample_job.setReducerClass(PartitioningReducer.class);
    sample_job.setJobName("(Sampler)");

    sample_job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    sample_job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);

    return sample_job;
}
From source file:hadoop.CountMinorKeys.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    @SuppressWarnings("deprecation")
    Job job = new Job(getConf());
    job.setJarByClass(CountMinorKeys.class);
    job.setJobName("Count Minor Keys");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(KVInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    KVInputFormat.setKVStoreName(args[0]);
    KVInputFormat.setKVHelperHosts(new String[] { args[1] });
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    /*
     * Load KVLoginFile if specified, otherwise try to load via reading
     * the system property oracle.kv.login.
     */
    if (args.length >= 4) {
        KVInputFormat.setKVSecurity(args[3]);
    }

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}