List of usage examples for org.apache.hadoop.mapred JobConf get

public String get(String name)

Get the value of the name property, or null if no such property exists.
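Before the source-file examples below, here is a minimal, self-contained sketch of the basic pattern. The property names used here are invented for illustration only (except textinputformat.record.delimiter, which also appears in the last example): set a value, read it back with get, handle the null that get(String) returns for a missing key, or use the two-argument overload to supply a default.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Hypothetical property name, used only for this sketch.
        conf.set("example.hbase.table.name", "my_table");

        // get(String) returns the configured value, or null when the key is absent.
        String tableName = conf.get("example.hbase.table.name");
        if (tableName == null) {
            throw new IllegalStateException("example.hbase.table.name is not set");
        }

        // The two-argument overload returns a default instead of null.
        String delimiter = conf.get("textinputformat.record.delimiter", "\n");

        System.out.println(tableName + " / delimiter length " + delimiter.length());
    }
}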
From source file:com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java
License:Apache License
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);

    if (hbaseColumnsMapping == null) {
        throw new IOException("hbase.columns.mapping required for HBase Table.");
    }

    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies,
                hbaseColumnFamiliesBytes, hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (SerDeException se) {
        throw new IOException(se);
    }

    Scan scan = new Scan();

    // Take filter pushdown into account while calculating splits; this
    // allows us to prune off regions immediately. Note that although
    // the Javadoc for the superclass getSplits says that it returns one
    // split per region, the implementation actually takes the scan
    // definition into account and excludes regions which don't satisfy
    // the start/stop row conditions (HBASE-1829).
    convertFilter(jobConf, scan, null, iKey);

    // REVIEW: are we supposed to be applying the getReadColumnIDs
    // same as in getRecordReader?
    for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
        if (i == iKey) {
            continue;
        }
        if (hbaseColumnQualifiers.get(i) == null) {
            scan.addFamily(hbaseColumnFamiliesBytes.get(i));
        } else {
            scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
        }
    }
    setScan(scan);

    Job job = new Job(jobConf);
    JobContext jobContext = new JobContext(job.getConfiguration(), job.getJobID());
    Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

    List<org.apache.hadoop.mapreduce.InputSplit> splits = super.getSplits(jobContext);
    InputSplit[] results = new InputSplit[splits.size()];

    for (int i = 0; i < splits.size(); i++) {
        results[i] = new HBaseSplit((TableSplit) splits.get(i), tablePaths[0]);
    }

    return results;
}
From source file:com.ask.hive.hbase.HiveHBaseTimeTableInputFormat.java
License:Apache License
public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);

    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies,
                hbaseColumnFamiliesBytes, hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (SerDeException se) {
        throw new IOException(se);
    }

    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }
            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }
            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // tables column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }
            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }
            if (!addAll) {
                break;
            }
        }
    }

    // setting start and end time for scanning
    setTime(jobConf, scan);

    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);

    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader =
            createRecordReader(tableSplit, tac);

    return new RecordReader<ImmutableBytesWritable, Result>() {

        // @Override
        public void close() throws IOException {
            recordReader.close();
        }

        // @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        // @Override
        public Result createValue() {
            return new Result();
        }

        // @Override
        public long getPos() throws IOException {
            return 0;
        }

        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }

        // @Override
        public boolean next(ImmutableBytesWritable rowKey, Result value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    Writables.copyWritable(recordReader.getCurrentValue(), value);
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}
From source file:com.ask.hive.hbase.HiveHBaseTimeTableInputFormat.java
License:Apache License
/**
 * The minimum time should be less than the max time,
 * otherwise the filter will be skipped.
 *
 * @param jobConf
 * @param scan
 * @throws IOException
 */
private void setTime(JobConf jobConf, Scan scan) throws IOException {
    long min = 0l;
    String mintime = jobConf.get("hbase.mintime");
    if (StringUtils.isNotEmpty(mintime)) {
        min = Long.parseLong(mintime);
    }
    String maxtime = jobConf.get("hbase.maxtime");
    if (StringUtils.isNotEmpty(maxtime)) {
        long l = Long.parseLong(maxtime);
        if (min <= l)
            scan.setTimeRange(min, l);
    } else if (min > 0) {
        long l = System.currentTimeMillis();
        if (min <= l)
            scan.setTimeRange(min, l);
    }
}
From source file:com.ask.hive.hbase.HiveHBaseTimeTableInputFormat.java
License:Apache License
/**
 * Converts a filter (which has been pushed down from Hive's optimizer)
 * into corresponding restrictions on the HBase scan. The
 * filter should already be in a form which can be fully converted.
 *
 * @param jobConf configuration for the scan
 * @param scan the HBase scan object to restrict
 * @param tableSplit the HBase table split to restrict, or null
 *        if calculating splits
 * @param iKey 0-based offset of key column within Hive table
 * @return converted table split if any
 */
private TableSplit convertFilter(JobConf jobConf, Scan scan, TableSplit tableSplit, int iKey)
        throws IOException {

    String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (filterExprSerialized == null) {
        return tableSplit;
    }
    ExprNodeDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized, jobConf);

    String columnNameProperty = jobConf.get(Constants.LIST_COLUMNS);
    List<String> columnNames = Arrays.asList(columnNameProperty.split(","));

    IndexPredicateAnalyzer analyzer = newIndexPredicateAnalyzer(columnNames.get(iKey));

    List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
    ExprNodeDesc residualPredicate = analyzer.analyzePredicate(filterExpr, searchConditions);

    // There should be no residual since we already negotiated
    // that earlier in HBaseStorageHandler.decomposePredicate.
    if (residualPredicate != null) {
        throw new RuntimeException("Unexpected residual predicate " + residualPredicate.getExprString());
    }

    // There should be exactly one predicate since we already
    // negotiated that also.
    if (searchConditions.size() != 1) {
        throw new RuntimeException("Exactly one search condition expected in push down");
    }

    // Convert the search condition into a restriction on the HBase scan
    IndexSearchCondition sc = searchConditions.get(0);
    ExprNodeConstantEvaluator eval = new ExprNodeConstantEvaluator(sc.getConstantDesc());
    byte[] startRow;
    try {
        ObjectInspector objInspector = eval.initialize(null);
        Object writable = eval.evaluate(null);
        ByteStream.Output serializeStream = new ByteStream.Output();
        LazyUtils.writePrimitiveUTF8(serializeStream, writable, (PrimitiveObjectInspector) objInspector,
                false, (byte) 0, null);
        startRow = new byte[serializeStream.getCount()];
        System.arraycopy(serializeStream.getData(), 0, startRow, 0, serializeStream.getCount());
    } catch (HiveException ex) {
        throw new IOException(ex);
    }

    // stopRow is exclusive, so pad it with a trailing 0 byte to
    // make it compare as the very next value after startRow
    byte[] stopRow = new byte[startRow.length + 1];
    System.arraycopy(startRow, 0, stopRow, 0, startRow.length);

    if (tableSplit != null) {
        tableSplit = new TableSplit(tableSplit.getTableName(), startRow, stopRow,
                tableSplit.getRegionLocation());
    }

    scan.setStartRow(startRow);
    scan.setStopRow(stopRow);

    // Add a WhileMatchFilter to make the scan terminate as soon
    // as we see a non-matching key. This is probably redundant
    // since the stopRow above should already take care of it for us.
    scan.setFilter(
            new WhileMatchFilter(new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(startRow))));

    return tableSplit;
}
From source file:com.ask.hive.hbase.PentahoTableCustomInputFormat.java
License:Apache License
public void configure(JobConf job) {
    String tableName = job.get(INPUT_TABLE);

    // columns can be colFam:colName or colFam:
    // the latter can be used to set up a scan that
    String colArg = job.get(COLUMN_LIST);
    if (StringUtils.isNotEmpty(colArg)) {
        String[] colNames = colArg.split(" ");
        byte[][] m_cols = new byte[colNames.length][];
        for (int i = 0; i < m_cols.length; i++) {
            String colN = colNames[i];
            m_cols[i] = Bytes.toBytes(colN);
        }
        setInputColumns(m_cols);
    }

    try {
        setHTable(new HTable(HBaseConfiguration.create(job), tableName));
    } catch (Exception e) {
        // PLOG.error(StringUtils.stringifyException(e));
    }

    // set our table record reader
    PentahoTableRecordReader rr = new PentahoTableRecordReader();

    String cacheSize = job.get(SCAN_CACHEDROWS);
    if (StringUtils.isNotEmpty(cacheSize)) {
        rr.setScanCacheRowSize(Integer.parseInt(cacheSize));
    }

    FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ALL);

    String hvalue = job.get(SCAN_INCLUDE_FILTER);
    if (StringUtils.isNotEmpty(hvalue)) {
        String[] columns = hvalue.split(",");
        if (columns.length > 0) {
            for (String column : columns) {
                String[] fv = column.split(":");
                if (fv.length > 2) {
                    SingleColumnValueFilter rowfilter = new SingleColumnValueFilter(Bytes.toBytes(fv[0]),
                            Bytes.toBytes(fv[1]), CompareOp.EQUAL, Bytes.toBytes(fv[2]));
                    list.addFilter(rowfilter);
                }
            }
        }
    }

    String hexvalue = job.get(SCAN_EXCLUDE_FILTER);
    if (StringUtils.isNotEmpty(hexvalue)) {
        String[] columns = hexvalue.split(",");
        if (columns.length > 0) {
            for (String column : columns) {
                String[] fv = column.split(":");
                if (fv.length > 2) {
                    SingleColumnValueExcludeFilter rowfilter = new SingleColumnValueExcludeFilter(
                            Bytes.toBytes(fv[0]), Bytes.toBytes(fv[1]), CompareOp.NOT_EQUAL,
                            Bytes.toBytes(fv[2]));
                    list.addFilter(rowfilter);
                }
            }
        }
    }

    String hmax = job.get("hbase.max.version");
    if (StringUtils.isNotEmpty(hmax)) {
        rr.setMaxVersion(Integer.parseInt(hmax));
    }

    rr.setValueFilter(list);
    setTableRecordReader(rr);
}
From source file:com.ask.hive.hbase.PentahoTableCustomInputFormat.java
License:Apache License
public void validateInput(JobConf job) throws IOException {
    // expecting a table name
    String tableName = job.get(INPUT_TABLE);
    if (StringUtils.isEmpty(tableName)) {
        throw new IOException("expecting one table name");
    }

    // connected to table?
    if (getHTable() == null) {
        throw new IOException("could not connect to table '" + tableName + "'");
    }

    // expecting at least one column/column family
    String colArg = job.get(COLUMN_LIST);
    if (colArg == null || colArg.length() == 0) {
        throw new IOException("expecting at least one column/column family");
    }
}
From source file:com.bah.culvert.hive.CulvertInputFormat.java
License:Apache License
@Override
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    String expression = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (expression == null) {
        // return what??
    }

    ExprNodeDesc filterExpr = Utilities.deserializeExpression(expression, conf);
    CulvertPredicateHandlerDelegate predicateHandler = new CulvertPredicateHandlerDelegate();
    CulvertIndexMapping[] culvertIndexMappings = CulvertHiveUtils.getIndexMappings(conf);

    /*
     * there should be no residual predicate, just the pushed predicate, since
     * we already handled that in the storage handler.
     */
    List<IndexSearchCondition> indexSearchConditions = predicateHandler.decomposePredicateToList(filterExpr,
            culvertIndexMappings);

    List<Constraint> compiledConstraints = new ArrayList<Constraint>();
    Map<String, CRange> rangeMap = new HashMap<String, CRange>();

    // get start and end ranges from the conditions
    for (IndexSearchCondition condition : indexSearchConditions) {

        /*
         * The hive column is a simple string -- easy. This column name will let us
         * get an index from the culvert configuration specified in the conf.
         */
        String hiveColumn = condition.getColumnDesc().getColumn();

        /*
         * the operation maps to the op name specified in
         * CulvertPredicateHandlerDelegate.createPredicateAnalyzerForCulvert()
         */
        String operation = condition.getComparisonOp();

        /*
         * We need to manipulate this quite a bit - we need to turn it into an
         * object and an object inspector, and then use the object inspector to
         * find a byte value for the key.
         */
        ExprNodeConstantEvaluator nodeEvaluator = new ExprNodeConstantEvaluator(condition.getConstantDesc());
        PrimitiveObjectInspector inspector;
        Object value;

        // we can assume that the inspector is a primitive
        try {
            inspector = (PrimitiveObjectInspector) nodeEvaluator.initialize(null);
            value = nodeEvaluator.evaluate(null);
        } catch (HiveException e) {
            throw new RuntimeException(
                    "Unable to get constant portion of query expression " + condition, e);
        }
        Object primitive = inspector.getPrimitiveJavaObject(value);

        // look for the storage type in the column mapping
        CulvertIndexMapping culvertIndexMapping = getMapping(hiveColumn, culvertIndexMappings);
        if (culvertIndexMapping == null) {
            throw new IllegalArgumentException("Could not find a culvert mapping for this hive column "
                    + hiveColumn + " out of " + Arrays.toString(culvertIndexMappings));
        }

        // we have a utility to get the byte representation based on the hive
        // column type and the actual primitive itself.
        byte[] culvertValue = CulvertHiveUtils.getBytesForPrimitiveObject(primitive,
                culvertIndexMapping.getType());

        CRange range = rangeMap.get(hiveColumn);
        if (range == null) {
            // ok, start a new range
            // we know we'll always want to insert for this operation
            if (operation.equals(GenericUDFOPEqual.class.getName())) {
                // =
                range = new CRange(culvertValue);
            } else if (operation.equals(GenericUDFOPGreaterThan.class.getName())) {
                // >
                range = new CRange(Bytes.lexIncrement(culvertValue), Bytes.START_END_KEY);
            } else if (operation.equals(GenericUDFOPEqualOrGreaterThan.class.getName())) {
                // =>
                range = new CRange(culvertValue, Bytes.START_END_KEY);
            } else if (operation.equals(GenericUDFOPEqualOrLessThan.class.getName())) {
                // =<
                range = new CRange(Bytes.START_END_KEY, culvertValue);
            }
            /*
             * TODO we can't implement < because lexDecrement is undefined (in math
             * speak). Need to work around that by pushing down inclusive/exclusive
             * on the CRange.
             */
            rangeMap.put(hiveColumn, range);
        } else {
            LexicographicBytesComparator c = LexicographicBytesComparator.INSTANCE;
            if (operation.equals(GenericUDFOPEqual.class.getName())) {
                // = (if two conflicting equals, the whole predicate becomes a no-op)
                if (c.compare(range.getStart(), culvertValue) != 0) {
                    rangeMap.clear();
                    return new InputSplit[0];
                }
            } else if (operation.equals(GenericUDFOPGreaterThan.class.getName())) {
                // >
                // narrow the existing range
                byte[] exclusiveValue = Bytes.lexIncrement(culvertValue);
                int comp = c.compare(exclusiveValue, range.getEnd());
                if (comp < 1) {
                    rangeMap.put(hiveColumn, new CRange(Bytes.max(exclusiveValue, range.getStart())));
                } else if (comp == 0) {
                    // no-op - this is the same as what we're doing already
                } else {
                    // if we get conflicting statements, kill it all...
                    rangeMap.clear();
                    return new InputSplit[0];
                }
            } else if (operation.equals(GenericUDFOPEqualOrGreaterThan.class.getName())) {
                // =>
                // narrow the existing range
                int comp = c.compare(culvertValue, range.getEnd());
                if (comp < 1) {
                    rangeMap.put(hiveColumn, new CRange(Bytes.max(culvertValue, range.getStart())));
                } else if (comp == 0) {
                    // no-op - this is the same as what we're doing already
                } else {
                    // if we get conflicting statements, kill it all...
                    rangeMap.clear();
                    return new InputSplit[0];
                }
            } else if (operation.equals(GenericUDFOPEqualOrLessThan.class.getName())) {
                // =<
                int comp = c.compare(culvertValue, range.getStart());
                if (comp > 1) {
                    rangeMap.put(hiveColumn, new CRange(Bytes.min(culvertValue, range.getEnd())));
                } else if (comp == 0) {
                    // no-op
                } else {
                    // if we get conflicting statements, kill it...
                    rangeMap.clear();
                    return new InputSplit[0];
                }
            }
        }
    } // <-- end getting start/end ranges

    Client culvertClient = new Client(conf);
    Index[] culvertIndices = culvertClient.getIndicesForTable(CulvertHiveUtils.getCulvertTable(conf));

    // create a map with the broken up ranges
    SortedMap<String, List<Constraint>> brokenRanges = new TreeMap<String, List<Constraint>>();
    for (Entry<String, CRange> rangeEntry : rangeMap.entrySet()) {
        String hiveColumn = rangeEntry.getKey();
        CRange totalRange = rangeEntry.getValue();
        Index indexToUse = getIndexForHiveColumn(hiveColumn, culvertIndexMappings, culvertIndices);

        /*
         * There should be an index from when we did the predicate pushdown in the
         * storage handler
         */
        if (indexToUse == null) {
            throw new RuntimeException("No indices exist over the requested column");
        }

        /*
         * Set the start and end on the splits to start where the overall range
         * starts and ends so we don't query the entire shard, which would give
         * the wrong result.
         */
        List<CRange> startEnds = indexToUse.getSplits();
        List<Constraint> newRanges = new ArrayList<Constraint>();

        if (startEnds.size() == 1) {
            newRanges.add(new IndexRangeConstraint(indexToUse, totalRange));
            brokenRanges.put(hiveColumn, newRanges);
            continue;
        }

        assert (startEnds.size() != 0);

        CRange firstShard = startEnds.get(0);
        CRange first = new CRange(totalRange.getStart(), firstShard.getEnd());
        newRanges.add(new IndexRangeConstraint(indexToUse, first));

        for (int i = 1; i < startEnds.size() - 1; i++) {
            newRanges.add(new IndexRangeConstraint(indexToUse, startEnds.get(i)));
        }

        CRange lastShard = startEnds.get(startEnds.size() - 1);
        CRange last = new CRange(lastShard.getStart(), totalRange.getEnd());
        newRanges.add(new IndexRangeConstraint(indexToUse, last));

        brokenRanges.put(hiveColumn, newRanges);
    } // <-- finished creating fragment map

    /*
     * Finally, take the cross product of all the fragments in order to find the
     * splits.
     */
    List<List<Constraint>> andRangeList = recursiveBuildRangeTuples(brokenRanges);
    InputSplit[] splits = new InputSplit[andRangeList.size()];
    for (int i = 0; i < splits.length; i++) {
        List<Constraint> constraints = andRangeList.get(i);
        Set<String> locations = new HashSet<String>();
        for (Constraint c : constraints) {
            locations.addAll(((IndexRangeConstraint) c).getIndex().getPreferredHosts());
        }

        // add field selection too...
        List<Constraint> fieldSelectingConstraints = new ArrayList<Constraint>(constraints.size());
        // TODO - wrap these constraints in field selecting constraints now that
        // we've pulled the preferred hosts out of them

        splits[i] = new CulvertInputSplit(new And(constraints), locations);
    }
    return null;
}
From source file:com.benchmark.mapred.Join.java
License:Apache License
/**
 * The main driver for sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there is communication problems with the
 *         job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("join");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_maps = cluster.getTaskTrackers() * jobConf.getInt("test.sort.maps_per_host", 10);
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                num_maps = Integer.parseInt(args[++i]);
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumMapTasks(num_maps);
    jobConf.setNumReduceTasks(num_reduces);

    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }

    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.remove(otherArgs.size() - 1)));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }

    jobConf.setInputFormat(CompositeInputFormat.class);
    jobConf.set("mapred.join.expr",
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    jobConf.setOutputFormat(outputFormatClass);

    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took "
            + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
From source file:com.benchmark.mapred.Sort.java
License:Apache License
/**
 * The main driver for sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there is communication problems with the
 *         job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("sorter");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                jobConf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumReduceTasks(num_reduces);

    jobConf.setInputFormat(inputFormatClass);
    jobConf.setOutputFormat(outputFormatClass);

    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(jobConf, otherArgs.get(0));
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1)));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.<K, V>writePartitionFile(jobConf, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took "
            + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
public RecordReader<LongWritable, Text> getRecordReader(InputSplit genericSplit, JobConf job,
        Reporter reporter) throws IOException {
    reporter.setStatus(genericSplit.toString());
    String delimiter = job.get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter)
        recordDelimiterBytes = delimiter.getBytes();
    return new LineRecordReader(job, (FileSplit) genericSplit, recordDelimiterBytes);
}