List of usage examples for org.apache.hadoop.mapred JobConf get

Method: public String get(String name)

Parameter: name - the property name.
Returns: the value of the name property, or null if no such property exists.
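Before the project examples, here is a minimal standalone sketch of the lookup pattern they all share: set a property, read it back with get, and guard against the null returned for an unset key. The property names below are hypothetical and used only for illustration.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Hypothetical property name, used only for this sketch.
        conf.set("my.example.key", "some-value");

        // get(name) returns the configured value, or null when the key is absent.
        String present = conf.get("my.example.key");   // "some-value"
        String missing = conf.get("my.example.unset"); // null

        // Callers typically guard against the null return, as the examples
        // below do before parsing or splitting the value.
        if (missing == null) {
            System.out.println("property not set; falling back to a default");
        }
        System.out.println("my.example.key = " + present);
    }
}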
From source file:colossal.pipe.ColHadoopCombiner.java
License:Apache License
@Override
public void configure(JobConf conf) {
    super.configure(conf);
    this.schema = ColPhase.getSchema(this.out);
    this.groupBy = conf.get(ColPhase.GROUP_BY);
    this.sortBy = conf.get(ColPhase.SORT_BY);
}
From source file:colossal.pipe.ColHadoopMapper.java
License:Apache License
@SuppressWarnings("unchecked") public void configure(JobConf conf) { this.mapper = ReflectionUtils.newInstance(conf.getClass(ColPhase.MAPPER, BaseMapper.class, ColMapper.class), conf);/* w ww . ja v a2 s . c om*/ this.isMapOnly = conf.getNumReduceTasks() == 0; try { this.out = (OUT) ReflectionUtils .newInstance(conf.getClass(ColPhase.MAP_OUT_CLASS, Object.class, Object.class), conf); this.schema = ColPhase.getSchema(this.out); this.groupBy = conf.get(ColPhase.GROUP_BY); this.sortBy = conf.get(ColPhase.SORT_BY); if (conf.getInputFormat() instanceof TextInputFormat) { Class<?> inClass = conf.getClass(ColPhase.MAP_IN_CLASS, Object.class, Object.class); if (inClass == String.class) { isStringInput = true; } else if (inClass == Text.class) { isTextInput = true; } else { isJsonInput = true; inSchema = ColPhase.getSchema((IN) ReflectionUtils.newInstance(inClass, conf)); } } } catch (Exception e) { if (e instanceof RuntimeException) throw (RuntimeException) e; throw new RuntimeException(e); } mapper.setConf(conf); }
From source file:colossal.pipe.ColHadoopReducerBase.java
License:Apache License
@SuppressWarnings({ "unchecked", "deprecation" }) @Override//from w w w . jav a2 s. c o m public void configure(JobConf conf) { this.reducer = getReducer(conf); try { this.out = (OUT) Class.forName(conf.get(ColPhase.REDUCE_OUT_CLASS)).newInstance(); } catch (RuntimeException e) { throw e; } catch (Exception e) { throw new RuntimeException(e); } }
From source file:com.acme.extensions.data.SeedingHadoopAdapter.java
License:Apache License
@Override
public void configure(JobConf conf) {
    super.configure(conf);
    seed = Long.parseLong(conf.get("seed"));
    rng = new Random(seed);
}
From source file:com.alexholmes.hadooputils.sort.Sort.java
License:Apache License
/**
 * The driver for the sort MapReduce job.
 *
 * @param jobConf           sort configuration
 * @param numMapTasks       number of map tasks
 * @param numReduceTasks    number of reduce tasks
 * @param sampler           sampler, if required
 * @param codecClass        the compression codec for compressing final outputs
 * @param mapCodecClass     the compression codec for compressing intermediary map outputs
 * @param createLzopIndexes whether or not a MR job should be launched to create LZOP indexes
 *                          for the job output files
 * @param inputDirAsString  input directory in CSV-form
 * @param outputDirAsString output directory
 * @return true if the job completed successfully
 * @throws IOException        if something went wrong
 * @throws URISyntaxException if a URI wasn't correctly formed
 */
public boolean runJob(final JobConf jobConf, final Integer numMapTasks, final Integer numReduceTasks,
        final InputSampler.Sampler<K, V> sampler, final Class<? extends CompressionCodec> codecClass,
        final Class<? extends CompressionCodec> mapCodecClass, final boolean createLzopIndexes,
        final String inputDirAsString, final String outputDirAsString) throws IOException, URISyntaxException {

    jobConf.setJarByClass(Sort.class);
    jobConf.setJobName("sorter");

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();

    if (numMapTasks != null) {
        jobConf.setNumMapTasks(numMapTasks);
    }
    if (numReduceTasks != null) {
        jobConf.setNumReduceTasks(numReduceTasks);
    } else {
        int numReduces = (int) (cluster.getMaxReduceTasks() * 0.9);
        String sortReduces = jobConf.get("test.sort.reduces_per_host");
        if (sortReduces != null) {
            numReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces);
        }
        // Set user-supplied (possibly default) job configs
        jobConf.setNumReduceTasks(numReduces);
    }

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(SortReduce.class);

    jobConf.setInputFormat(SortInputFormat.class);

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    if (mapCodecClass != null) {
        jobConf.setMapOutputCompressorClass(mapCodecClass);
    }

    if (codecClass != null) {
        jobConf.setBoolean("mapred.output.compress", true);
        jobConf.setClass("mapred.output.compression.codec", codecClass, CompressionCodec.class);
    }

    FileInputFormat.setInputPaths(jobConf, inputDirAsString);
    FileOutputFormat.setOutputPath(jobConf, new Path(outputDirAsString));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];

        FileSystem fileSystem = FileSystem.get(jobConf);

        if (fileSystem.exists(inputDir) && fileSystem.isFile(inputDir)) {
            inputDir = inputDir.getParent();
        }
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.writePartitionFile(jobConf, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + jobConf.getNumReduceTasks() + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds.");

    if (jobResult.isSuccessful()) {
        if (createLzopIndexes && codecClass != null && LzopCodec.class.equals(codecClass)) {
            new LzoIndexer(jobConf).index(new Path(outputDirAsString));
        }
        return true;
    }
    return false;
}
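As a usage note, the driver above only consults test.sort.reduces_per_host (via JobConf.get) when it has to derive a reduce count itself. The sketch below is an assumption about how a caller might supply that property: only the property name comes from the code above, while the value and the surrounding class are illustrative.

import org.apache.hadoop.mapred.JobConf;

public class SortConfigSketch {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf();

        // Only the property name comes from runJob() above; the value "2" is illustrative.
        jobConf.set("test.sort.reduces_per_host", "2");

        // runJob() reads this back with jobConf.get("test.sort.reduces_per_host") and,
        // when it is non-null, sets the reduce count to taskTrackers * 2 instead of
        // the 90%-of-cluster default.
        System.out.println(jobConf.get("test.sort.reduces_per_host"));
    }
}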
From source file:com.aliyun.openservices.tablestore.hive.TableStoreInputFormat.java
License:Apache License
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Configuration dest = translateConfig(job);
    SyncClientInterface ots = null;
    String columns = job.get(TableStoreConsts.COLUMNS_MAPPING);
    if (columns == null) {
        columns = job.get(serdeConstants.LIST_COLUMNS);
    }
    logger.debug("columns to get: {}", columns);
    List<org.apache.hadoop.mapreduce.InputSplit> splits;
    try {
        ots = TableStore.newOtsClient(dest);
        TableMeta meta = fetchTableMeta(ots, job.get(TableStoreConsts.TABLE_NAME));
        RangeRowQueryCriteria criteria = fetchCriteria(meta, columns);
        com.aliyun.openservices.tablestore.hadoop.TableStoreInputFormat.addCriteria(dest, criteria);
        splits = com.aliyun.openservices.tablestore.hadoop.TableStoreInputFormat.getSplits(dest, ots);
    } finally {
        if (ots != null) {
            ots.shutdown();
            ots = null;
        }
    }
    InputSplit[] res = new InputSplit[splits.size()];
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(new Job(job));
    Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);
    int i = 0;
    for (org.apache.hadoop.mapreduce.InputSplit split : splits) {
        res[i] = new TableStoreInputSplit(
                (com.aliyun.openservices.tablestore.hadoop.TableStoreInputSplit) split, tablePaths[0]);
        ++i;
    }
    return res;
}
From source file:com.aliyun.openservices.tablestore.hive.TableStoreOutputFormat.java
License:Apache License
@Override
public RecordWriter<Writable, BatchWriteWritable> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    String table = job.get(TableStoreConsts.TABLE_NAME);
    Configuration conf = translateConfig(job);
    SyncClientInterface ots = TableStore.newOtsClient(conf);
    final org.apache.hadoop.mapreduce.RecordWriter<Writable, BatchWriteWritable> writer = new TableStoreRecordWriter(
            ots, table);
    return new org.apache.hadoop.mapred.RecordWriter<Writable, BatchWriteWritable>() {
        @Override
        public void write(Writable any, BatchWriteWritable rows) throws IOException {
            try {
                writer.write(any, rows);
            } catch (InterruptedException ex) {
                throw new IOException("interrupted");
            }
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            try {
                writer.close(null);
            } catch (InterruptedException ex) {
                throw new IOException("interrupted");
            }
        }
    };
}
From source file:com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java
License:Apache License
public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);

    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (Exception se) {
        throw new IOException(se);
    }

    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }
            scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // tables column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }
            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }
            if (!addAll) {
                break;
            }
        }
    }

    // setting start and end time for scanning
    setTime(jobConf, scan);

    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);

    setScan(scan);
    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(
            tableSplit, tac);

    return new RecordReader<Text, Text>() {

        //@Override
        public void close() throws IOException {
            recordReader.close();
        }

        // @Override
        public Text createKey() {
            return new Text();
        }

        // @Override
        public Text createValue() {
            return new Text();
        }

        // @Override
        public long getPos() throws IOException {
            return 0;
        }

        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }

        // @Override
        public boolean next(Text rowKey, Text value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                // logic to find the column name
                if (next) {
                    rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));
                    StringBuilder val = new StringBuilder();
                    String prev = "";
                    for (KeyValue kv : recordReader.getCurrentValue().raw()) {
                        String current = new String(kv.getQualifier());
                        char[] col = new String(current).toCharArray();
                        if (val.length() > 0) {
                            if (prev.equals(current))
                                val.append(",");
                            else
                                val.append("\t");
                        }
                        prev = current;
                        val.append(col[0]).append("_");
                        val.append(Bytes.toString(kv.getValue()));
                    }
                    value.set(val.toString());
                    // rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));
                    // value.set(Bytes.toString(recordReader.getCurrentValue().value()));
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}
From source file:com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java
License:Apache License
/**
 * The minimum time should be less than the maximum time; <br/>
 * otherwise the time-range filter will be skipped.
 *
 * @param jobConf
 * @param scan
 * @throws java.io.IOException
 */
private void setTime(JobConf jobConf, Scan scan) throws IOException {
    long min = 0l;
    String mintime = jobConf.get("hbase.mintime");
    if (StringUtils.isNotEmpty(mintime)) {
        min = Long.parseLong(mintime);
    }
    String maxtime = jobConf.get("hbase.maxtime");
    if (StringUtils.isNotEmpty(maxtime)) {
        long l = Long.parseLong(maxtime);
        if (min <= l)
            scan.setTimeRange(min, l);
    }

    FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ALL);

    boolean isInmissing = true;
    String missing = jobConf.get("hbase.include.missing");
    if (StringUtils.isNotEmpty(missing)) {
        isInmissing = Boolean.valueOf(missing);
    }
    String hvalue = jobConf.get("hbase.include.filter.value");
    if (StringUtils.isNotEmpty(hvalue)) {
        String[] columns = hvalue.split(",");
        if (columns.length > 0) {
            for (String column : columns) {
                String[] fv = column.split(":");
                SingleColumnValueFilter rowfilter = new SingleColumnValueFilter(Bytes.toBytes(fv[0]),
                        Bytes.toBytes(fv[1]), CompareOp.EQUAL, Bytes.toBytes(fv[2]));
                rowfilter.setFilterIfMissing(isInmissing);
                list.addFilter(rowfilter);
            }
        }
    }

    boolean isExmissing = false;
    String exMissing = jobConf.get("hbase.exclude.missing");
    if (StringUtils.isNotEmpty(exMissing)) {
        isExmissing = Boolean.valueOf(exMissing);
    }
    String hexvalue = jobConf.get("hbase.exclude.filter.value");
    if (StringUtils.isNotEmpty(hexvalue)) {
        String[] columns = hexvalue.split(",");
        if (columns.length > 0) {
            for (String column : columns) {
                String[] fv = column.split(":");
                SingleColumnValueFilter rowfilter = new SingleColumnValueFilter(Bytes.toBytes(fv[0]),
                        Bytes.toBytes(fv[1]), CompareOp.NOT_EQUAL, Bytes.toBytes(fv[2]));
                rowfilter.setFilterIfMissing(isExmissing);
                list.addFilter(rowfilter);
            }
        }
    }

    String hmax = jobConf.get("hbase.max.version");
    if (StringUtils.isNotEmpty(hmax)) {
        scan.setMaxVersions(Integer.parseInt(hmax));
    }

    scan.setFilter(list);
}
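The setTime helper above is driven entirely by JobConf properties retrieved with get, each of which is optional. The following sketch shows how a job might populate them before the input format runs; only the property names and the family:qualifier:value layout come from the code above, while the concrete values are illustrative assumptions.

import org.apache.hadoop.mapred.JobConf;

public class HBaseScanPropsSketch {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf();

        // Time range in epoch milliseconds; the range is applied only when
        // the minimum does not exceed the maximum.
        jobConf.set("hbase.mintime", "1609459200000");
        jobConf.set("hbase.maxtime", "1640995200000");

        // Include/exclude filters in family:qualifier:value form, comma-separated.
        jobConf.set("hbase.include.filter.value", "cf1:status:active");
        jobConf.set("hbase.exclude.filter.value", "cf1:flag:deleted");

        // Maximum number of cell versions to scan.
        jobConf.set("hbase.max.version", "3");

        // setTime() retrieves each of these with jobConf.get(...) and skips any that are unset.
        System.out.println(jobConf.get("hbase.include.filter.value"));
    }
}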
From source file:com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java
License:Apache License
/**
 * Converts a filter (which has been pushed down from Hive's optimizer)
 * into corresponding restrictions on the HBase scan. The
 * filter should already be in a form which can be fully converted.
 *
 * @param jobConf configuration for the scan
 * @param scan the HBase scan object to restrict
 * @param tableSplit the HBase table split to restrict, or null
 *                   if calculating splits
 * @param iKey 0-based offset of key column within Hive table
 * @return converted table split if any
 */
private TableSplit convertFilter(JobConf jobConf, Scan scan, TableSplit tableSplit, int iKey) throws IOException {
    String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (filterExprSerialized == null) {
        return tableSplit;
    }
    ExprNodeDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized, jobConf);

    String columnNameProperty = jobConf.get(Constants.LIST_COLUMNS);
    List<String> columnNames = Arrays.asList(columnNameProperty.split(","));

    IndexPredicateAnalyzer analyzer = newIndexPredicateAnalyzer(columnNames.get(iKey));

    List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
    ExprNodeDesc residualPredicate = analyzer.analyzePredicate(filterExpr, searchConditions);

    // There should be no residual since we already negotiated
    // that earlier in HBaseStorageHandler.decomposePredicate.
    if (residualPredicate != null) {
        throw new RuntimeException("Unexpected residual predicate " + residualPredicate.getExprString());
    }

    // There should be exactly one predicate since we already
    // negotiated that also.
    if (searchConditions.size() != 1) {
        throw new RuntimeException("Exactly one search condition expected in push down");
    }

    // Convert the search condition into a restriction on the HBase scan
    IndexSearchCondition sc = searchConditions.get(0);
    ExprNodeConstantEvaluator eval = new ExprNodeConstantEvaluator(sc.getConstantDesc());
    byte[] startRow;
    try {
        ObjectInspector objInspector = eval.initialize(null);
        Object writable = eval.evaluate(null);
        ByteStream.Output serializeStream = new ByteStream.Output();
        LazyUtils.writePrimitiveUTF8(serializeStream, writable, (PrimitiveObjectInspector) objInspector, false,
                (byte) 0, null);
        startRow = new byte[serializeStream.getCount()];
        System.arraycopy(serializeStream.getData(), 0, startRow, 0, serializeStream.getCount());
    } catch (HiveException ex) {
        throw new IOException(ex);
    }

    // stopRow is exclusive, so pad it with a trailing 0 byte to
    // make it compare as the very next value after startRow
    byte[] stopRow = new byte[startRow.length + 1];
    System.arraycopy(startRow, 0, stopRow, 0, startRow.length);

    if (tableSplit != null) {
        tableSplit = new TableSplit(tableSplit.getTableName(), startRow, stopRow,
                tableSplit.getRegionLocation());
    }
    scan.setStartRow(startRow);
    scan.setStopRow(stopRow);

    // Add a WhileMatchFilter to make the scan terminate as soon
    // as we see a non-matching key. This is probably redundant
    // since the stopRow above should already take care of it for us.
    scan.setFilter(
            new WhileMatchFilter(new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(startRow))));

    return tableSplit;
}