Example usage for org.apache.hadoop.mapred JobConf get

List of usage examples for org.apache.hadoop.mapred JobConf get

Introduction

This page collects usage examples for org.apache.hadoop.mapred JobConf get.

Prototype

public String get(String name) 

Document

Get the value of the name property, or null if no such property exists.
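
As a quick orientation before the project examples, here is a minimal, self-contained sketch of the behavior described above; the property names example.custom.property and example.missing.property are made up for illustration.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        conf.set("example.custom.property", "42");             // hypothetical property name

        String present = conf.get("example.custom.property");  // "42"
        String absent = conf.get("example.missing.property");  // null, property was never set

        System.out.println("present = " + present);
        System.out.println("absent  = " + absent);
    }
}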

Usage

From source file:colossal.pipe.ColHadoopCombiner.java

License:Apache License

@Override
public void configure(JobConf conf) {
    super.configure(conf);
    this.schema = ColPhase.getSchema(this.out);
    this.groupBy = conf.get(ColPhase.GROUP_BY);
    this.sortBy = conf.get(ColPhase.SORT_BY);
}

From source file:colossal.pipe.ColHadoopMapper.java

License:Apache License

@SuppressWarnings("unchecked")
public void configure(JobConf conf) {
    this.mapper = ReflectionUtils.newInstance(conf.getClass(ColPhase.MAPPER, BaseMapper.class, ColMapper.class),
            conf);
    this.isMapOnly = conf.getNumReduceTasks() == 0;
    try {
        this.out = (OUT) ReflectionUtils
                .newInstance(conf.getClass(ColPhase.MAP_OUT_CLASS, Object.class, Object.class), conf);
        this.schema = ColPhase.getSchema(this.out);
        this.groupBy = conf.get(ColPhase.GROUP_BY);
        this.sortBy = conf.get(ColPhase.SORT_BY);
        if (conf.getInputFormat() instanceof TextInputFormat) {
            Class<?> inClass = conf.getClass(ColPhase.MAP_IN_CLASS, Object.class, Object.class);
            if (inClass == String.class) {
                isStringInput = true;
            } else if (inClass == Text.class) {
                isTextInput = true;
            } else {
                isJsonInput = true;
                inSchema = ColPhase.getSchema((IN) ReflectionUtils.newInstance(inClass, conf));
            }
        }
    } catch (Exception e) {
        if (e instanceof RuntimeException)
            throw (RuntimeException) e;
        throw new RuntimeException(e);
    }

    mapper.setConf(conf);
}

From source file:colossal.pipe.ColHadoopReducerBase.java

License:Apache License

@SuppressWarnings({ "unchecked", "deprecation" })
@Override
public void configure(JobConf conf) {
    this.reducer = getReducer(conf);
    try {
        this.out = (OUT) Class.forName(conf.get(ColPhase.REDUCE_OUT_CLASS)).newInstance();
    } catch (RuntimeException e) {
        throw e;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:com.acme.extensions.data.SeedingHadoopAdapter.java

License:Apache License

@Override
public void configure(JobConf conf) {
    super.configure(conf);
    seed = Long.parseLong(conf.get("seed"));
    rng = new Random(seed);
}

From source file:com.alexholmes.hadooputils.sort.Sort.java

License:Apache License

/**
 * The driver for the sort MapReduce job.
 *
 * @param jobConf           sort configuration
 * @param numMapTasks       number of map tasks
 * @param numReduceTasks    number of reduce tasks
 * @param sampler           sampler, if required
 * @param codecClass        the compression codec for compressing final outputs
 * @param mapCodecClass     the compression codec for compressing intermediary map outputs
 * @param createLzopIndexes whether or not a MR job should be launched to create LZOP indexes
 *                          for the job output files
 * @param inputDirAsString  input directory in CSV-form
 * @param outputDirAsString output directory
 * @return true if the job completed successfully
 * @throws IOException        if something went wrong
 * @throws URISyntaxException if a URI wasn't correctly formed
 */
public boolean runJob(final JobConf jobConf, final Integer numMapTasks, final Integer numReduceTasks,
        final InputSampler.Sampler<K, V> sampler, final Class<? extends CompressionCodec> codecClass,
        final Class<? extends CompressionCodec> mapCodecClass, final boolean createLzopIndexes,
        final String inputDirAsString, final String outputDirAsString) throws IOException, URISyntaxException {

    jobConf.setJarByClass(Sort.class);
    jobConf.setJobName("sorter");

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();

    if (numMapTasks != null) {
        jobConf.setNumMapTasks(numMapTasks);
    }
    if (numReduceTasks != null) {
        jobConf.setNumReduceTasks(numReduceTasks);
    } else {
        int numReduces = (int) (cluster.getMaxReduceTasks() * 0.9);
        String sortReduces = jobConf.get("test.sort.reduces_per_host");
        if (sortReduces != null) {
            numReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces);
        }

        // Set user-supplied (possibly default) job configs
        jobConf.setNumReduceTasks(numReduces);
    }

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(SortReduce.class);

    jobConf.setInputFormat(SortInputFormat.class);

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    if (mapCodecClass != null) {
        jobConf.setMapOutputCompressorClass(mapCodecClass);
    }

    if (codecClass != null) {
        jobConf.setBoolean("mapred.output.compress", true);
        jobConf.setClass("mapred.output.compression.codec", codecClass, CompressionCodec.class);
    }

    FileInputFormat.setInputPaths(jobConf, inputDirAsString);
    FileOutputFormat.setOutputPath(jobConf, new Path(outputDirAsString));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];

        FileSystem fileSystem = FileSystem.get(jobConf);

        if (fileSystem.exists(inputDir) && fileSystem.isFile(inputDir)) {
            inputDir = inputDir.getParent();
        }
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.writePartitionFile(jobConf, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + jobConf.getNumReduceTasks() + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds.");

    if (jobResult.isSuccessful()) {
        if (createLzopIndexes && codecClass != null && LzopCodec.class.equals(codecClass)) {
            new LzoIndexer(jobConf).index(new Path(outputDirAsString));
        }
        return true;
    }
    return false;
}
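
A rough sketch of how this driver might be invoked is shown below; the generic type arguments, task counts, and directory paths are assumptions chosen for illustration rather than values taken from the project.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

import com.alexholmes.hadooputils.sort.Sort;

public class SortDriverSketch {
    public static void main(String[] args) throws Exception {
        JobConf jobConf = new JobConf();
        Sort<Text, Text> sort = new Sort<Text, Text>();
        boolean succeeded = sort.runJob(jobConf,
                null,               // numMapTasks: null lets the framework decide
                10,                 // numReduceTasks: assumed value
                null,               // sampler: none, so no total-order partitioning
                null,               // codecClass: no final-output compression
                null,               // mapCodecClass: no map-output compression
                false,              // createLzopIndexes
                "/tmp/sort-input",  // inputDirAsString (comma-separated paths allowed)
                "/tmp/sort-output");
        System.exit(succeeded ? 0 : 1);
    }
}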

From source file:com.aliyun.openservices.tablestore.hive.TableStoreInputFormat.java

License:Apache License

@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Configuration dest = translateConfig(job);
    SyncClientInterface ots = null;
    String columns = job.get(TableStoreConsts.COLUMNS_MAPPING);
    if (columns == null) {
        columns = job.get(serdeConstants.LIST_COLUMNS);
    }
    logger.debug("columns to get: {}", columns);
    List<org.apache.hadoop.mapreduce.InputSplit> splits;
    try {
        ots = TableStore.newOtsClient(dest);
        TableMeta meta = fetchTableMeta(ots, job.get(TableStoreConsts.TABLE_NAME));
        RangeRowQueryCriteria criteria = fetchCriteria(meta, columns);
        com.aliyun.openservices.tablestore.hadoop.TableStoreInputFormat.addCriteria(dest, criteria);
        splits = com.aliyun.openservices.tablestore.hadoop.TableStoreInputFormat.getSplits(dest, ots);
    } finally {
        if (ots != null) {
            ots.shutdown();
            ots = null;
        }
    }
    InputSplit[] res = new InputSplit[splits.size()];
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(new Job(job));
    Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);
    int i = 0;
    for (org.apache.hadoop.mapreduce.InputSplit split : splits) {
        res[i] = new TableStoreInputSplit(
                (com.aliyun.openservices.tablestore.hadoop.TableStoreInputSplit) split, tablePaths[0]);
        ++i;
    }
    return res;
}

From source file:com.aliyun.openservices.tablestore.hive.TableStoreOutputFormat.java

License:Apache License

@Override
public RecordWriter<Writable, BatchWriteWritable> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    String table = job.get(TableStoreConsts.TABLE_NAME);
    Configuration conf = translateConfig(job);
    SyncClientInterface ots = TableStore.newOtsClient(conf);
    final org.apache.hadoop.mapreduce.RecordWriter<Writable, BatchWriteWritable> writer = new TableStoreRecordWriter(
            ots, table);
    return new org.apache.hadoop.mapred.RecordWriter<Writable, BatchWriteWritable>() {
        @Override
        public void write(Writable any, BatchWriteWritable rows) throws IOException {
            try {
                writer.write(any, rows);
            } catch (InterruptedException ex) {
                throw new IOException("interrupted");
            }
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            try {
                writer.close(null);
            } catch (InterruptedException ex) {
                throw new IOException("interrupted");
            }
        }
    };
}

From source file:com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java

License:Apache License

public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (Exception se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }
            scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            if (!addAll) {
                break;
            }
        }
    }

    //setting start and end time for scanning
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);

    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {

        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(
            tableSplit, tac);

    return new RecordReader<Text, Text>() {

        //@Override
        public void close() throws IOException {
            recordReader.close();
        }

        // @Override
        public Text createKey() {
            return new Text();
        }

        // @Override
        public Text createValue() {
            return new Text();
        }

        // @Override
        public long getPos() throws IOException {
            return 0;
        }

        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;

            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return progress;
        }

        // @Override
        public boolean next(Text rowKey, Text value) throws IOException {

            boolean next = false;

            try {
                next = recordReader.nextKeyValue();

                // Build the value string from the row's column qualifiers and cell values
                if (next) {
                    rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));
                    StringBuilder val = new StringBuilder();
                    String prev = "";
                    for (KeyValue kv : recordReader.getCurrentValue().raw()) {
                        String current = new String(kv.getQualifier());
                        char[] col = current.toCharArray();
                        if (val.length() > 0) {
                            if (prev.equals(current))
                                val.append(",");
                            else
                                val.append("\t");
                        }
                        prev = current;
                        val.append(col[0]).append("_");
                        val.append(Bytes.toString(kv.getValue()));
                    }
                    value.set(val.toString()); // rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));;
                    // value.set(Bytes.toString(recordReader.getCurrentValue().value()));
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return next;
        }
    };
}

From source file:com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java

License:Apache License

/**
 * The minimum time should be less than the max time,
 * otherwise the time-range filter will be skipped.
 *
 * @param jobConf configuration carrying the time-range and filter properties
 * @param scan    the HBase scan to restrict
 * @throws java.io.IOException
 */
private void setTime(JobConf jobConf, Scan scan) throws IOException {
    long min = 0L;
    String mintime = jobConf.get("hbase.mintime");
    if (StringUtils.isNotEmpty(mintime)) {
        min = Long.parseLong(mintime);
    }
    String maxtime = jobConf.get("hbase.maxtime");
    if (StringUtils.isNotEmpty(maxtime)) {
        long l = Long.parseLong(maxtime);
        if (min <= l)
            scan.setTimeRange(min, l);
    }
    FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ALL);
    boolean isInmissing = true;
    String missing = jobConf.get("hbase.include.missing");
    if (StringUtils.isNotEmpty(missing)) {
        isInmissing = Boolean.valueOf(missing);
    }

    String hvalue = jobConf.get("hbase.include.filter.value");
    if (StringUtils.isNotEmpty(hvalue)) {
        String[] columns = hvalue.split(",");
        if (columns.length > 0) {
            for (String column : columns) {
                String[] fv = column.split(":");
                SingleColumnValueFilter rowfilter = new SingleColumnValueFilter(Bytes.toBytes(fv[0]),
                        Bytes.toBytes(fv[1]), CompareOp.EQUAL, Bytes.toBytes(fv[2]));
                rowfilter.setFilterIfMissing(isInmissing);
                list.addFilter(rowfilter);
            }
        }
    }
    boolean isExmissing = false;
    String exMissing = jobConf.get("hbase.exclude.missing");
    if (StringUtils.isNotEmpty(exMissing)) {
        isExmissing = Boolean.valueOf(exMissing);
    }

    String hexvalue = jobConf.get("hbase.exclude.filter.value");
    if (StringUtils.isNotEmpty(hexvalue)) {
        String[] columns = hexvalue.split(",");
        if (columns.length > 0) {
            for (String column : columns) {
                String[] fv = column.split(":");
                SingleColumnValueFilter rowfilter = new SingleColumnValueFilter(Bytes.toBytes(fv[0]),
                        Bytes.toBytes(fv[1]), CompareOp.NOT_EQUAL, Bytes.toBytes(fv[2]));
                rowfilter.setFilterIfMissing(isExmissing);
                list.addFilter(rowfilter);
            }
        }
    }
    String hmax = jobConf.get("hbase.max.version");
    if (StringUtils.isNotEmpty(hmax)) {
        scan.setMaxVersions(Integer.parseInt(hmax));
    }
    scan.setFilter(list);
}
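
For reference, the configuration properties read by setTime above could be supplied along the following lines; every value, and the cf:... column references, are illustrative assumptions.

import org.apache.hadoop.mapred.JobConf;

public class TimeFilterConfigSketch {
    public static JobConf buildConf() {
        JobConf jobConf = new JobConf();
        jobConf.set("hbase.mintime", "1609459200000");                  // scan lower bound, epoch millis
        jobConf.set("hbase.maxtime", "1612137600000");                  // scan upper bound, epoch millis
        jobConf.set("hbase.include.filter.value", "cf:status:active");  // family:qualifier:value, comma-separated
        jobConf.set("hbase.include.missing", "true");                   // forwarded to setFilterIfMissing on the EQUAL filters
        jobConf.set("hbase.exclude.filter.value", "cf:status:deleted"); // NOT_EQUAL filters, same format
        jobConf.set("hbase.exclude.missing", "false");                  // forwarded to setFilterIfMissing on the NOT_EQUAL filters
        jobConf.set("hbase.max.version", "3");                          // maximum cell versions to scan
        return jobConf;
    }
}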

From source file:com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java

License:Apache License

/**
 * Converts a filter (which has been pushed down from Hive's optimizer)
 * into corresponding restrictions on the HBase scan.  The
 * filter should already be in a form which can be fully converted.
 *
 * @param jobConf    configuration for the scan
 * @param scan       the HBase scan object to restrict
 * @param tableSplit the HBase table split to restrict, or null
 *                   if calculating splits
 * @param iKey       0-based offset of key column within Hive table
 * @return converted table split if any
 */
private TableSplit convertFilter(JobConf jobConf, Scan scan, TableSplit tableSplit, int iKey)
        throws IOException {

    String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (filterExprSerialized == null) {
        return tableSplit;
    }
    ExprNodeDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized, jobConf);

    String columnNameProperty = jobConf.get(Constants.LIST_COLUMNS);
    List<String> columnNames = Arrays.asList(columnNameProperty.split(","));

    IndexPredicateAnalyzer analyzer = newIndexPredicateAnalyzer(columnNames.get(iKey));

    List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
    ExprNodeDesc residualPredicate = analyzer.analyzePredicate(filterExpr, searchConditions);

    // There should be no residual since we already negotiated
    // that earlier in HBaseStorageHandler.decomposePredicate.
    if (residualPredicate != null) {
        throw new RuntimeException("Unexpected residual predicate " + residualPredicate.getExprString());
    }

    // There should be exactly one predicate since we already
    // negotiated that also.
    if (searchConditions.size() != 1) {
        throw new RuntimeException("Exactly one search condition expected in push down");
    }

    // Convert the search condition into a restriction on the HBase scan
    IndexSearchCondition sc = searchConditions.get(0);
    ExprNodeConstantEvaluator eval = new ExprNodeConstantEvaluator(sc.getConstantDesc());
    byte[] startRow;
    try {
        ObjectInspector objInspector = eval.initialize(null);
        Object writable = eval.evaluate(null);
        ByteStream.Output serializeStream = new ByteStream.Output();
        LazyUtils.writePrimitiveUTF8(serializeStream, writable, (PrimitiveObjectInspector) objInspector, false,
                (byte) 0, null);
        startRow = new byte[serializeStream.getCount()];
        System.arraycopy(serializeStream.getData(), 0, startRow, 0, serializeStream.getCount());
    } catch (HiveException ex) {
        throw new IOException(ex);
    }

    // stopRow is exclusive, so pad it with a trailing 0 byte to
    // make it compare as the very next value after startRow
    byte[] stopRow = new byte[startRow.length + 1];
    System.arraycopy(startRow, 0, stopRow, 0, startRow.length);

    if (tableSplit != null) {
        tableSplit = new TableSplit(tableSplit.getTableName(), startRow, stopRow,
                tableSplit.getRegionLocation());
    }
    scan.setStartRow(startRow);
    scan.setStopRow(stopRow);
    // Add a WhileMatchFilter to make the scan terminate as soon
    // as we see a non-matching key.  This is probably redundant
    // since the stopRow above should already take care of it for us.
    scan.setFilter(
            new WhileMatchFilter(new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(startRow))));
    return tableSplit;
}