com.jeffy.hive.RowIdUDF2.java Source code

Java tutorial

Introduction

Here is the source code for com.jeffy.hive.RowIdUDF2.java

Source

/*
 * Copyright AsiaInfo Authors.
 * 
 * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
 */
package com.jeffy.hive;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

/**
 * Hive UDF that produces a row id string of the form
 * {@code {TASK_ID}-{SEQUENCE_NUMBER}}.
 *
 * <p>The task id part is the task partition number read from the job
 * configuration, plus one. It is resolved lazily on the first call to
 * {@link #evaluate()} and then cached in this instance. The sequence part is a
 * counter kept per instance; the first id returned carries sequence {@code 1}.
 *
 * <p>The function is annotated as non-deterministic and stateful because every
 * call returns a different value and depends on the calls made before it.
 *
 * @author Jeffy<renwu58@gmail.com>
 *
 */
@Description(name = "rowid", value = "_FUNC_() - Returns a generated row id of a form {TASK_ID}-{SEQUENCE_NUMBER}")
@UDFType(deterministic = false, stateful = true)
public class RowIdUDF2 extends UDF {

    /** Marker stored in {@link #taskId} until the real task id has been looked up. */
    private static final int TASK_ID_UNRESOLVED = -1;

    /** Number of ids handed out by this instance so far. */
    private long sequence = 0L;

    /** Cached task id (partition + 1), or {@link #TASK_ID_UNRESOLVED} before the first call. */
    private int taskId = TASK_ID_UNRESOLVED;

    /**
     * Creates a generator whose sequence is at zero and whose task id has not
     * been resolved yet.
     */
    public RowIdUDF2() {
        // All state is set up by the field initializers above.
    }

    /**
     * Generates the next row id.
     *
     * @return a new {@link Text} holding {@code taskId + "-" + sequence}
     * @throws IllegalStateException if the task id has to be resolved and
     *         {@link #getTaskId()} cannot find it
     */
    public Text evaluate() {
        // Resolve the task id only once; later calls reuse the cached value.
        if (taskId == TASK_ID_UNRESOLVED) {
            taskId = getTaskId() + 1;
        }

        // Advance the counter only after the task id is known, so a failed
        // lookup does not consume a sequence number.
        sequence += 1;

        StringBuilder rowId = new StringBuilder();
        rowId.append(taskId).append('-').append(sequence);
        return new Text(rowId.toString());
    }

    /**
     * Reads the task partition number from the job configuration of the
     * current {@link MapredContext}.
     *
     * <p>The key {@code mapred.task.partition} is consulted first; if it yields
     * {@code -1}, {@code mapreduce.task.partition} is tried instead.
     *
     * @return the configured task partition number
     * @throws IllegalStateException if no {@link MapredContext} is available,
     *         if it has no {@link JobConf}, or if both keys yield {@code -1}
     */
    public static int getTaskId() {
        final MapredContext context = MapredContext.get();
        if (context == null) {
            throw new IllegalStateException("MapredContext is not set.");
        }

        final JobConf jobConf = context.getJobConf();
        if (jobConf == null) {
            throw new IllegalStateException("JobConf is not set.");
        }

        final int oldKeyValue = jobConf.getInt("mapred.task.partition", -1);
        if (oldKeyValue != -1) {
            return oldKeyValue;
        }

        final int newKeyValue = jobConf.getInt("mapreduce.task.partition", -1);
        if (newKeyValue != -1) {
            return newKeyValue;
        }

        throw new IllegalStateException(
                "Both mapred.task.partition and mapreduce.task.partition are not set: " + jobConf);
    }
}