com.jeffy.hive.RowIdUDF.java Source code

Java tutorial

Introduction

Here is the source code for com.jeffy.hive.RowIdUDF.java

Source

/*
 * Copyright AsiaInfo Authors.
 * 
 * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
 */
package com.jeffy.hive;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.mapred.JobConf;
import org.apache.log4j.Logger;

/**
 * Hive generic UDF that generates a row id string of the form
 * "{TASK_ID}-{SEQUENCE_NUMBER}".
 *
 * <p>The task id is read from the job configuration in
 * {@link #configure(MapredContext)}; the sequence number is a per-instance
 * counter that is incremented on every call to
 * {@link #evaluate(DeferredObject[])}.
 *
 * @author Jeffy<renwu58@gmail.com>
 *
 */
@Description(name = "rowid", value = "_FUNC_() - Returns a generated row id of a form {TASK_ID}-{SEQUENCE_NUMBER}")
@UDFType(deterministic = false, stateful = true)
public class RowIdUDF extends GenericUDF {

    private static final Logger log = Logger.getLogger(RowIdUDF.class);

    /** Number of ids handed out so far by this instance; the first id uses 1. */
    private long sequence;

    /** Task partition number taken from the job configuration, or -1 while unresolved. */
    private int taskId;

    /** Creates the UDF with a zeroed sequence and an unresolved (-1) task id. */
    public RowIdUDF() {
        sequence = 0L;
        taskId = -1;
    }

    /**
     * Resolves the task id from the job configuration obtained through
     * {@code context.getJobConf()}.
     *
     * <p>{@code mapred.task.partition} is consulted first, then
     * {@code mapreduce.task.partition}. This is only called in runtime of
     * MapRedTask; when {@code context} is {@code null} nothing is read and
     * the task id keeps its current value.
     *
     * @param context the MapReduce context supplied by Hive, may be {@code null}
     * @throws IllegalStateException if the context has no {@link JobConf}, or
     *         if neither partition property is set
     */
    @Override
    public void configure(MapredContext context) {
        log.info("Configure RowIdUDF.");
        if (context != null) {
            JobConf conf = context.getJobConf();
            if (conf == null) {
                throw new IllegalStateException("JobConf is not set.");
            }
            taskId = conf.getInt("mapred.task.partition", -1);
            if (taskId == -1) {
                // Fall back to the alternative property name.
                taskId = conf.getInt("mapreduce.task.partition", -1);
                if (taskId == -1) {
                    throw new IllegalStateException(
                            "Both mapred.task.partition and mapreduce.task.partition are not set: " + conf);
                }
            }
        }
    }

    /**
     * Declares the return type of this UDF. The arguments are not inspected.
     *
     * @param arguments object inspectors of the call arguments (unused)
     * @return the Java string object inspector
     * @throws UDFArgumentException declared by the overridden method; not thrown here
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        log.info("Initialize RowIdUDF.");
        return PrimitiveObjectInspectorFactory
                .getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
    }

    /**
     * Produces the next row id for this instance.
     *
     * @param arguments call arguments (unused)
     * @return the task id and the incremented sequence number joined by "-";
     *         the task id part is -1 if {@link #configure(MapredContext)}
     *         never resolved one
     * @throws HiveException declared by the overridden method; not thrown here
     */
    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        sequence++;
        return taskId + "-" + sequence;
    }

    /**
     * Returns the textual form of this function call, e.g. {@code rowid()}.
     *
     * <p>This is the expression text, not a usage message: any child
     * expressions are rendered comma-separated inside the parentheses.
     *
     * @param children display strings of the argument expressions, may be
     *        {@code null} or empty
     * @return the function call rendered as text
     */
    @Override
    public String getDisplayString(String[] children) {
        StringBuilder display = new StringBuilder("rowid(");
        if (children != null) {
            for (int i = 0; i < children.length; i++) {
                if (i > 0) {
                    display.append(", ");
                }
                display.append(children[i]);
            }
        }
        return display.append(')').toString();
    }

}