Java tutorial
/* * Copyright AsiaInfo Authors. * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 */ package com.jeffy.hive; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobConf; /** * HiveID UDF? * * @author Jeffy<renwu58@gmail.com> * */ @Description(name = "rowid", value = "_FUNC_() - Returns a generated row id of a form {TASK_ID}-{SEQUENCE_NUMBER}") @UDFType(deterministic = false, stateful = true) public class RowIdUDF2 extends UDF { private long sequence; private int taskId; public RowIdUDF2() { sequence = 0L; taskId = -1; } public Text evaluate() { if (taskId == -1) { taskId = getTaskId() + 1; } sequence++; String rowid = taskId + "-" + sequence; return new Text(rowid); } /** * MapredContext??MapReduce???MapReduce * * @return MapredID */ public static int getTaskId() { MapredContext ctx = MapredContext.get(); if (ctx == null) { throw new IllegalStateException("MapredContext is not set."); } JobConf conf = ctx.getJobConf(); if (conf == null) { throw new IllegalStateException("JobConf is not set."); } int taskid = conf.getInt("mapred.task.partition", -1); if (taskid == -1) { taskid = conf.getInt("mapreduce.task.partition", -1); if (taskid == -1) { throw new IllegalStateException( "Both mapred.task.partition and mapreduce.task.partition are not set: " + conf); } } return taskid; } }