Example usage for org.apache.hadoop.mapreduce TaskAttemptID TaskAttemptID

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce TaskAttemptID() no-argument constructor, drawn from the source files listed below.

Prototype

public TaskAttemptID() 
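
The no-argument constructor produces a placeholder attempt id. As the usage examples below illustrate, it is most often paired with a TaskAttemptContext (for example TaskAttemptContextImpl) so that an InputFormat or RecordReader can be exercised outside of a running MapReduce job, typically in tests. The following minimal sketch shows that pattern; it assumes Hadoop 2.x-style org.apache.hadoop.mapreduce APIs, and the class name TaskAttemptIDExample and the command-line input path are illustrative only, not taken from any example on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptIDExample {
    public static void main(String[] args) throws IOException, InterruptedException {
        Configuration conf = new Configuration();

        // A default TaskAttemptID is sufficient when no real job is running;
        // it simply gives the context an identity to report against.
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

        Job job = Job.getInstance(conf);
        FileInputFormat.addInputPath(job, new Path(args[0]));

        // Drive the InputFormat by hand: compute splits, then read each split
        // with a RecordReader initialized against the synthetic context.
        TextInputFormat format = new TextInputFormat();
        for (InputSplit split : format.getSplits(job)) {
            try (RecordReader<LongWritable, Text> reader = format.createRecordReader(split, context)) {
                reader.initialize(split, context);
                while (reader.nextKeyValue()) {
                    System.out.println(reader.getCurrentValue());
                }
            }
        }
    }
}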

Usage

From source file: be.uantwerpen.adrem.bigfim.ComputeTidListReducerTest.java

License: Apache License

@Test
public void One_PG_One_Item() throws Exception {
    MultipleOutputs<IntArrayWritable, IntMatrixWritable> mos = createMock(MultipleOutputs.class);
    mos.write(newIAW(1), EmptyImw, "pg/bucket-0");
    mos.write(newIAW(0), new IntMatrixWritable(newIAW(0, 1, 2, 4, 7, 9), newIAW(0, 1, 2, 3, 5, 6, 8)),
            "pg/bucket-0");
    mos.write(EmptyIaw, EmptyImw, "pg/bucket-0");
    mos.close();

    Reducer.Context ctx = createMock(Reducer.Context.class);
    EasyMock.expect(ctx.getConfiguration()).andReturn(createConfiguration()).anyTimes();
    EasyMock.expect(ctx.getTaskAttemptID()).andReturn(new TaskAttemptID()).anyTimes();

    EasyMock.replay(ctx, mos);

    ComputeTidListReducer reducer = new ComputeTidListReducer();
    reducer.setup(ctx);
    setField(reducer, "mos", mos);

    reducer.reduce(new Text("1"), createTestInput_1Item(), ctx);
    reducer.cleanup(ctx);

    EasyMock.verify(mos);
}

From source file: be.uantwerpen.adrem.bigfim.ComputeTidListReducerTest.java

License: Apache License

@Test
public void One_PG_N_Items() throws Exception {
    MultipleOutputs<IntArrayWritable, IntMatrixWritable> mos = createMock(MultipleOutputs.class);

    mos.write(newIAW(1), EmptyImw, "pg/bucket-0");
    mos.write(newIAW(0), new IntMatrixWritable(newIAW(0, 1, 2, 4, 7, 9), newIAW(0, 1, 2, 3, 5, 6, 8)),
            "pg/bucket-0");
    mos.write(newIAW(1), new IntMatrixWritable(newIAW(1, 2, 3), newIAW(4, 5, 6)), "pg/bucket-0");
    mos.write(newIAW(3), new IntMatrixWritable(newIAW(4, 7, 9), newIAW(4, 7, 9)), "pg/bucket-0");
    mos.write(EmptyIaw, EmptyImw, "pg/bucket-0");
    mos.close();

    Reducer.Context ctx = createMock(Reducer.Context.class);
    EasyMock.expect(ctx.getConfiguration()).andReturn(createConfiguration()).anyTimes();
    EasyMock.expect(ctx.getTaskAttemptID()).andReturn(new TaskAttemptID()).anyTimes();

    EasyMock.replay(ctx, mos);

    ComputeTidListReducer reducer = new ComputeTidListReducer();
    reducer.setup(ctx);
    setField(reducer, "mos", mos);

    reducer.reduce(new Text("1"), createTestInput_NItems(), ctx);
    reducer.cleanup(ctx);

    EasyMock.verify(mos);
}

From source file: be.uantwerpen.adrem.bigfim.ComputeTidListReducerTest.java

License: Apache License

@Test
public void N_PG_N_Items() throws Exception {
    MultipleOutputs<IntArrayWritable, IntMatrixWritable> mos = createMock(MultipleOutputs.class);

    mos.write(newIAW(1), EmptyImw, "pg/bucket-0");
    mos.write(newIAW(0), new IntMatrixWritable(newIAW(0, 1, 2, 4, 7, 9), newIAW(0, 1, 2, 3, 5, 6, 8)),
            "pg/bucket-0");
    mos.write(newIAW(1), new IntMatrixWritable(newIAW(1, 2, 3), newIAW(4, 5, 6)), "pg/bucket-0");
    mos.write(newIAW(3), new IntMatrixWritable(newIAW(4, 7, 9), newIAW(4, 7, 9)), "pg/bucket-0");
    mos.write(EmptyIaw, EmptyImw, "pg/bucket-0");

    mos.write(newIAW(2), EmptyImw, "pg/bucket-1");
    mos.write(newIAW(1), new IntMatrixWritable(newIAW(1, 4, 7, 8), newIAW(1, 5, 6, 8)), "pg/bucket-1");
    mos.write(newIAW(2), new IntMatrixWritable(newIAW(3, 5, 7), newIAW(1, 2, 3, 4, 5, 6, 7, 8, 9)),
            "pg/bucket-1");
    mos.write(EmptyIaw, EmptyImw, "pg/bucket-1");
    mos.close();

    Reducer.Context ctx = createMock(Reducer.Context.class);
    EasyMock.expect(ctx.getConfiguration()).andReturn(createConfiguration()).anyTimes();
    EasyMock.expect(ctx.getTaskAttemptID()).andReturn(new TaskAttemptID()).anyTimes();

    EasyMock.replay(ctx, mos);

    ComputeTidListReducer reducer = new ComputeTidListReducer();
    reducer.setup(ctx);
    setField(reducer, "mos", mos);

    reducer.reduce(new Text("1"), createTestInput_NItems(), ctx);
    reducer.reduce(new Text("2"), createTestInput_NItems2(), ctx);
    reducer.cleanup(ctx);

    EasyMock.verify(mos);
}

From source file: com.asakusafw.lang.compiler.mapreduce.testing.InputFormatTester.java

License: Apache License

/**
 * Collects input contents.
 * @param <T> the data type
 * @param collector the target collector
 * @throws IOException if failed
 * @throws InterruptedException if interrupted
 */
@SuppressWarnings("unchecked")
public <T> void collect(Consumer<T> collector) throws IOException, InterruptedException {
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = format.getSplits(context);
    for (InputSplit split : splits) {
        InputSplit restored = restore(split);
        try (RecordReader<?, ?> reader = format.createRecordReader(restored, context)) {
            reader.initialize(restored, context);
            while (reader.nextKeyValue()) {
                collector.accept((T) reader.getCurrentValue());
            }
        }
    }
}

From source file: com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java

License: Apache License

public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (Exception se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }
            scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            if (!addAll) {
                break;
            }
        }
    }

    //setting start and end time for scanning
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);

    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {

        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(
            tableSplit, tac);

    return new RecordReader<Text, Text>() {

        //@Override
        public void close() throws IOException {
            recordReader.close();
        }

        // @Override
        public Text createKey() {
            return new Text();
        }

        // @Override
        public Text createValue() {
            return new Text();
        }

        // @Override
        public long getPos() throws IOException {
            return 0;
        }

        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;

            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return progress;
        }

        // @Override
        public boolean next(Text rowKey, Text value) throws IOException {

            boolean next = false;

            try {
                next = recordReader.nextKeyValue();

                // logic to find the column name
                if (next) {
                    rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));
                    StringBuilder val = new StringBuilder();
                    String prev = "";
                    for (KeyValue kv : recordReader.getCurrentValue().raw()) {
                        String current = new String(kv.getQualifier());
                        char[] col = new String(current).toCharArray();
                        if (val.length() > 0) {
                            if (prev.equals(current))
                                val.append(",");
                            else
                                val.append("\t");
                        }
                        prev = current;
                        val.append(col[0]).append("_");
                        val.append(Bytes.toString(kv.getValue()));
                    }
                    value.set(val.toString()); // rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));;
                    // value.set(Bytes.toString(recordReader.getCurrentValue().value()));
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return next;
        }
    };
}

From source file: com.ask.hive.hbase.HiveHBaseTimeTableInputFormat.java

License: Apache License

public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (SerDeException se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            if (!addAll) {
                break;
            }
        }
    }

    //setting start and end time for scanning
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);

    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {

        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(
            tableSplit, tac);

    return new RecordReader<ImmutableBytesWritable, Result>() {

        //@Override
        public void close() throws IOException {
            recordReader.close();
        }

        // @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        // @Override
        public Result createValue() {
            return new Result();
        }

        // @Override
        public long getPos() throws IOException {
            return 0;
        }

        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;

            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return progress;
        }

        // @Override
        public boolean next(ImmutableBytesWritable rowKey, Result value) throws IOException {

            boolean next = false;

            try {
                next = recordReader.nextKeyValue();

                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    Writables.copyWritable(recordReader.getCurrentValue(), value);
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return next;
        }
    };
}

From source file: com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java

License: Apache License

public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
    TaskAttemptContext tac = new TaskAttemptContext(jobConf, new TaskAttemptID());
    List<org.apache.hadoop.mapreduce.InputSplit> newInputSplits = this.getSplits(tac);
    org.apache.hadoop.mapred.InputSplit[] oldInputSplits = new org.apache.hadoop.mapred.InputSplit[newInputSplits
            .size()];
    for (int i = 0; i < newInputSplits.size(); i++)
        oldInputSplits[i] = (ColumnFamilySplit) newInputSplits.get(i);
    return oldInputSplits;
}

From source file: com.cloudera.integration.oracle.goldengate.ldv.mapreduce.lib.input.LengthDelimitedInputFormatTest.java

@Test
public void test() throws IOException, InterruptedException {
    Configuration conf = new Configuration(false);
    conf.set("fs.default.name", "file:///");
    conf.setInt(Constants.RECORD_PREFIX_LENGTH, 4);
    conf.setInt(Constants.FIELD_PREFIX_LENGTH, 4);

    Path path = new Path(tempFile.getAbsoluteFile().toURI());

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    LengthDelimitedInputFormat inputFormat = ReflectionUtils.newInstance(LengthDelimitedInputFormat.class,
            conf);
    try (LengthDelimitedRecordReader reader = (LengthDelimitedRecordReader) inputFormat.createRecordReader(null,
            context)) {
        FileSplit split = new FileSplit(path, 0, tempFile.length(), null);
        reader.initialize(split, context);

        while (reader.nextKeyValue()) {
            LengthDelimitedWritable writable = reader.getCurrentValue();
            Assert.assertNotNull(writable);
            Timestamp timestamp = new Timestamp(writable.getTimestamp().get());

            Assert.assertEquals("2014-12-31 23:06:06.255", timestamp.toString());
            FieldValueWritable[] writables = writable.getWritables();
            for (int i = 0; i < chars.length(); i++) {
                String value = chars.substring(0, i);
                FieldValueWritable fieldValueWritable = writables[i];
                Assert.assertEquals(value, fieldValueWritable.getData());
            }

            //          System.out.println(reader.getCurrentValue());
        }
    }

}

From source file: com.cloudera.recordservice.examples.terasort.TeraInputFormat.java

License: Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            @Override
            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {

                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}

From source file: com.facebook.hiveio.benchmark.InputBenchmark.java

License: Apache License

/**
 * Run benchmark
 *
 * @param args parsed args
 * @throws Exception
 */
public void run(InputBenchmarkCmd args) throws Exception {
    HadoopNative.requireHadoopNative();

    Timer allTime = Metrics.newTimer(InputBenchmark.class, "all-time", MILLISECONDS, MILLISECONDS);
    TimerContext allTimerContext = allTime.time();

    HiveInputDescription input = new HiveInputDescription();
    input.getTableDesc().setDatabaseName(args.tableOpts.database);
    input.getTableDesc().setTableName(args.tableOpts.table);
    input.setPartitionFilter(args.tableOpts.partitionFilter);
    input.getMetastoreDesc().setHost(args.metastoreOpts.host);
    input.getMetastoreDesc().setPort(args.metastoreOpts.port);

    HiveConf hiveConf = HiveUtils.newHiveConf(InputBenchmark.class);

    System.err.println("Initialize profile with input data");
    HiveApiInputFormat.setProfileInputDesc(hiveConf, input, DEFAULT_PROFILE_ID);

    HiveApiInputFormat defaultInputFormat = new HiveApiInputFormat();
    if (args.trackMetrics) {
        defaultInputFormat.setObserver(new MetricsObserver("default", args.recordPrintPeriod));
    }

    List<InputSplit> splits = defaultInputFormat.getSplits(new JobContext(hiveConf, new JobID()));
    System.err.println("getSplits returned " + splits.size() + " splits");

    long numRows = 0;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        TaskAttemptID taskID = new TaskAttemptID();
        TaskAttemptContext taskContext = new TaskAttemptContext(hiveConf, taskID);
        if (i % args.splitPrintPeriod == 0) {
            System.err.println("Handling split " + i + " of " + splits.size());
        }
        RecordReader<WritableComparable, HiveReadableRecord> reader = defaultInputFormat
                .createRecordReader(split, taskContext);
        reader.initialize(split, taskContext);
        numRows += readFully(reader);
    }

    System.err.println("Parsed " + numRows + " rows");

    allTimerContext.stop();

    new ConsoleReporter(System.err).run();
}