Example usage for org.apache.hadoop.mapreduce OutputFormat getOutputCommitter

List of usage examples for org.apache.hadoop.mapreduce OutputFormat getOutputCommitter

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce OutputFormat getOutputCommitter.

Prototype

public abstract OutputCommitter getOutputCommitter(TaskAttemptContext context)
        throws IOException, InterruptedException;

Source Link

Document

Get the output committer for this output format.

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputsMainOutputWrapper.java

License:Apache License

/**
 * Returns a committer that commits the root output format's output as well as
 * the output of every registered named (delegate) output format.
 *
 * <p>The committer is built lazily on first call and cached; the method is
 * {@code synchronized} so concurrent callers observe a single instance.
 *
 * @param context the task attempt context
 * @return the cached {@link MultipleOutputsCommitter}
 * @throws IOException if a delegate committer cannot be obtained
 * @throws InterruptedException if interrupted while obtaining a delegate committer
 */
@Override
public synchronized OutputCommitter getOutputCommitter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (committer != null) {
        return committer;
    }
    Map<String, OutputCommitter> delegates = new HashMap<>();
    for (String outputName : MultipleOutputs.getNamedOutputsList(context)) {
        Class<? extends OutputFormat> formatClass = MultipleOutputs.getNamedOutputFormatClass(context,
                outputName);
        TaskAttemptContext delegateContext = MultipleOutputs.getNamedTaskContext(context, outputName);

        // Each named output gets its own OutputFormat instance so that its
        // committer is obtained from a fresh, independently configured format.
        OutputFormat delegateFormat = new InstantiatorFactory(false).get(TypeToken.of(formatClass)).create();
        delegates.put(outputName, delegateFormat.getOutputCommitter(delegateContext));
    }
    committer = new MultipleOutputsCommitter(delegates);
    return committer;
}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License:Apache License

/**
 * Runs the given job in-process: sets up the job-level output committer,
 * executes the map phase (and the reduce phase when reduce tasks are
 * configured), then commits the job on success or aborts it on failure.
 *
 * @param job the job to run; must already have a job ID assigned
 * @throws ClassNotFoundException if a configured class cannot be loaded
 * @throws IOException if job I/O fails
 * @throws InterruptedException if the run is interrupted
 */
private void runJob(Job job) throws ClassNotFoundException, IOException, InterruptedException {
    assert job.getJobID() != null;
    TaskID taskId = newMapTaskId(job.getJobID(), 0);
    Configuration conf = job.getConfiguration();
    OutputFormat<?, ?> outputFormat = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
    OutputCommitter jobCommitter = outputFormat
            .getOutputCommitter(newTaskAttemptContext(conf, newTaskAttemptId(taskId, 0)));
    jobCommitter.setupJob(job);
    boolean committed = false;
    try {
        if (job.getNumReduceTasks() == 0) {
            // Map-only job: no intermediate sort/shuffle needed.
            runMap(job, null);
        } else {
            try (KeyValueSorter<?, ?> sorter = createSorter(job, job.getMapOutputKeyClass(),
                    job.getMapOutputValueClass())) {
                runMap(job, sorter);
                runReduce(job, sorter);
            }
        }
        jobCommitter.commitJob(job);
        committed = true;
    } finally {
        if (!committed) {
            try {
                jobCommitter.abortJob(job, State.FAILED);
            } catch (IOException e) {
                // Log abort failures instead of rethrowing, so the original
                // failure (if any) propagates to the caller.
                LOG.error(MessageFormat.format("error occurred while aborting job: {0} ({1})", job.getJobID(),
                        job.getJobName()), e);
            }
        }
    }
}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License:Apache License

/**
 * Runs the map phase, executing one mapper per input split sequentially in
 * the current thread. Each split is given its own task attempt, output
 * committer, and (for map-only jobs) record writer.
 *
 * @param job the job whose mappers are run
 * @param sorter destination for map output when a reduce phase follows,
 *     or {@code null} for a map-only job (output is written directly)
 * @throws IOException if task I/O fails
 * @throws InterruptedException if the run is interrupted
 * @throws ClassNotFoundException if a configured class cannot be loaded
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private void runMap(Job job, KeyValueSorter<?, ?> sorter)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = input.getSplits(job);
    int serial = 1;
    for (InputSplit split : splits) {
        // Each split becomes its own map task (attempt number 0).
        TaskAttemptID id = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0);
        Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf);
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$
                    mapper.getClass().getName(), id, split.getLength()));
        }
        TaskAttemptContext context = newTaskAttemptContext(conf, id);
        // we always obtain a new OutputFormat object / OutputFormat.getOutputCommitter() may be cached
        OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
        OutputCommitter committer = output.getOutputCommitter(context);
        committer.setupTask(context);
        boolean succeed = false;
        try (RecordReader<?, ?> reader = input.createRecordReader(split, newTaskAttemptContext(conf, id))) {
            RecordWriter<?, ?> writer;
            if (sorter != null) {
                // A reduce phase follows: route map output into the sorter.
                writer = new ShuffleWriter(sorter);
            } else {
                // Map-only job: write final output directly via the OutputFormat.
                writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            }
            try {
                Mapper.Context c = newMapperContext(conf, id, reader, writer, committer, split);
                reader.initialize(split, c);
                mapper.run(c);
            } finally {
                // Close the writer even if the mapper failed, before commit/abort.
                writer.close(newTaskAttemptContext(conf, id));
            }
            doCommitTask(context, committer);
            succeed = true;
        } finally {
            if (succeed == false) {
                doAbortTask(context, committer);
            }
        }
    }
}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License:Apache License

/**
 * Runs the reduce phase as a single reduce task, consuming the sorted map
 * output from {@code sorter} and writing the final job output.
 *
 * @param job the job whose reducer is run
 * @param sorter holds the sorted map output to feed to the reducer
 * @throws ClassNotFoundException if a configured class cannot be loaded
 * @throws IOException if task I/O fails
 * @throws InterruptedException if the run is interrupted
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void runReduce(Job job, KeyValueSorter<?, ?> sorter)
        throws ClassNotFoundException, IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
    TaskAttemptID id = newTaskAttemptId(newReduceTaskId(job.getJobID(), 1), 0);
    Reducer<?, ?, ?, ?> reducer = ReflectionUtils.newInstance(job.getReducerClass(), conf);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("starting reducer: {0}@{1} ({2}records, {3}bytes)", //$NON-NLS-1$
                reducer.getClass().getName(), id, sorter.getRecordCount(), sorter.getSizeInBytes()));
    }
    TaskAttemptContext context = newTaskAttemptContext(conf, id);
    OutputCommitter committer = output.getOutputCommitter(context);
    committer.setupTask(context);
    boolean succeed = false;
    try {
        ShuffleReader reader = new ShuffleReader(sorter, new Progress());
        try {
            RecordWriter<?, ?> writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            try {
                Reducer.Context c = newReducerContext(conf, id, reader, sorter.getKeyClass(),
                        sorter.getValueClass(), writer, committer, (RawComparator) job.getGroupingComparator());
                reducer.run(c);
            } finally {
                // Close the writer even if the reducer failed, before commit/abort.
                writer.close(newTaskAttemptContext(conf, id));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                // Log close failures instead of rethrowing, so a failure from
                // the reduce itself (if any) is not masked.
                // Fixed garbled message: was "error occurred while reducer mapper input".
                LOG.warn(MessageFormat.format("error occurred while closing reducer input: {0} ({1})", id,
                        job.getJobName()), e);
            }
        }
        doCommitTask(context, committer);
        succeed = true;
    } finally {
        if (!succeed) {
            doAbortTask(context, committer);
        }
    }
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java

License:Apache License

/**
 * Returns the {@link RecordWriter} for the given named output, creating and
 * caching it — together with its task/job contexts and output committer — on
 * first use.
 *
 * @param baseFileName the named output to obtain a writer for
 * @return the cached or newly created record writer for that output
 * @throws IOException if the output format or its contexts cannot be created
 * @throws InterruptedException if interrupted while querying the output format
 */
public synchronized RecordWriter getRecordWriter(String baseFileName) throws IOException, InterruptedException {

    // Look for record-writer in the cache
    OutputContext context = outputContexts.get(baseFileName);

    // If not in cache, create a new one
    if (context == null) {

        context = new OutputContext();

        OutputFormat mainOutputFormat;

        try {
            mainOutputFormat = ((OutputFormat) ReflectionUtils.newInstance(this.context.getOutputFormatClass(),
                    this.context.getConfiguration()));
        } catch (ClassNotFoundException e1) {
            // Configuration error: surface as unchecked since callers cannot recover.
            throw new RuntimeException(e1);
        }

        // The main committer is assumed to be a ProxyOutputCommitter, whose
        // base dir anchors this named output's work path.
        ProxyOutputCommitter baseOutputCommitter = ((ProxyOutputCommitter) mainOutputFormat
                .getOutputCommitter(this.context));

        // The trick is to create a new Job for each output
        Job job = new Job(this.context.getConfiguration());
        job.setOutputKeyClass(getNamedOutputKeyClass(this.context, baseFileName));
        job.setOutputValueClass(getNamedOutputValueClass(this.context, baseFileName));
        // Check possible specific context for the output
        setSpecificNamedOutputContext(this.context.getConfiguration(), job, baseFileName);
        TaskAttemptContext taskContext;
        try {
            taskContext = TaskAttemptContextFactory.get(job.getConfiguration(),
                    this.context.getTaskAttemptID());
        } catch (Exception e) {
            throw new IOException(e);
        }

        // First we change the output dir for the new OutputFormat that we will
        // create
        // We put it inside the main output work path -> in case the Job fails,
        // everything will be discarded
        taskContext.getConfiguration().set("mapred.output.dir",
                baseOutputCommitter.getBaseDir() + "/" + baseFileName);
        // This is for Hadoop 2.0 : same setting under the newer property name.
        taskContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir",
                baseOutputCommitter.getBaseDir() + "/" + baseFileName);
        context.taskAttemptContext = taskContext;

        // Load the OutputFormat instance
        OutputFormat outputFormat = InstancesDistributor.loadInstance(
                context.taskAttemptContext.getConfiguration(), OutputFormat.class,
                getNamedOutputFormatInstanceFile(this.context, baseFileName), true);
        // We have to create a JobContext for meeting the contract of the
        // OutputFormat
        JobContext jobContext;
        try {
            jobContext = JobContextFactory.get(taskContext.getConfiguration(), taskContext.getJobID());
        } catch (Exception e) {
            throw new IOException(e);
        }

        context.jobContext = jobContext;
        // The contract of the OutputFormat is to check the output specs
        outputFormat.checkOutputSpecs(jobContext);
        // We get the output committer so we can call it later
        context.outputCommitter = outputFormat.getOutputCommitter(taskContext);
        // Save the RecordWriter to cache it
        context.recordWriter = outputFormat.getRecordWriter(taskContext);

        // if counters are enabled, wrap the writer with context
        // to increment counters
        if (countersEnabled) {
            context.recordWriter = new RecordWriterWithCounter(context.recordWriter, baseFileName,
                    this.context);
        }

        outputContexts.put(baseFileName, context);
    }
    return context.recordWriter;
}

From source file:com.marklogic.contentpump.LocalJobRunner.java

License:Apache License

/**
 * Run the job.  Get the input splits, create map tasks and submit them to
 * the thread pool if there is one; otherwise, runs the tasks one by
 * one in the current thread.
 *
 * @param <INKEY> mapper input key type
 * @param <INVALUE> mapper input value type
 * @param <OUTKEY> mapper output key type
 * @param <OUTVALUE> mapper output value type
 * @throws Exception if split generation, task execution, or commit fails
 */
@SuppressWarnings("unchecked")
public <INKEY, INVALUE, OUTKEY, OUTVALUE, T extends org.apache.hadoop.mapreduce.InputSplit> void run()
        throws Exception {
    Configuration conf = job.getConfiguration();
    InputFormat<INKEY, INVALUE> inputFormat = (InputFormat<INKEY, INVALUE>) ReflectionUtils
            .newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = inputFormat.getSplits(job);
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);

    // sort the splits into order based on size, so that the biggest
    // goes first
    Arrays.sort(array, new SplitLengthComparator());
    OutputFormat<OUTKEY, OUTVALUE> outputFormat = (OutputFormat<OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(job.getOutputFormatClass(), conf);
    Class<? extends Mapper<?, ?, ?, ?>> mapperClass = job.getMapperClass();
    Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(mapperClass, conf);
    try {
        outputFormat.checkOutputSpecs(job);
    } catch (Exception ex) {
        // A failed output spec check aborts the whole run (returns without running tasks).
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error checking output specification: ", ex);
        } else {
            LOG.error("Error checking output specification: ");
            LOG.error(ex.getMessage());
        }
        return;
    }
    conf = job.getConfiguration();
    // One progress counter per split, polled by the Monitor thread.
    progress = new AtomicInteger[splits.size()];
    for (int i = 0; i < splits.size(); i++) {
        progress[i] = new AtomicInteger();
    }
    Monitor monitor = new Monitor();
    monitor.start();
    reporter = new ContentPumpReporter();
    List<Future<Object>> taskList = new ArrayList<Future<Object>>();
    for (int i = 0; i < array.length; i++) {
        InputSplit split = array[i];
        if (pool != null) {
            // Threaded path: wrap each split in a LocalMapTask and submit to the pool.
            LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE> task = new LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE>(
                    inputFormat, outputFormat, conf, i, split, reporter, progress[i]);
            availableThreads = assignThreads(i, array.length);
            Class<? extends Mapper<?, ?, ?, ?>> runtimeMapperClass = job.getMapperClass();
            if (availableThreads > 1 && availableThreads != threadsPerSplit) {
                // possible runtime adjustment: swap in a multithreaded mapper
                // when more threads are available than configured per split
                if (runtimeMapperClass != (Class) MultithreadedMapper.class) {
                    runtimeMapperClass = (Class<? extends Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>>) cmd
                            .getRuntimeMapperClass(job, mapperClass, threadsPerSplit, availableThreads);
                }
                if (runtimeMapperClass != mapperClass) {
                    task.setMapperClass(runtimeMapperClass);
                }
                if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                    task.setThreadCount(availableThreads);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Thread Count for Split#" + i + " : " + availableThreads);
                    }
                }
            }

            if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                // Multithreaded mappers are submitted one at a time; waiting on
                // the pool throttles submission until the pool signals.
                synchronized (pool) {
                    taskList.add(pool.submit(task));
                    pool.wait();
                }
            } else {
                pool.submit(task);
            }
        } else { // single-threaded
            JobID jid = new JobID();
            TaskID taskId = new TaskID(jid.getJtIdentifier(), jid.getId(), TaskType.MAP, i);
            TaskAttemptID taskAttemptId = new TaskAttemptID(taskId, 0);
            TaskAttemptContext context = ReflectionUtil.createTaskAttemptContext(conf, taskAttemptId);
            RecordReader<INKEY, INVALUE> reader = inputFormat.createRecordReader(split, context);
            RecordWriter<OUTKEY, OUTVALUE> writer = outputFormat.getRecordWriter(context);
            OutputCommitter committer = outputFormat.getOutputCommitter(context);
            // NOTE(review): committer.setupTask is never called on this path
            // before commitTask below — confirm whether the committers used
            // here tolerate that.
            TrackingRecordReader trackingReader = new TrackingRecordReader(reader, progress[i]);

            Mapper.Context mapperContext = ReflectionUtil.createMapperContext(mapper, conf, taskAttemptId,
                    trackingReader, writer, committer, reporter, split);

            trackingReader.initialize(split, mapperContext);

            // no thread pool (only 1 thread specified)
            Class<? extends Mapper<?, ?, ?, ?>> mapClass = job.getMapperClass();
            mapperContext.getConfiguration().setClass(CONF_MAPREDUCE_JOB_MAP_CLASS, mapClass, Mapper.class);
            mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils.newInstance(mapClass,
                    mapperContext.getConfiguration());
            mapper.run(mapperContext);
            trackingReader.close();
            writer.close(mapperContext);
            committer.commitTask(context);
        }
    }
    // wait till all tasks are done
    if (pool != null) {
        for (Future<Object> f : taskList) {
            f.get();
        }
        pool.shutdown();
        while (!pool.awaitTermination(1, TimeUnit.DAYS))
            ;
        jobComplete.set(true);
    }
    monitor.interrupt();
    monitor.join(1000);

    // report counters
    Iterator<CounterGroup> groupIt = reporter.counters.iterator();
    while (groupIt.hasNext()) {
        CounterGroup group = groupIt.next();
        LOG.info(group.getDisplayName() + ": ");
        Iterator<Counter> counterIt = group.iterator();
        while (counterIt.hasNext()) {
            Counter counter = counterIt.next();
            LOG.info(counter.getDisplayName() + ": " + counter.getValue());
        }
    }
    LOG.info("Total execution time: " + (System.currentTimeMillis() - startTime) / 1000 + " sec");
}

From source file:com.mortardata.pig.storage.TestDynamoDBStorage.java

License:Apache License

/**
 * Verifies a single-tuple store through DynamoDBStorage against a mocked
 * DynamoDB client: front-end setup (schema check, store location), a
 * simulated back-end write, the computed write capacity, and the contents
 * of the resulting BatchWriteItem request.
 */
@Test
public void testSingleRow() throws IOException, InterruptedException {
    // test specific constants
    String tableName = "mortar_test_foo_table";
    String awsAccessKeyId = "XXXXXXXXXXXXX";
    String awsSecretKey = "YYYYYYYYYYYYYY";
    Long writeCapacityUnits = 50L;
    Double consumedCapacityUnits = 7.0D;
    String location = "s3://mortar-example-output-data/unused";
    String signature = "thealias_" + location + "_com.mortardata.pig.storage.DynamoDBStorage('" + tableName
            + "','" + awsAccessKeyId + "','" + awsSecretKey + "')";
    ResourceSchema schema = new ResourceSchema(Utils.getSchemaFromString(
            "my_field:int,my_float_field:float,my_str_field:chararray,my_null_field:chararray,my_empty_string_field:chararray"));
    String mapOrReduce = "reduce";
    int numSlotsForStore = 3;
    int numTasksForStore = 20;
    String hashPrimaryKeyName = "my_field";

    Collection<KeySchemaElement> keySchemaElements = Arrays
            .asList(new KeySchemaElement(hashPrimaryKeyName, KeyType.HASH));
    Collection<AttributeDefinition> attributeDefinitions = Arrays
            .asList(new AttributeDefinition(hashPrimaryKeyName, ScalarAttributeType.N));

    // mock dynamo client: describeTable reports the provisioned write capacity
    // and key schema used to size writes
    AmazonDynamoDBClient dynamo = mock(AmazonDynamoDBClient.class);
    DescribeTableResult describeResult = new DescribeTableResult().withTable(new TableDescription()
            .withProvisionedThroughput(
                    new ProvisionedThroughputDescription().withWriteCapacityUnits(writeCapacityUnits))
            .withKeySchema(keySchemaElements).withAttributeDefinitions(attributeDefinitions));

    when(dynamo.describeTable(any(DescribeTableRequest.class))).thenReturn(describeResult);

    // capture the batch write so its contents can be asserted at the end
    Map<String, List<WriteRequest>> unprocessedItems = Maps.newHashMap();
    BatchWriteItemResult batchWriteItemResult = new BatchWriteItemResult()
            .withUnprocessedItems(unprocessedItems).withConsumedCapacity(
                    new ConsumedCapacity().withTableName(tableName).withCapacityUnits(consumedCapacityUnits));
    ArgumentCaptor<BatchWriteItemRequest> batchWriteItemRequestCaptor = ArgumentCaptor
            .forClass(BatchWriteItemRequest.class);
    when(dynamo.batchWriteItem(batchWriteItemRequestCaptor.capture())).thenReturn(batchWriteItemResult);

    // mock Hadoop interaction
    HadoopJobInfo hadoopJobInfo = mock(HadoopJobInfo.class);
    when(hadoopJobInfo.getMapOrReduce()).thenReturn(mapOrReduce);
    when(hadoopJobInfo.getNumSlotsForStore()).thenReturn(numSlotsForStore);
    when(hadoopJobInfo.getNumTasksForStore()).thenReturn(numTasksForStore);
    when(hadoopJobInfo.getJobConfiguration()).thenReturn(new Configuration());

    // front end
    DynamoDBStorage storage = new DynamoDBStorage(tableName, awsAccessKeyId, awsSecretKey, dynamo,
            hadoopJobInfo);
    storage.setStoreFuncUDFContextSignature(signature);
    storage.checkSchema(schema);
    storage.setStoreLocation(location, null);
    Assert.assertNotNull(storage.getOutputFormat());

    // simulate back end
    storage.setStoreFuncUDFContextSignature(signature);

    @SuppressWarnings("rawtypes")
    OutputFormat outputFormat = storage.getOutputFormat();
    Assert.assertNotNull(outputFormat);
    storage.setStoreLocation(location, null);
    storage.prepareToWrite(null);
    Tuple tuple = TupleFactory.getInstance().newTuple(5);
    tuple.set(0, new Integer(3));
    tuple.set(1, new Float(4.3));
    tuple.set(2, "my_string_here");
    tuple.set(3, null);
    tuple.set(4, "");
    storage.putNext(tuple);
    // committing the task flushes the buffered write to the mocked client
    outputFormat.getOutputCommitter(null).commitTask(null);

    // write throughput pct [default 0.5] * writeCapacityUnits / min(numSlotsForStore, numTasksForStore)
    Assert.assertEquals(8, storage.getMaxWriteCapacity());

    // ensure that we received the item to save out
    List<BatchWriteItemRequest> bwrirs = batchWriteItemRequestCaptor.getAllValues();
    Assert.assertEquals(1, bwrirs.size());
    List<WriteRequest> writeRequests = bwrirs.get(0).getRequestItems().get(tableName);
    Assert.assertEquals(1, writeRequests.size());
    Map<String, AttributeValue> item = writeRequests.get(0).getPutRequest().getItem();
    Assert.assertEquals(new AttributeValue().withN("3"), item.get(hashPrimaryKeyName));
    Assert.assertEquals(new AttributeValue().withN("4.3"), item.get("my_float_field"));
    // null and empty-string fields are dropped from the stored item
    Assert.assertNull(item.get("my_null_field"));
    Assert.assertNull(item.get("my_empty_string_field"));
}

From source file:com.samsung.px.pig.storage.TestDynamoDBStorage.java

License:Apache License

/**
 * Verifies a single-tuple store through DynamoDBStorage against a mocked
 * DynamoDB client (v1-style KeySchema API): front-end setup, a simulated
 * back-end write, the computed write capacity, and the contents of the
 * resulting BatchWriteItem request.
 */
@Test
public void testSingleRow() throws IOException, InterruptedException {
    // test specific constants
    String tableName = "mortar_test_foo_table";
    String awsAccessKeyId = "XXXXXXXXXXXXX";
    String awsSecretKey = "YYYYYYYYYYYYYY";
    Long writeCapacityUnits = 50L;
    Double consumedCapacityUnits = 7.0D;
    String location = "s3://mortar-example-output-data/unused";
    String signature = "thealias_" + location + "_com.mortardata.pig.storage.DynamoDBStorage('" + tableName
            + "','" + awsAccessKeyId + "','" + awsSecretKey + "')";
    ResourceSchema schema = new ResourceSchema(Utils.getSchemaFromString(
            "my_field:int,my_float_field:float,my_str_field:chararray,my_null_field:chararray,my_empty_string_field:chararray"));
    String mapOrReduce = "reduce";
    int numSlotsForStore = 3;
    int numTasksForStore = 20;
    String hashPrimaryKeyName = "my_field";

    // mock dynamo client: describeTable reports the provisioned write capacity
    // and hash key schema used to size writes
    AmazonDynamoDBClient dynamo = mock(AmazonDynamoDBClient.class);
    DescribeTableResult describeResult = new DescribeTableResult().withTable(new TableDescription()
            .withProvisionedThroughput(
                    new ProvisionedThroughputDescription().withWriteCapacityUnits(writeCapacityUnits))
            .withKeySchema(new KeySchema().withHashKeyElement(new KeySchemaElement()
                    .withAttributeName(hashPrimaryKeyName).withAttributeType(ScalarAttributeType.N))));

    when(dynamo.describeTable(any(DescribeTableRequest.class))).thenReturn(describeResult);

    // capture the batch write so its contents can be asserted at the end
    Map<String, List<WriteRequest>> unprocessedItems = Maps.newHashMap();
    Map<String, BatchWriteResponse> reponses = Maps.newHashMap();
    reponses.put(tableName, new BatchWriteResponse().withConsumedCapacityUnits(consumedCapacityUnits));
    BatchWriteItemResult batchWriteItemResult = new BatchWriteItemResult()
            .withUnprocessedItems(unprocessedItems).withResponses(reponses);
    ArgumentCaptor<BatchWriteItemRequest> batchWriteItemRequestCaptor = ArgumentCaptor
            .forClass(BatchWriteItemRequest.class);
    when(dynamo.batchWriteItem(batchWriteItemRequestCaptor.capture())).thenReturn(batchWriteItemResult);

    // mock Hadoop interaction
    HadoopJobInfo hadoopJobInfo = mock(HadoopJobInfo.class);
    when(hadoopJobInfo.getMapOrReduce()).thenReturn(mapOrReduce);
    when(hadoopJobInfo.getNumSlotsForStore()).thenReturn(numSlotsForStore);
    when(hadoopJobInfo.getNumTasksForStore()).thenReturn(numTasksForStore);
    when(hadoopJobInfo.getJobConfiguration()).thenReturn(new Configuration());

    // front end
    DynamoDBStorage storage = new DynamoDBStorage(tableName, awsAccessKeyId, awsSecretKey, dynamo,
            hadoopJobInfo);
    storage.setStoreFuncUDFContextSignature(signature);
    storage.checkSchema(schema);
    storage.setStoreLocation(location, null);
    Assert.assertNotNull(storage.getOutputFormat());

    // simulate back end
    storage.setStoreFuncUDFContextSignature(signature);

    @SuppressWarnings("rawtypes")
    OutputFormat outputFormat = storage.getOutputFormat();
    Assert.assertNotNull(outputFormat);
    storage.setStoreLocation(location, null);
    storage.prepareToWrite(null);
    Tuple tuple = TupleFactory.getInstance().newTuple(5);
    tuple.set(0, new Integer(3));
    tuple.set(1, new Float(4.3));
    tuple.set(2, "my_string_here");
    tuple.set(3, null);
    tuple.set(4, "");
    storage.putNext(tuple);
    // committing the task flushes the buffered write to the mocked client
    outputFormat.getOutputCommitter(null).commitTask(null);

    // write throughput pct [default 0.5] * writeCapacityUnits / min(numSlotsForStore, numTasksForStore)
    Assert.assertEquals(8, storage.getMaxWriteCapacity());

    // ensure that we received the item to save out
    List<BatchWriteItemRequest> bwrirs = batchWriteItemRequestCaptor.getAllValues();
    Assert.assertEquals(1, bwrirs.size());
    List<WriteRequest> writeRequests = bwrirs.get(0).getRequestItems().get(tableName);
    Assert.assertEquals(1, writeRequests.size());
    Map<String, AttributeValue> item = writeRequests.get(0).getPutRequest().getItem();
    Assert.assertEquals(new AttributeValue().withN("3"), item.get(hashPrimaryKeyName));
    Assert.assertEquals(new AttributeValue().withN("4.3"), item.get("my_float_field"));
    // null and empty-string fields are dropped from the stored item
    Assert.assertNull(item.get("my_null_field"));
    Assert.assertNull(item.get("my_empty_string_field"));
}

From source file:com.scaleoutsoftware.soss.hserver.hadoop.MapperWrapperMapreduce.java

License:Apache License

/**
 * Runs the mapper for a single split.
 *
 * <p>For map-only jobs the mapper writes directly through the job's
 * OutputFormat (with its own task committer); otherwise map output is
 * collected into the given accumulator for a later reduce.
 *
 * @param mapOutputAccumulator mapOutputAccumulator to use
 * @param split split to run on
 * @param splitIndex index of the split, used to derive the task attempt id
 * @throws IOException if split I/O fails
 * @throws ClassNotFoundException if a configured class cannot be loaded
 * @throws InterruptedException if the run is interrupted
 */

@Override
@SuppressWarnings("unchecked")
public void runSplit(MapOutputAccumulator<OUTKEY, OUTVALUE> mapOutputAccumulator, Object split, int splitIndex)
        throws IOException, ClassNotFoundException, InterruptedException {

    TaskAttemptID taskAttemptId = hadoopVersionSpecificCode.createTaskAttemptId(jobId, true, splitIndex);
    //Setup task ID info
    TaskAttemptContext taskContext = hadoopVersionSpecificCode.createTaskAttemptContext(configuration,
            taskAttemptId);

    InputFormat inputFormat = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), configuration);

    //Create RecordReader
    org.apache.hadoop.mapreduce.RecordReader<INKEY, INVALUE> input = inputFormat
            .createRecordReader((InputSplit) split, taskContext);

    //Make a mapper
    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper;
    try {
        mapper = (org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) mapperConstructor
                .newInstance();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    org.apache.hadoop.mapreduce.RecordWriter output;
    OutputCommitter committer = null;
    if (mapOnlyJob) {
        // Map-only: write final output directly and manage the task commit here.
        OutputFormat outputFormat = ReflectionUtils.newInstance(jobContext.getOutputFormatClass(),
                configuration);
        output = (org.apache.hadoop.mapreduce.RecordWriter<OUTKEY, OUTVALUE>) outputFormat
                .getRecordWriter(taskContext);
        committer = outputFormat.getOutputCommitter(taskContext);
        committer.setupTask(taskContext);
    } else {
        // Reduce follows: collect map output into the accumulator instead.
        output = new MapOutputCollector<OUTKEY, OUTVALUE>(mapOutputAccumulator);
    }

    input.initialize((InputSplit) split, taskContext);

    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>.Context mapperContext = hadoopVersionSpecificCode
            .getMapperContext(configuration, taskAttemptId, input, output);
    mapper.run(mapperContext);

    input.close();

    output.close(mapperContext);

    if (mapOnlyJob && committer != null) {
        committer.commitTask(taskContext);
    }
}

From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapreduce.java

License:Apache License

/**
 * Builds the reducer wrapper for one partition: creates the task attempt
 * context, instantiates the reducer, obtains the record writer and output
 * committer from the job's OutputFormat, sets up the task, and wires a grid
 * reader for the partition's intermediate data into a reducer context.
 *
 * @param invocationParameters invocation-wide parameters (configuration, job id, Hadoop version, ...)
 * @param hadoopPartition the Hadoop partition number this reducer handles
 * @param appId application id used to locate the partition's grid data
 * @param region grid region holding the partition's intermediate data
 * @param sort whether the grid reader should deliver keys in sorted order
 * @throws IOException if the writer/committer cannot be created
 * @throws ClassNotFoundException if a configured class cannot be loaded
 * @throws InterruptedException if interrupted during setup
 */
public ReducerWrapperMapreduce(HServerInvocationParameters invocationParameters, int hadoopPartition, int appId,
        int region, boolean sort) throws IOException, ClassNotFoundException, InterruptedException {
    this.invocationParameters = invocationParameters;
    Configuration configuration = (Configuration) invocationParameters.getConfiguration();
    hadoopVersionSpecificCode = HadoopVersionSpecificCode.getInstance(invocationParameters.getHadoopVersion(),
            configuration);
    JobID jobID = (JobID) invocationParameters.getJobId();

    //Setup task ID info
    TaskAttemptID id = hadoopVersionSpecificCode.createTaskAttemptId(jobID, false, hadoopPartition);
    JobContext jobContext = hadoopVersionSpecificCode.createJobContext(new JobConf(configuration), jobID);
    taskContext = hadoopVersionSpecificCode.createTaskAttemptContext(configuration, id);

    reducer = (org.apache.hadoop.mapreduce.Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(jobContext.getReducerClass(), configuration);

    OutputFormat outputFormat = ReflectionUtils.newInstance(jobContext.getOutputFormatClass(), configuration);

    recordWriter = (org.apache.hadoop.mapreduce.RecordWriter<OUTKEY, OUTVALUE>) outputFormat
            .getRecordWriter(taskContext);

    committer = outputFormat.getOutputCommitter(taskContext);
    committer.setupTask(taskContext);

    // Two key serializers are needed because the grid reader deserializes the
    // current and look-ahead keys independently.
    Class<INKEY> keyClass = (Class<INKEY>) jobContext.getMapOutputKeyClass();
    WritableSerializerDeserializer<INKEY> firstKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    WritableSerializerDeserializer<INKEY> secondKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    Class<INVALUE> valueClass = (Class<INVALUE>) jobContext.getMapOutputValueClass();
    WritableSerializerDeserializer<INVALUE> valueSerializer = new WritableSerializerDeserializer<INVALUE>(
            valueClass, null);

    // Reader parameters are driven by HServer tuning settings (memory-mapped
    // files, read-ahead, chunk size, timeouts).
    DataGridReaderParameters<INKEY, INVALUE> params = new DataGridReaderParameters<INKEY, INVALUE>(region,
            appId, HServerParameters.getSetting(REDUCE_USEMEMORYMAPPEDFILES, configuration) > 0,
            firstKeySerializer, valueSerializer, invocationParameters.getSerializationMode(),
            secondKeySerializer, keyClass, valueClass, sort,
            HServerParameters.getSetting(REDUCE_CHUNKSTOREADAHEAD, configuration),
            1024 * HServerParameters.getSetting(REDUCE_INPUTCHUNKSIZE_KB, configuration),
            HServerParameters.getSetting(REDUCE_CHUNKREADTIMEOUT, configuration));
    DataGridChunkedCollectionReader<INKEY, INVALUE> transport = DataGridChunkedCollectionReader
            .getGridReader(params);

    context = hadoopVersionSpecificCode.getReducerContext(configuration, id, committer, recordWriter, transport,
            null);

}