Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

This page lists example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
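
In the MapReduce task API this is the usual way for an InputFormat, OutputFormat, RecordReader, or RecordWriter to read back settings the driver placed on the job. Below is a minimal sketch; the property name example.max.records and the LimitedRecordReader class are hypothetical and only illustrate the lookup.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class LimitedRecordReader extends RecordReader<LongWritable, Text> {
    private long maxRecords;
    private long recordsRead;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
        // Anything set on the driver with job.getConfiguration().set(...) is
        // visible to the task through the attempt's configuration.
        Configuration conf = context.getConfiguration();
        maxRecords = conf.getLong("example.max.records", Long.MAX_VALUE);
    }

    @Override
    public boolean nextKeyValue() {
        // Placeholder: a real reader would consume records from the split.
        return recordsRead++ < maxRecords;
    }

    @Override
    public LongWritable getCurrentKey() {
        return new LongWritable(recordsRead);
    }

    @Override
    public Text getCurrentValue() {
        return new Text();
    }

    @Override
    public float getProgress() {
        return 0f;
    }

    @Override
    public void close() {
    }
}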

Usage

From source file: org.apache.accumulo.hadoop.mapreduce.AccumuloFileOutputFormat.java

License: Apache License

@Override
public RecordWriter<Key, Value> getRecordWriter(TaskAttemptContext context) throws IOException {
    // get the path of the temporary output file
    final Configuration conf = context.getConfiguration();
    final AccumuloConfiguration acuConf = FileOutputConfigurator
            .getAccumuloConfiguration(AccumuloFileOutputFormat.class, context.getConfiguration());

    final String extension = acuConf.get(Property.TABLE_FILE_TYPE);
    final Path file = this.getDefaultWorkFile(context, "." + extension);
    final int visCacheSize = ConfiguratorBase.getVisibilityCacheSize(conf);

    return new RecordWriter<Key, Value>() {
        RFileWriter out = null;

        @Override
        public void close(TaskAttemptContext context) throws IOException {
            if (out != null)
                out.close();
        }

        @Override
        public void write(Key key, Value value) throws IOException {
            if (out == null) {
                out = RFile.newWriter().to(file.toString()).withFileSystem(file.getFileSystem(conf))
                        .withTableProperties(acuConf).withVisibilityCacheSize(visCacheSize).build();
                out.startDefaultLocalityGroup();
            }
            out.append(key, value);
        }
    };
}
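
For context, a driver-side sketch of wiring this output format into a job. Only the generic Hadoop side is shown; the Accumulo table properties and visibility-cache size that the RecordWriter reads back through context.getConfiguration() are populated by the format's own configurator (omitted here), and the output path is a placeholder.

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.hadoop.mapreduce.AccumuloFileOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class RFileOutputDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "write-rfiles");
        job.setOutputFormatClass(AccumuloFileOutputFormat.class);
        job.setOutputKeyClass(Key.class);
        job.setOutputValueClass(Value.class);
        // Placeholder output directory for the generated RFiles.
        FileOutputFormat.setOutputPath(job, new Path("/tmp/rfiles"));
        // Mapper/reducer setup and the Accumulo-specific configuration are omitted.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}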

From source file: org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordReader.java

License: Apache License

/**
 * Extracts Iterators settings from the context to be used by RecordReader.
 *
 * @param context
 *          the Hadoop context for the configured job
 * @return List of iterator settings for given table
 */
private List<IteratorSetting> contextIterators(TaskAttemptContext context) {
    return InputConfigurator.getIterators(CLASS, context.getConfiguration());
}

From source file: org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {

    split = (RangeInputSplit) inSplit;
    log.debug("Initializing input split: " + split);
    Configuration conf = attempt.getConfiguration();

    client = createClient(attempt, this.CLASS);
    ClientContext context = (ClientContext) client;
    Authorizations authorizations = InputConfigurator.getScanAuthorizations(CLASS, conf);
    String classLoaderContext = InputConfigurator.getClassLoaderContext(CLASS, conf);
    String table = split.getTableName();

    // in case the table name changed, we can still use the previous name for terms of
    // configuration, but the scanner will use the table id resolved at job setup time
    InputTableConfig tableConfig = InputConfigurator.getInputTableConfig(CLASS, conf, split.getTableName());

    log.debug("Creating client with user: " + client.whoami());
    log.debug("Creating scanner for table: " + table);
    log.debug("Authorizations are: " + authorizations);

    if (split instanceof BatchInputSplit) {
        BatchInputSplit batchSplit = (BatchInputSplit) split;

        BatchScanner scanner;
        try {
            // Note: BatchScanner will use at most one thread per tablet, currently BatchInputSplit
            // will not span tablets
            int scanThreads = 1;
            scanner = context.createBatchScanner(split.getTableName(), authorizations, scanThreads);
            setupIterators(attempt, scanner, split);
            if (classLoaderContext != null) {
                scanner.setClassLoaderContext(classLoaderContext);
            }
        } catch (TableNotFoundException e) {
            e.printStackTrace();
            throw new IOException(e);
        }

        scanner.setRanges(batchSplit.getRanges());
        scannerBase = scanner;
    } else {
        Scanner scanner;

        Boolean isOffline = split.isOffline();
        if (isOffline == null) {
            isOffline = tableConfig.isOfflineScan();
        }

        Boolean isIsolated = split.isIsolatedScan();
        if (isIsolated == null) {
            isIsolated = tableConfig.shouldUseIsolatedScanners();
        }

        Boolean usesLocalIterators = split.usesLocalIterators();
        if (usesLocalIterators == null) {
            usesLocalIterators = tableConfig.shouldUseLocalIterators();
        }

        try {
            if (isOffline) {
                scanner = new OfflineScanner(context, TableId.of(split.getTableId()), authorizations);
            } else {
                // Not using public API to create scanner so that we can use table ID
                // Table ID is used in case of renames during M/R job
                scanner = new ScannerImpl(context, TableId.of(split.getTableId()), authorizations);
            }
            if (isIsolated) {
                log.info("Creating isolated scanner");
                scanner = new IsolatedScanner(scanner);
            }
            if (usesLocalIterators) {
                log.info("Using local iterators");
                scanner = new ClientSideIteratorScanner(scanner);
            }

            setupIterators(attempt, scanner, split);
        } catch (RuntimeException e) {
            throw new IOException(e);
        }

        scanner.setRange(split.getRange());
        scannerBase = scanner;

    }

    Collection<IteratorSetting.Column> columns = split.getFetchedColumns();
    if (columns == null) {
        columns = tableConfig.getFetchedColumns();
    }

    // setup a scanner within the bounds of this split
    for (Pair<Text, Text> c : columns) {
        if (c.getSecond() != null) {
            log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond());
            scannerBase.fetchColumn(c.getFirst(), c.getSecond());
        } else {
            log.debug("Fetching column family " + c.getFirst());
            scannerBase.fetchColumnFamily(c.getFirst());
        }
    }

    SamplerConfiguration samplerConfig = split.getSamplerConfiguration();
    if (samplerConfig == null) {
        samplerConfig = tableConfig.getSamplerConfiguration();
    }

    if (samplerConfig != null) {
        scannerBase.setSamplerConfiguration(samplerConfig);
    }

    Map<String, String> executionHints = split.getExecutionHints();
    if (executionHints == null || executionHints.isEmpty()) {
        executionHints = tableConfig.getExecutionHints();
    }

    if (executionHints != null) {
        scannerBase.setExecutionHints(executionHints);
    }

    scannerIterator = scannerBase.iterator();
    numKeysRead = 0;
}

From source file: org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordWriter.java

License: Apache License

public AccumuloRecordWriter(TaskAttemptContext context) {
    Configuration conf = context.getConfiguration();
    this.simulate = OutputConfigurator.getSimulationMode(CLASS, conf);
    this.createTables = OutputConfigurator.canCreateTables(CLASS, conf);

    if (simulate)
        log.info("Simulating output only. No writes to tables will occur");

    this.bws = new HashMap<>();

    String tname = OutputConfigurator.getDefaultTableName(CLASS, conf);
    this.defaultTableName = (tname == null) ? null : new Text(tname);

    if (!simulate) {
        this.client = OutputConfigurator.createClient(CLASS, conf);
        mtbw = client.createMultiTableBatchWriter();
    }
}

From source file: org.apache.avro.mapreduce.AvroKeyInputFormat.java

License: Apache License

/** {@inheritDoc} */
@Override
public RecordReader<AvroKey<T>, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema readerSchema = AvroJob.getInputKeySchema(context.getConfiguration());
    if (null == readerSchema) {
        LOG.warn("Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
        LOG.info("Using a reader schema equal to the writer schema.");
    }
    return new AvroKeyRecordReader<T>(readerSchema);
}
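
The reader schema looked up above comes from the driver. A minimal sketch follows; the record schema is invented for illustration.

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class AvroKeyInputDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "avro-key-input");
        // This schema is what AvroJob.getInputKeySchema(context.getConfiguration())
        // returns inside createRecordReader above; the record is a placeholder.
        Schema readerSchema = SchemaBuilder.record("Example").fields()
                .requiredString("name")
                .requiredLong("count")
                .endRecord();
        AvroJob.setInputKeySchema(job, readerSchema);
        job.setInputFormatClass(AvroKeyInputFormat.class);
        // Input paths, mapper, and output setup are omitted.
    }
}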

From source file: org.apache.avro.mapreduce.AvroKeyOutputFormat.java

License: Apache License

/** {@inheritDoc} */
@Override
@SuppressWarnings("unchecked")
public RecordWriter<AvroKey<T>, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    // Get the writer schema.
    Schema writerSchema = AvroJob.getOutputKeySchema(conf);
    boolean isMapOnly = context.getNumReduceTasks() == 0;
    if (isMapOnly) {
        Schema mapOutputSchema = AvroJob.getMapOutputKeySchema(conf);
        if (mapOutputSchema != null) {
            writerSchema = mapOutputSchema;
        }
    }
    if (null == writerSchema) {
        throw new IOException(
                "AvroKeyOutputFormat requires an output schema. Use AvroJob.setOutputKeySchema().");
    }

    GenericData dataModel = AvroSerialization.createDataModel(conf);

    return mRecordWriterFactory.create(writerSchema, dataModel, getCompressionCodec(context),
            getAvroFileOutputStream(context));
}
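
Because getRecordWriter throws when no output schema is configured, the driver must set one before submitting. A short sketch with a placeholder schema and path:

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AvroKeyOutputDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "avro-key-output");
        // Without this call the getRecordWriter above throws
        // "AvroKeyOutputFormat requires an output schema".
        AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
        job.setOutputFormatClass(AvroKeyOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/avro-out")); // placeholder path
    }
}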

From source file: org.apache.avro.mapreduce.AvroKeyValueInputFormat.java

License: Apache License

/** {@inheritDoc} */
@Override
public RecordReader<AvroKey<K>, AvroValue<V>> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema keyReaderSchema = AvroJob.getInputKeySchema(context.getConfiguration());
    if (null == keyReaderSchema) {
        LOG.warn("Key reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
        LOG.info("Using a key reader schema equal to the writer schema.");
    }
    Schema valueReaderSchema = AvroJob.getInputValueSchema(context.getConfiguration());
    if (null == valueReaderSchema) {
        LOG.warn("Value reader schema was not set. Use AvroJob.setInputValueSchema() if desired.");
        LOG.info("Using a value reader schema equal to the writer schema.");
    }
    return new AvroKeyValueRecordReader<K, V>(keyReaderSchema, valueReaderSchema);
}
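
Both reader schemas used above are looked up from the job configuration. A driver-side sketch with placeholder primitive schemas:

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyValueInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class AvroKeyValueInputDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "avro-kv-input");
        // These two calls populate the values read back through
        // context.getConfiguration() in createRecordReader above.
        AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.STRING));
        AvroJob.setInputValueSchema(job, Schema.create(Schema.Type.LONG));
        job.setInputFormatClass(AvroKeyValueInputFormat.class);
        // Input paths, mapper, and output setup are omitted.
    }
}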

From source file: org.apache.avro.mapreduce.AvroKeyValueOutputFormat.java

License: Apache License

/** {@inheritDoc} */
@Override
@SuppressWarnings("unchecked")
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();

    AvroDatumConverterFactory converterFactory = new AvroDatumConverterFactory(conf);

    AvroDatumConverter<K, ?> keyConverter = converterFactory.create((Class<K>) context.getOutputKeyClass());
    AvroDatumConverter<V, ?> valueConverter = converterFactory.create((Class<V>) context.getOutputValueClass());

    GenericData dataModel = AvroSerialization.createDataModel(conf);

    return new AvroKeyValueRecordWriter<K, V>(keyConverter, valueConverter, dataModel,
            getCompressionCodec(context), getAvroFileOutputStream(context));
}
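
Here the datum converters are chosen from the output key/value classes on the context, so the driver only declares those classes. A sketch assuming Writable types that AvroDatumConverterFactory can convert; the path is a placeholder.

import org.apache.avro.mapreduce.AvroKeyValueOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AvroKeyValueOutputDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "avro-kv-output");
        job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
        // The converter factory above inspects these classes via
        // context.getOutputKeyClass() / context.getOutputValueClass().
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/avro-kv-out")); // placeholder path
    }
}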

From source file: org.apache.avro.mapreduce.AvroMultipleOutputs.java

License: Apache License

@SuppressWarnings("unchecked")
private synchronized RecordWriter getRecordWriter(TaskAttemptContext taskContext, String baseFileName)
        throws IOException, InterruptedException {

    // look for record-writer in the cache
    RecordWriter writer = recordWriters.get(baseFileName);

    // If not in cache, create a new one
    if (writer == null) {
        // get the record writer from context output format
        //FileOutputFormat.setOutputName(taskContext, baseFileName);
        taskContext.getConfiguration().set("avro.mo.config.namedOutput", baseFileName);
        try {
            writer = ((OutputFormat) ReflectionUtils.newInstance(taskContext.getOutputFormatClass(),
                    taskContext.getConfiguration())).getRecordWriter(taskContext);
        } catch (ClassNotFoundException e) {
            throw new IOException(e);
        }

        // if counters are enabled, wrap the writer with context 
        // to increment counters 
        if (countersEnabled) {
            writer = new RecordWriterWithCounter(writer, baseFileName, context);
        }

        // add the record-writer to the cache
        recordWriters.put(baseFileName, writer);
    }
    return writer;
}
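
A task-side usage sketch; the named output "errors", the reducer types, and the key payload are placeholders, and the named output is assumed to have been registered on the driver with AvroMultipleOutputs.addNamedOutput.

import java.io.IOException;

import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroMultipleOutputs;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class RoutingReducer extends Reducer<Text, Text, AvroKey<CharSequence>, NullWritable> {
    private AvroMultipleOutputs amos;

    @Override
    protected void setup(Context context) {
        amos = new AvroMultipleOutputs(context);
    }

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Each named output passed here goes through the cached
        // getRecordWriter(taskContext, baseFileName) lookup shown above.
        amos.write("errors", new AvroKey<CharSequence>(key.toString()), NullWritable.get());
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        amos.close();
    }
}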