Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

This page lists example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
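
In the MapReduce task API this is the usual way for an InputFormat, OutputFormat, RecordReader, or RecordWriter to read back settings the driver placed on the job. Below is a minimal sketch; the property name example.max.records and the LimitedRecordReader class are hypothetical and only illustrate the lookup.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class LimitedRecordReader extends RecordReader<LongWritable, Text> {
    private long maxRecords;
    private long recordsRead;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
        // Anything set on the driver with job.getConfiguration().set(...) is
        // visible to the task through the attempt's configuration.
        Configuration conf = context.getConfiguration();
        maxRecords = conf.getLong("example.max.records", Long.MAX_VALUE);
    }

    @Override
    public boolean nextKeyValue() {
        // Placeholder: a real reader would consume records from the split.
        return recordsRead++ < maxRecords;
    }

    @Override
    public LongWritable getCurrentKey() {
        return new LongWritable(recordsRead);
    }

    @Override
    public Text getCurrentValue() {
        return new Text();
    }

    @Override
    public float getProgress() {
        return 0f;
    }

    @Override
    public void close() {
    }
}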

Usage

From source file: org.apache.accumulo.hadoop.mapreduce.AccumuloFileOutputFormat.java

License: Apache License

@Override
public RecordWriter<Key, Value> getRecordWriter(TaskAttemptContext context) throws IOException {
    // get the path of the temporary output file
    final Configuration conf = context.getConfiguration();
    final AccumuloConfiguration acuConf = FileOutputConfigurator
            .getAccumuloConfiguration(AccumuloFileOutputFormat.class, context.getConfiguration());

    final String extension = acuConf.get(Property.TABLE_FILE_TYPE);
    final Path file = this.getDefaultWorkFile(context, "." + extension);
    final int visCacheSize = ConfiguratorBase.getVisibilityCacheSize(conf);

    return new RecordWriter<Key, Value>() {
        RFileWriter out = null;

        @Override
        public void close(TaskAttemptContext context) throws IOException {
            if (out != null)
                out.close();
        }

        @Override
        public void write(Key key, Value value) throws IOException {
            if (out == null) {
                out = RFile.newWriter().to(file.toString()).withFileSystem(file.getFileSystem(conf))
                        .withTableProperties(acuConf).withVisibilityCacheSize(visCacheSize).build();
                out.startDefaultLocalityGroup();
            }
            out.append(key, value);
        }
    };
}
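
For context, a driver-side sketch of wiring this output format into a job. Only the generic Hadoop side is shown; the Accumulo table properties and visibility-cache size that the RecordWriter reads back through context.getConfiguration() are populated by the format's own configurator (omitted here), and the output path is a placeholder.

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.hadoop.mapreduce.AccumuloFileOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class RFileOutputDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "write-rfiles");
        job.setOutputFormatClass(AccumuloFileOutputFormat.class);
        job.setOutputKeyClass(Key.class);
        job.setOutputValueClass(Value.class);
        // Placeholder output directory for the generated RFiles.
        FileOutputFormat.setOutputPath(job, new Path("/tmp/rfiles"));
        // Mapper/reducer setup and the Accumulo-specific configuration are omitted.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}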

From source file: org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordReader.java

License: Apache License

/**
 * Extracts Iterators settings from the context to be used by RecordReader.
 *
 * @param context
 *          the Hadoop context for the configured job
 * @return List of iterator settings for given table
 */
private List<IteratorSetting> contextIterators(TaskAttemptContext context) {
    return InputConfigurator.getIterators(CLASS, context.getConfiguration());
}

From source file: org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {

    split = (RangeInputSplit) inSplit;
    log.debug("Initializing input split: " + split);
    Configuration conf = attempt.getConfiguration();

    client = createClient(attempt, this.CLASS);
    ClientContext context = (ClientContext) client;
    Authorizations authorizations = InputConfigurator.getScanAuthorizations(CLASS, conf);
    String classLoaderContext = InputConfigurator.getClassLoaderContext(CLASS, conf);
    String table = split.getTableName();

    // in case the table name changed, we can still use the previous name for terms of
    // configuration, but the scanner will use the table id resolved at job setup time
    InputTableConfig tableConfig = InputConfigurator.getInputTableConfig(CLASS, conf, split.getTableName());

    log.debug("Creating client with user: " + client.whoami());
    log.debug("Creating scanner for table: " + table);
    log.debug("Authorizations are: " + authorizations);

    if (split instanceof BatchInputSplit) {
        BatchInputSplit batchSplit = (BatchInputSplit) split;

        BatchScanner scanner;
        try {
            // Note: BatchScanner will use at most one thread per tablet, currently BatchInputSplit
            // will not span tablets
            int scanThreads = 1;
            scanner = context.createBatchScanner(split.getTableName(), authorizations, scanThreads);
            setupIterators(attempt, scanner, split);
            if (classLoaderContext != null) {
                scanner.setClassLoaderContext(classLoaderContext);
            }
        } catch (TableNotFoundException e) {
            e.printStackTrace();
            throw new IOException(e);
        }

        scanner.setRanges(batchSplit.getRanges());
        scannerBase = scanner;
    } else {
        Scanner scanner;

        Boolean isOffline = split.isOffline();
        if (isOffline == null) {
            isOffline = tableConfig.isOfflineScan();
        }

        Boolean isIsolated = split.isIsolatedScan();
        if (isIsolated == null) {
            isIsolated = tableConfig.shouldUseIsolatedScanners();
        }

        Boolean usesLocalIterators = split.usesLocalIterators();
        if (usesLocalIterators == null) {
            usesLocalIterators = tableConfig.shouldUseLocalIterators();
        }

        try {
            if (isOffline) {
                scanner = new OfflineScanner(context, TableId.of(split.getTableId()), authorizations);
            } else {
                // Not using public API to create scanner so that we can use table ID
                // Table ID is used in case of renames during M/R job
                scanner = new ScannerImpl(context, TableId.of(split.getTableId()), authorizations);
            }
            if (isIsolated) {
                log.info("Creating isolated scanner");
                scanner = new IsolatedScanner(scanner);
            }
            if (usesLocalIterators) {
                log.info("Using local iterators");
                scanner = new ClientSideIteratorScanner(scanner);
            }

            setupIterators(attempt, scanner, split);
        } catch (RuntimeException e) {
            throw new IOException(e);
        }

        scanner.setRange(split.getRange());
        scannerBase = scanner;

    }

    Collection<IteratorSetting.Column> columns = split.getFetchedColumns();
    if (columns == null) {
        columns = tableConfig.getFetchedColumns();
    }

    // setup a scanner within the bounds of this split
    for (Pair<Text, Text> c : columns) {
        if (c.getSecond() != null) {
            log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond());
            scannerBase.fetchColumn(c.getFirst(), c.getSecond());
        } else {
            log.debug("Fetching column family " + c.getFirst());
            scannerBase.fetchColumnFamily(c.getFirst());
        }
    }

    SamplerConfiguration samplerConfig = split.getSamplerConfiguration();
    if (samplerConfig == null) {
        samplerConfig = tableConfig.getSamplerConfiguration();
    }

    if (samplerConfig != null) {
        scannerBase.setSamplerConfiguration(samplerConfig);
    }

    Map<String, String> executionHints = split.getExecutionHints();
    if (executionHints == null || executionHints.isEmpty()) {
        executionHints = tableConfig.getExecutionHints();
    }

    if (executionHints != null) {
        scannerBase.setExecutionHints(executionHints);
    }

    scannerIterator = scannerBase.iterator();
    numKeysRead = 0;
}

From source file: org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordWriter.java

License: Apache License

public AccumuloRecordWriter(TaskAttemptContext context) {
    Configuration conf = context.getConfiguration();
    this.simulate = OutputConfigurator.getSimulationMode(CLASS, conf);
    this.createTables = OutputConfigurator.canCreateTables(CLASS, conf);

    if (simulate)
        log.info("Simulating output only. No writes to tables will occur");

    this.bws = new HashMap<>();

    String tname = OutputConfigurator.getDefaultTableName(CLASS, conf);
    this.defaultTableName = (tname == null) ? null : new Text(tname);

    if (!simulate) {
        this.client = OutputConfigurator.createClient(CLASS, conf);
        mtbw = client.createMultiTableBatchWriter();
    }
}

From source file: org.apache.avro.mapreduce.AvroKeyInputFormat.java

License: Apache License

/** {@inheritDoc} */
@Override
public RecordReader<AvroKey<T>, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema readerSchema = AvroJob.getInputKeySchema(context.getConfiguration());
    if (null == readerSchema) {
        LOG.warn("Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
        LOG.info("Using a reader schema equal to the writer schema.");
    }
    return new AvroKeyRecordReader<T>(readerSchema);
}
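
The reader schema looked up above comes from the driver. A minimal sketch follows; the record schema is invented for illustration.

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class AvroKeyInputDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "avro-key-input");
        // This schema is what AvroJob.getInputKeySchema(context.getConfiguration())
        // returns inside createRecordReader above; the record is a placeholder.
        Schema readerSchema = SchemaBuilder.record("Example").fields()
                .requiredString("name")
                .requiredLong("count")
                .endRecord();
        AvroJob.setInputKeySchema(job, readerSchema);
        job.setInputFormatClass(AvroKeyInputFormat.class);
        // Input paths, mapper, and output setup are omitted.
    }
}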

From source file: org.apache.avro.mapreduce.AvroKeyOutputFormat.java

License: Apache License

/** {@inheritDoc} */
@Override
@SuppressWarnings("unchecked")
public RecordWriter<AvroKey<T>, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    // Get the writer schema.
    Schema writerSchema = AvroJob.getOutputKeySchema(conf);
    boolean isMapOnly = context.getNumReduceTasks() == 0;
    if (isMapOnly) {
        Schema mapOutputSchema = AvroJob.getMapOutputKeySchema(conf);
        if (mapOutputSchema != null) {
            writerSchema = mapOutputSchema;
        }
    }
    if (null == writerSchema) {
        throw new IOException(
                "AvroKeyOutputFormat requires an output schema. Use AvroJob.setOutputKeySchema().");
    }

    GenericData dataModel = AvroSerialization.createDataModel(conf);

    return mRecordWriterFactory.create(writerSchema, dataModel, getCompressionCodec(context),
            getAvroFileOutputStream(context));
}
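
Because getRecordWriter throws when no output schema is configured, the driver must set one before submitting. A short sketch with a placeholder schema and path:

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AvroKeyOutputDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "avro-key-output");
        // Without this call the getRecordWriter above throws
        // "AvroKeyOutputFormat requires an output schema".
        AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
        job.setOutputFormatClass(AvroKeyOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/avro-out")); // placeholder path
    }
}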

From source file: org.apache.avro.mapreduce.AvroKeyValueInputFormat.java

License: Apache License

/** {@inheritDoc} */
@Override
public RecordReader<AvroKey<K>, AvroValue<V>> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema keyReaderSchema = AvroJob.getInputKeySchema(context.getConfiguration());
    if (null == keyReaderSchema) {
        LOG.warn("Key reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
        LOG.info("Using a key reader schema equal to the writer schema.");
    }
    Schema valueReaderSchema = AvroJob.getInputValueSchema(context.getConfiguration());
    if (null == valueReaderSchema) {
        LOG.warn("Value reader schema was not set. Use AvroJob.setInputValueSchema() if desired.");
        LOG.info("Using a value reader schema equal to the writer schema.");
    }
    return new AvroKeyValueRecordReader<K, V>(keyReaderSchema, valueReaderSchema);
}
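
Both reader schemas used above are looked up from the job configuration. A driver-side sketch with placeholder primitive schemas:

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyValueInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class AvroKeyValueInputDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "avro-kv-input");
        // These two calls populate the values read back through
        // context.getConfiguration() in createRecordReader above.
        AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.STRING));
        AvroJob.setInputValueSchema(job, Schema.create(Schema.Type.LONG));
        job.setInputFormatClass(AvroKeyValueInputFormat.class);
        // Input paths, mapper, and output setup are omitted.
    }
}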

From source file: org.apache.avro.mapreduce.AvroKeyValueOutputFormat.java

License: Apache License

/** {@inheritDoc} */
@Override
@SuppressWarnings("unchecked")
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();

    AvroDatumConverterFactory converterFactory = new AvroDatumConverterFactory(conf);

    AvroDatumConverter<K, ?> keyConverter = converterFactory.create((Class<K>) context.getOutputKeyClass());
    AvroDatumConverter<V, ?> valueConverter = converterFactory.create((Class<V>) context.getOutputValueClass());

    GenericData dataModel = AvroSerialization.createDataModel(conf);

    return new AvroKeyValueRecordWriter<K, V>(keyConverter, valueConverter, dataModel,
            getCompressionCodec(context), getAvroFileOutputStream(context));
}
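
Here the datum converters are chosen from the output key/value classes on the context, so the driver only declares those classes. A sketch assuming Writable types that AvroDatumConverterFactory can convert; the path is a placeholder.

import org.apache.avro.mapreduce.AvroKeyValueOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AvroKeyValueOutputDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "avro-kv-output");
        job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
        // The converter factory above inspects these classes via
        // context.getOutputKeyClass() / context.getOutputValueClass().
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/avro-kv-out")); // placeholder path
    }
}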

From source file: org.apache.avro.mapreduce.AvroMultipleOutputs.java

License: Apache License

@SuppressWarnings("unchecked")
private synchronized RecordWriter getRecordWriter(TaskAttemptContext taskContext, String baseFileName)
        throws IOException, InterruptedException {

    // look for record-writer in the cache
    RecordWriter writer = recordWriters.get(baseFileName);

    // If not in cache, create a new one
    if (writer == null) {
        // get the record writer from context output format
        //FileOutputFormat.setOutputName(taskContext, baseFileName);
        taskContext.getConfiguration().set("avro.mo.config.namedOutput", baseFileName);
        try {
            writer = ((OutputFormat) ReflectionUtils.newInstance(taskContext.getOutputFormatClass(),
                    taskContext.getConfiguration())).getRecordWriter(taskContext);
        } catch (ClassNotFoundException e) {
            throw new IOException(e);
        }

        // if counters are enabled, wrap the writer with context 
        // to increment counters 
        if (countersEnabled) {
            writer = new RecordWriterWithCounter(writer, baseFileName, context);
        }

        // add the record-writer to the cache
        recordWriters.put(baseFileName, writer);
    }
    return writer;
}
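
A task-side usage sketch; the named output "errors", the reducer types, and the key payload are placeholders, and the named output is assumed to have been registered on the driver with AvroMultipleOutputs.addNamedOutput.

import java.io.IOException;

import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroMultipleOutputs;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class RoutingReducer extends Reducer<Text, Text, AvroKey<CharSequence>, NullWritable> {
    private AvroMultipleOutputs amos;

    @Override
    protected void setup(Context context) {
        amos = new AvroMultipleOutputs(context);
    }

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Each named output passed here goes through the cached
        // getRecordWriter(taskContext, baseFileName) lookup shown above.
        amos.write("errors", new AvroKey<CharSequence>(key.toString()), NullWritable.get());
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        amos.close();
    }
}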