List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration()
/**
 * Returns the {@code Configuration} associated with this context. The examples that follow read
 * their job settings from the returned object.
 */
public Configuration getConfiguration();
From source file:org.apache.accumulo.hadoop.mapreduce.AccumuloFileOutputFormat.java
License:Apache License
@Override public RecordWriter<Key, Value> getRecordWriter(TaskAttemptContext context) throws IOException { // get the path of the temporary output file final Configuration conf = context.getConfiguration(); final AccumuloConfiguration acuConf = FileOutputConfigurator .getAccumuloConfiguration(AccumuloFileOutputFormat.class, context.getConfiguration()); final String extension = acuConf.get(Property.TABLE_FILE_TYPE); final Path file = this.getDefaultWorkFile(context, "." + extension); final int visCacheSize = ConfiguratorBase.getVisibilityCacheSize(conf); return new RecordWriter<Key, Value>() { RFileWriter out = null;/*from ww w . j a v a 2 s .com*/ @Override public void close(TaskAttemptContext context) throws IOException { if (out != null) out.close(); } @Override public void write(Key key, Value value) throws IOException { if (out == null) { out = RFile.newWriter().to(file.toString()).withFileSystem(file.getFileSystem(conf)) .withTableProperties(acuConf).withVisibilityCacheSize(visCacheSize).build(); out.startDefaultLocalityGroup(); } out.append(key, value); } }; }
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordReader.java
License:Apache License
/** * Extracts Iterators settings from the context to be used by RecordReader. * * @param context/*w w w .j a v a 2s .com*/ * the Hadoop context for the configured job * @return List of iterator settings for given table */ private List<IteratorSetting> contextIterators(TaskAttemptContext context) { return InputConfigurator.getIterators(CLASS, context.getConfiguration()); }
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordReader.java
License:Apache License
@Override public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException { split = (RangeInputSplit) inSplit;/*from w w w.j a v a2 s .com*/ log.debug("Initializing input split: " + split); Configuration conf = attempt.getConfiguration(); client = createClient(attempt, this.CLASS); ClientContext context = (ClientContext) client; Authorizations authorizations = InputConfigurator.getScanAuthorizations(CLASS, conf); String classLoaderContext = InputConfigurator.getClassLoaderContext(CLASS, conf); String table = split.getTableName(); // in case the table name changed, we can still use the previous name for terms of // configuration, // but the scanner will use the table id resolved at job setup time InputTableConfig tableConfig = InputConfigurator.getInputTableConfig(CLASS, conf, split.getTableName()); log.debug("Creating client with user: " + client.whoami()); log.debug("Creating scanner for table: " + table); log.debug("Authorizations are: " + authorizations); if (split instanceof BatchInputSplit) { BatchInputSplit batchSplit = (BatchInputSplit) split; BatchScanner scanner; try { // Note: BatchScanner will use at most one thread per tablet, currently BatchInputSplit // will not span tablets int scanThreads = 1; scanner = context.createBatchScanner(split.getTableName(), authorizations, scanThreads); setupIterators(attempt, scanner, split); if (classLoaderContext != null) { scanner.setClassLoaderContext(classLoaderContext); } } catch (TableNotFoundException e) { e.printStackTrace(); throw new IOException(e); } scanner.setRanges(batchSplit.getRanges()); scannerBase = scanner; } else { Scanner scanner; Boolean isOffline = split.isOffline(); if (isOffline == null) { isOffline = tableConfig.isOfflineScan(); } Boolean isIsolated = split.isIsolatedScan(); if (isIsolated == null) { isIsolated = tableConfig.shouldUseIsolatedScanners(); } Boolean usesLocalIterators = split.usesLocalIterators(); if (usesLocalIterators == null) { usesLocalIterators = 
tableConfig.shouldUseLocalIterators(); } try { if (isOffline) { scanner = new OfflineScanner(context, TableId.of(split.getTableId()), authorizations); } else { // Not using public API to create scanner so that we can use table ID // Table ID is used in case of renames during M/R job scanner = new ScannerImpl(context, TableId.of(split.getTableId()), authorizations); } if (isIsolated) { log.info("Creating isolated scanner"); scanner = new IsolatedScanner(scanner); } if (usesLocalIterators) { log.info("Using local iterators"); scanner = new ClientSideIteratorScanner(scanner); } setupIterators(attempt, scanner, split); } catch (RuntimeException e) { throw new IOException(e); } scanner.setRange(split.getRange()); scannerBase = scanner; } Collection<IteratorSetting.Column> columns = split.getFetchedColumns(); if (columns == null) { columns = tableConfig.getFetchedColumns(); } // setup a scanner within the bounds of this split for (Pair<Text, Text> c : columns) { if (c.getSecond() != null) { log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond()); scannerBase.fetchColumn(c.getFirst(), c.getSecond()); } else { log.debug("Fetching column family " + c.getFirst()); scannerBase.fetchColumnFamily(c.getFirst()); } } SamplerConfiguration samplerConfig = split.getSamplerConfiguration(); if (samplerConfig == null) { samplerConfig = tableConfig.getSamplerConfiguration(); } if (samplerConfig != null) { scannerBase.setSamplerConfiguration(samplerConfig); } Map<String, String> executionHints = split.getExecutionHints(); if (executionHints == null || executionHints.isEmpty()) { executionHints = tableConfig.getExecutionHints(); } if (executionHints != null) { scannerBase.setExecutionHints(executionHints); } scannerIterator = scannerBase.iterator(); numKeysRead = 0; }
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordWriter.java
License:Apache License
/**
 * Builds a record writer from the settings stored in the task's configuration.
 *
 * <p>When simulation mode is enabled, a notice is logged and neither the client nor the
 * multi-table batch writer is created.
 *
 * @param context the task attempt context whose configuration holds the output settings
 */
public AccumuloRecordWriter(TaskAttemptContext context) {
  final Configuration jobConf = context.getConfiguration();

  this.simulate = OutputConfigurator.getSimulationMode(CLASS, jobConf);
  this.createTables = OutputConfigurator.canCreateTables(CLASS, jobConf);

  if (simulate) {
    log.info("Simulating output only. No writes to tables will occur");
  }

  this.bws = new HashMap<>();

  // The default table name stays null when none is configured.
  final String configuredName = OutputConfigurator.getDefaultTableName(CLASS, jobConf);
  if (configuredName != null) {
    this.defaultTableName = new Text(configuredName);
  } else {
    this.defaultTableName = null;
  }

  if (simulate) {
    return;
  }

  this.client = OutputConfigurator.createClient(CLASS, jobConf);
  mtbw = client.createMultiTableBatchWriter();
}
From source file:org.apache.avro.mapreduce.AvroKeyInputFormat.java
License:Apache License
/** {@inheritDoc} */ @Override//from ww w. j a v a2 s . c om public RecordReader<AvroKey<T>, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { Schema readerSchema = AvroJob.getInputKeySchema(context.getConfiguration()); if (null == readerSchema) { LOG.warn("Reader schema was not set. Use AvroJob.setInputKeySchema() if desired."); LOG.info("Using a reader schema equal to the writer schema."); } return new AvroKeyRecordReader<T>(readerSchema); }
From source file:org.apache.avro.mapreduce.AvroKeyOutputFormat.java
License:Apache License
/** {@inheritDoc} */ @Override/*w ww . ja v a2 s. c o m*/ @SuppressWarnings("unchecked") public RecordWriter<AvroKey<T>, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException { Configuration conf = context.getConfiguration(); // Get the writer schema. Schema writerSchema = AvroJob.getOutputKeySchema(conf); boolean isMapOnly = context.getNumReduceTasks() == 0; if (isMapOnly) { Schema mapOutputSchema = AvroJob.getMapOutputKeySchema(conf); if (mapOutputSchema != null) { writerSchema = mapOutputSchema; } } if (null == writerSchema) { throw new IOException( "AvroKeyOutputFormat requires an output schema. Use AvroJob.setOutputKeySchema()."); } GenericData dataModel = AvroSerialization.createDataModel(conf); return mRecordWriterFactory.create(writerSchema, dataModel, getCompressionCodec(context), getAvroFileOutputStream(context)); }
From source file:org.apache.avro.mapreduce.AvroKeyValueInputFormat.java
License:Apache License
/** {@inheritDoc} */
@Override
public RecordReader<AvroKey<K>, AvroValue<V>> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException, InterruptedException {
  // Neither reader schema is mandatory: a missing one is reported and passed on as null.
  final Schema keySchema = AvroJob.getInputKeySchema(context.getConfiguration());
  if (keySchema == null) {
    LOG.warn("Key reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
    LOG.info("Using a key reader schema equal to the writer schema.");
  }

  final Schema valueSchema = AvroJob.getInputValueSchema(context.getConfiguration());
  if (valueSchema == null) {
    LOG.warn("Value reader schema was not set. Use AvroJob.setInputValueSchema() if desired.");
    LOG.info("Using a value reader schema equal to the writer schema.");
  }

  return new AvroKeyValueRecordReader<K, V>(keySchema, valueSchema);
}
From source file:org.apache.avro.mapreduce.AvroKeyValueOutputFormat.java
License:Apache License
/** {@inheritDoc} */
@Override
@SuppressWarnings("unchecked")
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
  final Configuration jobConf = context.getConfiguration();
  final AvroDatumConverterFactory factory = new AvroDatumConverterFactory(jobConf);

  // One converter per side, each created from the job's declared output class.
  final Class<K> keyClass = (Class<K>) context.getOutputKeyClass();
  final AvroDatumConverter<K, ?> keyConverter = factory.create(keyClass);

  final Class<V> valueClass = (Class<V>) context.getOutputValueClass();
  final AvroDatumConverter<V, ?> valueConverter = factory.create(valueClass);

  final GenericData model = AvroSerialization.createDataModel(jobConf);

  return new AvroKeyValueRecordWriter<K, V>(keyConverter, valueConverter, model,
      getCompressionCodec(context), getAvroFileOutputStream(context));
}
From source file:org.apache.avro.mapreduce.AvroMultipleOutputs.java
License:Apache License
@SuppressWarnings("unchecked") private synchronized RecordWriter getRecordWriter(TaskAttemptContext taskContext, String baseFileName) throws IOException, InterruptedException { // look for record-writer in the cache RecordWriter writer = recordWriters.get(baseFileName); // If not in cache, create a new one if (writer == null) { // get the record writer from context output format //FileOutputFormat.setOutputName(taskContext, baseFileName); taskContext.getConfiguration().set("avro.mo.config.namedOutput", baseFileName); try {/*from www. j a va2 s . c o m*/ writer = ((OutputFormat) ReflectionUtils.newInstance(taskContext.getOutputFormatClass(), taskContext.getConfiguration())).getRecordWriter(taskContext); } catch (ClassNotFoundException e) { throw new IOException(e); } // if counters are enabled, wrap the writer with context // to increment counters if (countersEnabled) { writer = new RecordWriterWithCounter(writer, baseFileName, context); } // add the record-writer to the cache recordWriters.put(baseFileName, writer); } return writer; }