Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.

Usage
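
Every example below follows the same pattern: inside an OutputFormat, RecordReader, or RecordWriter method, call context.getConfiguration() to obtain the job's Configuration and read job-level settings from it. The following is a minimal, self-contained sketch of that pattern, not taken from any of the projects listed here; the property key my.custom.delimiter and the class name are made up for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

public class DelimitedLineRecordReader extends RecordReader<LongWritable, Text> {

    private final LineRecordReader delegate = new LineRecordReader();
    private String delimiter;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The Configuration returned here carries the job-level settings
        // visible to this task attempt.
        Configuration conf = context.getConfiguration();
        // "my.custom.delimiter" is a hypothetical key used only for illustration;
        // a real reader would use it to split each line into fields.
        delimiter = conf.get("my.custom.delimiter", "\t");
        delegate.initialize(split, context);
    }

    @Override
    public boolean nextKeyValue() throws IOException {
        return delegate.nextKeyValue();
    }

    @Override
    public LongWritable getCurrentKey() {
        return delegate.getCurrentKey();
    }

    @Override
    public Text getCurrentValue() {
        return delegate.getCurrentValue();
    }

    @Override
    public float getProgress() throws IOException {
        return delegate.getProgress();
    }

    @Override
    public void close() throws IOException {
        delegate.close();
    }
}

Because the returned Configuration is the one the job was submitted with, any property set on the client via Job.getConfiguration().set(...) before submission is visible to tasks through this call.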

From source file:com.linkedin.cubert.io.rubix.RubixOutputFormat.java

License:Open Source License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String extension = RubixConstants.RUBIX_EXTENSION;

    CompressionCodec codec = null;
    boolean isCompressed = getCompressOutput(context);

    if (isCompressed) {
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension += codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(context, extension);
    FileSystem fs = file.getFileSystem(conf);

    FSDataOutputStream fileOut = fs.create(file, false);
    return new RubixRecordWriter<K, V>(conf, fileOut, context.getOutputKeyClass(),
            context.getOutputValueClass(), codec);
}

From source file:com.linkedin.cubert.io.rubix.RubixRecordReader.java

License:Open Source License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    initialize(split, context.getConfiguration());
}

From source file:com.linkedin.cubert.io.text.PigTextOutputFormatWrapper.java

License:Open Source License

@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    PigTextOutputFormat outputFormat;
    if (conf.get(CubertStrings.TEXT_OUTPUT_SEPARATOR) == null) {
        outputFormat = new PigTextOutputFormat(defaultDelimiter);
    } else {
        String str = conf.get(CubertStrings.TEXT_OUTPUT_SEPARATOR);
        str = StringEscapeUtils.unescapeJava(str);
        byte[] bytes = str.getBytes("UTF-8");

        if (bytes.length > 1)
            throw new RuntimeException(String.format("Invalid separator in text output format %s", str));

        outputFormat = new PigTextOutputFormat(bytes[0]);
    }

    return outputFormat.getRecordWriter(context);
}

From source file:com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorageInputStream.java

License:Apache License

/** Construct given a path and a configuration. */
public AvroStorageInputStream(Path path, TaskAttemptContext context) throws IOException {
    this.stream = path.getFileSystem(context.getConfiguration()).open(path);
    this.len = path.getFileSystem(context.getConfiguration()).getFileStatus(path).getLen();
}

From source file:com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroOutputFormat.java

License:Apache License

@Override
public RecordWriter<NullWritable, Object> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    if (schema == null)
        throw new IOException("Must provide a schema");

    Configuration conf = context.getConfiguration();

    DataFileWriter<Object> writer = new DataFileWriter<Object>(new PigAvroDatumWriter(schema));

    if (FileOutputFormat.getCompressOutput(context)) {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    // Do max as core-default.xml has io.file.buffer.size as 4K
    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY,
            Math.max(conf.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

    Path path = getDefaultWorkFile(context, EXT);
    writer.create(schema, path.getFileSystem(conf).create(path));
    return new PigAvroRecordWriter(writer);
}

From source file:com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroRecordReader.java

License:Apache License

/**
 * constructor to initialize input and avro data reader
 */
public PigAvroRecordReader(TaskAttemptContext context, FileSplit split, Schema readerSchema,
        boolean ignoreBadFiles, Map<Path, Map<Integer, Integer>> schemaToMergedSchemaMap,
        boolean useMultipleSchemas) throws IOException {
    this.path = split.getPath();
    this.in = new AvroStorageInputStream(path, context);
    this.useMultipleSchemas = useMultipleSchemas;
    if (readerSchema == null) {
        AvroStorageLog.details("No avro schema given; assuming the schema is embedded");
    }

    Schema writerSchema;
    try {
        FileSystem fs = FileSystem.get(path.toUri(), context.getConfiguration());
        writerSchema = AvroStorageUtils.getSchema(path, fs);
    } catch (IOException e) {
        AvroStorageLog.details(
                "No avro writer schema found in '" + path + "'; assuming writer schema matches reader schema");
        writerSchema = null;
    }

    try {
        if (useMultipleSchemas) {
            this.reader = new DataFileReader<Object>(in, new PigAvroDatumReader(writerSchema, null));
        } else {
            this.reader = new DataFileReader<Object>(in, new PigAvroDatumReader(writerSchema, readerSchema));
        }
    } catch (IOException e) {
        throw new IOException("Error initializing data file reader for file (" + split.getPath() + ")", e);
    }
    this.reader.sync(split.getStart()); // sync to start
    this.start = in.tell();
    this.end = split.getStart() + split.getLength();
    this.ignoreBadFiles = ignoreBadFiles;
    this.schemaToMergedSchemaMap = schemaToMergedSchemaMap;
    if (schemaToMergedSchemaMap != null) {
        // initialize mProtoTuple with the right default values
        int maxPos = 0;
        for (Map<Integer, Integer> map : schemaToMergedSchemaMap.values()) {
            for (Integer i : map.values()) {
                maxPos = Math.max(i, maxPos);
            }
        }
        int tupleSize = maxPos + 1;
        AvroStorageLog.details("Creating proto tuple of fixed size: " + tupleSize);
        mProtoTuple = new ArrayList<Object>(tupleSize);
        for (int i = 0; i < tupleSize; i++) {
            // Get the list of fields from the passed schema
            List<Schema.Field> subFields = readerSchema.getFields();
            JsonNode defValue = subFields.get(i).defaultValue();
            if (defValue != null) {
                Schema.Type type = subFields.get(i).schema().getType();
                if (type.equals(Schema.Type.UNION)) {
                    List<Schema> schemas = subFields.get(i).schema().getTypes();
                    for (Schema schema : schemas) {
                        if (!schema.getType().equals(Schema.Type.NULL)) {
                            type = schema.getType();
                            break;
                        }
                    }
                }
                switch (type) {
                case BOOLEAN:
                    mProtoTuple.add(i, defValue.getBooleanValue());
                    break;
                case ENUM:
                    mProtoTuple.add(i, defValue.getTextValue());
                    break;
                case FIXED:
                    mProtoTuple.add(i, defValue.getTextValue());
                    break;
                case INT:
                    mProtoTuple.add(i, defValue.getIntValue());
                    break;
                case LONG:
                    mProtoTuple.add(i, defValue.getLongValue());
                    break;
                case FLOAT:
                    mProtoTuple.add(i, defValue.getNumberValue().floatValue());
                    break;
                case DOUBLE:
                    mProtoTuple.add(i, defValue.getNumberValue().doubleValue());
                    break;
                case STRING:
                    mProtoTuple.add(i, defValue.getTextValue());
                    break;
                default:
                    mProtoTuple.add(i, null);
                    break;
                }
            } else {
                mProtoTuple.add(i, null);
            }
        }
    }
}

From source file:com.linkedin.json.JsonSequenceFileInputFormat.java

License:Apache License

@Override
public RecordReader<Object, Object> createRecordReader(final InputSplit split, final TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();

    String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
    log.info("Input file path:" + inputPathString);
    Path inputPath = new Path(inputPathString);

    SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf), inputPath, conf);
    SequenceFile.Metadata meta = reader.getMetadata();

    try {
        final Text keySchema = meta.get(new Text("key.schema"));
        final Text valueSchema = meta.get(new Text("value.schema"));

        if (0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
            throw new Exception(String.format("Cannot have a 0 length schema. keySchema[%s], valueSchema[%s]",
                    keySchema, valueSchema));
        }

        return new JsonObjectRecordReader(new JsonTypeSerializer(keySchema.toString()),
                new JsonTypeSerializer(valueSchema.toString()),
                baseInputFormat.createRecordReader(split, context));
    } catch (Exception e) {
        throw new IOException("Failed to Load Schema from file:" + inputPathString + "\n");
    }
}

From source file:com.linkedin.json.JsonSequenceFileOutputFormat.java

License:Apache License

@Override
public RecordWriter<Object, Object> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Shamelessly copy in hadoop code to allow us to set the metadata with our schema

    Configuration conf = context.getConfiguration();

    CompressionCodec codec = null;
    CompressionType compressionType = CompressionType.NONE;
    if (getCompressOutput(context)) {
        // find the kind of compression to do
        compressionType = SequenceFileOutputFormat.getOutputCompressionType(context);

        // find the right codec
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }
    // get the path of the temporary output file
    Path file = getDefaultWorkFile(context, "");
    FileSystem fs = file.getFileSystem(conf);

    final String keySchema = getSchema("output.key.schema", conf);
    final String valueSchema = getSchema("output.value.schema", conf);

    /* begin cheddar's stealing of jay's code */
    SequenceFile.Metadata meta = new SequenceFile.Metadata();

    meta.set(new Text("key.schema"), new Text(keySchema));
    meta.set(new Text("value.schema"), new Text(valueSchema));

    final SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, file, context.getOutputKeyClass(),
            context.getOutputValueClass(), compressionType, codec, context, meta);
    /* end cheddar's stealing of jay's code */

    final JsonTypeSerializer keySerializer = new JsonTypeSerializer(keySchema);
    final JsonTypeSerializer valueSerializer = new JsonTypeSerializer(valueSchema);

    return new RecordWriter<Object, Object>() {

        public void write(Object key, Object value) throws IOException {

            out.append(new BytesWritable(keySerializer.toBytes(key)),
                    new BytesWritable(valueSerializer.toBytes(value)));
            context.progress();
        }

        public void close(TaskAttemptContext context) throws IOException {
            out.close();
        }
    };
}

From source file:com.linkedin.pinot.hadoop.io.PinotOutputFormat.java

License:Apache License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
    configure(context.getConfiguration());
    final PinotRecordSerialization dataWriteSupport = getDataWriteSupport(context);
    initSegmentConfig(context);
    Path workDir = getDefaultWorkFile(context, "");
    return new PinotRecordWriter<>(_segmentConfig, context, workDir, dataWriteSupport);
}

From source file:com.linkedin.pinot.hadoop.io.PinotRecordWriter.java

License:Apache License

public PinotRecordWriter(SegmentGeneratorConfig segmentConfig, TaskAttemptContext context, Path workDir,
        PinotRecordSerialization pinotRecordSerialization) {
    _segmentConfig = segmentConfig;
    _workDir = workDir;
    _baseDataDir = PinotOutputFormat.getTempSegmentDir(context) + "/data";
    String filename = PinotOutputFormat.getTableName(context);
    try {
        _handler = new FileHandler(_baseDataDir, filename, ".json", MAX_FILE_SIZE);
        _handler.open(true);
        _pinotRecordSerialization = pinotRecordSerialization;
        _pinotRecordSerialization.init(context.getConfiguration(), segmentConfig.getSchema());
    } catch (Exception e) {
        throw new RuntimeException("Error initialize PinotRecordReader", e);
    }
}