Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
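Every example on this page follows the same basic pattern: fetch the job Configuration from the TaskAttemptContext, read job properties from it, and use it to resolve a FileSystem for the task's input or output path. A minimal sketch of that pattern (the class ConfigAwareRecordReader and the property name my.reader.buffer.size are illustrative, not taken from any of the sources below):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class ConfigAwareRecordReader extends RecordReader<LongWritable, Text> {
    private FSDataInputStream in;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The Configuration carries every job-level setting into the task attempt.
        Configuration conf = context.getConfiguration();
        // Read a job property with a default; "my.reader.buffer.size" is an illustrative name.
        int bufferSize = conf.getInt("my.reader.buffer.size", 4096);
        // Resolve the FileSystem for the split's path from the same Configuration,
        // the pattern nearly every example below repeats.
        Path path = ((FileSplit) split).getPath();
        FileSystem fs = path.getFileSystem(conf);
        in = fs.open(path, bufferSize);
    }

    @Override
    public boolean nextKeyValue() { return false; } // stub: yields no records

    @Override
    public LongWritable getCurrentKey() { return null; }

    @Override
    public Text getCurrentValue() { return null; }

    @Override
    public float getProgress() { return 0f; }

    @Override
    public void close() throws IOException {
        if (in != null) {
            in.close();
        }
    }
}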
From source file:com.linkedin.cubert.io.rubix.RubixOutputFormat.java
License:Open Source License
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String extension = RubixConstants.RUBIX_EXTENSION;
    CompressionCodec codec = null;
    boolean isCompressed = getCompressOutput(context);
    if (isCompressed) {
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension += codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(context, extension);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    return new RubixRecordWriter<K, V>(conf, fileOut, context.getOutputKeyClass(),
            context.getOutputValueClass(), codec);
}
From source file:com.linkedin.cubert.io.rubix.RubixRecordReader.java
License:Open Source License
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    initialize(split, context.getConfiguration());
}
From source file:com.linkedin.cubert.io.text.PigTextOutputFormatWrapper.java
License:Open Source License
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    PigTextOutputFormat outputFormat;
    if (conf.get(CubertStrings.TEXT_OUTPUT_SEPARATOR) == null) {
        outputFormat = new PigTextOutputFormat(defaultDelimiter);
    } else {
        String str = conf.get(CubertStrings.TEXT_OUTPUT_SEPARATOR);
        str = StringEscapeUtils.unescapeJava(str);
        byte[] bytes = str.getBytes("UTF-8");
        if (bytes.length > 1)
            throw new RuntimeException(String.format("Invalid separator in text output format %s", str));
        outputFormat = new PigTextOutputFormat(bytes[0]);
    }
    return outputFormat.getRecordWriter(context);
}
From source file:com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorageInputStream.java
License:Apache License
/** Construct given a path and a configuration. */
public AvroStorageInputStream(Path path, TaskAttemptContext context) throws IOException {
    this.stream = path.getFileSystem(context.getConfiguration()).open(path);
    this.len = path.getFileSystem(context.getConfiguration()).getFileStatus(path).getLen();
}
From source file:com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroOutputFormat.java
License:Apache License
@Override
public RecordWriter<NullWritable, Object> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (schema == null)
        throw new IOException("Must provide a schema");

    Configuration conf = context.getConfiguration();
    DataFileWriter<Object> writer = new DataFileWriter<Object>(new PigAvroDatumWriter(schema));
    if (FileOutputFormat.getCompressOutput(context)) {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    // Do max as core-default.xml has io.file.buffer.size as 4K
    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY,
            Math.max(conf.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

    Path path = getDefaultWorkFile(context, EXT);
    writer.create(schema, path.getFileSystem(conf).create(path));
    return new PigAvroRecordWriter(writer);
}
From source file:com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroRecordReader.java
License:Apache License
/** Constructor to initialize input and the Avro data reader. */
public PigAvroRecordReader(TaskAttemptContext context, FileSplit split, Schema readerSchema,
        boolean ignoreBadFiles, Map<Path, Map<Integer, Integer>> schemaToMergedSchemaMap,
        boolean useMultipleSchemas) throws IOException {
    this.path = split.getPath();
    this.in = new AvroStorageInputStream(path, context);
    this.useMultipleSchemas = useMultipleSchemas;
    if (readerSchema == null) {
        AvroStorageLog.details("No avro schema given; assuming the schema is embedded");
    }
    Schema writerSchema;
    try {
        FileSystem fs = FileSystem.get(path.toUri(), context.getConfiguration());
        writerSchema = AvroStorageUtils.getSchema(path, fs);
    } catch (IOException e) {
        AvroStorageLog.details("No avro writer schema found in '" + path
                + "'; assuming writer schema matches reader schema");
        writerSchema = null;
    }
    try {
        if (useMultipleSchemas) {
            this.reader = new DataFileReader<Object>(in, new PigAvroDatumReader(writerSchema, null));
        } else {
            this.reader = new DataFileReader<Object>(in, new PigAvroDatumReader(writerSchema, readerSchema));
        }
    } catch (IOException e) {
        throw new IOException("Error initializing data file reader for file (" + split.getPath() + ")", e);
    }
    this.reader.sync(split.getStart()); // sync to start
    this.start = in.tell();
    this.end = split.getStart() + split.getLength();
    this.ignoreBadFiles = ignoreBadFiles;
    this.schemaToMergedSchemaMap = schemaToMergedSchemaMap;
    if (schemaToMergedSchemaMap != null) {
        // initialize mProtoTuple with the right default values
        int maxPos = 0;
        for (Map<Integer, Integer> map : schemaToMergedSchemaMap.values()) {
            for (Integer i : map.values()) {
                maxPos = Math.max(i, maxPos);
            }
        }
        int tupleSize = maxPos + 1;
        AvroStorageLog.details("Creating proto tuple of fixed size: " + tupleSize);
        mProtoTuple = new ArrayList<Object>(tupleSize);
        for (int i = 0; i < tupleSize; i++) {
            // Get the list of fields from the passed schema
            List<Schema.Field> subFields = readerSchema.getFields();
            JsonNode defValue = subFields.get(i).defaultValue();
            if (defValue != null) {
                Schema.Type type = subFields.get(i).schema().getType();
                if (type.equals(Schema.Type.UNION)) {
                    // For a union, take the type of the first non-null branch.
                    List<Schema> schemas = subFields.get(i).schema().getTypes();
                    for (Schema schema : schemas) {
                        if (!schema.getType().equals(Schema.Type.NULL)) {
                            type = schema.getType();
                            break;
                        }
                    }
                }
                switch (type) {
                case BOOLEAN:
                    mProtoTuple.add(i, defValue.getBooleanValue());
                    break;
                case ENUM:
                    mProtoTuple.add(i, defValue.getTextValue());
                    break;
                case FIXED:
                    mProtoTuple.add(i, defValue.getTextValue());
                    break;
                case INT:
                    mProtoTuple.add(i, defValue.getIntValue());
                    break;
                case LONG:
                    // Fixed: read the LONG default as a long, not an int.
                    mProtoTuple.add(i, defValue.getLongValue());
                    break;
                case FLOAT:
                    mProtoTuple.add(i, defValue.getNumberValue().floatValue());
                    break;
                case DOUBLE:
                    mProtoTuple.add(i, defValue.getNumberValue().doubleValue());
                    break;
                case STRING:
                    mProtoTuple.add(i, defValue.getTextValue());
                    break;
                default:
                    mProtoTuple.add(i, null);
                    break;
                }
            } else {
                mProtoTuple.add(i, null);
            }
        }
    }
}
From source file:com.linkedin.json.JsonSequenceFileInputFormat.java
License:Apache License
@Override
public RecordReader<Object, Object> createRecordReader(final InputSplit split, final TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();
    String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
    log.info("Input file path:" + inputPathString);
    Path inputPath = new Path(inputPathString);

    SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf), inputPath, conf);
    SequenceFile.Metadata meta = reader.getMetadata();

    try {
        final Text keySchema = meta.get(new Text("key.schema"));
        final Text valueSchema = meta.get(new Text("value.schema"));

        if (0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
            throw new Exception(String.format("Cannot have a 0 length schema. keySchema[%s], valueSchema[%s]",
                    keySchema, valueSchema));
        }

        return new JsonObjectRecordReader(new JsonTypeSerializer(keySchema.toString()),
                new JsonTypeSerializer(valueSchema.toString()),
                baseInputFormat.createRecordReader(split, context));
    } catch (Exception e) {
        // Chain the cause so the original failure is not lost.
        throw new IOException("Failed to load schema from file: " + inputPathString, e);
    }
}
From source file:com.linkedin.json.JsonSequenceFileOutputFormat.java
License:Apache License
@Override
public RecordWriter<Object, Object> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Shamelessly copy in hadoop code to allow us to set the metadata with our schema
    Configuration conf = context.getConfiguration();

    CompressionCodec codec = null;
    CompressionType compressionType = CompressionType.NONE;
    if (getCompressOutput(context)) {
        // find the kind of compression to do
        compressionType = SequenceFileOutputFormat.getOutputCompressionType(context);
        // find the right codec
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }

    // get the path of the temporary output file
    Path file = getDefaultWorkFile(context, "");
    FileSystem fs = file.getFileSystem(conf);

    final String keySchema = getSchema("output.key.schema", conf);
    final String valueSchema = getSchema("output.value.schema", conf);

    /* begin cheddar's stealing of jay's code */
    SequenceFile.Metadata meta = new SequenceFile.Metadata();
    meta.set(new Text("key.schema"), new Text(keySchema));
    meta.set(new Text("value.schema"), new Text(valueSchema));

    final SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, file, context.getOutputKeyClass(),
            context.getOutputValueClass(), compressionType, codec, context, meta);
    /* end cheddar's stealing of jay's code */

    final JsonTypeSerializer keySerializer = new JsonTypeSerializer(keySchema);
    final JsonTypeSerializer valueSerializer = new JsonTypeSerializer(valueSchema);

    return new RecordWriter<Object, Object>() {
        public void write(Object key, Object value) throws IOException {
            out.append(new BytesWritable(keySerializer.toBytes(key)),
                    new BytesWritable(valueSerializer.toBytes(value)));
            context.progress();
        }

        public void close(TaskAttemptContext context) throws IOException {
            out.close();
        }
    };
}
From source file:com.linkedin.pinot.hadoop.io.PinotOutputFormat.java
License:Apache License
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    configure(context.getConfiguration());
    final PinotRecordSerialization dataWriteSupport = getDataWriteSupport(context);
    initSegmentConfig(context);
    Path workDir = getDefaultWorkFile(context, "");
    return new PinotRecordWriter<>(_segmentConfig, context, workDir, dataWriteSupport);
}
From source file:com.linkedin.pinot.hadoop.io.PinotRecordWriter.java
License:Apache License
public PinotRecordWriter(SegmentGeneratorConfig segmentConfig, TaskAttemptContext context, Path workDir,
        PinotRecordSerialization pinotRecordSerialization) {
    _segmentConfig = segmentConfig;
    _workDir = workDir;
    _baseDataDir = PinotOutputFormat.getTempSegmentDir(context) + "/data";
    String filename = PinotOutputFormat.getTableName(context);
    try {
        _handler = new FileHandler(_baseDataDir, filename, ".json", MAX_FILE_SIZE);
        _handler.open(true);
        _pinotRecordSerialization = pinotRecordSerialization;
        _pinotRecordSerialization.init(context.getConfiguration(), segmentConfig.getSchema());
    } catch (Exception e) {
        throw new RuntimeException("Error initializing PinotRecordWriter", e);
    }
}