Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
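Every example on this page follows the same basic pattern: fetch the job Configuration from the TaskAttemptContext, read job properties from it, and use it to resolve a FileSystem for the task's input or output path. A minimal sketch of that pattern (the class ConfigAwareRecordReader and the property name my.reader.buffer.size are illustrative, not taken from any of the sources below):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class ConfigAwareRecordReader extends RecordReader<LongWritable, Text> {
    private FSDataInputStream in;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The Configuration carries every job-level setting into the task attempt.
        Configuration conf = context.getConfiguration();
        // Read a job property with a default; "my.reader.buffer.size" is an illustrative name.
        int bufferSize = conf.getInt("my.reader.buffer.size", 4096);
        // Resolve the FileSystem for the split's path from the same Configuration,
        // the pattern nearly every example below repeats.
        Path path = ((FileSplit) split).getPath();
        FileSystem fs = path.getFileSystem(conf);
        in = fs.open(path, bufferSize);
    }

    @Override
    public boolean nextKeyValue() { return false; } // stub: yields no records

    @Override
    public LongWritable getCurrentKey() { return null; }

    @Override
    public Text getCurrentValue() { return null; }

    @Override
    public float getProgress() { return 0f; }

    @Override
    public void close() throws IOException {
        if (in != null) {
            in.close();
        }
    }
}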
From source file:com.linkedin.cubert.io.rubix.RubixOutputFormat.java
License:Open Source License
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String extension = RubixConstants.RUBIX_EXTENSION;
    CompressionCodec codec = null;
    boolean isCompressed = getCompressOutput(context);
    if (isCompressed) {
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension += codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(context, extension);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    return new RubixRecordWriter<K, V>(conf, fileOut, context.getOutputKeyClass(),
            context.getOutputValueClass(), codec);
}
From source file:com.linkedin.cubert.io.rubix.RubixRecordReader.java
License:Open Source License
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    initialize(split, context.getConfiguration());
}
From source file:com.linkedin.cubert.io.text.PigTextOutputFormatWrapper.java
License:Open Source License
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    PigTextOutputFormat outputFormat;
    if (conf.get(CubertStrings.TEXT_OUTPUT_SEPARATOR) == null) {
        outputFormat = new PigTextOutputFormat(defaultDelimiter);
    } else {
        String str = conf.get(CubertStrings.TEXT_OUTPUT_SEPARATOR);
        str = StringEscapeUtils.unescapeJava(str);
        byte[] bytes = str.getBytes("UTF-8");
        if (bytes.length > 1)
            throw new RuntimeException(String.format("Invalid separator in text output format %s", str));
        outputFormat = new PigTextOutputFormat(bytes[0]);
    }
    return outputFormat.getRecordWriter(context);
}
From source file:com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorageInputStream.java
License:Apache License
/** Construct given a path and a configuration. */
public AvroStorageInputStream(Path path, TaskAttemptContext context) throws IOException {
    this.stream = path.getFileSystem(context.getConfiguration()).open(path);
    this.len = path.getFileSystem(context.getConfiguration()).getFileStatus(path).getLen();
}
From source file:com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroOutputFormat.java
License:Apache License
@Override
public RecordWriter<NullWritable, Object> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (schema == null)
        throw new IOException("Must provide a schema");

    Configuration conf = context.getConfiguration();
    DataFileWriter<Object> writer = new DataFileWriter<Object>(new PigAvroDatumWriter(schema));
    if (FileOutputFormat.getCompressOutput(context)) {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    // Do max as core-default.xml has io.file.buffer.size as 4K
    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY,
            Math.max(conf.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

    Path path = getDefaultWorkFile(context, EXT);
    writer.create(schema, path.getFileSystem(conf).create(path));
    return new PigAvroRecordWriter(writer);
}
From source file:com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroRecordReader.java
License:Apache License
/** Constructor to initialize input and the Avro data reader. */
public PigAvroRecordReader(TaskAttemptContext context, FileSplit split, Schema readerSchema,
        boolean ignoreBadFiles, Map<Path, Map<Integer, Integer>> schemaToMergedSchemaMap,
        boolean useMultipleSchemas) throws IOException {
    this.path = split.getPath();
    this.in = new AvroStorageInputStream(path, context);
    this.useMultipleSchemas = useMultipleSchemas;
    if (readerSchema == null) {
        AvroStorageLog.details("No avro schema given; assuming the schema is embedded");
    }
    Schema writerSchema;
    try {
        FileSystem fs = FileSystem.get(path.toUri(), context.getConfiguration());
        writerSchema = AvroStorageUtils.getSchema(path, fs);
    } catch (IOException e) {
        AvroStorageLog.details("No avro writer schema found in '" + path
                + "'; assuming writer schema matches reader schema");
        writerSchema = null;
    }
    try {
        if (useMultipleSchemas) {
            this.reader = new DataFileReader<Object>(in, new PigAvroDatumReader(writerSchema, null));
        } else {
            this.reader = new DataFileReader<Object>(in, new PigAvroDatumReader(writerSchema, readerSchema));
        }
    } catch (IOException e) {
        throw new IOException("Error initializing data file reader for file (" + split.getPath() + ")", e);
    }
    this.reader.sync(split.getStart()); // sync to start
    this.start = in.tell();
    this.end = split.getStart() + split.getLength();
    this.ignoreBadFiles = ignoreBadFiles;
    this.schemaToMergedSchemaMap = schemaToMergedSchemaMap;
    if (schemaToMergedSchemaMap != null) {
        // initialize mProtoTuple with the right default values
        int maxPos = 0;
        for (Map<Integer, Integer> map : schemaToMergedSchemaMap.values()) {
            for (Integer i : map.values()) {
                maxPos = Math.max(i, maxPos);
            }
        }
        int tupleSize = maxPos + 1;
        AvroStorageLog.details("Creating proto tuple of fixed size: " + tupleSize);
        mProtoTuple = new ArrayList<Object>(tupleSize);
        for (int i = 0; i < tupleSize; i++) {
            // Get the list of fields from the passed schema
            List<Schema.Field> subFields = readerSchema.getFields();
            JsonNode defValue = subFields.get(i).defaultValue();
            if (defValue != null) {
                Schema.Type type = subFields.get(i).schema().getType();
                if (type.equals(Schema.Type.UNION)) {
                    // For a union, take the type of the first non-null branch.
                    List<Schema> schemas = subFields.get(i).schema().getTypes();
                    for (Schema schema : schemas) {
                        if (!schema.getType().equals(Schema.Type.NULL)) {
                            type = schema.getType();
                            break;
                        }
                    }
                }
                switch (type) {
                case BOOLEAN:
                    mProtoTuple.add(i, defValue.getBooleanValue());
                    break;
                case ENUM:
                    mProtoTuple.add(i, defValue.getTextValue());
                    break;
                case FIXED:
                    mProtoTuple.add(i, defValue.getTextValue());
                    break;
                case INT:
                    mProtoTuple.add(i, defValue.getIntValue());
                    break;
                case LONG:
                    // Fixed: read the LONG default as a long, not an int.
                    mProtoTuple.add(i, defValue.getLongValue());
                    break;
                case FLOAT:
                    mProtoTuple.add(i, defValue.getNumberValue().floatValue());
                    break;
                case DOUBLE:
                    mProtoTuple.add(i, defValue.getNumberValue().doubleValue());
                    break;
                case STRING:
                    mProtoTuple.add(i, defValue.getTextValue());
                    break;
                default:
                    mProtoTuple.add(i, null);
                    break;
                }
            } else {
                mProtoTuple.add(i, null);
            }
        }
    }
}
From source file:com.linkedin.json.JsonSequenceFileInputFormat.java
License:Apache License
@Override
public RecordReader<Object, Object> createRecordReader(final InputSplit split, final TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();
    String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
    log.info("Input file path:" + inputPathString);
    Path inputPath = new Path(inputPathString);

    SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf), inputPath, conf);
    SequenceFile.Metadata meta = reader.getMetadata();

    try {
        final Text keySchema = meta.get(new Text("key.schema"));
        final Text valueSchema = meta.get(new Text("value.schema"));

        if (0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
            throw new Exception(String.format("Cannot have a 0 length schema. keySchema[%s], valueSchema[%s]",
                    keySchema, valueSchema));
        }

        return new JsonObjectRecordReader(new JsonTypeSerializer(keySchema.toString()),
                new JsonTypeSerializer(valueSchema.toString()),
                baseInputFormat.createRecordReader(split, context));
    } catch (Exception e) {
        // Chain the cause so the original failure is not lost.
        throw new IOException("Failed to load schema from file: " + inputPathString, e);
    }
}
From source file:com.linkedin.json.JsonSequenceFileOutputFormat.java
License:Apache License
@Override
public RecordWriter<Object, Object> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Shamelessly copy in hadoop code to allow us to set the metadata with our schema
    Configuration conf = context.getConfiguration();

    CompressionCodec codec = null;
    CompressionType compressionType = CompressionType.NONE;
    if (getCompressOutput(context)) {
        // find the kind of compression to do
        compressionType = SequenceFileOutputFormat.getOutputCompressionType(context);
        // find the right codec
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }

    // get the path of the temporary output file
    Path file = getDefaultWorkFile(context, "");
    FileSystem fs = file.getFileSystem(conf);

    final String keySchema = getSchema("output.key.schema", conf);
    final String valueSchema = getSchema("output.value.schema", conf);

    /* begin cheddar's stealing of jay's code */
    SequenceFile.Metadata meta = new SequenceFile.Metadata();
    meta.set(new Text("key.schema"), new Text(keySchema));
    meta.set(new Text("value.schema"), new Text(valueSchema));

    final SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, file, context.getOutputKeyClass(),
            context.getOutputValueClass(), compressionType, codec, context, meta);
    /* end cheddar's stealing of jay's code */

    final JsonTypeSerializer keySerializer = new JsonTypeSerializer(keySchema);
    final JsonTypeSerializer valueSerializer = new JsonTypeSerializer(valueSchema);

    return new RecordWriter<Object, Object>() {
        public void write(Object key, Object value) throws IOException {
            out.append(new BytesWritable(keySerializer.toBytes(key)),
                    new BytesWritable(valueSerializer.toBytes(value)));
            context.progress();
        }

        public void close(TaskAttemptContext context) throws IOException {
            out.close();
        }
    };
}
From source file:com.linkedin.pinot.hadoop.io.PinotOutputFormat.java
License:Apache License
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    configure(context.getConfiguration());
    final PinotRecordSerialization dataWriteSupport = getDataWriteSupport(context);
    initSegmentConfig(context);
    Path workDir = getDefaultWorkFile(context, "");
    return new PinotRecordWriter<>(_segmentConfig, context, workDir, dataWriteSupport);
}
From source file:com.linkedin.pinot.hadoop.io.PinotRecordWriter.java
License:Apache License
public PinotRecordWriter(SegmentGeneratorConfig segmentConfig, TaskAttemptContext context, Path workDir,
        PinotRecordSerialization pinotRecordSerialization) {
    _segmentConfig = segmentConfig;
    _workDir = workDir;
    _baseDataDir = PinotOutputFormat.getTempSegmentDir(context) + "/data";
    String filename = PinotOutputFormat.getTableName(context);
    try {
        _handler = new FileHandler(_baseDataDir, filename, ".json", MAX_FILE_SIZE);
        _handler.open(true);
        _pinotRecordSerialization = pinotRecordSerialization;
        _pinotRecordSerialization.init(context.getConfiguration(), segmentConfig.getSchema());
    } catch (Exception e) {
        throw new RuntimeException("Error initializing PinotRecordWriter", e);
    }
}