Usage examples for the org.apache.hadoop.typedbytes.TypedBytesWritable constructor TypedBytesWritable()
public TypedBytesWritable()
From source file:com.dappervision.hbase.mapred.TypedBytesTableRecordReader.java
License:Apache License
/** * @return TypedBytesWritable//w w w. j a v a2 s . co m * * @see org.apache.hadoop.mapred.RecordReader#createKey() */ public TypedBytesWritable createKey() { //return this.recordReaderImpl.createKey(); return new TypedBytesWritable(); }
From source file:com.dappervision.hbase.mapred.TypedBytesTableRecordReader.java
License:Apache License
/** * @return RowTypedBytesWritable/*w w w . j a v a 2s. com*/ * * @see org.apache.hadoop.mapred.RecordReader#createValue() */ public TypedBytesWritable createValue() { //return this.recordReaderImpl.createValue(); return new TypedBytesWritable(); }
From source file:com.dappervision.hbase.mapred.TypedBytesTableReducer.java
License:Apache License
@Override public void reduce(Text key, Iterator<Text> values, OutputCollector<TypedBytesWritable, TypedBytesWritable> outputCollector, Reporter arg3) throws IOException { byte[] keyBytes = key.getBytes(); TypedBytesWritable keyWritable = new TypedBytesWritable(); TypedBytesWritable valueWritable = new TypedBytesWritable(); keyWritable.setValue(new Buffer(keyBytes)); //merge the column family and qualifier HashMap<String, HashMap<String, String>> cfMap = new HashMap<String, HashMap<String, String>>(); while (values.hasNext()) { Text value = values.next(); String strVal = value.toString(); //Separate column family with comma (:) //Separate the qualifier and value with equity String[] cf_qual_val_parts = strVal.split(":"); String cf = cf_qual_val_parts[0]; String qual_val = cf_qual_val_parts[1]; String[] qual_val_parts = qual_val.split("="); String qual = qual_val_parts[0]; String val = qual_val_parts[1]; if (cfMap.get(cf) != null) { HashMap<String, String> qualMap = cfMap.get(cf); if (qualMap == null) { qualMap = new HashMap<String, String>(); }/* w w w . j av a2s . 
c o m*/ qualMap.put(qual, val); // the duplicated key will be replaced, if using Buffer, we should do it ourselves } else { HashMap<String, String> qualMap = new HashMap<String, String>(); qualMap.put(qual, val); cfMap.put(cf, qualMap); } } HashMap<Buffer, HashMap<Buffer, Buffer>> bufMap = new HashMap<Buffer, HashMap<Buffer, Buffer>>(); Set<Entry<String, HashMap<String, String>>> entrySet = cfMap.entrySet(); for (Entry<String, HashMap<String, String>> entry : entrySet) { HashMap<String, String> qualValMap = entry.getValue(); HashMap<Buffer, Buffer> qualValBufMap = new HashMap<Buffer, Buffer>(); for (Entry<String, String> qualValEntry : qualValMap.entrySet()) { qualValBufMap.put(new Buffer(qualValEntry.getKey().getBytes()), new Buffer(qualValEntry.getValue().getBytes())); } bufMap.put(new Buffer(entry.getKey().getBytes()), qualValBufMap); } valueWritable.setValue(bufMap); outputCollector.collect(keyWritable, valueWritable); }
From source file:com.jfolson.hive.serde.RBaseSerDe.java
License:Apache License
/**
 * Prepares this SerDe for a table: allocates the reusable typed-bytes buffers, reads the
 * column names and types from the table properties, builds the row object inspector and
 * the reusable row, and reads the key/value wrapping options.
 *
 * <p>An absent or empty column-name or column-type property is treated as "no columns".
 * Any column category is accepted; the object inspectors are derived from the declared
 * type of each column.
 *
 * @param conf job configuration; not read by this method
 * @param tbl table properties holding the column lists and the SerDe options
 * @throws SerDeException if the number of column names differs from the number of
 *         column types
 * @throws RuntimeException if native serialization is requested together with more than
 *         one key or more than two columns
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    serializeBytesWritable = new TypedBytesWritable();
    barrStr = new NonSyncDataOutputBuffer();
    tbOut = new RTypedBytesWritableOutput(barrStr);
    inBarrStr = new NonSyncDataInputBuffer();
    tbIn = new RTypedBytesWritableInput(inBarrStr);

    // Read the table definition.
    String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);

    // "".split(",") yields one empty name, so an empty property must be handled explicitly.
    if (columnNameProperty == null || columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty == null || columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }

    // Checked explicitly because assertions are disabled by default; a mismatch would
    // otherwise only surface as an IndexOutOfBoundsException in the loop below.
    if (columnNames.size() != columnTypes.size()) {
        throw new SerDeException(getClass().getName() + " found " + columnNames.size()
                + " column names but " + columnTypes.size() + " column types");
    }
    numColumns = columnNames.size();

    // One standard writable object inspector per column, derived from its declared type.
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
    for (int c = 0; c < numColumns; c++) {
        columnOIs.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(columnTypes.get(c)));
    }
    // StandardStruct uses ArrayList to store the row.
    rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);

    // The row object is allocated once and reused for all rows.
    row = new ArrayList<Object>(numColumns);
    for (int c = 0; c < numColumns; c++) {
        row.add(null);
    }

    String numStr = tbl.getProperty(KEYLENGTH_PROPERTY);
    if (numStr != null) {
        numKeys = Integer.parseInt(numStr);
    }
    wrapKeys = Boolean.parseBoolean(tbl.getProperty(WRAP_KEY_PROPERTY, "true"));
    unwrapKeys = wrapKeys;
    wrapValues = Boolean.parseBoolean(tbl.getProperty(WRAP_VALUE_PROPERTY, "true"));
    unwrapValues = wrapValues;
    keepAsBytes = Boolean.parseBoolean(tbl.getProperty(NATIVE_PROPERTY, "false"));
    if (keepAsBytes) {
        if (numKeys > 1) {
            throw new RuntimeException(
                    "using native R serialization will only produce at most 1 key, not: " + numKeys);
        }
        if (numColumns > 2) {
            throw new RuntimeException(
                    "using native R serialization will only produce at most 2 columns, not: " + numColumns);
        }
    }
}
From source file:com.jfolson.hive.serde.RBaseSerDe.java
License:Apache License
protected Object deserializeField(RTypedBytesWritableInput in, TypeInfo type, Object reuse) throws IOException { RType rtype = in.readTypeCode();/*from w ww . jav a 2 s . co m*/ if (rtype == null) { throw new RuntimeException("End of stream"); } // read the type Class<? extends Writable> writableType = RType.getWritableType(rtype); if (writableType == null) { LOG.info("Warning: null Writable type for rtype: " + rtype); } if (writableType != null && writableType.isAssignableFrom(NullWritable.class)) { // indicates that the recorded value is null return null; } //LOG.info("RType should be instantiated as: "+writableType.getSimpleName()); switch (type.getCategory()) { case PRIMITIVE: { PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type; switch (ptype.getPrimitiveCategory()) { case VOID: { return null; } case BINARY: { TypedBytesWritable r = reuse == null ? new TypedBytesWritable() : (TypedBytesWritable) reuse; byte[] bytes = in.getInput().readRaw(rtype.code); // rewrite the type code r.set(bytes, 0, bytes.length); return r; } case BOOLEAN: { //TODO Fix this hack: if (rtype != RType.BOOL) { in.readNull(); return null; } BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse; return in.readBoolean(r); } /*case BYTE: { ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse; r = in.readByte(r); return r; }*/ /*case SHORT: { ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse; r = in.readShort(r); return r; }*/ case INT: { if (rtype != RType.INT) { in.readNull(); return null; } IntWritable r = reuse == null ? null : (IntWritable) reuse; return in.readInt(r); } /*case LONG: { LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse; r = in.readLong(r); return r; }*/ /*case FLOAT: { FloatWritable r = reuse == null ? 
new FloatWritable() : (FloatWritable) reuse; r = in.readFloat(r); return r; }*/ case DOUBLE: { if (rtype != RType.DOUBLE) { in.readNull(); return null; } DoubleWritable r = reuse == null ? null : (DoubleWritable) reuse; return in.readDouble(r); } case STRING: { // TODO fix this hack if (rtype != RType.STRING) { in.readNull(); return null; } Text r = reuse == null ? null : (Text) reuse; return in.readText(r); } default: { throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory()); } } } // Currently, deserialization of complex types is not supported case LIST: { if (rtype != RType.VECTOR) { in.readNull(); return null; } ObjectInspector elemOI = ((ListObjectInspector) TypeInfoUtils .getStandardWritableObjectInspectorFromTypeInfo(type)).getListElementObjectInspector(); PrimitiveObjectInspector elemPOI = (PrimitiveObjectInspector) elemOI; Class<? extends Writable> elemClass = (Class<? extends Writable>) elemPOI.getPrimitiveWritableClass(); ArrayWritable l = reuse == null ? new ArrayWritable(elemClass) : new ArrayWritable(elemClass, (Writable[]) reuse); in.readVector(l); return l.get(); } case MAP: case STRUCT: default: { throw new RuntimeException("Unsupported category: " + type.getCategory()); } } }
From source file:com.jfolson.hive.serde.RBaseSerDe.java
License:Apache License
protected void serializeField(Object o, ObjectInspector oi, Object reuse) throws IOException { //LOG.info("Serializing hive type: "+oi.getTypeName()); //LOG.info("Serializing category: "+oi.getCategory().toString()); if (o == null) { tbOut.writeNull();/*from w ww . j a v a 2s .c o m*/ return; } switch (oi.getCategory()) { case PRIMITIVE: { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; //LOG.info("Serializing primitive: "+poi.getPrimitiveCategory().toString()); switch (poi.getPrimitiveCategory()) { case VOID: { return; } case BINARY: { BinaryObjectInspector boi = (BinaryObjectInspector) poi; TypedBytesWritable bytes = reuse == null ? new TypedBytesWritable() : (TypedBytesWritable) reuse; BytesWritable bytesWrite = boi.getPrimitiveWritableObject(o); if (bytesWrite != null) { bytes.set(bytesWrite); if (!RType.isValid(bytes)) { LOG.error("Invalid typedbytes detected with type: " + RType.getType(bytes).code); bytes.setValue(new Buffer(bytesWrite.getBytes(), 0, bytesWrite.getLength())); } //LOG.info("Writing binary primitive with class: "+bytes.getClass().getName()); tbOut.write(bytes); } return; } case BOOLEAN: { BooleanObjectInspector boi = (BooleanObjectInspector) poi; BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse; r.set(boi.get(o)); tbOut.write(r); return; } case BYTE: { ByteObjectInspector boi = (ByteObjectInspector) poi; ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse; r.set(boi.get(o)); tbOut.write(r); return; } case SHORT: { ShortObjectInspector spoi = (ShortObjectInspector) poi; ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse; r.set(spoi.get(o)); tbOut.write(r); return; } case INT: { IntObjectInspector ioi = (IntObjectInspector) poi; IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse; r.set(ioi.get(o)); tbOut.write(r); return; } case LONG: { LongObjectInspector loi = (LongObjectInspector) poi; LongWritable r = reuse == null ? 
new LongWritable() : (LongWritable) reuse; r.set(loi.get(o)); tbOut.write(r); return; } case FLOAT: { FloatObjectInspector foi = (FloatObjectInspector) poi; FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse; r.set(foi.get(o)); tbOut.write(r); return; } case DOUBLE: DoubleObjectInspector doi = (DoubleObjectInspector) poi; DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse; r.set(doi.get(o)); tbOut.write(r); return; case STRING: { StringObjectInspector soi = (StringObjectInspector) poi; Text t = soi.getPrimitiveWritableObject(o); tbOut.write(t); return; } default: { throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory()); } } } case LIST: { ListObjectInspector loi = (ListObjectInspector) oi; ObjectInspector elemOI = loi.getListElementObjectInspector(); List l = loi.getList(o); // Don't use array (typecode: 144) until everything supports NA values in typedbytes if (false) {//(elemOI.getCategory()==ObjectInspector.Category.PRIMITIVE){ tbOut.writeArray(l, (PrimitiveObjectInspector) elemOI); } else { tbOut.writeVector(l, (PrimitiveObjectInspector) elemOI); } return; } case MAP: case STRUCT: { // For complex object, serialize to JSON format String s = SerDeUtils.getJSONString(o, oi); Text t = reuse == null ? new Text() : (Text) reuse; // convert to Text and write it t.set(s); tbOut.write(t); return; } default: { throw new RuntimeException("Unrecognized type: " + oi.getCategory()); } } }
From source file:com.jfolson.hive.serde.RTypedBytesSerDe.java
License:Apache License
Object deserializeField(RTypedBytesWritableInput in, TypeInfo type, Object reuse) throws IOException { RType rtype = in.readTypeCode();/*w w w . j a v a 2s. c om*/ if (rtype == null) { throw new RuntimeException("End of stream"); } // read the type Class<? extends Writable> writableType = RType.getWritableType(rtype); if (writableType == null) { LOG.info("Warning: null Writable type for rtype: " + rtype); } if (writableType != null && writableType.isAssignableFrom(NullWritable.class)) { // indicates that the recorded value is null return null; } //LOG.info("RType should be instantiated as: "+writableType.getSimpleName()); switch (type.getCategory()) { case PRIMITIVE: { PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type; switch (ptype.getPrimitiveCategory()) { case VOID: { return null; } case BINARY: { TypedBytesWritable r = reuse == null ? new TypedBytesWritable() : (TypedBytesWritable) reuse; byte[] bytes = in.getInput().readRaw(rtype.code); // rewrite the type code r.set(bytes, 0, bytes.length); return r; } case BOOLEAN: { //TODO Fix this hack: if (rtype != RType.BOOL) { in.readNull(); return null; } BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse; return in.readBoolean(r); } /*case BYTE: { ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse; r = in.readByte(r); return r; }*/ /*case SHORT: { ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse; r = in.readShort(r); return r; }*/ case INT: { if (rtype != RType.INT) { in.readNull(); return null; } IntWritable r = reuse == null ? null : (IntWritable) reuse; return in.readInt(r); } /*case LONG: { LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse; r = in.readLong(r); return r; }*/ /*case FLOAT: { FloatWritable r = reuse == null ? 
new FloatWritable() : (FloatWritable) reuse; r = in.readFloat(r); return r; }*/ case DOUBLE: { if (rtype != RType.DOUBLE) { in.readNull(); return null; } DoubleWritable r = reuse == null ? null : (DoubleWritable) reuse; return in.readDouble(r); } case STRING: { // TODO fix this hack if (rtype != RType.STRING) { in.readNull(); return null; } Text r = reuse == null ? null : (Text) reuse; return in.readText(r); } default: { throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory()); } } } // Currently, deserialization of complex types is not supported case LIST: { if (rtype != RType.VECTOR) { in.readNull(); return null; } ObjectInspector elemOI = ((ListObjectInspector) TypeInfoUtils .getStandardWritableObjectInspectorFromTypeInfo(type)).getListElementObjectInspector(); PrimitiveObjectInspector elemPOI = (PrimitiveObjectInspector) elemOI; Class<? extends Writable> elemClass = (Class<? extends Writable>) elemPOI.getPrimitiveWritableClass(); ArrayWritable l = reuse == null ? new ArrayWritable(elemClass) : new ArrayWritable(elemClass, (Writable[]) reuse); in.readVector(l); return l.get(); } case MAP: case STRUCT: default: { throw new RuntimeException("Unsupported category: " + type.getCategory()); } } }
From source file:com.jfolson.hive.serde.RTypedBytesSerDe.java
License:Apache License
private void serializeField(Object o, ObjectInspector oi, Object reuse) throws IOException { //LOG.info("Serializing hive type: "+oi.getTypeName()); //LOG.info("Serializing category: "+oi.getCategory().toString()); if (o == null) { tbOut.writeNull();/*from w ww .j a v a2 s. c o m*/ return; } switch (oi.getCategory()) { case PRIMITIVE: { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; //LOG.info("Serializing primitive: "+poi.getPrimitiveCategory().toString()); switch (poi.getPrimitiveCategory()) { case VOID: { return; } case BINARY: { BinaryObjectInspector boi = (BinaryObjectInspector) poi; TypedBytesWritable bytes = reuse == null ? new TypedBytesWritable() : (TypedBytesWritable) reuse; BytesWritable bytesWrite = boi.getPrimitiveWritableObject(o); if (bytesWrite != null) { bytes.set(bytesWrite); if (!RType.isValid(bytes)) { LOG.error("Invalid typedbytes detected with type: " + RType.getType(bytes).code); bytes.setValue(new Buffer(bytesWrite.getBytes(), 0, bytesWrite.getLength())); } //LOG.info("Writing binary primitive with class: "+bytes.getClass().getName()); tbOut.write(bytes); } return; } case BOOLEAN: { BooleanObjectInspector boi = (BooleanObjectInspector) poi; BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse; r.set(boi.get(o)); tbOut.write(r); return; } case BYTE: { ByteObjectInspector boi = (ByteObjectInspector) poi; ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse; r.set(boi.get(o)); tbOut.write(r); return; } case SHORT: { ShortObjectInspector spoi = (ShortObjectInspector) poi; ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse; r.set(spoi.get(o)); tbOut.write(r); return; } case INT: { IntObjectInspector ioi = (IntObjectInspector) poi; IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse; r.set(ioi.get(o)); tbOut.write(r); return; } case LONG: { LongObjectInspector loi = (LongObjectInspector) poi; LongWritable r = reuse == null ? 
new LongWritable() : (LongWritable) reuse; r.set(loi.get(o)); tbOut.write(r); return; } case FLOAT: { FloatObjectInspector foi = (FloatObjectInspector) poi; FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse; r.set(foi.get(o)); tbOut.write(r); return; } case DOUBLE: DoubleObjectInspector doi = (DoubleObjectInspector) poi; DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse; r.set(doi.get(o)); tbOut.write(r); return; case STRING: { StringObjectInspector soi = (StringObjectInspector) poi; Text t = soi.getPrimitiveWritableObject(o); tbOut.write(t); return; } default: { throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory()); } } } case LIST: { ListObjectInspector loi = (ListObjectInspector) oi; ObjectInspector elemOI = loi.getListElementObjectInspector(); List l = loi.getList(o); if (false) {//(elemOI.getCategory()==ObjectInspector.Category.PRIMITIVE){ tbOut.writeArray(l, (PrimitiveObjectInspector) elemOI); } else { tbOut.writeVector(l, (PrimitiveObjectInspector) elemOI); } return; } case MAP: case STRUCT: { // For complex object, serialize to JSON format String s = SerDeUtils.getJSONString(o, oi); Text t = reuse == null ? new Text() : (Text) reuse; // convert to Text and write it t.set(s); tbOut.write(t); return; } default: { throw new RuntimeException("Unrecognized type: " + oi.getCategory()); } } }
From source file:fm.last.pigtail.test.TestTypedBytesSequenceFileLoader.java
License:Apache License
private String createSequenceFile(Object[] data) throws IOException { File tmpFile = File.createTempFile("test", ".tbseq"); String tmpFileName = tmpFile.getAbsolutePath(); System.err.println("fileName: " + tmpFileName); Path path = new Path("file:///" + tmpFileName); JobConf conf = new JobConf(); FileSystem fs = FileSystem.get(path.toUri(), conf); TypedBytesWritable key = new TypedBytesWritable(); TypedBytesWritable value = new TypedBytesWritable(); SequenceFile.Writer writer = null; try {//from ww w . ja v a 2 s.co m writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass()); for (int i = 0; i < data.length; i += 2) { key.setValue(data[i]); value.setValue(data[i + 1]); writer.append(key, value); } } finally { IOUtils.closeStream(writer); } // fix the file path string on Windows String regex = "\\\\"; String replacement = quoteReplacement("\\\\"); return tmpFileName.replaceAll(regex, replacement); }