List of usage examples for the org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat constructor.
From source file:com.mozilla.grouperfish.pig.storage.DocumentVectorStorage.java
License:Apache License
/**
 * Supplies the Hadoop output format Pig uses to persist document vectors.
 *
 * @return a {@code SequenceFileOutputFormat} keyed by {@link Text} with
 *         {@link VectorWritable} values
 * @throws IOException declared by the StoreFunc contract; never thrown here
 */
@SuppressWarnings("rawtypes")
@Override
public OutputFormat getOutputFormat() throws IOException {
    // Raw return type is imposed by the overridden Pig StoreFunc signature.
    return new SequenceFileOutputFormat<Text, VectorWritable>();
}
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java
License:Apache License
/**
 * Supplies the Hadoop output format used to store co-clustering vectors.
 *
 * @return a {@code SequenceFileOutputFormat} keyed by {@link IntWritable}
 *         with {@link VectorWritable} values
 * @throws IOException declared by the StoreFunc contract; never thrown here
 */
@Override
public OutputFormat<IntWritable, VectorWritable> getOutputFormat() throws IOException {
    return new SequenceFileOutputFormat<IntWritable, VectorWritable>();
}
From source file:edu.uci.ics.pregelix.dataflow.KeyValueWriterFactory.java
License:Apache License
/**
 * Creates a tuple writer that serializes two-field Hyracks tuples
 * (field 0 = key, field 1 = value) into a Hadoop SequenceFile for the
 * given partition.
 *
 * @param ctx        the Hyracks task context (unused here; contract requires it)
 * @param partition  the partition index used to build the task-attempt context
 * @param nPartition total number of partitions (unused here)
 * @return an {@link ITupleWriter} backed by a SequenceFile record writer
 * @throws HyracksDataException never thrown directly; writer methods wrap
 *         their failures in it
 */
@Override
public ITupleWriter getTupleWriter(IHyracksTaskContext ctx, final int partition, final int nPartition)
        throws HyracksDataException {
    return new ITupleWriter() {
        // Raw type retained: the key/value classes are only known at runtime
        // from the job configuration.
        private SequenceFileOutputFormat outputFormat = new SequenceFileOutputFormat();
        private Writable keyInstance;
        private Writable valueInstance;
        // Shared, resettable stream: each write() points it at a tuple field
        // and deserializes from it via dataIn.
        private ResetableByteArrayInputStream byteStream = new ResetableByteArrayInputStream();
        private DataInput dataIn = new DataInputStream(byteStream);
        private RecordWriter seqWriter;
        private ContextFactory contextFactory = new ContextFactory();
        private TaskAttemptContext taskContext;

        @Override
        public void open(DataOutput output) throws HyracksDataException {
            try {
                Job job = confFactory.getConf();
                taskContext = contextFactory.createContext(job.getConfiguration(), partition);
                seqWriter = outputFormat.getRecordWriter(taskContext);
                // Key/value types come from the job configuration, not from
                // compile-time generics.
                Class<?> keyClass = taskContext.getOutputKeyClass();
                Class<?> valClass = taskContext.getOutputValueClass();
                keyInstance = (Writable) ReflectionUtils.createInstance(keyClass);
                valueInstance = (Writable) ReflectionUtils.createInstance(valClass);
            } catch (Exception e) {
                throw new HyracksDataException(e);
            }
        }

        @SuppressWarnings("unchecked")
        @Override
        public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
            try {
                // Field 0 holds the serialized key; rewind the shared stream onto it.
                byteStream.setByteArray(tuple.getFieldData(0), tuple.getFieldStart(0));
                keyInstance.readFields(dataIn);
                // Field 1 holds the serialized value.
                byteStream.setByteArray(tuple.getFieldData(1), tuple.getFieldStart(1));
                valueInstance.readFields(dataIn);
                seqWriter.write(keyInstance, valueInstance);
            } catch (Exception e) {
                throw new HyracksDataException(e);
            }
        }

        @Override
        public void close(DataOutput output) throws HyracksDataException {
            try {
                seqWriter.close(taskContext);
            } catch (Exception e) {
                throw new HyracksDataException(e);
            }
        }
    };
}
From source file:org.archive.bacon.io.SequenceFileStorage.java
License:Apache License
/** * Most of this method is cut/pasted from the Hadoop * SequenceFileOutputFormat. The big difference is that we use the * key and value types given to this Pig storage class rather than * using the ones set by the job configuration. *///from w ww . ja va 2 s . c o m public OutputFormat getOutputFormat() throws IOException { return new SequenceFileOutputFormat() { public RecordWriter getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); Class keyClass, valueClass; try { keyClass = conf.getClassByName(keyType); valueClass = conf.getClassByName(valueType); } catch (ClassNotFoundException cnfe) { throw new IOException(cnfe); } // Instantiate null objects for the key and value types. // See getWritable() for their use. try { nullKey = (Writable) keyClass.newInstance(); nullValue = (Writable) valueClass.newInstance(); } catch (ReflectiveOperationException roe) { throw new IOException(roe); } CompressionCodec codec = null; CompressionType compressionType = CompressionType.NONE; if (getCompressOutput(context)) { // find the kind of compression to do compressionType = getOutputCompressionType(context); // find the right codec Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class); codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf); } // get the path of the temporary output file Path file = getDefaultWorkFile(context, ""); FileSystem fs = file.getFileSystem(conf); final SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, file, keyClass, valueClass, compressionType, codec, context); return new RecordWriter() { public void write(Object key, Object value) throws IOException { out.append(key, value); } public void close(TaskAttemptContext context) throws IOException { out.close(); } }; } }; }
From source file:org.archive.hadoop.pig.SequenceFileStorage.java
License:Apache License
/** * Most of this method is cut/pasted from the Hadoop * SequenceFileOutputFormat. The big difference is that we use the * key and value types given to this Pig storage class rather than * using the ones set by the job configuration. *//*from ww w.java 2 s .c o m*/ public OutputFormat getOutputFormat() throws IOException { return new SequenceFileOutputFormat() { public RecordWriter getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); Class keyClass, valueClass; try { keyClass = conf.getClassByName(keyType); valueClass = conf.getClassByName(valueType); } catch (ClassNotFoundException cnfe) { throw new IOException(cnfe); } // Instantiate null objects for the key and value types. // See getWritable() for their use. try { nullKey = (Writable) keyClass.newInstance(); nullValue = (Writable) valueClass.newInstance(); } catch (Exception roe) { throw new IOException(roe); } CompressionCodec codec = null; CompressionType compressionType = CompressionType.NONE; if (getCompressOutput(context)) { // find the kind of compression to do compressionType = getOutputCompressionType(context); // find the right codec Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class); codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf); } // get the path of the temporary output file Path file = getDefaultWorkFile(context, ""); FileSystem fs = file.getFileSystem(conf); final SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, file, keyClass, valueClass, compressionType, codec, context); return new RecordWriter() { public void write(Object key, Object value) throws IOException { out.append(key, value); } public void close(TaskAttemptContext context) throws IOException { out.close(); } }; } }; }
From source file:pl.edu.icm.coansys.commons.pig.udf.NullableTupleSequenceFileStoreFunc.java
License:Open Source License
/**
 * Supplies the Hadoop output format that stores Pig tuples wrapped as
 * {@link NullableTuple} for both key and value.
 *
 * @return a {@code SequenceFileOutputFormat} over NullableTuple pairs
 * @throws IOException declared by the StoreFunc contract; never thrown here
 */
@Override
public OutputFormat<NullableTuple, NullableTuple> getOutputFormat() throws IOException {
    return new SequenceFileOutputFormat<NullableTuple, NullableTuple>();
}
From source file:pl.edu.icm.coansys.commons.pig.udf.RichSequenceFileLoader.java
License:Open Source License
/**
 * Supplies the Hadoop output format used when this loader is asked to
 * store generic {@link Writable} key/value pairs.
 *
 * @return a {@code SequenceFileOutputFormat} over Writable pairs
 * @throws IOException declared by the StoreFunc contract; never thrown here
 */
@Override
public OutputFormat<Writable, Writable> getOutputFormat() throws IOException {
    return new SequenceFileOutputFormat<Writable, Writable>();
}