List of usage examples for org.apache.hadoop.io SequenceFile createWriter
public static Writer createWriter(FileContext fc, Configuration conf, Path name, Class keyClass, Class valClass, CompressionType compressionType, CompressionCodec codec, Metadata metadata, final EnumSet<CreateFlag> createFlag, CreateOpts... opts) throws IOException
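For reference, a minimal, self-contained sketch of invoking this overload is shown below; the output path, key/value types, and create flags are arbitrary choices for illustration and are not taken from the examples that follow.

import java.io.IOException;
import java.util.EnumSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Metadata;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class CreateWriterSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Hypothetical output path used only for this sketch.
        Path out = new Path("example.seq");
        SequenceFile.Writer writer = null;
        try {
            // The trailing CreateOpts varargs parameter may be left empty.
            writer = SequenceFile.createWriter(FileContext.getFileContext(conf), conf, out,
                    LongWritable.class, Text.class, CompressionType.NONE, new DefaultCodec(),
                    new Metadata(), EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE));
            writer.append(new LongWritable(1L), new Text("hello"));
        } finally {
            IOUtils.closeStream(writer);
        }
    }
}

The examples below use the same overload with job-specific key/value classes and CreateOpts, as well as the alternative Writer.Option-based overload.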
From source file:com.conductor.hadoop.WritableValueInputFormat.java
License:Apache License
/**
 * Writes the provided {@code values} to an input file to be read by the {@link Job}, and sets up all additional
 * necessary configuration.
 *
 * @param values
 *            the values to be read by the job.
 * @param clazz
 *            the type of the values.
 * @param inputsPerSplit
 *            how many inputs each split gets
 * @param job
 *            the job to configure
 * @param <V>
 *            the type of the {@code values}
 * @throws IOException
 */
public static <V extends Writable> void setupInput(final List<V> values, Class<V> clazz,
        final int inputsPerSplit, final Job job) throws IOException {
    final Path inputPath = new Path("job_input_" + System.currentTimeMillis() + UUID.randomUUID().toString());
    final Writer writer = SequenceFile.createWriter(FileContext.getFileContext(job.getConfiguration()),
            job.getConfiguration(), inputPath, NullWritable.class, clazz, CompressionType.NONE, CODEC,
            new Metadata(), EnumSet.of(CreateFlag.CREATE), DUMMY_VAR_ARGS);
    doSetupInput(values, clazz, inputsPerSplit, job, inputPath, writer);
}
From source file:edu.bigdata.training.fileformats.compress.SequenceFileWriter.java
public static void main(String[] args) throws IOException {
    String uri = "output";
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    File infile = new File("src/main/resources/input.txt");
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(conf, Writer.file(path), Writer.keyClass(key.getClass()),
                Writer.valueClass(value.getClass()),
                Writer.bufferSize(fs.getConf().getInt("io.file.buffer.size", 4096)),
                Writer.replication(fs.getDefaultReplication()), Writer.blockSize(1073741824),
                Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()),
                Writer.progressable(null), Writer.metadata(new Metadata()));
        int ctr = 100;
        List<String> lines = FileUtils.readLines(infile);
        for (String line : lines) {
            key.set(ctr++);
            value.set(line);
            if (ctr < 150) {
                System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            }
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
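Note that this example uses the Writer.Option-based createWriter(Configuration, Writer.Option...) overload rather than the FileContext overload shown in the signature above; the options supply the file, key/value classes, buffer size, replication, block size, block compression, progressable, and metadata.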
From source file:ldbc.snb.datagen.serializer.UpdateEventSerializer.java
License:Open Source License
public UpdateEventSerializer(Configuration conf, String fileNamePrefix, int reducerId, int numPartitions)
        throws IOException {
    conf_ = conf;
    reducerId_ = reducerId;
    stringBuffer_ = new StringBuffer(512);
    data_ = new ArrayList<String>();
    list_ = new ArrayList<String>();
    currentEvent_ = new UpdateEvent(-1, -1, UpdateEvent.UpdateEventType.NO_EVENT, new String(""));
    numPartitions_ = numPartitions;
    stats_ = new UpdateStreamStats();
    fileNamePrefix_ = fileNamePrefix;
    try {
        streamWriter_ = new SequenceFile.Writer[numPartitions_];
        FileContext fc = FileContext.getFileContext(conf);
        for (int i = 0; i < numPartitions_; ++i) {
            Path outFile = new Path(fileNamePrefix_ + "_" + i);
            streamWriter_[i] = SequenceFile.createWriter(fc, conf, outFile, UpdateEventKey.class, Text.class,
                    CompressionType.NONE, new DefaultCodec(), new SequenceFile.Metadata(),
                    EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
                    Options.CreateOpts.checksumParam(Options.ChecksumOpt.createDisabled()));
            FileSystem fs = FileSystem.get(conf);
            Path propertiesFile = new Path(fileNamePrefix_ + ".properties");
            if (fs.exists(propertiesFile)) {
                FSDataInputStream file = fs.open(propertiesFile);
                Properties properties = new Properties();
                properties.load(file);
                stats_.minDate_ = Long
                        .parseLong(properties.getProperty("ldbc.snb.interactive.min_write_event_start_time"));
                stats_.maxDate_ = Long
                        .parseLong(properties.getProperty("ldbc.snb.interactive.max_write_event_start_time"));
                stats_.count_ = Long.parseLong(properties.getProperty("ldbc.snb.interactive.num_events"));
                file.close();
                fs.delete(propertiesFile, true);
            }
        }
    } catch (IOException e) {
        throw e;
    }
}
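This example exercises the trailing CreateOpts varargs parameter, disabling checksums via Options.CreateOpts.checksumParam(Options.ChecksumOpt.createDisabled()), and passes CreateFlag.OVERWRITE so reruns replace any existing partition files.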