Example usage for org.apache.hadoop.io SequenceFile createWriter

Introduction

This page shows example usage of org.apache.hadoop.io.SequenceFile#createWriter.

Prototype

public static Writer createWriter(FileContext fc, Configuration conf, Path name, Class keyClass, Class valClass,
        CompressionType compressionType, CompressionCodec codec, Metadata metadata,
        final EnumSet<CreateFlag> createFlag, CreateOpts... opts) throws IOException 

Document

Construct the preferred type of SequenceFile Writer.
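
A minimal sketch of the overload above, assuming a local file named example.seq and simple IntWritable/Text records (the class name CreateWriterSketch and the output path are illustrative, not taken from the projects below). With CompressionType.NONE the codec argument is ignored, and the trailing CreateOpts varargs may be left empty:

import java.io.IOException;
import java.util.EnumSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Metadata;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class CreateWriterSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileContext fc = FileContext.getFileContext(conf);
        Path out = new Path("example.seq"); // hypothetical output path

        // CreateFlag.CREATE fails if the file already exists; add CreateFlag.OVERWRITE to replace it.
        SequenceFile.Writer writer = SequenceFile.createWriter(fc, conf, out, IntWritable.class, Text.class,
                CompressionType.NONE, new DefaultCodec(), new Metadata(), EnumSet.of(CreateFlag.CREATE));
        try {
            writer.append(new IntWritable(1), new Text("hello"));
        } finally {
            writer.close();
        }
    }
}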

Usage

From source file: com.conductor.hadoop.WritableValueInputFormat.java

License: Apache License

/**
 * Writes the provided {@code values} to an input file to be read by the {@link Job}, and sets up all additional
 * necessary configuration.
 * 
 * @param values
 *            the values to be read by the job.
 * @param clazz
 *            the type of the values.
 * @param inputsPerSplit
 *            how many inputs each split gets
 * @param job
 *            the job to configure
 * @param <V>
 *            the type of the {@code values}
 * @throws IOException
 */
public static <V extends Writable> void setupInput(final List<V> values, Class<V> clazz,
        final int inputsPerSplit, final Job job) throws IOException {
    final Path inputPath = new Path("job_input_" + System.currentTimeMillis() + UUID.randomUUID().toString());
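    // CODEC and DUMMY_VAR_ARGS are constants defined elsewhere in WritableValueInputFormat (not shown in this excerpt).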
    final Writer writer = SequenceFile.createWriter(FileContext.getFileContext(job.getConfiguration()),
            job.getConfiguration(), inputPath, NullWritable.class, clazz, CompressionType.NONE, CODEC,
            new Metadata(), EnumSet.of(CreateFlag.CREATE), DUMMY_VAR_ARGS);
    doSetupInput(values, clazz, inputsPerSplit, job, inputPath, writer);
}
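
For context, a file produced by this method is typically read back with SequenceFile.Reader. The following is a hypothetical sketch, not part of WritableValueInputFormat (the names ReadBackSketch and readBack are invented); it assumes the inputPath and clazz from the method above:

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.ReflectionUtils;

public class ReadBackSketch {
    // Hypothetical read-back of the records appended by setupInput above.
    public static <V extends Writable> void readBack(final Job job, final Path inputPath, final Class<V> clazz)
            throws IOException {
        final SequenceFile.Reader reader = new SequenceFile.Reader(job.getConfiguration(),
                SequenceFile.Reader.file(inputPath));
        try {
            final NullWritable key = NullWritable.get();
            final V value = ReflectionUtils.newInstance(clazz, job.getConfiguration());
            while (reader.next(key, value)) {
                // each value is one input record written by setupInput
            }
        } finally {
            reader.close();
        }
    }
}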

From source file: edu.bigdata.training.fileformats.compress.SequenceFileWriter.java

public static void main(String[] args) throws IOException {
    String uri = "output";
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    File infile = new File("src/main/resources/input.txt");
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(conf, Writer.file(path), Writer.keyClass(key.getClass()),
                Writer.valueClass(value.getClass()),
                Writer.bufferSize(fs.getConf().getInt("io.file.buffer.size", 4096)),
                Writer.replication(fs.getDefaultReplication()), Writer.blockSize(1073741824),
                Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()),
                Writer.progressable(null), Writer.metadata(new Metadata()));
        int ctr = 100;
        List<String> lines = FileUtils.readLines(infile);
        for (String line : lines) {
            key.set(ctr++);
            value.set(line);
            if (ctr < 150) {
                System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            }
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
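
Unlike the FileContext-based overload documented above, this example uses the Writer.Option varargs form, createWriter(Configuration, Writer.Option...), which lets the caller supply only the options it cares about: here an explicit buffer size, replication, a 1 GiB block size (1073741824 bytes), block compression with DefaultCodec, and empty metadata.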

From source file: ldbc.snb.datagen.serializer.UpdateEventSerializer.java

License: Open Source License

public UpdateEventSerializer(Configuration conf, String fileNamePrefix, int reducerId, int numPartitions)
        throws IOException {
    conf_ = conf;
    reducerId_ = reducerId;
    stringBuffer_ = new StringBuffer(512);
    data_ = new ArrayList<String>();
    list_ = new ArrayList<String>();
    currentEvent_ = new UpdateEvent(-1, -1, UpdateEvent.UpdateEventType.NO_EVENT, "");
    numPartitions_ = numPartitions;
    stats_ = new UpdateStreamStats();
    fileNamePrefix_ = fileNamePrefix;
    try {
        streamWriter_ = new SequenceFile.Writer[numPartitions_];
        FileContext fc = FileContext.getFileContext(conf);
        for (int i = 0; i < numPartitions_; ++i) {
            Path outFile = new Path(fileNamePrefix_ + "_" + i);
            streamWriter_[i] = SequenceFile.createWriter(fc, conf, outFile, UpdateEventKey.class, Text.class,
                    CompressionType.NONE, new DefaultCodec(), new SequenceFile.Metadata(),
                    EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
                    Options.CreateOpts.checksumParam(Options.ChecksumOpt.createDisabled()));
            FileSystem fs = FileSystem.get(conf);
            Path propertiesFile = new Path(fileNamePrefix_ + ".properties");
            if (fs.exists(propertiesFile)) {
                FSDataInputStream file = fs.open(propertiesFile);
                Properties properties = new Properties();
                properties.load(file);
                stats_.minDate_ = Long
                        .parseLong(properties.getProperty("ldbc.snb.interactive.min_write_event_start_time"));
                stats_.maxDate_ = Long
                        .parseLong(properties.getProperty("ldbc.snb.interactive.max_write_event_start_time"));
                stats_.count_ = Long.parseLong(properties.getProperty("ldbc.snb.interactive.num_events"));
                file.close();
                fs.delete(propertiesFile, true);
            }
        }
    } catch (IOException e) {
        throw e;
    }
}
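
Two details are worth noting here: the EnumSet passes CreateFlag.OVERWRITE alongside CreateFlag.CREATE, so partition files left over from an earlier run are replaced instead of causing the create to fail, and Options.CreateOpts.checksumParam(Options.ChecksumOpt.createDisabled()) disables checksum creation for the update stream files.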