Example usage for org.apache.hadoop.io SequenceFile createWriter

List of usage examples for org.apache.hadoop.io SequenceFile createWriter

Introduction

On this page you can find usage examples for org.apache.hadoop.io.SequenceFile.createWriter.

Prototype

@Deprecated
public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass,
        CompressionType compressionType, CompressionCodec codec, Metadata metadata) throws IOException 

Source Link

Document

Construct the preferred type of 'raw' SequenceFile Writer.

Usage

From source file:Txt2SeqConverter.java

License:Apache License

/**
 * Converts a plain-text file into a block-compressed (LZO) SequenceFile.
 *
 * <p>Every line of the input becomes one record: the key is the zero-based line number
 * (as a {@code LongWritable}) and the value is the line itself (as a {@code Text}).
 * I/O failures are reported on standard output as {@code "ERROR: <message>"}.
 *
 * @param args {@code args[0]} is the input text file, {@code args[1]} is the URI of the
 *             SequenceFile to create
 */
public static void main(String[] args) {
    if (args.length != 2) {
        System.out.println("Usage: hadoop Txt2SeqConverter input output");
        System.exit(1);
    }
    String seqFileName = args[1];
    Configuration conf = new Configuration();
    Path path = new Path(seqFileName);

    LongWritable key = new LongWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    BufferedReader br = null;
    try {
        // Resolve the file system inside the same try block: if this fails there is
        // nothing to write to, so we must not fall through with a null file system.
        FileSystem fs = FileSystem.get(URI.create(seqFileName), conf);
        writer = SequenceFile.createWriter(fs, conf, path, LongWritable.class, Text.class,
                SequenceFile.CompressionType.BLOCK, new com.hadoop.compression.lzo.LzoCodec());
        br = new BufferedReader(new FileReader(args[0]));

        int transactionID = 0;
        String transaction = null;
        while ((transaction = br.readLine()) != null) {
            key.set(transactionID);
            value.set(transaction);
            writer.append(key, value);

            transactionID++;
        }
    } catch (IOException e) {
        System.out.println("ERROR: " + e.getMessage());
    } finally {
        // Release the input reader as well as the writer; both calls tolerate null.
        IOUtils.closeStream(br);
        IOUtils.closeStream(writer);
    }
}

From source file:com.alexholmes.hadooputils.combine.avro.AvroFileGenerator.java

License:Apache License

/**
 * Generates a block-compressed SequenceFile of {@code Text} pairs.
 *
 * <p>Record {@code i} has key {@code "k" + i} and value {@code "v" + i}, for
 * {@code i} from 0 up to (but excluding) the requested record count.
 *
 * @param args {@code args[0]} is the output file path, {@code args[1]} the number of records
 * @return 1 if the argument count is wrong (usage is printed to standard error), otherwise 0
 * @throws Exception if the count cannot be parsed or the file cannot be written
 */
public int run(final String[] args) throws Exception {
    if (args.length != 2) {
        final String usage = String.format("Usage: %s: <file path> <number of records>",
                AvroFileGenerator.class.getName());
        System.err.println(usage);
        return 1;
    }

    final Path target = new Path(args[0]);
    final int total = Integer.parseInt(args[1]);

    final FileSystem fileSystem = FileSystem.get(super.getConf());
    final SequenceFile.Writer out = SequenceFile.createWriter(fileSystem, super.getConf(), target,
            Text.class, Text.class, SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    try {
        int index = 0;
        while (index < total) {
            out.append(new Text("k" + index), new Text("v" + index));
            index++;
        }
    } finally {
        // Always release the writer, even when an append fails.
        out.close();
    }
    return 0;
}

From source file:com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileTest.java

License:Apache License

/**
 * Creates a block-compressed {@code Text}/{@code Text} SequenceFile at {@code path}
 * containing a single record built from {@code key} and {@code value}, which are
 * declared outside this method.
 *
 * @param path location of the SequenceFile to create
 * @throws IOException if the file cannot be created or written
 */
public void writeSequenceFile(Path path) throws IOException {
    final Configuration configuration = new Configuration();
    final SequenceFile.Writer out = SequenceFile.createWriter(FileSystem.get(configuration), configuration,
            path, Text.class, Text.class, SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    try {
        out.append(key, value);
    } finally {
        // Close unconditionally so the file is released even if the append fails.
        out.close();
    }
}

From source file:com.alexholmes.hadooputils.combine.seqfile.SequenceFileGenerator.java

License:Apache License

/**
 * Writes a block-compressed SequenceFile holding generated {@code Text} records.
 *
 * <p>The i-th record (counting from zero) is the pair {@code ("k" + i, "v" + i)}.
 *
 * @param args {@code args[0]} is the file to create, {@code args[1]} the record count
 * @return 1 when exactly two arguments were not supplied (a usage line goes to standard
 *         error), 0 after the file has been written
 * @throws Exception if the record count is not an integer or writing fails
 */
public int run(final String[] args) throws Exception {
    final boolean argumentsValid = args.length == 2;
    if (!argumentsValid) {
        System.err.println(String.format("Usage: %s: <file path> <number of records>",
                SequenceFileGenerator.class.getName()));
        return 1;
    }

    final Path destination = new Path(args[0]);
    final int recordCount = Integer.parseInt(args[1]);
    final FileSystem targetFs = FileSystem.get(super.getConf());

    final SequenceFile.Writer seqWriter = SequenceFile.createWriter(targetFs, super.getConf(), destination,
            Text.class, Text.class, SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    try {
        for (int n = 0; n != recordCount && n < recordCount; n++) {
            final Text recordKey = new Text("k" + n);
            final Text recordValue = new Text("v" + n);
            seqWriter.append(recordKey, recordValue);
        }
    } finally {
        // The writer must be closed whether or not every record was appended.
        seqWriter.close();
    }

    return 0;
}

From source file:com.asakusafw.runtime.directio.hadoop.SequenceFileFormat.java

License:Apache License

/**
 * Opens a SequenceFile at {@code path} and returns a {@link ModelOutput} that appends
 * models to it.
 *
 * <p>The file is block-compressed when {@code getCompressionCodec(path)} yields a codec and
 * uncompressed otherwise. After each append, the growth of the writer's reported length
 * is added to {@code counter}.
 *
 * @param dataType the model type; only its name is used here, for the debug log message
 * @param fileSystem the file system on which the file is created
 * @param path the file to create
 * @param counter receives the increase in written length after every append
 * @return an output whose {@code close()} closes the underlying writer
 * @throws IOException if the writer cannot be created
 * @throws InterruptedException declared by the overridden method
 */
@Override
public ModelOutput<T> createOutput(Class<? extends T> dataType, FileSystem fileSystem, Path path,
        final Counter counter) throws IOException, InterruptedException {
    final K keyHolder = createKeyObject();
    final V valueHolder = createValueObject();
    final CompressionCodec codec = getCompressionCodec(path);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("Creating sequence file (path={0}, type={1}, codec={2})", //$NON-NLS-1$
                path, dataType.getName(), codec));
    }
    configure(codec);

    final CompressionType compression;
    if (codec != null) {
        compression = CompressionType.BLOCK;
    } else {
        compression = CompressionType.NONE;
    }
    final SequenceFile.Writer sink = SequenceFile.createWriter(fileSystem, getConf(), path,
            keyHolder.getClass(), valueHolder.getClass(), compression, codec);

    // Ownership of the writer passes to the returned output; close it here only if
    // we fail before handing it off.
    boolean handedOff = false;
    try {
        ModelOutput<T> result = new ModelOutput<T>() {

            // Writer length observed after the previous append.
            private long written = 0;

            @Override
            public void write(T model) throws IOException {
                copyFromModel(model, keyHolder, valueHolder);
                sink.append(keyHolder, valueHolder);
                long current = sink.getLength();
                counter.add(current - written);
                written = current;
            }

            @Override
            public void close() throws IOException {
                sink.close();
            }
        };
        handedOff = true;
        return result;
    } finally {
        if (!handedOff) {
            sink.close();
        }
    }
}

From source file:com.cloudera.cdk.morphline.hadoop.sequencefile.ReadSequenceFileTest.java

License:Apache License

/**
 * Writes {@code numRecords} uncompressed {@code Text}/{@code Text} records to {@code file}
 * and returns a mapping of expected keys -> records.
 *
 * <p>Record {@code i} has key {@code "key" + i} and value {@code "value" + i}. The returned
 * map is keyed by the key string; each {@code Record} holds the pair under the field names
 * {@code "key"} and {@code "value"}.
 *
 * @param file local file that receives the SequenceFile data
 * @param numRecords number of records to write
 * @return the expected records, keyed by key string
 * @throws IOException if the file cannot be opened, written or closed
 */
private HashMap<String, Record> createTextSequenceFile(File file, int numRecords) throws IOException {
    HashMap<String, Record> map = new HashMap<String, Record>();
    SequenceFile.Metadata metadata = new SequenceFile.Metadata(getMetadataForSequenceFile());
    FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(file), null);
    try {
        SequenceFile.Writer writer = null;
        try {
            writer = SequenceFile.createWriter(new Configuration(), out, Text.class, Text.class,
                    SequenceFile.CompressionType.NONE, null, metadata);
            for (int i = 0; i < numRecords; ++i) {
                Text key = new Text("key" + i);
                Text value = new Text("value" + i);
                writer.append(key, value);
                Record record = new Record();
                record.put("key", key);
                record.put("value", value);
                map.put(key.toString(), record);
            }
        } finally {
            Closeables.closeQuietly(writer);
        }
    } finally {
        // A writer created over a caller-supplied stream flushes that stream on close but
        // does not close it, so the underlying file handle has to be released here.
        out.close();
    }
    return map;
}

From source file:com.cloudera.cdk.morphline.hadoop.sequencefile.ReadSequenceFileTest.java

License:Apache License

/**
 * Writes {@code numRecords} uncompressed {@code Text}/{@code MyWritable} records to
 * {@code file} and returns a mapping of expected keys -> records.
 *
 * <p>Record {@code i} has key {@code "key" + i} and value
 * {@code new MyWritable("value", i)}. The returned map is keyed by the key string; each
 * {@code Record} holds the pair under the field names {@code "key"} and {@code "value"}.
 *
 * @param file local file that receives the SequenceFile data
 * @param numRecords number of records to write
 * @return the expected records, keyed by key string
 * @throws IOException if the file cannot be opened, written or closed
 */
private HashMap<String, Record> createMyWritableSequenceFile(File file, int numRecords) throws IOException {
    HashMap<String, Record> map = new HashMap<String, Record>();
    SequenceFile.Metadata metadata = new SequenceFile.Metadata(getMetadataForSequenceFile());
    FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(file), null);
    try {
        SequenceFile.Writer writer = null;
        try {
            writer = SequenceFile.createWriter(new Configuration(), out, Text.class,
                    ParseTextMyWritableBuilder.MyWritable.class, SequenceFile.CompressionType.NONE, null,
                    metadata);
            for (int i = 0; i < numRecords; ++i) {
                Text key = new Text("key" + i);
                ParseTextMyWritableBuilder.MyWritable value = new ParseTextMyWritableBuilder.MyWritable("value",
                        i);
                writer.append(key, value);
                Record record = new Record();
                record.put("key", key);
                record.put("value", value);
                map.put(key.toString(), record);
            }
        } finally {
            Closeables.closeQuietly(writer);
        }
    } finally {
        // A writer created over a caller-supplied stream flushes that stream on close but
        // does not close it, so the underlying file handle has to be released here.
        out.close();
    }
    return map;
}

From source file:com.hdfs.concat.crush.CrushReducerParameterizedTest.java

License:Apache License

/**
 * Every file in this unit test is named "file" followed by a number. This method will create a text or sequence file with as
 * many lines as the number in the file name. The keys in the file will count from one to the number. The values in the file
 * will count from 100n + 1 to 100n + n. This way each file will have distinct contents so long as no two files have the same
 * name.
 *
 * @param srcDir directory in which the file is created
 * @param fileName name of the file; the text after its first four characters must parse as an integer
 * @param format {@code Format.TEXT} writes tab-separated lines, any other value writes a sequence file
 * @return the absolute path of the written file
 * @throws IOException if the file cannot be created or written
 */
private Text writeFile(File srcDir, String fileName, Format format) throws IOException {

    int fileNum = Integer.parseInt(fileName.substring(4));

    File file = new File(srcDir, fileName);

    if (Format.TEXT == format) {
        PrintWriter writer = new PrintWriter(new FileOutputStream(file));
        try {
            for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
                writer.printf("%d\t%d\n", k, v);
            }
        } finally {
            // Release the file handle even if formatting fails part-way through.
            writer.close();
        }
    } else {
        CustomWritable key = new CustomWritable();
        CustomWritable value = new CustomWritable();

        DefaultCodec codec = new DefaultCodec();
        codec.setConf(job);

        Writer writer = SequenceFile.createWriter(fs, job, new Path(file.getAbsolutePath()),
                CustomWritable.class, CustomWritable.class, compressionType, codec);
        try {
            for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
                key.set(k);
                value.set(v);

                writer.append(key, value);
            }
        } finally {
            // Close the sequence file writer even when an append throws.
            writer.close();
        }
    }

    return new Text(file.getAbsolutePath());
}

From source file:com.hdfs.concat.crush.integration.CrushMapReduceTest.java

License:Apache License

/**
 * Every file in this unit test is named "file" followed by a number. This method will create a sequence or text file with as
 * many lines as the number in the file name. The keys in the file will count from one to the number. The values in the file
 * will count from 100n + 1 to 100n + n. This way each file will have distinct contents so long as no two files have the same
 * name./*ww w.  j a  v a  2s . c o m*/
 */
private void writeFile(String fileName, Format format) throws IOException {

    int fileNum = Integer.parseInt(fileName.substring(fileName.length() - 2));

    Path path = new Path(fileName);

    if (Format.TEXT == format) {
        PrintWriter writer = new PrintWriter(
                new BufferedWriter(new PrintWriter(getFileSystem().create(path, false))));

        for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
            writer.printf("%d\t%d\n", k, v);
        }

        writer.close();
    } else {
        CustomWritable key = new CustomWritable();
        CustomWritable value = new CustomWritable();

        Writer writer = SequenceFile.createWriter(getFileSystem(), getFileSystem().getConf(), path,
                CustomWritable.class, CustomWritable.class, CompressionType.BLOCK, defaultCodec);

        for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
            key.set(k);
            value.set(v);

            writer.append(key, value);
        }

        writer.close();
    }
}

From source file:com.netflix.suro.sink.localfile.FileWriterBase.java

License:Apache License

/**
 * Create a new sequence file with {@code Text} keys and {@code MessageWritable} values.
 *
 * <p>Block compression is used when {@code codec} is set; otherwise the file is
 * written uncompressed.
 *
 * @param newPath path of the sequence file to create
 * @return a writer for the new sequence file
 * @throws java.io.IOException if the writer cannot be created
 */
public SequenceFile.Writer createSequenceFile(String newPath) throws IOException {
    final SequenceFile.CompressionType compression = (codec == null)
            ? SequenceFile.CompressionType.NONE
            : SequenceFile.CompressionType.BLOCK;
    return SequenceFile.createWriter(fs, conf, new Path(newPath), Text.class, MessageWritable.class,
            compression, codec);
}