List of usage examples for org.apache.hadoop.io.SequenceFile.createWriter
@Deprecated public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass, CompressionType compressionType, CompressionCodec codec, Metadata metadata) throws IOException
From source file:Txt2SeqConverter.java
License:Apache License
public static void main(String[] args) { if (args.length != 2) { //System.out.println("Usage: env HADOOP_CLASSPATH=.:$HADOOP_CLASSPATH hadoop Txt2SeqConverter input output"); System.out.println("Usage: hadoop Txt2SeqConverter input output"); System.exit(1);//from ww w .java 2 s . c o m } FileSystem fs = null; String seqFileName = args[1]; Configuration conf = new Configuration(); try { fs = FileSystem.get(URI.create(seqFileName), conf); } catch (IOException e) { System.out.println("ERROR: " + e.getMessage()); } Path path = new Path(seqFileName); LongWritable key = new LongWritable(); Text value = new Text(); SequenceFile.Writer writer = null; try { //writer = SequenceFile.createWriter(fs, conf, path, LongWritable.class, Text.class, SequenceFile.CompressionType.BLOCK); writer = SequenceFile.createWriter(fs, conf, path, LongWritable.class, Text.class, SequenceFile.CompressionType.BLOCK, new com.hadoop.compression.lzo.LzoCodec()); BufferedReader br = new BufferedReader(new FileReader(args[0])); int transactionID = 0; String transaction = null; while ((transaction = br.readLine()) != null) { key.set(transactionID); value.set(transaction); writer.append(key, value); transactionID++; } } catch (IOException e) { System.out.println("ERROR: " + e.getMessage()); } finally { IOUtils.closeStream(writer); } }
From source file:com.alexholmes.hadooputils.combine.avro.AvroFileGenerator.java
License:Apache License
public int run(final String[] args) throws Exception { if (args.length != 2) { System.err.println(//from w w w . j a v a 2 s .c o m String.format("Usage: %s: <file path> <number of records>", AvroFileGenerator.class.getName())); return 1; } Path file = new Path(args[0]); int numRecords = Integer.valueOf(args[1]); FileSystem fs = FileSystem.get(super.getConf()); SequenceFile.Writer writer = SequenceFile.createWriter(fs, super.getConf(), file, Text.class, Text.class, SequenceFile.CompressionType.BLOCK, new DefaultCodec()); try { for (int i = 0; i < numRecords; i++) { writer.append(new Text("k" + i), new Text("v" + i)); } } finally { writer.close(); } return 0; }
From source file:com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileTest.java
License:Apache License
/**
 * Writes a block-compressed SequenceFile containing a single record at {@code path}.
 *
 * <p>The record is the {@code key}/{@code value} pair defined outside this method.
 *
 * @param path location of the SequenceFile to create
 * @throws IOException if the file cannot be created or written
 */
public void writeSequenceFile(Path path) throws IOException {
    final Configuration configuration = new Configuration();
    final SequenceFile.Writer out = SequenceFile.createWriter(
            FileSystem.get(configuration), configuration, path,
            Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    try {
        out.append(key, value);
    } finally {
        out.close();
    }
}
From source file:com.alexholmes.hadooputils.combine.seqfile.SequenceFileGenerator.java
License:Apache License
/**
 * Generates a block-compressed SequenceFile filled with synthetic {@code Text} records.
 *
 * <p>Keys are {@code "k0", "k1", ...} and values are {@code "v0", "v1", ...}.
 *
 * @param args {@code args[0]} is the file path to write, {@code args[1]} the number of records
 * @return {@code 0} after the file has been written, {@code 1} when the argument count is wrong
 * @throws Exception if the record count cannot be parsed or writing fails
 */
public int run(final String[] args) throws Exception {
    final boolean validArgCount = args.length == 2;
    if (!validArgCount) {
        System.err.println(String.format(
                "Usage: %s: <file path> <number of records>",
                SequenceFileGenerator.class.getName()));
        return 1;
    }

    final Path outputFile = new Path(args[0]);
    final int total = Integer.parseInt(args[1]);

    final FileSystem hdfs = FileSystem.get(super.getConf());
    final SequenceFile.Writer sequenceWriter = SequenceFile.createWriter(
            hdfs, super.getConf(), outputFile,
            Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    try {
        int written = 0;
        while (written < total) {
            sequenceWriter.append(new Text("k" + written), new Text("v" + written));
            written++;
        }
    } finally {
        sequenceWriter.close();
    }
    return 0;
}
From source file:com.asakusafw.runtime.directio.hadoop.SequenceFileFormat.java
License:Apache License
@Override public ModelOutput<T> createOutput(Class<? extends T> dataType, FileSystem fileSystem, Path path, final Counter counter) throws IOException, InterruptedException { final K keyBuffer = createKeyObject(); final V valueBuffer = createValueObject(); CompressionCodec codec = getCompressionCodec(path); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Creating sequence file (path={0}, type={1}, codec={2})", //$NON-NLS-1$ path, dataType.getName(), codec)); }// www . jav a2s. c o m configure(codec); final SequenceFile.Writer writer = SequenceFile.createWriter(fileSystem, getConf(), path, keyBuffer.getClass(), valueBuffer.getClass(), codec == null ? CompressionType.NONE : CompressionType.BLOCK, codec); boolean succeed = false; try { ModelOutput<T> output = new ModelOutput<T>() { private long lastPosition = 0; @Override public void write(T model) throws IOException { copyFromModel(model, keyBuffer, valueBuffer); writer.append(keyBuffer, valueBuffer); long nextPosition = writer.getLength(); counter.add(nextPosition - lastPosition); lastPosition = nextPosition; } @Override public void close() throws IOException { writer.close(); } }; succeed = true; return output; } finally { if (succeed == false) { writer.close(); } } }
From source file:com.cloudera.cdk.morphline.hadoop.sequencefile.ReadSequenceFileTest.java
License:Apache License
/**
 * Writes {@code numRecords} Text/Text records to an uncompressed sequence file and
 * returns the records that were written.
 *
 * <p>Record {@code i} has key {@code "key" + i} and value {@code "value" + i}.
 *
 * @param file local file that receives the sequence file data
 * @param numRecords number of records to write
 * @return a mapping of expected keys -> records, each record holding the
 *         {@code "key"} and {@code "value"} fields that were written
 * @throws IOException if the file cannot be opened or a record cannot be appended
 */
private HashMap<String, Record> createTextSequenceFile(File file, int numRecords) throws IOException {
    HashMap<String, Record> map = new HashMap<String, Record>();
    SequenceFile.Metadata metadata = new SequenceFile.Metadata(getMetadataForSequenceFile());
    FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(file), null);
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(new Configuration(), out, Text.class, Text.class,
                SequenceFile.CompressionType.NONE, null, metadata);
        for (int i = 0; i < numRecords; ++i) {
            Text key = new Text("key" + i);
            Text value = new Text("value" + i);
            writer.append(key, value);

            Record record = new Record();
            record.put("key", key);
            record.put("value", value);
            map.put(key.toString(), record);
        }
    } finally {
        Closeables.closeQuietly(writer);
        // A writer created on a caller-supplied stream only flushes that stream when it
        // is closed, so the stream (and its file handle) must be closed explicitly.
        Closeables.closeQuietly(out);
    }
    return map;
}
From source file:com.cloudera.cdk.morphline.hadoop.sequencefile.ReadSequenceFileTest.java
License:Apache License
/**
 * Writes {@code numRecords} Text/MyWritable records to an uncompressed sequence file
 * and returns the records that were written.
 *
 * <p>Record {@code i} has key {@code "key" + i} and value
 * {@code new MyWritable("value", i)}.
 *
 * @param file local file that receives the sequence file data
 * @param numRecords number of records to write
 * @return a mapping of expected keys -> records, each record holding the
 *         {@code "key"} and {@code "value"} fields that were written
 * @throws IOException if the file cannot be opened or a record cannot be appended
 */
private HashMap<String, Record> createMyWritableSequenceFile(File file, int numRecords) throws IOException {
    HashMap<String, Record> map = new HashMap<String, Record>();
    SequenceFile.Metadata metadata = new SequenceFile.Metadata(getMetadataForSequenceFile());
    FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(file), null);
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(new Configuration(), out, Text.class,
                ParseTextMyWritableBuilder.MyWritable.class,
                SequenceFile.CompressionType.NONE, null, metadata);
        for (int i = 0; i < numRecords; ++i) {
            Text key = new Text("key" + i);
            ParseTextMyWritableBuilder.MyWritable value =
                    new ParseTextMyWritableBuilder.MyWritable("value", i);
            writer.append(key, value);

            Record record = new Record();
            record.put("key", key);
            record.put("value", value);
            map.put(key.toString(), record);
        }
    } finally {
        Closeables.closeQuietly(writer);
        // A writer created on a caller-supplied stream only flushes that stream when it
        // is closed, so the stream (and its file handle) must be closed explicitly.
        Closeables.closeQuietly(out);
    }
    return map;
}
From source file:com.hdfs.concat.crush.CrushReducerParameterizedTest.java
License:Apache License
/**
 * Every file in this unit test is named "file" followed by a number. This method will
 * create a text or sequence file with as many lines as the number in the file name. The
 * keys in the file will count from one to the number. The values in the file will count
 * from 100n + 1 to 100n + n. This way each file will have distinct contents so long as
 * no two files have the same name.
 *
 * @param srcDir directory in which the file is created
 * @param fileName name of the file; the number is parsed from the text after the first
 *                 four characters
 * @param format {@code Format.TEXT} writes tab-separated lines, any other value writes a
 *               sequence file of {@code CustomWritable} pairs
 * @return the absolute path of the file that was written
 * @throws IOException if the file cannot be created or written
 */
private Text writeFile(File srcDir, String fileName, Format format) throws IOException {
    int fileNum = Integer.parseInt(fileName.substring(4));

    File file = new File(srcDir, fileName);

    if (Format.TEXT == format) {
        PrintWriter writer = new PrintWriter(new FileOutputStream(file));
        try {
            for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
                writer.printf("%d\t%d\n", k, v);
            }
        } finally {
            writer.close();
        }
    } else {
        CustomWritable key = new CustomWritable();
        CustomWritable value = new CustomWritable();

        DefaultCodec codec = new DefaultCodec();
        codec.setConf(job);

        Writer writer = SequenceFile.createWriter(fs, job, new Path(file.getAbsolutePath()),
                CustomWritable.class, CustomWritable.class, compressionType, codec);
        try {
            for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
                key.set(k);
                value.set(v);
                writer.append(key, value);
            }
        } finally {
            // Close even when an append fails so the file handle is not leaked.
            writer.close();
        }
    }

    return new Text(file.getAbsolutePath());
}
From source file:com.hdfs.concat.crush.integration.CrushMapReduceTest.java
License:Apache License
/** * Every file in this unit test is named "file" followed by a number. This method will create a sequence or text file with as * many lines as the number in the file name. The keys in the file will count from one to the number. The values in the file * will count from 100n + 1 to 100n + n. This way each file will have distinct contents so long as no two files have the same * name./*ww w. j a v a 2s . c o m*/ */ private void writeFile(String fileName, Format format) throws IOException { int fileNum = Integer.parseInt(fileName.substring(fileName.length() - 2)); Path path = new Path(fileName); if (Format.TEXT == format) { PrintWriter writer = new PrintWriter( new BufferedWriter(new PrintWriter(getFileSystem().create(path, false)))); for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) { writer.printf("%d\t%d\n", k, v); } writer.close(); } else { CustomWritable key = new CustomWritable(); CustomWritable value = new CustomWritable(); Writer writer = SequenceFile.createWriter(getFileSystem(), getFileSystem().getConf(), path, CustomWritable.class, CustomWritable.class, CompressionType.BLOCK, defaultCodec); for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) { key.set(k); value.set(v); writer.append(key, value); } writer.close(); } }
From source file:com.netflix.suro.sink.localfile.FileWriterBase.java
License:Apache License
/**
 * Creates a writer for a new sequence file.
 *
 * <p>Keys are {@code Text} and values are {@code MessageWritable}. Block compression
 * is requested when a codec is set; otherwise the file is written without compression.
 *
 * @param newPath location of the sequence file to create
 * @return a writer for the new sequence file
 * @throws java.io.IOException if the writer cannot be created
 */
public SequenceFile.Writer createSequenceFile(String newPath) throws IOException {
    final SequenceFile.CompressionType compression = (codec == null)
            ? SequenceFile.CompressionType.NONE
            : SequenceFile.CompressionType.BLOCK;
    return SequenceFile.createWriter(
            fs, conf, new Path(newPath),
            Text.class, MessageWritable.class,
            compression, codec);
}