List of usage examples for org.apache.hadoop.io SequenceFile SYNC_INTERVAL
int SYNC_INTERVAL
SYNC_INTERVAL is the number of bytes between sync points that SequenceFile.Writer embeds in a file. The examples below all use the constant as a lower bound for input split, fragment, and bundle sizes.
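Before the project examples, a minimal sketch of what the constant means in practice. This is illustrative code, not taken from any of the projects below; the scratch path and record contents are assumptions. It writes enough records for the writer to emit several sync markers, then shows how a reader dropped at an arbitrary byte offset realigns on the next marker; that realignment is the property that lets the examples below use SYNC_INTERVAL as a lower bound for split sizes.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SyncIntervalDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/sync-interval-demo.seq"); // hypothetical scratch location

        // Write enough records that the writer inserts several sync markers
        // (one roughly every SYNC_INTERVAL bytes, placed at record boundaries).
        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(path),
                SequenceFile.Writer.keyClass(LongWritable.class),
                SequenceFile.Writer.valueClass(Text.class))) {
            for (long i = 0; i < 10000; i++) {
                writer.append(new LongWritable(i), new Text("record-" + i));
            }
        }

        // A reader dropped at an arbitrary byte offset cannot parse mid-record,
        // but sync() seeks forward to the next sync marker, after which reading
        // resumes on a clean record boundary.
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
                SequenceFile.Reader.file(path))) {
            long arbitraryOffset = 3L * SequenceFile.SYNC_INTERVAL;
            reader.sync(arbitraryOffset);
            LongWritable key = new LongWritable();
            Text value = new Text();
            if (reader.next(key, value)) {
                System.out.println("Resynchronized; first whole record has key " + key.get());
            }
        }
    }
}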
From source file:com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileInputFormat.java
License:Apache License
/** Ctor. */
public CombineSequenceFileInputFormat() {
    setMinSplitSize(SequenceFile.SYNC_INTERVAL);
}
From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileInputFormat.java
License:Apache License
@Override
protected long getFormatMinSplitSize() {
    return SequenceFile.SYNC_INTERVAL;
}
From source file:com.asakusafw.runtime.directio.hadoop.SequenceFileFormat.java
License:Apache License
@Override
public long getMinimumFragmentSize() throws IOException, InterruptedException {
    return SequenceFile.SYNC_INTERVAL;
}
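All three snippets above make the same design choice: SYNC_INTERVAL is the smallest split or fragment worth creating. The writer emits a sync marker roughly every SYNC_INTERVAL bytes (markers are only placed at record boundaries), and a reader assigned a split first seeks to the next marker before consuming records. A split shorter than one sync interval could contain no marker at all and yield nothing, so the constant is the natural floor for split sizes.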
From source file:com.asakusafw.runtime.directio.hadoop.SequenceFileFormatTest.java
License:Apache License
/**
 * Test for reading input through randomly sized fragments.
 * @throws Exception if failed
 */
@Test
public void input_fragment() throws Exception {
    final int count = 30000;
    Random rand = new Random();
    LocalFileSystem fs = FileSystem.getLocal(conf);
    Path path = new Path(folder.newFile("testing").toURI());
    try (SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, LongWritable.class,
            Text.class)) {
        LongWritable k = new LongWritable();
        Text v = new Text();
        for (int i = 0; i < count; i++) {
            k.set(i);
            v.set("Hello, world at " + i);
            writer.append(k, v);
        }
    }
    long fileLen = fs.getFileStatus(path).getLen();
    StringOption value = new StringOption();
    for (int attempt = 0; attempt < 5; attempt++) {
        int index = 0;
        long offset = 0;
        while (offset < fileLen) {
            // Read the file in randomly sized windows, each a small multiple
            // of the sync interval.
            long length = SequenceFile.SYNC_INTERVAL * (rand.nextInt(10) + 2);
            length = Math.min(length, fileLen - offset);
            try (ModelInput<StringOption> in = format.createInput(StringOption.class, fs, path, offset,
                    length, new Counter())) {
                while (in.readTo(value)) {
                    String answer = "Hello, world at " + index;
                    assertThat(value.getAsString(), is(answer));
                    index++;
                }
                assertThat("eof", in.readTo(value), is(false));
            }
            offset += length;
        }
        assertThat(index, is(count));
    }
}
From source file:com.asakusafw.runtime.directio.hadoop.SequenceFileFormatTest.java
License:Apache License
/**
 * Test for reading input whose records are larger than a single fragment.
 * @throws Exception if failed
 */
@Test
public void input_largerecord() throws Exception {
    StringBuilder buf = new StringBuilder();
    for (int i = 0; i < 1000000; i++) {
        buf.append("Hello, world!");
    }
    Text record = new Text(buf.toString());
    final int count = 5;
    LocalFileSystem fs = FileSystem.getLocal(conf);
    Path path = new Path(folder.newFile("testing").toURI());
    try (SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, LongWritable.class,
            Text.class)) {
        LongWritable k = new LongWritable();
        Text v = new Text();
        for (int i = 0; i < count; i++) {
            k.set(i);
            v.set(record);
            writer.append(k, v);
        }
    }
    long fileLen = fs.getFileStatus(path).getLen();
    StringOption value = new StringOption();
    int index = 0;
    long offset = 0;
    while (offset < fileLen) {
        // Each window is only two sync intervals long, far smaller than a
        // single record, so most windows yield no records at all.
        long length = SequenceFile.SYNC_INTERVAL * 2;
        length = Math.min(length, fileLen - offset);
        try (ModelInput<StringOption> in = format.createInput(StringOption.class, fs, path, offset, length,
                new Counter())) {
            while (in.readTo(value)) {
                assertThat(value.get(), is(record));
                index++;
            }
            assertThat("eof", in.readTo(value), is(false));
        }
        offset += length;
    }
    assertThat(index, is(count));
}
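Both tests exercise the same invariant from opposite directions: however the file's byte range is carved into windows, each record is consumed exactly once, by the window that owns the sync marker preceding it. The first test uses many small records and randomly sized windows; the second uses records far larger than a window, showing that correctness does not depend on a record fitting inside the window that reads it.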
From source file:com.google.cloud.bigtable.beam.sequencefiles.SequenceFileSinkTest.java
License:Open Source License
@Test
@Category(NeedsRunner.class)
public void testSeqFileWriteAndRead() throws Throwable {
    List<KV<Text, Text>> data = Lists.newArrayList();
    for (int i = 0; i < 100; i++) {
        data.add(KV.of(new Text("key" + i), new Text("value" + i)));
    }
    ValueProvider<ResourceId> output = StaticValueProvider
            .of(LocalResources.fromFile(workDir.getRoot(), true));
    FilenamePolicy filenamePolicy = DefaultFilenamePolicy.fromStandardParameters(output, null, null, false);
    SequenceFileSink<Text, Text> sink = new SequenceFileSink<>(output, filenamePolicy, Text.class,
            WritableSerialization.class, Text.class, WritableSerialization.class);
    writePipeline.apply(Create.of(data)).apply(WriteFiles.to(sink).withNumShards(1));
    writePipeline.run().waitUntilFinish();

    // SYNC_INTERVAL serves as the source's minimum bundle size.
    SequenceFileSource<Text, Text> source = new SequenceFileSource<>(
            StaticValueProvider.of(workDir.getRoot().toString() + "/*"), Text.class,
            WritableSerialization.class, Text.class, WritableSerialization.class,
            SequenceFile.SYNC_INTERVAL);
    PAssert.that(readPipeline.apply(Read.from(source))).containsInAnyOrder(data);
    readPipeline.run();
}
From source file:com.google.cloud.bigtable.beam.sequencefiles.SequenceFileSource.java
License:Open Source License
/**
 * Constructs a new top level source.
 *
 * @param fileOrPatternSpec The path or pattern of the file(s) to read.
 * @param keyClass The {@link Class} of the key.
 * @param keySerialization The {@link Class} of the hadoop
 *     {@link org.apache.hadoop.io.serializer.Serialization} to use for the key.
 * @param valueClass The {@link Class} of the value.
 * @param valueSerialization The {@link Class} of the hadoop
 *     {@link org.apache.hadoop.io.serializer.Serialization} to use for the value.
 * @param minBundleSize The minimum bundle size in bytes; must be at least
 *     {@link SequenceFile#SYNC_INTERVAL}.
 */
SequenceFileSource(ValueProvider<String> fileOrPatternSpec, Class<K> keyClass,
        Class<? extends Serialization<? super K>> keySerialization, Class<V> valueClass,
        Class<? extends Serialization<? super V>> valueSerialization, long minBundleSize) {
    super(fileOrPatternSpec, minBundleSize);
    Preconditions.checkArgument(minBundleSize >= SequenceFile.SYNC_INTERVAL,
            "minBundleSize must be at least " + SequenceFile.SYNC_INTERVAL);
    this.keyClass = keyClass;
    this.valueClass = valueClass;
    this.keySerializationClass = keySerialization;
    this.valueSerializationClass = valueSerialization;
    this.coder = KvCoder.of(new HadoopSerializationCoder<>(keyClass, keySerialization),
            new HadoopSerializationCoder<>(valueClass, valueSerialization));
}
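The precondition spells out the contract the earlier examples relied on implicitly: a bundle smaller than one sync interval may contain no sync marker, so the source refuses to be configured below that floor. The tests that follow pass SequenceFile.SYNC_INTERVAL itself, the smallest bundle size the constructor accepts.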
From source file:com.google.cloud.bigtable.beam.sequencefiles.SequenceFileSourceTest.java
License:Open Source License
@Test
public void testSimpleWritable() throws IOException {
    Configuration config = new Configuration(false);
    List<KV<Text, Text>> data = Lists.newArrayList();
    for (int i = 0; i < 10; i++) {
        data.add(KV.of(new Text("key" + i), new Text("value" + i)));
    }

    // Write data to read
    File targetFile = workDir.newFile();
    try (Writer writer = SequenceFile.createWriter(config,
            Writer.file(new org.apache.hadoop.fs.Path(targetFile.toString())),
            Writer.keyClass(Text.class), Writer.valueClass(Text.class))) {
        for (KV<Text, Text> kv : data) {
            writer.append(kv.getKey(), kv.getValue());
        }
    }

    // Setup the source
    SequenceFileSource<Text, Text> source = new SequenceFileSource<>(
            StaticValueProvider.of(targetFile.getAbsolutePath()), Text.class, WritableSerialization.class,
            Text.class, WritableSerialization.class, SequenceFile.SYNC_INTERVAL);

    List<KV<Text, Text>> results = SourceTestUtils.readFromSource(source, null);
    assertThat(results, containsInAnyOrder(data.toArray()));
}
From source file:com.google.cloud.bigtable.beam.sequencefiles.SequenceFileSourceTest.java
License:Open Source License
@Test
public void testHBaseTypes() throws Exception {
    File targetFile = workDir.newFile();
    final List<KV<ImmutableBytesWritable, Result>> data = Lists.newArrayList();
    final int nRows = 10;
    for (int i = 0; i < nRows; i++) {
        String keyStr = String.format("%03d", i);
        ImmutableBytesWritable rowKey = new ImmutableBytesWritable(keyStr.getBytes());
        Result value = Result.create(Collections.singletonList(CellUtil.createCell(keyStr.getBytes(),
                ("family" + i).getBytes(), ("qualifier" + i).getBytes(), 123456, Type.Put.getCode(),
                ("value" + i).getBytes())));
        data.add(KV.of(rowKey, value));
    }

    // Write the file
    Configuration config = new Configuration(false);
    config.setStrings("io.serializations", ResultSerialization.class.getName(),
            WritableSerialization.class.getName());
    try (Writer writer = SequenceFile.createWriter(config,
            Writer.file(new org.apache.hadoop.fs.Path(targetFile.toString())),
            Writer.keyClass(ImmutableBytesWritable.class), Writer.valueClass(Result.class))) {
        for (KV<ImmutableBytesWritable, Result> kv : data) {
            writer.append(kv.getKey(), kv.getValue());
        }
    }

    // Read the file
    SequenceFileSource<ImmutableBytesWritable, Result> source = new SequenceFileSource<>(
            StaticValueProvider.of(targetFile.getAbsolutePath()), ImmutableBytesWritable.class,
            WritableSerialization.class, Result.class, ResultSerialization.class,
            SequenceFile.SYNC_INTERVAL);

    // Verify
    List<KV<ImmutableBytesWritable, Result>> actual = SourceTestUtils.readFromSource(source, null);
    assertThat(actual, hasSize(data.size()));
    Collections.sort(actual, new Comparator<KV<ImmutableBytesWritable, Result>>() {
        @Override
        public int compare(KV<ImmutableBytesWritable, Result> o1, KV<ImmutableBytesWritable, Result> o2) {
            return o1.getKey().compareTo(o2.getKey());
        }
    });
    for (int i = 0; i < data.size(); i++) {
        KV<ImmutableBytesWritable, Result> expectedKv = data.get(i);
        KV<ImmutableBytesWritable, Result> actualKv = actual.get(i);
        assertEquals(expectedKv.getKey(), actualKv.getKey());
        assertEquals(actualKv.getValue().rawCells().length, expectedKv.getValue().rawCells().length);
        for (int j = 0; j < expectedKv.getValue().rawCells().length; j++) {
            Cell expectedCell = expectedKv.getValue().rawCells()[j];
            Cell actualCell = actualKv.getValue().rawCells()[j];
            assertTrue(CellUtil.equals(expectedCell, actualCell));
            assertTrue(CellUtil.matchingValue(expectedCell, actualCell));
        }
    }
}
From source file:com.google.cloud.bigtable.beam.sequencefiles.SequenceFileSourceTest.java
License:Open Source License
@Test
public void testCompression() throws IOException {
    Configuration config = new Configuration(false);
    List<KV<Text, Text>> data = Lists.newArrayList();
    for (int i = 0; i < 10; i++) {
        data.add(KV.of(new Text("key" + i), new Text("value" + i)));
    }

    for (CompressionType compressionType : CompressionType.values()) {
        // Write data to read
        File targetFile = workDir.newFile();
        try (Writer writer = SequenceFile.createWriter(config,
                Writer.file(new org.apache.hadoop.fs.Path(targetFile.toString())),
                Writer.keyClass(Text.class), Writer.valueClass(Text.class),
                Writer.compression(compressionType))) {
            for (KV<Text, Text> kv : data) {
                writer.append(kv.getKey(), kv.getValue());
            }
        }

        // Setup the source
        SequenceFileSource<Text, Text> source = new SequenceFileSource<>(
                StaticValueProvider.of(targetFile.getAbsolutePath()), Text.class,
                WritableSerialization.class, Text.class, WritableSerialization.class,
                SequenceFile.SYNC_INTERVAL);

        List<KV<Text, Text>> results = SourceTestUtils.readFromSource(source, null);
        assertThat(results, containsInAnyOrder(data.toArray()));
    }
}