Example usage for org.apache.hadoop.io.NullWritable.get()

List of usage examples for org.apache.hadoop.io.NullWritable.get()

Introduction

On this page you can find example usages of org.apache.hadoop.io.NullWritable.get().

Prototype

public static NullWritable get() 

Document

Returns the single instance of this class.
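
NullWritable.get() always returns the same shared instance, and that instance serializes to zero bytes. It is typically passed wherever a MapReduce key or value slot carries no information. As a minimal sketch (assuming the standard org.apache.hadoop.mapreduce API; the DistinctLineMapper class name is hypothetical), a mapper can emit the shared instance as its output value when only the keys matter:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper: only the keys are of interest, so the shared
// NullWritable instance stands in for the value.
public class DistinctLineMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // NullWritable.get() returns the single instance; it adds no bytes
        // to the serialized output record.
        context.write(value, NullWritable.get());
    }
}

The examples below follow the same pattern: wherever a key or value is not needed, the code passes NullWritable.get() instead of constructing a placeholder object.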

Usage

From source file: co.cask.hydrator.plugin.batch.sink.HiveBatchSink.java

License: Apache License

@Override
public void transform(StructuredRecord input, Emitter<KeyValue<NullWritable, HCatRecord>> emitter)
        throws Exception {
    HCatRecord hCatRecord = recordToHCatRecordTransformer.toHCatRecord(input);
    emitter.emit(new KeyValue<>(NullWritable.get(), hCatRecord));
}

From source file: co.cask.hydrator.plugin.batch.sink.MongoDBBatchSink.java

License: Apache License

@Override
public void transform(StructuredRecord input, Emitter<KeyValue<NullWritable, BSONWritable>> emitter)
        throws Exception {
    BasicDBObjectBuilder bsonBuilder = BasicDBObjectBuilder.start();
    for (Schema.Field field : input.getSchema().getFields()) {
        bsonBuilder.add(field.getName(), input.get(field.getName()));
    }
    emitter.emit(new KeyValue<>(NullWritable.get(), new BSONWritable(bsonBuilder.get())));
}

From source file: co.cask.hydrator.plugin.HDFSSink.java

License: Apache License

@Override
public void transform(StructuredRecord input, Emitter<KeyValue<Text, NullWritable>> emitter) throws Exception {
    List<String> dataArray = new ArrayList<>();
    for (Schema.Field field : input.getSchema().getFields()) {
        dataArray.add(input.get(field.getName()).toString());
    }
    emitter.emit(new KeyValue<>(new Text(Joiner.on(",").join(dataArray)), NullWritable.get()));
}

From source file: co.cask.hydrator.plugin.sink.HBaseSink.java

License: Apache License

@Override
public void transform(StructuredRecord input, Emitter<KeyValue<NullWritable, Mutation>> emitter)
        throws Exception {
    Put put = recordPutTransformer.toPut(input);
    org.apache.hadoop.hbase.client.Put hbasePut = new org.apache.hadoop.hbase.client.Put(put.getRow());
    for (Map.Entry<byte[], byte[]> entry : put.getValues().entrySet()) {
        hbasePut.add(config.columnFamily.getBytes(), entry.getKey(), entry.getValue());
    }
    emitter.emit(new KeyValue<NullWritable, Mutation>(NullWritable.get(), hbasePut));
}

From source file: com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License: Apache License

/**
 * Write out a SequenceFile that can be read by TotalOrderPartitioner
 * that contains the split points in startKeys.
 * @param partitionsPath output path for SequenceFile
 * @param startKeys the region start keys
 */
private static void writePartitions(Configuration conf, Path partitionsPath,
        List<ImmutableBytesWritable> startKeys) throws IOException {
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }

    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<ImmutableBytesWritable>(startKeys);

    ImmutableBytesWritable first = sorted.first();
    if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: "
                + Bytes.toStringBinary(first.get()));
    }
    sorted.remove(first);

    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath,
            ImmutableBytesWritable.class, NullWritable.class);

    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}

From source file: com.alexholmes.hadooputils.combine.avro.mapred.CombineAvroInputFormatTest.java

License: Apache License

@SuppressWarnings("deprecation")
public void testProjection() throws Exception {
    JobConf job = new JobConf();

    Integer defaultRank = new Integer(-1);

    String jsonSchema = "{\"type\":\"record\"," + "\"name\":\"org.apache.avro.mapred.Pair\"," + "\"fields\": [ "
            + "{\"name\":\"rank\", \"type\":\"int\", \"default\": -1},"
            + "{\"name\":\"value\", \"type\":\"long\"}" + "]}";

    Schema readerSchema = Schema.parse(jsonSchema);

    AvroJob.setInputSchema(job, readerSchema);

    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path inputPath = new Path(dir + "/out" + "/part-00000" + AvroOutputFormat.EXT);
    FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath);
    FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job);

    AvroRecordReader<Pair<Integer, Long>> recordReader = new AvroRecordReader<Pair<Integer, Long>>(job,
            fileSplit);

    AvroWrapper<Pair<Integer, Long>> inputPair = new AvroWrapper<Pair<Integer, Long>>(null);
    NullWritable ignore = NullWritable.get();

    long sumOfCounts = 0;
    long numOfCounts = 0;
    while (recordReader.next(inputPair, ignore)) {
        Assert.assertEquals((Integer) inputPair.datum().get(0), defaultRank);
        sumOfCounts += (Long) inputPair.datum().get(1);
        numOfCounts++;
    }

    Assert.assertEquals(numOfCounts, WordCountUtil.COUNTS.size());

    long actualSumOfCounts = 0;
    for (Long count : WordCountUtil.COUNTS.values()) {
        actualSumOfCounts += count;
    }

    Assert.assertEquals(sumOfCounts, actualSumOfCounts);
}

From source file: com.alexholmes.hadooputils.sort.SortInputSampler.java

License: Apache License

public static <K, V> void writePartitionFile(JobConf job, Sampler<K, V> sampler) throws IOException {
    Configuration conf = job;
    // Use the input format defined in the job. NOT, the one provided by
    // the parent class's writePartitionFile() method, which will be a plain
    // TextInputFormat, by default
    final InputFormat inf = job.getInputFormat();
    int numPartitions = job.getNumReduceTasks();
    K[] samples = (K[]) sampler.getSample(inf, job);
    RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator();
    Arrays.sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.getPartitionFile(job));
    FileSystem fs = dst.getFileSystem(conf);
    if (fs.exists(dst)) {
        fs.delete(dst, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(),
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    float stepSize = samples.length / (float) numPartitions;
    int last = -1;
    for (int i = 1; i < numPartitions; ++i) {
        int k = Math.round(stepSize * i);
        while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
            ++k;
        }
        writer.append(samples[k], nullValue);
        last = k;
    }
    writer.close();
}

From source file: com.alexholmes.hadooputils.sort.SortReduce.java

License: Apache License

@Override
public void reduce(final Text key, final Iterator<Text> values,
        final OutputCollector<Text, NullWritable> output, final Reporter reporter) throws IOException {
    while (values.hasNext()) {
        output.collect(values.next(), NullWritable.get());
        if (sortConfig.getUnique()) {
            break;
        }
    }
}

From source file: com.asakusafw.bulkloader.collector.ExportFileSendTest.java

License: Apache License

@SuppressWarnings("unchecked")
private File prepareInput(String path) throws IOException {
    File result = folder.newFile();
    Path p = new Path(new File(path).toURI());
    FileSystem fs = p.getFileSystem(new Configuration());
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, fs.getConf());
    try {
        Writable buffer = (Writable) reader.getValueClass().newInstance();
        ModelOutput<Writable> output = (ModelOutput<Writable>) TemporaryStorage.openOutput(fs.getConf(),
                reader.getValueClass(), new BufferedOutputStream(new FileOutputStream(result)));
        try {
            while (reader.next(NullWritable.get(), buffer)) {
                output.write(buffer);
            }
        } finally {
            output.close();
        }
    } catch (Exception e) {
        throw new AssertionError(e);
    } finally {
        reader.close();
    }
    return result;
}

From source file: com.asakusafw.compiler.fileio.HadoopFileIoProcessorTest.java

License: Apache License

private List<Ex1> getList(Class<? extends FileExporterDescription> exporter) {
    try {
        FileExporterDescription instance = exporter.newInstance();
        Path path = new Path(Location.fromPath(instance.getPathPrefix(), '/').toString());
        FileSystem fs = path.getFileSystem(tester.configuration());
        FileStatus[] statuses = fs.globStatus(path);
        List<Ex1> results = new ArrayList<>();
        for (FileStatus status : statuses) {
            try (SequenceFile.Reader reader = new SequenceFile.Reader(tester.configuration(),
                    SequenceFile.Reader.file(fs.makeQualified(status.getPath())))) {
                Ex1 model = new Ex1();
                while (reader.next(NullWritable.get(), model)) {
                    Ex1 copy = new Ex1();
                    copy.copyFrom(model);
                    results.add(copy);
                }
            }
        }
        Collections.sort(results, new Comparator<Ex1>() {
            @Override
            public int compare(Ex1 o1, Ex1 o2) {
                return o1.getSidOption().compareTo(o2.getSidOption());
            }
        });
        return results;
    } catch (Exception e) {
        throw new AssertionError(e);
    }
}