Example usage for org.apache.hadoop.io IntWritable set

Introduction

On this page you can find example usage for the org.apache.hadoop.io.IntWritable.set method.

Prototype

public void set(int value) 

Document

Set the value of this IntWritable.
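
As a minimal sketch (not taken from any of the projects listed below), the following snippet shows the basic pattern: an IntWritable is a mutable wrapper around an int, so a single instance is typically reused by calling set before each use rather than allocating a new object.

import org.apache.hadoop.io.IntWritable;

public class IntWritableSetExample {
    public static void main(String[] args) {
        // IntWritable is a mutable box around an int; reuse one instance
        // and update it with set(...) instead of creating a new object each time.
        IntWritable count = new IntWritable();

        for (int i = 0; i < 3; i++) {
            count.set(i);                    // update the wrapped value
            System.out.println(count.get()); // prints 0, 1, 2
        }
    }
}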

Usage

From source file:com.gotometrics.orderly.example.IntExample.java

License:Apache License

public void serializationExamples() throws Exception {
    IntWritableRowKey i = new IntWritableRowKey();
    IntWritable w = new IntWritable();
    ImmutableBytesWritable buffer = new ImmutableBytesWritable();
    byte[] b;

    /* Serialize and deserialize into an ImmutableBytesWritable */
    w.set(-93214);
    b = new byte[i.getSerializedLength(w)];
    buffer.set(b);
    i.serialize(w, buffer);
    buffer.set(b, 0, b.length);
    System.out.println("deserialize(serialize(-93214)) = " + ((IntWritable) i.deserialize(buffer)).get());

    /* Serialize and deserialize into a byte array (descending sort,
     * with two reserved bits set to 0x3)
     */
    i.setReservedBits(2).setReservedValue(0x3).setOrder(Order.DESCENDING);
    w.set(0);
    System.out.println("deserialize(serialize(0)) = " + ((IntWritable) i.deserialize(i.serialize(w))).get());

    /* Serialize and deserialize NULL into a byte array */
    System.out.println("deserialize(serialize(NULL)) = " + i.deserialize(i.serialize(null)));
}

From source file:com.gotometrics.orderly.FixedUnsignedIntWritableRowKey.java

License:Apache License

protected IntWritable invertSign(IntWritable iw) {
    iw.set(iw.get() ^ Integer.MIN_VALUE);
    return iw;
}

From source file:com.hazelcast.jet.hadoop.impl.ReadHdfsPTest.java

License:Open Source License

private static void writeToSequenceFile(Configuration conf, Path path) throws IOException {
    IntWritable key = new IntWritable();
    Text value = new Text();
    Option fileOption = Writer.file(path);
    Option keyClassOption = Writer.keyClass(key.getClass());
    Option valueClassOption = Writer.valueClass(value.getClass());
    try (Writer writer = SequenceFile.createWriter(conf, fileOption, keyClassOption, valueClassOption)) {
        for (int i = 0; i < ENTRIES.length; i++) {
            key.set(i);
            value.set(ENTRIES[i]);
            writer.append(key, value);
        }
    }
}

From source file:com.hazelcast.jet.impl.connector.hadoop.ReadHdfsPTest.java

License:Open Source License

private void writeToSequenceFile(Configuration conf, Path path) throws IOException {
    IntWritable key = new IntWritable();
    Text value = new Text();
    Option fileOption = Writer.file(path);
    Option keyClassOption = Writer.keyClass(key.getClass());
    Option valueClassOption = Writer.valueClass(value.getClass());
    try (Writer writer = SequenceFile.createWriter(conf, fileOption, keyClassOption, valueClassOption)) {
        for (int i = 0; i < ENTRIES.length; i++) {
            key.set(i);
            value.set(ENTRIES[i]);
            writer.append(key, value);
        }
    }
}

From source file:com.hdfs.concat.crush.Crush.java

License:Apache License

void writeDirs() throws IOException {

    print(Verbosity.INFO, "\n\nUsing temporary directory " + tmpDir.toUri().getPath());

    FileStatus status = fs.getFileStatus(srcDir);

    Path tmpIn = new Path(tmpDir, "in");

    bucketFiles = new Path(tmpIn, "dirs");
    partitionMap = new Path(tmpIn, "partition-map");
    counters = new Path(tmpIn, "counters");

    skippedFiles = new HashSet<String>();

    /*
     * Prefer the path returned by the status because it is always fully qualified.
     */
    List<Path> dirs = asList(status.getPath());

    Text key = new Text();
    Text value = new Text();

    Writer writer = SequenceFile.createWriter(fs, job, bucketFiles, Text.class, Text.class,
            CompressionType.BLOCK);

    int numPartitions = Integer.parseInt(job.get("mapred.reduce.tasks"));

    Bucketer partitionBucketer = new Bucketer(numPartitions, 0, false);
    partitionBucketer.reset("partition-map");

    jobCounters = new Counters();

    try {
        while (!dirs.isEmpty()) {
            List<Path> nextLevel = new LinkedList<Path>();

            for (Path dir : dirs) {
                jobCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);

                print(Verbosity.INFO, "\n\n" + dir.toUri().getPath());

                FileStatus[] contents = fs.listStatus(dir, new PathFilter() {
                    @Override
                    public boolean accept(Path testPath) {
                        if (ignoredFiles == null)
                            return true;
                        ignoredFiles.reset(testPath.toUri().getPath());
                        return !ignoredFiles.matches();
                    }

                });

                if (contents == null || contents.length == 0) {
                    print(Verbosity.INFO, " is empty");

                    jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                } else {
                    List<FileStatus> crushables = new ArrayList<FileStatus>(contents.length);
                    Set<String> uncrushedFiles = new HashSet<String>(contents.length);

                    long crushableBytes = 0;

                    /*
                     * Queue sub directories for subsequent inspection and examine the files in this directory.
                     */
                    for (FileStatus content : contents) {
                        Path path = content.getPath();

                        if (content.isDir()) {
                            nextLevel.add(path);
                        } else {
                            boolean changed = uncrushedFiles.add(path.toUri().getPath());

                            assert changed : path.toUri().getPath();

                            long fileLength = content.getLen();

                            if (fileLength <= maxEligibleSize) {
                                crushables.add(content);
                                crushableBytes += fileLength;
                            }
                        }
                    }

                    /*
                     * We found a directory with data in it. Make sure we know how to name the crush output file and then increment the
                     * number of files we found.
                     */
                    if (!uncrushedFiles.isEmpty()) {
                        if (-1 == findMatcher(dir)) {
                            throw new IllegalArgumentException(
                                    "Could not find matching regex for directory: " + dir);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_FOUND, uncrushedFiles.size());
                    }

                    if (0 == crushableBytes) {
                        print(Verbosity.INFO, " has no crushable files");

                        jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                    } else {
                        /*
                         * We found files to consider for crushing.
                         */
                        long nBlocks = crushableBytes / dfsBlockSize;

                        if (nBlocks * dfsBlockSize != crushableBytes) {
                            nBlocks++;
                        }

                        /*
                         * maxFileBlocks will be huge in v1 mode, which will lead to one bucket per directory.
                         */
                        long dirBuckets = nBlocks / maxFileBlocks;

                        if (dirBuckets * maxFileBlocks != nBlocks) {
                            dirBuckets++;
                        }

                        if (dirBuckets > Integer.MAX_VALUE) {
                            throw new AssertionError("Too many buckets: " + dirBuckets);
                        }

                        Bucketer directoryBucketer = new Bucketer((int) dirBuckets, excludeSingleFileDirs);

                        directoryBucketer.reset(getPathPart(dir));

                        for (FileStatus file : crushables) {
                            directoryBucketer.add(new FileStatusHasSize(file));
                        }

                        List<Bucket> crushFiles = directoryBucketer.createBuckets();

                        if (crushFiles.isEmpty()) {
                            jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                        } else {
                            nBuckets += crushFiles.size();

                            jobCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

                            print(Verbosity.INFO, " => " + crushFiles.size() + " output files");

                            /*
                             * Write out the mapping between a bucket and a file.
                             */
                            for (Bucket crushFile : crushFiles) {
                                String bucketId = crushFile.name();

                                List<String> bucketFiles = crushFile.contents();

                                print(Verbosity.INFO,
                                        format("\n  Output %s will include %,d input bytes from %,d files",
                                                bucketId, crushFile.size(), bucketFiles.size()));

                                key.set(bucketId);

                                for (String f : bucketFiles) {
                                    boolean changed = uncrushedFiles.remove(f);

                                    assert changed : f;

                                    pathMatcher.reset(f);

                                    pathMatcher.matches();

                                    value.set(pathMatcher.group(5));

                                    writer.append(key, value);

                                    /*
                                     * Print the input file with four leading spaces.
                                     */
                                    print(Verbosity.VERBOSE, "\n    " + f);
                                }

                                jobCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, bucketFiles.size());

                                partitionBucketer.add(crushFile);
                            }
                        }
                    }

                    if (!uncrushedFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n\n  Skipped " + uncrushedFiles.size() + " files");

                        for (String uncrushed : uncrushedFiles) {
                            print(Verbosity.VERBOSE, "\n    " + uncrushed);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_SKIPPED, uncrushedFiles.size());
                    }

                    skippedFiles.addAll(uncrushedFiles);
                }
            }

            dirs = nextLevel;
        }
    } finally {
        try {
            writer.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + bucketFiles, e);
        }
    }

    /*
     * Now that we have processed all the directories, write the partition map.
     */
    List<Bucket> partitions = partitionBucketer.createBuckets();

    assert partitions.size() <= numPartitions;

    writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    IntWritable partNum = new IntWritable();

    try {
        for (Bucket partition : partitions) {
            String partitionName = partition.name();

            partNum.set(Integer.parseInt(partitionName.substring(partitionName.lastIndexOf('-') + 1)));

            for (String bucketId : partition.contents()) {
                key.set(bucketId);

                writer.append(key, partNum);
            }
        }
    } finally {
        try {
            writer.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + partitionMap, e);
        }
    }

    DataOutputStream countersStream = fs.create(this.counters);

    try {
        jobCounters.write(countersStream);
    } finally {
        try {
            countersStream.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + partitionMap, e);
        }
    }
}

From source file:com.hdfs.concat.crush.CrushPartitionerTest.java

License:Apache License

@Test
public void partition() throws IOException {

    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-3");
    partNum.set(1);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(2);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(3);

    partitioner.configure(job);

    Text fileName = new Text();

    key.set("bucket-1");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-2");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-3");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(1));
    }

    key.set("bucket-4");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-5");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-6");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }
}

From source file:com.hdfs.concat.crush.CrushPartitionerTest.java

License:Apache License

@Test
public void partitionWithFewerPartitionsThanReduceTasks() throws IOException {

    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-3");
    partNum.set(1);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(2);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(40);

    partitioner.configure(job);

    Text fileName = new Text();

    key.set("bucket-1");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-2");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-3");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(1));
    }

    key.set("bucket-4");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-5");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-6");

    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }
}

From source file:com.hdfs.concat.crush.CrushPartitionerTest.java

License:Apache License

@Test
public void noDupes() throws IOException {

    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable value = new IntWritable();

    key.set("bucket-1");
    value.set(0);
    writer.append(key, value);

    key.set("bucket-2");
    value.set(0);
    writer.append(key, value);

    key.set("bucket-2");
    value.set(1);
    writer.append(key, value);

    writer.close();

    job.setNumReduceTasks(3);

    try {
        partitioner.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!e.getMessage().contains("bucket-2")) {
            throw e;
        }
    }
}

From source file:com.hdfs.concat.crush.CrushPartitionerTest.java

License:Apache License

@Test
public void partitionTooLow() throws IOException {

    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(-1);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(3);

    try {
        partitioner.configure(job);
        fail("No such thing as a negitave partition");
    } catch (IllegalArgumentException e) {
        if (!e.getMessage().contains("Partition -1")) {
            throw e;
        }
    }
}

From source file:com.hdfs.concat.crush.CrushPartitionerTest.java

License:Apache License

@Test
public void partitionTooHigh() throws IOException {

    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(3);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(3);

    try {
        partitioner.configure(job);
        fail("Parition with id 3 is not allowed with 3 reduce tasks");
    } catch (IllegalArgumentException e) {
        if (!e.getMessage().contains("Partition 3")) {
            throw e;
        }
    }
}