Example usage for org.apache.hadoop.io IntWritable IntWritable

Introduction

On this page you can find example usage for the no-argument constructor of org.apache.hadoop.io.IntWritable.

Prototype

public IntWritable() 
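
The no-argument constructor creates an IntWritable whose value defaults to 0; the value is normally filled in later with set(int) and read back with get(). Before the project examples below, here is a minimal, self-contained sketch of that pattern (the class name IntWritableDemo is only for illustration):

import org.apache.hadoop.io.IntWritable;

public class IntWritableDemo {
    public static void main(String[] args) {
        // An empty IntWritable starts out holding 0.
        IntWritable page = new IntWritable();
        System.out.println(page.get());   // 0

        // The usual pattern: create the instance once, then reuse it as a
        // mutable buffer instead of allocating a new object per record.
        for (int i = 1; i <= 3; i++) {
            page.set(i);
            System.out.println(page.get());   // 1, 2, 3
        }
    }
}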

Usage

From source file:com.github.ygf.pagerank.PageRankTopNReducer.java

License:Apache License

@Override
protected void cleanup(Context context) throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();
    Path titlesDir = new Path(conf.get("pagerank.titles_dir"));

    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
    Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
    IntWritable page = new IntWritable();
    Text title = new Text();

    float[] pageRanks = new float[topN.size()];
    String[] titles = new String[topN.size()];

    // The order of the entries is reversed. The priority queue is in
    // non-decreasing order and we want the highest PageRank first.
    for (int i = pageRanks.length - 1; i >= 0; i--) {
        Map.Entry<Float, Integer> entry = topN.poll();
        // Get the title of the page from the title index.
        page.set(entry.getValue());
        MapFileOutputFormat.getEntry(readers, partitioner, page, title);
        pageRanks[i] = entry.getKey();
        titles[i] = title.toString();
    }

    for (MapFile.Reader reader : readers) {
        reader.close();
    }

    for (int i = 0; i < pageRanks.length; i++) {
        context.write(new FloatWritable(pageRanks[i]), new Text(titles[i]));
    }
}

From source file:com.gotometrics.orderly.example.IntExample.java

License:Apache License

public void serializationExamples() throws Exception {
    IntWritableRowKey i = new IntWritableRowKey();
    IntWritable w = new IntWritable();
    ImmutableBytesWritable buffer = new ImmutableBytesWritable();
    byte[] b;

    /* Serialize and deserialize into an ImmutableBytesWritable */
    w.set(-93214);
    b = new byte[i.getSerializedLength(w)];
    buffer.set(b);
    i.serialize(w, buffer);
    buffer.set(b, 0, b.length);
    System.out.println("deserialize(serialize(-93214)) = " + ((IntWritable) i.deserialize(buffer)).get());

    /* Serialize and deserialize into a byte array (descending sort,
     * with two reserved bits set to 0x3)
     */
    i.setReservedBits(2).setReservedValue(0x3).setOrder(Order.DESCENDING);
    w.set(0);
    System.out.println("deserialize(serialize(0)) = " + ((IntWritable) i.deserialize(i.serialize(w))).get());

    /* Serialize and deserialize NULL into a byte array */
    System.out.println("deserialize(serialize(NULL)) = " + i.deserialize(i.serialize(null)));
}

From source file:com.gotometrics.orderly.FixedIntegerRowKey.java

License:Apache License

protected Object toIntWritable(Object o) {
    if (o == null || o instanceof IntWritable)
        return o;
    if (iw == null)
        iw = new IntWritable();
    iw.set((Integer) o);
    return iw;
}

From source file:com.gotometrics.orderly.FixedIntWritableRowKey.java

License:Apache License

@Override
public Object deserialize(ImmutableBytesWritable w) throws IOException {
    int offset = w.getOffset();
    byte[] s = w.get();

    int i = Bytes.toInt(s, offset) ^ Integer.MIN_VALUE ^ order.mask();
    RowKeyUtils.seek(w, Bytes.SIZEOF_INT);

    if (iw == null)
        iw = new IntWritable();
    iw.set(i);
    return iw;
}

From source file:com.gotometrics.orderly.IntWritableRowKey.java

License:Apache License

@Override
Writable createWritable() {
    return new IntWritable();
}

From source file:com.gsvic.csmr.io.InputData.java

License:Apache License

/**
 * Reads the Document-Frequency file
 * @param conf
 * @param dfFile
 * @return Returns the Document-Frequency data in a HashMap
 * @throws IOException 
 */
public static HashMap<IntWritable, LongWritable> readDf(Configuration conf, Path dfFile) throws IOException {

    FileSystem filesystem = FileSystem.get(conf);
    SequenceFile.Reader reader;
    reader = new SequenceFile.Reader(filesystem, dfFile, conf);

    HashMap<IntWritable, LongWritable> dcf = new HashMap<>();
    IntWritable key = new IntWritable();
    LongWritable value = new LongWritable();

    while (reader.next(key, value)) {
        dcf.put(new IntWritable(key.get()), new LongWritable(value.get()));
    }

    return dcf;
}
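
Note that readDf copies each key and value into fresh IntWritable and LongWritable objects before putting them in the map: SequenceFile.Reader.next(key, value) reuses the same Writable instances on every iteration, so storing them directly would leave every map entry pointing at the last record read. The readDictionary method below follows the same copy-before-put pattern with Text keys and IntWritable values. A hedged sketch of how a caller might use readDf (the path "df/part-r-00000" is illustrative, not taken from the project):

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;

import com.gsvic.csmr.io.InputData;

public class ReadDfUsage {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();

        // Load the document-frequency sequence file into memory.
        HashMap<IntWritable, LongWritable> df =
                InputData.readDf(conf, new Path("df/part-r-00000"));

        // Each entry holds its own IntWritable/LongWritable, so iterating is safe.
        for (Map.Entry<IntWritable, LongWritable> e : df.entrySet()) {
            System.out.println(e.getKey().get() + " -> " + e.getValue().get());
        }
    }
}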

From source file:com.gsvic.csmr.io.InputData.java

License:Apache License

/**
 * Reads the dictionary file
 * @param conf
 * @param dict
 * @return returns the dictionary in a HashMap
 * @throws IOException 
 */
public static HashMap<Text, IntWritable> readDictionary(Configuration conf, Path dict) throws IOException {
    FileSystem filesystem = FileSystem.get(conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(filesystem, dict, conf);

    HashMap<Text, IntWritable> dictMap = new HashMap<>();
    Text key = new Text();
    IntWritable value = new IntWritable();

    while (reader.next(key, value)) {
        dictMap.put(new Text(key), new IntWritable(value.get()));
    }

    return dictMap;
}

From source file:com.hazelcast.jet.hadoop.impl.ReadHdfsPTest.java

License:Open Source License

private static void writeToSequenceFile(Configuration conf, Path path) throws IOException {
    IntWritable key = new IntWritable();
    Text value = new Text();
    Option fileOption = Writer.file(path);
    Option keyClassOption = Writer.keyClass(key.getClass());
    Option valueClassOption = Writer.valueClass(value.getClass());
    try (Writer writer = SequenceFile.createWriter(conf, fileOption, keyClassOption, valueClassOption)) {
        for (int i = 0; i < ENTRIES.length; i++) {
            key.set(i);
            value.set(ENTRIES[i]);
            writer.append(key, value);
        }
    }
}

From source file:com.hazelcast.jet.impl.connector.hadoop.ReadHdfsPTest.java

License:Open Source License

private void writeToSequenceFile(Configuration conf, Path path) throws IOException {
    IntWritable key = new IntWritable();
    Text value = new Text();
    Option fileOption = Writer.file(path);
    Option keyClassOption = Writer.keyClass(key.getClass());
    Option valueClassOption = Writer.valueClass(value.getClass());
    try (Writer writer = SequenceFile.createWriter(conf, fileOption, keyClassOption, valueClassOption)) {
        for (int i = 0; i < ENTRIES.length; i++) {
            key.set(i);
            value.set(ENTRIES[i]);
            writer.append(key, value);
        }
    }
}

From source file:com.hdfs.concat.crush.Crush.java

License:Apache License

void writeDirs() throws IOException {

    print(Verbosity.INFO, "\n\nUsing temporary directory " + tmpDir.toUri().getPath());

    FileStatus status = fs.getFileStatus(srcDir);

    Path tmpIn = new Path(tmpDir, "in");

    bucketFiles = new Path(tmpIn, "dirs");
    partitionMap = new Path(tmpIn, "partition-map");
    counters = new Path(tmpIn, "counters");

    skippedFiles = new HashSet<String>();

    /*
     * Prefer the path returned by the status because it is always fully qualified.
     */
    List<Path> dirs = asList(status.getPath());

    Text key = new Text();
    Text value = new Text();

    Writer writer = SequenceFile.createWriter(fs, job, bucketFiles, Text.class, Text.class,
            CompressionType.BLOCK);

    int numPartitions = Integer.parseInt(job.get("mapred.reduce.tasks"));

    Bucketer partitionBucketer = new Bucketer(numPartitions, 0, false);
    partitionBucketer.reset("partition-map");

    jobCounters = new Counters();

    try {
        while (!dirs.isEmpty()) {
            List<Path> nextLevel = new LinkedList<Path>();

            for (Path dir : dirs) {
                jobCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);

                print(Verbosity.INFO, "\n\n" + dir.toUri().getPath());

                FileStatus[] contents = fs.listStatus(dir, new PathFilter() {
                    @Override
                    public boolean accept(Path testPath) {
                        if (ignoredFiles == null)
                            return true;
                        ignoredFiles.reset(testPath.toUri().getPath());
                        return !ignoredFiles.matches();
                    }

                });

                if (contents == null || contents.length == 0) {
                    print(Verbosity.INFO, " is empty");

                    jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                } else {
                    List<FileStatus> crushables = new ArrayList<FileStatus>(contents.length);
                    Set<String> uncrushedFiles = new HashSet<String>(contents.length);

                    long crushableBytes = 0;

                    /*
                     * Queue sub directories for subsequent inspection and examine the files in this directory.
                     */
                    for (FileStatus content : contents) {
                        Path path = content.getPath();

                        if (content.isDir()) {
                            nextLevel.add(path);
                        } else {
                            boolean changed = uncrushedFiles.add(path.toUri().getPath());

                            assert changed : path.toUri().getPath();

                            long fileLength = content.getLen();

                            if (fileLength <= maxEligibleSize) {
                                crushables.add(content);
                                crushableBytes += fileLength;
                            }
                        }
                    }

                    /*
                     * We found a directory with data in it. Make sure we know how to name the crush output file and then increment the
                     * number of files we found.
                     */
                    if (!uncrushedFiles.isEmpty()) {
                        if (-1 == findMatcher(dir)) {
                            throw new IllegalArgumentException(
                                    "Could not find matching regex for directory: " + dir);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_FOUND, uncrushedFiles.size());
                    }

                    if (0 == crushableBytes) {
                        print(Verbosity.INFO, " has no crushable files");

                        jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                    } else {
                        /*
                         * We found files to consider for crushing.
                         */
                        long nBlocks = crushableBytes / dfsBlockSize;

                        if (nBlocks * dfsBlockSize != crushableBytes) {
                            nBlocks++;
                        }

                        /*
                         * maxFileBlocks will be huge in v1 mode, which will lead to one bucket per directory.
                         */
                        long dirBuckets = nBlocks / maxFileBlocks;

                        if (dirBuckets * maxFileBlocks != nBlocks) {
                            dirBuckets++;
                        }

                        if (dirBuckets > Integer.MAX_VALUE) {
                            throw new AssertionError("Too many buckets: " + dirBuckets);
                        }

                        Bucketer directoryBucketer = new Bucketer((int) dirBuckets, excludeSingleFileDirs);

                        directoryBucketer.reset(getPathPart(dir));

                        for (FileStatus file : crushables) {
                            directoryBucketer.add(new FileStatusHasSize(file));
                        }

                        List<Bucket> crushFiles = directoryBucketer.createBuckets();

                        if (crushFiles.isEmpty()) {
                            jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                        } else {
                            nBuckets += crushFiles.size();

                            jobCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

                            print(Verbosity.INFO, " => " + crushFiles.size() + " output files");

                            /*
                             * Write out the mapping between a bucket and a file.
                             */
                            for (Bucket crushFile : crushFiles) {
                                String bucketId = crushFile.name();

                                List<String> bucketFiles = crushFile.contents();

                                print(Verbosity.INFO,
                                        format("\n  Output %s will include %,d input bytes from %,d files",
                                                bucketId, crushFile.size(), bucketFiles.size()));

                                key.set(bucketId);

                                for (String f : bucketFiles) {
                                    boolean changed = uncrushedFiles.remove(f);

                                    assert changed : f;

                                    pathMatcher.reset(f);

                                    pathMatcher.matches();

                                    value.set(pathMatcher.group(5));

                                    writer.append(key, value);

                                    /*
                                     * Print the input file with four leading spaces.
                                     */
                                    print(Verbosity.VERBOSE, "\n    " + f);
                                }

                                jobCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, bucketFiles.size());

                                partitionBucketer.add(crushFile);
                            }
                        }
                    }

                    if (!uncrushedFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n\n  Skipped " + uncrushedFiles.size() + " files");

                        for (String uncrushed : uncrushedFiles) {
                            print(Verbosity.VERBOSE, "\n    " + uncrushed);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_SKIPPED, uncrushedFiles.size());
                    }

                    skippedFiles.addAll(uncrushedFiles);
                }
            }

            dirs = nextLevel;
        }
    } finally {
        try {
            writer.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + bucketFiles, e);
        }
    }

    /*
     * Now that we have processed all the directories, write the partition map.
     */
    List<Bucket> partitions = partitionBucketer.createBuckets();

    assert partitions.size() <= numPartitions;

    writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    IntWritable partNum = new IntWritable();

    try {
        for (Bucket partition : partitions) {
            String partitionName = partition.name();

            partNum.set(Integer.parseInt(partitionName.substring(partitionName.lastIndexOf('-') + 1)));

            for (String bucketId : partition.contents()) {
                key.set(bucketId);

                writer.append(key, partNum);
            }
        }
    } finally {
        try {
            writer.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + partitionMap, e);
        }
    }

    DataOutputStream countersStream = fs.create(this.counters);

    try {
        jobCounters.write(countersStream);
    } finally {
        try {
            countersStream.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + partitionMap, e);
        }
    }
}