Example usage for org.apache.hadoop.io IntWritable set

Introduction

On this page you can find example usage of org.apache.hadoop.io.IntWritable.set.

Prototype

public void set(int value) 

Document

Set the value of this IntWritable.
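
As a minimal sketch (not taken from the examples below), set(int) is typically used to reuse a single IntWritable instance across iterations instead of allocating a new object per record; the class and constructor names here are standard Hadoop API, the surrounding class is illustrative only.

import org.apache.hadoop.io.IntWritable;

public class IntWritableSetExample {
    public static void main(String[] args) {
        IntWritable writable = new IntWritable(); // holds 0 by default
        for (int i = 0; i < 3; i++) {
            writable.set(i);                      // replace the held value
            System.out.println(writable.get());   // prints 0, 1, 2
        }
    }
}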

Usage

From source file:com.hdfs.concat.crush.CrushStandAloneSequenceFileTest.java

License:Apache License

private void createFile(File dir, String fileName, int key, int count) throws IOException {
    File file = new File(dir, fileName);

    Writer writer = SequenceFile.createWriter(FileSystem.get(job), job, new Path(file.getAbsolutePath()),
            Text.class, IntWritable.class);

    Text text = new Text(Integer.toString(key));
    IntWritable value = new IntWritable();

    for (int i = 0, j = 0; i < count; i++, j = j == 9 ? 0 : j + 1) {
        value.set(j);

        writer.append(text, value);
    }

    writer.close();
}

From source file:com.ibm.bi.dml.runtime.matrix.sort.ReadWithZeros.java

License:Open Source License

public void readNextKeyValuePairs(DoubleWritable readKey, IntWritable readValue) throws IOException {
    try {
        if (contain0s && justFound0) {
            readKey.set(keyAfterZero.get());
            readValue.set(valueAfterZero.get());
            contain0s = false;
        } else {
            readKey.readFields(currentStream);
            readValue.readFields(currentStream);
        }
    } catch (EOFException e) {
        // case in which zero is the maximum value in the matrix. 
        // The zero value from the last entry is not present in the input sorted matrix, but needs to be accounted for.
        if (contain0s && !justFound0) {
            justFound0 = true;
            readKey.set(0);
            readValue.set((int) numZeros);
        } else {
            throw e;
        }
    }

    if (contain0s && !justFound0 && readKey.get() >= 0) {
        justFound0 = true;
        keyAfterZero.set(readKey.get());
        valueAfterZero.set(readValue.get());
        readKey.set(0);
        readValue.set((int) numZeros);
    }
}

From source file:com.jeffy.fbds.SequenceFileWriter.java

License:Apache License

public static void main(String[] args) throws IOException {
    String uri = args[0];
    Configuration conf = new Configuration();
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf, Writer.file(path),
            Writer.keyClass(key.getClass()), Writer.valueClass(value.getClass()))) {
        for (int i = 0; i < 100; i++) {
            key.set(100 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);
        }
    }
}

From source file:com.jfolson.hive.serde.RTypedBytesWritableInput.java

License:Apache License

public IntWritable readInt(IntWritable iw) throws IOException {
    if (iw == null) {
        iw = new IntWritable();
    }
    int val = in.readInt();
    if (val == RType.NA_INTEGER) {
        return null;
    }
    iw.set(val);
    return iw;
}

From source file:com.m6d.filecrush.crush.Crush.java

License:Apache License

void writeDirs() throws IOException {

    print(Verbosity.INFO, "\nUsing temporary directory " + tmpDir.toUri().getPath() + "\n");

    FileStatus status = fs.getFileStatus(srcDir);

    Path tmpIn = new Path(tmpDir, "in");

    bucketFiles = new Path(tmpIn, "dirs");
    partitionMap = new Path(tmpIn, "partition-map");
    counters = new Path(tmpIn, "counters");

    skippedFiles = new HashSet<String>();
    removableFiles = new HashSet<String>();

    /*
     * Prefer the path returned by the status because it is always fully qualified.
     */
    List<Path> dirs = asList(status.getPath());

    Text key = new Text();
    Text value = new Text();

    Bucketer partitionBucketer = new Bucketer(maxTasks, 0, false);
    partitionBucketer.reset("partition-map");

    jobCounters = new Counters();
    int fileCount = 0;

    //Path bucketFile = new Path(tmpIn, "dirs_" + fileCount++);
    Writer writer = SequenceFile.createWriter(fs, job, bucketFiles, Text.class, Text.class,
            CompressionType.BLOCK);

    try {
        while (!dirs.isEmpty()) {
            List<Path> nextLevel = new LinkedList<Path>();

            for (Path dir : dirs) {
                String dirPath = dir.toUri().getPath();
                print(Verbosity.INFO, "\n\n[" + dirPath + "]");

                jobCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);

                FileStatus[] contents = fs.listStatus(dir, new PathFilter() {
                    @Override
                    public boolean accept(Path testPath) {
                        if (ignoredFilesMatcher == null)
                            return true;
                        ignoredFilesMatcher.reset(testPath.toUri().getPath());
                        boolean ignores = ignoredFilesMatcher.matches();
                        if (ignores)
                            LOG.info("Ignoring file " + testPath);
                        return !ignores;
                    }

                });

                if (contents == null || contents.length == 0) {
                    print(Verbosity.INFO, "\n  Directory is empty");

                    jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                } else {
                    List<FileStatus> crushables = new ArrayList<FileStatus>(contents.length);
                    Set<String> uncrushedFiles = new HashSet<String>(contents.length);

                    long crushableBytes = 0;

                    /*
                     * Queue sub directories for subsequent inspection and examine the files in this directory.
                     */
                    for (FileStatus content : contents) {
                        Path path = content.getPath();

                        if (content.isDir()) {
                            nextLevel.add(path);
                        } else {
                            String filePath = path.toUri().getPath();
                            boolean skipFile = false;
                            if (skippedFilesMatcher != null) {
                                skippedFilesMatcher.reset(filePath);
                                if (skippedFilesMatcher.matches()) {
                                    skipFile = true;
                                }
                            }

                            boolean changed = uncrushedFiles.add(filePath);
                            assert changed : path.toUri().getPath();
                            long fileLength = content.getLen();

                            if (!skipFile && fileLength <= maxEligibleSize) {
                                if (removeEmptyFiles && fileLength == 0)
                                    removableFiles.add(filePath);
                                else {
                                    crushables.add(content);
                                    crushableBytes += fileLength;
                                }
                            }
                        }
                    }

                    /*
                     * We found a directory with data in it. Make sure we know how to name the crush output file and then increment the
                     * number of files we found.
                     */
                    if (!uncrushedFiles.isEmpty()) {
                        if (-1 == findMatcher(dir)) {
                            throw new IllegalArgumentException(
                                    "Could not find matching regex for directory: " + dir);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_FOUND, uncrushedFiles.size());
                    }

                    if (0 == crushableBytes) {
                        print(Verbosity.INFO, "\n  Directory has no crushable files");

                        jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                    } else {
                        /*
                         * We found files to consider for crushing.
                         */
                        long nBlocks = crushableBytes / dfsBlockSize;

                        if (nBlocks * dfsBlockSize != crushableBytes) {
                            nBlocks++;
                        }

                        /*
                         * maxFileBlocks will be huge in v1 mode, which will lead to one bucket per directory.
                         */
                        long dirBuckets = nBlocks / maxFileBlocks;
                        if (dirBuckets * maxFileBlocks != nBlocks) {
                            dirBuckets++;
                        }

                        if (dirBuckets > Integer.MAX_VALUE) {
                            throw new AssertionError("Too many buckets: " + dirBuckets);
                        }

                        Bucketer directoryBucketer = new Bucketer((int) dirBuckets, excludeSingleFileDirs);
                        directoryBucketer.reset(getPathPart(dir));

                        for (FileStatus file : crushables) {
                            directoryBucketer.add(new FileStatusHasSize(file));
                        }

                        List<Bucket> crushFiles = directoryBucketer.createBuckets();
                        if (crushFiles.isEmpty()) {
                            jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                            print(Verbosity.INFO, "\n  Directory skipped");
                        } else {
                            nBuckets += crushFiles.size();
                            jobCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);
                            print(Verbosity.INFO, "\n  Generating " + crushFiles.size() + " output files");

                            /*
                             * Write out the mapping between a bucket and a file.
                             */
                            for (Bucket crushFile : crushFiles) {
                                String bucketId = crushFile.name();

                                List<String> filesInBucket = crushFile.contents();

                                print(Verbosity.INFO,
                                        format("\n  Output %s will include %,d input bytes from %,d files",
                                                bucketId, crushFile.size(), filesInBucket.size()));

                                key.set(bucketId);

                                for (String f : filesInBucket) {
                                    boolean changed = uncrushedFiles.remove(f);
                                    assert changed : f;

                                    pathMatcher.reset(f);
                                    pathMatcher.matches();

                                    value.set(pathMatcher.group(5));

                                    /*
                                     * Write one row per file to maximize the number of mappers
                                     */
                                    writer.append(key, value);

                                    /*
                                     * Print the input file with four leading spaces.
                                     */
                                    print(Verbosity.VERBOSE, "\n    " + f);
                                }

                                jobCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, filesInBucket.size());

                                partitionBucketer.add(crushFile);
                            }
                        }
                    }

                    if (!removableFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n  Marked " + removableFiles.size() + " files for removal");

                        for (String removable : removableFiles) {
                            uncrushedFiles.remove(removable);
                            print(Verbosity.VERBOSE, "\n    " + removable);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_REMOVED, removableFiles.size());
                    }

                    if (!uncrushedFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n  Skipped " + uncrushedFiles.size() + " files");

                        for (String uncrushed : uncrushedFiles) {
                            print(Verbosity.VERBOSE, "\n    " + uncrushed);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_SKIPPED, uncrushedFiles.size());
                    }

                    skippedFiles.addAll(uncrushedFiles);
                }
            }

            dirs = nextLevel;
        }
    } finally {
        writer.close();
    }

    /*
     * Now that we have processed all the directories, write the partition map.
     */
    List<Bucket> partitions = partitionBucketer.createBuckets();
    assert partitions.size() <= maxTasks;

    writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);
    IntWritable partNum = new IntWritable();
    int totalReducers = 0;
    for (Bucket partition : partitions) {
        String partitionName = partition.name();

        int p = Integer.parseInt(partitionName.substring(partitionName.lastIndexOf('-') + 1));
        partNum.set(p);

        if (partition.contents().size() > 0)
            totalReducers++;

        for (String bucketId : partition.contents()) {
            key.set(bucketId);
            writer.append(key, partNum);
        }
    }
    writer.close();

    print(Verbosity.INFO, "\n\nNumber of allocated reducers = " + totalReducers);
    job.setInt("mapreduce.job.reduces", totalReducers);

    DataOutputStream countersStream = fs.create(this.counters);
    jobCounters.write(countersStream);
    countersStream.close();
}

From source file:com.microsoft.canberra.tf.util.DoubleMatrixRecordReader.java

License:Open Source License

@Override
public boolean next(final IntWritable rowId, final DoubleMatrix matrixRow) throws IOException {

    if (!this.textRecordReader.next(this.offset, this.text)) {
        return false;
    }

    LOG.log(Level.FINEST, "RecordReader: {0} :: {1}", new Object[] { this.offset, this.text });

    final String[] fields = this.text.toString().split("\\s+");

    if (fields.length <= 1) {
        return false;
    }

    rowId.set(Integer.parseInt(fields[0]));
    matrixRow.resize(fields.length - 1, 1);

    for (int i = 1; i < fields.length; ++i) {
        matrixRow.put(i - 1, Double.parseDouble(fields[i]));
    }

    return true;
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java

License:Apache License

@Override
public void putNext(Tuple t) throws IOException {
    IntWritable outputKey = new IntWritable();
    VectorWritable outputValue = new VectorWritable();
    outputKey.set((Integer) t.get(0));
    Tuple currRow = (Tuple) t.get(1);
    Vector currRowVector;
    if (dimensions == 0) {
        throw new IllegalArgumentException("Trying to create 0 dimension vector");
    }
    if (STORE_AS_DENSE) {
        currRowVector = new NamedVector(new DenseVector(dimensions), outputKey.toString());
    } else if (STORE_AS_SEQUENTIAL) {
        currRowVector = new NamedVector(new SequentialAccessSparseVector(dimensions, currRow.size()),
                outputKey.toString());
    } else {
        currRowVector = new NamedVector(new RandomAccessSparseVector(dimensions, currRow.size()),
                outputKey.toString());
    }
    for (int ii = 0; ii < currRow.size(); ii++) {
        Object o = currRow.get(ii);
        switch (currRow.getType(ii)) {
        case DataType.INTEGER:
        case DataType.LONG:
        case DataType.FLOAT:
        case DataType.DOUBLE:
            currRowVector.set(ii, (Double) o);
            break;
        case DataType.TUPLE:
            // If this is a tuple then we want to set column and element
            Tuple subt = (Tuple) o;
            currRowVector.set((Integer) subt.get(0), (Double) subt.get(1));
            break;
        default:
            throw new RuntimeException("Unexpected tuple form");
        }
    }
    outputValue.set(currRowVector);
    try {
        writer.write(outputKey, outputValue);
    } catch (InterruptedException e) {
        LOG.error("Interrupted while writing", e);
    }
}

From source file:com.scaleoutsoftware.soss.hserver.examples.SingleWordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new RuntimeException("Required args: wordToCount (string)");
    }

    // Create parameter argument to send to the reducers
    MapArguments mapArgs = new MapArguments(args[0]);

    // Create the invocation grid
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("WordCountIG").addClass(TokenizerMapper.class)
            .addClass(IntSumReducer.class).addClass(MapArguments.class).load();

    // Create a default configuration
    Configuration conf = new Configuration();

    // Create the input map
    NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("InputMap",
            new WritableSerializer<IntWritable>(IntWritable.class), new WritableSerializer<Text>(Text.class));
    // Create the output map
    NamedMap<Text, IntWritable> outputMap = NamedMapFactory.getMap("OutputMap",
            new WritableSerializer<Text>(Text.class), new WritableSerializer<IntWritable>(IntWritable.class));

    // Clear the input and output maps
    inputMap.clear();
    outputMap.clear();

    // Create a BulkPut object
    BulkLoader<IntWritable, Text> loader = inputMap.getBulkLoader();

    IntWritable key = new IntWritable();
    Text value = new Text();

    // Build the input map from generated text
    Scanner scanner = new Scanner(SAMPLE_INPUT);

    for (int count = 0; scanner.hasNext(); count++) {
        value.set(scanner.next());
        key.set(count);
        loader.put(key, value);
    }

    scanner.close();

    // Close the bulk loader
    loader.close();

    // Assign the invocation grid to the maps, so parallel operations can be performed
    inputMap.setInvocationGrid(grid);
    outputMap.setInvocationGrid(grid);

    // Create an hServer job
    HServerJob job = new HServerJob(conf, "word count", false, grid);
    job.setJarByClass(SingleWordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setInputFormatClass(NamedMapInputFormat.class);
    job.setOutputFormatClass(GridOutputFormat.class);

    // pass the map arguments object to the job
    job.setJobParameter(mapArgs);

    // Set named maps for the input and output formats
    NamedMapInputFormat.setNamedMap(job, inputMap);
    GridOutputFormat.setNamedMap(job, outputMap);

    // Execute the job
    IntWritable result = (IntWritable) job.runAndGetResult();

    // Unload the invocation grid
    grid.unload();

    // Output the single result and its frequency.
    System.out.println("The word: " + args[0] + " was used - " + result + " times!");
}

From source file:com.scaleoutsoftware.soss.hserver.examples.WordCountParameterPassing.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        throw new RuntimeException("Required args: wordMinLength wordMaxLength");
    }

    int minLength = Integer.parseInt(args[0]);
    int maxLength = Integer.parseInt(args[1]);

    // Create parameter argument to send to the reducers
    MapArguments mapArgs = new MapArguments(minLength, maxLength);

    // Create the invocation grid
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("WordCountIG").addClass(TokenizerMapper.class)
            .addClass(IntSumReducer.class).addClass(MapArguments.class).load();

    // Create a default configuration
    Configuration conf = new Configuration();

    // Create the input map
    NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("InputMap",
            new WritableSerializer<IntWritable>(IntWritable.class), new WritableSerializer<Text>(Text.class));

    // Create the output map
    NamedMap<Text, IntWritable> outputMap = NamedMapFactory.getMap("OutputMap",
            new WritableSerializer<Text>(Text.class), new WritableSerializer<IntWritable>(IntWritable.class));

    // Clear the input and output maps
    inputMap.clear();
    outputMap.clear();

    // Create a BulkPut object
    BulkLoader<IntWritable, Text> loader = inputMap.getBulkLoader();

    IntWritable key = new IntWritable();
    Text value = new Text();

    // Build the input map from generated text
    Scanner scanner = new Scanner(SAMPLE_INPUT);

    for (int count = 0; scanner.hasNext(); count++) {
        value.set(scanner.next());
        key.set(count);
        loader.put(key, value);
    }

    scanner.close();

    // Close the bulk loader
    loader.close();

    // Assign the invocation grid to the maps, so parallel operations can be performed
    inputMap.setInvocationGrid(grid);
    outputMap.setInvocationGrid(grid);

    // Create an hServer job
    HServerJob job = new HServerJob(conf, "word count", false, grid);
    job.setJarByClass(WordCountParameterPassing.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(NamedMapInputFormat.class);
    job.setOutputFormatClass(GridOutputFormat.class);

    // Pass the map arguments object to the job
    job.setJobParameter(mapArgs);

    // Set named maps for the input and output formats
    NamedMapInputFormat.setNamedMap(job, inputMap);
    GridOutputFormat.setNamedMap(job, outputMap);

    // Execute the job
    job.waitForCompletion(true);

    // Unload the invocation grid
    grid.unload();

    // Output resulting words and their frequencies
    Iterable<Text> results = outputMap.keySet();
    System.out.println("Following words were longer than " + mapArgs.minWordLength + " and shorter than "
            + mapArgs.maxWordLength + ":");
    for (Text word : results) {
        System.out.println("\"" + word.toString() + "\" was used " + outputMap.get(word) + " times.");
    }
}

From source file:com.scaleoutsoftware.soss.hserver.Test_MapToMapCopy.java

License:Apache License

public static void main(String argv[]) throws Exception {
    final NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("map-i",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    final NamedMap<IntWritable, Text> outputMap = NamedMapFactory.getMap("map-o",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    inputMap.clear();
    outputMap.clear();
    Thread.sleep(10000);
    BulkLoader<IntWritable, Text> put = inputMap.getBulkLoader();
    String content = "xcccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
    Text contentW = new Text(content);

    IntWritable count = new IntWritable();
    for (int i = 0; i < 1000; i++) {
        count.set(i);
        put.put(count, contentW);
    }
    put.close();

    InvocationGrid grid = HServerJob.getInvocationGridBuilder("MyGrid" + System.currentTimeMillis())
            .addClass(Test_MapToMapCopy.class).load();

    HServerJob job;
    Configuration configuration;

    for (int i = 0; i < 100; i++) {
        // MMF
        configuration = new Configuration();
        configuration.setInt("mapred.hserver.setting.reducer.usememorymappedfiles", 1);
        configuration.setInt("mapred.hserver.setting.namedmap.usememorymappedfiles", 1);
        configuration.setInt("mapred.hserver.setting.map.maxtempmemorykb", 100000);
        job = new HServerJob(configuration, "Sample job");
        job.setGrid(grid);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(NamedMapInputFormat.class);
        job.setOutputFormatClass(GridOutputFormat.class);
        NamedMapInputFormat.setNamedMap(job, inputMap);
        NamedMapInputFormat.setSuggestedNumberOfSplits(job, 64);
        GridOutputFormat.setNamedMap(job, outputMap);
        job.waitForCompletion(false);
        assertEquals(inputMap.size(), outputMap.size());
        outputMap.clear();
    }
    grid.unload();
}