Example usage for org.apache.commons.lang ArrayUtils indexOf

Introduction

On this page you can find example usage for org.apache.commons.lang ArrayUtils indexOf.

Prototype

public static int indexOf(boolean[] array, boolean valueToFind) 

Document

Finds the index of the given value in the array, returning -1 if the value is not found.
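
ArrayUtils.indexOf is overloaded for Object[] and for the primitive array types, so the same call shape works for the Object[], int[], and double[] arrays seen in the examples below. A minimal, hypothetical sketch of the behavior (the array contents are made up for illustration):

import org.apache.commons.lang.ArrayUtils;

public class IndexOfExample {
    public static void main(String[] args) {
        // Object[] overload: matching is based on equals()
        String[] colors = { "red", "green", "blue" };
        System.out.println(ArrayUtils.indexOf(colors, "green")); // 1
        System.out.println(ArrayUtils.indexOf(colors, "black")); // -1 (not found)

        // primitive overload (int[] here); -1 again signals "not found"
        int[] predictions = { 2, 0, 1 };
        System.out.println(ArrayUtils.indexOf(predictions, 1));  // 2
        System.out.println(ArrayUtils.indexOf(predictions, -1)); // -1: no -1 entry present
    }
}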

Usage

From source file:org.apache.mahout.df.data.Dataset.java

/**
 * Converts a token to its corresponding int code for a given attribute.
 * 
 * @param attr
 *          attribute's index
 * @param token
 *          the attribute value to look up
 * @return the token's code (its index in the attribute's values), or -1 if the token is unknown
 */
public int valueOf(int attr, String token) {
    if (isNumerical(attr)) {
        throw new IllegalArgumentException("Only for CATEGORICAL attributes");
    }
    if (values == null) {
        throw new IllegalStateException("Values not found");
    }

    return ArrayUtils.indexOf(values[attr], token);
}

From source file:org.apache.mahout.df.data.Utils.java

/**
 * Finds the label attribute's index.
 * 
 * @param descriptor attribute descriptor string
 * @return the label attribute's index, or -1 if no label is found
 * @throws DescriptorException if the descriptor cannot be parsed
 */
public static int findLabel(String descriptor) throws DescriptorException {
    Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);
    return ArrayUtils.indexOf(attrs, Attribute.LABEL);
}

From source file:org.apache.mahout.df.mapred.partial.PartialSequentialBuilder.java

@Override
protected void runJob(JobConf job) throws IOException {
    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, job.getNumMapTasks());
    log.debug("Nb splits : {}", splits.length);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    int numTrees = Builder.getNbTrees(job); // total number of trees

    firstOutput = new PartialOutputCollector(numTrees);
    Reporter reporter = Reporter.NULL;

    firstIds = new int[splits.length];
    sizes = new int[splits.length];

    // to compute firstIds, process the splits in file order
    int firstId = 0;
    long slowest = 0; // duration of slowest map
    for (InputSplit split : splits) {
        int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition

        RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

        LongWritable key = reader.createKey();
        Text value = reader.createValue();

        Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(), hp, splits.length,
                numTrees);

        long time = System.currentTimeMillis();

        firstIds[hp] = firstId;

        while (reader.next(key, value)) {
            mapper.map(key, value, firstOutput, reporter);
            firstId++;
            sizes[hp]++;
        }

        mapper.close();

        time = System.currentTimeMillis() - time;
        log.info("Duration : {}", DFUtils.elapsedTime(time));

        if (time > slowest) {
            slowest = time;
        }
    }

    log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
}
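
The loop above uses ArrayUtils.indexOf on a sorted copy of the splits to map a split's file-order position to its Hadoop partition (the same trick appears in Step0Job below). A stripped-down sketch of that mapping, using hypothetical split names and sizes, and plain lexicographic sorting as a stand-in for Builder.sortSplits:

import java.util.Arrays;
import org.apache.commons.lang.ArrayUtils;

public class PartitionOrderSketch {
    public static void main(String[] args) {
        // splits in "file order" (hypothetical names)
        String[] splits = { "part-00002", "part-00000", "part-00001" };
        int[] sizes = { 10, 20, 30 }; // instances per split, in file order

        // Hadoop assigns partitions in sorted order
        String[] sorted = Arrays.copyOf(splits, splits.length);
        Arrays.sort(sorted);

        // firstIds[hp] = id of the first instance handled by hadoop partition hp
        int[] firstIds = new int[splits.length];
        int firstId = 0;
        for (int p = 0; p < splits.length; p++) {
            int hp = ArrayUtils.indexOf(sorted, splits[p]); // hadoop's partition
            firstIds[hp] = firstId;
            firstId += sizes[p];
        }
        System.out.println(Arrays.toString(firstIds)); // [10, 30, 0]
    }
}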

From source file:org.apache.mahout.df.mapred.partial.Step0Job.java

/**
 * Replaces the first id for each partition in Hadoop's order.
 * 
 * @param keys Hadoop partition index of each value
 * @param values partition outputs, in file order
 * @return the values, with their firstIds updated, reordered in Hadoop's partition order
 */
protected static Step0Output[] processOutput(int[] keys, Step0Output[] values) {
    int numMaps = values.length;

    // sort the values using firstId
    Step0Output[] sorted = Arrays.copyOf(values, numMaps);
    Arrays.sort(sorted);

    // compute the partitions firstIds (file order)
    int[] orderedIds = new int[numMaps];
    orderedIds[0] = 0;
    for (int p = 1; p < numMaps; p++) {
        orderedIds[p] = orderedIds[p - 1] + sorted[p - 1].size;
    }

    // update the values' first ids
    for (int p = 0; p < numMaps; p++) {
        int order = ArrayUtils.indexOf(sorted, values[p]);
        values[p].firstId = orderedIds[order];
    }

    // reorder the values in hadoop's order
    Step0Output[] reordered = new Step0Output[numMaps];
    for (int p = 0; p < numMaps; p++) {
        reordered[keys[p]] = values[p];
    }

    return reordered;
}

From source file:org.apache.mahout.df.mapred.partial.Step2MapperTest.java

public void testMapper() throws Exception {
    Random rng = RandomUtils.getRandom();

    // prepare the data
    String descriptor = Utils.randomDescriptor(rng, nbAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
    String[] sData = Utils.double2String(source);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    String[][] splits = Utils.splitData(sData, nbMappers);

    // prepare first step output
    TreeID[] keys = new TreeID[nbTrees];
    Node[] trees = new Node[nbTrees];
    int[] sizes = new int[nbMappers];

    int treeIndex = 0;
    for (int partition = 0; partition < nbMappers; partition++) {
        int nbMapTrees = Step1Mapper.nbTrees(nbMappers, nbTrees, partition);

        for (int tree = 0; tree < nbMapTrees; tree++, treeIndex++) {
            keys[treeIndex] = new TreeID(partition, treeIndex);
            // put the partition in the leaf's label
            // this way we can track the outputs
            trees[treeIndex] = new Leaf(partition);
        }

        sizes[partition] = splits[partition].length;
    }

    // store the first step outputs in a file
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path forestPath = new Path("testdata/Step2MapperTest.forest");
    InterResults.store(fs, forestPath, keys, trees, sizes);

    LongWritable key = new LongWritable();
    Text value = new Text();

    for (int partition = 0; partition < nbMappers; partition++) {
        String[] split = splits[partition];

        // number of trees that will be handled by the mapper
        int nbConcerned = Step2Mapper.nbConcerned(nbMappers, nbTrees, partition);

        PartialOutputCollector output = new PartialOutputCollector(nbConcerned);

        // load the current mapper's (key, tree) pairs
        TreeID[] curKeys = new TreeID[nbConcerned];
        Node[] curTrees = new Node[nbConcerned];
        InterResults.load(fs, forestPath, nbMappers, nbTrees, partition, curKeys, curTrees);

        // simulate the job
        MockStep2Mapper mapper = new MockStep2Mapper(partition, dataset, curKeys, curTrees, split.length);

        for (int index = 0; index < split.length; index++) {
            key.set(index);
            value.set(split[index]);
            mapper.map(key, value, output, Reporter.NULL);
        }

        mapper.close();

        // make sure the mapper did not return its own trees
        assertEquals(nbConcerned, output.nbOutputs());

        // check the returned results
        int current = 0;
        for (int index = 0; index < nbTrees; index++) {
            if (keys[index].partition() == partition) {
                // should not be part of the results
                continue;
            }

            TreeID k = output.getKeys()[current];

            // the tree should receive the partition's index
            assertEquals(partition, k.partition());

            // make sure all the trees of the other partitions are handled in the
            // correct order
            assertEquals(index, k.treeId());

            int[] predictions = output.getValues()[current].getPredictions();

            // all the instances of the partition should be classified
            assertEquals(split.length, predictions.length);
            assertEquals("at least one instance of the partition was not classified", -1,
                    ArrayUtils.indexOf(predictions, -1));

            // the tree must not belong to the mapper's partition
            int treePartition = predictions[0];
            assertFalse("Step2Mapper returned a tree from its own partition", partition == treePartition);

            current++;
        }
    }
}

From source file:org.apache.mahout.df.mapreduce.partial.PartialSequentialBuilder.java

@Override
protected boolean runJob(Job job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();

    // retrieve the splits
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(job);

    int nbSplits = splits.size();
    log.debug("Nb splits : {}", nbSplits);

    InputSplit[] sorted = new InputSplit[nbSplits];
    splits.toArray(sorted);
    Builder.sortSplits(sorted);

    int numTrees = Builder.getNbTrees(conf); // total number of trees

    TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());

    firstOutput = new MockContext(new Step1Mapper(), conf, task.getTaskAttemptID(), numTrees);

    firstIds = new int[nbSplits];
    sizes = new int[nbSplits];

    // to compute firstIds, process the splits in file order
    long slowest = 0; // duration of slowest map
    int firstId = 0;
    for (int p = 0; p < nbSplits; p++) {
        InputSplit split = splits.get(p);
        int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition

        RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);
        reader.initialize(split, task);

        Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(), hp, nbSplits, numTrees);

        long time = System.currentTimeMillis();

        firstIds[hp] = firstId;

        while (reader.nextKeyValue()) {
            mapper.map(reader.getCurrentKey(), reader.getCurrentValue(), firstOutput);
            firstId++;
            sizes[hp]++;
        }

        mapper.cleanup(firstOutput);

        time = System.currentTimeMillis() - time;
        log.info("Duration : {}", DFUtils.elapsedTime(time));

        if (time > slowest) {
            slowest = time;
        }
    }

    log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
    return true;
}

From source file:org.apache.mahout.df.mapreduce.partial.Step0Job.java

/**
 * Replaces the first id for each partition in Hadoop's order.
 * 
 * @param keys Hadoop partition index of each value
 * @param values partition outputs, in file order
 * @return the values, with their firstIds updated, reordered in Hadoop's partition order
 */
protected static Step0Output[] processOutput(List<Integer> keys, List<Step0Output> values) {
    int numMaps = values.size();

    // sort the values using firstId
    Step0Output[] sorted = new Step0Output[numMaps];
    values.toArray(sorted);
    Arrays.sort(sorted);

    // compute the partitions firstIds (file order)
    int[] orderedIds = new int[numMaps];
    orderedIds[0] = 0;
    for (int p = 1; p < numMaps; p++) {
        orderedIds[p] = orderedIds[p - 1] + sorted[p - 1].size;
    }

    // update the values' first ids
    for (int p = 0; p < numMaps; p++) {
        int order = ArrayUtils.indexOf(sorted, values.get(p));
        values.get(p).firstId = orderedIds[order];
    }

    // reorder the values in hadoop's order
    Step0Output[] reordered = new Step0Output[numMaps];
    for (int p = 0; p < numMaps; p++) {
        reordered[keys.get(p)] = values.get(p);
    }

    return reordered;
}

From source file:org.apache.mahout.df.mapreduce.partial.Step2MapperTest.java

public void testMapper() throws Exception {
    Random rng = RandomUtils.getRandom();

    // prepare the data
    String descriptor = Utils.randomDescriptor(rng, nbAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
    String[] sData = Utils.double2String(source);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    String[][] splits = Utils.splitData(sData, nbMappers);

    // prepare first step output
    TreeID[] keys = new TreeID[nbTrees];
    Node[] trees = new Node[nbTrees];
    int[] sizes = new int[nbMappers];

    int treeIndex = 0;
    for (int partition = 0; partition < nbMappers; partition++) {
        int nbMapTrees = Step1Mapper.nbTrees(nbMappers, nbTrees, partition);

        for (int tree = 0; tree < nbMapTrees; tree++, treeIndex++) {
            keys[treeIndex] = new TreeID(partition, treeIndex);
            // put the partition in the leaf's label
            // this way we can track the outputs
            trees[treeIndex] = new Leaf(partition);
        }

        sizes[partition] = splits[partition].length;
    }

    // store the first step outputs in a file
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path forestPath = new Path("testdata/Step2MapperTest.forest");
    InterResults.store(fs, forestPath, keys, trees, sizes);

    LongWritable key = new LongWritable();
    Text value = new Text();

    for (int partition = 0; partition < nbMappers; partition++) {
        String[] split = splits[partition];

        // number of trees that will be handled by the mapper
        int nbConcerned = Step2Mapper.nbConcerned(nbMappers, nbTrees, partition);

        MockContext context = new MockContext(new Step2Mapper(), new Configuration(), new TaskAttemptID(),
                nbConcerned);

        // load the current mapper's (key, tree) pairs
        TreeID[] curKeys = new TreeID[nbConcerned];
        Node[] curTrees = new Node[nbConcerned];
        InterResults.load(fs, forestPath, nbMappers, nbTrees, partition, curKeys, curTrees);

        // simulate the job
        MockStep2Mapper mapper = new MockStep2Mapper(partition, dataset, curKeys, curTrees, split.length);

        for (int index = 0; index < split.length; index++) {
            key.set(index);
            value.set(split[index]);
            mapper.map(key, value, context);
        }

        mapper.cleanup(context);

        // make sure the mapper did not return its own trees
        assertEquals(nbConcerned, context.nbOutputs());

        // check the returned results
        int current = 0;
        for (int index = 0; index < nbTrees; index++) {
            if (keys[index].partition() == partition) {
                // should not be part of the results
                continue;
            }

            TreeID k = context.getKeys()[current];

            // the tree should receive the partition's index
            assertEquals(partition, k.partition());

            // make sure all the trees of the other partitions are handled in the
            // correct order
            assertEquals(index, k.treeId());

            int[] predictions = context.getValues()[current].getPredictions();

            // all the instances of the partition should be classified
            assertEquals(split.length, predictions.length);
            assertEquals("at least one instance of the partition was not classified", -1,
                    ArrayUtils.indexOf(predictions, -1));

            // the tree must not belong to the mapper's partition
            int treePartition = predictions[0];
            assertFalse("Step2Mapper returned a tree from its own partition", partition == treePartition);

            current++;
        }
    }
}

From source file:org.apache.mahout.df.node.CategoricalNode.java

@Override
public int classify(Instance instance) {
    int index = ArrayUtils.indexOf(values, instance.get(attr));
    if (index == -1) {
        // value not available, we cannot predict
        return -1;
    }
    return childs[index].classify(instance);
}

From source file:org.apache.mahout.df.split.OptIgSplit.java

/**
 * Computes the split for a CATEGORICAL attribute.
 * 
 * @param data the data to split
 * @param attr the categorical attribute's index
 * @return a Split holding the attribute's information gain
 */
private static Split categoricalSplit(Data data, int attr) {
    double[] values = data.values(attr);
    int[][] counts = new int[values.length][data.getDataset().nblabels()];
    int[] countAll = new int[data.getDataset().nblabels()];

    // compute frequencies
    for (int index = 0; index < data.size(); index++) {
        Instance instance = data.get(index);
        counts[ArrayUtils.indexOf(values, instance.get(attr))][instance.label]++;
        countAll[instance.label]++;
    }

    int size = data.size();
    double hy = entropy(countAll, size); // H(Y)
    double hyx = 0.0; // H(Y|X)
    double invDataSize = 1.0 / size;

    for (int index = 0; index < values.length; index++) {
        size = DataUtils.sum(counts[index]);
        hyx += size * invDataSize * entropy(counts[index], size);
    }

    double ig = hy - hyx;
    return new Split(attr, ig);
}
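
categoricalSplit builds its per-value frequency table by using ArrayUtils.indexOf to map each instance's attribute value to its position in the attribute's distinct-value array. A minimal sketch of that counting pattern with made-up values (the per-label dimension is dropped for brevity):

import java.util.Arrays;
import org.apache.commons.lang.ArrayUtils;

public class CategoricalCountSketch {
    public static void main(String[] args) {
        // distinct values of a categorical attribute (hypothetical)
        double[] values = { 0.0, 1.0, 2.0 };
        // observed attribute values, one per instance
        double[] observed = { 1.0, 2.0, 1.0, 0.0, 1.0 };

        // count occurrences per distinct value
        int[] counts = new int[values.length];
        for (double v : observed) {
            counts[ArrayUtils.indexOf(values, v)]++;
        }
        System.out.println(Arrays.toString(counts)); // [1, 3, 1]
    }
}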