List of usage examples for org.apache.commons.lang.ArrayUtils.indexOf
public static int indexOf(boolean[] array, boolean valueToFind)
Finds the index of the first occurrence of the given value in the array. Returns -1 (ArrayUtils.INDEX_NOT_FOUND) if the value is not found or the input array is null. Overloads exist for Object[] and for every primitive array type.
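Before the Mahout excerpts below, a minimal self-contained sketch of the method's behavior. This is an illustrative example, not taken from the library's documentation; it assumes commons-lang 2.x on the classpath, and the array contents are made up:

import org.apache.commons.lang.ArrayUtils;

public class IndexOfDemo {
    public static void main(String[] args) {
        // primitive overload: index of the first element equal to the value
        boolean[] flags = { false, false, true, false };
        System.out.println(ArrayUtils.indexOf(flags, true)); // 2

        // Object overload: elements are compared with equals()
        String[] tokens = { "red", "green", "blue" };
        System.out.println(ArrayUtils.indexOf(tokens, "green")); // 1

        // a missing value (or a null array) yields ArrayUtils.INDEX_NOT_FOUND (-1)
        System.out.println(ArrayUtils.indexOf(tokens, "cyan")); // -1
    }
}

Because the Object[] overload compares with equals(), the examples below can look up String tokens and Attribute enum constants directly.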
From source file: org.apache.mahout.df.data.Dataset.java

/**
 * Converts a token to its corresponding int code for a given attribute
 *
 * @param attr attribute's index
 * @param token the token to look up
 * @return the token's int code, or -1 if the token is not found
 */
public int valueOf(int attr, String token) {
    if (isNumerical(attr)) {
        throw new IllegalArgumentException("Only for CATEGORICAL attributes");
    }
    if (values == null) {
        throw new IllegalStateException("Values not found");
    }

    return ArrayUtils.indexOf(values[attr], token);
}
From source file: org.apache.mahout.df.data.Utils.java

/**
 * Finds the label attribute's index
 *
 * @param descriptor the dataset descriptor
 * @return the index of the label attribute, or -1 if no label is found
 * @throws DescriptorException if the descriptor cannot be parsed
 */
public static int findLabel(String descriptor) throws DescriptorException {
    Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);
    return ArrayUtils.indexOf(attrs, Attribute.LABEL);
}
From source file: org.apache.mahout.df.mapred.partial.PartialSequentialBuilder.java

@Override
protected void runJob(JobConf job) throws IOException {
    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, job.getNumMapTasks());
    log.debug("Nb splits : {}", splits.length);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    int numTrees = Builder.getNbTrees(job); // total number of trees

    firstOutput = new PartialOutputCollector(numTrees);
    Reporter reporter = Reporter.NULL;

    firstIds = new int[splits.length];
    sizes = new int[splits.length];

    // to compute firstIds, process the splits in file order
    int firstId = 0;
    long slowest = 0; // duration of slowest map
    for (InputSplit split : splits) {
        int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition

        RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

        LongWritable key = reader.createKey();
        Text value = reader.createValue();

        Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(), hp, splits.length,
            numTrees);

        long time = System.currentTimeMillis();

        firstIds[hp] = firstId;

        while (reader.next(key, value)) {
            mapper.map(key, value, firstOutput, reporter);
            firstId++;
            sizes[hp]++;
        }

        mapper.close();

        time = System.currentTimeMillis() - time;
        log.info("Duration : {}", DFUtils.elapsedTime(time));

        if (time > slowest) {
            slowest = time;
        }
    }

    log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
}
From source file: org.apache.mahout.df.mapred.partial.Step0Job.java

/**
 * Replaces the first id for each partition in Hadoop's order
 *
 * @param keys the partition index of each value
 * @param values the outputs of the Step0 mappers
 * @return the values, with firstId replaced, reordered in Hadoop's order
 */
protected static Step0Output[] processOutput(int[] keys, Step0Output[] values) {
    int numMaps = values.length;

    // sort the values using firstId
    Step0Output[] sorted = Arrays.copyOf(values, numMaps);
    Arrays.sort(sorted);

    // compute the partitions' firstIds (file order)
    int[] orderedIds = new int[numMaps];
    orderedIds[0] = 0;
    for (int p = 1; p < numMaps; p++) {
        orderedIds[p] = orderedIds[p - 1] + sorted[p - 1].size;
    }

    // update the values' first ids
    for (int p = 0; p < numMaps; p++) {
        int order = ArrayUtils.indexOf(sorted, values[p]);
        values[p].firstId = orderedIds[order];
    }

    // reorder the values in Hadoop's order
    Step0Output[] reordered = new Step0Output[numMaps];
    for (int p = 0; p < numMaps; p++) {
        reordered[keys[p]] = values[p];
    }

    return reordered;
}
From source file: org.apache.mahout.df.mapred.partial.Step2MapperTest.java

public void testMapper() throws Exception {
    Random rng = RandomUtils.getRandom();

    // prepare the data
    String descriptor = Utils.randomDescriptor(rng, nbAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
    String[] sData = Utils.double2String(source);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    String[][] splits = Utils.splitData(sData, nbMappers);

    // prepare first step output
    TreeID[] keys = new TreeID[nbTrees];
    Node[] trees = new Node[nbTrees];
    int[] sizes = new int[nbMappers];

    int treeIndex = 0;
    for (int partition = 0; partition < nbMappers; partition++) {
        int nbMapTrees = Step1Mapper.nbTrees(nbMappers, nbTrees, partition);

        for (int tree = 0; tree < nbMapTrees; tree++, treeIndex++) {
            keys[treeIndex] = new TreeID(partition, treeIndex);
            // put the partition in the leaf's label
            // this way we can track the outputs
            trees[treeIndex] = new Leaf(partition);
        }

        sizes[partition] = splits[partition].length;
    }

    // store the first step outputs in a file
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path forestPath = new Path("testdata/Step2MapperTest.forest");
    InterResults.store(fs, forestPath, keys, trees, sizes);

    LongWritable key = new LongWritable();
    Text value = new Text();

    for (int partition = 0; partition < nbMappers; partition++) {
        String[] split = splits[partition];

        // number of trees that will be handled by the mapper
        int nbConcerned = Step2Mapper.nbConcerned(nbMappers, nbTrees, partition);

        PartialOutputCollector output = new PartialOutputCollector(nbConcerned);

        // load the current mapper's (key, tree) pairs
        TreeID[] curKeys = new TreeID[nbConcerned];
        Node[] curTrees = new Node[nbConcerned];
        InterResults.load(fs, forestPath, nbMappers, nbTrees, partition, curKeys, curTrees);

        // simulate the job
        MockStep2Mapper mapper = new MockStep2Mapper(partition, dataset, curKeys, curTrees, split.length);

        for (int index = 0; index < split.length; index++) {
            key.set(index);
            value.set(split[index]);
            mapper.map(key, value, output, Reporter.NULL);
        }

        mapper.close();

        // make sure the mapper did not return its own trees
        assertEquals(nbConcerned, output.nbOutputs());

        // check the returned results
        int current = 0;
        for (int index = 0; index < nbTrees; index++) {
            if (keys[index].partition() == partition) {
                // should not be part of the results
                continue;
            }

            TreeID k = output.getKeys()[current];

            // the tree should receive the partition's index
            assertEquals(partition, k.partition());

            // make sure all the trees of the other partitions are handled in the
            // correct order
            assertEquals(index, k.treeId());

            int[] predictions = output.getValues()[current].getPredictions();

            // all the instances of the partition should be classified
            assertEquals(split.length, predictions.length);
            assertEquals("at least one instance of the partition was not classified", -1,
                ArrayUtils.indexOf(predictions, -1));

            // the tree must not belong to the mapper's partition
            int treePartition = predictions[0];
            assertFalse("Step2Mapper returned a tree from its own partition", partition == treePartition);

            current++;
        }
    }
}
From source file: org.apache.mahout.df.mapreduce.partial.PartialSequentialBuilder.java

@Override
protected boolean runJob(Job job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();

    // retrieve the splits
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(job);

    int nbSplits = splits.size();
    log.debug("Nb splits : {}", nbSplits);

    InputSplit[] sorted = new InputSplit[nbSplits];
    splits.toArray(sorted);
    Builder.sortSplits(sorted);

    int numTrees = Builder.getNbTrees(conf); // total number of trees

    TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());

    firstOutput = new MockContext(new Step1Mapper(), conf, task.getTaskAttemptID(), numTrees);

    firstIds = new int[nbSplits];
    sizes = new int[nbSplits];

    // to compute firstIds, process the splits in file order
    long slowest = 0; // duration of slowest map
    int firstId = 0;
    for (int p = 0; p < nbSplits; p++) {
        InputSplit split = splits.get(p);
        int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition

        RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);
        reader.initialize(split, task);

        Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(), hp, nbSplits, numTrees);

        long time = System.currentTimeMillis();

        firstIds[hp] = firstId;

        while (reader.nextKeyValue()) {
            mapper.map(reader.getCurrentKey(), reader.getCurrentValue(), firstOutput);
            firstId++;
            sizes[hp]++;
        }

        mapper.cleanup(firstOutput);

        time = System.currentTimeMillis() - time;
        log.info("Duration : {}", DFUtils.elapsedTime(time));

        if (time > slowest) {
            slowest = time;
        }
    }

    log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
    return true;
}
From source file: org.apache.mahout.df.mapreduce.partial.Step0Job.java

/**
 * Replaces the first id for each partition in Hadoop's order
 *
 * @param keys the partition index of each value
 * @param values the outputs of the Step0 mappers
 * @return the values, with firstId replaced, reordered in Hadoop's order
 */
protected static Step0Output[] processOutput(List<Integer> keys, List<Step0Output> values) {
    int numMaps = values.size();

    // sort the values using firstId
    Step0Output[] sorted = new Step0Output[numMaps];
    values.toArray(sorted);
    Arrays.sort(sorted);

    // compute the partitions' firstIds (file order)
    int[] orderedIds = new int[numMaps];
    orderedIds[0] = 0;
    for (int p = 1; p < numMaps; p++) {
        orderedIds[p] = orderedIds[p - 1] + sorted[p - 1].size;
    }

    // update the values' first ids
    for (int p = 0; p < numMaps; p++) {
        int order = ArrayUtils.indexOf(sorted, values.get(p));
        values.get(p).firstId = orderedIds[order];
    }

    // reorder the values in Hadoop's order
    Step0Output[] reordered = new Step0Output[numMaps];
    for (int p = 0; p < numMaps; p++) {
        reordered[keys.get(p)] = values.get(p);
    }

    return reordered;
}
From source file: org.apache.mahout.df.mapreduce.partial.Step2MapperTest.java

public void testMapper() throws Exception {
    Random rng = RandomUtils.getRandom();

    // prepare the data
    String descriptor = Utils.randomDescriptor(rng, nbAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
    String[] sData = Utils.double2String(source);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    String[][] splits = Utils.splitData(sData, nbMappers);

    // prepare first step output
    TreeID[] keys = new TreeID[nbTrees];
    Node[] trees = new Node[nbTrees];
    int[] sizes = new int[nbMappers];

    int treeIndex = 0;
    for (int partition = 0; partition < nbMappers; partition++) {
        int nbMapTrees = Step1Mapper.nbTrees(nbMappers, nbTrees, partition);

        for (int tree = 0; tree < nbMapTrees; tree++, treeIndex++) {
            keys[treeIndex] = new TreeID(partition, treeIndex);
            // put the partition in the leaf's label
            // this way we can track the outputs
            trees[treeIndex] = new Leaf(partition);
        }

        sizes[partition] = splits[partition].length;
    }

    // store the first step outputs in a file
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path forestPath = new Path("testdata/Step2MapperTest.forest");
    InterResults.store(fs, forestPath, keys, trees, sizes);

    LongWritable key = new LongWritable();
    Text value = new Text();

    for (int partition = 0; partition < nbMappers; partition++) {
        String[] split = splits[partition];

        // number of trees that will be handled by the mapper
        int nbConcerned = Step2Mapper.nbConcerned(nbMappers, nbTrees, partition);

        MockContext context = new MockContext(new Step2Mapper(), new Configuration(), new TaskAttemptID(),
            nbConcerned);

        // load the current mapper's (key, tree) pairs
        TreeID[] curKeys = new TreeID[nbConcerned];
        Node[] curTrees = new Node[nbConcerned];
        InterResults.load(fs, forestPath, nbMappers, nbTrees, partition, curKeys, curTrees);

        // simulate the job
        MockStep2Mapper mapper = new MockStep2Mapper(partition, dataset, curKeys, curTrees, split.length);

        for (int index = 0; index < split.length; index++) {
            key.set(index);
            value.set(split[index]);
            mapper.map(key, value, context);
        }

        mapper.cleanup(context);

        // make sure the mapper did not return its own trees
        assertEquals(nbConcerned, context.nbOutputs());

        // check the returned results
        int current = 0;
        for (int index = 0; index < nbTrees; index++) {
            if (keys[index].partition() == partition) {
                // should not be part of the results
                continue;
            }

            TreeID k = context.getKeys()[current];

            // the tree should receive the partition's index
            assertEquals(partition, k.partition());

            // make sure all the trees of the other partitions are handled in the
            // correct order
            assertEquals(index, k.treeId());

            int[] predictions = context.getValues()[current].getPredictions();

            // all the instances of the partition should be classified
            assertEquals(split.length, predictions.length);
            assertEquals("at least one instance of the partition was not classified", -1,
                ArrayUtils.indexOf(predictions, -1));

            // the tree must not belong to the mapper's partition
            int treePartition = predictions[0];
            assertFalse("Step2Mapper returned a tree from its own partition", partition == treePartition);

            current++;
        }
    }
}
From source file: org.apache.mahout.df.node.CategoricalNode.java

@Override
public int classify(Instance instance) {
    int index = ArrayUtils.indexOf(values, instance.get(attr));
    if (index == -1) {
        // value not available, we cannot predict
        return -1;
    }
    return childs[index].classify(instance);
}
From source file: org.apache.mahout.df.split.OptIgSplit.java

/**
 * Computes the split for a CATEGORICAL attribute
 *
 * @param data the data to split
 * @param attr the attribute's index
 * @return the computed split, with its information gain
 */
private static Split categoricalSplit(Data data, int attr) {
    double[] values = data.values(attr);
    int[][] counts = new int[values.length][data.getDataset().nblabels()];
    int[] countAll = new int[data.getDataset().nblabels()];

    // compute frequencies
    for (int index = 0; index < data.size(); index++) {
        Instance instance = data.get(index);
        counts[ArrayUtils.indexOf(values, instance.get(attr))][instance.label]++;
        countAll[instance.label]++;
    }

    int size = data.size();
    double hy = entropy(countAll, size); // H(Y)
    double hyx = 0.0; // H(Y|X)
    double invDataSize = 1.0 / size;

    for (int index = 0; index < values.length; index++) {
        size = DataUtils.sum(counts[index]);
        hyx += size * invDataSize * entropy(counts[index], size);
    }

    double ig = hy - hyx;
    return new Split(attr, ig);
}
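The snippet above looks values up with indexOf(double[], double), which compares doubles exactly; that is safe here because the sought value comes from the same fixed dictionary as the array. When the sought value is computed, the tolerance overload that commons-lang also provides is the safer choice. A small illustrative sketch (the array contents are made up):

import org.apache.commons.lang.ArrayUtils;

public class ToleranceDemo {
    public static void main(String[] args) {
        double[] values = { 0.1, 0.2, 0.3 };

        // exact comparison: 0.1 + 0.2 is 0.30000000000000004 in IEEE 754, so no match
        System.out.println(ArrayUtils.indexOf(values, 0.1 + 0.2)); // -1

        // tolerance overload: matches the first element within +/- 1e-9 of the value
        System.out.println(ArrayUtils.indexOf(values, 0.1 + 0.2, 1e-9)); // 2
    }
}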