Example usage for com.google.common.collect Ordering natural

Introduction

This page collects example usages of com.google.common.collect.Ordering#natural() from open-source projects.

Prototype

@GwtCompatible(serializable = true)
@SuppressWarnings("unchecked") 
public static <C extends Comparable> Ordering<C> natural() 

Document

Returns a serializable ordering that uses the natural order of the values.
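
Before the examples, a minimal sketch of what natural() provides; the values below are illustrative and not taken from any of the files that follow:

import com.google.common.collect.Ordering;

import java.util.Arrays;
import java.util.List;

public class NaturalOrderingBasics {
    public static void main(String[] args) {
        // natural() is a serializable Comparator backed by Comparable.compareTo
        Ordering<Integer> natural = Ordering.natural();
        System.out.println(natural.compare(1, 2) < 0); // true

        // Being an Ordering, it also carries sorting helpers of its own
        List<String> words = Arrays.asList("pear", "apple", "banana");
        System.out.println(Ordering.<String>natural().sortedCopy(words)); // [apple, banana, pear]

        // ...and min/max over iterables
        System.out.println(natural.max(Arrays.asList(3, 1, 2))); // 3
    }
}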

Usage

From source file: com.evidon.areweprivateyet.ValueComparableMap.java

public static void main(String[] args) {
    TreeMap<String, Integer> map = new ValueComparableMap<String, Integer>(Ordering.natural());
    map.put("a", 5);
    map.put("b", 1);
    map.put("c", 3);
    assertEquals("b", map.firstKey());
    assertEquals("a", map.lastKey());
    map.put("d", 0);
    assertEquals("d", map.firstKey());
    //ensure it's still a map (by overwriting a key, but with a new value) 
    map.put("d", 2);
    assertEquals("b", map.firstKey());
    //Ensure multiple values do not clobber keys
    map.put("e", 2);
    assertEquals(5, map.size());
    assertEquals(2, (int) map.get("e"));
    assertEquals(2, (int) map.get("d"));
}
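
ValueComparableMap itself is the author's helper class and is not reproduced on this page. The core trick it builds on can be sketched on its own: give a TreeMap a comparator that looks keys up in a value map, with natural key order as a tie-breaker so equal values do not collapse into one entry. The names below are illustrative:

import com.google.common.base.Functions;
import com.google.common.collect.Ordering;

import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;

public class SortByValueSketch {
    public static void main(String[] args) {
        Map<String, Integer> values = new HashMap<String, Integer>();
        values.put("a", 5);
        values.put("b", 1);
        values.put("c", 3);

        // Compare keys by their mapped value, then by key
        Ordering<String> byValue = Ordering.<Integer>natural()
                .onResultOf(Functions.forMap(values))
                .compound(Ordering.<String>natural());

        TreeMap<String, Integer> sorted = new TreeMap<String, Integer>(byValue);
        sorted.putAll(values);
        System.out.println(sorted.firstKey()); // b (smallest value)
    }
}

The author's class additionally keeps the comparator in sync when values change (note the overwrite of "d" in the test above), which a plain TreeMap built this way does not handle.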

From source file: RandomWalk.java

public static void main(String[] args) {
    int numNodes = 3;
    if (args.length > 0)
        numNodes = Integer.parseInt(args[0]);
    System.out.printf("Generating a random graph with %d nodes...\n", numNodes);
    DirectedGraph graph = TestGraphs.generateRandomGraph(numNodes, Math.min(10, numNodes));
    System.out.printf("Generated a random directed graph with %s nodes and %s edges.\n", graph.nodeCount(),
            graph.edgeCount());

    // Generate walk parameters
    long numSteps = 1000 * 1000;
    final scala.Option<Integer> wpNone = scala.Option.apply(null);
    final scala.Option<Integer> wpTwo = scala.Option.apply(2);
    RandomWalkParams walkParams = new RandomWalkParams(numSteps, 0.1, wpNone, wpTwo, wpNone, false,
            GraphDir.OutDir(), false, false);
    GraphUtils graphUtils = new GraphUtils(graph);

    // Do the walk and measure how long it took
    System.out.printf("Now doing a random walk of %s steps from Node 0...\n", numSteps);
    long startTime = System.nanoTime();
    Tuple2<Int2IntMap, scala.Option<Int2ObjectMap<Object2IntMap<DirectedPath>>>> lm = graphUtils
            .calculatePersonalizedReputation(0, walkParams);
    long endTime = System.nanoTime();
    Int2IntMap neighbors = lm._1;
    System.out.printf("Random walk visited %s nodes in %s ms:\n", neighbors.size(),
            (endTime - startTime) / 1000000);

    // Sort neighbors (or nodes) in descending number of visits and take the top 10 neighbors
    List<Integer> topNeighbors = Ordering.natural().onResultOf(Functions.forMap(neighbors)).reverse()
            .immutableSortedCopy(neighbors.keySet());

    if (topNeighbors.size() > 10)
        topNeighbors = topNeighbors.subList(0, 10);

    // Print the top 10 neighbors (and paths)
    System.out.printf("%8s%10s\t%s\n", "NodeID", "#Visits", "Top 2 Paths with counts");
    for (int id : topNeighbors) {
        int numVisits = neighbors.get(id);
        System.out.printf("%8s%10s\t", id, numVisits);
        if (lm._2.isDefined()) { // If Option is not None
            Object2IntMap<DirectedPath> paths = lm._2.get().get(id);
            int remaining = paths.size();
            for (Map.Entry<DirectedPath, Integer> ef : paths.entrySet()) {
                // Print a directed path and #visits along that path
                int[] nodes = ef.getKey().nodes();
                for (int i = 0; i < nodes.length; i++) {
                    if (i != 0)
                        System.out.printf("->%d", nodes[i]);
                    else
                        System.out.printf("%d", nodes[i]);
                }
                System.out.printf(" (%d)", ef.getValue());
                if (remaining > 1)
                    System.out.printf(" | ");
                remaining--;
            }
        }
        System.out.println();
    }
}
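
Both RandomWalk examples rank nodes with the same one-liner: treat the visit-count map as a Function, sort its keys by the mapped count, and reverse for descending order. Reduced to its essentials (the counts below are made up):

import com.google.common.base.Functions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Ordering;

import java.util.List;
import java.util.Map;

public class TopVisitsSketch {
    public static void main(String[] args) {
        // Stand-in for the walk's visit counts (node id -> #visits)
        Map<Integer, Integer> visits = ImmutableMap.of(0, 120, 1, 45, 2, 300);

        // Sort keys by mapped value, highest count first
        List<Integer> ranked = Ordering.natural()
                .onResultOf(Functions.forMap(visits))
                .reverse()
                .immutableSortedCopy(visits.keySet());

        System.out.println(ranked); // [2, 0, 1]
    }
}

Note that Functions.forMap(map) throws for keys absent from the map; since the sort runs over the map's own keySet(), every lookup succeeds.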

From source file: RandomWalkJava.java

public static void main(String[] args) {
    int numNodes = 3;
    if (args.length > 0)
        numNodes = Integer.parseInt(args[0]);
    System.out.printf("Generating a random graph with %d nodes...\n", numNodes);
    DirectedGraph graph = TestGraphs.generateRandomGraph(numNodes,
            TestGraphs.getProbEdgeRandomDirected(numNodes, Math.min(10, numNodes)), StoredGraphDir.BothInOut());
    System.out.printf("Generated a random directed graph with %s nodes and %s edges.\n", graph.nodeCount(),
            graph.edgeCount());

    // Generate walk parameters
    long numSteps = 1000 * 1000;
    final scala.Option<Object> wpNone = scala.Option.apply(null);
    final scala.Option<Object> wpTwo = scala.Option.apply((Object) 2);
    RandomWalkParams walkParams = new RandomWalkParams(numSteps, 0.1, wpNone, wpTwo, wpNone, false,
            GraphDir.OutDir(), false, false);
    GraphUtils graphUtils = new GraphUtils(graph);

    // Do the walk and measure how long it took
    System.out.printf("Now doing a random walk of %s steps from Node 0...\n", numSteps);
    long startTime = System.nanoTime();
    Tuple2<Int2IntMap, scala.Option<Int2ObjectMap<Object2IntMap<DirectedPath>>>> lm = graphUtils
            .calculatePersonalizedReputation(0, walkParams);
    long endTime = System.nanoTime();
    Int2IntMap neighbors = lm._1;
    System.out.printf("Random walk visited %s nodes in %s ms:\n", neighbors.size(),
            (endTime - startTime) / 1000000);

    // Sort neighbors (or nodes) in descending number of visits and take the top 10 neighbors
    List<Integer> topNeighbors = Ordering.natural().onResultOf(Functions.forMap(neighbors)).reverse()
            .immutableSortedCopy(neighbors.keySet());

    if (topNeighbors.size() > 10)
        topNeighbors = topNeighbors.subList(0, 10);

    // Print the top 10 neighbors (and paths)
    System.out.printf("%8s%10s\t%s\n", "NodeID", "#Visits", "Top 2 Paths with counts");
    for (int id : topNeighbors) {
        int numVisits = neighbors.get(id);
        System.out.printf("%8s%10s\t", id, numVisits);
        if (lm._2.isDefined()) { // If Option is not None
            Object2IntMap<DirectedPath> paths = lm._2.get().get(id);
            int remaining = paths.size();
            for (Map.Entry<DirectedPath, Integer> ef : paths.entrySet()) {
                // Print a directed path and #visits along that path
                int[] nodes = ef.getKey().nodes();
                for (int i = 0; i < nodes.length; i++) {
                    if (i != 0)
                        System.out.printf("->%d", nodes[i]);
                    else
                        System.out.printf("%d", nodes[i]);
                }
                System.out.printf(" (%d)", ef.getValue());
                if (remaining > 1)
                    System.out.printf(" | ");
                remaining--;
            }
        }
        System.out.println();
    }
}

From source file: org.talos.CFGScanDroid.CFGScanDroid.java

public static void main(String[] args) throws IOException {
    parsedArguments = new JCommanderArguments();
    JCommander argParser = new JCommander(parsedArguments);

    // parse arguments
    try {
        argParser.parse(args);
    } catch (ParameterException exception) {
        System.err.println(exception);
        System.err.println("PARSE ERROR: Bad parameter");
        System.out.print(parsedArguments.getUsage());
        System.exit(1);
    }

    // make sure a useful set of arguments are set
    validateArguments(argParser);

    // get files from directories, one level deep
    List<File> fileList = getFileList();
    fileList = Ordering.natural().sortedCopy(fileList);

    // dump sigs
    if (parsedArguments.dumpSignatures()) {
        for (File file : fileList)
            dumpSigs(file);
        // scan
    } else {
        // load signatures
        List<CFGSig> signatures = null;
        for (String sigFile : parsedArguments.getSignatureFiles()) {
            if (signatures == null)
                signatures = parseSignatures(sigFile);
            else
                signatures.addAll(parseSignatures(sigFile));
        }

        // load raw signatures
        for (String sig : parsedArguments.getRawSignatures()) {
            if (signatures == null)
                signatures = new ArrayList<CFGSig>();

            CFGSig cfgSig = new CFGSig(sig);
            signatures.add(cfgSig);
        }

        // normalize
        if (parsedArguments.normalize()) {
            for (CFGSig cfgSig : signatures) {
                // System.out.println("NORMALIZING SIGNATURE: " + cfgSig.getName());
                // System.out.println(cfgSig.getVertexCount());
                // System.out.println(cfgSig.getEdgeCount());
                cfgSig.normalize();
                // System.out.println(cfgSig.getVertexCount());
                // System.out.println(cfgSig.getEdgeCount());
            }
        }

        // for each file, scan
        for (File file : fileList) {
            ++scannedSampleCount;
            boolean detected = scanDexFile(file, signatures);
            if (detected)
                ++detectedSampleCount;
        }

        // print stats
        if (parsedArguments.printStatistics()) {
            System.out.println();
            System.out.println("Samples Scanned:\t" + scannedSampleCount);
            System.out.println("Functions Scanned:\t" + scannedFunctionCount);
            System.out.println("Samples Detected:\t" + detectedSampleCount);
            for (CFGSig signature : signatures) {
                System.out.println(signature.getName() + ": " + signature.getDetectionCount());
            }
        }

        if (parsedArguments.outputGraph()) {
            Graph graph = buildGraph();
            graph.shutdown();
        }
    }

    return;
}
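
The Ordering.natural() use in this scanner is the simplest kind: java.io.File implements Comparable<File> (it compares path names), so sortedCopy gives a deterministic scan order. In isolation:

import com.google.common.collect.Ordering;

import java.io.File;
import java.util.Arrays;
import java.util.List;

public class SortedFileListSketch {
    public static void main(String[] args) {
        List<File> files = Arrays.asList(new File("c.dex"), new File("a.dex"), new File("b.dex"));

        // File's natural order compares path names
        List<File> sorted = Ordering.natural().sortedCopy(files);
        System.out.println(sorted); // [a.dex, b.dex, c.dex]
    }
}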

From source file: org.apache.mahout.classifier.sgd.TrainNewsGroups.java

public static void main(String[] args) throws IOException {
    File base = new File(args[0]);

    Multiset<String> overallCounts = HashMultiset.create();

    int leakType = 0;
    if (args.length > 1) {
        leakType = Integer.parseInt(args[1]);
    }

    Dictionary newsGroups = new Dictionary();

    NewsgroupHelper helper = new NewsgroupHelper();
    helper.getEncoder().setProbes(2);
    AdaptiveLogisticRegression learningAlgorithm = new AdaptiveLogisticRegression(20, NewsgroupHelper.FEATURES,
            new L1());
    learningAlgorithm.setInterval(800);
    learningAlgorithm.setAveragingWindow(500);

    List<File> files = Lists.newArrayList();
    for (File newsgroup : base.listFiles()) {
        if (newsgroup.isDirectory()) {
            newsGroups.intern(newsgroup.getName());
            files.addAll(Arrays.asList(newsgroup.listFiles()));
        }
    }
    Collections.shuffle(files);
    System.out.println(files.size() + " training files");
    SGDInfo info = new SGDInfo();

    int k = 0;

    for (File file : files) {
        String ng = file.getParentFile().getName();
        int actual = newsGroups.intern(ng);

        Vector v = helper.encodeFeatureVector(file, actual, leakType, overallCounts);
        learningAlgorithm.train(actual, v);

        k++;
        State<AdaptiveLogisticRegression.Wrapper, CrossFoldLearner> best = learningAlgorithm.getBest();

        SGDHelper.analyzeState(info, leakType, k, best);
    }
    learningAlgorithm.close();
    SGDHelper.dissect(leakType, newsGroups, learningAlgorithm, files, overallCounts);
    System.out.println("exiting main");

    ModelSerializer.writeBinary("/tmp/news-group.model",
            learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0));

    List<Integer> counts = Lists.newArrayList();
    System.out.println("Word counts");
    for (String count : overallCounts.elementSet()) {
        counts.add(overallCounts.count(count));
    }
    Collections.sort(counts, Ordering.natural().reverse());
    k = 0;
    for (Integer count : counts) {
        System.out.println(k + "\t" + count);
        k++;
        if (k > 1000) {
            break;
        }
    }
}
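
The word-count report at the end sorts descending by passing Ordering.natural().reverse() to Collections.sort, which works because an Ordering is an ordinary java.util.Comparator. The same step in isolation (the counts are made up):

import com.google.common.collect.Ordering;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class DescendingCountsSketch {
    public static void main(String[] args) {
        List<Integer> counts = new ArrayList<Integer>(Arrays.asList(3, 10, 7));

        // reverse() turns natural ascending order into descending order
        Collections.sort(counts, Ordering.natural().reverse());
        System.out.println(counts); // [10, 7, 3]
    }
}

The SentimentModelTrainer example below ends with the identical idiom.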

From source file: com.memonews.mahout.sentiment.SentimentModelTrainer.java

public static void main(final String[] args) throws IOException {
    final File base = new File(args[0]);
    final String modelPath = args.length > 1 ? args[1] : "target/model";

    final Multiset<String> overallCounts = HashMultiset.create();

    final Dictionary newsGroups = new Dictionary();

    final SentimentModelHelper helper = new SentimentModelHelper();
    helper.getEncoder().setProbes(2);
    final AdaptiveLogisticRegression learningAlgorithm = new AdaptiveLogisticRegression(2,
            SentimentModelHelper.FEATURES, new L1());
    learningAlgorithm.setInterval(800);
    learningAlgorithm.setAveragingWindow(500);

    final List<File> files = Lists.newArrayList();
    for (final File newsgroup : base.listFiles()) {
        if (newsgroup.isDirectory()) {
            newsGroups.intern(newsgroup.getName());
            files.addAll(Arrays.asList(newsgroup.listFiles()));
        }
    }
    Collections.shuffle(files);
    System.out.printf("%d training files\n", files.size());
    final SGDInfo info = new SGDInfo();

    int k = 0;

    for (final File file : files) {
        final String ng = file.getParentFile().getName();
        final int actual = newsGroups.intern(ng);

        final Vector v = helper.encodeFeatureVector(file, overallCounts);
        learningAlgorithm.train(actual, v);

        k++;
        final State<AdaptiveLogisticRegression.Wrapper, CrossFoldLearner> best = learningAlgorithm.getBest();

        SGDHelper.analyzeState(info, 0, k, best);
    }
    learningAlgorithm.close();
    SGDHelper.dissect(0, newsGroups, learningAlgorithm, files, overallCounts);
    System.out.println("exiting main");

    ModelSerializer.writeBinary(modelPath,
            learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0));

    final List<Integer> counts = Lists.newArrayList();
    System.out.printf("Word counts\n");
    for (final String count : overallCounts.elementSet()) {
        counts.add(overallCounts.count(count));
    }
    Collections.sort(counts, Ordering.natural().reverse());
    k = 0;
    for (final Integer count : counts) {
        System.out.printf("%d\t%d\n", k, count);
        k++;
        if (k > 1000) {
            break;
        }
    }
}

From source file: org.apache.ctakes.temporal.data.analysis.PrintInconsistentAnnotations.java

public static void main(String[] args) throws Exception {
    Options options = CliFactory.parseArguments(Options.class, args);
    int windowSize = 50;

    List<Integer> patientSets = options.getPatients().getList();
    List<Integer> trainItems = THYMEData.getPatientSets(patientSets, THYMEData.TRAIN_REMAINDERS);
    List<File> files = THYMEData.getFilesFor(trainItems, options.getRawTextDirectory());

    CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(files);
    AggregateBuilder aggregateBuilder = new AggregateBuilder();
    aggregateBuilder.add(UriToDocumentTextAnnotator.getDescription());
    aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(XMIReader.class,
            XMIReader.PARAM_XMI_DIRECTORY, options.getXMIDirectory()));

    int totalDocTimeRels = 0;
    int totalInconsistentDocTimeRels = 0;
    for (Iterator<JCas> casIter = new JCasIterator(reader, aggregateBuilder.createAggregate()); casIter
            .hasNext();) {
        JCas jCas = casIter.next();
        String text = jCas.getDocumentText();
        JCas goldView = jCas.getView("GoldView");

        // group events by their narrative container
        Multimap<Annotation, EventMention> containers = HashMultimap.create();
        for (TemporalTextRelation relation : JCasUtil.select(goldView, TemporalTextRelation.class)) {
            if (relation.getCategory().equals("CONTAINS")) {
                Annotation arg1 = relation.getArg1().getArgument();
                Annotation arg2 = relation.getArg2().getArgument();
                if (arg2 instanceof EventMention) {
                    EventMention event = (EventMention) arg2;
                    containers.put(arg1, event);
                }
            }
        }

        // check each container for inconsistent DocTimeRels
        for (Annotation container : containers.keySet()) {
            Set<String> docTimeRels = Sets.newHashSet();
            for (EventMention event : containers.get(container)) {
                docTimeRels.add(event.getEvent().getProperties().getDocTimeRel());
            }
            totalDocTimeRels += docTimeRels.size();

            boolean inconsistentDocTimeRels;
            if (container instanceof EventMention) {
                EventMention mention = ((EventMention) container);
                String containerDocTimeRel = mention.getEvent().getProperties().getDocTimeRel();
                inconsistentDocTimeRels = false;
                for (String docTimeRel : docTimeRels) {
                    if (docTimeRel.equals(containerDocTimeRel)) {
                        continue;
                    }
                    if (containerDocTimeRel.equals("BEFORE/OVERLAP")
                            && (docTimeRel.equals("BEFORE") || docTimeRel.equals("OVERLAP"))) {
                        continue;
                    }
                    inconsistentDocTimeRels = true;
                    break;
                }
            } else {
                if (docTimeRels.size() == 1) {
                    inconsistentDocTimeRels = false;
                } else if (docTimeRels.contains("BEFORE/OVERLAP")) {
                    inconsistentDocTimeRels = docTimeRels.size() == 1
                            && (docTimeRels.contains("BEFORE") || docTimeRels.contains("OVERLAP"));
                } else {
                    inconsistentDocTimeRels = true;
                }
            }

            // if inconsistent: print events, DocTimeRels and surrounding context
            if (inconsistentDocTimeRels) {
                totalInconsistentDocTimeRels += docTimeRels.size();

                List<Integer> offsets = Lists.newArrayList();
                offsets.add(container.getBegin());
                offsets.add(container.getEnd());
                for (EventMention event : containers.get(container)) {
                    offsets.add(event.getBegin());
                    offsets.add(event.getEnd());
                }
                Collections.sort(offsets);
                int begin = Math.max(offsets.get(0) - windowSize, 0);
                int end = Math.min(offsets.get(offsets.size() - 1) + windowSize, text.length());
                System.err.printf("Inconsistent DocTimeRels in %s, ...%s...\n",
                        new File(ViewUriUtil.getURI(jCas)).getName(),
                        text.substring(begin, end).replaceAll("([\r\n])[\r\n]+", "$1"));
                if (container instanceof EventMention) {
                    System.err.printf("Container: \"%s\" (docTimeRel=%s)\n", container.getCoveredText(),
                            ((EventMention) container).getEvent().getProperties().getDocTimeRel());
                } else {
                    System.err.printf("Container: \"%s\"\n", container.getCoveredText());
                }
                Ordering<EventMention> byBegin = Ordering.natural()
                        .onResultOf(new Function<EventMention, Integer>() {
                            @Override
                            public Integer apply(@Nullable EventMention event) {
                                return event.getBegin();
                            }
                        });
                for (EventMention event : byBegin.sortedCopy(containers.get(container))) {
                    System.err.printf("* \"%s\" (docTimeRel=%s)\n", event.getCoveredText(),
                            event.getEvent().getProperties().getDocTimeRel());
                }
                System.err.println();
            }
        }
    }

    System.err.printf("Inconsistent DocTimeRels: %.1f%% (%d/%d)\n",
            100.0 * totalInconsistentDocTimeRels / totalDocTimeRels, totalInconsistentDocTimeRels,
            totalDocTimeRels);
}
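
The byBegin ordering above is the general onResultOf pattern: extract a Comparable key from each element and sort by it; the next example builds its byF1 ordering the same way. A self-contained sketch of the pattern (the Span class here is a made-up stand-in for an annotation):

import com.google.common.base.Function;
import com.google.common.collect.Ordering;

import java.util.Arrays;
import java.util.List;

public class SortByDerivedKeySketch {
    // Hypothetical stand-in for an annotation with a begin offset
    static class Span {
        final int begin;
        Span(int begin) { this.begin = begin; }
        @Override
        public String toString() { return "Span(" + begin + ")"; }
    }

    public static void main(String[] args) {
        List<Span> spans = Arrays.asList(new Span(40), new Span(5), new Span(17));

        // Sort elements by the key the Function extracts
        Ordering<Span> byBegin = Ordering.natural().onResultOf(new Function<Span, Integer>() {
            @Override
            public Integer apply(Span span) {
                return span.begin;
            }
        });

        System.out.println(byBegin.sortedCopy(spans)); // [Span(5), Span(17), Span(40)]
    }
}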

From source file: org.apache.ctakes.temporal.eval.EvaluationOfTimeSpans.java

public static void main(String[] args) throws Exception {
    Options options = CliFactory.parseArguments(Options.class, args);
    List<Integer> trainItems = null;
    List<Integer> devItems = null;
    List<Integer> testItems = null;

    List<Integer> patientSets = options.getPatients().getList();
    if (options.getXMLFormat() == XMLFormat.I2B2) {
        trainItems = I2B2Data.getTrainPatientSets(options.getXMLDirectory());
        devItems = I2B2Data.getDevPatientSets(options.getXMLDirectory());
        testItems = I2B2Data.getTestPatientSets(options.getXMLDirectory());
    } else {
        trainItems = THYMEData.getPatientSets(patientSets, options.getTrainRemainders().getList());
        devItems = THYMEData.getPatientSets(patientSets, options.getDevRemainders().getList());
        testItems = THYMEData.getPatientSets(patientSets, options.getTestRemainders().getList());
    }

    List<Integer> allTrain = new ArrayList<>(trainItems);
    List<Integer> allTest = null;

    if (options.getTest()) {
        allTrain.addAll(devItems);
        allTest = new ArrayList<>(testItems);
    } else {
        allTest = new ArrayList<>(devItems);
    }

    // specify the annotator classes to use
    List<Class<? extends JCasAnnotator_ImplBase>> annotatorClasses = Lists.newArrayList();
    if (options.getRunBackwards())
        annotatorClasses.add(BackwardsTimeAnnotator.class);
    if (options.getRunForwards())
        annotatorClasses.add(TimeAnnotator.class);
    if (options.getRunParserBased())
        annotatorClasses.add(ConstituencyBasedTimeAnnotator.class);
    if (options.getRunCrfBased())
        annotatorClasses.add(CRFTimeAnnotator.class);
    if (annotatorClasses.size() == 0) {
        // run all
        annotatorClasses.add(BackwardsTimeAnnotator.class);
        annotatorClasses.add(TimeAnnotator.class);
        annotatorClasses.add(ConstituencyBasedTimeAnnotator.class);
        annotatorClasses.add(CRFTimeAnnotator.class);
    }
    Map<Class<? extends JCasAnnotator_ImplBase>, String[]> annotatorTrainingArguments = Maps.newHashMap();

    // THYME best params: Backwards: 0.1, CRF 0.3, Time 0.1, Constituency 0.3
    // i2b2 best params: Backwards 0.1, CRF 3.0, Time 0.1, Constituency 0.3
    //      String gridParam = "0.01";
    annotatorTrainingArguments.put(BackwardsTimeAnnotator.class, new String[] { "-c", "0.1" });
    annotatorTrainingArguments.put(TimeAnnotator.class, new String[] { "-c", "0.1" });
    annotatorTrainingArguments.put(ConstituencyBasedTimeAnnotator.class, new String[] { "-c", "0.3" });
    annotatorTrainingArguments.put(CRFTimeAnnotator.class, new String[] { "-p", "c2=" + "0.3" });

    // run one evaluation per annotator class
    final Map<Class<?>, AnnotationStatistics<?>> annotatorStats = Maps.newHashMap();
    for (Class<? extends JCasAnnotator_ImplBase> annotatorClass : annotatorClasses) {
        EvaluationOfTimeSpans evaluation = new EvaluationOfTimeSpans(new File("target/eval/time-spans"),
                options.getRawTextDirectory(), options.getXMLDirectory(), options.getXMLFormat(),
                options.getSubcorpus(), options.getXMIDirectory(), options.getTreebankDirectory(),
                options.getFeatureSelectionThreshold(), options.getSMOTENeighborNumber(), annotatorClass,
                options.getPrintOverlappingSpans(), annotatorTrainingArguments.get(annotatorClass));
        evaluation.prepareXMIsFor(patientSets);
        evaluation.setSkipTrain(options.getSkipTrain());
        evaluation.printErrors = options.getPrintErrors();
        if (options.getI2B2Output() != null)
            evaluation.setI2B2Output(options.getI2B2Output() + "/" + annotatorClass.getSimpleName());
        String name = String.format("%s.errors", annotatorClass.getSimpleName());
        evaluation.setLogging(Level.FINE, new File("target/eval", name));
        AnnotationStatistics<String> stats = evaluation.trainAndTest(allTrain, allTest);
        annotatorStats.put(annotatorClass, stats);
    }

    // allow ordering of models by F1
    Ordering<Class<? extends JCasAnnotator_ImplBase>> byF1 = Ordering.natural()
            .onResultOf(new Function<Class<? extends JCasAnnotator_ImplBase>, Double>() {
                @Override
                public Double apply(Class<? extends JCasAnnotator_ImplBase> annotatorClass) {
                    return annotatorStats.get(annotatorClass).f1();
                }
            });

    // print out models, ordered by F1
    for (Class<?> annotatorClass : byF1.sortedCopy(annotatorClasses)) {
        System.err.printf("===== %s =====\n", annotatorClass.getSimpleName());
        System.err.println(annotatorStats.get(annotatorClass));
    }
}

From source file: com.yahoo.spaclu.data.index.IndexFeatureValueSpark.java

public static void main(String[] args) throws IOException {
    IndexFeatureValueOptions optionsFormatRawToDatabase = new IndexFeatureValueOptions(args);

    String inputPathString = optionsFormatRawToDatabase.getInputPath();
    String outputPathString = optionsFormatRawToDatabase.getOutputPath();
    String indexPathString = optionsFormatRawToDatabase.getIndexPath();
    int numberOfPartitions = optionsFormatRawToDatabase.getNumberOfPartitions();
    int maxCutoffThreshold = optionsFormatRawToDatabase.getMaximumCutoffThreshold();
    int minCutoffThreshold = optionsFormatRawToDatabase.getMinimumCutoffThreshold();

    /*
     * Set<String> excludingFeatureNames = new HashSet<String>();
     * excludingFeatureNames.add("login");
     * excludingFeatureNames.add("time"); excludingFeatureNames.add("day");
     * excludingFeatureNames.add("hms"); excludingFeatureNames.add("fail");
     */

    sLogger.info("Tool: " + IndexFeatureValueSpark.class.getSimpleName());
    sLogger.info(" - input path: " + inputPathString);
    sLogger.info(" - output path: " + outputPathString);
    sLogger.info(" - index path: " + indexPathString);
    sLogger.info(" - number of partitions: " + numberOfPartitions);
    sLogger.info(" - maximum cutoff: " + maxCutoffThreshold);
    sLogger.info(" - minimum cutoff: " + minCutoffThreshold);

    // Create a default hadoop configuration
    Configuration conf = new Configuration();

    // Parse created config to the HDFS
    FileSystem fs = FileSystem.get(conf);

    Path outputPath = new Path(outputPathString);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }

    SparkConf sparkConf = new SparkConf().setAppName(optionsFormatRawToDatabase.toString());

    JavaSparkContext sc = new JavaSparkContext(sparkConf);

    Map<Integer, String> featureIndices = getFeatureIndices(sc.textFile(indexPathString));

    List<Integer> listOfAllFeatureIndices = new LinkedList<Integer>();
    List<String> listOfAllFeatureInfo = new LinkedList<String>();
    Iterator<Integer> indexIter = featureIndices.keySet().iterator();
    while (indexIter.hasNext()) {
        Integer tempKey = indexIter.next();
        listOfAllFeatureIndices.add(tempKey);
        listOfAllFeatureInfo.add(featureIndices.get(tempKey));
    }

    JavaRDD<String> rawLines = sc.textFile(inputPathString).repartition(numberOfPartitions);

    JavaRDD<String[]> tokenizedLines = rawLines.map(new LineFilter(listOfAllFeatureIndices));
    JavaPairRDD<Entry<Integer, String>, Long> featureValuesCounts = tokenizedLines
            .flatMapToPair(new FeatureValueMapper()).reduceByKey(new FeatureValueReducer());

    Map<Integer, Builder<String, Long>> featureValueMapping = new Hashtable<Integer, Builder<String, Long>>();
    Iterator<Tuple2<Entry<Integer, String>, Long>> iter = featureValuesCounts.collect().iterator();
    while (iter.hasNext()) {
        Tuple2<Entry<Integer, String>, Long> temp = iter.next();
        Entry<Integer, String> featureValueEntry = temp._1;
        int featureIndex = featureValueEntry.getKey();
        String featureValue = featureValueEntry.getValue();
        long featureValueCount = temp._2;

        if (!featureValueMapping.containsKey(featureIndex)) {
            Builder<String, Long> mapBuilder = new Builder<String, Long>(Ordering.natural());

            featureValueMapping.put(featureIndex, mapBuilder);
        }

        featureValueMapping.get(featureIndex).put(featureValue, featureValueCount);
    }

    Preconditions.checkArgument(featureValueMapping.size() == listOfAllFeatureIndices.size());

    String outputFeaturePathString = outputPathString + "feature" + Settings.SEPERATOR;
    fs.mkdirs(new Path(outputFeaturePathString));

    String outputFeatureNamePathString = outputPathString + "feature.dat";
    Path outputFeatureNamePath = new Path(outputFeatureNamePathString);
    PrintWriter featureNamePrinterWriter = new PrintWriter(fs.create(outputFeatureNamePath), true);

    List<Integer> listOfFeatureIndicesToKeep = new LinkedList<Integer>();

    Map<Integer, Map<String, Integer>> featureValueIndex = new Hashtable<Integer, Map<String, Integer>>();
    for (int d = 0; d < featureValueMapping.size(); d++) {
        Map<String, Integer> valueToIndex = new Hashtable<String, Integer>();
        Map<Integer, String> indexToValue = new Hashtable<Integer, String>();

        ImmutableSortedMap<String, Long> immutableSortedMap = featureValueMapping.get(d).build();
        for (String keyString : immutableSortedMap.keySet()) {
            valueToIndex.put(keyString, valueToIndex.size());
            indexToValue.put(indexToValue.size(), keyString);
        }

        if (valueToIndex.size() <= minCutoffThreshold || valueToIndex.size() > maxCutoffThreshold) {
            sLogger.info("Feature (" + listOfAllFeatureInfo.get(d) + ") contains " + valueToIndex.size()
                    + " values, skip...");

            continue;
        } else {
            sLogger.info("Feature (" + listOfAllFeatureInfo.get(d) + ") contains " + valueToIndex.size()
                    + " values.");

            listOfFeatureIndicesToKeep.add(listOfAllFeatureIndices.get(d));
            featureNamePrinterWriter.println(listOfAllFeatureInfo.get(d));
        }

        String outputFeatureIndexPathString = outputFeaturePathString + "index" + Settings.UNDER_SCORE
                + featureValueIndex.size() + ".dat";
        Path outputIndexPath = new Path(outputFeatureIndexPathString);

        featureValueIndex.put(featureValueIndex.size(), valueToIndex);

        PrintWriter featureValueIndexPrinterWriter = new PrintWriter(fs.create(outputIndexPath), true);
        for (int i = 0; i < indexToValue.size(); i++) {
            featureValueIndexPrinterWriter.println("" + i + Settings.TAB + indexToValue.get(i) + Settings.TAB
                    + immutableSortedMap.get(indexToValue.get(i)));
        }
        featureValueIndexPrinterWriter.close();
    }

    featureNamePrinterWriter.close();

    JavaRDD<String[]> filteredLines = rawLines.map(new LineFilter(listOfFeatureIndicesToKeep));
    JavaRDD<FeatureIntegerVector> indexedData = filteredLines.map(new FeatureValueIndexer(featureValueIndex));

    String outputDataPathString = outputPathString + "data";
    Path outputDataPath = new Path(outputDataPathString);
    if (fs.exists(outputDataPath)) {
        fs.delete(outputDataPath, true);
    }
    indexedData.saveAsTextFile(outputDataPathString);

    sc.stop();
}
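
Here Ordering.natural() seeds the sorted-map builder (judging by the build() call, the Builder in the code above is ImmutableSortedMap.Builder), so feature values come back out of build() in sorted key order. The idiom in isolation, with made-up entries:

import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Ordering;

public class SortedMapBuilderSketch {
    public static void main(String[] args) {
        // The builder keeps entries ordered by the comparator it is given
        ImmutableSortedMap.Builder<String, Long> builder =
                new ImmutableSortedMap.Builder<String, Long>(Ordering.<String>natural());
        builder.put("beta", 2L);
        builder.put("alpha", 10L);

        ImmutableSortedMap<String, Long> map = builder.build();
        System.out.println(map.keySet()); // [alpha, beta]
    }
}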

From source file: org.caleydo.view.domino.internal.util.IndexedSort.java

public static <T extends Comparable<T>> int[] sortIndex(List<T> list) {
    return sortIndex(list, Ordering.natural());
}