Example usage for org.apache.commons.cli HelpFormatter printHelp

List of usage examples for org.apache.commons.cli HelpFormatter printHelp

Introduction

In this page you can find the example usage for org.apache.commons.cli HelpFormatter printHelp.

Prototype

public void printHelp(String cmdLineSyntax, Options options) 

Source Link

Document

Print the help for options with the specified command line syntax.

Usage

From source file:general.Main.java

/**
 * Selects the files to be processed and specifies the files to write to.
 *
 * @param args Arguments to specify runtime behavior.
 *///  w  ww  . ja v  a  2  s  . co m
public static void main(String[] args) throws InvocationTargetException, NoSuchMethodException,
        InstantiationException, IllegalAccessException {
    Options options = new Options();
    options.addOption("l", "logging", false, "enables file logging");
    options.addOption("j", "jena", false, "uses the Jena SPARQL Parser");
    options.addOption("o", "openrdf", false, "uses the OpenRDF SPARQL Parser");
    options.addOption("f", "file", true, "defines the input file prefix");
    options.addOption("h", "help", false, "displays this help");
    options.addOption("t", "tsv", false, "reads from .tsv-files");
    // options.addOption("p", "parquet", false, "read from .parquet-files");
    options.addOption("n", "numberOfThreads", true, "number of used threads, default 1");
    options.addOption("b", "withBots", false, "enables metric calculation for bot queries+");
    options.addOption("p", "readPreprocessed", false, "enables reading of preprocessed files");

    //some parameters which can be changed through parameters
    //QueryHandler queryHandler = new OpenRDFQueryHandler();
    String inputFilePrefix;
    String inputFileSuffix = ".tsv";
    String queryParserName = "OpenRDF";
    Class inputHandlerClass = null;
    Class queryHandlerClass = null;
    int numberOfThreads = 1;

    CommandLineParser parser = new DefaultParser();
    CommandLine cmd;
    try {
        cmd = parser.parse(options, args);
        if (cmd.hasOption("help")) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("help", options);
            return;
        }
        if (cmd.hasOption("openrdf")) {
            queryHandlerClass = OpenRDFQueryHandler.class;
        }
        if (cmd.hasOption("tsv")) {
            inputFileSuffix = ".tsv";
            inputHandlerClass = InputHandlerTSV.class;
        }
        if (cmd.hasOption("parquet")) {
            inputFileSuffix = ".parquet";
            Logger.getLogger("org").setLevel(Level.WARN);
            Logger.getLogger("akka").setLevel(Level.WARN);
            SparkConf conf = new SparkConf().setAppName("SPARQLQueryAnalyzer").setMaster("local");
            JavaSparkContext sc = new JavaSparkContext(conf);
            inputHandlerClass = InputHandlerParquet.class;
        }
        if (inputHandlerClass == null) {
            System.out.println("Please specify which parser to use, either -t for TSV or -p for parquet.");
        }
        if (cmd.hasOption("file")) {
            inputFilePrefix = cmd.getOptionValue("file").trim();
        } else {
            System.out.println(
                    "Please specify at least the file which we should work on using the option '--file PREFIX' or 'f PREFIX'");
            return;
        }
        if (cmd.hasOption("logging")) {
            LoggingHandler.initFileLog(queryParserName, inputFilePrefix);
        }
        if (cmd.hasOption("numberOfThreads")) {
            numberOfThreads = Integer.parseInt(cmd.getOptionValue("numberOfThreads"));
        }
        if (cmd.hasOption("withBots")) {
            withBots = true;
        }
        if (cmd.hasOption("readPreprocessed")) {
            readPreprocessed = true;
        }
    } catch (UnrecognizedOptionException e) {
        System.out.println("Unrecognized commandline option: " + e.getOption());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("help", options);
        return;
    } catch (ParseException e) {
        System.out.println(
                "There was an error while parsing your command line input. Did you rechecked your syntax before running?");
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("help", options);
        return;
    }

    LoggingHandler.initConsoleLog();

    loadPreBuildQueryTypes();

    long startTime = System.nanoTime();

    ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);

    for (int day = 1; day <= 31; day++) {
        String inputFile = inputFilePrefix + String.format("%02d", day) + inputFileSuffix;
        Runnable parseOneMonthWorker = new ParseOneMonthWorker(inputFile, inputFilePrefix, inputHandlerClass,
                queryParserName, queryHandlerClass, day);
        executor.execute(parseOneMonthWorker);
    }
    executor.shutdown();

    while (!executor.isTerminated()) {
        //wait until all workers are finished
    }

    writeQueryTypes(inputFilePrefix);

    long stopTime = System.nanoTime();
    long millis = TimeUnit.MILLISECONDS.convert(stopTime - startTime, TimeUnit.NANOSECONDS);
    Date date = new Date(millis);
    System.out.println("Finished executing with all threads: "
            + new SimpleDateFormat("mm-dd HH:mm:ss:SSSSSSS").format(date));
}

From source file:apps.quantification.LearnQuantificationSVMLight.java

public static void main(String[] args) throws IOException {
    String cmdLineSyntax = LearnQuantificationSVMLight.class.getName()
            + " [OPTIONS] <path to svm_light_learn> <path to svm_light_classify> <trainingIndexDirectory> <outputDirectory>";

    Options options = new Options();

    OptionBuilder.withArgName("f");
    OptionBuilder.withDescription("Number of folds");
    OptionBuilder.withLongOpt("f");
    OptionBuilder.isRequired(true);/*from  www . j  a v  a2  s.c  om*/
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("c");
    OptionBuilder.withDescription("The c value for svm_light (default 1)");
    OptionBuilder.withLongOpt("c");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("k");
    OptionBuilder.withDescription("Kernel type (default 0: linear, 1: polynomial, 2: RBF, 3: sigmoid)");
    OptionBuilder.withLongOpt("k");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("t");
    OptionBuilder.withDescription("Path for temporary files");
    OptionBuilder.withLongOpt("t");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("v");
    OptionBuilder.withDescription("Verbose output");
    OptionBuilder.withLongOpt("v");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("s");
    OptionBuilder.withDescription("Don't delete temporary training file in svm_light format (default: delete)");
    OptionBuilder.withLongOpt("s");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    SvmLightLearnerCustomizer classificationLearnerCustomizer = null;
    SvmLightClassifierCustomizer classificationCustomizer = null;

    int folds = -1;

    GnuParser parser = new GnuParser();
    String[] remainingArgs = null;
    try {
        CommandLine line = parser.parse(options, args);

        remainingArgs = line.getArgs();

        classificationLearnerCustomizer = new SvmLightLearnerCustomizer(remainingArgs[0]);
        classificationCustomizer = new SvmLightClassifierCustomizer(remainingArgs[1]);

        folds = Integer.parseInt(line.getOptionValue("f"));

        if (line.hasOption("c"))
            classificationLearnerCustomizer.setC(Float.parseFloat(line.getOptionValue("c")));

        if (line.hasOption("k")) {
            System.out.println("Kernel type: " + line.getOptionValue("k"));
            classificationLearnerCustomizer.setKernelType(Integer.parseInt(line.getOptionValue("k")));
        }

        if (line.hasOption("v"))
            classificationLearnerCustomizer.printSvmLightOutput(true);

        if (line.hasOption("s"))
            classificationLearnerCustomizer.setDeleteTrainingFiles(false);

        if (line.hasOption("t")) {
            classificationLearnerCustomizer.setTempPath(line.getOptionValue("t"));
            classificationCustomizer.setTempPath(line.getOptionValue("t"));
        }

    } catch (Exception exp) {
        System.err.println("Parsing failed.  Reason: " + exp.getMessage());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(cmdLineSyntax, options);
        System.exit(-1);
    }

    assert (classificationLearnerCustomizer != null);

    if (remainingArgs.length != 4) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(cmdLineSyntax, options);
        System.exit(-1);
    }

    String indexFile = remainingArgs[2];

    File file = new File(indexFile);

    String indexName = file.getName();
    String indexPath = file.getParent();

    String outputPath = remainingArgs[3];

    SvmLightLearner classificationLearner = new SvmLightLearner();

    classificationLearner.setRuntimeCustomizer(classificationLearnerCustomizer);

    FileSystemStorageManager fssm = new FileSystemStorageManager(indexPath, false);
    fssm.open();

    IIndex training = TroveReadWriteHelper.readIndex(fssm, indexName, TroveContentDBType.Full,
            TroveClassificationDBType.Full);

    final TextualProgressBar progressBar = new TextualProgressBar("Learning the quantifiers");

    IOperationStatusListener status = new IOperationStatusListener() {

        @Override
        public void operationStatus(double percentage) {
            progressBar.signal((int) percentage);
        }
    };

    QuantificationLearner quantificationLearner = new QuantificationLearner(folds, classificationLearner,
            classificationLearnerCustomizer, classificationCustomizer, ClassificationMode.PER_CATEGORY,
            new LogisticFunction(), status);

    IQuantifier[] quantifiers = quantificationLearner.learn(training);

    File executableFile = new File(classificationLearnerCustomizer.getSvmLightLearnPath());
    IDataManager classifierDataManager = new SvmLightDataManager(new SvmLightClassifierCustomizer(
            executableFile.getParentFile().getAbsolutePath() + Os.pathSeparator() + "svm_light_classify"));
    String description = "_SVMLight_C-" + classificationLearnerCustomizer.getC() + "_K-"
            + classificationLearnerCustomizer.getKernelType();
    if (classificationLearnerCustomizer.getAdditionalParameters().length() > 0)
        description += "_" + classificationLearnerCustomizer.getAdditionalParameters();
    String quantifierPrefix = indexName + "_Quantifier-" + folds + description;

    FileSystemStorageManager fssmo = new FileSystemStorageManager(
            outputPath + File.separatorChar + quantifierPrefix, true);
    fssmo.open();
    QuantificationLearner.write(quantifiers, fssmo, classifierDataManager);
    fssmo.close();

    BufferedWriter bfs = new BufferedWriter(
            new FileWriter(outputPath + File.separatorChar + quantifierPrefix + "_rates.txt"));
    TShortDoubleHashMap simpleTPRs = quantificationLearner.getSimpleTPRs();
    TShortDoubleHashMap simpleFPRs = quantificationLearner.getSimpleFPRs();
    TShortDoubleHashMap scaledTPRs = quantificationLearner.getScaledTPRs();
    TShortDoubleHashMap scaledFPRs = quantificationLearner.getScaledFPRs();

    ContingencyTableSet contingencyTableSet = quantificationLearner.getContingencyTableSet();

    short[] cats = simpleTPRs.keys();
    for (int i = 0; i < cats.length; ++i) {
        short cat = cats[i];
        String catName = training.getCategoryDB().getCategoryName(cat);
        ContingencyTable contingencyTable = contingencyTableSet.getCategoryContingencyTable(cat);
        double simpleTPR = simpleTPRs.get(cat);
        double simpleFPR = simpleFPRs.get(cat);
        double scaledTPR = scaledTPRs.get(cat);
        double scaledFPR = scaledFPRs.get(cat);
        String line = quantifierPrefix + "\ttrain\tsimple\t" + catName + "\t" + cat + "\t"
                + contingencyTable.tp() + "\t" + contingencyTable.fp() + "\t" + contingencyTable.fn() + "\t"
                + contingencyTable.tn() + "\t" + simpleTPR + "\t" + simpleFPR + "\n";
        bfs.write(line);
        line = quantifierPrefix + "\ttrain\tscaled\t" + catName + "\t" + cat + "\t" + contingencyTable.tp()
                + "\t" + contingencyTable.fp() + "\t" + contingencyTable.fn() + "\t" + contingencyTable.tn()
                + "\t" + scaledTPR + "\t" + scaledFPR + "\n";
        bfs.write(line);
    }
    bfs.close();
}

From source file:apps.quantification.QuantifySVMLight.java

public static void main(String[] args) throws IOException {
    String cmdLineSyntax = QuantifySVMLight.class.getName()
            + " [OPTIONS] <path to svm_light_classify> <testIndexDirectory> <quantificationModelDirectory>";

    Options options = new Options();

    OptionBuilder.withArgName("d");
    OptionBuilder.withDescription("Dump confidences file");
    OptionBuilder.withLongOpt("d");
    OptionBuilder.isRequired(false);//from   ww  w .  ja  v  a  2s.  co m
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("t");
    OptionBuilder.withDescription("Path for temporary files");
    OptionBuilder.withLongOpt("t");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("v");
    OptionBuilder.withDescription("Verbose output");
    OptionBuilder.withLongOpt("v");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("s");
    OptionBuilder.withDescription("Don't delete temporary files in svm_light format (default: delete)");
    OptionBuilder.withLongOpt("s");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    SvmLightClassifierCustomizer customizer = null;

    GnuParser parser = new GnuParser();
    String[] remainingArgs = null;
    try {
        CommandLine line = parser.parse(options, args);

        remainingArgs = line.getArgs();

        customizer = new SvmLightClassifierCustomizer(remainingArgs[0]);

        if (line.hasOption("v"))
            customizer.printSvmLightOutput(true);

        if (line.hasOption("s")) {
            System.out.println("Keeping temporary files.");
            customizer.setDeleteTestFiles(false);
            customizer.setDeletePredictionsFiles(false);
        }

        if (line.hasOption("t"))
            customizer.setTempPath(line.getOptionValue("t"));

    } catch (Exception exp) {
        System.err.println("Parsing failed.  Reason: " + exp.getMessage());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(cmdLineSyntax, options);
        System.exit(-1);
    }

    if (remainingArgs.length != 3) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(cmdLineSyntax, options);
        System.exit(-1);
    }

    String indexFile = remainingArgs[1];

    File file = new File(indexFile);

    String indexName = file.getName();
    String indexPath = file.getParent();

    String quantifierFilename = remainingArgs[2];

    FileSystemStorageManager indexFssm = new FileSystemStorageManager(indexPath, false);
    indexFssm.open();

    IIndex test = TroveReadWriteHelper.readIndex(indexFssm, indexName, TroveContentDBType.Full,
            TroveClassificationDBType.Full);

    indexFssm.close();

    FileSystemStorageManager quantifierFssm = new FileSystemStorageManager(quantifierFilename, false);
    quantifierFssm.open();

    SvmLightDataManager classifierDataManager = new SvmLightDataManager(customizer);

    FileSystemStorageManager fssm = new FileSystemStorageManager(quantifierFilename, false);
    fssm.open();

    IQuantifier[] quantifiers = QuantificationLearner.read(fssm, classifierDataManager,
            ClassificationMode.PER_CATEGORY);
    fssm.close();

    quantifierFssm.close();

    Quantification ccQuantification = quantifiers[0].quantify(test);
    Quantification paQuantification = quantifiers[1].quantify(test);
    Quantification accQuantification = quantifiers[2].quantify(test);
    Quantification maxQuantification = quantifiers[3].quantify(test);
    Quantification sccQuantification = quantifiers[4].quantify(test);
    Quantification spaQuantification = quantifiers[5].quantify(test);
    Quantification trueQuantification = new Quantification("True", test.getClassificationDB());

    File quantifierFile = new File(quantifierFilename);

    String quantificationName = quantifierFile.getParent() + Os.pathSeparator() + indexName + "_"
            + quantifierFile.getName() + ".txt";

    BufferedWriter writer = new BufferedWriter(new FileWriter(quantificationName));
    IShortIterator iterator = test.getCategoryDB().getCategories();
    while (iterator.hasNext()) {
        short category = iterator.next();
        String prefix = quantifierFile.getName() + "\t" + indexName + "\t"
                + test.getCategoryDB().getCategoryName(category) + "\t" + category + "\t"
                + trueQuantification.getQuantification(category) + "\t";

        writer.write(prefix + ccQuantification.getName() + "\t" + ccQuantification.getQuantification(category)
                + "\n");
        writer.write(prefix + paQuantification.getName() + "\t" + paQuantification.getQuantification(category)
                + "\n");
        writer.write(prefix + accQuantification.getName() + "\t" + accQuantification.getQuantification(category)
                + "\n");
        writer.write(prefix + maxQuantification.getName() + "\t" + maxQuantification.getQuantification(category)
                + "\n");
        writer.write(prefix + sccQuantification.getName() + "\t" + sccQuantification.getQuantification(category)
                + "\n");
        writer.write(prefix + spaQuantification.getName() + "\t" + spaQuantification.getQuantification(category)
                + "\n");
    }
    writer.close();

    BufferedWriter bfs = new BufferedWriter(new FileWriter(quantifierFile.getParent() + Os.pathSeparator()
            + indexName + "_" + quantifierFile.getName() + "_rates.txt"));
    TShortDoubleHashMap simpleTPRs = ((CCQuantifier) quantifiers[0]).getSimpleTPRs();
    TShortDoubleHashMap simpleFPRs = ((CCQuantifier) quantifiers[0]).getSimpleFPRs();
    TShortDoubleHashMap maxTPRs = ((CCQuantifier) ((ScaledQuantifier) quantifiers[3]).getInternalQuantifier())
            .getSimpleTPRs();
    TShortDoubleHashMap maxFPRs = ((CCQuantifier) ((ScaledQuantifier) quantifiers[3]).getInternalQuantifier())
            .getSimpleFPRs();
    TShortDoubleHashMap scaledTPRs = ((PAQuantifier) quantifiers[1]).getScaledTPRs();
    TShortDoubleHashMap scaledFPRs = ((PAQuantifier) quantifiers[1]).getScaledFPRs();

    ContingencyTableSet simpleContingencyTableSet = ((CCQuantifier) quantifiers[0]).getContingencyTableSet();
    ContingencyTableSet maxContingencyTableSet = ((CCQuantifier) ((ScaledQuantifier) quantifiers[3])
            .getInternalQuantifier()).getContingencyTableSet();

    short[] cats = simpleTPRs.keys();
    for (int i = 0; i < cats.length; ++i) {
        short cat = cats[i];
        String catName = test.getCategoryDB().getCategoryName(cat);
        ContingencyTable simpleContingencyTable = simpleContingencyTableSet.getCategoryContingencyTable(cat);
        ContingencyTable maxContingencyTable = maxContingencyTableSet.getCategoryContingencyTable(cat);
        double simpleTPR = simpleTPRs.get(cat);
        double simpleFPR = simpleFPRs.get(cat);
        double maxTPR = maxTPRs.get(cat);
        double maxFPR = maxFPRs.get(cat);
        double scaledTPR = scaledTPRs.get(cat);
        double scaledFPR = scaledFPRs.get(cat);
        String line = indexName + "_" + quantifierFile.getName() + "\ttest\tsimple\t" + catName + "\t" + cat
                + "\t" + simpleContingencyTable.tp() + "\t" + simpleContingencyTable.fp() + "\t"
                + simpleContingencyTable.fn() + "\t" + simpleContingencyTable.tn() + "\t" + simpleTPR + "\t"
                + simpleFPR + "\n";
        bfs.write(line);
        line = indexName + "_" + quantifierFile.getName() + "\ttest\tmax\t" + catName + "\t" + cat + "\t"
                + maxContingencyTable.tp() + "\t" + maxContingencyTable.fp() + "\t" + maxContingencyTable.fn()
                + "\t" + maxContingencyTable.tn() + "\t" + maxTPR + "\t" + maxFPR + "\n";
        bfs.write(line);
        line = indexName + "_" + quantifierFile.getName() + "\ttest\tscaled\t" + catName + "\t" + cat + "\t"
                + simpleContingencyTable.tp() + "\t" + simpleContingencyTable.fp() + "\t"
                + simpleContingencyTable.fn() + "\t" + simpleContingencyTable.tn() + "\t" + scaledTPR + "\t"
                + scaledFPR + "\n";
        bfs.write(line);
    }
    bfs.close();
}

From source file:apps.quantification.QuantifySVMPerf.java

public static void main(String[] args) throws IOException {
    String cmdLineSyntax = QuantifySVMPerf.class.getName()
            + " [OPTIONS] <path to svm_perf_classify> <testIndexDirectory> <quantificationModelDirectory>";

    Options options = new Options();

    OptionBuilder.withArgName("d");
    OptionBuilder.withDescription("Dump confidences file");
    OptionBuilder.withLongOpt("d");
    OptionBuilder.isRequired(false);//w w w .j av  a  2  s  . co  m
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("t");
    OptionBuilder.withDescription("Path for temporary files");
    OptionBuilder.withLongOpt("t");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("v");
    OptionBuilder.withDescription("Verbose output");
    OptionBuilder.withLongOpt("v");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("s");
    OptionBuilder.withDescription("Don't delete temporary files in svm_perf format (default: delete)");
    OptionBuilder.withLongOpt("s");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    SvmPerfClassifierCustomizer customizer = null;

    GnuParser parser = new GnuParser();
    String[] remainingArgs = null;
    try {
        CommandLine line = parser.parse(options, args);

        remainingArgs = line.getArgs();

        customizer = new SvmPerfClassifierCustomizer(remainingArgs[0]);

        if (line.hasOption("v"))
            customizer.printSvmPerfOutput(true);

        if (line.hasOption("s")) {
            System.out.println("Keeping temporary files.");
            customizer.setDeleteTestFiles(false);
            customizer.setDeletePredictionsFiles(false);
        }

        if (line.hasOption("t"))
            customizer.setTempPath(line.getOptionValue("t"));

    } catch (Exception exp) {
        System.err.println("Parsing failed.  Reason: " + exp.getMessage());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(cmdLineSyntax, options);
        System.exit(-1);
    }

    if (remainingArgs.length != 3) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(cmdLineSyntax, options);
        System.exit(-1);
    }

    String indexFile = remainingArgs[1];

    File file = new File(indexFile);

    String indexName = file.getName();
    String indexPath = file.getParent();

    String quantifierFilename = remainingArgs[2];

    FileSystemStorageManager indexFssm = new FileSystemStorageManager(indexPath, false);
    indexFssm.open();

    IIndex test = TroveReadWriteHelper.readIndex(indexFssm, indexName, TroveContentDBType.Full,
            TroveClassificationDBType.Full);

    indexFssm.close();

    FileSystemStorageManager quantifierFssm = new FileSystemStorageManager(quantifierFilename, false);
    quantifierFssm.open();

    SvmPerfDataManager classifierDataManager = new SvmPerfDataManager(customizer);

    FileSystemStorageManager fssm = new FileSystemStorageManager(quantifierFilename, false);
    fssm.open();

    IQuantifier[] quantifiers = QuantificationLearner.read(fssm, classifierDataManager,
            ClassificationMode.PER_CATEGORY);
    fssm.close();

    quantifierFssm.close();

    Quantification ccQuantification = quantifiers[0].quantify(test);
    Quantification paQuantification = quantifiers[1].quantify(test);
    Quantification accQuantification = quantifiers[2].quantify(test);
    Quantification maxQuantification = quantifiers[3].quantify(test);
    Quantification sccQuantification = quantifiers[4].quantify(test);
    Quantification spaQuantification = quantifiers[5].quantify(test);
    Quantification trueQuantification = new Quantification("True", test.getClassificationDB());

    File quantifierFile = new File(quantifierFilename);

    String quantificationName = quantifierFile.getParent() + Os.pathSeparator() + indexName + "_"
            + quantifierFile.getName() + ".txt";

    BufferedWriter writer = new BufferedWriter(new FileWriter(quantificationName));
    IShortIterator iterator = test.getCategoryDB().getCategories();
    while (iterator.hasNext()) {
        short category = iterator.next();
        String prefix = quantifierFile.getName() + "\t" + indexName + "\t"
                + test.getCategoryDB().getCategoryName(category) + "\t" + category + "\t"
                + trueQuantification.getQuantification(category) + "\t";

        writer.write(prefix + ccQuantification.getName() + "\t" + ccQuantification.getQuantification(category)
                + "\n");
        writer.write(prefix + paQuantification.getName() + "\t" + paQuantification.getQuantification(category)
                + "\n");
        writer.write(prefix + accQuantification.getName() + "\t" + accQuantification.getQuantification(category)
                + "\n");
        writer.write(prefix + maxQuantification.getName() + "\t" + maxQuantification.getQuantification(category)
                + "\n");
        writer.write(prefix + sccQuantification.getName() + "\t" + sccQuantification.getQuantification(category)
                + "\n");
        writer.write(prefix + spaQuantification.getName() + "\t" + spaQuantification.getQuantification(category)
                + "\n");
    }
    writer.close();

    BufferedWriter bfs = new BufferedWriter(new FileWriter(quantifierFile.getParent() + Os.pathSeparator()
            + indexName + "_" + quantifierFile.getName() + "_rates.txt"));
    TShortDoubleHashMap simpleTPRs = ((CCQuantifier) quantifiers[0]).getSimpleTPRs();
    TShortDoubleHashMap simpleFPRs = ((CCQuantifier) quantifiers[0]).getSimpleFPRs();
    TShortDoubleHashMap maxTPRs = ((CCQuantifier) ((ScaledQuantifier) quantifiers[3]).getInternalQuantifier())
            .getSimpleTPRs();
    TShortDoubleHashMap maxFPRs = ((CCQuantifier) ((ScaledQuantifier) quantifiers[3]).getInternalQuantifier())
            .getSimpleFPRs();
    TShortDoubleHashMap scaledTPRs = ((PAQuantifier) quantifiers[1]).getScaledTPRs();
    TShortDoubleHashMap scaledFPRs = ((PAQuantifier) quantifiers[1]).getScaledFPRs();

    ContingencyTableSet simpleContingencyTableSet = ((CCQuantifier) quantifiers[0]).getContingencyTableSet();
    ContingencyTableSet maxContingencyTableSet = ((CCQuantifier) ((ScaledQuantifier) quantifiers[3])
            .getInternalQuantifier()).getContingencyTableSet();

    short[] cats = simpleTPRs.keys();
    for (int i = 0; i < cats.length; ++i) {
        short cat = cats[i];
        String catName = test.getCategoryDB().getCategoryName(cat);
        ContingencyTable simpleContingencyTable = simpleContingencyTableSet.getCategoryContingencyTable(cat);
        ContingencyTable maxContingencyTable = maxContingencyTableSet.getCategoryContingencyTable(cat);
        double simpleTPR = simpleTPRs.get(cat);
        double simpleFPR = simpleFPRs.get(cat);
        double maxTPR = maxTPRs.get(cat);
        double maxFPR = maxFPRs.get(cat);
        double scaledTPR = scaledTPRs.get(cat);
        double scaledFPR = scaledFPRs.get(cat);
        String line = indexName + "_" + quantifierFile.getName() + "\ttest\tsimple\t" + catName + "\t" + cat
                + "\t" + simpleContingencyTable.tp() + "\t" + simpleContingencyTable.fp() + "\t"
                + simpleContingencyTable.fn() + "\t" + simpleContingencyTable.tn() + "\t" + simpleTPR + "\t"
                + simpleFPR + "\n";
        bfs.write(line);
        line = indexName + "_" + quantifierFile.getName() + "\ttest\tmax\t" + catName + "\t" + cat + "\t"
                + maxContingencyTable.tp() + "\t" + maxContingencyTable.fp() + "\t" + maxContingencyTable.fn()
                + "\t" + maxContingencyTable.tn() + "\t" + maxTPR + "\t" + maxFPR + "\n";
        bfs.write(line);
        line = indexName + "_" + quantifierFile.getName() + "\ttest\tscaled\t" + catName + "\t" + cat + "\t"
                + simpleContingencyTable.tp() + "\t" + simpleContingencyTable.fp() + "\t"
                + simpleContingencyTable.fn() + "\t" + simpleContingencyTable.tn() + "\t" + scaledTPR + "\t"
                + scaledFPR + "\n";
        bfs.write(line);
    }
    bfs.close();
}

From source file:CircularGenerator.java

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options helpOptions = new Options();
    helpOptions.addOption("h", "help", false, "show this help page");
    Options options = new Options();
    options.addOption("h", "help", false, "show this help page");
    options.addOption(OptionBuilder.withLongOpt("input").withArgName("INPUT")
            .withDescription("the input FastA File").isRequired().hasArg().create("i"));
    options.addOption(OptionBuilder.withLongOpt("elongation").withArgName("ELONGATION")
            .withDescription("the elongation factor [INT]").isRequired().hasArg().create("e"));
    options.addOption(OptionBuilder.withLongOpt("seq").withArgName("SEQ")
            .withDescription("the names of the sequences that should to be elongated").isRequired().hasArg()
            .hasOptionalArgs().hasArg().create("s"));
    HelpFormatter helpformatter = new HelpFormatter();
    CommandLineParser parser = new BasicParser();
    try {/*w ww .  j av a 2s .c o m*/
        CommandLine cmd = parser.parse(helpOptions, args);
        if (cmd.hasOption('h')) {
            helpformatter.printHelp(CLASS_NAME + "v" + VERSION, options);
            System.exit(0);
        }
    } catch (ParseException e1) {
    }
    String input = "";
    String tmpElongation = "";
    Integer elongation = 0;
    String[] names = new String[0];
    try {
        CommandLine cmd = parser.parse(options, args);
        if (cmd.hasOption('i')) {
            input = cmd.getOptionValue('i');
        }
        if (cmd.hasOption('e')) {
            tmpElongation = cmd.getOptionValue('e');
            try {
                elongation = Integer.parseInt(tmpElongation);
            } catch (Exception e) {
                System.err.println("elongation not an Integer: " + tmpElongation);
                System.exit(0);
            }
        }
        if (cmd.hasOption('s')) {
            names = cmd.getOptionValues('s');
        }
    } catch (ParseException e) {
        helpformatter.printHelp(CLASS_NAME, options);
        System.err.println(e.getMessage());
        System.exit(0);
    }

    CircularGenerator cg = new CircularGenerator(elongation);
    File f = new File(input);
    for (String s : names) {
        cg.keys_to_treat_circular.add(s);
    }
    cg.extendFastA(f);

}

From source file:edu.umd.ujjwalgoel.AnalyzePMI.java

@SuppressWarnings({ "static-access" })
public static void main(String[] args) {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));

    CommandLine cmdline = null;/*from w  w  w.  j a v  a 2s  .  co  m*/
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(AnalyzePMI.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    System.out.println("input path: " + inputPath);

    BufferedReader br = null;
    int countPairs = 0;

    List<PairOfWritables<PairOfStrings, FloatWritable>> pmis = new ArrayList<PairOfWritables<PairOfStrings, FloatWritable>>();
    List<PairOfWritables<PairOfStrings, FloatWritable>> cloudPmis = new ArrayList<PairOfWritables<PairOfStrings, FloatWritable>>();
    List<PairOfWritables<PairOfStrings, FloatWritable>> lovePmis = new ArrayList<PairOfWritables<PairOfStrings, FloatWritable>>();

    PairOfWritables<PairOfStrings, FloatWritable> highestPMI = null;
    PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI = null;
    PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI2 = null;
    PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI3 = null;

    PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI = null;
    PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI2 = null;
    PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI3 = null;

    try {
        FileSystem fs = FileSystem.get(new Configuration());
        FileStatus[] status = fs.listStatus(new Path(inputPath));
        //PairOfStrings pair = new PairOfStrings();
        for (int i = 0; i < status.length; i++) {
            br = new BufferedReader(new InputStreamReader(fs.open(status[i].getPath())));
            String line = br.readLine();
            while (line != null) {
                String[] words = line.split("\\t");
                float value = Float.parseFloat(words[1].trim());
                String[] wordPair = words[0].replaceAll("\\(", "").replaceAll("\\)", "").split(",");
                PairOfStrings pair = new PairOfStrings();
                pair.set(wordPair[0].trim(), wordPair[1].trim());
                if (wordPair[0].trim().equals("cloud")) {
                    PairOfWritables<PairOfStrings, FloatWritable> cloudPmi = new PairOfWritables<PairOfStrings, FloatWritable>();
                    cloudPmi.set(pair, new FloatWritable(value));
                    cloudPmis.add(cloudPmi);
                    if ((highestCloudPMI == null)
                            || (highestCloudPMI.getRightElement().compareTo(cloudPmi.getRightElement()) < 0)) {
                        highestCloudPMI = cloudPmi;
                    } else if ((highestCloudPMI2 == null)
                            || (highestCloudPMI2.getRightElement().compareTo(cloudPmi.getRightElement()) < 0)) {
                        highestCloudPMI2 = cloudPmi;
                    } else if ((highestCloudPMI3 == null)
                            || (highestCloudPMI3.getRightElement().compareTo(cloudPmi.getRightElement()) < 0)) {
                        highestCloudPMI3 = cloudPmi;
                    }
                }
                if (wordPair[0].trim().equals("love")) {
                    PairOfWritables<PairOfStrings, FloatWritable> lovePmi = new PairOfWritables<PairOfStrings, FloatWritable>();
                    lovePmi.set(pair, new FloatWritable(value));
                    lovePmis.add(lovePmi);
                    if ((highestLovePMI == null)
                            || (highestLovePMI.getRightElement().compareTo(lovePmi.getRightElement()) < 0)) {
                        highestLovePMI = lovePmi;
                    } else if ((highestLovePMI2 == null)
                            || (highestLovePMI2.getRightElement().compareTo(lovePmi.getRightElement()) < 0)) {
                        highestLovePMI2 = lovePmi;
                    } else if ((highestLovePMI3 == null)
                            || (highestLovePMI3.getRightElement().compareTo(lovePmi.getRightElement()) < 0)) {
                        highestLovePMI3 = lovePmi;
                    }
                }
                PairOfWritables<PairOfStrings, FloatWritable> pmi = new PairOfWritables<PairOfStrings, FloatWritable>();
                pmi.set(pair, new FloatWritable(value));
                pmis.add(pmi);
                if (highestPMI == null) {
                    highestPMI = pmi;
                } else if (highestPMI.getRightElement().compareTo(pmi.getRightElement()) < 0) {
                    highestPMI = pmi;
                }
                countPairs++;
                line = br.readLine();
            }
        }
    } catch (Exception ex) {
        System.out.println("ERROR" + ex.getMessage());
    }

    /*Collections.sort(pmis, new Comparator<PairOfWritables<PairOfStrings, FloatWritable>>() {
      public int compare(PairOfWritables<PairOfStrings, FloatWritable> e1,
          PairOfWritables<PairOfStrings, FloatWritable> e2) {
        /*if (e2.getRightElement().compareTo(e1.getRightElement()) == 0) {
          return e1.getLeftElement().getLeftElement().compareTo(e2.getLeftElement().getLeftElement());
        }
            
        return e2.getRightElement().compareTo(e1.getRightElement());
      }
    });
            
            
    Collections.sort(cloudPmis, new Comparator<PairOfWritables<PairOfStrings, FloatWritable>>() {
      public int compare(PairOfWritables<PairOfStrings, FloatWritable> e1,
          PairOfWritables<PairOfStrings, FloatWritable> e2) {
        if (e2.getRightElement().compareTo(e1.getRightElement()) == 0) {
    return e1.getLeftElement().getLeftElement().compareTo(e2.getLeftElement().getLeftElement());
            }
            
        return e2.getRightElement().compareTo(e1.getRightElement());
      }
    });
            
            
    Collections.sort(lovePmis, new Comparator<PairOfWritables<PairOfStrings, FloatWritable>>() {
      public int compare(PairOfWritables<PairOfStrings, FloatWritable> e1,
          PairOfWritables<PairOfStrings, FloatWritable> e2) {
        if (e2.getRightElement().compareTo(e1.getRightElement()) == 0) {
    return e1.getLeftElement().getLeftElement().compareTo(e2.getLeftElement().getLeftElement());
           }
            
        return e2.getRightElement().compareTo(e1.getRightElement());
      }
    });
            
     PairOfWritables<PairOfStrings, FloatWritable> highestPMI = pmis.get(0);
     PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI = cloudPmis.get(0);      PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI2 = cloudPmis.get(1);
     PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI3 = cloudPmis.get(2);
             
     PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI = lovePmis.get(0);       PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI2 = lovePmis.get(1);
     PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI3 = lovePmis.get(2);*/

    System.out.println("Total Distinct Pairs : " + countPairs);
    System.out.println("Pair with highest PMI : (" + highestPMI.getLeftElement().getLeftElement() + ", "
            + highestPMI.getLeftElement().getRightElement());

    System.out
            .println("Word with highest PMI with Cloud : " + highestCloudPMI.getLeftElement().getRightElement()
                    + " with value : " + highestCloudPMI.getRightElement().get());
    System.out.println(
            "Word with second highest PMI with Cloud : " + highestCloudPMI2.getLeftElement().getRightElement()
                    + " with value : " + highestCloudPMI2.getRightElement().get());
    System.out.println(
            "Word with third highest PMI with Cloud : " + highestCloudPMI3.getLeftElement().getRightElement()
                    + " with value : " + highestCloudPMI3.getRightElement().get());

    System.out.println("Word with highest PMI with Love : " + highestLovePMI.getLeftElement().getRightElement()
            + " with value : " + highestLovePMI.getRightElement().get());
    System.out.println(
            "Word with second highest PMI with Love : " + highestLovePMI2.getLeftElement().getRightElement()
                    + " with value : " + highestLovePMI2.getRightElement().get());
    System.out.println(
            "Word with third highest PMI with Love : " + highestLovePMI3.getLeftElement().getRightElement()
                    + " with value : " + highestLovePMI3.getRightElement().get());

}

From source file:executables.Align.java

@SuppressWarnings("static-access")
public static void main(String[] args) throws IOException {
    Options options = new Options()
            .addOption(OptionBuilder.withArgName("f1").withDescription("Fasta file 1").hasArg().create("f1"))
            .addOption(OptionBuilder.withArgName("f2").withDescription("Fasta file 2").hasArg().create("f2"))
            .addOption(OptionBuilder.withArgName("s1").withDescription("sequence 1").hasArg().create("s1"))
            .addOption(OptionBuilder.withArgName("s2").withDescription("sequence 2").hasArg().create("s2"))
            .addOption(OptionBuilder.withArgName("gap-linear").withDescription("Linear gap cost").hasArg()
                    .create("gl"))
            .addOption(OptionBuilder.withArgName("gap-open").withDescription("Affine gap open cost").hasArg()
                    .create("go"))
            .addOption(OptionBuilder.withArgName("gap-extend").withDescription("Affine gap extend cost")
                    .hasArg().create("ge"))
            .addOption(OptionBuilder.withArgName("gap-function").withDescription("Gap function file").hasArg()
                    .create("gf"))
            .addOption(//from w w  w.  j  ava2s . c om
                    OptionBuilder.withArgName("gapless").withDescription("Gapless alignment").create("gapless"))
            .addOption(OptionBuilder.withArgName("mode")
                    .withDescription("Alignment mode: global,local,freeshift (Default: freeshift)").hasArg()
                    .create('m'))
            .addOption(OptionBuilder.withArgName("match").withDescription("Match score").hasArg().create("ma"))
            .addOption(OptionBuilder.withArgName("mismatch").withDescription("Mismatch score").hasArg()
                    .create("mi"))
            .addOption(OptionBuilder.withDescription("Do not append unaligned flanking sequences")
                    .create("noflank"))
            .addOption(OptionBuilder.withArgName("check").withDescription("Calculate checkscore").create('c'))
            .addOption(OptionBuilder.withArgName("format").withDescription(
                    "Output format, see String.format, parameters are: id1,id2,score,alignment (alignment only, if -f is specified); (default: '%s %s %.4f' w/o -f and '%s %s %.4f\n%s' w/ -f)")
                    .hasArg().create("format"))
            .addOption(OptionBuilder.withArgName("matrix")
                    .withDescription("Output dynamic programming matrix as well").create("matrix"))
            .addOption(OptionBuilder.withArgName("quasar-format")
                    .withDescription("Scoring matrix in quasar format").hasArg().create('q'))
            .addOption(
                    OptionBuilder.withArgName("pairs").withDescription("Pairs file").hasArg().create("pairs"))
            .addOption(OptionBuilder.withArgName("output").withDescription("Output").hasArg().create('o'))
            .addOption(OptionBuilder.withArgName("seqlib").withDescription("Seqlib file").hasArg()
                    .create("seqlib"))
            .addOption(OptionBuilder.withArgName("full").withDescription("Full output").create('f'));

    CommandLineParser parser = new PosixParser();
    try {
        CommandLine cmd = parser.parse(options, args);

        LongScoring<CharSequence> scoring = createScoring(cmd);
        AlignmentMode mode = createMode(cmd);
        if (mode == null)
            throw new ParseException("Mode unknown: " + cmd.getOptionValue('m'));

        Iterator<MutablePair<String, String>> idIterator = createSequences(scoring, cmd);

        GapCostFunction gap = createGapFunction(cmd);
        String format = getFormat(cmd);

        LongAligner<CharSequence> aligner;
        if (gap instanceof AffineGapCostFunction)
            aligner = new LongAligner<CharSequence>(scoring, ((AffineGapCostFunction) gap).getGapOpen(),
                    ((AffineGapCostFunction) gap).getGapExtend(), mode);
        else if (gap instanceof LinearGapCostFunction)
            aligner = new LongAligner<CharSequence>(scoring, ((LinearGapCostFunction) gap).getGap(), mode);
        else if (gap instanceof InfiniteGapCostFunction)
            aligner = new LongAligner<CharSequence>(scoring, mode);
        else
            throw new RuntimeException("Gap cost function " + gap.toString() + " currently not supported!");

        SimpleAlignmentFormatter formatter = cmd.hasOption('f')
                ? new SimpleAlignmentFormatter().setAppendUnaligned(!cmd.hasOption("noflank"))
                : null;

        CheckScore checkscore = cmd.hasOption('c') ? new CheckScore() : null;
        Alignment alignment = checkscore != null || formatter != null ? new Alignment() : null;

        float score;
        String ali;
        LineOrientedFile out = new LineOrientedFile(
                cmd.hasOption('o') ? cmd.getOptionValue('o') : LineOrientedFile.STDOUT);
        Writer wr = out.startWriting();

        while (idIterator.hasNext()) {
            MutablePair<String, String> ids = idIterator.next();

            score = alignment == null ? aligner.alignCache(ids.Item1, ids.Item2)
                    : aligner.alignCache(ids.Item1, ids.Item2, alignment);
            ali = formatter != null ? formatter.format(alignment, scoring, gap, mode,
                    scoring.getCachedSubject(ids.Item1), scoring.getCachedSubject(ids.Item2)) : "";
            out.writeLine(String.format(Locale.US, format, ids.Item1, ids.Item2, score, ali));

            if (cmd.hasOption("matrix")) {
                aligner.writeMatrix(wr,
                        aligner.getScoring().getCachedSubject(ids.Item1).toString().toCharArray(),
                        aligner.getScoring().getCachedSubject(ids.Item2).toString().toCharArray());
            }

            if (checkscore != null)
                checkscore.checkScore(aligner, scoring.getCachedSubject(ids.Item1).length(),
                        scoring.getCachedSubject(ids.Item2).length(), alignment, score);

        }

        out.finishWriting();

    } catch (ParseException e) {
        e.printStackTrace();
        HelpFormatter f = new HelpFormatter();
        f.printHelp("Align", options);
    }
}

From source file:cc.twittertools.index.IndexStatuses.java

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(HELP_OPTION, "show help"));
    options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment"));
    options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors"));

    options.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("source collection directory")
            .create(COLLECTION_OPTION));
    options.addOption(// ww  w.  j a  v  a2 s .  co  m
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids")
            .create(DELETES_OPTION));
    options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(COLLECTION_OPTION)
            || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(IndexStatuses.class.getName(), options);
        System.exit(-1);
    }

    String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION);
    String indexPath = cmdline.getOptionValue(INDEX_OPTION);

    final FieldType textOptions = new FieldType();
    textOptions.setIndexed(true);
    textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    textOptions.setStored(true);
    textOptions.setTokenized(true);
    if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) {
        textOptions.setStoreTermVectors(true);
    }

    LOG.info("collection: " + collectionPath);
    LOG.info("index: " + indexPath);

    LongOpenHashSet deletes = null;
    if (cmdline.hasOption(DELETES_OPTION)) {
        deletes = new LongOpenHashSet();
        File deletesFile = new File(cmdline.getOptionValue(DELETES_OPTION));
        if (!deletesFile.exists()) {
            System.err.println("Error: " + deletesFile + " does not exist!");
            System.exit(-1);
        }
        LOG.info("Reading deletes from " + deletesFile);

        FileInputStream fin = new FileInputStream(deletesFile);
        byte[] ignoreBytes = new byte[2];
        fin.read(ignoreBytes); // "B", "Z" bytes from commandline tools
        BufferedReader br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fin)));

        String s;
        while ((s = br.readLine()) != null) {
            if (s.contains("\t")) {
                deletes.add(Long.parseLong(s.split("\t")[0]));
            } else {
                deletes.add(Long.parseLong(s));
            }
        }
        br.close();
        fin.close();
        LOG.info("Read " + deletes.size() + " tweetids from deletes file.");
    }

    long maxId = Long.MAX_VALUE;
    if (cmdline.hasOption(MAX_ID_OPTION)) {
        maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION));
        LOG.info("index: " + maxId);
    }

    long startTime = System.currentTimeMillis();
    File file = new File(collectionPath);
    if (!file.exists()) {
        System.err.println("Error: " + file + " does not exist!");
        System.exit(-1);
    }

    StatusStream stream = new JsonStatusCorpusReader(file);

    Directory dir = FSDirectory.open(new File(indexPath));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, IndexStatuses.ANALYZER);
    config.setOpenMode(OpenMode.CREATE);

    IndexWriter writer = new IndexWriter(dir, config);
    int cnt = 0;
    Status status;
    try {
        while ((status = stream.next()) != null) {
            if (status.getText() == null) {
                continue;
            }

            // Skip deletes tweetids.
            if (deletes != null && deletes.contains(status.getId())) {
                continue;
            }

            if (status.getId() > maxId) {
                continue;
            }

            cnt++;
            Document doc = new Document();
            doc.add(new LongField(StatusField.ID.name, status.getId(), Field.Store.YES));
            doc.add(new LongField(StatusField.EPOCH.name, status.getEpoch(), Field.Store.YES));
            doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES));

            doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions));

            doc.add(new IntField(StatusField.FRIENDS_COUNT.name, status.getFollowersCount(), Store.YES));
            doc.add(new IntField(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount(), Store.YES));
            doc.add(new IntField(StatusField.STATUSES_COUNT.name, status.getStatusesCount(), Store.YES));

            long inReplyToStatusId = status.getInReplyToStatusId();
            if (inReplyToStatusId > 0) {
                doc.add(new LongField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId,
                        Field.Store.YES));
                doc.add(new LongField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId(),
                        Field.Store.YES));
            }

            String lang = status.getLang();
            if (!lang.equals("unknown")) {
                doc.add(new TextField(StatusField.LANG.name, status.getLang(), Store.YES));
            }

            long retweetStatusId = status.getRetweetedStatusId();
            if (retweetStatusId > 0) {
                doc.add(new LongField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId, Field.Store.YES));
                doc.add(new LongField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId(),
                        Field.Store.YES));
                doc.add(new IntField(StatusField.RETWEET_COUNT.name, status.getRetweetCount(), Store.YES));
                if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) {
                    LOG.warn("Error parsing retweet fields of " + status.getId());
                }
            }

            writer.addDocument(doc);
            if (cnt % 100000 == 0) {
                LOG.info(cnt + " statuses indexed");
            }
        }

        LOG.info(String.format("Total of %s statuses added", cnt));

        if (cmdline.hasOption(OPTIMIZE_OPTION)) {
            LOG.info("Merging segments...");
            writer.forceMerge(1);
            LOG.info("Done!");
        }

        LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        writer.close();
        dir.close();
        stream.close();
    }
}

From source file:com.github.braully.graph.UtilResult.java

public static void main(String... args) throws Exception {
    Options options = new Options();

    Option input = new Option("i", "input", true, "input file path");
    input.setRequired(false);/*from  ww w . j  ava 2  s .c o m*/
    options.addOption(input);

    Option verb = new Option("v", "verbose", false, "verbose process");
    input.setRequired(false);
    options.addOption(verb);

    Option output = new Option("o", "output", true, "output file");
    output.setRequired(false);
    options.addOption(output);

    CommandLineParser parser = new DefaultParser();
    HelpFormatter formatter = new HelpFormatter();
    CommandLine cmd;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        System.out.println(e.getMessage());
        formatter.printHelp("UtilResult", options);

        System.exit(1);
        return;
    }

    String inputFilePath = cmd.getOptionValue("input");
    if (inputFilePath == null) {
        inputFilePath = "/home/strike/grafos-para-processar/mft2/resultado.txt";
    }
    if (inputFilePath != null) {
        if (inputFilePath.toLowerCase().endsWith(".txt")) {
            processFileTxt(inputFilePath);
        } else if (inputFilePath.toLowerCase().endsWith(".json")) {
            processFileJson(inputFilePath);
        }
    }
}

From source file:io.anserini.index.IndexTweets.java

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(HELP_OPTION, "show help"));
    options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment"));
    options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors"));

    options.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("source collection directory")
            .create(COLLECTION_OPTION));
    options.addOption(/*from  w w w  . ja v a  2 s .  co  m*/
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids")
            .create(DELETES_OPTION));
    options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(COLLECTION_OPTION)
            || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(IndexTweets.class.getName(), options);
        System.exit(-1);
    }

    String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION);
    String indexPath = cmdline.getOptionValue(INDEX_OPTION);

    final FieldType textOptions = new FieldType();
    textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    textOptions.setStored(true);
    textOptions.setTokenized(true);
    if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) {
        textOptions.setStoreTermVectors(true);
    }

    LOG.info("collection: " + collectionPath);
    LOG.info("index: " + indexPath);

    LongOpenHashSet deletes = null;
    if (cmdline.hasOption(DELETES_OPTION)) {
        deletes = new LongOpenHashSet();
        File deletesFile = new File(cmdline.getOptionValue(DELETES_OPTION));
        if (!deletesFile.exists()) {
            System.err.println("Error: " + deletesFile + " does not exist!");
            System.exit(-1);
        }
        LOG.info("Reading deletes from " + deletesFile);

        FileInputStream fin = new FileInputStream(deletesFile);
        byte[] ignoreBytes = new byte[2];
        fin.read(ignoreBytes); // "B", "Z" bytes from commandline tools
        BufferedReader br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fin)));

        String s;
        while ((s = br.readLine()) != null) {
            if (s.contains("\t")) {
                deletes.add(Long.parseLong(s.split("\t")[0]));
            } else {
                deletes.add(Long.parseLong(s));
            }
        }
        br.close();
        fin.close();
        LOG.info("Read " + deletes.size() + " tweetids from deletes file.");
    }

    long maxId = Long.MAX_VALUE;
    if (cmdline.hasOption(MAX_ID_OPTION)) {
        maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION));
        LOG.info("index: " + maxId);
    }

    long startTime = System.currentTimeMillis();
    File file = new File(collectionPath);
    if (!file.exists()) {
        System.err.println("Error: " + file + " does not exist!");
        System.exit(-1);
    }

    StatusStream stream = new JsonStatusCorpusReader(file);

    Directory dir = FSDirectory.open(Paths.get(indexPath));
    final IndexWriterConfig config = new IndexWriterConfig(ANALYZER);

    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    IndexWriter writer = new IndexWriter(dir, config);
    int cnt = 0;
    Status status;
    try {
        while ((status = stream.next()) != null) {
            if (status.getText() == null) {
                continue;
            }

            // Skip deletes tweetids.
            if (deletes != null && deletes.contains(status.getId())) {
                continue;
            }

            if (status.getId() > maxId) {
                continue;
            }

            cnt++;
            Document doc = new Document();
            doc.add(new LongPoint(StatusField.ID.name, status.getId()));
            doc.add(new StoredField(StatusField.ID.name, status.getId()));
            doc.add(new LongPoint(StatusField.EPOCH.name, status.getEpoch()));
            doc.add(new StoredField(StatusField.EPOCH.name, status.getEpoch()));
            doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES));

            doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions));

            doc.add(new IntPoint(StatusField.FRIENDS_COUNT.name, status.getFollowersCount()));
            doc.add(new StoredField(StatusField.FRIENDS_COUNT.name, status.getFollowersCount()));
            doc.add(new IntPoint(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount()));
            doc.add(new StoredField(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount()));
            doc.add(new IntPoint(StatusField.STATUSES_COUNT.name, status.getStatusesCount()));
            doc.add(new StoredField(StatusField.STATUSES_COUNT.name, status.getStatusesCount()));

            long inReplyToStatusId = status.getInReplyToStatusId();
            if (inReplyToStatusId > 0) {
                doc.add(new LongPoint(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId));
                doc.add(new StoredField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId));
                doc.add(new LongPoint(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId()));
                doc.add(new StoredField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId()));
            }

            String lang = status.getLang();
            if (!lang.equals("unknown")) {
                doc.add(new TextField(StatusField.LANG.name, status.getLang(), Store.YES));
            }

            long retweetStatusId = status.getRetweetedStatusId();
            if (retweetStatusId > 0) {
                doc.add(new LongPoint(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId));
                doc.add(new StoredField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId));
                doc.add(new LongPoint(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId()));
                doc.add(new StoredField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId()));
                doc.add(new IntPoint(StatusField.RETWEET_COUNT.name, status.getRetweetCount()));
                doc.add(new StoredField(StatusField.RETWEET_COUNT.name, status.getRetweetCount()));
                if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) {
                    LOG.warn("Error parsing retweet fields of " + status.getId());
                }
            }

            writer.addDocument(doc);
            if (cnt % 100000 == 0) {
                LOG.info(cnt + " statuses indexed");
            }
        }

        LOG.info(String.format("Total of %s statuses added", cnt));

        if (cmdline.hasOption(OPTIMIZE_OPTION)) {
            LOG.info("Merging segments...");
            writer.forceMerge(1);
            LOG.info("Done!");
        }

        LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        writer.close();
        dir.close();
        stream.close();
    }
}