List of usage examples for org.apache.commons.cli HelpFormatter printHelp
public void printHelp(String cmdLineSyntax, Options options)
Prints the help for the given options with the specified command line syntax.
From source file:general.Main.java
/** * Selects the files to be processed and specifies the files to write to. * * @param args Arguments to specify runtime behavior. */// w ww . ja v a 2 s . co m public static void main(String[] args) throws InvocationTargetException, NoSuchMethodException, InstantiationException, IllegalAccessException { Options options = new Options(); options.addOption("l", "logging", false, "enables file logging"); options.addOption("j", "jena", false, "uses the Jena SPARQL Parser"); options.addOption("o", "openrdf", false, "uses the OpenRDF SPARQL Parser"); options.addOption("f", "file", true, "defines the input file prefix"); options.addOption("h", "help", false, "displays this help"); options.addOption("t", "tsv", false, "reads from .tsv-files"); // options.addOption("p", "parquet", false, "read from .parquet-files"); options.addOption("n", "numberOfThreads", true, "number of used threads, default 1"); options.addOption("b", "withBots", false, "enables metric calculation for bot queries+"); options.addOption("p", "readPreprocessed", false, "enables reading of preprocessed files"); //some parameters which can be changed through parameters //QueryHandler queryHandler = new OpenRDFQueryHandler(); String inputFilePrefix; String inputFileSuffix = ".tsv"; String queryParserName = "OpenRDF"; Class inputHandlerClass = null; Class queryHandlerClass = null; int numberOfThreads = 1; CommandLineParser parser = new DefaultParser(); CommandLine cmd; try { cmd = parser.parse(options, args); if (cmd.hasOption("help")) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("help", options); return; } if (cmd.hasOption("openrdf")) { queryHandlerClass = OpenRDFQueryHandler.class; } if (cmd.hasOption("tsv")) { inputFileSuffix = ".tsv"; inputHandlerClass = InputHandlerTSV.class; } if (cmd.hasOption("parquet")) { inputFileSuffix = ".parquet"; Logger.getLogger("org").setLevel(Level.WARN); Logger.getLogger("akka").setLevel(Level.WARN); SparkConf conf = new 
SparkConf().setAppName("SPARQLQueryAnalyzer").setMaster("local"); JavaSparkContext sc = new JavaSparkContext(conf); inputHandlerClass = InputHandlerParquet.class; } if (inputHandlerClass == null) { System.out.println("Please specify which parser to use, either -t for TSV or -p for parquet."); } if (cmd.hasOption("file")) { inputFilePrefix = cmd.getOptionValue("file").trim(); } else { System.out.println( "Please specify at least the file which we should work on using the option '--file PREFIX' or 'f PREFIX'"); return; } if (cmd.hasOption("logging")) { LoggingHandler.initFileLog(queryParserName, inputFilePrefix); } if (cmd.hasOption("numberOfThreads")) { numberOfThreads = Integer.parseInt(cmd.getOptionValue("numberOfThreads")); } if (cmd.hasOption("withBots")) { withBots = true; } if (cmd.hasOption("readPreprocessed")) { readPreprocessed = true; } } catch (UnrecognizedOptionException e) { System.out.println("Unrecognized commandline option: " + e.getOption()); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("help", options); return; } catch (ParseException e) { System.out.println( "There was an error while parsing your command line input. 
Did you rechecked your syntax before running?"); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("help", options); return; } LoggingHandler.initConsoleLog(); loadPreBuildQueryTypes(); long startTime = System.nanoTime(); ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads); for (int day = 1; day <= 31; day++) { String inputFile = inputFilePrefix + String.format("%02d", day) + inputFileSuffix; Runnable parseOneMonthWorker = new ParseOneMonthWorker(inputFile, inputFilePrefix, inputHandlerClass, queryParserName, queryHandlerClass, day); executor.execute(parseOneMonthWorker); } executor.shutdown(); while (!executor.isTerminated()) { //wait until all workers are finished } writeQueryTypes(inputFilePrefix); long stopTime = System.nanoTime(); long millis = TimeUnit.MILLISECONDS.convert(stopTime - startTime, TimeUnit.NANOSECONDS); Date date = new Date(millis); System.out.println("Finished executing with all threads: " + new SimpleDateFormat("mm-dd HH:mm:ss:SSSSSSS").format(date)); }
From source file:apps.quantification.LearnQuantificationSVMLight.java
/**
 * Learns svm_light based quantifiers from a training index and stores them on disk.
 *
 * <p>Expects four positional arguments after the options: the path to svm_light_learn, the
 * path to svm_light_classify, the training index and the output directory. Besides the
 * serialized quantifiers, a tab-separated {@code *_rates.txt} file with the per-category
 * contingency counts and the simple/scaled TPR and FPR values is written to the output
 * directory.
 *
 * @param args command line options followed by the four positional arguments
 * @throws IOException if the index cannot be read or an output file cannot be written
 */
public static void main(String[] args) throws IOException {
    String cmdLineSyntax = LearnQuantificationSVMLight.class.getName()
            + " [OPTIONS] <path to svm_light_learn> <path to svm_light_classify> <trainingIndexDirectory> <outputDirectory>";

    Options options = new Options();

    OptionBuilder.withArgName("f");
    OptionBuilder.withDescription("Number of folds");
    OptionBuilder.withLongOpt("f");
    OptionBuilder.isRequired(true);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("c");
    OptionBuilder.withDescription("The c value for svm_light (default 1)");
    OptionBuilder.withLongOpt("c");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("k");
    OptionBuilder.withDescription("Kernel type (default 0: linear, 1: polynomial, 2: RBF, 3: sigmoid)");
    OptionBuilder.withLongOpt("k");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("t");
    OptionBuilder.withDescription("Path for temporary files");
    OptionBuilder.withLongOpt("t");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg();
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("v");
    OptionBuilder.withDescription("Verbose output");
    OptionBuilder.withLongOpt("v");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    OptionBuilder.withArgName("s");
    OptionBuilder.withDescription("Don't delete temporary training file in svm_light format (default: delete)");
    OptionBuilder.withLongOpt("s");
    OptionBuilder.isRequired(false);
    OptionBuilder.hasArg(false);
    options.addOption(OptionBuilder.create());

    SvmLightLearnerCustomizer classificationLearnerCustomizer = null;
    SvmLightClassifierCustomizer classificationCustomizer = null;
    int folds = -1;

    GnuParser parser = new GnuParser();
    String[] remainingArgs = null;
    try {
        CommandLine line = parser.parse(options, args);
        remainingArgs = line.getArgs();

        // Validate the positional arguments before indexing into them, so that a wrong
        // argument count yields the usage text rather than an index-out-of-bounds "reason".
        if (remainingArgs.length != 4) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp(cmdLineSyntax, options);
            System.exit(-1);
        }

        classificationLearnerCustomizer = new SvmLightLearnerCustomizer(remainingArgs[0]);
        classificationCustomizer = new SvmLightClassifierCustomizer(remainingArgs[1]);

        folds = Integer.parseInt(line.getOptionValue("f"));

        if (line.hasOption("c")) {
            classificationLearnerCustomizer.setC(Float.parseFloat(line.getOptionValue("c")));
        }
        if (line.hasOption("k")) {
            System.out.println("Kernel type: " + line.getOptionValue("k"));
            classificationLearnerCustomizer.setKernelType(Integer.parseInt(line.getOptionValue("k")));
        }
        if (line.hasOption("v")) {
            classificationLearnerCustomizer.printSvmLightOutput(true);
        }
        if (line.hasOption("s")) {
            classificationLearnerCustomizer.setDeleteTrainingFiles(false);
        }
        if (line.hasOption("t")) {
            classificationLearnerCustomizer.setTempPath(line.getOptionValue("t"));
            classificationCustomizer.setTempPath(line.getOptionValue("t"));
        }
    } catch (Exception exp) {
        System.err.println("Parsing failed.  Reason: " + exp.getMessage());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(cmdLineSyntax, options);
        System.exit(-1);
    }

    assert (classificationLearnerCustomizer != null);

    String indexFile = remainingArgs[2];
    File file = new File(indexFile);
    String indexName = file.getName();
    String indexPath = file.getParent();
    String outputPath = remainingArgs[3];

    SvmLightLearner classificationLearner = new SvmLightLearner();
    classificationLearner.setRuntimeCustomizer(classificationLearnerCustomizer);

    FileSystemStorageManager fssm = new FileSystemStorageManager(indexPath, false);
    fssm.open();
    IIndex training = TroveReadWriteHelper.readIndex(fssm, indexName, TroveContentDBType.Full,
            TroveClassificationDBType.Full);
    // Release the storage manager once the index has been read (previously it was never
    // closed); the sibling quantification apps close theirs right after readIndex as well.
    fssm.close();

    final TextualProgressBar progressBar = new TextualProgressBar("Learning the quantifiers");
    IOperationStatusListener status = new IOperationStatusListener() {
        @Override
        public void operationStatus(double percentage) {
            progressBar.signal((int) percentage);
        }
    };

    QuantificationLearner quantificationLearner = new QuantificationLearner(folds, classificationLearner,
            classificationLearnerCustomizer, classificationCustomizer, ClassificationMode.PER_CATEGORY,
            new LogisticFunction(), status);
    IQuantifier[] quantifiers = quantificationLearner.learn(training);

    // The classifier stored with the quantifiers is looked up next to the learner executable.
    File executableFile = new File(classificationLearnerCustomizer.getSvmLightLearnPath());
    IDataManager classifierDataManager = new SvmLightDataManager(new SvmLightClassifierCustomizer(
            executableFile.getParentFile().getAbsolutePath() + Os.pathSeparator() + "svm_light_classify"));

    String description = "_SVMLight_C-" + classificationLearnerCustomizer.getC() + "_K-"
            + classificationLearnerCustomizer.getKernelType();
    if (classificationLearnerCustomizer.getAdditionalParameters().length() > 0) {
        description += "_" + classificationLearnerCustomizer.getAdditionalParameters();
    }
    String quantifierPrefix = indexName + "_Quantifier-" + folds + description;

    FileSystemStorageManager fssmo = new FileSystemStorageManager(
            outputPath + File.separatorChar + quantifierPrefix, true);
    fssmo.open();
    QuantificationLearner.write(quantifiers, fssmo, classifierDataManager);
    fssmo.close();

    BufferedWriter bfs = new BufferedWriter(
            new FileWriter(outputPath + File.separatorChar + quantifierPrefix + "_rates.txt"));
    try {
        TShortDoubleHashMap simpleTPRs = quantificationLearner.getSimpleTPRs();
        TShortDoubleHashMap simpleFPRs = quantificationLearner.getSimpleFPRs();
        TShortDoubleHashMap scaledTPRs = quantificationLearner.getScaledTPRs();
        TShortDoubleHashMap scaledFPRs = quantificationLearner.getScaledFPRs();
        ContingencyTableSet contingencyTableSet = quantificationLearner.getContingencyTableSet();

        short[] cats = simpleTPRs.keys();
        for (int i = 0; i < cats.length; ++i) {
            short cat = cats[i];
            String catName = training.getCategoryDB().getCategoryName(cat);
            ContingencyTable contingencyTable = contingencyTableSet.getCategoryContingencyTable(cat);

            // Both rows share the category columns and the contingency counts; only the
            // method label and the TPR/FPR pair differ.
            String counts = catName + "\t" + cat + "\t" + contingencyTable.tp() + "\t" + contingencyTable.fp()
                    + "\t" + contingencyTable.fn() + "\t" + contingencyTable.tn() + "\t";

            bfs.write(quantifierPrefix + "\ttrain\tsimple\t" + counts + simpleTPRs.get(cat) + "\t"
                    + simpleFPRs.get(cat) + "\n");
            bfs.write(quantifierPrefix + "\ttrain\tscaled\t" + counts + scaledTPRs.get(cat) + "\t"
                    + scaledFPRs.get(cat) + "\n");
        }
    } finally {
        // Close the rates file even if a write fails.
        bfs.close();
    }
}
From source file:apps.quantification.QuantifySVMLight.java
public static void main(String[] args) throws IOException { String cmdLineSyntax = QuantifySVMLight.class.getName() + " [OPTIONS] <path to svm_light_classify> <testIndexDirectory> <quantificationModelDirectory>"; Options options = new Options(); OptionBuilder.withArgName("d"); OptionBuilder.withDescription("Dump confidences file"); OptionBuilder.withLongOpt("d"); OptionBuilder.isRequired(false);//from ww w . ja v a 2s. co m OptionBuilder.hasArg(false); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("t"); OptionBuilder.withDescription("Path for temporary files"); OptionBuilder.withLongOpt("t"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("v"); OptionBuilder.withDescription("Verbose output"); OptionBuilder.withLongOpt("v"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(false); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("s"); OptionBuilder.withDescription("Don't delete temporary files in svm_light format (default: delete)"); OptionBuilder.withLongOpt("s"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(false); options.addOption(OptionBuilder.create()); SvmLightClassifierCustomizer customizer = null; GnuParser parser = new GnuParser(); String[] remainingArgs = null; try { CommandLine line = parser.parse(options, args); remainingArgs = line.getArgs(); customizer = new SvmLightClassifierCustomizer(remainingArgs[0]); if (line.hasOption("v")) customizer.printSvmLightOutput(true); if (line.hasOption("s")) { System.out.println("Keeping temporary files."); customizer.setDeleteTestFiles(false); customizer.setDeletePredictionsFiles(false); } if (line.hasOption("t")) customizer.setTempPath(line.getOptionValue("t")); } catch (Exception exp) { System.err.println("Parsing failed. 
Reason: " + exp.getMessage()); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(cmdLineSyntax, options); System.exit(-1); } if (remainingArgs.length != 3) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(cmdLineSyntax, options); System.exit(-1); } String indexFile = remainingArgs[1]; File file = new File(indexFile); String indexName = file.getName(); String indexPath = file.getParent(); String quantifierFilename = remainingArgs[2]; FileSystemStorageManager indexFssm = new FileSystemStorageManager(indexPath, false); indexFssm.open(); IIndex test = TroveReadWriteHelper.readIndex(indexFssm, indexName, TroveContentDBType.Full, TroveClassificationDBType.Full); indexFssm.close(); FileSystemStorageManager quantifierFssm = new FileSystemStorageManager(quantifierFilename, false); quantifierFssm.open(); SvmLightDataManager classifierDataManager = new SvmLightDataManager(customizer); FileSystemStorageManager fssm = new FileSystemStorageManager(quantifierFilename, false); fssm.open(); IQuantifier[] quantifiers = QuantificationLearner.read(fssm, classifierDataManager, ClassificationMode.PER_CATEGORY); fssm.close(); quantifierFssm.close(); Quantification ccQuantification = quantifiers[0].quantify(test); Quantification paQuantification = quantifiers[1].quantify(test); Quantification accQuantification = quantifiers[2].quantify(test); Quantification maxQuantification = quantifiers[3].quantify(test); Quantification sccQuantification = quantifiers[4].quantify(test); Quantification spaQuantification = quantifiers[5].quantify(test); Quantification trueQuantification = new Quantification("True", test.getClassificationDB()); File quantifierFile = new File(quantifierFilename); String quantificationName = quantifierFile.getParent() + Os.pathSeparator() + indexName + "_" + quantifierFile.getName() + ".txt"; BufferedWriter writer = new BufferedWriter(new FileWriter(quantificationName)); IShortIterator iterator = test.getCategoryDB().getCategories(); 
while (iterator.hasNext()) { short category = iterator.next(); String prefix = quantifierFile.getName() + "\t" + indexName + "\t" + test.getCategoryDB().getCategoryName(category) + "\t" + category + "\t" + trueQuantification.getQuantification(category) + "\t"; writer.write(prefix + ccQuantification.getName() + "\t" + ccQuantification.getQuantification(category) + "\n"); writer.write(prefix + paQuantification.getName() + "\t" + paQuantification.getQuantification(category) + "\n"); writer.write(prefix + accQuantification.getName() + "\t" + accQuantification.getQuantification(category) + "\n"); writer.write(prefix + maxQuantification.getName() + "\t" + maxQuantification.getQuantification(category) + "\n"); writer.write(prefix + sccQuantification.getName() + "\t" + sccQuantification.getQuantification(category) + "\n"); writer.write(prefix + spaQuantification.getName() + "\t" + spaQuantification.getQuantification(category) + "\n"); } writer.close(); BufferedWriter bfs = new BufferedWriter(new FileWriter(quantifierFile.getParent() + Os.pathSeparator() + indexName + "_" + quantifierFile.getName() + "_rates.txt")); TShortDoubleHashMap simpleTPRs = ((CCQuantifier) quantifiers[0]).getSimpleTPRs(); TShortDoubleHashMap simpleFPRs = ((CCQuantifier) quantifiers[0]).getSimpleFPRs(); TShortDoubleHashMap maxTPRs = ((CCQuantifier) ((ScaledQuantifier) quantifiers[3]).getInternalQuantifier()) .getSimpleTPRs(); TShortDoubleHashMap maxFPRs = ((CCQuantifier) ((ScaledQuantifier) quantifiers[3]).getInternalQuantifier()) .getSimpleFPRs(); TShortDoubleHashMap scaledTPRs = ((PAQuantifier) quantifiers[1]).getScaledTPRs(); TShortDoubleHashMap scaledFPRs = ((PAQuantifier) quantifiers[1]).getScaledFPRs(); ContingencyTableSet simpleContingencyTableSet = ((CCQuantifier) quantifiers[0]).getContingencyTableSet(); ContingencyTableSet maxContingencyTableSet = ((CCQuantifier) ((ScaledQuantifier) quantifiers[3]) .getInternalQuantifier()).getContingencyTableSet(); short[] cats = simpleTPRs.keys(); for 
(int i = 0; i < cats.length; ++i) { short cat = cats[i]; String catName = test.getCategoryDB().getCategoryName(cat); ContingencyTable simpleContingencyTable = simpleContingencyTableSet.getCategoryContingencyTable(cat); ContingencyTable maxContingencyTable = maxContingencyTableSet.getCategoryContingencyTable(cat); double simpleTPR = simpleTPRs.get(cat); double simpleFPR = simpleFPRs.get(cat); double maxTPR = maxTPRs.get(cat); double maxFPR = maxFPRs.get(cat); double scaledTPR = scaledTPRs.get(cat); double scaledFPR = scaledFPRs.get(cat); String line = indexName + "_" + quantifierFile.getName() + "\ttest\tsimple\t" + catName + "\t" + cat + "\t" + simpleContingencyTable.tp() + "\t" + simpleContingencyTable.fp() + "\t" + simpleContingencyTable.fn() + "\t" + simpleContingencyTable.tn() + "\t" + simpleTPR + "\t" + simpleFPR + "\n"; bfs.write(line); line = indexName + "_" + quantifierFile.getName() + "\ttest\tmax\t" + catName + "\t" + cat + "\t" + maxContingencyTable.tp() + "\t" + maxContingencyTable.fp() + "\t" + maxContingencyTable.fn() + "\t" + maxContingencyTable.tn() + "\t" + maxTPR + "\t" + maxFPR + "\n"; bfs.write(line); line = indexName + "_" + quantifierFile.getName() + "\ttest\tscaled\t" + catName + "\t" + cat + "\t" + simpleContingencyTable.tp() + "\t" + simpleContingencyTable.fp() + "\t" + simpleContingencyTable.fn() + "\t" + simpleContingencyTable.tn() + "\t" + scaledTPR + "\t" + scaledFPR + "\n"; bfs.write(line); } bfs.close(); }
From source file:apps.quantification.QuantifySVMPerf.java
public static void main(String[] args) throws IOException { String cmdLineSyntax = QuantifySVMPerf.class.getName() + " [OPTIONS] <path to svm_perf_classify> <testIndexDirectory> <quantificationModelDirectory>"; Options options = new Options(); OptionBuilder.withArgName("d"); OptionBuilder.withDescription("Dump confidences file"); OptionBuilder.withLongOpt("d"); OptionBuilder.isRequired(false);//w w w .j av a 2 s . co m OptionBuilder.hasArg(false); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("t"); OptionBuilder.withDescription("Path for temporary files"); OptionBuilder.withLongOpt("t"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("v"); OptionBuilder.withDescription("Verbose output"); OptionBuilder.withLongOpt("v"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(false); options.addOption(OptionBuilder.create()); OptionBuilder.withArgName("s"); OptionBuilder.withDescription("Don't delete temporary files in svm_perf format (default: delete)"); OptionBuilder.withLongOpt("s"); OptionBuilder.isRequired(false); OptionBuilder.hasArg(false); options.addOption(OptionBuilder.create()); SvmPerfClassifierCustomizer customizer = null; GnuParser parser = new GnuParser(); String[] remainingArgs = null; try { CommandLine line = parser.parse(options, args); remainingArgs = line.getArgs(); customizer = new SvmPerfClassifierCustomizer(remainingArgs[0]); if (line.hasOption("v")) customizer.printSvmPerfOutput(true); if (line.hasOption("s")) { System.out.println("Keeping temporary files."); customizer.setDeleteTestFiles(false); customizer.setDeletePredictionsFiles(false); } if (line.hasOption("t")) customizer.setTempPath(line.getOptionValue("t")); } catch (Exception exp) { System.err.println("Parsing failed. 
Reason: " + exp.getMessage()); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(cmdLineSyntax, options); System.exit(-1); } if (remainingArgs.length != 3) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(cmdLineSyntax, options); System.exit(-1); } String indexFile = remainingArgs[1]; File file = new File(indexFile); String indexName = file.getName(); String indexPath = file.getParent(); String quantifierFilename = remainingArgs[2]; FileSystemStorageManager indexFssm = new FileSystemStorageManager(indexPath, false); indexFssm.open(); IIndex test = TroveReadWriteHelper.readIndex(indexFssm, indexName, TroveContentDBType.Full, TroveClassificationDBType.Full); indexFssm.close(); FileSystemStorageManager quantifierFssm = new FileSystemStorageManager(quantifierFilename, false); quantifierFssm.open(); SvmPerfDataManager classifierDataManager = new SvmPerfDataManager(customizer); FileSystemStorageManager fssm = new FileSystemStorageManager(quantifierFilename, false); fssm.open(); IQuantifier[] quantifiers = QuantificationLearner.read(fssm, classifierDataManager, ClassificationMode.PER_CATEGORY); fssm.close(); quantifierFssm.close(); Quantification ccQuantification = quantifiers[0].quantify(test); Quantification paQuantification = quantifiers[1].quantify(test); Quantification accQuantification = quantifiers[2].quantify(test); Quantification maxQuantification = quantifiers[3].quantify(test); Quantification sccQuantification = quantifiers[4].quantify(test); Quantification spaQuantification = quantifiers[5].quantify(test); Quantification trueQuantification = new Quantification("True", test.getClassificationDB()); File quantifierFile = new File(quantifierFilename); String quantificationName = quantifierFile.getParent() + Os.pathSeparator() + indexName + "_" + quantifierFile.getName() + ".txt"; BufferedWriter writer = new BufferedWriter(new FileWriter(quantificationName)); IShortIterator iterator = test.getCategoryDB().getCategories(); while 
(iterator.hasNext()) { short category = iterator.next(); String prefix = quantifierFile.getName() + "\t" + indexName + "\t" + test.getCategoryDB().getCategoryName(category) + "\t" + category + "\t" + trueQuantification.getQuantification(category) + "\t"; writer.write(prefix + ccQuantification.getName() + "\t" + ccQuantification.getQuantification(category) + "\n"); writer.write(prefix + paQuantification.getName() + "\t" + paQuantification.getQuantification(category) + "\n"); writer.write(prefix + accQuantification.getName() + "\t" + accQuantification.getQuantification(category) + "\n"); writer.write(prefix + maxQuantification.getName() + "\t" + maxQuantification.getQuantification(category) + "\n"); writer.write(prefix + sccQuantification.getName() + "\t" + sccQuantification.getQuantification(category) + "\n"); writer.write(prefix + spaQuantification.getName() + "\t" + spaQuantification.getQuantification(category) + "\n"); } writer.close(); BufferedWriter bfs = new BufferedWriter(new FileWriter(quantifierFile.getParent() + Os.pathSeparator() + indexName + "_" + quantifierFile.getName() + "_rates.txt")); TShortDoubleHashMap simpleTPRs = ((CCQuantifier) quantifiers[0]).getSimpleTPRs(); TShortDoubleHashMap simpleFPRs = ((CCQuantifier) quantifiers[0]).getSimpleFPRs(); TShortDoubleHashMap maxTPRs = ((CCQuantifier) ((ScaledQuantifier) quantifiers[3]).getInternalQuantifier()) .getSimpleTPRs(); TShortDoubleHashMap maxFPRs = ((CCQuantifier) ((ScaledQuantifier) quantifiers[3]).getInternalQuantifier()) .getSimpleFPRs(); TShortDoubleHashMap scaledTPRs = ((PAQuantifier) quantifiers[1]).getScaledTPRs(); TShortDoubleHashMap scaledFPRs = ((PAQuantifier) quantifiers[1]).getScaledFPRs(); ContingencyTableSet simpleContingencyTableSet = ((CCQuantifier) quantifiers[0]).getContingencyTableSet(); ContingencyTableSet maxContingencyTableSet = ((CCQuantifier) ((ScaledQuantifier) quantifiers[3]) .getInternalQuantifier()).getContingencyTableSet(); short[] cats = simpleTPRs.keys(); for (int i = 
0; i < cats.length; ++i) { short cat = cats[i]; String catName = test.getCategoryDB().getCategoryName(cat); ContingencyTable simpleContingencyTable = simpleContingencyTableSet.getCategoryContingencyTable(cat); ContingencyTable maxContingencyTable = maxContingencyTableSet.getCategoryContingencyTable(cat); double simpleTPR = simpleTPRs.get(cat); double simpleFPR = simpleFPRs.get(cat); double maxTPR = maxTPRs.get(cat); double maxFPR = maxFPRs.get(cat); double scaledTPR = scaledTPRs.get(cat); double scaledFPR = scaledFPRs.get(cat); String line = indexName + "_" + quantifierFile.getName() + "\ttest\tsimple\t" + catName + "\t" + cat + "\t" + simpleContingencyTable.tp() + "\t" + simpleContingencyTable.fp() + "\t" + simpleContingencyTable.fn() + "\t" + simpleContingencyTable.tn() + "\t" + simpleTPR + "\t" + simpleFPR + "\n"; bfs.write(line); line = indexName + "_" + quantifierFile.getName() + "\ttest\tmax\t" + catName + "\t" + cat + "\t" + maxContingencyTable.tp() + "\t" + maxContingencyTable.fp() + "\t" + maxContingencyTable.fn() + "\t" + maxContingencyTable.tn() + "\t" + maxTPR + "\t" + maxFPR + "\n"; bfs.write(line); line = indexName + "_" + quantifierFile.getName() + "\ttest\tscaled\t" + catName + "\t" + cat + "\t" + simpleContingencyTable.tp() + "\t" + simpleContingencyTable.fp() + "\t" + simpleContingencyTable.fn() + "\t" + simpleContingencyTable.tn() + "\t" + scaledTPR + "\t" + scaledFPR + "\n"; bfs.write(line); } bfs.close(); }
From source file:CircularGenerator.java
/**
 * Command line entry point: elongates the selected sequences of a FastA file.
 *
 * <p>Reads the required options {@code -i} (input file), {@code -e} (elongation factor) and
 * {@code -s} (sequence names), registers every given name in
 * {@code keys_to_treat_circular} and calls {@code extendFastA} on the input file. Invalid
 * usage terminates the JVM with a non-zero exit status.
 *
 * @param args the command line arguments
 * @throws Exception whatever {@code CircularGenerator} propagates while processing the file
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options helpOptions = new Options();
    helpOptions.addOption("h", "help", false, "show this help page");

    Options options = new Options();
    options.addOption("h", "help", false, "show this help page");
    options.addOption(OptionBuilder.withLongOpt("input").withArgName("INPUT")
            .withDescription("the input FastA File").isRequired().hasArg().create("i"));
    options.addOption(OptionBuilder.withLongOpt("elongation").withArgName("ELONGATION")
            .withDescription("the elongation factor [INT]").isRequired().hasArg().create("e"));
    options.addOption(OptionBuilder.withLongOpt("seq").withArgName("SEQ")
            .withDescription("the names of the sequences that should to be elongated").isRequired().hasArg()
            .hasOptionalArgs().hasArg().create("s"));

    HelpFormatter helpformatter = new HelpFormatter();
    CommandLineParser parser = new BasicParser();

    // First pass: only look for -h, so that help can be shown without the required options.
    try {
        CommandLine cmd = parser.parse(helpOptions, args);
        if (cmd.hasOption('h')) {
            helpformatter.printHelp(CLASS_NAME + "v" + VERSION, options);
            System.exit(0);
        }
    } catch (ParseException ignored) {
        // Deliberately ignored: this pass knows only -h, so any other option makes it fail.
        // The full parse below reports real usage errors.
    }

    String input = "";
    Integer elongation = 0;
    String[] names = new String[0];
    try {
        CommandLine cmd = parser.parse(options, args);
        if (cmd.hasOption('i')) {
            input = cmd.getOptionValue('i');
        }
        if (cmd.hasOption('e')) {
            String tmpElongation = cmd.getOptionValue('e');
            try {
                elongation = Integer.parseInt(tmpElongation);
            } catch (NumberFormatException e) {
                System.err.println("elongation not an Integer: " + tmpElongation);
                // A usage error must not be reported as success.
                System.exit(1);
            }
        }
        if (cmd.hasOption('s')) {
            // getOptionValues returns null when the option carries no value (its argument is
            // declared optional); keep the empty default in that case.
            String[] selected = cmd.getOptionValues('s');
            if (selected != null) {
                names = selected;
            }
        }
    } catch (ParseException e) {
        helpformatter.printHelp(CLASS_NAME, options);
        System.err.println(e.getMessage());
        // A usage error must not be reported as success.
        System.exit(1);
    }

    CircularGenerator cg = new CircularGenerator(elongation);
    File f = new File(input);
    for (String s : names) {
        cg.keys_to_treat_circular.add(s);
    }
    cg.extendFastA(f);
}
From source file:edu.umd.ujjwalgoel.AnalyzePMI.java
@SuppressWarnings({ "static-access" }) public static void main(String[] args) { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); CommandLine cmdline = null;/*from w w w. j a v a 2s . co m*/ CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(AnalyzePMI.class.getName(), options); ToolRunner.printGenericCommandUsage(System.out); System.exit(-1); } String inputPath = cmdline.getOptionValue(INPUT); System.out.println("input path: " + inputPath); BufferedReader br = null; int countPairs = 0; List<PairOfWritables<PairOfStrings, FloatWritable>> pmis = new ArrayList<PairOfWritables<PairOfStrings, FloatWritable>>(); List<PairOfWritables<PairOfStrings, FloatWritable>> cloudPmis = new ArrayList<PairOfWritables<PairOfStrings, FloatWritable>>(); List<PairOfWritables<PairOfStrings, FloatWritable>> lovePmis = new ArrayList<PairOfWritables<PairOfStrings, FloatWritable>>(); PairOfWritables<PairOfStrings, FloatWritable> highestPMI = null; PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI = null; PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI2 = null; PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI3 = null; PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI = null; PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI2 = null; PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI3 = null; try { FileSystem fs = FileSystem.get(new Configuration()); FileStatus[] status = fs.listStatus(new Path(inputPath)); //PairOfStrings pair = new PairOfStrings(); for (int i = 0; i < status.length; i++) { br = new 
BufferedReader(new InputStreamReader(fs.open(status[i].getPath()))); String line = br.readLine(); while (line != null) { String[] words = line.split("\\t"); float value = Float.parseFloat(words[1].trim()); String[] wordPair = words[0].replaceAll("\\(", "").replaceAll("\\)", "").split(","); PairOfStrings pair = new PairOfStrings(); pair.set(wordPair[0].trim(), wordPair[1].trim()); if (wordPair[0].trim().equals("cloud")) { PairOfWritables<PairOfStrings, FloatWritable> cloudPmi = new PairOfWritables<PairOfStrings, FloatWritable>(); cloudPmi.set(pair, new FloatWritable(value)); cloudPmis.add(cloudPmi); if ((highestCloudPMI == null) || (highestCloudPMI.getRightElement().compareTo(cloudPmi.getRightElement()) < 0)) { highestCloudPMI = cloudPmi; } else if ((highestCloudPMI2 == null) || (highestCloudPMI2.getRightElement().compareTo(cloudPmi.getRightElement()) < 0)) { highestCloudPMI2 = cloudPmi; } else if ((highestCloudPMI3 == null) || (highestCloudPMI3.getRightElement().compareTo(cloudPmi.getRightElement()) < 0)) { highestCloudPMI3 = cloudPmi; } } if (wordPair[0].trim().equals("love")) { PairOfWritables<PairOfStrings, FloatWritable> lovePmi = new PairOfWritables<PairOfStrings, FloatWritable>(); lovePmi.set(pair, new FloatWritable(value)); lovePmis.add(lovePmi); if ((highestLovePMI == null) || (highestLovePMI.getRightElement().compareTo(lovePmi.getRightElement()) < 0)) { highestLovePMI = lovePmi; } else if ((highestLovePMI2 == null) || (highestLovePMI2.getRightElement().compareTo(lovePmi.getRightElement()) < 0)) { highestLovePMI2 = lovePmi; } else if ((highestLovePMI3 == null) || (highestLovePMI3.getRightElement().compareTo(lovePmi.getRightElement()) < 0)) { highestLovePMI3 = lovePmi; } } PairOfWritables<PairOfStrings, FloatWritable> pmi = new PairOfWritables<PairOfStrings, FloatWritable>(); pmi.set(pair, new FloatWritable(value)); pmis.add(pmi); if (highestPMI == null) { highestPMI = pmi; } else if (highestPMI.getRightElement().compareTo(pmi.getRightElement()) < 0) { 
highestPMI = pmi; } countPairs++; line = br.readLine(); } } } catch (Exception ex) { System.out.println("ERROR" + ex.getMessage()); } /*Collections.sort(pmis, new Comparator<PairOfWritables<PairOfStrings, FloatWritable>>() { public int compare(PairOfWritables<PairOfStrings, FloatWritable> e1, PairOfWritables<PairOfStrings, FloatWritable> e2) { /*if (e2.getRightElement().compareTo(e1.getRightElement()) == 0) { return e1.getLeftElement().getLeftElement().compareTo(e2.getLeftElement().getLeftElement()); } return e2.getRightElement().compareTo(e1.getRightElement()); } }); Collections.sort(cloudPmis, new Comparator<PairOfWritables<PairOfStrings, FloatWritable>>() { public int compare(PairOfWritables<PairOfStrings, FloatWritable> e1, PairOfWritables<PairOfStrings, FloatWritable> e2) { if (e2.getRightElement().compareTo(e1.getRightElement()) == 0) { return e1.getLeftElement().getLeftElement().compareTo(e2.getLeftElement().getLeftElement()); } return e2.getRightElement().compareTo(e1.getRightElement()); } }); Collections.sort(lovePmis, new Comparator<PairOfWritables<PairOfStrings, FloatWritable>>() { public int compare(PairOfWritables<PairOfStrings, FloatWritable> e1, PairOfWritables<PairOfStrings, FloatWritable> e2) { if (e2.getRightElement().compareTo(e1.getRightElement()) == 0) { return e1.getLeftElement().getLeftElement().compareTo(e2.getLeftElement().getLeftElement()); } return e2.getRightElement().compareTo(e1.getRightElement()); } }); PairOfWritables<PairOfStrings, FloatWritable> highestPMI = pmis.get(0); PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI = cloudPmis.get(0); PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI2 = cloudPmis.get(1); PairOfWritables<PairOfStrings, FloatWritable> highestCloudPMI3 = cloudPmis.get(2); PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI = lovePmis.get(0); PairOfWritables<PairOfStrings, FloatWritable> highestLovePMI2 = lovePmis.get(1); PairOfWritables<PairOfStrings, FloatWritable> 
highestLovePMI3 = lovePmis.get(2);*/ System.out.println("Total Distinct Pairs : " + countPairs); System.out.println("Pair with highest PMI : (" + highestPMI.getLeftElement().getLeftElement() + ", " + highestPMI.getLeftElement().getRightElement()); System.out .println("Word with highest PMI with Cloud : " + highestCloudPMI.getLeftElement().getRightElement() + " with value : " + highestCloudPMI.getRightElement().get()); System.out.println( "Word with second highest PMI with Cloud : " + highestCloudPMI2.getLeftElement().getRightElement() + " with value : " + highestCloudPMI2.getRightElement().get()); System.out.println( "Word with third highest PMI with Cloud : " + highestCloudPMI3.getLeftElement().getRightElement() + " with value : " + highestCloudPMI3.getRightElement().get()); System.out.println("Word with highest PMI with Love : " + highestLovePMI.getLeftElement().getRightElement() + " with value : " + highestLovePMI.getRightElement().get()); System.out.println( "Word with second highest PMI with Love : " + highestLovePMI2.getLeftElement().getRightElement() + " with value : " + highestLovePMI2.getRightElement().get()); System.out.println( "Word with third highest PMI with Love : " + highestLovePMI3.getLeftElement().getRightElement() + " with value : " + highestLovePMI3.getRightElement().get()); }
From source file:executables.Align.java
@SuppressWarnings("static-access") public static void main(String[] args) throws IOException { Options options = new Options() .addOption(OptionBuilder.withArgName("f1").withDescription("Fasta file 1").hasArg().create("f1")) .addOption(OptionBuilder.withArgName("f2").withDescription("Fasta file 2").hasArg().create("f2")) .addOption(OptionBuilder.withArgName("s1").withDescription("sequence 1").hasArg().create("s1")) .addOption(OptionBuilder.withArgName("s2").withDescription("sequence 2").hasArg().create("s2")) .addOption(OptionBuilder.withArgName("gap-linear").withDescription("Linear gap cost").hasArg() .create("gl")) .addOption(OptionBuilder.withArgName("gap-open").withDescription("Affine gap open cost").hasArg() .create("go")) .addOption(OptionBuilder.withArgName("gap-extend").withDescription("Affine gap extend cost") .hasArg().create("ge")) .addOption(OptionBuilder.withArgName("gap-function").withDescription("Gap function file").hasArg() .create("gf")) .addOption(//from w w w. j ava2s . 
c om OptionBuilder.withArgName("gapless").withDescription("Gapless alignment").create("gapless")) .addOption(OptionBuilder.withArgName("mode") .withDescription("Alignment mode: global,local,freeshift (Default: freeshift)").hasArg() .create('m')) .addOption(OptionBuilder.withArgName("match").withDescription("Match score").hasArg().create("ma")) .addOption(OptionBuilder.withArgName("mismatch").withDescription("Mismatch score").hasArg() .create("mi")) .addOption(OptionBuilder.withDescription("Do not append unaligned flanking sequences") .create("noflank")) .addOption(OptionBuilder.withArgName("check").withDescription("Calculate checkscore").create('c')) .addOption(OptionBuilder.withArgName("format").withDescription( "Output format, see String.format, parameters are: id1,id2,score,alignment (alignment only, if -f is specified); (default: '%s %s %.4f' w/o -f and '%s %s %.4f\n%s' w/ -f)") .hasArg().create("format")) .addOption(OptionBuilder.withArgName("matrix") .withDescription("Output dynamic programming matrix as well").create("matrix")) .addOption(OptionBuilder.withArgName("quasar-format") .withDescription("Scoring matrix in quasar format").hasArg().create('q')) .addOption( OptionBuilder.withArgName("pairs").withDescription("Pairs file").hasArg().create("pairs")) .addOption(OptionBuilder.withArgName("output").withDescription("Output").hasArg().create('o')) .addOption(OptionBuilder.withArgName("seqlib").withDescription("Seqlib file").hasArg() .create("seqlib")) .addOption(OptionBuilder.withArgName("full").withDescription("Full output").create('f')); CommandLineParser parser = new PosixParser(); try { CommandLine cmd = parser.parse(options, args); LongScoring<CharSequence> scoring = createScoring(cmd); AlignmentMode mode = createMode(cmd); if (mode == null) throw new ParseException("Mode unknown: " + cmd.getOptionValue('m')); Iterator<MutablePair<String, String>> idIterator = createSequences(scoring, cmd); GapCostFunction gap = createGapFunction(cmd); String format = 
getFormat(cmd); LongAligner<CharSequence> aligner; if (gap instanceof AffineGapCostFunction) aligner = new LongAligner<CharSequence>(scoring, ((AffineGapCostFunction) gap).getGapOpen(), ((AffineGapCostFunction) gap).getGapExtend(), mode); else if (gap instanceof LinearGapCostFunction) aligner = new LongAligner<CharSequence>(scoring, ((LinearGapCostFunction) gap).getGap(), mode); else if (gap instanceof InfiniteGapCostFunction) aligner = new LongAligner<CharSequence>(scoring, mode); else throw new RuntimeException("Gap cost function " + gap.toString() + " currently not supported!"); SimpleAlignmentFormatter formatter = cmd.hasOption('f') ? new SimpleAlignmentFormatter().setAppendUnaligned(!cmd.hasOption("noflank")) : null; CheckScore checkscore = cmd.hasOption('c') ? new CheckScore() : null; Alignment alignment = checkscore != null || formatter != null ? new Alignment() : null; float score; String ali; LineOrientedFile out = new LineOrientedFile( cmd.hasOption('o') ? cmd.getOptionValue('o') : LineOrientedFile.STDOUT); Writer wr = out.startWriting(); while (idIterator.hasNext()) { MutablePair<String, String> ids = idIterator.next(); score = alignment == null ? aligner.alignCache(ids.Item1, ids.Item2) : aligner.alignCache(ids.Item1, ids.Item2, alignment); ali = formatter != null ? 
formatter.format(alignment, scoring, gap, mode, scoring.getCachedSubject(ids.Item1), scoring.getCachedSubject(ids.Item2)) : ""; out.writeLine(String.format(Locale.US, format, ids.Item1, ids.Item2, score, ali)); if (cmd.hasOption("matrix")) { aligner.writeMatrix(wr, aligner.getScoring().getCachedSubject(ids.Item1).toString().toCharArray(), aligner.getScoring().getCachedSubject(ids.Item2).toString().toCharArray()); } if (checkscore != null) checkscore.checkScore(aligner, scoring.getCachedSubject(ids.Item1).length(), scoring.getCachedSubject(ids.Item2).length(), alignment, score); } out.finishWriting(); } catch (ParseException e) { e.printStackTrace(); HelpFormatter f = new HelpFormatter(); f.printHelp("Align", options); } }
From source file:cc.twittertools.index.IndexStatuses.java
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(HELP_OPTION, "show help")); options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment")); options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors")); options.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("source collection directory") .create(COLLECTION_OPTION)); options.addOption(// ww w. j a v a2 s . co m OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids") .create(DELETES_OPTION)); options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(COLLECTION_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(IndexStatuses.class.getName(), options); System.exit(-1); } String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION); String indexPath = cmdline.getOptionValue(INDEX_OPTION); final FieldType textOptions = new FieldType(); textOptions.setIndexed(true); textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); textOptions.setStored(true); textOptions.setTokenized(true); if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) { textOptions.setStoreTermVectors(true); } LOG.info("collection: " + collectionPath); LOG.info("index: " + indexPath); LongOpenHashSet deletes = null; if (cmdline.hasOption(DELETES_OPTION)) { deletes = new LongOpenHashSet(); File deletesFile = new 
File(cmdline.getOptionValue(DELETES_OPTION)); if (!deletesFile.exists()) { System.err.println("Error: " + deletesFile + " does not exist!"); System.exit(-1); } LOG.info("Reading deletes from " + deletesFile); FileInputStream fin = new FileInputStream(deletesFile); byte[] ignoreBytes = new byte[2]; fin.read(ignoreBytes); // "B", "Z" bytes from commandline tools BufferedReader br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fin))); String s; while ((s = br.readLine()) != null) { if (s.contains("\t")) { deletes.add(Long.parseLong(s.split("\t")[0])); } else { deletes.add(Long.parseLong(s)); } } br.close(); fin.close(); LOG.info("Read " + deletes.size() + " tweetids from deletes file."); } long maxId = Long.MAX_VALUE; if (cmdline.hasOption(MAX_ID_OPTION)) { maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION)); LOG.info("index: " + maxId); } long startTime = System.currentTimeMillis(); File file = new File(collectionPath); if (!file.exists()) { System.err.println("Error: " + file + " does not exist!"); System.exit(-1); } StatusStream stream = new JsonStatusCorpusReader(file); Directory dir = FSDirectory.open(new File(indexPath)); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, IndexStatuses.ANALYZER); config.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, config); int cnt = 0; Status status; try { while ((status = stream.next()) != null) { if (status.getText() == null) { continue; } // Skip deletes tweetids. 
if (deletes != null && deletes.contains(status.getId())) { continue; } if (status.getId() > maxId) { continue; } cnt++; Document doc = new Document(); doc.add(new LongField(StatusField.ID.name, status.getId(), Field.Store.YES)); doc.add(new LongField(StatusField.EPOCH.name, status.getEpoch(), Field.Store.YES)); doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES)); doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions)); doc.add(new IntField(StatusField.FRIENDS_COUNT.name, status.getFollowersCount(), Store.YES)); doc.add(new IntField(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount(), Store.YES)); doc.add(new IntField(StatusField.STATUSES_COUNT.name, status.getStatusesCount(), Store.YES)); long inReplyToStatusId = status.getInReplyToStatusId(); if (inReplyToStatusId > 0) { doc.add(new LongField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId, Field.Store.YES)); doc.add(new LongField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId(), Field.Store.YES)); } String lang = status.getLang(); if (!lang.equals("unknown")) { doc.add(new TextField(StatusField.LANG.name, status.getLang(), Store.YES)); } long retweetStatusId = status.getRetweetedStatusId(); if (retweetStatusId > 0) { doc.add(new LongField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId, Field.Store.YES)); doc.add(new LongField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId(), Field.Store.YES)); doc.add(new IntField(StatusField.RETWEET_COUNT.name, status.getRetweetCount(), Store.YES)); if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) { LOG.warn("Error parsing retweet fields of " + status.getId()); } } writer.addDocument(doc); if (cnt % 100000 == 0) { LOG.info(cnt + " statuses indexed"); } } LOG.info(String.format("Total of %s statuses added", cnt)); if (cmdline.hasOption(OPTIMIZE_OPTION)) { LOG.info("Merging segments..."); writer.forceMerge(1); LOG.info("Done!"); } LOG.info("Total 
elapsed time: " + (System.currentTimeMillis() - startTime) + "ms"); } catch (Exception e) { e.printStackTrace(); } finally { writer.close(); dir.close(); stream.close(); } }
From source file:com.github.braully.graph.UtilResult.java
/**
 * Command-line entry point that processes a result file.
 *
 * <p>Accepts the optional options {@code -i/--input}, {@code -v/--verbose} and
 * {@code -o/--output}. When no input path is given a built-in default path is used. The input
 * is dispatched on its extension (case-insensitive): {@code .txt} goes to
 * {@code processFileTxt}, {@code .json} to {@code processFileJson}. Any other extension is
 * reported on standard error and nothing is processed.
 *
 * <p>If the command line cannot be parsed, the error and usage help are printed and the JVM
 * exits with status 1.
 *
 * @param args command-line arguments
 * @throws Exception propagated from the file-processing helpers
 */
public static void main(String... args) throws Exception {
    Options options = new Options();

    Option input = new Option("i", "input", true, "input file path");
    input.setRequired(false);
    options.addOption(input);

    Option verb = new Option("v", "verbose", false, "verbose process");
    verb.setRequired(false);
    options.addOption(verb);

    Option output = new Option("o", "output", true, "output file");
    output.setRequired(false);
    options.addOption(output);

    CommandLineParser parser = new DefaultParser();
    HelpFormatter formatter = new HelpFormatter();
    CommandLine cmd;
    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        System.out.println(e.getMessage());
        formatter.printHelp("UtilResult", options);
        System.exit(1);
        // Unreachable at runtime, but required so the compiler sees cmd as definitely assigned.
        return;
    }

    String inputFilePath = cmd.getOptionValue("input");
    if (inputFilePath == null) {
        inputFilePath = "/home/strike/grafos-para-processar/mft2/resultado.txt";
    }

    String lowerCasePath = inputFilePath.toLowerCase();
    if (lowerCasePath.endsWith(".txt")) {
        processFileTxt(inputFilePath);
    } else if (lowerCasePath.endsWith(".json")) {
        processFileJson(inputFilePath);
    } else {
        System.err.println("Unsupported input file type (expected .txt or .json): " + inputFilePath);
    }
}
From source file:io.anserini.index.IndexTweets.java
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(HELP_OPTION, "show help")); options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment")); options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors")); options.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("source collection directory") .create(COLLECTION_OPTION)); options.addOption(/*from w w w . ja v a 2 s . co m*/ OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids") .create(DELETES_OPTION)); options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(COLLECTION_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(IndexTweets.class.getName(), options); System.exit(-1); } String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION); String indexPath = cmdline.getOptionValue(INDEX_OPTION); final FieldType textOptions = new FieldType(); textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); textOptions.setStored(true); textOptions.setTokenized(true); if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) { textOptions.setStoreTermVectors(true); } LOG.info("collection: " + collectionPath); LOG.info("index: " + indexPath); LongOpenHashSet deletes = null; if (cmdline.hasOption(DELETES_OPTION)) { deletes = new LongOpenHashSet(); File deletesFile = new 
File(cmdline.getOptionValue(DELETES_OPTION)); if (!deletesFile.exists()) { System.err.println("Error: " + deletesFile + " does not exist!"); System.exit(-1); } LOG.info("Reading deletes from " + deletesFile); FileInputStream fin = new FileInputStream(deletesFile); byte[] ignoreBytes = new byte[2]; fin.read(ignoreBytes); // "B", "Z" bytes from commandline tools BufferedReader br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fin))); String s; while ((s = br.readLine()) != null) { if (s.contains("\t")) { deletes.add(Long.parseLong(s.split("\t")[0])); } else { deletes.add(Long.parseLong(s)); } } br.close(); fin.close(); LOG.info("Read " + deletes.size() + " tweetids from deletes file."); } long maxId = Long.MAX_VALUE; if (cmdline.hasOption(MAX_ID_OPTION)) { maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION)); LOG.info("index: " + maxId); } long startTime = System.currentTimeMillis(); File file = new File(collectionPath); if (!file.exists()) { System.err.println("Error: " + file + " does not exist!"); System.exit(-1); } StatusStream stream = new JsonStatusCorpusReader(file); Directory dir = FSDirectory.open(Paths.get(indexPath)); final IndexWriterConfig config = new IndexWriterConfig(ANALYZER); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, config); int cnt = 0; Status status; try { while ((status = stream.next()) != null) { if (status.getText() == null) { continue; } // Skip deletes tweetids. 
if (deletes != null && deletes.contains(status.getId())) { continue; } if (status.getId() > maxId) { continue; } cnt++; Document doc = new Document(); doc.add(new LongPoint(StatusField.ID.name, status.getId())); doc.add(new StoredField(StatusField.ID.name, status.getId())); doc.add(new LongPoint(StatusField.EPOCH.name, status.getEpoch())); doc.add(new StoredField(StatusField.EPOCH.name, status.getEpoch())); doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES)); doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions)); doc.add(new IntPoint(StatusField.FRIENDS_COUNT.name, status.getFollowersCount())); doc.add(new StoredField(StatusField.FRIENDS_COUNT.name, status.getFollowersCount())); doc.add(new IntPoint(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount())); doc.add(new StoredField(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount())); doc.add(new IntPoint(StatusField.STATUSES_COUNT.name, status.getStatusesCount())); doc.add(new StoredField(StatusField.STATUSES_COUNT.name, status.getStatusesCount())); long inReplyToStatusId = status.getInReplyToStatusId(); if (inReplyToStatusId > 0) { doc.add(new LongPoint(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId)); doc.add(new StoredField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId)); doc.add(new LongPoint(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId())); doc.add(new StoredField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId())); } String lang = status.getLang(); if (!lang.equals("unknown")) { doc.add(new TextField(StatusField.LANG.name, status.getLang(), Store.YES)); } long retweetStatusId = status.getRetweetedStatusId(); if (retweetStatusId > 0) { doc.add(new LongPoint(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId)); doc.add(new StoredField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId)); doc.add(new LongPoint(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId())); 
doc.add(new StoredField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId())); doc.add(new IntPoint(StatusField.RETWEET_COUNT.name, status.getRetweetCount())); doc.add(new StoredField(StatusField.RETWEET_COUNT.name, status.getRetweetCount())); if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) { LOG.warn("Error parsing retweet fields of " + status.getId()); } } writer.addDocument(doc); if (cnt % 100000 == 0) { LOG.info(cnt + " statuses indexed"); } } LOG.info(String.format("Total of %s statuses added", cnt)); if (cmdline.hasOption(OPTIMIZE_OPTION)) { LOG.info("Merging segments..."); writer.forceMerge(1); LOG.info("Done!"); } LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms"); } catch (Exception e) { e.printStackTrace(); } finally { writer.close(); dir.close(); stream.close(); } }