List of usage examples for com.google.common.collect Lists newArrayList
@GwtCompatible(serializable = true)
public static <E> ArrayList<E> newArrayList(Iterator<? extends E> elements)
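A minimal, self-contained sketch of the Iterator overload above, for orientation before the full examples below. The class name and sample elements are illustrative only; the examples that follow also use the varargs and Iterable overloads of newArrayList.

import com.google.common.collect.Lists;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;

public class NewArrayListIteratorSketch {
    public static void main(String[] args) {
        // An iterator over some sample elements (illustrative data).
        Iterator<String> elements = Arrays.asList("a", "b", "c").iterator();

        // Copies the iterator's remaining elements into a new mutable ArrayList.
        ArrayList<String> copy = Lists.newArrayList(elements);

        copy.add("d"); // unlike Arrays.asList, the returned list can grow
        System.out.println(copy); // prints [a, b, c, d]
    }
}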
From source file:brooklyn.demo.WebClusterDatabaseExampleApp.java
public static void main(String[] argv) {
    List<String> args = Lists.newArrayList(argv);
    String port = CommandLineUtil.getCommandLineOption(args, "--port", "8081+");
    String location = CommandLineUtil.getCommandLineOption(args, "--location", DEFAULT_LOCATION);

    BrooklynLauncher launcher = BrooklynLauncher.newInstance()
            .application(EntitySpec.create(StartableApplication.class, WebClusterDatabaseExampleApp.class)
                    .displayName("Brooklyn WebApp Cluster with Database example"))
            .webconsolePort(port)
            .location(location)
            .start();

    Entities.dumpInfo(launcher.getApplications());
}
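Here Lists.newArrayList(argv) turns the argument array into a mutable list, presumably so that CommandLineUtil.getCommandLineOption can remove each option as it is consumed; a fixed-size Arrays.asList view would not allow that.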
From source file:org.dllearner.algorithms.qtl.experiments.BenchmarkDescriptionGeneratorDatabase.java
public static void main(String[] args) throws Exception {
    OptionParser parser = new OptionParser();
    OptionSpec<File> benchmarkDirectorySpec = parser.accepts("d", "base directory").withRequiredArg()
            .ofType(File.class).required();
    OptionSpec<File> queriesFileSpec = parser.accepts("i", "input queries file").withRequiredArg()
            .ofType(File.class).required();
    OptionSpec<String> tableNameSpec = parser.accepts("db", "database name").withRequiredArg()
            .ofType(String.class).required();
    OptionSpec<URL> endpointURLSpec = parser.accepts("e", "endpoint URL").withRequiredArg().ofType(URL.class)
            .required();
    OptionSpec<String> defaultGraphSpec = parser.accepts("g", "default graph").withRequiredArg()
            .ofType(String.class);
    OptionSpec<Boolean> useCacheSpec = parser.accepts("cache", "use cache").withOptionalArg()
            .ofType(Boolean.class).defaultsTo(Boolean.TRUE);
    OptionSpec<Boolean> queriesHaveIdSpec = parser.accepts("id", "input file contains ID, SPARQL query")
            .withOptionalArg().ofType(Boolean.class).defaultsTo(Boolean.TRUE);
    OptionSpec<String> cbdSpec = parser.accepts("cbd", "CBD structure tree string").withOptionalArg()
            .ofType(String.class).required();
    OptionSpec<String> queriesToOmitTokensSpec = parser
            .accepts("omitTokens",
                    "comma-separated list of tokens such that queries containing any of them will be omitted")
            .withRequiredArg().ofType(String.class).defaultsTo("");
    OptionSpec<Boolean> workaroundSpec = parser.accepts("workaround", "Virtuoso parse error workaround enabled")
            .withRequiredArg().ofType(Boolean.class).defaultsTo(Boolean.FALSE);

    OptionSet options = parser.parse(args);

    File benchmarkDirectory = options.valueOf(benchmarkDirectorySpec);
    File inputFile = options.valueOf(queriesFileSpec);
    String tableName = options.valueOf(tableNameSpec);
    URL endpointURL = options.valueOf(endpointURLSpec);
    List<String> defaultGraphs = options.has(defaultGraphSpec)
            ? Lists.newArrayList(options.valueOf(defaultGraphSpec))
            : Collections.emptyList();
    SparqlEndpoint endpoint = SparqlEndpoint.create(endpointURL.toString(), defaultGraphs);

    // SparqlEndpointKS ks = new SparqlEndpointKS(endpoint);
    // ks.setUseCache(options.valueOf(useCacheSpec));
    // ks.setCacheDir(benchmarkDirectory.getPath());
    // ks.setQueryDelay(1000);
    // ks.setRetryCount(0);
    // ks.init();
    QueryExecutionFactory qef = buildQueryExecutionFactory(endpoint, options.valueOf(useCacheSpec),
            benchmarkDirectory.getPath(), TimeUnit.DAYS.toMillis(30), 0, 60);

    CBDStructureTree cbdStructureTree = CBDStructureTree.fromTreeString(options.valueOf(cbdSpec).trim());

    List<String> omitTokens = Splitter.on(",").omitEmptyStrings().trimResults()
            .splitToList(options.valueOf(queriesToOmitTokensSpec));

    BenchmarkDescriptionGeneratorDatabase generator = new BenchmarkDescriptionGeneratorDatabase(qef);
    generator.setDefaultCbdStructure(cbdStructureTree);
    generator.setSkipQueryTokens(omitTokens);
    generator.setEndpoint(endpoint);
    generator.setWorkaroundEnabled(options.valueOf(workaroundSpec));
    generator.generateBenchmarkDescription(inputFile, tableName, options.valueOf(queriesHaveIdSpec));
}
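In this example Lists.newArrayList(options.valueOf(defaultGraphSpec)) wraps a single option value in a one-element mutable list, falling back to Collections.emptyList() when no default graph is given.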
From source file:org.attribyte.api.pubsub.impl.server.Server.java
/**
 * Starts the server.
 * @param args The startup args.
 * @throws Exception on startup error.
 */
public static void main(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Start-up error: Expecting <config file> [allowed topics file]");
        System.exit(1);
    }

    Properties commandLineOverrides = new Properties();
    args = InitUtil.fromCommandLine(args, commandLineOverrides);

    Properties props = new Properties();
    Properties logProps = new Properties();
    CLI.loadProperties(args, props, logProps);

    props.putAll(commandLineOverrides);
    logProps.putAll(commandLineOverrides);

    final Logger logger = initLogger(props, logProps);

    logger.info("Applied command line overrides: " + commandLineOverrides.toString());

    //Buffer and log hub events for logging and debug...
    final int MAX_STORED_SUBSCRIPTION_REQUESTS = 200;

    final ArrayBlockingQueue<SubscriptionEvent> recentSubscriptionRequests = new ArrayBlockingQueue<>(
            MAX_STORED_SUBSCRIPTION_REQUESTS);

    final HubEndpoint.EventHandler hubEventHandler = new HubEndpoint.EventHandler() {

        private synchronized void offer(SubscriptionEvent record) {
            if (!recentSubscriptionRequests.offer(record)) {
                List<SubscriptionEvent> drain = Lists
                        .newArrayListWithCapacity(MAX_STORED_SUBSCRIPTION_REQUESTS / 2);
                recentSubscriptionRequests.drainTo(drain, drain.size());
                recentSubscriptionRequests.offer(record);
            }
        }

        @Override
        public void subscriptionRequestAccepted(final Request request, final Response response,
                final Subscriber subscriber) {
            final SubscriptionEvent record;
            try {
                record = new SubscriptionRequestRecord(request, response, subscriber);
            } catch (IOException ioe) {
                return;
            }
            logger.info(record.toString());
            offer(record);
        }

        @Override
        public void subscriptionRequestRejected(final Request request, final Response response,
                final Subscriber subscriber) {
            final SubscriptionEvent record;
            try {
                record = new SubscriptionRequestRecord(request, response, subscriber);
            } catch (IOException ioe) {
                return;
            }
            logger.warn(record.toString());
            offer(record);
        }

        @Override
        public void subscriptionVerifyFailure(String callbackURL, int callbackResponseCode, String reason,
                int attempts, boolean abandoned) {
            final SubscriptionEvent record = new SubscriptionVerifyRecord(callbackURL, callbackResponseCode,
                    reason, attempts, abandoned);
            logger.warn(record.toString());
            offer(record);
        }

        @Override
        public void subscriptionVerified(Subscription subscription) {
            final SubscriptionEvent record = new SubscriptionVerifyRecord(subscription);
            logger.info(record.toString());
            offer(record);
        }
    };

    /**
     * A source for subscription request records (for console, etc).
     */
    final SubscriptionEvent.Source subscriptionEventSource = new SubscriptionEvent.Source() {
        public List<SubscriptionEvent> latestEvents(int limit) {
            List<SubscriptionEvent> records = Lists.newArrayList(recentSubscriptionRequests);
            Collections.sort(records);
            return records.size() < limit ? records : records.subList(0, limit);
        }
    };

    /**
     * A queue to which new topics are added as reported by the datastore event handler.
     */
    final BlockingQueue<Topic> newTopicQueue = new LinkedBlockingDeque<>();

    /**
     * A datastore event handler that offers new topics to a queue.
     */
    final HubDatastore.EventHandler topicEventHandler = new HubDatastore.EventHandler() {

        @Override
        public void newTopic(final Topic topic) throws DatastoreException {
            newTopicQueue.offer(topic);
        }

        @Override
        public void newSubscription(final Subscription subscription) throws DatastoreException {
            //Ignore
        }

        @Override
        public void exception(final Throwable t) {
            //Ignore
        }

        @Override
        public void setNext(final HubDatastore.EventHandler next) {
            //Ignore
        }
    };

    final HubEndpoint endpoint = new HubEndpoint("endpoint.", props, logger, hubEventHandler, topicEventHandler);

    final String topicAddedTopicURL = Strings.emptyToNull(props.getProperty("endpoint.topicAddedTopic", ""));
    final Topic topicAddedTopic = topicAddedTopicURL != null
            ? endpoint.getDatastore().getTopic(topicAddedTopicURL, true) : null;

    final Thread topicAddedNotifier = topicAddedTopic != null
            ? new Thread(new TopicAddedNotifier(newTopicQueue, endpoint, topicAddedTopic)) : null;
    if (topicAddedNotifier != null) {
        topicAddedNotifier.setName("topic-added-notifier");
        topicAddedNotifier.start();
    }

    if (props.getProperty("endpoint.topics") != null) { //Add supported topics...
        for (String topicURL : Splitter.on(",").omitEmptyStrings().trimResults()
                .split(props.getProperty("endpoint.topics"))) {
            Topic topic = endpoint.getDatastore().getTopic(topicURL, true);
            System.out.println("Added topic, '" + topicURL + "' (" + topic.getId() + ")");
        }
    }

    final MetricRegistry registry = props.getProperty("endpoint.instrumentJVM", "true").equalsIgnoreCase("true")
            ? instrumentJVM(new MetricRegistry()) : new MetricRegistry();

    if (props.getProperty("endpoint.instrumentSystem", "true").equalsIgnoreCase("true")) {
        instrumentSystem(registry);
    }

    registry.registerAll(endpoint);

    final HealthCheckRegistry healthCheckRegistry = new HealthCheckRegistry(); //TODO

    final Reporting reporting = new Reporting("metrics-reporting.", props, registry, null); //No filter...

    String httpAddress = props.getProperty("http.address", "127.0.0.1");
    int httpPort = Integer.parseInt(props.getProperty("http.port", "8086"));

    org.eclipse.jetty.server.Server server = new org.eclipse.jetty.server.Server();
    server.addLifeCycleListener(new LifeCycle.Listener() {

        public void lifeCycleFailure(LifeCycle event, Throwable cause) {
            System.out.println("Failure " + cause.toString());
        }

        public void lifeCycleStarted(LifeCycle event) {
            System.out.println("Started...");
        }

        public void lifeCycleStarting(LifeCycle event) {
            System.out.println("Server Starting...");
        }

        public void lifeCycleStopped(LifeCycle event) {
            System.out.println("Server Stopped...");
        }

        public void lifeCycleStopping(LifeCycle event) {
            System.out.println("Shutting down metrics reporting...");
            reporting.stop();
            if (topicAddedNotifier != null) {
                System.out.println("Shutting down new topic notifier...");
                topicAddedNotifier.interrupt();
            }
            System.out.println("Shutting down endpoint...");
            endpoint.shutdown();
            System.out.println("Shutdown endpoint...");
        }
    });

    HttpConfiguration httpConfig = new HttpConfiguration();
    httpConfig.setOutputBufferSize(32768);
    httpConfig.setRequestHeaderSize(8192);
    httpConfig.setResponseHeaderSize(8192);
    httpConfig.setSendServerVersion(false);
    httpConfig.setSendDateHeader(false);

    ServerConnector httpConnector = new ServerConnector(server, new HttpConnectionFactory(httpConfig));
    httpConnector.setHost(httpAddress);
    httpConnector.setPort(httpPort);
    httpConnector.setIdleTimeout(30000L);
    server.addConnector(httpConnector);

    HandlerCollection serverHandlers = new HandlerCollection();
    server.setHandler(serverHandlers);

    ServletContextHandler rootContext = new ServletContextHandler(ServletContextHandler.NO_SESSIONS);
    rootContext.setContextPath("/");

    final AdminConsole adminConsole;
    final List<String> allowedAssetPaths;
    if (props.getProperty("admin.enabled", "false").equalsIgnoreCase("true")) {
        File assetDirFile = getSystemFile("admin.assetDirectory", props);
        if (assetDirFile == null) {
            System.err.println("The 'admin.assetDirectory' must be configured");
            System.exit(1);
        }
        if (!assetDirFile.exists()) {
            System.err.println("The 'admin.assetDirectory'" + assetDirFile.getAbsolutePath() + "' must exist");
            System.exit(1);
        }
        if (!assetDirFile.isDirectory()) {
            System.err.println(
                    "The 'admin.assetDirectory'" + assetDirFile.getAbsolutePath() + "' must be a directory");
            System.exit(1);
        }
        if (!assetDirFile.canRead()) {
            System.err.println(
                    "The 'admin.assetDirectory'" + assetDirFile.getAbsolutePath() + "' must be readable");
            System.exit(1);
        }

        char[] adminUsername = props.getProperty("admin.username", "").toCharArray();
        char[] adminPassword = props.getProperty("admin.password", "").toCharArray();
        String adminRealm = props.getProperty("admin.realm", "pubsubhub");
        if (adminUsername.length == 0 || adminPassword.length == 0) {
            System.err.println("The 'admin.username' and 'admin.password' must be specified");
            System.exit(1);
        }

        File templateDirFile = getSystemFile("admin.templateDirectory", props);
        if (templateDirFile == null) {
            System.err.println("The 'admin.templateDirectory' must be specified");
            System.exit(1);
        }
        if (!templateDirFile.exists()) {
            System.err
                    .println("The 'admin.templateDirectory'" + assetDirFile.getAbsolutePath() + "' must exist");
            System.exit(1);
        }
        if (!templateDirFile.isDirectory()) {
            System.err.println(
                    "The 'admin.templateDirectory'" + assetDirFile.getAbsolutePath() + "' must be a directory");
            System.exit(1);
        }
        if (!templateDirFile.canRead()) {
            System.err.println(
                    "The 'admin.templateDirectory'" + assetDirFile.getAbsolutePath() + "' must be readable");
            System.exit(1);
        }

        adminConsole = new AdminConsole(rootContext, assetDirFile.getAbsolutePath(), endpoint,
                new AdminAuth(adminRealm, adminUsername, adminPassword), templateDirFile.getAbsolutePath(),
                logger);
        allowedAssetPaths = Lists.newArrayList(Splitter.on(',').omitEmptyStrings().trimResults()
                .split(props.getProperty("admin.assetPaths", "")));
        System.out.println("Admin console is enabled...");
    } else {
        adminConsole = null;
        allowedAssetPaths = ImmutableList.of();
    }

    serverHandlers.addHandler(rootContext);

    //TODO: Introduces incompatible dependency...
    /*
    InstrumentedHandler instrumentedHandler = new InstrumentedHandler(registry);
    instrumentedHandler.setName("http-server");
    instrumentedHandler.setHandler(rootContext);
    serverHandlers.addHandler(instrumentedHandler);
    */

    File requestLogPathFile = getSystemFile("http.log.path", props);
    if (requestLogPathFile != null) {
        if (!requestLogPathFile.exists()) {
            System.err
                    .println("The 'http.log.path', '" + requestLogPathFile.getAbsolutePath() + "' must exist");
            System.exit(1);
        }
        if (!requestLogPathFile.isDirectory()) {
            System.err.println(
                    "The 'http.log.path', '" + requestLogPathFile.getAbsolutePath() + "' must be a directory");
            System.exit(1);
        }
        if (!requestLogPathFile.canWrite()) {
            System.err.println(
                    "The 'http.log.path', '" + requestLogPathFile.getAbsolutePath() + "' is not writable");
            System.exit(1);
        }

        int requestLogRetainDays = Integer.parseInt(props.getProperty("http.log.retainDays", "14"));
        boolean requestLogExtendedFormat = props.getProperty("http.log.extendedFormat", "true")
                .equalsIgnoreCase("true");
        String requestLogTimeZone = props.getProperty("http.log.timeZone", TimeZone.getDefault().getID());
        String requestLogPrefix = props.getProperty("http.log.prefix", "requests");
        String requestLogPath = requestLogPathFile.getAbsolutePath();
        if (!requestLogPath.endsWith("/")) {
            requestLogPath = requestLogPath + "/";
        }

        NCSARequestLog requestLog = new NCSARequestLog(requestLogPath + requestLogPrefix + "-yyyy_mm_dd.log");
        requestLog.setRetainDays(requestLogRetainDays);
        requestLog.setAppend(true);
        requestLog.setExtended(requestLogExtendedFormat);
        requestLog.setLogTimeZone(requestLogTimeZone);
        requestLog.setLogCookies(false);
        requestLog.setPreferProxiedForAddress(true);

        RequestLogHandler requestLogHandler = new RequestLogHandler();
        requestLogHandler.setRequestLog(requestLog);
        serverHandlers.addHandler(requestLogHandler);
    }

    HubServlet hubServlet = new HubServlet(endpoint, logger);
    rootContext.addServlet(new ServletHolder(hubServlet), "/subscribe/*");

    InitUtil filterInit = new InitUtil("publish.", props);
    List<BasicAuthFilter> publishURLFilters = Lists.newArrayList();
    List<Object> publishURLFilterObjects = filterInit.initClassList("topicURLFilters", BasicAuthFilter.class);
    for (Object o : publishURLFilterObjects) {
        BasicAuthFilter filter = (BasicAuthFilter) o;
        filter.init(filterInit.getProperties());
        publishURLFilters.add(filter);
    }

    final long topicCacheMaxAgeSeconds = Long
            .parseLong(props.getProperty("endpoint.topicCache.maxAgeSeconds", "0"));
    final Cache<String, Topic> topicCache;
    if (topicCacheMaxAgeSeconds > 0) {
        topicCache = CacheBuilder.newBuilder().concurrencyLevel(16)
                .expireAfterWrite(topicCacheMaxAgeSeconds, TimeUnit.SECONDS).maximumSize(4096).build();
    } else {
        topicCache = null;
    }

    final String replicationTopicURL = Strings.emptyToNull(props.getProperty("endpoint.replicationTopic", ""));
    //Get or create replication topic, if configured.
    final Topic replicationTopic = replicationTopicURL != null
            ? endpoint.getDatastore().getTopic(replicationTopicURL, true) : null;

    int maxBodySizeBytes = filterInit.getIntProperty("maxBodySizeBytes", BroadcastServlet.DEFAULT_MAX_BODY_BYTES);
    boolean autocreateTopics = filterInit.getProperty("autocreateTopics", "false").equalsIgnoreCase("true");
    int maxSavedNotifications = filterInit.getIntProperty("maxSavedNotifications", 0);
    boolean jsonEnabled = filterInit.getProperty("jsonEnabled", "false").equalsIgnoreCase("true");
    final BroadcastServlet broadcastServlet = new BroadcastServlet(endpoint, maxBodySizeBytes, autocreateTopics,
            logger, publishURLFilters, topicCache, replicationTopic, maxSavedNotifications, jsonEnabled);
    rootContext.addServlet(new ServletHolder(broadcastServlet), "/notify/*");

    CallbackMetricsServlet callbackMetricsServlet = new CallbackMetricsServlet(endpoint);
    ServletHolder callbackMetricsServletHolder = new ServletHolder(callbackMetricsServlet);
    rootContext.addServlet(callbackMetricsServletHolder, "/metrics/callback/*");

    NotificationMetricsServlet notificationMetricsServlet = new NotificationMetricsServlet(endpoint);
    ServletHolder notificationMetricsServletHolder = new ServletHolder(notificationMetricsServlet);
    rootContext.addServlet(notificationMetricsServletHolder, "/metrics/notification/*");

    MetricsServlet metricsServlet = new MetricsServlet(registry);
    ServletHolder metricsServletHolder = new ServletHolder(metricsServlet);
    rootContext.setInitParameter(MetricsServlet.RATE_UNIT, "SECONDS");
    rootContext.setInitParameter(MetricsServlet.DURATION_UNIT, "MILLISECONDS");
    rootContext.setInitParameter(MetricsServlet.SHOW_SAMPLES, "false");
    rootContext.addServlet(metricsServletHolder, "/metrics/*");

    boolean outputHostAddys = props.getProperty("ping.outputHostAddresses", "false").equalsIgnoreCase("true");
    PingServlet pingServlet = new PingServlet(props.getProperty("http.instanceName", ""), outputHostAddys);
    rootContext.addServlet(new ServletHolder(pingServlet), "/ping/*");

    HealthCheckServlet healthCheckServlet = new HealthCheckServlet(healthCheckRegistry);
    for (Map.Entry<String, HealthCheck> healthCheck : endpoint.getDatastore().getHealthChecks().entrySet()) {
        healthCheckRegistry.register(healthCheck.getKey(), healthCheck.getValue());
    }
    healthCheckRegistry.register("no-deadlocked-threads", new ThreadDeadlockHealthCheck());
    rootContext.addServlet(new ServletHolder(healthCheckServlet), "/health/*");

    ThreadDumpServlet threadDumpServlet = new ThreadDumpServlet();
    rootContext.addServlet(new ServletHolder(threadDumpServlet), "/threads/*");

    if (adminConsole != null && allowedAssetPaths.size() > 0) {
        String adminPath = props.getProperty("admin.path", "/admin/");
        List<Invalidatable> invalidatables = Collections.<Invalidatable>singletonList(new Invalidatable() {
            @Override
            public void invalidate() {
                broadcastServlet.invalidateCaches();
                if (topicCache != null) {
                    topicCache.invalidateAll();
                }
            }
        });
        adminConsole.initServlets(rootContext, adminPath, allowedAssetPaths, invalidatables,
                subscriptionEventSource, broadcastServlet);
    }

    int numReporters = reporting.start();
    logger.info("Started " + numReporters + " metrics reporters");

    server.setDumpBeforeStop(false);
    server.setStopAtShutdown(true);
    server.start();
    server.join();
}
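This example uses Lists.newArrayList in three ways: to snapshot the recentSubscriptionRequests queue so the copy can be sorted and truncated, to materialize the Splitter output for admin.assetPaths into the mutable allowedAssetPaths list, and to create the initially empty publishURLFilters list that is then populated in a loop.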
From source file:pl.edu.icm.cermine.PdfNLMContentExtractor.java
public static void main(String[] args) throws ParseException, IOException {
    CommandLineOptionsParser parser = new CommandLineOptionsParser();
    if (!parser.parse(args)) {
        System.err.println("Usage: PdfNLMContentExtractor -path <path> [optional parameters]\n\n"
                + "Tool for extracting metadata and content from PDF files.\n\n"
                + "Arguments:\n"
                + " -path <path> path to a PDF file or directory containing PDF files\n"
                + " -ext <extension> (optional) the extension of the resulting metadata file;\n"
                + " default: \"cermxml\"; used only if passed path is a directory\n"
                + " -modelmeta <path> (optional) the path to the metadata classifier model file\n"
                + " -modelinit <path> (optional) the path to the initial classifier model file\n"
                + " -str whether to store structure (TrueViz) files as well;\n"
                + " used only if passed path is a directory\n"
                + " -strext <extension> (optional) the extension of the structure (TrueViz) file;\n"
                + " default: \"cxml\"; used only if passed path is a directory\n"
                + " -threads <num> number of threads for parallel processing\n");
        System.exit(1);
    }

    String path = parser.getPath();
    String extension = parser.getNLMExtension();
    boolean extractStr = parser.extractStructure();
    String strExtension = parser.getBxExtension();
    PdfNLMContentExtractor.THREADS_NUMBER = parser.getThreadsNumber();

    File file = new File(path);
    if (file.isFile()) {
        try {
            PdfNLMContentExtractor extractor = new PdfNLMContentExtractor();
            parser.updateMetadataModel(extractor.getConf());
            parser.updateInitialModel(extractor.getConf());
            InputStream in = new FileInputStream(file);
            Element result = extractor.extractContent(in);
            XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat());
            System.out.println(outputter.outputString(result));
        } catch (AnalysisException ex) {
            ex.printStackTrace();
        }
    } else {
        Collection<File> files = FileUtils.listFiles(file, new String[] { "pdf" }, true);
        int i = 0;
        for (File pdf : files) {
            File xmlF = new File(pdf.getPath().replaceAll("pdf$", extension));
            if (xmlF.exists()) {
                i++;
                continue;
            }

            long start = System.currentTimeMillis();
            float elapsed = 0;

            System.out.println(pdf.getPath());

            try {
                PdfNLMContentExtractor extractor = new PdfNLMContentExtractor();
                parser.updateMetadataModel(extractor.getConf());
                parser.updateInitialModel(extractor.getConf());
                InputStream in = new FileInputStream(pdf);
                BxDocument doc = ExtractionUtils.extractStructure(extractor.getConf(), in);
                Element result = extractor.extractContent(doc);

                long end = System.currentTimeMillis();
                elapsed = (end - start) / 1000F;

                XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat());
                if (!xmlF.createNewFile()) {
                    System.out.println("Cannot create new file!");
                }
                FileUtils.writeStringToFile(xmlF, outputter.outputString(result));

                if (extractStr) {
                    BxDocumentToTrueVizWriter writer = new BxDocumentToTrueVizWriter();
                    File strF = new File(pdf.getPath().replaceAll("pdf$", strExtension));
                    writer.write(new FileWriter(strF), Lists.newArrayList(doc));
                }
            } catch (AnalysisException ex) {
                ex.printStackTrace();
            } catch (TransformationException ex) {
                ex.printStackTrace();
            }

            i++;
            int percentage = i * 100 / files.size();
            if (elapsed == 0) {
                elapsed = (System.currentTimeMillis() - start) / 1000F;
            }
            System.out.println("Extraction time: " + Math.round(elapsed) + "s");
            System.out.println(percentage + "% done (" + i + " out of " + files.size() + ")");
            System.out.println("");
        }
    }
}
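Here Lists.newArrayList(doc) converts the extracted BxDocument into the List form that BxDocumentToTrueVizWriter.write expects as its second argument.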
From source file:com.google.cloud.genomics.dataflow.pipelines.CalculateCoverage.java
public static void main(String[] args) throws GeneralSecurityException, IOException {
    // Register the options so that they show up via --help
    PipelineOptionsFactory.register(Options.class);
    options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

    auth = GenomicsOptions.Methods.getGenomicsAuth(options);

    p = Pipeline.create(options);
    p.getCoderRegistry().setFallbackCoderProvider(GenericJsonCoder.PROVIDER);

    if (options.getInputDatasetId().isEmpty() && options.getReadGroupSetIds().isEmpty()) {
        throw new IllegalArgumentException("InputDatasetId or ReadGroupSetIds must be specified");
    }

    List<String> rgsIds;
    if (options.getInputDatasetId().isEmpty()) {
        rgsIds = Lists.newArrayList(options.getReadGroupSetIds().split(","));
    } else {
        rgsIds = GenomicsUtils.getReadGroupSetIds(options.getInputDatasetId(), auth);
    }

    if (rgsIds.size() < options.getNumQuantiles()) {
        throw new IllegalArgumentException("Number of ReadGroupSets must be greater than or equal to"
                + " the number of requested quantiles.");
    }

    // Grab one ReferenceSetId to be used within the pipeline to confirm that all ReadGroupSets
    // are associated with the same ReferenceSet.
    String referenceSetId = GenomicsUtils.getReferenceSetId(rgsIds.get(0), auth);
    if (Strings.isNullOrEmpty(referenceSetId)) {
        throw new IllegalArgumentException("No ReferenceSetId associated with ReadGroupSetId " + rgsIds.get(0)
                + ". All ReadGroupSets in given input must have an associated ReferenceSet.");
    }

    // Create our destination AnnotationSet for the associated ReferenceSet.
    AnnotationSet annotationSet = createAnnotationSet(referenceSetId);

    PCollection<Read> reads = p.begin().apply(Create.of(rgsIds))
            .apply(ParDo.of(new CheckMatchingReferenceSet(referenceSetId, auth)))
            .apply(new ReadGroupStreamer(auth, ShardBoundary.Requirement.STRICT, READ_FIELDS,
                    SexChromosomeFilter.INCLUDE_XY));

    PCollection<KV<PosRgsMq, Double>> coverageMeans = reads.apply(new CalculateCoverageMean());
    PCollection<KV<Position, KV<PosRgsMq.MappingQuality, List<Double>>>> quantiles = coverageMeans
            .apply(new CalculateQuantiles(options.getNumQuantiles()));
    PCollection<KV<Position, Iterable<KV<PosRgsMq.MappingQuality, List<Double>>>>> answer = quantiles
            .apply(GroupByKey.<Position, KV<PosRgsMq.MappingQuality, List<Double>>>create());
    answer.apply(ParDo.of(new CreateAnnotations(annotationSet.getId(), auth, true)));

    p.run();
}
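Lists.newArrayList(options.getReadGroupSetIds().split(",")) turns the comma-separated ReadGroupSet IDs into a mutable List<String>, matching the List returned by GenomicsUtils.getReadGroupSetIds on the other branch.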
From source file:org.apache.mahout.knn.tools.TrainNewsGroupsKMeansLogisticRegression.java
public static void main(String[] args) throws IOException, ParseException {
    Options options = new Options();
    options.addOption("i", "input", true, "Path to the input folder containing the training set's"
            + " sequence files.");
    options.addOption("o", "output", true, "Base path to the output file. The name will be "
            + "appended with a suffix for each type of training.");
    options.addOption("a", "actual", false, "If set, runs the training with the actual cluster "
            + "assignments and outputs the model to the output path with a -actual suffix.");
    options.addOption("b", "ballkmeans", false, "If set, runs the training with the ball k-means "
            + "cluster assignments and outputs the model to the output path with a -ballkmeans suffix.");
    options.addOption("s", "streamingkmeans", false, "If set, runs the training with the "
            + "streaming k-means cluster assignments and outputs the model to the output path with a "
            + "-streamingkmeans suffix.");
    options.addOption("c", "centroids", true, "Path to the centroids seqfile");

    CommandLine cmd = (new PosixParser()).parse(options, args);

    String inputPath = cmd.getOptionValue("input");
    Preconditions.checkNotNull(inputPath);
    String outputBase = cmd.getOptionValue("output");
    Preconditions.checkNotNull(outputBase);
    String centroidsPath = cmd.getOptionValue("centroids");
    Preconditions.checkNotNull(centroidsPath);

    Configuration conf = new Configuration();
    SequenceFileDirIterable<Text, VectorWritable> inputIterable = new SequenceFileDirIterable<Text, VectorWritable>(
            new Path(inputPath), PathType.LIST, conf);

    PrintStream clusterIdOut = new PrintStream(new FileOutputStream("cluster-ids.csv"));
    clusterIdOut.printf("clusterName, clusterId\n");
    int clusterId = 0;
    Map<String, Integer> clusterNamesToIds = Maps.newHashMapWithExpectedSize(NUM_CLASSES);
    for (Pair<Text, VectorWritable> pair : inputIterable) {
        String clusterName = pair.getFirst().toString();
        if (!clusterNamesToIds.containsKey(clusterName)) {
            clusterIdOut.printf("%s, %d\n", clusterName, clusterId);
            clusterNamesToIds.put(clusterName, clusterId++);
        }
    }
    clusterIdOut.close();

    if (cmd.hasOption("actual")) {
        System.out.printf("\nActual clusters models\n");
        System.out.printf("----------------------\n");
        long start = System.currentTimeMillis();
        trainActual(inputIterable, outputBase, clusterNamesToIds);
        long end = System.currentTimeMillis();
        System.out.printf("Trained models for actual clusters. Took %d ms\n", end - start);
    }

    if (cmd.hasOption("ballkmeans") || cmd.hasOption("streamingkmeans")) {
        SequenceFileValueIterable<CentroidWritable> centroidIterable = new SequenceFileValueIterable<CentroidWritable>(
                new Path(centroidsPath), conf);
        List<Centroid> centroids = Lists
                .newArrayList(CreateCentroids.getCentroidsFromCentroidWritableIterable(centroidIterable));

        if (cmd.hasOption("ballkmeans")) {
            System.out.printf("\nBall k-means clusters models\n");
            System.out.printf("----------------------------\n");
            long start = System.currentTimeMillis();
            trainComputed(inputIterable, outputBase, "ballkmeans", clusterNamesToIds,
                    new Pair<Integer, Iterable<Centroid>>(NUM_FEATURES_BKM, centroids));
            long end = System.currentTimeMillis();
            System.out.printf("Trained models for ballkmeans clusters. Took %d ms\n", end - start);
        }

        if (cmd.hasOption("streamingkmeans")) {
            System.out.printf("\nStreaming k-means clusters models\n");
            System.out.printf("---------------------------------\n");
            long start = System.currentTimeMillis();
            trainComputed(inputIterable, outputBase, "streamingkmeans", clusterNamesToIds,
                    new Pair<Integer, Iterable<Centroid>>(centroids.size(), centroids));
            long end = System.currentTimeMillis();
            System.out.printf("Trained models for streamingkmeans clusters. Took %d ms\n", end - start);
        }
    }
}
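Here Lists.newArrayList materializes the centroid Iterable into a List so the centroids can be counted via centroids.size() and reused for both the ball k-means and streaming k-means training runs.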
From source file:com.google.cloud.genomics.dataflow.pipelines.VerifyBamId.java
/**
 * Run the VerifyBamId algorithm and output the resulting contamination estimate.
 */
public static void main(String[] args) throws GeneralSecurityException, IOException {
    // Register the options so that they show up via --help
    PipelineOptionsFactory.register(Options.class);
    pipelineOptions = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    // Option validation is not yet automatic, we make an explicit call here.
    Options.Methods.validateOptions(pipelineOptions);

    auth = GenomicsOptions.Methods.getGenomicsAuth(pipelineOptions);

    p = Pipeline.create(pipelineOptions);
    p.getCoderRegistry().setFallbackCoderProvider(GenericJsonCoder.PROVIDER);

    if (pipelineOptions.getInputDatasetId().isEmpty() && pipelineOptions.getReadGroupSetIds().isEmpty()) {
        throw new IllegalArgumentException("InputDatasetId or ReadGroupSetIds must be specified");
    }

    List<String> rgsIds;
    if (pipelineOptions.getInputDatasetId().isEmpty()) {
        rgsIds = Lists.newArrayList(pipelineOptions.getReadGroupSetIds().split(","));
    } else {
        rgsIds = GenomicsUtils.getReadGroupSetIds(pipelineOptions.getInputDatasetId(), auth);
    }

    // Grab one ReferenceSetId to be used within the pipeline to confirm that all ReadGroupSets
    // are associated with the same ReferenceSet.
    String referenceSetId = GenomicsUtils.getReferenceSetId(rgsIds.get(0), auth);
    if (Strings.isNullOrEmpty(referenceSetId)) {
        throw new IllegalArgumentException("No ReferenceSetId associated with ReadGroupSetId " + rgsIds.get(0)
                + ". All ReadGroupSets in given input must have an associated ReferenceSet.");
    }

    // TODO: confirm that variant set also corresponds to the same reference
    // https://github.com/googlegenomics/api-client-java/issues/66

    // Reads in Reads.
    PCollection<Read> reads = p.begin().apply(Create.of(rgsIds))
            .apply(ParDo.of(new CheckMatchingReferenceSet(referenceSetId, auth)))
            .apply(new ReadGroupStreamer(auth, ShardBoundary.Requirement.STRICT, null,
                    SexChromosomeFilter.INCLUDE_XY));

    /*
    TODO: We can reduce the number of requests needed to be created by doing the following:
    1. Stream the Variants first (rather than concurrently with the Reads). Select a subset of them
       equal to some threshold (say 50K by default).
    2. Create the requests for streaming Reads by running a ParDo over the selected Variants to get
       their ranges (we only need to stream Reads that overlap the selected Variants).
    3. Stream the Reads from the created requests.
    */

    // Reads in Variants. TODO potentially provide an option to load the Variants from a file.
    List<StreamVariantsRequest> variantRequests = pipelineOptions.isAllReferences()
            ? ShardUtils.getVariantRequests(pipelineOptions.getVariantSetId(),
                    ShardUtils.SexChromosomeFilter.INCLUDE_XY, pipelineOptions.getBasesPerShard(), auth)
            : ShardUtils.getVariantRequests(pipelineOptions.getVariantSetId(), pipelineOptions.getReferences(),
                    pipelineOptions.getBasesPerShard());

    PCollection<Variant> variants = p.apply(Create.of(variantRequests))
            .apply(new VariantStreamer(auth, ShardBoundary.Requirement.STRICT, VARIANT_FIELDS));

    PCollection<KV<Position, AlleleFreq>> refFreq = getFreq(variants, pipelineOptions.getMinFrequency());

    PCollection<KV<Position, ReadCounts>> readCountsTable = combineReads(reads,
            pipelineOptions.getSamplingFraction(), HASH_PREFIX, refFreq);

    // Converts our results to a single Map of Position keys to ReadCounts values.
    PCollectionView<Map<Position, ReadCounts>> view = readCountsTable
            .apply(View.<Position, ReadCounts>asMap());

    // Calculates the contamination estimate based on the resulting Map above.
    PCollection<String> result = p.begin().apply(Create.of(""))
            .apply(ParDo.of(new Maximizer(view)).withSideInputs(view));

    // Writes the result to the given output location in Cloud Storage.
    result.apply(TextIO.Write.to(pipelineOptions.getOutput()).named("WriteOutput").withoutSharding());

    p.run();
}
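As in the CalculateCoverage example above, Lists.newArrayList(...split(",")) converts the comma-separated ReadGroupSet IDs from the command line into a mutable List<String>.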
From source file:com.trulia.stail.Stail.java
public static void main(String[] args) {
    final Stail stail = new Stail();

    JCommander jct = new JCommander(stail);
    jct.setProgramName("stail");
    try {
        jct.parse(args);

        AWSCredentialsProvider credentialsProvider = new DefaultAWSCredentialsProviderChain();
        if (stail.profile != null) {
            credentialsProvider = new ProfileCredentialsProvider(stail.profile);
        }

        if (stail.role != null) {
            credentialsProvider = new STSAssumeRoleSessionCredentialsProvider.Builder(stail.role, "stail")
                    .withStsClient(AWSSecurityTokenServiceClientBuilder.standard()
                            .withCredentials(credentialsProvider).build())
                    .build();
        }

        AmazonKinesis client = AmazonKinesisClientBuilder.standard().withRegion(stail.region)
                .withCredentials(credentialsProvider).build();

        // prepare the initial shard iterators at the LATEST position
        Map<Shard, String> shardIterators = getShardIterators(client, stail.stream, stail.start);

        IRecordProcessor processor = stail.json ? new JSONRecordProcessor() : new RawRecordProcessor();

        Map<Shard, RateLimiter> rateLimiters = new HashMap<>();
        shardIterators.keySet()
                .forEach(shard -> rateLimiters.put(shard, RateLimiter.create(MAX_SHARD_THROUGHPUT)));

        long end = Strings.isNullOrEmpty(stail.duration) ? Long.MAX_VALUE
                : System.currentTimeMillis() + Duration.parse(stail.duration).toMillis();

        Set<String> reshardedShards = new HashSet<>();

        Map<Shard, String> sequenceNumbers = new HashMap<>();

        while (System.currentTimeMillis() < end) {
            if (!reshardedShards.isEmpty()) {
                // get the new list of shards
                List<Shard> shards = getShards(client, stail.stream);
                for (Shard shard : shards) {
                    if (!Strings.isNullOrEmpty(shard.getParentShardId())
                            && reshardedShards.contains(shard.getParentShardId())) {
                        // the old shard was split, so we need to consume this new shard from the beginning
                        shardIterators.put(shard, getOldestShardIterator(client, stail.stream, shard));
                    } else if (!Strings.isNullOrEmpty(shard.getAdjacentParentShardId())
                            && reshardedShards.contains(shard.getAdjacentParentShardId())) {
                        // the old shards were merged into a new shard
                        shardIterators.put(shard, getOldestShardIterator(client, stail.stream, shard));
                    }
                }

                reshardedShards.clear();
            }

            for (Shard shard : Lists.newArrayList(shardIterators.keySet())) {
                String shardIterator = shardIterators.remove(shard);

                GetRecordsRequest getRecordsRequest = new GetRecordsRequest();
                getRecordsRequest.setShardIterator(shardIterator);
                getRecordsRequest.setLimit(BATCH_SIZE);

                try {
                    GetRecordsResult getRecordsResult = client.getRecords(getRecordsRequest);
                    List<Record> records = getRecordsResult.getRecords();
                    processor.processRecords(records, null);

                    shardIterator = getRecordsResult.getNextShardIterator();

                    if (records.size() <= 0) {
                        // nothing on the stream yet, so lets wait a bit to see if something appears
                        TimeUnit.SECONDS.sleep(1);
                    } else {
                        int bytesRead = records.stream().map(record -> record.getData().position())
                                .reduce((_1, _2) -> _1 + _2).get();
                        sequenceNumbers.put(shard, records.get(records.size() - 1).getSequenceNumber());
                        // optionally sleep if we have hit the limit for this shard
                        rateLimiters.get(shard).acquire(bytesRead);
                    }

                    if (!Strings.isNullOrEmpty(shardIterator)) {
                        shardIterators.put(shard, shardIterator);
                    } else {
                        reshardedShards.add(shard.getShardId());
                    }
                } catch (ProvisionedThroughputExceededException e) {
                    logger.warn("tripped the max throughput. Backing off: {}", e.getMessage());
                    TimeUnit.SECONDS.sleep(6); // we tripped the max throughput. Back off

                    // add the original iterator back into the map so we can try it again
                    shardIterators.put(shard, shardIterator);
                } catch (ExpiredIteratorException e) {
                    logger.debug("Iterator expired", e);

                    String sequenceNumber = sequenceNumbers.get(shard);
                    if (sequenceNumber == null) {
                        logger.warn("No previously known sequence number for {}. Moving to LATEST",
                                shard.getShardId());
                        shardIterators.put(shard, getShardIterator(client, stail.stream, shard, null));
                    } else {
                        shardIterators.put(shard,
                                getShardIteratorAtSequenceNumber(client, stail.stream, shard, sequenceNumber));
                    }
                }
            }
        }
    } catch (ParameterException e) {
        jct.usage();
        System.exit(1);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        System.exit(2);
    }
}
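The loop iterates over Lists.newArrayList(shardIterators.keySet()), a snapshot copy of the key set, because the loop body removes and re-inserts map entries; iterating the live keySet view while mutating the map would throw a ConcurrentModificationException.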
From source file:TwitterExample.java
public static void main(String[] args) throws Exception {
    //Use class loader to load the file
    ClassLoader classloader = Thread.currentThread().getContextClassLoader();
    InputStream is = classloader.getResourceAsStream("myFile.properties");

    // copy config from Java resource to a file
    File configOnDisk = new File("myFile.properties");
    Files.copy(classloader.getResourceAsStream("myFile.properties"), configOnDisk.toPath(),
            StandardCopyOption.REPLACE_EXISTING);

    final ParameterTool params = ParameterTool.fromPropertiesFile("myFile.properties");
    System.out.println("Usage: TwitterExample [--output <path>] "
            + "[--twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> --twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret>]");

    // set up the execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);
    env.setParallelism(params.getInt("parallelism", 1));
    //env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    //DataStream<String> streamSource = env.addSource(new TwitterSource("/myFile.properties"));
    System.out.println(" This is the param" + params.getProperties());

    // get input data
    DataStream<String> streamSource;
    if (params.has(TwitterSource.CONSUMER_KEY) && params.has(TwitterSource.CONSUMER_SECRET)
            && params.has(TwitterSource.TOKEN) && params.has(TwitterSource.TOKEN_SECRET)) {
        final Vector<String> theList = initArrayList("words.txt", classloader);

        //Find tweets about Trump and Clinton
        TwitterSource twitterA = new TwitterSource(params.getProperties());
        TwitterSourceOpt.FilterEndpoint i = new TwitterSourceOpt.FilterEndpoint(theList);
        twitterA.setCustomEndpointInitializer(i);
        streamSource = env.addSource(twitterA);
    } else {
        System.out.println("Executing TwitterStream example with default props.");
        System.out.println("Use --twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> "
                + "--twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret> specify the authentication info.");
        // get default test text data
        streamSource = env.fromElements(TwitterExampleData.TEXTS);
    }

    final Vector<String> stopWords = initArrayList("stopwords.txt", classloader);

    DataStream<Tuple2<String, Integer>> tweets = streamSource
            // selecting English tweets and splitting to (word, 1)
            .flatMap(new SelectEnglishAndTokenizeFlatMap("text"));

    //Get locations
    DataStream<Tuple2<String, Integer>> locations = streamSource
            .flatMap(new SelectEnglishAndTokenizeFlatMap("location")).keyBy(0).sum(1);

    tweets.keyBy(0).asQueryableState("Twitter tweets by key");

    //Filter out stop words
    tweets = tweets.filter(new FilterFunction<Tuple2<String, Integer>>() {
        public boolean filter(Tuple2<String, Integer> value) {
            String word = value.getField(0);
            return !stopWords.contains(word);
        }
    });

    DataStream<Tuple2<String, Integer>> dataWindowKafka = tweets.keyBy(0).timeWindow(Time.seconds(10)).sum(1)
            .filter(new FilterFunction<Tuple2<String, Integer>>() {
                public boolean filter(Tuple2<String, Integer> value) {
                    int s = value.getField(1);
                    return s > 10;
                }
            });

    dataWindowKafka.map(new JSONIZEString());

    Pattern<Tuple2<String, Integer>, ?> pattern = Pattern.<Tuple2<String, Integer>>begin("first")
            .where(new SimpleCondition2(15)).followedBy("increasing").where(new SimpleCondition2(20))
            .followedBy("End").where(new IterativeCondition<Tuple2<String, Integer>>() {
                @Override
                public boolean filter(Tuple2<String, Integer> stringIntegerTuple2,
                        Context<Tuple2<String, Integer>> context) throws Exception {
                    List<Tuple2<String, Integer>> s = Lists.newArrayList(context.getEventsForPattern("End"));
                    int i = s.size();
                    int value = stringIntegerTuple2.getField(1);
                    int prevValue = s.get(i - 1).getField(1);
                    return value > prevValue;
                }
            });

    PatternStream<Tuple2<String, Integer>> patternStream = CEP.pattern(dataWindowKafka.keyBy(0), pattern);

    DataStream<String> manyMentions = patternStream
            .select(new PatternSelectFunction<Tuple2<String, Integer>, String>() {
                @Override
                public String select(Map<String, List<Tuple2<String, Integer>>> map) throws Exception {
                    System.out.println(map.toString());
                    return "the word " + map.toString();
                }
            });

    System.out.println(manyMentions.writeAsText("alert.txt"));

    //Temporarily disabled Kafka for testing purposes, uncomment the following to re-enable
    //Initialize a Kafka producer that will be consumed by D3.js and (possibly the database).
    //FlinkKafkaProducer010 myProducer = initKafkaProducer("localhost:9092","test");
    //dataWindowKafka.map(new JSONIZEString()).addSink(myProducer);

    //Transition to a table environment
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
    // tableEnv.registerDataStream("myTable2", dataWindowKafka, "word, count");
    Table table2 = tableEnv.fromDataStream(dataWindowKafka, "word, count");
    // Confusing
    //System.out.println("This is the table name " + table2.where("count>5"));

    // Using a CSV TableSink
    //TableSink sink = new CsvTableSink("path54.csv", ",");
    //table2.writeToSink(sink);

    Properties kafkaProperties = new Properties();
    kafkaProperties.setProperty("bootstrap.servers", "localhost:9092");
    kafkaProperties.setProperty("group.id", "test");
    kafkaProperties.setProperty("zookeeper.connect", "localhost:2181");

    KafkaTableSink10 plotSink = makeTableSink("twitter", kafkaProperties);
    //table2.writeToSink(plotSink);

    env.execute("Twitter Streaming Example");
}
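Inside the IterativeCondition, Lists.newArrayList(context.getEventsForPattern("End")) copies the Iterable of previously matched events into a List so the most recent element can be read by index; if the iterable were empty, s.get(i - 1) would throw an IndexOutOfBoundsException.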
From source file:com.yahoo.yqlplus.api.index.IndexName.java
public static IndexName of(List<String> cols) {
    List<String> copy = Lists.newArrayList(cols);
    Collections.sort(copy);
    return new IndexName(Collections.unmodifiableList(copy));
}
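Here Lists.newArrayList(cols) makes a defensive, mutable copy so the method can sort the columns without modifying the caller's list (or failing if that list is immutable) before wrapping the result as unmodifiable.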