Example usage for com.google.common.collect Lists newArrayList

Introduction

This page collects real-world usage examples of com.google.common.collect.Lists#newArrayList, drawn from open-source projects.

Prototype

@GwtCompatible(serializable = true)
public static <E> ArrayList<E> newArrayList(Iterator<? extends E> elements) 

Document

Creates a mutable ArrayList instance containing the given elements; a very thin shortcut for creating an empty list and then calling Iterators#addAll.
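
Before the project examples, here is a minimal, self-contained sketch of the method in isolation (the class and variable names are illustrative only, and Guava must be on the classpath). It shows the Iterator overload from the prototype above together with the varargs overload; the examples below rely on these or the closely related Iterable overload when converting arrays, queues, split strings, or query results into modifiable lists.

import com.google.common.collect.Lists;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class NewArrayListDemo {
    public static void main(String[] args) {
        // Varargs overload: convenient for literal elements or a String[] such as argv.
        ArrayList<String> colors = Lists.newArrayList("red", "green", "blue");

        // Iterator overload (the prototype above): copies the iterator's remaining
        // elements into a new, independent, mutable ArrayList.
        Iterator<String> it = colors.iterator();
        List<String> copy = Lists.newArrayList(it);

        // The result is an ordinary mutable ArrayList, unlike Guava's ImmutableList.
        copy.add("yellow");
        System.out.println(copy); // prints [red, green, blue, yellow]
    }
}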

Usage

From source file:brooklyn.demo.WebClusterDatabaseExampleApp.java

public static void main(String[] argv) {
    List<String> args = Lists.newArrayList(argv);
    String port = CommandLineUtil.getCommandLineOption(args, "--port", "8081+");
    String location = CommandLineUtil.getCommandLineOption(args, "--location", DEFAULT_LOCATION);

    BrooklynLauncher launcher = BrooklynLauncher.newInstance()
            .application(EntitySpec.create(StartableApplication.class, WebClusterDatabaseExampleApp.class)
                    .displayName("Brooklyn WebApp Cluster with Database example"))
            .webconsolePort(port).location(location).start();

    Entities.dumpInfo(launcher.getApplications());
}

From source file:org.dllearner.algorithms.qtl.experiments.BenchmarkDescriptionGeneratorDatabase.java

public static void main(String[] args) throws Exception {
    OptionParser parser = new OptionParser();
    OptionSpec<File> benchmarkDirectorySpec = parser.accepts("d", "base directory").withRequiredArg()
            .ofType(File.class).required();
    OptionSpec<File> queriesFileSpec = parser.accepts("i", "input queries file").withRequiredArg()
            .ofType(File.class).required();
    OptionSpec<String> tableNameSpec = parser.accepts("db", "database name").withRequiredArg()
            .ofType(String.class).required();
    OptionSpec<URL> endpointURLSpec = parser.accepts("e", "endpoint URL").withRequiredArg().ofType(URL.class)
            .required();
    OptionSpec<String> defaultGraphSpec = parser.accepts("g", "default graph").withRequiredArg()
            .ofType(String.class);
    OptionSpec<Boolean> useCacheSpec = parser.accepts("cache", "use cache").withOptionalArg()
            .ofType(Boolean.class).defaultsTo(Boolean.TRUE);
    OptionSpec<Boolean> queriesHaveIdSpec = parser.accepts("id", "input file contains ID, SPARQL query")
            .withOptionalArg().ofType(Boolean.class).defaultsTo(Boolean.TRUE);
    OptionSpec<String> cbdSpec = parser.accepts("cbd", "CBD structure tree string").withOptionalArg()
            .ofType(String.class).required();
    OptionSpec<String> queriesToOmitTokensSpec = parser
            .accepts("omitTokens",
                    "comma-separated list of tokens such that queries containing any of them will be omitted")
            .withRequiredArg().ofType(String.class).defaultsTo("");
    OptionSpec<Boolean> workaroundSpec = parser.accepts("workaround", "Virtuoso parse error workaround enabled")
            .withRequiredArg().ofType(Boolean.class).defaultsTo(Boolean.FALSE);

    OptionSet options = parser.parse(args);

    File benchmarkDirectory = options.valueOf(benchmarkDirectorySpec);
    File inputFile = options.valueOf(queriesFileSpec);
    String tableName = options.valueOf(tableNameSpec);

    URL endpointURL = options.valueOf(endpointURLSpec);
    List<String> defaultGraphs = options.has(defaultGraphSpec)
            ? Lists.newArrayList(options.valueOf(defaultGraphSpec))
            : Collections.emptyList();
    SparqlEndpoint endpoint = SparqlEndpoint.create(endpointURL.toString(), defaultGraphs);

    //      SparqlEndpointKS ks = new SparqlEndpointKS(endpoint);
    //      ks.setUseCache(options.valueOf(useCacheSpec));
    //      ks.setCacheDir(benchmarkDirectory.getPath());
    //      ks.setQueryDelay(1000);
    //      ks.setRetryCount(0);
    //      ks.init();

    QueryExecutionFactory qef = buildQueryExecutionFactory(endpoint, options.valueOf(useCacheSpec),
            benchmarkDirectory.getPath(), TimeUnit.DAYS.toMillis(30), 0, 60);

    CBDStructureTree cbdStructureTree = CBDStructureTree.fromTreeString(options.valueOf(cbdSpec).trim());

    List<String> omitTokens = Splitter.on(",").omitEmptyStrings().trimResults()
            .splitToList(options.valueOf(queriesToOmitTokensSpec));

    BenchmarkDescriptionGeneratorDatabase generator = new BenchmarkDescriptionGeneratorDatabase(qef);
    generator.setDefaultCbdStructure(cbdStructureTree);
    generator.setSkipQueryTokens(omitTokens);
    generator.setEndpoint(endpoint);
    generator.setWorkaroundEnabled(options.valueOf(workaroundSpec));
    generator.generateBenchmarkDescription(inputFile, tableName, options.valueOf(queriesHaveIdSpec));
}

From source file:org.attribyte.api.pubsub.impl.server.Server.java

/**
 * Starts the server.
 * @param args The startup args.
 * @throws Exception on startup error.
 */
public static void main(String[] args) throws Exception {

    if (args.length < 1) {
        System.err.println("Start-up error: Expecting <config file> [allowed topics file]");
        System.exit(1);
    }

    Properties commandLineOverrides = new Properties();
    args = InitUtil.fromCommandLine(args, commandLineOverrides);

    Properties props = new Properties();
    Properties logProps = new Properties();
    CLI.loadProperties(args, props, logProps);

    props.putAll(commandLineOverrides);
    logProps.putAll(commandLineOverrides);

    final Logger logger = initLogger(props, logProps);

    logger.info("Applied command line overrides: " + commandLineOverrides.toString());

    //Buffer and log hub events for logging and debug...

    final int MAX_STORED_SUBSCRIPTION_REQUESTS = 200;

    final ArrayBlockingQueue<SubscriptionEvent> recentSubscriptionRequests = new ArrayBlockingQueue<>(
            MAX_STORED_SUBSCRIPTION_REQUESTS);

    final HubEndpoint.EventHandler hubEventHandler = new HubEndpoint.EventHandler() {
        private synchronized void offer(SubscriptionEvent record) {
            if (!recentSubscriptionRequests.offer(record)) {
                // The queue is full: drain roughly half of the oldest events to make room,
                // then retry the offer.
                List<SubscriptionEvent> drain = Lists
                        .newArrayListWithCapacity(MAX_STORED_SUBSCRIPTION_REQUESTS / 2);
                recentSubscriptionRequests.drainTo(drain, MAX_STORED_SUBSCRIPTION_REQUESTS / 2);
                recentSubscriptionRequests.offer(record);
            }
        }

        @Override
        public void subscriptionRequestAccepted(final Request request, final Response response,
                final Subscriber subscriber) {
            final SubscriptionEvent record;
            try {
                record = new SubscriptionRequestRecord(request, response, subscriber);
            } catch (IOException ioe) {
                return;
            }

            logger.info(record.toString());
            offer(record);
        }

        @Override
        public void subscriptionRequestRejected(final Request request, final Response response,
                final Subscriber subscriber) {

            final SubscriptionEvent record;
            try {
                record = new SubscriptionRequestRecord(request, response, subscriber);
            } catch (IOException ioe) {
                return;
            }

            logger.warn(record.toString());
            offer(record);
        }

        @Override
        public void subscriptionVerifyFailure(String callbackURL, int callbackResponseCode, String reason,
                int attempts, boolean abandoned) {
            final SubscriptionEvent record = new SubscriptionVerifyRecord(callbackURL, callbackResponseCode,
                    reason, attempts, abandoned);
            logger.warn(record.toString());
            offer(record);
        }

        @Override
        public void subscriptionVerified(Subscription subscription) {
            final SubscriptionEvent record = new SubscriptionVerifyRecord(subscription);
            logger.info(record.toString());
            offer(record);
        }
    };

    /**
     * A source for subscription request records (for console, etc).
     */
    final SubscriptionEvent.Source subscriptionEventSource = new SubscriptionEvent.Source() {
        public List<SubscriptionEvent> latestEvents(int limit) {
            List<SubscriptionEvent> records = Lists.newArrayList(recentSubscriptionRequests);
            Collections.sort(records);
            return records.size() < limit ? records : records.subList(0, limit);
        }
    };

    /**
     * A queue to which new topics are added as reported by the datastore event handler.
     */
    final BlockingQueue<Topic> newTopicQueue = new LinkedBlockingDeque<>();

    /**
     * A datastore event handler that offers new topics to a queue.
     */
    final HubDatastore.EventHandler topicEventHandler = new HubDatastore.EventHandler() {

        @Override
        public void newTopic(final Topic topic) throws DatastoreException {
            newTopicQueue.offer(topic);
        }

        @Override
        public void newSubscription(final Subscription subscription) throws DatastoreException {
            //Ignore
        }

        @Override
        public void exception(final Throwable t) {
            //Ignore
        }

        @Override
        public void setNext(final HubDatastore.EventHandler next) {
            //Ignore
        }
    };

    final HubEndpoint endpoint = new HubEndpoint("endpoint.", props, logger, hubEventHandler,
            topicEventHandler);

    final String topicAddedTopicURL = Strings.emptyToNull(props.getProperty("endpoint.topicAddedTopic", ""));
    final Topic topicAddedTopic = topicAddedTopicURL != null
            ? endpoint.getDatastore().getTopic(topicAddedTopicURL, true)
            : null;
    final Thread topicAddedNotifier = topicAddedTopic != null
            ? new Thread(new TopicAddedNotifier(newTopicQueue, endpoint, topicAddedTopic))
            : null;
    if (topicAddedNotifier != null) {
        topicAddedNotifier.setName("topic-added-notifier");
        topicAddedNotifier.start();
    }

    if (props.getProperty("endpoint.topics") != null) { //Add supported topics...
        for (String topicURL : Splitter.on(",").omitEmptyStrings().trimResults()
                .split(props.getProperty("endpoint.topics"))) {
            Topic topic = endpoint.getDatastore().getTopic(topicURL, true);
            System.out.println("Added topic, '" + topicURL + "' (" + topic.getId() + ")");
        }
    }

    final MetricRegistry registry = props.getProperty("endpoint.instrumentJVM", "true").equalsIgnoreCase("true")
            ? instrumentJVM(new MetricRegistry())
            : new MetricRegistry();

    if (props.getProperty("endpoint.instrumentSystem", "true").equalsIgnoreCase("true")) {
        instrumentSystem(registry);
    }

    registry.registerAll(endpoint);

    final HealthCheckRegistry healthCheckRegistry = new HealthCheckRegistry(); //TODO

    final Reporting reporting = new Reporting("metrics-reporting.", props, registry, null); //No filter...

    String httpAddress = props.getProperty("http.address", "127.0.0.1");
    int httpPort = Integer.parseInt(props.getProperty("http.port", "8086"));

    org.eclipse.jetty.server.Server server = new org.eclipse.jetty.server.Server();

    server.addLifeCycleListener(new LifeCycle.Listener() {

        public void lifeCycleFailure(LifeCycle event, Throwable cause) {
            System.out.println("Failure " + cause.toString());
        }

        public void lifeCycleStarted(LifeCycle event) {
            System.out.println("Started...");
        }

        public void lifeCycleStarting(LifeCycle event) {
            System.out.println("Server Starting...");
        }

        public void lifeCycleStopped(LifeCycle event) {
            System.out.println("Server Stopped...");
        }

        public void lifeCycleStopping(LifeCycle event) {
            System.out.println("Shutting down metrics reporting...");
            reporting.stop();
            if (topicAddedNotifier != null) {
                System.out.println("Shutting down new topic notifier...");
                topicAddedNotifier.interrupt();
            }
            System.out.println("Shutting down endpoint...");
            endpoint.shutdown();
            System.out.println("Shutdown endpoint...");
        }
    });

    HttpConfiguration httpConfig = new HttpConfiguration();
    httpConfig.setOutputBufferSize(32768);
    httpConfig.setRequestHeaderSize(8192);
    httpConfig.setResponseHeaderSize(8192);
    httpConfig.setSendServerVersion(false);
    httpConfig.setSendDateHeader(false);
    ServerConnector httpConnector = new ServerConnector(server, new HttpConnectionFactory(httpConfig));
    httpConnector.setHost(httpAddress);
    httpConnector.setPort(httpPort);
    httpConnector.setIdleTimeout(30000L);
    server.addConnector(httpConnector);
    HandlerCollection serverHandlers = new HandlerCollection();
    server.setHandler(serverHandlers);

    ServletContextHandler rootContext = new ServletContextHandler(ServletContextHandler.NO_SESSIONS);
    rootContext.setContextPath("/");

    final AdminConsole adminConsole;
    final List<String> allowedAssetPaths;

    if (props.getProperty("admin.enabled", "false").equalsIgnoreCase("true")) {

        File assetDirFile = getSystemFile("admin.assetDirectory", props);

        if (assetDirFile == null) {
            System.err.println("The 'admin.assetDirectory' must be configured");
            System.exit(1);
        }

        if (!assetDirFile.exists()) {
            System.err.println("The 'admin.assetDirectory'" + assetDirFile.getAbsolutePath() + "' must exist");
            System.exit(1);
        }

        if (!assetDirFile.isDirectory()) {
            System.err.println(
                    "The 'admin.assetDirectory'" + assetDirFile.getAbsolutePath() + "' must be a directory");
            System.exit(1);
        }

        if (!assetDirFile.canRead()) {
            System.err.println(
                    "The 'admin.assetDirectory'" + assetDirFile.getAbsolutePath() + "' must be readable");
            System.exit(1);
        }

        char[] adminUsername = props.getProperty("admin.username", "").toCharArray();
        char[] adminPassword = props.getProperty("admin.password", "").toCharArray();
        String adminRealm = props.getProperty("admin.realm", "pubsubhub");

        if (adminUsername.length == 0 || adminPassword.length == 0) {
            System.err.println("The 'admin.username' and 'admin.password' must be specified");
            System.exit(1);
        }

        File templateDirFile = getSystemFile("admin.templateDirectory", props);

        if (templateDirFile == null) {
            System.err.println("The 'admin.templateDirectory' must be specified");
            System.exit(1);
        }

        if (!templateDirFile.exists()) {
            System.err
                    .println("The 'admin.templateDirectory'" + assetDirFile.getAbsolutePath() + "' must exist");
            System.exit(1);
        }

        if (!templateDirFile.isDirectory()) {
            System.err.println(
                    "The 'admin.templateDirectory'" + assetDirFile.getAbsolutePath() + "' must be a directory");
            System.exit(1);
        }

        if (!templateDirFile.canRead()) {
            System.err.println(
                    "The 'admin.templateDirectory'" + assetDirFile.getAbsolutePath() + "' must be readable");
            System.exit(1);
        }

        adminConsole = new AdminConsole(rootContext, assetDirFile.getAbsolutePath(), endpoint,
                new AdminAuth(adminRealm, adminUsername, adminPassword), templateDirFile.getAbsolutePath(),
                logger);

        allowedAssetPaths = Lists.newArrayList(Splitter.on(',').omitEmptyStrings().trimResults()
                .split(props.getProperty("admin.assetPaths", "")));
        System.out.println("Admin console is enabled...");
    } else {
        adminConsole = null;
        allowedAssetPaths = ImmutableList.of();
    }

    serverHandlers.addHandler(rootContext);

    //TODO: Introduces incompatible dependency...
    /*
    InstrumentedHandler instrumentedHandler = new InstrumentedHandler(registry);
    instrumentedHandler.setName("http-server");
    instrumentedHandler.setHandler(rootContext);
    serverHandlers.addHandler(instrumentedHandler);
    */

    File requestLogPathFile = getSystemFile("http.log.path", props);
    if (requestLogPathFile != null) {

        if (!requestLogPathFile.exists()) {
            System.err
                    .println("The 'http.log.path', '" + requestLogPathFile.getAbsolutePath() + "' must exist");
            System.exit(1);
        }

        if (!requestLogPathFile.isDirectory()) {
            System.err.println(
                    "The 'http.log.path', '" + requestLogPathFile.getAbsolutePath() + "' must be a directory");
            System.exit(1);
        }

        if (!requestLogPathFile.canWrite()) {
            System.err.println(
                    "The 'http.log.path', '" + requestLogPathFile.getAbsolutePath() + "' is not writable");
            System.exit(1);
        }

        int requestLogRetainDays = Integer.parseInt(props.getProperty("http.log.retainDays", "14"));
        boolean requestLogExtendedFormat = props.getProperty("http.log.extendedFormat", "true")
                .equalsIgnoreCase("true");
        String requestLogTimeZone = props.getProperty("http.log.timeZone", TimeZone.getDefault().getID());
        String requestLogPrefix = props.getProperty("http.log.prefix", "requests");
        String requestLogPath = requestLogPathFile.getAbsolutePath();
        if (!requestLogPath.endsWith("/")) {
            requestLogPath = requestLogPath + "/";
        }

        NCSARequestLog requestLog = new NCSARequestLog(requestLogPath + requestLogPrefix + "-yyyy_mm_dd.log");
        requestLog.setRetainDays(requestLogRetainDays);
        requestLog.setAppend(true);
        requestLog.setExtended(requestLogExtendedFormat);
        requestLog.setLogTimeZone(requestLogTimeZone);
        requestLog.setLogCookies(false);
        requestLog.setPreferProxiedForAddress(true);

        RequestLogHandler requestLogHandler = new RequestLogHandler();
        requestLogHandler.setRequestLog(requestLog);
        serverHandlers.addHandler(requestLogHandler);
    }

    HubServlet hubServlet = new HubServlet(endpoint, logger);
    rootContext.addServlet(new ServletHolder(hubServlet), "/subscribe/*");

    InitUtil filterInit = new InitUtil("publish.", props);
    List<BasicAuthFilter> publishURLFilters = Lists.newArrayList();
    List<Object> publishURLFilterObjects = filterInit.initClassList("topicURLFilters", BasicAuthFilter.class);
    for (Object o : publishURLFilterObjects) {
        BasicAuthFilter filter = (BasicAuthFilter) o;
        filter.init(filterInit.getProperties());
        publishURLFilters.add(filter);
    }

    final long topicCacheMaxAgeSeconds = Long
            .parseLong(props.getProperty("endpoint.topicCache.maxAgeSeconds", "0"));
    final Cache<String, Topic> topicCache;
    if (topicCacheMaxAgeSeconds > 0) {
        topicCache = CacheBuilder.newBuilder().concurrencyLevel(16)
                .expireAfterWrite(topicCacheMaxAgeSeconds, TimeUnit.SECONDS).maximumSize(4096).build();
    } else {
        topicCache = null;
    }

    final String replicationTopicURL = Strings.emptyToNull(props.getProperty("endpoint.replicationTopic", ""));
    //Get or create replication topic, if configured.
    final Topic replicationTopic = replicationTopicURL != null
            ? endpoint.getDatastore().getTopic(replicationTopicURL, true)
            : null;

    int maxBodySizeBytes = filterInit.getIntProperty("maxBodySizeBytes",
            BroadcastServlet.DEFAULT_MAX_BODY_BYTES);
    boolean autocreateTopics = filterInit.getProperty("autocreateTopics", "false").equalsIgnoreCase("true");

    int maxSavedNotifications = filterInit.getIntProperty("maxSavedNotifications", 0);

    boolean jsonEnabled = filterInit.getProperty("jsonEnabled", "false").equalsIgnoreCase("true");

    final BroadcastServlet broadcastServlet = new BroadcastServlet(endpoint, maxBodySizeBytes, autocreateTopics,
            logger, publishURLFilters, topicCache, replicationTopic, maxSavedNotifications, jsonEnabled);
    rootContext.addServlet(new ServletHolder(broadcastServlet), "/notify/*");

    CallbackMetricsServlet callbackMetricsServlet = new CallbackMetricsServlet(endpoint);
    ServletHolder callbackMetricsServletHolder = new ServletHolder(callbackMetricsServlet);
    rootContext.addServlet(callbackMetricsServletHolder, "/metrics/callback/*");

    NotificationMetricsServlet notificationMetricsServlet = new NotificationMetricsServlet(endpoint);
    ServletHolder notificationMetricsServletHolder = new ServletHolder(notificationMetricsServlet);
    rootContext.addServlet(notificationMetricsServletHolder, "/metrics/notification/*");

    MetricsServlet metricsServlet = new MetricsServlet(registry);
    ServletHolder metricsServletHolder = new ServletHolder(metricsServlet);
    rootContext.setInitParameter(MetricsServlet.RATE_UNIT, "SECONDS");
    rootContext.setInitParameter(MetricsServlet.DURATION_UNIT, "MILLISECONDS");
    rootContext.setInitParameter(MetricsServlet.SHOW_SAMPLES, "false");
    rootContext.addServlet(metricsServletHolder, "/metrics/*");

    boolean outputHostAddys = props.getProperty("ping.outputHostAddresses", "false").equalsIgnoreCase("true");
    PingServlet pingServlet = new PingServlet(props.getProperty("http.instanceName", ""), outputHostAddys);
    rootContext.addServlet(new ServletHolder(pingServlet), "/ping/*");

    HealthCheckServlet healthCheckServlet = new HealthCheckServlet(healthCheckRegistry);
    for (Map.Entry<String, HealthCheck> healthCheck : endpoint.getDatastore().getHealthChecks().entrySet()) {
        healthCheckRegistry.register(healthCheck.getKey(), healthCheck.getValue());
    }
    healthCheckRegistry.register("no-deadlocked-threads", new ThreadDeadlockHealthCheck());

    rootContext.addServlet(new ServletHolder(healthCheckServlet), "/health/*");

    ThreadDumpServlet threadDumpServlet = new ThreadDumpServlet();
    rootContext.addServlet(new ServletHolder(threadDumpServlet), "/threads/*");

    if (adminConsole != null && allowedAssetPaths.size() > 0) {
        String adminPath = props.getProperty("admin.path", "/admin/");
        List<Invalidatable> invalidatables = Collections.<Invalidatable>singletonList(new Invalidatable() {
            @Override
            public void invalidate() {
                broadcastServlet.invalidateCaches();
                if (topicCache != null) {
                    topicCache.invalidateAll();
                }
            }
        });
        adminConsole.initServlets(rootContext, adminPath, allowedAssetPaths, invalidatables,
                subscriptionEventSource, broadcastServlet);
    }

    int numReporters = reporting.start();
    logger.info("Started " + numReporters + " metrics reporters");

    server.setDumpBeforeStop(false);
    server.setStopAtShutdown(true);
    server.start();
    server.join();
}

From source file:pl.edu.icm.cermine.PdfNLMContentExtractor.java

public static void main(String[] args) throws ParseException, IOException {
    CommandLineOptionsParser parser = new CommandLineOptionsParser();
    if (!parser.parse(args)) {
        System.err.println("Usage: PdfNLMContentExtractor -path <path> [optional parameters]\n\n"
                + "Tool for extracting metadata and content from PDF files.\n\n" + "Arguments:\n"
                + "  -path <path>              path to a PDF file or directory containing PDF files\n"
                + "  -ext <extension>          (optional) the extension of the resulting metadata file;\n"
                + "                            default: \"cermxml\"; used only if passed path is a directory\n"
                + "  -modelmeta <path>         (optional) the path to the metadata classifier model file\n"
                + "  -modelinit <path>         (optional) the path to the initial classifier model file\n"
                + "  -str                      whether to store structure (TrueViz) files as well;\n"
                + "                            used only if passed path is a directory\n"
                + "  -strext <extension>       (optional) the extension of the structure (TrueViz) file;\n"
                + "                            default: \"cxml\"; used only if passed path is a directory\n"
                + "  -threads <num>            number of threads for parallel processing\n");
        System.exit(1);
    }

    String path = parser.getPath();
    String extension = parser.getNLMExtension();
    boolean extractStr = parser.extractStructure();
    String strExtension = parser.getBxExtension();
    PdfNLMContentExtractor.THREADS_NUMBER = parser.getThreadsNumber();

    File file = new File(path);
    if (file.isFile()) {
        try {
            PdfNLMContentExtractor extractor = new PdfNLMContentExtractor();
            parser.updateMetadataModel(extractor.getConf());
            parser.updateInitialModel(extractor.getConf());
            InputStream in = new FileInputStream(file);
            Element result = extractor.extractContent(in);
            XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat());
            System.out.println(outputter.outputString(result));
        } catch (AnalysisException ex) {
            ex.printStackTrace();
        }
    } else {

        Collection<File> files = FileUtils.listFiles(file, new String[] { "pdf" }, true);

        int i = 0;
        for (File pdf : files) {
            File xmlF = new File(pdf.getPath().replaceAll("pdf$", extension));
            if (xmlF.exists()) {
                i++;
                continue;
            }

            long start = System.currentTimeMillis();
            float elapsed = 0;

            System.out.println(pdf.getPath());

            try {
                PdfNLMContentExtractor extractor = new PdfNLMContentExtractor();
                parser.updateMetadataModel(extractor.getConf());
                parser.updateInitialModel(extractor.getConf());

                InputStream in = new FileInputStream(pdf);
                BxDocument doc = ExtractionUtils.extractStructure(extractor.getConf(), in);
                Element result = extractor.extractContent(doc);

                long end = System.currentTimeMillis();
                elapsed = (end - start) / 1000F;

                XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat());
                if (!xmlF.createNewFile()) {
                    System.out.println("Cannot create new file!");
                }
                FileUtils.writeStringToFile(xmlF, outputter.outputString(result));

                if (extractStr) {
                    BxDocumentToTrueVizWriter writer = new BxDocumentToTrueVizWriter();
                    File strF = new File(pdf.getPath().replaceAll("pdf$", strExtension));
                    writer.write(new FileWriter(strF), Lists.newArrayList(doc));
                }
            } catch (AnalysisException ex) {
                ex.printStackTrace();
            } catch (TransformationException ex) {
                ex.printStackTrace();
            }

            i++;
            int percentage = i * 100 / files.size();
            if (elapsed == 0) {
                elapsed = (System.currentTimeMillis() - start) / 1000F;
            }
            System.out.println("Extraction time: " + Math.round(elapsed) + "s");
            System.out.println(percentage + "% done (" + i + " out of " + files.size() + ")");
            System.out.println("");
        }
    }
}

From source file:com.google.cloud.genomics.dataflow.pipelines.CalculateCoverage.java

public static void main(String[] args) throws GeneralSecurityException, IOException {
    // Register the options so that they show up via --help
    PipelineOptionsFactory.register(Options.class);
    options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

    auth = GenomicsOptions.Methods.getGenomicsAuth(options);

    p = Pipeline.create(options);
    p.getCoderRegistry().setFallbackCoderProvider(GenericJsonCoder.PROVIDER);

    if (options.getInputDatasetId().isEmpty() && options.getReadGroupSetIds().isEmpty()) {
        throw new IllegalArgumentException("InputDatasetId or ReadGroupSetIds must be specified");
    }

    List<String> rgsIds;
    if (options.getInputDatasetId().isEmpty()) {
        rgsIds = Lists.newArrayList(options.getReadGroupSetIds().split(","));
    } else {
        rgsIds = GenomicsUtils.getReadGroupSetIds(options.getInputDatasetId(), auth);
    }

    if (rgsIds.size() < options.getNumQuantiles()) {
        throw new IllegalArgumentException("Number of ReadGroupSets must be greater than or equal to"
                + " the number of requested quantiles.");
    }

    // Grab one ReferenceSetId to be used within the pipeline to confirm that all ReadGroupSets
    // are associated with the same ReferenceSet.
    String referenceSetId = GenomicsUtils.getReferenceSetId(rgsIds.get(0), auth);
    if (Strings.isNullOrEmpty(referenceSetId)) {
        throw new IllegalArgumentException("No ReferenceSetId associated with ReadGroupSetId " + rgsIds.get(0)
                + ". All ReadGroupSets in given input must have an associated ReferenceSet.");
    }

    // Create our destination AnnotationSet for the associated ReferenceSet.
    AnnotationSet annotationSet = createAnnotationSet(referenceSetId);

    PCollection<Read> reads = p.begin().apply(Create.of(rgsIds))
            .apply(ParDo.of(new CheckMatchingReferenceSet(referenceSetId, auth))).apply(new ReadGroupStreamer(
                    auth, ShardBoundary.Requirement.STRICT, READ_FIELDS, SexChromosomeFilter.INCLUDE_XY));

    PCollection<KV<PosRgsMq, Double>> coverageMeans = reads.apply(new CalculateCoverageMean());
    PCollection<KV<Position, KV<PosRgsMq.MappingQuality, List<Double>>>> quantiles = coverageMeans
            .apply(new CalculateQuantiles(options.getNumQuantiles()));
    PCollection<KV<Position, Iterable<KV<PosRgsMq.MappingQuality, List<Double>>>>> answer = quantiles
            .apply(GroupByKey.<Position, KV<PosRgsMq.MappingQuality, List<Double>>>create());
    answer.apply(ParDo.of(new CreateAnnotations(annotationSet.getId(), auth, true)));

    p.run();
}

From source file:org.apache.mahout.knn.tools.TrainNewsGroupsKMeansLogisticRegression.java

public static void main(String[] args) throws IOException, ParseException {
    Options options = new Options();
    options.addOption("i", "input", true,
            "Path to the input folder containing the training set's" + " sequence files.");
    options.addOption("o", "output", true, "Base path to the output file. The name will be "
            + "appended with a suffix for each type of training.");
    options.addOption("a", "actual", false, "If set, runs the training with the actual cluster "
            + "assignments and outputs the model to the output path with a -actual suffix.");
    options.addOption("b", "ballkmeans", false, "If set, runs the training with the ball k-means "
            + "cluster assignments and outputs the model to the output path with a -ballkmeans suffix.");
    options.addOption("s", "streamingkmeans", false,
            "If set, runs the training with the "
                    + "streaming k-means cluster assignments and outputs the model to the output path with a "
                    + "-streamingkmeans suffix.");
    options.addOption("c", "centroids", true, "Path to the centroids seqfile");

    CommandLine cmd = (new PosixParser()).parse(options, args);

    String inputPath = cmd.getOptionValue("input");
    Preconditions.checkNotNull(inputPath);

    String outputBase = cmd.getOptionValue("output");
    Preconditions.checkNotNull(outputBase);

    String centroidsPath = cmd.getOptionValue("centroids");
    Preconditions.checkNotNull(centroidsPath);

    Configuration conf = new Configuration();
    SequenceFileDirIterable<Text, VectorWritable> inputIterable = new SequenceFileDirIterable<Text, VectorWritable>(
            new Path(inputPath), PathType.LIST, conf);

    PrintStream clusterIdOut = new PrintStream(new FileOutputStream("cluster-ids.csv"));
    clusterIdOut.printf("clusterName, clusterId\n");
    int clusterId = 0;
    Map<String, Integer> clusterNamesToIds = Maps.newHashMapWithExpectedSize(NUM_CLASSES);
    for (Pair<Text, VectorWritable> pair : inputIterable) {
        String clusterName = pair.getFirst().toString();
        if (!clusterNamesToIds.containsKey(clusterName)) {
            clusterIdOut.printf("%s, %d\n", clusterName, clusterId);
            clusterNamesToIds.put(clusterName, clusterId++);
        }
    }
    clusterIdOut.close();

    if (cmd.hasOption("actual")) {
        System.out.printf("\nActual clusters models\n");
        System.out.printf("----------------------\n");
        long start = System.currentTimeMillis();
        trainActual(inputIterable, outputBase, clusterNamesToIds);
        long end = System.currentTimeMillis();
        System.out.printf("Trained models for actual clusters. Took %d ms\n", end - start);
    }

    if (cmd.hasOption("ballkmeans") || cmd.hasOption("streamingkmeans")) {
        SequenceFileValueIterable<CentroidWritable> centroidIterable = new SequenceFileValueIterable<CentroidWritable>(
                new Path(centroidsPath), conf);
        List<Centroid> centroids = Lists
                .newArrayList(CreateCentroids.getCentroidsFromCentroidWritableIterable(centroidIterable));

        if (cmd.hasOption("ballkmeans")) {
            System.out.printf("\nBall k-means clusters models\n");
            System.out.printf("----------------------------\n");
            long start = System.currentTimeMillis();
            trainComputed(inputIterable, outputBase, "ballkmeans", clusterNamesToIds,
                    new Pair<Integer, Iterable<Centroid>>(NUM_FEATURES_BKM, centroids));
            long end = System.currentTimeMillis();
            System.out.printf("Trained models for ballkmeans clusters. Took %d ms\n", end - start);
        }

        if (cmd.hasOption("streamingkmeans")) {
            System.out.printf("\nStreaming k-means clusters models\n");
            System.out.printf("---------------------------------\n");
            long start = System.currentTimeMillis();
            trainComputed(inputIterable, outputBase, "streamingkmeans", clusterNamesToIds,
                    new Pair<Integer, Iterable<Centroid>>(centroids.size(), centroids));
            long end = System.currentTimeMillis();
            System.out.printf("Trained models for streamingkmeans clusters. Took %d ms\n", end - start);
        }
    }
}

From source file:com.google.cloud.genomics.dataflow.pipelines.VerifyBamId.java

/**
 * Run the VerifyBamId algorithm and output the resulting contamination estimate.
 */
public static void main(String[] args) throws GeneralSecurityException, IOException {
    // Register the options so that they show up via --help
    PipelineOptionsFactory.register(Options.class);
    pipelineOptions = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    // Option validation is not yet automatic, we make an explicit call here.
    Options.Methods.validateOptions(pipelineOptions);

    auth = GenomicsOptions.Methods.getGenomicsAuth(pipelineOptions);

    p = Pipeline.create(pipelineOptions);
    p.getCoderRegistry().setFallbackCoderProvider(GenericJsonCoder.PROVIDER);

    if (pipelineOptions.getInputDatasetId().isEmpty() && pipelineOptions.getReadGroupSetIds().isEmpty()) {
        throw new IllegalArgumentException("InputDatasetId or ReadGroupSetIds must be specified");
    }

    List<String> rgsIds;
    if (pipelineOptions.getInputDatasetId().isEmpty()) {
        rgsIds = Lists.newArrayList(pipelineOptions.getReadGroupSetIds().split(","));
    } else {
        rgsIds = GenomicsUtils.getReadGroupSetIds(pipelineOptions.getInputDatasetId(), auth);
    }

    // Grab one ReferenceSetId to be used within the pipeline to confirm that all ReadGroupSets
    // are associated with the same ReferenceSet.
    String referenceSetId = GenomicsUtils.getReferenceSetId(rgsIds.get(0), auth);
    if (Strings.isNullOrEmpty(referenceSetId)) {
        throw new IllegalArgumentException("No ReferenceSetId associated with ReadGroupSetId " + rgsIds.get(0)
                + ". All ReadGroupSets in given input must have an associated ReferenceSet.");
    }

    // TODO: confirm that variant set also corresponds to the same reference
    // https://github.com/googlegenomics/api-client-java/issues/66

    // Reads in Reads.
    PCollection<Read> reads = p.begin().apply(Create.of(rgsIds))
            .apply(ParDo.of(new CheckMatchingReferenceSet(referenceSetId, auth))).apply(new ReadGroupStreamer(
                    auth, ShardBoundary.Requirement.STRICT, null, SexChromosomeFilter.INCLUDE_XY));

    /*
    TODO:  We can reduce the number of requests needed to be created by doing the following:
    1. Stream the Variants first (rather than concurrently with the Reads).  Select a subset of
       them equal to some threshold (say 50K by default).
    2. Create the requests for streaming Reads by running a ParDo over the selected Variants
       to get their ranges (we only need to stream Reads that overlap the selected Variants).
    3. Stream the Reads from the created requests.
    */

    // Reads in Variants.  TODO potentially provide an option to load the Variants from a file.
    List<StreamVariantsRequest> variantRequests = pipelineOptions.isAllReferences()
            ? ShardUtils.getVariantRequests(pipelineOptions.getVariantSetId(),
                    ShardUtils.SexChromosomeFilter.INCLUDE_XY, pipelineOptions.getBasesPerShard(), auth)
            : ShardUtils.getVariantRequests(pipelineOptions.getVariantSetId(), pipelineOptions.getReferences(),
                    pipelineOptions.getBasesPerShard());

    PCollection<Variant> variants = p.apply(Create.of(variantRequests))
            .apply(new VariantStreamer(auth, ShardBoundary.Requirement.STRICT, VARIANT_FIELDS));

    PCollection<KV<Position, AlleleFreq>> refFreq = getFreq(variants, pipelineOptions.getMinFrequency());

    PCollection<KV<Position, ReadCounts>> readCountsTable = combineReads(reads,
            pipelineOptions.getSamplingFraction(), HASH_PREFIX, refFreq);

    // Converts our results to a single Map of Position keys to ReadCounts values.
    PCollectionView<Map<Position, ReadCounts>> view = readCountsTable.apply(View.<Position, ReadCounts>asMap());

    // Calculates the contamination estimate based on the resulting Map above.
    PCollection<String> result = p.begin().apply(Create.of(""))
            .apply(ParDo.of(new Maximizer(view)).withSideInputs(view));

    // Writes the result to the given output location in Cloud Storage.
    result.apply(TextIO.Write.to(pipelineOptions.getOutput()).named("WriteOutput").withoutSharding());

    p.run();

}

From source file:com.trulia.stail.Stail.java

public static void main(String[] args) {
    final Stail stail = new Stail();

    JCommander jct = new JCommander(stail);
    jct.setProgramName("stail");
    try {
        jct.parse(args);

        AWSCredentialsProvider credentialsProvider = new DefaultAWSCredentialsProviderChain();
        if (stail.profile != null) {
            credentialsProvider = new ProfileCredentialsProvider(stail.profile);
        }

        if (stail.role != null) {
            credentialsProvider = new STSAssumeRoleSessionCredentialsProvider.Builder(stail.role, "stail")
                    .withStsClient(AWSSecurityTokenServiceClientBuilder.standard()
                            .withCredentials(credentialsProvider).build())
                    .build();
        }

        AmazonKinesis client = AmazonKinesisClientBuilder.standard().withRegion(stail.region)
                .withCredentials(credentialsProvider).build();

        // prepare the initial shard iterators at the LATEST position
        Map<Shard, String> shardIterators = getShardIterators(client, stail.stream, stail.start);

        IRecordProcessor processor = stail.json ? new JSONRecordProcessor() : new RawRecordProcessor();

        Map<Shard, RateLimiter> rateLimiters = new HashMap<>();
        shardIterators.keySet()
                .forEach(shard -> rateLimiters.put(shard, RateLimiter.create(MAX_SHARD_THROUGHPUT)));

        long end = Strings.isNullOrEmpty(stail.duration) ? Long.MAX_VALUE
                : System.currentTimeMillis() + Duration.parse(stail.duration).toMillis();

        Set<String> reshardedShards = new HashSet<>();

        Map<Shard, String> sequenceNumbers = new HashMap<>();

        while (System.currentTimeMillis() < end) {
            if (!reshardedShards.isEmpty()) {
                // get the new list of shards
                List<Shard> shards = getShards(client, stail.stream);
                for (Shard shard : shards) {
                    if (!Strings.isNullOrEmpty(shard.getParentShardId())
                            && reshardedShards.contains(shard.getParentShardId())) {
                        // the old shard was split, so we need to consume this new shard from the beginning
                        shardIterators.put(shard, getOldestShardIterator(client, stail.stream, shard));
                    } else if (!Strings.isNullOrEmpty(shard.getAdjacentParentShardId())
                            && reshardedShards.contains(shard.getAdjacentParentShardId())) {
                        // the old shards were merged into a new shard
                        shardIterators.put(shard, getOldestShardIterator(client, stail.stream, shard));
                    }
                }

                reshardedShards.clear();
            }

            for (Shard shard : Lists.newArrayList(shardIterators.keySet())) {
                String shardIterator = shardIterators.remove(shard);

                GetRecordsRequest getRecordsRequest = new GetRecordsRequest();
                getRecordsRequest.setShardIterator(shardIterator);
                getRecordsRequest.setLimit(BATCH_SIZE);

                try {
                    GetRecordsResult getRecordsResult = client.getRecords(getRecordsRequest);
                    List<Record> records = getRecordsResult.getRecords();
                    processor.processRecords(records, null);

                    shardIterator = getRecordsResult.getNextShardIterator();

                    if (records.size() <= 0) {
                        // nothing on the stream yet, so let's wait a bit to see if something appears
                        TimeUnit.SECONDS.sleep(1);
                    } else {
                        int bytesRead = records.stream().map(record -> record.getData().position())
                                .reduce((_1, _2) -> _1 + _2).get();

                        sequenceNumbers.put(shard, records.get(records.size() - 1).getSequenceNumber());

                        // optionally sleep if we have hit the limit for this shard
                        rateLimiters.get(shard).acquire(bytesRead);
                    }

                    if (!Strings.isNullOrEmpty(shardIterator)) {
                        shardIterators.put(shard, shardIterator);
                    } else {
                        reshardedShards.add(shard.getShardId());
                    }
                } catch (ProvisionedThroughputExceededException e) {
                    logger.warn("tripped the max throughput.  Backing off: {}", e.getMessage());
                    TimeUnit.SECONDS.sleep(6); // we tripped the max throughput.  Back off

                    // add the original iterator back into the map so we can try it again
                    shardIterators.put(shard, shardIterator);
                } catch (ExpiredIteratorException e) {
                    logger.debug("Iterator expired", e);

                    String sequenceNumber = sequenceNumbers.get(shard);
                    if (sequenceNumber == null) {
                        logger.warn("No previously known sequence number for {}.  Moving to LATEST",
                                shard.getShardId());
                        shardIterators.put(shard, getShardIterator(client, stail.stream, shard, null));
                    } else {
                        shardIterators.put(shard,
                                getShardIteratorAtSequenceNumber(client, stail.stream, shard, sequenceNumber));
                    }
                }
            }
        }
    } catch (ParameterException e) {
        jct.usage();
        System.exit(1);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        System.exit(2);
    }
}

From source file:TwitterExample.java

public static void main(String[] args) throws Exception {

    //Use  class loader to load the file
    ClassLoader classloader = Thread.currentThread().getContextClassLoader();
    InputStream is = classloader.getResourceAsStream("myFile.properties");
    // copy config from Java resource to a file
    File configOnDisk = new File("myFile.properties");
    Files.copy(classloader.getResourceAsStream("myFile.properties"), configOnDisk.toPath(),
            StandardCopyOption.REPLACE_EXISTING);
    final ParameterTool params = ParameterTool.fromPropertiesFile("myFile.properties");
    System.out.println("Usage: TwitterExample [--output <path>] "
            + "[--twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> --twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret>]");

    // set up the execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    env.setParallelism(params.getInt("parallelism", 1));
    //env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    //DataStream<String> streamSource = env.addSource(new TwitterSource("/myFile.properties"));
    System.out.println(" This is the param" + params.getProperties());

    // get input data
    DataStream<String> streamSource;

    if (params.has(TwitterSource.CONSUMER_KEY) && params.has(TwitterSource.CONSUMER_SECRET)
            && params.has(TwitterSource.TOKEN) && params.has(TwitterSource.TOKEN_SECRET)) {

        final Vector<String> theList = initArrayList("words.txt", classloader);

        //Find tweets about Trump and Clinton
        TwitterSource twitterA = new TwitterSource(params.getProperties());
        TwitterSourceOpt.FilterEndpoint i = new TwitterSourceOpt.FilterEndpoint(theList);
        twitterA.setCustomEndpointInitializer(i);

        streamSource = env.addSource(twitterA);

    } else {
        System.out.println("Executing TwitterStream example with default props.");
        System.out.println("Use --twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> "
                + "--twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret> specify the authentication info.");
        // get default test text data
        streamSource = env.fromElements(TwitterExampleData.TEXTS);
    }
    final Vector<String> stopWords = initArrayList("stopwords.txt", classloader);

    DataStream<Tuple2<String, Integer>> tweets = streamSource
            // selecting English tweets and splitting to (word, 1)
            .flatMap(new SelectEnglishAndTokenizeFlatMap("text"));

    //Get locations
    DataStream<Tuple2<String, Integer>> locations = streamSource
            .flatMap(new SelectEnglishAndTokenizeFlatMap("location")).keyBy(0).sum(1);
    tweets.keyBy(0).asQueryableState("Twitter tweets by key");
    //Filter out stop words
    tweets = tweets.filter(new FilterFunction<Tuple2<String, Integer>>() {
        public boolean filter(Tuple2<String, Integer> value) {
            String word = value.getField(0);
            return !stopWords.contains(word);

        }
    });

    DataStream<Tuple2<String, Integer>> dataWindowKafka = tweets.keyBy(0).timeWindow(Time.seconds(10)).sum(1)
            .filter(new FilterFunction<Tuple2<String, Integer>>() {
                public boolean filter(Tuple2<String, Integer> value) {
                    int s = value.getField(1);
                    return s > 10;
                }
            });

    dataWindowKafka.map(new JSONIZEString());
    Pattern<Tuple2<String, Integer>, ?> pattern = Pattern.<Tuple2<String, Integer>>begin("first")
            .where(new SimpleCondition2(15)).followedBy("increasing").where(new SimpleCondition2(20))
            .followedBy("End").where(new IterativeCondition<Tuple2<String, Integer>>() {
                @Override
                public boolean filter(Tuple2<String, Integer> stringIntegerTuple2,
                        Context<Tuple2<String, Integer>> context) throws Exception {
                    List<Tuple2<String, Integer>> s = Lists.newArrayList(context.getEventsForPattern("End"));
                    int i = s.size();
                    int value = stringIntegerTuple2.getField(1);
                    int prevValue = s.get(i - 1).getField(1);
                    return value > prevValue;
                }
            });
    PatternStream<Tuple2<String, Integer>> patternStream = CEP.pattern(dataWindowKafka.keyBy(0), pattern);
    DataStream<String> manyMentions = patternStream
            .select(new PatternSelectFunction<Tuple2<String, Integer>, String>() {
                @Override
                public String select(Map<String, List<Tuple2<String, Integer>>> map) throws Exception {
                    System.out.println(map.toString());
                    return "the word " + map.toString();
                }
            });

    System.out.println(manyMentions.writeAsText("alert.txt"));

    //Temporarily disabled Kafka for testing purposes uncomment the following to re-enable
    //Initialize a Kafka producer that will be consumed by D3.js and (possibly the database).
    //FlinkKafkaProducer010 myProducer = initKafkaProducer("localhost:9092","test");
    //dataWindowKafka.map(new JSONIZEString()).addSink(myProducer);

    //Transition to a table environment

    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
    // tableEnv.registerDataStream("myTable2", dataWindowKafka, "word, count");
    Table table2 = tableEnv.fromDataStream(dataWindowKafka, "word, count");
    // Confusing
    //System.out.println("This is the table name " + table2.where("count>5"));
    // Using a CSV TableSink
    //TableSink sink = new CsvTableSink("path54.csv", ",");
    //table2.writeToSink(sink);
    Properties kafkaProperties = new Properties();
    kafkaProperties.setProperty("bootstrap.servers", "localhost:9092");
    kafkaProperties.setProperty("group.id", "test");
    kafkaProperties.setProperty("zookeeper.connect", "localhost:2181");
    KafkaTableSink10 plotSink = makeTableSink("twitter", kafkaProperties);
    //table2.writeToSink(plotSink);

    env.execute("Twitter Streaming Example");

}

From source file:com.yahoo.yqlplus.api.index.IndexName.java

public static IndexName of(List<String> cols) {
    List<String> copy = Lists.newArrayList(cols);
    Collections.sort(copy);
    return new IndexName(Collections.unmodifiableList(copy));
}