Example usage for com.google.common.util.concurrent RateLimiter acquire

Introduction

This page lists example usages of com.google.common.util.concurrent.RateLimiter.acquire(), collected from open-source projects.

Prototype

public double acquire() 

Document

Acquires a single permit from this RateLimiter, blocking until the request can be granted. The call returns the time spent sleeping to enforce the rate, in seconds (0.0 if the permit was available immediately).
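
A minimal sketch of the blocking behavior (assuming only Guava on the classpath; the rate of 2 permits per second and the class name are arbitrary):

import com.google.common.util.concurrent.RateLimiter;

public class AcquireSketch {
    public static void main(String[] args) {
        RateLimiter limiter = RateLimiter.create(2.0); // 2 permits per second
        for (int i = 0; i < 5; i++) {
            double waited = limiter.acquire(); // blocks until a permit is granted
            System.out.printf("permit %d granted after %.3f s%n", i, waited);
        }
    }
}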

Usage

From source file: org.pantsbuild.testproject.jarversionincompatibility.DependsOnRateLimiter.java

public static void main(String[] args) {
    RateLimiter rateLimiter = RateLimiter.create(10000.0); // 10k permits per second
    rateLimiter.acquire();
}
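
Even in this minimal form, note that acquire() returns the number of seconds spent blocked (0.0 here for the first permit); callers that want to measure throttling overhead can capture that value instead of discarding it.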

From source file: org.hibernate.stresser.Stresser.java

public static void main(String[] args) throws InterruptedException {
    AnnotationConfigApplicationContext context = new AnnotationConfigApplicationContext(Stresser.class);
    context.registerShutdownHook();

    try {
        final RateLimiter limiter = RateLimiter.create(5000); // 5k rps
        final PlayerDao playerDao = context.getBean(PlayerDao.class);

        playerDao.saveAll(ENTITIES);

        Thread[] threads = new Thread[CONCURRENCY];
        for (int i = 0; i < CONCURRENCY; i++) {
            threads[i] = new Thread() {
                @Override
                public void run() {
                    for (int i = 0; i < ITERATIONS; i++) {
                        limiter.acquire();
                        ThreadLocalRandom random = ThreadLocalRandom.current();
                        int playerId = random.nextInt(ENTITIES);
                        if (random.nextDouble() < 0.7) {
                            playerDao.update(playerId, i);
                        } else {
                            playerDao.get(playerId);
                        }
                    }
                }
            };
            threads[i].start();
        }

        for (Thread thread : threads) {
            thread.join();
        }
    } finally {
        context.close();
    }
}
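
A RateLimiter is safe for use from multiple threads, which is why the example above can hand a single limiter to every worker to cap their aggregate throughput. A condensed sketch of the same pattern using an ExecutorService (the thread and iteration counts are arbitrary, and doWork() is a hypothetical unit of work):

import com.google.common.util.concurrent.RateLimiter;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class SharedLimiterSketch {
    public static void main(String[] args) {
        final RateLimiter limiter = RateLimiter.create(5000); // shared by all workers
        ExecutorService pool = Executors.newFixedThreadPool(8);
        for (int t = 0; t < 8; t++) {
            pool.submit(() -> {
                for (int i = 0; i < 10_000; i++) {
                    limiter.acquire(); // caps the combined rate, not a per-thread rate
                    doWork();          // hypothetical workload
                }
            });
        }
        pool.shutdown();
    }

    private static void doWork() { /* hypothetical workload */ }
}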

From source file: co.paralleluniverse.photon.Photon.java

public static void main(final String[] args) throws InterruptedException, IOException {

    final Options options = new Options();
    options.addOption("rate", true, "Requests per second (default " + rateDefault + ")");
    options.addOption("duration", true,
            "Minimum test duration in seconds: will wait for <duration> * <rate> requests to terminate or, if progress check enabled, no progress after <duration> (default "
                    + durationDefault + ")");
    options.addOption("maxconnections", true,
            "Maximum number of open connections (default " + maxConnectionsDefault + ")");
    options.addOption("timeout", true,
            "Connection and read timeout in millis (default " + timeoutDefault + ")");
    options.addOption("print", true,
            "Print cycle in millis, 0 to disable intermediate statistics (default " + printCycleDefault + ")");
    options.addOption("check", true,
            "Progress check cycle in millis, 0 to disable progress check (default " + checkCycleDefault + ")");
    options.addOption("stats", false, "Print full statistics when finish (default false)");
    options.addOption("minmax", false, "Print min/mean/stddev/max stats when finish (default false)");
    options.addOption("name", true, "Test name to print in the statistics (default '" + testNameDefault + "')");
    options.addOption("help", false, "Print help");

    try {
        final CommandLine cmd = new BasicParser().parse(options, args);
        final String[] ar = cmd.getArgs();
        if (cmd.hasOption("help") || ar.length != 1)
            printUsageAndExit(options);

        final String url = ar[0];

        final int timeout = Integer.parseInt(cmd.getOptionValue("timeout", timeoutDefault));
        final int maxConnections = Integer
                .parseInt(cmd.getOptionValue("maxconnections", maxConnectionsDefault));
        final int duration = Integer.parseInt(cmd.getOptionValue("duration", durationDefault));
        final int printCycle = Integer.parseInt(cmd.getOptionValue("print", printCycleDefault));
        final int checkCycle = Integer.parseInt(cmd.getOptionValue("check", checkCycleDefault));
        final String testName = cmd.getOptionValue("name", testNameDefault);
        final int rate = Integer.parseInt(cmd.getOptionValue("rate", rateDefault));

        final MetricRegistry metrics = new MetricRegistry();
        final Meter requestMeter = metrics.meter("request");
        final Meter responseMeter = metrics.meter("response");
        final Meter errorsMeter = metrics.meter("errors");
        final Logger log = LoggerFactory.getLogger(Photon.class);
        final ConcurrentHashMap<String, AtomicInteger> errors = new ConcurrentHashMap<>();
        final HttpGet request = new HttpGet(url);
        final StripedTimeSeries<Long> sts = new StripedTimeSeries<>(30000, false);
        final StripedHistogram sh = new StripedHistogram(60000, 5);

        log.info("name: " + testName + " url:" + url + " rate:" + rate + " duration:" + duration
                + " maxconnections:" + maxConnections + ", " + "timeout:" + timeout);
        final DefaultConnectingIOReactor ioreactor = new DefaultConnectingIOReactor(IOReactorConfig.custom()
                .setConnectTimeout(timeout).setIoThreadCount(10).setSoTimeout(timeout).build());

        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            final List<ExceptionEvent> events = ioreactor.getAuditLog();
            if (events != null)
                events.stream().filter(event -> event != null).forEach(event -> {
                    System.err.println(
                            "Apache Async HTTP Client I/O Reactor Error Time: " + event.getTimestamp());
                    //noinspection ThrowableResultOfMethodCallIgnored
                    if (event.getCause() != null)
                        //noinspection ThrowableResultOfMethodCallIgnored
                        event.getCause().printStackTrace();
                });
            if (cmd.hasOption("stats"))
                printFinishStatistics(errorsMeter, sts, sh, testName);
            if (!errors.keySet().isEmpty())
                errors.entrySet().stream()
                        .forEach(p -> log.info(testName + " " + p.getKey() + " " + p.getValue() + "ms"));
            System.out.println(
                    testName + " responseTime(90%): " + sh.getHistogramData().getValueAtPercentile(90) + "ms");
            if (cmd.hasOption("minmax")) {
                final HistogramData hd = sh.getHistogramData();
                System.out.format("%s %8s%8s%8s%8s\n", testName, "min", "mean", "sd", "max");
                System.out.format("%s %8d%8.2f%8.2f%8d\n", testName, hd.getMinValue(), hd.getMean(),
                        hd.getStdDeviation(), hd.getMaxValue());
            }
        }));

        final PoolingNHttpClientConnectionManager mngr = new PoolingNHttpClientConnectionManager(ioreactor);
        mngr.setDefaultMaxPerRoute(maxConnections);
        mngr.setMaxTotal(maxConnections);
        final CloseableHttpAsyncClient ahc = HttpAsyncClientBuilder.create().setConnectionManager(mngr)
                .setDefaultRequestConfig(RequestConfig.custom().setLocalAddress(null).build()).build();
        try (final CloseableHttpClient client = new FiberHttpClient(ahc)) {
            final int num = duration * rate;

            final CountDownLatch cdl = new CountDownLatch(num);
            final Semaphore sem = new Semaphore(maxConnections);
            final RateLimiter rl = RateLimiter.create(rate);

            spawnStatisticsThread(printCycle, cdl, log, requestMeter, responseMeter, errorsMeter, testName);

            for (int i = 0; i < num; i++) {
                rl.acquire();
                if (sem.availablePermits() == 0)
                    log.debug("Maximum connections count reached, waiting...");
                sem.acquireUninterruptibly();

                new Fiber<Void>(() -> {
                    requestMeter.mark();
                    final long start = System.nanoTime();
                    try {
                        try (final CloseableHttpResponse ignored = client.execute(request)) {
                            responseMeter.mark();
                        } catch (final Throwable t) {
                            markError(errorsMeter, errors, t);
                        }
                    } catch (final Throwable t) {
                        markError(errorsMeter, errors, t);
                    } finally {
                        final long now = System.nanoTime();
                        final long millis = TimeUnit.NANOSECONDS.toMillis(now - start);
                        sts.record(start, millis);
                        sh.recordValue(millis);
                        sem.release();
                        cdl.countDown();
                    }
                }).start();
            }
            spawnProgressCheckThread(log, duration, checkCycle, cdl);
            cdl.await();
        }
    } catch (final ParseException ex) {
        System.err.println("Parsing failed.  Reason: " + ex.getMessage());
    }
}
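
Note the division of labor above: the RateLimiter paces how fast requests are started, while the Semaphore bounds how many are in flight at once. acquire() alone would not prevent unbounded concurrency when responses are slow, which is presumably why the two are combined here.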

From source file: com.twitter.distributedlog.basic.RecordGenerator.java

public static void main(String[] args) throws Exception {
    if (3 != args.length) {
        System.out.println(HELP);
        return;
    }

    String finagleNameStr = args[0];
    final String streamName = args[1];
    double rate = Double.parseDouble(args[2]);
    RateLimiter limiter = RateLimiter.create(rate);

    DistributedLogClient client = DistributedLogClientBuilder.newBuilder()
            .clientId(ClientId.apply("record-generator")).name("record-generator").thriftmux(true)
            .finagleNameStr(finagleNameStr).build();

    final CountDownLatch keepAliveLatch = new CountDownLatch(1);
    final AtomicLong numWrites = new AtomicLong(0);
    final AtomicBoolean running = new AtomicBoolean(true);

    while (running.get()) {
        limiter.acquire();
        String record = "record-" + System.currentTimeMillis();
        client.write(streamName, ByteBuffer.wrap(record.getBytes(UTF_8)))
                .addEventListener(new FutureEventListener<DLSN>() {
                    @Override
                    public void onFailure(Throwable cause) {
                        System.out.println("Encountered error on writing data");
                        cause.printStackTrace(System.err);
                        running.set(false);
                        keepAliveLatch.countDown();
                    }

                    @Override
                    public void onSuccess(DLSN value) {
                        long numSuccesses = numWrites.incrementAndGet();
                        if (numSuccesses % 100 == 0) {
                            System.out.println("Write " + numSuccesses + " records.");
                        }
                    }
                });
    }

    keepAliveLatch.await();
    client.close();
}
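
Because acquire() is called once per loop iteration, this generator issues at most rate writes per second regardless of how quickly the asynchronous write futures complete; the FutureEventListener only stops the loop (and releases the keep-alive latch) when a write fails.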

From source file: com.yahoo.pulsar.testclient.PerformanceReader.java

public static void main(String[] args) throws Exception {
    final Arguments arguments = new Arguments();
    JCommander jc = new JCommander(arguments);
    jc.setProgramName("pulsar-perf-reader");

    try {
        jc.parse(args);
    } catch (ParameterException e) {
        System.out.println(e.getMessage());
        jc.usage();
        System.exit(-1);
    }

    if (arguments.help) {
        jc.usage();
        System.exit(-1);
    }

    if (arguments.topic.size() != 1) {
        System.out.println("Only one topic name is allowed");
        jc.usage();
        System.exit(-1);
    }

    if (arguments.confFile != null) {
        Properties prop = new Properties(System.getProperties());
        prop.load(new FileInputStream(arguments.confFile));

        if (arguments.serviceURL == null) {
            arguments.serviceURL = prop.getProperty("brokerServiceUrl");
        }

        if (arguments.serviceURL == null) {
            arguments.serviceURL = prop.getProperty("webServiceUrl");
        }

        // fallback to previous-version serviceUrl property to maintain backward-compatibility
        if (arguments.serviceURL == null) {
            arguments.serviceURL = prop.getProperty("serviceUrl", "http://localhost:8080/");
        }

        if (arguments.authPluginClassName == null) {
            arguments.authPluginClassName = prop.getProperty("authPlugin", null);
        }

        if (arguments.authParams == null) {
            arguments.authParams = prop.getProperty("authParams", null);
        }
    }

    // Dump config variables
    ObjectMapper m = new ObjectMapper();
    ObjectWriter w = m.writerWithDefaultPrettyPrinter();
    log.info("Starting Pulsar performance reader with config: {}", w.writeValueAsString(arguments));

    final DestinationName prefixTopicName = DestinationName.get(arguments.topic.get(0));

    final RateLimiter limiter = arguments.rate > 0 ? RateLimiter.create(arguments.rate) : null;

    ReaderListener listener = (reader, msg) -> {
        messagesReceived.increment();
        bytesReceived.add(msg.getData().length);

        if (limiter != null) {
            limiter.acquire();
        }
    };

    EventLoopGroup eventLoopGroup;
    if (SystemUtils.IS_OS_LINUX) {
        eventLoopGroup = new EpollEventLoopGroup(Runtime.getRuntime().availableProcessors() * 2,
                new DefaultThreadFactory("pulsar-perf-reader"));
    } else {
        eventLoopGroup = new NioEventLoopGroup(Runtime.getRuntime().availableProcessors(),
                new DefaultThreadFactory("pulsar-perf-reader"));
    }

    ClientConfiguration clientConf = new ClientConfiguration();
    clientConf.setConnectionsPerBroker(arguments.maxConnections);
    clientConf.setStatsInterval(arguments.statsIntervalSeconds, TimeUnit.SECONDS);
    if (isNotBlank(arguments.authPluginClassName)) {
        clientConf.setAuthentication(arguments.authPluginClassName, arguments.authParams);
    }
    PulsarClient pulsarClient = new PulsarClientImpl(arguments.serviceURL, clientConf, eventLoopGroup);

    List<CompletableFuture<Reader>> futures = Lists.newArrayList();
    ReaderConfiguration readerConfig = new ReaderConfiguration();
    readerConfig.setReaderListener(listener);
    readerConfig.setReceiverQueueSize(arguments.receiverQueueSize);

    MessageId startMessageId;
    if ("earliest".equals(arguments.startMessageId)) {
        startMessageId = MessageId.earliest;
    } else if ("latest".equals(arguments.startMessageId)) {
        startMessageId = MessageId.latest;
    } else {
        String[] parts = arguments.startMessageId.split(":");
        startMessageId = new MessageIdImpl(Long.parseLong(parts[0]), Long.parseLong(parts[1]), -1);
    }

    for (int i = 0; i < arguments.numDestinations; i++) {
        final DestinationName destinationName = (arguments.numDestinations == 1) ? prefixTopicName
                : DestinationName.get(String.format("%s-%d", prefixTopicName, i));

        futures.add(pulsarClient.createReaderAsync(destinationName.toString(), startMessageId, readerConfig));
    }

    FutureUtil.waitForAll(futures).get();

    log.info("Start reading from {} topics", arguments.numDestinations);

    long oldTime = System.nanoTime();

    while (true) {
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            break;
        }

        long now = System.nanoTime();
        double elapsed = (now - oldTime) / 1e9;
        double rate = messagesReceived.sumThenReset() / elapsed;
        double throughput = bytesReceived.sumThenReset() / elapsed * 8 / 1024 / 1024;

        log.info("Read throughput: {}  msg/s -- {} Mbit/s", dec.format(rate), dec.format(throughput));
        oldTime = now;
    }

    pulsarClient.close();
}
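
Calling acquire() inside the listener deliberately blocks the listener thread, so a configured rate also backpressures delivery through the bounded receiver queue. Where blocking is unacceptable and dropping excess messages is acceptable, a non-blocking variant of the same listener could use tryAcquire() instead; a hedged sketch of that load-shedding policy (not what PerformanceReader does):

ReaderListener sheddingListener = (reader, msg) -> {
    if (limiter != null && !limiter.tryAcquire()) {
        return; // over the configured rate: drop this message (hypothetical policy)
    }
    messagesReceived.increment();
    bytesReceived.add(msg.getData().length);
};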

From source file: org.apache.distributedlog.basic.RecordGenerator.java

public static void main(String[] args) throws Exception {
    if (3 != args.length) {
        System.out.println(HELP);
        return;
    }

    String finagleNameStr = args[0];
    final String streamName = args[1];
    double rate = Double.parseDouble(args[2]);
    RateLimiter limiter = RateLimiter.create(rate);

    DistributedLogClient client = DistributedLogClientBuilder.newBuilder()
            .clientId(ClientId$.MODULE$.apply("record-generator")).name("record-generator").thriftmux(true)
            .finagleNameStr(finagleNameStr).build();

    final CountDownLatch keepAliveLatch = new CountDownLatch(1);
    final AtomicLong numWrites = new AtomicLong(0);
    final AtomicBoolean running = new AtomicBoolean(true);

    while (running.get()) {
        limiter.acquire();
        String record = "record-" + System.currentTimeMillis();
        client.write(streamName, ByteBuffer.wrap(record.getBytes(UTF_8)))
                .addEventListener(new FutureEventListener<DLSN>() {
                    @Override
                    public void onFailure(Throwable cause) {
                        System.out.println("Encountered error on writing data");
                        cause.printStackTrace(System.err);
                        running.set(false);
                        keepAliveLatch.countDown();
                    }

                    @Override
                    public void onSuccess(DLSN value) {
                        long numSuccesses = numWrites.incrementAndGet();
                        if (numSuccesses % 100 == 0) {
                            System.out.println("Write " + numSuccesses + " records.");
                        }
                    }
                });
    }

    keepAliveLatch.await();
    client.close();
}

From source file: com.yahoo.pulsar.testclient.PerformanceConsumer.java

public static void main(String[] args) throws Exception {
    final Arguments arguments = new Arguments();
    JCommander jc = new JCommander(arguments);
    jc.setProgramName("pulsar-perf-consumer");

    try {
        jc.parse(args);
    } catch (ParameterException e) {
        System.out.println(e.getMessage());
        jc.usage();
        System.exit(-1);
    }

    if (arguments.help) {
        jc.usage();
        System.exit(-1);
    }

    if (arguments.topic.size() != 1) {
        System.out.println("Only one destination name is allowed");
        jc.usage();
        System.exit(-1);
    }

    if (arguments.confFile != null) {
        Properties prop = new Properties(System.getProperties());
        prop.load(new FileInputStream(arguments.confFile));

        if (arguments.serviceURL == null) {
            arguments.serviceURL = prop.getProperty("brokerServiceUrl");
        }

        if (arguments.serviceURL == null) {
            arguments.serviceURL = prop.getProperty("webServiceUrl");
        }

        // fallback to previous-version serviceUrl property to maintain backward-compatibility
        if (arguments.serviceURL == null) {
            arguments.serviceURL = prop.getProperty("serviceUrl", "http://localhost:8080/");
        }

        if (arguments.authPluginClassName == null) {
            arguments.authPluginClassName = prop.getProperty("authPlugin", null);
        }

        if (arguments.authParams == null) {
            arguments.authParams = prop.getProperty("authParams", null);
        }
    }

    // Dump config variables
    ObjectMapper m = new ObjectMapper();
    ObjectWriter w = m.writerWithDefaultPrettyPrinter();
    log.info("Starting Pulsar performance consumer with config: {}", w.writeValueAsString(arguments));

    final DestinationName prefixDestinationName = DestinationName.get(arguments.topic.get(0));

    final RateLimiter limiter = arguments.rate > 0 ? RateLimiter.create(arguments.rate) : null;

    MessageListener listener = new MessageListener() {
        public void received(Consumer consumer, Message msg) {
            messagesReceived.increment();
            bytesReceived.add(msg.getData().length);

            if (limiter != null) {
                limiter.acquire();
            }

            consumer.acknowledgeAsync(msg);
        }
    };

    EventLoopGroup eventLoopGroup;
    if (SystemUtils.IS_OS_LINUX) {
        eventLoopGroup = new EpollEventLoopGroup(Runtime.getRuntime().availableProcessors() * 2,
                new DefaultThreadFactory("pulsar-perf-consumer"));
    } else {
        eventLoopGroup = new NioEventLoopGroup(Runtime.getRuntime().availableProcessors(),
                new DefaultThreadFactory("pulsar-perf-consumer"));
    }

    ClientConfiguration clientConf = new ClientConfiguration();
    clientConf.setConnectionsPerBroker(arguments.maxConnections);
    clientConf.setStatsInterval(arguments.statsIntervalSeconds, TimeUnit.SECONDS);
    if (isNotBlank(arguments.authPluginClassName)) {
        clientConf.setAuthentication(arguments.authPluginClassName, arguments.authParams);
    }
    PulsarClient pulsarClient = new PulsarClientImpl(arguments.serviceURL, clientConf, eventLoopGroup);

    List<Future<Consumer>> futures = Lists.newArrayList();
    ConsumerConfiguration consumerConfig = new ConsumerConfiguration();
    consumerConfig.setMessageListener(listener);
    consumerConfig.setReceiverQueueSize(arguments.receiverQueueSize);

    for (int i = 0; i < arguments.numDestinations; i++) {
        final DestinationName destinationName = (arguments.numDestinations == 1) ? prefixDestinationName
                : DestinationName.get(String.format("%s-%d", prefixDestinationName, i));
        log.info("Adding {} consumers on destination {}", arguments.numConsumers, destinationName);

        for (int j = 0; j < arguments.numConsumers; j++) {
            String subscriberName;
            if (arguments.numConsumers > 1) {
                subscriberName = String.format("%s-%d", arguments.subscriberName, j);
            } else {
                subscriberName = arguments.subscriberName;
            }

            futures.add(
                    pulsarClient.subscribeAsync(destinationName.toString(), subscriberName, consumerConfig));
        }
    }

    for (Future<Consumer> future : futures) {
        future.get();
    }

    log.info("Start receiving from {} consumers on {} destinations", arguments.numConsumers,
            arguments.numDestinations);

    long oldTime = System.nanoTime();

    while (true) {
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            break;
        }

        long now = System.nanoTime();
        double elapsed = (now - oldTime) / 1e9;
        double rate = messagesReceived.sumThenReset() / elapsed;
        double throughput = bytesReceived.sumThenReset() / elapsed * 8 / 1024 / 1024;

        log.info("Throughput received: {}  msg/s -- {} Mbit/s", dec.format(rate), dec.format(throughput));
        oldTime = now;
    }

    pulsarClient.close();
}
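
This consumer variant follows the same shape as the reader example, with one difference worth noting: each message is acknowledged only after the (optional) acquire() call, so a configured rate also paces acknowledgements back to the broker.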

From source file: io.fluo.webindex.data.LoadS3.java

public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        log.error("Usage: LoadS3 <pathsFile> <range>");
        System.exit(1);
    }
    final List<String> loadList = IndexEnv.getPathsRange(args[0], args[1]);
    if (loadList.isEmpty()) {
        log.error("No files to load given {} {}", args[0], args[1]);
        System.exit(1);
    }

    final int rateLimit = DataConfig.load().getLoadRateLimit();

    SparkConf sparkConf = new SparkConf().setAppName("webindex-load-s3");
    try (JavaSparkContext ctx = new JavaSparkContext(sparkConf)) {

        log.info("Loading {} files (Range {} of paths file {}) from AWS", loadList.size(), args[1], args[0]);

        JavaRDD<String> loadRDD = ctx.parallelize(loadList, loadList.size());

        final String prefix = DataConfig.CC_URL_PREFIX;

        loadRDD.foreachPartition(iter -> {
            final FluoConfiguration fluoConfig = new FluoConfiguration(new File("fluo.properties"));
            final RateLimiter rateLimiter = rateLimit > 0 ? RateLimiter.create(rateLimit) : null;
            try (FluoClient client = FluoFactory.newClient(fluoConfig);
                    LoaderExecutor le = client.newLoaderExecutor()) {
                iter.forEachRemaining(path -> {
                    String urlToCopy = prefix + path;
                    log.info("Loading {} to Fluo", urlToCopy);
                    try {
                        ArchiveReader reader = WARCReaderFactory.get(new URL(urlToCopy), 0);
                        for (ArchiveRecord record : reader) {
                            Page page = ArchiveUtil.buildPageIgnoreErrors(record);
                            if (page.getOutboundLinks().size() > 0) {
                                log.info("Loading page {} with {} links", page.getUrl(),
                                        page.getOutboundLinks().size());
                                if (rateLimiter != null) {
                                    rateLimiter.acquire();
                                }
                                le.execute(PageLoader.updatePage(page));
                            }
                        }
                    } catch (Exception e) {
                        log.error("Exception while processing {}", path, e);
                    }
                });
            }
        });
    }
}
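
The limiter is constructed inside foreachPartition rather than on the driver: Guava's RateLimiter does not implement Serializable, so it cannot be captured by a Spark closure, and building one per partition also means the configured rate applies to each partition independently rather than to the job as a whole.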

From source file: webindex.data.LoadS3.java

public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        log.error("Usage: LoadS3 <pathsFile> <range>");
        System.exit(1);
    }
    final List<String> loadList = IndexEnv.getPathsRange(args[0], args[1]);
    if (loadList.isEmpty()) {
        log.error("No files to load given {} {}", args[0], args[1]);
        System.exit(1);
    }

    final WebIndexConfig webIndexConfig = WebIndexConfig.load();

    final int rateLimit = webIndexConfig.getLoadRateLimit();
    final String appName = webIndexConfig.fluoApp;

    SparkConf sparkConf = new SparkConf().setAppName("webindex-load-s3");
    try (JavaSparkContext ctx = new JavaSparkContext(sparkConf)) {

        log.info("Loading {} files (Range {} of paths file {}) from AWS", loadList.size(), args[1], args[0]);

        JavaRDD<String> loadRDD = ctx.parallelize(loadList, loadList.size());

        final String prefix = WebIndexConfig.CC_URL_PREFIX;

        loadRDD.foreachPartition(iter -> {
            final FluoConfiguration fluoConfig = new FluoConfiguration(new File("fluo-conn.properties"));
            fluoConfig.setApplicationName(appName);
            final RateLimiter rateLimiter = rateLimit > 0 ? RateLimiter.create(rateLimit) : null;
            try (FluoClient client = FluoFactory.newClient(fluoConfig);
                    LoaderExecutor le = client.newLoaderExecutor()) {
                iter.forEachRemaining(path -> {
                    String urlToCopy = prefix + path;
                    log.info("Loading {} to Fluo", urlToCopy);
                    try {
                        ArchiveReader reader = WARCReaderFactory.get(new URL(urlToCopy), 0);
                        for (ArchiveRecord record : reader) {
                            Page page = ArchiveUtil.buildPageIgnoreErrors(record);
                            if (page.getOutboundLinks().size() > 0) {
                                log.info("Loading page {} with {} links", page.getUrl(),
                                        page.getOutboundLinks().size());
                                if (rateLimiter != null) {
                                    rateLimiter.acquire();
                                }
                                le.execute(PageLoader.updatePage(page));
                            }
                        }
                    } catch (Exception e) {
                        log.error("Exception while processing {}", path, e);
                    }
                });
            }
        });
    }
}

From source file: io.fluo.webindex.data.LoadHdfs.java

public static void main(String[] args) throws Exception {

    if (args.length != 1) {
        log.error("Usage: LoadHdfs <dataDir>");
        System.exit(1);
    }
    final String dataDir = args[0];
    IndexEnv.validateDataDir(dataDir);

    final String hadoopConfDir = IndexEnv.getHadoopConfDir();
    final int rateLimit = DataConfig.load().getLoadRateLimit();

    List<String> loadPaths = new ArrayList<>();
    FileSystem hdfs = IndexEnv.getHDFS();
    RemoteIterator<LocatedFileStatus> listIter = hdfs.listFiles(new Path(dataDir), true);
    while (listIter.hasNext()) {
        LocatedFileStatus status = listIter.next();
        if (status.isFile()) {
            loadPaths.add(status.getPath().toString());
        }
    }

    log.info("Loading {} files into Fluo from {}", loadPaths.size(), dataDir);

    SparkConf sparkConf = new SparkConf().setAppName("webindex-load-hdfs");
    try (JavaSparkContext ctx = new JavaSparkContext(sparkConf)) {

        JavaRDD<String> paths = ctx.parallelize(loadPaths, loadPaths.size());

        paths.foreachPartition(iter -> {
            final FluoConfiguration fluoConfig = new FluoConfiguration(new File("fluo.properties"));
            final RateLimiter rateLimiter = rateLimit > 0 ? RateLimiter.create(rateLimit) : null;
            FileSystem fs = IndexEnv.getHDFS(hadoopConfDir);
            try (FluoClient client = FluoFactory.newClient(fluoConfig);
                    LoaderExecutor le = client.newLoaderExecutor()) {
                iter.forEachRemaining(path -> {
                    Path filePath = new Path(path);
                    try {
                        if (fs.exists(filePath)) {
                            FSDataInputStream fsin = fs.open(filePath);
                            ArchiveReader reader = WARCReaderFactory.get(filePath.getName(), fsin, true);
                            for (ArchiveRecord record : reader) {
                                Page page = ArchiveUtil.buildPageIgnoreErrors(record);
                                if (page.getOutboundLinks().size() > 0) {
                                    log.info("Loading page {} with {} links", page.getUrl(),
                                            page.getOutboundLinks().size());
                                    if (rateLimiter != null) {
                                        rateLimiter.acquire();
                                    }
                                    le.execute(PageLoader.updatePage(page));
                                }
                            }
                        }
                    } catch (IOException e) {
                        log.error("Exception while processing {}", path, e);
                    }
                });
            }
        });
    }
}
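
The null-check-then-acquire pattern recurs throughout the last several examples. A small helper, sketched here as one possible refactoring (the method name is hypothetical), centralizes it:

// Null-safe throttle helper for the recurring "optional limiter" pattern above.
static void throttleIfConfigured(RateLimiter limiter) {
    if (limiter != null) {
        limiter.acquire(); // block only when a rate cap was configured
    }
}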