Example usage for com.google.common.collect.Lists.partition

Introduction

This page collects example usages of com.google.common.collect.Lists.partition, drawn from open-source projects.

Prototype

public static <T> List<List<T>> partition(List<T> list, int size) 

Document

Returns consecutive sublists of a list (each a List#subList(int, int) view), all of the same size; the final sublist may be smaller.
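
For orientation before the project examples, here is a minimal, self-contained sketch of that behavior (class and variable names are illustrative):

import com.google.common.collect.Lists;

import java.util.Arrays;
import java.util.List;

public class PartitionDemo {
    public static void main(String[] args) {
        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5, 6, 7);

        // Consecutive chunks of size 3; the final chunk keeps the remainder.
        List<List<Integer>> chunks = Lists.partition(numbers, 3);

        System.out.println(chunks); // prints [[1, 2, 3], [4, 5, 6], [7]]
    }
}

The outer list returned is an unmodifiable view backed by the source list, so it reflects later changes to the source; a size of zero or less is rejected with an IllegalArgumentException.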

Usage

From source file:org.opennms.netmgt.newts.NewtsWriter.java
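
Samples pulled off a ring buffer are partitioned into batches of at most m_maxBatchSize before being indexed or inserted into the sample repository.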

@Override
public void onEvent(SampleBatchEvent event) throws Exception {
    // We'd expect the logs from this thread to be in collectd.log
    Logging.putPrefix("collectd");

    List<Sample> samples = event.getSamples();
    // Decrement our entry counter
    m_numEntriesOnRingBuffer.decrementAndGet();

    // Partition the samples into collections smaller than max_batch_size
    for (List<Sample> batch : Lists.partition(samples, m_maxBatchSize)) {
        try {
            if (event.isIndexOnly() && !NewtsUtils.DISABLE_INDEXING) {
                LOG.debug("Indexing {} samples", batch.size());
                m_indexer.update(batch);
            } else {
                LOG.debug("Inserting {} samples", batch.size());
                m_sampleRepository.insert(batch);
            }

            if (LOG.isDebugEnabled()) {
                String uniqueResourceIds = batch.stream().map(s -> s.getResource().getId()).distinct()
                        .collect(Collectors.joining(", "));
                LOG.debug("Successfully inserted samples for resources with ids {}", uniqueResourceIds);
            }
        } catch (Throwable t) {
            RATE_LIMITED_LOGGER.error("An error occurred while inserting samples. Some samples may be lost.", t);
        }
    }
}

From source file:org.apache.druid.indexing.overlord.autoscaling.ec2.EC2AutoScaler.java
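
IP addresses are partitioned into chunks of MAX_AWS_FILTER_VALUES so that each DescribeInstances request stays under the default AWS filter limits.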

@Override
public List<String> ipToIdLookup(List<String> ips) {
    final List<String> retVal = FluentIterable
            // chunk requests to avoid hitting default AWS limits on filters
            .from(Lists.partition(ips, MAX_AWS_FILTER_VALUES))
            .transformAndConcat(new Function<List<String>, Iterable<Reservation>>() {
                @Override
                public Iterable<Reservation> apply(List<String> input) {
                    return amazonEC2Client.describeInstances(
                            new DescribeInstancesRequest().withFilters(new Filter("private-ip-address", input)))
                            .getReservations();
                }
            }).transformAndConcat(new Function<Reservation, Iterable<Instance>>() {
                @Override
                public Iterable<Instance> apply(Reservation reservation) {
                    return reservation.getInstances();
                }
            }).transform(new Function<Instance, String>() {
                @Override
                public String apply(Instance instance) {
                    return instance.getInstanceId();
                }
            }).toList();

    log.debug("Performing lookup: %s --> %s", ips, retVal);

    return retVal;
}

From source file:com.simiacryptus.mindseye.test.integration.ClassifyProblem.java
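
This longer example uses partition near the end of run: the validation set is split into batches of 100 records so the trained network can evaluate them batch by batch while collecting misclassified rows.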

@Nonnull
@Override
public ClassifyProblem run(@Nonnull final NotebookOutput log) {
    @Nonnull
    final TrainingMonitor monitor = TestUtil.getMonitor(history);
    final Tensor[][] trainingData = getTrainingData(log);

    @Nonnull
    final DAGNetwork network = fwdFactory.imageToVector(log, categories);
    log.h3("Network Diagram");
    log.eval(() -> {
        return Graphviz.fromGraph(TestUtil.toGraph(network)).height(400).width(600).render(Format.PNG)
                .toImage();
    });

    log.h3("Training");
    @Nonnull
    final SimpleLossNetwork supervisedNetwork = new SimpleLossNetwork(network, new EntropyLossLayer());
    TestUtil.instrumentPerformance(supervisedNetwork);
    int initialSampleSize = Math.max(trainingData.length / 5, Math.min(10, trainingData.length / 2));
    @Nonnull
    final ValidatingTrainer trainer = optimizer.train(log,
            new SampledArrayTrainable(trainingData, supervisedNetwork, initialSampleSize, getBatchSize()),
            new ArrayTrainable(trainingData, supervisedNetwork, getBatchSize()), monitor);
    log.run(() -> {
        trainer.setTimeout(timeoutMinutes, TimeUnit.MINUTES).setMaxIterations(10000).run();
    });
    if (!history.isEmpty()) {
        log.eval(() -> {
            return TestUtil.plot(history);
        });
        log.eval(() -> {
            return TestUtil.plotTime(history);
        });
    }

    @Nonnull
    String training_name = log.getName() + "_" + ClassifyProblem.modelNo++ + "_plot.png";
    try {
        BufferedImage image = Util.toImage(TestUtil.plot(history));
        if (null != image)
            ImageIO.write(image, "png", log.file(training_name));
    } catch (IOException e) {
        logger.warn("Error writing result images", e);
    }
    log.appendFrontMatterProperty("result_plot", new File(log.getResourceDir(), training_name).toString(), ";");

    TestUtil.extractPerformance(log, supervisedNetwork);
    @Nonnull
    final String modelName = "classification_model_" + ClassifyProblem.modelNo++ + ".json";
    log.appendFrontMatterProperty("result_model", modelName, ";");
    log.p("Saved model as " + log.file(network.getJson().toString(), modelName, modelName));

    log.h3("Validation");
    log.p("If we apply our model against the entire validation dataset, we get this accuracy:");
    log.eval(() -> {
        return data.validationData().mapToDouble(
                labeledObject -> predict(network, labeledObject)[0] == parse(labeledObject.label) ? 1 : 0)
                .average().getAsDouble() * 100;
    });

    log.p("Let's examine some incorrectly predicted results in more detail:");
    log.eval(() -> {
        try {
            @Nonnull
            final TableOutput table = new TableOutput();
            Lists.partition(data.validationData().collect(Collectors.toList()), 100).stream().flatMap(batch -> {
                @Nonnull
                TensorList batchIn = TensorArray
                        .create(batch.stream().map(x -> x.data).toArray(i -> new Tensor[i]));
                TensorList batchOut = network.eval(new ConstantResult(batchIn)).getData();
                return IntStream.range(0, batchOut.length())
                        .mapToObj(i -> toRow(log, batch.get(i), batchOut.get(i).getData()));
            }).filter(x -> null != x).limit(10).forEach(table::putRow);
            return table;
        } catch (@Nonnull final IOException e) {
            throw new RuntimeException(e);
        }
    });
    return this;
}

From source file:hu.bme.mit.trainbenchmark.benchmark.fourstore.driver.FourStoreDriver.java
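
The source vertices of an edge multimap are partitioned into groups of PARTITION_SIZE, and each group's edges are inserted as one batch.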

public void insertEdges(final Multimap<String, String> edges, final String type) throws IOException {
    if (edges.isEmpty()) {
        return;
    }

    final ArrayList<String> sourceVertices = new ArrayList<>(edges.keySet());
    final List<List<String>> sourceVerticesPartitions = Lists.partition(sourceVertices, PARTITION_SIZE);
    for (final List<String> sourceVerticesPartition : sourceVerticesPartitions) {

        final Multimap<String, String> edgePartition = ArrayListMultimap.create();
        for (final String sourceVertexURI : sourceVerticesPartition) {
            final Collection<String> targetVertexURIs = edges.get(sourceVertexURI);
            edgePartition.putAll(sourceVertexURI, targetVertexURIs);
        }

        insertEdgesPartition(edgePartition, type);
    }
}

From source file:com.romeikat.datamessie.core.sync.service.template.withIdAndVersion.CreateOrUpdateExecutor.java
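
Entity ids are partitioned into batches of batchSizeEntities; each batch is loaded on the LHS and handed to a single-threaded RhsUpdater while progress is reported.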

private void update(final List<Long> lhsIds) throws TaskCancelledException {
    final Collection<List<Long>> lhsIdsBatches = Lists.partition(lhsIds, batchSizeEntities);
    final int lhsCount = lhsIds.size();
    int firstEntity = 0;

    CountDownLatch rhsInProgress = null;
    CountDownLatch rhsDone = null;
    final Executor e = Executors.newSingleThreadExecutor();

    for (final List<Long> lhsIdsBatch : lhsIdsBatches) {
        // Feedback
        final int lastEntity = firstEntity + lhsIdsBatch.size();
        final double progress = (double) lastEntity / (double) lhsCount;
        final String msg = String.format("Updating %s to %s of %s (%s)",
                IntegerConverter.INSTANCE.convertToString(firstEntity + 1),
                IntegerConverter.INSTANCE.convertToString(lastEntity),
                IntegerConverter.INSTANCE.convertToString(lhsCount),
                PercentageConverter.INSTANCE_2.convertToString(progress));
        final TaskExecutionWork work = taskExecution.reportWorkStart(msg);

        // Load LHS
        final Collection<E> lhsEntities = loadLhsBatch(rhsInProgress, lhsIdsBatch);

        // Update RHS
        rhsInProgress = new CountDownLatch(1);
        rhsDone = new CountDownLatch(1);
        e.execute(new RhsUpdater(rhsInProgress, rhsDone, lhsIdsBatch, lhsEntities));

        firstEntity += batchSizeEntities;

        taskExecution.reportWorkEnd(work);
        taskExecution.checkpoint();
    }

    // Wait until last batch ends
    if (rhsDone != null) {
        try {
            rhsDone.await();
        } catch (final InterruptedException e1) {
            Thread.currentThread().interrupt(); // restore the interrupt flag instead of swallowing it
        }
    }
}

From source file:com.netflix.metacat.main.services.search.ElasticSearchUtilImpl.java
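
Entity ids are partitioned into groups of 100, and each group is soft-deleted (and migrated) as a single batch.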

/**
 * Batch marks the documents as deleted.
 * @param type index type
 * @param ids list of entity ids
 * @param metacatRequestContext context containing the user name
 */
public void softDelete(final String type, final List<String> ids,
        final MetacatRequestContext metacatRequestContext) {
    if (ids != null && !ids.isEmpty()) {
        final List<List<String>> partitionedIds = Lists.partition(ids, 100);
        partitionedIds.forEach(subIds -> softDeleteDoc(type, subIds, metacatRequestContext));
        partitionedIds.forEach(subIds -> ensureMigrationByCopy(type, subIds));
    }
}

From source file:com.falcon.orca.actors.ClusterManager.java
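
Within the START_LOAD branch, partition splits dynamic body and URL data evenly across the registered node managers, and any slice of 10000 or more entries is re-partitioned into chunks of 10000 before being sent.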

@Override
public void onReceive(Object message) {

    if (message instanceof ClusterEvent.CurrentClusterState) {
        log.info("Got message of type Current cluster state");
    } else if (message instanceof ClusterEvent.MemberUp) {
        ClusterEvent.MemberUp memberUp = (ClusterEvent.MemberUp) message;
        //if(memberUp.member().hasRole("node_manager")) {
        NodeManagerCommand command = new NodeManagerCommand();
        command.setType(NodeManagerCommandType.REGISTER_TO_MASTER);
        getContext().actorSelection(memberUp.member().address() + "/user/node_manager").tell(command,
                getSelf());
        //}
    } else if (message instanceof ClusterEvent.MemberExited) {
        nodeManagers.remove(getSender());
    } else if (message instanceof ClusterEvent.UnreachableMember) {
        log.info("Got message of type unreachable member " + getSender());
        nodeManagers.remove(getSender());
    } else if (message instanceof ClusterManagerCommand) {
        switch (((ClusterManagerCommand) message).getType()) {
        case REGISTER_NODE:
            nodeManagers.add(getSender());
            if (nodeManagers.size() >= minimumNodes) {
                printOnCmd("Minimum number of nodes in cluster complete, you can run tests now.");
            }
            break;
        case UNREGISTER_NODE: {
            printOnCmd("Got a node disconnect request, current size " + nodeManagers.size());
            nodeManagers.remove(getSender());
            printOnCmd("Removing node from cluster after removal size " + nodeManagers.size());
            break;
        }
        case START_LOAD: {
            try {
                if (nodeManagers.size() < minimumNodes) {
                    printOnCmd("Not enough numbers of nodes, have patience.");
                } else if (isBusy) {
                    printOnCmd("Already a run going on can't start another, wait for it to finish");
                } else {
                    isBusy = true;
                    busyNodes = 0;
                    pausedNodes = 0;
                    mergedResponseTimes.clear();
                    runResults.clear();
                    nodeManagers.forEach((o) -> {
                        NodeManagerCommand nodeManagerCommand = new NodeManagerCommand();
                        nodeManagerCommand.setType(NodeManagerCommandType.CLEAR_LOAD_DATA);
                        o.tell(nodeManagerCommand, getSelf());
                    });
                    ClusterManagerCommand managerCommand = (ClusterManagerCommand) message;
                    RunDetails runDetails = (RunDetails) managerCommand.getFromContext("runDetails");
                    if (runDetails.isUrlDynamic() || runDetails.isBodyDynamic()) {
                        DataReader dataReader = new JsonFileReader(runDetails.getDataFilePath(),
                                runDetails.getTemplateFilePath());
                        HashMap<String, HashMap<String, List<Object>>> dynDataFromFile = dataReader
                                .readVariableValues();
                        HashMap<String, HashMap<String, DynVarUseType>> dynVarUseTypeFromFile = dataReader
                                .readVariableUseType();
                        HashMap<String, DynGenerator> generators = dataReader.readGenerators();
                        if (generators != null) {
                            generators.forEach((k, v) -> nodeManagers.forEach((o) -> {
                                NodeManagerCommand nodeManagerCommand = new NodeManagerCommand();
                                nodeManagerCommand.setType(NodeManagerCommandType.TAKE_LOAD_DATA);
                                nodeManagerCommand.putOnContext("key", k);
                                nodeManagerCommand.putOnContext("data", v);
                                nodeManagerCommand.putOnContext("dataType", "generator");
                                o.tell(nodeManagerCommand, getSelf());
                            }));
                        }
                        final int[] nodeManagerIndex = { 0 };
                        if (runDetails.isBodyDynamic()) {
                            HashMap<String, List<Object>> bodyParams = dynDataFromFile.get("bodyData");
                            HashMap<String, DynVarUseType> bodyParamsUseType = dynVarUseTypeFromFile
                                    .get("bodyVarUseType");
                            String template = dataReader.readTemplate();
                            if (!StringUtils.isBlank(template)) {
                                NodeManagerCommand nodeManagerCommand = new NodeManagerCommand();
                                nodeManagerCommand.putOnContext("template", template);
                                nodeManagerCommand.setType(NodeManagerCommandType.TAKE_LOAD_DATA);
                                nodeManagers.forEach((o) -> o.tell(nodeManagerCommand, getSelf()));
                            }
                            bodyParams.forEach((k, v) -> {
                                List<List<Object>> partitions = Lists.partition(v,
                                        v.size() / nodeManagers.size());
                                nodeManagerIndex[0] = 0;
                                nodeManagers.forEach((o) -> {
                                    List<Object> partition = partitions.get(nodeManagerIndex[0]++);
                                    if (partition.size() < 10000) {
                                        NodeManagerCommand nodeDataCommand = new NodeManagerCommand();
                                        nodeDataCommand.setType(NodeManagerCommandType.TAKE_LOAD_DATA);
                                        nodeDataCommand.putOnContext("key", k);
                                        nodeDataCommand.putOnContext("data", new ArrayList(partition));
                                        nodeDataCommand.putOnContext("dataType", "body");
                                        nodeDataCommand.putOnContext("dataUseType", bodyParamsUseType.get(k));
                                        o.tell(nodeDataCommand, getSelf());
                                    } else {
                                        List<List<Object>> nodePartitions = Lists.partition(partition, 10000);
                                        for (List<Object> nodePartition : nodePartitions) {
                                            NodeManagerCommand nodeDataCommand = new NodeManagerCommand();
                                            nodeDataCommand.setType(NodeManagerCommandType.TAKE_LOAD_DATA);
                                            nodeDataCommand.putOnContext("key", k);
                                            nodeDataCommand.putOnContext("data", new ArrayList(nodePartition));
                                            nodeDataCommand.putOnContext("dataType", "body");
                                            nodeDataCommand.putOnContext("dataUseType",
                                                    bodyParamsUseType.get(k));
                                            o.tell(nodeDataCommand, getSelf());
                                        }
                                    }
                                });
                            });
                        }
                        if (runDetails.isUrlDynamic()) {
                            HashMap<String, List<Object>> urlParams = dynDataFromFile.get("urlData");
                            HashMap<String, DynVarUseType> urlParamsUseType = dynVarUseTypeFromFile
                                    .get("urlVarUseType");
                            //Block to send urlTemplate to each node
                            {
                                NodeManagerCommand nodeManagerCommand = new NodeManagerCommand();
                                nodeManagerCommand.putOnContext("urlTemplate", runDetails.getUrl());
                                nodeManagerCommand.setType(NodeManagerCommandType.TAKE_LOAD_DATA);
                                nodeManagers.forEach((o) -> o.tell(nodeManagerCommand, getSelf()));
                            }

                            urlParams.forEach((k, v) -> {
                                List<List<Object>> partitions = Lists.partition(v,
                                        v.size() / nodeManagers.size());
                                nodeManagerIndex[0] = 0;
                                nodeManagers.forEach((o) -> {
                                    List<Object> partition = partitions.get(nodeManagerIndex[0]++);
                                    if (partition.size() < 10000) {
                                        NodeManagerCommand nodeDataCommand = new NodeManagerCommand();
                                        nodeDataCommand.setType(NodeManagerCommandType.TAKE_LOAD_DATA);
                                        nodeDataCommand.putOnContext("key", k);
                                        nodeDataCommand.putOnContext("data", new ArrayList<>(partition));
                                        nodeDataCommand.putOnContext("dataType", "url");
                                        nodeDataCommand.putOnContext("dataUseType", urlParamsUseType.get(k));
                                        o.tell(nodeDataCommand, getSelf());
                                    } else {
                                        List<List<Object>> nodePartitions = Lists.partition(partition, 10000);
                                        for (List<Object> nodePartition : nodePartitions) {
                                            NodeManagerCommand nodeDataCommand = new NodeManagerCommand();
                                            nodeDataCommand.setType(NodeManagerCommandType.TAKE_LOAD_DATA);
                                            nodeDataCommand.putOnContext("key", k);
                                            nodeDataCommand.putOnContext("data",
                                                    new ArrayList<>(nodePartition));
                                            nodeDataCommand.putOnContext("dataType", "url");
                                            nodeDataCommand.putOnContext("dataUseType",
                                                    urlParamsUseType.get(k));
                                            o.tell(nodeDataCommand, getSelf());
                                        }
                                    }
                                });
                            });
                        }
                        nodeManagers.forEach((o) -> {
                            NodeManagerCommand nodeManagerCommand = new NodeManagerCommand();
                            nodeManagerCommand.setType(NodeManagerCommandType.LOAD_DATA_COMPLETE);
                            o.tell(nodeManagerCommand, getSelf());
                        });
                    }
                    nodeManagers.forEach((o) -> {
                        NodeManagerCommand command = new NodeManagerCommand();
                        command.putOnContext("runDetails", runDetails);
                        command.setType(NodeManagerCommandType.START_LOAD);
                        o.tell(command, getSelf());
                    });
                }
            } catch (IOException ie) {
                printOnCmd("Datareader failed to read data from file, make sure template file, datafile pathe"
                        + " are correct.");
                log.error("Datareader failed to load", ie);
                busyNodes = 0;
                isBusy = false;
            }
            break;
        }
        case LOAD_GENERATION_START: {
            busyNodes++;
            break;
        }
        case LOAD_GENERATION_COMPLETE: {
            runResults.add((RunResult) ((ClusterManagerCommand) message).getFromContext("runResult"));
            NodeManagerCommand nodeManagerCommand = new NodeManagerCommand();
            nodeManagerCommand.setType(NodeManagerCommandType.SEND_DATA);
            getSender().tell(nodeManagerCommand, getSelf());
            break;
        }
        case TAKE_DATA: {
            mergedResponseTimes
                    .addAll((List<Long>) ((ClusterManagerCommand) message).getFromContext("responseTimes"));
            break;
        }
        case DATA_SEND_COMPLETE: {
            busyNodes--;
            if (busyNodes <= 0) {
                isBusy = false;
                printMergedResult(runResults, mergedResponseTimes);
            }
            break;
        }
        case STOP_LOAD: {
            nodeManagers.forEach((o) -> {
                NodeManagerCommand command = new NodeManagerCommand();
                command.setType(NodeManagerCommandType.STOP_LOAD);
                o.tell(command, getSelf());
            });
            break;
        }
        case PAUSE_LOAD: {
            nodeManagers.forEach((o) -> {
                NodeManagerCommand command = new NodeManagerCommand();
                command.setType(NodeManagerCommandType.PAUSE_LOAD);
                o.tell(command, getSelf());
            });
            break;
        }
        case LOAD_GENERATION_PAUSED: {
            pausedNodes++;
            if (Objects.equals(pausedNodes, busyNodes)) {
                isPaused = true;
            }
            break;
        }
        case LOAD_GENERATION_RESUMED: {
            pausedNodes--;
            if (pausedNodes <= 0) {
                isPaused = false;
            }
            break;
        }
        case RESUME_LOAD: {
            nodeManagers.forEach((o) -> {
                NodeManagerCommand command = new NodeManagerCommand();
                command.setType(NodeManagerCommandType.RESUME_LOAD);
                o.tell(command, getSelf());
            });
            break;
        }
        case CLUSTER_DETAILS:
            printOnCmd(printClusterDetails(isBusy, isPaused, nodeManagers.size(), busyNodes));
            break;
        case EXIT:
            //kill all the node managers and kill the local actor system and then exit
            nodeManagers.forEach((o) -> {
                NodeManagerCommand nodeManagerCommand = new NodeManagerCommand();
                nodeManagerCommand.setType(NodeManagerCommandType.REMOTE_EXIT);
                o.tell(nodeManagerCommand, getSelf());
            });
            context().stop(getSelf());
            break;
        default:
            unhandled(message);
        }
    }
}

From source file:org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStoreJDBC.java
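
All ids are partitioned so that each generated DELETE statement keeps its IN clause within RDBJDBCTools.MAX_IN_CLAUSE values.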

public int delete(Connection connection, RDBTableMetaData tmd, List<String> allIds) throws SQLException {
    int count = 0;

    for (List<String> ids : Lists.partition(allIds, RDBJDBCTools.MAX_IN_CLAUSE)) {
        PreparedStatement stmt;
        PreparedStatementComponent inClause = RDBJDBCTools.createInStatement("ID", ids, tmd.isIdBinary());
        String sql = "delete from " + tmd.getName() + " where " + inClause.getStatementComponent();
        stmt = connection.prepareStatement(sql);

        try {
            inClause.setParameters(stmt, 1);
            int result = stmt.executeUpdate();
            if (result != ids.size()) {
                LOG.debug("DB delete failed for " + tmd.getName() + "/" + ids);
            }
            count += result;
        } finally {
            stmt.close();
        }
    }

    return count;
}

From source file:com.r4intellij.misc.rinstallcache.LibraryIndexFactory.java
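
Function names exported by an R package are partitioned into batches of 50 so their signatures can be fetched with a handful of R calls rather than one call per function.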

static RPackage buildPackageCache(final String packageName) {
    log.info("rebuilding cache of " + packageName);
    System.err.println("rebuilding cache of " + packageName);

    HashMap<String, Function> api = new HashMap<String, Function>();
    // Note: make sure the output ends with a line break; otherwise the stream gobbler will not pick it up.
    //        String allFunsConcat = CachingUtils.evalRCommandCat("ls(getNamespace(\"" + packageName + "\"), all.names=F)");
    String allFunsConcat = CachingUtils.evalRCommandCat("getNamespaceExports('" + packageName + "')");

    List<String> allFuns = Splitter.on("\n").trimResults().splitToList(allFunsConcat);
    allFuns = Lists.newArrayList(Iterables.filter(allFuns, new Predicate<String>() {
        @Override
        public boolean apply(String funName) {
            return !funName.contains("<-") && !funName.startsWith(".");
        }
    }));

    if (allFuns.isEmpty()) {
        System.err.println("could not detect functions in package:" + packageName);
    }

    com.google.common.base.Function<String, String> quoteFun = new com.google.common.base.Function<String, String>() {
        public String apply(String funName) {
            // paste('toggleProbes', paste(deparse(args(AnnotationDbi::toggleProbes)), collapse=""), sep='||')
            return "cat(paste('" + funName + "', paste(deparse(args(" + packageName + "::" + funName
                    + ")), collapse=''), sep='----'), fill=1)";
            //                return "paste(" + funName + ", args(" + packageName + "::" + funName + "), sep='||');";
        }
    };

    for (List<String> funNamesBatch : Lists.partition(allFuns, 50)) {
        String getFunSigs = Joiner.on(";").join(Lists.transform(funNamesBatch, quoteFun));
        String funSigs = CachingUtils.evalRCommand(getFunSigs);

        List<String> strings = Splitter.on("\n").trimResults().splitToList(funSigs);

        for (String funSig : strings) {
            String[] splitSig = funSig.split("----");
            if (splitSig.length == 1)
                continue;

            String funName = splitSig[0];
            String signature = splitSig[1].replace("NULL", "").replace("\n", "");

            api.put(funName, new Function(funName, signature));

        }

    }

    //correct but too slow
    //        for (String funName : allFuns) {
    //            String funSig = CachingUtils.evalRCommand("args(" + packageName + "::" + funName + ")");
    //            funSig = funSig.replace("NULL", "").replace("\n", "");
    //            if (funSig.isEmpty()) {
    //                continue;
    //            }
    //
    //            api.put(funName, new Function(funName, funSig));
    //        }

    String[] rawDocStrings = CachingUtils
            .evalRCommand("pckgDocu <-library(help = " + packageName + "); pckgDocu$info[[2]]").split("\n");
    List<String> fusedDocStrings = new ArrayList<String>();
    String curGroup = null;
    for (int i = 0; i < rawDocStrings.length - 2; i++) {
        String curRawDoc = rawDocStrings[i];
        String curLine = curRawDoc.substring(curRawDoc.indexOf("\""), curRawDoc.length());

        curLine = curLine.replace("\"", "").replace("\n", "");

        if (curLine.startsWith("     ")) {
            curGroup += curLine.trim();
        } else {
            if (!Strings.isNullOrEmpty(curGroup)) {
                fusedDocStrings.add(curGroup);
            }
            curGroup = curLine.trim();
        }
    }

    for (String docString : fusedDocStrings) {
        int splitter = docString.indexOf(" ");

        if (splitter < 0) {
            System.err.println("doc string parsing failed for: " + docString);
            continue;
        }

        String funName = docString.substring(0, splitter).trim();
        String fundDesc = docString.substring(splitter, docString.length()).trim();
        Function function = api.get(funName);
        if (function != null) {
            function.setShortDesc(fundDesc);
        } else {
            System.err.println("could not find function for " + funName);
        }

    }

    // compile dependency list
    List<String> cleanedDeps = getDependencies(packageName);

    String packageVersion = getPackageVersion(packageName);
    RPackage rPackage = new RPackage(packageName, new ArrayList<Function>(api.values()), packageVersion,
            cleanedDeps);

    return rPackage;
}

From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.ClassifierProvider.java
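
Here partition turns a shuffled index list into roughly ten cross-validation folds; each fold is held out for inference while the remaining indexes train the classifier.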

default List<Double> crossTrainInfer(List<Map<String, Double>> X, List<String> Y, ResampleType resampleType,
        String label) throws AnalysisEngineProcessException {
    Set<Integer> indexes = IntStream.range(0, X.size()).boxed().collect(toSet());
    List<Integer> indexList = new ArrayList<>(indexes);
    Collections.shuffle(indexList);
    int nfolds = (int) Math.ceil(indexList.size() / 10.0); // despite the name, this is the fold size, chosen to yield roughly 10 folds
    List<Double> ret = IntStream.range(0, X.size()).mapToObj(i -> Double.NaN).collect(toList());
    int fold = 1;
    for (List<Integer> cvTestIndexes : Lists.partition(indexList, nfolds)) {
        LOG.info("Train Predict Fold {}", fold++);
        List<Map<String, Double>> cvTrainX = new ArrayList<>();
        List<String> cvTrainY = new ArrayList<>();
        Sets.difference(indexes, new HashSet<>(cvTestIndexes)).forEach(cvTrainIndex -> {
            cvTrainX.add(X.get(cvTrainIndex));
            cvTrainY.add(Y.get(cvTrainIndex));
        });
        train(cvTrainX, cvTrainY, resampleType, false);
        for (int cvTestIndex : cvTestIndexes) {
            double result = infer(X.get(cvTestIndex), label);
            ret.set(cvTestIndex, result);
        }
    }
    return ret;
}