Usage examples for com.google.common.collect.Lists#partition
public static <T> List<List<T>> partition(List<T> list, int size)
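Lists.partition returns consecutive sublists of the given list, each of size `size` (the final sublist may be smaller); the outer list is a view of the original, and the method throws IllegalArgumentException if size is not positive. A minimal, self-contained sketch of the behavior (the class name is illustrative):

import com.google.common.collect.Lists;
import java.util.Arrays;
import java.util.List;

public class PartitionDemo {
    public static void main(String[] args) {
        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5, 6, 7);
        // Consecutive sublists of size 3; the final sublist holds the remainder.
        List<List<Integer>> batches = Lists.partition(numbers, 3);
        System.out.println(batches); // prints [[1, 2, 3], [4, 5, 6], [7]]
    }
}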
From source file:org.opennms.netmgt.newts.NewtsWriter.java
@Override
public void onEvent(SampleBatchEvent event) throws Exception {
    // We'd expect the logs from this thread to be in collectd.log
    Logging.putPrefix("collectd");
    List<Sample> samples = event.getSamples();

    // Decrement our entry counter
    m_numEntriesOnRingBuffer.decrementAndGet();

    // Partition the samples into collections smaller than max_batch_size
    for (List<Sample> batch : Lists.partition(samples, m_maxBatchSize)) {
        try {
            if (event.isIndexOnly() && !NewtsUtils.DISABLE_INDEXING) {
                LOG.debug("Indexing {} samples", batch.size());
                m_indexer.update(batch);
            } else {
                LOG.debug("Inserting {} samples", batch.size());
                m_sampleRepository.insert(batch);
            }

            if (LOG.isDebugEnabled()) {
                String uniqueResourceIds = batch.stream()
                        .map(s -> s.getResource().getId())
                        .distinct()
                        .collect(Collectors.joining(", "));
                LOG.debug("Successfully inserted samples for resources with ids {}", uniqueResourceIds);
            }
        } catch (Throwable t) {
            RATE_LIMITED_LOGGER.error("An error occurred while inserting samples. Some samples may be lost.", t);
        }
    }
}
From source file:org.apache.druid.indexing.overlord.autoscaling.ec2.EC2AutoScaler.java
@Override
public List<String> ipToIdLookup(List<String> ips) {
    final List<String> retVal = FluentIterable
            // chunk requests to avoid hitting default AWS limits on filters
            .from(Lists.partition(ips, MAX_AWS_FILTER_VALUES))
            .transformAndConcat(new Function<List<String>, Iterable<Reservation>>() {
                @Override
                public Iterable<Reservation> apply(List<String> input) {
                    return amazonEC2Client.describeInstances(
                            new DescribeInstancesRequest().withFilters(new Filter("private-ip-address", input)))
                            .getReservations();
                }
            })
            .transformAndConcat(new Function<Reservation, Iterable<Instance>>() {
                @Override
                public Iterable<Instance> apply(Reservation reservation) {
                    return reservation.getInstances();
                }
            })
            .transform(new Function<Instance, String>() {
                @Override
                public String apply(Instance instance) {
                    return instance.getInstanceId();
                }
            })
            .toList();

    log.debug("Performing lookup: %s --> %s", ips, retVal);
    return retVal;
}
From source file:com.simiacryptus.mindseye.test.integration.ClassifyProblem.java
@Nonnull
@Override
public ClassifyProblem run(@Nonnull final NotebookOutput log) {
    @Nonnull final TrainingMonitor monitor = TestUtil.getMonitor(history);
    final Tensor[][] trainingData = getTrainingData(log);
    @Nonnull final DAGNetwork network = fwdFactory.imageToVector(log, categories);
    log.h3("Network Diagram");
    log.eval(() -> {
        return Graphviz.fromGraph(TestUtil.toGraph(network)).height(400).width(600).render(Format.PNG).toImage();
    });

    log.h3("Training");
    @Nonnull final SimpleLossNetwork supervisedNetwork = new SimpleLossNetwork(network, new EntropyLossLayer());
    TestUtil.instrumentPerformance(supervisedNetwork);
    int initialSampleSize = Math.max(trainingData.length / 5, Math.min(10, trainingData.length / 2));
    @Nonnull final ValidatingTrainer trainer = optimizer.train(log,
            new SampledArrayTrainable(trainingData, supervisedNetwork, initialSampleSize, getBatchSize()),
            new ArrayTrainable(trainingData, supervisedNetwork, getBatchSize()), monitor);
    log.run(() -> {
        trainer.setTimeout(timeoutMinutes, TimeUnit.MINUTES).setMaxIterations(10000).run();
    });
    if (!history.isEmpty()) {
        log.eval(() -> {
            return TestUtil.plot(history);
        });
        log.eval(() -> {
            return TestUtil.plotTime(history);
        });
    }

    @Nonnull String training_name = log.getName() + "_" + ClassifyProblem.modelNo++ + "_plot.png";
    try {
        BufferedImage image = Util.toImage(TestUtil.plot(history));
        if (null != image) ImageIO.write(image, "png", log.file(training_name));
    } catch (IOException e) {
        logger.warn("Error writing result images", e);
    }
    log.appendFrontMatterProperty("result_plot", new File(log.getResourceDir(), training_name).toString(), ";");

    TestUtil.extractPerformance(log, supervisedNetwork);
    @Nonnull final String modelName = "classification_model_" + ClassifyProblem.modelNo++ + ".json";
    log.appendFrontMatterProperty("result_model", modelName, ";");
    log.p("Saved model as " + log.file(network.getJson().toString(), modelName, modelName));

    log.h3("Validation");
    log.p("If we apply our model against the entire validation dataset, we get this accuracy:");
    log.eval(() -> {
        return data.validationData()
                .mapToDouble(labeledObject ->
                        predict(network, labeledObject)[0] == parse(labeledObject.label) ? 1 : 0)
                .average().getAsDouble() * 100;
    });

    log.p("Let's examine some incorrectly predicted results in more detail:");
    log.eval(() -> {
        try {
            @Nonnull final TableOutput table = new TableOutput();
            Lists.partition(data.validationData().collect(Collectors.toList()), 100).stream()
                    .flatMap(batch -> {
                        @Nonnull TensorList batchIn = TensorArray
                                .create(batch.stream().map(x -> x.data).toArray(i -> new Tensor[i]));
                        TensorList batchOut = network.eval(new ConstantResult(batchIn)).getData();
                        return IntStream.range(0, batchOut.length())
                                .mapToObj(i -> toRow(log, batch.get(i), batchOut.get(i).getData()));
                    })
                    .filter(x -> null != x).limit(10).forEach(table::putRow);
            return table;
        } catch (@Nonnull final IOException e) {
            throw new RuntimeException(e);
        }
    });
    return this;
}
From source file:hu.bme.mit.trainbenchmark.benchmark.fourstore.driver.FourStoreDriver.java
public void insertEdges(final Multimap<String, String> edges, final String type) throws IOException {
    if (edges.isEmpty()) {
        return;
    }

    final ArrayList<String> sourceVertices = new ArrayList<>(edges.keySet());
    final List<List<String>> sourceVerticesPartitions = Lists.partition(sourceVertices, PARTITION_SIZE);
    for (final List<String> sourceVerticesPartition : sourceVerticesPartitions) {
        final Multimap<String, String> edgePartition = ArrayListMultimap.create();
        for (final String sourceVertexURI : sourceVerticesPartition) {
            final Collection<String> targetVertexURIs = edges.get(sourceVertexURI);
            edgePartition.putAll(sourceVertexURI, targetVertexURIs);
        }
        insertEdgesPartition(edgePartition, type);
    }
}
From source file:com.romeikat.datamessie.core.sync.service.template.withIdAndVersion.CreateOrUpdateExecutor.java
private void update(final List<Long> lhsIds) throws TaskCancelledException {
    final Collection<List<Long>> lhsIdsBatches = Lists.partition(lhsIds, batchSizeEntities);
    final int lhsCount = lhsIds.size();
    int firstEntity = 0;
    CountDownLatch rhsInProgress = null;
    CountDownLatch rhsDone = null;
    final Executor e = Executors.newSingleThreadExecutor();
    for (final List<Long> lhsIdsBatch : lhsIdsBatches) {
        // Feedback
        final int lastEntity = firstEntity + lhsIdsBatch.size();
        final double progress = (double) lastEntity / (double) lhsCount;
        final String msg = String.format("Updating %s to %s of %s (%s)",
                IntegerConverter.INSTANCE.convertToString(firstEntity + 1),
                IntegerConverter.INSTANCE.convertToString(lastEntity),
                IntegerConverter.INSTANCE.convertToString(lhsCount),
                PercentageConverter.INSTANCE_2.convertToString(progress));
        final TaskExecutionWork work = taskExecution.reportWorkStart(msg);

        // Load LHS
        final Collection<E> lhsEntities = loadLhsBatch(rhsInProgress, lhsIdsBatch);

        // Update RHS
        rhsInProgress = new CountDownLatch(1);
        rhsDone = new CountDownLatch(1);
        e.execute(new RhsUpdater(rhsInProgress, rhsDone, lhsIdsBatch, lhsEntities));

        firstEntity += batchSizeEntities;
        taskExecution.reportWorkEnd(work);
        taskExecution.checkpoint();
    }

    // Wait until the last batch ends
    if (rhsDone != null) {
        try {
            rhsDone.await();
        } catch (final InterruptedException e1) {
        }
    }
}
From source file:com.netflix.metacat.main.services.search.ElasticSearchUtilImpl.java
/**
 * Batch marks the documents as deleted.
 * @param type index type
 * @param ids list of entity ids
 * @param metacatRequestContext context containing the user name
 */
public void softDelete(final String type, final List<String> ids,
        final MetacatRequestContext metacatRequestContext) {
    if (ids != null && !ids.isEmpty()) {
        final List<List<String>> partitionedIds = Lists.partition(ids, 100);
        partitionedIds.forEach(subIds -> softDeleteDoc(type, subIds, metacatRequestContext));
        partitionedIds.forEach(subIds -> ensureMigrationByCopy(type, subIds));
    }
}
From source file:com.falcon.orca.actors.ClusterManager.java
@Override
public void onReceive(Object message) {
    if (message instanceof ClusterEvent.CurrentClusterState) {
        log.info("Got message of type Current cluster state");
    } else if (message instanceof ClusterEvent.MemberUp) {
        ClusterEvent.MemberUp memberUp = (ClusterEvent.MemberUp) message;
        //if(memberUp.member().hasRole("node_manager")) {
        NodeManagerCommand command = new NodeManagerCommand();
        command.setType(NodeManagerCommandType.REGISTER_TO_MASTER);
        getContext().actorSelection(memberUp.member().address() + "/user/node_manager").tell(command, getSelf());
        //}
    } else if (message instanceof ClusterEvent.MemberExited) {
        nodeManagers.remove(getSender());
    } else if (message instanceof ClusterEvent.UnreachableMember) {
        log.info("Got message of type unreachable member " + getSender());
        nodeManagers.remove(getSender());
    } else if (message instanceof ClusterManagerCommand) {
        switch (((ClusterManagerCommand) message).getType()) {
        case REGISTER_NODE:
            nodeManagers.add(getSender());
            if (nodeManagers.size() >= minimumNodes) {
                printOnCmd("Minimum number of nodes in cluster complete, you can run tests now.");
            }
            break;
        case UNREGISTER_NODE: {
            printOnCmd("Got a node disconnect request, current size " + nodeManagers.size());
            nodeManagers.remove(getSender());
            printOnCmd("Removing node from cluster, after removal size " + nodeManagers.size());
            break;
        }
        case START_LOAD: {
            try {
                if (nodeManagers.size() < minimumNodes) {
                    printOnCmd("Not enough nodes, have patience.");
                } else if (isBusy) {
                    printOnCmd("Already a run going on, can't start another; wait for it to finish.");
                } else {
                    isBusy = true;
                    busyNodes = 0;
                    pausedNodes = 0;
                    mergedResponseTimes.clear();
                    runResults.clear();
                    nodeManagers.forEach((o) -> {
                        NodeManagerCommand nodeManagerCommand = new NodeManagerCommand();
                        nodeManagerCommand.setType(NodeManagerCommandType.CLEAR_LOAD_DATA);
                        o.tell(nodeManagerCommand, getSelf());
                    });
                    ClusterManagerCommand managerCommand = (ClusterManagerCommand) message;
                    RunDetails runDetails = (RunDetails) managerCommand.getFromContext("runDetails");
                    if (runDetails.isUrlDynamic() || runDetails.isBodyDynamic()) {
                        DataReader dataReader = new JsonFileReader(runDetails.getDataFilePath(),
                                runDetails.getTemplateFilePath());
                        HashMap<String, HashMap<String, List<Object>>> dynDataFromFile = dataReader
                                .readVariableValues();
                        HashMap<String, HashMap<String, DynVarUseType>> dynVarUseTypeFromFile = dataReader
                                .readVariableUseType();
                        HashMap<String, DynGenerator> generators = dataReader.readGenerators();
                        if (generators != null) {
                            generators.forEach((k, v) -> nodeManagers.forEach((o) -> {
                                NodeManagerCommand nodeManagerCommand = new NodeManagerCommand();
                                nodeManagerCommand.setType(NodeManagerCommandType.TAKE_LOAD_DATA);
                                nodeManagerCommand.putOnContext("key", k);
                                nodeManagerCommand.putOnContext("data", v);
                                nodeManagerCommand.putOnContext("dataType", "generator");
                                o.tell(nodeManagerCommand, getSelf());
                            }));
                        }
                        final int[] nodeManagerIndex = { 0 };
                        if (runDetails.isBodyDynamic()) {
                            HashMap<String, List<Object>> bodyParams = dynDataFromFile.get("bodyData");
                            HashMap<String, DynVarUseType> bodyParamsUseType = dynVarUseTypeFromFile
                                    .get("bodyVarUseType");
                            String template = dataReader.readTemplate();
                            if (!StringUtils.isBlank(template)) {
                                NodeManagerCommand nodeManagerCommand = new NodeManagerCommand();
                                nodeManagerCommand.putOnContext("template", template);
                                nodeManagerCommand.setType(NodeManagerCommandType.TAKE_LOAD_DATA);
                                nodeManagers.forEach((o) -> o.tell(nodeManagerCommand, getSelf()));
                            }
                            bodyParams.forEach((k, v) -> {
                                // Split each variable's values evenly across the nodes
                                List<List<Object>> partitions = Lists.partition(v, v.size() / nodeManagers.size());
                                nodeManagerIndex[0] = 0;
                                nodeManagers.forEach((o) -> {
                                    List<Object> partition = partitions.get(nodeManagerIndex[0]++);
                                    if (partition.size() < 10000) {
                                        NodeManagerCommand nodeDataCommand = new NodeManagerCommand();
                                        nodeDataCommand.setType(NodeManagerCommandType.TAKE_LOAD_DATA);
                                        nodeDataCommand.putOnContext("key", k);
                                        nodeDataCommand.putOnContext("data", new ArrayList<>(partition));
                                        nodeDataCommand.putOnContext("dataType", "body");
                                        nodeDataCommand.putOnContext("dataUseType", bodyParamsUseType.get(k));
                                        o.tell(nodeDataCommand, getSelf());
                                    } else {
                                        // Send large partitions in chunks of 10000
                                        List<List<Object>> nodePartitions = Lists.partition(partition, 10000);
                                        for (List<Object> nodePartition : nodePartitions) {
                                            NodeManagerCommand nodeDataCommand = new NodeManagerCommand();
                                            nodeDataCommand.setType(NodeManagerCommandType.TAKE_LOAD_DATA);
                                            nodeDataCommand.putOnContext("key", k);
                                            nodeDataCommand.putOnContext("data", new ArrayList<>(nodePartition));
                                            nodeDataCommand.putOnContext("dataType", "body");
                                            nodeDataCommand.putOnContext("dataUseType", bodyParamsUseType.get(k));
                                            o.tell(nodeDataCommand, getSelf());
                                        }
                                    }
                                });
                            });
                        }
                        if (runDetails.isUrlDynamic()) {
                            HashMap<String, List<Object>> urlParams = dynDataFromFile.get("urlData");
                            HashMap<String, DynVarUseType> urlParamsUseType = dynVarUseTypeFromFile
                                    .get("urlVarUseType");
                            // Block to send urlTemplate to each node
                            {
                                NodeManagerCommand nodeManagerCommand = new NodeManagerCommand();
                                nodeManagerCommand.putOnContext("urlTemplate", runDetails.getUrl());
                                nodeManagerCommand.setType(NodeManagerCommandType.TAKE_LOAD_DATA);
                                nodeManagers.forEach((o) -> o.tell(nodeManagerCommand, getSelf()));
                            }
                            urlParams.forEach((k, v) -> {
                                List<List<Object>> partitions = Lists.partition(v, v.size() / nodeManagers.size());
                                nodeManagerIndex[0] = 0;
                                nodeManagers.forEach((o) -> {
                                    List<Object> partition = partitions.get(nodeManagerIndex[0]++);
                                    if (partition.size() < 10000) {
                                        NodeManagerCommand nodeDataCommand = new NodeManagerCommand();
                                        nodeDataCommand.setType(NodeManagerCommandType.TAKE_LOAD_DATA);
                                        nodeDataCommand.putOnContext("key", k);
                                        nodeDataCommand.putOnContext("data", new ArrayList<>(partition));
                                        nodeDataCommand.putOnContext("dataType", "url");
                                        nodeDataCommand.putOnContext("dataUseType", urlParamsUseType.get(k));
                                        o.tell(nodeDataCommand, getSelf());
                                    } else {
                                        List<List<Object>> nodePartitions = Lists.partition(partition, 10000);
                                        for (List<Object> nodePartition : nodePartitions) {
                                            NodeManagerCommand nodeDataCommand = new NodeManagerCommand();
                                            nodeDataCommand.setType(NodeManagerCommandType.TAKE_LOAD_DATA);
                                            nodeDataCommand.putOnContext("key", k);
                                            nodeDataCommand.putOnContext("data", new ArrayList<>(nodePartition));
                                            nodeDataCommand.putOnContext("dataType", "url");
                                            nodeDataCommand.putOnContext("dataUseType", urlParamsUseType.get(k));
                                            o.tell(nodeDataCommand, getSelf());
                                        }
                                    }
                                });
                            });
                        }
                        nodeManagers.forEach((o) -> {
                            NodeManagerCommand nodeManagerCommand = new NodeManagerCommand();
                            nodeManagerCommand.setType(NodeManagerCommandType.LOAD_DATA_COMPLETE);
                            o.tell(nodeManagerCommand, getSelf());
                        });
                    }
                    nodeManagers.forEach((o) -> {
                        NodeManagerCommand command = new NodeManagerCommand();
                        command.putOnContext("runDetails", runDetails);
                        command.setType(NodeManagerCommandType.START_LOAD);
                        o.tell(command, getSelf());
                    });
                }
            } catch (IOException ie) {
                printOnCmd("DataReader failed to read data from file; make sure the template file and data file paths are correct.");
                log.error("DataReader failed to load", ie);
                busyNodes = 0;
                isBusy = false;
            }
            break;
        }
        case LOAD_GENERATION_START: {
            busyNodes++;
            break;
        }
        case LOAD_GENERATION_COMPLETE: {
            runResults.add((RunResult) ((ClusterManagerCommand) message).getFromContext("runResult"));
            NodeManagerCommand nodeManagerCommand = new NodeManagerCommand();
            nodeManagerCommand.setType(NodeManagerCommandType.SEND_DATA);
            getSender().tell(nodeManagerCommand, getSelf());
            break;
        }
        case TAKE_DATA: {
            mergedResponseTimes
                    .addAll((List<Long>) ((ClusterManagerCommand) message).getFromContext("responseTimes"));
            break;
        }
        case DATA_SEND_COMPLETE: {
            busyNodes--;
            if (busyNodes <= 0) {
                isBusy = false;
                printMergedResult(runResults, mergedResponseTimes);
            }
            break;
        }
        case STOP_LOAD: {
            nodeManagers.forEach((o) -> {
                NodeManagerCommand command = new NodeManagerCommand();
                command.setType(NodeManagerCommandType.STOP_LOAD);
                o.tell(command, getSelf());
            });
            break;
        }
        case PAUSE_LOAD: {
            nodeManagers.forEach((o) -> {
                NodeManagerCommand command = new NodeManagerCommand();
                command.setType(NodeManagerCommandType.PAUSE_LOAD);
                o.tell(command, getSelf());
            });
            break;
        }
        case LOAD_GENERATION_PAUSED: {
            pausedNodes++;
            if (Objects.equals(pausedNodes, busyNodes)) {
                isPaused = true;
            }
            break;
        }
        case LOAD_GENERATION_RESUMED: {
            pausedNodes--;
            if (pausedNodes <= 0) {
                isPaused = false;
            }
            break;
        }
        case RESUME_LOAD: {
            nodeManagers.forEach((o) -> {
                NodeManagerCommand command = new NodeManagerCommand();
                command.setType(NodeManagerCommandType.RESUME_LOAD);
                o.tell(command, getSelf());
            });
            break;
        }
        case CLUSTER_DETAILS:
            printOnCmd(printClusterDetails(isBusy, isPaused, nodeManagers.size(), busyNodes));
            break;
        case EXIT:
            // Kill all the node managers, then stop the local actor system and exit
            nodeManagers.forEach((o) -> {
                NodeManagerCommand nodeManagerCommand = new NodeManagerCommand();
                nodeManagerCommand.setType(NodeManagerCommandType.REMOTE_EXIT);
                o.tell(nodeManagerCommand, getSelf());
            });
            context().stop(getSelf());
            break;
        default:
            unhandled(message);
        }
    }
}
From source file:org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStoreJDBC.java
public int delete(Connection connection, RDBTableMetaData tmd, List<String> allIds) throws SQLException {
    int count = 0;

    for (List<String> ids : Lists.partition(allIds, RDBJDBCTools.MAX_IN_CLAUSE)) {
        PreparedStatement stmt;
        PreparedStatementComponent inClause = RDBJDBCTools.createInStatement("ID", ids, tmd.isIdBinary());
        String sql = "delete from " + tmd.getName() + " where " + inClause.getStatementComponent();
        stmt = connection.prepareStatement(sql);

        try {
            inClause.setParameters(stmt, 1);
            int result = stmt.executeUpdate();
            if (result != ids.size()) {
                LOG.debug("DB delete failed for " + tmd.getName() + "/" + ids);
            }
            count += result;
        } finally {
            stmt.close();
        }
    }

    return count;
}
From source file:com.r4intellij.misc.rinstallcache.LibraryIndexFactory.java
static RPackage buildPackageCache(final String packageName) {
    log.info("rebuilding cache of " + packageName);
    System.err.println("rebuilding cache of " + packageName);

    HashMap<String, Function> api = new HashMap<String, Function>();

    // note: make sure to have a linebreak at the end of the output, otherwise the streamgobbler will not pick it up
    // String allFunsConcat = CachingUtils.evalRCommandCat("ls(getNamespace(\"" + packageName + "\"), all.names=F)");
    String allFunsConcat = CachingUtils.evalRCommandCat("getNamespaceExports('" + packageName + "')");
    List<String> allFuns = Splitter.on("\n").trimResults().splitToList(allFunsConcat);
    allFuns = Lists.newArrayList(Iterables.filter(allFuns, new Predicate<String>() {
        @Override
        public boolean apply(String funName) {
            return !funName.contains("<-") && !funName.startsWith(".");
        }
    }));

    if (allFuns.isEmpty()) {
        System.err.println("could not detect functions in package:" + packageName);
    }

    com.google.common.base.Function<String, String> quoteFun = new com.google.common.base.Function<String, String>() {
        public String apply(String funName) {
            // paste('toggleProbes', paste(deparse(args(AnnotationDbi::toggleProbes)), collapse=""), sep='||')
            return "cat(paste('" + funName + "', paste(deparse(args(" + packageName + "::" + funName
                    + ")), collapse=''), sep='----'), fill=1)";
            // return "paste(" + funName + ", args(" + packageName + "::" + funName + "), sep='||');";
        }
    };

    // allFuns = allFuns.subList(5, 92);
    //
    // String getFunSigs = Joiner.on(";").join(Lists.transform(allFuns, quoteFun));
    // String getFunSigs = "sigfuns <- c(" + Joiner.on(",").join(allFuns) + ");" +
    //         "beautify_args <- function(name) { paste(deparse(substitute(name)), deparse(args(name)), collapse=\"\") }; " +
    //         "beautify_args(sigfuns)";
    // System.err.println(getFunSigs);

    // Query function signatures in batches of 50 to keep the generated R command short
    for (List<String> funNamesBatch : Lists.partition(allFuns, 50)) {
        String getFunSigs = Joiner.on(";").join(Lists.transform(funNamesBatch, quoteFun));
        String funSigs = CachingUtils.evalRCommand(getFunSigs);

        List<String> strings = Splitter.on("\n").trimResults().splitToList(funSigs);
        for (String funSig : strings) {
            String[] splitSig = funSig.split("----");
            if (splitSig.length == 1)
                continue;

            String funName = splitSig[0];
            String signature = splitSig[1].replace("NULL", "").replace("\n", "");
            api.put(funName, new Function(funName, signature));
        }
    }

    // correct but too slow
    // for (String funName : allFuns) {
    //     String funSig = CachingUtils.evalRCommand("args(" + packageName + "::" + funName + ")");
    //     funSig = funSig.replace("NULL", "").replace("\n", "");
    //     if (funSig.isEmpty()) {
    //         continue;
    //     }
    //     api.put(funName, new Function(funName, funSig));
    // }

    // String rawFunSigs = CachingUtils.evalRCommand("library(" + packageName + "); print('----'); lsf.str('package:" + packageName + "')");
    // String[] splitFunSignatures = rawFunSigs.split("----\"\n")[1].replace("\n ", "").split("\n");
    // List<String> funSigs = new ArrayList<String>();
    // for (int i = 1; i < splitFunSignatures.length - 2; i++) {
    //     String curLine = splitFunSignatures[i];
    //     if (curLine.contains(" : ")) {
    //         funSigs.add(curLine);
    //     } else {
    //         funSigs.add(funSigs.remove(funSigs.size() - 1) + curLine);
    //     }
    // }
    //
    // for (String nameAndSig : funSigs) {
    //     int nameSplitter = nameAndSig.indexOf(':');
    //     String funName = nameAndSig.substring(0, nameSplitter).trim();
    //     String funSignature = nameAndSig.substring(nameSplitter + 2, nameAndSig.length()).trim();
    //     api.put(funName, new Function(funName, funSignature));
    // }

    String[] rawDocStrings = CachingUtils
            .evalRCommand("pckgDocu <-library(help = " + packageName + "); pckgDocu$info[[2]]").split("\n");

    List<String> fusedDocStrings = new ArrayList<String>();
    String curGroup = null;
    for (int i = 0; i < rawDocStrings.length - 2; i++) {
        String curRawDoc = rawDocStrings[i];
        String curLine = curRawDoc.substring(curRawDoc.indexOf("\""), curRawDoc.length());
        curLine = curLine.replace("\"", "").replace("\n", "");
        if (curLine.startsWith(" ")) {
            curGroup += curLine.trim();
        } else {
            if (!Strings.isNullOrEmpty(curGroup)) {
                fusedDocStrings.add(curGroup);
            }
            curGroup = curLine.trim();
        }
    }

    for (String docString : fusedDocStrings) {
        int splitter = docString.indexOf(" ");
        if (splitter < 0) {
            System.err.println("doc string parsing failed for: " + docString);
            continue;
        }

        String funName = docString.substring(0, splitter).trim();
        String fundDesc = docString.substring(splitter, docString.length()).trim();

        Function function = api.get(funName);
        if (function != null) {
            function.setShortDesc(fundDesc);
        } else {
            System.err.println("could not find function for " + funName);
        }
    }

    // String curFunName = null, curFunDesc = "";
    // matcher = Pattern.compile("1] \"(.*)\"").matcher(output);
    // matcher.find();
    // String funDescs = matcher.group(1);
    // for (String docuLine : funDescs.split(lineBreaker)) {
    //     if (docuLine.startsWith(" ")) {
    //         curFunDesc += " " + docuLine.trim();
    //     } else {
    //         if (curFunName != null) {
    //             if (funNames.contains(curFunName) &&
    //                     curFunName.matches("^[A-z.].*") &&
    //                     !curFunName.equals("function") &&
    //                     !curFunName.contains("<-") &&
    //                     !curFunName.startsWith("["))
    //                 api.add(new Function(curFunName, curFunDesc));
    //         }
    //
    //         String[] splitLine = docuLine.replaceFirst(" ", "____").split("____");
    //         curFunName = splitLine[0];
    //         curFunDesc = splitLine.length == 2 ? splitLine[1].trim() : "";
    //     }
    // }

    // compile dependency list
    List<String> cleanedDeps = getDependencies(packageName);
    String packageVersion = getPackageVersion(packageName);

    RPackage rPackage = new RPackage(packageName, new ArrayList<Function>(api.values()), packageVersion, cleanedDeps);

    // // add function definitions
    // StringBuilder getFunImplsCmd = new StringBuilder("library(" + packageName + ");\n");
    // for (Function function : api) {
    //     String funName = function.getFunName();
    //     getFunImplsCmd.append("print(\"" + funName + "\"); if(is.function(try(" + funName + "))) {" + funName + ";} else{ NULL};\n");
    // }
    //
    // File tmpScript = File.createTempFile("rplugin", "R");
    // BufferedWriter out = new BufferedWriter(new FileWriter(tmpScript));
    // out.write(getFunImplsCmd.toString());
    // out.close();
    //
    // String funImpls = CachingUtils.evalRScript(tmpScript);
    // // tmpScript.delete();
    //
    // matcher = Pattern.compile("1] \"(.*)\"\n(function.*)", Pattern.DOTALL).matcher(funImpls);
    // // String[] splitFuns = funImpls.split("\n?\\[1] \"(.*)\"\n?");
    // String[] splitFuns = funImpls.split("> print.*\n.*\n");
    //
    // // if(splitFuns.length != )
    //
    // for (int i = 0; i < api.size(); i++) {
    //     Function anApi = api.get(i);
    //     matcher.find();
    //     anApi.setFunSignature(splitFuns[i + 1]);
    // }

    return rPackage;
}
From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.ClassifierProvider.java
default List<Double> crossTrainInfer(List<Map<String, Double>> X, List<String> Y, ResampleType resampleType,
        String label) throws AnalysisEngineProcessException {
    Set<Integer> indexes = IntStream.range(0, X.size()).boxed().collect(toSet());
    List<Integer> indexList = new ArrayList<>(indexes);
    Collections.shuffle(indexList);
    // nfolds is really the fold *size* (about a tenth of the data), so partitioning yields ~10 folds
    int nfolds = (int) Math.ceil(indexList.size() / 10.0);
    List<Double> ret = IntStream.range(0, X.size()).mapToObj(i -> Double.NaN).collect(toList());
    int fold = 1;
    for (List<Integer> cvTestIndexes : Lists.partition(indexList, nfolds)) {
        LOG.info("Train Predict Fold {}", fold++);
        List<Map<String, Double>> cvTrainX = new ArrayList<>();
        List<String> cvTrainY = new ArrayList<>();
        Sets.difference(indexes, new HashSet<>(cvTestIndexes)).forEach(cvTrainIndex -> {
            cvTrainX.add(X.get(cvTrainIndex));
            cvTrainY.add(Y.get(cvTrainIndex));
        });
        train(cvTrainX, cvTrainY, resampleType, false);
        for (int cvTestIndex : cvTestIndexes) {
            double result = infer(X.get(cvTestIndex), label);
            ret.set(cvTestIndex, result);
        }
    }
    return ret;
}