List of usage examples for org.apache.commons.io FileUtils writeLines
public static void writeLines(File file, Collection lines) throws IOException
Writes the toString() value of each item in a collection to the specified File line by line.
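Before the project examples below, here is a minimal, self-contained sketch of the call itself (the file name and contents are illustrative, not taken from the examples):

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.io.FileUtils;

public class WriteLinesExample {
    public static void main(String[] args) throws IOException {
        // Each element's toString() value becomes one line in the file.
        List<String> lines = Arrays.asList("first line", "second line", "third line");

        // Creates the file (and any missing parent directories) if necessary,
        // then writes the collection, terminating each entry with the
        // platform default line separator.
        FileUtils.writeLines(new File("example.txt"), lines);
    }
}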
From source file:uk.ac.tgac.rampart.stage.Amp.java
@Override
public ExecutionResult execute(ExecutionContext executionContext)
        throws InterruptedException, ProcessExecutionException {

    // Shortcut to arguments
    Args args = this.getArgs();

    try {
        // Create AMP Pipeline
        Pipeline ampPipeline = new Pipeline(this.conanExecutorService, args);

        log.debug("Found " + ampPipeline.getProcesses().size() + " AMP stages in pipeline to process");

        // Clear out anything that was here before
        if (args.getOutputDir().exists()) {
            FileUtils.deleteDirectory(args.getOutputDir());
        }

        // Make sure the output directory exists
        args.getAssembliesDir().mkdirs();

        // Create link for the initial input file
        this.getConanProcessService().createLocalSymbolicLink(args.getInputAssembly(),
                new File(args.getAssembliesDir(), "amp-stage-0.fa"));

        // Create a guest user
        ConanUser rampartUser = new GuestUser("daniel.mapleson@tgac.ac.uk");

        // Create the AMP task
        ConanTask<Pipeline> ampTask = new DefaultTaskFactory().createTask(ampPipeline, 0,
                ampPipeline.getArgs().getArgMap(), ConanTask.Priority.HIGHEST, rampartUser);
        ampTask.setId("AMP");
        ampTask.submit();

        // Run the AMP pipeline
        TaskResult result;
        try {
            result = ampTask.execute(executionContext);
        } catch (TaskExecutionException e) {
            throw new ProcessExecutionException(-1, e);
        }

        // Create a symbolic link for the final assembly from the final stage
        this.getConanProcessService().createLocalSymbolicLink(
                new File(args.getAssembliesDir(), "amp-stage-" + ampPipeline.getProcesses().size() + ".fa"),
                args.getFinalAssembly());

        // Output the resource usage to file
        FileUtils.writeLines(new File(args.getOutputDir(), args.getJobPrefix() + ".summary"),
                result.getOutput());

        return new DefaultExecutionResult("rampart-amp", 0,
                result.getOutput().toArray(new String[result.getOutput().size()]), null, -1,
                new ResourceUsage(result.getMaxMemUsage(), result.getActualTotalRuntime(),
                        result.getTotalExternalCputime()));
    } catch (IOException e) {
        throw new ProcessExecutionException(-1, e);
    }
}
From source file:uk.ac.tgac.rampart.stage.analyse.asm.AnalyseAmpAssemblies.java
@Override
public ExecutionResult execute(ExecutionContext executionContext)
        throws ProcessExecutionException, InterruptedException {

    StopWatch stopWatch = new StopWatch();
    stopWatch.start();

    Args args = this.getArgs();

    // Force run parallel to false if not using a scheduler
    if (!executionContext.usingScheduler() && args.isRunParallel()) {
        log.warn("Forcing linear execution due to lack of job scheduler");
        args.setRunParallel(false);
    }

    log.info("Starting Analysis of AMP assemblies");

    if (!args.getOutputDir().exists()) {
        args.getOutputDir().mkdirs();
    }

    // Create requested services
    Set<AssemblyAnalyser> requestedServices = new HashSet<>();
    for (AnalyseAssembliesArgs.ToolArgs requestedService : this.getArgs().getTools()) {
        AssemblyAnalyser aa = this.assemblyAnalyserFactory.create(requestedService.getName(),
                this.conanExecutorService);
        aa.setArgs(requestedService);
        requestedServices.add(aa);
    }

    List<ExecutionResult> jobResults = new ArrayList<>();

    // Just loop through all requested stats levels and execute each.
    // Each stage is processed linearly.
    try {
        for (AssemblyAnalyser analyser : requestedServices) {
            List<File> assemblies = this.findAssemblies(analyser);
            File outputDir = new File(args.getOutputDir(), analyser.getName().toLowerCase());
            String jobPrefix = this.getArgs().getJobPrefix() + "-" + analyser.getName().toLowerCase();

            jobResults.addAll(analyser.execute(assemblies, outputDir, jobPrefix, this.conanExecutorService));
        }

        // Create the stats table with information derived from the configuration file.
        AssemblyStatsTable table = this.createTable();

        // Merge all the results
        for (AssemblyAnalyser analyser : requestedServices) {
            List<File> assemblies = this.findAssemblies(analyser);
            File outputDir = new File(args.getOutputDir(), analyser.getName().toLowerCase());

            Map<String, String> asm2GroupMap = new HashMap<>();
            for (File b : assemblies) {
                asm2GroupMap.put(b.getName(), "amp");
            }

            analyser.updateTable(table, outputDir);
        }

        // Save table to disk
        File finalTSVFile = new File(args.getOutputDir(), "scores.tsv");
        table.saveTsv(finalTSVFile);
        log.debug("Saved final results in TSV format to: " + finalTSVFile.getAbsolutePath());

        // Note: as written, this reuses the same file name as the TSV output above.
        File finalSummaryFile = new File(args.getOutputDir(), "scores.tsv");
        table.saveSummary(finalSummaryFile);
        log.debug("Saved final results in summary format to: " + finalSummaryFile.getAbsolutePath());

        stopWatch.stop();

        TaskResult taskResult = new DefaultTaskResult("rampart-amp_analysis", true, jobResults,
                stopWatch.getTime() / 1000L);

        // Output the resource usage to file
        FileUtils.writeLines(new File(args.getOutputDir(), args.getJobPrefix() + ".summary"),
                taskResult.getOutput());

        return new DefaultExecutionResult(taskResult.getTaskName(), 0, new String[] {}, null, -1,
                new ResourceUsage(taskResult.getMaxMemUsage(), taskResult.getActualTotalRuntime(),
                        taskResult.getTotalExternalCputime()));
    } catch (ConanParameterException | IOException ioe) {
        throw new ProcessExecutionException(5, ioe);
    }
}
From source file:uk.ac.tgac.rampart.stage.analyse.asm.AnalyseMassAssemblies.java
@Override
public ExecutionResult execute(ExecutionContext executionContext)
        throws ProcessExecutionException, InterruptedException {

    try {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        Args args = this.getArgs();

        // Force run parallel to false if not using a scheduler
        if (!executionContext.usingScheduler() && args.isRunParallel()) {
            log.warn("Forcing linear execution due to lack of job scheduler");
            args.setRunParallel(false);
        }

        log.info("Starting Analysis of MASS assemblies");

        if (!args.getOutputDir().exists()) {
            args.getOutputDir().mkdirs();
        }

        // Create requested services
        Set<AssemblyAnalyser> requestedServices = args.getAssemblyAnalysers();
        for (AssemblyAnalyser requestedService : requestedServices) {
            requestedService.setConanExecutorService(this.conanExecutorService);
        }

        // Keep a list of all job ids
        List<ExecutionResult> jobResults = new ArrayList<>();

        List<File> unitigAssemblies = new ArrayList<>();
        List<File> contigAssemblies = new ArrayList<>();
        List<File> scaffoldAssemblies = new ArrayList<>();
        List<File> bestAssemblies = new ArrayList<>();
        List<File> bubbles = new ArrayList<>();
        List<String> mappings = new ArrayList<>();

        int index = 1;

        // Update mass job values with kmer genie info if required
        if (args.kmerCalcResults != null && args.kmerCalcResults.exists()) {
            Mass.setKmerValues(args.getKmerCalcResults(), args.getMassJobs());
        }

        // Loop through MASS groups to get assemblies
        for (MassJob.Args jobArgs : args.getMassJobs()) {
            jobArgs.initialise();

            String massGroup = jobArgs.getName();

            File inputDir = new File(args.getMassDir(), massGroup);

            if (!inputDir.exists()) {
                throw new ProcessExecutionException(-1, "Could not find output from mass group: " + massGroup
                        + "; at: " + inputDir.getAbsolutePath());
            }

            final File unitigsDir = jobArgs.getUnitigsDir();
            final File contigsDir = jobArgs.getContigsDir();
            final File scaffoldsDir = jobArgs.getScaffoldsDir();
            final File longestDir = jobArgs.getLongestDir();

            if (unitigsDir.exists()) {
                unitigAssemblies.addAll(AnalyseMassAssemblies.assembliesFromDir(unitigsDir));
            }

            if (contigsDir.exists()) {
                contigAssemblies.addAll(AnalyseMassAssemblies.assembliesFromDir(contigsDir));
            }

            if (scaffoldsDir.exists()) {
                scaffoldAssemblies.addAll(AnalyseMassAssemblies.assembliesFromDir(scaffoldsDir));
            }

            bestAssemblies.addAll(AnalyseMassAssemblies.assembliesFromDir(longestDir));

            for (Assembler assembler : jobArgs.getAssemblers()) {
                File assembly = assembler.getBestAssembly();

                if (assembler.getBubbleFile() != null) {
                    bubbles.add(assembler.getBubbleFile());
                }

                String run = assembler.getAssemblerArgs().getOutputDir().getName();

                String line = Integer.toString(index) + "\t" + massGroup + "\t" + run + "\t"
                        + assembly.getAbsolutePath() + "\t"
                        + (assembler.getBubbleFile() != null ? assembler.getBubbleFile().getAbsolutePath()
                                : "NA");

                mappings.add(line);

                index++;
            }
        }

        // Make symbolic links for easy access
        File unitigsDir = new File(args.getAssembliesDir(), "unitigs");
        File contigsDir = new File(args.getAssembliesDir(), "contigs");
        File scaffoldsDir = new File(args.getAssembliesDir(), "scaffolds");
        File bestDir = new File(args.getAssembliesDir(), "longest");
        File bubblesDir = new File(args.getAssembliesDir(), "bubbles");

        this.makeLinks(unitigAssemblies, unitigsDir);
        this.makeLinks(contigAssemblies, contigsDir);
        this.makeLinks(scaffoldAssemblies, scaffoldsDir);
        this.makeLinks(bestAssemblies, bestDir);
        this.makeLinks(bubbles, bubblesDir);

        // Write out linkage file
        FileUtils.writeLines(args.getAssemblyLinkageFile(), mappings);

        for (AssemblyAnalyser analyser : requestedServices) {
            File outputDir = new File(args.getOutputDir(), analyser.getName().toLowerCase());
            String jobPrefix = this.getArgs().getJobPrefix() + "-" + analyser.getName().toLowerCase();

            // Run analysis for each assembly grouping if fast. Otherwise just do the highest order assemblies.
            if (analyser.isFast()) {
                if (!unitigAssemblies.isEmpty()) {
                    jobResults.addAll(analyser.execute(unitigAssemblies, new File(outputDir, "unitigs"),
                            jobPrefix + "-unitigs", this.conanExecutorService));
                }

                if (!contigAssemblies.isEmpty()) {
                    jobResults.addAll(analyser.execute(contigAssemblies, new File(outputDir, "contigs"),
                            jobPrefix + "-contigs", this.conanExecutorService));
                }

                if (!scaffoldAssemblies.isEmpty()) {
                    jobResults.addAll(analyser.execute(scaffoldAssemblies, new File(outputDir, "scaffolds"),
                            jobPrefix + "-scaffolds", this.conanExecutorService));
                }
            }

            File bestOutDir = analyser.isFast() ? new File(outputDir, "longest") : outputDir;

            jobResults.addAll(analyser.execute(bestAssemblies, bestOutDir, jobPrefix, this.conanExecutorService));
        }

        stopWatch.stop();

        TaskResult taskResult = new DefaultTaskResult("rampart-mass_analysis", true, jobResults,
                stopWatch.getTime() / 1000L);

        // Output the resource usage to file
        FileUtils.writeLines(new File(args.getOutputDir(), args.getJobPrefix() + ".summary"),
                taskResult.getOutput());

        return new DefaultExecutionResult(taskResult.getTaskName(), 0, new String[] {}, null, -1,
                new ResourceUsage(taskResult.getMaxMemUsage(), taskResult.getActualTotalRuntime(),
                        taskResult.getTotalExternalCputime()));
    } catch (ConanParameterException | IOException e) {
        throw new ProcessExecutionException(4, e);
    }
}
From source file:uk.ac.tgac.rampart.stage.analyse.asm.stats.AssemblyStatsTable.java
public void saveTsv(File outputFile) throws IOException {

    List<String> lines = new ArrayList<>();

    // Add the header line
    lines.add(new AssemblyStats().getStatsFileHeader());

    // Add the data
    for (AssemblyStats stats : this) {
        lines.add(stats.toTabString());
    }

    // Write data to disk
    FileUtils.writeLines(outputFile, lines);
}
From source file:uk.ac.tgac.rampart.stage.analyse.asm.stats.AssemblyStatsTable.java
public void saveSummary(File outputFile) throws IOException {

    List<String> lines = new ArrayList<>();

    // Add the data
    for (AssemblyStats stats : this) {
        lines.add(stats.toString() + "\n");
    }

    // Write data to disk
    FileUtils.writeLines(outputFile, lines);
}
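Note that writeLines already terminates each entry with a line separator, so the explicit "\n" appended to each stats record above produces a blank line between records, presumably intentional formatting for the summary file.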
From source file:uk.ac.tgac.rampart.stage.analyse.reads.KmerAnalysisReads.java
@Override
public ExecutionResult execute(ExecutionContext executionContext)
        throws ProcessExecutionException, InterruptedException {

    try {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        log.info("Starting Kmer Counting on all Reads");

        // Create shortcut to args for convenience
        Args args = this.getArgs();

        // Force run parallel to false if not using a scheduler
        if (!executionContext.usingScheduler() && args.isRunParallel()) {
            log.warn("Forcing linear execution due to lack of job scheduler");
            args.setRunParallel(false);
        }

        // Create the output directory
        args.getOutputDir().mkdirs();

        JobOutputMap jfCountOutputs = new JobOutputMap();
        List<ExecutionResult> jobResults = new ArrayList<>();
        List<ExecutionResult> allJobResults = new ArrayList<>();

        // Create the output directory for the RAW datasets
        File rawOutputDir = new File(args.getOutputDir(), "raw");

        if (!rawOutputDir.exists()) {
            rawOutputDir.mkdirs();
        }

        // Start jellyfish on all RAW datasets
        for (Library lib : args.getAllLibraries()) {
            // Execute jellyfish and add id to list of job ids
            JobOutput jfOut = this.executeJellyfishCount(args, "raw", args.getOutputDir(), lib);
            jobResults.add(jfOut.getResult());
            allJobResults.add(jfOut.getResult());
            jfCountOutputs.updateTracker("raw", jfOut.getOutputFile());
        }

        // Also start jellyfish on all the pre-processed libraries from MECQ
        if (args.getAllMecqs() != null) {
            for (Mecq.EcqArgs ecqArgs : args.getAllMecqs()) {
                // Create the output directory for the RAW datasets
                File ecqOutputDir = new File(args.getOutputDir(), ecqArgs.getName());

                if (!ecqOutputDir.exists()) {
                    ecqOutputDir.mkdirs();
                }

                for (Library lib : ecqArgs.getOutputLibraries()) {
                    // Add jellyfish id to list of job ids
                    JobOutput jfOut = this.executeJellyfishCount(args, ecqArgs.getName(), args.getOutputDir(),
                            lib);
                    jobResults.add(jfOut.getResult());
                    allJobResults.add(jfOut.getResult());
                    jfCountOutputs.updateTracker(ecqArgs.getName(), jfOut.getOutputFile());
                }
            }
        }

        // If we're using a scheduler and we have been asked to run each job
        // in parallel, then we should wait for all those to complete before continuing.
        if (executionContext.usingScheduler() && args.isRunParallel()) {
            log.info("Kmer counting all ECQ groups in parallel, waiting for completion");
            this.conanExecutorService.executeScheduledWait(jobResults, args.getJobPrefix() + "-count-*",
                    ExitStatus.Type.COMPLETED_ANY, args.getJobPrefix() + "-kmer-count-wait",
                    args.getOutputDir());
        }

        // Waiting point... clear job ids.
        jobResults.clear();

        JobOutputMap mergedOutputs = new JobOutputMap();

        // Now execute merge jobs if required
        for (Map.Entry<String, Set<File>> entry : jfCountOutputs.entrySet()) {
            String ecqName = entry.getKey();
            Set<File> fileSet = entry.getValue();

            // Only merge if there's more than one library
            if (fileSet.size() > 1) {
                JobOutput jfOut = this.executeJellyfishMerger(args, ecqName, fileSet,
                        new File(args.getOutputDir(), ecqName));
                jobResults.add(jfOut.getResult());
                allJobResults.add(jfOut.getResult());
                mergedOutputs.updateTracker(ecqName, jfOut.getOutputFile());
            }
        }

        // If we're using a scheduler and we have been asked to run each job
        // in parallel, then we should wait for all those to complete before continuing.
        if (executionContext.usingScheduler() && args.isRunParallel()) {
            log.info("Creating merged kmer counts for all ECQ groups in parallel, waiting for completion");
            this.conanExecutorService.executeScheduledWait(jobResults, args.getJobPrefix() + "-merge-*",
                    ExitStatus.Type.COMPLETED_ANY, args.getJobPrefix() + "-kmer-merge-wait",
                    args.getOutputDir());
        }

        // Waiting point... clear job ids.
        jobResults.clear();

        // Combine all jellyfish out maps
        jfCountOutputs.combine(mergedOutputs);

        String katGcpJobPrefix = args.getJobPrefix() + "-kat-gcp";

        // Run KAT GCP on everything
        List<ExecutionResult> katGcpResults = this.executeKatGcp(jfCountOutputs, katGcpJobPrefix,
                args.getThreadsPerProcess(), args.getMemoryPerProcess(), args.isRunParallel());

        for (ExecutionResult result : katGcpResults) {
            result.setName(result.getName().substring(args.getJobPrefix().length() + 1));
            jobResults.add(result);
            allJobResults.add(result);
        }

        // If we're using a scheduler and we have been asked to run each job
        // in parallel, then we should wait for all those to complete before continuing.
        if (executionContext.usingScheduler() && args.isRunParallel()) {
            log.info("Running \"kat gcp\" for all ECQ groups in parallel, waiting for completion");
            this.conanExecutorService.executeScheduledWait(jobResults, katGcpJobPrefix + "*",
                    ExitStatus.Type.COMPLETED_ANY, args.getJobPrefix() + "-kat-gcp-wait", args.getOutputDir());
        }

        // Waiting point... clear job ids.
        jobResults.clear();

        log.info("Kmer counting of all reads finished.");

        stopWatch.stop();

        TaskResult taskResult = new DefaultTaskResult("rampart-read_analysis-kmer", true, allJobResults,
                stopWatch.getTime() / 1000L);

        // Output the resource usage to file
        FileUtils.writeLines(new File(args.getOutputDir(), args.getJobPrefix() + ".summary"),
                taskResult.getOutput());

        return new DefaultExecutionResult(taskResult.getTaskName(), 0, new String[] {}, null, -1,
                new ResourceUsage(taskResult.getMaxMemUsage(), taskResult.getActualTotalRuntime(),
                        taskResult.getTotalExternalCputime()));
    } catch (ConanParameterException | IOException e) {
        throw new ProcessExecutionException(-1, e);
    }
}
From source file:uk.ac.tgac.rampart.stage.CalcOptimalKmer.java
@Override
public ExecutionResult execute(ExecutionContext executionContext)
        throws InterruptedException, ProcessExecutionException {

    try {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        log.info("Starting Optimal Kmer calculations");

        Args args = this.getArgs();

        // Force run parallel to false if not using a scheduler
        if (!executionContext.usingScheduler() && args.isRunParallel()) {
            log.warn("Forcing linear execution due to lack of job scheduler");
            args.setRunParallel(false);
        }

        List<ExecutionResult> results = new ArrayList<>();
        Map<String, File> kg2FileMap = new HashMap<>();

        this.mass2OptimalKmerMap.clear();

        // Execute each config
        for (Map.Entry<String, List<Library>> entry : args.getKg2inputsMap().entrySet()) {
            File kgOutputDir = new File(args.getOutputDir(), entry.getKey());
            File kgOutputFile = new File(kgOutputDir, "kmergenie_results.log");

            kg2FileMap.put(entry.getKey(), kgOutputFile);

            // Ensure output directory for this MASS run exists
            if (!kgOutputDir.exists() && !kgOutputDir.mkdirs()) {
                throw new IOException(
                        "Couldn't create kmer genie output directory at: " + kgOutputDir.getAbsolutePath());
            }

            ExecutionResult result = this.executeKmerGenie(kgOutputDir, kgOutputFile, entry.getValue());
            result.setName("kmergenie-" + entry.getKey());
            results.add(result);
        }

        // Wait for all assembly jobs to finish if they are running in parallel.
        if (executionContext.usingScheduler() && args.isRunParallel()) {
            log.info("Kmer Genie jobs were executed in parallel, waiting for all to complete");
            this.conanExecutorService.executeScheduledWait(results, args.getJobPrefix() + "-*",
                    ExitStatus.Type.COMPLETED_ANY, args.getJobPrefix() + "-wait", args.getOutputDir());
        }

        // Retrieve kmer genie results
        for (MassJob.Args massJobArgs : args.massJobArgList) {
            String massJobName = massJobArgs.getName();
            String kgName = args.mass2kgMap.get(massJobName);

            if (kgName != null) {
                int bestKmer = KmerGenieV16.Args.getBestKmer(kg2FileMap.get(kgName));

                if (bestKmer <= 0) {
                    throw new IOException(
                            "Best kmer could not be determined by Kmer Genie. Recommend you restart the MASS stage with manually specified kmer values for this MASS job. Mass job : "
                                    + massJobName + "; kmer config: " + kgName);
                } else {
                    log.info("Best kmer for " + massJobName + " (" + kgName + ") is " + bestKmer);
                }

                mass2OptimalKmerMap.put(massJobName, bestKmer);
            }
        }

        List<String> kmerMapLines = new ArrayList<>();

        for (Map.Entry<String, Integer> bestKmerEntry : this.mass2OptimalKmerMap.entrySet()) {
            kmerMapLines.add(bestKmerEntry.getKey() + "\t" + bestKmerEntry.getValue());
        }

        FileUtils.writeLines(args.getResultFile(), kmerMapLines);

        log.info("Written kmer calculations to: " + args.getResultFile().getAbsolutePath());

        stopWatch.stop();

        TaskResult tr = new DefaultTaskResult("rampart-mass-kmercalc", true, results,
                stopWatch.getTime() / 1000L);

        // Output the resource usage to file
        FileUtils.writeLines(new File(args.getOutputDir(), args.getJobPrefix() + ".summary"), tr.getOutput());

        log.info("Optimal Kmer calculations complete");

        return new DefaultExecutionResult(tr.getTaskName(), 0, new String[] {}, null, -1, new ResourceUsage(
                tr.getMaxMemUsage(), tr.getActualTotalRuntime(), tr.getTotalExternalCputime()));
    } catch (IOException ioe) {
        throw new ProcessExecutionException(-1, ioe);
    }
}
From source file:uk.ac.tgac.rampart.stage.Mass.java
@Override
public ExecutionResult execute(ExecutionContext executionContext)
        throws ProcessExecutionException, InterruptedException {

    try {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        log.info("Starting MASS");

        // Get shortcut to the args
        Args args = (Args) this.getProcessArgs();

        // Force run parallel to false if not using a scheduler
        if (!executionContext.usingScheduler() && args.isRunParallel()) {
            log.warn("Forcing linear execution due to lack of job scheduler");
            args.setRunParallel(false);
        }

        List<ExecutionResult> results = new ArrayList<>();
        List<ExecutionResult> allResults = new ArrayList<>();
        List<TaskResult> massJobResults = new ArrayList<>();
        Map<String, Integer> optimalKmerMap = null;

        // Work out kmer genie configs and how they relate to mass jobs
        if (args.kmerCalcArgs != null) {
            setKmerValues(args.kmerCalcArgs.getResultFile(), args.getMassJobArgList());
            log.info("Loaded optimal kmer values");
        }

        log.info("Starting MASS jobs");

        for (MassJob.Args massJobArgs : args.getMassJobArgList()) {
            // Ensure output directory for this MASS run exists
            if (!massJobArgs.getOutputDir().exists() && !massJobArgs.getOutputDir().mkdirs()) {
                throw new IOException("Couldn't create directory for MASS");
            }

            // Execute the mass job and record any job ids
            MassJobResult mjr = this.executeMassJob(massJobArgs, executionContext);
            massJobResults.add(mjr.getAllResults());
        }

        for (TaskResult mjr : massJobResults) {
            results.addAll(mjr.getProcessResults());
            allResults.addAll(mjr.getProcessResults());
        }

        // Wait for all assembly jobs to finish if they are running in parallel.
        if (executionContext.usingScheduler() && args.isRunParallel()) {
            log.info("MASS jobs were executed in parallel, waiting for all to complete");
            this.conanExecutorService.executeScheduledWait(results, args.getJobPrefix() + "-mass-*",
                    ExitStatus.Type.COMPLETED_ANY, args.getJobPrefix() + "-wait", args.getOutputDir());
        }

        // For each MASS job, check that the expected output files exist
        for (MassJob.Args singleMassArgs : args.getMassJobArgList()) {
            for (Assembler asm : singleMassArgs.getAssemblers()) {
                if ((asm.makesScaffolds() && !asm.getScaffoldsFile().exists())
                        || (asm.makesContigs() && !asm.getContigsFile().exists())
                        || (asm.makesUnitigs() && !asm.getUnitigsFile().exists())) {
                    throw new ProcessExecutionException(2,
                            "MASS job \"" + singleMassArgs.getName() + "\" did not produce any output files");
                }
            }
        }

        log.info("MASS complete");

        stopWatch.stop();

        this.taskResult = new DefaultTaskResult("rampart-mass", true, allResults, stopWatch.getTime() / 1000L);

        // Output the resource usage to file
        FileUtils.writeLines(new File(args.getOutputDir(), args.getJobPrefix() + ".summary"),
                this.taskResult.getOutput());

        return new DefaultExecutionResult(this.taskResult.getTaskName(), 0, new String[] {}, null, -1,
                new ResourceUsage(this.taskResult.getMaxMemUsage(), this.taskResult.getActualTotalRuntime(),
                        this.taskResult.getTotalExternalCputime()));
    } catch (IOException ioe) {
        throw new ProcessExecutionException(-1, ioe);
    }
}
From source file:uk.ac.tgac.rampart.stage.Mecq.java
@Override
public ExecutionResult execute(ExecutionContext executionContext)
        throws ProcessExecutionException, InterruptedException {

    try {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        log.info("Starting MECQ Process");

        // Create shortcut to args for convenience
        Args args = this.getArgs();

        // Force run parallel to false if not using a scheduler
        if (!executionContext.usingScheduler() && args.isRunParallel()) {
            log.warn("Forcing linear execution due to lack of job scheduler");
            args.setRunParallel(false);
        }

        // If the output directory doesn't exist then make it
        if (!args.getMecqDir().exists()) {
            log.debug("Creating MECQ directory");
            args.getMecqDir().mkdirs();
            args.getOutputDir().mkdirs();
        }

        // Passthrough links for raw libraries to output
        for (Library lib : args.getLibraries()) {
            this.createOutputLinks(new File(args.getOutputDir(), "raw"), null, null, lib);
        }

        List<ReadEnhancer> readEnhancers = new ArrayList<>();
        List<ExecutionResult> finalResults = new ArrayList<>();
        List<ExecutionResult> results = new ArrayList<>();

        // For each ecq process all libraries
        for (EcqArgs ecqArgs : args.getEqcArgList()) {

            // Process each lib
            for (Library lib : ecqArgs.getLibraries()) {

                // Create the output directory
                File ecqLibDir = new File(ecqArgs.getOutputDir(), lib.getName());

                if (ecqLibDir.exists()) {
                    try {
                        FileUtils.deleteDirectory(ecqLibDir);
                    } catch (IOException e) {
                        throw new ProcessExecutionException(2,
                                "Could not delete ecqDir: " + ecqLibDir.getAbsolutePath(), e);
                    }
                }

                ecqLibDir.mkdirs();

                // Create a job name
                String title = ecqArgs.getName() + "_" + lib.getName();
                String jobName = ecqArgs.getJobPrefix() + "_" + title;

                GenericReadEnhancerArgs genericArgs = new GenericReadEnhancerArgs();
                genericArgs.setInput(lib);
                genericArgs.setOutputDir(ecqLibDir);
                genericArgs.setThreads(ecqArgs.getThreads());
                genericArgs.setMemoryGb(ecqArgs.getMemory());

                // Create the actual error corrector from the user provided EcqArgs
                ReadEnhancer readEnhancer = ReadEnhancerFactory.create(ecqArgs.getTool(), genericArgs,
                        this.conanExecutorService);

                // Configure read enhancer
                if (ecqArgs.getCheckedArgs() != null && !ecqArgs.getCheckedArgs().trim().isEmpty()) {
                    readEnhancer.getReadEnchancerArgs().parse(ecqArgs.getCheckedArgs());
                }
                readEnhancer.getReadEnchancerArgs().setUncheckedArgs(ecqArgs.getUncheckedArgs());
                readEnhancer.setup();

                // Add this to the list in case we need it later
                readEnhancers.add(readEnhancer);

                // Execute this error corrector
                ExecutionResult result = this.conanExecutorService.executeProcess(readEnhancer.toConanProcess(),
                        ecqLibDir, jobName, ecqArgs.getThreads(), ecqArgs.getMemory(),
                        ecqArgs.isRunParallel() || args.isRunParallel());
                result.setName(title);
                results.add(result);
                finalResults.add(result);

                // Create links for outputs from this assembler to known locations
                this.createOutputLinks(new File(args.getOutputDir(), ecqArgs.getName()), readEnhancer, ecqArgs,
                        lib);
            }

            // If we're using a scheduler, and we don't want to run separate ECQs in parallel, but we do want
            // to parallelise each library processed by this ECQ, then wait here.
            if (executionContext.usingScheduler() && ecqArgs.isRunParallel() && !args.isRunParallel()) {
                log.info("Waiting for completion of: " + ecqArgs.getName() + "; for all requested libraries");
                MultiWaitResult mrw = this.conanExecutorService.executeScheduledWait(results,
                        ecqArgs.getJobPrefix() + "*", ExitStatus.Type.COMPLETED_SUCCESS,
                        args.getJobPrefix() + "-wait", ecqArgs.getOutputDir());
                results.clear();
            }
        }

        // If we're using a scheduler and we have been asked to run each MECQ group for each library
        // in parallel, then we should wait for all those to complete before continuing.
        if (executionContext.usingScheduler() && args.isRunParallel() && !args.getEqcArgList().isEmpty()) {
            log.info("Running all ECQ groups in parallel, waiting for completion");
            MultiWaitResult mrw = this.conanExecutorService.executeScheduledWait(results,
                    args.getJobPrefix() + "-ecq*", ExitStatus.Type.COMPLETED_SUCCESS,
                    args.getJobPrefix() + "-wait", args.getMecqDir());
        }

        // For each ecq check all output files exist
        for (EcqArgs ecqArgs : args.getEqcArgList()) {
            for (Library lib : ecqArgs.getOutputLibraries()) {
                for (File file : lib.getFiles()) {
                    if (!file.exists()) {
                        throw new ProcessExecutionException(2, "MECQ job \"" + ecqArgs.name
                                + "\" did not produce the expected output file: " + file.getAbsolutePath());
                    }
                }
            }
        }

        log.info("MECQ Finished");

        stopWatch.stop();

        this.taskResult = new DefaultTaskResult("rampart-mecq", true, finalResults, stopWatch.getTime() / 1000L);

        // Output the resource usage to file
        FileUtils.writeLines(new File(args.getMecqDir(), args.getJobPrefix() + ".summary"),
                this.taskResult.getOutput());

        return new DefaultExecutionResult(this.taskResult.getTaskName(), 0, new String[] {}, null, -1,
                new ResourceUsage(this.taskResult.getMaxMemUsage(), this.taskResult.getActualTotalRuntime(),
                        this.taskResult.getTotalExternalCputime()));
    } catch (IOException e) {
        throw new ProcessExecutionException(2, e);
    }
}
From source file:uk.co.q3c.v7.base.navigate.TextReaderSitemapProviderTest.java
private void prepFile() throws IOException {
    FileUtils.writeLines(modifiedFile, lines);
}
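One point worth noting when adapting these examples: the two-argument writeLines used throughout overwrites the target file and uses the platform default encoding and line separator. Commons IO also provides overloads for appending and for controlling the line ending and encoding; a brief sketch, with file and lines standing in for your own variables:

FileUtils.writeLines(file, lines, true);       // append to the file rather than overwrite
FileUtils.writeLines(file, lines, "\r\n");     // write with an explicit line ending
FileUtils.writeLines(file, "UTF-8", lines);    // write with an explicit character encoding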