// Java tutorial
/* * Copyright 2016 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package uk.ac.ebi.eva.pipeline.jobs; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.opencb.biodata.models.variant.VariantSource; import org.opencb.opencga.lib.common.Config; import org.opencb.opencga.storage.core.variant.VariantStorageManager; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.Job; import org.springframework.batch.core.JobExecution; import org.springframework.batch.core.StepExecution; import org.springframework.batch.core.launch.JobLauncher; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.boot.test.IntegrationTest; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; import uk.ac.ebi.eva.pipeline.configuration.JobOptions; import uk.ac.ebi.eva.pipeline.configuration.GenotypedVcfWorkflowConfiguration; import uk.ac.ebi.eva.pipeline.jobs.steps.VepAnnotationGeneratorStep; import uk.ac.ebi.eva.pipeline.jobs.steps.VepInputGeneratorStep; import uk.ac.ebi.eva.pipeline.jobs.steps.AnnotationLoaderStep; import uk.ac.ebi.eva.test.utils.JobTestUtils; import java.io.File; import java.nio.file.Paths; import java.util.*; 
import static org.junit.Assert.*; /** * @author Diego Poggioli * * Workflow test for {@link GenotypedVcfJob} */ @IntegrationTest @RunWith(SpringJUnit4ClassRunner.class) @ContextConfiguration(classes = { JobOptions.class, GenotypedVcfJob.class, GenotypedVcfWorkflowConfiguration.class }) public class GenotypedVcfJobWorkflowTest { private JobLauncherTestUtils jobLauncherTestUtils; @Autowired private JobOptions jobOptions; @Autowired private JobLauncher jobLauncher; @Autowired @Qualifier("genotypedJob") public Job job; private String inputFileResouce; private String outputDir; private String compressExtension; private String dbName; private String vepInput; private String vepOutput; private static String opencgaHome = System.getenv("OPENCGA_HOME") != null ? System.getenv("OPENCGA_HOME") : "/opt/opencga"; @Test public void allStepsShouldBeExecuted() throws Exception { initVariantConfigurationJob(); JobExecution execution = jobLauncherTestUtils.launchJob(); assertEquals(ExitStatus.COMPLETED, execution.getExitStatus()); assertEquals(7, execution.getStepExecutions().size()); List<StepExecution> steps = new ArrayList<>(execution.getStepExecutions()); StepExecution transformStep = steps.get(0); StepExecution loadStep = steps.get(1); Map<String, StepExecution> parallelStepsNameToStepExecution = new HashMap<>(); for (int i = 2; i <= steps.size() - 1; i++) { parallelStepsNameToStepExecution.put(steps.get(i).getStepName(), steps.get(i)); } assertEquals(GenotypedVcfJob.NORMALIZE_VARIANTS, transformStep.getStepName()); assertEquals(GenotypedVcfJob.LOAD_VARIANTS, loadStep.getStepName()); Set<String> parallelStepNamesExecuted = parallelStepsNameToStepExecution.keySet(); Set<String> parallelStepNamesToCheck = new HashSet<>(Arrays.asList( PopulationStatisticsJob.CALCULATE_STATISTICS, PopulationStatisticsJob.LOAD_STATISTICS, VepInputGeneratorStep.FIND_VARIANTS_TO_ANNOTATE, VepAnnotationGeneratorStep.GENERATE_VEP_ANNOTATION, AnnotationLoaderStep.LOAD_VEP_ANNOTATION)); 
assertEquals(parallelStepNamesToCheck, parallelStepNamesExecuted); assertTrue(transformStep.getEndTime().before(loadStep.getStartTime())); assertTrue(loadStep.getEndTime().before( parallelStepsNameToStepExecution.get(PopulationStatisticsJob.CALCULATE_STATISTICS).getStartTime())); assertTrue(loadStep.getEndTime().before(parallelStepsNameToStepExecution .get(VepInputGeneratorStep.FIND_VARIANTS_TO_ANNOTATE).getStartTime())); assertTrue(parallelStepsNameToStepExecution.get(PopulationStatisticsJob.CALCULATE_STATISTICS).getEndTime() .before(parallelStepsNameToStepExecution.get(PopulationStatisticsJob.LOAD_STATISTICS) .getStartTime())); assertTrue(parallelStepsNameToStepExecution.get(VepInputGeneratorStep.FIND_VARIANTS_TO_ANNOTATE) .getEndTime().before(parallelStepsNameToStepExecution .get(VepAnnotationGeneratorStep.GENERATE_VEP_ANNOTATION).getStartTime())); assertTrue(parallelStepsNameToStepExecution.get(VepAnnotationGeneratorStep.GENERATE_VEP_ANNOTATION) .getEndTime().before(parallelStepsNameToStepExecution.get(AnnotationLoaderStep.LOAD_VEP_ANNOTATION) .getStartTime())); } @Test public void optionalStepsShouldBeSkipped() throws Exception { initVariantConfigurationJob(); jobOptions.getPipelineOptions().put(AnnotationJob.SKIP_ANNOT, true); jobOptions.getPipelineOptions().put(PopulationStatisticsJob.SKIP_STATS, true); JobExecution execution = jobLauncherTestUtils.launchJob(); assertEquals(ExitStatus.COMPLETED, execution.getExitStatus()); assertEquals(2, execution.getStepExecutions().size()); List<StepExecution> steps = new ArrayList<>(execution.getStepExecutions()); StepExecution transformStep = steps.get(0); StepExecution loadStep = steps.get(1); assertEquals(GenotypedVcfJob.NORMALIZE_VARIANTS, transformStep.getStepName()); assertEquals(GenotypedVcfJob.LOAD_VARIANTS, loadStep.getStepName()); assertTrue(transformStep.getEndTime().before(loadStep.getStartTime())); } @Test public void statsStepsShouldBeSkipped() throws Exception { initVariantConfigurationJob(); 
jobOptions.getPipelineOptions().put(PopulationStatisticsJob.SKIP_STATS, true); jobOptions.getPipelineOptions().put("db.name", "diegoTest"); JobExecution execution = jobLauncherTestUtils.launchJob(); assertEquals(ExitStatus.COMPLETED, execution.getExitStatus()); assertEquals(5, execution.getStepExecutions().size()); List<StepExecution> steps = new ArrayList<>(execution.getStepExecutions()); StepExecution transformStep = steps.get(0); StepExecution loadStep = steps.get(1); Map<String, StepExecution> parallelStepsNameToStepExecution = new HashMap<>(); for (int i = 2; i <= steps.size() - 1; i++) { parallelStepsNameToStepExecution.put(steps.get(i).getStepName(), steps.get(i)); } assertEquals(GenotypedVcfJob.NORMALIZE_VARIANTS, transformStep.getStepName()); assertEquals(GenotypedVcfJob.LOAD_VARIANTS, loadStep.getStepName()); Set<String> parallelStepNamesExecuted = parallelStepsNameToStepExecution.keySet(); Set<String> parallelStepNamesToCheck = new HashSet<>(Arrays.asList( VepInputGeneratorStep.FIND_VARIANTS_TO_ANNOTATE, VepAnnotationGeneratorStep.GENERATE_VEP_ANNOTATION, AnnotationLoaderStep.LOAD_VEP_ANNOTATION)); assertEquals(parallelStepNamesToCheck, parallelStepNamesExecuted); assertTrue(transformStep.getEndTime().before(loadStep.getStartTime())); assertTrue(loadStep.getEndTime().before(parallelStepsNameToStepExecution .get(VepInputGeneratorStep.FIND_VARIANTS_TO_ANNOTATE).getStartTime())); assertTrue(parallelStepsNameToStepExecution.get(VepInputGeneratorStep.FIND_VARIANTS_TO_ANNOTATE) .getEndTime().before(parallelStepsNameToStepExecution .get(VepAnnotationGeneratorStep.GENERATE_VEP_ANNOTATION).getStartTime())); assertTrue(parallelStepsNameToStepExecution.get(VepAnnotationGeneratorStep.GENERATE_VEP_ANNOTATION) .getEndTime().before(parallelStepsNameToStepExecution.get(AnnotationLoaderStep.LOAD_VEP_ANNOTATION) .getStartTime())); } @Test public void annotationStepsShouldBeSkipped() throws Exception { initVariantConfigurationJob(); 
jobOptions.getPipelineOptions().put(AnnotationJob.SKIP_ANNOT, true); JobExecution execution = jobLauncherTestUtils.launchJob(); assertEquals(ExitStatus.COMPLETED, execution.getExitStatus()); assertEquals(4, execution.getStepExecutions().size()); List<StepExecution> steps = new ArrayList<>(execution.getStepExecutions()); StepExecution transformStep = steps.get(0); StepExecution loadStep = steps.get(1); Map<String, StepExecution> parallelStepsNameToStepExecution = new HashMap<>(); for (int i = 2; i <= steps.size() - 1; i++) { parallelStepsNameToStepExecution.put(steps.get(i).getStepName(), steps.get(i)); } assertEquals(GenotypedVcfJob.NORMALIZE_VARIANTS, transformStep.getStepName()); assertEquals(GenotypedVcfJob.LOAD_VARIANTS, loadStep.getStepName()); Set<String> parallelStepNamesExecuted = parallelStepsNameToStepExecution.keySet(); Set<String> parallelStepNamesToCheck = new HashSet<>(Arrays .asList(PopulationStatisticsJob.CALCULATE_STATISTICS, PopulationStatisticsJob.LOAD_STATISTICS)); assertEquals(parallelStepNamesToCheck, parallelStepNamesExecuted); assertTrue(transformStep.getEndTime().before(loadStep.getStartTime())); assertTrue(loadStep.getEndTime().before( parallelStepsNameToStepExecution.get(PopulationStatisticsJob.CALCULATE_STATISTICS).getStartTime())); assertTrue(parallelStepsNameToStepExecution.get(PopulationStatisticsJob.CALCULATE_STATISTICS).getEndTime() .before(parallelStepsNameToStepExecution.get(PopulationStatisticsJob.LOAD_STATISTICS) .getStartTime())); } /** * JobLauncherTestUtils is initialized here because in GenotypedVcfJob there are two Job beans * in this way it is possible to specify the Job to run (and avoid NoUniqueBeanDefinitionException) * @throws Exception */ @Before public void setUp() throws Exception { jobOptions.loadArgs(); jobLauncherTestUtils = new JobLauncherTestUtils(); jobLauncherTestUtils.setJob(job); jobLauncherTestUtils.setJobLauncher(jobLauncher); inputFileResouce = jobOptions.getPipelineOptions().getString("input.vcf"); 
outputDir = jobOptions.getPipelineOptions().getString("output.dir"); compressExtension = jobOptions.getPipelineOptions().getString("compressExtension"); dbName = jobOptions.getPipelineOptions().getString("db.name"); vepInput = jobOptions.getPipelineOptions().getString("vep.input"); vepOutput = jobOptions.getPipelineOptions().getString("vep.output"); JobTestUtils.cleanDBs(dbName); } @After public void tearDown() throws Exception { JobTestUtils.cleanDBs(dbName); } private void initVariantConfigurationJob() { String inputFile = GenotypedVcfJobTest.class.getResource(inputFileResouce).getFile(); String mockVep = GenotypedVcfJobTest.class.getResource("/mockvep.pl").getFile(); jobOptions.getPipelineOptions().put("input.vcf", inputFile); jobOptions.getPipelineOptions().put("app.vep.path", mockVep); Config.setOpenCGAHome(opencgaHome); // transformedVcf file init String transformedVcf = outputDir + inputFileResouce + ".variants.json" + compressExtension; File transformedVcfFile = new File(transformedVcf); transformedVcfFile.delete(); assertFalse(transformedVcfFile.exists()); //stats file init VariantSource source = (VariantSource) jobOptions.getVariantOptions() .get(VariantStorageManager.VARIANT_SOURCE); File statsFile = new File(Paths.get(outputDir).resolve(VariantStorageManager.buildFilename(source)) + ".variants.stats.json.gz"); statsFile.delete(); assertFalse(statsFile.exists()); // ensure the stats file doesn't exist from previous executions // annotation files init File vepInputFile = new File(vepInput); vepInputFile.delete(); assertFalse(vepInputFile.exists()); File vepOutputFile = new File(vepOutput); vepOutputFile.delete(); assertFalse(vepOutputFile.exists()); } }