Usage examples for org.apache.commons.configuration.ConfigurationUtils.copy
public static void copy(Configuration source, Configuration target)
Copy all properties from the source configuration to the target configuration.
From source file: org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer.java
public SparkGraphComputer(final HadoopGraph hadoopGraph) { super(hadoopGraph); this.sparkConfiguration = new HadoopConfiguration(); ConfigurationUtils.copy(this.hadoopGraph.configuration(), this.sparkConfiguration); }
From source file: org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer.java
private Future<ComputerResult> submitWithExecutor(Executor exec) { // create the completable future return CompletableFuture.<ComputerResult>supplyAsync(() -> { final long startTime = System.currentTimeMillis(); // apache and hadoop configurations that are used throughout the graph computer computation final org.apache.commons.configuration.Configuration apacheConfiguration = new HadoopConfiguration( this.sparkConfiguration); if (!apacheConfiguration.containsKey(Constants.SPARK_SERIALIZER)) apacheConfiguration.setProperty(Constants.SPARK_SERIALIZER, GryoSerializer.class.getCanonicalName()); apacheConfiguration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT_HAS_EDGES, this.persist.equals(GraphComputer.Persist.EDGES)); final Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(apacheConfiguration); final Storage fileSystemStorage = FileSystemStorage.open(hadoopConfiguration); final Storage sparkContextStorage = SparkContextStorage.open(apacheConfiguration); final boolean inputFromHDFS = FileInputFormat.class.isAssignableFrom( hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, Object.class)); final boolean inputFromSpark = PersistedInputRDD.class.isAssignableFrom( hadoopConfiguration.getClass(Constants.GREMLIN_SPARK_GRAPH_INPUT_RDD, Object.class)); final boolean outputToHDFS = FileOutputFormat.class.isAssignableFrom( hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, Object.class)); final boolean outputToSpark = PersistedOutputRDD.class.isAssignableFrom( hadoopConfiguration.getClass(Constants.GREMLIN_SPARK_GRAPH_OUTPUT_RDD, Object.class)); String inputLocation = null; if (inputFromSpark) inputLocation = Constants//from ww w . j av a 2 s. 
co m .getSearchGraphLocation(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION), sparkContextStorage) .orElse(null); else if (inputFromHDFS) inputLocation = Constants .getSearchGraphLocation(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION), fileSystemStorage) .orElse(null); if (null == inputLocation) inputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION); if (null != inputLocation && hadoopConfiguration.get(Constants.GREMLIN_SPARK_GRAPH_INPUT_RDD, null) == null && // if an InputRDD is specified, then ignore InputFormat hadoopConfiguration.get(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, null) != null && FileInputFormat.class.isAssignableFrom(hadoopConfiguration .getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class))) { try { apacheConfiguration.setProperty(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, FileSystem .get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath().toString()); hadoopConfiguration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, FileSystem .get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath().toString()); } catch (final IOException e) { throw new IllegalStateException(e.getMessage(), e); } } final InputRDD inputRDD; final OutputRDD outputRDD; final boolean filtered; try { inputRDD = hadoopConfiguration .getClass(Constants.GREMLIN_SPARK_GRAPH_INPUT_RDD, InputFormatRDD.class, InputRDD.class) .newInstance(); outputRDD = hadoopConfiguration .getClass(Constants.GREMLIN_SPARK_GRAPH_OUTPUT_RDD, OutputFormatRDD.class, OutputRDD.class) .newInstance(); // if the input class can filter on load, then set the filters if (inputRDD instanceof InputFormatRDD && GraphFilterAware.class .isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class, InputFormat.class))) { GraphFilterAware.storeGraphFilter(apacheConfiguration, hadoopConfiguration, this.graphFilter); filtered = false; } else if 
(inputRDD instanceof GraphFilterAware) { ((GraphFilterAware) inputRDD).setGraphFilter(this.graphFilter); filtered = false; } else if (this.graphFilter.hasFilter()) { filtered = true; } else { filtered = false; } } catch (final InstantiationException | IllegalAccessException e) { throw new IllegalStateException(e.getMessage(), e); } SparkMemory memory = null; // delete output location final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null); if (null != outputLocation) { if (outputToHDFS && fileSystemStorage.exists(outputLocation)) fileSystemStorage.rm(outputLocation); if (outputToSpark && sparkContextStorage.exists(outputLocation)) sparkContextStorage.rm(outputLocation); } // the Spark application name will always be set by SparkContextStorage, thus, INFO the name to make it easier to debug logger.debug(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX + (null == this.vertexProgram ? "No VertexProgram" : this.vertexProgram) + "[" + this.mapReducers + "]"); // create the spark configuration from the graph computer configuration final SparkConf sparkConfiguration = new SparkConf(); hadoopConfiguration.forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue())); // execute the vertex program and map reducers and if there is a failure, auto-close the spark context try { final JavaSparkContext sparkContext = new JavaSparkContext( SparkContext.getOrCreate(sparkConfiguration)); this.loadJars(sparkContext, hadoopConfiguration); // add the project jars to the cluster Spark.create(sparkContext.sc()); // this is the context RDD holder that prevents GC updateLocalConfiguration(sparkContext, sparkConfiguration); // create a message-passing friendly rdd from the input rdd JavaPairRDD<Object, VertexWritable> computedGraphRDD = null; boolean partitioned = false; JavaPairRDD<Object, VertexWritable> loadedGraphRDD = inputRDD.readGraphRDD(apacheConfiguration, sparkContext); // if there are vertex or edge filters, filter the loaded 
graph rdd prior to partitioning and persisting if (filtered) { this.logger.debug("Filtering the loaded graphRDD: " + this.graphFilter); loadedGraphRDD = SparkExecutor.applyGraphFilter(loadedGraphRDD, this.graphFilter); } // if the loaded graph RDD is already partitioned use that partitioner, else partition it with HashPartitioner if (loadedGraphRDD.partitioner().isPresent()) this.logger.debug("Using the existing partitioner associated with the loaded graphRDD: " + loadedGraphRDD.partitioner().get()); else { final Partitioner partitioner = new HashPartitioner( this.workersSet ? this.workers : loadedGraphRDD.partitions().size()); this.logger.debug("Partitioning the loaded graphRDD: " + partitioner); loadedGraphRDD = loadedGraphRDD.partitionBy(partitioner); partitioned = true; } assert loadedGraphRDD.partitioner().isPresent(); // if the loaded graphRDD was already partitioned previous, then this coalesce/repartition will not take place if (this.workersSet) { if (loadedGraphRDD.partitions().size() > this.workers) // ensures that the loaded graphRDD does not have more partitions than workers loadedGraphRDD = loadedGraphRDD.coalesce(this.workers); else if (loadedGraphRDD.partitions().size() < this.workers) // ensures that the loaded graphRDD does not have less partitions than workers loadedGraphRDD = loadedGraphRDD.repartition(this.workers); } // persist the vertex program loaded graph as specified by configuration or else use default cache() which is MEMORY_ONLY if (!inputFromSpark || partitioned || filtered) loadedGraphRDD = loadedGraphRDD.persist(StorageLevel.fromString( hadoopConfiguration.get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY"))); //////////////////////////////// // process the vertex program // //////////////////////////////// if (null != this.vertexProgram) { // set up the vertex program and wire up configurations JavaPairRDD<Object, ViewIncomingPayload<Object>> viewIncomingRDD = null; memory = new SparkMemory(this.vertexProgram, 
this.mapReducers, sparkContext); this.vertexProgram.setup(memory); memory.broadcastMemory(sparkContext); final HadoopConfiguration vertexProgramConfiguration = new HadoopConfiguration(); this.vertexProgram.storeState(vertexProgramConfiguration); ConfigurationUtils.copy(vertexProgramConfiguration, apacheConfiguration); ConfUtil.mergeApacheIntoHadoopConfiguration(vertexProgramConfiguration, hadoopConfiguration); // execute the vertex program while (true) { memory.setInExecute(true); viewIncomingRDD = SparkExecutor.executeVertexProgramIteration(loadedGraphRDD, viewIncomingRDD, memory, vertexProgramConfiguration); memory.setInExecute(false); if (this.vertexProgram.terminate(memory)) break; else { memory.incrIteration(); memory.broadcastMemory(sparkContext); } } memory.complete(); // drop all transient memory keys // write the computed graph to the respective output (rdd or output format) computedGraphRDD = SparkExecutor.prepareFinalGraphRDD(loadedGraphRDD, viewIncomingRDD, this.vertexProgram.getVertexComputeKeys()); if ((hadoopConfiguration.get(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, null) != null || hadoopConfiguration.get(Constants.GREMLIN_SPARK_GRAPH_OUTPUT_RDD, null) != null) && !this.persist.equals(Persist.NOTHING)) { outputRDD.writeGraphRDD(apacheConfiguration, computedGraphRDD); } } final boolean computedGraphCreated = computedGraphRDD != null; if (!computedGraphCreated) computedGraphRDD = loadedGraphRDD; final Memory.Admin finalMemory = null == memory ? 
new MapMemory() : new MapMemory(memory); ////////////////////////////// // process the map reducers // ////////////////////////////// if (!this.mapReducers.isEmpty()) { if (computedGraphCreated && !outputToSpark) { // drop all the edges of the graph as they are not used in mapReduce processing computedGraphRDD = computedGraphRDD.mapValues(vertexWritable -> { vertexWritable.get().dropEdges(Direction.BOTH); return vertexWritable; }); // if there is only one MapReduce to execute, don't bother wasting the clock cycles. if (this.mapReducers.size() > 1) computedGraphRDD = computedGraphRDD.persist(StorageLevel.fromString(hadoopConfiguration .get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY"))); } for (final MapReduce mapReduce : this.mapReducers) { // execute the map reduce job final HadoopConfiguration newApacheConfiguration = new HadoopConfiguration( apacheConfiguration); mapReduce.storeState(newApacheConfiguration); // map final JavaPairRDD mapRDD = SparkExecutor.executeMap((JavaPairRDD) computedGraphRDD, mapReduce, newApacheConfiguration); // combine final JavaPairRDD combineRDD = mapReduce.doStage(MapReduce.Stage.COMBINE) ? SparkExecutor.executeCombine(mapRDD, newApacheConfiguration) : mapRDD; // reduce final JavaPairRDD reduceRDD = mapReduce.doStage(MapReduce.Stage.REDUCE) ? 
SparkExecutor.executeReduce(combineRDD, mapReduce, newApacheConfiguration) : combineRDD; // write the map reduce output back to disk and computer result memory try { mapReduce.addResultToMemory(finalMemory, hadoopConfiguration .getClass(Constants.GREMLIN_SPARK_GRAPH_OUTPUT_RDD, OutputFormatRDD.class, OutputRDD.class) .newInstance().writeMemoryRDD(apacheConfiguration, mapReduce.getMemoryKey(), reduceRDD)); } catch (final InstantiationException | IllegalAccessException e) { throw new IllegalStateException(e.getMessage(), e); } } } // unpersist the loaded graph if it will not be used again (no PersistedInputRDD) // if the graphRDD was loaded from Spark, but then partitioned, its a different RDD if ((!inputFromSpark || partitioned || filtered) && computedGraphCreated) loadedGraphRDD.unpersist(); // unpersist the computed graph if it will not be used again (no PersistedOutputRDD) if (!outputToSpark || this.persist.equals(GraphComputer.Persist.NOTHING)) computedGraphRDD.unpersist(); // delete any file system or rdd data if persist nothing if (null != outputLocation && this.persist.equals(GraphComputer.Persist.NOTHING)) { if (outputToHDFS) fileSystemStorage.rm(outputLocation); if (outputToSpark) sparkContextStorage.rm(outputLocation); } // update runtime and return the newly computed graph finalMemory.setRuntime(System.currentTimeMillis() - startTime); return new DefaultComputerResult( InputOutputHelper.getOutputGraph(apacheConfiguration, this.resultGraph, this.persist), finalMemory.asImmutable()); } finally { if (!apacheConfiguration.getBoolean(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false)) Spark.close(); } }, exec); }
From source file: org.parosproxy.paros.common.AbstractParam.java
@Override public AbstractParam clone() { try {// ww w . j a v a 2 s .c om AbstractParam clone = this.getClass().newInstance(); FileConfiguration fileConfig = new XMLConfiguration(); ConfigurationUtils.copy(this.getConfig(), fileConfig); clone.load(fileConfig); return clone; } catch (Exception e) { logger.error(e.getMessage(), e); } return null; }