Example usage for org.apache.commons.configuration ConfigurationUtils copy

Introduction

This page collects example usages of the org.apache.commons.configuration.ConfigurationUtils.copy method.

Prototype

public static void copy(Configuration source, Configuration target)

Source Link

Document

Copy all properties from the source configuration to the target configuration.

Usage

From source file:org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer.java

public SparkGraphComputer(final HadoopGraph hadoopGraph) {
    super(hadoopGraph);
    this.sparkConfiguration = new HadoopConfiguration();
    ConfigurationUtils.copy(this.hadoopGraph.configuration(), this.sparkConfiguration);
}

From source file:org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer.java

/**
 * Schedules the whole graph computation as a single asynchronous task on the given executor.
 *
 * <p>The task, in order: derives an Apache and a Hadoop configuration from
 * {@code sparkConfiguration}; resolves the input location; instantiates the configured
 * input and output RDD implementations; removes any existing output; obtains a Spark
 * context; loads, optionally filters, partitions and persists the graph RDD; runs the
 * vertex program (if any) until it terminates; runs every registered MapReduce job; cleans
 * up persisted RDDs and output as dictated by the persist setting; and finally wraps the
 * output graph and memory in a {@code DefaultComputerResult}.
 *
 * <p>I/O and reflection failures ({@code IOException}, {@code InstantiationException},
 * {@code IllegalAccessException}) are rethrown inside the task as
 * {@code IllegalStateException} with the original exception as the cause.
 *
 * @param exec the executor passed to {@code CompletableFuture.supplyAsync}
 * @return a future that yields the computer result once the task has finished
 */
private Future<ComputerResult> submitWithExecutor(Executor exec) {
    // create the completable future
    return CompletableFuture.<ComputerResult>supplyAsync(() -> {
        final long startTime = System.currentTimeMillis();
        // apache and hadoop configurations that are used throughout the graph computer computation
        final org.apache.commons.configuration.Configuration apacheConfiguration = new HadoopConfiguration(
                this.sparkConfiguration);
        // only default the serializer to Gryo when none has been configured
        if (!apacheConfiguration.containsKey(Constants.SPARK_SERIALIZER))
            apacheConfiguration.setProperty(Constants.SPARK_SERIALIZER,
                    GryoSerializer.class.getCanonicalName());
        // record whether the persist setting is EDGES so the output side knows if edges are written
        apacheConfiguration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT_HAS_EDGES,
                this.persist.equals(GraphComputer.Persist.EDGES));
        final Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(apacheConfiguration);
        final Storage fileSystemStorage = FileSystemStorage.open(hadoopConfiguration);
        final Storage sparkContextStorage = SparkContextStorage.open(apacheConfiguration);
        // classify where input comes from and output goes to, based on the configured classes
        final boolean inputFromHDFS = FileInputFormat.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, Object.class));
        final boolean inputFromSpark = PersistedInputRDD.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_SPARK_GRAPH_INPUT_RDD, Object.class));
        final boolean outputToHDFS = FileOutputFormat.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, Object.class));
        final boolean outputToSpark = PersistedOutputRDD.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_SPARK_GRAPH_OUTPUT_RDD, Object.class));
        // look the graph location up in the storage matching the input kind (Spark takes precedence);
        // when that yields nothing, fall back to the raw configured input location
        String inputLocation = null;
        if (inputFromSpark)
            inputLocation = Constants
                    .getSearchGraphLocation(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION),
                            sparkContextStorage)
                    .orElse(null);
        else if (inputFromHDFS)
            inputLocation = Constants
                    .getSearchGraphLocation(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION),
                            fileSystemStorage)
                    .orElse(null);
        if (null == inputLocation)
            inputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION);

        if (null != inputLocation
                && hadoopConfiguration.get(Constants.GREMLIN_SPARK_GRAPH_INPUT_RDD, null) == null && // if an InputRDD is specified, then ignore InputFormat
        hadoopConfiguration.get(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, null) != null
                && FileInputFormat.class.isAssignableFrom(hadoopConfiguration
                        .getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class))) {
            try {
                // store the path reported by the file system for the input location in both configurations
                // NOTE(review): the same file-status lookup is performed twice; it could be resolved once and reused
                apacheConfiguration.setProperty(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, FileSystem
                        .get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath().toString());
                hadoopConfiguration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, FileSystem
                        .get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath().toString());
            } catch (final IOException e) {
                throw new IllegalStateException(e.getMessage(), e);
            }
        }
        final InputRDD inputRDD;
        final OutputRDD outputRDD;
        // true only when a graph filter exists and must be applied here after loading,
        // because neither the input format nor the input RDD takes the filter itself
        final boolean filtered;
        try {
            // instantiate the configured RDD implementations, falling back to InputFormatRDD / OutputFormatRDD
            inputRDD = hadoopConfiguration
                    .getClass(Constants.GREMLIN_SPARK_GRAPH_INPUT_RDD, InputFormatRDD.class, InputRDD.class)
                    .newInstance();
            outputRDD = hadoopConfiguration
                    .getClass(Constants.GREMLIN_SPARK_GRAPH_OUTPUT_RDD, OutputFormatRDD.class, OutputRDD.class)
                    .newInstance();
            // if the input class can filter on load, then set the filters
            if (inputRDD instanceof InputFormatRDD && GraphFilterAware.class
                    .isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT,
                            InputFormat.class, InputFormat.class))) {
                GraphFilterAware.storeGraphFilter(apacheConfiguration, hadoopConfiguration, this.graphFilter);
                filtered = false;
            } else if (inputRDD instanceof GraphFilterAware) {
                ((GraphFilterAware) inputRDD).setGraphFilter(this.graphFilter);
                filtered = false;
            } else if (this.graphFilter.hasFilter()) {
                filtered = true;
            } else {
                filtered = false;
            }
        } catch (final InstantiationException | IllegalAccessException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }

        SparkMemory memory = null;
        // delete output location
        final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null);
        if (null != outputLocation) {
            if (outputToHDFS && fileSystemStorage.exists(outputLocation))
                fileSystemStorage.rm(outputLocation);
            if (outputToSpark && sparkContextStorage.exists(outputLocation))
                sparkContextStorage.rm(outputLocation);
        }

        // the Spark application name will always be set by SparkContextStorage, thus, log the name (at debug level) to make it easier to debug
        logger.debug(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX
                + (null == this.vertexProgram ? "No VertexProgram" : this.vertexProgram) + "["
                + this.mapReducers + "]");

        // create the spark configuration from the graph computer configuration
        final SparkConf sparkConfiguration = new SparkConf();
        hadoopConfiguration.forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
        // execute the vertex program and map reducers and if there is a failure, auto-close the spark context
        try {
            final JavaSparkContext sparkContext = new JavaSparkContext(
                    SparkContext.getOrCreate(sparkConfiguration));
            this.loadJars(sparkContext, hadoopConfiguration); // add the project jars to the cluster
            Spark.create(sparkContext.sc()); // this is the context RDD holder that prevents GC
            updateLocalConfiguration(sparkContext, sparkConfiguration);
            // create a message-passing friendly rdd from the input rdd
            JavaPairRDD<Object, VertexWritable> computedGraphRDD = null;
            boolean partitioned = false;
            JavaPairRDD<Object, VertexWritable> loadedGraphRDD = inputRDD.readGraphRDD(apacheConfiguration,
                    sparkContext);
            // if there are vertex or edge filters, filter the loaded graph rdd prior to partitioning and persisting
            if (filtered) {
                this.logger.debug("Filtering the loaded graphRDD: " + this.graphFilter);
                loadedGraphRDD = SparkExecutor.applyGraphFilter(loadedGraphRDD, this.graphFilter);
            }
            // if the loaded graph RDD is already partitioned use that partitioner, else partition it with HashPartitioner
            if (loadedGraphRDD.partitioner().isPresent())
                this.logger.debug("Using the existing partitioner associated with the loaded graphRDD: "
                        + loadedGraphRDD.partitioner().get());
            else {
                // partition count: the explicit worker count when set, otherwise the RDD's current partition count
                final Partitioner partitioner = new HashPartitioner(
                        this.workersSet ? this.workers : loadedGraphRDD.partitions().size());
                this.logger.debug("Partitioning the loaded graphRDD: " + partitioner);
                loadedGraphRDD = loadedGraphRDD.partitionBy(partitioner);
                partitioned = true;
            }
            assert loadedGraphRDD.partitioner().isPresent();
            // if the loaded graphRDD was already partitioned previous, then this coalesce/repartition will not take place
            if (this.workersSet) {
                if (loadedGraphRDD.partitions().size() > this.workers) // ensures that the loaded graphRDD does not have more partitions than workers
                    loadedGraphRDD = loadedGraphRDD.coalesce(this.workers);
                else if (loadedGraphRDD.partitions().size() < this.workers) // ensures that the loaded graphRDD does not have less partitions than workers
                    loadedGraphRDD = loadedGraphRDD.repartition(this.workers);
            }
            // persist the vertex program loaded graph as specified by configuration or else use default cache() which is MEMORY_ONLY
            if (!inputFromSpark || partitioned || filtered)
                loadedGraphRDD = loadedGraphRDD.persist(StorageLevel.fromString(
                        hadoopConfiguration.get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY")));

            ////////////////////////////////
            // process the vertex program //
            ////////////////////////////////
            if (null != this.vertexProgram) {
                // set up the vertex program and wire up configurations
                JavaPairRDD<Object, ViewIncomingPayload<Object>> viewIncomingRDD = null;
                memory = new SparkMemory(this.vertexProgram, this.mapReducers, sparkContext);
                this.vertexProgram.setup(memory);
                memory.broadcastMemory(sparkContext);
                // the vertex program's stored state is pushed into both the apache and the hadoop configuration
                final HadoopConfiguration vertexProgramConfiguration = new HadoopConfiguration();
                this.vertexProgram.storeState(vertexProgramConfiguration);
                ConfigurationUtils.copy(vertexProgramConfiguration, apacheConfiguration);
                ConfUtil.mergeApacheIntoHadoopConfiguration(vertexProgramConfiguration, hadoopConfiguration);
                // execute the vertex program
                while (true) {
                    memory.setInExecute(true);
                    viewIncomingRDD = SparkExecutor.executeVertexProgramIteration(loadedGraphRDD,
                            viewIncomingRDD, memory, vertexProgramConfiguration);
                    memory.setInExecute(false);
                    // stop once the program reports termination; otherwise advance the iteration and re-broadcast memory
                    if (this.vertexProgram.terminate(memory))
                        break;
                    else {
                        memory.incrIteration();
                        memory.broadcastMemory(sparkContext);
                    }
                }
                memory.complete(); // drop all transient memory keys
                // write the computed graph to the respective output (rdd or output format)
                computedGraphRDD = SparkExecutor.prepareFinalGraphRDD(loadedGraphRDD, viewIncomingRDD,
                        this.vertexProgram.getVertexComputeKeys());
                if ((hadoopConfiguration.get(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, null) != null
                        || hadoopConfiguration.get(Constants.GREMLIN_SPARK_GRAPH_OUTPUT_RDD, null) != null)
                        && !this.persist.equals(Persist.NOTHING)) {
                    outputRDD.writeGraphRDD(apacheConfiguration, computedGraphRDD);
                }
            }

            // when no vertex program produced a graph, the map reducers run over the loaded graph instead
            final boolean computedGraphCreated = computedGraphRDD != null;
            if (!computedGraphCreated)
                computedGraphRDD = loadedGraphRDD;

            // result memory: built from the vertex program's memory when there is one, otherwise empty
            final Memory.Admin finalMemory = null == memory ? new MapMemory() : new MapMemory(memory);

            //////////////////////////////
            // process the map reducers //
            //////////////////////////////
            if (!this.mapReducers.isEmpty()) {
                if (computedGraphCreated && !outputToSpark) {
                    // drop all the edges of the graph as they are not used in mapReduce processing
                    computedGraphRDD = computedGraphRDD.mapValues(vertexWritable -> {
                        vertexWritable.get().dropEdges(Direction.BOTH);
                        return vertexWritable;
                    });
                    // if there is only one MapReduce to execute, don't bother wasting the clock cycles.
                    if (this.mapReducers.size() > 1)
                        computedGraphRDD = computedGraphRDD.persist(StorageLevel.fromString(hadoopConfiguration
                                .get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY")));
                }

                for (final MapReduce mapReduce : this.mapReducers) {
                    // execute the map reduce job
                    // each job stores its state in its own configuration created from the shared apache configuration
                    final HadoopConfiguration newApacheConfiguration = new HadoopConfiguration(
                            apacheConfiguration);
                    mapReduce.storeState(newApacheConfiguration);
                    // map
                    final JavaPairRDD mapRDD = SparkExecutor.executeMap((JavaPairRDD) computedGraphRDD,
                            mapReduce, newApacheConfiguration);
                    // combine
                    final JavaPairRDD combineRDD = mapReduce.doStage(MapReduce.Stage.COMBINE)
                            ? SparkExecutor.executeCombine(mapRDD, newApacheConfiguration)
                            : mapRDD;
                    // reduce
                    final JavaPairRDD reduceRDD = mapReduce.doStage(MapReduce.Stage.REDUCE)
                            ? SparkExecutor.executeReduce(combineRDD, mapReduce, newApacheConfiguration)
                            : combineRDD;
                    // write the map reduce output back to disk and computer result memory
                    try {
                        mapReduce.addResultToMemory(finalMemory,
                                hadoopConfiguration
                                        .getClass(Constants.GREMLIN_SPARK_GRAPH_OUTPUT_RDD,
                                                OutputFormatRDD.class, OutputRDD.class)
                                        .newInstance().writeMemoryRDD(apacheConfiguration,
                                                mapReduce.getMemoryKey(), reduceRDD));
                    } catch (final InstantiationException | IllegalAccessException e) {
                        throw new IllegalStateException(e.getMessage(), e);
                    }
                }
            }

            // unpersist the loaded graph if it will not be used again (no PersistedInputRDD)
            // if the graphRDD was loaded from Spark, but then partitioned, its a different RDD
            if ((!inputFromSpark || partitioned || filtered) && computedGraphCreated)
                loadedGraphRDD.unpersist();
            // unpersist the computed graph if it will not be used again (no PersistedOutputRDD)
            if (!outputToSpark || this.persist.equals(GraphComputer.Persist.NOTHING))
                computedGraphRDD.unpersist();
            // delete any file system or rdd data if persist nothing
            if (null != outputLocation && this.persist.equals(GraphComputer.Persist.NOTHING)) {
                if (outputToHDFS)
                    fileSystemStorage.rm(outputLocation);
                if (outputToSpark)
                    sparkContextStorage.rm(outputLocation);
            }
            // update runtime and return the newly computed graph
            finalMemory.setRuntime(System.currentTimeMillis() - startTime);
            return new DefaultComputerResult(
                    InputOutputHelper.getOutputGraph(apacheConfiguration, this.resultGraph, this.persist),
                    finalMemory.asImmutable());
        } finally {
            // runs on success and on failure: close Spark unless the configuration asks to persist the context
            if (!apacheConfiguration.getBoolean(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false))
                Spark.close();
        }
    }, exec);
}

From source file:org.parosproxy.paros.common.AbstractParam.java

/**
 * Creates a copy of this parameter object.
 *
 * <p>A new instance of the runtime class is created through its no-argument constructor,
 * this object's configuration is copied into a fresh {@code XMLConfiguration}, and the new
 * instance is loaded from that copy.
 *
 * @return the loaded copy, or {@code null} if the instance could not be created or loaded
 *         (the failure is logged rather than propagated)
 */
@Override
public AbstractParam clone() {
    try {
        // Class.newInstance() is deprecated and rethrows checked constructor exceptions
        // undeclared; going through the constructor wraps them in InvocationTargetException.
        AbstractParam clone = this.getClass().getDeclaredConstructor().newInstance();
        FileConfiguration fileConfig = new XMLConfiguration();
        ConfigurationUtils.copy(this.getConfig(), fileConfig);
        clone.load(fileConfig);
        return clone;
    } catch (Exception e) {
        // a wrapped constructor failure can carry a null message, so name the class explicitly
        logger.error("Failed to clone " + this.getClass().getName() + ": " + e.getMessage(), e);
    }
    // callers receive null on failure, as before
    return null;
}