List of usage examples for the org.apache.commons.configuration.BaseConfiguration constructor BaseConfiguration()
BaseConfiguration
From source file:org.apache.tinkerpop.gremlin.driver.SettingsTest.java
@Test public void shouldCreateFromConfiguration() { final Configuration conf = new BaseConfiguration(); conf.setProperty("port", 8000); conf.setProperty("nioPoolSize", 16); conf.setProperty("workerPoolSize", 32); conf.setProperty("username", "user1"); conf.setProperty("password", "password1"); conf.setProperty("jaasEntry", "JaasIt"); conf.setProperty("protocol", "protocol0"); conf.setProperty("hosts", Arrays.asList("255.0.0.1", "255.0.0.2", "255.0.0.3")); conf.setProperty("serializer.className", "my.serializers.MySerializer"); conf.setProperty("serializer.config.any", "thing"); conf.setProperty("connectionPool.enableSsl", true); conf.setProperty("connectionPool.trustCertChainFile", "pem"); conf.setProperty("connectionPool.minSize", 100); conf.setProperty("connectionPool.maxSize", 200); conf.setProperty("connectionPool.minSimultaneousUsagePerConnection", 300); conf.setProperty("connectionPool.maxSimultaneousUsagePerConnection", 400); conf.setProperty("connectionPool.maxInProcessPerConnection", 500); conf.setProperty("connectionPool.minInProcessPerConnection", 600); conf.setProperty("connectionPool.maxWaitForConnection", 700); conf.setProperty("connectionPool.maxContentLength", 800); conf.setProperty("connectionPool.reconnectInterval", 900); conf.setProperty("connectionPool.reconnectInitialDelay", 1000); conf.setProperty("connectionPool.resultIterationBatchSize", 1100); conf.setProperty("connectionPool.channelizer", "channelizer0"); final Settings settings = Settings.from(conf); assertEquals(8000, settings.port);// w w w . j av a 2s . 
c o m assertEquals(16, settings.nioPoolSize); assertEquals(32, settings.workerPoolSize); assertEquals("user1", settings.username); assertEquals("password1", settings.password); assertEquals("JaasIt", settings.jaasEntry); assertEquals("protocol0", settings.protocol); assertEquals("my.serializers.MySerializer", settings.serializer.className); assertEquals("thing", settings.serializer.config.get("any")); assertEquals(true, settings.connectionPool.enableSsl); assertEquals("pem", settings.connectionPool.trustCertChainFile); assertEquals(100, settings.connectionPool.minSize); assertEquals(200, settings.connectionPool.maxSize); assertEquals(300, settings.connectionPool.minSimultaneousUsagePerConnection); assertEquals(400, settings.connectionPool.maxSimultaneousUsagePerConnection); assertEquals(500, settings.connectionPool.maxInProcessPerConnection); assertEquals(600, settings.connectionPool.minInProcessPerConnection); assertEquals(700, settings.connectionPool.maxWaitForConnection); assertEquals(800, settings.connectionPool.maxContentLength); assertEquals(900, settings.connectionPool.reconnectInterval); assertEquals(1000, settings.connectionPool.reconnectInitialDelay); assertEquals(1100, settings.connectionPool.resultIterationBatchSize); assertEquals("channelizer0", settings.connectionPool.channelizer); }
From source file:org.apache.tinkerpop.gremlin.giraph.process.computer.GiraphGraphComputer.java
/**
 * Sets the vertex program on this computer and pushes the program's stored state into
 * the Giraph configuration.
 *
 * <p>The state is first written to a {@link BaseConfiguration} with delimiter parsing
 * disabled and then merged into {@code giraphConfiguration}. When the program supplies a
 * message combiner, {@code GiraphMessageCombiner} is registered as the message combiner class.
 *
 * @param vertexProgram the vertex program to run
 * @return this computer
 */
@Override
public GraphComputer program(final VertexProgram vertexProgram) {
    super.program(vertexProgram);
    this.memory.addVertexProgramMemoryComputeKeys(this.vertexProgram);

    // stage the program state in an Apache configuration before copying it to Hadoop
    final BaseConfiguration programState = new BaseConfiguration();
    programState.setDelimiterParsingDisabled(true);
    vertexProgram.storeState(programState);
    ConfUtil.mergeApacheIntoHadoopConfiguration(programState, this.giraphConfiguration);

    if (this.vertexProgram.getMessageCombiner().isPresent()) {
        this.giraphConfiguration.setMessageCombinerClass(GiraphMessageCombiner.class);
    }
    return this;
}
From source file:org.apache.tinkerpop.gremlin.giraph.process.computer.GiraphGraphComputer.java
@Override public int run(final String[] args) { final Storage storage = FileSystemStorage.open(this.giraphConfiguration); storage.rm(this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION)); this.giraphConfiguration.setBoolean(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT_HAS_EDGES, this.persist.equals(Persist.EDGES)); try {/*www .j a v a 2 s . c o m*/ // store vertex and edge filters (will propagate down to native InputFormat or else GiraphVertexInputFormat will process) final BaseConfiguration apacheConfiguration = new BaseConfiguration(); apacheConfiguration.setDelimiterParsingDisabled(true); GraphFilterAware.storeGraphFilter(apacheConfiguration, this.giraphConfiguration, this.graphFilter); // it is possible to run graph computer without a vertex program (and thus, only map reduce jobs if they exist) if (null != this.vertexProgram) { // a way to verify in Giraph whether the traversal will go over the wire or not try { VertexProgram.createVertexProgram(this.hadoopGraph, ConfUtil.makeApacheConfiguration(this.giraphConfiguration)); } catch (final IllegalStateException e) { if (e.getCause() instanceof NumberFormatException) throw new NotSerializableException( "The provided traversal is not serializable and thus, can not be distributed across the cluster"); } // remove historic combiners in configuration propagation (this occurs when job chaining) if (!this.vertexProgram.getMessageCombiner().isPresent()) this.giraphConfiguration.unset(GiraphConstants.MESSAGE_COMBINER_CLASS.getKey()); // split required workers across system (open map slots + max threads per machine = total amount of TinkerPop workers) if (!this.useWorkerThreadsInConfiguration) { final Cluster cluster = new Cluster(GiraphGraphComputer.this.giraphConfiguration); int totalMappers = cluster.getClusterStatus().getMapSlotCapacity() - 1; // 1 is needed for master cluster.close(); if (this.workers <= totalMappers) { this.giraphConfiguration.setWorkerConfiguration(this.workers, this.workers, 
100.0F); this.giraphConfiguration.setNumComputeThreads(1); } else { if (totalMappers == 0) totalMappers = 1; // happens in local mode int threadsPerMapper = Long .valueOf(Math.round((double) this.workers / (double) totalMappers)).intValue(); // TODO: need to find least common denominator this.giraphConfiguration.setWorkerConfiguration(totalMappers, totalMappers, 100.0F); this.giraphConfiguration.setNumComputeThreads(threadsPerMapper); } } // prepare the giraph vertex-centric computing job final GiraphJob job = new GiraphJob(this.giraphConfiguration, Constants.GREMLIN_HADOOP_GIRAPH_JOB_PREFIX + this.vertexProgram); job.getInternalJob().setJarByClass(GiraphGraphComputer.class); this.logger.info(Constants.GREMLIN_HADOOP_GIRAPH_JOB_PREFIX + this.vertexProgram); // handle input paths (if any) String inputLocation = this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION, null); if (null != inputLocation && FileInputFormat.class.isAssignableFrom(this.giraphConfiguration .getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class))) { inputLocation = Constants.getSearchGraphLocation(inputLocation, storage) .orElse(this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION)); FileInputFormat.setInputPaths(job.getInternalJob(), new Path(inputLocation)); } // handle output paths (if any) String outputLocation = this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null); if (null != outputLocation && FileOutputFormat.class.isAssignableFrom(this.giraphConfiguration .getClass(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, OutputFormat.class))) { outputLocation = Constants.getGraphLocation( this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION)); FileOutputFormat.setOutputPath(job.getInternalJob(), new Path(outputLocation)); } // execute the job and wait until it completes (if it fails, throw an exception) if (!job.run(true)) throw new IllegalStateException( "The GiraphGraphComputer job failed -- aborting all 
subsequent MapReduce jobs: " + job.getInternalJob().getStatus().getFailureInfo()); // add vertex program memory values to the return memory for (final MemoryComputeKey memoryComputeKey : this.vertexProgram.getMemoryComputeKeys()) { if (!memoryComputeKey.isTransient() && storage.exists(Constants.getMemoryLocation( this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), memoryComputeKey.getKey()))) { final ObjectWritableIterator iterator = new ObjectWritableIterator(this.giraphConfiguration, new Path(Constants.getMemoryLocation( this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), memoryComputeKey.getKey()))); if (iterator.hasNext()) { this.memory.set(memoryComputeKey.getKey(), iterator.next().getValue()); } // vertex program memory items are not stored on disk storage.rm(Constants.getMemoryLocation( this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), memoryComputeKey.getKey())); } } final Path path = new Path(Constants.getMemoryLocation( this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), Constants.HIDDEN_ITERATION)); this.memory.setIteration( (Integer) new ObjectWritableIterator(this.giraphConfiguration, path).next().getValue()); storage.rm(Constants.getMemoryLocation( this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), Constants.HIDDEN_ITERATION)); } // do map reduce jobs this.giraphConfiguration.setBoolean(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT_HAS_EDGES, this.giraphConfiguration.getBoolean(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT_HAS_EDGES, true)); for (final MapReduce mapReduce : this.mapReducers) { this.memory.addMapReduceMemoryKey(mapReduce); MapReduceHelper.executeMapReduceJob(mapReduce, this.memory, this.giraphConfiguration); } // if no persistence, delete the graph and memory output if (this.persist.equals(Persist.NOTHING)) storage.rm(this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION)); } catch (final Exception e) { throw new 
IllegalStateException(e.getMessage(), e); } return 0; }
From source file:org.apache.tinkerpop.gremlin.hadoop.groovy.plugin.HadoopRemoteAcceptor.java
@Override public Object connect(final List<String> args) throws RemoteException { if (args.size() == 0) { this.hadoopGraph = HadoopGraph.open(new BaseConfiguration()); this.shell.getInterp().getContext().setProperty("g", this.hadoopGraph); }// ww w . java2 s . com if (args.size() == 1) { try { final FileConfiguration configuration = new PropertiesConfiguration(); configuration.load(new File(args.get(0))); this.hadoopGraph = HadoopGraph.open(configuration); this.shell.getInterp().getContext().setProperty("g", this.hadoopGraph); } catch (final Exception e) { throw new RemoteException(e.getMessage(), e); } } else if (args.size() == 2) { try { final FileConfiguration configuration = new PropertiesConfiguration(); configuration.load(new File(args.get(0))); this.hadoopGraph = HadoopGraph.open(configuration); this.graphVariable = args.get(1); this.shell.getInterp().getContext().setProperty(args.get(1), this.hadoopGraph); } catch (final Exception e) { throw new RemoteException(e.getMessage(), e); } } return this.hadoopGraph; }
From source file:org.apache.tinkerpop.gremlin.hadoop.process.computer.giraph.GiraphGraphComputer.java
/**
 * Sets the vertex program on this computer and merges the program's stored state into
 * the Giraph configuration. When the program supplies a message combiner,
 * {@code GiraphMessageCombiner} is registered as the combiner class.
 *
 * @param vertexProgram the vertex program to run
 * @return this computer
 */
@Override
public GraphComputer program(final VertexProgram vertexProgram) {
    super.program(vertexProgram);
    this.memory.addVertexProgramMemoryComputeKeys(this.vertexProgram);

    final BaseConfiguration apacheConfiguration = new BaseConfiguration();
    // Disable list-delimiter parsing so that state values containing the delimiter
    // character (a comma by default in Commons Configuration) are kept as single
    // strings rather than being split into lists before the merge below.
    apacheConfiguration.setDelimiterParsingDisabled(true);
    vertexProgram.storeState(apacheConfiguration);
    ConfUtil.mergeApacheIntoHadoopConfiguration(apacheConfiguration, this.giraphConfiguration);

    this.vertexProgram.getMessageCombiner()
            .ifPresent(combiner -> this.giraphConfiguration.setCombinerClass(GiraphMessageCombiner.class));
    return this;
}
From source file:org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.io.InputOutputRDDTest.java
@Test public void shouldReadFromWriteToArbitraryRDD() throws Exception { final Configuration configuration = new BaseConfiguration(); configuration.setProperty("spark.master", "local[4]"); configuration.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName()); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_INPUT_RDD, ExampleInputRDD.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_RDD, ExampleOutputRDD.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, "hadoop-gremlin/target/test-output"); configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false); /////////* w w w . ja v a2s .co m*/ Graph graph = GraphFactory.open(configuration); graph.compute(SparkGraphComputer.class).result(GraphComputer.ResultGraph.NEW) .persist( GraphComputer.Persist.EDGES) .program(TraversalVertexProgram.build() .traversal( GraphTraversalSource.build() .engine(ComputerTraversalEngine.build().computer(SparkGraphComputer.class)), "gremlin-groovy", "g.V()") .create(graph)) .submit().get(); }
From source file:org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.io.InputRDDTest.java
@Test public void shouldReadFromArbitraryRDD() { final Configuration configuration = new BaseConfiguration(); configuration.setProperty("spark.master", "local[4]"); configuration.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName()); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_INPUT_RDD, ExampleInputRDD.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, GryoOutputFormat.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, "hadoop-gremlin/target/test-output"); configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false); //////////from w w w. j a v a2 s . com Graph graph = GraphFactory.open(configuration); assertEquals(Double.valueOf(123.0d), graph .traversal(GraphTraversalSource.computer(SparkGraphComputer.class)).V().values("age").sum().next()); assertEquals(Long.valueOf(4l), graph.traversal(GraphTraversalSource.computer(SparkGraphComputer.class)).V().count().next()); }
From source file:org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.io.OutputRDDTest.java
@Test public void shouldWriteToArbitraryRDD() throws Exception { final Configuration configuration = new BaseConfiguration(); configuration.setProperty("spark.master", "local[4]"); configuration.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName()); configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, HadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo")); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, GryoInputFormat.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_RDD, ExampleOutputRDD.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, "hadoop-gremlin/target/test-output"); configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false); //////////ww w.jav a 2 s . c o m Graph graph = GraphFactory.open(configuration); graph.compute(SparkGraphComputer.class).result(GraphComputer.ResultGraph.NEW) .persist( GraphComputer.Persist.EDGES) .program(TraversalVertexProgram.build() .traversal( GraphTraversalSource.build() .engine(ComputerTraversalEngine.build().computer(SparkGraphComputer.class)), "gremlin-groovy", "g.V()") .create(graph)) .submit().get(); }
From source file:org.apache.tinkerpop.gremlin.hadoop.process.computer.util.MapReduceHelper.java
public static void executeMapReduceJob(final MapReduce mapReduce, final Memory.Admin memory, final Configuration configuration) throws IOException, ClassNotFoundException, InterruptedException { final Configuration newConfiguration = new Configuration(configuration); final boolean vertexProgramExists = newConfiguration.get(VertexProgram.VERTEX_PROGRAM, null) != null; if (vertexProgramExists) { newConfiguration.set(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputOutputHelper//ww w . j a va 2s .c o m .getInputFormat((Class) newConfiguration .getClass(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, OutputFormat.class)) .getCanonicalName()); newConfiguration.unset(Constants.GREMLIN_HADOOP_GRAPH_FILTER); } final BaseConfiguration apacheConfiguration = new BaseConfiguration(); apacheConfiguration.setDelimiterParsingDisabled(true); mapReduce.storeState(apacheConfiguration); ConfUtil.mergeApacheIntoHadoopConfiguration(apacheConfiguration, newConfiguration); final Optional<Comparator<?>> mapSort = mapReduce.getMapKeySort(); final Optional<Comparator<?>> reduceSort = mapReduce.getReduceKeySort(); newConfiguration.setClass(Constants.GREMLIN_HADOOP_MAP_REDUCE_CLASS, mapReduce.getClass(), MapReduce.class); final Job job = Job.getInstance(newConfiguration, mapReduce.toString()); HadoopGraph.LOGGER.info(Constants.GREMLIN_HADOOP_JOB_PREFIX + mapReduce.toString()); job.setJarByClass(HadoopGraph.class); if (mapSort.isPresent()) job.setSortComparatorClass(ObjectWritableComparator.ObjectWritableMapComparator.class); job.setMapperClass(HadoopMap.class); if (mapReduce.doStage(MapReduce.Stage.REDUCE)) { if (mapReduce.doStage(MapReduce.Stage.COMBINE)) job.setCombinerClass(HadoopCombine.class); job.setReducerClass(HadoopReduce.class); } else { if (mapSort.isPresent()) { job.setReducerClass(Reducer.class); job.setNumReduceTasks(1); // todo: is this necessary to ensure sorted order? 
} else { job.setNumReduceTasks(0); } } job.setMapOutputKeyClass(ObjectWritable.class); job.setMapOutputValueClass(ObjectWritable.class); job.setOutputKeyClass(ObjectWritable.class); job.setOutputValueClass(ObjectWritable.class); job.setInputFormatClass(GraphFilterInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); // if there is no vertex program, then grab the graph from the input location final Path graphPath; if (vertexProgramExists) { graphPath = new Path( Constants.getGraphLocation(newConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION))); } else { graphPath = new Path(newConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION)); } Path memoryPath = new Path( Constants.getMemoryLocation(newConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), (reduceSort.isPresent() ? mapReduce.getMemoryKey() + "-temp" : mapReduce.getMemoryKey()))); if (FileSystem.get(newConfiguration).exists(memoryPath)) { FileSystem.get(newConfiguration).delete(memoryPath, true); } FileInputFormat.setInputPaths(job, graphPath); FileOutputFormat.setOutputPath(job, memoryPath); job.waitForCompletion(true); // if there is a reduce sort, we need to run another identity MapReduce job if (reduceSort.isPresent()) { final Job reduceSortJob = Job.getInstance(newConfiguration, "ReduceKeySort"); reduceSortJob.setSortComparatorClass(ObjectWritableComparator.ObjectWritableReduceComparator.class); reduceSortJob.setMapperClass(Mapper.class); reduceSortJob.setReducerClass(Reducer.class); reduceSortJob.setMapOutputKeyClass(ObjectWritable.class); reduceSortJob.setMapOutputValueClass(ObjectWritable.class); reduceSortJob.setOutputKeyClass(ObjectWritable.class); reduceSortJob.setOutputValueClass(ObjectWritable.class); reduceSortJob.setInputFormatClass(SequenceFileInputFormat.class); reduceSortJob.setOutputFormatClass(SequenceFileOutputFormat.class); reduceSortJob.setNumReduceTasks(1); // todo: is this necessary to ensure sorted order? 
FileInputFormat.setInputPaths(reduceSortJob, memoryPath); final Path sortedMemoryPath = new Path(Constants.getMemoryLocation( newConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), mapReduce.getMemoryKey())); FileOutputFormat.setOutputPath(reduceSortJob, sortedMemoryPath); reduceSortJob.waitForCompletion(true); FileSystem.get(newConfiguration).delete(memoryPath, true); // delete the temporary memory path memoryPath = sortedMemoryPath; } mapReduce.addResultToMemory(memory, new ObjectWritableIterator(newConfiguration, memoryPath)); }
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.FileSystemStorage.java
/**
 * Returns an iterator over at most {@code totalLines} vertices read from {@code location}
 * using the given Hadoop input format.
 *
 * @param location    the location to search for a graph
 * @param parserClass the class used to read the graph; must be assignable to {@code InputFormat}
 * @param totalLines  the maximum number of vertices to return
 * @return an iterator limited to {@code totalLines} vertices
 * @throws java.util.NoSuchElementException if no graph location can be resolved for {@code location}
 * @throws IllegalArgumentException         if {@code parserClass} is not an {@code InputFormat}
 * @throws IllegalStateException            wrapping an {@link IOException} raised while opening the graph
 */
@Override
public Iterator<Vertex> head(final String location, final Class parserClass, final int totalLines) {
    final org.apache.commons.configuration.Configuration configuration = new BaseConfiguration();
    // fail with a message naming the location instead of Optional.get()'s "No value present"
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION,
            Constants.getSearchGraphLocation(location, this)
                    .orElseThrow(() -> new java.util.NoSuchElementException(
                            "No graph location could be resolved for: " + location)));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, parserClass.getCanonicalName());
    try {
        if (InputFormat.class.isAssignableFrom(parserClass))
            return IteratorUtils.limit(new HadoopVertexIterator(HadoopGraph.open(configuration)), totalLines);
    } catch (final IOException e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
    throw new IllegalArgumentException("The provided parser class must be an "
            + InputFormat.class.getCanonicalName() + ": " + parserClass.getCanonicalName());
}