Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.datastax.openflights; import org.apache.commons.configuration.BaseConfiguration; import org.apache.commons.configuration.Configuration; import org.apache.commons.configuration.ConfigurationException; import org.apache.commons.configuration.ConfigurationUtils; import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.tinkerpop.gremlin.process.computer.GraphComputer; import org.apache.tinkerpop.gremlin.process.computer.Memory; import org.apache.tinkerpop.gremlin.process.computer.MessageScope; import org.apache.tinkerpop.gremlin.process.computer.Messenger; import org.apache.tinkerpop.gremlin.process.computer.VertexProgram; import org.apache.tinkerpop.gremlin.process.computer.bulkloading.BulkLoader; import org.apache.tinkerpop.gremlin.process.computer.bulkloading.IncrementalBulkLoader; import org.apache.tinkerpop.gremlin.process.computer.util.AbstractVertexProgramBuilder; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; import org.apache.tinkerpop.gremlin.structure.Direction; import org.apache.tinkerpop.gremlin.structure.Graph; import org.apache.tinkerpop.gremlin.structure.Vertex; import org.apache.tinkerpop.gremlin.structure.VertexProperty; import org.apache.tinkerpop.gremlin.structure.util.GraphFactory; import org.apache.tinkerpop.gremlin.structure.util.StringFactory; import org.javatuples.Pair; import org.javatuples.Tuple; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; /** * @author Daniel Kuppitz (http://gremlin.guru) */ public class OpenflightsBulkLoaderVertexProgram implements VertexProgram<Tuple> { // This is basically a copy of BulkLoadingVertexProgram that fixes https://issues.apache.org/jira/browse/TINKERPOP3-973 private static final Logger LOGGER = LoggerFactory.getLogger(OpenflightsBulkLoaderVertexProgram.class); public static final String BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX = "gremlin.bulkLoaderVertexProgram"; public static final String BULK_LOADER_CLASS_CFG_KEY = String.join(".", BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX, "class"); public static final String BULK_LOADER_VERTEX_ID_CFG_KEY = String.join(".", BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX, "vertexIdProperty"); public static final String INTERMEDIATE_BATCH_SIZE_CFG_KEY = String.join(".", BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX, "intermediateBatchSize"); public static final String KEEP_ORIGINAL_IDS_CFG_KEY = String.join(".", BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX, "keepOriginalIds"); public static final String USER_SUPPLIED_IDS_CFG_KEY = String.join(".", BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX, "userSuppliedIds"); public static final String WRITE_GRAPH_CFG_KEY = String.join(".", BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX, "writeGraph"); public static final String DEFAULT_BULK_LOADER_VERTEX_ID = "bulkLoader.vertex.id"; private final MessageScope messageScope; private final Set<String> elementComputeKeys; private Configuration configuration; private BulkLoader bulkLoader; private Graph graph; private GraphTraversalSource g; private long intermediateBatchSize; private static final ThreadLocal<AtomicLong> counter = new ThreadLocal<AtomicLong>() { @Override protected AtomicLong initialValue() { return new AtomicLong(); } }; private OpenflightsBulkLoaderVertexProgram() { messageScope = MessageScope.Local.of(__::inE); elementComputeKeys = new HashSet<>(); } private BulkLoader createBulkLoader() { final BulkLoader loader; final Configuration config = configuration.subset(BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX); if (config.containsKey("class")) { final String className = config.getString("class"); try { final Class<?> bulkLoaderClass = Class.forName(className); loader = (BulkLoader) bulkLoaderClass.getConstructor().newInstance(); } catch (ClassNotFoundException e) { LOGGER.error("Unable to find custom bulk loader class: {}", className); throw new IllegalStateException(e); } catch (Exception e) { LOGGER.error("Unable to create an instance of the given bulk loader class: {}", className); throw new IllegalStateException(e); } } else { loader = new IncrementalBulkLoader(); } loader.configure(configuration); return loader; } /** * Eventually commits the current transaction and closes the current graph instance. commit() will be called * if close is set true, otherwise it will only be called if the intermediate batch size is set and reached. * * @param close Whether to close the current graph instance after calling commit() or not. */ private void commit(final boolean close) { if (!close && (intermediateBatchSize == 0L || counter.get().incrementAndGet() % intermediateBatchSize != 0)) return; if (null != graph) { if (graph.features().graph().supportsTransactions()) { LOGGER.info("Committing transaction on Graph instance: {} [{}]", graph, counter.get().get()); try { graph.tx().commit(); LOGGER.debug("Committed transaction on Graph instance: {}", graph); } catch (Exception e) { LOGGER.error("Failed to commit transaction on Graph instance: {}", graph); graph.tx().rollback(); throw e; } } if (close) { try { graph.close(); LOGGER.info("Closed Graph instance: {}", graph); graph = null; } catch (Exception e) { LOGGER.warn("Failed to close Graph instance", e); } } } } @Override public void setup(final Memory memory) { counter.get().set(0L); } @Override public void loadState(final Graph graph, final Configuration config) { configuration = new BaseConfiguration(); if (config != null) { ConfigurationUtils.copy(config, configuration); } intermediateBatchSize = configuration.getLong(INTERMEDIATE_BATCH_SIZE_CFG_KEY, 0L); elementComputeKeys .add(configuration.getString(BULK_LOADER_VERTEX_ID_CFG_KEY, DEFAULT_BULK_LOADER_VERTEX_ID)); bulkLoader = createBulkLoader(); } @Override public void storeState(final Configuration config) { VertexProgram.super.storeState(config); if (configuration != null) { ConfigurationUtils.copy(configuration, config); } } @Override public void workerIterationStart(final Memory memory) { if (null == graph) { graph = GraphFactory.open(configuration.subset(WRITE_GRAPH_CFG_KEY)); LOGGER.info("Opened Graph instance: {}", graph); try { if (!graph.features().graph().supportsConcurrentAccess()) { throw new IllegalStateException("The given graph instance does not allow concurrent access."); } g = graph.traversal(); } catch (Exception e) { try { graph.close(); } catch (Exception e2) { LOGGER.warn("Failed to close Graph instance", e2); } throw e; } } else { LOGGER.warn("Leaked Graph instance: {}", graph); } } @Override public void workerIterationEnd(final Memory memory) { this.commit(true); } @Override public void execute(final Vertex sourceVertex, final Messenger<Tuple> messenger, final Memory memory) { try { executeInternal(sourceVertex, messenger, memory); } catch (Exception e) { if (graph.features().graph().supportsTransactions()) { graph.tx().rollback(); } throw e; } } private void executeInternal(final Vertex sourceVertex, final Messenger<Tuple> messenger, final Memory memory) { if (memory.isInitialIteration()) { // get or create the vertex final Vertex targetVertex = bulkLoader.getOrCreateVertex(sourceVertex, graph, g); // write all the properties of the vertex to the newly created vertex final Iterator<VertexProperty<Object>> vpi = sourceVertex.properties(); while (vpi.hasNext()) { bulkLoader.getOrCreateVertexProperty(vpi.next(), targetVertex, graph, g); } this.commit(false); if (!bulkLoader.useUserSuppliedIds()) { // create an id pair and send it to all the vertex's incoming adjacent vertices sourceVertex.property(bulkLoader.getVertexIdProperty(), targetVertex.id()); messenger.sendMessage(messageScope, Pair.with(sourceVertex.id(), targetVertex.id())); } } else if (memory.getIteration() == 1) { if (bulkLoader.useUserSuppliedIds()) { final Vertex outV = bulkLoader.getVertex(sourceVertex, graph, g); sourceVertex.edges(Direction.OUT).forEachRemaining(edge -> { final Vertex inV = bulkLoader.getVertex(edge.inVertex(), graph, g); bulkLoader.getOrCreateEdge(edge, outV, inV, graph, g); this.commit(false); }); } else { // create an id map and populate it with all the incoming messages final Map<Object, Object> idPairs = new HashMap<>(); final Iterator<Tuple> idi = messenger.receiveMessages(); while (idi.hasNext()) { final Tuple idPair = idi.next(); idPairs.put(idPair.getValue(0), idPair.getValue(1)); } // get the vertex with given the dummy id property final Long outVId = sourceVertex.value(bulkLoader.getVertexIdProperty()); final Vertex outV = bulkLoader.getVertexById(outVId, graph, g); // for all the incoming edges of the vertex, get the incoming adjacent vertex and write the edge and its properties sourceVertex.edges(Direction.OUT).forEachRemaining(edge -> { final Object inVId = idPairs.get(edge.inVertex().id()); final Vertex inV = bulkLoader.getVertexById(inVId, graph, g); bulkLoader.getOrCreateEdge(edge, outV, inV, graph, g); this.commit(false); }); } } else if (memory.getIteration() == 2) { final Long vertexId = sourceVertex.value(bulkLoader.getVertexIdProperty()); bulkLoader.getVertexById(vertexId, graph, g).property(bulkLoader.getVertexIdProperty()).remove(); this.commit(false); } } @Override public boolean terminate(final Memory memory) { switch (memory.getIteration()) { case 1: return bulkLoader.keepOriginalIds(); case 2: return true; } return false; } @Override public Set<String> getElementComputeKeys() { return elementComputeKeys; } @Override public Set<MessageScope> getMessageScopes(final Memory memory) { return Collections.singleton(messageScope); } @SuppressWarnings({ "CloneDoesntDeclareCloneNotSupportedException", "CloneDoesntCallSuperClone" }) @Override public VertexProgram<Tuple> clone() { return this; } @Override public GraphComputer.ResultGraph getPreferredResultGraph() { return GraphComputer.ResultGraph.ORIGINAL; } @Override public GraphComputer.Persist getPreferredPersist() { return GraphComputer.Persist.NOTHING; } @Override public String toString() { final StringBuilder sb = new StringBuilder(); if (bulkLoader != null) { sb.append("bulkLoader=").append(bulkLoader.getClass().getSimpleName()).append(","); sb.append("vertexIdProperty=").append(bulkLoader.getVertexIdProperty()).append(","); sb.append("userSuppliedIds=").append(bulkLoader.useUserSuppliedIds()).append(","); sb.append("keepOriginalIds=").append(bulkLoader.keepOriginalIds()).append(","); } else { sb.append("bulkLoader=").append(bulkLoader).append(","); } sb.append("batchSize=").append(intermediateBatchSize); return StringFactory.vertexProgramString(this, sb.toString()); } public static Builder build() { return new Builder(); } public static class Builder extends AbstractVertexProgramBuilder<Builder> { private Builder() { super(OpenflightsBulkLoaderVertexProgram.class); } @SuppressWarnings("unchecked") @Override public OpenflightsBulkLoaderVertexProgram create(final Graph graph) { ConfigurationUtils.append(graph.configuration().subset(BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX), configuration); return (OpenflightsBulkLoaderVertexProgram) VertexProgram.createVertexProgram(graph, configuration); } private void setGraphConfigurationProperty(final String key, final Object value) { configuration.setProperty(String.join(".", WRITE_GRAPH_CFG_KEY, key), value); } /** * Sets the class name of the BulkLoader implementation to be used. */ public Builder bulkLoader(final String className) { configuration.setProperty(BULK_LOADER_CLASS_CFG_KEY, className); return this; } /** * Sets the class of the BulkLoader implementation to be used. */ public Builder bulkLoader(final Class<? extends BulkLoader> clazz) { return bulkLoader(clazz.getCanonicalName()); } /** * Sets the name of the property that is used to store the original vertex identifiers in the target graph. */ public Builder vertexIdProperty(final String name) { configuration.setProperty(BULK_LOADER_VERTEX_ID_CFG_KEY, name); return this; } /** * Specifies whether user supplied identifiers should be used when the bulk loader creates vertices in the * target graph. */ public Builder userSuppliedIds(final boolean useUserSuppliedIds) { configuration.setProperty(USER_SUPPLIED_IDS_CFG_KEY, useUserSuppliedIds); return this; } /** * Specifies whether the original vertex identifiers should be kept in the target graph or not. In case of false * BulkLoaderVertexProgram will add another iteration to remove the properties and it won't be possible to use * the data for further incremental bulk loads. */ public Builder keepOriginalIds(final boolean keepOriginalIds) { configuration.setProperty(KEEP_ORIGINAL_IDS_CFG_KEY, keepOriginalIds); return this; } /** * The batch size for a single transaction (number of vertices in the vertex loading stage; number of edges in * the edge loading stage). */ public Builder intermediateBatchSize(final int batchSize) { configuration.setProperty(INTERMEDIATE_BATCH_SIZE_CFG_KEY, batchSize); return this; } /** * A configuration for the target graph that can be passed to GraphFactory.open(). */ public Builder writeGraph(final String configurationFile) throws ConfigurationException { final Configuration conf = new PropertiesConfiguration(configurationFile); conf.getKeys().forEachRemaining(key -> setGraphConfigurationProperty(key, conf.getProperty(key))); return this; } } @Override public Features getFeatures() { return new Features() { @Override public boolean requiresLocalMessageScopes() { return true; } @Override public boolean requiresVertexPropertyAddition() { return true; } }; } }