com.datastax.openflights.OpenflightsBulkLoaderVertexProgram.java Source code

Java tutorial

Introduction

Here is the source code for com.datastax.openflights.OpenflightsBulkLoaderVertexProgram.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datastax.openflights;

import org.apache.commons.configuration.BaseConfiguration;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.ConfigurationUtils;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
import org.apache.tinkerpop.gremlin.process.computer.Memory;
import org.apache.tinkerpop.gremlin.process.computer.MessageScope;
import org.apache.tinkerpop.gremlin.process.computer.Messenger;
import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
import org.apache.tinkerpop.gremlin.process.computer.bulkloading.BulkLoader;
import org.apache.tinkerpop.gremlin.process.computer.bulkloading.IncrementalBulkLoader;
import org.apache.tinkerpop.gremlin.process.computer.util.AbstractVertexProgramBuilder;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
import org.apache.tinkerpop.gremlin.structure.Direction;
import org.apache.tinkerpop.gremlin.structure.Graph;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import org.apache.tinkerpop.gremlin.structure.VertexProperty;
import org.apache.tinkerpop.gremlin.structure.util.GraphFactory;
import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
import org.javatuples.Pair;
import org.javatuples.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;

/**
 * @author Daniel Kuppitz (http://gremlin.guru)
 */
public class OpenflightsBulkLoaderVertexProgram implements VertexProgram<Tuple> {

    // This is basically a copy of TinkerPop's BulkLoaderVertexProgram that fixes https://issues.apache.org/jira/browse/TINKERPOP3-973

    private static final Logger LOGGER = LoggerFactory.getLogger(OpenflightsBulkLoaderVertexProgram.class);

    /** Common prefix shared by every configuration key of this vertex program. */
    public static final String BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX = "gremlin.bulkLoaderVertexProgram";

    /** Key holding the class name of the {@link BulkLoader} implementation to instantiate. */
    public static final String BULK_LOADER_CLASS_CFG_KEY = BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX + ".class";

    /** Key holding the name of the property used to carry vertex identifiers. */
    public static final String BULK_LOADER_VERTEX_ID_CFG_KEY =
            BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX + ".vertexIdProperty";

    /** Key holding the number of commit requests after which an intermediate commit is issued. */
    public static final String INTERMEDIATE_BATCH_SIZE_CFG_KEY =
            BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX + ".intermediateBatchSize";

    /** Key of the "keep original ids" flag written by the builder. */
    public static final String KEEP_ORIGINAL_IDS_CFG_KEY = BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX + ".keepOriginalIds";

    /** Key of the "user supplied ids" flag written by the builder. */
    public static final String USER_SUPPLIED_IDS_CFG_KEY = BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX + ".userSuppliedIds";

    /** Prefix under which the configuration of the graph to write to is stored. */
    public static final String WRITE_GRAPH_CFG_KEY = BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX + ".writeGraph";

    /** Vertex id property name used when none is configured. */
    public static final String DEFAULT_BULK_LOADER_VERTEX_ID = "bulkLoader.vertex.id";

    // Local message scope built from __::inE; set once in the constructor.
    private final MessageScope messageScope;
    // Element compute keys reported to the graph computer; the vertex id property name is added in loadState().
    private final Set<String> elementComputeKeys;
    // Private copy of the configuration handed to loadState().
    private Configuration configuration;
    // Loader that performs the actual writes; created in loadState().
    private BulkLoader bulkLoader;
    // Graph being written to; opened in workerIterationStart() and closed by commit(true).
    private Graph graph;
    // Traversal source obtained from the graph above.
    private GraphTraversalSource g;
    // Number of commit(false) calls between intermediate commits; 0 disables intermediate commits.
    private long intermediateBatchSize;

    // Per-thread count of commit(false) invocations; every thread starts with its own zeroed AtomicLong.
    private static final ThreadLocal<AtomicLong> counter = ThreadLocal.withInitial(AtomicLong::new);

    /**
     * Creates an unconfigured program with an empty set of compute keys and a local message scope
     * built from {@code __::inE}. Configuration is applied later through {@code loadState}.
     */
    private OpenflightsBulkLoaderVertexProgram() {
        elementComputeKeys = new HashSet<>();
        messageScope = MessageScope.Local.of(__::inE);
    }

    /**
     * Instantiates the bulk loader named under the "class" key of this program's configuration subset,
     * or an {@link IncrementalBulkLoader} when no class is configured, and configures it with the full
     * program configuration.
     *
     * @return the configured bulk loader
     * @throws IllegalStateException if the configured class cannot be found or instantiated
     */
    private BulkLoader createBulkLoader() {
        final Configuration programSettings = configuration.subset(BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX);
        final BulkLoader instance;
        if (!programSettings.containsKey("class")) {
            // no custom implementation requested
            instance = new IncrementalBulkLoader();
        } else {
            final String loaderClassName = programSettings.getString("class");
            try {
                final Class<?> loaderType = Class.forName(loaderClassName);
                instance = (BulkLoader) loaderType.getConstructor().newInstance();
            } catch (ClassNotFoundException notFound) {
                LOGGER.error("Unable to find custom bulk loader class: {}", loaderClassName);
                throw new IllegalStateException(notFound);
            } catch (Exception failure) {
                LOGGER.error("Unable to create an instance of the given bulk loader class: {}", loaderClassName);
                throw new IllegalStateException(failure);
            }
        }
        instance.configure(configuration);
        return instance;
    }

    /**
     * Commits the current transaction when one is due and optionally closes the current graph instance.
     * <p>
     * If {@code close} is {@code true} a commit is always attempted and the graph is closed afterwards, even
     * when the commit fails. If {@code close} is {@code false} the per-thread counter is incremented and a
     * commit is only attempted when an intermediate batch size is set and the counter is a multiple of it.
     * Graphs that do not support transactions are never committed. A failed commit is rolled back and the
     * original exception is rethrown; a failure of the rollback itself is attached to it as a suppressed
     * exception. Failures while closing the graph are logged and not propagated.
     *
     * @param close Whether to close the current graph instance after calling commit() or not.
     */
    private void commit(final boolean close) {
        if (!close && (intermediateBatchSize == 0L || counter.get().incrementAndGet() % intermediateBatchSize != 0)) {
            return;
        }
        if (null == graph) {
            return;
        }
        try {
            if (graph.features().graph().supportsTransactions()) {
                LOGGER.info("Committing transaction on Graph instance: {} [{}]", graph, counter.get().get());
                try {
                    graph.tx().commit();
                    LOGGER.debug("Committed transaction on Graph instance: {}", graph);
                } catch (Exception e) {
                    LOGGER.error("Failed to commit transaction on Graph instance: {}", graph);
                    try {
                        graph.tx().rollback();
                    } catch (Exception rollbackFailure) {
                        // keep the commit failure as the primary exception
                        e.addSuppressed(rollbackFailure);
                    }
                    throw e;
                }
            }
        } finally {
            // runs on the failure path too, so a failed final commit does not leak the graph instance
            if (close) {
                try {
                    graph.close();
                    LOGGER.info("Closed Graph instance: {}", graph);
                    graph = null;
                } catch (Exception e) {
                    LOGGER.warn("Failed to close Graph instance", e);
                }
            }
        }
    }

    /**
     * Resets the calling thread's commit counter to zero.
     *
     * @param memory not used by this implementation
     */
    @Override
    public void setup(final Memory memory) {
        counter.get().set(0L);
    }

    /**
     * Initialises this program from the given configuration: takes a private copy of it, reads the
     * intermediate batch size (default 0), registers the vertex id property name as an element compute key
     * and creates the bulk loader.
     *
     * @param graph  not used by this implementation
     * @param config the configuration to copy; may be {@code null}, in which case defaults apply
     */
    @Override
    public void loadState(final Graph graph, final Configuration config) {
        configuration = new BaseConfiguration();
        if (null != config) {
            ConfigurationUtils.copy(config, configuration);
        }

        intermediateBatchSize = configuration.getLong(INTERMEDIATE_BATCH_SIZE_CFG_KEY, 0L);

        final String vertexIdKey =
                configuration.getString(BULK_LOADER_VERTEX_ID_CFG_KEY, DEFAULT_BULK_LOADER_VERTEX_ID);
        elementComputeKeys.add(vertexIdKey);

        bulkLoader = createBulkLoader();
    }

    /**
     * Writes this program's state into the given configuration: first the default state stored by
     * {@link VertexProgram#storeState(Configuration)}, then every entry of the program's own configuration
     * (if one has been loaded).
     *
     * @param config the configuration to write to
     */
    @Override
    public void storeState(final Configuration config) {
        VertexProgram.super.storeState(config);
        if (null == configuration) {
            return;
        }
        ConfigurationUtils.copy(configuration, config);
    }

    /**
     * Opens the graph to write to, unless one is still open from a previous iteration.
     * <p>
     * The graph is created from the {@code writeGraph} subset of the program configuration. If it does not
     * support concurrent access, or the traversal source cannot be obtained, the graph is closed again, the
     * reference to it is dropped and the exception is rethrown. If a graph instance is still present when
     * this method is called, a warning is logged and that instance is kept.
     *
     * @param memory not used by this implementation
     * @throws IllegalStateException if the opened graph does not allow concurrent access
     */
    @Override
    public void workerIterationStart(final Memory memory) {
        if (null == graph) {
            graph = GraphFactory.open(configuration.subset(WRITE_GRAPH_CFG_KEY));
            LOGGER.info("Opened Graph instance: {}", graph);
            try {
                if (!graph.features().graph().supportsConcurrentAccess()) {
                    throw new IllegalStateException("The given graph instance does not allow concurrent access.");
                }
                g = graph.traversal();
            } catch (Exception e) {
                try {
                    graph.close();
                    // do not keep a reference to a closed graph; later callbacks would otherwise reuse it
                    graph = null;
                } catch (Exception e2) {
                    LOGGER.warn("Failed to close Graph instance", e2);
                }
                throw e;
            }
        } else {
            LOGGER.warn("Leaked Graph instance: {}", graph);
        }
    }

    /**
     * Ends the worker iteration by calling {@code commit(true)}, i.e. a final commit followed by closing
     * the graph instance.
     *
     * @param memory not used by this implementation
     */
    @Override
    public void workerIterationEnd(final Memory memory) {
        this.commit(true);
    }

    /**
     * Processes one source vertex by delegating to {@code executeInternal}. If that fails, the current
     * transaction of the target graph is rolled back (when a graph is open and supports transactions) and
     * the original exception is rethrown. A failure of the rollback itself is attached to the original
     * exception as a suppressed exception instead of replacing it.
     *
     * @param sourceVertex the vertex of the source graph to process
     * @param messenger    used to exchange id pairs between adjacent vertices
     * @param memory       provides the current iteration
     */
    @Override
    public void execute(final Vertex sourceVertex, final Messenger<Tuple> messenger, final Memory memory) {
        try {
            executeInternal(sourceVertex, messenger, memory);
        } catch (Exception e) {
            try {
                // graph may be null if no graph instance is currently open
                if (null != graph && graph.features().graph().supportsTransactions()) {
                    graph.tx().rollback();
                }
            } catch (Exception rollbackFailure) {
                e.addSuppressed(rollbackFailure);
            }
            throw e;
        }
    }

    /**
     * Performs the work of a single iteration for one source vertex.
     * <ul>
     * <li>Initial iteration: gets or creates the target vertex and its properties. Unless user supplied ids
     * are used, the target vertex id is stored on the source vertex and sent as a (source id, target id)
     * pair through the message scope.</li>
     * <li>Iteration 1: writes the outgoing edges of the source vertex, resolving the adjacent target
     * vertices either directly (user supplied ids) or through the received id pairs.</li>
     * <li>Iteration 2: removes the vertex id property from the target vertex.</li>
     * </ul>
     * Target vertex ids are handled as plain objects, so the target graph is not required to use
     * {@code Long} identifiers.
     *
     * @param sourceVertex the vertex of the source graph to process
     * @param messenger    used to send and receive id pairs
     * @param memory       provides the current iteration
     */
    private void executeInternal(final Vertex sourceVertex, final Messenger<Tuple> messenger, final Memory memory) {
        if (memory.isInitialIteration()) {
            // get or create the vertex
            final Vertex targetVertex = bulkLoader.getOrCreateVertex(sourceVertex, graph, g);
            // write all the properties of the vertex to the newly created vertex
            final Iterator<VertexProperty<Object>> vpi = sourceVertex.properties();
            while (vpi.hasNext()) {
                bulkLoader.getOrCreateVertexProperty(vpi.next(), targetVertex, graph, g);
            }
            this.commit(false);
            if (!bulkLoader.useUserSuppliedIds()) {
                // remember the target id on the source vertex and send an id pair through the message scope
                sourceVertex.property(bulkLoader.getVertexIdProperty(), targetVertex.id());
                messenger.sendMessage(messageScope, Pair.with(sourceVertex.id(), targetVertex.id()));
            }
        } else if (memory.getIteration() == 1) {
            if (bulkLoader.useUserSuppliedIds()) {
                final Vertex outV = bulkLoader.getVertex(sourceVertex, graph, g);
                sourceVertex.edges(Direction.OUT).forEachRemaining(edge -> {
                    final Vertex inV = bulkLoader.getVertex(edge.inVertex(), graph, g);
                    bulkLoader.getOrCreateEdge(edge, outV, inV, graph, g);
                    this.commit(false);
                });
            } else {
                // create an id map and populate it with all the incoming messages
                final Map<Object, Object> idPairs = new HashMap<>();
                final Iterator<Tuple> idi = messenger.receiveMessages();
                while (idi.hasNext()) {
                    final Tuple idPair = idi.next();
                    idPairs.put(idPair.getValue(0), idPair.getValue(1));
                }
                // get the target vertex through the id stored in the vertex id property; the id is kept as
                // an Object because it was stored from targetVertex.id(), which is not necessarily a Long
                final Object outVId = sourceVertex.value(bulkLoader.getVertexIdProperty());
                final Vertex outV = bulkLoader.getVertexById(outVId, graph, g);
                // for all the outgoing edges of the vertex, look up the adjacent target vertex and write the
                // edge and its properties
                sourceVertex.edges(Direction.OUT).forEachRemaining(edge -> {
                    final Object inVId = idPairs.get(edge.inVertex().id());
                    final Vertex inV = bulkLoader.getVertexById(inVId, graph, g);
                    bulkLoader.getOrCreateEdge(edge, outV, inV, graph, g);
                    this.commit(false);
                });
            }
        } else if (memory.getIteration() == 2) {
            // this iteration only runs when terminate() did not stop after iteration 1
            final Object vertexId = sourceVertex.value(bulkLoader.getVertexIdProperty());
            bulkLoader.getVertexById(vertexId, graph, g).property(bulkLoader.getVertexIdProperty()).remove();
            this.commit(false);
        }
    }

    /**
     * Decides whether the program is finished: after iteration 1 it stops only if the bulk loader keeps the
     * original ids, after iteration 2 it always stops, and in any other iteration it continues.
     *
     * @param memory provides the current iteration
     * @return {@code true} if no further iteration is required
     */
    @Override
    public boolean terminate(final Memory memory) {
        final int iteration = memory.getIteration();
        if (iteration == 1) {
            return bulkLoader.keepOriginalIds();
        }
        return iteration == 2;
    }

    /**
     * Returns the element compute keys of this program. The returned set is the program's own set, not a
     * copy; the vertex id property name is added to it in {@code loadState}.
     */
    @Override
    public Set<String> getElementComputeKeys() {
        return elementComputeKeys;
    }

    /**
     * Returns the single message scope of this program, regardless of the state of the given memory.
     *
     * @param memory not used by this implementation
     * @return an immutable singleton set holding the program's message scope
     */
    @Override
    public Set<MessageScope> getMessageScopes(final Memory memory) {
        return Collections.singleton(messageScope);
    }

    /**
     * Returns this very instance instead of a copy, so every holder of a "clone" shares the same program
     * state.
     */
    @SuppressWarnings({ "CloneDoesntDeclareCloneNotSupportedException", "CloneDoesntCallSuperClone" })
    @Override
    public VertexProgram<Tuple> clone() {
        return this;
    }

    /**
     * Returns the preferred result graph of this program, which is always
     * {@link GraphComputer.ResultGraph#ORIGINAL}.
     */
    @Override
    public GraphComputer.ResultGraph getPreferredResultGraph() {
        return GraphComputer.ResultGraph.ORIGINAL;
    }

    /**
     * Returns the preferred persistence option of this program, which is always
     * {@link GraphComputer.Persist#NOTHING}.
     */
    @Override
    public GraphComputer.Persist getPreferredPersist() {
        return GraphComputer.Persist.NOTHING;
    }

    /**
     * Renders this program through {@link StringFactory#vertexProgramString}, listing the bulk loader's
     * simple class name, vertex id property and id flags (or {@code bulkLoader=null} if no loader has been
     * created yet) followed by the intermediate batch size.
     */
    @Override
    public String toString() {
        final String loaderDetails;
        if (bulkLoader == null) {
            loaderDetails = "bulkLoader=null";
        } else {
            loaderDetails = "bulkLoader=" + bulkLoader.getClass().getSimpleName()
                    + ",vertexIdProperty=" + bulkLoader.getVertexIdProperty()
                    + ",userSuppliedIds=" + bulkLoader.useUserSuppliedIds()
                    + ",keepOriginalIds=" + bulkLoader.keepOriginalIds();
        }
        return StringFactory.vertexProgramString(this, loaderDetails + ",batchSize=" + intermediateBatchSize);
    }

    /**
     * Creates a new, empty {@link Builder} for this vertex program.
     */
    public static Builder build() {
        return new Builder();
    }

    /**
     * Fluent builder that collects the settings of an {@link OpenflightsBulkLoaderVertexProgram} in a
     * configuration and creates the program from it.
     */
    public static class Builder extends AbstractVertexProgramBuilder<Builder> {

        private Builder() {
            super(OpenflightsBulkLoaderVertexProgram.class);
        }

        /**
         * Creates the vertex program. The entries found under the bulk loader prefix of the given graph's
         * configuration are appended to this builder's configuration before the program is instantiated.
         *
         * @param graph the graph the program is created for
         * @return the configured vertex program
         */
        @SuppressWarnings("unchecked")
        @Override
        public OpenflightsBulkLoaderVertexProgram create(final Graph graph) {
            ConfigurationUtils.append(graph.configuration().subset(BULK_LOADER_VERTEX_PROGRAM_CFG_PREFIX),
                    configuration);
            return (OpenflightsBulkLoaderVertexProgram) VertexProgram.createVertexProgram(graph, configuration);
        }

        // Stores a single setting of the target graph under the writeGraph prefix.
        private void setGraphConfigurationProperty(final String key, final Object value) {
            configuration.setProperty(String.join(".", WRITE_GRAPH_CFG_KEY, key), value);
        }

        /**
         * Sets the class name of the BulkLoader implementation to be used. The name is later resolved with
         * {@link Class#forName(String)}, so it has to be a binary class name (nested classes are separated
         * from their enclosing class by {@code $}).
         */
        public Builder bulkLoader(final String className) {
            configuration.setProperty(BULK_LOADER_CLASS_CFG_KEY, className);
            return this;
        }

        /**
         * Sets the class of the BulkLoader implementation to be used.
         */
        public Builder bulkLoader(final Class<? extends BulkLoader> clazz) {
            // getName() yields the binary name expected by Class.forName(); getCanonicalName() would break
            // nested classes ('.' instead of '$') and is null for local and anonymous classes
            return bulkLoader(clazz.getName());
        }

        /**
         * Sets the name of the property that is used to store the original vertex identifiers in the target graph.
         */
        public Builder vertexIdProperty(final String name) {
            configuration.setProperty(BULK_LOADER_VERTEX_ID_CFG_KEY, name);
            return this;
        }

        /**
         * Specifies whether user supplied identifiers should be used when the bulk loader creates vertices in the
         * target graph.
         */
        public Builder userSuppliedIds(final boolean useUserSuppliedIds) {
            configuration.setProperty(USER_SUPPLIED_IDS_CFG_KEY, useUserSuppliedIds);
            return this;
        }

        /**
         * Specifies whether the original vertex identifiers should be kept in the target graph or not. In case of false
         * BulkLoaderVertexProgram will add another iteration to remove the properties and it won't be possible to use
         * the data for further incremental bulk loads.
         */
        public Builder keepOriginalIds(final boolean keepOriginalIds) {
            configuration.setProperty(KEEP_ORIGINAL_IDS_CFG_KEY, keepOriginalIds);
            return this;
        }

        /**
         * The batch size for a single transaction (number of vertices in the vertex loading stage; number of edges in
         * the edge loading stage).
         */
        public Builder intermediateBatchSize(final int batchSize) {
            configuration.setProperty(INTERMEDIATE_BATCH_SIZE_CFG_KEY, batchSize);
            return this;
        }

        /**
         * A configuration for the target graph that can be passed to GraphFactory.open().
         *
         * @param configurationFile the properties file holding the target graph configuration
         * @throws ConfigurationException if the properties file cannot be loaded
         */
        public Builder writeGraph(final String configurationFile) throws ConfigurationException {
            final Configuration conf = new PropertiesConfiguration(configurationFile);
            conf.getKeys().forEachRemaining(key -> setGraphConfigurationProperty(key, conf.getProperty(key)));
            return this;
        }
    }

    /**
     * Declares the features this program relies on: vertex property addition and local message scopes.
     * All other features keep the defaults of {@link Features}.
     */
    @Override
    public Features getFeatures() {
        return new Features() {
            @Override
            public boolean requiresVertexPropertyAddition() {
                return true;
            }

            @Override
            public boolean requiresLocalMessageScopes() {
                return true;
            }
        };
    }
}