org.dswarm.graph.resources.MaintainResource.java Source code

Java tutorial

Introduction

Here is the source code for org.dswarm.graph.resources.MaintainResource.java

Source

/**
 * This file is part of d:swarm graph extension.
 *
 * d:swarm graph extension is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * d:swarm graph extension is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with d:swarm graph extension.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.dswarm.graph.resources;

import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;

import javax.ws.rs.Consumes;
import javax.ws.rs.DELETE;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.base.Charsets;
import com.google.common.io.Resources;
import org.mapdb.Atomic;
import org.mapdb.DB;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.ResourceIterator;
import org.neo4j.graphdb.Result;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.index.Index;
import org.neo4j.graphdb.schema.IndexDefinition;
import org.neo4j.graphdb.schema.Schema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.dswarm.common.DMPStatics;
import org.dswarm.common.types.Tuple;
import org.dswarm.graph.BasicNeo4jProcessor;
import org.dswarm.graph.DMPGraphException;
import org.dswarm.graph.DataModelNeo4jProcessor;
import org.dswarm.graph.GraphIndexStatics;
import org.dswarm.graph.GraphProcessingStatics;
import org.dswarm.graph.deprecate.DataModelNeo4jDeprecator;
import org.dswarm.graph.deprecate.RecordsNeo4jDeprecator;
import org.dswarm.graph.deprecate.RelationshipDeprecator;
import org.dswarm.graph.index.MapDBUtils;
import org.dswarm.graph.index.NamespaceIndex;
import org.dswarm.graph.index.SchemaIndexUtils;
import org.dswarm.graph.tx.Neo4jTransactionHandler;
import org.dswarm.graph.tx.TransactionHandler;
import org.dswarm.graph.utils.GraphDatabaseUtils;
import org.dswarm.graph.utils.NamespaceUtils;

/**
 * @author tgaengler
 */
@Path("/maintain")
public class MaintainResource extends GraphResource {

    // logger shared by all endpoints and helpers of this resource
    private static final Logger LOG = LoggerFactory.getLogger(MaintainResource.class);

    // identifier that is handed over to SchemaIndexUtils.createSchemaIndices(..) when the schema indices are created
    private static final String PERSISTENT_GRAPH_DATABASE_IDENTIFIER = "persistent graph database";

    // value that is inserted into the LIMIT clause of DELETE_CYPHER, i.e., the max. number of nodes matched per delete
    // query; a delete round that reports fewer entities than this ends the delete loop
    private static final long chunkSize = 50000;

    // TODO: maybe divide this into 2 queries and without OPTIONAL
    // cypher template (one %d placeholder for the node limit) that deletes the matched nodes incl. their relationships
    // and returns the number of processed result rows as "entity_count"
    private static final String DELETE_CYPHER = "MATCH (a) WITH a LIMIT %d OPTIONAL MATCH (a)-[r]-() DELETE a,r RETURN COUNT(*) AS entity_count";

    // type label that is passed to deserializeJSON(..)/serializeJSON(..) for data model deprecation requests
    // NOTE(review): presumably only utilised for error reporting - confirm in GraphResource
    private static final String DEPRECATE_DATA_MODEL_TYPE = "deprecate data model";

    // type label that is passed to deserializeJSON(..) for records deprecation requests
    private static final String DEPRECATE_RECORDS_TYPE = "deprecate records";

    /**
     * Creates the resource; there is no state that needs to be initialised.
     */
    public MaintainResource() {
        // intentionally empty
    }

    /**
     * Simple liveness check of this resource.
     *
     * @return always the string "pong"
     */
    @GET
    @Path("/ping")
    public String ping() {
        LOG.debug("ping was called");
        return "pong";
    }

    /**
     * Deprecates the statements of the data model whose URI is given in the request JSON.
     *
     * @param jsonObjectString the request JSON (as string); the data model URI is read from its
     *                         {@link DMPStatics#DATA_MODEL_URI_IDENTIFIER} field
     * @param database         the graph database
     * @return an OK response with a JSON object whose "deprecated" field holds the number of deprecated relationships
     * @throws DMPGraphException declared by the JSON helpers, the namespace index and the deprecator utilised here
     */
    @POST
    @Path("/deprecate/datamodel")
    @Consumes(MediaType.APPLICATION_JSON)
    @Produces(MediaType.APPLICATION_JSON)
    public Response deprecateDataModel(final String jsonObjectString, @Context final GraphDatabaseService database)
            throws DMPGraphException {

        LOG.info("try to deprecate data model in graph db");

        final ObjectNode request = deserializeJSON(jsonObjectString, DEPRECATE_DATA_MODEL_TYPE);
        final String uri = request.get(DMPStatics.DATA_MODEL_URI_IDENTIFIER).asText();

        // namespace index and processor share one transaction handler
        final TransactionHandler txHandler = new Neo4jTransactionHandler(database);
        final NamespaceIndex nsIndex = new NamespaceIndex(database, txHandler);
        final String prefixedUri = nsIndex.createPrefixedURI(uri);

        LOG.info("try to deprecate statements in data model '{}' ('{}') in graph db", uri, prefixedUri);

        final BasicNeo4jProcessor neo4jProcessor = new DataModelNeo4jProcessor(database, txHandler, nsIndex, prefixedUri);
        final RelationshipDeprecator deprecator = new DataModelNeo4jDeprecator(neo4jProcessor, true, prefixedUri);

        deprecator.work();

        final int deprecatedCount = deprecator.getRelationshipsDeprecated();

        // the data model version is only updated when at least one statement was deprecated in the DB
        if (deprecatedCount > 0) {
            deprecator.getVersionHandler().updateLatestVersion();
        }

        deprecator.closeTransaction();

        LOG.info("deprecated '{}' relationships in data model '{}' ('{}') in graph db", deprecatedCount, uri,
                prefixedUri);

        final ObjectNode responseJSON = simpleObjectMapper.createObjectNode();
        responseJSON.put("deprecated", deprecatedCount);
        final String responseBody = serializeJSON(responseJSON, DEPRECATE_DATA_MODEL_TYPE);

        return Response.ok(responseBody, MediaType.APPLICATION_JSON_TYPE).build();
    }

    /**
     * Deprecates the statements of the given records in the data model whose URI is given in the request JSON.
     *
     * @param jsonObjectString the request JSON (as string); the data model URI is read from its
     *                         {@link DMPStatics#DATA_MODEL_URI_IDENTIFIER} field and the record URIs from its
     *                         {@link DMPStatics#RECORDS_IDENTIFIER} field
     * @param database         the graph database
     * @return an OK response with a JSON object whose "deprecated" field holds the number of deprecated relationships
     * @throws DMPGraphException declared by the JSON helpers, the namespace index and the deprecator utilised here
     */
    @POST
    @Path("/deprecate/records")
    @Consumes(MediaType.APPLICATION_JSON)
    @Produces(MediaType.APPLICATION_JSON)
    public Response deprecateRecords(final String jsonObjectString, @Context final GraphDatabaseService database)
            throws DMPGraphException {

        MaintainResource.LOG.info("try to deprecate records in a data model in graph db");

        final ObjectNode requestJSON = deserializeJSON(jsonObjectString, DEPRECATE_RECORDS_TYPE);

        final String dataModelUri = requestJSON.get(DMPStatics.DATA_MODEL_URI_IDENTIFIER).asText();

        final Collection<String> recordURIs = getRecordURIs(requestJSON);

        // namespace index and processor share one transaction handler
        final TransactionHandler tx = new Neo4jTransactionHandler(database);
        final NamespaceIndex namespaceIndex = new NamespaceIndex(database, tx);

        final String prefixedDataModelUri = namespaceIndex.createPrefixedURI(dataModelUri);

        final Collection<String> prefixedRecordURIs = prefixRecordURIs(recordURIs, namespaceIndex);

        MaintainResource.LOG.info("try to deprecate '{}' records in data model '{}' ('{}') in graph db",
                prefixedRecordURIs.size(), dataModelUri, prefixedDataModelUri);

        final BasicNeo4jProcessor processor = new DataModelNeo4jProcessor(database, tx, namespaceIndex,
                prefixedDataModelUri);

        final RelationshipDeprecator recordsDeprecator = new RecordsNeo4jDeprecator(processor, true,
                prefixedDataModelUri, prefixedRecordURIs);

        recordsDeprecator.work();

        final int relationshipsDeprecated = recordsDeprecator.getRelationshipsDeprecated();

        if (relationshipsDeprecated > 0) {

            // update data model version only when some statements are deprecated in the DB
            recordsDeprecator.getVersionHandler().updateLatestVersion();
        }

        recordsDeprecator.closeTransaction();

        LOG.info("deprecated '{}' records with '{}' relationships in data model '{}' ('{}') in graph db",
                prefixedRecordURIs.size(), relationshipsDeprecated, dataModelUri, prefixedDataModelUri);

        final ObjectNode resultJSON = simpleObjectMapper.createObjectNode();
        resultJSON.put("deprecated", relationshipsDeprecated);

        // serialise with the records type (was the data model type by copy-paste), so that request parsing and response
        // writing of this endpoint refer to the same operation
        final String result = serializeJSON(resultJSON, DEPRECATE_RECORDS_TYPE);

        return Response.ok(result, MediaType.APPLICATION_JSON_TYPE).build();
    }

    /**
     * Cleans the complete graph database: first all nodes and relationships are deleted, then the legacy indices and
     * finally the schema indices are removed.
     * <p>
     * Note: utilise this endpoint with care, because it cleans your complete db!
     *
     * @param database the graph database
     * @return an OK response with a JSON object whose "deleted" field holds the count reported by the delete step
     * @throws IOException       if the JSON response could not be written
     * @throws DMPGraphException if one of the clean-up steps failed
     */
    @DELETE
    @Path("/delete")
    @Produces("application/json")
    public Response cleanGraph(@Context final GraphDatabaseService database) throws IOException, DMPGraphException {

        LOG.debug("start cleaning up the db");

        final long deleted = deleteSomeStatements(database);

        LOG.debug("finished delete-all-entities TXs");

        // TODO: maybe separate index clean-up + observe index clean-up
        // => maybe we also need to do a label + relationship types clean-up ... => this is not supported right now ...
        LOG.debug("start legacy indices clean-up");
        deleteSomeLegacyIndices(database);
        LOG.debug("finished legacy indices clean-up");

        LOG.debug("start schema indices clean-up");
        deleteSomeSchemaIndices(database);
        LOG.debug("finished schema indices clean-up");

        LOG.debug("finished cleaning up the db");

        try (final StringWriter buffer = new StringWriter()) {

            // the generator is closed before the buffer content is read
            try (final JsonGenerator json = simpleObjectMapper.getFactory().createGenerator(buffer)) {

                json.writeStartObject();
                json.writeNumberField("deleted", deleted);
                json.writeEndObject();
                json.flush();
            }

            return Response.ok(buffer.toString(), MediaType.APPLICATION_JSON_TYPE).build();
        }
    }

    /**
     * Creates the schema indices of the graph database and initialises the namespace prefixes afterwards.
     *
     * @param database the graph database
     * @return an empty OK response
     * @throws DMPGraphException if the index creation or the prefix initialisation failed
     */
    @POST
    @Path("/schemaindices")
    public Response createSchemaIndices(@Context final GraphDatabaseService database) throws DMPGraphException {
        // indices first, prefixes second
        SchemaIndexUtils.createSchemaIndices(database, PERSISTENT_GRAPH_DATABASE_IDENTIFIER);
        initPrefixes(database);

        return Response.ok().build();
    }

    /**
     * Initialises the persistent prefix counter from the prefix nodes that are stored in the graph database.
     *
     * @param database the graph database
     * @return an empty OK response
     * @throws DMPGraphException if the prefix counter could not be initialised
     */
    @POST
    @Path("/initprefixcounter")
    public Response initPrefixCounter(@Context final GraphDatabaseService database) throws DMPGraphException {
        initPrefixCounterInternal(database);

        return Response.ok().build();
    }

    /**
     * Deletes all nodes and relationships chunk-wise, i.e., the delete query is executed in separate transactions until
     * it reports no (further) entity count or an entity count smaller than the chunk size.
     *
     * @param database the graph database
     * @return the sum of all "entity_count" values that were reported by the executed delete queries
     * @throws DMPGraphException if one of the delete transactions failed (the original exception is kept as cause)
     */
    private long deleteSomeStatements(final GraphDatabaseService database) throws DMPGraphException {

        final String deleteQuery = String.format(DELETE_CYPHER, MaintainResource.chunkSize);

        long deleted = 0;

        int round = 0;

        while (true) {

            round++;

            // try-with-resources closes the TX on every exit path (break, regular end, exception), i.e., no explicit
            // close() calls are required
            try (final Transaction tx = database.beginTx()) {

                MaintainResource.LOG.debug("try to delete up to {} nodes and their relationships for the {}. time",
                        MaintainResource.chunkSize, round);

                final Long count = executeDeleteQuery(database, deleteQuery);

                tx.success();

                if (count == null) {

                    // nothing (more) to delete; the reason was already logged
                    break;
                }

                deleted += count;

                MaintainResource.LOG.debug("deleted {} entities", count);

                if (count < MaintainResource.chunkSize) {

                    MaintainResource.LOG.debug(
                            "there are no more results for removal available, i.e. current result is smaller than chunk size");

                    break;
                }
            } catch (final Exception e) {

                final String message = "couldn't finish delete-all-entities TX successfully";

                MaintainResource.LOG.error(message, e);

                // keep the cause, so that callers can see why the TX failed
                throw new DMPGraphException(message, e);
            }
        }

        return deleted;
    }

    /**
     * Executes the delete query once (must be called inside a running transaction) and reads the reported entity count.
     * The query result is always closed, also when reading it fails.
     *
     * @param database    the graph database
     * @param deleteQuery the ready-to-run delete query
     * @return the value of the "entity_count" column, or null if the query did not deliver one
     */
    private static Long executeDeleteQuery(final GraphDatabaseService database, final String deleteQuery) {

        // a null resource is simply skipped by try-with-resources
        try (final Result result = database.execute(deleteQuery)) {

            if (result == null) {

                MaintainResource.LOG
                        .debug("there are no more results for removal available, i.e. result is empty");

                return null;
            }

            if (!result.hasNext()) {

                MaintainResource.LOG.debug(
                        "there are no more results for removal available, i.e. result iterator is empty");

                return null;
            }

            final Map<String, Object> row = result.next();

            if (row == null || row.isEmpty()) {

                MaintainResource.LOG
                        .debug("there are no more results for removal available, i.e. row map is empty");

                return null;
            }

            // look the column up by name instead of relying on it being the first entry of the row
            final Object value = row.get("entity_count");

            if (value == null) {

                MaintainResource.LOG.debug(
                        "there are no more results for removal available, i.e. entity count is not available");

                return null;
            }

            return ((Number) value).longValue();
        }
    }

    /**
     * Deletes the legacy indices (resources, resources with data model, resource types, statement UUIDs, values) and the
     * MapDB statement hashes index in one transaction.
     *
     * @param database the graph database
     * @throws DMPGraphException if the indices could not be deleted (the original exception is kept as cause)
     */
    private void deleteSomeLegacyIndices(final GraphDatabaseService database) throws DMPGraphException {

        MaintainResource.LOG.debug("start delete legacy indices TX");

        // try-with-resources closes the TX on every exit path, i.e., no explicit close() call is required
        try (final Transaction itx = database.beginTx()) {

            final Index<Node> resources = database.index().forNodes(GraphIndexStatics.RESOURCES_INDEX_NAME);
            final Index<Node> values = database.index().forNodes(GraphIndexStatics.VALUES_INDEX_NAME);
            final Index<Node> resourcesWDataModel = database.index()
                    .forNodes(GraphIndexStatics.RESOURCES_W_DATA_MODEL_INDEX_NAME);
            final Index<Node> resourceTypes = database.index()
                    .forNodes(GraphIndexStatics.RESOURCE_TYPES_INDEX_NAME);
            final Tuple<Set<Long>, DB> statementHashesMapDBIndexTuple = getOrCreateLongIndex(
                    GraphIndexStatics.STATEMENT_HASHES_INDEX_NAME, database);
            final Index<Relationship> statementUUIDs = database.index()
                    .forRelationships(GraphIndexStatics.STATEMENT_UUIDS_INDEX_NAME);

            deleteLegacyIndex(resources, GraphIndexStatics.RESOURCES_INDEX_NAME);
            deleteLegacyIndex(resourcesWDataModel, GraphIndexStatics.RESOURCES_W_DATA_MODEL_INDEX_NAME);
            deleteLegacyIndex(resourceTypes, GraphIndexStatics.RESOURCE_TYPES_INDEX_NAME);

            final DB mapDB = statementHashesMapDBIndexTuple.v2();

            try {

                if (mapDB.exists(GraphIndexStatics.STATEMENT_HASHES_INDEX_NAME)) {

                    MaintainResource.LOG.debug("delete {} mapdb index", GraphIndexStatics.STATEMENT_HASHES_INDEX_NAME);

                    mapDB.delete(GraphIndexStatics.STATEMENT_HASHES_INDEX_NAME);
                    mapDB.commit();
                }
            } finally {

                // release the MapDB handle in every case, i.e., also when the index did not exist or its removal failed
                mapDB.close();
            }

            deleteLegacyIndex(statementUUIDs, GraphIndexStatics.STATEMENT_UUIDS_INDEX_NAME);
            deleteLegacyIndex(values, GraphIndexStatics.VALUES_INDEX_NAME);

            itx.success();
        } catch (final Exception e) {

            final String message = "couldn't finish delete legacy indices TX successfully";

            MaintainResource.LOG.error(message, e);

            // keep the cause, so that callers can see why the TX failed
            throw new DMPGraphException(message, e);
        }

        MaintainResource.LOG.debug("finished delete legacy indices TX");
    }

    /**
     * Deletes a single legacy index (if it is available) and logs its removal.
     *
     * @param index     the legacy index that should be deleted; may be null
     * @param indexName the name of the legacy index (utilised for logging only)
     */
    private static void deleteLegacyIndex(final Index<?> index, final String indexName) {

        if (index == null) {

            return;
        }

        MaintainResource.LOG.debug("delete {} legacy index", indexName);

        index.delete();
    }

    /**
     * Drops all schema indices of the graph database in one transaction.
     *
     * @param database the graph database
     * @throws DMPGraphException if the schema indices could not be dropped (the original exception is kept as cause)
     */
    private void deleteSomeSchemaIndices(final GraphDatabaseService database) throws DMPGraphException {

        MaintainResource.LOG.debug("start delete schema indices TX");

        // try-with-resources closes the TX on every exit path (early return, regular end, exception), i.e., no explicit
        // close() calls are required
        try (final Transaction itx = database.beginTx()) {

            final Schema schema = database.schema();

            if (schema == null) {

                MaintainResource.LOG.debug("no schema available");

                itx.success();

                return;
            }

            final Iterable<IndexDefinition> indexDefinitions = schema.getIndexes();

            if (indexDefinitions == null) {

                MaintainResource.LOG.debug("no schema indices available");

                itx.success();

                return;
            }

            for (final IndexDefinition indexDefinition : indexDefinitions) {

                // only the first property key of the index is logged
                MaintainResource.LOG.debug("drop '{}' : '{}' schema index", indexDefinition.getLabel().name(),
                        indexDefinition.getPropertyKeys().iterator().next());

                indexDefinition.drop();
            }

            itx.success();
        } catch (final Exception e) {

            final String message = "couldn't finish delete schema indices TX successfully";

            MaintainResource.LOG.error(message, e);

            // keep the cause, so that callers can see why the TX failed
            throw new DMPGraphException(message, e);
        }

        MaintainResource.LOG.debug("finished delete schema indices TX");
    }

    /**
     * Opens (or creates) the persistent MapDB long index with the given name below the MapDB index store directory of
     * the given graph database.
     *
     * @param name     the name of the index; also utilised as file name below the store directory
     * @param database the graph database whose MapDB index store directory is utilised
     * @return the tuple that is delivered by {@link MapDBUtils} (the long set + the MapDB handle)
     * @throws IOException declared by the utilised helpers
     */
    private Tuple<Set<Long>, DB> getOrCreateLongIndex(final String name, final GraphDatabaseService database)
            throws IOException {

        final String indexPath = GraphDatabaseUtils.determineMapDBIndexStoreDir(database) + File.separator + name;

        return MapDBUtils.createOrGetPersistentLongIndexTreeSetGlobalTransactional(indexPath, name);
    }

    /**
     * Loads the prefix-to-namespace mappings from the "prefixes.json" resource and adds every mapping whose namespace
     * has no prefix in the graph database yet to the namespace index.
     *
     * @param database the graph database
     * @throws DMPGraphException if the prefixes could not be initialised
     */
    private void initPrefixes(final GraphDatabaseService database) throws DMPGraphException {

        LOG.debug("start initialising namespaces index");

        final TransactionHandler txHandler = new Neo4jTransactionHandler(database);
        final NamespaceIndex nsIndex = new NamespaceIndex(database, txHandler);

        txHandler.ensureRunningTx();

        try {

            final String prefixesJSON = Resources.toString(Resources.getResource("prefixes.json"), Charsets.UTF_8);
            final Map<String, String> namespacesByPrefix = simpleObjectMapper.readValue(prefixesJSON,
                    new TypeReference<HashMap<String, String>>() {

                    });

            for (final Entry<String, String> mapping : namespacesByPrefix.entrySet()) {

                final String prefix = mapping.getKey();
                final String namespace = mapping.getValue();

                final Optional<Node> existingPrefixNode = NamespaceUtils.getPrefix(namespace, database);

                if (existingPrefixNode.isPresent()) {

                    // namespace is already known => keep the prefix from the DB and only report it
                    final String prefixFromDB = (String) existingPrefixNode.get()
                            .getProperty(GraphProcessingStatics.PREFIX_PROPERTY);

                    LOG.debug(
                            "prefix '{}' is already available for namespace '{}', i.e., no further entry with prefix '{}' will be created",
                            prefixFromDB, namespace, prefix);

                    continue;
                }

                nsIndex.addPrefix(namespace, prefix);
            }

            txHandler.succeedTx();
        } catch (final Exception e) {

            txHandler.failTx();

            final String message = "couldn't initialize prefixes successfully";

            LOG.error(message);

            throw new DMPGraphException(message, e);
        }

        LOG.debug("finished initialising namespaces index");
    }

    /**
     * Collects the text values of all child nodes of the {@link DMPStatics#RECORDS_IDENTIFIER} field of the given JSON
     * object.
     *
     * @param json the request JSON
     * @return the record URIs in the order in which they occur in the JSON
     */
    private Collection<String> getRecordURIs(final ObjectNode json) {

        final Collection<String> uris = new ArrayList<>();

        for (final JsonNode uriNode : json.get(DMPStatics.RECORDS_IDENTIFIER)) {
            uris.add(uriNode.asText());
        }

        return uris;
    }

    /**
     * Converts every given record URI into its prefixed form with the help of the given namespace index.
     *
     * @param recordURIs     the (full) record URIs
     * @param namespaceIndex the namespace index that creates the prefixed URIs
     * @return the prefixed record URIs in the iteration order of the input
     * @throws DMPGraphException declared by the namespace index
     */
    private Collection<String> prefixRecordURIs(final Collection<String> recordURIs,
            final NamespaceIndex namespaceIndex) throws DMPGraphException {

        final Collection<String> prefixed = new ArrayList<>();

        for (final String uri : recordURIs) {
            prefixed.add(namespaceIndex.createPrefixedURI(uri));
        }

        return prefixed;
    }

    /**
     * Determines the biggest prefix number that is utilised by the prefix nodes of the graph database and hands this
     * number + 1 as value over to the persistent (MapDB) prefix counter index.
     *
     * @param database the graph database
     * @throws DMPGraphException if the prefix counter could not be initialised (the original exception is kept as cause)
     */
    private void initPrefixCounterInternal(final GraphDatabaseService database) throws DMPGraphException {

        // try-with-resources closes the TX on every exit path (early return, regular end, exception), i.e., no explicit
        // close() calls are required
        try (final Transaction tx = database.beginTx()) {

            final Long biggestPrefixNumber = findBiggestPrefixNumber(database);

            if (biggestPrefixNumber == null) {

                LOG.debug("no prefix nodes available - prefix does not need to initialised with a specific value");

                tx.success();

                return;
            }

            // create persistent prefix counter with biggest given prefix number + 1
            final long initialCounterValue = biggestPrefixNumber + 1;

            final String storeDir = GraphDatabaseUtils.determineMapDBIndexStoreDir(database);

            final Tuple<Atomic.Long, DB> prefixCounterTuple = MapDBUtils
                    .createOrGetPersistentLongIndexGlobalTransactional(
                            storeDir + File.separator + GraphIndexStatics.PREFIX_COUNTER_INDEX_NAME,
                            GraphIndexStatics.PREFIX_COUNTER_INDEX_NAME, initialCounterValue);
            final DB prefixCounterDB = prefixCounterTuple.v2();

            try {

                prefixCounterDB.commit();
            } finally {

                // release the MapDB handle also when the commit failed
                prefixCounterDB.close();
            }

            LOG.info("initialized the prefix counter index with '{}'", initialCounterValue);

            tx.success();
        } catch (final Exception e) {

            final String message = "couldn't finish init prefix counter TX successfully";

            MaintainResource.LOG.error(message, e);

            // keep the cause, so that callers can see why the TX failed
            throw new DMPGraphException(message, e);
        }
    }

    /**
     * Scans all prefix nodes (must be called inside a running transaction) for prefixes that consist of the namespace
     * prefix base followed by a number and determines the biggest of these numbers. The node iterator is always
     * closed, also when the scan fails.
     *
     * @param database the graph database
     * @return the biggest prefix number (0 if no prefix carries a number), or null if no node iterator is available
     */
    private static Long findBiggestPrefixNumber(final GraphDatabaseService database) {

        // a null resource is simply skipped by try-with-resources
        try (final ResourceIterator<Node> nodes = database.findNodes(GraphProcessingStatics.PREFIX_LABEL)) {

            if (nodes == null) {

                return null;
            }

            // the number starts right behind the prefix base (instead of at a hard-coded offset)
            final int prefixBaseLength = NamespaceUtils.NAMESPACE_PREFIX_BASE.length();

            long biggestPrefixNumber = 0;

            while (nodes.hasNext()) {

                final Node prefixNode = nodes.next();

                final Object prefixObject = prefixNode.getProperty(GraphProcessingStatics.PREFIX_PROPERTY, null);

                if (prefixObject == null) {

                    LOG.debug("could not find prefix at prefix node '{}'", prefixNode.getId());

                    continue;
                }

                final String prefix = (String) prefixObject;

                if (!prefix.startsWith(NamespaceUtils.NAMESPACE_PREFIX_BASE) || prefix.length() <= prefixBaseLength) {

                    // namespace is no candidate for prefix counter number
                    continue;
                }

                final String prefixNumberString = prefix.substring(prefixBaseLength);

                try {

                    biggestPrefixNumber = Math.max(biggestPrefixNumber, Long.parseLong(prefixNumberString));
                } catch (final NumberFormatException e) {

                    // prefixes with a non-numeric remainder are simply no counter candidates
                    LOG.debug("could not convert prefix number string '{}' to a number", prefixNumberString);
                }
            }

            return biggestPrefixNumber;
        }
    }
}