de.gwdg.europeanaqa.client.rest.DocumentTransformer.java Source code

Java tutorial

Introduction

Here is the source code for de.gwdg.europeanaqa.client.rest.DocumentTransformer.java

Source

package de.gwdg.europeanaqa.client.rest;

import com.mongodb.DBRef;
import com.mongodb.MongoClient;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.Filters;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.bson.BsonDocument;
import org.bson.Document;
import org.bson.codecs.BsonTypeClassMap;
import org.bson.codecs.DocumentCodec;
import org.bson.codecs.configuration.CodecRegistries;
import org.bson.codecs.configuration.CodecRegistry;
import org.bson.types.ObjectId;

/**
 * Expands the entity references of a record {@link Document} in place by loading the
 * referenced documents from a {@link MongoDatabase}, and optionally renames the keys of the
 * record and of the loaded documents to prefixed names such as {@code dc:title}.
 *
 * <p>References are accepted in two shapes: as {@link DBRef} instances and as embedded
 * {@link Document}s carrying {@code $ref} and {@code $id} keys.
 *
 * @author Péter Király <peter.kiraly at gwdg.de>
 */
public class DocumentTransformer {

    static final Logger logger = Logger.getLogger(DocumentTransformer.class.getCanonicalName());

    /** Codec built from the driver's default registry; not used by the methods of this class. */
    DocumentCodec codec;

    /** Database from which referenced documents are loaded. */
    MongoDatabase mongoDb;

    /** Record keys holding entity references, mapped to the key used when renaming is on. */
    private static final Map<String, String> ENTITIES = new LinkedHashMap<>();

    static {
        ENTITIES.put("agents", "edm:Agent");
        ENTITIES.put("concepts", "skos:Concept");
        ENTITIES.put("timespans", "edm:TimeSpan");
        ENTITIES.put("places", "edm:Place");
        ENTITIES.put("licenses", "cc:License");

        ENTITIES.put("aggregations", "ore:Aggregation");
        ENTITIES.put("providedCHOs", "edm:ProvidedCHO");
        ENTITIES.put("proxies", "ore:Proxy");
        ENTITIES.put("europeanaAggregation", "edm:EuropeanaAggregation");
    }

    /** Keys of resolved documents that hold further references, mapped to their output key. */
    private static final Map<String, String> SUB_ENTITIES = new LinkedHashMap<>();

    static {
        SUB_ENTITIES.put("webResources", "edm:WebResource");
    }

    /**
     * Field names grouped by vocabulary. Not referenced anywhere in this class (language
     * structures are detected through {@link #FIELD_DICTIONARY} instead); presumably the
     * fields that may carry language-tagged values - TODO confirm or remove.
     */
    private static final List<String> LANGUAGE_FIELDS = Arrays.asList("prefLabel", "altLabel", "note",

            "edmDataProvider", "edmProvider", "edmRights", "edmLanguage", "edmCountry", "year", "begin",
            "edmCurrentLocation", "end", "edmHasMet", "edmHasType", "hasView", "edmIncorporates",
            "edmIsDerivativeOf", "edmIsNextInSequence", "edmIsRelatedTo", "edmIsRepresentationOf", "edmIsSimilarTo",
            "edmIsSuccessorOf", "edmIsShownBy", "edmIsShownAt", "edmLandingPage", "edmObject",
            "edmPreview", "edmRealizes", "edmType", "edmUgc", "edmUnstored",
            "edmPreviewNoDistribute",

            "dcContributor", "dcCoverage", "dcSubject", "dcCreator", "dcDate", "dcDescription", "dcFormat",
            "dcIdentifier", "dcLanguage", "dcPublisher", "dcRelation", "dcRights", "dcSource",
            "dcTitle", "dcType", "dctermsTOC",

            "dctermsAlternative", "dctermsCreated", "dctermsExtent", "dctermsHasPart", "dctermsIsFormatOf",
            "dctermsIsPartOf", "dctermsIsReferencedBy", "dctermsIssued", "dctermsIsVersionOf", "dctermsMedium",
            "dctermsProvenance", "dctermsReferences", "dctermsSpatial", "isPartOf", "dctermsTemporal",

            "rdaGr2BiographicalInformation", "rdaGr2DateOfBirth", "rdaGr2DateOfDeath", "rdaGr2DateOfEstablishment",
            "rdaGr2DateOfTermination", "rdaGr2Gender", "rdaGr2ProfessionOrOccupation", "rdaGr2PlaceOfBirth",
            "rdaGr2PlaceOfDeath",

            "foafName");

    /** Stored field name mapped to the prefixed name it is renamed to. */
    private static final Map<String, String> FIELD_DICTIONARY = new HashMap<>();

    static {
        FIELD_DICTIONARY.put("about", "@about");

        // dc
        FIELD_DICTIONARY.put("dcContributor", "dc:contributor");
        FIELD_DICTIONARY.put("dcCoverage", "dc:coverage");
        FIELD_DICTIONARY.put("dcCreator", "dc:creator");
        FIELD_DICTIONARY.put("dcDate", "dc:date");
        FIELD_DICTIONARY.put("dcDescription", "dc:description");
        FIELD_DICTIONARY.put("dcFormat", "dc:format");
        FIELD_DICTIONARY.put("dcIdentifier", "dc:identifier");
        FIELD_DICTIONARY.put("dcLanguage", "dc:language");
        FIELD_DICTIONARY.put("dcPublisher", "dc:publisher");
        FIELD_DICTIONARY.put("dcRelation", "dc:relation");
        FIELD_DICTIONARY.put("dcRights", "dc:rights");
        FIELD_DICTIONARY.put("dcSource", "dc:source");
        FIELD_DICTIONARY.put("dcSubject", "dc:subject");
        FIELD_DICTIONARY.put("dcTitle", "dc:title");
        FIELD_DICTIONARY.put("dcType", "dc:type");

        // dcterms
        FIELD_DICTIONARY.put("dctermsAlternative", "dcterms:alternative");
        FIELD_DICTIONARY.put("dctermsConformsTo", "dcterms:conformsTo");
        FIELD_DICTIONARY.put("dctermsCreated", "dcterms:created");
        FIELD_DICTIONARY.put("dctermsExtent", "dcterms:extent");
        FIELD_DICTIONARY.put("dctermsHasFormat", "dcterms:hasFormat");
        FIELD_DICTIONARY.put("dctermsHasPart", "dcterms:hasPart");
        FIELD_DICTIONARY.put("dctermsHasVersion", "dcterms:hasVersion");
        FIELD_DICTIONARY.put("dctermsIsFormatOf", "dcterms:isFormatOf");
        FIELD_DICTIONARY.put("dctermsIsPartOf", "dcterms:isPartOf");
        FIELD_DICTIONARY.put("isPartOf", "dcterms:isPartOf");
        FIELD_DICTIONARY.put("dctermsIsReferencedBy", "dcterms:isReferencedBy");
        FIELD_DICTIONARY.put("dctermsIsReplacedBy", "dcterms:isReplacedBy");
        FIELD_DICTIONARY.put("dctermsIsRequiredBy", "dcterms:isRequiredBy");
        FIELD_DICTIONARY.put("dctermsIssued", "dcterms:issued");
        FIELD_DICTIONARY.put("dctermsIsVersionOf", "dcterms:isVersionOf");
        FIELD_DICTIONARY.put("dctermsMedium", "dcterms:medium");
        FIELD_DICTIONARY.put("dctermsProvenance", "dcterms:provenance");
        FIELD_DICTIONARY.put("dctermsReferences", "dcterms:references");
        FIELD_DICTIONARY.put("dctermsReplaces", "dcterms:replaces");
        FIELD_DICTIONARY.put("dctermsRequires", "dcterms:requires");
        FIELD_DICTIONARY.put("dctermsSpatial", "dcterms:spatial");
        FIELD_DICTIONARY.put("dctermsTOC", "dcterms:tableOfContents");
        FIELD_DICTIONARY.put("dctermsTemporal", "dcterms:temporal");

        // skos
        FIELD_DICTIONARY.put("altLabel", "skos:altLabel");
        FIELD_DICTIONARY.put("prefLabel", "skos:prefLabel");
        FIELD_DICTIONARY.put("related", "skos:related");
        FIELD_DICTIONARY.put("note", "skos:note");
        FIELD_DICTIONARY.put("broader", "skos:broader");
        FIELD_DICTIONARY.put("narrower", "skos:narrower");
        FIELD_DICTIONARY.put("broadMatch", "skos:broadMatch");
        FIELD_DICTIONARY.put("narrowMatch", "skos:narrowMatch");
        FIELD_DICTIONARY.put("exactMatch", "skos:exactMatch");
        FIELD_DICTIONARY.put("relatedMatch", "skos:relatedMatch");
        FIELD_DICTIONARY.put("closeMatch", "skos:closeMatch");
        FIELD_DICTIONARY.put("notation", "skos:notation");
        FIELD_DICTIONARY.put("inScheme", "skos:inScheme");

        // edm
        FIELD_DICTIONARY.put("aggregatedCHO", "edm:aggregatedCHO");
        FIELD_DICTIONARY.put("begin", "edm:begin");
        FIELD_DICTIONARY.put("edmcodecName", "edm:codecName");
        FIELD_DICTIONARY.put("edmcomponentColor", "edm:componentColor");
        FIELD_DICTIONARY.put("edmCountry", "edm:country");
        FIELD_DICTIONARY.put("edmCurrentLocation", "edm:currentLocation");
        FIELD_DICTIONARY.put("edmDataProvider", "edm:dataProvider");
        FIELD_DICTIONARY.put("end", "edm:end");
        FIELD_DICTIONARY.put("europeanaProxy", "edm:europeanaProxy");
        FIELD_DICTIONARY.put("edmhasColorSpace", "edm:hasColorSpace");
        FIELD_DICTIONARY.put("edmHasMet", "edm:hasMet");
        FIELD_DICTIONARY.put("edmHasType", "edm:hasType");
        FIELD_DICTIONARY.put("hasView", "edm:hasView");
        FIELD_DICTIONARY.put("edmHasView", "edm:hasView");
        FIELD_DICTIONARY.put("edmIncorporates", "edm:incorporates");
        FIELD_DICTIONARY.put("edmIsDerivativeOf", "edm:isDerivativeOf");
        FIELD_DICTIONARY.put("edmIsNextInSequence", "edm:isNextInSequence");
        FIELD_DICTIONARY.put("edmIsRelatedTo", "edm:isRelatedTo");
        FIELD_DICTIONARY.put("edmIsRepresentationOf", "edm:isRepresentationOf");
        FIELD_DICTIONARY.put("edmIsSimilarTo", "edm:isSimilarTo");
        FIELD_DICTIONARY.put("edmIsSuccessorOf", "edm:isSuccessorOf");
        FIELD_DICTIONARY.put("edmIsShownBy", "edm:isShownBy");
        FIELD_DICTIONARY.put("edmIsShownAt", "edm:isShownAt");
        FIELD_DICTIONARY.put("edmLanguage", "edm:language");
        FIELD_DICTIONARY.put("edmLandingPage", "edm:landingPage");
        FIELD_DICTIONARY.put("edmObject", "edm:object");
        FIELD_DICTIONARY.put("edmPreview", "edm:preview");
        FIELD_DICTIONARY.put("edmProvider", "edm:provider");
        FIELD_DICTIONARY.put("edmRealizes", "edm:realizes");
        FIELD_DICTIONARY.put("edmRights", "edm:rights");
        FIELD_DICTIONARY.put("edmspatialResolution", "edm:spatialResolution");
        FIELD_DICTIONARY.put("edmType", "edm:type");
        FIELD_DICTIONARY.put("edmUgc", "edm:ugc");
        FIELD_DICTIONARY.put("edmUnstored", "edm:unstored");
        FIELD_DICTIONARY.put("edmWasPresentAt", "edm:wasPresentAt");
        FIELD_DICTIONARY.put("year", "edm:year");
        // not in http://labs.europeana.eu/api/data-fields
        FIELD_DICTIONARY.put("edmPreviewNoDistribute", "edm:previewNoDistribute");

        // ore
        FIELD_DICTIONARY.put("proxyIn", "ore:proxyIn");
        FIELD_DICTIONARY.put("proxyFor", "ore:proxyFor");
        FIELD_DICTIONARY.put("aggregates", "ore:aggregates");

        // wgs84 or wgs84_pos?
        FIELD_DICTIONARY.put("longitude", "wgs84:long");
        FIELD_DICTIONARY.put("latitude", "wgs84:lat");
        FIELD_DICTIONARY.put("altitude", "wgs84:alt");

        // owl
        FIELD_DICTIONARY.put("owlSameAs", "owl:sameAs");

        // rdaGr2
        FIELD_DICTIONARY.put("rdaGr2BiographicalInformation", "rdaGr2:biographicalInformation");
        FIELD_DICTIONARY.put("rdaGr2DateOfBirth", "rdaGr2:dateOfBirth");
        FIELD_DICTIONARY.put("rdaGr2DateOfDeath", "rdaGr2:dateOfDeath");
        FIELD_DICTIONARY.put("rdaGr2DateOfEstablishment", "rdaGr2:dateOfEstablishment");
        FIELD_DICTIONARY.put("rdaGr2DateOfTermination", "rdaGr2:dateOfTermination");
        FIELD_DICTIONARY.put("rdaGr2Gender", "rdaGr2:gender");
        FIELD_DICTIONARY.put("rdaGr2ProfessionOrOccupation", "rdaGr2:professionOrOccupation");
        // not in http://labs.europeana.eu/api/data-fields
        // in /11004/E66D8929E1ABD5BDD48E64E86D12EAEB7760AA60
        FIELD_DICTIONARY.put("rdaGr2PlaceOfBirth", "rdaGr2:placeOfBirth");
        FIELD_DICTIONARY.put("rdaGr2PlaceOfDeath", "rdaGr2:placeOfDeath");

        // foaf
        FIELD_DICTIONARY.put("foafName", "foaf:name");

        // odrl
        FIELD_DICTIONARY.put("odrlInheritFrom", "odrl:inheritFrom");

        // cc
        FIELD_DICTIONARY.put("ccDeprecatedOn", "cc:deprecatedOn");
    }

    /**
     * Creates a transformer that loads referenced documents from the given database.
     *
     * @param mongoDb database queried when a reference is resolved
     */
    public DocumentTransformer(MongoDatabase mongoDb) {
        CodecRegistry codecRegistry = CodecRegistries.fromRegistries(MongoClient.getDefaultCodecRegistry());
        codec = new DocumentCodec(codecRegistry, new BsonTypeClassMap());

        this.mongoDb = mongoDb;
    }

    /**
     * Transforms the record in place with field renaming switched on.
     *
     * @param record the record to modify
     */
    public void transform(Document record) {
        transform(record, true);
    }

    /**
     * Transforms the record in place: drops {@code _id} and {@code className}, moves
     * {@code about} to {@code identifier}, copies {@code europeanaCollectionName} to
     * {@code sets}, and replaces every entity reference with the referenced document.
     *
     * <p>An entity key holding an empty list is removed. A value that is neither a list nor a
     * recognisable reference is logged at SEVERE level and left untouched. A reference whose
     * target does not exist resolves to {@code null}.
     *
     * @param record the record to modify
     * @param withFieldRename whether entity keys and the fields of resolved documents are
     *        renamed to their prefixed names
     */
    public void transform(Document record, boolean withFieldRename) {
        record.remove("_id");
        record.remove("className");
        record.put("identifier", record.get("about"));
        record.remove("about");
        // Copied, not moved: the original key stays in the record.
        record.put("sets", record.get("europeanaCollectionName"));

        for (Map.Entry<String, String> entity : ENTITIES.entrySet()) {
            String key = entity.getKey();
            if (!record.containsKey(key)) {
                continue;
            }
            Object value = record.get(key);
            Object resolved;
            if (value instanceof List) {
                List<?> refs = (List<?>) value;
                if (refs.isEmpty()) {
                    record.remove(key);
                    continue;
                }
                resolved = resolveReferences(key, refs, withFieldRename);
            } else if (isReference(value)) {
                resolved = resolveAnyReference(value, withFieldRename);
            } else {
                logUnknown(key, value);
                continue;
            }
            if (withFieldRename) {
                record.remove(key);
                record.put(entity.getValue(), resolved);
            } else {
                record.put(key, resolved);
            }
        }
    }

    /** Tells whether the value is a {@link DBRef} or a {@link Document} with a {@code $ref} name. */
    private static boolean isReference(Object candidate) {
        if (candidate instanceof DBRef) {
            return true;
        }
        return candidate instanceof Document && ((Document) candidate).get("$ref") instanceof String;
    }

    /** Logs a value of unsupported type using the class's "UNKNOWN" message format. */
    private static void logUnknown(String key, Object value) {
        String type = (value == null) ? "null" : value.getClass().getCanonicalName();
        logger.log(Level.SEVERE, "UNKNOWN: {0} {1}", new Object[] { key, type });
    }

    /**
     * Resolves every reference of the list, in order. Elements that are not references are
     * logged under the owning key and skipped; dangling references contribute {@code null}.
     */
    private List<Document> resolveReferences(String owner, List<?> refs, boolean withFieldRename) {
        List<Document> resolved = new ArrayList<>(refs.size());
        for (Object ref : refs) {
            if (isReference(ref)) {
                resolved.add(resolveAnyReference(ref, withFieldRename));
            } else {
                logUnknown(owner, ref);
            }
        }
        return resolved;
    }

    /** Resolves a value already accepted by {@link #isReference(Object)}, whatever its shape. */
    private Document resolveAnyReference(Object ref, boolean withFieldRename) {
        if (ref instanceof DBRef) {
            return resolveReference((DBRef) ref, withFieldRename);
        }
        Document pointer = (Document) ref;
        return resolveReference((String) pointer.get("$ref"), pointer.get("$id"), withFieldRename);
    }

    /** Resolves a {@link DBRef} through its collection name and id. */
    private Document resolveReference(DBRef ref, boolean withFieldRename) {
        return resolveReference(ref.getCollectionName(), ref.getId(), withFieldRename);
    }

    /**
     * Loads the document with the given {@code _id} from the collection and prepares it for
     * embedding: technical keys are dropped, language structures are flattened, keys are
     * optionally renamed and sub-entity references are resolved recursively.
     *
     * @return the prepared document, or {@code null} when no document has that id
     */
    private Document resolveReference(String collection, Object id, boolean withFieldRename) {
        Document doc = mongoDb.getCollection(collection).find(Filters.eq("_id", id)).first();
        if (doc == null) {
            return null;
        }
        doc.remove("_id");
        doc.remove("className");
        transformLanguageStructure(doc);

        if (withFieldRename) {
            if ("PhysicalThing".equals(collection)) {
                // Stored as a boolean; emitted as a one-element list of its string form.
                // A document without the flag is left as it is.
                Object flag = doc.get("europeanaProxy");
                if (flag != null) {
                    doc.put("europeanaProxy", Arrays.asList(flag.toString()));
                }
            }
            replaceKeys(doc);
        }

        // NOTE(review): sub-entity keys are renamed even when withFieldRename is false,
        // unlike the entity keys handled in transform() - confirm this is intended.
        for (Map.Entry<String, String> subEntity : SUB_ENTITIES.entrySet()) {
            String key = subEntity.getKey();
            if (!doc.containsKey(key)) {
                continue;
            }
            Object subRefs = doc.get(key);
            List<Document> subDocs;
            if (subRefs instanceof List) {
                subDocs = resolveReferences(key, (List<?>) subRefs, withFieldRename);
            } else {
                subDocs = new ArrayList<>();
                if (isReference(subRefs)) {
                    subDocs.add(resolveAnyReference(subRefs, withFieldRename));
                } else if (subRefs != null) {
                    logUnknown(key, subRefs);
                }
            }
            doc.remove(key);
            doc.put(subEntity.getValue(), subDocs);
        }
        return doc;
    }

    /** Renames every key of the document that has an entry in the field dictionary. */
    private void replaceKeys(Document doc) {
        for (Map.Entry<String, String> field : FIELD_DICTIONARY.entrySet()) {
            replaceKey(doc, field.getKey(), field.getValue());
        }
    }

    /** Moves the value stored under {@code from} to {@code to}, if {@code from} is present. */
    private void replaceKey(Document doc, String from, String to) {
        if (doc.containsKey(from)) {
            doc.put(to, doc.remove(from));
        }
    }

    /** Flattens every dictionary field of the document whose value is a nested document. */
    private void transformLanguageStructure(Document doc) {
        for (String field : FIELD_DICTIONARY.keySet()) {
            if (doc.get(field) instanceof Document) {
                replaceLanguage(doc, field);
            }
        }
    }

    /**
     * Replaces a nested document keyed by language with a flat list. Values under the
     * {@code def} key are added as they are; every other value becomes a document with
     * {@code @lang} and {@code #value} keys. A value that is not a list is treated as a
     * single instance; {@code null} values are ignored.
     */
    private void replaceLanguage(Document doc, String key) {
        Document byLanguage = (Document) doc.get(key);
        List<Object> instances = new ArrayList<>();
        for (Map.Entry<String, Object> entry : byLanguage.entrySet()) {
            Object raw = entry.getValue();
            if (raw == null) {
                continue;
            }
            List<?> values;
            if (raw instanceof List) {
                values = (List<?>) raw;
            } else {
                values = Arrays.asList(raw);
            }
            String lang = entry.getKey();
            for (Object value : values) {
                if ("def".equals(lang)) {
                    instances.add(value);
                } else {
                    instances.add(new Document("@lang", lang).append("#value", value));
                }
            }
        }
        doc.put(key, instances);
    }

}