Example usage for com.mongodb DBObject keySet

List of usage examples for com.mongodb DBObject keySet

Introduction

In this page you can find the example usage for com.mongodb DBObject keySet.

Prototype

Set<String> keySet();

Source Link

Document

Returns this object's fields' names

Usage

From source file:com.hipstogram.storm.state.TrackCountMongoDBMapper.java

License:Apache License

@Override
public OpaqueValue getValue(DBObject dbObject) {
    // Extracts the single histogram count stored under HISTOGRAM_NAME and wraps it
    // in an OpaqueValue whose previous value is 0.
    DBObject histogram = (DBObject) dbObject.get(HISTOGRAM_NAME);
    Object[] keys = histogram.keySet().toArray();

    // Default to 0 when the histogram sub-document has no fields.
    // (Use a primitive int: the original boxed Integer added pointless boxing.)
    int count = 0;
    if (keys.length != 0) {
        // NOTE(review): only the FIRST key is read — presumably the histogram holds a
        // single field per document; confirm against the writer of HISTOGRAM_NAME.
        count = Integer.parseInt(histogram.get((String) keys[0]).toString());
    }

    // 0L (uppercase suffix) instead of the original 0l, which is easily misread as "01";
    // the redundant (OpaqueValue) cast is also dropped.
    return new OpaqueValue<Number>(0L, count);
}

From source file:com.ibm.db2j.MongoDB.java

License:Open Source License

/**
 * Analyses the result document from Mongo and determines a suitable
 * initial Logical Table definition from it.
 *
 * Each non-null field contributes "&lt;name&gt; &lt;SQL-type&gt;, " to the definition; the
 * SQL type is inferred from the field value's runtime type. Fields whose value is
 * null (or of an unrecognised type) are skipped, since no column type can be inferred.
 *
 * @param resultDoc - the mongo document that has been retrieved from the collection.
 * @return a String representing a Logical Table definition appropriate to the resultDoc
 *         (empty when no field produced a column).
 */
private static String generateLTDefFromMongoDocument(DBObject resultDoc) {
    StringBuilder ltDef = new StringBuilder();
    for (String fieldName : resultDoc.keySet()) {
        // The field name gives us the column name.
        Object mongoField = resultDoc.get(fieldName);
        if (mongoField != null) {
            if (mongoField instanceof java.lang.String) {
                ltDef.append(fieldName).append(" VARCHAR(255), ");
            } else if (mongoField instanceof java.lang.Integer) {
                ltDef.append(fieldName).append(" INTEGER, ");
            } else if (mongoField instanceof java.lang.Double) {
                ltDef.append(fieldName).append(" DOUBLE, ");
            } else if (mongoField instanceof java.lang.Boolean) {
                ltDef.append(fieldName).append(" BOOLEAN, ");
            } else if (mongoField instanceof java.util.Date) {
                ltDef.append(fieldName).append(" DATE, ");
            } else if (mongoField instanceof org.bson.types.BSONTimestamp) {
                ltDef.append(fieldName).append(" TIMESTAMP, ");
            } else if (mongoField instanceof org.bson.types.ObjectId
                    || mongoField instanceof com.mongodb.BasicDBObject
                    || mongoField instanceof com.mongodb.BasicDBList) {
                // IDs and nested documents/arrays are exposed as strings.
                ltDef.append(fieldName).append(" VARCHAR(255), ");
            }
        }
    }
    // Remove the trailing ", " separator.
    // BUG FIX: the original called delete(defLength - 2, defLength - 1), whose
    // exclusive end index removed only the comma and left a trailing space;
    // truncating by the full two-character separator fixes that.
    int defLength = ltDef.length();
    if (defLength >= 2) {
        ltDef.setLength(defLength - 2);
    }
    return ltDef.toString();
}

From source file:com.ikanow.infinit.e.harvest.HarvestControllerPipeline.java

License:Open Source License

/**
 * Splits a single harvested document into multiple child documents.
 *
 * Two modes, selected by the splitter's script language:
 * 1) "automatic*" (automatic / automatic_json / automatic_xml): evaluates the splitter
 *    script's first argument as a reference into the doc's full text or metadata to
 *    obtain a list of objects, and generates one URL (and hence one child doc) per object.
 * 2) otherwise: delegates to the "follow web links" search-engine subsystem.
 * In both cases the source's RSS config "extra URLs" list is used as scratch space;
 * each resulting URL becomes a new DocumentPojo appended to {@code docs}.
 *
 * @param doc      the parent document being split
 * @param source   the source configuration (its RSS config is reset and reused here)
 * @param splitter the pipeline element describing how to split the document
 * @param docs     output list to which the newly created child documents are appended
 */
private void splitDocuments(DocumentPojo doc, SourcePojo source, SourcePipelinePojo splitter,
        List<DocumentPojo> docs) {
    try {
        // Ensure the RSS config exists and its URL list is empty before we populate it.
        if (null == source.getRssConfig()) {
            source.setRssConfig(new SourceRssConfigPojo());
        }
        if (null != source.getRssConfig().getExtraUrls()) { // refreshed ready for new document
            source.getRssConfig().setExtraUrls(null);
        }

        // Maps generated URL -> the source object it came from, so metadata can be
        // attached to the matching child doc in the loop further down.
        HashMap<String, Object> jsonLookup = new HashMap<String, Object>();
        if ((null != splitter.splitter.getScriptlang())
                && splitter.splitter.getScriptlang().startsWith("automatic")) {
            // (automatic or automatic_json or automatic_xml)

            // Script format: <field-ref>[,<url-spec>[,<extra-args>...]] (comma separated)
            String[] args = splitter.splitter.getScript().split("\\s*,\\s*");
            Object[] objList = null;

            String field = args[0];
            if (field.startsWith(DocumentPojo.fullText_)) { // fullText, or fullText.[x] where [x] is the root value

                // Run the full text through the metadata "stream" extractor via a dummy
                // doc to turn it into a list of extractable objects.
                DocumentPojo dummyDoc = new DocumentPojo();
                dummyDoc.setFullText(doc.getFullText());
                MetadataSpecPojo dummyContent = new MetadataSpecPojo();
                dummyContent.fieldName = "extract";
                dummyContent.scriptlang = "stream";
                dummyContent.flags = "o";

                if (field.equals(DocumentPojo.fullText_)) { // fullText
                    dummyContent.script = "";
                } else {
                    dummyContent.script = field.substring(1 + DocumentPojo.fullText_.length()); //+1 for the "."
                }
                _uah.processMetadataChain(dummyDoc, Arrays.asList(dummyContent), source.getRssConfig(), null);

                BasicDBObject dummyDocDbo = (BasicDBObject) dummyDoc.toDb();
                dummyDocDbo = (BasicDBObject) dummyDocDbo.get(DocumentPojo.metadata_);
                if (null != dummyDocDbo) {
                    objList = ((Collection<?>) (dummyDocDbo.get("extract"))).toArray(); // (returns a list of strings)
                }
            } //TESTED (doc_splitter_test_auto_json, json: test3, xml: test4)
            else if (field.startsWith(DocumentPojo.metadata_)) { // field starts with "metadata."
                objList = doc.getMetadata().get(field.substring(1 + DocumentPojo.metadata_.length())); //+1 for the "."
            } //TESTED (doc_splitter_test_auto_json, test1)
            else { // direct reference to metadata field
                objList = doc.getMetadata().get(field);
            } //TESTED (doc_splitter_test_auto_json, test2)

            if ((null != objList) && (objList.length > 0)) {
                source.getRssConfig().setExtraUrls(new ArrayList<ExtraUrlPojo>(objList.length));
                int num = 0;
                for (Object o : objList) {
                    num++;
                    ExtraUrlPojo url = new ExtraUrlPojo();
                    if ((1 == args.length) || !(o instanceof DBObject)) { // generate default URL
                        url.url = doc.getUrl() + "#" + num;
                    } //TESTED (doc_splitter_test_auto_json, test1)
                    else if (2 == args.length) { // url specified in the format <fieldname-in-dot-notation>
                        url.url = MongoDbUtil.getProperty((DBObject) o, args[1]);
                    } //TESTED (doc_splitter_test_auto_json, test2)
                    else { // url specified in format <message-format-with-{1}-{2}-etc>,<fieldname-in-dot-notation-for-1>,..
                        ArrayList<Object> cmdArgs = new ArrayList<Object>(args.length - 1); //-2 + 1 (+1 - see below)
                        // Dummy element so the MessageFormat placeholders can be 1-based.
                        cmdArgs.add("[INDEX_FROM_1_NOT_0]");
                        for (int j = 2; j < args.length; ++j) {
                            cmdArgs.add(MongoDbUtil.getProperty((DBObject) o, args[j]));
                        }
                        url.url = MessageFormat.format(args[1], cmdArgs.toArray());
                    } //TESTED (doc_splitter_test_auto_json, test3, test4)

                    if (null == url.url) { // (if we can't extract a URL then bail out)
                        continue;
                    }

                    url.title = new StringBuffer(doc.getTitle()).append(" (").append(num).append(")")
                            .toString();
                    url.fullText = o.toString();
                    source.getRssConfig().getExtraUrls().add(url);
                    if (splitter.splitter.getScriptlang().startsWith("automatic_")) { // automatic_json or automatic_xml
                        jsonLookup.put(url.url, o);
                    }
                }
            } //TESTED (doc_splitter_test_auto_json)
        } else { // normal case - run the 'follow web links' code to get the docs
            source.getRssConfig().setSearchConfig(splitter.splitter);

            FeedHarvester_searchEngineSubsystem subsys = new FeedHarvester_searchEngineSubsystem();
            subsys.generateFeedFromSearch(source, _hc, doc);
        }
        // Turn every collected URL into a child document, copying most fields from the parent.
        if (null != source.getRssConfig().getExtraUrls()) {
            for (ExtraUrlPojo newDocInfo : source.getRssConfig().getExtraUrls()) {
                if (null == doc.getSourceUrl()) { // (if sourceUrl != null, bypass it's because it's been generated by a file so is being deleted anyway)
                    //(note: this null check above is relied upon by the federated query engine, so don't go randomly changing it!)

                    if (_hc.getDuplicateManager().isDuplicate_Url(newDocInfo.url, source, null)) {
                        //TODO: should handle updateCycle_secs?
                        continue;
                    }
                }
                DocumentPojo newDoc = new DocumentPojo();
                newDoc.setCreated(doc.getCreated());
                newDoc.setModified(doc.getModified());
                newDoc.setUrl(newDocInfo.url);
                newDoc.setTitle(newDocInfo.title);
                newDoc.setDescription(newDocInfo.description);
                newDoc.setFullText(newDocInfo.fullText);

                // For JSON, also create the metadata)
                if (null != splitter.splitter.getScriptlang()) {
                    if (splitter.splitter.getScriptlang().equals("automatic_json")) {
                        newDoc.addToMetadata("json", jsonLookup.get(newDoc.getUrl()));
                    } else if (splitter.splitter.getScriptlang().equals("automatic_xml")) {
                        // XML: flatten each top-level key of the looked-up object into metadata arrays.
                        Object obj = jsonLookup.get(newDoc.getUrl());
                        if (obj instanceof DBObject) {
                            DBObject dbo = (DBObject) obj;
                            for (String key : dbo.keySet()) {
                                Object objArray = dbo.get(key);
                                if (objArray instanceof Object[]) {
                                    newDoc.addToMetadata(key, (Object[]) objArray);
                                } else if (objArray instanceof Collection<?>) {
                                    newDoc.addToMetadata(key, ((Collection<?>) objArray).toArray());
                                }
                            }
                        } //(test4)
                    }
                } //TESTED (doc_splitter_test_auto_json, test1:json, test4:xml)

                // Published date is a bit more complex
                if (null != newDocInfo.publishedDate) {
                    try {
                        newDoc.setPublishedDate(new Date(DateUtility.parseDate(newDocInfo.publishedDate)));
                    } catch (Exception e) {
                        // (unparseable date - deliberately ignored; fall back to parent's dates below)
                    }
                } //TESTED (test3,test4)
                if (null == newDoc.getPublishedDate()) {
                    newDoc.setPublishedDate(doc.getPublishedDate());
                } //TESTED (test1)
                if (null == newDoc.getPublishedDate()) {
                    newDoc.setPublishedDate(doc.getCreated());
                } //TESTED (test2)
                newDoc.setTempSource(source);
                newDoc.setSource(doc.getSource());
                newDoc.setMediaType(doc.getMediaType());
                newDoc.setSourceKey(doc.getSourceKey());
                newDoc.setSourceUrl(doc.getSourceUrl()); // (otherwise won't be able to delete child docs that come from a file)
                newDoc.setCommunityId(doc.getCommunityId());
                newDoc.setDocGeo(doc.getDocGeo());
                newDoc.setIndex(doc.getIndex());

                newDoc.setSpawnedFrom(splitter);
                docs.add(newDoc);
            } //end loop over URLs
        } //TESTED
    } catch (Exception e) {
        // Any failure is logged against the harvest status rather than propagated.
        StringBuffer errMessage = HarvestExceptionUtils.createExceptionMessage(e);
        _hc.getHarvestStatus().logMessage(errMessage.toString(), true);
    } //TESTED (test4)

}

From source file:com.ikanow.infinit.e.processing.generic.aggregation.AssociationAggregationUtils.java

License:Open Source License

/**
 * Add events to the elastic search index for events
 * and the mongodb collection
 * so they are searchable for searchsuggest
 *
 * Step 1.a, try to just update alias's
 * Step 1.b, if fail, create new entry
 *
 * Step 2, Update totalfreq and doccount
 *
 * Step 3, After updating totalfreq and doccount, write to ES for every group
 *
 * A per-community feature cache is consulted first so that already-seen
 * associations can skip most of the DB round trips; in diagnostic mode
 * writes are logged instead of performed.
 *
 * @param eventFeatures map of index -> (community id -> association feature) to upsert
 */
public static void updateEventFeatures(Map<String, Map<ObjectId, AssociationFeaturePojo>> eventFeatures) {
    // Some diagnostic counters:
    int numCacheMisses = 0;
    int numCacheHits = 0;
    int numNewAssocs = 0;
    long entityAggregationTime = new Date().getTime();

    DBCollection col = DbManager.getFeature().getAssociation();

    // (This fn is normally run for a single community id)
    CommunityFeatureCaches.CommunityFeatureCache currCache = null;

    // Sync time is computed once (lazily) and shared across all new assocs in this call.
    String savedSyncTime = null;
    for (Map<ObjectId, AssociationFeaturePojo> evtCommunity : eventFeatures.values()) {

        Iterator<Map.Entry<ObjectId, AssociationFeaturePojo>> it = evtCommunity.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<ObjectId, AssociationFeaturePojo> evtFeatureKV = it.next();
            try {
                AssociationFeaturePojo evtFeature = evtFeatureKV.getValue();
                long nSavedDocCount = evtFeature.getDoccount();

                ObjectId communityID = evtFeature.getCommunityId();

                // Swap in the right community cache when the community changes.
                if ((null == currCache) || !currCache.getCommunityId().equals(evtFeatureKV.getKey())) {
                    currCache = CommunityFeatureCaches.getCommunityFeatureCache(evtFeatureKV.getKey());
                    if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out.println(
                                    "AssociationAggregationUtils.updateEventFeatures, Opened cache for community: "
                                            + evtFeatureKV.getKey());
                    }
                } //TESTED (by hand)

                // Is this in our cache? If so can short cut a bunch of the DB interaction:
                AssociationFeaturePojo cachedAssoc = currCache.getCachedAssocFeature(evtFeature);
                if (null != cachedAssoc) {
                    if (_incrementalMode) {
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out.println(
                                        "AssociationAggregationUtils.updateEventFeatures, skip cached: "
                                                + cachedAssoc.toDb());
                            //TODO (INF-2825): should be continue-ing here so can use delta more efficiently...
                        }
                    } else if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out
                                    .println("AssociationAggregationUtils.updateEventFeatures, grabbed cached: "
                                            + cachedAssoc.toDb());
                    }
                    numCacheHits++;
                } //TESTED (by hand)
                else {
                    numCacheMisses++;
                }

                //try to update
                BasicDBObject query = new BasicDBObject(AssociationFeaturePojo.index_, evtFeature.getIndex());
                query.put(AssociationFeaturePojo.communityId_, communityID);

                //Step1 try to update alias
                //update arrays
                // Only $addToSet an alias array when the cache doesn't already contain
                // everything we'd be adding (avoids redundant DB writes).
                BasicDBObject multiopAliasArrays = new BasicDBObject();
                // Entity1 Alias:
                if (null != evtFeature.getEntity1_index()) {
                    evtFeature.addEntity1(evtFeature.getEntity1_index());
                }
                if (null != evtFeature.getEntity1()) {
                    if ((null == cachedAssoc) || (null == cachedAssoc.getEntity1())
                            || !cachedAssoc.getEntity1().containsAll(evtFeature.getEntity1())) {
                        BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_,
                                evtFeature.getEntity1());
                        multiopAliasArrays.put(AssociationFeaturePojo.entity1_, multiopE);
                    }
                } //TESTED (by hand)

                // Entity2 Alias:
                if (null != evtFeature.getEntity2_index()) {
                    evtFeature.addEntity2(evtFeature.getEntity2_index());
                }
                if (null != evtFeature.getEntity2()) {
                    if ((null == cachedAssoc) || (null == cachedAssoc.getEntity2())
                            || !cachedAssoc.getEntity2().containsAll(evtFeature.getEntity2())) {
                        BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_,
                                evtFeature.getEntity2());
                        multiopAliasArrays.put(AssociationFeaturePojo.entity2_, multiopE);
                    }
                } //TESTED (by hand)

                // verb/verb cat alias:
                if (null != evtFeature.getVerb_category()) {
                    evtFeature.addVerb(evtFeature.getVerb_category());
                }
                if (null != evtFeature.getVerb()) {
                    if ((null == cachedAssoc) || (null == cachedAssoc.getVerb())
                            || !cachedAssoc.getVerb().containsAll(evtFeature.getVerb())) {
                        BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_, evtFeature.getVerb());
                        multiopAliasArrays.put(AssociationFeaturePojo.verb_, multiopE);
                    }
                } //TESTED (by hand)

                // OK - now we can copy across the fields into the cache:
                if (null != cachedAssoc) {
                    currCache.updateCachedAssocFeatureStatistics(cachedAssoc, evtFeature); //(evtFeature is now fully up to date)
                } //TESTED (by hand)

                BasicDBObject updateOp = new BasicDBObject();
                if (!multiopAliasArrays.isEmpty()) {
                    updateOp.put(MongoDbManager.addToSet_, multiopAliasArrays);
                }
                // Document count for this event
                BasicDBObject updateFreqDocCount = new BasicDBObject(AssociationFeaturePojo.doccount_,
                        nSavedDocCount);
                updateOp.put(MongoDbManager.inc_, updateFreqDocCount);

                BasicDBObject fields = new BasicDBObject(AssociationFeaturePojo.doccount_, 1);
                fields.put(AssociationFeaturePojo.entity1_, 1);
                fields.put(AssociationFeaturePojo.entity2_, 1);
                fields.put(AssociationFeaturePojo.verb_, 1);
                //(slightly annoying, since only want these if updating dc but won't know
                // until after i've got this object)

                fields.put(AssociationFeaturePojo.db_sync_time_, 1);
                fields.put(AssociationFeaturePojo.db_sync_doccount_, 1);

                DBObject dboUpdate = null;
                if (_diagnosticMode) {
                    // Diagnostic mode: read-only - just look the feature up, never write.
                    if (null == cachedAssoc) {
                        dboUpdate = col.findOne(query, fields);
                    }
                } else {
                    if (null != cachedAssoc) {
                        col.update(query, updateOp, false, false);
                    } else { // Not cached - so have to grab the feature we're either getting or creating
                        dboUpdate = col.findAndModify(query, fields, new BasicDBObject(), false, updateOp,
                                false, true);
                        // (can use findAndModify because specify index, ie the shard key)
                        // (returns event before the changes above, update the feature object below)
                        // (also atomically creates the object if it doesn't exist so is "distributed-safe")
                    }
                }
                if ((null != cachedAssoc) || ((dboUpdate != null) && !dboUpdate.keySet().isEmpty())) // (feature already exists)
                {
                    AssociationFeaturePojo egp = cachedAssoc;

                    if (null == egp) {
                        // Non-cached case: merge the pre-update DB state into evtFeature.
                        egp = AssociationFeaturePojo.fromDb(dboUpdate, AssociationFeaturePojo.class);
                        evtFeature.setDoccount(egp.getDoccount() + nSavedDocCount);
                        evtFeature.setDb_sync_doccount(egp.getDb_sync_doccount());
                        evtFeature.setDb_sync_time(egp.getDb_sync_time());
                        if (null != egp.getEntity1()) {
                            for (String ent : egp.getEntity1())
                                evtFeature.addEntity1(ent);
                        }
                        if (null != egp.getEntity2()) {
                            for (String ent : egp.getEntity2())
                                evtFeature.addEntity2(ent);
                        }
                        if (null != egp.getVerb()) {
                            for (String verb : egp.getVerb())
                                evtFeature.addVerb(verb);
                        }
                    } //TESTED (cached and non-cached cases)
                      // (in the cached case, evtFeature has already been updated by updateCachedAssocFeatureStatistics)

                    if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out.println("AssociationAggregationUtils.updateEventFeatures, found: "
                                    + ((BasicDBObject) egp.toDb()).toString());
                        if (_logInDiagnosticMode)
                            System.out.println(
                                    "AssociationAggregationUtils.updateEventFeatures, ^^^ found from query: "
                                            + query.toString() + " / " + updateOp.toString());
                    }
                    // (In background aggregation mode we update db_sync_prio when checking the -otherwise unused, unlike entities- document update schedule)
                } else // (the object in memory is now an accurate representation of the database, minus some fields we'll now add)
                {
                    numNewAssocs++;

                    // Synchronization settings for the newly created object
                    evtFeature.setDb_sync_doccount(nSavedDocCount);
                    if (null == savedSyncTime) {
                        savedSyncTime = Long.toString(System.currentTimeMillis());
                    }
                    evtFeature.setDb_sync_time(savedSyncTime);

                    // This is all "distributed safe" (apart from the db_syc_xxx and it doesn't matter if that is
                    // out of date, the update will just be slightly out-of-date at worst) since (otherwise) these fields are
                    // only set here, and the findAndModify is atomic

                    BasicDBObject baseFields = new BasicDBObject();
                    if (null != evtFeature.getEntity1_index()) {
                        baseFields.put(AssociationFeaturePojo.entity1_index_, evtFeature.getEntity1_index());
                    }
                    if (null != evtFeature.getEntity2_index()) {
                        baseFields.put(AssociationFeaturePojo.entity2_index_, evtFeature.getEntity2_index());
                    }
                    if (null != evtFeature.getVerb_category()) {
                        baseFields.put(AssociationFeaturePojo.verb_category_, evtFeature.getVerb_category());
                    }
                    baseFields.put(AssociationFeaturePojo.assoc_type_, evtFeature.getAssociation_type());
                    baseFields.put(AssociationFeaturePojo.db_sync_doccount_, evtFeature.getDb_sync_doccount());
                    baseFields.put(AssociationFeaturePojo.db_sync_time_, evtFeature.getDb_sync_time());
                    baseFields.put(AssociationFeaturePojo.db_sync_prio_, 1000.0); // (ensures new objects are quickly index-synchronized)

                    if (!_diagnosticMode) {
                        // Store the object
                        col.update(query, new BasicDBObject(MongoDbManager.set_, baseFields));
                    } else {
                        if (_logInDiagnosticMode)
                            System.out.println("AssociationAggregationUtils.updateEventFeatures, not found: "
                                    + query.toString() + " / " + baseFields.toString() + "/ orig_update= "
                                    + updateOp.toString());
                    }

                    // (Note even in background aggregation mode we still perform the feature synchronization
                    //  for new entities - and it has to be right at the end because it "corrupts" the objects)

                } //(end if first time seen)

                if (null == cachedAssoc) { // First time we've seen this locally, so add to cache
                    currCache.addCachedAssocFeature(evtFeature);
                    if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out
                                    .println("AssociationAggregationUtils.updateEventFeatures, added to cache: "
                                            + evtFeature.toDb());
                    }
                } //TESTED (by hand)
            } catch (Exception e) {
                // Exception, remove from feature list
                it.remove();

                // If an exception occurs log the error
                logger.error("Exception Message: " + e.getMessage(), e);
            }

        } // (end loop over all communities for the set of features sharing and index)
    } // (end loop over indexes)

    // Emit a single summary log line for the whole aggregation pass.
    if ((numCacheHits > 0) || (numCacheMisses > 0)) { // ie some assocs were grabbed
        int cacheSize = 0;
        if (null != currCache) {
            cacheSize = currCache.getAssocCacheSize();
        }
        StringBuffer logMsg = new StringBuffer() // (should append key, but don't have that...)
                .append(" assoc_agg_time_ms=").append(new Date().getTime() - entityAggregationTime)
                .append(" total_assocs=").append(eventFeatures.size()).append(" new_assocs=")
                .append(numNewAssocs).append(" cache_misses=").append(numCacheMisses).append(" cache_hits=")
                .append(numCacheHits).append(" cache_size=").append(cacheSize);

        logger.info(logMsg.toString());
    }

}

From source file:com.ikanow.infinit.e.processing.generic.aggregation.EntityAggregationUtils.java

License:Open Source License

/**
 * Updates the feature entries for the list of entities
 * that was just extracted including changing frequency,
 * adding aliases etc.
 * 
 * This method now has 3 steps:
 * 1. Try to update alias
 *    1.a If fail, create new gaz
 * 2. Update totalfreq and doccount
 * 
 * @param ents List of entities to update in the entity feature
 */
public static void updateEntityFeatures(Map<String, Map<ObjectId, EntityFeaturePojo>> entFeatures) {
    // Some diagnostic counters:
    int numCacheMisses = 0;
    int numCacheHits = 0;
    int numNewEntities = 0;
    long entityAggregationTime = new Date().getTime();

    DBCollection col = DbManager.getFeature().getEntity();

    // (This fn is normally run for a single community id)
    CommunityFeatureCaches.CommunityFeatureCache currCache = null;

    String savedSyncTime = null;
    for (Map<ObjectId, EntityFeaturePojo> entCommunity : entFeatures.values()) {

        Iterator<Map.Entry<ObjectId, EntityFeaturePojo>> it = entCommunity.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<ObjectId, EntityFeaturePojo> entFeatureKV = it.next();
            try {
                EntityFeaturePojo entFeature = entFeatureKV.getValue();

                long nSavedDocCount = entFeature.getDoccount();
                long nSavedFreqCount = entFeature.getTotalfreq();
                // (these should be constant across all communities but keep it here
                //  so can assign it using entFeature, it's v cheap so no need to get once like for sync vars)

                // For each community, see if the entity feature already exists *for that community*               
                ObjectId communityID = entFeature.getCommunityId();
                if (null != communityID) {
                    if ((null == currCache) || !currCache.getCommunityId().equals(entFeatureKV.getKey())) {
                        currCache = CommunityFeatureCaches.getCommunityFeatureCache(entFeatureKV.getKey());
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out.println(
                                        "EntityAggregationUtils.updateEntityFeatures, Opened cache for community: "
                                                + entFeatureKV.getKey());
                        }
                    } //TESTED (by hand)

                    // Is this in our cache? If so can short cut a bunch of the DB interaction:
                    EntityFeaturePojo cachedEnt = currCache.getCachedEntityFeature(entFeature);
                    if (null != cachedEnt) {
                        if (_incrementalMode) {
                            if (_diagnosticMode) {
                                if (_logInDiagnosticMode)
                                    System.out.println(
                                            "EntityAggregationUtils.updateEntityFeatures, skip cached: "
                                                    + cachedEnt.toDb());
                                //TODO (INF-2825): should be continue-ing here (after implementing incremental caching fully) so can use delta more efficiently...
                            }
                        } else if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out
                                        .println("EntityAggregationUtils.updateEntityFeatures, grabbed cached: "
                                                + cachedEnt.toDb());
                        }
                        numCacheHits++;

                    } //TESTED (by hand)                  
                    else {
                        numCacheMisses++;
                    }

                    BasicDBObject query = new BasicDBObject(EntityFeaturePojo.index_, entFeature.getIndex());
                    query.put(EntityFeaturePojo.communityId_, communityID);
                    BasicDBObject updateOp = new BasicDBObject();
                    // Add aliases:
                    BasicDBObject updateOpA = new BasicDBObject();
                    if (null != entFeature.getAlias()) {
                        if ((null == cachedEnt) || (null == cachedEnt.getAlias())
                                || !cachedEnt.getAlias().containsAll(entFeature.getAlias())) {
                            //(if the data we have is already cached, don't bother adding it again)
                            BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_,
                                    entFeature.getAlias());
                            updateOpA.put(EntityFeaturePojo.alias_, multiopE);
                        } //TESTED (by hand)
                    }
                    // Add link data, if there is any:
                    if ((null != entFeature.getSemanticLinks()) && !entFeature.getSemanticLinks().isEmpty()) {
                        if ((null == cachedEnt) || (null == cachedEnt.getSemanticLinks())
                                || !cachedEnt.getSemanticLinks().containsAll(entFeature.getSemanticLinks())) {
                            //(if the data we have is already cached, don't bother adding it again)
                            BasicDBObject multiopF = new BasicDBObject(MongoDbManager.each_,
                                    entFeature.getSemanticLinks());
                            updateOpA.put(EntityFeaturePojo.linkdata_, multiopF);
                        } //TESTED (by hand)
                    }
                    // OK - now we can copy across the fields into the cache:
                    if (null != cachedEnt) {
                        currCache.updateCachedEntityFeatureStatistics(cachedEnt, entFeature); //(entFeature is now fully up to date)
                    } //TESTED (by hand)

                    if (!updateOpA.isEmpty()) {
                        updateOp.put(MongoDbManager.addToSet_, updateOpA);
                    }
                    // Update frequency:
                    BasicDBObject updateOpB = new BasicDBObject();
                    updateOpB.put(EntityFeaturePojo.totalfreq_, nSavedFreqCount);
                    updateOpB.put(EntityFeaturePojo.doccount_, nSavedDocCount);
                    updateOp.put(MongoDbManager.inc_, updateOpB);

                    //try to use find/modify to see if something comes back and set doc freq/totalfreq
                    BasicDBObject fields = new BasicDBObject(EntityFeaturePojo.totalfreq_, 1);
                    fields.put(EntityFeaturePojo.doccount_, 1);
                    fields.put(EntityFeaturePojo.alias_, 1);
                    fields.put(EntityFeaturePojo.linkdata_, 1);
                    //(slightly annoying, since only want these 2 largish fields if updating freq but won't know
                    // until after i've got this object)                  
                    fields.put(EntityFeaturePojo.db_sync_time_, 1);
                    fields.put(EntityFeaturePojo.db_sync_doccount_, 1);

                    DBObject dboUpdate = null;
                    if (_diagnosticMode) {
                        if (null == cachedEnt) {
                            dboUpdate = col.findOne(query, fields);
                        }
                    } else {
                        if (null != cachedEnt) {
                            col.update(query, updateOp, false, false);
                        } else { // Not cached - so have to grab the feature we're either getting or creating
                            dboUpdate = col.findAndModify(query, fields, new BasicDBObject(), false, updateOp,
                                    false, true);
                            // (can use findAndModify because specify index, ie the shard key)
                            // (returns entity before the changes above, update the feature object below)
                            // (also atomically creates the object if it doesn't exist so is "distributed-safe")
                        }
                    }
                    if ((null != cachedEnt) || ((dboUpdate != null) && !dboUpdate.keySet().isEmpty())) // (feature already exists)
                    {
                        EntityFeaturePojo gp = cachedEnt;

                        // (Update the entity feature to be correct so that it can be accurately synchronized with the index)
                        if (null == gp) {
                            gp = EntityFeaturePojo.fromDb(dboUpdate, EntityFeaturePojo.class);
                            entFeature.setTotalfreq(gp.getTotalfreq() + nSavedFreqCount);
                            entFeature.setDoccount(gp.getDoccount() + nSavedDocCount);
                            entFeature.setDbSyncDoccount(gp.getDbSyncDoccount());
                            entFeature.setDbSyncTime(gp.getDbSyncTime());
                            if (null != gp.getAlias()) {
                                entFeature.addAllAlias(gp.getAlias());
                            }
                            if (null != gp.getSemanticLinks()) {
                                entFeature.addToSemanticLinks(gp.getSemanticLinks());
                            }
                        } //TESTED (cached case and non-cached case)
                          // (in the cached case, entFeature has already been updated by updateCachedEntityFeatureStatistics)

                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out.println("EntityAggregationUtils.updateEntityFeatures, found: "
                                        + ((BasicDBObject) gp.toDb()).toString());
                            if (_logInDiagnosticMode)
                                System.out.println(
                                        "EntityAggregationUtils.updateEntityFeatures, ^^^ found from query: "
                                                + query.toString() + " / " + updateOp.toString());
                        }
                        // (In background aggregation mode we update db_sync_prio when checking the doc update schedule) 
                    } else // (the object in memory is now an accurate representation of the database, minus some fields we'll now add)
                    {
                        numNewEntities++;

                        // Synchronization settings for the newly created object
                        if (null == savedSyncTime) {
                            savedSyncTime = Long.toString(System.currentTimeMillis());
                        }
                        entFeature.setDbSyncDoccount(nSavedDocCount);
                        entFeature.setDbSyncTime(savedSyncTime);

                        // This is all "distributed safe" (apart from the db_syc_xxx and it doesn't matter if that is 
                        // out of date, the update will just be slightly out-of-date at worst) since (otherwise) these fields are 
                        // only set here, and the findAndModify is atomic

                        // (Do in raw MongoDB for performance)
                        BasicDBObject baseFields = new BasicDBObject();
                        baseFields.put(EntityFeaturePojo.dimension_, entFeature.getDimension().toString());
                        baseFields.put(EntityFeaturePojo.type_, entFeature.getType());
                        baseFields.put(EntityFeaturePojo.disambiguated_name_,
                                entFeature.getDisambiguatedName());
                        baseFields.put(EntityFeaturePojo.db_sync_doccount_, entFeature.getDbSyncDoccount());
                        baseFields.put(EntityFeaturePojo.db_sync_prio_, 1000.0);
                        baseFields.put(EntityFeaturePojo.db_sync_time_, entFeature.getDbSyncTime());
                        if ((null != entFeature.getSemanticLinks())
                                && !entFeature.getSemanticLinks().isEmpty()) {
                            baseFields.put(EntityFeaturePojo.linkdata_, entFeature.getSemanticLinks());
                        }

                        //attempt to add geotag (makes necessary checks on util side)
                        //also add ontology type if geotag is found
                        EntityGeotagAggregationUtils.addEntityGeo(entFeature);
                        if (entFeature.getGeotag() != null) {
                            BasicDBObject geo = new BasicDBObject(GeoPojo.lat_, entFeature.getGeotag().lat);
                            geo.put(GeoPojo.lon_, entFeature.getGeotag().lon);
                            baseFields.put(EntityFeaturePojo.geotag_, geo);

                            if (entFeature.getOntology_type() != null) {
                                baseFields.put(EntityFeaturePojo.ontology_type_, entFeature.getOntology_type());
                            }
                        }

                        if (!_diagnosticMode) {
                            // Store the object
                            col.update(query, new BasicDBObject(MongoDbManager.set_, baseFields));
                        } else {
                            if (_logInDiagnosticMode)
                                System.out.println("EntityAggregationUtils.updateEntityFeatures, not found: "
                                        + query.toString() + ": " + baseFields.toString());
                        }

                    } //(end first time this feature seen - globally)

                    if (null == cachedEnt) { // First time we've seen this locally, so add to cache
                        currCache.addCachedEntityFeature(entFeature);
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out
                                        .println("EntityAggregationUtils.updateEntityFeatures, added to cache: "
                                                + entFeature.toDb());
                        }
                    } //TESTED (by hand)                     

                } //(end if community id assigned)
            } catch (Exception e) {
                // Exception, remove from feature list
                it.remove();

                // If an exception occurs log the error
                logger.error("Exception Message: " + e.getMessage(), e);
            }

        } // (end loop over communities)
    } // (end loop over indexes)

    if ((numCacheHits > 0) || (numCacheMisses > 0)) { // ie some ents were grabbed
        int cacheSize = 0;
        if (null != currCache) {
            cacheSize = currCache.getEntityCacheSize();
        }
        StringBuffer logMsg = new StringBuffer() // (should append key, but don't have that...)
                .append(" ent_agg_time_ms=").append(new Date().getTime() - entityAggregationTime)
                .append(" total_ents=").append(entFeatures.size()).append(" new_ents=").append(numNewEntities)
                .append(" cache_misses=").append(numCacheMisses).append(" cache_hits=").append(numCacheHits)
                .append(" cache_size=").append(cacheSize);

        logger.info(logMsg.toString());
    }

}

From source file:com.imaginea.mongodb.requestdispatchers.DocumentRequestDispatcher.java

License:Apache License

/**
 * Recursively collects the (dot-qualified) keys of a document, including those
 * of any nested sub-documents, into the supplied set.
 * Used by getKeysRequest function above.
 *
 * @param doc
 *            the document whose keys are to be collected
 * @param completeSet
 *            accumulator receiving every discovered key
 * @param prefix
 *            qualifier for nested docs: for the key &lt;foo.bar.baz&gt;, the
 *            prefix would be &lt;foo.bar&gt;
 */
private void getNestedKeys(DBObject doc, Set<String> completeSet, String prefix) {
    for (String key : doc.keySet()) {
        completeSet.add(prefix + key);
        Object value = doc.get(key);
        // Recurse into sub-documents, extending the prefix with this key
        if (value instanceof BasicDBObject) {
            getNestedKeys((BasicDBObject) value, completeSet, prefix + key + ".");
        }
    }
}

From source file:com.imaginea.mongodb.services.DocumentServiceImpl.java

License:Apache License

/**
 * Gets the list of documents inside a collection in a database in mongo to
 * which user is connected to.
 *
 * @param dbName
 *            Name of Database
 * @param collectionName
 *            Name of Collection from which to get all Documents
 *
 * @param query
 *            query to be performed. In case of empty query {} return all
 *            docs.
 *
 * @param keys
 *            Keys to be present in the resulted docs.
 *
 * @param limit
 *            Number of docs to show.
 *
 * @param skip
 *            Docs to skip from the front.
 *
 * @return List of all documents.
 * @exception EmptyDatabaseNameException
 *                If database name is null
 * @exception EmptyCollectionNameException
 *                If Collection name is null
 * @exception UndefinedDatabaseException
 *                If database is not present
 * @exception UndefinedCollectionException
 *                If Collection is not present
 * @exception DatabaseException
 *                throw super type of UndefinedDatabaseException
 * @exception ValidationException
 *                throw super type of
 *                EmptyDatabaseNameException,EmptyCollectionNameException
 * @exception CollectionException
 *                throw super type of UndefinedCollectionException
 * @exception DocumentException
 *                exception while performing get doc list
 *
 */

public ArrayList<DBObject> getQueriedDocsList(String dbName, String collectionName, DBObject query,
        DBObject keys, int limit, int skip)
        throws DatabaseException, CollectionException, DocumentException, ValidationException {

    mongoInstance = mongoInstanceProvider.getMongoInstance();

    // Validate names up front so callers get precise errors rather than driver failures
    if (dbName == null) {
        throw new EmptyDatabaseNameException("Database name is null");

    }
    if (dbName.isEmpty()) {
        throw new EmptyDatabaseNameException("Database Name Empty");
    }

    if (collectionName == null) {
        throw new EmptyCollectionNameException("Collection name is null");
    }
    if (collectionName.isEmpty()) {
        throw new EmptyCollectionNameException("Collection Name Empty");
    }

    ArrayList<DBObject> dataList = new ArrayList<DBObject>();
    try {
        if (!mongoInstance.getDatabaseNames().contains(dbName)) {
            throw new UndefinedDatabaseException("DB with name [" + dbName + "]DOES_NOT_EXIST");
        }

        if (!mongoInstance.getDB(dbName).getCollectionNames().contains(collectionName)) {
            throw new UndefinedCollectionException("Collection with name [" + collectionName
                    + "] DOES NOT EXIST in Database [" + dbName + "]");
        }
        if (keys.keySet().isEmpty()) {
            keys.put("_id", 1); // For empty keys return all _id of all docs
        }

        // Return Queried Documents
        DBCursor cursor = mongoInstance.getDB(dbName).getCollection(collectionName).find(query, keys);
        try {
            cursor.limit(limit);
            cursor.skip(skip);

            while (cursor.hasNext()) {
                dataList.add(cursor.next());
            }
        } finally {
            // Always release the server-side cursor (previously leaked)
            cursor.close();
        }
    } catch (MongoException e) {
        throw new DocumentException(ErrorCodes.GET_DOCUMENT_LIST_EXCEPTION, "GET_DOCUMENT_LIST_EXCEPTION",
                e.getCause());
    }
    return dataList;

}

From source file:com.indeed.iupload.core.authentification.FileBasedUserPermissionProvider.java

License:Apache License

/**
 * Initializes the permission tables from the supplied configuration document:
 * root users, repositories writable by anyone, and the per-repository,
 * per-index map of allowed user names.
 *
 * @param configRoot configuration document read from the backing store
 */
protected void setUp(DBObject configRoot) {
    this.rootUserNames = getStringSetField(configRoot, "rootUserNames");
    this.anyoneWritableRepositories = getStringSetField(configRoot, "anyoneWritableRepositories");
    this.indexwiseAllowedUsers = new HashMap<String, Map<String, Set<String>>>();

    final DBObject repositoryIndexMap = (DBObject) configRoot.get("indexwiseAllowedUsers");
    if (repositoryIndexMap == null) {
        // Config section absent: previously this NPE'd on keySet(); treat as
        // "no per-index restrictions configured" (TODO confirm section is optional)
        return;
    }
    for (String repositoryName : repositoryIndexMap.keySet()) {
        final DBObject indexUsersMap = (DBObject) repositoryIndexMap.get(repositoryName);
        // Build index -> allowed-users for this repository
        final Map<String, Set<String>> map = new HashMap<String, Set<String>>();
        for (String indexName : indexUsersMap.keySet()) {
            map.put(indexName, getStringSetField(indexUsersMap, indexName));
        }
        this.indexwiseAllowedUsers.put(repositoryName, map);
    }
}

From source file:com.jaspersoft.mongodb.importer.MongoDbImporter.java

License:Open Source License

/**
 * Sanity-checks an imported collection by logging the field values and value
 * types of its first 5 documents. Logs an error and returns early if the
 * collection is empty.
 *
 * @param tableName name of the collection to validate
 */
public void validate(String tableName) {
    DBCollection collection = mongodbConnection.getMongoDatabase().getCollection(tableName);
    long size = collection.getCount();
    if (size == 0) {
        logger.error("No data in Mongo database");
        return;
    }
    logger.info("Elements in collection: " + size);
    logger.info("Validating the first 5 entries");
    DBCursor cursor = collection.find().limit(5);
    try {
        logger.info("---------------");
        while (cursor.hasNext()) {
            DBObject object = cursor.next();
            for (String id : object.keySet()) {
                Object value = object.get(id);
                // Guard: a null field value would previously NPE on value.getClass()
                logger.info(value + " -> " + (value == null ? "null" : value.getClass().getName()));
            }
            logger.info("---------------");
        }
    } finally {
        // Always release the server-side cursor (previously leaked)
        cursor.close();
    }
}

From source file:com.jaspersoft.mongodb.MongoDbFieldsProvider.java

License:Open Source License

/**
 * Walks a document and records, for every non-null field, its dot-qualified
 * name and Java value class into the supplied map. Lists are recorded as
 * {@code List.class}; nested documents are recursed into.
 *
 * @param parentFieldName qualifier of the enclosing document, or null at the top level
 * @param currentDbObject document being inspected
 * @param fieldNames      accumulator of qualified field name -> value class
 */
private void processDBObject(String parentFieldName, DBObject currentDbObject,
        Map<String, Class<?>> fieldNames) {
    if (logger.isDebugEnabled()) {
        logger.debug("processDBObject parentFieldName: " + parentFieldName);
        logger.debug("processDBObject currentDbObject: " + currentDbObject.toString());
    }
    for (String fieldName : currentDbObject.keySet()) {
        Object value = currentDbObject.get(fieldName);
        if (value == null) {
            continue; // null values carry no type information
        }
        // Dot-qualify the field name unless we are at the top level
        String qualifiedName = (parentFieldName == null ? "" : parentFieldName + ".") + fieldName;
        if (value instanceof BasicDBList) {
            fieldNames.put(qualifiedName, List.class);
        } else if (value instanceof BasicDBObject) {
            processDBObject(qualifiedName, (DBObject) value, fieldNames);
        } else {
            fieldNames.put(qualifiedName, value.getClass());
        }
    }
}