Example usage for com.mongodb DBCollection findOne

List of usage examples for com.mongodb DBCollection findOne


On this page you can find example usages of com.mongodb DBCollection findOne.


public DBObject findOne(@Nullable final DBObject query, final DBCollectionFindOptions findOptions) 

Source Link


Get a single document from collection.


From source file:bhl.pages.database.MongoConnection.java

License:Open Source License

 * Get the content of the given page and document
 * @param docid the document identifier (an integer in BHL)
 * @param pageid the page identifier (NOT page_sequence) also an int
 * @return the textual content as a String
 * @throws DbException if docid or pageid is missing, or if the lookup fails
public String getPageContent(String docid, String pageid) throws DbException {
    try {
        if (docid != null && pageid != null) {
            DBCollection coll = db.getCollection(Database.PAGES);
            BasicDBObject ref = new BasicDBObject();
            ref.put(JSONKeys.IA_IDENTIFIER, docid);
            ref.put(JSONKeys.BHL_PAGE_ID, Integer.parseInt(pageid));
            BasicDBObject key = new BasicDBObject();
            key.put(JSONKeys.PAGE_SEQUENCE, 1);
            DBObject obj = coll.findOne(ref, key);
            if (obj != null) {
                Object pobj = obj.get(JSONKeys.PAGE_SEQUENCE);
                int pageNo = ((Number) pobj).intValue();
                DBCollection coll2 = db.getCollection(Database.DOCUMENTS);
                BasicDBObject ref2 = new BasicDBObject();
                ref2.put(JSONKeys.IA_IDENTIFIER, docid);
                ref2.put(JSONKeys.PAGE_SEQUENCE, pageNo);
                BasicDBObject key2 = new BasicDBObject();
                key2.put(JSONKeys.CONTENT, 1);
                Object obj2 = coll2.findOne(ref2, key2);
                if (obj2 != null)
                    return (String) ((DBObject) obj2).get(JSONKeys.CONTENT);
                    throw new Exception("could not find content for docid=" + docid + ", pageid=" + pageid);
            } else
                throw new Exception("could not find docid=" + docid + ", pageid=" + pageid);
        } else
            throw new Exception("Missing docid or pageid");
    } catch (Exception e) {
        throw new DbException(e);

From source file:com.ebay.cloud.cms.dal.persistence.MongoExecutor.java

License:Apache License

public static DBObject findOne(PersistenceContext context, MetaClass metadata, DBObject queryObject,
        DBObject fieldObject) {/* ww w .j  a v  a  2  s  . c  o m*/
    long start = System.currentTimeMillis();
    DBObject findResult = null;
    String msg = "success";
    DBCollection dbCollection = context.getDBCollection(metadata);
    try {
        findResult = dbCollection.findOne(queryObject, fieldObject);
    } catch (Throwable t) {
        msg = t.getMessage();
    } finally {
        logMongoAction(context, "find", start, dbCollection, queryObject, fieldObject, null, null, msg);
    return findResult;

From source file:com.ikanow.infinit.e.api.knowledge.DocumentHandler.java

License:Open Source License

 * Get information function that returns the user information in the form of a JSON String.
 * @param isAdmin
 * @param  key   the key definition of the user ( example email@email.com )
 * @return      a JSON string representation of the person information on success
/**
 * Looks up a single document's metadata and wraps the outcome in a {@code ResponsePojo}.
 *
 * <p>What the visible code does:
 * <ul>
 * <li>When {@code sourceKey} is null, {@code idStrOrUrl} is parsed as an {@code ObjectId}
 *     and matched against the document id; otherwise the query matches on source key
 *     and URL.</li>
 * <li>When {@code bReturnFullText} is false the full-text field is excluded from the
 *     fetched fields.</li>
 * <li>When nothing is found, or the found document's URL starts with {@code "?DEL?"},
 *     and an id was supplied, the lookup is retried against the update-id field.</li>
 * <li>When full text is wanted but not stored on the document, gzip-compressed content
 *     is fetched from the content collection and decompressed.</li>
 * <li>When {@code returnRawData} is true (and {@code bReturnFullText} is false) the
 *     response data is a {@code DocumentFileInterface} holding either the harvested
 *     file bytes or the document rendered as JSON.</li>
 * <li>Any exception is logged and turned into a failure response rather than thrown.</li>
 * </ul>
 *
 * <p>NOTE(review): this listing is incomplete. Several closing braces and whole
 * statements are missing (see the dangling expression fragments flagged below), so the
 * comments here describe only what the surviving lines show.
 *
 * @param userIdStr       passed to {@code SocialUtils.getUserCommunities} for non-admin callers
 * @param sourceKey       source key to match, or null to look up by id
 * @param idStrOrUrl      an ObjectId string when {@code sourceKey} is null, otherwise a URL
 * @param bReturnFullText whether the full text should be included in the result
 * @param returnRawData   whether to return file/JSON bytes instead of the document object
 * @param isAdmin         when false, an extra community-based clause is built (statement truncated here)
 * @return the response object; never thrown past on error, per the catch block below
 */
public ResponsePojo getInfo(String userIdStr, String sourceKey, String idStrOrUrl, boolean bReturnFullText,
        boolean returnRawData, boolean isAdmin) {
    ResponsePojo rp = new ResponsePojo();

    try {
        // Set up the query
        BasicDBObject query = new BasicDBObject();
        ObjectId id = null;
        if (null == sourceKey) {
            id = new ObjectId(idStrOrUrl);
            query.put(DocumentPojo._id_, id);
        } else {
            query.put(DocumentPojo.sourceKey_, sourceKey);
            query.put(DocumentPojo.url_, idStrOrUrl);

        // NOTE(review): the statement guarded by this check lost its first line in this
        // listing. The surviving fragment builds a clause from the user's communities,
        // presumably restricting the query for non-admin callers; confirm against the full source.
        if (!isAdmin)
                    new BasicDBObject(MongoDbManager.in_, SocialUtils.getUserCommunities(userIdStr)));
        // (use DBObject here because DocumentPojo is pretty big and this call could conceivably have perf implications)

        BasicDBObject fieldsQ = new BasicDBObject();
        if (!bReturnFullText) {
            fieldsQ.put(DocumentPojo.fullText_, 0); // (XML/JSON have fullText as part of pojo)

        BasicDBObject dbo = (BasicDBObject) DbManager.getDocument().getMetadata().findOne(query, fieldsQ);

        // Retry by update id when there was no hit, or the hit's URL carries the "?DEL?" prefix
        // (presumably a deletion marker; confirm).
        if ((null == dbo) || ((null != dbo.get(DocumentPojo.url_))
                && dbo.getString(DocumentPojo.url_).startsWith("?DEL?"))) {
            if (null != id) { // this might be the update id...               
                query = new BasicDBObject(DocumentPojo.updateId_, id);
                dbo = (BasicDBObject) DbManager.getDocument().getMetadata().findOne(query, fieldsQ);
        //TESTED (update case, normal case, and intermediate case where both update and original still exist)

        // Still nothing: answer "Document not found" (note the flag passed here is true, not false).
        if (null == dbo) {
            rp.setResponse(new ResponseObject("Doc Info", true, "Document not found"));
            return rp;
        DocumentPojo dp = DocumentPojo.fromDb(dbo, DocumentPojo.class);
        if (bReturnFullText) {
            if (null == dp.getFullText()) { // (Some things like database records might have this stored already)
                // Reusable read buffer for the decompression loop below.
                byte[] storageArray = new byte[200000];
                DBCollection contentDB = DbManager.getDocument().getContent();
                BasicDBObject contentQ = new BasicDBObject(CompressedFullTextPojo.url_, dp.getUrl());
                // NOTE(review): dangling fragment; the call that adds this clause to contentQ is
                // missing. It matches either null or the document's source key.
                        new BasicDBObject(MongoDbManager.in_, Arrays.asList(null, dp.getSourceKey())));
                BasicDBObject fields = new BasicDBObject(CompressedFullTextPojo.gzip_content_, 1);
                BasicDBObject dboContent = (BasicDBObject) contentDB.findOne(contentQ, fields);
                if (null != dboContent) {
                    byte[] compressedData = ((byte[]) dboContent.get(CompressedFullTextPojo.gzip_content_));
                    ByteArrayInputStream in = new ByteArrayInputStream(compressedData);
                    GZIPInputStream gzip = new GZIPInputStream(in);
                    int nRead = 0;
                    StringBuffer output = new StringBuffer();
                    // Read until the stream reports end-of-data (negative count).
                    while (nRead >= 0) {
                        nRead = gzip.read(storageArray, 0, 200000);
                        if (nRead > 0) {
                            // NOTE(review): each chunk is decoded on its own, so a multi-byte UTF-8
                            // sequence split across two reads would be decoded incorrectly.
                            String s = new String(storageArray, 0, nRead, "UTF-8");
        } else if (!returnRawData) {
            dp.setFullText(null); // (obviously will normally contain full text anyway)
        } else // if ( returnRawData )
            //check if the harvest type is file, return the file instead
            //if file is db return the json
            //get source
            SourcePojo source = getSourceFromKey(dp.getSourceKey());
            if (source.getExtractType().equals("File")) {
                //get file from harvester
                // Prefer the document's source URL over its URL when one is set.
                String fileURL = dp.getUrl();
                if (dp.getSourceUrl() != null)
                    fileURL = dp.getSourceUrl();
                byte[] bytes = FileHarvester.getFile(fileURL, source);
                if (bytes == null) {
                    // Try returning JSON instead
                    String json = ApiManager.mapToApi(dp, new DocumentPojoApiMap());
                    DocumentFileInterface dfp = new DocumentFileInterface();

                    // NOTE(review): getBytes() with no charset uses the platform default encoding.
                    dfp.bytes = json.getBytes();
                    dfp.mediaType = "application/json";

                    // NOTE(review): dangling fragment; presumably the argument of a missing rp.setResponse( call.
                            new ResponseObject("Doc Info", true, "Document bytes returned successfully"));
                    rp.setData(dfp, null);
                    return rp;
                } else {
                    DocumentFileInterface dfp = new DocumentFileInterface();
                    dfp.bytes = bytes;
                    dfp.mediaType = getMediaType(fileURL);
                    // NOTE(review): dangling fragment; presumably the argument of a missing rp.setResponse( call.
                            new ResponseObject("Doc Info", true, "Document bytes returned successfully"));
                    rp.setData(dfp, null);
                    return rp;
            } else {
                // Source is not a "File" extract: return the document itself rendered as JSON bytes.
                String json = ApiManager.mapToApi(dp, new DocumentPojoApiMap());
                DocumentFileInterface dfp = new DocumentFileInterface();

                // NOTE(review): getBytes() with no charset uses the platform default encoding.
                dfp.bytes = json.getBytes();
                dfp.mediaType = "application/json";

                rp.setResponse(new ResponseObject("Doc Info", true, "Document bytes returned successfully"));
                rp.setData(dfp, null);
                return rp;
        // Default path: return the document object itself.
        rp.setData(dp, new DocumentPojoApiMap());
        rp.setResponse(new ResponseObject("Doc Info", true, "Feed info returned successfully"));
    } //(end full text vs raw data)
    catch (Exception e) {
        // If an exception occurs log the error
        logger.error("Exception Message: " + e.getMessage(), e);
        rp.setResponse(new ResponseObject("Doc Info", false, "error returning feed: " + e.getMessage()));
    // Return Json String representing the user
    return rp;

From source file:com.ikanow.infinit.e.processing.generic.aggregation.AssociationAggregationUtils.java

License:Open Source License

 * Add events to the elastic search index for events
 * and the mongodb collection
 * so they are searchable for searchsuggest
 * Step 1.a, try to just update aliases
 * Step 1.b, if fail, create new entry
 * Step 2, Update totalfreq and doccount
 * Step 3, After updating totalfreq and doccount, write to ES for every group
 * @param events
/**
 * Applies association ("event") feature updates to the collection returned by
 * {@code DbManager.getFeature().getAssociation()}.
 *
 * <p>For each feature in each inner map of {@code eventFeatures}, the visible code:
 * <ol>
 * <li>obtains the community feature cache for the entry's key and looks the feature
 *     up in that cache;</li>
 * <li>builds a query on the feature's index and community id, and an update operation
 *     made of alias-array additions (entity1, entity2, verb) plus a doc-count increment;</li>
 * <li>in diagnostic mode only reads the stored feature (and only when it was not
 *     cached); otherwise issues a plain update when the feature was cached, or a
 *     find-and-modify with upsert when it was not;</li>
 * <li>if the feature already existed, brings the in-memory feature up to date from the
 *     stored or cached copy; otherwise writes the base fields of the new feature.</li>
 * </ol>
 * An exception raised while handling one feature is logged inside the loop.
 *
 * <p>NOTE(review): this listing is incomplete. Many closing braces and several
 * statements are missing (dangling fragments are flagged below), so the comments here
 * describe only what the surviving lines show.
 *
 * @param eventFeatures features to apply; only the outer map's values are iterated,
 *                      and each inner map's key is used to select the community cache
 */
public static void updateEventFeatures(Map<String, Map<ObjectId, AssociationFeaturePojo>> eventFeatures) {
    // Some diagnostic counters:
    // NOTE(review): no surviving line increments these three counters; the increments are
    // presumably among the lines missing from this listing.
    int numCacheMisses = 0;
    int numCacheHits = 0;
    int numNewAssocs = 0;
    long entityAggregationTime = new Date().getTime();

    DBCollection col = DbManager.getFeature().getAssociation();

    // (This fn is normally run for a single community id)
    CommunityFeatureCaches.CommunityFeatureCache currCache = null;

    // Assigned at most once below (first newly created feature); not read in the surviving lines.
    String savedSyncTime = null;
    for (Map<ObjectId, AssociationFeaturePojo> evtCommunity : eventFeatures.values()) {

        Iterator<Map.Entry<ObjectId, AssociationFeaturePojo>> it = evtCommunity.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<ObjectId, AssociationFeaturePojo> evtFeatureKV = it.next();
            try {
                AssociationFeaturePojo evtFeature = evtFeatureKV.getValue();
                // Remember the incoming count: evtFeature's doccount is overwritten further down.
                long nSavedDocCount = evtFeature.getDoccount();

                ObjectId communityID = evtFeature.getCommunityId();

                // Switch caches only when the entry's key differs from the current cache's community.
                // Note the cache is chosen by the map key, while the query below uses communityID.
                if ((null == currCache) || !currCache.getCommunityId().equals(evtFeatureKV.getKey())) {
                    currCache = CommunityFeatureCaches.getCommunityFeatureCache(evtFeatureKV.getKey());
                    if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                                    "AssociationAggregationUtils.updateEventFeatures, Opened cache for community: "
                                            + evtFeatureKV.getKey());
                } //TESTED (by hand)               

                // Is this in our cache? If so can short cut a bunch of the DB interaction:
                AssociationFeaturePojo cachedAssoc = currCache.getCachedAssocFeature(evtFeature);
                if (null != cachedAssoc) {
                    if (_incrementalMode) {
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                        "AssociationAggregationUtils.updateEventFeatures, skip cached: "
                                                + cachedAssoc.toDb());
                            //TODO (INF-2825): should be continue-ing here so can use delta more efficiently...
                    } else if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                                    .println("AssociationAggregationUtils.updateEventFeatures, grabbed cached: "
                                            + cachedAssoc.toDb());
                } //TESTED (by hand)         
                else {

                //try to update
                BasicDBObject query = new BasicDBObject(AssociationFeaturePojo.index_, evtFeature.getIndex());
                query.put(AssociationFeaturePojo.communityId_, communityID);

                //Step1 try to update alias
                //update arrays
                // Alias values are only queued when the cached copy does not already contain all of them.
                BasicDBObject multiopAliasArrays = new BasicDBObject();
                // Entity1 Alias:
                if (null != evtFeature.getEntity1_index()) {
                if (null != evtFeature.getEntity1()) {
                    if ((null == cachedAssoc) || (null == cachedAssoc.getEntity1())
                            || !cachedAssoc.getEntity1().containsAll(evtFeature.getEntity1())) {
                        // NOTE(review): the second constructor argument is missing from this listing.
                        BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_,
                        multiopAliasArrays.put(AssociationFeaturePojo.entity1_, multiopE);
                } //TESTED (by hand)

                // Entity2 Alias:
                if (null != evtFeature.getEntity2_index()) {
                if (null != evtFeature.getEntity2()) {
                    if ((null == cachedAssoc) || (null == cachedAssoc.getEntity2())
                            || !cachedAssoc.getEntity2().containsAll(evtFeature.getEntity2())) {
                        // NOTE(review): the second constructor argument is missing from this listing.
                        BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_,
                        multiopAliasArrays.put(AssociationFeaturePojo.entity2_, multiopE);
                } //TESTED (by hand)

                // verb/verb cat alias:
                if (null != evtFeature.getVerb_category()) {
                if (null != evtFeature.getVerb()) {
                    if ((null == cachedAssoc) || (null == cachedAssoc.getVerb())
                            || !cachedAssoc.getVerb().containsAll(evtFeature.getVerb())) {
                        BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_, evtFeature.getVerb());
                        multiopAliasArrays.put(AssociationFeaturePojo.verb_, multiopE);
                } //TESTED (by hand)

                // OK - now we can copy across the fields into the cache:
                if (null != cachedAssoc) {
                    currCache.updateCachedAssocFeatureStatistics(cachedAssoc, evtFeature); //(evtFeature is now fully up to date)
                } //TESTED (by hand)

                // Assemble the update: alias additions (only if any were queued) plus the doc-count increment.
                BasicDBObject updateOp = new BasicDBObject();
                if (!multiopAliasArrays.isEmpty()) {
                    updateOp.put(MongoDbManager.addToSet_, multiopAliasArrays);
                // Document count for this event
                // NOTE(review): the increment value (second constructor argument) is missing from this listing.
                BasicDBObject updateFreqDocCount = new BasicDBObject(AssociationFeaturePojo.doccount_,
                updateOp.put(MongoDbManager.inc_, updateFreqDocCount);

                // Fields to fetch back from the stored feature.
                BasicDBObject fields = new BasicDBObject(AssociationFeaturePojo.doccount_, 1);
                fields.put(AssociationFeaturePojo.entity1_, 1);
                fields.put(AssociationFeaturePojo.entity2_, 1);
                fields.put(AssociationFeaturePojo.verb_, 1);
                //(slightly annoying, since only want these if updating dc but won't know
                // until after i've got this object)

                fields.put(AssociationFeaturePojo.db_sync_time_, 1);
                fields.put(AssociationFeaturePojo.db_sync_doccount_, 1);

                DBObject dboUpdate = null;
                // Diagnostic mode only reads: the surviving lines issue no update on this path.
                if (_diagnosticMode) {
                    if (null == cachedAssoc) {
                        dboUpdate = col.findOne(query, fields);
                } else {
                    if (null != cachedAssoc) {
                        // Trailing arguments are (upsert=false, multi=false) per the legacy DBCollection.update signature.
                        col.update(query, updateOp, false, false);
                    } else { // Not cached - so have to grab the feature we're either getting or creating
                        // Arguments after fields: empty sort, remove=false, the update, returnNew=false, upsert=true
                        // (per the legacy DBCollection.findAndModify signature).
                        dboUpdate = col.findAndModify(query, fields, new BasicDBObject(), false, updateOp,
                                false, true);
                        // (can use findAndModify because specify index, ie the shard key)
                        // (returns event before the changes above, update the feature object below)
                        // (also atomically creates the object if it doesn't exist so is "distributed-safe")
                // Treated as already existing when it was cached, or when the DB handed back a non-empty object.
                if ((null != cachedAssoc) || ((dboUpdate != null) && !dboUpdate.keySet().isEmpty())) // (feature already exists)
                    AssociationFeaturePojo egp = cachedAssoc;

                    if (null == egp) {
                        egp = AssociationFeaturePojo.fromDb(dboUpdate, AssociationFeaturePojo.class);
                        // Stored count is the pre-update value, so add this batch's count on top.
                        evtFeature.setDoccount(egp.getDoccount() + nSavedDocCount);
                        // NOTE(review): the bodies of the three loops below are missing from this listing.
                        if (null != egp.getEntity1()) {
                            for (String ent : egp.getEntity1())
                        if (null != egp.getEntity2()) {
                            for (String ent : egp.getEntity2())
                        if (null != egp.getVerb()) {
                            for (String verb : egp.getVerb())
                    } //TESTED (cached and non-cached cases)
                      // (in the cached case, evtFeature has already been updated by updateCachedAssocFeatureStatistics)

                    if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                            System.out.println("AssociationAggregationUtils.updateEventFeatures, found: "
                                    + ((BasicDBObject) egp.toDb()).toString());
                        if (_logInDiagnosticMode)
                                    "AssociationAggregationUtils.updateEventFeatures, ^^^ found from query: "
                                            + query.toString() + " / " + updateOp.toString());
                    // (In background aggregation mode we update db_sync_prio when checking the -otherwise unused, unlike entities- document update schedule) 
                } else // (the object in memory is now an accurate representation of the database, minus some fields we'll now add)

                    // Synchronization settings for the newly created object
                    if (null == savedSyncTime) {
                        savedSyncTime = Long.toString(System.currentTimeMillis());

                    // This is all "distributed safe" (apart from the db_syc_xxx and it doesn't matter if that is 
                    // out of date, the update will just be slightly out-of-date at worst) since (otherwise) these fields are 
                    // only set here, and the findAndModify is atomic

                    // Base fields written once for a newly created feature; optional ones only when present.
                    BasicDBObject baseFields = new BasicDBObject();
                    if (null != evtFeature.getEntity1_index()) {
                        baseFields.put(AssociationFeaturePojo.entity1_index_, evtFeature.getEntity1_index());
                    if (null != evtFeature.getEntity2_index()) {
                        baseFields.put(AssociationFeaturePojo.entity2_index_, evtFeature.getEntity2_index());
                    if (null != evtFeature.getVerb_category()) {
                        baseFields.put(AssociationFeaturePojo.verb_category_, evtFeature.getVerb_category());
                    baseFields.put(AssociationFeaturePojo.assoc_type_, evtFeature.getAssociation_type());
                    baseFields.put(AssociationFeaturePojo.db_sync_doccount_, evtFeature.getDb_sync_doccount());
                    baseFields.put(AssociationFeaturePojo.db_sync_time_, evtFeature.getDb_sync_time());
                    baseFields.put(AssociationFeaturePojo.db_sync_prio_, 1000.0); // (ensures new objects are quickly index-synchronized)

                    if (!_diagnosticMode) {
                        // Store the object
                        col.update(query, new BasicDBObject(MongoDbManager.set_, baseFields));
                    } else {
                        if (_logInDiagnosticMode)
                            System.out.println("AssociationAggregationUtils.updateEventFeatures, not found: "
                                    + query.toString() + " / " + baseFields.toString() + "/ orig_update= "
                                    + updateOp.toString());

                    // (Note even in background aggregation mode we still perform the feature synchronization
                    //  for new entities - and it has to be right at the end because it "corrupts" the objects)

                } //(end if first time seen)

                // NOTE(review): the statement that adds the feature to the cache is not among the surviving lines.
                if (null == cachedAssoc) { // First time we've seen this locally, so add to cache
                    if (_diagnosticMode) {
                        if (_logInDiagnosticMode)
                                    .println("AssociationAggregationUtils.updateEventFeatures, added to cache: "
                                            + evtFeature.toDb());
                } //TESTED (by hand)                           
            } catch (Exception e) {
                // Exception, remove from feature list
                // NOTE(review): no removal is visible here; only the logging below survives in this listing.

                // If an exception occurs log the error
                logger.error("Exception Message: " + e.getMessage(), e);

        } // (end loop over all communities for the set of features sharing and index)                        
    } // (end loop over indexes) 

    // Summary statistics; the statement that emits logMsg is not among the surviving lines.
    if ((numCacheHits > 0) || (numCacheMisses > 0)) { // ie some assocs were grabbed
        int cacheSize = 0;
        if (null != currCache) {
            cacheSize = currCache.getAssocCacheSize();
        StringBuffer logMsg = new StringBuffer() // (should append key, but don't have that...)
                .append(" assoc_agg_time_ms=").append(new Date().getTime() - entityAggregationTime)
                .append(" total_assocs=").append(eventFeatures.size()).append(" new_assocs=")
                .append(numNewAssocs).append(" cache_misses=").append(numCacheMisses).append(" cache_hits=")
                .append(numCacheHits).append(" cache_size=").append(cacheSize);



From source file:com.ikanow.infinit.e.processing.generic.aggregation.EntityAggregationUtils.java

License:Open Source License

 * Updates the feature entries for the list of entities
 * that was just extracted including changing frequency,
 * adding aliases etc
 * This method now has 3 steps:
 * 1. Try to update alias
 *    1.a If fail, create new gaz
 * 2. Update totalfreq and doccount
 * @param ents List of entities to update in the entity feature
public static void updateEntityFeatures(Map<String, Map<ObjectId, EntityFeaturePojo>> entFeatures) {
    // Some diagnostic counters:
    int numCacheMisses = 0;
    int numCacheHits = 0;
    int numNewEntities = 0;
    long entityAggregationTime = new Date().getTime();

    DBCollection col = DbManager.getFeature().getEntity();

    // (This fn is normally run for a single community id)
    CommunityFeatureCaches.CommunityFeatureCache currCache = null;

    String savedSyncTime = null;
    for (Map<ObjectId, EntityFeaturePojo> entCommunity : entFeatures.values()) {

        Iterator<Map.Entry<ObjectId, EntityFeaturePojo>> it = entCommunity.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<ObjectId, EntityFeaturePojo> entFeatureKV = it.next();
            try {
                EntityFeaturePojo entFeature = entFeatureKV.getValue();

                long nSavedDocCount = entFeature.getDoccount();
                long nSavedFreqCount = entFeature.getTotalfreq();
                // (these should be constant across all communities but keep it here
                //  so can assign it using entFeature, it's v cheap so no need to get once like for sync vars)

                // For each community, see if the entity feature already exists *for that community*               
                ObjectId communityID = entFeature.getCommunityId();
                if (null != communityID) {
                    if ((null == currCache) || !currCache.getCommunityId().equals(entFeatureKV.getKey())) {
                        currCache = CommunityFeatureCaches.getCommunityFeatureCache(entFeatureKV.getKey());
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                        "EntityAggregationUtils.updateEntityFeatures, Opened cache for community: "
                                                + entFeatureKV.getKey());
                    } //TESTED (by hand)

                    // Is this in our cache? If so can short cut a bunch of the DB interaction:
                    EntityFeaturePojo cachedEnt = currCache.getCachedEntityFeature(entFeature);
                    if (null != cachedEnt) {
                        if (_incrementalMode) {
                            if (_diagnosticMode) {
                                if (_logInDiagnosticMode)
                                            "EntityAggregationUtils.updateEntityFeatures, skip cached: "
                                                    + cachedEnt.toDb());
                                //TODO (INF-2825): should be continue-ing here (after implementing incremental caching fully) so can use delta more efficiently...
                        } else if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                        .println("EntityAggregationUtils.updateEntityFeatures, grabbed cached: "
                                                + cachedEnt.toDb());

                    } //TESTED (by hand)                  
                    else {

                    BasicDBObject query = new BasicDBObject(EntityFeaturePojo.index_, entFeature.getIndex());
                    query.put(EntityFeaturePojo.communityId_, communityID);
                    BasicDBObject updateOp = new BasicDBObject();
                    // Add aliases:
                    BasicDBObject updateOpA = new BasicDBObject();
                    if (null != entFeature.getAlias()) {
                        if ((null == cachedEnt) || (null == cachedEnt.getAlias())
                                || !cachedEnt.getAlias().containsAll(entFeature.getAlias())) {
                            //(if the data we have is already cached, don't bother adding it again)
                            BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_,
                            updateOpA.put(EntityFeaturePojo.alias_, multiopE);
                        } //TESTED (by hand)
                    // Add link data, if there is any:
                    if ((null != entFeature.getSemanticLinks()) && !entFeature.getSemanticLinks().isEmpty()) {
                        if ((null == cachedEnt) || (null == cachedEnt.getSemanticLinks())
                                || !cachedEnt.getSemanticLinks().containsAll(entFeature.getSemanticLinks())) {
                            //(if the data we have is already cached, don't bother adding it again)
                            BasicDBObject multiopF = new BasicDBObject(MongoDbManager.each_,
                            updateOpA.put(EntityFeaturePojo.linkdata_, multiopF);
                        } //TESTED (by hand)
                    // OK - now we can copy across the fields into the cache:
                    if (null != cachedEnt) {
                        currCache.updateCachedEntityFeatureStatistics(cachedEnt, entFeature); //(entFeature is now fully up to date)
                    } //TESTED (by hand)

                    if (!updateOpA.isEmpty()) {
                        updateOp.put(MongoDbManager.addToSet_, updateOpA);
                    // Update frequency:
                    BasicDBObject updateOpB = new BasicDBObject();
                    updateOpB.put(EntityFeaturePojo.totalfreq_, nSavedFreqCount);
                    updateOpB.put(EntityFeaturePojo.doccount_, nSavedDocCount);
                    updateOp.put(MongoDbManager.inc_, updateOpB);

                    //try to use find/modify to see if something comes back and set doc freq/totalfreq
                    BasicDBObject fields = new BasicDBObject(EntityFeaturePojo.totalfreq_, 1);
                    fields.put(EntityFeaturePojo.doccount_, 1);
                    fields.put(EntityFeaturePojo.alias_, 1);
                    fields.put(EntityFeaturePojo.linkdata_, 1);
                    //(slightly annoying, since only want these 2 largish fields if updating freq but won't know
                    // until after i've got this object)                  
                    fields.put(EntityFeaturePojo.db_sync_time_, 1);
                    fields.put(EntityFeaturePojo.db_sync_doccount_, 1);

                    DBObject dboUpdate = null;
                    if (_diagnosticMode) {
                        if (null == cachedEnt) {
                            dboUpdate = col.findOne(query, fields);
                    } else {
                        if (null != cachedEnt) {
                            col.update(query, updateOp, false, false);
                        } else { // Not cached - so have to grab the feature we're either getting or creating
                            dboUpdate = col.findAndModify(query, fields, new BasicDBObject(), false, updateOp,
                                    false, true);
                            // (can use findAndModify because specify index, ie the shard key)
                            // (returns entity before the changes above, update the feature object below)
                            // (also atomically creates the object if it doesn't exist so is "distributed-safe")
                    if ((null != cachedEnt) || ((dboUpdate != null) && !dboUpdate.keySet().isEmpty())) // (feature already exists)
                        EntityFeaturePojo gp = cachedEnt;

                        // (Update the entity feature to be correct so that it can be accurately synchronized with the index)
                        if (null == gp) {
                            gp = EntityFeaturePojo.fromDb(dboUpdate, EntityFeaturePojo.class);
                            entFeature.setTotalfreq(gp.getTotalfreq() + nSavedFreqCount);
                            entFeature.setDoccount(gp.getDoccount() + nSavedDocCount);
                            if (null != gp.getAlias()) {
                            if (null != gp.getSemanticLinks()) {
                        } //TESTED (cached case and non-cached case)
                          // (in the cached case, entFeature has already been updated by updateCachedEntityFeatureStatistics)

                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                System.out.println("EntityAggregationUtils.updateEntityFeatures, found: "
                                        + ((BasicDBObject) gp.toDb()).toString());
                            if (_logInDiagnosticMode)
                                        "EntityAggregationUtils.updateEntityFeatures, ^^^ found from query: "
                                                + query.toString() + " / " + updateOp.toString());
                        // (In background aggregation mode we update db_sync_prio when checking the doc update schedule) 
                    } else // (the object in memory is now an accurate representation of the database, minus some fields we'll now add)

                        // Synchronization settings for the newly created object
                        if (null == savedSyncTime) {
                            savedSyncTime = Long.toString(System.currentTimeMillis());

                        // This is all "distributed safe" (apart from the db_syc_xxx and it doesn't matter if that is 
                        // out of date, the update will just be slightly out-of-date at worst) since (otherwise) these fields are 
                        // only set here, and the findAndModify is atomic

                        // (Do in raw MongoDB for performance)
                        BasicDBObject baseFields = new BasicDBObject();
                        baseFields.put(EntityFeaturePojo.dimension_, entFeature.getDimension().toString());
                        baseFields.put(EntityFeaturePojo.type_, entFeature.getType());
                        baseFields.put(EntityFeaturePojo.db_sync_doccount_, entFeature.getDbSyncDoccount());
                        baseFields.put(EntityFeaturePojo.db_sync_prio_, 1000.0);
                        baseFields.put(EntityFeaturePojo.db_sync_time_, entFeature.getDbSyncTime());
                        if ((null != entFeature.getSemanticLinks())
                                && !entFeature.getSemanticLinks().isEmpty()) {
                            baseFields.put(EntityFeaturePojo.linkdata_, entFeature.getSemanticLinks());

                        //attempt to add geotag (makes necessary checks on util side)
                        //also add ontology type if geotag is found
                        if (entFeature.getGeotag() != null) {
                            BasicDBObject geo = new BasicDBObject(GeoPojo.lat_, entFeature.getGeotag().lat);
                            geo.put(GeoPojo.lon_, entFeature.getGeotag().lon);
                            baseFields.put(EntityFeaturePojo.geotag_, geo);

                            if (entFeature.getOntology_type() != null) {
                                baseFields.put(EntityFeaturePojo.ontology_type_, entFeature.getOntology_type());

                        if (!_diagnosticMode) {
                            // Store the object
                            col.update(query, new BasicDBObject(MongoDbManager.set_, baseFields));
                        } else {
                            if (_logInDiagnosticMode)
                                System.out.println("EntityAggregationUtils.updateEntityFeatures, not found: "
                                        + query.toString() + ": " + baseFields.toString());

                    } //(end first time this feature seen - globally)

                    if (null == cachedEnt) { // First time we've seen this locally, so add to cache
                        if (_diagnosticMode) {
                            if (_logInDiagnosticMode)
                                        .println("EntityAggregationUtils.updateEntityFeatures, added to cache: "
                                                + entFeature.toDb());
                    } //TESTED (by hand)                     

                } //(end if community id assigned)
            } catch (Exception e) {
                // Exception, remove from feature list

                // If an exception occurs log the error
                logger.error("Exception Message: " + e.getMessage(), e);

        } // (end loop over communities)
    } // (end loop over indexes)

    if ((numCacheHits > 0) || (numCacheMisses > 0)) { // ie some ents were grabbed
        int cacheSize = 0;
        if (null != currCache) {
            cacheSize = currCache.getEntityCacheSize();
        StringBuffer logMsg = new StringBuffer() // (should append key, but don't have that...)
                .append(" ent_agg_time_ms=").append(new Date().getTime() - entityAggregationTime)
                .append(" total_ents=").append(entFeatures.size()).append(" new_ents=").append(numNewEntities)
                .append(" cache_misses=").append(numCacheMisses).append(" cache_hits=").append(numCacheHits)
                .append(" cache_size=").append(cacheSize);



From source file:com.ikanow.infinit.e.processing.generic.store_and_index.StoreAndIndexManager.java

License:Open Source License

 * Remove a doc from the data store, ensures all the fields specified in "fields" are populated (ready for index deletion)
 * @param col - the collection the doc is looked up in and soft-deleted from
 * @param doc - needs  url, sourceKey set
 * @param fields - fields to retrieve (index, created), set in calling function outside of loop for performance
 * CALLED FROM: removeFromDatastore_byURL(col, List<doc>, bDeleteContent) <- ADDS INDEX, CREATED TO FIELDS 
 *                removeFromDataStore_byURL(List<doc>, bDeleteContent) [ALSO DELETES FROM INDEX AFTER ADDED FROM HERE]
 *                   MongoDocumentTxfer.doDelete(...)  <- SETS URL, SOURCE URL, SOURCE KEY, COMMUNITY ID, INDEX, _ID
 *                   processDocuments(...) [ always called after harvester: have sourceUrl, sourceKey, 
 *                                     DON'T have _id, BUT do have updateId and index (correct except in many geo cases)]
 *                   pruneSource(source, ...) <- SETS URL, SOURCE URL, SOURCE KEY, INDEX
 *                      updateHarvestStatus(...)
private void removeFromDatastore_byURL(DBCollection col, DocumentPojo doc, BasicDBObject fields,
        boolean bDeleteContent) {
    // Soft-deletes the record(s) in "col" matching doc's url + sourceKey, and fills in doc's
    // id/created/index along the way (from doc.updateId when available, otherwise from the fetched record).
    // NOTE(review): this listing has lost its closing braces and several statements; gaps are flagged inline.

    // 1] Create the query to soft delete the document

    BasicDBObject query = new BasicDBObject();
    query.put(DocumentPojo.url_, doc.getUrl());
    query.put(DocumentPojo.sourceKey_, SourcePojo.getDistributedKeyQueryTerm(doc.getSourceKey()));

    // 2] Delete the content if needed

    if (bDeleteContent) {
        if (docHasExternalContent(doc.getUrl(), doc.getSourceUrl())) {
            if (!_diagnosticMode) {
                // NOTE(review): this branch is empty in the listing - the actual content-delete call appears to be missing
            } else {
                // (diagnostic mode: only report what would have been deleted)
                System.out.println("StoreAndIndexManager.removeFromDatastore_byUrl(2), delete content: "
                        + doc.getSourceKey() + "/" + doc.getUrl());

    // 3] Work out which fields we have and which (if any) we need to go and fetch:

    boolean needToFindAndModify = false;

    if (null == doc.getId()) { // This is called from processDocuments

        if (null != doc.getUpdateId()) { // update case...
            doc.setId(doc.getUpdateId()); // (note this is overwritten by addToDatastore later, in update case, so we're good)

            // (doc.index is populated but may not be correct because of the "many geos" workaround):
            if (DocumentPojoIndexMap.hasManyGeos(doc)) {
                // (note this check isn't stateless, it actually populates "locs" at the same time
                //  this is handled in addToDatastore (update case), temp removed when adding to DB
                // NOTE(review): no statement follows in this listing; presumably doc.index was corrected here - confirm
            } //TESTED (2.1.2, diagnostic mode, doc2)
        } else { // Not an update case, we're going to have to grab the document after all, which is a bit slower
            needToFindAndModify = true;
    } //TESTED (2.1.2, diagnostic mode, doc2)
    if (!needToFindAndModify) { // set created if we need to, since we're not grabbing it from the datastore
        if (null != doc.getUpdateId()) { // (this means we have an approx created if we don't need to go fetch the deleted doc)
            // (the created date is taken from the update id's own timestamp)
            doc.setCreated(new Date(doc.getUpdateId().getTime()));
        } //TESTED (2.1.2, diagnostic mode, doc2)
    // (if we're here and index is not set, then it is intended to be null)

    // 4] Update the doc_metadata collection

    BasicDBObject softDelete = getSoftDeleteUpdate();
    BasicDBObject deadDoc = null; // (not normally needed)

    if (needToFindAndModify) { // less pleasant, need to go grab the doc
        // (read the requested "fields" of the matching record before it is soft-deleted;
        //  note this read happens in diagnostic mode too)
        deadDoc = (BasicDBObject) col.findOne(query, fields);
    } //TESTED (2.1.2)

    if (!_diagnosticMode) {
        // (args are upsert=false, multi=true)
        col.update(query, softDelete, false, true); // (needs to be multi- even though there's a single element for sharding reasons)
    } //TESTED (2.1.2)

    // 5] Add fields if necessary

    if (null != deadDoc) {
        // (copy created/_id/index from the fetched record onto the in-memory doc)
        doc.setCreated((Date) deadDoc.get(DocumentPojo.created_));
        // (if getting this doc anyway then might as well get the created)
        doc.setId((ObjectId) deadDoc.get(DocumentPojo._id_));
        doc.setIndex((String) deadDoc.get(DocumentPojo.index_));

        if (_diagnosticMode) {
                    // NOTE(review): the receiver of this println (presumably System.out) is missing from the listing
                    .println("StoreAndIndexManager.removeFromDatastore_byUrl(2): found " + deadDoc.toString());
    } //TESTED (2.1.2)
    else if (_diagnosticMode) {
        // (nothing was fetched: either no fetch was needed, or the query matched nothing)
        if (!needToFindAndModify) {
            System.out.println("StoreAndIndexManager.removeFromDatastore_byUrl(2): straight deleted "
                    + doc.toDb().toString());
        } else {
                    // NOTE(review): the print call wrapping this message is missing from the listing
                    "StoreAndIndexManager.removeFromDatastore_byUrl(2): didn't find " + query.toString());
    } //TESTED (2.1.2)

From source file:com.ikanow.infinit.e.processing.generic.synchronization.SynchronizationManager.java

License:Open Source License

 * Does the DB sync, pulls all solr docs that occurred from the
 * cleanseStartTime and source and makes sure they are in the mongo db.
 *
 * @param lastCleanse 1 hour before this harvester started
 * @param sources list of sources we are syncing
 * @return The number of errors fixed (docs deleted)
//TODO INF-2239 ... lol fail if syncDB isn't called then dbCache is empty and everything gets deleted...
public int syncSearch(long cleanseStartTime, Set<String> dbCache) {
    // For each source, scans its search-index hits and checks each hit's _id against dbCache
    // (or directly against the metadata collection when dbCache is null); hits that are not found
    // are logged as "db sync removing doc". Returns fixcount.
    // NOTE(review): this listing has lost its closing braces and several statements (eg whatever
    // populates docs_to_remove and increments fixcount/oldfixes/iteration); gaps are flagged inline.
    // NOTE(review): "sources" and "bKillMeNow" are not declared here - presumably fields of the enclosing class.
    // NOTE(review): cleanseStartTime is not referenced anywhere in the visible code.
    int fixcount = 0;
    StoreAndIndexManager storeManager = new StoreAndIndexManager();

    DBCollection documentDb = DbManager.getDocument().getMetadata();
    BasicDBObject queryFields = new BasicDBObject(); // (ie just _id, basically only need to know if it exists)
    try {
        // get search-index entries for each source
        int source_index = 0;
        int source_count = sources.size();
        for (SourcePojo sp : sources) {
            // (early exit on the kill flag, returning whatever has been fixed so far)
            if (bKillMeNow) {
                return fixcount;
            List<DocumentPojo> docs_to_remove = new ArrayList<DocumentPojo>();

            // Get all indexes this source might use:
            StringBuffer sb = new StringBuffer("document_index");
            for (ObjectId sCommunityId : sp.getCommunityIds()) {
                // NOTE(review): loop body missing from the listing; presumably appends a per-community index name to sb - confirm

            ElasticSearchManager esm = ElasticSearchManager.getIndex(sb.toString());

            // Query for every doc belonging to this source, 200 hits per request
            SearchRequestBuilder searchOptions = esm.getSearchOptions();
            BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
            boolQuery.must(QueryBuilders.termQuery(DocumentPojo.sourceKey_, sp.getKey()));
            searchOptions.setSize(200); // (note this is multiplied by the number of primary shards)
            SearchResponse rsp = esm.doQuery(boolQuery, searchOptions);
            String scrollId = rsp.getScrollId();
            int nSkip = 0; // (running count of hits processed so far, used for logging)

            for (;;) // Until no more hits
                rsp = esm.doScrollingQuery(scrollId, "10m");
                SearchHit[] docs = rsp.getHits().getHits();
                scrollId = rsp.getScrollId(); // (carry the scroll id from each response into the next request)

                if ((null == docs) || (0 == docs.length)) {
                    // NOTE(review): body missing from the listing; presumably breaks out of the scroll loop
                if (docs.length > 100) { // (just display large checks)
                    logger.info("Checking ES docs for large source=" + sp.getKey() + " source: " + source_index
                            + "/" + source_count + " from " + nSkip + " to " + (nSkip + docs.length));

                // Check all search-index hits against mongodb

                for (SearchHit hit : docs) {
                    String idStr = hit.getId();
                    boolean found = true; //(fail closed!)
                    if (null == dbCache) {
                        // (no cache supplied: look the _id up in the metadata collection directly)
                        ObjectId id = new ObjectId(idStr);
                        BasicDBObject query = new BasicDBObject(DocumentPojo._id_, id);
                        query.put(DocumentPojo.sourceKey_, sp.getDistributedKeyQueryTerm()); // (ensures uses only the right shard)
                        DBObject dbo = documentDb.findOne(query, queryFields);
                        found = (dbo != null);
                    } //TESTED
                    else {
                        found = dbCache.contains(idStr);
                    } //TESTED
                    if (!found) {
                        // (build a stub doc describing the orphaned index entry)
                        ObjectId id = new ObjectId(idStr);
                        DocumentPojo doc = new DocumentPojo();
                        doc.setIndex(hit.getIndex() + "/document_index");
                        // NOTE(review): the statements that finish populating doc and add it to docs_to_remove appear to be missing
                        logger.info("db sync removing doc: " + id + "/" + hit.getIndex() + "/" + source_index
                                + " not found in mongo");
                    } // end if not found
                } // end loop over docs to check

                nSkip += docs.length;
            } // until no more hits
            if (!docs_to_remove.isEmpty()) {
                // NOTE(review): body missing from the listing; presumably hands docs_to_remove to storeManager - confirm

            // CHECK OLD FEEDS 10 at a time
            int iteration = 1;
            boolean removedAll = true;
            while (removedAll) {
                int rows = iteration * iteration * 10; // (quadratic scaling: 10 * iteration^2)
                int oldfixes = 0;

                // get old docs from es (sorted by ascending created date)
                SearchRequestBuilder searchOptionsOLD = esm.getSearchOptions();
                BoolQueryBuilder boolQueryOLD = QueryBuilders.boolQuery();
                boolQueryOLD.must(QueryBuilders.termQuery(DocumentPojo.sourceKey_, sp.getKey()));
                searchOptionsOLD.addSort(DocumentPojo.created_, SortOrder.ASC);
                // NOTE(review): "rows" is not applied to searchOptionsOLD in the visible code; a setSize call may be missing
                SearchResponse rspOLD = esm.doQuery(boolQueryOLD, searchOptionsOLD);
                SearchHit[] docsOLD = rspOLD.getHits().getHits();

                // Check all search-index hits against mongodb

                for (SearchHit hit : docsOLD) {
                    String idStr = hit.getId();
                    boolean found = true;
                    if (null == dbCache) {
                        // (unlike the scroll loop above, this lookup is by _id only, with no sourceKey term)
                        ObjectId id = new ObjectId(idStr);
                        BasicDBObject queryOLD = new BasicDBObject(DocumentPojo._id_, id);
                        DBObject dbo = documentDb.findOne(queryOLD, queryFields);
                        found = (dbo != null);
                    } //TESTED
                    else {
                        found = dbCache.contains(idStr);
                    } //TESTED
                    if (!found) {
                        // Also need to check the DB since dbCache is not guaranteed to be populated with the same
                        // number of "final" docs
                        ObjectId id = new ObjectId(idStr);
                        if (rows > 10) { // (dbCache always loaded with the first 10 rows)
                            BasicDBObject queryOLD = new BasicDBObject(DocumentPojo._id_, id);
                            if (null != documentDb.findOne(queryOLD, queryFields)) { // it is actually present
                                // NOTE(review): body missing from the listing; presumably skips this hit - confirm
                        DocumentPojo doc = new DocumentPojo();
                        doc.setIndex(hit.getIndex() + "/document_index");
                                // NOTE(review): the logger call wrapping this message is missing from the listing
                                "db sync removing doc: " + idStr + "/" + source_index + " not found in mongo");
                if (!docs_to_remove.isEmpty()) {
                    // NOTE(review): body missing from the listing; presumably removes the queued docs - confirm

                // (stop once a pass fixes fewer docs than the number of rows considered)
                if (oldfixes != rows)
                    removedAll = false;
        } // end loop over sources
    } catch (Exception e) {
        // If an exception occurs log the error
        logger.error("Exception Message: " + e.getMessage(), e);
    return fixcount;

From source file:com.nlp.twitterstream.MongoUtil.java

License:Open Source License

 * Get first document that matches selection in database. Return only
 * selected fields
 * @param collection
 *            DBCollection object
 * @return DBObject
public DBObject getOneDocFields(DBCollection collection, DBObject obj1, DBObject obj2) {
    // Runs findOne with obj1 and obj2 as its two arguments (per the driver API: the query, then the
    // projection of fields to return).
    // NOTE(review): myDoc is not declared in this method - presumably a field of the enclosing class, so the
    // result is also retained there after the call; confirm that is intended.
    myDoc = collection.findOne(obj1, obj2);

    // (whatever findOne produced is handed straight back to the caller)
    return myDoc;

From source file:com.socialsky.mods.MongoPersistor.java

License:Apache License

private void doFindOne(Message<JsonObject> message) {
    // Handles a "find one" request: reads "collection", optional "matcher" and optional "keys" from the
    // message, runs the matching findOne overload, and replies OK with the document under "result" if one was found.
    // NOTE(review): this listing has lost its closing braces.
    String collection = getMandatoryString("collection", message);
    if (collection == null) {
        return; // no collection name, so there is nothing to query
    JsonObject matcher = message.body().getObject("matcher");
    JsonObject keys = message.body().getObject("keys");
    DBCollection coll = db.getCollection(collection);
    DBObject res;
    // Pick the findOne overload according to which of matcher/keys were supplied
    if (matcher == null) {
        res = keys != null ? coll.findOne(null, jsonToDBObject(keys)) : coll.findOne();
    } else {
        res = keys != null ? coll.findOne(jsonToDBObject(matcher), jsonToDBObject(keys))
                : coll.findOne(jsonToDBObject(matcher));
    JsonObject reply = new JsonObject();
    // ("result" is only set when a document was found; otherwise the reply body stays empty)
    if (res != null) {
        JsonObject m = new JsonObject(res.toMap());
        reply.putObject("result", m);
    sendOK(message, reply);

From source file:com.softinstigate.restheart.db.CollectionDAO.java

License:Open Source License

 * Deletes a collection.
 * @param dbName the database name of the collection
 * @param collName the collection name
 * @param etag the entity tag. must match to allow actual write (otherwise
 * http error code is returned)
 * @return the HttpStatus code to set in the http response
public static int deleteCollection(String dbName, String collName, ObjectId etag) {
    DBCollection coll = getCollection(dbName, collName);

    BasicDBObject checkEtag = new BasicDBObject("_id", "_properties");
    checkEtag.append("_etag", etag);

    DBObject exists = coll.findOne(checkEtag, fieldsToReturn);

    if (exists == null) {
        return HttpStatus.SC_PRECONDITION_FAILED;
    } else {
        return HttpStatus.SC_NO_CONTENT;