Java tutorial
// MongoInputSplit.java /* * Copyright 2010 10gen Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.mongodb.hadoop.mapred.input; import com.mongodb.*; import com.mongodb.hadoop.util.*; import com.mongodb.util.*; import org.apache.commons.logging.*; import org.apache.hadoop.io.*; import org.apache.hadoop.mapred.*; import java.io.*; import java.util.*; @SuppressWarnings("deprecation") public class MongoInputSplit implements Writable, InputSplit { public MongoInputSplit(MongoURI inputURI, DBObject query, DBObject fields, DBObject sort, int limit, int skip) { log.info("Creating a new MongoInputSplit for MongoURI '" + inputURI + "', query: '" + query + "', fieldSpec: '" + fields + "', sort: '" + sort + "', limit: " + limit + ", skip: " + skip + " ."); _mongoURI = inputURI; _querySpec = query; _fieldSpec = fields; _sortSpec = sort; _limit = limit; _skip = skip; getCursor(); } public MongoInputSplit(com.mongodb.hadoop.input.MongoInputSplit split) { this(split.getMongoURI(), split.getQuerySpec(), split.getFieldSpec(), split.getSortSpec(), split.getLimit(), split.getSkip()); } /** * This is supposed to return the size of the split in bytes, but for now, for sanity sake we return the # of docs * in the split instead. * * @return */ @Override public long getLength() { return Long.MAX_VALUE; } @Override public String[] getLocations() { return _mongoURI.getHosts().toArray(new String[_mongoURI.getHosts().size()]); } /** * Serialize the Split instance */ public void write(DataOutput out) throws IOException { out.writeUTF(_mongoURI.toString()); out.writeUTF(JSON.serialize(_querySpec)); out.writeUTF(JSON.serialize(_fieldSpec)); out.writeUTF(JSON.serialize(_sortSpec)); out.writeInt(_limit); out.writeInt(_skip); } @Override public void readFields(DataInput in) throws IOException { _mongoURI = new MongoURI(in.readUTF()); _querySpec = (DBObject) JSON.parse(in.readUTF()); _fieldSpec = (DBObject) JSON.parse(in.readUTF()); _sortSpec = (DBObject) JSON.parse(in.readUTF()); _limit = in.readInt(); _skip = in.readInt(); getCursor(); if (log.isDebugEnabled()) { log.debug("Deserialized MongoInputSplit ... { length = " + getLength() + ", locations = " + Arrays.toString(getLocations()) + ", query = " + _querySpec + ", fields = " + _fieldSpec + ", sort = " + _sortSpec + ", limit = " + _limit + ", skip = " + _skip + "}"); } } public DBCursor getCursor() { // Return the cursor with the split's query, etc. already slotted in for // them. // todo - support limit/skip if (_cursor == null) { _cursor = MongoConfigUtil.getCollection(_mongoURI).find(_querySpec, _fieldSpec).sort(_sortSpec); _cursor.slaveOk(); } return _cursor; } @Override public String toString() { return "MongoInputSplit{URI=" + _mongoURI + ", query=" + _querySpec + '}'; } public MongoInputSplit() { } private MongoURI _mongoURI; private DBObject _querySpec; private DBObject _fieldSpec; private DBObject _sortSpec; private int _limit = 0; private int _skip = 0; private long _length = -1; private transient DBCursor _cursor; private static final Log log = LogFactory.getLog(MongoInputSplit.class); }