// Java tutorial
/**
 * Twitter Tools
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cc.twittertools.search.api;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;

import javax.annotation.Nullable;

import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import cc.twittertools.index.IndexStatuses;
import cc.twittertools.index.IndexStatuses.StatusField;
import cc.twittertools.thrift.gen.TQuery;
import cc.twittertools.thrift.gen.TResult;
import cc.twittertools.thrift.gen.TrecSearch;
import cc.twittertools.thrift.gen.TrecSearchException;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

/**
 * Implementation of {@link TrecSearch.Iface} that runs queries against a Lucene index of
 * statuses and converts each hit into a {@link TResult}.
 *
 * <p>When a credentials map is supplied, every query must carry a {@code group} that is a key of
 * the map and a {@code token} equal to the mapped value; otherwise the query is rejected.
 */
public class TrecSearchHandler implements TrecSearch.Iface {
  private static final Logger LOG = Logger.getLogger(TrecSearchHandler.class);

  /** Upper bound on the number of hits returned for a single query. */
  private static final int MAX_NUM_RESULTS = 10000;

  private final IndexSearcher searcher;

  /** Maps group to expected token; {@code null} disables credential checking. */
  private final Map<String, String> credentials;

  /**
   * Opens the index at {@code indexPath} and prepares a searcher that scores with
   * {@link LMDirichletSimilarity} (parameter 2500).
   *
   * @param indexPath location of the Lucene index; must be non-null and must exist
   * @param credentials group-to-token map used to authorize queries, or {@code null} to accept
   *     every query without checking
   * @throws IOException if the index cannot be opened
   * @throws NullPointerException if {@code indexPath} is null
   * @throws IllegalArgumentException if {@code indexPath} does not exist
   */
  public TrecSearchHandler(File indexPath, @Nullable Map<String, String> credentials)
      throws IOException {
    Preconditions.checkNotNull(indexPath);
    Preconditions.checkArgument(indexPath.exists());

    // Can be null, in which case we don't check for credentials.
    this.credentials = credentials;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexPath));
    searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
  }

  /**
   * Runs {@code query.text} against the index, restricted to documents whose id lies in
   * {@code [0, query.max_id]}, and returns at most
   * {@code min(query.num_results, 10000)} hits in the order Lucene ranked them.
   *
   * @param query the query to execute; must be non-null
   * @return one {@link TResult} per hit
   * @throws TrecSearchException if the credentials are rejected, or if parsing or searching
   *     fails (the message of the underlying exception is forwarded)
   */
  public List<TResult> search(TQuery query) throws TrecSearchException {
    Preconditions.checkNotNull(query);

    // Verify credentials.
    if (!isAuthorized(query)) {
      LOG.info(String.format("Access denied for (%s, %s)", query.group, query.token));
      throw new TrecSearchException("Invalid credentials: access denied.");
    }

    List<TResult> results = Lists.newArrayList();
    long startTime = System.currentTimeMillis();

    try {
      Filter filter =
          NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, query.max_id, true, true);

      // QueryParser is not thread-safe, and this handler may be called from several threads at
      // once, so a parser is created per request instead of being shared in a static field.
      QueryParser parser =
          new QueryParser(Version.LUCENE_43, StatusField.TEXT.name, IndexStatuses.ANALYZER);
      Query q = parser.parse(query.text);

      int num = Math.min(query.num_results, MAX_NUM_RESULTS);
      TopDocs rs = searcher.search(q, filter, num);
      for (ScoreDoc scoreDoc : rs.scoreDocs) {
        Document hit = searcher.doc(scoreDoc.doc);
        results.add(toResult(hit, scoreDoc.score));
      }
    } catch (Exception e) {
      // Route the failure through the logger (with stack trace) rather than stderr.
      LOG.error("Search failed", e);
      throw new TrecSearchException(e.getMessage());
    }

    long endTime = System.currentTimeMillis();
    LOG.info(String.format("%4dms %s", (endTime - startTime), query.toString()));

    return results;
  }

  /**
   * Returns whether {@code query} may be executed: always true when no credentials map was
   * configured, otherwise true only if the map holds a non-null token for {@code query.group}
   * that equals {@code query.token}.
   */
  private boolean isAuthorized(TQuery query) {
    if (credentials == null) {
      return true;
    }
    String expectedToken = credentials.get(query.group);
    // A missing group and a null stored token are both treated as "denied".
    return expectedToken != null && expectedToken.equals(query.token);
  }

  /** Copies the stored fields of {@code hit} and its retrieval score into a new result. */
  private static TResult toResult(Document hit, float score) {
    TResult p = new TResult();

    // Fields read unconditionally, as every hit is expected to carry them.
    p.id = longField(hit, StatusField.ID.name);
    p.screen_name = hit.get(StatusField.SCREEN_NAME.name);
    p.epoch = longField(hit, StatusField.EPOCH.name);
    p.text = hit.get(StatusField.TEXT.name);
    p.rsv = score;
    p.followers_count = intField(hit, StatusField.FOLLOWERS_COUNT.name);
    p.statuses_count = intField(hit, StatusField.STATUSES_COUNT.name);

    // Remaining fields are copied only when present in the document.
    String lang = hit.get(StatusField.LANG.name);
    if (lang != null) {
      p.lang = lang;
    }
    if (hasField(hit, StatusField.IN_REPLY_TO_STATUS_ID.name)) {
      p.in_reply_to_status_id = longField(hit, StatusField.IN_REPLY_TO_STATUS_ID.name);
    }
    if (hasField(hit, StatusField.IN_REPLY_TO_USER_ID.name)) {
      p.in_reply_to_user_id = longField(hit, StatusField.IN_REPLY_TO_USER_ID.name);
    }
    if (hasField(hit, StatusField.RETWEETED_STATUS_ID.name)) {
      p.retweeted_status_id = longField(hit, StatusField.RETWEETED_STATUS_ID.name);
    }
    if (hasField(hit, StatusField.RETWEETED_USER_ID.name)) {
      p.retweeted_user_id = longField(hit, StatusField.RETWEETED_USER_ID.name);
    }
    if (hasField(hit, StatusField.RETWEET_COUNT.name)) {
      p.retweeted_count = intField(hit, StatusField.RETWEET_COUNT.name);
    }

    return p;
  }

  /** Returns whether {@code hit} yields a non-null value for {@code fieldName}. */
  private static boolean hasField(Document hit, String fieldName) {
    return hit.get(fieldName) != null;
  }

  /** Reads the numeric value of {@code fieldName} from {@code hit} as a {@code long}. */
  private static long longField(Document hit, String fieldName) {
    return (Long) hit.getField(fieldName).numericValue();
  }

  /** Reads the numeric value of {@code fieldName} from {@code hit} as an {@code int}. */
  private static int intField(Document hit, String fieldName) {
    return (Integer) hit.getField(fieldName).numericValue();
  }
}