co.cask.cdap.examples.wordcount.RetrieveCountsHandler.java Source code

Java tutorial

Introduction

Here is the source code for co.cask.cdap.examples.wordcount.RetrieveCountsHandler.java

Source

/*
 * Copyright © 2014 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.examples.wordcount;

import co.cask.cdap.api.annotation.Property;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.api.dataset.table.Get;
import co.cask.cdap.api.dataset.table.Row;
import co.cask.cdap.api.dataset.table.Table;
import co.cask.cdap.api.service.http.AbstractHttpServiceHandler;
import co.cask.cdap.api.service.http.HttpServiceContext;
import co.cask.cdap.api.service.http.HttpServiceRequest;
import co.cask.cdap.api.service.http.HttpServiceResponder;
import com.google.common.base.Charsets;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Maps;

import java.util.HashMap;
import java.util.Map;
import javax.ws.rs.DefaultValue;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.QueryParam;

/**
 * Retrieve Counts service handler.
 *
 * <p>
 * Exposes HTTP endpoints that read from the four datasets named by the
 * {@link WordCount.WordCountConfig} passed to the constructor: word statistics, per-word counts,
 * the unique word count, and word associations. Every endpoint answers with a JSON map.
 * </p>
 */
public class RetrieveCountsHandler extends AbstractHttpServiceHandler {
    /** Association limit used by {@code getCount} when the caller supplies no limit. */
    private static final int WORD_COUNT_LIMIT = 10;

    /** Name of the dataset holding the "totals" row read by {@code getStats}. */
    @Property
    private final String wordStatsTableName;

    /** Name of the key/value dataset mapping a word to its count. */
    @Property
    private final String wordCountTableName;

    /** Name of the dataset that provides the unique word count. */
    @Property
    private final String uniqueCountTableName;

    /** Name of the dataset that provides word associations. */
    @Property
    private final String wordAssocTableName;

    // Dataset handles; assigned in initialize(), not in the constructor.
    private Table wordStatsTable;
    private KeyValueTable wordCountsTable;
    private UniqueCountTable uniqueCountTable;
    private AssociationTable associationTable;

    /**
     * Captures the dataset names from the application configuration.
     *
     * @param config configuration supplying the four dataset names
     */
    public RetrieveCountsHandler(WordCount.WordCountConfig config) {
        this.wordStatsTableName = config.getWordStatsTable();
        this.wordCountTableName = config.getWordCountTable();
        this.uniqueCountTableName = config.getUniqueCountTable();
        this.wordAssocTableName = config.getWordAssocTable();
    }

    /**
     * Declares the four datasets this handler uses, after delegating to the superclass.
     */
    @Override
    protected void configure() {
        super.configure();
        useDatasets(wordStatsTableName, wordCountTableName, uniqueCountTableName, wordAssocTableName);
    }

    /**
     * Resolves the dataset handles from the service context by their configured names.
     *
     * @param context the service context used to look up the datasets
     * @throws Exception propagated from the superclass initialization or the dataset lookups
     */
    @Override
    public void initialize(HttpServiceContext context) throws Exception {
        super.initialize(context);
        wordStatsTable = context.getDataset(wordStatsTableName);
        wordCountsTable = context.getDataset(wordCountTableName);
        uniqueCountTable = context.getDataset(uniqueCountTableName);
        associationTable = context.getDataset(wordAssocTableName);
    }

    /**
     * Returns total number of words, the number of unique words, and the average word length.
     *
     * <p>
     * The response is a JSON map with the keys {@code totalWords}, {@code uniqueWords} and
     * {@code averageLength}. All three are zero when the "totals" row is empty; the unique count
     * and the average are only computed when both stored totals are non-zero.
     * </p>
     */
    @Path("stats")
    @GET
    public void getStats(HttpServiceRequest request, HttpServiceResponder responder) {
        long totalWords = 0L;
        long uniqueWords = 0L;
        double averageLength = 0.0;

        // Read the total_length and total_words to calculate average length
        Row result = wordStatsTable.get(new Get("totals", "total_length", "total_words"));
        if (!result.isEmpty()) {
            // Extract the total sum of lengths
            long totalLength = result.getLong("total_length", 0);

            // Extract the total count of words
            totalWords = result.getLong("total_words", 0);

            // Compute the average length; the non-zero check also guards the division
            if (totalLength != 0 && totalWords != 0) {
                averageLength = ((double) totalLength) / totalWords;

                // Read the unique word count
                uniqueWords = uniqueCountTable.readUniqueCount();
            }
        }

        // Return a map as JSON
        Map<String, Object> results = new HashMap<>();
        results.put("totalWords", totalWords);
        results.put("uniqueWords", uniqueWords);
        results.put("averageLength", averageLength);

        responder.sendJson(results);
    }

    /**
     * Returns the count for a specific word and its word associations, up to the specified limit or
     * a pre-set limit of ten if not specified.
     *
     * <p>
     * The response is a JSON map with the keys {@code word}, {@code count} (0 when the word has no
     * stored count) and {@code assocs}.
     * </p>
     */
    @Path("count/{word}")
    @GET
    public void getCount(HttpServiceRequest request, HttpServiceResponder responder, @PathParam("word") String word,
            @DefaultValue("10") @QueryParam("limit") Integer limit) {
        // Read the word count
        byte[] countBytes = wordCountsTable.read(Bytes.toBytes(word));
        long wordCount = countBytes == null ? 0L : Bytes.toLong(countBytes);

        // The limit is a boxed Integer; fall back to the pre-set limit rather than unboxing null
        // should the method ever be invoked without the annotation default being applied.
        int effectiveLimit = limit == null ? WORD_COUNT_LIMIT : limit;

        // Read the top associated words
        Map<String, Long> wordsAssocs = associationTable.readWordAssocs(word, effectiveLimit);

        // Build a map with results
        Map<String, Object> results = new HashMap<>();
        results.put("word", word);
        results.put("count", wordCount);
        results.put("assocs", wordsAssocs);

        responder.sendJson(results);
    }

    /**
     * Returns the counts for all words in the input.  The request body is expected to contain
     * a comma-separated list of words.
     *
     * <p>
     * Each word is looked up with an individual read. Whitespace around a word is ignored and blank
     * entries are skipped. The response is a JSON map with the keys {@code counts} (word to count,
     * 0 for words without a stored count) and {@code elapsed} (time spent on the reads).
     * </p>
     */
    @Path("counts")
    @POST
    public void getCounts(HttpServiceRequest request, HttpServiceResponder responder) {
        Map<String, Long> wordCounts = parseRequestedWords(request);
        Stopwatch timer = new Stopwatch().start();
        for (Map.Entry<String, Long> entry : wordCounts.entrySet()) {
            byte[] countBytes = wordCountsTable.read(Bytes.toBytes(entry.getKey()));
            if (countBytes != null) {
                // Replacing the value of an existing entry is safe while iterating.
                entry.setValue(Bytes.toLong(countBytes));
            }
        }
        timer.stop();
        Map<String, Object> responseBody = Maps.newHashMap();
        responseBody.put("counts", wordCounts);
        responseBody.put("elapsed", timer.toString());
        responder.sendJson(responseBody);
    }

    /**
     * Returns the counts for all words in the input.  The request body is expected to contain
     * a comma-separated list of words.
     *
     * <p>
     * This endpoint method differs from {@link RetrieveCountsHandler#getCounts(HttpServiceRequest,HttpServiceResponder)}
     * in using {@link KeyValueTable#readAll(byte[][])} to perform a batched read.
     * </p>
     *
     * <p>
     * The response has the same shape as that of {@code getCounts}: every requested word appears in
     * {@code counts}, with 0 for words the batched read returned no value for.
     * </p>
     */
    @Path("multicounts")
    @POST
    public void getMultiCounts(HttpServiceRequest request, HttpServiceResponder responder) {
        // Seed every requested word with 0 so that words missing from the batched result are still
        // reported. NOTE(review): readAll may omit absent keys - confirm against KeyValueTable.
        Map<String, Long> wordCounts = parseRequestedWords(request);
        byte[][] wordBytes = new byte[wordCounts.size()][];
        int index = 0;
        for (String word : wordCounts.keySet()) {
            wordBytes[index++] = Bytes.toBytes(word);
        }
        Stopwatch timer = new Stopwatch().start();
        if (wordBytes.length > 0) {
            Map<byte[], byte[]> results = wordCountsTable.readAll(wordBytes);
            for (Map.Entry<byte[], byte[]> entry : results.entrySet()) {
                byte[] val = entry.getValue();
                wordCounts.put(Bytes.toString(entry.getKey()), val != null ? Bytes.toLong(val) : 0L);
            }
        }
        timer.stop();
        Map<String, Object> response = Maps.newHashMap();
        response.put("counts", wordCounts);
        response.put("elapsed", timer.toString());
        responder.sendJson(response);
    }

    /**
     * Returns the count of associations for a specific word pair.
     *
     * <p>
     * The response is a JSON map with the keys {@code word1}, {@code word2} and {@code count}.
     * </p>
     */
    @Path("assoc/{word1}/{word2}")
    @GET
    public void getAssoc(HttpServiceRequest request, HttpServiceResponder responder,
            @PathParam("word1") String word1, @PathParam("word2") String word2) {
        // Read the association count for the word pair
        long count = associationTable.getAssoc(word1, word2);

        // Return a map as JSON
        Map<String, Object> results = new HashMap<>();
        results.put("word1", word1);
        results.put("word2", word2);
        results.put("count", count);

        responder.sendJson(results);
    }

    /**
     * Decodes the request body as UTF-8 and splits it on commas into the distinct requested words,
     * each mapped to an initial count of zero.
     *
     * <p>
     * Surrounding whitespace (for example a trailing newline) is trimmed, and blank entries, such
     * as those produced by an empty body or a stray comma, are dropped so that no lookup is issued
     * for an empty key.
     * </p>
     *
     * @param request the request whose content holds the comma-separated words
     * @return a mutable map from each distinct non-blank word to {@code 0L}
     */
    private static Map<String, Long> parseRequestedWords(HttpServiceRequest request) {
        String body = Charsets.UTF_8.decode(request.getContent()).toString();
        Map<String, Long> counts = Maps.newHashMap();
        for (String token : body.split(",")) {
            String word = token.trim();
            if (!word.isEmpty()) {
                counts.put(word, 0L);
            }
        }
        return counts;
    }
}