com.cloudera.oryx.app.serving.kmeans.Assign.java Source code

Introduction

Here is the source code for com.cloudera.oryx.app.serving.kmeans.Assign.java
Source

/*
 * Copyright (c) 2014, Cloudera and Intel, Inc. All Rights Reserved.
 *
 * Cloudera, Inc. licenses this file to you under the Apache License,
 * Version 2.0 (the "License"). You may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for
 * the specific language governing permissions and limitations under the
 * License.
 */

package com.cloudera.oryx.app.serving.kmeans;

import javax.servlet.http.HttpServletRequest;
import javax.ws.rs.Consumes;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.Produces;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.MediaType;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.fileupload.FileItem;

import com.cloudera.oryx.app.serving.CSVMessageBodyWriter;
import com.cloudera.oryx.app.serving.OryxServingException;
import com.cloudera.oryx.common.text.TextUtils;

/**
 * <p>Serving-layer resource mounted at {@code /assign}. It accepts data points to cluster,
 * each written as a comma-delimited line such as "1,foo,3.0", in one of three ways:</p>
 *
 * <ul>
 *  <li>a GET to {@code /assign/[datum]} carrying a single data point in the path,</li>
 *  <li>a POST to {@code /assign} whose body holds one data point per line, or</li>
 *  <li>a multipart form POST to {@code /assign} whose parts each hold one data point per
 *   line.</li>
 * </ul>
 *
 * <p>The response contains the ID of the cluster assigned to each input data point, one per
 * input, in input order.</p>
 */
@Path("/assign")
public final class Assign extends AbstractKMeansResource {

    /**
     * Assigns a single data point, supplied in the request path, to a cluster.
     *
     * @param datum comma-delimited data point
     * @return string form of the ID of the assigned cluster
     * @throws OryxServingException propagated from {@code nearestClusterID}
     */
    @GET
    @Path("{datum}")
    @Produces({ MediaType.TEXT_PLAIN, CSVMessageBodyWriter.TEXT_CSV, MediaType.APPLICATION_JSON })
    public String get(@PathParam("datum") String datum) throws OryxServingException {
        Integer clusterID = nearestClusterID(datum);
        return clusterID.toString();
    }

    /**
     * Assigns every line of a plain request body to a cluster.
     *
     * @param reader request body, one comma-delimited data point per line
     * @return cluster IDs as strings, one per input line, in input order
     * @throws IOException if the body cannot be read
     * @throws OryxServingException propagated from {@code nearestClusterID}
     */
    @POST
    @Consumes({ MediaType.TEXT_PLAIN, CSVMessageBodyWriter.TEXT_CSV, MediaType.APPLICATION_JSON })
    @Produces({ MediaType.TEXT_PLAIN, CSVMessageBodyWriter.TEXT_CSV, MediaType.APPLICATION_JSON })
    public List<String> post(Reader reader) throws IOException, OryxServingException {
        BufferedReader body = maybeBuffer(reader);
        return doPost(body);
    }

    /**
     * Assigns every line of every part of a multipart form upload to a cluster. Each part is
     * passed through {@code maybeDecompress} along with its content type and is then read as
     * UTF-8 text.
     *
     * @param request servlet request holding the multipart form data
     * @return cluster IDs as strings for all lines of all parts, in the order encountered
     * @throws IOException if a part cannot be read
     * @throws OryxServingException propagated from {@code nearestClusterID}
     */
    @POST
    @Consumes(MediaType.MULTIPART_FORM_DATA)
    @Produces({ MediaType.TEXT_PLAIN, CSVMessageBodyWriter.TEXT_CSV, MediaType.APPLICATION_JSON })
    public List<String> post(@Context HttpServletRequest request) throws IOException, OryxServingException {
        List<String> allAssignments = new ArrayList<>();
        for (FileItem part : parseMultipart(request)) {
            InputStream partStream = maybeDecompress(part.getContentType(), part.getInputStream());
            Reader partText = new InputStreamReader(partStream, StandardCharsets.UTF_8);
            // Closing the buffered reader also closes the readers/streams it wraps
            try (BufferedReader partLines = maybeBuffer(partText)) {
                List<String> partAssignments = doPost(partLines);
                allAssignments.addAll(partAssignments);
            }
        }
        return allAssignments;
    }

    /**
     * Reads data points line by line until end of input, assigning each to a cluster.
     *
     * @param buffered source of comma-delimited data points, one per line
     * @return cluster IDs as strings, one per line read, in order
     * @throws IOException if a line cannot be read
     * @throws OryxServingException propagated from {@code nearestClusterID}
     */
    private List<String> doPost(BufferedReader buffered) throws IOException, OryxServingException {
        List<String> clusterIDs = new ArrayList<>();
        for (String datum = buffered.readLine(); datum != null; datum = buffered.readLine()) {
            Integer clusterID = nearestClusterID(datum);
            clusterIDs.add(clusterID.toString());
        }
        return clusterIDs;
    }

    /**
     * Validates and tokenizes one data point, then asks {@code cluster} for its assignment.
     *
     * @param datum comma-delimited data point; a null or empty value fails the {@code check}
     * @return first element of the result of {@code cluster}, taken as the cluster ID
     * @throws OryxServingException declared for the validation/clustering calls made here
     */
    private Integer nearestClusterID(String datum) throws OryxServingException {
        boolean hasData = datum != null && !datum.isEmpty();
        check(hasData, "Data is needed to cluster");
        String[] fields = TextUtils.parseDelimited(datum, ',');
        return cluster(fields).getFirst();
    }

}