com.google.appengine.tools.mapreduce.MapReduceServlet.java Source code

Java tutorial

Introduction

Here is the source code for com.google.appengine.tools.mapreduce.MapReduceServlet.java

Source

/*
 * Copyright 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.appengine.tools.mapreduce;

import com.google.appengine.api.datastore.DatastoreService;
import com.google.appengine.api.datastore.DatastoreServiceFactory;
import com.google.appengine.tools.mapreduce.workers.ControllerWorker;
import com.google.appengine.tools.mapreduce.workers.MapperWorker;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobID;

import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Servlet for all MapReduce API related functions.
 *
 * This should be specified as the handler for MapReduce URLs in web.xml.
 * For instance:
 * <pre>
 * {@code
 * <servlet>
 *   <servlet-name>mapreduce</servlet-name>
 *   <servlet-class>com.google.appengine.tools.mapreduce.MapReduceServlet</servlet-class>
 * </servlet>
 * <servlet-mapping>
 *   <servlet-name>mapreduce</servlet-name>
 *   <url-pattern>/mapreduce/*</url-pattern>
 * </servlet-mapping>
 * }
 * </pre>
 *
 * Generally you'll want this handler to be protected by an admin security constraint; see
 * <a
 * href="http://code.google.com/appengine/docs/java/config/webxml.html#Security_and_Authentication">
 * Security and Authentication</a>
 * for more details.
 *
 * @author frew@google.com (Fred Wulff)
 */
public class MapReduceServlet extends HttpServlet {

    private static final Logger log = Logger.getLogger(MapReduceServlet.class.getName());

    /** Handler prefix for requests that start a new job from a POSTed XML configuration. */
    static final String START_PATH = "start";
    /** Handler prefix for AJAX command requests ({@code command/<name>}). */
    static final String COMMAND_PATH = "command";

    /** Classpath directory that the bundled static UI resources are loaded from. */
    private static final String STATIC_RESOURCE_ROOT = "/com/google/appengine/tools/mapreduce/";

    // NOTE(review): not referenced anywhere else in this class; kept because obtaining the
    // service at construction time may be relied upon elsewhere - confirm before removing.
    private DatastoreService ds = DatastoreServiceFactory.getDatastoreService();
    private Clock clock = new SystemClock();

    MapReduceScheduler scheduler = new MapReduceScheduler();
    CommandHandler commandHandler = new CommandHandler(scheduler);
    ControllerWorker controllerWorker = new ControllerWorker(clock);
    MapperWorker mapperWorker = new MapperWorker(clock);

    /**
     * Returns the handler portion of the URL path.
     *
     * For example, getHandler(https://www.google.com/foo/bar) -> bar
     * Note that for command handlers,
     * getHandler(https://www.google.com/foo/command/bar) -> command/bar
     *
     * @param request the incoming request whose URI is inspected
     * @return everything after the index reported by {@link UrlUtil#getDividingIndex}
     */
    static String getHandler(HttpServletRequest request) {
        String requestURI = request.getRequestURI();
        return requestURI.substring(UrlUtil.getDividingIndex(requestURI) + 1);
    }

    /**
     * Logs a possible CSRF attempt and answers the request with a 403.
     *
     * @param response response to send the error on
     * @param logMessage message written to the log at SEVERE level
     * @param errorMessage message sent to the client with the 403 status
     * @throws RuntimeException if the error response cannot be written
     */
    private void rejectAsForbidden(HttpServletResponse response, String logMessage, String errorMessage) {
        log.log(Level.SEVERE, logMessage);
        try {
            response.sendError(HttpServletResponse.SC_FORBIDDEN, errorMessage);
        } catch (IOException ioe) {
            throw new RuntimeException("Encountered error writing error", ioe);
        }
    }

    /**
     * Checks to ensure that the current request was sent via the task queue.
     *
     * If the request is not in the task queue, returns false, and sets the
     * response status code to 403. This protects against CSRF attacks against
     * task queue-only handlers.
     *
     * @return true if the request is a task queue request
     */
    private boolean checkForTaskQueue(HttpServletRequest request, HttpServletResponse response) {
        if (request.getHeader("X-AppEngine-QueueName") != null) {
            return true;
        }
        rejectAsForbidden(response,
                "Received unexpected non-task queue request. Possible CSRF attack.",
                "Received unexpected non-task queue request.");
        return false;
    }

    /**
     * Checks to ensure that the current request was sent via an AJAX request.
     *
     * If the request was not sent by an AJAX request, returns false, and sets
     * the response status code to 403. This protects against CSRF attacks against
     * AJAX only handlers.
     *
     * @return true if the request carries the {@code X-Requested-With: XMLHttpRequest} header
     */
    private boolean checkForAjax(HttpServletRequest request, HttpServletResponse response) {
        if ("XMLHttpRequest".equals(request.getHeader("X-Requested-With"))) {
            return true;
        }
        rejectAsForbidden(response,
                "Received unexpected non-XMLHttpRequest command. Possible CSRF attack.",
                "Received unexpected non-XMLHttpRequest command.");
        return false;
    }

    /**
     * Validates and dispatches a {@code command/<name>} request to the command handler.
     *
     * Shared by {@link #doGet} and {@link #doPost}. Non-AJAX requests are rejected with a 403
     * before the command name is extracted.
     *
     * @param handler handler portion of the URL, already known to start with {@link #COMMAND_PATH}
     */
    private void dispatchCommand(String handler, HttpServletRequest request, HttpServletResponse response) {
        if (!checkForAjax(request, response)) {
            return;
        }
        // The "+ 1" skips the '/' separating the "command" prefix from the command name.
        commandHandler.handleCommand(handler.substring(COMMAND_PATH.length() + 1), request, response);
    }

    /**
     * Handles all MapReduce callbacks.
     *
     * Controller and mapper-worker callbacks are only accepted from the task queue, commands
     * only from AJAX requests; the start handler accepts any POST.
     *
     * @throws RuntimeException if the handler portion of the URL matches no known handler
     */
    @Override
    public void doPost(HttpServletRequest request, HttpServletResponse response) {
        String handler = MapReduceServlet.getHandler(request);
        if (handler.startsWith(ControllerWorker.CONTROLLER_PATH)) {
            if (!checkForTaskQueue(request, response)) {
                return;
            }
            controllerWorker.handleController(request, response);
        } else if (handler.startsWith(MapperWorker.MAPPER_WORKER_PATH)) {
            if (!checkForTaskQueue(request, response)) {
                return;
            }
            mapperWorker.handleMapperWorker(request, response);
        } else if (handler.startsWith(START_PATH)) {
            // We don't add a GET handler for this one, since we're expecting the user
            // to POST the whole XML specification.
            // TODO(frew): Make name customizable.
            // TODO(frew): Add ability to specify a redirect.
            scheduler.handleStart(
                    ConfigurationXmlUtil.getConfigurationFromXml(request.getParameter("configuration")),
                    "Automatically run request", request);
        } else if (handler.startsWith(COMMAND_PATH)) {
            dispatchCommand(handler, request, response);
        } else {
            // The exception message points at the logs, so record which handler was requested.
            log.log(Level.SEVERE, "Received an unknown MapReduce request handler: " + handler);
            throw new RuntimeException("Received an unknown MapReduce request handler. See logs for more detail.");
        }
    }

    /**
     * Handles AJAX command requests and serves the static resources of the status UI.
     *
     * Anything that is not a command is treated as a static resource request.
     */
    @Override
    public void doGet(HttpServletRequest request, HttpServletResponse response) {
        String handler = MapReduceServlet.getHandler(request);
        if (handler.startsWith(COMMAND_PATH)) {
            dispatchCommand(handler, request, response);
        } else {
            handleStaticResources(handler, response);
        }
    }

    /**
     * Derives the configuration of the reducing job from a mapper job's stored configuration.
     *
     * The mapper's configuration XML is re-parsed and then adjusted so that the job reads the
     * mapper's intermediate output and runs {@link ReducingMapper} as its "mapper".
     * Currently not called from within this class.
     *
     * @param mapperJobID id of the mapping job whose output is to be reduced
     * @param mapperState state of that mapping job (configuration XML and output key range)
     * @return the configuration for the reducing job
     */
    private Configuration reducerConfigFromMapperConfig(JobID mapperJobID, MapReduceState mapperState) {

        Configuration reducerConfig = ConfigurationXmlUtil
                .getConfigurationFromXml(mapperState.getConfigurationXML());

        // define input for the reducing "mapper" as a function of our just completed mapping job
        reducerConfig.set("mapreduce.inputformat.class", IntermediateInputFormat.class.getName());
        reducerConfig.set(IntermediateInputFormat.MAPPER_JOBID_KEY, mapperJobID.toString());
        reducerConfig.set(IntermediateInputFormat.MIN_OUTPUT_KEY, mapperState.getOutputKeyRange().getMinKey());
        reducerConfig.set(IntermediateInputFormat.MAX_OUTPUT_KEY, mapperState.getOutputKeyRange().getMaxKey());

        // define the "mapper" that handles the reduction process
        reducerConfig.set("mapreduce.map.class", ReducingMapper.class.getName());

        // An explicit reducer shard count, when configured, replaces the mapper shard count.
        if (reducerConfig.get(AppEngineJobContext.REDUCER_SHARD_COUNT_KEY) != null) {
            reducerConfig.set(AppEngineJobContext.MAPPER_SHARD_COUNT_KEY,
                    reducerConfig.get(AppEngineJobContext.REDUCER_SHARD_COUNT_KEY));
        }
        return reducerConfig;
    }

    /**
     * Replaces the clock used by this servlet's workers.
     *
     * The workers receive their clock through their constructors, so both are rebuilt here;
     * assigning the field alone would leave them on the previous clock.
     *
     * @param clock the clock the controller and mapper workers should use from now on
     */
    // VisibleForTesting
    void setClock(Clock clock) {
        this.clock = clock;
        this.controllerWorker = new ControllerWorker(clock);
        this.mapperWorker = new MapperWorker(clock);
    }

    /**
     * Handle serving of static resources (which we do dynamically so users
     * only have to add one entry to their web.xml).
     *
     * Unknown handlers are answered with a 404.
     *
     * @param handler handler portion of the request URL, e.g. {@code status} or {@code base.css}
     * @param response response the resource is written to
     * @throws RuntimeException if the bundled resource is missing or cannot be copied
     */
    public void handleStaticResources(String handler, HttpServletResponse response) {
        String fileName = null;
        if (handler.equals("status")) {
            response.setContentType("text/html");
            fileName = "overview.html";
        } else if (handler.equals("detail")) {
            response.setContentType("text/html");
            fileName = "detail.html";
        } else if (handler.equals("base.css")) {
            response.setContentType("text/css");
            fileName = "base.css";
        } else if (handler.equals("jquery.js")) {
            response.setContentType("text/javascript");
            fileName = "jquery-1.4.2.min.js";
        } else if (handler.equals("status.js")) {
            response.setContentType("text/javascript");
            fileName = "status.js";
        } else {
            try {
                response.sendError(404);
            } catch (IOException e) {
                throw new RuntimeException("Encountered error sending 404", e);
            }
            return;
        }

        String resourcePath = STATIC_RESOURCE_ROOT + fileName;
        try {
            InputStream resourceStream = MapReduceServlet.class.getResourceAsStream(resourcePath);
            // getResourceAsStream reports a missing resource by returning null, not by throwing.
            if (resourceStream == null) {
                throw new FileNotFoundException(resourcePath);
            }
            try {
                // Cache-Control directives are comma-separated.
                response.setHeader("Cache-Control", "public, max-age=300");
                OutputStream responseStream = response.getOutputStream();
                byte[] buffer = new byte[1024];
                int bytesRead;
                while ((bytesRead = resourceStream.read(buffer)) != -1) {
                    responseStream.write(buffer, 0, bytesRead);
                }
                responseStream.flush();
            } finally {
                // Always release the classpath resource, even if copying fails.
                resourceStream.close();
            }
        } catch (FileNotFoundException e) {
            throw new RuntimeException("Couldn't find static file for MapReduce library", e);
        } catch (IOException e) {
            throw new RuntimeException("Couldn't read static file for MapReduce library", e);
        }
    }
}