de.qaware.javaone.solrspark.SolrSpark.java Source code


Introduction

Here is the source code for de.qaware.javaone.solrspark.SolrSpark.java. The class is a small demo that reads Solr queries from the console, runs them against a SolrCloud collection, and uses Apache Spark to sum the lengths of the matching log messages in parallel.

Source

/*
 * _____________________________________________________________________________
 * 
 *           Project:    JavaOne 2015
 * _____________________________________________________________________________
 * 
 *        Created by:    Johannes Weigend, QAware GmbH
 *     Creation date:    October 2015
 * _____________________________________________________________________________
 * 
 *         License:      Apache License 2.0
 * _____________________________________________________________________________ 
 */
package de.qaware.javaone.solrspark;

import com.lucidworks.spark.SolrRDD;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DecimalFormat;
import java.util.Date;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocument;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.SparkException;
import org.apache.spark.api.java.JavaRDD;

/**
 * Solr / Spark in combination.
 *
 * @author weigend
 */
public class SolrSpark {

    /**
     * Tester for the Solr/Spark cluster. Queries Solr and processes the
     * results with Spark.
     *
     * @param args not used.
     * @throws SolrServerException if a Solr-specific exception occurs.
     * @throws IOException if an I/O exception occurs.
     */
    public static void main(String[] args) throws SolrServerException, IOException {

        // Connect to the Spark cluster
        SparkConf sparkConf = new SparkConf().setAppName("SolrSpark").setMaster("spark://192.168.25.100:2182")
                .set("spark.executor.memory", "256m");

        // Initialize the Spark context and ship this application's JAR to the workers
        SparkContext jsc = new SparkContext(sparkConf);
        jsc.addJar("target/SolrSpark-1.0-SNAPSHOT.jar");

        // Connect to SolrCloud via ZooKeeper (host:zkPort) and the target collection
        SolrRDD solrRDD = new SolrRDD("192.168.25.100:2181", "ekgdata");

        // Read queries until the user types 'exit' (example: logMessage:*Exception*)
        String query = "";
        while (query != null && !query.equals("exit")) {
            try {
                System.out.print("\nquery:>");
                query = new BufferedReader(new InputStreamReader(System.in)).readLine();

                Date start = new Date();

                // Query and Process
                int totalLength = queryAndProcess(query, solrRDD, jsc);

                // Print the results
                Date end = new Date();
                DecimalFormat format = new DecimalFormat();
                String timeTaken = format.format(end.getTime() - start.getTime());
                String length = format.format(totalLength);

                System.out.println("Result:\nLength of selected log messages: " + length + ". Time taken: "
                        + timeTaken + " ms.");

            } catch (Exception e) {
                // Log the full stack trace, then prompt again
                Logger.getLogger(SolrSpark.class.getName()).log(Level.SEVERE, e.getMessage(), e);
            }
        }
    }

    /**
     * Queries Solr and processes all matching documents. For each document the
     * length of the field 'logRawEntry' is measured (the number of UTF-16 code
     * units, as counted by String.length()).
     *
     * @param query a query string.
     * @param solrRDD the SolrRDD Spark abstraction.
     * @param jsc the Spark context.
     * @return the sum of the string lengths of all log messages.
     * @throws SolrServerException if the Solr query fails.
     */
    private static int queryAndProcess(String query, SolrRDD solrRDD, SparkContext jsc) throws SolrServerException {
        SolrQuery solrQuery = new SolrQuery(query);
        solrQuery.addSort("id", SolrQuery.ORDER.asc);
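        // The sort on the unique 'id' field presumably lets SolrRDD page through
        // large result sets with Solr cursors (cursorMark), which require a
        // deterministic sort on the uniqueKey field.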

        // 1. Smart filtering by searching
        JavaRDD<SolrDocument> result = solrRDD.query(jsc, solrQuery);

        // 2. Parallel processing of the search results:
        // a parallel map in the cluster sums the character count of all log messages
        int totalLength = 0;
        if (!result.isEmpty()) {
            totalLength = result.map((SolrDocument d) -> {
                if (d == null) {
                    return 0;
                }
                String logEntry = (String) d.getFieldValue("logRawEntry");
                return logEntry != null ? logEntry.length() : 0;
            }).reduce((i, j) -> i + j);
        }
        return totalLength;
    }
}
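
For comparison, here is what the same computation looks like without Spark, using a plain SolrJ client against the same cluster. This is a minimal single-JVM sketch, assuming a SolrJ 5.x-era API and reusing the demo's ZooKeeper address and collection; the class name SolrOnlySum and the example query are illustrative only, and it only sums over the rows it actually fetches.

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;

public class SolrOnlySum {

    public static void main(String[] args) throws Exception {
        // Same ZooKeeper connect string and collection as in SolrSpark above
        try (CloudSolrClient solr = new CloudSolrClient("192.168.25.100:2181")) {
            solr.setDefaultCollection("ekgdata");

            SolrQuery query = new SolrQuery("logMessage:*Exception*");
            query.setRows(1000); // this sketch fetches at most 1000 documents

            QueryResponse response = solr.query(query);

            // Sum the field lengths, mirroring queryAndProcess() above
            long totalLength = 0;
            for (SolrDocument doc : response.getResults()) {
                String logEntry = (String) doc.getFieldValue("logRawEntry");
                totalLength += logEntry != null ? logEntry.length() : 0;
            }
            System.out.println("Total length: " + totalLength);
        }
    }
}

The point of the Spark variant is that the map/reduce over the documents runs in parallel on the cluster's executors, which matters once the result set no longer fits comfortably into a single JVM.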