Java tutorial: Solr and Spark in combination
/*
 * _____________________________________________________________________________
 *
 * Project: JavaOne 2015
 * _____________________________________________________________________________
 *
 * Created by:    Johannes Weigend, QAware GmbH
 * Creation date: October 2015
 * _____________________________________________________________________________
 *
 * License: Apache License 2.0
 * _____________________________________________________________________________
 */
package de.qaware.javaone.solrspark;

import com.lucidworks.spark.SolrRDD;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DecimalFormat;
import java.util.Date;
import java.util.logging.Logger;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocument;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;

/**
 * Solr / Spark in combination.
 *
 * @author weigend
 */
public class SolrSpark {

    /**
     * Tester for the Solr/Spark cluster. Queries Solr and processes the
     * results with Spark.
     *
     * @param args not used.
     * @throws SolrServerException if a Solr-specific exception occurs.
     * @throws IOException if an I/O exception occurs.
     */
    public static void main(String[] args) throws SolrServerException, IOException {

        // Connect to the Spark cluster.
        SparkConf sparkConf = new SparkConf().setAppName("SolrSpark").setMaster("spark://192.168.25.100:2182")
                .set("spark.executor.memory", "256m");

        // Initialize the Spark context and register this application's jar.
        SparkContext jsc = new SparkContext(sparkConf);
        jsc.addJar("target/SolrSpark-1.0-SNAPSHOT.jar");

        // Query the SolrCloud cluster: IP:ZooKeeper port, collection name.
        SolrRDD solrRDD = new SolrRDD("192.168.25.100:2181", "ekgdata");

        // Read queries until the user enters "exit".
        String query = ""; // example: logMessage:*Exception*
        while (!query.equals("exit")) {
            try {
                System.out.print("\nquery:>");
                query = new BufferedReader(new InputStreamReader(System.in)).readLine();
                Date start = new Date();

                // Query and process.
                int totalLength = queryAndProcess(query, solrRDD, jsc);

                // Print results.
                Date end = new Date();
                DecimalFormat format = new DecimalFormat();
                String timeTaken = format.format(end.getTime() - start.getTime());
                String length = format.format(totalLength);
                System.out.println("Result:\nLength of selected log messages: " + length
                        + ". Time taken: " + timeTaken + " ms.");
            } catch (Exception e) {
                Logger.getLogger(SolrSpark.class.getName()).severe(e.getMessage());
                // retry
            }
        }
    }

    /**
     * Queries Solr and processes all found documents. For each document the
     * length of the field 'logRawEntry' is returned (the count of Unicode
     * characters).
     *
     * @param query a query string.
     * @param solrRDD the SolrRDD Spark abstraction.
     * @param jsc the Spark context.
     * @return the sum of the string lengths of all log messages.
     * @throws SolrServerException when the Solr processing gets into trouble.
     */
    private static int queryAndProcess(String query, SolrRDD solrRDD, SparkContext jsc) throws SolrServerException {
        SolrQuery solrQuery = new SolrQuery(query);
        solrQuery.addSort("id", SolrQuery.ORDER.asc);

        // 1. Smart filtering by searching.
        JavaRDD<SolrDocument> result = solrRDD.query(jsc, solrQuery);

        // 2. Parallel processing of the search results: a parallel map in the
        //    cluster that sums the character count of all selected log messages.
        int totalLength = 0;
        if (!result.isEmpty()) {
            totalLength = result.map((SolrDocument d) -> {
                if (d == null) {
                    return 0;
                }
                String logEntry = (String) d.getFieldValue("logRawEntry");
                return logEntry != null ? logEntry.length() : 0;
            }).reduce((Integer i, Integer j) -> {
                return i + j;
            });
        }
        return totalLength;
    }
}
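
The isEmpty() check in step 2 guards against reduce(), which throws an exception when the RDD is empty. A minimal sketch of an alternative, using fold() with a zero element so no driver-side check is needed; it reuses the imports from the listing above, and the helper name sumLogEntryLengths is only illustrative (not part of the original example):

    // Variant of step 2: fold() returns the zero value for an empty RDD,
    // so the isEmpty() check can be dropped.
    private static int sumLogEntryLengths(JavaRDD<SolrDocument> result) {
        return result.map((SolrDocument d) -> {
            String logEntry = d == null ? null : (String) d.getFieldValue("logRawEntry");
            return logEntry != null ? logEntry.length() : 0;
        }).fold(0, (Integer i, Integer j) -> i + j);
    }

At the query:> prompt, a Solr query such as logMessage:*Exception* (the example from the code) selects only the matching documents, so Spark processes just that subset of the collection.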