logfile.LogfileStreamer.java Source code

Introduction

Here is the source code for logfile.LogfileStreamer.java. The class streams the public 1.usa.gov click feed into an Elasticsearch index and reports indexing metrics through a Dropwizard Metrics ElasticsearchReporter, including a percolation-based alert on the indexing rate.

Source

/*
 * Licensed to Elasticsearch under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package logfile;

import com.codahale.metrics.Counter;
import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricFilter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;
import com.fasterxml.jackson.databind.MappingIterator;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;
import com.fasterxml.jackson.databind.SerializationFeature;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.StopWatch;
import org.elasticsearch.common.geo.GeoHashUtils;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.metrics.ElasticsearchReporter;
import org.elasticsearch.node.Node;
import org.elasticsearch.node.NodeBuilder;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.client.Requests.indexRequest;

/*
 * Before starting the streamer, register a percolation query in the metrics
 * index so the reporter's percolation notifier fires once the mean rate of
 * "usagov-indexing-requests" exceeds 0.10 per second:
 *
 * curl http://localhost:9200/_percolator/metrics/indexing-request-monitor -X PUT -d '{"query":{"bool":{"must":[{"term":{"name":"usagov-indexing-requests"}},{"range":{"mean_rate":{"from":"0.10","include_lower":false}}}]}}}'
 */
public class LogfileStreamer {

    private static final String INDEX = "logfile";
    private static final String TYPE = "log";
    private static final int MAX_BULK_SIZE = 100;
    private static final String ISO_8601_DATE_FORMAT_STRING = "yyyy-MM-dd'T'HH:mm:ssZ";
    private static final DateFormat ISO_8601_DATE_FORMAT = new SimpleDateFormat(ISO_8601_DATE_FORMAT_STRING,
            Locale.US);

    private final TransportClient client;
    private BulkRequestBuilder bulk;
    private StopWatch sw;
    private long startTimestamp;

    // statistics
    private final MetricRegistry registry = new MetricRegistry();
    private final Meter entryMeter = registry.meter("usagov-incoming-requests");
    private final Meter indexingMeter = registry.meter("usagov-indexing-requests");
    private final Counter heartbeatCounter = registry.counter("usa-gov-heartbeats-count");
    private final Timer bulkRequestTimer = registry.timer("bulk-request-timer");

    private static final String CLUSTER_NAME = System.getProperty("cluster.name", "metrics");

    public static void main(String[] args) throws Exception {
        new LogfileStreamer().run();
    }

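    /**
     * Connects a transport client to a node on localhost:9300, using the
     * cluster name from the -Dcluster.name system property (default "metrics").
     */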
    public LogfileStreamer() {
        final Settings settings = Settings.builder().put("cluster.name", CLUSTER_NAME).build();
        client = TransportClient.builder().settings(settings).build()
                .addTransportAddress(new InetSocketTransportAddress(new InetSocketAddress("localhost", 9300)));
        reset();
    }

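    /**
     * Optionally starts a local node, makes sure index and mapping exist,
     * starts the metrics reporter and then consumes the click stream entry by
     * entry, bulk-indexing every entry that carries a location and timestamp.
     */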
    public void run() throws Exception {
        startElasticsearchIfNecessary();
        createIndexAndMappingIfNecessary();

        // index into the metrics index without date formatting
        ElasticsearchReporter reporter = ElasticsearchReporter.forRegistry(registry).hosts("localhost:9200")
                .indexDateFormat("").percolationNotifier(new HttpNotifier()).percolationFilter(MetricFilter.ALL)
                .build();
        reporter.start(60, TimeUnit.SECONDS);

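        // the endpoint returns a continuous stream of JSON objects; Jackson's
        // MappingIterator deserializes them one Map at a time as they arrive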
        ObjectMapper objectMapper = new ObjectMapper();
        objectMapper.configure(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS, false);
        ObjectReader reader = objectMapper.reader(Map.class);
        MappingIterator<Map<String, Object>> iterator = reader.readValues(getInputStream());

        try {
            while (iterator.hasNextValue()) {
                Map<String, Object> entry = iterator.nextValue();
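                // the feed intersperses heartbeat entries; count them, but do
                // not index them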
                if (entry.containsKey("_heartbeat_")) {
                    heartbeatCounter.inc();
                    continue;
                }

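                // "t" is the click timestamp in epoch seconds, "ll" the
                // [latitude, longitude] pair of the resolved location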
                if (entry.containsKey("ll") && entry.containsKey("t")) {
                    long timestamp = ((Number) entry.get("t")).longValue();
                    List<Number> location = (List<Number>) entry.get("ll");
                    double latitude = location.get(0).doubleValue();
                    double longitude = location.get(1).doubleValue();

                    addToBulkRequest(timestamp, latitude, longitude);
                    entryMeter.mark(1);
                }
            }
        } finally {
            executeBulkRequest();
        }
    }

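    /**
     * Turns an entry into a minimal JSON document (ISO-8601 date plus geohash),
     * appends it to the current bulk request and flushes the request if it has
     * grown big or old enough.
     */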
    private void addToBulkRequest(long timestamp, double latitude, double longitude) {
        String geohash = GeoHashUtils.encode(latitude, longitude);
        String isoDate = ISO_8601_DATE_FORMAT.format(new Date(timestamp * 1000));
        String json = String.format("{\"date\":\"%s\", \"geohash\":\"%s\" }", isoDate, geohash);
        bulk.add(indexRequest().index(INDEX).type(TYPE).source(json));
        System.out.print(".");

        executeBulkRequest();
    }

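    /**
     * Executes the bulk request, but only once it contains at least
     * MAX_BULK_SIZE actions or the last flush is more than ten seconds old.
     * The round trip is measured with the bulk request timer.
     */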
    private void executeBulkRequest() {
        if (bulk.numberOfActions() == 0)
            return;
        long secondsSinceLastUpdate = System.currentTimeMillis() / 1000 - startTimestamp;
        if (bulk.numberOfActions() < MAX_BULK_SIZE && secondsSinceLastUpdate < 10)
            return;

        BulkResponse bulkResponse = null;
        final Timer.Context context = bulkRequestTimer.time();
        try {
            bulkResponse = bulk.execute().actionGet();
        } finally {
            context.stop();
        }
        logStatistics(bulkResponse.getItems().length);
        reset();
    }

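    /**
     * Prints the throughput of the last bulk request and marks the indexing
     * meter that the percolation query above is watching.
     */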
    private void logStatistics(long itemsIndexed) {
        long totalTimeInSeconds = sw.stop().totalTime().seconds();
        double totalDocumentsPerSecond = (totalTimeInSeconds == 0) ? itemsIndexed
                : (double) itemsIndexed / totalTimeInSeconds;
        System.out.println(String.format("\nIndexed %s documents, %.2f per second in %s seconds", itemsIndexed,
                totalDocumentsPerSecond, totalTimeInSeconds));
        indexingMeter.mark(1);
    }

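    // restarts the stopwatch, resets the flush timestamp and begins a fresh bulk request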
    private void reset() {
        sw = new StopWatch().start();
        startTimestamp = System.currentTimeMillis() / 1000;
        bulk = client.prepareBulk();
    }

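    /**
     * Opens the public 1.usa.gov click stream. Each entry is a JSON object;
     * the fields used here are "t" (epoch seconds), "ll" (latitude/longitude)
     * and the "_heartbeat_" marker, e.g. {"t": 1357651895, "ll": [37.77, -122.41], ...}
     * (other fields omitted).
     */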
    private InputStream getInputStream() throws Exception {
        URL url = new URL("http://developer.usa.gov/1usagov");
        HttpURLConnection request = (HttpURLConnection) url.openConnection();
        return request.getInputStream();
    }

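    /**
     * Creates the index and maps the geohash field as a geo_point with geohash
     * support. Failures, for example because the index already exists, are
     * only logged.
     */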
    private void createIndexAndMappingIfNecessary() {
        try {
            client.admin().indices().prepareCreate(INDEX).execute().actionGet();
        } catch (Exception e) {
            // most likely the index already exists; log and carry on
            e.printStackTrace();
        }

        try {
            XContentBuilder mappingContent = XContentFactory.jsonBuilder().startObject().startObject(TYPE)
                    .startObject("properties").startObject("geohash").field("type", "geo_point")
                    .field("geohash", true).endObject().endObject().endObject().endObject();

            client.admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(mappingContent).execute()
                    .actionGet();
        } catch (Exception e) {
            // a failed mapping update is only logged; it does not abort the run
            e.printStackTrace();
        }
    }

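    /**
     * Starts an embedded Elasticsearch node with a temporary data directory
     * and HTTP enabled, unless -Dcreate.es.instance=no is set.
     */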
    private void startElasticsearchIfNecessary() throws IOException {
        if (!"no".equals(System.getProperty("create.es.instance"))) {
            final Path tempDirectory = Files.createTempDirectory("usa-gov-logfile-parser");
            System.out.println("Starting elasticsearch instance (data: " + tempDirectory + ")");
            final Settings settings = Settings.settingsBuilder().put("cluster.name", CLUSTER_NAME)
                    .put("path.home", tempDirectory.toString()).put("path.data", tempDirectory.toString())
                    .put(Node.HTTP_ENABLED, true).build();
            NodeBuilder.nodeBuilder().settings(settings).node().start();
        } else {
            System.out.println("Not starting elasticsearch instance, please check if available at localhost:9200");
        }
    }
}
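
Usage

To run against an existing cluster instead of the embedded node, set the two system properties read by the class (a sketch; it assumes the compiled class and all its dependencies are on the classpath):

java -Dcreate.es.instance=no -Dcluster.name=metrics logfile.LogfileStreamer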