Java tutorial: SplunkScheme.java, a read-only Cascading Scheme for sourcing Splunk search results
/*
 * Copyright (c) 2013 Yolodata, LLC, All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.yolodata.tbana.cascading.splunk;

import cascading.flow.FlowProcess;
import cascading.scheme.Scheme;
import cascading.scheme.SinkCall;
import cascading.scheme.SourceCall;
import cascading.tap.Tap;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import com.yolodata.tbana.hadoop.mapred.splunk.SplunkConf;
import com.yolodata.tbana.hadoop.mapred.splunk.SplunkDataQuery;
import com.yolodata.tbana.hadoop.mapred.splunk.SplunkInputFormat;
import com.yolodata.tbana.hadoop.mapred.util.ArrayListTextWritable;
import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;

import java.io.IOException;
import java.io.Serializable;

/**
 * A read-only Cascading Scheme that sources tuples from Splunk via
 * SplunkInputFormat. Sinking (writing back to Splunk) is not supported.
 */
public class SplunkScheme extends Scheme<JobConf, RecordReader, OutputCollector, Object[], Object[]> implements Serializable {

    private SplunkDataQuery splunkDataQuery;

    public SplunkScheme(SplunkDataQuery splunkDataQuery) {
        this.splunkDataQuery = splunkDataQuery;
        setFields(Fields.ALL);
    }

    private void setFields(Fields fields) {
        // Only adjust the special UNKNOWN/ALL field sets; concrete field lists pass through untouched.
        if (!fields.isUnknown() && !fields.isAll())
            return;

        // Prepend the "offset" field that carries the record key from the input format.
        if (!fields.contains(new Fields("offset"))) {
            fields = new Fields("offset").append(fields);
        }

        setSourceFields(fields);
    }

    @Override
    public void sourceConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
        // Configure the Hadoop job to read from Splunk with this scheme's query and time window.
        conf.setInputFormat(SplunkInputFormat.class);
        conf.set(SplunkConf.SPLUNK_SEARCH_QUERY, this.splunkDataQuery.getSplunkQuery());
        conf.set(SplunkConf.SPLUNK_EARLIEST_TIME, this.splunkDataQuery.getEarliestTimeString());
        conf.set(SplunkConf.SPLUNK_LATEST_TIME, this.splunkDataQuery.getLatestTimeString());
    }

    @Override
    public void sourcePrepare(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) {
        // Cache a reusable key/value pair in the source call's context.
        Object[] pair = new Object[] {
                sourceCall.getInput().createKey(),
                sourceCall.getInput().createValue()
        };

        sourceCall.setContext(pair);

        try {
            // Skip the header row returned by Splunk.
            sourceCall.getInput().next(pair[0], pair[1]);
        } catch (IOException e) {
            throw new RuntimeException("Could not skip the header", e);
        }
    }

    @Override
    public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
        throw new NotImplementedException("Storing to Splunk not implemented");
    }

    @Override
    public boolean source(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) throws IOException {
        Tuple result = new Tuple();

        Object key = sourceCall.getContext()[0];
        Object value = sourceCall.getContext()[1];
        boolean hasNext = sourceCall.getInput().next(key, value);

        if (!hasNext) {
            return false;
        }

        // Skip nulls
        if (key == null || value == null) {
            return true;
        }

        LongWritable keyWritable = (LongWritable) key;
        ArrayListTextWritable values = (ArrayListTextWritable) value;

        // First tuple position is the record offset, followed by one entry per Splunk column.
        result.add(keyWritable);
        for (Text textValue : values)
            result.add(textValue);

        sourceCall.getIncomingEntry().setTuple(result);
        return true;
    }

    @Override
    public void sink(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
        throw new NotImplementedException("Storing to Splunk not implemented");
    }
}
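For context, here is a minimal sketch of how a scheme like this would typically be wired into a Cascading 2.x flow that copies Splunk events to HDFS. The SplunkTap constructor, the SplunkDataQuery constructor arguments, and the connection property keys shown below are assumptions for illustration; check the project's actual signatures before relying on them.

    import cascading.flow.Flow;
    import cascading.flow.hadoop.HadoopFlowConnector;
    import cascading.pipe.Pipe;
    import cascading.scheme.hadoop.TextDelimited;
    import cascading.tap.Tap;
    import cascading.tap.hadoop.Hfs;
    import com.yolodata.tbana.cascading.splunk.SplunkScheme;
    import com.yolodata.tbana.cascading.splunk.SplunkTap;
    import com.yolodata.tbana.hadoop.mapred.splunk.SplunkDataQuery;

    import java.util.Properties;

    public class SplunkToHdfs {
        public static void main(String[] args) {
            // Splunk connection details; these key names are assumptions for illustration.
            Properties properties = new Properties();
            properties.put("splunk.host", "localhost");
            properties.put("splunk.port", "8089");
            properties.put("splunk.username", "admin");
            properties.put("splunk.password", "changeme");

            // Hypothetical constructor: a Splunk search string defining what to read.
            SplunkDataQuery query = new SplunkDataQuery("search index=main | table _time, host, source");

            // Source tap backed by the SplunkScheme above; SplunkTap's constructor is assumed here.
            Tap source = new SplunkTap(properties, new SplunkScheme(query));

            // Sink: tab-delimited text files on HDFS.
            Tap sink = new Hfs(new TextDelimited(), "output/splunk-events");

            // A pass-through pipe moves each sourced tuple straight to the sink.
            Pipe pipe = new Pipe("splunk-import");
            Flow flow = new HadoopFlowConnector(properties).connect(source, sink, pipe);
            flow.complete();
        }
    }

Because sinkConfInit and sink both throw NotImplementedException, a tap using this scheme can only ever appear on the source side of a flow, as in the sketch above.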