com.rackspacecloud.blueflood.io.AstyanaxReader.java Source code

Introduction

Here is the source code for com.rackspacecloud.blueflood.io.AstyanaxReader.java
Source

/*
 * Copyright 2013 Rackspace
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */

package com.rackspacecloud.blueflood.io;

import com.codahale.metrics.Timer;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.HashBasedTable;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Table;
import com.netflix.astyanax.Keyspace;
import com.netflix.astyanax.connectionpool.OperationResult;
import com.netflix.astyanax.connectionpool.exceptions.ConnectionException;
import com.netflix.astyanax.connectionpool.exceptions.NotFoundException;
import com.netflix.astyanax.model.*;
import com.netflix.astyanax.query.RowQuery;
import com.netflix.astyanax.serializers.AbstractSerializer;
import com.netflix.astyanax.serializers.BooleanSerializer;
import com.netflix.astyanax.serializers.StringSerializer;
import com.netflix.astyanax.shallows.EmptyColumnList;
import com.netflix.astyanax.util.RangeBuilder;
import com.rackspacecloud.blueflood.cache.MetadataCache;
import com.rackspacecloud.blueflood.exceptions.CacheException;
import com.rackspacecloud.blueflood.io.serializers.NumericSerializer;
import com.rackspacecloud.blueflood.io.serializers.StringMetadataSerializer;
import com.rackspacecloud.blueflood.outputs.formats.MetricData;
import com.rackspacecloud.blueflood.rollup.Granularity;
import com.rackspacecloud.blueflood.service.SlotState;
import com.rackspacecloud.blueflood.types.*;
import com.rackspacecloud.blueflood.utils.Util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.*;

public class AstyanaxReader extends AstyanaxIO {
    private static final Logger log = LoggerFactory.getLogger(AstyanaxReader.class);
    private static final MetadataCache metaCache = MetadataCache.getInstance();
    private static final AstyanaxReader INSTANCE = new AstyanaxReader();
    private static final String rollupTypeCacheKey = MetricMetadata.ROLLUP_TYPE.toString().toLowerCase();
    private static final String dataTypeCacheKey = MetricMetadata.TYPE.toString().toLowerCase();

    private static final Keyspace keyspace = getKeyspace();

    public static AstyanaxReader getInstance() {
        return INSTANCE;
    }

    /**
     * Method that returns all metadata for a given locator as a map.
     *
     * @param locator  locator name
     * @return Map of metadata for that locator
     * @throws RuntimeException(com.netflix.astyanax.connectionpool.exceptions.ConnectionException)
     */
    public Map<String, String> getMetadataValues(Locator locator) {
        Timer.Context ctx = Instrumentation.getReadTimerContext(CassandraModel.CF_METRIC_METADATA);
        try {
            final ColumnList<String> results = keyspace.prepareQuery(CassandraModel.CF_METRIC_METADATA)
                    .getKey(locator).execute().getResult();
            return new HashMap<String, String>() {
                {
                    for (Column<String> result : results) {
                        put(result.getName(), result.getValue(StringMetadataSerializer.get()));
                    }
                }
            };
        } catch (NotFoundException ex) {
            Instrumentation.markNotFound(CassandraModel.CF_METRIC_METADATA);
            return null;
        } catch (ConnectionException e) {
            log.error("Error reading metadata value", e);
            Instrumentation.markReadError(e);
            throw new RuntimeException(e);
        } finally {
            ctx.stop();
        }
    }

    public Table<Locator, String, String> getMetadataValues(Set<Locator> locators) {
        ColumnFamily CF = CassandraModel.CF_METRIC_METADATA;
        boolean isBatch = locators.size() > 1;
        Table<Locator, String, String> metaTable = HashBasedTable.create();

        Timer.Context ctx = isBatch ? Instrumentation.getBatchReadTimerContext(CF)
                : Instrumentation.getReadTimerContext(CF);
        try {
            // We don't paginate this call. So we should make sure the number of reads is tolerable.
            // TODO: Think about paginating this call.
            OperationResult<Rows<Locator, String>> query = keyspace.prepareQuery(CF).getKeySlice(locators)
                    .execute();

            for (Row<Locator, String> row : query.getResult()) {
                ColumnList<String> columns = row.getColumns();
                for (Column<String> column : columns) {
                    String metaValue = column.getValue(StringMetadataSerializer.get());
                    String metaKey = column.getName();
                    metaTable.put(row.getKey(), metaKey, metaValue);
                }
            }
        } catch (ConnectionException e) {
            if (e instanceof NotFoundException) { // TODO: Not really sure what happens when one of the keys is not found.
                Instrumentation.markNotFound(CF);
            } else {
                if (isBatch) {
                    Instrumentation.markBatchReadError(e);
                } else {
                    Instrumentation.markReadError(e);
                }
            }
            log.warn((isBatch ? "Batch " : "") + " read query failed for column family " + CF.getName(), e);
        } finally {
            ctx.stop();
        }

        return metaTable;
    }

    /**
     * Method that makes the actual cassandra call to get the most recent string value for a locator
     *
     * @param locator  locator name
     * @return String most recent string value for metric.
     * @throws RuntimeException(com.netflix.astyanax.connectionpool.exceptions.ConnectionException)
     */
    public String getLastStringValue(Locator locator) {
        Timer.Context ctx = Instrumentation.getReadTimerContext(CassandraModel.CF_METRICS_STRING);

        try {
            ColumnList<Long> query = keyspace.prepareQuery(CassandraModel.CF_METRICS_STRING).getKey(locator)
                    .withColumnRange(new RangeBuilder().setReversed(true).setLimit(1).build()).execute()
                    .getResult();

            return query.isEmpty() ? null : query.getColumnByIndex(0).getStringValue();
        } catch (ConnectionException e) {
            if (e instanceof NotFoundException) {
                Instrumentation.markNotFound(CassandraModel.CF_METRICS_STRING);
            } else {
                Instrumentation.markReadError(e);
            }
            log.warn("Could not get previous string metric value for locator " + locator, e);
            throw new RuntimeException(e);
        } finally {
            ctx.stop();
        }
    }

    /**
     * Returns the recently seen locators, i.e. those that should be rolled up, for a given shard.
     * 'Should' means:
     *  1) A locator is capable of rollup (it is not a string/boolean metric).
     *  2) A locator has had new data in the past {@link com.rackspacecloud.blueflood.io.AstyanaxWriter.LOCATOR_TTL} seconds.
     *
     * @param shard Number of the shard you want the recent locators for. 0-127 inclusive.
     * @return Collection of locators
     * @throws RuntimeException(com.netflix.astyanax.connectionpool.exceptions.ConnectionException)
     */
    public Collection<Locator> getLocatorsToRollup(long shard) {
        Timer.Context ctx = Instrumentation.getReadTimerContext(CassandraModel.CF_METRICS_LOCATOR);
        try {
            RowQuery<Long, Locator> query = keyspace.prepareQuery(CassandraModel.CF_METRICS_LOCATOR).getKey(shard);
            return query.execute().getResult().getColumnNames();
        } catch (NotFoundException e) {
            Instrumentation.markNotFound(CassandraModel.CF_METRICS_LOCATOR);
            return Collections.emptySet();
        } catch (ConnectionException e) {
            Instrumentation.markReadError(e);
            log.error("Error reading locators", e);
            throw new RuntimeException("Error reading locators", e);
        } finally {
            ctx.stop();
        }
    }

    /**
     * Gets all ShardStates for a given shard.
     *
     * @param shard Shard to retrieve all SlotState objects for.
     */
    public Collection<SlotState> getShardState(int shard) {
        Timer.Context ctx = Instrumentation.getReadTimerContext(CassandraModel.CF_METRICS_STATE);
        final Collection<SlotState> slotStates = new LinkedList<SlotState>();
        try {
            ColumnList<SlotState> columns = keyspace.prepareQuery(CassandraModel.CF_METRICS_STATE)
                    .getKey((long) shard).execute().getResult();

            for (Column<SlotState> column : columns) {
                slotStates.add(column.getName().withTimestamp(column.getLongValue()));
            }
        } catch (ConnectionException e) {
            Instrumentation.markReadError(e);
            log.error("Error getting shard state for shard " + shard, e);
            throw new RuntimeException(e);
        } finally {
            ctx.stop();
        }
        return slotStates;
    }

    private ColumnList<Long> getColumnsFromDB(final Locator locator, ColumnFamily<Locator, Long> srcCF,
            Range range) {
        List<Locator> locators = new LinkedList<Locator>() {
            {
                add(locator);
            }
        };
        ColumnList<Long> columns = getColumnsFromDB(locators, srcCF, range).get(locator);
        return columns == null ? new EmptyColumnList<Long>() : columns;
    }

    private Map<Locator, ColumnList<Long>> getColumnsFromDB(List<Locator> locators, ColumnFamily<Locator, Long> CF,
            Range range) {
        if (range.getStart() > range.getStop()) {
            throw new RuntimeException(String.format("Invalid rollup range: ", range.toString()));
        }
        boolean isBatch = locators.size() != 1;

        final Map<Locator, ColumnList<Long>> columns = new HashMap<Locator, ColumnList<Long>>();
        final RangeBuilder rangeBuilder = new RangeBuilder().setStart(range.getStart()).setEnd(range.getStop());

        Timer.Context ctx = isBatch ? Instrumentation.getBatchReadTimerContext(CF)
                : Instrumentation.getReadTimerContext(CF);
        try {
            // We don't paginate this call. So we should make sure the number of reads is tolerable.
            // TODO: Think about paginating this call.
            OperationResult<Rows<Locator, Long>> query = keyspace.prepareQuery(CF).getKeySlice(locators)
                    .withColumnRange(rangeBuilder.build()).execute();

            for (Row<Locator, Long> row : query.getResult()) {
                columns.put(row.getKey(), row.getColumns());
            }
        } catch (ConnectionException e) {
            if (e instanceof NotFoundException) { // TODO: Not really sure what happens when one of the keys is not found.
                Instrumentation.markNotFound(CF);
            } else {
                if (isBatch) {
                    Instrumentation.markBatchReadError(e);
                } else {
                    Instrumentation.markReadError(e);
                }
            }
            log.warn((isBatch ? "Batch " : "") + " read query failed for column family " + CF.getName(), e);
        } finally {
            ctx.stop();
        }

        return columns;
    }

    // todo: this could be the basis for every rollup read method.
    // todo: A better interface may be to pass the serializer in instead of the class type.
    public <T extends Rollup> Points<T> getDataToRoll(Class<T> type, Locator locator, Range range,
            ColumnFamily<Locator, Long> cf) throws IOException {
        AbstractSerializer serializer = NumericSerializer.serializerFor(type);
        // special cases. :( the problem here is that the normal full res serializer returns Number instances instead of
        // SimpleNumber instances.
        // todo: this logic will only become more complicated. It needs to be in its own method and the serializer needs
        // to be known before we ever get to this method (see above comment).
        if (cf == CassandraModel.CF_METRICS_FULL) {
            serializer = NumericSerializer.simpleNumberSerializer;
        } else if (cf == CassandraModel.CF_METRICS_PREAGGREGATED_FULL) {
            // consider a method for this.  getSerializer(CF, TYPE);
            if (type.equals(TimerRollup.class)) {
                serializer = NumericSerializer.timerRollupInstance;
            } else if (type.equals(SetRollup.class)) {
                serializer = NumericSerializer.setRollupInstance;
            } else if (type.equals(GaugeRollup.class)) {
                serializer = NumericSerializer.gaugeRollupInstance;
            } else if (type.equals(CounterRollup.class)) {
                serializer = NumericSerializer.CounterRollupInstance;
            } else {
                serializer = NumericSerializer.simpleNumberSerializer;
            }
        }

        ColumnList<Long> cols = getColumnsFromDB(locator, cf, range);
        Points<T> points = new Points<T>();
        try {
            for (Column<Long> col : cols) {
                points.add(new Points.Point<T>(col.getName(), (T) col.getValue(serializer)));
            }
        } catch (RuntimeException ex) {
            log.error("Problem deserializing data for " + locator + " (" + range + ") from " + cf.getName(), ex);
            throw new IOException(ex);
        }
        return points;
    }

    public static String getUnitString(Locator locator) {
        String unitString = null;
        // Only grab units from cassandra, if we have to
        if (!Util.shouldUseESForUnits()) {
            try {
                unitString = metaCache.get(locator, MetricMetadata.UNIT.name().toLowerCase(), String.class);
            } catch (CacheException ex) {
                log.warn("Cache exception reading unitString from MetadataCache: ", ex);
            }
            if (unitString == null) {
                unitString = Util.UNKNOWN;
            }
        }

        return unitString;
    }

    public static String getType(Locator locator) {
        String type = null;
        try {
            type = metaCache.get(locator, MetricMetadata.TYPE.name().toLowerCase(), String.class);
        } catch (CacheException ex) {
            log.warn("Cache exception reading type from MetadataCache. ", ex);
        }
        if (type == null) {
            type = Util.UNKNOWN;
        }
        return type;
    }

    public MetricData getDatapointsForRange(Locator locator, Range range, Granularity gran) {
        try {
            Object type = metaCache.get(locator, dataTypeCacheKey);
            RollupType rollupType = RollupType.fromString(metaCache.get(locator, rollupTypeCacheKey));

            if (type == null) {
                return getNumericOrStringRollupDataForRange(locator, range, gran, rollupType);
            }

            DataType metricType = new DataType((String) type);
            if (!DataType.isKnownMetricType(metricType)) {
                return getNumericOrStringRollupDataForRange(locator, range, gran, rollupType);
            }

            if (metricType.equals(DataType.STRING)) {
                gran = Granularity.FULL;
                return getStringMetricDataForRange(locator, range, gran);
            } else if (metricType.equals(DataType.BOOLEAN)) {
                gran = Granularity.FULL;
                return getBooleanMetricDataForRange(locator, range, gran);
            } else {
                return getNumericMetricDataForRange(locator, range, gran, rollupType, metricType);
            }

        } catch (CacheException e) {
            log.warn(
                    "Caught exception trying to find metric type from meta cache for locator " + locator.toString(),
                    e);
            return getNumericOrStringRollupDataForRange(locator, range, gran, RollupType.BF_BASIC);
        }
    }

    // TODO: This should be the only method all output handlers call. We should be able to deprecate
    // other individual metric fetch methods once this gets in.
    public Map<Locator, MetricData> getDatapointsForRange(List<Locator> locators, Range range, Granularity gran) {
        ListMultimap<ColumnFamily, Locator> locatorsByCF = ArrayListMultimap.create();
        Map<Locator, MetricData> results = new HashMap<Locator, MetricData>();

        for (Locator locator : locators) {
            try {
                RollupType rollupType = RollupType.fromString(
                        (String) metaCache.get(locator, MetricMetadata.ROLLUP_TYPE.name().toLowerCase()));
                DataType dataType = new DataType(
                        (String) metaCache.get(locator, MetricMetadata.TYPE.name().toLowerCase()));
                ColumnFamily cf = CassandraModel.getColumnFamily(rollupType, dataType, gran);
                List<Locator> locs = locatorsByCF.get(cf);
                locs.add(locator);
            } catch (Exception e) {
                // pass for now. need metric to figure this stuff out.
            }
        }

        for (ColumnFamily CF : locatorsByCF.keySet()) {
            List<Locator> locs = locatorsByCF.get(CF);
            Map<Locator, ColumnList<Long>> metrics = getColumnsFromDB(locs, CF, range);
            // transform columns to MetricData
            for (Locator loc : metrics.keySet()) {
                MetricData data = transformColumnsToMetricData(loc, metrics.get(loc), gran);
                if (data != null) {
                    results.put(loc, data);
                }
            }
        }

        return results;
    }

    public MetricData getHistogramsForRange(Locator locator, Range range, Granularity granularity)
            throws IOException {
        if (!granularity.isCoarser(Granularity.FULL)) {
            throw new RuntimeException("Histograms are not available for granularity " + granularity.toString());
        }

        ColumnFamily cf = CassandraModel.getColumnFamily(HistogramRollup.class, granularity);
        Points<HistogramRollup> histogramRollupPoints = getDataToRoll(HistogramRollup.class, locator, range, cf);
        return new MetricData(histogramRollupPoints, getUnitString(locator), MetricData.Type.HISTOGRAM);
    }

    // Used for string metrics
    private MetricData getStringMetricDataForRange(Locator locator, Range range, Granularity gran) {
        Points<String> points = new Points<String>();
        ColumnList<Long> results = getColumnsFromDB(locator, CassandraModel.CF_METRICS_STRING, range);

        for (Column<Long> column : results) {
            try {
                points.add(new Points.Point<String>(column.getName(), column.getValue(StringSerializer.get())));
            } catch (RuntimeException ex) {
                log.error("Problem deserializing String data for " + locator + " (" + range + ") from "
                        + CassandraModel.CF_METRICS_STRING.getName(), ex);
            }
        }

        return new MetricData(points, getUnitString(locator), MetricData.Type.STRING);
    }

    private MetricData getBooleanMetricDataForRange(Locator locator, Range range, Granularity gran) {
        Points<Boolean> points = new Points<Boolean>();
        ColumnList<Long> results = getColumnsFromDB(locator, CassandraModel.CF_METRICS_STRING, range);

        for (Column<Long> column : results) {
            try {
                points.add(new Points.Point<Boolean>(column.getName(), column.getValue(BooleanSerializer.get())));
            } catch (RuntimeException ex) {
                log.error("Problem deserializing Boolean data for " + locator + " (" + range + ") from "
                        + CassandraModel.CF_METRICS_STRING.getName(), ex);
            }
        }

        return new MetricData(points, getUnitString(locator), MetricData.Type.BOOLEAN);
    }

    // todo: replace this with methods that pertain to type (which can be used to derive a serializer).
    private MetricData getNumericMetricDataForRange(Locator locator, Range range, Granularity gran,
            RollupType rollupType, DataType dataType) {
        ColumnFamily<Locator, Long> CF = CassandraModel.getColumnFamily(rollupType, dataType, gran);

        Points points = new Points();
        ColumnList<Long> results = getColumnsFromDB(locator, CF, range);

        // todo: this will not work when we cannot derive data type from granularity. we will need to know what kind of
        // data we are asking for and use a specific reader method.
        AbstractSerializer serializer = NumericSerializer.serializerFor(RollupType.classOf(rollupType, gran));

        for (Column<Long> column : results) {
            try {
                points.add(pointFromColumn(column, gran, serializer));
            } catch (RuntimeException ex) {
                log.error("Problem deserializing data for " + locator + " (" + range + ") from " + CF.getName(),
                        ex);
            }
        }

        return new MetricData(points, getUnitString(locator), MetricData.Type.NUMBER);
    }

    // gets called when we DO NOT know what the data type is (numeric, string, etc.)
    private MetricData getNumericOrStringRollupDataForRange(Locator locator, Range range, Granularity gran,
            RollupType rollupType) {
        Instrumentation.markScanAllColumnFamilies();

        final MetricData metricData = getNumericMetricDataForRange(locator, range, gran, rollupType,
                DataType.DOUBLE);

        if (metricData.getData().getPoints().size() > 0) {
            return metricData;
        }

        return getStringMetricDataForRange(locator, range, gran);
    }

    private MetricData transformColumnsToMetricData(Locator locator, ColumnList<Long> columns, Granularity gran) {
        try {
            RollupType rollupType = RollupType.fromString(metaCache.get(locator, rollupTypeCacheKey));
            DataType dataType = new DataType(metaCache.get(locator, dataTypeCacheKey));
            String unit = getUnitString(locator);
            MetricData.Type outputType = MetricData.Type.from(rollupType, dataType);
            Points points = getPointsFromColumns(columns, rollupType, dataType, gran);
            MetricData data = new MetricData(points, unit, outputType);
            return data;
        } catch (Exception e) {
            return null;
        }
    }

    private Points getPointsFromColumns(ColumnList<Long> columnList, RollupType rollupType, DataType dataType,
            Granularity gran) {
        Points points = new Points();

        AbstractSerializer serializer = serializerFor(rollupType, dataType, gran);
        for (Column<Long> column : columnList) {
            points.add(new Points.Point(column.getName(), column.getValue(serializer)));
        }

        return points;
    }

    // todo: don't need gran anymore.
    private Points.Point pointFromColumn(Column<Long> column, Granularity gran, AbstractSerializer serializer) {
        if (serializer instanceof NumericSerializer.RawSerializer)
            return new Points.Point(column.getName(), new SimpleNumber(column.getValue(serializer)));
        else
            // this works for EVERYTHING except SimpleNumber.
            return new Points.Point(column.getName(), column.getValue(serializer));
    }
}