com.linkedin.pinot.index.persist.AvroDataPublisherTest.java Source code

Java tutorial

Introduction

Here is the source code for com.linkedin.pinot.index.persist.AvroDataPublisherTest.java

Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.index.persist;

import com.linkedin.pinot.util.TestUtils;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.json.JSONObject;
import org.testng.Assert;
import org.testng.annotations.Test;

import com.linkedin.pinot.common.data.FieldSpec.DataType;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.data.extractors.FieldExtractorFactory;
import com.linkedin.pinot.core.data.readers.AvroRecordReader;
import com.linkedin.pinot.core.data.readers.FileFormat;
import com.linkedin.pinot.core.data.readers.RecordReaderFactory;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.indexsegment.generator.SegmentVersion;
import com.linkedin.pinot.core.indexsegment.utils.AvroUtils;

/**
 * Tests for {@link AvroRecordReader} that read the Avro sample files bundled as
 * classpath resources.
 *
 * <p>Two tests compare the records produced by the reader against a line-delimited
 * JSON rendering of the sample data; the third walks a multi-value Avro file and
 * checks the values and the row count.
 */
public class AvroDataPublisherTest {

    private static final String AVRO_DATA = "data/test_sample_data.avro";
    private static final String JSON_DATA = "data/test_sample_data.json";
    private static final String AVRO_MULTI_DATA = "data/test_sample_data_multi_value.avro";

    /** Charset used to decode the JSON fixture, so the test does not depend on the platform default. */
    private static final String JSON_ENCODING = "UTF-8";

    /** Number of lines the JSON fixture is expected to contain. */
    private static final int EXPECTED_JSON_LINES = 10001;

    /** Number of rows the multi-value Avro fixture is expected to contain. */
    private static final int EXPECTED_MULTI_VALUE_ROWS = 28949;

    /**
     * Reads the Avro sample through a reader obtained from {@link RecordReaderFactory}
     * and compares each row, column by column, with the corresponding JSON line.
     *
     * @throws Exception if a fixture cannot be located, read or parsed
     */
    @Test
    public void TestReadAvro() throws Exception {
        final String filePath = resourcePath(AVRO_DATA);
        final String jsonPath = resourcePath(JSON_DATA);

        final SegmentGeneratorConfig config = avroConfig(twoColumnSchema(), filePath);
        final AvroRecordReader reader = (AvroRecordReader) RecordReaderFactory.get(config);

        // Counts JSON lines, not Avro records: it is incremented even when the reader is exhausted.
        int lineCount = 0;
        for (final String line : FileUtils.readLines(new File(jsonPath), JSON_ENCODING)) {
            final JSONObject json = new JSONObject(line);
            // NOTE(review): JSON lines beyond the last Avro record are skipped silently
            // rather than failing the test -- confirm this leniency is intended.
            if (reader.hasNext()) {
                final GenericRow row = reader.next();
                for (final String column : row.getFieldNames()) {
                    final String valueFromJson = json.get(column).toString();
                    final String valueFromAvro = row.getValue(column).toString();
                    // NOTE(review): values of the first two lines are read but not compared;
                    // the reason is not evident from this file.
                    if (lineCount > 1) {
                        Assert.assertEquals(valueFromAvro, valueFromJson);
                    }
                }
            }
            lineCount++;
        }
        Assert.assertEquals(lineCount, EXPECTED_JSON_LINES);
    }

    /**
     * Reads the Avro sample through a directly constructed {@link AvroRecordReader}
     * using a two-column schema, checks that every row exposes exactly two fields,
     * and compares the values with the JSON fixture.
     *
     * @throws Exception if a fixture cannot be located, read or parsed
     */
    @Test
    public void TestReadPartialAvro() throws Exception {
        final String filePath = resourcePath(AVRO_DATA);
        final String jsonPath = resourcePath(JSON_DATA);

        final SegmentGeneratorConfig config = avroConfig(twoColumnSchema(), filePath);
        final AvroRecordReader reader = new AvroRecordReader(
                FieldExtractorFactory.getPlainFieldExtractor(config), config.getInputFilePath());

        // One record is consumed and discarded before the comparison loop starts.
        // NOTE(review): this pairs JSON line i with the record after the i-th one -- confirm intended.
        reader.next();

        // Counts JSON lines, not Avro records: it is incremented even when the reader is exhausted.
        int lineCount = 0;
        for (final String line : FileUtils.readLines(new File(jsonPath), JSON_ENCODING)) {
            final JSONObject json = new JSONObject(line);
            if (reader.hasNext()) {
                final GenericRow row = reader.next();
                Assert.assertEquals(row.getFieldNames().length, 2);
                for (final String column : row.getFieldNames()) {
                    final String valueFromJson = json.get(column).toString();
                    final String valueFromAvro = row.getValue(column).toString();
                    // NOTE(review): values of the first two lines are read but not compared;
                    // the reason is not evident from this file.
                    if (lineCount > 1) {
                        Assert.assertEquals(valueFromAvro, valueFromJson);
                    }
                }
            }
            lineCount++;
        }
        Assert.assertEquals(lineCount, EXPECTED_JSON_LINES);
    }

    /**
     * Reads the multi-value Avro sample using the schema extracted from the file itself.
     * Every value must be non-null; values of multi-value columns must be
     * {@code Object[]} with non-null elements. Finally the row count is checked.
     *
     * @throws Exception if the fixture cannot be located or read
     */
    @Test
    public void TestReadMultiValueAvro() throws Exception {
        final String filePath = resourcePath(AVRO_MULTI_DATA);

        final SegmentGeneratorConfig config =
                avroConfig(AvroUtils.extractSchemaFromAvro(new File(filePath)), filePath);
        final AvroRecordReader reader = (AvroRecordReader) RecordReaderFactory.get(config);

        int rowCount = 0;
        while (reader.hasNext()) {
            final GenericRow row = reader.next();
            for (final String column : row.getFieldNames()) {
                final boolean singleValue =
                        reader.getSchema().getFieldSpecFor(column).isSingleValueField();
                final Object value = row.getValue(column);
                Assert.assertNotNull(value, "null value in column " + column);
                if (!singleValue) {
                    Assert.assertTrue(value instanceof Object[],
                            "multi-value column " + column + " is not an Object[]");
                    for (final Object element : (Object[]) value) {
                        Assert.assertNotNull(element, "null element in multi-value column " + column);
                    }
                }
            }
            rowCount++;
        }
        Assert.assertEquals(rowCount, EXPECTED_MULTI_VALUE_ROWS);
    }

    /** Resolves a classpath resource name to the file path string used by the tests. */
    private String resourcePath(final String resourceName) {
        return TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(resourceName));
    }

    /** Builds the schema with the two single-value STRING dimensions compared against the JSON fixture. */
    private static Schema twoColumnSchema() {
        return new Schema.SchemaBuilder().addSingleValueDimension("column3", DataType.STRING)
                .addSingleValueDimension("column2", DataType.STRING).build();
    }

    /** Creates a v1 segment generator config that reads the given Avro file with the given schema. */
    private static SegmentGeneratorConfig avroConfig(final Schema schema, final String inputFilePath) {
        final SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
        config.setFormat(FileFormat.AVRO);
        config.setInputFilePath(inputFilePath);
        config.setSegmentVersion(SegmentVersion.v1);
        return config;
    }
}