com.cloudera.cdk.data.filesystem.TestFileSystemDatasetReader.java Source code

Java tutorial

Introduction

Here is the source code for com.cloudera.cdk.data.filesystem.TestFileSystemDatasetReader.java, a JUnit test class for FileSystemDatasetReader.

Source

/**
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.cdk.data.filesystem;

import com.cloudera.cdk.data.TestDatasetReaders;
import com.cloudera.cdk.data.DatasetReader;
import com.cloudera.cdk.data.DatasetReaderException;
import com.google.common.collect.Lists;
import com.google.common.io.Resources;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.Schema.Type;
import org.apache.avro.generic.GenericData.Record;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.codehaus.jackson.node.JsonNodeFactory;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import static com.cloudera.cdk.data.filesystem.DatasetTestUtilities.*;
import org.apache.avro.generic.GenericData;

public class TestFileSystemDatasetReader extends TestDatasetReaders {

    @Override
    public DatasetReader newReader() throws IOException {
        return new FileSystemDatasetReader<String>(FileSystem.getLocal(new Configuration()),
                new Path(Resources.getResource("data/strings-100.avro").getFile()), STRING_SCHEMA);
    }

    @Override
    public int getTotalRecords() {
        return 100;
    }

    @Override
    public DatasetTestUtilities.RecordValidator getValidator() {
        return new DatasetTestUtilities.RecordValidator<GenericData.Record>() {
            @Override
            public void validate(GenericData.Record record, int recordNum) {
                Assert.assertEquals(String.valueOf(recordNum), record.get("text"));
            }
        };
    }

    private FileSystem fileSystem;

    @Before
    public void setUp() throws IOException {
        fileSystem = FileSystem.getLocal(new Configuration());
    }

    @Test
    public void testEvolvedSchema() throws IOException {
        Schema schema = Schema.createRecord("mystring", null, null, false);
        schema.setFields(Lists.newArrayList(new Field("text", Schema.create(Type.STRING), null, null),
                new Field("text2", Schema.create(Type.STRING), null, JsonNodeFactory.instance.textNode("N/A"))));

        FileSystemDatasetReader<Record> reader = new FileSystemDatasetReader<Record>(fileSystem,
                new Path(Resources.getResource("data/strings-100.avro").getFile()), schema);

        checkReaderBehavior(reader, 100, new RecordValidator<Record>() {
            @Override
            public void validate(Record record, int recordNum) {
                Assert.assertEquals(String.valueOf(recordNum), record.get("text"));
                Assert.assertEquals("N/A", record.get("text2"));
            }
        });
    }

    @Test(expected = IllegalArgumentException.class)
    public void testNullFileSystem() {
        DatasetReader<String> reader = new FileSystemDatasetReader<String>(null,
                new Path("/tmp/does-not-exist.avro"), STRING_SCHEMA);
    }

    @Test(expected = IllegalArgumentException.class)
    public void testNullFile() {
        DatasetReader<String> reader = new FileSystemDatasetReader<String>(fileSystem, null, STRING_SCHEMA);
    }

    @Test(expected = DatasetReaderException.class)
    public void testMissingFile() {
        DatasetReader<String> reader = new FileSystemDatasetReader<String>(fileSystem,
                new Path("/tmp/does-not-exist.avro"), STRING_SCHEMA);

        // the reader should not fail until open()
        Assert.assertNotNull(reader);

        reader.open();
    }

    @Test(expected = DatasetReaderException.class)
    public void testEmptyFile() throws IOException {
        final Path emptyFile = new Path("/tmp/empty-file.avro");

        // outside the try block; if this fails then it isn't correct to remove it
        Assert.assertTrue("Failed to create a new empty file", fileSystem.createNewFile(emptyFile));

        try {
            DatasetReader<String> reader = new FileSystemDatasetReader<String>(fileSystem, emptyFile,
                    STRING_SCHEMA);

            // the reader should not fail until open()
            Assert.assertNotNull(reader);

            reader.open();
        } finally {
            Assert.assertTrue("Failed to clean up empty file", fileSystem.delete(emptyFile, true));
        }
    }
}