gaffer.accumulostore.inputformat.InputFormatTest.java Source code


Introduction

Here is the source code for gaffer.accumulostore.inputformat.InputFormatTest.java. This JUnit test checks that Gaffer's ElementInputFormat supplies the correct elements to a local MapReduce job run against a MockAccumuloStore, both with and without a View restricting the returned groups, and for both the ByteEntity and Classic key packages.

Source

/*
 * Copyright 2016 Crown Copyright
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gaffer.accumulostore.inputformat;

import gaffer.accumulostore.AccumuloProperties;
import gaffer.accumulostore.AccumuloStore;
import gaffer.accumulostore.MockAccumuloStore;
import gaffer.accumulostore.key.core.impl.byteEntity.ByteEntityKeyPackage;
import gaffer.accumulostore.key.core.impl.classic.ClassicKeyPackage;
import gaffer.commonutil.StreamUtil;
import gaffer.commonutil.TestGroups;
import gaffer.data.element.Edge;
import gaffer.data.element.Element;
import gaffer.data.element.Entity;
import gaffer.data.elementdefinition.view.View;
import gaffer.operation.OperationException;
import gaffer.operation.impl.add.AddElements;
import gaffer.store.StoreException;
import gaffer.store.schema.Schema;
import gaffer.user.User;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

public class InputFormatTest {

    private enum KeyPackage {
        BYTE_ENTITY_KEY_PACKAGE, CLASSIC_KEY_PACKAGE
    }

    private static final int NUM_ENTRIES = 1000;
    private static final List<Element> DATA = new ArrayList<>();
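    // Build NUM_ENTRIES entities plus, for each one, two directed edges out to vertices "B" and "C".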
    static {
        for (int i = 0; i < NUM_ENTRIES; i++) {
            final Entity entity = new Entity(TestGroups.ENTITY);
            entity.setVertex("" + i);
            entity.putProperty("property1", 1);

            final Edge edge = new Edge(TestGroups.EDGE);
            edge.setSource("" + i);
            edge.setDestination("B");
            edge.setDirected(true);
            edge.putProperty("property1", 2);

            final Edge edge2 = new Edge(TestGroups.EDGE);
            edge2.setSource("" + i);
            edge2.setDestination("C");
            edge2.setDirected(true);
            edge2.putProperty("property2", 3);

            DATA.add(edge);
            DATA.add(edge2);
            DATA.add(entity);
        }
    }

    @Rule
    public final TemporaryFolder testFolder = new TemporaryFolder();

    @Test
    public void shouldReturnCorrectDataToMapReduceJob() throws Exception {
        final View view = new View.Builder().build();
        final Set<String> expectedResults = new HashSet<>();
        for (final Element element : DATA) {
            expectedResults.add(element.toString());
        }
        shouldReturnCorrectDataToMapReduceJob(KeyPackage.BYTE_ENTITY_KEY_PACKAGE, view, "instance1",
                expectedResults);
        shouldReturnCorrectDataToMapReduceJob(KeyPackage.CLASSIC_KEY_PACKAGE, view, "instance2", expectedResults);
    }

    @Test
    public void shouldReturnCorrectDataToMapReduceJobWithView() throws Exception {
        final View view = new View.Builder().edge(TestGroups.EDGE).build();
        final Set<String> expectedResults = new HashSet<>();
        for (final Element element : DATA) {
            if (element.getGroup().equals(TestGroups.EDGE)) {
                expectedResults.add(element.toString());
            }
        }
        shouldReturnCorrectDataToMapReduceJob(KeyPackage.BYTE_ENTITY_KEY_PACKAGE, view, "instance3",
                expectedResults);
        shouldReturnCorrectDataToMapReduceJob(KeyPackage.CLASSIC_KEY_PACKAGE, view, "instance4", expectedResults);
    }

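    // Initialises a mock store with the given key package, adds the test data, runs the
    // map-only job against the local file system, and checks the SequenceFile output
    // against the expected element strings.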
    private void shouldReturnCorrectDataToMapReduceJob(final KeyPackage kp, final View view,
            final String instanceName, final Set<String> expectedResults) throws Exception {
        final AccumuloStore store = new MockAccumuloStore();
        final Schema schema = Schema.fromJson(StreamUtil.schemas(getClass()));
        final AccumuloProperties properties = AccumuloProperties
                .loadStoreProperties(StreamUtil.storeProps(getClass()));
        switch (kp) {
        case BYTE_ENTITY_KEY_PACKAGE:
            properties.setKeyPackageClass(ByteEntityKeyPackage.class.getName());
            properties.setInstanceName(instanceName + "_BYTE_ENTITY");
            break;
        case CLASSIC_KEY_PACKAGE:
            properties.setKeyPackageClass(ClassicKeyPackage.class.getName());
            properties.setInstanceName(instanceName + "_CLASSIC");
        }
        try {
            store.initialise(schema, properties);
        } catch (StoreException e) {
            fail("StoreException thrown: " + e);
        }
        setupGraph(store);

        // Set up local conf
        final JobConf conf = new JobConf();
        conf.set("fs.default.name", "file:///");
        conf.set("mapred.job.tracker", "local");
        final FileSystem fs = FileSystem.getLocal(conf);

        // Update configuration with instance, table name, etc.
        store.updateConfiguration(conf, view);

        // Run the driver. The output directory must not already exist, so delete the
        // folder created by the TemporaryFolder rule before the job runs.
        final File outputFolder = testFolder.newFolder();
        FileUtils.deleteDirectory(outputFolder);
        final Driver driver = new Driver(outputFolder.getAbsolutePath());
        driver.setConf(conf);
        driver.run(new String[] {});

        // Read the results back and check they match the expected elements
        final SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputFolder + "/part-m-00000"),
                conf);
        final Text text = new Text();
        final Set<String> results = new HashSet<>();
        while (reader.next(text)) {
            results.add(text.toString());
        }
        reader.close();
        assertEquals(expectedResults, results);
        FileUtils.deleteDirectory(outputFolder);
    }

    private void setupGraph(final AccumuloStore store) {
        try {
            store.execute(new AddElements(DATA), new User());
        } catch (OperationException e) {
            fail("Couldn't add elements: " + e);
        }
    }

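    // Map-only job that reads Elements from the store via ElementInputFormat and writes
    // their string representation to a SequenceFile in the given output directory.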
    private class Driver extends Configured implements Tool {

        private final String outputDir;

        Driver(final String outputDir) {
            this.outputDir = outputDir;
        }

        @Override
        public int run(final String[] args) throws Exception {
            final Configuration conf = getConf();
            final Job job = Job.getInstance(conf);
            job.setJarByClass(getClass());
            job.setInputFormatClass(ElementInputFormat.class);
            job.setMapperClass(AMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(NullWritable.class);
            job.setNumReduceTasks(0);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(NullWritable.class);

            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            SequenceFileOutputFormat.setOutputPath(job, new Path(outputDir));
            job.waitForCompletion(true);

            return job.isSuccessful() ? 0 : 1;
        }
    }

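    // Emits each Element's toString() as the map output key; the NullWritable value is passed through.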
    private static class AMapper extends Mapper<Element, NullWritable, Text, NullWritable> {

        @Override
        protected void map(final Element key, final NullWritable nw, final Context context)
                throws IOException, InterruptedException {
            context.write(new Text(key.toString()), nw);
        }
    }

}
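
For context, a Tool such as the Driver above is normally launched through Hadoop's ToolRunner rather than by calling run() directly as the test does. The following is a minimal sketch of that wiring, assuming the Driver were extracted as a standalone public class and that schema and store-properties resources are on the classpath; the class name, resource lookup, and output path below are illustrative and not part of the test.

import gaffer.accumulostore.AccumuloProperties;
import gaffer.accumulostore.AccumuloStore;
import gaffer.commonutil.StreamUtil;
import gaffer.data.elementdefinition.view.View;
import gaffer.store.schema.Schema;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ToolRunner;

public class ElementInputFormatJobLauncher {

    public static void main(final String[] args) throws Exception {
        // Initialise a (real, non-mock) AccumuloStore from the same kind of
        // schema and store.properties resources the test loads via StreamUtil.
        final AccumuloStore store = new AccumuloStore();
        final Schema schema = Schema.fromJson(StreamUtil.schemas(ElementInputFormatJobLauncher.class));
        final AccumuloProperties properties = AccumuloProperties
                .loadStoreProperties(StreamUtil.storeProps(ElementInputFormatJobLauncher.class));
        store.initialise(schema, properties);

        // Add the Accumulo connection details and the serialised View to the
        // Hadoop configuration, exactly as the test does before running its job.
        final JobConf conf = new JobConf();
        store.updateConfiguration(conf, new View.Builder().build());

        // Launch the job through ToolRunner. "Driver" is assumed to be the
        // map-only job from the test, extracted as a standalone class; the
        // output path is purely illustrative.
        System.exit(ToolRunner.run(conf, new Driver("/tmp/element-input-format-output"), args));
    }
}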