/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.accumulo.examples.wikisearch.logic;

import static org.junit.Assert.assertEquals;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;

import junit.framework.Assert;

import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.mock.MockInstance;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaIngester;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
import org.apache.accumulo.examples.wikisearch.sample.Document;
import org.apache.accumulo.examples.wikisearch.sample.Field;
import org.apache.accumulo.examples.wikisearch.sample.Results;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configuration.IntegerRanges;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.task.MapContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.security.Credentials;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.junit.Before;
import org.junit.Test;
public class TestQueryLogic {

  private static final String METADATA_TABLE_NAME = "wikiMetadata";
  private static final String TABLE_NAME = "wiki";
  private static final String INDEX_TABLE_NAME = "wikiIndex";
  private static final String RINDEX_TABLE_NAME = "wikiReverseIndex";
  private static final String[] TABLE_NAMES = {METADATA_TABLE_NAME, TABLE_NAME, RINDEX_TABLE_NAME, INDEX_TABLE_NAME};

  /**
   * RecordWriter that routes mutations produced by the mapper into the mock Accumulo tables via the BatchWriters in writerMap.
   */
  private class MockAccumuloRecordWriter extends RecordWriter<Text,Mutation> {
    @Override
    public void write(Text key, Mutation value) throws IOException, InterruptedException {
      try {
        writerMap.get(key).addMutation(value);
      } catch (MutationsRejectedException e) {
        throw new IOException("Error adding mutation", e);
      }
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
      try {
        for (BatchWriter w : writerMap.values()) {
          w.flush();
          w.close();
        }
      } catch (MutationsRejectedException e) {
        throw new IOException("Error closing Batch Writer", e);
      }
    }
  }

  private Connector c = null;
  private Configuration conf = new Configuration();
  private HashMap<Text,BatchWriter> writerMap = new HashMap<Text,BatchWriter>();
  private QueryLogic table = null;

  @Before
  public void setup() throws Exception {

    Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.DEBUG);
    Logger.getLogger(QueryLogic.class).setLevel(Level.DEBUG);
    Logger.getLogger(RangeCalculator.class).setLevel(Level.DEBUG);

    conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
    conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
    conf.set(WikipediaConfiguration.TABLE_NAME, TABLE_NAME);
    conf.set(WikipediaConfiguration.NUM_PARTITIONS, "1");
    conf.set(WikipediaConfiguration.NUM_GROUPS, "1");

    // Create the wiki tables in an in-memory MockInstance and a BatchWriter for each table.
    MockInstance i = new MockInstance();
    c = i.getConnector("root", new PasswordToken(""));
    WikipediaIngester.createTables(c.tableOperations(), TABLE_NAME, false);
    for (String table : TABLE_NAMES) {
      writerMap.put(new Text(table), c.createBatchWriter(table, 1000L, 1000L, 1));
    }

    TaskAttemptID id = new TaskAttemptID("fake", 1, TaskType.MAP, 1, 1);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, id);

    RawLocalFileSystem fs = new RawLocalFileSystem();
    fs.setConf(conf);

    // The sample wiki dump must be on the test classpath.
    URL url = ClassLoader.getSystemResource("enwiki-20110901-001.xml");
    Assert.assertNotNull(url);
    File data = new File(url.toURI());
    Path tmpFile = new Path(data.getAbsolutePath());

    // Setup the Mapper
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null), 0);
    AggregatingRecordReader rr = new AggregatingRecordReader();
    Path ocPath = new Path(tmpFile, "oc");
    OutputCommitter oc = new FileOutputCommitter(ocPath, context);
    fs.deleteOnExit(ocPath);
    StandaloneStatusReporter sr = new StandaloneStatusReporter();
    rr.initialize(split, context);
    MockAccumuloRecordWriter rw = new MockAccumuloRecordWriter();
    WikipediaMapper mapper = new WikipediaMapper();
    // there are times I wonder, "Why do Java people think this is good?" then I drink more whiskey
    final MapContextImpl<LongWritable,Text,Text,Mutation> mapContext = new MapContextImpl<LongWritable,Text,Text,Mutation>(conf, id, rr, rw, oc, sr, split);

    // Load data into Mock Accumulo: wrap mapContext in a Mapper.Context that delegates every call to it.
    Mapper<LongWritable,Text,Text,Mutation>.Context con = mapper.new Context() {
      /**
       * Get the input split for this map.
       */
      public InputSplit getInputSplit() { return mapContext.getInputSplit(); }

      @Override
      public LongWritable getCurrentKey() throws IOException, InterruptedException { return mapContext.getCurrentKey(); }
      @Override
      public Text getCurrentValue() throws IOException, InterruptedException { return mapContext.getCurrentValue(); }
      @Override
      public boolean nextKeyValue() throws IOException, InterruptedException { return mapContext.nextKeyValue(); }
      @Override
      public Counter getCounter(Enum<?> counterName) { return mapContext.getCounter(counterName); }
      @Override
      public Counter getCounter(String groupName, String counterName) { return mapContext.getCounter(groupName, counterName); }
      @Override
      public OutputCommitter getOutputCommitter() { return mapContext.getOutputCommitter(); }
      @Override
      public void write(Text key, Mutation value) throws IOException, InterruptedException { mapContext.write(key, value); }
      @Override
      public String getStatus() { return mapContext.getStatus(); }
      @Override
      public TaskAttemptID getTaskAttemptID() { return mapContext.getTaskAttemptID(); }
      @Override
      public void setStatus(String msg) { mapContext.setStatus(msg); }
      @Override
      public Path[] getArchiveClassPaths() { return mapContext.getArchiveClassPaths(); }
      @Override
      public String[] getArchiveTimestamps() { return mapContext.getArchiveTimestamps(); }
      @Override
      public URI[] getCacheArchives() throws IOException { return mapContext.getCacheArchives(); }
      @Override
      public URI[] getCacheFiles() throws IOException { return mapContext.getCacheFiles(); }
      @Override
      public Class<? extends Reducer<?,?,?,?>> getCombinerClass() throws ClassNotFoundException { return mapContext.getCombinerClass(); }
      @Override
      public Configuration getConfiguration() { return mapContext.getConfiguration(); }
      @Override
      public Path[] getFileClassPaths() { return mapContext.getFileClassPaths(); }
      @Override
      public String[] getFileTimestamps() { return mapContext.getFileTimestamps(); }
      @Override
      public RawComparator<?> getGroupingComparator() { return mapContext.getGroupingComparator(); }
      @Override
      public Class<? extends InputFormat<?,?>> getInputFormatClass() throws ClassNotFoundException { return mapContext.getInputFormatClass(); }
      @Override
      public String getJar() { return mapContext.getJar(); }
      @Override
      public JobID getJobID() { return mapContext.getJobID(); }
      @Override
      public String getJobName() { return mapContext.getJobName(); }
      /*@Override
      public boolean userClassesTakesPrecedence() { return mapContext.userClassesTakesPrecedence(); }*/
      @Override
      public boolean getJobSetupCleanupNeeded() { return mapContext.getJobSetupCleanupNeeded(); }
      @Override
      public boolean getTaskCleanupNeeded() { return mapContext.getTaskCleanupNeeded(); }
      @Override
      public Path[] getLocalCacheArchives() throws IOException { return mapContext.getLocalCacheArchives(); }
      @Override
      public Path[] getLocalCacheFiles() throws IOException { return mapContext.getLocalCacheFiles(); }
      @Override
      public Class<?> getMapOutputKeyClass() { return mapContext.getMapOutputKeyClass(); }
      @Override
      public Class<?> getMapOutputValueClass() { return mapContext.getMapOutputValueClass(); }
      @Override
      public Class<? extends Mapper<?,?,?,?>> getMapperClass() throws ClassNotFoundException { return mapContext.getMapperClass(); }
      @Override
      public int getMaxMapAttempts() { return mapContext.getMaxMapAttempts(); }
      @Override
      public int getMaxReduceAttempts() { return mapContext.getMaxReduceAttempts(); }
      @Override
      public int getNumReduceTasks() { return mapContext.getNumReduceTasks(); }
      @Override
      public Class<? extends OutputFormat<?,?>> getOutputFormatClass() throws ClassNotFoundException { return mapContext.getOutputFormatClass(); }
      @Override
      public Class<?> getOutputKeyClass() { return mapContext.getOutputKeyClass(); }
      @Override
      public Class<?> getOutputValueClass() { return mapContext.getOutputValueClass(); }
      @Override
      public Class<? extends Partitioner<?,?>> getPartitionerClass() throws ClassNotFoundException { return mapContext.getPartitionerClass(); }
      @Override
      public Class<? extends Reducer<?,?,?,?>> getReducerClass() throws ClassNotFoundException { return mapContext.getReducerClass(); }
      @Override
      public RawComparator<?> getSortComparator() { return mapContext.getSortComparator(); }
      @Override
      public boolean getSymlink() { return mapContext.getSymlink(); }
      @Override
      public Path getWorkingDirectory() throws IOException { return mapContext.getWorkingDirectory(); }
      @Override
      public void progress() { mapContext.progress(); }
      @Override
      public boolean getProfileEnabled() { return mapContext.getProfileEnabled(); }
      @Override
      public String getProfileParams() { return mapContext.getProfileParams(); }
      @Override
      public IntegerRanges getProfileTaskRange(boolean isMap) { return mapContext.getProfileTaskRange(isMap); }
      @Override
      public String getUser() { return mapContext.getUser(); }
      @Override
      public Credentials getCredentials() { return mapContext.getCredentials(); }
      @Override
      public float getProgress() { return mapContext.getProgress(); }
    };

    // Run the mapper over the sample wiki XML to populate the mock tables.
    mapper.run(con);

    // Flush and close record writers.
    rw.close(context);

    table = new QueryLogic();
    table.setMetadataTableName(METADATA_TABLE_NAME);
    table.setTableName(TABLE_NAME);
    table.setIndexTableName(INDEX_TABLE_NAME);
    table.setReverseIndexTableName(RINDEX_TABLE_NAME);
    table.setUseReadAheadIterator(false);
    table.setUnevaluatedFields(Collections.singletonList("TEXT"));
  }

  void debugQuery(String tableName) throws Exception {
    Scanner s = c.createScanner(tableName, new Authorizations("all"));
    Range r = new Range();
    s.setRange(r);
    for (Entry<Key,Value> entry : s)
      System.out.println(entry.getKey().toString() + " " + entry.getValue().toString());
  }

  @Test
  public void testTitle() throws Exception {
    Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.OFF);
    Logger.getLogger(RangeCalculator.class).setLevel(Level.OFF);
    List<String> auths = new ArrayList<String>();
    auths.add("enwiki");

    Results results = table.runQuery(c, auths, "TITLE == 'asphalt' or TITLE == 'abacus' or TITLE == 'acid' or TITLE == 'acronym'", null, null, null);
    List<Document> docs = results.getResults();
    assertEquals(4, docs.size());

    results = table.runQuery(c, auths, "TEXT == 'abacus'", null, null, null);
    docs = results.getResults();
    assertEquals(1, docs.size());
    for (Document doc : docs) {
      System.out.println("id: " + doc.getId());
      for (Field field : doc.getFields())
        System.out.println(field.getFieldName() + " -> " + field.getFieldValue());
    }
  }
}