/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.accumulo.examples.wikisearch.logic;

import static org.junit.Assert.assertEquals;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;

import junit.framework.Assert;

import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.mock.MockInstance;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaIngester;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
import org.apache.accumulo.examples.wikisearch.sample.Document;
import org.apache.accumulo.examples.wikisearch.sample.Field;
import org.apache.accumulo.examples.wikisearch.sample.Results;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configuration.IntegerRanges;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.task.MapContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.security.Credentials;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.junit.Before;
import org.junit.Test;
public class TestQueryLogic {

  private static final String METADATA_TABLE_NAME = "wikiMetadata";
  private static final String TABLE_NAME = "wiki";
  private static final String INDEX_TABLE_NAME = "wikiIndex";
  private static final String RINDEX_TABLE_NAME = "wikiReverseIndex";
  private static final String[] TABLE_NAMES = {METADATA_TABLE_NAME, TABLE_NAME, RINDEX_TABLE_NAME, INDEX_TABLE_NAME};

  /**
   * RecordWriter that routes mutations produced by the mapper into the mock Accumulo tables via the BatchWriters in writerMap.
   */
  private class MockAccumuloRecordWriter extends RecordWriter<Text,Mutation> {
    @Override
    public void write(Text key, Mutation value) throws IOException, InterruptedException {
      try {
        writerMap.get(key).addMutation(value);
      } catch (MutationsRejectedException e) {
        throw new IOException("Error adding mutation", e);
      }
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
      try {
        for (BatchWriter w : writerMap.values()) {
          w.flush();
          w.close();
        }
      } catch (MutationsRejectedException e) {
        throw new IOException("Error closing Batch Writer", e);
      }
    }
  }

  private Connector c = null;
  private Configuration conf = new Configuration();
  private HashMap<Text,BatchWriter> writerMap = new HashMap<Text,BatchWriter>();
  private QueryLogic table = null;

  @Before
  public void setup() throws Exception {

    Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.DEBUG);
    Logger.getLogger(QueryLogic.class).setLevel(Level.DEBUG);
    Logger.getLogger(RangeCalculator.class).setLevel(Level.DEBUG);

    conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
    conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
    conf.set(WikipediaConfiguration.TABLE_NAME, TABLE_NAME);
    conf.set(WikipediaConfiguration.NUM_PARTITIONS, "1");
    conf.set(WikipediaConfiguration.NUM_GROUPS, "1");

    // Create the wiki tables in an in-memory MockInstance and a BatchWriter for each table.
    MockInstance i = new MockInstance();
    c = i.getConnector("root", new PasswordToken(""));
    WikipediaIngester.createTables(c.tableOperations(), TABLE_NAME, false);
    for (String table : TABLE_NAMES) {
      writerMap.put(new Text(table), c.createBatchWriter(table, 1000L, 1000L, 1));
    }

    TaskAttemptID id = new TaskAttemptID("fake", 1, TaskType.MAP, 1, 1);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, id);

    RawLocalFileSystem fs = new RawLocalFileSystem();
    fs.setConf(conf);

    // The sample wiki dump must be on the test classpath.
    URL url = ClassLoader.getSystemResource("enwiki-20110901-001.xml");
    Assert.assertNotNull(url);
    File data = new File(url.toURI());
    Path tmpFile = new Path(data.getAbsolutePath());

    // Setup the Mapper
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null), 0);
    AggregatingRecordReader rr = new AggregatingRecordReader();
    Path ocPath = new Path(tmpFile, "oc");
    OutputCommitter oc = new FileOutputCommitter(ocPath, context);
    fs.deleteOnExit(ocPath);
    StandaloneStatusReporter sr = new StandaloneStatusReporter();
    rr.initialize(split, context);
    MockAccumuloRecordWriter rw = new MockAccumuloRecordWriter();
    WikipediaMapper mapper = new WikipediaMapper();
    // there are times I wonder, "Why do Java people think this is good?" then I drink more whiskey
    final MapContextImpl<LongWritable,Text,Text,Mutation> mapContext = new MapContextImpl<LongWritable,Text,Text,Mutation>(conf, id, rr, rw, oc, sr, split);

    // Load data into Mock Accumulo: wrap mapContext in a Mapper.Context that delegates every call to it.
    Mapper<LongWritable,Text,Text,Mutation>.Context con = mapper.new Context() {
      /**
       * Get the input split for this map.
       */
      public InputSplit getInputSplit() { return mapContext.getInputSplit(); }

      @Override
      public LongWritable getCurrentKey() throws IOException, InterruptedException { return mapContext.getCurrentKey(); }
      @Override
      public Text getCurrentValue() throws IOException, InterruptedException { return mapContext.getCurrentValue(); }
      @Override
      public boolean nextKeyValue() throws IOException, InterruptedException { return mapContext.nextKeyValue(); }
      @Override
      public Counter getCounter(Enum<?> counterName) { return mapContext.getCounter(counterName); }
      @Override
      public Counter getCounter(String groupName, String counterName) { return mapContext.getCounter(groupName, counterName); }
      @Override
      public OutputCommitter getOutputCommitter() { return mapContext.getOutputCommitter(); }
      @Override
      public void write(Text key, Mutation value) throws IOException, InterruptedException { mapContext.write(key, value); }
      @Override
      public String getStatus() { return mapContext.getStatus(); }
      @Override
      public TaskAttemptID getTaskAttemptID() { return mapContext.getTaskAttemptID(); }
      @Override
      public void setStatus(String msg) { mapContext.setStatus(msg); }
      @Override
      public Path[] getArchiveClassPaths() { return mapContext.getArchiveClassPaths(); }
      @Override
      public String[] getArchiveTimestamps() { return mapContext.getArchiveTimestamps(); }
      @Override
      public URI[] getCacheArchives() throws IOException { return mapContext.getCacheArchives(); }
      @Override
      public URI[] getCacheFiles() throws IOException { return mapContext.getCacheFiles(); }
      @Override
      public Class<? extends Reducer<?,?,?,?>> getCombinerClass() throws ClassNotFoundException { return mapContext.getCombinerClass(); }
      @Override
      public Configuration getConfiguration() { return mapContext.getConfiguration(); }
      @Override
      public Path[] getFileClassPaths() { return mapContext.getFileClassPaths(); }
      @Override
      public String[] getFileTimestamps() { return mapContext.getFileTimestamps(); }
      @Override
      public RawComparator<?> getGroupingComparator() { return mapContext.getGroupingComparator(); }
      @Override
      public Class<? extends InputFormat<?,?>> getInputFormatClass() throws ClassNotFoundException { return mapContext.getInputFormatClass(); }
      @Override
      public String getJar() { return mapContext.getJar(); }
      @Override
      public JobID getJobID() { return mapContext.getJobID(); }
      @Override
      public String getJobName() { return mapContext.getJobName(); }
      /*@Override
      public boolean userClassesTakesPrecedence() { return mapContext.userClassesTakesPrecedence(); }*/
      @Override
      public boolean getJobSetupCleanupNeeded() { return mapContext.getJobSetupCleanupNeeded(); }
      @Override
      public boolean getTaskCleanupNeeded() { return mapContext.getTaskCleanupNeeded(); }
      @Override
      public Path[] getLocalCacheArchives() throws IOException { return mapContext.getLocalCacheArchives(); }
      @Override
      public Path[] getLocalCacheFiles() throws IOException { return mapContext.getLocalCacheFiles(); }
      @Override
      public Class<?> getMapOutputKeyClass() { return mapContext.getMapOutputKeyClass(); }
      @Override
      public Class<?> getMapOutputValueClass() { return mapContext.getMapOutputValueClass(); }
      @Override
      public Class<? extends Mapper<?,?,?,?>> getMapperClass() throws ClassNotFoundException { return mapContext.getMapperClass(); }
      @Override
      public int getMaxMapAttempts() { return mapContext.getMaxMapAttempts(); }
      @Override
      public int getMaxReduceAttempts() { return mapContext.getMaxReduceAttempts(); }
      @Override
      public int getNumReduceTasks() { return mapContext.getNumReduceTasks(); }
      @Override
      public Class<? extends OutputFormat<?,?>> getOutputFormatClass() throws ClassNotFoundException { return mapContext.getOutputFormatClass(); }
      @Override
      public Class<?> getOutputKeyClass() { return mapContext.getOutputKeyClass(); }
      @Override
      public Class<?> getOutputValueClass() { return mapContext.getOutputValueClass(); }
      @Override
      public Class<? extends Partitioner<?,?>> getPartitionerClass() throws ClassNotFoundException { return mapContext.getPartitionerClass(); }
      @Override
      public Class<? extends Reducer<?,?,?,?>> getReducerClass() throws ClassNotFoundException { return mapContext.getReducerClass(); }
      @Override
      public RawComparator<?> getSortComparator() { return mapContext.getSortComparator(); }
      @Override
      public boolean getSymlink() { return mapContext.getSymlink(); }
      @Override
      public Path getWorkingDirectory() throws IOException { return mapContext.getWorkingDirectory(); }
      @Override
      public void progress() { mapContext.progress(); }
      @Override
      public boolean getProfileEnabled() { return mapContext.getProfileEnabled(); }
      @Override
      public String getProfileParams() { return mapContext.getProfileParams(); }
      @Override
      public IntegerRanges getProfileTaskRange(boolean isMap) { return mapContext.getProfileTaskRange(isMap); }
      @Override
      public String getUser() { return mapContext.getUser(); }
      @Override
      public Credentials getCredentials() { return mapContext.getCredentials(); }
      @Override
      public float getProgress() { return mapContext.getProgress(); }
    };

    // Run the mapper over the sample wiki XML to populate the mock tables.
    mapper.run(con);

    // Flush and close record writers.
    rw.close(context);

    table = new QueryLogic();
    table.setMetadataTableName(METADATA_TABLE_NAME);
    table.setTableName(TABLE_NAME);
    table.setIndexTableName(INDEX_TABLE_NAME);
    table.setReverseIndexTableName(RINDEX_TABLE_NAME);
    table.setUseReadAheadIterator(false);
    table.setUnevaluatedFields(Collections.singletonList("TEXT"));
  }

  void debugQuery(String tableName) throws Exception {
    Scanner s = c.createScanner(tableName, new Authorizations("all"));
    Range r = new Range();
    s.setRange(r);
    for (Entry<Key,Value> entry : s)
      System.out.println(entry.getKey().toString() + " " + entry.getValue().toString());
  }

  @Test
  public void testTitle() throws Exception {
    Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.OFF);
    Logger.getLogger(RangeCalculator.class).setLevel(Level.OFF);
    List<String> auths = new ArrayList<String>();
    auths.add("enwiki");

    Results results = table.runQuery(c, auths, "TITLE == 'asphalt' or TITLE == 'abacus' or TITLE == 'acid' or TITLE == 'acronym'", null, null, null);
    List<Document> docs = results.getResults();
    assertEquals(4, docs.size());

    results = table.runQuery(c, auths, "TEXT == 'abacus'", null, null, null);
    docs = results.getResults();
    assertEquals(1, docs.size());
    for (Document doc : docs) {
      System.out.println("id: " + doc.getId());
      for (Field field : doc.getFields())
        System.out.println(field.getFieldName() + " -> " + field.getFieldValue());
    }
  }
}