Java tutorial
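The example below is the complete BufferedGeoMergerTest class from LinkedIn's bobo geosearch project (Apache License 2.0). It is a jMock/Spring-based JUnit test for BufferedGeoMerger: each test builds a set of Lucene segments with configurable sizes and deletion counts, feeds one geo B-tree per segment into the merger, and checks that the merged output tree contains exactly the records of the surviving documents with their doc ids remapped to the new segment.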
/**
 * This software is licensed to you under the Apache License, Version 2.0 (the
 * "Apache License").
 *
 * LinkedIn's contributions are made under the Apache License. If you contribute
 * to the Software, the contributions will be deemed to have been made under the
 * Apache License, unless you expressly indicate otherwise. Please do not make any
 * contributions that would be inconsistent with the Apache License.
 *
 * You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, this software
 * distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache
 * License for the specific language governing permissions and limitations for the
 * software governed under the Apache License.
 *
 * 2012 LinkedIn Corp. All Rights Reserved.
 */
package com.browseengine.bobo.geosearch.merge.impl;

import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.Vector;

import org.apache.lucene.index.LuceneUtils;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.jmock.Expectations;
import org.jmock.Mockery;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.test.annotation.IfProfileValue;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;

import com.browseengine.bobo.geosearch.IFieldNameFilterConverter;
import com.browseengine.bobo.geosearch.IGeoConverter;
import com.browseengine.bobo.geosearch.IGeoUtil;
import com.browseengine.bobo.geosearch.bo.CartesianGeoRecord;
import com.browseengine.bobo.geosearch.bo.GeoSearchConfig;
import com.browseengine.bobo.geosearch.bo.GeoSegmentInfo;
import com.browseengine.bobo.geosearch.bo.LatitudeLongitudeDocId;
import com.browseengine.bobo.geosearch.impl.BTree;
import com.browseengine.bobo.geosearch.impl.GeoRecordBTree;
import com.browseengine.bobo.geosearch.merge.IGeoMergeInfo;

/**
 * @author Geoff Cooney
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration({ "/TEST-servlet.xml" })
@IfProfileValue(name = "test-suite", values = { "unit", "all" })
public class BufferedGeoMergerTest {

    Mockery context = new Mockery();

    private static final String SEGMENT_BASE_NAME = "segment";

    Directory dir;
    GeoSearchConfig geoConfig;
    IGeoMergeInfo geoMergeInfo;
    BufferedGeoMerger bufferedGeoMerger;

    List<SegmentInfo> segmentsToMerge;
    List<SegmentReader> segmentReaders;
    Map<String, GeoRecordBTree> inputTrees;

    SegmentInfo newSegment;
    GeoRecordBTree outputTree;

    IGeoConverter geoConverter;
    IGeoUtil geoUtil;

    TreeSet<CartesianGeoRecord> expectedOutputTree;

    @Before
    public void setUp() {
        geoConfig = new GeoSearchConfig();
        geoMergeInfo = context.mock(IGeoMergeInfo.class);
        dir = new RAMDirectory();

        geoConverter = geoConfig.getGeoConverter();
        geoUtil = geoConfig.getGeoUtil();

        bufferedGeoMerger = new BufferedGeoMerger() {
            @Override
            public BTree<CartesianGeoRecord> getInputBTree(Directory directory, String geoFileName,
                    int bufferSizePerGeoReader) {
                return inputTrees.get(geoFileName);
            }

            @Override
            public BTree<CartesianGeoRecord> getOutputBTree(int newSegmentSize,
                    Iterator<CartesianGeoRecord> inputIterator, Directory directory,
                    String outputFileName, GeoSegmentInfo geoSegmentInfo) throws IOException {
                outputTree = new GeoRecordBTree(newSegmentSize, inputIterator, directory,
                        outputFileName, geoSegmentInfo);
                return outputTree;
            }

            @Override
            public boolean loadFieldNameFilterConverter(Directory directory, String geoFileName,
                    IFieldNameFilterConverter fieldNameFilterConverter) throws IOException {
                return true;
            }
        };

        expectedOutputTree = geoUtil.getBinaryTreeOrderedByBitMag();
    }

    // Builds one SegmentInfo, SegmentReader, and input geo B-tree per entry in
    // docsPerSegment/deletedDocsPerSegment, plus the SegmentInfo for the merged segment.
    private void setUpMergeObjects(int[] docsPerSegment, int[] deletedDocsPerSegment) throws IOException {
        assertEquals(
                "Test specification error. Both arrays should contain one entry per segment and be the same size.",
                docsPerSegment.length, deletedDocsPerSegment.length);

        segmentsToMerge = new Vector<SegmentInfo>();
        segmentReaders = new Vector<SegmentReader>();
        inputTrees = new HashMap<String, GeoRecordBTree>();

        int segmentStart = 0;
        for (int i = 0; i < docsPerSegment.length; i++) {
            final int segmentSize = docsPerSegment[i];
            final int deletedDocs = deletedDocsPerSegment[i];
            final String name = SEGMENT_BASE_NAME + i;

            SegmentReader reader = buildSegmentReader(name, segmentSize, deletedDocs);
            segmentReaders.add(reader);

            SegmentInfo segment = LuceneUtils.buildSegmentInfo(name, segmentSize, deletedDocs, dir);
            segmentsToMerge.add(segment);

            GeoRecordBTree inputTree = buildInputTree(segmentStart, segmentSize, deletedDocsPerSegment[i]);
            String fileName = geoConfig.getGeoFileName(name);
            inputTrees.put(fileName, inputTree);

            segmentStart += (segmentSize - deletedDocs);
        }

        String newSegmentName = "newSegment";
        newSegment = LuceneUtils.buildSegmentInfo(newSegmentName, segmentStart, 0, dir);
    }

    private SegmentReader buildSegmentReader(final String name, final int segmentSize, final int deletedDocs) {
        SegmentReader reader = new SegmentReader() {
            @Override
            public synchronized boolean isDeleted(int n) {
                return isIdDeleted(n, deletedDocs, segmentSize);
            }

            @Override
            public String getSegmentName() {
                return name;
            }

            @Override
            public int maxDoc() {
                return segmentSize;
            }
        };

        return reader;
    }

    // Fills a per-segment tree with 0-3 random locations per document; records for
    // documents that survive the merge are also added to expectedOutputTree under
    // their remapped (absolute) doc ids.
    private GeoRecordBTree buildInputTree(int segmentStartInNewIndex, int segmentSize, int numberOfDeletes)
            throws IOException {
        TreeSet<CartesianGeoRecord> tree = geoUtil.getBinaryTreeOrderedByBitMag();

        int absoluteDocId = segmentStartInNewIndex;
        for (int i = 0; i < segmentSize; i++) {
            int docid = i;
            boolean isDeleted = isIdDeleted(docid, numberOfDeletes, segmentSize);

            int numberOfLocations = (int) (Math.random() * 4);
            for (int j = 0; j < numberOfLocations; j++) {
                double longitude = Math.random();
                double latitude = Math.random();

                LatitudeLongitudeDocId longitudeLatitudeDocId =
                        new LatitudeLongitudeDocId(latitude, longitude, docid);
                CartesianGeoRecord geoRecord = geoConverter.toCartesianGeoRecord(longitudeLatitudeDocId,
                        CartesianGeoRecord.DEFAULT_FILTER_BYTE);
                tree.add(geoRecord);

                if (!isDeleted) {
                    LatitudeLongitudeDocId absoluteLongitudeLatitudeDocId =
                            new LatitudeLongitudeDocId(latitude, longitude, absoluteDocId);
                    CartesianGeoRecord absoluteGeoRecord = geoConverter.toCartesianGeoRecord(
                            absoluteLongitudeLatitudeDocId, CartesianGeoRecord.DEFAULT_FILTER_BYTE);
                    expectedOutputTree.add(absoluteGeoRecord);
                }
            }

            if (!isDeleted) {
                absoluteDocId++;
            }
        }

        return new GeoRecordBTree(tree);
    }

    // Spreads the requested number of deletions evenly across the segment's doc ids.
    private boolean isIdDeleted(int id, int numberOfDeletes, int totalDocs) {
        if (numberOfDeletes != 0) {
            int previousNumDeleted = Math.round((id * numberOfDeletes) / (float) totalDocs);
            int nextNumDeleted = Math.round(((id + 1) * numberOfDeletes) / (float) totalDocs);
            if (nextNumDeleted != previousNumDeleted) {
                return true;
            }
        }

        return false;
    }

    boolean isVerifyOutputTreeAgainstExpected = true;

    private void checkOutputTreeAgainstExpected() throws IOException {
        if (!isVerifyOutputTreeAgainstExpected) {
            return;
        }

        assertEquals("trees should be equal in size", expectedOutputTree.size(), outputTree.getArrayLength());

        Iterator<CartesianGeoRecord> outputIterator = outputTree.getIterator(
                CartesianGeoRecord.MIN_VALID_GEORECORD, CartesianGeoRecord.MAX_VALID_GEORECORD);
        Iterator<CartesianGeoRecord> expectedIterator = expectedOutputTree.iterator();

        int i = 0;
        while (outputIterator.hasNext() && expectedIterator.hasNext()) {
            CartesianGeoRecord actualGeoRecord = outputIterator.next();
            CartesianGeoRecord expectedGeoRecord = expectedIterator.next();

            assertEquals("Index " + i + " of tree does not match expected. Expected CartCoordDocId="
                    + geoConverter.toCartesianCoordinateDocId(expectedGeoRecord)
                    + "; Actual CartCoordDocId=" + geoConverter.toCartesianCoordinateDocId(actualGeoRecord),
                    expectedGeoRecord, actualGeoRecord);

            i++;
        }
    }

    private boolean isNoGeoFiles = false;

    // Sets jMock expectations on the merge info, runs the merge, and verifies the output tree.
    private void doMerge() throws IOException {
        if (isNoGeoFiles) {
            context.checking(new Expectations() {
                {
                    atLeast(1).of(geoMergeInfo).getSegmentsToMerge();
                    will(returnValue(segmentsToMerge));
                    ignoring(geoMergeInfo).checkAborted(dir);
                    // the expectation is that if there are no geo files,
                    // getNewSegment() will not be called.
                    //atLeast(1).of(geoMergeInfo).getNewSegment();
                    //will(returnValue(newSegment));
                    ignoring(geoMergeInfo).getDirectory();
                    will(returnValue(dir));
                    atLeast(1).of(geoMergeInfo).getReaders();
                    will(returnValue(segmentReaders));
                }
            });
        } else {
            context.checking(new Expectations() {
                {
                    atLeast(1).of(geoMergeInfo).getSegmentsToMerge();
                    will(returnValue(segmentsToMerge));
                    ignoring(geoMergeInfo).checkAborted(dir);
                    atLeast(1).of(geoMergeInfo).getNewSegment();
                    will(returnValue(newSegment));
                    ignoring(geoMergeInfo).getDirectory();
                    will(returnValue(dir));
                    atLeast(1).of(geoMergeInfo).getReaders();
                    will(returnValue(segmentReaders));
                }
            });
        }

        bufferedGeoMerger.merge(geoMergeInfo, geoConfig);

        checkOutputTreeAgainstExpected();

        context.assertIsSatisfied();
    }

    @Test
    public void testMerge_no_segment0_GeoFile() throws IOException {
        noGeoFileNames = new HashSet<String>();
        noGeoFileNames.add("segment0.geo");

        verifyMissingGeoFile();
    }

    @Test
    public void testMerge_no_segment1_GeoFile() throws IOException {
        noGeoFileNames = new HashSet<String>();
        noGeoFileNames.add("segment1.geo");

        verifyMissingGeoFile();
    }

    @Test
    public void testMerge_no_GeoFiles() throws IOException {
        isNoGeoFiles = true;

        noGeoFileNames = new HashSet<String>();
        noGeoFileNames.add("segment0.geo");
        noGeoFileNames.add("segment1.geo");

        verifyMissingGeoFile();
    }

    private void verifyMissingGeoFile() throws IOException {
        isVerifyOutputTreeAgainstExpected = false;
        initNoGeoFile();

        testMergeSimple();
    }

    private Set<String> noGeoFileNames;

    // Replaces the merger with one that treats any segment listed in noGeoFileNames
    // as having no geo file.
    private void initNoGeoFile() {
        bufferedGeoMerger = new BufferedGeoMerger() {
            @Override
            public BTree<CartesianGeoRecord> getInputBTree(Directory directory, String geoFileName,
                    int bufferSizePerGeoReader) throws IOException {
                if (noGeoFileNames.contains(geoFileName)) {
                    // empty TreeSet<CartesianGeoRecord> tree
                    return new GeoRecordBTree(new TreeSet<CartesianGeoRecord>());
                }

                return inputTrees.get(geoFileName);
            }

            @Override
            public BTree<CartesianGeoRecord> getOutputBTree(int newSegmentSize,
                    Iterator<CartesianGeoRecord> inputIterator, Directory directory,
                    String outputFileName, GeoSegmentInfo geoSegmentInfo) throws IOException {
                outputTree = new GeoRecordBTree(newSegmentSize, inputIterator, directory,
                        outputFileName, geoSegmentInfo);
                return outputTree;
            }

            @Override
            public boolean loadFieldNameFilterConverter(Directory directory, String geoFileName,
                    IFieldNameFilterConverter fieldNameFilterConverter) throws IOException {
                return !noGeoFileNames.contains(geoFileName);
            }
        };
    }

    @Test
    //10 x 10, no deleted docs
    public void testMergeSimple() throws IOException {
        int[] docsPerSegment = new int[] { 10, 10 };
        int[] deletedDocsPerSegment = new int[] { 0, 0 };

        setUpMergeObjects(docsPerSegment, deletedDocsPerSegment);

        doMerge();
    }

    @Test
    public void test10SmallSegments() throws IOException {
        int[] docsPerSegment = new int[] { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 };
        int[] deletedDocsPerSegment = new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

        setUpMergeObjects(docsPerSegment, deletedDocsPerSegment);

        doMerge();
    }

    @Test
    public void test10Segments_1000docs() throws IOException {
        int[] docsPerSegment = new int[] { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000 };
        int[] deletedDocsPerSegment = new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

        setUpMergeObjects(docsPerSegment, deletedDocsPerSegment);

        doMerge();
    }

    @Test
    public void testSimpleDelete() throws IOException {
        int[] docsPerSegment = new int[] { 10 };
        int[] deletedDocsPerSegment = new int[] { 5 };

        setUpMergeObjects(docsPerSegment, deletedDocsPerSegment);

        doMerge();
    }

    @Test
    public void testDeleteAndMerge_2SmallSegments() throws IOException {
        int[] docsPerSegment = new int[] { 10, 10 };
        int[] deletedDocsPerSegment = new int[] { 2, 1 };

        setUpMergeObjects(docsPerSegment, deletedDocsPerSegment);

        doMerge();
    }

    @Test
    public void testDeleteAndMerge_10LargeSegments_variedDeletes() throws IOException {
        int[] docsPerSegment = new int[] { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000 };
        int[] deletedDocsPerSegment = new int[] { 10, 0, 100, 32, 50, 200, 90, 33, 5, 2 };

        setUpMergeObjects(docsPerSegment, deletedDocsPerSegment);

        doMerge();
    }

    @Test
    public void testDelete_WholeSegment() throws IOException {
        int[] docsPerSegment = new int[] { 1000 };
        int[] deletedDocsPerSegment = new int[] { 1000 };

        setUpMergeObjects(docsPerSegment, deletedDocsPerSegment);

        doMerge();
    }

    @Test
    public void testMergeAndDelete_OneWholeSegmentDeleted() throws IOException {
        int[] docsPerSegment = new int[] { 1000, 1000 };
        int[] deletedDocsPerSegment = new int[] { 1000, 0 };

        setUpMergeObjects(docsPerSegment, deletedDocsPerSegment);

        doMerge();
    }

    @Test
    public void testMergeAndDelete_VariedSegmentSize_VariedDeletes() throws IOException {
        int[] docsPerSegment = new int[] { 1000, 10, 2000, 2000, 10, 100, 300, 70, 7, 1 };
        int[] deletedDocsPerSegment = new int[] { 200, 5, 1000, 300, 1, 0, 20, 6, 2, 0 };

        setUpMergeObjects(docsPerSegment, deletedDocsPerSegment);

        doMerge();
    }
}