org.terrier.structures.indexing.singlepass.hadoop.TestSplitEmittedTerm.java Source code

Java tutorial

Introduction

Here is the source code for org.terrier.structures.indexing.singlepass.hadoop.TestSplitEmittedTerm.java, a JUnit test case for SplitEmittedTerm and its comparators and partitioners.

Source

/*
 * Terrier - Terabyte Retriever
 * Webpage: http://terrier.org
 * Contact: terrier{a.}dcs.gla.ac.uk
 * University of Glasgow - School of Computing Science
 * http://www.gla.ac.uk
 *
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is TestSplitEmittedTerm.java.
 *
 * The Original Code is Copyright (C) 2004-2014 the University of Glasgow.
 * All Rights Reserved.
 *
 * Contributor(s):
 *   Richard McCreadie <richardm{a.}dcs.gla.ac.uk> (original author)
 *   Craig Macdonald <craigm{a.}dcs.gla.ac.uk>
 */
package org.terrier.structures.indexing.singlepass.hadoop;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import junit.framework.TestCase;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;

import org.terrier.structures.indexing.singlepass.hadoop.SplitEmittedTerm.SETPartitioner;
import org.terrier.structures.indexing.singlepass.hadoop.SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm;
import org.terrier.structures.indexing.singlepass.hadoop.SplitEmittedTerm.SETRawComparatorTerm;
import org.terrier.structures.indexing.singlepass.hadoop.SplitEmittedTerm.SETRawComparatorTermSplitFlush;

/**
 * Tests for SplitEmittedTerm, including the Comparators and Partitioners.
 * <p>
 * The tests cover the accessors of {@link SplitEmittedTerm}, its Writable
 * round-trip, the two raw comparators (compared both as objects and as
 * serialised bytes), and the partition calculations of
 * {@link SETPartitionerLowercaseAlphaTerm} and {@link SETPartitioner}.
 */
@SuppressWarnings("deprecation")
public class TestSplitEmittedTerm extends TestCase {

    /** Checks that the constructor, getters and setters agree with each other. */
    public void testMethods() throws Exception {
        SplitEmittedTerm t1 = new SplitEmittedTerm("t1", 10, 34);
        assertEquals("t1", t1.getTerm());
        assertEquals(10, t1.getSplitno());
        assertEquals(34, t1.getFlushno());

        // changing the flush number must leave the split number untouched
        t1.setFlushno(11);
        assertEquals(10, t1.getSplitno());
        assertEquals(11, t1.getFlushno());

        // changing the split number must leave the flush number untouched
        t1.setSplitno(5);
        assertEquals(5, t1.getSplitno());
        assertEquals(11, t1.getFlushno());

        t1.setTerm("t2");
        assertEquals("t2", t1.getTerm());
    }

    /**
     * Serialises a term, reads it back into a fresh instance, and checks that
     * the copy equals the original and carries the same field values.
     *
     * @param t term string to round-trip
     * @param split split number to round-trip
     * @param flush flush number to round-trip
     */
    private void checkWritable(final String t, final int split, final int flush) throws Exception {
        SplitEmittedTerm t1 = new SplitEmittedTerm(t, split, flush);
        byte[] b = toBytes(t1);

        SplitEmittedTerm t2 = new SplitEmittedTerm();
        t2.readFields(new DataInputStream(new ByteArrayInputStream(b)));
        // equality is checked in both directions
        assertTrue(t1.equals(t2));
        assertTrue(t2.equals(t1));
        assertEquals(t, t2.getTerm());
        assertEquals(split, t2.getSplitno());
        assertEquals(flush, t2.getFlushno());
    }

    /** Round-trips terms with small and with maximal split/flush numbers. */
    public void testWritable() throws Exception {
        checkWritable("t1", 10, 34);
        checkWritable("t1", Integer.MAX_VALUE, Integer.MAX_VALUE);
    }

    /**
     * Serialises a Writable into a byte array using its own
     * {@link Writable#write(java.io.DataOutput)} method.
     *
     * @param w the object to serialise
     * @return the bytes written by <code>w</code>
     */
    private byte[] toBytes(Writable w) throws Exception {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(baos);
        w.write(dos);
        // flush explicitly so the result does not depend on the wrapper being unbuffered
        dos.flush();
        return baos.toByteArray();
    }

    /**
     * Checks that a term compares as equal to itself and to a second term built
     * from the same values, via equals(), compareTo() and both comparators
     * (object form for both terms, byte form for the term against itself).
     *
     * @param t term string
     * @param split split number
     * @param flush flush number
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "DM_STRING_CTOR", justification = "Check String.equals is used, not ==")
    private void checkEqualityTerm(String t, int split, int flush) throws Exception {
        SplitEmittedTerm t1 = new SplitEmittedTerm(t, split, flush);
        SETRawComparatorTerm compare = new SETRawComparatorTerm();
        SETRawComparatorTermSplitFlush compare2 = new SETRawComparatorTermSplitFlush();
        // reflexive checks
        assertEquals(0, t1.compareTo(t1));
        assertTrue(t1.equals(t1));
        assertEquals(0, compare.compare(t1, t1));
        assertEquals(0, compare2.compare(t1, t1));
        byte[] t1w = toBytes(t1);
        assertEquals(0, compare.compare(t1w, 0, t1w.length, t1w, 0, t1w.length));
        assertEquals(0, compare2.compare(t1w, 0, t1w.length, t1w, 0, t1w.length));

        // new String(t) deliberately creates a distinct String object, so that an
        // identity (==) comparison of terms would be caught
        SplitEmittedTerm t1a = new SplitEmittedTerm(new String(t), split, flush);
        assertEquals(0, t1.compareTo(t1a));
        assertEquals(0, t1a.compareTo(t1));
        assertTrue(t1.equals(t1a));
        assertTrue(t1a.equals(t1));
        assertEquals(0, compare.compare(t1, t1a));
        assertEquals(0, compare.compare(t1a, t1));
        assertEquals(0, compare2.compare(t1, t1a));
        assertEquals(0, compare2.compare(t1a, t1));
    }

    /** Runs the equality checks with minimal and maximal split/flush numbers. */
    public void testEqualityTerm() throws Exception {
        checkEqualityTerm("t1", 0, 0);
        checkEqualityTerm("t1", Integer.MAX_VALUE, Integer.MAX_VALUE);
    }

    /**
     * Checks two terms with the same term string and flush number but different
     * split numbers: they must not be equal, the term-only comparator must
     * return 0, and the term/split/flush comparator must order the first
     * before the second.
     *
     * @param t term string shared by both terms
     * @param split1 split number of the first term; callers pass a value smaller than split2
     * @param split2 split number of the second term
     * @param flush flush number shared by both terms
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "DM_STRING_CTOR", justification = "Check String.equals is used, not ==")
    private void checkEqualityTermSplit(String t, int split1, int split2, int flush) throws Exception {
        SplitEmittedTerm t1 = new SplitEmittedTerm(t, split1, flush);
        SplitEmittedTerm t2 = new SplitEmittedTerm(new String(t), split2, flush);
        SETRawComparatorTerm compare = new SETRawComparatorTerm();
        SETRawComparatorTermSplitFlush compare2 = new SETRawComparatorTermSplitFlush();

        assertEquals(0, t1.compareTo(t1));

        assertFalse(t1.equals(t2));
        assertEquals(0, compare.compare(t1, t2));
        assertTrue(compare2.compare(t1, t2) < 0);

        // the same expectations must hold on the serialised form
        byte[] t1w = toBytes(t1);
        byte[] t2w = toBytes(t2);
        assertEquals(0, compare.compare(t1w, 0, t1w.length, t2w, 0, t2w.length));
        assertTrue("Comparing t1 to t2 as bytes", compare2.compare(t1w, 0, t1w.length, t2w, 0, t2w.length) < 0);
    }

    /** Runs the split-difference checks with small and with near-maximal numbers. */
    public void testEqualityTermSplit() throws Exception {
        checkEqualityTermSplit("t1", 0, 1, 0);
        checkEqualityTermSplit("t1", Integer.MAX_VALUE - 1, Integer.MAX_VALUE, Integer.MAX_VALUE);
    }

    /**
     * Checks that <code>t1</code> sorts strictly before <code>t2</code> under
     * compareTo() and under both comparators, in object and in byte form, and
     * in both argument orders.
     *
     * @param t1 the term expected to sort first
     * @param t2 the term expected to sort second
     */
    private void compareTerm(SplitEmittedTerm t1, SplitEmittedTerm t2) throws Exception {
        SETRawComparatorTerm compare = new SETRawComparatorTerm();
        //check for inequality of each pair
        assertFalse(t1.equals(t2));
        assertFalse(t2.equals(t1));

        assertTrue(t1.compareTo(t2) < 0);
        assertTrue(t2.compareTo(t1) > 0);
        assertTrue(compare.compare(t1, t2) < 0);
        assertTrue(compare.compare(t2, t1) > 0);

        SETRawComparatorTermSplitFlush compare2 = new SETRawComparatorTermSplitFlush();
        assertTrue(compare2.compare(t1, t2) < 0);
        assertTrue(compare2.compare(t2, t1) > 0);
        byte[] t1w = toBytes(t1);
        byte[] t2w = toBytes(t2);
        assertTrue(compare.compare(t1w, 0, t1w.length, t2w, 0, t2w.length) < 0);
        assertTrue("Comparing t1 to t2 as bytes", compare2.compare(t1w, 0, t1w.length, t2w, 0, t2w.length) < 0);
        // reverse direction on the byte form, as the split and flush variants of this check also do
        assertTrue(compare.compare(t2w, 0, t2w.length, t1w, 0, t1w.length) > 0);
        assertTrue("Comparing t2 to t1 as bytes", compare2.compare(t2w, 0, t2w.length, t1w, 0, t1w.length) > 0);
    }

    /** Compares terms that differ only in their term string. */
    public void testCompareTerm() throws Exception {
        SplitEmittedTerm t1 = new SplitEmittedTerm("t1", 0, 0);
        SplitEmittedTerm t2 = new SplitEmittedTerm("t2", 0, 0);
        compareTerm(t1, t2);

        t1 = new SplitEmittedTerm("t1", Integer.MAX_VALUE, Integer.MAX_VALUE);
        t2 = new SplitEmittedTerm("t2", Integer.MAX_VALUE, Integer.MAX_VALUE);
        compareTerm(t1, t2);
    }

    /**
     * Checks a pair of terms that share a term string but where <code>t1</code>
     * sorts before <code>t2</code>: compareTo() and the term/split/flush
     * comparator must order them, while the term-only comparator must return 0,
     * in object and in byte form and in both argument orders.
     *
     * @param t1 the term expected to sort first
     * @param t2 the term expected to sort second
     */
    private void compareTermSplit(SplitEmittedTerm t1, SplitEmittedTerm t2) throws Exception {
        SETRawComparatorTerm compare = new SETRawComparatorTerm();
        //check for inequality of each pair
        assertFalse(t1.equals(t2));
        assertFalse(t2.equals(t1));

        assertTrue(t1.compareTo(t2) < 0);
        assertTrue(t2.compareTo(t1) > 0);
        assertEquals(0, compare.compare(t1, t2));
        assertEquals(0, compare.compare(t2, t1));

        SETRawComparatorTermSplitFlush compare2 = new SETRawComparatorTermSplitFlush();
        assertTrue(compare2.compare(t1, t2) < 0);
        assertTrue(compare2.compare(t2, t1) > 0);
        byte[] t1w = toBytes(t1);
        byte[] t2w = toBytes(t2);
        assertEquals(0, compare.compare(t1w, 0, t1w.length, t2w, 0, t2w.length));
        assertEquals(0, compare.compare(t2w, 0, t2w.length, t1w, 0, t1w.length));
        assertTrue(compare2.compare(t1w, 0, t1w.length, t2w, 0, t2w.length) < 0);
        assertTrue(compare2.compare(t2w, 0, t2w.length, t1w, 0, t1w.length) > 0);
    }

    /** Compares terms that differ only in their split number. */
    public void testCompareTermSplit() throws Exception {

        SplitEmittedTerm t1 = new SplitEmittedTerm("t1", 0, 0);
        SplitEmittedTerm t2 = new SplitEmittedTerm("t1", 1, 0);
        compareTermSplit(t1, t2);

        t1 = new SplitEmittedTerm("t1", Integer.MAX_VALUE - 1, 0);
        t2 = new SplitEmittedTerm("t1", Integer.MAX_VALUE, 0);
        compareTermSplit(t1, t2);

    }

    /**
     * Reduces a comparison result to its sign, so that two comparison results
     * can be checked for agreement regardless of magnitude.
     *
     * @param a any comparison result
     * @return -1 if <code>a</code> is negative, 1 if positive, 0 if zero
     */
    static final int sign(int a) {
        return Integer.signum(a);
    }

    /**
     * Checks a pair of terms where <code>t1</code> sorts before <code>t2</code>:
     * compareTo() and the term/split/flush comparator must order them, and the
     * term-only comparator must agree in sign with String.compareTo() on the
     * term strings, in object and in byte form and in both argument orders.
     *
     * @param t1 the term expected to sort first
     * @param t2 the term expected to sort second
     */
    private void compareTermFlush(SplitEmittedTerm t1, SplitEmittedTerm t2) throws Exception {
        SETRawComparatorTerm compare = new SETRawComparatorTerm();
        //check for inequality of each pair
        assertFalse(t1.equals(t2));
        assertFalse(t2.equals(t1));

        assertTrue(t1.compareTo(t2) < 0);
        assertTrue(t2.compareTo(t1) > 0);
        // only the sign is compared: the comparator need not return the same magnitude as String.compareTo
        assertEquals(sign(t1.getTerm().compareTo(t2.getTerm())), sign(compare.compare(t1, t2)));
        assertEquals(sign(t2.getTerm().compareTo(t1.getTerm())), sign(compare.compare(t2, t1)));

        SETRawComparatorTermSplitFlush compare2 = new SETRawComparatorTermSplitFlush();
        assertTrue(compare2.compare(t1, t2) < 0);
        assertTrue(compare2.compare(t2, t1) > 0);
        byte[] t1w = toBytes(t1);
        byte[] t2w = toBytes(t2);
        assertEquals(sign(t1.getTerm().compareTo(t2.getTerm())),
                sign(compare.compare(t1w, 0, t1w.length, t2w, 0, t2w.length)));
        assertEquals(sign(t2.getTerm().compareTo(t1.getTerm())),
                sign(compare.compare(t2w, 0, t2w.length, t1w, 0, t1w.length)));
        assertTrue(compare2.compare(t1w, 0, t1w.length, t2w, 0, t2w.length) < 0);
        assertTrue(compare2.compare(t2w, 0, t2w.length, t1w, 0, t1w.length) > 0);
    }

    /**
     * Compares terms that differ only in their flush number, plus pairs of
     * single-character, non-alphabetic terms.
     */
    public void testCompareTermFlush() throws Exception {
        SplitEmittedTerm t1, t2;
        t1 = new SplitEmittedTerm("t1", 0, 0);
        t2 = new SplitEmittedTerm("t1", 0, 1);
        compareTermFlush(t1, t2);

        t1 = new SplitEmittedTerm(".", 0, 0);
        t2 = new SplitEmittedTerm("0", 0, 0);
        compareTermFlush(t1, t2);

        t1 = new SplitEmittedTerm("0", 0, 0);
        t2 = new SplitEmittedTerm("\\", 0, 0);
        compareTermFlush(t1, t2);

        t1 = new SplitEmittedTerm("t1", 0, Integer.MAX_VALUE - 1);
        t2 = new SplitEmittedTerm("t1", 0, Integer.MAX_VALUE);
        compareTermFlush(t1, t2);
    }

    /* Test cases for SETPartitionerLowercaseAlphaTerm */

    /**
     * Checks the partition assigned to a leading character for 1, 2, 3 and 26
     * partitions. The expectations below show digits, punctuation and upper-case
     * letters in partition 0, and lower-case letters spread across the partitions
     * in alphabetical order.
     */
    public void testSETPLAT() throws Exception {
        final SETPartitionerLowercaseAlphaTerm p = new SETPartitionerLowercaseAlphaTerm();
        //single partition
        assertEquals(0, p.calculatePartition('0', 1));
        assertEquals(0, p.calculatePartition('9', 1));
        assertEquals(0, p.calculatePartition('-', 1));
        assertEquals(0, p.calculatePartition('a', 1));
        assertEquals(0, p.calculatePartition('z', 1));
        assertEquals(0, p.calculatePartition('}', 1));
        //two partitions
        assertEquals(0, p.calculatePartition('(', 2));
        assertEquals(0, p.calculatePartition('.', 2));
        assertEquals(0, p.calculatePartition(')', 2));
        assertEquals(0, p.calculatePartition('\\', 2));
        assertEquals(0, p.calculatePartition('/', 2));

        assertEquals(0, p.calculatePartition('0', 2));
        assertEquals(0, p.calculatePartition('9', 2));
        assertEquals(0, p.calculatePartition('-', 2));
        assertEquals(0, p.calculatePartition('a', 2));
        assertEquals(0, p.calculatePartition('l', 2));
        assertEquals(0, p.calculatePartition('m', 2));
        assertEquals(1, p.calculatePartition('n', 2));
        assertEquals(1, p.calculatePartition('o', 2));
        assertEquals(1, p.calculatePartition('z', 2));
        assertEquals(1, p.calculatePartition('}', 2));
        //(all upper case goto partition 0)
        assertEquals(0, p.calculatePartition('M', 2));
        assertEquals(0, p.calculatePartition('N', 2));
        assertEquals(0, p.calculatePartition('O', 2));

        //three partitions
        assertEquals(0, p.calculatePartition('0', 3));
        assertEquals(0, p.calculatePartition('9', 3));
        assertEquals(0, p.calculatePartition('-', 3));
        assertEquals(0, p.calculatePartition('a', 3));
        assertEquals(0, p.calculatePartition('h', 3));
        assertEquals(0, p.calculatePartition('i', 3));
        assertEquals(1, p.calculatePartition('j', 3));
        assertEquals(1, p.calculatePartition('r', 3));
        assertEquals(2, p.calculatePartition('s', 3));
        assertEquals(2, p.calculatePartition('t', 3));
        assertEquals(2, p.calculatePartition('u', 3));
        assertEquals(2, p.calculatePartition('z', 3));
        assertEquals(2, p.calculatePartition('}', 3));

        //26 partitions
        assertEquals(0, p.calculatePartition('0', 26));
        assertEquals(0, p.calculatePartition('9', 26));
        assertEquals(0, p.calculatePartition('-', 26));
        assertEquals(0, p.calculatePartition('a', 26));
        assertEquals(1, p.calculatePartition('b', 26));
        assertEquals(2, p.calculatePartition('c', 26));
        assertEquals(3, p.calculatePartition('d', 26));
        assertEquals(4, p.calculatePartition('e', 26));
        assertEquals(5, p.calculatePartition('f', 26));
        assertEquals(6, p.calculatePartition('g', 26));
        assertEquals(7, p.calculatePartition('h', 26));
        assertEquals(8, p.calculatePartition('i', 26));
        assertEquals(9, p.calculatePartition('j', 26));
        assertEquals(10, p.calculatePartition('k', 26));
        assertEquals(11, p.calculatePartition('l', 26));
        assertEquals(12, p.calculatePartition('m', 26));
        assertEquals(13, p.calculatePartition('n', 26));
        assertEquals(14, p.calculatePartition('o', 26));
        assertEquals(15, p.calculatePartition('p', 26));
        assertEquals(16, p.calculatePartition('q', 26));
        assertEquals(17, p.calculatePartition('r', 26));
        assertEquals(18, p.calculatePartition('s', 26));
        assertEquals(19, p.calculatePartition('t', 26));
        assertEquals(20, p.calculatePartition('u', 26));
        assertEquals(21, p.calculatePartition('v', 26));
        assertEquals(22, p.calculatePartition('w', 26));
        assertEquals(23, p.calculatePartition('x', 26));
        assertEquals(24, p.calculatePartition('y', 26));
        assertEquals(25, p.calculatePartition('z', 26));
    }

    /* Test cases for SETPartitioner */

    /** single map, single reducer */
    public void testSMSRCalculatePartition() throws Exception {
        final JobConf j = new JobConf();
        j.setNumMapTasks(1);
        final SETPartitioner p = new SETPartitioner();
        p.configure(j);
        assertEquals(0, p.calculatePartition(0, 1));
    }

    /** multiple map, single reducer: every input is expected in partition 0 */
    public void testMMSRCalculatePartition() throws Exception {
        final JobConf j = new JobConf();
        final int maptasks = 20;
        j.setNumMapTasks(maptasks);
        final SETPartitioner p = new SETPartitioner();
        p.configure(j);
        assertEquals(0, p.calculatePartition(0, 1));
        assertEquals(0, p.calculatePartition(19, 1));
        assertEquals(0, p.calculatePartition(10, 1));

    }

    /**
     * multiple map, multiple reducer: with 20 map tasks and 2 partitions,
     * inputs 0-9 are expected in partition 0 and inputs 10-19 in partition 1
     */
    public void testMMMRCalculatePartition() throws Exception {
        final JobConf j = new JobConf();
        final int maptasks = 20;
        j.setNumMapTasks(maptasks);
        final SETPartitioner p = new SETPartitioner();
        p.configure(j);

        assertEquals(0, p.calculatePartition(0, 2));
        assertEquals(0, p.calculatePartition(1, 2));
        assertEquals(0, p.calculatePartition(9, 2));
        assertEquals(1, p.calculatePartition(10, 2));
        assertEquals(1, p.calculatePartition(19, 2));
    }
}