// Java tutorial
/** * Copyright [2012] [Datasalt Systems S.L.] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.datasalt.pangool.tuplemr.mapred; import com.datasalt.pangool.PangoolRuntimeException; import com.datasalt.pangool.io.BitField; import com.datasalt.pangool.io.ITuple; import com.datasalt.pangool.io.Schema; import com.datasalt.pangool.io.Schema.Field; import com.datasalt.pangool.io.Schema.Field.Type; import com.datasalt.pangool.io.Utf8; import com.datasalt.pangool.tuplemr.Criteria; import com.datasalt.pangool.tuplemr.Criteria.Order; import com.datasalt.pangool.tuplemr.Criteria.SortElement; import com.datasalt.pangool.tuplemr.SerializationInfo; import com.datasalt.pangool.tuplemr.TupleMRConfig; import com.datasalt.pangool.tuplemr.TupleMRConfigBuilder; import com.datasalt.pangool.tuplemr.serialization.TupleSerialization; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.WritableComparator; import org.apache.hadoop.io.WritableUtils; import org.apache.hadoop.io.serializer.Serializer; import java.io.IOException; import java.nio.ByteBuffer; import static org.apache.hadoop.io.WritableComparator.*; /** * Tuple-based MapRed job binary comparator. It decodes the binary serialization * performed by {@link TupleSerialization}. 
* <p/> * Used to group tuples according to * {@link TupleMRConfigBuilder#setOrderBy(com.datasalt.pangool.tuplemr.OrderBy)} */ @SuppressWarnings("rawtypes") public class SortComparator implements RawComparator<ITuple>, Configurable { protected Configuration conf; protected TupleMRConfig tupleMRConf; protected SerializationInfo serInfo; protected final SerializerComparator serializerComparator = new SerializerComparator(); private static final class Offsets { protected int offset1 = 0; protected int offset2 = 0; } private static final class Nulls { protected BitField nulls1 = new BitField(); protected BitField nulls2 = new BitField(); } protected Offsets offsets = new Offsets(); protected Nulls nulls = new Nulls(); protected boolean isMultipleSources; public TupleMRConfig getConfig() { return tupleMRConf; } public SortComparator() { } /** * Never called in MapRed jobs. Just for completion and test purposes */ @Override public int compare(ITuple w1, ITuple w2) { if (isMultipleSources) { int schemaId1 = tupleMRConf.getSchemaIdByName(w1.getSchema().getName()); int schemaId2 = tupleMRConf.getSchemaIdByName(w2.getSchema().getName()); int[] indexes1 = serInfo.getCommonSchemaIndexTranslation(schemaId1); int[] indexes2 = serInfo.getCommonSchemaIndexTranslation(schemaId2); Criteria c = tupleMRConf.getCommonCriteria(); int comparison = compare(serInfo.getCommonSchema(), c, w1, indexes1, w2, indexes2, serInfo.getCommonSchemaSerializers()); if (comparison != 0) { return comparison; } else if (schemaId1 != schemaId2) { int r = schemaId1 - schemaId2; return (tupleMRConf.getSchemasOrder() == Order.ASC) ? 
r : -r; } int schemaId = schemaId1; c = tupleMRConf.getSpecificOrderBys().get(schemaId); if (c != null) { int[] indexes = serInfo.getSpecificSchemaIndexTranslation(schemaId); return compare(serInfo.getSpecificSchema(schemaId), c, w1, indexes, w2, indexes, serInfo.getSpecificSchemaSerializers().get(schemaId)); } else { return 0; } } else { int[] indexes = serInfo.getCommonSchemaIndexTranslation(0); Criteria c = tupleMRConf.getCommonCriteria(); return compare(serInfo.getCommonSchema(), c, w1, indexes, w2, indexes, serInfo.getCommonSchemaSerializers()); } } public int compare(Schema schema, Criteria c, ITuple w1, int[] index1, ITuple w2, int[] index2, Serializer[] serializers) { for (int i = 0; i < c.getElements().size(); i++) { Field field = schema.getField(i); SortElement e = c.getElements().get(i); Object o1 = w1.get(index1[i]); Object o2 = w2.get(index2[i]); // Handling with null values if (o1 == null || o2 == null) { int cmp = nullCompare(o1, o2, e); if (cmp != 0) { return cmp; } else { continue; } } // At this point we know that both values are not null. Serializer serializer = (serializers == null) ? null : serializers[i]; int comparison = compareObjects(o1, o2, e.getCustomComparator(), field.getType(), serializer); if (comparison != 0) { return (e.getOrder() == Order.ASC ? comparison : -comparison); } } return 0; } /** * Compares two objects. Uses the given custom comparator if present. If the * type is {@link Type#OBJECT} and no raw comparator is present, then a serializer * comparator is used. */ @SuppressWarnings({ "unchecked" }) public int compareObjects(Object elem1, Object elem2, RawComparator comparator, Type type, Serializer serializer) { // If custom, just use custom. 
if (comparator != null) { return comparator.compare(elem1, elem2); } if (type == Type.OBJECT) { return serializerComparator.compare(elem1, serializer, elem2, serializer); } else { return compareObjects(elem1, elem2); } } @SuppressWarnings("unchecked") public static int compareObjects(Object element1, Object element2) { if (element1 == null) { return (element2 == null) ? 0 : -1; } else if (element2 == null) { return 1; } else { if (element1 instanceof String) { element1 = new Utf8((String) element1); } if (element2 instanceof String) { element2 = new Utf8((String) element2); } if (element1 instanceof byte[]) { byte[] buffer1 = (byte[]) element1; if (element2 instanceof byte[]) { byte[] buffer2 = (byte[]) element2; return compareBytes(buffer1, 0, buffer1.length, buffer2, 0, buffer2.length); } else if (element2 instanceof ByteBuffer) { ByteBuffer buffer2 = (ByteBuffer) element2; int start2 = buffer2.arrayOffset() + buffer2.position(); int len2 = buffer2.limit() - buffer2.position(); return compareBytes(buffer1, 0, buffer1.length, buffer2.array(), start2, len2); } else { throw new PangoolRuntimeException("Can't compare byte[] with " + element2.getClass()); } } else if (element1 instanceof ByteBuffer) { ByteBuffer buffer1 = (ByteBuffer) element1; int pos1 = buffer1.position(); int start1 = buffer1.arrayOffset() + pos1; int len1 = buffer1.limit() - pos1; if (element2 instanceof byte[]) { byte[] buffer2 = (byte[]) element2; return compareBytes(buffer1.array(), start1, len1, buffer2, 0, buffer2.length); } else if (element2 instanceof ByteBuffer) { ByteBuffer buffer2 = (ByteBuffer) element2; int pos2 = buffer2.position(); int start2 = buffer2.arrayOffset() + pos2; int len2 = buffer2.limit() - pos2; return compareBytes(buffer1.array(), start1, len1, buffer2.array(), start2, len2); } else { throw new PangoolRuntimeException("Can't compare byte[] with " + element2.getClass()); } } else if (element1 instanceof Comparable) { return ((Comparable) element1).compareTo(element2); } 
else if (element2 instanceof Comparable) { return -((Comparable) element2).compareTo(element1); } else { throw new PangoolRuntimeException( "Not comparable elements:" + element1.getClass() + " with object " + element2.getClass()); } } } public int nullCompare(Object o1, Object o2, SortElement se) { int res = -2; if (o1 == null) { res = (o2 == null) ? 0 : -1; } else if (o2 == null) { res = 1; } if (res == -2) { throw new IllegalArgumentException( "None of the two object passed as parameters are null. " + "That is not allowed"); } return (se.getNullOrder() == Criteria.NullOrder.NULL_SMALLEST && se.getOrder() == Order.ASC) ? res : -res; } @Override public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { try { return (isMultipleSources) ? compareMultipleSources(b1, s1, l1, b2, s2, l2) : compareOneSource(b1, s1, l1, b2, s2, l2); } catch (IOException e) { throw new RuntimeException(e); } } protected int compareMultipleSources(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) throws IOException { Schema commonSchema = serInfo.getCommonSchema(); Criteria commonOrder = tupleMRConf.getCommonCriteria(); int comparison = compare(b1, s1, b2, s2, commonSchema, commonOrder, offsets, nulls); if (comparison != 0) { return comparison; } int schemaId1 = readVInt(b1, offsets.offset1); int schemaId2 = readVInt(b2, offsets.offset2); if (schemaId1 != schemaId2) { int r = schemaId1 - schemaId2; return (tupleMRConf.getSchemasOrder() == Order.ASC) ? 
r : -r; } int vintSize = WritableUtils.decodeVIntSize(b1[offsets.offset1]); offsets.offset1 += vintSize; offsets.offset2 += vintSize; // sources are the same Criteria criteria = tupleMRConf.getSpecificOrderBys().get(schemaId1); if (criteria == null) { return 0; } Schema specificSchema = serInfo.getSpecificSchema(schemaId1); return compare(b1, offsets.offset1, b2, offsets.offset2, specificSchema, criteria, offsets, nulls); } private int compareOneSource(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) throws IOException { Schema commonSchema = serInfo.getCommonSchema(); Criteria commonOrder = tupleMRConf.getCommonCriteria(); return compare(b1, s1, b2, s2, commonSchema, commonOrder, offsets, nulls); } protected int compare(byte[] b1, int s1, byte[] b2, int s2, Schema schema, Criteria criteria, Offsets o, Nulls n) throws IOException { o.offset1 = s1; o.offset2 = s2; // Reading nulls bit field, if present if (schema.containsNullableFields()) { o.offset1 += n.nulls1.deser(b1, s1); o.offset2 += n.nulls2.deser(b2, s2); } for (int depth = 0; depth < criteria.getElements().size(); depth++) { Field field = schema.getField(depth); Field.Type type = field.getType(); SortElement sortElement = criteria.getElements().get(depth); Order sort = sortElement.getOrder(); RawComparator comparator = sortElement.getCustomComparator(); // Control for nulls, if field is nullable. if (field.isNullable()) { Criteria.NullOrder nullOrder = sortElement.getNullOrder(); if (n.nulls1.isSet(schema.getNullablePositionFromIndex(depth))) { if (n.nulls2.isSet(schema.getNullablePositionFromIndex(depth))) { // Both are null, so both are equal. No space is used. Continue. continue; } else { // First is null return (nullOrder == Criteria.NullOrder.NULL_SMALLEST && sort == Order.ASC) ? -1 : 1; } } else if (n.nulls2.isSet(schema.getNullablePositionFromIndex(depth))) { // Second is null return (nullOrder == Criteria.NullOrder.NULL_SMALLEST && sort == Order.ASC) ? 
1 : -1; } } if (comparator != null) { //custom comparator for OBJECT int length1 = WritableComparator.readVInt(b1, o.offset1); int length2 = WritableComparator.readVInt(b2, o.offset2); o.offset1 += WritableUtils.decodeVIntSize(b1[o.offset1]); o.offset2 += WritableUtils.decodeVIntSize(b2[o.offset2]); int comparison = comparator.compare(b1, o.offset1, length1, b2, o.offset2, length2); o.offset1 += length1; o.offset2 += length2; if (comparison != 0) { return (sort == Order.ASC) ? comparison : -comparison; } } else { //not custom comparator switch (type) { case INT: case ENUM: { int value1 = readVInt(b1, o.offset1); int value2 = readVInt(b2, o.offset2); if (value1 > value2) { return (sort == Order.ASC) ? 1 : -1; } else if (value1 < value2) { return (sort == Order.ASC) ? -1 : 1; } int vintSize = WritableUtils.decodeVIntSize(b1[o.offset1]); o.offset1 += vintSize; o.offset2 += vintSize; } break; case LONG: { long value1 = readVLong(b1, o.offset1); long value2 = readVLong(b2, o.offset2); if (value1 > value2) { return (sort == Order.ASC) ? 1 : -1; } else if (value1 < value2) { return (sort == Order.ASC) ? -1 : 1; } int vIntSize = WritableUtils.decodeVIntSize(b1[o.offset1]); o.offset1 += vIntSize; o.offset2 += vIntSize; } break; case FLOAT: { float value1 = readFloat(b1, o.offset1); float value2 = readFloat(b2, o.offset2); int comp = Float.compare(value1, value2); if (comp != 0) { return (sort == Order.ASC) ? comp : -comp; } o.offset1 += Float.SIZE / 8; o.offset2 += Float.SIZE / 8; } break; case DOUBLE: { double value1 = readDouble(b1, o.offset1); double value2 = readDouble(b2, o.offset2); int comp = Double.compare(value1, value2); if (comp != 0) { return (sort == Order.ASC) ? comp : -comp; } o.offset1 += Double.SIZE / 8; o.offset2 += Double.SIZE / 8; } break; case BOOLEAN: { byte value1 = b1[o.offset1++]; byte value2 = b2[o.offset2++]; if (value1 > value2) { return (sort == Order.ASC) ? 1 : -1; } else if (value1 < value2) { return (sort == Order.ASC) ? 
-1 : 1; } } break; case STRING: case OBJECT: case BYTES: { int length1 = readVInt(b1, o.offset1); int length2 = readVInt(b2, o.offset2); o.offset1 += WritableUtils.decodeVIntSize(b1[o.offset1]); o.offset2 += WritableUtils.decodeVIntSize(b2[o.offset2]); int comparison = compareBytes(b1, o.offset1, length1, b2, o.offset2, length2); o.offset1 += length1; o.offset2 += length2; if (comparison != 0) { return (sort == Order.ASC) ? comparison : (-comparison); } } break; default: throw new IOException("Not supported comparison for type:" + type); } } } return 0; // equals } @Override public Configuration getConf() { return conf; } @Override public void setConf(Configuration conf) { try { if (conf != null) { this.conf = conf; setTupleMRConf(TupleMRConfig.get(conf)); TupleMRConfigBuilder.initializeComparators(conf, this.tupleMRConf); serializerComparator.setConf(conf); } } catch (Exception e) { throw new RuntimeException(e); } } private void setTupleMRConf(TupleMRConfig config) { if (this.tupleMRConf != null) { throw new RuntimeException("TupleMR config is already set"); } this.tupleMRConf = config; this.serInfo = tupleMRConf.getSerializationInfo(); this.isMultipleSources = tupleMRConf.getNumIntermediateSchemas() >= 2; } }