HADOOP-14313. Replace/improve Hadoop's byte[] comparator. Contributed by Vikas Vishwakarma.
This commit is contained in:
parent
2b2399d623
commit
ddbff7c8d3
@ -196,6 +196,14 @@ by Google Inc, which can be obtained at:
|
|||||||
* HOMEPAGE:
|
* HOMEPAGE:
|
||||||
* http://code.google.com/p/snappy/
|
* http://code.google.com/p/snappy/
|
||||||
|
|
||||||
|
This product contains a modified portion of UnsignedBytes LexicographicalComparator
|
||||||
|
from the Guava v21 project by Google Inc, which can be obtained at:
|
||||||
|
|
||||||
|
* LICENSE:
|
||||||
|
* license/COPYING (Apache License 2.0)
|
||||||
|
* HOMEPAGE:
|
||||||
|
* https://github.com/google/guava
|
||||||
|
|
||||||
This product optionally depends on 'JBoss Marshalling', an alternative Java
|
This product optionally depends on 'JBoss Marshalling', an alternative Java
|
||||||
serialization API, which can be obtained at:
|
serialization API, which can be obtained at:
|
||||||
|
|
||||||
|
@ -26,7 +26,6 @@
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import sun.misc.Unsafe;
|
import sun.misc.Unsafe;
|
||||||
|
|
||||||
import com.google.common.primitives.Longs;
|
|
||||||
import com.google.common.primitives.UnsignedBytes;
|
import com.google.common.primitives.UnsignedBytes;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -195,52 +194,43 @@ public int compareTo(byte[] buffer1, int offset1, int length1,
|
|||||||
length1 == length2) {
|
length1 == length2) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
final int stride = 8;
|
||||||
int minLength = Math.min(length1, length2);
|
int minLength = Math.min(length1, length2);
|
||||||
int minWords = minLength / Longs.BYTES;
|
int strideLimit = minLength & ~(stride - 1);
|
||||||
int offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET;
|
int offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET;
|
||||||
int offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET;
|
int offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET;
|
||||||
|
int i;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a
|
* Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a
|
||||||
* time is no slower than comparing 4 bytes at a time even on 32-bit.
|
* time is no slower than comparing 4 bytes at a time even on 32-bit.
|
||||||
* On the other hand, it is substantially faster on 64-bit.
|
* On the other hand, it is substantially faster on 64-bit.
|
||||||
*/
|
*/
|
||||||
for (int i = 0; i < minWords * Longs.BYTES; i += Longs.BYTES) {
|
for (i = 0; i < strideLimit; i += stride) {
|
||||||
long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i);
|
long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i);
|
||||||
long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i);
|
long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i);
|
||||||
long diff = lw ^ rw;
|
|
||||||
|
|
||||||
if (diff != 0) {
|
if (lw != rw) {
|
||||||
if (!littleEndian) {
|
if (!littleEndian) {
|
||||||
return lessThanUnsigned(lw, rw) ? -1 : 1;
|
return lessThanUnsigned(lw, rw) ? -1 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use binary search
|
/*
|
||||||
int n = 0;
|
* We want to compare only the first index where left[index] !=
|
||||||
int y;
|
* right[index]. This corresponds to the least significant nonzero
|
||||||
int x = (int) diff;
|
* byte in lw ^ rw, since lw and rw are little-endian.
|
||||||
if (x == 0) {
|
* Long.numberOfTrailingZeros(diff) tells us the least significant
|
||||||
x = (int) (diff >>> 32);
|
* nonzero bit, and zeroing out the lowest three bits of L.nTZ gives
|
||||||
n = 32;
|
* us the shift to get that least significant nonzero byte. This
|
||||||
}
|
* comparison logic is based on UnsignedBytes from Guava v21
|
||||||
|
*/
|
||||||
y = x << 16;
|
int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7;
|
||||||
if (y == 0) {
|
return ((int) ((lw >>> n) & 0xFF)) - ((int) ((rw >>> n) & 0xFF));
|
||||||
n += 16;
|
|
||||||
} else {
|
|
||||||
x = y;
|
|
||||||
}
|
|
||||||
|
|
||||||
y = x << 8;
|
|
||||||
if (y == 0) {
|
|
||||||
n += 8;
|
|
||||||
}
|
|
||||||
return (int) (((lw >>> n) & 0xFFL) - ((rw >>> n) & 0xFFL));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// The epilogue to cover the last (minLength % 8) elements.
|
// The epilogue to cover the last (minLength % 8) elements.
|
||||||
for (int i = minWords * Longs.BYTES; i < minLength; i++) {
|
for (; i < minLength; i++) {
|
||||||
int result = UnsignedBytes.compare(
|
int result = UnsignedBytes.compare(
|
||||||
buffer1[offset1 + i],
|
buffer1[offset1 + i],
|
||||||
buffer2[offset2 + i]);
|
buffer2[offset2 + i]);
|
||||||
|
Loading…
Reference in New Issue
Block a user