HADOOP-14313. Replace/improve Hadoop's byte[] comparator. Contributed by Vikas Vishwakarma.

This commit is contained in:
Akira Ajisaka 2018-06-28 14:58:40 +09:00
parent 2b2399d623
commit ddbff7c8d3
No known key found for this signature in database
GPG Key ID: C1EDBB9CA400FD50
2 changed files with 25 additions and 27 deletions

View File

@@ -196,6 +196,14 @@ by Google Inc, which can be obtained at:
* HOMEPAGE: * HOMEPAGE:
* http://code.google.com/p/snappy/ * http://code.google.com/p/snappy/
This product contains a modified portion of the UnsignedBytes LexicographicalComparator
from the Guava v21 project by Google Inc, which can be obtained at:
* LICENSE:
* license/COPYING (Apache License 2.0)
* HOMEPAGE:
* https://github.com/google/guava
This product optionally depends on 'JBoss Marshalling', an alternative Java This product optionally depends on 'JBoss Marshalling', an alternative Java
serialization API, which can be obtained at: serialization API, which can be obtained at:

View File

@@ -26,7 +26,6 @@
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import sun.misc.Unsafe; import sun.misc.Unsafe;
import com.google.common.primitives.Longs;
import com.google.common.primitives.UnsignedBytes; import com.google.common.primitives.UnsignedBytes;
/** /**
@@ -195,52 +194,43 @@ public int compareTo(byte[] buffer1, int offset1, int length1,
length1 == length2) { length1 == length2) {
return 0; return 0;
} }
final int stride = 8;
int minLength = Math.min(length1, length2); int minLength = Math.min(length1, length2);
int minWords = minLength / Longs.BYTES; int strideLimit = minLength & ~(stride - 1);
int offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET; int offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET;
int offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET; int offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET;
int i;
/* /*
* Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a
* time is no slower than comparing 4 bytes at a time even on 32-bit. * time is no slower than comparing 4 bytes at a time even on 32-bit.
* On the other hand, it is substantially faster on 64-bit. * On the other hand, it is substantially faster on 64-bit.
*/ */
for (int i = 0; i < minWords * Longs.BYTES; i += Longs.BYTES) { for (i = 0; i < strideLimit; i += stride) {
long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i); long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i);
long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i); long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i);
long diff = lw ^ rw;
if (diff != 0) { if (lw != rw) {
if (!littleEndian) { if (!littleEndian) {
return lessThanUnsigned(lw, rw) ? -1 : 1; return lessThanUnsigned(lw, rw) ? -1 : 1;
} }
// Use binary search /*
int n = 0; * We want to compare only the first index where left[index] !=
int y; * right[index]. This corresponds to the least significant nonzero
int x = (int) diff; * byte in lw ^ rw, since lw and rw are little-endian.
if (x == 0) { * Long.numberOfTrailingZeros(diff) tells us the least significant
x = (int) (diff >>> 32); * nonzero bit, and zeroing out the first three bits of L.nTZ gives
n = 32; * us the shift to get that least significant nonzero byte. This
} * comparison logic is based on UnsignedBytes from Guava v21
*/
y = x << 16; int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7;
if (y == 0) { return ((int) ((lw >>> n) & 0xFF)) - ((int) ((rw >>> n) & 0xFF));
n += 16;
} else {
x = y;
}
y = x << 8;
if (y == 0) {
n += 8;
}
return (int) (((lw >>> n) & 0xFFL) - ((rw >>> n) & 0xFFL));
} }
} }
// The epilogue to cover the last (minLength % 8) elements. // The epilogue to cover the last (minLength % 8) elements.
for (int i = minWords * Longs.BYTES; i < minLength; i++) { for (; i < minLength; i++) {
int result = UnsignedBytes.compare( int result = UnsignedBytes.compare(
buffer1[offset1 + i], buffer1[offset1 + i],
buffer2[offset2 + i]); buffer2[offset2 + i]);