Skip to content

Commit a4e9168

Browse files
vamsi-parasaSandhya Viswanathan
authored and
Sandhya Viswanathan
committed
8309130: x86_64 AVX512 intrinsics for Arrays.sort methods (int, long, float and double arrays)
Reviewed-by: jbhateja, sviswanathan, psandoz, kvn
1 parent 6c6beba commit a4e9168

File tree

22 files changed

+3118
-508
lines changed

22 files changed

+3118
-508
lines changed

make/modules/java.base/Lib.gmk

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,10 +227,30 @@ ifeq ($(ENABLE_FALLBACK_LINKER), true)
227227
NAME := fallbackLinker, \
228228
CFLAGS := $(CFLAGS_JDKLIB) $(LIBFFI_CFLAGS), \
229229
LDFLAGS := $(LDFLAGS_JDKLIB) \
230-
$(call SET_SHARED_LIBRARY_ORIGIN), \
230+
$(call SET_SHARED_LIBRARY_ORIGIN), \
231231
LIBS := $(LIBFFI_LIBS), \
232232
LIBS_windows := $(LIBFFI_LIBS) ws2_32.lib, \
233233
))
234234

235235
TARGETS += $(BUILD_LIBFALLBACKLINKER)
236236
endif
237+
238+
################################################################################
239+
240+
# Build the native "simdsort" shared library (with the C++ linker and HIGH optimization) only when targeting linux/x86_64 with the C2 compiler included and a gcc toolchain; on every other configuration the library is not added to TARGETS.
ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, x86_64)+$(INCLUDE_COMPILER2)+$(filter $(TOOLCHAIN_TYPE), gcc), true+true+true+gcc)
241+
$(eval $(call SetupJdkLibrary, BUILD_LIB_SIMD_SORT, \
242+
NAME := simdsort, \
243+
TOOLCHAIN := TOOLCHAIN_LINK_CXX, \
244+
OPTIMIZATION := HIGH, \
245+
CFLAGS := $(CFLAGS_JDKLIB), \
246+
CXXFLAGS := $(CXXFLAGS_JDKLIB), \
247+
LDFLAGS := $(LDFLAGS_JDKLIB) \
248+
$(call SET_SHARED_LIBRARY_ORIGIN), \
249+
LIBS := $(LIBCXX), \
250+
LIBS_linux := -lc -lm -ldl, \
251+
))
252+
253+
TARGETS += $(BUILD_LIB_SIMD_SORT)
254+
endif
255+
256+
################################################################################

src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4172,6 +4172,26 @@ void StubGenerator::generate_compiler_stubs() {
41724172
= CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
41734173
}
41744174

4175+
// Load the simdsort library on supported hardware to enable the avx512 sort and partition intrinsics
4176+
if (UseAVX > 2 && VM_Version::supports_avx512dq()) {
4177+
void *libsimdsort = nullptr;
4178+
char ebuf_[1024];
4179+
char dll_name_simd_sort[JVM_MAXPATHLEN];
4180+
if (os::dll_locate_lib(dll_name_simd_sort, sizeof(dll_name_simd_sort), Arguments::get_dll_dir(), "simdsort")) {
4181+
libsimdsort = os::dll_load(dll_name_simd_sort, ebuf_, sizeof ebuf_);
4182+
}
4183+
// Get addresses for avx512 sort and partition routines
4184+
if (libsimdsort != nullptr) {
4185+
log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "simdsort" JNI_LIB_SUFFIX, p2i(libsimdsort));
4186+
4187+
snprintf(ebuf_, sizeof(ebuf_), "avx512_sort");
4188+
StubRoutines::_array_sort = (address)os::dll_lookup(libsimdsort, ebuf_);
4189+
4190+
snprintf(ebuf_, sizeof(ebuf_), "avx512_partition");
4191+
StubRoutines::_array_partition = (address)os::dll_lookup(libsimdsort, ebuf_);
4192+
}
4193+
}
4194+
41754195
// Get svml stub routine addresses
41764196
void *libjsvml = nullptr;
41774197
char ebuf[1024];

src/hotspot/share/classfile/vmIntrinsics.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,14 @@ class methodHandle;
341341
do_name( copyOf_name, "copyOf") \
342342
do_signature(copyOf_signature, "([Ljava/lang/Object;ILjava/lang/Class;)[Ljava/lang/Object;") \
343343
\
344+
do_intrinsic(_arraySort, java_util_DualPivotQuicksort, arraySort_name, arraySort_signature, F_S) \
345+
do_name( arraySort_name, "sort") \
346+
do_signature(arraySort_signature, "(Ljava/lang/Class;Ljava/lang/Object;JIILjava/util/DualPivotQuicksort$SortOperation;)V") \
347+
\
348+
do_intrinsic(_arrayPartition, java_util_DualPivotQuicksort, arrayPartition_name, arrayPartition_signature, F_S) \
349+
do_name( arrayPartition_name, "partition") \
350+
do_signature(arrayPartition_signature, "(Ljava/lang/Class;Ljava/lang/Object;JIIIILjava/util/DualPivotQuicksort$PartitionOperation;)[I") \
351+
\
344352
do_intrinsic(_copyOfRange, java_util_Arrays, copyOfRange_name, copyOfRange_signature, F_S) \
345353
do_name( copyOfRange_name, "copyOfRange") \
346354
do_signature(copyOfRange_signature, "([Ljava/lang/Object;IILjava/lang/Class;)[Ljava/lang/Object;") \

src/hotspot/share/classfile/vmSymbols.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ class SerializeClosure;
145145
template(java_util_Vector, "java/util/Vector") \
146146
template(java_util_AbstractList, "java/util/AbstractList") \
147147
template(java_util_Hashtable, "java/util/Hashtable") \
148+
template(java_util_DualPivotQuicksort, "java/util/DualPivotQuicksort") \
148149
template(java_lang_Compiler, "java/lang/Compiler") \
149150
template(jdk_internal_misc_Signal, "jdk/internal/misc/Signal") \
150151
template(jdk_internal_util_Preconditions, "jdk/internal/util/Preconditions") \

src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,12 @@ void ShenandoahBarrierC2Support::verify(RootNode* root) {
387387
verify_type t;
388388
} args[6];
389389
} calls[] = {
390+
"array_partition_stub",
391+
{ { TypeFunc::Parms, ShenandoahStore }, { TypeFunc::Parms+4, ShenandoahStore }, { -1, ShenandoahNone },
392+
{ -1, ShenandoahNone }, { -1, ShenandoahNone }, { -1, ShenandoahNone } },
393+
"arraysort_stub",
394+
{ { TypeFunc::Parms, ShenandoahStore }, { -1, ShenandoahNone }, { -1, ShenandoahNone },
395+
{ -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },
390396
"aescrypt_encryptBlock",
391397
{ { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad },
392398
{ -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} },

src/hotspot/share/jvmci/vmStructs_jvmci.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,8 @@
331331
static_field(StubRoutines, _checkcast_arraycopy_uninit, address) \
332332
static_field(StubRoutines, _unsafe_arraycopy, address) \
333333
static_field(StubRoutines, _generic_arraycopy, address) \
334+
static_field(StubRoutines, _array_sort, address) \
335+
static_field(StubRoutines, _array_partition, address) \
334336
\
335337
static_field(StubRoutines, _aescrypt_encryptBlock, address) \
336338
static_field(StubRoutines, _aescrypt_decryptBlock, address) \

src/hotspot/share/opto/c2compiler.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,8 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
614614
case vmIntrinsics::_min_strict:
615615
case vmIntrinsics::_max_strict:
616616
case vmIntrinsics::_arraycopy:
617+
case vmIntrinsics::_arraySort:
618+
case vmIntrinsics::_arrayPartition:
617619
case vmIntrinsics::_indexOfL:
618620
case vmIntrinsics::_indexOfU:
619621
case vmIntrinsics::_indexOfUL:

src/hotspot/share/opto/escape.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1575,6 +1575,8 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
15751575
strcmp(call->as_CallLeaf()->_name, "bigIntegerRightShiftWorker") == 0 ||
15761576
strcmp(call->as_CallLeaf()->_name, "bigIntegerLeftShiftWorker") == 0 ||
15771577
strcmp(call->as_CallLeaf()->_name, "vectorizedMismatch") == 0 ||
1578+
strcmp(call->as_CallLeaf()->_name, "arraysort_stub") == 0 ||
1579+
strcmp(call->as_CallLeaf()->_name, "array_partition_stub") == 0 ||
15781580
strcmp(call->as_CallLeaf()->_name, "get_class_id_intrinsic") == 0)
15791581
))) {
15801582
call->dump();

src/hotspot/share/opto/library_call.cpp

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,9 @@ bool LibraryCallKit::try_to_inline(int predicate) {
293293

294294
case vmIntrinsics::_arraycopy: return inline_arraycopy();
295295

296+
case vmIntrinsics::_arraySort: return inline_array_sort();
297+
case vmIntrinsics::_arrayPartition: return inline_array_partition();
298+
296299
case vmIntrinsics::_compareToL: return inline_string_compareTo(StrIntrinsicNode::LL);
297300
case vmIntrinsics::_compareToU: return inline_string_compareTo(StrIntrinsicNode::UU);
298301
case vmIntrinsics::_compareToLU: return inline_string_compareTo(StrIntrinsicNode::LU);
@@ -5361,6 +5364,101 @@ void LibraryCallKit::create_new_uncommon_trap(CallStaticJavaNode* uncommon_trap_
53615364
uncommon_trap_call->set_req(0, top()); // not used anymore, kill it
53625365
}
53635366

5367+
//------------------------------inline_array_partition-----------------------
5368+
bool LibraryCallKit::inline_array_partition() {
5369+
5370+
const char *stubName = "array_partition_stub";
5371+
5372+
Node* elementType = null_check(argument(0));
5373+
Node* obj = argument(1);
5374+
Node* offset = argument(2);
5375+
Node* fromIndex = argument(4);
5376+
Node* toIndex = argument(5);
5377+
Node* indexPivot1 = argument(6);
5378+
Node* indexPivot2 = argument(7);
5379+
5380+
const TypeInstPtr* elem_klass = gvn().type(elementType)->isa_instptr();
5381+
ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
5382+
BasicType bt = elem_type->basic_type();
5383+
address stubAddr = nullptr;
5384+
stubAddr = StubRoutines::select_array_partition_function();
5385+
// stub not loaded
5386+
if (stubAddr == nullptr) {
5387+
return false;
5388+
}
5389+
// get the address of the array
5390+
const TypeAryPtr* obj_t = _gvn.type(obj)->isa_aryptr();
5391+
if (obj_t == nullptr || obj_t->elem() == Type::BOTTOM ) {
5392+
return false; // failed input validation
5393+
}
5394+
Node* obj_adr = make_unsafe_address(obj, offset);
5395+
5396+
// create the pivotIndices array of type int and size = 2
5397+
Node* size = intcon(2);
5398+
Node* klass_node = makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_INT)));
5399+
Node* pivotIndices = new_array(klass_node, size, 0); // no arguments to push
5400+
AllocateArrayNode* alloc = tightly_coupled_allocation(pivotIndices);
5401+
guarantee(alloc != nullptr, "created above");
5402+
Node* pivotIndices_adr = basic_plus_adr(pivotIndices, arrayOopDesc::base_offset_in_bytes(T_INT));
5403+
5404+
// pass the basic type enum to the stub
5405+
Node* elemType = intcon(bt);
5406+
5407+
// Call the stub
5408+
make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::array_partition_Type(),
5409+
stubAddr, stubName, TypePtr::BOTTOM,
5410+
obj_adr, elemType, fromIndex, toIndex, pivotIndices_adr,
5411+
indexPivot1, indexPivot2);
5412+
5413+
if (!stopped()) {
5414+
set_result(pivotIndices);
5415+
}
5416+
5417+
return true;
5418+
}
5419+
5420+
5421+
//------------------------------inline_array_sort-----------------------
5422+
bool LibraryCallKit::inline_array_sort() {
5423+
5424+
const char *stubName;
5425+
stubName = "arraysort_stub";
5426+
5427+
Node* elementType = null_check(argument(0));
5428+
Node* obj = argument(1);
5429+
Node* offset = argument(2);
5430+
Node* fromIndex = argument(4);
5431+
Node* toIndex = argument(5);
5432+
5433+
const TypeInstPtr* elem_klass = gvn().type(elementType)->isa_instptr();
5434+
ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
5435+
BasicType bt = elem_type->basic_type();
5436+
address stubAddr = nullptr;
5437+
stubAddr = StubRoutines::select_arraysort_function();
5438+
//stub not loaded
5439+
if (stubAddr == nullptr) {
5440+
return false;
5441+
}
5442+
5443+
// get address of the array
5444+
const TypeAryPtr* obj_t = _gvn.type(obj)->isa_aryptr();
5445+
if (obj_t == nullptr || obj_t->elem() == Type::BOTTOM ) {
5446+
return false; // failed input validation
5447+
}
5448+
Node* obj_adr = make_unsafe_address(obj, offset);
5449+
5450+
// pass the basic type enum to the stub
5451+
Node* elemType = intcon(bt);
5452+
5453+
// Call the stub.
5454+
make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::array_sort_Type(),
5455+
stubAddr, stubName, TypePtr::BOTTOM,
5456+
obj_adr, elemType, fromIndex, toIndex);
5457+
5458+
return true;
5459+
}
5460+
5461+
53645462
//------------------------------inline_arraycopy-----------------------
53655463
// public static native void java.lang.System.arraycopy(Object src, int srcPos,
53665464
// Object dest, int destPos,

src/hotspot/share/opto/library_call.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,8 @@ class LibraryCallKit : public GraphKit {
277277
JVMState* arraycopy_restore_alloc_state(AllocateArrayNode* alloc, int& saved_reexecute_sp);
278278
void arraycopy_move_allocation_here(AllocateArrayNode* alloc, Node* dest, JVMState* saved_jvms_before_guards, int saved_reexecute_sp,
279279
uint new_idx);
280-
280+
bool inline_array_sort();
281+
bool inline_array_partition();
281282
typedef enum { LS_get_add, LS_get_set, LS_cmp_swap, LS_cmp_swap_weak, LS_cmp_exchange } LoadStoreKind;
282283
bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind, AccessKind access_kind);
283284
bool inline_unsafe_fence(vmIntrinsics::ID id);

src/hotspot/share/opto/runtime.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,49 @@ const TypeFunc* OptoRuntime::array_fill_Type() {
857857
return TypeFunc::make(domain, range);
858858
}
859859

860+
// Call signature of the array partition stub: seven inputs, no result.
const TypeFunc* OptoRuntime::array_partition_Type() {
  // Inputs (domain), filled in the order the stub receives them.
  const int input_count = 7;
  const Type** in_fields = TypeTuple::fields(input_count);
  int slot = TypeFunc::Parms;
  in_fields[slot++] = TypePtr::NOTNULL;  // array
  in_fields[slot++] = TypeInt::INT;      // element type
  in_fields[slot++] = TypeInt::INT;      // low
  in_fields[slot++] = TypeInt::INT;      // end
  in_fields[slot++] = TypePtr::NOTNULL;  // pivot_indices (int array)
  in_fields[slot++] = TypeInt::INT;      // indexPivot1
  in_fields[slot++] = TypeInt::INT;      // indexPivot2
  assert(slot == TypeFunc::Parms + input_count, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + input_count, in_fields);

  // Result (range): the stub returns nothing.
  const Type** out_fields = TypeTuple::fields(1);
  out_fields[TypeFunc::Parms + 0] = nullptr;  // void
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, out_fields);

  return TypeFunc::make(domain, range);
}
882+
883+
// Call signature of the array sort stub: four inputs, no result.
const TypeFunc* OptoRuntime::array_sort_Type() {
  // Inputs (domain), filled in the order the stub receives them.
  const int input_count = 4;
  const Type** in_fields = TypeTuple::fields(input_count);
  int slot = TypeFunc::Parms;
  in_fields[slot++] = TypePtr::NOTNULL;  // array
  in_fields[slot++] = TypeInt::INT;      // element type
  in_fields[slot++] = TypeInt::INT;      // fromIndex
  in_fields[slot++] = TypeInt::INT;      // toIndex
  assert(slot == TypeFunc::Parms + input_count, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + input_count, in_fields);

  // Result (range): the stub returns nothing.
  const Type** out_fields = TypeTuple::fields(1);
  out_fields[TypeFunc::Parms + 0] = nullptr;  // void
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, out_fields);

  return TypeFunc::make(domain, range);
}
902+
860903
// for aescrypt encrypt/decrypt operations, just three pointers returning void (length is constant)
861904
const TypeFunc* OptoRuntime::aescrypt_block_Type() {
862905
// create input type (domain)

src/hotspot/share/opto/runtime.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,8 @@ class OptoRuntime : public AllStatic {
268268

269269
static const TypeFunc* array_fill_Type();
270270

271+
static const TypeFunc* array_sort_Type();
272+
static const TypeFunc* array_partition_Type();
271273
static const TypeFunc* aescrypt_block_Type();
272274
static const TypeFunc* cipherBlockChaining_aescrypt_Type();
273275
static const TypeFunc* electronicCodeBook_aescrypt_Type();

src/hotspot/share/runtime/stubRoutines.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,9 @@ address StubRoutines::_hf2f = nullptr;
176176
address StubRoutines::_vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP] = {{nullptr}, {nullptr}};
177177
address StubRoutines::_vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP] = {{nullptr}, {nullptr}};
178178

179+
address StubRoutines::_array_sort = nullptr;
180+
address StubRoutines::_array_partition = nullptr;
181+
179182
address StubRoutines::_cont_thaw = nullptr;
180183
address StubRoutines::_cont_returnBarrier = nullptr;
181184
address StubRoutines::_cont_returnBarrierExc = nullptr;

src/hotspot/share/runtime/stubRoutines.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,8 @@ class StubRoutines: AllStatic {
153153
static BufferBlob* _compiler_stubs_code; // code buffer for C2 intrinsics
154154
static BufferBlob* _final_stubs_code; // code buffer for all other routines
155155

156+
static address _array_sort;
157+
static address _array_partition;
156158
// Leaf routines which implement arraycopy and their addresses
157159
// arraycopy operands aligned on element type boundary
158160
static address _jbyte_arraycopy;
@@ -375,6 +377,8 @@ class StubRoutines: AllStatic {
375377
static UnsafeArrayCopyStub UnsafeArrayCopy_stub() { return CAST_TO_FN_PTR(UnsafeArrayCopyStub, _unsafe_arraycopy); }
376378

377379
static address generic_arraycopy() { return _generic_arraycopy; }
380+
// Returns the current value of _array_sort (the array sort routine address); callers must handle nullptr.
static address select_arraysort_function() { return _array_sort; }
381+
// Returns the current value of _array_partition (the array partition routine address); callers must handle nullptr.
static address select_array_partition_function() { return _array_partition; }
378382

379383
static address jbyte_fill() { return _jbyte_fill; }
380384
static address jshort_fill() { return _jshort_fill; }

0 commit comments

Comments
 (0)