Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:25
erlang
1103-jit-Optimize-relational-guard-tests.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 1103-jit-Optimize-relational-guard-tests.patch of Package erlang
From f8a9ccda6c4a69ac6cb029f82e473bf0ab14f533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org> Date: Sun, 1 May 2022 07:32:09 +0200 Subject: [PATCH 3/3] jit: Optimize relational guard tests Optimize the generation of native code for common combinations of relational operators in guards. --- erts/emulator/beam/jit/arm/beam_asm.hpp | 21 +- .../beam/jit/arm/beam_asm_global.hpp.pl | 2 + erts/emulator/beam/jit/arm/instr_common.cpp | 329 +++++++++++++++++ erts/emulator/beam/jit/arm/ops.tab | 30 ++ erts/emulator/beam/jit/x86/beam_asm.hpp | 18 + .../beam/jit/x86/beam_asm_global.hpp.pl | 2 + erts/emulator/beam/jit/x86/instr_common.cpp | 336 ++++++++++++++++++ erts/emulator/beam/jit/x86/ops.tab | 30 ++ erts/emulator/test/op_SUITE.erl | 117 +++++- 9 files changed, 874 insertions(+), 11 deletions(-) diff --git a/erts/emulator/beam/jit/arm/beam_asm.hpp b/erts/emulator/beam/jit/arm/beam_asm.hpp index bf1fdc5c48..eb3baeefb2 100644 --- a/erts/emulator/beam/jit/arm/beam_asm.hpp +++ b/erts/emulator/beam/jit/arm/beam_asm.hpp @@ -739,6 +739,17 @@ protected: } } + void cmp(arm::Gp src, int64_t val) { + if (Support::isUInt12(val)) { + a.cmp(src, imm(val)); + } else if (Support::isUInt12(-val)) { + a.cmn(src, imm(-val)); + } else { + mov_imm(SUPER_TMP, val); + a.cmp(src, SUPER_TMP); + } + } + void ldur(arm::Gp reg, arm::Mem mem) { safe_9bit_imm(a64::Inst::kIdLdur, reg, mem); } @@ -1589,14 +1600,8 @@ protected: if (arg.isImmed() || arg.isWord()) { Sint val = arg.isImmed() ? arg.as<ArgImmed>().get() : arg.as<ArgWord>().get(); - - if (Support::isUInt12(val)) { - a.cmp(gp, imm(val)); - return; - } else if (Support::isUInt12(-val)) { - a.cmn(gp, imm(-val)); - return; - } + cmp(gp, val); + return; } auto tmp = load_source(arg, SUPER_TMP); diff --git a/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl b/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl index f42d16e853..0099e6cc94 100644 --- a/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl +++ b/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl @@ -88,6 +88,8 @@ my @beam_global_funcs = qw( int_div_rem_body_shared int_div_rem_guard_shared internal_hash_helper + is_in_range_shared + is_ge_lt_shared minus_body_shared new_map_shared update_map_assoc_shared diff --git a/erts/emulator/beam/jit/arm/instr_common.cpp b/erts/emulator/beam/jit/arm/instr_common.cpp index 2a1d08a1a1..70097efdc4 100644 --- a/erts/emulator/beam/jit/arm/instr_common.cpp +++ b/erts/emulator/beam/jit/arm/instr_common.cpp @@ -1467,6 +1467,335 @@ void BeamModuleAssembler::emit_is_ge(const ArgLabel &Fail, } } +/* + * ARG1 = Src + * ARG2 = Min + * ARG3 = Max + * + * Result is returned in the flags. + */ +void BeamGlobalAssembler::emit_is_in_range_shared() { + Label immediate = a.newLabel(), generic_compare = a.newLabel(), + float_done = a.newLabel(), done = a.newLabel(); + + /* Is the source a float? */ + emit_is_boxed(immediate, ARG1); + + arm::Gp boxed_ptr = emit_ptr_val(TMP1, ARG1); + a.ldur(TMP2, emit_boxed_val(boxed_ptr)); + + mov_imm(TMP3, HEADER_FLONUM); + a.cmp(TMP2, TMP3); + a.b_ne(generic_compare); + + a.ldur(a64::d0, emit_boxed_val(boxed_ptr, sizeof(Eterm))); + a.asr(TMP1, ARG2, imm(_TAG_IMMED1_SIZE)); + a.scvtf(a64::d1, TMP1); + + a.fcmpe(a64::d0, a64::d1); + a.b_mi(float_done); + + a.asr(TMP1, ARG3, imm(_TAG_IMMED1_SIZE)); + a.scvtf(a64::d1, TMP1); + a.fcmpe(a64::d0, a64::d1); + a.b_gt(float_done); + a.tst(ZERO, ZERO); + + a.bind(float_done); + a.ret(a64::x30); + + a.bind(immediate); + { + /* + * Src is an immediate (such as ATOM) but not SMALL. + * That means that Src must be greater than the upper + * limit. + */ + mov_imm(TMP1, 1); + a.cmp(TMP1, imm(0)); + a.ret(a64::x30); + } + + a.bind(generic_compare); + { + emit_enter_runtime_frame(); + emit_enter_runtime(); + + a.stp(ARG1, ARG3, TMP_MEM1q); + + comment("erts_cmp_compound(X, Y, 0, 0);"); + mov_imm(ARG3, 0); + mov_imm(ARG4, 0); + runtime_call<4>(erts_cmp_compound); + a.tst(ARG1, ARG1); + a.b_mi(done); + + a.ldp(ARG1, ARG2, TMP_MEM1q); + + comment("erts_cmp_compound(X, Y, 0, 0);"); + mov_imm(ARG3, 0); + mov_imm(ARG4, 0); + runtime_call<4>(erts_cmp_compound); + a.tst(ARG1, ARG1); + + a.bind(done); + emit_leave_runtime(); + emit_leave_runtime_frame(); + + a.ret(a64::x30); + } +} + +/* + * 1121 occurrences in OTP at the time of writing. + */ +void BeamModuleAssembler::emit_is_in_range(ArgLabel const &Small, + ArgLabel const &Large, + ArgRegister const &Src, + ArgConstant const &Min, + ArgConstant const &Max) { + Label next = a.newLabel(), generic = a.newLabel(); + bool need_generic = true; + auto src = load_source(Src, ARG1); + + if (always_small(Src)) { + need_generic = false; + comment("skipped test for small operand since it always small"); + } else if (always_one_of(Src, BEAM_TYPE_INTEGER | BEAM_TYPE_MASK_BOXED)) { + /* The only possible kind of immediate is a small and all + * other values are boxed, so we can test for smalls by + * testing boxed. */ + comment("simplified small test since all other types are boxed"); + ERTS_CT_ASSERT(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED == (1 << 0)); + if (Small == Large && + always_one_of(Src, BEAM_TYPE_MASK_BOXED - BEAM_TYPE_FLOAT)) { + /* Src is never a float and the failure labels are + * equal. Therefore, since a bignum will never be within + * the range, we can fail immediately if Src is not a + * small. */ + need_generic = false; + a.tbz(src.reg, imm(0), resolve_beam_label(Small, disp32K)); + } else { + /* Src can be a float or the failures labels are distinct. + * We need to call the generic routine if Src is not a small. */ + a.tbz(src.reg, imm(0), generic); + } + } else if (Small == Large) { + /* We can save one instruction if we incorporate the test for + * small into the range check. */ + ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK); + comment("simplified small & range tests since failure labels are " + "equal"); + sub(TMP1, src.reg, Min.as<ArgImmed>().get()); + + /* Since we have subtracted the (tagged) lower bound, the + * tag bits of the difference is 0 if and only if Src is + * a small. Testing for a tag of 0 can be done in two + * instructions. */ + a.tst(TMP1, imm(_TAG_IMMED1_MASK)); + a.b_ne(generic); + + /* Now do the range check. */ + cmp(TMP1, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get()); + a.b_hi(resolve_beam_label(Small, disp1MB)); + + /* Bypass the test code. */ + goto test_done; + } else { + /* We have no applicable type information and the failure + * labels are distinct. Emit the standard test for small + * and call the generic routine if Src is not a small. */ + a.and_(TMP1, src.reg, imm(_TAG_IMMED1_MASK)); + a.cmp(TMP1, imm(_TAG_IMMED1_SMALL)); + a.b_ne(generic); + } + + /* We have now established that the operand is small. */ + if (Small == Large) { + comment("simplified range test since failure labels are equal"); + sub(TMP1, src.reg, Min.as<ArgImmed>().get()); + cmp(TMP1, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get()); + a.b_hi(resolve_beam_label(Small, disp1MB)); + } else { + cmp(src.reg, Min.as<ArgImmed>().get()); + a.b_lt(resolve_beam_label(Small, disp1MB)); + cmp(src.reg, Max.as<ArgImmed>().get()); + a.b_gt(resolve_beam_label(Large, disp1MB)); + } + +test_done: + if (need_generic) { + a.b(next); + } + + a.bind(generic); + if (!need_generic) { + comment("skipped generic comparison because it is not needed"); + } else { + mov_var(ARG1, src); + mov_arg(ARG2, Min); + mov_arg(ARG3, Max); + fragment_call(ga->get_is_in_range_shared()); + if (Small == Large) { + a.b_ne(resolve_beam_label(Small, disp1MB)); + } else { + a.b_lt(resolve_beam_label(Small, disp1MB)); + a.b_gt(resolve_beam_label(Large, disp1MB)); + } + } + + a.bind(next); +} + +/* + * ARG1 = Src + * ARG2 = A + * ARG3 = B + * + * Result is returned in the flags. + */ +void BeamGlobalAssembler::emit_is_ge_lt_shared() { + Label done = a.newLabel(); + + emit_enter_runtime_frame(); + emit_enter_runtime(); + + a.stp(ARG1, ARG3, TMP_MEM1q); + + comment("erts_cmp_compound(Src, A, 0, 0);"); + mov_imm(ARG3, 0); + mov_imm(ARG4, 0); + runtime_call<4>(erts_cmp_compound); + a.tst(ARG1, ARG1); + a.b_mi(done); + + comment("erts_cmp_compound(B, Src, 0, 0);"); + a.ldp(ARG2, ARG1, TMP_MEM1q); + mov_imm(ARG3, 0); + mov_imm(ARG4, 0); + runtime_call<4>(erts_cmp_compound); + a.cmp(ARG1, imm(0)); + + /* Make sure that ARG1 is -1, 0, or 1. */ + a.cset(ARG1, imm(arm::CondCode::kNE)); + a.csinv(ARG1, ARG1, ZERO, imm(arm::CondCode::kGE)); + + /* Prepare return value and flags. */ + a.adds(ARG1, ARG1, imm(1)); + + /* We now have: + * ARG1 == 0 if B < SRC + * ARG1 > 0 if B => SRC + * and flags set accordingly. */ + + a.bind(done); + emit_leave_runtime(); + emit_leave_runtime_frame(); + + a.ret(a64::x30); +} + +/* + * The instruction sequence: + * + * is_ge Fail1 Src A + * is_lt Fail1 B Src + * + * is common (1841 occurrences in OTP at the time of writing). + * + * is_ge + is_lt is 18 instructions, while is_ge_lt is + * 14 instructions. + */ +void BeamModuleAssembler::emit_is_ge_lt(ArgLabel const &Fail1, + ArgLabel const &Fail2, + ArgRegister const &Src, + ArgConstant const &A, + ArgConstant const &B) { + Label generic = a.newLabel(), next = a.newLabel(); + auto src = load_source(Src, ARG1); + + mov_arg(ARG2, A); + mov_arg(ARG3, B); + + a.and_(TMP1, src.reg, imm(_TAG_IMMED1_MASK)); + a.cmp(TMP1, imm(_TAG_IMMED1_SMALL)); + a.b_ne(generic); + + a.cmp(src.reg, ARG2); + a.b_lt(resolve_beam_label(Fail1, disp1MB)); + a.cmp(ARG3, src.reg); + a.b_ge(resolve_beam_label(Fail2, disp1MB)); + a.b(next); + + a.bind(generic); + mov_var(ARG1, src); + fragment_call(ga->get_is_ge_lt_shared()); + a.b_lt(resolve_beam_label(Fail1, disp1MB)); + a.b_gt(resolve_beam_label(Fail2, disp1MB)); + + a.bind(next); +} + +/* + * 60 occurrences in OTP at the time of writing. Seems to be common in + * Elixir code. + * + * Currently not very frequent in OTP but very nice reduction in code + * size when it happens. We expect this combination of instructions + * to become more common in the future. + */ +void BeamModuleAssembler::emit_is_int_in_range(ArgLabel const &Fail, + ArgRegister const &Src, + ArgConstant const &Min, + ArgConstant const &Max) { + auto src = load_source(Src, ARG1); + + sub(TMP1, src.reg, Min.as<ArgImmed>().get()); + + /* Since we have subtracted the (tagged) lower bound, the + * tag bits of the difference is 0 if and only if Src is + * a small. */ + ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK); + a.tst(TMP1, imm(_TAG_IMMED1_MASK)); + a.b_ne(resolve_beam_label(Fail, disp1MB)); + cmp(TMP1, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get()); + a.b_hi(resolve_beam_label(Fail, disp1MB)); +} + +/* + * 428 occurrencs in OTP at the time of writing. + */ +void BeamModuleAssembler::emit_is_int_ge(ArgLabel const &Fail, + ArgRegister const &Src, + ArgConstant const &Min) { + auto src = load_source(Src, ARG1); + Label small = a.newLabel(), next = a.newLabel(); + + if (always_one_of(Src, BEAM_TYPE_INTEGER | BEAM_TYPE_MASK_ALWAYS_BOXED)) { + comment("simplified small test since all other types are boxed"); + emit_is_boxed(small, Src, src.reg); + } else { + a.and_(TMP2, src.reg, imm(_TAG_IMMED1_MASK)); + a.cmp(TMP2, imm(_TAG_IMMED1_SMALL)); + a.b_eq(small); + + emit_is_boxed(resolve_beam_label(Fail, dispUnknown), Src, TMP2); + } + + arm::Gp boxed_ptr = emit_ptr_val(TMP1, src.reg); + a.ldur(TMP1, emit_boxed_val(boxed_ptr)); + a.and_(TMP1, TMP1, imm(_TAG_HEADER_MASK)); + a.cmp(TMP1, imm(_TAG_HEADER_POS_BIG)); + a.b_ne(resolve_beam_label(Fail, disp1MB)); + a.b(next); + + a.bind(small); + cmp(src.reg, Min.as<ArgImmed>().get()); + a.b_lt(resolve_beam_label(Fail, disp1MB)); + + a.bind(next); +} + void BeamModuleAssembler::emit_badmatch(const ArgSource &Src) { mov_arg(arm::Mem(c_p, offsetof(Process, fvalue)), Src); emit_error(BADMATCH); diff --git a/erts/emulator/beam/jit/arm/ops.tab b/erts/emulator/beam/jit/arm/ops.tab index 862d46ebc0..9676577079 100644 --- a/erts/emulator/beam/jit/arm/ops.tab +++ b/erts/emulator/beam/jit/arm/ops.tab @@ -455,6 +455,36 @@ is_eq_exact f s s is_ne_exact f s s +is_integer NotInt N0 | is_ge Small N1=xy Min=i | is_ge Large Max=i N2=xy | + equal(N0, N1) | equal(N1, N2) | + equal(NotInt, Small) | equal(Small, Large) => + is_int_in_range NotInt N0 Min Max + +is_integer NotInt N0 | is_ge Large Max=i N2=xy | is_ge Small N1=xy Min=i | + equal(N0, N1) | equal(N1, N2) | + equal(NotInt, Small) | equal(Small, Large) => + is_int_in_range NotInt N0 Min Max + +is_integer NotInt N0 | is_ge Fail N1=xy Min=i | + equal(N0, N1) | equal(NotInt, Fail) => + is_int_ge NotInt N0 Min + +is_int_in_range f S c c +is_int_ge f S c + +is_ge Small N1=xy Min=i | is_ge Large Max=i N2=xy | equal(N1, N2) => + is_in_range Small Large N1 Min Max + +is_ge Large Max=i N2=xy | is_ge Small N1=xy Min=i | equal(N1, N2) => + is_in_range Small Large N2 Min Max + +is_in_range f f S c c + +is_ge Small N1=xy A=i | is_lt Large B=i N2=xy | equal(N1, N2) => + is_ge_lt Small Large N1 A B + +is_ge_lt f f S c c + is_lt f s s is_ge f s s diff --git a/erts/emulator/beam/jit/x86/beam_asm.hpp b/erts/emulator/beam/jit/x86/beam_asm.hpp index cd353714ef..da037aa235 100644 --- a/erts/emulator/beam/jit/x86/beam_asm.hpp +++ b/erts/emulator/beam/jit/x86/beam_asm.hpp @@ -1377,6 +1377,24 @@ protected: } } + void cmp(x86::Gp gp, int64_t val, const x86::Gp &spill) { + if (Support::isInt32(val)) { + a.cmp(gp, imm(val)); + } else { + mov_imm(spill, val); + a.cmp(gp, spill); + } + } + + void sub(x86::Gp gp, int64_t val, const x86::Gp &spill) { + if (Support::isInt32(val)) { + a.sub(gp, imm(val)); + } else { + mov_imm(spill, val); + a.sub(gp, spill); + } + } + /* Note: May clear flags. */ void mov_arg(x86::Gp to, const ArgVal &from, const x86::Gp &spill) { if (from.isBytePtr()) { diff --git a/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl b/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl index 9ea270c4f4..917a2815e5 100755 --- a/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl +++ b/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl @@ -79,6 +79,8 @@ my @beam_global_funcs = qw( increment_body_shared int_div_rem_body_shared int_div_rem_guard_shared + is_in_range_shared + is_ge_lt_shared internal_hash_helper minus_body_shared minus_guard_shared diff --git a/erts/emulator/beam/jit/x86/instr_common.cpp b/erts/emulator/beam/jit/x86/instr_common.cpp index a5cd54fb8f..bf26f5284f 100644 --- a/erts/emulator/beam/jit/x86/instr_common.cpp +++ b/erts/emulator/beam/jit/x86/instr_common.cpp @@ -1738,6 +1738,342 @@ void BeamModuleAssembler::emit_bif_is_ne_exact(const ArgRegister &LHS, emit_bif_is_eq_ne_exact(LHS, RHS, Dst, am_true, am_false); } +/* + * ARG1 = Src + * ARG2 = Min + * ARG3 = Max + * + * Result is returned in the flags. + */ +void BeamGlobalAssembler::emit_is_in_range_shared() { + Label immediate = a.newLabel(); + Label generic_compare = a.newLabel(); + Label done = a.newLabel(); + + /* Is the source a float? */ + emit_is_boxed(immediate, ARG1); + + x86::Gp boxed_ptr = emit_ptr_val(ARG4, ARG1); + a.cmp(emit_boxed_val(boxed_ptr), imm(HEADER_FLONUM)); + a.short_().jne(generic_compare); + + /* Compare the float to the limits. */ + a.movsd(x86::xmm0, emit_boxed_val(boxed_ptr, sizeof(Eterm))); + a.sar(ARG2, imm(_TAG_IMMED1_SIZE)); + a.sar(ARG3, imm(_TAG_IMMED1_SIZE)); + a.cvtsi2sd(x86::xmm1, ARG2); + a.cvtsi2sd(x86::xmm2, ARG3); + a.xor_(x86::ecx, x86::ecx); + a.ucomisd(x86::xmm0, x86::xmm2); + a.seta(x86::cl); + mov_imm(RET, -1); + a.ucomisd(x86::xmm1, x86::xmm0); + a.cmovbe(RET, x86::rcx); + + a.cmp(RET, imm(0)); + + a.ret(); + + a.bind(immediate); + { + /* + * Src is an immediate (such as ATOM) but not SMALL. + * That means that Src must be greater than the upper + * limit. + */ + mov_imm(RET, 1); + a.cmp(RET, imm(0)); + a.ret(); + } + + a.bind(generic_compare); + { + emit_enter_runtime(); + + a.mov(TMP_MEM1q, ARG1); + a.mov(TMP_MEM2q, ARG3); + + comment("erts_cmp_compound(X, Y, 0, 0);"); + mov_imm(ARG3, 0); + mov_imm(ARG4, 0); + runtime_call<4>(erts_cmp_compound); + a.test(RET, RET); + a.js(done); + + a.mov(ARG1, TMP_MEM1q); + a.mov(ARG2, TMP_MEM2q); + + comment("erts_cmp_compound(X, Y, 0, 0);"); + mov_imm(ARG3, 0); + mov_imm(ARG4, 0); + runtime_call<4>(erts_cmp_compound); + a.test(RET, RET); + + a.bind(done); + emit_leave_runtime(); + + a.ret(); + } +} + +void BeamModuleAssembler::emit_is_in_range(ArgLabel const &Small, + ArgLabel const &Large, + ArgRegister const &Src, + ArgConstant const &Min, + ArgConstant const &Max) { + Label next = a.newLabel(), generic = a.newLabel(); + bool need_generic = true; + + mov_arg(ARG1, Src); + + if (always_small(Src)) { + need_generic = false; + comment("skipped test for small operand since it always small"); + } else if (always_one_of(Src, BEAM_TYPE_INTEGER | BEAM_TYPE_MASK_BOXED)) { + /* The only possible kind of immediate is a small and all + * other values are boxed, so we can test for smalls by + * testing boxed. */ + comment("simplified small test since all other types are boxed"); + ERTS_CT_ASSERT(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED == (1 << 0)); + if (Small == Large && + always_one_of(Src, BEAM_TYPE_MASK_BOXED - BEAM_TYPE_FLOAT)) { + /* Src is never a float and the failure labels are + * equal. Therefore, since a bignum will never be within + * the range, we can fail immediately if Src is not a + * small. */ + need_generic = false; + a.test(ARG1.r8(), imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED)); + a.je(resolve_beam_label(Small)); + } else { + /* Src can be a float or the failures labels are distinct. + * We need to call the generic routine if Src is not a small. */ + a.test(ARG1.r8(), imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED)); + a.short_().je(generic); + } + } else if (Small == Large) { + /* We can save one instruction if we incorporate the test for + * small into the range check. */ + ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK); + comment("simplified small & range tests since failure labels are " + "equal"); + a.mov(RET, ARG1); + sub(RET, Min.as<ArgImmed>().get(), ARG4); + + /* Since we have subtracted the (tagged) lower bound, the + * tag bits of the difference is 0 if and only if Src is + * a small. Testing for a tag of 0 can be done in two + * instructions. */ + a.test(RETb, imm(_TAG_IMMED1_MASK)); + a.jne(generic); + + /* Now do the range check. */ + cmp(RET, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get(), ARG4); + a.ja(resolve_beam_label(Small)); + + /* Bypass the test code. */ + goto test_done; + } else { + /* We have no applicable type information and the failure + * labels are distinct. Emit the standard test for small + * and call the generic routine if Src is not a small. */ + a.mov(RETd, ARG1d); + a.and_(RETb, imm(_TAG_IMMED1_MASK)); + a.cmp(RETb, imm(_TAG_IMMED1_SMALL)); + a.short_().jne(generic); + } + + /* We have now established that the operand is small. */ + if (Small == Large) { + comment("simplified range test since failure labels are equal"); + sub(ARG1, Min.as<ArgImmed>().get(), RET); + cmp(ARG1, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get(), RET); + a.ja(resolve_beam_label(Small)); + } else { + cmp(ARG1, Min.as<ArgImmed>().get(), RET); + a.jl(resolve_beam_label(Small)); + cmp(ARG1, Max.as<ArgImmed>().get(), RET); + a.jg(resolve_beam_label(Large)); + } + +test_done: + if (need_generic) { + a.short_().jmp(next); + } + + a.bind(generic); + if (!need_generic) { + comment("skipped generic comparison because it is not needed"); + } else { + mov_arg(ARG2, Min); + mov_arg(ARG3, Max); + safe_fragment_call(ga->get_is_in_range_shared()); + if (Small == Large) { + a.jne(resolve_beam_label(Small)); + } else { + a.jl(resolve_beam_label(Small)); + a.jg(resolve_beam_label(Large)); + } + } + + a.bind(next); +} + +/* + * ARG1 = Src + * ARG2 = A + * ARG3 = B + * + * Result is returned in the flags. + */ +void BeamGlobalAssembler::emit_is_ge_lt_shared() { + Label done = a.newLabel(); + + emit_enter_runtime(); + + a.mov(TMP_MEM1q, ARG1); + a.mov(TMP_MEM2q, ARG3); + + comment("erts_cmp_compound(Src, A, 0, 0);"); + mov_imm(ARG3, 0); + mov_imm(ARG4, 0); + runtime_call<4>(erts_cmp_compound); + a.test(RET, RET); + a.short_().js(done); + + comment("erts_cmp_compound(B, Src, 0, 0);"); + a.mov(ARG1, TMP_MEM2q); + a.mov(ARG2, TMP_MEM1q); + mov_imm(ARG3, 0); + mov_imm(ARG4, 0); + runtime_call<4>(erts_cmp_compound); + + /* The following instructions implements the signum function. */ + mov_imm(ARG1, -1); + mov_imm(ARG4, 1); + a.test(RET, RET); + a.cmovs(RET, ARG1); + a.cmovg(RET, ARG4); + + /* RET is now -1, 0, or 1. */ + a.add(RET, imm(1)); + + /* We now have: + * RET == 0 if B < SRC + * RET > 0 if B => SRC + * and flags set accordingly. */ + + a.bind(done); + emit_leave_runtime(); + + a.ret(); +} + +/* + * is_ge + is_lt is 20 instructions. + * + * is_ge_lt is 15 instructions. + */ +void BeamModuleAssembler::emit_is_ge_lt(ArgLabel const &Fail1, + ArgLabel const &Fail2, + ArgRegister const &Src, + ArgConstant const &A, + ArgConstant const &B) { + Label generic = a.newLabel(), next = a.newLabel(); + + mov_arg(ARG2, A); + mov_arg(ARG3, B); + mov_arg(ARG1, Src); + + a.mov(RETd, ARG1d); + a.and_(RETb, imm(_TAG_IMMED1_MASK)); + a.cmp(RETb, imm(_TAG_IMMED1_SMALL)); + a.short_().jne(generic); + + a.cmp(ARG1, ARG2); + a.jl(resolve_beam_label(Fail1)); + a.cmp(ARG3, ARG1); + a.jge(resolve_beam_label(Fail2)); + a.short_().jmp(next); + + a.bind(generic); + safe_fragment_call(ga->get_is_ge_lt_shared()); + a.jl(resolve_beam_label(Fail1)); + a.jg(resolve_beam_label(Fail2)); + + a.bind(next); +} + +/* + * Combine is_integer with range check. + * + * is_integer + is_ge + is_ge is 31 instructions. + * + * is_int_in_range is 6 instructions. + */ +void BeamModuleAssembler::emit_is_int_in_range(ArgLabel const &Fail, + ArgRegister const &Src, + ArgConstant const &Min, + ArgConstant const &Max) { + mov_arg(RET, Src); + + sub(RET, Min.as<ArgImmed>().get(), ARG1); + + /* Since we have subtracted the (tagged) lower bound, the + * tag bits of the difference is 0 if and only if Src is + * a small. */ + ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK); + a.test(RETb, imm(_TAG_IMMED1_MASK)); + a.jne(resolve_beam_label(Fail)); + cmp(RET, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get(), ARG1); + a.ja(resolve_beam_label(Fail)); +} + +/* + * is_integer + is_ge is 21 instructions. + * + * is_int_ge is 14 instructions. + */ +void BeamModuleAssembler::emit_is_int_ge(ArgLabel const &Fail, + ArgRegister const &Src, + ArgConstant const &Min) { + Label small = a.newLabel(); + Label fail = a.newLabel(); + Label next = a.newLabel(); + /* On Unix, using rcx instead of ARG1 makes the `test` instruction + * in the boxed test one byte shorter. */ + const x86::Gp src_reg = x86::rcx; + + mov_arg(src_reg, Src); + + if (always_one_of(Src, BEAM_TYPE_INTEGER | BEAM_TYPE_MASK_ALWAYS_BOXED)) { + comment("simplified small test since all other types are boxed"); + emit_is_boxed(small, Src, src_reg); + } else { + a.mov(RETd, src_reg.r32()); + a.and_(RETb, imm(_TAG_IMMED1_MASK)); + a.cmp(RETb, imm(_TAG_IMMED1_SMALL)); + a.short_().je(small); + + emit_is_boxed(resolve_beam_label(Fail), Src, src_reg); + } + + /* Src is boxed. Jump to failure unless Src is a positive bignum. */ + x86::Gp boxed_ptr = emit_ptr_val(src_reg, src_reg); + a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32))); + a.and_(RETb, imm(_TAG_HEADER_MASK)); + a.cmp(RETb, imm(_TAG_HEADER_POS_BIG)); + a.short_().je(next); + + a.bind(fail); + a.jmp(resolve_beam_label(Fail)); + + a.bind(small); + cmp(src_reg, Min.as<ArgImmed>().get(), RET); + a.short_().jl(fail); + + a.bind(next); +} + void BeamModuleAssembler::emit_badmatch(const ArgSource &Src) { mov_arg(x86::qword_ptr(c_p, offsetof(Process, fvalue)), Src); emit_error(BADMATCH); diff --git a/erts/emulator/beam/jit/x86/ops.tab b/erts/emulator/beam/jit/x86/ops.tab index a0fa7124cd..0b7455bccd 100644 --- a/erts/emulator/beam/jit/x86/ops.tab +++ b/erts/emulator/beam/jit/x86/ops.tab @@ -443,6 +443,36 @@ is_eq_exact f s s is_ne_exact f s s +is_integer NotInt N0 | is_ge Small N1=xy Min=i | is_ge Large Max=i N2=xy | + equal(N0, N1) | equal(N1, N2) | + equal(NotInt, Small) | equal(Small, Large) => + is_int_in_range NotInt N0 Min Max + +is_integer NotInt N0 | is_ge Large Max=i N2=xy | is_ge Small N1=xy Min=i | + equal(N0, N1) | equal(N1, N2) | + equal(NotInt, Small) | equal(Small, Large) => + is_int_in_range NotInt N0 Min Max + +is_integer NotInt N0 | is_ge Fail N1=xy Min=i | + equal(N0, N1) | equal(NotInt, Fail) => + is_int_ge NotInt N0 Min + +is_int_in_range f S c c +is_int_ge f S c + +is_ge Small N1=xy Min=i | is_ge Large Max=i N2=xy | equal(N1, N2) => + is_in_range Small Large N1 Min Max + +is_ge Large Max=i N2=xy | is_ge Small N1=xy Min=i | equal(N1, N2) => + is_in_range Small Large N2 Min Max + +is_in_range f f S c c + +is_ge Small N1=xy A=i | is_lt Large B=i N2=xy | equal(N1, N2) => + is_ge_lt Small Large N1 A B + +is_ge_lt f f S c c + is_lt f s s is_ge f s s diff --git a/erts/emulator/test/op_SUITE.erl b/erts/emulator/test/op_SUITE.erl index 1f1f34d35f..df81b22e08 100644 --- a/erts/emulator/test/op_SUITE.erl +++ b/erts/emulator/test/op_SUITE.erl @@ -25,7 +25,7 @@ -export([all/0, suite/0, bsl_bsr/1,logical/1,t_not/1,relop_simple/1,relop/1, complex_relop/1,unsafe_fusing/1, - range_tests/1,typed_relop/1]). + range_tests/1,combined_relops/1,typed_relop/1]). -import(lists, [foldl/3,flatmap/2]). @@ -35,7 +35,8 @@ suite() -> all() -> [bsl_bsr, logical, t_not, relop_simple, relop, - complex_relop, unsafe_fusing, range_tests, typed_relop]. + complex_relop, unsafe_fusing, range_tests, + combined_relops, typed_relop]. %% Test the bsl and bsr operators. bsl_bsr(Config) when is_list(Config) -> @@ -523,7 +524,7 @@ range_tests(_Config) -> inside = range_big(MinSmall), inside = range_big(-1 bsl 58), inside = range_big(0), - inside = range_barely_small(17.75), + inside = range_big(17.75), inside = range_big(1 bsl 58), inside = range_big(MaxSmall), @@ -531,6 +532,39 @@ range_tests(_Config) -> greater = range_big(1 bsl 64), greater = range_big(float(1 bsl 64)), + inside = int_range_1(id(-100_000)), + inside = int_range_1(id(-10)), + inside = int_range_1(id(100)), + inside = int_range_1(id(100_000)), + + outside = int_range_1(id(atom)), + outside = int_range_1(id(-1 bsl 60)), + outside = int_range_1(id(-100_001)), + outside = int_range_1(id(100_001)), + outside = int_range_1(id(1 bsl 60)), + + inside = int_range_2(id(1)), + inside = int_range_2(id(42)), + inside = int_range_2(id(16#f000_0000)), + + outside = int_range_2(id([a,list])), + outside = int_range_2(id(0)), + outside = int_range_1(id(-1 bsl 60)), + outside = int_range_1(id(1 bsl 60)), + + inside = int_range_3(id(1 bsl 28)), + inside = int_range_3(id((1 bsl 28) + 1)), + inside = int_range_3(id((1 bsl 33) + 555)), + inside = int_range_3(id((1 bsl 58) - 1)), + inside = int_range_3(id(1 bsl 58)), + + outside = int_range_3(id({a,tuple})), + outside = int_range_3(id(-1 bsl 60)), + outside = int_range_3(id(-1000)), + outside = int_range_3(id(100)), + outside = int_range_3(id((1 bsl 58) + 1)), + outside = int_range_3(id(1 bsl 60)), + ok. range(X) -> @@ -683,6 +717,83 @@ compare_integer_pid(N) when is_integer(N) -> N < Immed -> ok end. +int_range_1(X) when is_integer(X), -100_000 =< X, X =< 100_000 -> + inside; +int_range_1(_) -> + outside. + +int_range_2(X) when is_integer(X), 1 =< X, X =< 16#f000_0000 -> + inside; +int_range_2(_) -> + outside. + +int_range_3(X) when is_integer(X), 1 bsl 28 =< X, X =< 1 bsl 58 -> + inside; +int_range_3(_) -> + outside. + +combined_relops(_Config) -> + other = test_tok_char(-1 bsl 64), + other = test_tok_char($A - 1), + + var = test_tok_char($A), + var = test_tok_char($B), + var = test_tok_char($P), + var = test_tok_char($Y), + var = test_tok_char($Z), + + other = test_tok_char($Z + 1), + + var = tok_char($_), + other = tok_char(float($_)), + + other = test_tok_char(1 bsl 64), + + other = test_tok_char(atom), + other = test_tok_char(self()), + + ok. + +test_tok_char(C) -> + Result = tok_char(C), + if + is_integer(C) -> + Result = tok_char(float(C)), + Result = tok_char_int(C), + if + C band 16#FFFF =:= C -> + Result = tok_char_int_range(C); + true -> + Result + end; + true -> + Result + end. + +%% is_ge + is_lt +tok_char(C) when $A =< C, C =< $Z -> + var; +tok_char($_) -> + var; +tok_char(_) -> + other. + +%% is_ge + is_ge +tok_char_int(C) when $A =< C, C =< $Z -> + var; +tok_char_int($_) -> + var; +tok_char_int(_) -> + other. + +%% is_ge + is_ge +tok_char_int_range(C) when $A =< C, C =< $Z -> + var; +tok_char_int_range($_) -> + var; +tok_char_int_range(_) -> + other. + %%% %%% Utilities. %%% -- 2.35.3
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor