Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:26
erlang
0128-jit-Be-more-paranoid-about-ARM-instruction...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 0128-jit-Be-more-paranoid-about-ARM-instruction-caches.patch of Package erlang
From a7a2db8c03ea8ad7012d55fff02fc76d207457cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20H=C3=B6gberg?= <john@erlang.org> Date: Fri, 9 Jun 2023 18:32:13 +0200 Subject: [PATCH] jit: Be more paranoid about ARM instruction caches Not every processor has a minimum cache line size of 64 bytes, and the barriers were a bit more lax than they should have been. --- erts/emulator/beam/jit/beam_jit_main.cpp | 70 +++++++++++++++++++----- 1 file changed, 55 insertions(+), 15 deletions(-) diff --git a/erts/emulator/beam/jit/beam_jit_main.cpp b/erts/emulator/beam/jit/beam_jit_main.cpp index 3862663877..7692e4b2d9 100644 --- a/erts/emulator/beam/jit/beam_jit_main.cpp +++ b/erts/emulator/beam/jit/beam_jit_main.cpp @@ -76,6 +76,32 @@ static BeamGlobalAssembler *bga; static BeamModuleAssembler *bma; static CpuInfo cpuinfo; +#if defined(__aarch64__) && !(defined(WIN32) || defined(__APPLE__)) && \ + defined(__GNUC__) && defined(ERTS_THR_INSTRUCTION_BARRIER) && \ + ETHR_HAVE_GCC_ASM_ARM_IC_IVAU_INSTRUCTION && \ + ETHR_HAVE_GCC_ASM_ARM_DC_CVAU_INSTRUCTION +# define BEAMASM_MANUAL_ICACHE_FLUSHING +#endif + +#ifdef BEAMASM_MANUAL_ICACHE_FLUSHING +static UWord min_icache_line_size; +static UWord min_dcache_line_size; +#endif + +static void init_cache_info() { +#if defined(__aarch64__) && defined(BEAMASM_MANUAL_ICACHE_FLUSHING) + UWord ctr_el0; + + /* DC/IC operate on a cache line basis, so we need to step according to the + * _smallest_ data and instruction cache line size. + * + * Query the "Cache Type Register" MSR to find out what they are. */ + __asm__ __volatile__("mrs %0, ctr_el0\n" : "=r"(ctr_el0)); + min_dcache_line_size = (4 << ((ctr_el0 >> 16) & 0xF)); + min_icache_line_size = (4 << (ctr_el0 & 0xF)); +#endif +} + /* * Enter all BIFs into the export table. 
* @@ -257,6 +283,7 @@ void beamasm_init() { #endif beamasm_metadata_early_init(); + init_cache_info(); /* * Ensure that commonly used fields in the PCB can be accessed with @@ -421,25 +448,38 @@ extern "C" #elif defined(__aarch64__) && defined(__APPLE__) /* Issues full memory/instruction barriers on all threads for us. */ sys_icache_invalidate((char *)address, size); -#elif defined(__aarch64__) && defined(__GNUC__) && \ - defined(ERTS_THR_INSTRUCTION_BARRIER) && \ - ETHR_HAVE_GCC_ASM_ARM_IC_IVAU_INSTRUCTION && \ - ETHR_HAVE_GCC_ASM_ARM_DC_CVAU_INSTRUCTION - /* Note that we do not issue any barriers here, whether instruction or - * memory. This is on purpose as we must issue those on all schedulers +#elif defined(__aarch64__) && defined(BEAMASM_MANUAL_ICACHE_FLUSHING) + /* Note that we do not issue an instruction synchronization barrier + * here. This is on purpose as we must issue those on all schedulers * and not just the calling thread, and the chances of us forgetting to - * do that is much higher if we issue them here. */ - UWord start = reinterpret_cast<UWord>(address); - UWord end = start + size; + * do that is much higher if we issue one here. */ + UWord start, end, stride; - ETHR_COMPILER_BARRIER; + start = reinterpret_cast<UWord>(address); + end = start + size; - for (UWord i = start & ~ERTS_CACHE_LINE_MASK; i < end; - i += ERTS_CACHE_LINE_SIZE) { - __asm__ __volatile__("dc cvau, %0\n" - "ic ivau, %0\n" ::"r"(i) - :); + stride = min_dcache_line_size; + for (UWord i = start & ~(stride - 1); i < end; i += stride) { + __asm__ __volatile__("dc cvau, %0\n" ::"r"(i) :); } + + /* We need a special memory barrier between clearing dcache and icache, + * or there's a chance that the icache on another core is invalidated + * before the dcache, which can then be repopulated with stale data. 
*/ + __asm__ __volatile__("dsb ish\n" ::: "memory"); + + stride = min_icache_line_size; + for (UWord i = start & ~(stride - 1); i < end; i += stride) { + __asm__ __volatile__("ic ivau, %0\n" ::"r"(i) :); + } + + /* Ensures that all cores clear their instruction cache before moving + * on. The usual full memory barrier (`dmb sy`) executed by the thread + * progress mechanism is not sufficient for this. + * + * Note that this barrier need not be executed on other cores, it's + * enough for them to issue an instruction synchronization barrier. */ + __asm__ __volatile__("dsb ish\n" ::: "memory"); #elif (defined(__x86_64__) || defined(_M_X64)) && \ defined(ERTS_THR_INSTRUCTION_BARRIER) /* We don't need to invalidate cache on this platform, but since we -- 2.35.3
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an openSUSE project.
Sign Up
Log In
Places
Places
All Projects
Status Monitor