Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Andreas_Schwab:riscv:ghc:9.8
ghc
riscv-ncg.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File riscv-ncg.patch of Package ghc
Index: ghc-9.8.2/compiler/CodeGen.Platform.h =================================================================== --- ghc-9.8.2.orig/compiler/CodeGen.Platform.h +++ ghc-9.8.2/compiler/CodeGen.Platform.h @@ -1,7 +1,8 @@ import GHC.Cmm.Expr #if !(defined(MACHREGS_i386) || defined(MACHREGS_x86_64) \ - || defined(MACHREGS_powerpc) || defined(MACHREGS_aarch64)) + || defined(MACHREGS_powerpc) || defined(MACHREGS_aarch64) \ + || defined(MACHREGS_riscv64)) import GHC.Utils.Panic.Plain #endif import GHC.Platform.Reg @@ -1040,6 +1041,105 @@ freeReg 18 = False # if defined(REG_Base) freeReg REG_Base = False +# endif +# if defined(REG_Sp) +freeReg REG_Sp = False +# endif +# if defined(REG_SpLim) +freeReg REG_SpLim = False +# endif +# if defined(REG_Hp) +freeReg REG_Hp = False +# endif +# if defined(REG_HpLim) +freeReg REG_HpLim = False +# endif + +# if defined(REG_R1) +freeReg REG_R1 = False +# endif +# if defined(REG_R2) +freeReg REG_R2 = False +# endif +# if defined(REG_R3) +freeReg REG_R3 = False +# endif +# if defined(REG_R4) +freeReg REG_R4 = False +# endif +# if defined(REG_R5) +freeReg REG_R5 = False +# endif +# if defined(REG_R6) +freeReg REG_R6 = False +# endif +# if defined(REG_R7) +freeReg REG_R7 = False +# endif +# if defined(REG_R8) +freeReg REG_R8 = False +# endif + +# if defined(REG_F1) +freeReg REG_F1 = False +# endif +# if defined(REG_F2) +freeReg REG_F2 = False +# endif +# if defined(REG_F3) +freeReg REG_F3 = False +# endif +# if defined(REG_F4) +freeReg REG_F4 = False +# endif +# if defined(REG_F5) +freeReg REG_F5 = False +# endif +# if defined(REG_F6) +freeReg REG_F6 = False +# endif + +# if defined(REG_D1) +freeReg REG_D1 = False +# endif +# if defined(REG_D2) +freeReg REG_D2 = False +# endif +# if defined(REG_D3) +freeReg REG_D3 = False +# endif +# if defined(REG_D4) +freeReg REG_D4 = False +# endif +# if defined(REG_D5) +freeReg REG_D5 = False +# endif +# if defined(REG_D6) +freeReg REG_D6 = False +# endif + +freeReg _ = True + +#elif 
defined(MACHREGS_riscv64) + +-- zero reg +freeReg 0 = False +-- link register +freeReg 1 = False +-- stack pointer +freeReg 2 = False +-- global pointer +freeReg 3 = False +-- thread pointer +freeReg 4 = False +-- frame pointer +freeReg 8 = False +-- made-up inter-procedural (ip) register +-- See Note [The made-up RISCV64 IP register] +freeReg 31 = False + +# if defined(REG_Base) +freeReg REG_Base = False # endif # if defined(REG_Sp) freeReg REG_Sp = False Index: ghc-9.8.2/compiler/GHC/Cmm/CLabel.hs =================================================================== --- ghc-9.8.2.orig/compiler/GHC/Cmm/CLabel.hs +++ ghc-9.8.2/compiler/GHC/Cmm/CLabel.hs @@ -1723,6 +1723,8 @@ pprDynamicLinkerAsmLabel !platform dllIn | platformArch platform == ArchAArch64 = ppLbl + | platformArch platform == ArchRISCV64 + = ppLbl | platformArch platform == ArchX86_64 = case dllInfo of Index: ghc-9.8.2/compiler/GHC/CmmToAsm.hs =================================================================== --- ghc-9.8.2.orig/compiler/GHC/CmmToAsm.hs +++ ghc-9.8.2/compiler/GHC/CmmToAsm.hs @@ -82,6 +82,7 @@ import qualified GHC.CmmToAsm.X86 as X import qualified GHC.CmmToAsm.PPC as PPC import qualified GHC.CmmToAsm.AArch64 as AArch64 import qualified GHC.CmmToAsm.Wasm as Wasm32 +import qualified GHC.CmmToAsm.RV64 as RV64 import GHC.CmmToAsm.Reg.Liveness import qualified GHC.CmmToAsm.Reg.Linear as Linear @@ -167,7 +168,7 @@ nativeCodeGen logger ts config modLoc h ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha" ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb" ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel" - ArchRISCV64 -> panic "nativeCodeGen: No NCG for RISCV64" + ArchRISCV64 -> nCG' (RV64.ncgRV64 config) ArchLoongArch64->panic "nativeCodeGen: No NCG for LoongArch64" ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch" ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript" Index: ghc-9.8.2/compiler/GHC/CmmToAsm/Dwarf/Constants.hs 
=================================================================== --- ghc-9.8.2.orig/compiler/GHC/CmmToAsm/Dwarf/Constants.hs +++ ghc-9.8.2/compiler/GHC/CmmToAsm/Dwarf/Constants.hs @@ -240,6 +240,7 @@ dwarfRegNo p r = case platformArch p of | r == xmm15 -> 32 ArchPPC_64 _ -> fromIntegral $ toRegNo r ArchAArch64 -> fromIntegral $ toRegNo r + ArchRISCV64 -> fromIntegral $ toRegNo r _other -> error "dwarfRegNo: Unsupported platform or unknown register!" -- | Virtual register number to use for return address. @@ -252,5 +253,6 @@ dwarfReturnRegNo p ArchX86 -> 8 -- eip ArchX86_64 -> 16 -- rip ArchPPC_64 ELF_V2 -> 65 -- lr (link register) - ArchAArch64-> 30 + ArchAArch64 -> 30 + ArchRISCV64 -> 1 -- ra (return address) _other -> error "dwarfReturnRegNo: Unsupported platform!" Index: ghc-9.8.2/compiler/GHC/CmmToAsm/PIC.hs =================================================================== --- ghc-9.8.2.orig/compiler/GHC/CmmToAsm/PIC.hs +++ ghc-9.8.2/compiler/GHC/CmmToAsm/PIC.hs @@ -132,6 +132,11 @@ cmmMakeDynamicReference config reference addImport symbolPtr return $ cmmMakePicReference config symbolPtr + AccessViaSymbolPtr | ArchRISCV64 <- platformArch platform -> do + let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl + addImport symbolPtr + return $ cmmMakePicReference config symbolPtr + AccessViaSymbolPtr -> do let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl addImport symbolPtr @@ -164,6 +169,10 @@ cmmMakePicReference config lbl | ArchAArch64 <- platformArch platform = CmmLit $ CmmLabel lbl + | ArchRISCV64 <- platformArch platform + = CmmLit $ CmmLabel lbl + + | OSAIX <- platformOS platform = CmmMachOp (MO_Add W32) [ CmmReg (CmmGlobal $ GlobalRegUse PicBaseReg (bWord platform)) Index: ghc-9.8.2/compiler/GHC/CmmToAsm/RV64-notes.md =================================================================== --- /dev/null +++ ghc-9.8.2/compiler/GHC/CmmToAsm/RV64-notes.md @@ -0,0 +1,35 @@ +# Riscv 64 NCG + +We model the RV64 NCG along the aarch64 NCG, simply because they share 
+a lot in common. Ultimately we might want to extract some RISC like super +structure from this to reduce duplicate code. + +This is the aarch64 register layout (for linux calling) +``` +.---------------------------------------------------------------------------------------------------------------------------------------------------------------. +| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | +| 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 42 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | +|== General Purpose registers ==================================================================================================================================| +| <---- argument passing -------------> | IR | <------- tmp registers --------> | IP0| IP1| PL | <------------------- callee saved ------------> | FP | LR | SP | +| <------ free registers --------------------------------------------------------------------> | BR | Sp | Hp | R1 | R2 | R3 | R4 | R5 | R6 | SL | -- | -- | -- | +|== SIMD/FP Registers ==========================================================================================================================================| +| <---- argument passing -------------> | <-- callee saved (lower 64 bits) ---> | <--------------------------------------- caller saved ----------------------> | +| <------ free registers -------------> | F1 | F2 | F3 | F4 | D1 | D2 | D3 | D4 | <------ free registers -----------------------------------------------------> | +'---------------------------------------------------------------------------------------------------------------------------------------------------------------' +IR: Indirect result location register, IP: Intra-procedure register, PL: Platform register, FP: Frame pointer, LR: Link register, SP: Stack pointer +BR: Base, SL: SpLim +``` +comparing this to 
RV64 gives us: +``` +.---------------------------------------------------------------------------------------------------------------------------------------------------------------. +| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | +| 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 42 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | +|== General Purpose registers ==================================================================================================================================| +| ZR | RA | SP | GP | TP | <- tmp r. -> | FP | <- | <---- argument passing -------------> | -- callee saved ------------------------------> | <--- tmp regs --> | +| -- | -- | -- | -- | -- | <- free r. > | -- | BR | <---- free registers ---------------> | SP | HP | R1 | R2 | R3 | R4 | R5 | R6 | R7 | SL | <-- free regs --> | +|== SIMD/FP Registers ==========================================================================================================================================| +| <--- temporary registers -----------> | <------ | <---- argument passing -------------> | -- callee saved ------------------------------> | <--- tmp regs --> | +| <---------- free registers ---------> | F1 | F2 | <---- free registers ---------------> | F3 | F4 | F5 | F6 | D1 | D2 | D3 | D4 | D5 | D6 | -- | -- | -- | -- | +'---------------------------------------------------------------------------------------------------------------------------------------------------------------' + +ZR: Zero, RA: Return Address, SP: Stack Pointer, GP: Global Pointer, TP: Thread Pointer, FP: Frame Pointer \ No newline at end of file Index: ghc-9.8.2/compiler/GHC/CmmToAsm/RV64.hs =================================================================== --- /dev/null +++ ghc-9.8.2/compiler/GHC/CmmToAsm/RV64.hs @@ -0,0 +1,61 @@ +{-# OPTIONS_GHC 
-fno-warn-orphans #-} + +-- | Native code generator for RiscV64 architectures +module GHC.CmmToAsm.RV64 + ( ncgRV64 ) +where + +import GHC.Prelude + +import GHC.CmmToAsm.Instr +import GHC.CmmToAsm.Monad +import GHC.CmmToAsm.Config +import GHC.CmmToAsm.Types +import GHC.Utils.Outputable (ftext) + +import qualified GHC.CmmToAsm.RV64.Instr as RV64 +import qualified GHC.CmmToAsm.RV64.Ppr as RV64 +import qualified GHC.CmmToAsm.RV64.CodeGen as RV64 +import qualified GHC.CmmToAsm.RV64.Regs as RV64 +import qualified GHC.CmmToAsm.RV64.RegInfo as RV64 + +ncgRV64 :: NCGConfig -> NcgImpl RawCmmStatics RV64.Instr RV64.JumpDest +ncgRV64 config + = NcgImpl { + ncgConfig = config + ,cmmTopCodeGen = RV64.cmmTopCodeGen + ,generateJumpTableForInstr = RV64.generateJumpTableForInstr config + ,getJumpDestBlockId = RV64.getJumpDestBlockId + ,canShortcut = RV64.canShortcut + ,shortcutStatics = RV64.shortcutStatics + ,shortcutJump = RV64.shortcutJump + ,pprNatCmmDeclS = RV64.pprNatCmmDecl config + ,pprNatCmmDeclH = RV64.pprNatCmmDecl config + ,maxSpillSlots = RV64.maxSpillSlots config + ,allocatableRegs = RV64.allocatableRegs platform + ,ncgAllocMoreStack = RV64.allocMoreStack platform + ,ncgMakeFarBranches = \_p _i bs -> pure bs + ,extractUnwindPoints = const [] + ,invertCondBranches = \_ _ -> id + } + where + platform = ncgPlatform config + +-- | Instruction instance for RV64 +instance Instruction RV64.Instr where + regUsageOfInstr = RV64.regUsageOfInstr + patchRegsOfInstr = RV64.patchRegsOfInstr + isJumpishInstr = RV64.isJumpishInstr + jumpDestsOfInstr = RV64.jumpDestsOfInstr + patchJumpInstr = RV64.patchJumpInstr + mkSpillInstr = RV64.mkSpillInstr + mkLoadInstr = RV64.mkLoadInstr + takeDeltaInstr = RV64.takeDeltaInstr + isMetaInstr = RV64.isMetaInstr + mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr + takeRegRegMoveInstr = RV64.takeRegRegMoveInstr + mkJumpInstr = RV64.mkJumpInstr + mkStackAllocInstr = RV64.mkStackAllocInstr + mkStackDeallocInstr = RV64.mkStackDeallocInstr + mkComment = 
pure . RV64.COMMENT . ftext + pprInstr = RV64.pprInstr Index: ghc-9.8.2/compiler/GHC/CmmToAsm/RV64/CodeGen.hs =================================================================== --- /dev/null +++ ghc-9.8.2/compiler/GHC/CmmToAsm/RV64/CodeGen.hs @@ -0,0 +1,1780 @@ +{-# language GADTs #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BinaryLiterals #-} +{-# LANGUAGE OverloadedStrings #-} +module GHC.CmmToAsm.RV64.CodeGen ( + cmmTopCodeGen + , generateJumpTableForInstr +) + +where + +import Control.Monad (mapAndUnzipM) +import Data.Maybe +import Data.Word +import GHC.Cmm +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.DebugBlock +import GHC.Cmm.Switch +import GHC.Cmm.Utils +import GHC.CmmToAsm.CPrim +import GHC.CmmToAsm.Config +import GHC.CmmToAsm.Format +import GHC.CmmToAsm.Monad + ( NatM, + getBlockIdNat, + getConfig, + getDebugBlock, + getFileId, + getNewLabelNat, + getNewRegNat, + getPicBaseMaybeNat, + getPlatform, + ) +import GHC.CmmToAsm.PIC +import GHC.CmmToAsm.RV64.Cond +import GHC.CmmToAsm.RV64.Instr +import GHC.CmmToAsm.RV64.Regs +import GHC.CmmToAsm.Types +import GHC.Data.FastString +import GHC.Data.OrdList +import GHC.Float +import GHC.Platform +import GHC.Platform.Reg +import GHC.Platform.Regs +import GHC.Prelude hiding (EQ) +import GHC.Types.Basic +import GHC.Types.ForeignCall +import GHC.Types.SrcLoc (srcSpanFile, srcSpanStartCol, srcSpanStartLine) +import GHC.Types.Tickish (GenTickish (..)) +import GHC.Utils.Constants (debugIsOn) +import GHC.Utils.Misc +import GHC.Utils.Outputable +import GHC.Utils.Panic +import GHC.Utils.Panic.Plain (assert) + +-- For an overview of an NCG's structure, see Note [General layout of an NCG] + +cmmTopCodeGen + :: RawCmmDecl + -> NatM [NatCmmDecl RawCmmStatics Instr] + +-- Thus we'll have to deal with either CmmProc ... 
+cmmTopCodeGen _cmm@(CmmProc info lab live graph) = do + -- do + -- traceM $ "-- -------------------------- cmmTopGen (CmmProc) -------------------------- --\n" + -- ++ showSDocUnsafe (ppr cmm) + + let blocks = toBlockListEntryFirst graph + (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks + picBaseMb <- getPicBaseMaybeNat + + let proc = CmmProc info lab live (ListGraph $ concat nat_blocks) + tops = proc : concat statics + + case picBaseMb of + Just _picBase -> panic "RV64.cmmTopCodeGen: picBase not implemented" + Nothing -> return tops + +-- ... or CmmData. +cmmTopCodeGen _cmm@(CmmData sec dat) = do + -- do + -- traceM $ "-- -------------------------- cmmTopGen (CmmData) -------------------------- --\n" + -- ++ showSDocUnsafe (ppr cmm) + return [CmmData sec dat] -- no translation, we just use CmmStatic + +basicBlockCodeGen + :: Block CmmNode C C + -> NatM ( [NatBasicBlock Instr] + , [NatCmmDecl RawCmmStatics Instr]) + +basicBlockCodeGen block = do + config <- getConfig + -- do + -- traceM $ "-- --------------------------- basicBlockCodeGen --------------------------- --\n" + -- ++ showSDocUnsafe (ppr block) + let (_, nodes, tail) = blockSplit block + id = entryLabel block + stmts = blockToList nodes + + header_comment_instr | debugIsOn = unitOL $ MULTILINE_COMMENT ( + text "-- --------------------------- basicBlockCodeGen --------------------------- --\n" + $+$ withPprStyle defaultDumpStyle (pdoc (ncgPlatform config) block) + ) + | otherwise = nilOL + -- Generate location directive + dbg <- getDebugBlock (entryLabel block) + loc_instrs <- case dblSourceTick =<< dbg of + Just (SourceNote span (LexicalFastString name)) + -> do fileId <- getFileId (srcSpanFile span) + let line = srcSpanStartLine span; col = srcSpanStartCol span + return $ unitOL $ LOCATION fileId line col (unpackFS name) + _ -> return nilOL + (mid_instrs,mid_bid) <- stmtsToInstrs id stmts + (!tail_instrs,_) <- stmtToInstrs mid_bid tail + let instrs = header_comment_instr `appOL` 
loc_instrs `appOL` mid_instrs `appOL` tail_instrs + -- TODO: Then x86 backend run @verifyBasicBlock@ here and inserts + -- unwinding info. See Ticket 19913 + -- code generation may introduce new basic block boundaries, which + -- are indicated by the NEWBLOCK instruction. We must split up the + -- instruction stream into basic blocks again. Also, we extract + -- LDATAs here too. + let + (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs + + mkBlocks (NEWBLOCK id) (instrs,blocks,statics) + = ([], BasicBlock id instrs : blocks, statics) + mkBlocks (LDATA sec dat) (instrs,blocks,statics) + = (instrs, blocks, CmmData sec dat:statics) + mkBlocks instr (instrs,blocks,statics) + = (instr:instrs, blocks, statics) + return (BasicBlock id top : other_blocks, statics) + + +-- ----------------------------------------------------------------------------- +-- | Utilities +ann :: SDoc -> Instr -> Instr +ann doc instr {- debugIsOn -} = ANN doc instr +-- ann _ instr = instr +{-# INLINE ann #-} + +-- Using pprExpr will hide the AST, @ANN@ will end up in the assembly with +-- -dppr-debug. The idea is that we can trivially see how a cmm expression +-- ended up producing the assembly we see. By having the verbatim AST printed +-- we can simply check the patterns that were matched to arrive at the assembly +-- we generated. +-- +-- pprExpr will hide a lot of noise of the underlying data structure and print +-- the expression into something that can be easily read by a human. However +-- going back to the exact CmmExpr representation can be laborious and adds +-- indirections to find the matches that lead to the assembly. +-- +-- An improvement oculd be to have +-- +-- (pprExpr genericPlatform e) <> parens (text. show e) +-- +-- to have the best of both worlds. +-- +-- Note: debugIsOn is too restrictive, it only works for debug compilers. +-- However, we do not only want to inspect this for debug compilers. 
Ideally +-- we'd have a check for -dppr-debug here already, such that we don't even +-- generate the ANN expressions. However, as they are lazy, they shouldn't be +-- forced until we actually force them, and without -dppr-debug they should +-- never end up being forced. +annExpr :: CmmExpr -> Instr -> Instr +annExpr e {- debugIsOn -} = ANN (text . show $ e) +-- annExpr e instr {- debugIsOn -} = ANN (pprExpr genericPlatform e) instr +-- annExpr _ instr = instr +{-# INLINE annExpr #-} + +-- ----------------------------------------------------------------------------- +-- Generating a table-branch + +-- | Generate jump to jump table target +-- +-- The index into the jump table is calulated by evaluating @expr@. The +-- corresponding table entry contains the address to jump to. +genSwitch :: NCGConfig -> CmmExpr -> SwitchTargets -> NatM InstrBlock +genSwitch config expr targets = do + (reg, fmt1, e_code) <- getSomeReg indexExpr + let fmt = II64 + tmp <- getNewRegNat fmt + lbl <- getNewLabelNat + dynRef <- cmmMakeDynamicReference config DataReference lbl + (tableReg, fmt2, t_code) <- getSomeReg $ dynRef + let code = + toOL [ COMMENT (text "indexExpr" <+> (text . show) indexExpr) + , COMMENT (text "dynRef" <+> (text . show) dynRef) + ] + `appOL` e_code + `appOL` t_code + `appOL` toOL + [ + COMMENT (ftext "Jump table for switch") + , annExpr expr (LSL (OpReg (formatToWidth fmt1) reg) (OpReg (formatToWidth fmt1) reg) (OpImm (ImmInt 3))) + , ADD (OpReg W64 tmp) (OpReg (formatToWidth fmt1) reg) (OpReg (formatToWidth fmt2) tableReg) + , LDRU II64 (OpReg W64 tmp) (OpAddr (AddrRegImm tmp (ImmInt 0))) + , J_TBL ids (Just lbl) tmp + ] + return code + where + -- See Note [Sub-word subtlety during jump-table indexing] in + -- GHC.CmmToAsm.X86.CodeGen for why we must first offset, then widen. 
+ indexExpr0 = cmmOffset platform expr offset + -- We widen to a native-width register to sanitize the high bits + indexExpr = + CmmMachOp + (MO_UU_Conv expr_w (platformWordWidth platform)) + [indexExpr0] + expr_w = cmmExprWidth platform expr + (offset, ids) = switchTargetsToTable targets + platform = ncgPlatform config + +-- | Generate jump table data (if required) +-- +-- Relies on PIC relocations. The idea is to emit one table entry per case. The +-- entry is the label of the block to jump to. This will be relocated to be the +-- address of the jump target. +generateJumpTableForInstr :: + NCGConfig -> + Instr -> + Maybe (NatCmmDecl RawCmmStatics Instr) +generateJumpTableForInstr config (J_TBL ids (Just lbl) _) = + let jumpTable = + map jumpTableEntryRel ids + where + jumpTableEntryRel Nothing = + CmmStaticLit (CmmInt 0 (ncgWordWidth config)) + jumpTableEntryRel (Just blockid) = + CmmStaticLit (CmmLabel blockLabel) + where + blockLabel = blockLbl blockid + in Just (CmmData (Section ReadOnlyData lbl) (CmmStaticsRaw lbl jumpTable)) +generateJumpTableForInstr _ _ = Nothing + +-- ----------------------------------------------------------------------------- +-- Top-level of the instruction selector + +-- See Note [Keeping track of the current block] for why +-- we pass the BlockId. +stmtsToInstrs :: BlockId -- ^ Basic block these statement will start to be placed in. + -> [CmmNode O O] -- ^ Cmm Statement + -> NatM (InstrBlock, BlockId) -- ^ Resulting instruction +stmtsToInstrs bid stmts = + go bid stmts nilOL + where + go bid [] instrs = return (instrs,bid) + go bid (s:stmts) instrs = do + (instrs',bid') <- stmtToInstrs bid s + -- If the statement introduced a new block, we use that one + let !newBid = fromMaybe bid bid' + go newBid stmts (instrs `appOL` instrs') + +-- | `bid` refers to the current block and is used to update the CFG +-- if new blocks are inserted in the control flow. +-- See Note [Keeping track of the current block] for more details. 
+stmtToInstrs :: BlockId -- ^ Basic block this statement will start to be placed in. + -> CmmNode e x + -> NatM (InstrBlock, Maybe BlockId) + -- ^ Instructions, and bid of new block if successive + -- statements are placed in a different basic block. +stmtToInstrs bid stmt = do + -- traceM $ "-- -------------------------- stmtToInstrs -------------------------- --\n" + -- ++ showSDocUnsafe (ppr stmt) + config <- getConfig + platform <- getPlatform + case stmt of + CmmUnsafeForeignCall target result_regs args + -> genCCall target result_regs args bid + + _ -> (,Nothing) <$> case stmt of + CmmComment s -> return (unitOL (COMMENT (ftext s))) + CmmTick {} -> return nilOL + + CmmAssign reg src + | isFloatType ty -> assignReg_FltCode format reg src + | otherwise -> assignReg_IntCode format reg src + where ty = cmmRegType reg + format = cmmTypeFormat ty + + CmmStore addr src _alignment + | isFloatType ty -> assignMem_FltCode format addr src + | otherwise -> assignMem_IntCode format addr src + where ty = cmmExprType platform src + format = cmmTypeFormat ty + + CmmBranch id -> genBranch id + + --We try to arrange blocks such that the likely branch is the fallthrough + --in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here. + CmmCondBranch arg true false _prediction -> + genCondBranch bid true false arg + + CmmSwitch arg ids -> genSwitch config arg ids + + CmmCall { cml_target = arg } -> genJump arg + + CmmUnwind _regs -> return nilOL + + _ -> pprPanic "stmtToInstrs: statement should have been cps'd away" (pdoc platform stmt) + +-------------------------------------------------------------------------------- +-- | 'InstrBlock's are the insn sequences generated by the insn selectors. +-- They are really trees of insns to facilitate fast appending, where a +-- left-to-right traversal yields the insns in the correct order. +-- +type InstrBlock + = OrdList Instr + +-- | Register's passed up the tree. 
If the stix code forces the register +-- to live in a pre-decided machine register, it comes out as @Fixed@; +-- otherwise, it comes out as @Any@, and the parent can decide which +-- register to put it in. +-- +data Register + = Fixed Format Reg InstrBlock + | Any Format (Reg -> InstrBlock) + +-- | Sometimes we need to change the Format of a register. Primarily during +-- conversion. +swizzleRegisterRep :: Format -> Register -> Register +swizzleRegisterRep format (Fixed _ reg code) = Fixed format reg code +swizzleRegisterRep format (Any _ codefn) = Any format codefn + +-- | Grab the Reg for a CmmReg +getRegisterReg :: Platform -> CmmReg -> Reg + +getRegisterReg _ (CmmLocal (LocalReg u pk)) + = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk) + +getRegisterReg platform (CmmGlobal reg@(GlobalRegUse mid _)) + = case globalRegMaybe platform mid of + Just reg -> RegReal reg + Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal reg) + -- By this stage, the only MagicIds remaining should be the + -- ones which map to a real machine register on this + -- platform. Hence if it's not mapped to a registers something + -- went wrong earlier in the pipeline. + +-- ----------------------------------------------------------------------------- +-- General things for putting together code sequences + +-- | The dual to getAnyReg: compute an expression into a register, but +-- we don't mind which one it is. +getSomeReg :: CmmExpr -> NatM (Reg, Format, InstrBlock) +getSomeReg expr = do + r <- getRegister expr + case r of + Any rep code -> do + tmp <- getNewRegNat rep + return (tmp, rep, code tmp) + Fixed rep reg code -> + return (reg, rep, code) + +-- TODO OPT: we might be able give getRegister +-- a hint, what kind of register we want. 
+getFloatReg :: HasCallStack => CmmExpr -> NatM (Reg, Format, InstrBlock) +getFloatReg expr = do + r <- getRegister expr + case r of + Any rep code | isFloatFormat rep -> do + tmp <- getNewRegNat rep + return (tmp, rep, code tmp) + Any II32 code -> do + tmp <- getNewRegNat FF32 + return (tmp, FF32, code tmp) + Any II64 code -> do + tmp <- getNewRegNat FF64 + return (tmp, FF64, code tmp) + Any _w _code -> do + config <- getConfig + pprPanic "can't do getFloatReg on" (pdoc (ncgPlatform config) expr) + -- can't do much for fixed. + Fixed rep reg code -> + return (reg, rep, code) + +-- TODO: TODO, bounds. We can't put any immediate +-- value in. They are constrained. +-- See Ticket 19911 +litToImm' :: CmmLit -> NatM (Operand, InstrBlock) +litToImm' lit = return (OpImm (litToImm lit), nilOL) + +getRegister :: CmmExpr -> NatM Register +getRegister e = do + config <- getConfig + getRegister' config (ncgPlatform config) e + +-- | The register width to be used for an operation on the given width +-- operand. +opRegWidth :: Width -> Width +opRegWidth W64 = W64 -- x +opRegWidth W32 = W32 -- w +opRegWidth W16 = W32 -- w +opRegWidth W8 = W32 -- w +opRegWidth w = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w) + +-- Note [Signed arithmetic on RISCV64] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- Handling signed arithmetic on sub-word-size values on RISCV64 is a bit +-- tricky as Cmm's type system does not capture signedness. While 32-bit values +-- are fairly easy to handle due to RISCV64's 32-bit instruction variants +-- (denoted by use of %wN registers), 16- and 8-bit values require quite some +-- care. +-- +-- We handle 16-and 8-bit values by using the 32-bit operations and +-- sign-/zero-extending operands and truncate results as necessary. For +-- simplicity we maintain the invariant that a register containing a +-- sub-word-size value always contains the zero-extended form of that value +-- in between operations. 
+-- +-- For instance, consider the program, +-- +-- test(bits64 buffer) +-- bits8 a = bits8[buffer]; +-- bits8 b = %mul(a, 42); +-- bits8 c = %not(b); +-- bits8 d = %shrl(c, 4::bits8); +-- return (d); +-- } +-- +-- This program begins by loading `a` from memory, for which we use a +-- zero-extended byte-size load. We next sign-extend `a` to 32-bits, and use a +-- 32-bit multiplication to compute `b`, and truncate the result back down to +-- 8-bits. +-- +-- Next we compute `c`: The `%not` requires no extension of its operands, but +-- we must still truncate the result back down to 8-bits. Finally the `%shrl` +-- requires no extension and no truncate since we can assume that +-- `c` is zero-extended. +-- +-- The "RISC-V Sign Extension Optimizations" LLVM tech talk presentation by +-- Craig Topper covers possible future improvements +-- (https://llvm.org/devmtg/2022-11/slides/TechTalk21-RISC-VSignExtensionOptimizations.pdf) +-- +-- TODO: +-- Don't use Width in Operands +-- Instructions should rather carry a RegWidth +-- +-- Note [Handling PIC on RV64] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- RV64 does not have a special PIC register, the general approach is to +-- simply go through the GOT, and there is assembly support for this: +-- +-- rv64 assembly has a `la` (load address) pseudo-instruction, that allows +-- loading a label, ... into a register. The instruction is desugared into +-- +-- 1: lui rd1, %pcrel_hi(label) +-- addi rd1, %pcrel_lo(1b) +-- +-- See https://sourceware.org/binutils/docs/as/RISC_002dV_002dModifiers.html, +-- PIC can be enabled/disabled through +-- +-- .option pic +-- +-- See https://sourceware.org/binutils/docs/as/RISC_002dV_002dDirectives.html#RISC_002dV_002dDirectives +-- +-- CmmGlobal @PicBaseReg@'s are generated in @GHC.CmmToAsm.PIC@ in the +-- @cmmMakePicReference@. This is in turn called from @cmmMakeDynamicReference@ +-- also in @Cmm.CmmToAsm.PIC@ from where it is also exported. There are two +-- callsites for this. 
One is in this module to produce the @target@ in @genCCall@ +-- the other is in @GHC.CmmToAsm@ in @cmmExprNative@. +-- +-- Conceptually we do not want any special PicBaseReg to be used on RV64. If +-- we want to distinguish between symbol loading, we need to address this through +-- the way we load it, not through a register. +-- + +getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register +-- OPTIMIZATION WARNING: CmmExpr rewrites +-- 1. Rewrite: Reg + (-n) => Reg - n +-- TODO: this expression shouldn't even be generated to begin with. +getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt i w1)]) | i < 0 + = getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt (-i) w1)]) + +getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)]) | i < 0 + = getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt (-i) w1)]) + +-- Generic case. +getRegister' config plat expr = + case expr of + CmmReg (CmmGlobal (GlobalRegUse PicBaseReg _)) -> + pprPanic "getRegisterReg-memory" (ppr PicBaseReg) + + CmmLit lit -> + case lit of + CmmInt 0 w -> pure $ Fixed (intFormat w) zeroReg nilOL + CmmInt i w -> + -- narrowU is important: Negative immediates may be + -- sign-extended on load! + let imm = OpImm . 
ImmInteger $ narrowU w i + in + pure (Any (intFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg w dst) imm))) + + -- floatToBytes (fromRational f) + CmmFloat 0 w -> do + (op, imm_code) <- litToImm' lit + return (Any (floatFormat w) (\dst -> imm_code `snocOL` annExpr expr (MOV (OpReg w dst) op))) + + CmmFloat _f W8 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr) + CmmFloat _f W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr) + CmmFloat f W32 -> do + let word = castFloatToWord32 (fromRational f) :: Word32 + tmp <- getNewRegNat (intFormat W32) + return (Any (floatFormat W32) (\dst -> toOL [ annExpr expr + $ MOV (OpReg W32 tmp) (OpImm (ImmInteger (fromIntegral word))) + , MOV (OpReg W32 dst) (OpReg W32 tmp) + ])) + CmmFloat f W64 -> do + let word = castDoubleToWord64 (fromRational f) :: Word64 + tmp <- getNewRegNat (intFormat W64) + return (Any (floatFormat W64) (\dst -> toOL [ annExpr expr + $ MOV (OpReg W64 tmp) (OpImm (ImmInteger (fromIntegral word))) + , MOV (OpReg W64 dst) (OpReg W64 tmp) + ])) + CmmFloat _f _w -> pprPanic "getRegister' (CmmLit:CmmFloat), unsupported float lit" (pdoc plat expr) + CmmVec _ -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr) + CmmLabel _lbl -> do + (op, imm_code) <- litToImm' lit + let rep = cmmLitType plat lit + format = cmmTypeFormat rep + return (Any format (\dst -> imm_code `snocOL` annExpr expr (LDR format (OpReg (formatToWidth format) dst) op))) + + CmmLabelOff _lbl off | isNbitEncodeable 12 (fromIntegral off) -> do + (op, imm_code) <- litToImm' lit + let rep = cmmLitType plat lit + format = cmmTypeFormat rep + return (Any format (\dst -> imm_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op)) + + CmmLabelOff lbl off -> do + (op, imm_code) <- litToImm' (CmmLabel lbl) + let rep = cmmLitType plat lit + format = cmmTypeFormat rep + width = typeWidth rep + (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral 
off) width) + return (Any format (\dst -> imm_code `appOL` off_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op `snocOL` ADD (OpReg width dst) (OpReg width dst) (OpReg width off_r))) + + CmmLabelDiffOff {} -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr) + CmmBlock _ -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr) + CmmHighStackMark -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr) + CmmLoad mem rep _ -> do + let format = cmmTypeFormat rep + width = typeWidth rep + Amode addr addr_code <- getAmode plat width mem + case width of + w | w <= W64 -> + -- Load without sign-extension. See Note [Signed arithmetic on RISCV64] + pure (Any format (\dst -> addr_code `snocOL` LDRU format (OpReg width dst) (OpAddr addr))) + _ -> + pprPanic ("Width too big! Cannot load: " ++ show width) (pdoc plat expr) + + CmmStackSlot _ _ + -> pprPanic "getRegister' (CmmStackSlot): " (pdoc plat expr) + CmmReg reg + -> return (Fixed (cmmTypeFormat (cmmRegType reg)) + (getRegisterReg plat reg) + nilOL) + CmmRegOff reg off | isNbitEncodeable 12 (fromIntegral off) -> do + getRegister' config plat $ + CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] + where width = typeWidth (cmmRegType reg) + + CmmRegOff reg off -> do + (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width) + (reg, _format, code) <- getSomeReg $ CmmReg reg + return $ Any (intFormat width) (\dst -> off_code `appOL` code `snocOL` ADD (OpReg width dst) (OpReg width reg) (OpReg width off_r)) + where width = typeWidth (cmmRegType reg) + + + + -- for MachOps, see GHC.Cmm.MachOp + -- For CmmMachOp, see GHC.Cmm.Expr + CmmMachOp op [e] -> do + (reg, _format, code) <- getSomeReg e + case op of + MO_Not w -> return $ Any (intFormat w) $ \dst -> + let w' = opRegWidth w + in code `snocOL` + -- pseudo instruction `not` is `xori rd, rs, -1` + ann (text "not") (XORI (OpReg w' dst) (OpReg w' reg) (OpImm (ImmInt 
(-1)))) `appOL` + truncateReg w' w dst -- See Note [Signed arithmetic on RISCV64] + + MO_S_Neg w -> negate code w reg + MO_F_Neg w -> return $ Any (floatFormat w) (\dst -> code `snocOL` NEG (OpReg w dst) (OpReg w reg)) + + -- TODO: Can this case happen? + MO_SF_Conv from to | from < W32 -> do + -- extend to the smallest available representation + (reg_x, code_x) <- signExtendReg from W32 reg + pure $ Any (floatFormat to) + (\dst -> code `appOL` code_x `snocOL` annExpr expr (SCVTF (OpReg to dst) (OpReg from reg_x))) -- (Signed ConVerT Float) + MO_SF_Conv from to -> pure $ Any (floatFormat to) (\dst -> code `snocOL` annExpr expr (SCVTF (OpReg to dst) (OpReg from reg))) -- (Signed ConVerT Float) + -- TODO: Can this case happen? + MO_FS_Conv from to | to < W32 -> pure $ Any (intFormat to) (\dst -> + code `snocOL` + -- W32 is the smallest width to convert to. Decrease width afterwards. + annExpr expr (FCVTZS (OpReg W32 dst) (OpReg from reg)) `appOL` + signExtendAdjustPrecission W32 to dst dst) -- (float convert (-> zero) signed) + MO_FS_Conv from to -> pure $ Any (intFormat to) (\dst -> + code `snocOL` + annExpr expr (FCVTZS (OpReg to dst) (OpReg from reg)) `appOL` -- (float convert (-> zero) signed) + truncateReg from to dst) + MO_UU_Conv from to | from <= to -> pure $ Any (intFormat to) (\dst -> + code `snocOL` + annExpr e (MOV (OpReg to dst) (OpReg from reg)) + ) + + MO_UU_Conv from to -> pure $ Any (intFormat to) (\dst -> + code `snocOL` + annExpr e (MOV (OpReg from dst) (OpReg from reg)) `appOL` + truncateReg from to dst + ) + MO_SS_Conv from to -> ss_conv from to reg code + MO_FF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` annExpr e (FCVT (OpReg to dst) (OpReg from reg))) + + -- Conversions + MO_XX_Conv _from to -> swizzleRegisterRep (intFormat to) <$> getRegister e + MO_AlignmentCheck align wordWidth -> do + reg <- getRegister' config plat e + addAlignmentCheck align wordWidth reg + + _ -> pprPanic "getRegister' (monadic CmmMachOp):" 
(pdoc plat expr) + where + -- In the case of 16- or 8-bit values we need to sign-extend to 32-bits + -- See Note [Signed arithmetic on RISCV64]. + negate code w reg = do + let w' = opRegWidth w + (reg', code_sx) <- signExtendReg w w' reg + return $ Any (intFormat w) $ \dst -> + code `appOL` + code_sx `snocOL` + NEG (OpReg w' dst) (OpReg w' reg') `appOL` + truncateReg w' w dst + + ss_conv from to reg code + | from < to = do + pure $ Any (intFormat to) $ \dst -> + code + `appOL` signExtend from to reg dst + `appOL` truncateReg from to dst + | from > to = + pure $ Any (intFormat to) $ \dst -> + code + `appOL` toOL + [ ann + (text "MO_SS_Conv: narrow register signed" <+> ppr reg <+> ppr from <> text "->" <> ppr to) + (LSL (OpReg to dst) (OpReg from reg) (OpImm (ImmInt shift))), + -- signed right shift + ASR (OpReg to dst) (OpReg to dst) (OpImm (ImmInt shift)) + ] + `appOL` truncateReg from to dst + | otherwise = + -- No conversion necessary: Just copy. + pure $ Any (intFormat from) $ \dst -> + code `snocOL` MOV (OpReg from dst) (OpReg from reg) + where + shift = 64 - (widthInBits from - widthInBits to) + +-- Dyadic machops: + -- + -- The general idea is: + -- compute x<i> <- x + -- compute x<j> <- y + -- OP x<r>, x<i>, x<j> + -- + -- TODO: for now we'll only implement the 64bit versions. And rely on the + -- fallthrough to alert us if things go wrong! + -- OPTIMIZATION WARNING: Dyadic CmmMachOp destructuring + -- 0. TODO This should not exist! Rewrite: Reg +- 0 -> Reg + CmmMachOp (MO_Add _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr' + CmmMachOp (MO_Sub _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr' + -- 1. Compute Reg +/- n directly. + -- For Add/Sub we can directly encode 12bits, or 12bits lsl #12. 
+ CmmMachOp (MO_Add w) [CmmReg reg, CmmLit (CmmInt n _)] + | fitsIn12bitImm n -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ADD (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n)))) + -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12. + where w' = formatToWidth (cmmTypeFormat (cmmRegType reg)) + r' = getRegisterReg plat reg + CmmMachOp (MO_Sub w) [CmmReg reg, CmmLit (CmmInt n _)] + | fitsIn12bitImm n -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (SUB (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n)))) + -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12. + where w' = formatToWidth (cmmTypeFormat (cmmRegType reg)) + r' = getRegisterReg plat reg + + CmmMachOp (MO_U_Quot w) [x, y] | w == W8 || w == W16 -> do + (reg_x, format_x, code_x) <- getSomeReg x + (reg_y, format_y, code_y) <- getSomeReg y + return $ Any (intFormat w) (\dst -> code_x `appOL` + truncateReg (formatToWidth format_x) w reg_x `appOL` + code_y `appOL` + truncateReg (formatToWidth format_y) w reg_y `snocOL` + annExpr expr (DIVU (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))) + + -- 2. Shifts. x << n, x >> n. 
+ CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)] | w == W32, 0 <= n, n < 32 -> do + (reg_x, _format_x, code_x) <- getSomeReg x + return $ Any (intFormat w) (\dst -> + code_x `snocOL` + annExpr expr (LSL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))) `appOL` + truncateReg w w dst + ) + CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)] | w == W64, 0 <= n, n < 64 -> do + (reg_x, _format_x, code_x) <- getSomeReg x + return $ Any (intFormat w) (\dst -> + code_x `snocOL` + annExpr expr (LSL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))) `appOL` + truncateReg w w dst + ) + + CmmMachOp (MO_S_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do + (reg_x, format_x, code_x) <- getSomeReg x + (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x + return $ Any (intFormat w) ( + \dst -> + code_x `appOL` code_x' `snocOL` annExpr expr (ASR (OpReg w dst) (OpReg w reg_x') (OpImm (ImmInteger n))) + ) + CmmMachOp (MO_S_Shr w) [x, y] -> do + (reg_x, format_x, code_x) <- getSomeReg x + (reg_y, _format_y, code_y) <- getSomeReg y + (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x + return $ Any (intFormat w) ( + \dst -> + code_x `appOL` code_x' `appOL` code_y `snocOL` annExpr expr (ASR (OpReg w dst) (OpReg w reg_x') (OpReg w reg_y)) + ) + + CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)] | w == W8, 0 <= n, n < 8 -> do + (reg_x, format_x, code_x) <- getSomeReg x + return $ Any (intFormat w) (\dst -> code_x `appOL` truncateReg (formatToWidth format_x) w reg_x `snocOL` annExpr expr (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))) + CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)] | w == W16, 0 <= n, n < 16 -> do + (reg_x, format_x, code_x) <- getSomeReg x + return $ Any (intFormat w) (\dst -> code_x `appOL` truncateReg (formatToWidth format_x) w reg_x `snocOL` annExpr expr (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))) + CmmMachOp (MO_U_Shr w) [x, y] | w == W8 || w == W16 -> do + (reg_x, format_x, code_x) <- getSomeReg x + 
(reg_y, _format_y, code_y) <- getSomeReg y + return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `appOL` truncateReg (formatToWidth format_x) w reg_x `snocOL` annExpr expr (LSR (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))) + CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)] | w == W32, 0 <= n, n < 32 -> do + (reg_x, _format_x, code_x) <- getSomeReg x + return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))) + CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)] | w == W64, 0 <= n, n < 64 -> do + (reg_x, _format_x, code_x) <- getSomeReg x + return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))) + + -- 3. Logic &&, || + CmmMachOp (MO_And w) [CmmReg reg, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> + return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (AND (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n)))) + where w' = formatToWidth (cmmTypeFormat (cmmRegType reg)) + r' = getRegisterReg plat reg + + CmmMachOp (MO_Or w) [CmmReg reg, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> + return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ORI (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n)))) + where w' = formatToWidth (cmmTypeFormat (cmmRegType reg)) + r' = getRegisterReg plat reg + + -- Generic case. + CmmMachOp op [x, y] -> do + let + -- A "plain" operation. + bitOp w op = do + -- compute x<m> <- x + -- compute x<o> <- y + -- <OP> x<n>, x<m>, x<o> + (reg_x, format_x, code_x) <- getSomeReg x + (reg_y, format_y, code_y) <- getSomeReg y + massertPpr (isIntFormat format_x == isIntFormat format_y) $ text "bitOp: incompatible" + return $ Any (intFormat w) (\dst -> + code_x `appOL` + code_y `appOL` + op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)) + + -- A (potentially signed) integer operation. + -- In the case of 8- and 16-bit signed arithmetic we must first + -- sign-extend both arguments to 32-bits. 
+ -- See Note [Signed arithmetic on RISCV64]. + intOp is_signed w op = do + -- compute x<m> <- x + -- compute x<o> <- y + -- <OP> x<n>, x<m>, x<o> + (reg_x, format_x, code_x) <- getSomeReg x + (reg_y, format_y, code_y) <- getSomeReg y + massertPpr (isIntFormat format_x && isIntFormat format_y) $ text "intOp: non-int" + -- This is the width of the registers on which the operation + -- should be performed. + let w' = opRegWidth w + signExt r + | not is_signed = return (r, nilOL) + | otherwise = signExtendReg w w' r + (reg_x_sx, code_x_sx) <- signExt reg_x + (reg_y_sx, code_y_sx) <- signExt reg_y + return $ Any (intFormat w) $ \dst -> + code_x `appOL` + code_y `appOL` + -- sign-extend both operands + code_x_sx `appOL` + code_y_sx `appOL` + op (OpReg w' dst) (OpReg w' reg_x_sx) (OpReg w' reg_y_sx) `appOL` + truncateReg w' w dst -- truncate back to the operand's original width + + floatOp w op = do + (reg_fx, format_x, code_fx) <- getFloatReg x + (reg_fy, format_y, code_fy) <- getFloatReg y + massertPpr (isFloatFormat format_x && isFloatFormat format_y) $ text "floatOp: non-float" + return $ Any (floatFormat w) (\dst -> code_fx `appOL` code_fy `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy)) + + -- need a special one for conditionals, as they return ints + floatCond w op = do + (reg_fx, format_x, code_fx) <- getFloatReg x + (reg_fy, format_y, code_fy) <- getFloatReg y + massertPpr (isFloatFormat format_x && isFloatFormat format_y) $ text "floatCond: non-float" + return $ Any (intFormat w) (\dst -> code_fx `appOL` code_fy `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy)) + + case op of + -- Integer operations + -- Add/Sub should only be Integer Options. 
+ MO_Add w -> intOp False w (\d x y -> unitOL $ annExpr expr (ADD d x y)) + -- TODO: Handle sub-word case + MO_Sub w -> intOp False w (\d x y -> unitOL $ annExpr expr (SUB d x y)) + + -- TODO: Check if this comment is correct + -- Note [CSET] + -- ~~~~~~~~~~~ + -- Setting conditional flags: the architecture internally knows the + -- following flag bits. And based on thsoe comparisons as in the + -- table below. + -- + -- 31 30 29 28 + -- .---+---+---+---+-- - - + -- | N | Z | C | V | + -- '---+---+---+---+-- - - + -- Negative + -- Zero + -- Carry + -- oVerflow + -- + -- .------+-------------------------------------+-----------------+----------. + -- | Code | Meaning | Flags | Encoding | + -- |------+-------------------------------------+-----------------+----------| + -- | EQ | Equal | Z = 1 | 0000 | + -- | NE | Not Equal | Z = 0 | 0001 | + -- | HI | Unsigned Higher | C = 1 && Z = 0 | 1000 | + -- | HS | Unsigned Higher or Same | C = 1 | 0010 | + -- | LS | Unsigned Lower or Same | C = 0 || Z = 1 | 1001 | + -- | LO | Unsigned Lower | C = 0 | 0011 | + -- | GT | Signed Greater Than | Z = 0 && N = V | 1100 | + -- | GE | Signed Greater Than or Equal | N = V | 1010 | + -- | LE | Signed Less Than or Equal | Z = 1 || N /= V | 1101 | + -- | LT | Signed Less Than | N /= V | 1011 | + -- | CS | Carry Set (Unsigned Overflow) | C = 1 | 0010 | + -- | CC | Carry Clear (No Unsigned Overflow) | C = 0 | 0011 | + -- | VS | Signed Overflow | V = 1 | 0110 | + -- | VC | No Signed Overflow | V = 0 | 0111 | + -- | MI | Minus, Negative | N = 1 | 0100 | + -- | PL | Plus, Positive or Zero (!) | N = 0 | 0101 | + -- | AL | Always | Any | 1110 | + -- | NV | Never | Any | 1111 | + --- '-------------------------------------------------------------------------' + + -- N.B. We needn't sign-extend sub-word size (in)equality comparisons + -- since we don't care about ordering. 
+ MO_Eq w -> bitOp w (\d x y -> unitOL $ annExpr expr (CSET d x y EQ)) + MO_Ne w -> bitOp w (\d x y -> unitOL $ annExpr expr (CSET d x y NE)) + + -- Signed multiply/divide + MO_Mul w -> intOp True w (\d x y -> unitOL $ annExpr expr (MUL d x y)) + MO_S_MulMayOflo w -> do_mul_may_oflo w x y + MO_S_Quot w -> intOp True w (\d x y -> unitOL $ annExpr expr (DIV d x y)) + + MO_S_Rem w -> intOp True w (\d x y -> unitOL $ annExpr expr (REM d x y)) + + -- Unsigned multiply/divide + MO_U_Quot w -> intOp False w (\d x y -> unitOL $ annExpr expr (DIVU d x y)) + MO_U_Rem w -> intOp False w (\d x y -> unitOL $ annExpr expr (REMU d x y)) + + -- Signed comparisons -- see Note [CSET) + MO_S_Ge w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SGE)) + MO_S_Le w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SLE)) + MO_S_Gt w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SGT)) + MO_S_Lt w -> intOp True w (\d x y -> unitOL $ annExpr expr (CSET d x y SLT)) + + -- Unsigned comparisons + MO_U_Ge w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y UGE)) + MO_U_Le w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y ULE)) + MO_U_Gt w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y UGT)) + MO_U_Lt w -> intOp False w (\d x y -> unitOL $ annExpr expr (CSET d x y ULT)) + + -- Floating point arithmetic + MO_F_Add w -> floatOp w (\d x y -> unitOL $ annExpr expr (ADD d x y)) + MO_F_Sub w -> floatOp w (\d x y -> unitOL $ annExpr expr (SUB d x y)) + MO_F_Mul w -> floatOp w (\d x y -> unitOL $ annExpr expr (MUL d x y)) + MO_F_Quot w -> floatOp w (\d x y -> unitOL $ annExpr expr (DIV d x y)) + + -- Floating point comparison + MO_F_Eq w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y EQ)) + MO_F_Ne w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y NE)) + + MO_F_Ge w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y FGE)) + MO_F_Le w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y 
FLE)) -- x <= y <=> y > x + MO_F_Gt w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y FGT)) + MO_F_Lt w -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET d x y FLT)) -- x < y <=> y >= x + + -- Bitwise operations + MO_And w -> bitOp w (\d x y -> unitOL $ annExpr expr (AND d x y)) + MO_Or w -> bitOp w (\d x y -> unitOL $ annExpr expr (OR d x y)) + MO_Xor w -> bitOp w (\d x y -> unitOL $ annExpr expr (XOR d x y)) + MO_Shl w -> intOp False w (\d x y -> unitOL $ annExpr expr (LSL d x y)) + MO_U_Shr w -> intOp False w (\d x y -> unitOL $ annExpr expr (LSR d x y)) + MO_S_Shr w -> intOp True w (\d x y -> unitOL $ annExpr expr (ASR d x y)) + + op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ pprMachOp op <+> text "in" <+> pdoc plat expr + CmmMachOp _op _xs + -> pprPanic "getRegister' (variadic CmmMachOp): " (pdoc plat expr) + + where + isNbitEncodeable :: Int -> Integer -> Bool + isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift) + -- N.B. MUL does not set the overflow flag. + -- Return 0 when the operation cannot overflow, /= 0 otherwise + do_mul_may_oflo :: Width -> CmmExpr -> CmmExpr -> NatM Register + do_mul_may_oflo w _x _y | w > W64 = pprPanic "Cannot multiply larger than 64bit" (ppr w) + do_mul_may_oflo w@W64 x y = do + (reg_x, format_x, code_x) <- getSomeReg x + (reg_y, format_y, code_y) <- getSomeReg y + -- TODO: Can't we clobber reg_x and reg_y to save registers? 
+ lo <- getNewRegNat II64 + hi <- getNewRegNat II64 + -- TODO: Overhaul CSET: 3rd operand isn't needed for SNEZ + let nonSense = OpImm (ImmInt 0) + pure $ + Any + (intFormat w) + ( \dst -> + code_x + `appOL` signExtend (formatToWidth format_x) W64 reg_x reg_x + `appOL` code_y + `appOL` signExtend (formatToWidth format_y) W64 reg_y reg_y + `appOL` toOL + [ annExpr expr (SMULH (OpReg w hi) (OpReg w reg_x) (OpReg w reg_y)), + MUL (OpReg w lo) (OpReg w reg_x) (OpReg w reg_y), + ASR (OpReg w lo) (OpReg w lo) (OpImm (ImmInt (widthInBits W64 - 1))), + ann + (text "Set flag if result of MULH contains more than sign bits.") + (XOR (OpReg w hi) (OpReg w hi) (OpReg w lo)), + CSET (OpReg w dst) (OpReg w hi) nonSense NE + ] + ) + do_mul_may_oflo w x y = do + (reg_x, format_x, code_x) <- getSomeReg x + (reg_y, format_y, code_y) <- getSomeReg y + let width_x = formatToWidth format_x + width_y = formatToWidth format_y + if w > width_x && w > width_y + then + pure $ + Any + (intFormat w) + ( \dst -> + -- 8bit * 8bit cannot overflow 16bit + -- 16bit * 16bit cannot overflow 32bit + -- 32bit * 32bit cannot overflow 64bit + unitOL $ annExpr expr (ADD (OpReg w dst) zero (OpImm (ImmInt 0))) + ) + else do + let use32BitMul = w <= W32 && width_x <= W32 && width_y <= W32 + nonSense = OpImm (ImmInt 0) + if use32BitMul + then do + narrowedReg <- getNewRegNat II64 + pure $ + Any + (intFormat w) + ( \dst -> + code_x + `appOL` signExtend (formatToWidth format_x) W32 reg_x reg_x + `appOL` code_y + `appOL` signExtend (formatToWidth format_y) W32 reg_y reg_y + `snocOL` annExpr expr (MUL (OpReg W32 dst) (OpReg W32 reg_x) (OpReg W32 reg_y)) + `appOL` signExtendAdjustPrecission W32 w dst narrowedReg + `appOL` toOL + [ ann + (text "Check if the multiplied value fits in the narrowed register") + (SUB (OpReg w dst) (OpReg w dst) (OpReg w narrowedReg)), + CSET (OpReg w dst) (OpReg w dst) nonSense NE + ] + ) + else + pure $ + Any + (intFormat w) + ( \dst -> + -- Do not handle this unlikely case. 
Just tell that it may overflow. + unitOL $ annExpr expr (ADD (OpReg w dst) zero (OpImm (ImmInt 1))) + ) + +-- | Instructions to sign-extend the value in the given register from width @w@ +-- up to width @w'@. +signExtendReg :: Width -> Width -> Reg -> NatM (Reg, OrdList Instr) +signExtendReg w _w' r | w == W64 = pure (r, nilOL) +signExtendReg w w' r = do + r' <- getNewRegNat (intFormat w') + let instrs = signExtend w w' r r' + pure (r', instrs) + +-- | Sign extends to 64bit, if needed +-- +-- Source `Reg` @r@ stays untouched, while the conversion happens on destination +-- `Reg` @r'@. +signExtend :: Width -> Width -> Reg -> Reg -> OrdList Instr +signExtend w w' _r _r' | w > w' = pprPanic "This is not a sign extension, but a truncation." $ ppr w <> text "->" <+> ppr w' +signExtend w w' _r _r' | w > W64 || w' > W64 = pprPanic "Unexpected width (max is 64bit):" $ ppr w <> text "->" <+> ppr w' +signExtend w w' r r' | w == W64 && w' == W64 && r == r' = nilOL +signExtend w w' r r' | w == W64 && w' == W64 = unitOL $ MOV (OpReg w' r') (OpReg w r) +signExtend w w' r r' + | w == W32 && w' == W64 = + unitOL $ + ann + (text "sign-extend register (SEXT.W)" <+> ppr r <+> ppr w <> text "->" <> ppr w') + -- `ADDIW r r 0` is the pseudo-op SEXT.W + (ADD (OpReg w' r') (OpReg w r) (OpImm (ImmInt 0))) +signExtend w w' r r' = + toOL + [ ann + (text "narrow register signed" <+> ppr r <> char ':' <> ppr w <> text "->" <> ppr r <> char ':' <> ppr w') + (LSL (OpReg w' r') (OpReg w r) (OpImm (ImmInt shift))), + -- signed (arithmetic) right shift + ASR (OpReg w' r') (OpReg w' r') (OpImm (ImmInt shift)) + ] + where + shift = 64 - widthInBits w + +-- | Sign extends to 64bit, if needed and reduces the precission to the target `Width` (@w'@) +-- +-- Source `Reg` @r@ stays untouched, while the conversion happens on destination +-- `Reg` @r'@. 
+signExtendAdjustPrecission :: Width -> Width -> Reg -> Reg -> OrdList Instr +signExtendAdjustPrecission w w' _r _r' | w > W64 || w' > W64 = pprPanic "Unexpected width (max is 64bit):" $ ppr w <> text "->" <+> ppr w' +signExtendAdjustPrecission w w' r r' | w == W64 && w' == W64 && r == r' = nilOL +signExtendAdjustPrecission w w' r r' | w == W64 && w' == W64 = unitOL $ MOV (OpReg w' r') (OpReg w r) +signExtendAdjustPrecission w w' r r' + | w == W32 && w' == W64 = + unitOL $ + ann + (text "sign-extend register (SEXT.W)" <+> ppr r <+> ppr w <> text "->" <> ppr w') + -- `ADDIW r r 0` is the pseudo-op SEXT.W + (ADD (OpReg w' r') (OpReg w r) (OpImm (ImmInt 0))) +signExtendAdjustPrecission w w' r r' | w > w' = + toOL + [ ann + (text "narrow register signed" <+> ppr r <> char ':' <> ppr w <> text "->" <> ppr r <> char ':' <> ppr w') + (LSL (OpReg w' r') (OpReg w r) (OpImm (ImmInt shift))), + -- signed (arithmetic) right shift + ASR (OpReg w' r') (OpReg w' r') (OpImm (ImmInt shift)) + ] + where + shift = 64 - widthInBits w' +signExtendAdjustPrecission w w' r r' = + toOL + [ ann + (text "sign extend register" <+> ppr r <> char ':' <> ppr w <> text "->" <> ppr r <> char ':' <> ppr w') + (LSL (OpReg w' r') (OpReg w r) (OpImm (ImmInt shift))), + -- signed (arithmetic) right shift + ASR (OpReg w' r') (OpReg w' r') (OpImm (ImmInt shift)) + ] + where + shift = 64 - widthInBits w + +-- | Instructions to truncate the value in the given register from width @w@ +-- to width @w'@. +-- +-- In other words, it just cuts the width out of the register. N.B.: This +-- ignores signedness (no sign extension takes place)! 
+truncateReg :: Width -> Width -> Reg -> OrdList Instr +truncateReg _w w' _r | w' == W64 = nilOL +truncateReg _w w' r | w' > W64 = pprPanic "Cannot truncate to width bigger than register size (max is 64bit):" $ text (show r) <> char ':' <+> ppr w' +truncateReg w _w' r | w > W64 = pprPanic "Unexpected register size (max is 64bit):" $ text (show r) <> char ':' <+> ppr w +truncateReg w w' r = + toOL + [ ann + (text "truncate register" <+> ppr r <+> ppr w <> text "->" <> ppr w') + (LSL (OpReg w' r) (OpReg w r) (OpImm (ImmInt shift))), + -- SHL ignores signedness! + LSR (OpReg w' r) (OpReg w r) (OpImm (ImmInt shift)) + ] + where + shift = 64 - widthInBits w' + +-- | Given a 'Register', produce a new 'Register' with an instruction block +-- which will check the value for alignment. Used for @-falignment-sanitisation@. +addAlignmentCheck :: Int -> Width -> Register -> NatM Register +addAlignmentCheck align wordWidth reg = do + jumpReg <- getNewRegNat II64 + cmpReg <- getNewRegNat II64 + okayLblId <- getBlockIdNat + + pure $ case reg of + Fixed fmt reg code -> Fixed fmt reg (code `appOL` check fmt jumpReg cmpReg okayLblId reg) + Any fmt f -> Any fmt (\reg -> f reg `appOL` check fmt jumpReg cmpReg okayLblId reg) + where + -- TODO: Reduce amount of parameters by making this a let binding + check :: Format -> Reg -> Reg -> BlockId -> Reg -> InstrBlock + check fmt jumpReg cmpReg okayLblId reg = + let width = formatToWidth fmt + in assert (not $ isFloatFormat fmt) + $ toOL + [ ann + (text "Alignment check - alignment: " <> int align <> text ", word width: " <> text (show wordWidth)) + (AND (OpReg width cmpReg) (OpReg width reg) (OpImm $ ImmInt $ align - 1)) + , BCOND EQ (OpReg width cmpReg) zero (TBlock okayLblId) + , COMMENT (text "Alignment check failed") + , LDR II64 (OpReg W64 jumpReg) (OpImm $ ImmCLbl mkBadAlignmentLabel) + , J (TReg jumpReg) + , NEWBLOCK okayLblId + ] + +-- ----------------------------------------------------------------------------- +-- The 'Amode' type: 
Memory addressing modes passed up the tree. +data Amode = Amode AddrMode InstrBlock + +-- | Provide the value of a `CmmExpr` with an `Amode` +-- +-- N.B. this function should be used to provide operands to load and store +-- instructions with signed 12bit wide immediates (S & I types). For other +-- immediate sizes and formats (e.g. B type uses multiples of 2) this function +-- would need to be adjusted. +getAmode :: Platform + -> Width -- ^ width of loaded value + -> CmmExpr + -> NatM Amode +-- TODO: Specialize stuff we can destructure here. + +-- LDR/STR: Immediate can be represented with 12bits +getAmode platform w (CmmRegOff reg off) + | w <= W64, fitsIn12bitImm off + = return $ Amode (AddrRegImm reg' off') nilOL + where reg' = getRegisterReg platform reg + off' = ImmInt off + +-- For Stores we often see something like this: +-- CmmStore (CmmMachOp (MO_Add w) [CmmLoad expr, CmmLit (CmmInt n w')]) (expr2) +-- E.g. a CmmStoreOff really. This can be translated to `str $expr2, [$expr, #n ] +-- for `n` in range. +getAmode _platform _ (CmmMachOp (MO_Add _w) [expr, CmmLit (CmmInt off _w')]) + | fitsIn12bitImm off + = do (reg, _format, code) <- getSomeReg expr + return $ Amode (AddrRegImm reg (ImmInteger off)) code + +getAmode _platform _ (CmmMachOp (MO_Sub _w) [expr, CmmLit (CmmInt off _w')]) + | fitsIn12bitImm (-off) + = do (reg, _format, code) <- getSomeReg expr + return $ Amode (AddrRegImm reg (ImmInteger (-off))) code + +-- Generic case +getAmode _platform _ expr + = do (reg, _format, code) <- getSomeReg expr + return $ Amode (AddrReg reg) code + +-- ----------------------------------------------------------------------------- +-- Generating assignments + +-- Assignments are really at the heart of the whole code generation +-- business. Almost all top-level nodes of any real importance are +-- assignments, which correspond to loads, stores, or register +-- transfers. 
If we're really lucky, some of the register transfers +-- will go away, because we can use the destination register to +-- complete the code generation for the right hand side. This only +-- fails when the right hand side is forced into a fixed register +-- (e.g. the result of a call). + +assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock + +assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock + +assignMem_IntCode rep addrE srcE + = do + (src_reg, _format, code) <- getSomeReg srcE + platform <- getPlatform + let w = formatToWidth rep + Amode addr addr_code <- getAmode platform w addrE + return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE))) + `consOL` (code + `appOL` addr_code + `snocOL` STR rep (OpReg w src_reg) (OpAddr addr)) + +assignReg_IntCode _ reg src + = do + platform <- getPlatform + let dst = getRegisterReg platform reg + r <- getRegister src + return $ case r of + Any _ code -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` code dst + Fixed format freg fcode -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` (fcode `snocOL` MOV (OpReg (formatToWidth format) dst) (OpReg (formatToWidth format) freg)) + +-- Let's treat Floating point stuff +-- as integer code for now. Opaque. +assignMem_FltCode = assignMem_IntCode +assignReg_FltCode = assignReg_IntCode + +-- ----------------------------------------------------------------------------- +-- Jumps +-- AArch64 has 26bits for targets, whereas RiscV only has 20. +-- Thus we need to distinguish between far (outside of the) +-- current compilation unit. And regular branches. +-- RiscV has ±2MB of displacement, whereas AArch64 has ±128MB. 
+-- Thus for most branches we can get away with encoding it +-- directly in the instruction rather than always loading the +-- address into a register and then using that to jump. +-- Under the assumption that our linked build product is less than +-- ~2*128MB of TEXT, and there are no jump that span the whole +-- TEXT segment. +-- Something where riscv's compressed instruction might come in +-- handy. +genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock +genJump expr = do + (target, _format, code) <- getSomeReg expr + return (code `appOL` unitOL (annExpr expr (J (TReg target)))) + +-- ----------------------------------------------------------------------------- +-- Unconditional branches +genBranch :: BlockId -> NatM InstrBlock +genBranch = return . toOL . mkJumpInstr + +-- ----------------------------------------------------------------------------- +-- Conditional branches +genCondJump + :: BlockId + -> CmmExpr + -> NatM InstrBlock +genCondJump bid expr = do + case expr of + -- Optimized == 0 case. + CmmMachOp (MO_Eq w) [x, CmmLit (CmmInt 0 _)] -> do + (reg_x, _format_x, code_x) <- getSomeReg x + return $ code_x `snocOL` annExpr expr (BCOND EQ zero (OpReg w reg_x) (TBlock bid)) + + -- Optimized /= 0 case. + CmmMachOp (MO_Ne w) [x, CmmLit (CmmInt 0 _)] -> do + (reg_x, _format_x, code_x) <- getSomeReg x + return $ code_x `snocOL` annExpr expr (BCOND NE zero (OpReg w reg_x) (TBlock bid)) + + -- Generic case. + CmmMachOp mop [x, y] -> do + + let ubcond w cmp = do + -- compute both sides. 
+ (reg_x, format_x, code_x) <- getSomeReg x + (reg_y, format_y, code_y) <- getSomeReg y + let x' = OpReg w reg_x + y' = OpReg w reg_y + return $ case w of + w | w == W8 || w == W16 -> code_x `appOL` + truncateReg (formatToWidth format_x) w reg_x `appOL` + code_y `appOL` + truncateReg (formatToWidth format_y) w reg_y `appOL` + code_y `snocOL` + annExpr expr (BCOND cmp x' y' (TBlock bid)) + _ -> code_x `appOL` code_y `snocOL` annExpr expr (BCOND cmp x' y' (TBlock bid)) + + sbcond w cmp = do + -- compute both sides. + (reg_x, format_x, code_x) <- getSomeReg x + (reg_y, format_y, code_y) <- getSomeReg y + let x' = OpReg w reg_x + y' = OpReg w reg_y + return $ case w of + w | w `elem` [W8, W16, W32] -> + code_x + `appOL` signExtend (formatToWidth format_x) W64 reg_x reg_x + `appOL` code_y + `appOL` signExtend (formatToWidth format_y) W64 reg_y reg_y + `appOL` unitOL (annExpr expr (BCOND cmp x' y' (TBlock bid))) + _ -> code_x `appOL` code_y `appOL` unitOL (annExpr expr (BCOND cmp x' y' (TBlock bid))) + + fbcond w cmp = do + -- ensure we get float regs + (reg_fx, _format_fx, code_fx) <- getFloatReg x + (reg_fy, _format_fy, code_fy) <- getFloatReg y + oneReg <- getNewRegNat II64 + return $ code_fx `appOL` + code_fy `snocOL` + annExpr expr (CSET ip (OpReg w reg_fx) (OpReg w reg_fy) cmp) `snocOL` + MOV (OpReg W64 oneReg) (OpImm (ImmInt 1)) `snocOL` + BCOND EQ ip (OpReg w oneReg) (TBlock bid) + + case mop of + MO_F_Eq w -> fbcond w EQ + MO_F_Ne w -> fbcond w NE + + MO_F_Gt w -> fbcond w FGT + MO_F_Ge w -> fbcond w FGE + MO_F_Lt w -> fbcond w FLT + MO_F_Le w -> fbcond w FLE + + MO_Eq w -> sbcond w EQ + MO_Ne w -> sbcond w NE + + MO_S_Gt w -> sbcond w SGT + MO_S_Ge w -> sbcond w SGE + MO_S_Lt w -> sbcond w SLT + MO_S_Le w -> sbcond w SLE + MO_U_Gt w -> ubcond w UGT + MO_U_Ge w -> ubcond w UGE + MO_U_Lt w -> ubcond w ULT + MO_U_Le w -> ubcond w ULE + _ -> pprPanic "RV64.genCondJump:case mop: " (text $ show expr) + _ -> pprPanic "RV64.genCondJump: " (text $ show expr) + + 
+genCondBranch + :: BlockId -- the source of the jump + -> BlockId -- the true branch target + -> BlockId -- the false branch target + -> CmmExpr -- the condition on which to branch + -> NatM InstrBlock -- Instructions + +genCondBranch _ true false expr = do + b1 <- genCondJump true expr + b2 <- genBranch false + return (b1 `appOL` b2) + +-- ----------------------------------------------------------------------------- +-- Generating C calls + +-- Now the biggest nightmare---calls. Most of the nastiness is buried in +-- @get_arg@, which moves the arguments to the correct registers/stack +-- locations. Apart from that, the code is easy. +-- +-- As per *convention*: +-- x0-x7: (volatile) argument registers +-- x8: (volatile) indirect result register / Linux syscall no +-- x9-x15: (volatile) caller saved regs +-- x16,x17: (volatile) intra-procedure-call registers +-- x18: (volatile) platform register. don't use for portability +-- x19-x28: (non-volatile) callee save regs +-- x29: (non-volatile) frame pointer +-- x30: link register +-- x31: stack pointer / zero reg +-- +-- Thus, this is what a c function will expect. Find the arguments in x0-x7, +-- anything above that on the stack. We'll ignore c functions with more than +-- 8 arguments for now. Sorry. +-- +-- We need to make sure we preserve x9-x15, don't want to touch x16, x17. + +-- Note [PLT vs GOT relocations] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- When linking objects together, we may need to lookup foreign references. That +-- is symbolic references to functions or values in other objects. When +-- compiling the object, we can not know where those elements will end up in +-- memory (relative to the current location). Thus the use of symbols. There +-- are two types of items we are interested, code segments we want to jump to +-- and continue execution there (functions, ...), and data items we want to look +-- up (strings, numbers, ...). 
For functions we can use the fact that we can use +-- an intermediate jump without visibility to the programs execution. If we +-- want to jump to a function that is simply too far away to reach for the B/BL +-- instruction, we can create a small piece of code that loads the full target +-- address and jumps to that on demand. Say f wants to call g, however g is out +-- of range for a direct jump, we can create a function h in range for f, that +-- will load the address of g, and jump there. The area where we construct h +-- is called the Procedure Linking Table (PLT), we have essentially replaced +-- f -> g with f -> h -> g. This is fine for function calls. However if we +-- want to lookup values, this trick doesn't work, so we need something else. +-- We will instead reserve a slot in memory, and have a symbol pointing to that +-- slot. Now what we essentially do is, we reference that slot, and expect that +-- slot to hold the final resting address of the data we are interested in. +-- Thus what that symbol really points to is the location of the final data. +-- The block of memory where we hold all those slots is the Global Offset Table +-- (GOT). Instead of x <- $foo, we now do y <- $fooPtr, and x <- [$y]. +-- +-- FIXME: Update for RISCV, the below is still AArch64. +-- For JUMP/CALLs we have 26bits (+/- 128MB), for conditional branches we only +-- have 19bits (+/- 1MB). Symbol lookups are also within +/- 1MB, thus for most +-- of the LOAD/STOREs we'd want to use adrp, and add to compute a value within +-- 4GB of the PC, and load that. For anything outside of that range, we'd have +-- to go through the GOT. +-- +-- adrp x0, <symbol> +-- add x0, :lo:<symbol> +-- +-- will compute the address of <symbol> int x0 if <symbol> is within 4GB of the +-- PC. 
+-- +-- If we want to get the slot in the global offset table (GOT), we can do this: +-- +-- adrp x0, #:got:<symbol> +-- ldr x0, [x0, #:got_lo12:<symbol>] +-- +-- this will compute the address anywhere in the addressable 64bit space into +-- x0, by loading the address from the GOT slot. +-- +-- To actually get the value of <symbol>, we'd need to ldr x0, x0 still, which +-- for the first case can be optimized to use ldr x0, [x0, #:lo12:<symbol>] +-- instead of the add instruction. +-- +-- As the memory model for AArch64 for PIC is considered to be +/- 4GB, we do +-- not need to go through the GOT, unless we want to address the full address +-- range within 64bit. + +genCCall + :: ForeignTarget -- function to call + -> [CmmFormal] -- where to put the result + -> [CmmActual] -- arguments (of mixed type) + -> BlockId -- The block we are in + -> NatM (InstrBlock, Maybe BlockId) +-- TODO: Specialize where we can. +-- Generic impl +genCCall target dest_regs arg_regs bid = do + -- we want to pass arg_regs into allArgRegs + -- pprTraceM "genCCall target" (ppr target) + -- pprTraceM "genCCall formal" (ppr dest_regs) + -- pprTraceM "genCCall actual" (ppr arg_regs) + + case target of + -- The target :: ForeignTarget call can either + -- be a foreign procedure with an address expr + -- and a calling convention. + ForeignTarget expr _cconv -> do + (call_target, call_target_code) <- case expr of + -- if this is a label, let's just directly to it. This will produce the + -- correct CALL relocation for BL... + -- While this works on aarch64, for _most_ labels, it will fall short + -- where label branching only works for shoter distances (e.g. riscv) + -- (CmmLit (CmmLabel lbl)) -> pure (TLabel lbl, nilOL) + -- ... if it's not a label--well--let's compute the expression into a + -- register and jump to that. See Note [PLT vs GOT relocations] + _ -> do (reg, _format, reg_code) <- getSomeReg expr + pure (TReg reg, reg_code) + -- compute the code and register logic for all arg_regs. 
+ -- this will give us the format information to match on. + arg_regs' <- mapM getSomeReg arg_regs + + -- Now this is stupid. Our Cmm expressions doesn't carry the proper sizes + -- so while in Cmm we might get W64 incorrectly for an int, that is W32 in + -- STG; this thenn breaks packing of stack arguments, if we need to pack + -- for the pcs, e.g. darwinpcs. Option one would be to fix the Int type + -- in Cmm proper. Option two, which we choose here is to use extended Hint + -- information to contain the size information and use that when packing + -- arguments, spilled onto the stack. + let (_res_hints, arg_hints) = foreignTargetHints target + arg_regs'' = zipWith (\(r, f, c) h -> (r,f,h,c)) arg_regs' arg_hints + + (stackSpace', passRegs, passArgumentsCode) <- passArguments allGpArgRegs allFpArgRegs arg_regs'' 0 [] nilOL + + -- if we pack the stack, we may need to adjust to multiple of 8byte. + -- if we don't pack the stack, it will always be multiple of 8. + let stackSpace = if stackSpace' `mod` 8 /= 0 + then 8 * (stackSpace' `div` 8 + 1) + else stackSpace' + + (returnRegs, readResultsCode) <- readResults allGpArgRegs allFpArgRegs dest_regs [] nilOL + + let moveStackDown 0 = toOL [ PUSH_STACK_FRAME + , DELTA (-16) ] + moveStackDown i | odd i = moveStackDown (i + 1) + moveStackDown i = toOL [ PUSH_STACK_FRAME + , SUB (OpReg W64 (regSingle 2)) (OpReg W64 (regSingle 2)) (OpImm (ImmInt (8 * i))) + , DELTA (-8 * i - 16) ] + moveStackUp 0 = toOL [ POP_STACK_FRAME + , DELTA 0 ] + moveStackUp i | odd i = moveStackUp (i + 1) + moveStackUp i = toOL [ ADD (OpReg W64 (regSingle 2)) (OpReg W64 (regSingle 2)) (OpImm (ImmInt (8 * i))) + , POP_STACK_FRAME + , DELTA 0 ] + + let code = call_target_code -- compute the label (possibly into a register) + `appOL` moveStackDown (stackSpace `div` 8) + `appOL` passArgumentsCode -- put the arguments into x0, ... + `snocOL` BL call_target passRegs returnRegs -- branch and link. 
+ `appOL` readResultsCode -- parse the results into registers + `appOL` moveStackUp (stackSpace `div` 8) + return (code, Nothing) + + PrimTarget MO_F32_Fabs + | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs -> + unaryFloatOp W32 (\d x -> unitOL $ FABS d x) arg_reg dest_reg + PrimTarget MO_F64_Fabs + | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs -> + unaryFloatOp W64 (\d x -> unitOL $ FABS d x) arg_reg dest_reg + + -- or a possibly side-effecting machine operation + -- mop :: CallishMachOp (see GHC.Cmm.MachOp) + PrimTarget mop -> do + -- We'll need config to construct forien targets + case mop of + -- 64 bit float ops + MO_F64_Pwr -> mkCCall "pow" + + MO_F64_Sin -> mkCCall "sin" + MO_F64_Cos -> mkCCall "cos" + MO_F64_Tan -> mkCCall "tan" + + MO_F64_Sinh -> mkCCall "sinh" + MO_F64_Cosh -> mkCCall "cosh" + MO_F64_Tanh -> mkCCall "tanh" + + MO_F64_Asin -> mkCCall "asin" + MO_F64_Acos -> mkCCall "acos" + MO_F64_Atan -> mkCCall "atan" + + MO_F64_Asinh -> mkCCall "asinh" + MO_F64_Acosh -> mkCCall "acosh" + MO_F64_Atanh -> mkCCall "atanh" + + MO_F64_Log -> mkCCall "log" + MO_F64_Log1P -> mkCCall "log1p" + MO_F64_Exp -> mkCCall "exp" + MO_F64_ExpM1 -> mkCCall "expm1" + MO_F64_Fabs -> mkCCall "fabs" + MO_F64_Sqrt -> mkCCall "sqrt" + + -- 32 bit float ops + MO_F32_Pwr -> mkCCall "powf" + + MO_F32_Sin -> mkCCall "sinf" + MO_F32_Cos -> mkCCall "cosf" + MO_F32_Tan -> mkCCall "tanf" + MO_F32_Sinh -> mkCCall "sinhf" + MO_F32_Cosh -> mkCCall "coshf" + MO_F32_Tanh -> mkCCall "tanhf" + MO_F32_Asin -> mkCCall "asinf" + MO_F32_Acos -> mkCCall "acosf" + MO_F32_Atan -> mkCCall "atanf" + MO_F32_Asinh -> mkCCall "asinhf" + MO_F32_Acosh -> mkCCall "acoshf" + MO_F32_Atanh -> mkCCall "atanhf" + MO_F32_Log -> mkCCall "logf" + MO_F32_Log1P -> mkCCall "log1pf" + MO_F32_Exp -> mkCCall "expf" + MO_F32_ExpM1 -> mkCCall "expm1f" + MO_F32_Fabs -> mkCCall "fabsf" + MO_F32_Sqrt -> mkCCall "sqrtf" + + -- 64-bit primops + MO_I64_ToI -> mkCCall "hs_int64ToInt" + MO_I64_FromI -> mkCCall "hs_intToInt64" 
+ MO_W64_ToW -> mkCCall "hs_word64ToWord" + MO_W64_FromW -> mkCCall "hs_wordToWord64" + MO_x64_Neg -> mkCCall "hs_neg64" + MO_x64_Add -> mkCCall "hs_add64" + MO_x64_Sub -> mkCCall "hs_sub64" + MO_x64_Mul -> mkCCall "hs_mul64" + MO_I64_Quot -> mkCCall "hs_quotInt64" + MO_I64_Rem -> mkCCall "hs_remInt64" + MO_W64_Quot -> mkCCall "hs_quotWord64" + MO_W64_Rem -> mkCCall "hs_remWord64" + MO_x64_And -> mkCCall "hs_and64" + MO_x64_Or -> mkCCall "hs_or64" + MO_x64_Xor -> mkCCall "hs_xor64" + MO_x64_Not -> mkCCall "hs_not64" + MO_x64_Shl -> mkCCall "hs_uncheckedShiftL64" + MO_I64_Shr -> mkCCall "hs_uncheckedIShiftRA64" + MO_W64_Shr -> mkCCall "hs_uncheckedShiftRL64" + MO_x64_Eq -> mkCCall "hs_eq64" + MO_x64_Ne -> mkCCall "hs_ne64" + MO_I64_Ge -> mkCCall "hs_geInt64" + MO_I64_Gt -> mkCCall "hs_gtInt64" + MO_I64_Le -> mkCCall "hs_leInt64" + MO_I64_Lt -> mkCCall "hs_ltInt64" + MO_W64_Ge -> mkCCall "hs_geWord64" + MO_W64_Gt -> mkCCall "hs_gtWord64" + MO_W64_Le -> mkCCall "hs_leWord64" + MO_W64_Lt -> mkCCall "hs_ltWord64" + + -- Conversion + MO_UF_Conv w -> mkCCall (word2FloatLabel w) + + -- Optional MachOps + -- These are enabled/disabled by backend flags: GHC.StgToCmm.Config + MO_S_Mul2 _w -> unsupported mop + MO_S_QuotRem _w -> unsupported mop + MO_U_QuotRem _w -> unsupported mop + MO_U_QuotRem2 _w -> unsupported mop + MO_Add2 _w -> unsupported mop + MO_AddWordC _w -> unsupported mop + MO_SubWordC _w -> unsupported mop + MO_AddIntC _w -> unsupported mop + MO_SubIntC _w -> unsupported mop + MO_U_Mul2 _w -> unsupported mop + + -- Memory Ordering + -- The concrete encoding is copied from load_load_barrier() and write_barrier() (SMP.h) + -- TODO: This needs to be changed for https://gitlab.haskell.org/ghc/ghc/-/merge_requests/10628 + -- The related C functions are: + -- atomic_thread_fence(memory_order_acquire); + -- atomic_thread_fence(memory_order_release); + MO_ReadBarrier -> return (unitOL (DMBSY DmbRead DmbRead), Nothing) + MO_WriteBarrier -> return (unitOL (DMBSY DmbWrite 
DmbWrite), Nothing) + MO_Touch -> return (nilOL, Nothing) -- Keep variables live (when using interior pointers) + -- Prefetch + MO_Prefetch_Data _n -> return (nilOL, Nothing) -- Prefetch hint. + + -- Memory copy/set/move/cmp, with alignment for optimization + + -- TODO Optimize and use e.g. quad registers to move memory around instead + -- of offloading this to memcpy. For small memcpys we can utilize + -- the 128bit quad registers in NEON to move block of bytes around. + -- Might also make sense of small memsets? Use xzr? What's the function + -- call overhead? + MO_Memcpy _align -> mkCCall "memcpy" + MO_Memset _align -> mkCCall "memset" + MO_Memmove _align -> mkCCall "memmove" + MO_Memcmp _align -> mkCCall "memcmp" + + MO_SuspendThread -> mkCCall "suspendThread" + MO_ResumeThread -> mkCCall "resumeThread" + + MO_PopCnt w -> mkCCall (popCntLabel w) + MO_Pdep w -> mkCCall (pdepLabel w) + MO_Pext w -> mkCCall (pextLabel w) + MO_Clz w -> mkCCall (clzLabel w) + MO_Ctz w -> mkCCall (ctzLabel w) + MO_BSwap w -> mkCCall (bSwapLabel w) + MO_BRev w -> mkCCall (bRevLabel w) + + -- Atomic read-modify-write. + mo@(MO_AtomicRead w ord) + | [p_reg] <- arg_regs + , [dst_reg] <- dest_regs -> do + (p, _fmt_p, code_p) <- getSomeReg p_reg + platform <- getPlatform + -- See __atomic_load_n (in C) + let instrs = case ord of + MemOrderRelaxed -> unitOL $ ann moDescr (LDR (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)) + MemOrderAcquire -> toOL [ + ann moDescr (LDR (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)), + DMBSY DmbRead DmbReadWrite + ] + MemOrderSeqCst -> toOL [ + ann moDescr (DMBSY DmbReadWrite DmbReadWrite), + LDR (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p), + DMBSY DmbRead DmbReadWrite + ] + MemOrderRelease -> panic $ "Unexpected MemOrderRelease on an AtomicRead: " ++ show mo + dst = getRegisterReg platform (CmmLocal dst_reg) + moDescr = (text . 
show) mo + code = + code_p `appOL` + instrs + return (code, Nothing) + | otherwise -> panic "mal-formed AtomicRead" + mo@(MO_AtomicWrite w ord) + | [p_reg, val_reg] <- arg_regs -> do + (p, _fmt_p, code_p) <- getSomeReg p_reg + (val, fmt_val, code_val) <- getSomeReg val_reg + -- See __atomic_store_n (in C) + let instrs = case ord of + MemOrderRelaxed -> unitOL $ ann moDescr (STR fmt_val (OpReg w val) (OpAddr $ AddrReg p)) + MemOrderSeqCst -> toOL [ + ann moDescr (DMBSY DmbReadWrite DmbWrite), + STR fmt_val (OpReg w val) (OpAddr $ AddrReg p) + ] + MemOrderRelease -> toOL [ + ann moDescr (DMBSY DmbReadWrite DmbWrite), + STR fmt_val (OpReg w val) (OpAddr $ AddrReg p) + ] + MemOrderAcquire -> panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo + moDescr = (text . show) mo + code = + code_p `appOL` + code_val `appOL` + instrs + return (code, Nothing) + | otherwise -> panic "mal-formed AtomicWrite" + MO_AtomicRMW w amop -> mkCCall (atomicRMWLabel w amop) + MO_Cmpxchg w -> mkCCall (cmpxchgLabel w) + -- -- Should be an AtomicRMW variant eventually. + -- -- Sequential consistent. + -- TODO: this should be implemented properly! + MO_Xchg w -> mkCCall (xchgLabel w) + + where + unsupported :: Show a => a -> b + unsupported mop = panic ("outOfLineCmmOp: " ++ show mop + ++ " not supported here") + mkCCall :: FastString -> NatM (InstrBlock, Maybe BlockId) + mkCCall name = do + config <- getConfig + target <- cmmMakeDynamicReference config CallReference $ + mkForeignLabel name Nothing ForeignLabelInThisPackage IsFunction + let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn + genCCall (ForeignTarget target cconv) dest_regs arg_regs bid + + -- Implementiation of the RISCV ABI calling convention. 
+ -- https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/948463cd5dbebea7c1869e20146b17a2cc8fda2f/riscv-cc.adoc#integer-calling-convention + passArguments :: [Reg] -> [Reg] -> [(Reg, Format, ForeignHint, InstrBlock)] -> Int -> [Reg] -> InstrBlock -> NatM (Int, [Reg], InstrBlock) + -- Base case: no more arguments to pass (left) + passArguments _ _ [] stackSpace accumRegs accumCode = return (stackSpace, accumRegs, accumCode) + + -- Still have GP regs, and we want to pass an GP argument. + passArguments (gpReg:gpRegs) fpRegs ((r, format, hint, code_r):args) stackSpace accumRegs accumCode | isIntFormat format = do + -- RISCV64 Integer Calling Convention: "When passed in registers or on the + -- stack, integer scalars narrower than XLEN bits are widened according to + -- the sign of their type up to 32 bits, then sign-extended to XLEN bits." + let w = formatToWidth format + assignArg = if hint == SignedHint then + COMMENT (text "Pass gp argument sign-extended (SignedHint): " <> ppr r) `consOL` + signExtend w W64 r gpReg + + else toOL [COMMENT (text "Pass gp argument sign-extended (SignedHint): " <> ppr r) + , MOV (OpReg w gpReg) (OpReg w r)] + accumCode' = accumCode `appOL` + code_r `appOL` + assignArg + passArguments gpRegs fpRegs args stackSpace (gpReg:accumRegs) accumCode' + + -- Still have FP regs, and we want to pass an FP argument. + passArguments gpRegs (fpReg:fpRegs) ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isFloatFormat format = do + let w = formatToWidth format + mov = MOV (OpReg w fpReg) (OpReg w r) + accumCode' = accumCode `appOL` + code_r `snocOL` + ann (text "Pass fp argument: " <> ppr r) mov + passArguments gpRegs fpRegs args stackSpace (fpReg:accumRegs) accumCode' + + -- No mor regs left to pass. Must pass on stack. 
+ passArguments [] [] ((r, format, hint, code_r) : args) stackSpace accumRegs accumCode = do + let w = formatToWidth format + space = 8 + str = STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 2) (ImmInt stackSpace))) + stackCode = + if hint == SignedHint + then + code_r + `appOL` signExtend w W64 r ipReg + `snocOL` ann (text "Pass signed argument (size " <> ppr w <> text ") on the stack: " <> ppr ipReg) str + else + code_r + `snocOL` ann (text "Pass unsigned argument (size " <> ppr w <> text ") on the stack: " <> ppr r) str + passArguments [] [] args (stackSpace + space) accumRegs (stackCode `appOL` accumCode) + + -- Still have fpRegs left, but want to pass a GP argument. Must be passed on the stack then. + passArguments [] fpRegs ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isIntFormat format = do + let w = formatToWidth format + space = 8 + str = STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 2) (ImmInt stackSpace))) + stackCode = code_r `snocOL` + ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) str + passArguments [] fpRegs args (stackSpace+space) accumRegs (stackCode `appOL` accumCode) + + -- Still have gpRegs left, but want to pass a FP argument. Must be passed in gpReg then. 
+ passArguments (gpReg:gpRegs) [] ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isFloatFormat format = do + let w = formatToWidth format + mov = MOV (OpReg w gpReg) (OpReg w r) + accumCode' = accumCode `appOL` + code_r `snocOL` + ann (text "Pass fp argument in gpReg: " <> ppr r) mov + passArguments gpRegs [] args stackSpace (gpReg:accumRegs) accumCode' + + passArguments _ _ _ _ _ _ = pprPanic "passArguments" (text "invalid state") + + readResults :: [Reg] -> [Reg] -> [LocalReg] -> [Reg]-> InstrBlock -> NatM ([Reg], InstrBlock) + readResults _ _ [] accumRegs accumCode = return (accumRegs, accumCode) + readResults [] _ _ _ _ = do + platform <- getPlatform + pprPanic "genCCall, out of gp registers when reading results" (pdoc platform target) + readResults _ [] _ _ _ = do + platform <- getPlatform + pprPanic "genCCall, out of fp registers when reading results" (pdoc platform target) + readResults (gpReg:gpRegs) (fpReg:fpRegs) (dst:dsts) accumRegs accumCode = do + -- gp/fp reg -> dst + platform <- getPlatform + let rep = cmmRegType (CmmLocal dst) + format = cmmTypeFormat rep + w = cmmRegWidth (CmmLocal dst) + r_dst = getRegisterReg platform (CmmLocal dst) + if isFloatFormat format + then readResults (gpReg:gpRegs) fpRegs dsts (fpReg:accumRegs) (accumCode `snocOL` MOV (OpReg w r_dst) (OpReg w fpReg)) + else readResults gpRegs (fpReg:fpRegs) dsts (gpReg:accumRegs) $ + accumCode `snocOL` + MOV (OpReg w r_dst) (OpReg w gpReg) `appOL` + -- truncate, otherwise an unexpectedly big value might be used in upfollowing calculations + truncateReg W64 w r_dst + + unaryFloatOp w op arg_reg dest_reg = do + platform <- getPlatform + (reg_fx, _format_x, code_fx) <- getFloatReg arg_reg + let dst = getRegisterReg platform (CmmLocal dest_reg) + let code = code_fx `appOL` op (OpReg w dst) (OpReg w reg_fx) + return (code, Nothing) Index: ghc-9.8.2/compiler/GHC/CmmToAsm/RV64/Cond.hs =================================================================== --- /dev/null +++ 
ghc-9.8.2/compiler/GHC/CmmToAsm/RV64/Cond.hs @@ -0,0 +1,39 @@ +module GHC.CmmToAsm.RV64.Cond where + +import GHC.Prelude + +-- | Condition codes. +-- +-- Used in conditional branches and bit setters. According to the available +-- instruction set, some conditions are encoded as their negated opposites. I.e. +-- these are logical things that don't necessarily map 1:1 to hardware/ISA. +data Cond + = -- | int and float + EQ + | -- | int and float + NE + | -- | signed less than + SLT + | -- | signed less than or equal + SLE + | -- | signed greater than or equal + SGE + | -- | signed greater than + SGT + | -- | unsigned less than + ULT + | -- | unsigned less than or equal + ULE + | -- | unsigned greater than or equal + UGE + | -- | unsigned greater than + UGT + | -- | floating point instruction @flt@ + FLT + | -- | floating point instruction @fle@ + FLE + | -- | floating point instruction @fge@ + FGE + | -- | floating point instruction @fgt@ + FGT + deriving (Eq, Show) Index: ghc-9.8.2/compiler/GHC/CmmToAsm/RV64/Instr.hs =================================================================== --- /dev/null +++ ghc-9.8.2/compiler/GHC/CmmToAsm/RV64/Instr.hs @@ -0,0 +1,798 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module GHC.CmmToAsm.RV64.Instr + +where + +import GHC.Prelude + +import GHC.CmmToAsm.RV64.Cond +import GHC.CmmToAsm.RV64.Regs + +import GHC.CmmToAsm.Instr (RegUsage(..)) +import GHC.CmmToAsm.Format +import GHC.CmmToAsm.Types +import GHC.CmmToAsm.Utils +import GHC.CmmToAsm.Config +import GHC.Platform.Reg + +import GHC.Platform.Regs +import GHC.Cmm.BlockId +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label +import GHC.Cmm +import GHC.Cmm.CLabel +import GHC.Utils.Outputable +import GHC.Platform +import GHC.Types.Unique.Supply + +import GHC.Utils.Panic + +import Data.Maybe + +import GHC.Stack + +-- | Stack frame header size in bytes. +-- +-- The stack frame header is made of the values that are always saved +-- (regardless of the context.) 
It consists of the saved return address and a +-- pointer to the previous frame. Thus, its size is two stack frame slots which +-- equals two addresses/words (2 * 8 byte). +stackFrameHeaderSize :: Int +stackFrameHeaderSize = 2 * spillSlotSize + +-- | All registers are 8 byte wide. +spillSlotSize :: Int +spillSlotSize = 8 + +-- | The number of bytes that the stack pointer should be aligned +-- to. +stackAlign :: Int +stackAlign = 16 + +-- | The number of spill slots available without allocating more. +maxSpillSlots :: NCGConfig -> Int +maxSpillSlots config + = ((ncgSpillPreallocSize config - stackFrameHeaderSize) + `div` spillSlotSize) - 1 + +-- | Convert a spill slot number to a *byte* offset, with no sign. +spillSlotToOffset :: Int -> Int +spillSlotToOffset slot + = stackFrameHeaderSize + spillSlotSize * slot + +-- | Get the registers that are being used by this instruction. +-- regUsage doesn't need to do any trickery for jumps and such. +-- Just state precisely the regs read and written by that insn. +-- The consequences of control flow transfers, as far as register +-- allocation goes, are taken care of by the register allocator. +-- +-- RegUsage = RU [<read regs>] [<write regs>] + +instance Outputable RegUsage where + ppr (RU reads writes) = text "RegUsage(reads:" <+> ppr reads <> comma <+> text "writes:" <+> ppr writes <> char ')' + +regUsageOfInstr :: Platform -> Instr -> RegUsage +regUsageOfInstr platform instr = case instr of + ANN _ i -> regUsageOfInstr platform i + COMMENT{} -> usage ([], []) + MULTILINE_COMMENT{} -> usage ([], []) + PUSH_STACK_FRAME -> usage ([], []) + POP_STACK_FRAME -> usage ([], []) + DELTA{} -> usage ([], []) + + -- 1. 
Arithmetic Instructions ------------------------------------------------ + ADD dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + MUL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + NEG dst src -> usage (regOp src, regOp dst) + SMULH dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + DIV dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + REM dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + REMU dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + SUB dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + DIVU dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + + -- 2. Bit Manipulation Instructions ------------------------------------------ + -- 3. Logical and Move Instructions ------------------------------------------ + AND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + OR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + ASR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + XOR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + LSL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + LSR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + MOV dst src -> usage (regOp src, regOp dst) + -- ORI's third operand is always an immediate + ORI dst src1 _ -> usage (regOp src1, regOp dst) + XORI dst src1 _ -> usage (regOp src1, regOp dst) + -- 4. Branch Instructions ---------------------------------------------------- + J t -> usage (regTarget t, []) + J_TBL _ _ t -> usage ([t], []) + B t -> usage (regTarget t, []) + BCOND _ l r t -> usage (regTarget t ++ regOp l ++ regOp r, []) + BL t ps _rs -> usage (regTarget t ++ ps, callerSavedRegisters) + + -- 5. Atomic Instructions ---------------------------------------------------- + -- 6. Conditional Instructions ----------------------------------------------- + CSET dst l r _ -> usage (regOp l ++ regOp r, regOp dst) + -- 7. 
Load and Store Instructions -------------------------------------------- + STR _ src dst -> usage (regOp src ++ regOp dst, []) + -- STLR _ src dst L -> usage (regOp src ++ regOp dst, []) + LDR _ dst src -> usage (regOp src, regOp dst) + LDRU _ dst src -> usage (regOp src, regOp dst) + + -- 8. Synchronization Instructions ------------------------------------------- + DMBSY _ _ -> usage ([], []) + + -- 9. Floating Point Instructions -------------------------------------------- + FCVT dst src -> usage (regOp src, regOp dst) + SCVTF dst src -> usage (regOp src, regOp dst) + FCVTZS dst src -> usage (regOp src, regOp dst) + FABS dst src -> usage (regOp src, regOp dst) + + _ -> panic $ "regUsageOfInstr: " ++ instrCon instr + + where + -- filtering the usage is necessary, otherwise the register + -- allocator will try to allocate pre-defined fixed stg + -- registers as well, as they show up. + usage (src, dst) = RU (filter (interesting platform) src) + (filter (interesting platform) dst) + + regAddr :: AddrMode -> [Reg] + regAddr (AddrRegImm r1 _) = [r1] + regAddr (AddrReg r1) = [r1] + regOp :: Operand -> [Reg] + regOp (OpReg _ r1) = [r1] + regOp (OpAddr a) = regAddr a + regOp (OpImm _) = [] + regTarget :: Target -> [Reg] + regTarget (TBlock _) = [] + regTarget (TReg r1) = [r1] + + -- Is this register interesting for the register allocator? + interesting :: Platform -> Reg -> Bool + interesting _ (RegVirtual _) = True + interesting _ (RegReal (RealRegSingle (-1))) = False + interesting platform (RegReal (RealRegSingle i)) = freeReg platform i + +-- Save caller save registers +-- This is x0-x18 +-- +-- For SIMD/FP Registers: +-- Registers v8-v15 must be preserved by a callee across subroutine calls; +-- the remaining registers (v0-v7, v16-v31) do not need to be preserved (or +-- should be preserved by the caller). 
Additionally, only the bottom 64 bits +-- of each value stored in v8-v15 need to be preserved [7]; it is the +-- responsibility of the caller to preserve larger values. +-- +-- .---------------------------------------------------------------------------------------------------------------------------------------------------------------. +-- | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | +-- | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 42 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | +-- |== General Purpose registers ==================================================================================================================================| +-- | ZR | RA | SP | GP | TP | <- tmp r. -> | FP | <- | <---- argument passing -------------> | -- callee saved ------------------------------> | <--- tmp regs --> | +-- | -- | -- | -- | -- | -- | <- free r. 
> | -- | BR | <---- free registers ---------------> | SP | HP | R1 | R2 | R3 | R4 | R5 | R6 | R7 | SL | <-- free regs --> | +-- |== SIMD/FP Registers ==========================================================================================================================================| +-- | <--- temporary registers -----------> | <------ | <---- argument passing -------------> | -- callee saved ------------------------------> | <--- tmp regs --> | +-- | <---------- free registers ---------> | F1 | F2 | <---- free registers ---------------> | F3 | F4 | F5 | F6 | D1 | D2 | D3 | D4 | D5 | D6 | -- | -- | -- | -- | +-- '---------------------------------------------------------------------------------------------------------------------------------------------------------------' +-- ZR: Zero, RA: Return Address, SP: Stack Pointer, GP: Global Pointer, TP: Thread Pointer, FP: Frame Pointer +-- BR: Base, SL: SpLim +callerSavedRegisters :: [Reg] +callerSavedRegisters = + map regSingle [t0RegNo .. t2RegNo] + ++ map regSingle [a0RegNo .. a7RegNo] + ++ map regSingle [t3RegNo .. t6RegNo] + ++ map regSingle [ft0RegNo .. ft7RegNo] + ++ map regSingle [fa0RegNo .. fa7RegNo] + +-- | Apply a given mapping to all the register references in this +-- instruction. +patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr +patchRegsOfInstr instr env = case instr of + -- 0. Meta Instructions + ANN d i -> ANN d (patchRegsOfInstr i env) + COMMENT{} -> instr + MULTILINE_COMMENT{} -> instr + PUSH_STACK_FRAME -> instr + POP_STACK_FRAME -> instr + DELTA{} -> instr + -- 1. 
Arithmetic Instructions ---------------------------------------------- + ADD o1 o2 o3 -> ADD (patchOp o1) (patchOp o2) (patchOp o3) + MUL o1 o2 o3 -> MUL (patchOp o1) (patchOp o2) (patchOp o3) + NEG o1 o2 -> NEG (patchOp o1) (patchOp o2) + SMULH o1 o2 o3 -> SMULH (patchOp o1) (patchOp o2) (patchOp o3) + DIV o1 o2 o3 -> DIV (patchOp o1) (patchOp o2) (patchOp o3) + REM o1 o2 o3 -> REM (patchOp o1) (patchOp o2) (patchOp o3) + REMU o1 o2 o3 -> REMU (patchOp o1) (patchOp o2) (patchOp o3) + SUB o1 o2 o3 -> SUB (patchOp o1) (patchOp o2) (patchOp o3) + DIVU o1 o2 o3 -> DIVU (patchOp o1) (patchOp o2) (patchOp o3) + + -- 2. Bit Manipulation Instructions ---------------------------------------- + + -- 3. Logical and Move Instructions ---------------------------------------- + AND o1 o2 o3 -> AND (patchOp o1) (patchOp o2) (patchOp o3) + OR o1 o2 o3 -> OR (patchOp o1) (patchOp o2) (patchOp o3) + ASR o1 o2 o3 -> ASR (patchOp o1) (patchOp o2) (patchOp o3) + XOR o1 o2 o3 -> XOR (patchOp o1) (patchOp o2) (patchOp o3) + LSL o1 o2 o3 -> LSL (patchOp o1) (patchOp o2) (patchOp o3) + LSR o1 o2 o3 -> LSR (patchOp o1) (patchOp o2) (patchOp o3) + MOV o1 o2 -> MOV (patchOp o1) (patchOp o2) + -- o3 cannot be a register for ORI (always an immediate) + ORI o1 o2 o3 -> ORI (patchOp o1) (patchOp o2) (patchOp o3) + XORI o1 o2 o3 -> XORI (patchOp o1) (patchOp o2) (patchOp o3) + + -- 4. Branch Instructions -------------------------------------------------- + J t -> J (patchTarget t) + J_TBL ids mbLbl t -> J_TBL ids mbLbl (env t) + B t -> B (patchTarget t) + BL t rs ts -> BL (patchTarget t) rs ts + BCOND c o1 o2 t -> BCOND c (patchOp o1) (patchOp o2) (patchTarget t) + + -- 5. Atomic Instructions -------------------------------------------------- + -- 6. Conditional Instructions --------------------------------------------- + CSET o l r c -> CSET (patchOp o) (patchOp l) (patchOp r) c + -- 7. 
Load and Store Instructions ------------------------------------------ + STR f o1 o2 -> STR f (patchOp o1) (patchOp o2) + -- STLR f o1 o2 -> STLR f (patchOp o1) (patchOp o2) + LDR f o1 o2 -> LDR f (patchOp o1) (patchOp o2) + LDRU f o1 o2 -> LDRU f (patchOp o1) (patchOp o2) + + -- 8. Synchronization Instructions ----------------------------------------- + DMBSY o1 o2 -> DMBSY o1 o2 + + -- 9. Floating Point Instructions ------------------------------------------ + FCVT o1 o2 -> FCVT (patchOp o1) (patchOp o2) + SCVTF o1 o2 -> SCVTF (patchOp o1) (patchOp o2) + FCVTZS o1 o2 -> FCVTZS (patchOp o1) (patchOp o2) + FABS o1 o2 -> FABS (patchOp o1) (patchOp o2) + _ -> panic $ "patchRegsOfInstr: " ++ instrCon instr + where + patchOp :: Operand -> Operand + patchOp (OpReg w r) = OpReg w (env r) + patchOp (OpAddr a) = OpAddr (patchAddr a) + patchOp op = op + patchTarget :: Target -> Target + patchTarget (TReg r) = TReg (env r) + patchTarget t = t + patchAddr :: AddrMode -> AddrMode + patchAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i + patchAddr (AddrReg r) = AddrReg (env r) +-------------------------------------------------------------------------------- + +-- | Checks whether this instruction is a jump/branch instruction. +-- +-- One that can change the flow of control in a way that the +-- register allocator needs to worry about. 
+isJumpishInstr :: Instr -> Bool +isJumpishInstr instr = case instr of + ANN _ i -> isJumpishInstr i + J {} -> True + J_TBL {} -> True + B {} -> True + BL {} -> True + BCOND {} -> True + _ -> False + +-- | Get the `BlockId`s of the jump destinations (if any) +jumpDestsOfInstr :: Instr -> [BlockId] +jumpDestsOfInstr (ANN _ i) = jumpDestsOfInstr i +jumpDestsOfInstr (J t) = [id | TBlock id <- [t]] +jumpDestsOfInstr (J_TBL ids _mbLbl _r) = catMaybes ids +jumpDestsOfInstr (B t) = [id | TBlock id <- [t]] +jumpDestsOfInstr (BL t _ _) = [id | TBlock id <- [t]] +jumpDestsOfInstr (BCOND _ _ _ t) = [id | TBlock id <- [t]] +jumpDestsOfInstr _ = [] + +-- | Change the destination of this (potential) jump instruction. +-- +-- Used in the linear allocator when adding fixup blocks for join +-- points. +patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr +patchJumpInstr instr patchF = + case instr of + ANN d i -> ANN d (patchJumpInstr i patchF) + J (TBlock bid) -> J (TBlock (patchF bid)) + J_TBL ids mbLbl r -> J_TBL (map (fmap patchF) ids) mbLbl r + B (TBlock bid) -> B (TBlock (patchF bid)) + BL (TBlock bid) ps rs -> BL (TBlock (patchF bid)) ps rs + BCOND c o1 o2 (TBlock bid) -> BCOND c o1 o2 (TBlock (patchF bid)) + _ -> panic $ "patchJumpInstr: " ++ instrCon instr + +-- ----------------------------------------------------------------------------- +-- Note [Spills and Reloads] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~ +-- We reserve @RESERVED_C_STACK_BYTES@ on the C stack for spilling and reloading +-- registers. AArch64s maximum displacement for SP relative spills and reloads +-- is essentially [-256,255], or [0, 0xFFF]*8 = [0, 32760] for 64bits. +-- +-- The @RESERVED_C_STACK_BYTES@ is 16k, so we can't address any location in a +-- single instruction. The idea is to use the Inter Procedure 0 (ip) register +-- to perform the computations for larger offsets. 
+-- +-- Using sp to compute the offset will violate assumptions about the stack pointer +-- pointing to the top of the stack during signal handling. As we can't force +-- every signal to use its own stack, we have to ensure that the stack pointer +-- always points to the top of the stack, and we can't use it for computation. +-- +-- | An instruction to spill a register into a spill slot. +mkSpillInstr :: + HasCallStack => + NCGConfig -> + Reg -> -- ^ register to spill + Int -> -- ^ current stack delta + Int -> -- ^ spill slot to use + [Instr] +mkSpillInstr _config reg delta slot = + case off - delta of + imm | fitsIn12bitImm imm -> [mkStrSpImm imm] + imm -> + [ movImmToIp imm, + addSpToIp, + mkStrIp + ] + where + fmt = case reg of + RegReal (RealRegSingle n) | n < d0RegNo -> II64 + _ -> FF64 + mkStrSpImm imm = ANN (text "Spill@" <> int (off - delta)) $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm))) + movImmToIp imm = ANN (text "Spill: IP <- " <> int imm) $ MOV ip (OpImm (ImmInt imm)) + addSpToIp = ANN (text "Spill: IP <- SP + IP ") $ ADD ip ip sp + mkStrIp = ANN (text "Spill@" <> int (off - delta)) $ STR fmt (OpReg W64 reg) (OpAddr (AddrReg ipReg)) + + off = spillSlotToOffset slot + +mkLoadInstr + :: NCGConfig + -> Reg -- ^ register to load + -> Int -- ^ current stack delta + -> Int -- ^ spill slot to use + -> [Instr] + +mkLoadInstr _config reg delta slot = + case off - delta of + imm | fitsIn12bitImm imm -> [mkLdrSpImm imm] + imm -> + [ movImmToIp imm, + addSpToIp, + mkLdrIp + ] + where + fmt = case reg of + RegReal (RealRegSingle n) | n < d0RegNo -> II64 + _ -> FF64 + mkLdrSpImm imm = ANN (text "Reload@" <> int (off - delta)) $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm))) + movImmToIp imm = ANN (text "Reload: IP <- " <> int imm) $ MOV ip (OpImm (ImmInt imm)) + addSpToIp = ANN (text "Reload: IP <- SP + IP ") $ ADD ip ip sp + mkLdrIp = ANN (text "Reload@" <> int (off - delta)) $ LDR fmt (OpReg W64 reg) (OpAddr (AddrReg 
ipReg)) + + off = spillSlotToOffset slot + +-- | See if this instruction is telling us the current C stack delta +takeDeltaInstr :: Instr -> Maybe Int +takeDeltaInstr (ANN _ i) = takeDeltaInstr i +takeDeltaInstr (DELTA i) = Just i +takeDeltaInstr _ = Nothing + +-- | Not real instructions. Just meta data +isMetaInstr :: Instr -> Bool +isMetaInstr instr = + case instr of + ANN _ i -> isMetaInstr i + COMMENT {} -> True + MULTILINE_COMMENT {} -> True + LOCATION {} -> True + LDATA {} -> True + NEWBLOCK {} -> True + DELTA {} -> True + PUSH_STACK_FRAME -> True + POP_STACK_FRAME -> True + _ -> False + +-- | Copy the value in a register to another one. +-- +-- Must work for all register classes. +mkRegRegMoveInstr :: Reg -> Reg -> Instr +mkRegRegMoveInstr src dst = ANN desc instr + where + desc = text "Reg->Reg Move: " <> ppr src <> text " -> " <> ppr dst + instr = MOV (operandFromReg dst) (operandFromReg src) + +-- | Take the source and destination from this (potential) reg -> reg move instruction +-- +-- We have to be a bit careful here: A `MOV` can also mean an implicit +-- conversion. This case is filtered out. +takeRegRegMoveInstr :: Instr -> Maybe (Reg, Reg) +takeRegRegMoveInstr (MOV (OpReg width dst) (OpReg width' src)) + | width == width' && (isFloatReg dst == isFloatReg src) = pure (src, dst) +takeRegRegMoveInstr _ = Nothing + +-- | Make an unconditional jump instruction. +mkJumpInstr :: BlockId -> [Instr] +mkJumpInstr = pure . B . TBlock + +-- | Decrement @sp@ to allocate stack space. +-- +-- The stack grows downwards, so we decrement the stack pointer by @n@ (bytes). +-- This is dual to `mkStackDeallocInstr`. @sp@ is the RISCV stack pointer, not +-- to be confused with the STG stack pointer. +mkStackAllocInstr :: Platform -> Int -> [Instr] +mkStackAllocInstr _platform = moveSp . negate + +-- | Increment SP to deallocate stack space. +-- +-- The stack grows downwards, so we increment the stack pointer by @n@ (bytes). +-- This is dual to `mkStackAllocInstr`. 
@sp@ is the RISCV stack pointer, not to +-- be confused with the STG stack pointer. +mkStackDeallocInstr :: Platform -> Int -> [Instr] +mkStackDeallocInstr _platform = moveSp + +moveSp :: Int -> [Instr] +moveSp n + | n == 0 = [] + | n /= 0 && fitsIn12bitImm n = pure . ANN desc $ ADD sp sp (OpImm (ImmInt n)) + | otherwise = + -- This ends up in three effective instructions. We could get away with + -- two for intMax12bit < n < 3 * intMax12bit by recursing once. However, + -- this way is likely less surprising. + [ ANN desc (MOV ip (OpImm (ImmInt n))), + ADD sp sp ip + ] + where + desc = text "Move SP:" <+> int n + +-- +-- See Note [extra spill slots] in X86/Instr.hs +-- +allocMoreStack + :: Platform + -> Int + -> NatCmmDecl statics GHC.CmmToAsm.RV64.Instr.Instr + -> UniqSM (NatCmmDecl statics GHC.CmmToAsm.RV64.Instr.Instr, [(BlockId,BlockId)]) + +allocMoreStack _ _ top@(CmmData _ _) = return (top,[]) +allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do + let entries = entryBlocks proc + + uniqs <- getUniquesM + + let + delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up + where x = slots * spillSlotSize -- sp delta + + alloc = mkStackAllocInstr platform delta + dealloc = mkStackDeallocInstr platform delta + + retargetList = zip entries (map mkBlockId uniqs) + + new_blockmap :: LabelMap BlockId + new_blockmap = mapFromList retargetList + + insert_stack_insn (BasicBlock id insns) + | Just new_blockid <- mapLookup id new_blockmap + = [ BasicBlock id $ alloc ++ [ B (TBlock new_blockid) ] + , BasicBlock new_blockid block' ] + | otherwise + = [ BasicBlock id block' ] + where + block' = foldr insert_dealloc [] insns + + insert_dealloc insn r = case insn of + J _ -> dealloc ++ (insn : r) + J_TBL {} -> dealloc ++ (insn : r) + ANN _ e -> insert_dealloc e r + _other | jumpDestsOfInstr insn /= [] + -> patchJumpInstr insn retarget : r + _other -> insn : r + + where retarget b = fromMaybe b (mapLookup b new_blockmap) + + new_code 
= concatMap insert_stack_insn code + return (CmmProc info lbl live (ListGraph new_code), retargetList) + +-- ----------------------------------------------------------------------------- +-- Machine's assembly language + +-- We have a few common "instructions" (nearly all the pseudo-ops) but +-- mostly all of 'Instr' is machine-specific. + +-- RV64 reference card: https://cs61c.org/sp23/pdfs/resources/reference-card.pdf +-- RV64 pseudo instructions: https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md#-a-listing-of-standard-risc-v-pseudoinstructions +-- We will target: RV64G(C). That is G = I+A+F+S+D +-- I: Integer Multiplication and Division +-- A: Atomic Instructions +-- F: Single Precision +-- D: Double Precision +-- C: Compressed (though we won't use that). + +-- This most notably leaves out B. (Bit Manipulation) instructions. + +data Instr + -- comment pseudo-op + = COMMENT SDoc + | MULTILINE_COMMENT SDoc + + -- Annotated instruction. Should print <instr> # <doc> + | ANN SDoc Instr + + -- location pseudo-op (file, line, col, name) + | LOCATION Int Int Int String + + -- some static data spat out during code + -- generation. Will be extracted before + -- pretty-printing. + | LDATA Section RawCmmStatics + + -- start a new basic block. Useful during + -- codegen, removed later. Preceding + -- instruction should be a jump, as per the + -- invariants for a BasicBlock (see Cmm). + | NEWBLOCK BlockId + + -- specify current stack offset for + -- benefit of subsequent passes + | DELTA Int + + -- 0. Pseudo Instructions -------------------------------------------------- + | PUSH_STACK_FRAME + | POP_STACK_FRAME + + -- == Base Instructions (I) ================================================ + -- 1. Arithmetic Instructions ---------------------------------------------- + -- all of these instructions can also take an immediate, in which case they + -- hafe a suffix I (except for U suffix, where it's IU then. E.g. SLTIU). 
+ | ADD Operand Operand Operand -- rd = rs1 + rs2 + | SUB Operand Operand Operand -- rd = rs1 - rs2 + + | AND Operand Operand Operand -- rd = rs1 & rs2 + | OR Operand Operand Operand -- rd = rs1 | rs2 + -- | XOR Operand Operand Operand -- rd = rs1 ^ rs2 + | LSL {- SLL -} Operand Operand Operand -- rd = rs1 << rs2 (zero ext) + | LSR {- SRL -} Operand Operand Operand -- rd = rs1 >> rs2 (zero ext) + | ASR {- SRA -} Operand Operand Operand -- rd = rs1 >> rs2 (sign ext) + + -- 2. Memory Load/Store Instructions --------------------------------------- + -- Unlike arm, we don't have register shorthands for size. + -- We do however have {L,S}{B,H,W,D}[U] instructions for Load/Store, Byte, Half, Word, Double, (Unsigned). + -- Reusing the arm logic with the _format_ specifier will hopefully work. + | STR Format Operand Operand -- str Xn, address-mode // Xn -> *addr + | LDR Format Operand Operand -- ldr Xn, address-mode // Xn <- *addr (sign-extended) + | LDRU Format Operand Operand -- ldr Xn, address-mode // Xn <- *addr (unsigned) + + -- 3. Control Flow --------------------------------------------------------- + -- B{EQ,GE,GEU,LT,LTU}, these are effectively BCOND from AArch64; + -- however, AArch64 desugars them into CMP + BCOND. So these are a bit more + -- powerful. + -- JAL / JARL are effectively the BL instruction from AArch64. + + | MUL Operand Operand Operand -- rd = rn × rm + + + -- Pseudo/synthesized: + | NEG Operand Operand -- rd = -op2 + + | DIV Operand Operand Operand -- rd = rn ÷ rm + | REM Operand Operand Operand -- rd = rn % rm (signed) + | REMU Operand Operand Operand -- rd = rn % rm (unsigned) + + -- TODO: Rename: MULH + | SMULH Operand Operand Operand + | DIVU Operand Operand Operand -- rd = rn ÷ rm + + -- 2. Bit Manipulation Instructions ---------------------------------------- + + -- 3. 
Logical and Move Instructions ---------------------------------------- + -- | AND Operand Operand Operand -- rd = rn & op2 + -- | ANDS Operand Operand Operand -- rd = rn & op2 + -- | ASR Operand Operand Operand -- rd = rn ≫ rm or rd = rn ≫ #i, i is 6 bits + | XOR Operand Operand Operand -- rd = rn ⊕ op2 + -- | LSL Operand Operand Operand -- rd = rn ≪ rm or rd = rn ≪ #i, i is 6 bits + -- | LSR Operand Operand Operand -- rd = rn ≫ rm or rd = rn ≫ #i, i is 6 bits + | MOV Operand Operand -- rd = rn or rd = #i + | ORI Operand Operand Operand -- rd = rn | op2 + | XORI Operand Operand Operand -- rd = rn `xor` imm + -- Load and stores. + + -- Conditional instructions + -- This is a synthetic operation. + | CSET Operand Operand Operand Cond -- if(o2 cond o3) op <- 1 else op <- 0 + + -- Branching. + -- TODO: Unused + | J Target -- like B, but only generated from genJump. Used to distinguish genJumps from others. + -- | A `J` instruction with data for switch jump tables + | J_TBL [Maybe BlockId] (Maybe CLabel) Reg + | B Target -- unconditional branching b/br. (To a blockid, label or register) + | BL Target [Reg] [Reg] -- branch and link (e.g. set x30 to next pc, and branch) + | BCOND Cond Operand Operand Target -- branch with condition. b.<cond> + + -- 8. Synchronization Instructions ----------------------------------------- + | DMBSY DmbType DmbType + -- 9. 
Floating Point Instructions + -- Float ConVerT + | FCVT Operand Operand + -- Signed ConVerT Float + | SCVTF Operand Operand + -- Float ConVerT to Zero Signed + | FCVTZS Operand Operand + -- Float ABSolute value + | FABS Operand Operand + +data DmbType = DmbRead | DmbWrite | DmbReadWrite + +instrCon :: Instr -> String +instrCon i = + case i of + COMMENT{} -> "COMMENT" + MULTILINE_COMMENT{} -> "COMMENT" + ANN{} -> "ANN" + LOCATION{} -> "LOCATION" + LDATA{} -> "LDATA" + NEWBLOCK{} -> "NEWBLOCK" + DELTA{} -> "DELTA" + PUSH_STACK_FRAME{} -> "PUSH_STACK_FRAME" + POP_STACK_FRAME{} -> "POP_STACK_FRAME" + ADD{} -> "ADD" + OR{} -> "OR" + MUL{} -> "MUL" + NEG{} -> "NEG" + DIV{} -> "DIV" + REM{} -> "REM" + REMU{} -> "REMU" + SMULH{} -> "SMULH" + SUB{} -> "SUB" + DIVU{} -> "DIVU" + AND{} -> "AND" + ASR{} -> "ASR" + XOR{} -> "XOR" + LSL{} -> "LSL" + LSR{} -> "LSR" + MOV{} -> "MOV" + ORI{} -> "ORI" + XORI{} -> "ORI" + STR{} -> "STR" + LDR{} -> "LDR" + LDRU{} -> "LDRU" + CSET{} -> "CSET" + J{} -> "J" + J_TBL{} -> "J_TBL" + B{} -> "B" + BL{} -> "BL" + BCOND{} -> "BCOND" + DMBSY{} -> "DMBSY" + FCVT{} -> "FCVT" + SCVTF{} -> "SCVTF" + FCVTZS{} -> "FCVTZS" + FABS{} -> "FABS" + +data Target + = TBlock BlockId + | TReg Reg + +data Operand + = -- | register + OpReg Width Reg + | -- | immediate value + OpImm Imm + | -- | memory reference + OpAddr AddrMode + deriving (Eq, Show) + +operandFromReg :: Reg -> Operand +operandFromReg = OpReg W64 + +operandFromRegNo :: RegNo -> Operand +operandFromRegNo = operandFromReg . 
regSingle + +zero, ra, sp, gp, tp, fp, ip :: Operand +zero = operandFromReg zeroReg +ra = operandFromReg raReg +sp = operandFromReg spMachReg +gp = operandFromRegNo 3 +tp = operandFromRegNo 4 +fp = operandFromRegNo 8 +ip = operandFromReg ipReg + +x0, x1, x2, x3, x4, x5, x6, x7 :: Operand +x8, x9, x10, x11, x12, x13, x14, x15 :: Operand +x16, x17, x18, x19, x20, x21, x22, x23 :: Operand +x24, x25, x26, x27, x28, x29, x30, x31 :: Operand +x0 = operandFromRegNo x0RegNo +x1 = operandFromRegNo 1 +x2 = operandFromRegNo 2 +x3 = operandFromRegNo 3 +x4 = operandFromRegNo 4 +x5 = operandFromRegNo x5RegNo +x6 = operandFromRegNo 6 +x7 = operandFromRegNo x7RegNo +x8 = operandFromRegNo 8 +x9 = operandFromRegNo 9 +x10 = operandFromRegNo x10RegNo +x11 = operandFromRegNo 11 +x12 = operandFromRegNo 12 +x13 = operandFromRegNo 13 +x14 = operandFromRegNo 14 +x15 = operandFromRegNo 15 +x16 = operandFromRegNo 16 +x17 = operandFromRegNo x17RegNo +x18 = operandFromRegNo 18 +x19 = operandFromRegNo 19 +x20 = operandFromRegNo 20 +x21 = operandFromRegNo 21 +x22 = operandFromRegNo 22 +x23 = operandFromRegNo 23 +x24 = operandFromRegNo 24 +x25 = operandFromRegNo 25 +x26 = operandFromRegNo 26 +x27 = operandFromRegNo 27 +x28 = operandFromRegNo x28RegNo +x29 = operandFromRegNo 29 +x30 = operandFromRegNo 30 +x31 = operandFromRegNo x31RegNo + +d0, d1, d2, d3, d4, d5, d6, d7 :: Operand +d8, d9, d10, d11, d12, d13, d14, d15 :: Operand +d16, d17, d18, d19, d20, d21, d22, d23 :: Operand +d24, d25, d26, d27, d28, d29, d30, d31 :: Operand +d0 = operandFromRegNo d0RegNo +d1 = operandFromRegNo 33 +d2 = operandFromRegNo 34 +d3 = operandFromRegNo 35 +d4 = operandFromRegNo 36 +d5 = operandFromRegNo 37 +d6 = operandFromRegNo 38 +d7 = operandFromRegNo d7RegNo +d8 = operandFromRegNo 40 +d9 = operandFromRegNo 41 +d10 = operandFromRegNo d10RegNo +d11 = operandFromRegNo 43 +d12 = operandFromRegNo 44 +d13 = operandFromRegNo 45 +d14 = operandFromRegNo 46 +d15 = operandFromRegNo 47 +d16 = operandFromRegNo 48 +d17 = 
operandFromRegNo d17RegNo +d18 = operandFromRegNo 50 +d19 = operandFromRegNo 51 +d20 = operandFromRegNo 52 +d21 = operandFromRegNo 53 +d22 = operandFromRegNo 54 +d23 = operandFromRegNo 55 +d24 = operandFromRegNo 56 +d25 = operandFromRegNo 57 +d26 = operandFromRegNo 58 +d27 = operandFromRegNo 59 +d28 = operandFromRegNo 60 +d29 = operandFromRegNo 61 +d30 = operandFromRegNo 62 +d31 = operandFromRegNo d31RegNo + +fitsIn12bitImm :: (Num a, Ord a) => a -> Bool +fitsIn12bitImm off = off >= intMin12bit && off <= intMax12bit + +intMin12bit :: Num a => a +intMin12bit = -2048 + +intMax12bit :: Num a => a +intMax12bit = 2047 + +fitsIn32bits :: (Num a, Ord a, Bits a) => a -> Bool +fitsIn32bits i = (-1 `shiftL` 31) <= i && i <= (1 `shiftL` 31 -1) + +isNbitEncodeable :: Int -> Integer -> Bool +isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift) + +isEncodeableInWidth :: Width -> Integer -> Bool +isEncodeableInWidth = isNbitEncodeable . widthInBits + +isIntOp :: Operand -> Bool +isIntOp = not . 
isFloatOp + +isFloatOp :: Operand -> Bool +isFloatOp (OpReg _ reg) | isFloatReg reg = True +isFloatOp _ = False + +isFloatReg :: Reg -> Bool +isFloatReg (RegReal (RealRegSingle i)) | i > 31 = True +isFloatReg (RegVirtual (VirtualRegF _)) = True +isFloatReg (RegVirtual (VirtualRegD _)) = True +isFloatReg _ = False Index: ghc-9.8.2/compiler/GHC/CmmToAsm/RV64/Ppr.hs =================================================================== --- /dev/null +++ ghc-9.8.2/compiler/GHC/CmmToAsm/RV64/Ppr.hs @@ -0,0 +1,662 @@ +module GHC.CmmToAsm.RV64.Ppr (pprNatCmmDecl, pprInstr) where + +import GHC.Prelude hiding (EQ) + +import GHC.CmmToAsm.RV64.Instr +import GHC.CmmToAsm.RV64.Regs +import GHC.CmmToAsm.RV64.Cond +import GHC.CmmToAsm.Ppr +import GHC.CmmToAsm.Format +import GHC.Platform.Reg +import GHC.CmmToAsm.Config +import GHC.CmmToAsm.Types +import GHC.CmmToAsm.Utils + +import GHC.Cmm hiding (topInfoTable) +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label +import GHC.Types.Basic (Alignment, mkAlignment, alignmentBytes) + +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel + +import GHC.Types.Unique ( pprUniqueAlways, getUnique ) +import GHC.Platform +import GHC.Utils.Outputable + +import GHC.Utils.Panic + +-- TODO: Move function down to where it is used. +pprProcAlignment :: IsDoc doc => NCGConfig -> doc +pprProcAlignment config = maybe empty (pprAlign platform . 
mkAlignment) (ncgProcAlignment config) + where + platform = ncgPlatform config + +pprNatCmmDecl :: IsDoc doc => NCGConfig -> NatCmmDecl RawCmmStatics Instr -> doc +pprNatCmmDecl config (CmmData section dats) = + pprSectionAlign config section $$ pprDatas config dats + +pprNatCmmDecl config proc@(CmmProc top_info lbl _ (ListGraph blocks)) = + let platform = ncgPlatform config in + pprProcAlignment config $$ + case topInfoTable proc of + Nothing -> + -- special case for code without info table: + pprSectionAlign config (Section Text lbl) $$ + -- do not + -- pprProcAlignment config $$ + pprLabel platform lbl $$ -- blocks guaranteed not null, so label needed + vcat (map (pprBasicBlock config top_info) blocks) $$ + ppWhen (ncgDwarfEnabled config) + (line (pprBlockEndLabel platform lbl) $$ line (pprProcEndLabel platform lbl)) $$ + pprSizeDecl platform lbl + + Just (CmmStaticsRaw info_lbl _) -> + pprSectionAlign config (Section Text info_lbl) $$ + -- pprProcAlignment config $$ + (if platformHasSubsectionsViaSymbols platform + then line (pprAsmLabel platform (mkDeadStripPreventer info_lbl) <> char ':') + else empty) $$ + vcat (map (pprBasicBlock config top_info) blocks) $$ + ppWhen (ncgDwarfEnabled config) (line (pprProcEndLabel platform info_lbl)) $$ + -- above: Even the first block gets a label, because with branch-chain + -- elimination, it might be the target of a goto. 
+ (if platformHasSubsectionsViaSymbols platform + then -- See Note [Subsections Via Symbols] + line + $ text "\t.long " + <+> pprAsmLabel platform info_lbl + <+> char '-' + <+> pprAsmLabel platform (mkDeadStripPreventer info_lbl) + else empty) $$ + pprSizeDecl platform info_lbl +{-# SPECIALIZE pprNatCmmDecl :: NCGConfig -> NatCmmDecl RawCmmStatics Instr -> SDoc #-} +{-# SPECIALIZE pprNatCmmDecl :: NCGConfig -> NatCmmDecl RawCmmStatics Instr -> HDoc #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable + +pprLabel :: IsDoc doc => Platform -> CLabel -> doc +pprLabel platform lbl = + pprGloblDecl platform lbl + $$ pprTypeDecl platform lbl + $$ line (pprAsmLabel platform lbl <> char ':') + +-- TODO: Delete unused parameter. +pprAlign :: IsDoc doc => Platform -> Alignment -> doc +pprAlign _platform alignment + = line $ text "\t.balign " <> int (alignmentBytes alignment) + +-- TODO: Delete unused parameters. +-- | Print appropriate alignment for the given section type. +pprAlignForSection :: IsDoc doc => Platform -> SectionType -> doc +pprAlignForSection _platform _seg + -- .balign is stable, whereas .align is platform dependent. + = line (text "\t.balign 8") -- always 8 + +-- | Print section header and appropriate alignment for that section. 
+-- +-- This one will emit the header: +-- +-- .section .text +-- .balign 8 +-- +pprSectionAlign :: IsDoc doc => NCGConfig -> Section -> doc +pprSectionAlign _config (Section (OtherSection _) _) = + panic "RV64.Ppr.pprSectionAlign: unknown section" +pprSectionAlign config sec@(Section seg _) = + line (pprSectionHeader config sec) + $$ pprAlignForSection (ncgPlatform config) seg + +pprProcEndLabel :: IsLine doc => Platform -> CLabel -- ^ Procedure name + -> doc +pprProcEndLabel platform lbl = + pprAsmLabel platform (mkAsmTempProcEndLabel lbl) <> colon + +pprBlockEndLabel :: IsLine doc => Platform -> CLabel -- ^ Block name + -> doc +pprBlockEndLabel platform lbl = + pprAsmLabel platform (mkAsmTempEndLabel lbl) <> colon + +-- | Output the ELF .size directive. +pprSizeDecl :: IsDoc doc => Platform -> CLabel -> doc +pprSizeDecl platform lbl + = if osElfTarget (platformOS platform) + then line (text "\t.size" <+> pprAsmLabel platform lbl <> text ", .-" <> pprAsmLabel platform lbl) + else empty + +pprBasicBlock :: IsDoc doc => NCGConfig -> LabelMap RawCmmStatics -> NatBasicBlock Instr + -> doc +pprBasicBlock config info_env (BasicBlock blockid instrs) + = maybe_infotable $ + pprLabel platform asmLbl $$ + vcat (map (pprInstr platform) (id {-detectTrivialDeadlock-} optInstrs)) $$ + ppWhen (ncgDwarfEnabled config) ( + -- Emit both end labels since this may end up being a standalone + -- top-level block + line (pprBlockEndLabel platform asmLbl + <> pprProcEndLabel platform asmLbl) + ) + where + -- TODO: Check if we can filter more instructions here. + -- TODO: Shouldn't this be a more general check on a higher level? + -- Filter out identity moves. E.g. mov x18, x18 will be dropped. 
+ optInstrs = filter f instrs + where f (MOV o1 o2) | o1 == o2 = False + f _ = True + + asmLbl = blockLbl blockid + platform = ncgPlatform config + maybe_infotable c = case mapLookup blockid info_env of + Nothing -> c + Just (CmmStaticsRaw info_lbl info) -> + -- pprAlignForSection platform Text $$ + infoTableLoc $$ + vcat (map (pprData config) info) $$ + pprLabel platform info_lbl $$ + c $$ + ppWhen (ncgDwarfEnabled config) + (line (pprBlockEndLabel platform info_lbl)) + -- Make sure the info table has the right .loc for the block + -- coming right after it. See Note [Info Offset] + infoTableLoc = case instrs of + (l@LOCATION{} : _) -> pprInstr platform l + _other -> empty + +pprDatas :: IsDoc doc => NCGConfig -> RawCmmStatics -> doc +-- See Note [emit-time elimination of static indirections] in "GHC.Cmm.CLabel". +pprDatas config (CmmStaticsRaw alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _]) + | lbl == mkIndStaticInfoLabel + , let labelInd (CmmLabelOff l _) = Just l + labelInd (CmmLabel l) = Just l + labelInd _ = Nothing + , Just ind' <- labelInd ind + , alias `mayRedirectTo` ind' + = pprGloblDecl (ncgPlatform config) alias + $$ line (text ".equiv" <+> pprAsmLabel (ncgPlatform config) alias <> comma <> pprAsmLabel (ncgPlatform config) ind') + +pprDatas config (CmmStaticsRaw lbl dats) + = vcat (pprLabel platform lbl : map (pprData config) dats) + where + platform = ncgPlatform config + +-- TODO: Unused parameter. +pprData :: IsDoc doc => NCGConfig -> CmmStatic -> doc +pprData _config (CmmString str) = line (pprString str) +pprData _config (CmmFileEmbed path _) = line (pprFileEmbed path) + +-- TODO: AFAIK there no Darwin for RISCV, so we may consider to simplify this. 
+pprData config (CmmUninitialised bytes) + = line $ let platform = ncgPlatform config + in if platformOS platform == OSDarwin + then text ".space " <> int bytes + else text ".skip " <> int bytes + +pprData config (CmmStaticLit lit) = pprDataItem config lit + +pprGloblDecl :: IsDoc doc => Platform -> CLabel -> doc +pprGloblDecl platform lbl + | not (externallyVisibleCLabel lbl) = empty + | otherwise = line (text "\t.globl " <> pprAsmLabel platform lbl) + +-- Note [Always use objects for info tables] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- See discussion in X86.Ppr for why this is necessary. Essentially we need to +-- ensure that we never pass function symbols when we might want to lookup the +-- info table. If we did, we could end up with procedure linking tables +-- (PLT)s, and thus the lookup wouldn't point to the function, but into the +-- jump table. +-- +-- Fun fact: The LLVMMangler exists to patch this issue su on the LLVM side as +-- well. +pprLabelType' :: IsLine doc => Platform -> CLabel -> doc +pprLabelType' platform lbl = + if isCFunctionLabel lbl || functionOkInfoTable then + text "@function" + else + text "@object" + where + functionOkInfoTable = platformTablesNextToCode platform && + isInfoTableLabel lbl && not (isCmmInfoTableLabel lbl) && not (isConInfoTableLabel lbl) + +-- this is called pprTypeAndSizeDecl in PPC.Ppr +pprTypeDecl :: IsDoc doc => Platform -> CLabel -> doc +pprTypeDecl platform lbl + = if osElfTarget (platformOS platform) && externallyVisibleCLabel lbl + then line (text ".type " <> pprAsmLabel platform lbl <> text ", " <> pprLabelType' platform lbl) + else empty + +pprDataItem :: IsDoc doc => NCGConfig -> CmmLit -> doc +pprDataItem config lit + = lines_ (ppr_item (cmmTypeFormat $ cmmLitType platform lit) lit) + where + platform = ncgPlatform config + + imm = litToImm lit + + ppr_item II8 _ = [text "\t.byte\t" <> pprDataImm platform imm] + ppr_item II16 _ = [text "\t.short\t" <> pprDataImm platform imm] + ppr_item II32 _ = 
[text "\t.long\t" <> pprDataImm platform imm] + ppr_item II64 _ = [text "\t.quad\t" <> pprDataImm platform imm] + + ppr_item FF32 (CmmFloat r _) + = let bs = floatToBytes (fromRational r) + in map (\b -> text "\t.byte\t" <> int (fromIntegral b)) bs + + ppr_item FF64 (CmmFloat r _) + = let bs = doubleToBytes (fromRational r) + in map (\b -> text "\t.byte\t" <> int (fromIntegral b)) bs + + ppr_item _ _ = pprPanic "pprDataItem:ppr_item" (text $ show lit) + +-- | Pretty print an immediate value in the @data@ section +-- +-- This does not include any checks. We rely on the Assembler to check for +-- errors. Use `pprOpImm` for immediates in instructions (operands.) +pprDataImm :: IsLine doc => Platform -> Imm -> doc +pprDataImm _ (ImmInt i) = int i +pprDataImm _ (ImmInteger i) = integer i +pprDataImm p (ImmCLbl l) = pprAsmLabel p l +pprDataImm p (ImmIndex l i) = pprAsmLabel p l <> char '+' <> int i +pprDataImm _ (ImmLit s) = ftext s +pprDataImm _ (ImmFloat f) = float (fromRational f) +pprDataImm _ (ImmDouble d) = double (fromRational d) + +pprDataImm p (ImmConstantSum a b) = pprDataImm p a <> char '+' <> pprDataImm p b +pprDataImm p (ImmConstantDiff a b) = pprDataImm p a <> char '-' + <> lparen <> pprDataImm p b <> rparen + +asmComment :: SDoc -> SDoc +asmComment c = text "#" <+> c + +asmDoubleslashComment :: SDoc -> SDoc +asmDoubleslashComment c = text "//" <+> c + +asmMultilineComment :: SDoc -> SDoc +asmMultilineComment c = text "/*" $+$ c $+$ text "*/" + +-- | Pretty print an immediate operand of an instruction +-- +-- The kinds of immediates we can use here is pretty limited: RISCV doesn't +-- support index expressions (as e.g. Aarch64 does.) Floating points need to +-- fit in range. As we don't need them, forbit them to save us from future +-- troubles. 
+pprOpImm :: (IsLine doc) => Platform -> Imm -> doc +pprOpImm platform im = case im of + ImmInt i -> int i + ImmInteger i -> integer i + ImmCLbl l -> char '=' <> pprAsmLabel platform l + _ -> pprPanic "RV64.Ppr.pprOpImm" (text "Unsupported immediate for instruction operands" <> colon <+> (text . show) im) + +negOp :: Operand -> Operand +negOp (OpImm (ImmInt i)) = OpImm (ImmInt (negate i)) +negOp (OpImm (ImmInteger i)) = OpImm (ImmInteger (negate i)) +negOp op = pprPanic "RV64.negOp" (text $ show op) + +pprOp :: IsLine doc => Platform -> Operand -> doc +pprOp plat op = case op of + OpReg w r -> pprReg w r + OpImm im -> pprOpImm plat im + OpAddr (AddrRegImm r1 im) -> pprOpImm plat im <> char '(' <> pprReg W64 r1 <> char ')' + OpAddr (AddrReg r1) -> text "0(" <+> pprReg W64 r1 <+> char ')' + +pprReg :: forall doc. IsLine doc => Width -> Reg -> doc +pprReg w r = case r of + RegReal (RealRegSingle i) -> ppr_reg_no i + -- virtual regs should not show up, but this is helpful for debugging. + RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u + RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u + RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u + _ -> pprPanic "RiscV64.pprReg" (text (show r) <+> ppr w) + + where + ppr_reg_no :: Int -> doc + -- General Purpose Registers + ppr_reg_no 0 = text "zero" + ppr_reg_no 1 = text "ra" + ppr_reg_no 2 = text "sp" + ppr_reg_no 3 = text "gp" + ppr_reg_no 4 = text "tp" + ppr_reg_no 5 = text "t0" + ppr_reg_no 6 = text "t1" + ppr_reg_no 7 = text "t2" + ppr_reg_no 8 = text "s0" + ppr_reg_no 9 = text "s1" + ppr_reg_no 10 = text "a0" + ppr_reg_no 11 = text "a1" + ppr_reg_no 12 = text "a2" + ppr_reg_no 13 = text "a3" + ppr_reg_no 14 = text "a4" + ppr_reg_no 15 = text "a5" + ppr_reg_no 16 = text "a6" + ppr_reg_no 17 = text "a7" + ppr_reg_no 18 = text "s2" + ppr_reg_no 19 = text "s3" + ppr_reg_no 20 = text "s4" + ppr_reg_no 21 = text "s5" + ppr_reg_no 22 = text "s6" + ppr_reg_no 23 = text "s7" + ppr_reg_no 24 
= text "s8" + ppr_reg_no 25 = text "s9" + ppr_reg_no 26 = text "s10" + ppr_reg_no 27 = text "s11" + ppr_reg_no 28 = text "t3" + ppr_reg_no 29 = text "t4" + ppr_reg_no 30 = text "t5" + ppr_reg_no 31 = text "t6" + + -- Floating Point Registers + ppr_reg_no 32 = text "ft0" + ppr_reg_no 33 = text "ft1" + ppr_reg_no 34 = text "ft2" + ppr_reg_no 35 = text "ft3" + ppr_reg_no 36 = text "ft4" + ppr_reg_no 37 = text "ft5" + ppr_reg_no 38 = text "ft6" + ppr_reg_no 39 = text "ft7" + ppr_reg_no 40 = text "fs0" + ppr_reg_no 41 = text "fs1" + ppr_reg_no 42 = text "fa0" + ppr_reg_no 43 = text "fa1" + ppr_reg_no 44 = text "fa2" + ppr_reg_no 45 = text "fa3" + ppr_reg_no 46 = text "fa4" + ppr_reg_no 47 = text "fa5" + ppr_reg_no 48 = text "fa6" + ppr_reg_no 49 = text "fa7" + ppr_reg_no 50 = text "fs2" + ppr_reg_no 51 = text "fs3" + ppr_reg_no 52 = text "fs4" + ppr_reg_no 53 = text "fs5" + ppr_reg_no 54 = text "fs6" + ppr_reg_no 55 = text "fs7" + ppr_reg_no 56 = text "fs8" + ppr_reg_no 57 = text "fs9" + ppr_reg_no 58 = text "fs10" + ppr_reg_no 59 = text "fs11" + ppr_reg_no 60 = text "ft8" + ppr_reg_no 61 = text "ft9" + ppr_reg_no 62 = text "ft10" + ppr_reg_no 63 = text "ft11" + + ppr_reg_no i + | i < 0 = pprPanic "Unexpected register number (min is 0)" (ppr w <+> int i) + | i > 63 = pprPanic "Unexpected register number (max is 63)" (ppr w <+> int i) + -- no support for widths > W64. 
+ | otherwise = pprPanic "Unsupported width in register (max is 64)" (ppr w <+> int i) + +isSingleOp :: Operand -> Bool +isSingleOp (OpReg W32 _) = True +isSingleOp _ = False + +isDoubleOp :: Operand -> Bool +isDoubleOp (OpReg W64 _) = True +isDoubleOp _ = False + +isImmOp :: Operand -> Bool +isImmOp (OpImm _) = True +isImmOp _ = False + +isImmZero :: Operand -> Bool +isImmZero (OpImm (ImmFloat 0)) = True +isImmZero (OpImm (ImmDouble 0)) = True +isImmZero (OpImm (ImmInt 0)) = True +isImmZero _ = False + +isLabel :: Target -> Bool +isLabel (TBlock _) = True +isLabel _ = False + +getLabel :: IsLine doc => Platform -> Target -> doc +getLabel platform (TBlock bid) = pprAsmLabel platform (mkLocalBlockLabel (getUnique bid)) +getLabel _platform _other = panic "Cannot turn this into a label" + +pprInstr :: IsDoc doc => Platform -> Instr -> doc +pprInstr platform instr = case instr of + -- Meta Instructions --------------------------------------------------------- + -- see Note [dualLine and dualDoc] in GHC.Utils.Outputable + COMMENT s -> dualDoc (asmComment s) empty + MULTILINE_COMMENT s -> dualDoc (asmMultilineComment s) empty + ANN d i -> dualDoc (pprInstr platform i <+> asmDoubleslashComment d) (pprInstr platform i) + + LOCATION file line' col _name + -> line (text "\t.loc" <+> int file <+> int line' <+> int col) + DELTA d -> dualDoc (asmComment $ text "\tdelta = " <> int d) empty + NEWBLOCK _ -> panic "PprInstr: NEWBLOCK" + LDATA _ _ -> panic "pprInstr: LDATA" + + -- Pseudo Instructions ------------------------------------------------------- + + PUSH_STACK_FRAME -> lines_ [ text "\taddi sp, sp, -16" + , text "\tsd x1, 8(sp)" -- store RA + , text "\tsd x8, 0(sp)" -- store FP/s0 + , text "\taddi x8, sp, 16"] + + POP_STACK_FRAME -> lines_ [ text "\tld x8, 0(sp)" -- restore FP/s0 + , text "\tld x1, 8(sp)" -- restore RA + , text "\taddi sp, sp, 16" ] + -- =========================================================================== + -- AArch64 Instruction Set + -- 1. 
Arithmetic Instructions ------------------------------------------------ + ADD o1 o2 o3 + | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfadd." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3 + -- This case is used for sign extension: SEXT.W op + | OpReg W64 _ <- o1 , OpReg W32 _ <- o2, isImmOp o3 -> op3 (text "\taddiw") o1 o2 o3 + | otherwise -> op3 (text "\tadd") o1 o2 o3 + MUL o1 o2 o3 + | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfmul." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3 + | otherwise -> op3 (text "\tmul") o1 o2 o3 + SMULH o1 o2 o3 -> op3 (text "\tmulh") o1 o2 o3 + NEG o1 o2 | isFloatOp o1 && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfneg.s") o1 o2 + NEG o1 o2 | isFloatOp o1 && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfneg.d") o1 o2 + NEG o1 o2 -> op2 (text "\tneg") o1 o2 + DIV o1 o2 o3 | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 + -- TODO: This must (likely) be refined regarding width + -> op3 (text "\tfdiv." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3 + DIV o1 o2 o3 -> op3 (text "\tdiv") o1 o2 o3 + REM o1 o2 o3 | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 + -> panic "pprInstr - REM not implemented for floats (yet)" + REM o1 o2 o3 -> op3 (text "\trem") o1 o2 o3 + REMU o1 o2 o3 -> op3 (text "\tremu") o1 o2 o3 + + SUB o1 o2 o3 + | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> op3 (text "\tfsub." <> if isSingleOp o1 then text "s" else text "d") o1 o2 o3 + | isImmOp o3 -> op3 (text "\taddi") o1 o2 (negOp o3) + | otherwise -> op3 (text "\tsub") o1 o2 o3 + DIVU o1 o2 o3 -> op3 (text "\tdivu") o1 o2 o3 + + -- 2. Bit Manipulation Instructions ------------------------------------------ + + -- 3. 
Logical and Move Instructions ------------------------------------------ + AND o1 o2 o3 | isImmOp o3 -> op3 (text "\tandi") o1 o2 o3 + | otherwise -> op3 (text "\tand") o1 o2 o3 + OR o1 o2 o3 -> op3 (text "\tor") o1 o2 o3 + ASR o1 o2 o3 | isImmOp o3 -> op3 (text "\tsrai") o1 o2 o3 + ASR o1 o2 o3 -> op3 (text "\tsra") o1 o2 o3 + XOR o1 o2 o3 -> op3 (text "\txor") o1 o2 o3 + LSL o1 o2 o3 -> op3 (text "\tsll") o1 o2 o3 + LSR o1 o2 o3 -> op3 (text "\tsrl") o1 o2 o3 + MOV o1 o2 + | isFloatOp o1 && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfmv.d") o1 o2 -- fmv.d rd, rs is pseudo op fsgnj.d rd, rs, rs + | isFloatOp o1 && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfmv.s") o1 o2 -- fmv.s rd, rs is pseudo op fsgnj.s rd, rs, rs + | isFloatOp o1 && isImmZero o2 && isDoubleOp o1 -> op2 (text "\tfcvt.d.w") o1 zero + | isFloatOp o1 && isImmZero o2 && isSingleOp o1 -> op2 (text "\tfcvt.s.w") o1 zero + | isFloatOp o1 && not (isFloatOp o2) && isSingleOp o1 -> op2 (text "\tfmv.w.x") o1 o2 + | isFloatOp o1 && not (isFloatOp o2) && isDoubleOp o1 -> op2 (text "\tfmv.d.x") o1 o2 + | not (isFloatOp o1) && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfmv.x.w") o1 o2 + | not (isFloatOp o1) && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfmv.x.d") o1 o2 + | (OpImm (ImmInteger i)) <- o2 + , fitsIn12bitImm i + -> lines_ [ text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform x0 <> comma <+> pprOp platform o2 ] + | (OpImm (ImmInt i)) <- o2 + , fitsIn12bitImm i + -> lines_ [ text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform x0 <> comma <+> pprOp platform o2 ] + | (OpImm (ImmInteger i)) <- o2 + , fitsIn32bits i + -> lines_ [ text "\tlui" <+> pprOp platform o1 <> comma <+> text "%hi(" <> pprOp platform o2 <> text ")" + , text "\taddw" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text "%lo(" <> pprOp platform o2 <> text ")" ] + | (OpImm (ImmInt i)) <- o2 + , fitsIn32bits i + -> lines_ [ text "\tlui" <+> pprOp platform o1 <> comma <+> text "%hi(" 
<> pprOp platform o2 <> text ")" + , text "\taddw" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text "%lo(" <> pprOp platform o2 <> text ")" ] + | isImmOp o2 + -- Surrender! Let the assembler figure out the right expressions with pseudo-op LI. + -> lines_ [ text "\tli" <+> pprOp platform o1 <> comma <+> pprOp platform o2 ] + | otherwise -> op3 (text "\taddi") o1 o2 (OpImm (ImmInt 0)) + ORI o1 o2 o3 -> op3 (text "\tori") o1 o2 o3 + XORI o1 o2 o3 -> op3 (text "\txori") o1 o2 o3 + + -- 4. Branch Instructions ---------------------------------------------------- + J t -> pprInstr platform (B t) + J_TBL _ _ r -> pprInstr platform (J (TReg r)) + -- TODO: This is odd: (B)ranch and branch and link (BL) do the same: branch and link + B l | isLabel l -> lines_ [ text "\tla" <+> pprOp platform ip <> comma <+> getLabel platform l + , text "\tjalr" <+> text "x0" <> comma <+> pprOp platform ip <> comma <+> text "0" ] + B (TReg r) -> line $ text "\tjalr" <+> text "x0" <> comma <+> pprReg W64 r <> comma <+> text "0" + + BL l _ _ | isLabel l-> line $ text "\tcall" <+> getLabel platform l + BL (TReg r) _ _ -> line $ text "\tjalr" <+> text "x1" <> comma <+> pprReg W64 r <> comma <+> text "0" + + BCOND c l r t | isLabel t -> + line $ text "\t" <> pprBcond c <+> pprOp platform l <> comma <+> pprOp platform r <> comma <+> getLabel platform t + + BCOND _ _ _ (TReg _) -> panic "RV64.ppr: No conditional branching to registers!" + + -- 5. Atomic Instructions ---------------------------------------------------- + -- 6. Conditional Instructions ----------------------------------------------- + CSET o l r c -> case c of + EQ | isIntOp l && isIntOp r -> lines_ [ subFor l r + , text "\tseqz" <+> pprOp platform o <> comma <+> pprOp platform o] + EQ | isFloatOp l && isFloatOp r -> line $ binOp ("\tfeq." 
++ floatOpPrecision platform l r) + NE | isIntOp l && isIntOp r -> lines_ [ subFor l r + , text "\tsnez" <+> pprOp platform o <> comma <+> pprOp platform o] + -- feq.s a0,fa0,fa1 + -- xori a0,a0,1 + NE | isFloatOp l && isFloatOp r -> lines_ [binOp ("\tfeq." ++ floatOpPrecision platform l r) + , text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1"] + SLT -> lines_ [ sltFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r ] + SLE -> lines_ [ sltFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l + , text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1" ] + SGE -> lines_ [ sltFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r + , text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1" ] + SGT -> lines_ [ sltFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l ] + ULT -> lines_ [ sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r ] + ULE -> lines_ [ sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l + , text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1" ] + UGE -> lines_ [ sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r + , text "\txori" <+> pprOp platform o <> comma <+> pprOp platform o <> comma <+> text "1" ] + UGT -> lines_ [ sltuFor l r <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l ] + FLT | isFloatOp l && isFloatOp r -> line $ binOp ("\tflt." ++ floatOpPrecision platform l r) + FLE | isFloatOp l && isFloatOp r -> line $ binOp ("\tfle." ++ floatOpPrecision platform l r) + FGT | isFloatOp l && isFloatOp r -> line $ binOp ("\tfgt." ++ floatOpPrecision platform l r) + FGE | isFloatOp l && isFloatOp r -> line $ binOp ("\tfge." 
++ floatOpPrecision platform l r) + x -> pprPanic "RV64.ppr: unhandled CSET conditional" (text (show x) <+> pprOp platform o <> comma <+> pprOp platform r <> comma <+> pprOp platform l) + where + subFor l r | (OpImm _) <- r = text "\taddi" <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform (negOp r) + | (OpImm _) <- l = panic "RV64.ppr: Cannot SUB IMM _" + | otherwise = text "\tsub" <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r + sltFor l r | (OpImm _) <- r = text "\tslti" + | (OpImm _) <- l = panic "PV64.ppr: Cannot SLT IMM _" + | otherwise = text "\tslt" + sltuFor l r| (OpImm _) <- r = text "\tsltui" + | (OpImm _) <- l = panic "PV64.ppr: Cannot SLTU IMM _" + | otherwise = text "\tsltu" + binOp :: (IsLine doc) => String -> doc + binOp op = text op <+> pprOp platform o <> comma <+> pprOp platform l <> comma <+> pprOp platform r + + -- 7. Load and Store Instructions -------------------------------------------- + -- NOTE: GHC may do whacky things where it only load the lower part of an + -- address. Not observing the correct size when loading will lead + -- inevitably to crashes. + STR II8 o1 o2 -> op2 (text "\tsb") o1 o2 + STR II16 o1 o2 -> op2 (text "\tsh") o1 o2 + STR II32 o1 o2 -> op2 (text "\tsw") o1 o2 + STR II64 o1 o2 -> op2 (text "\tsd") o1 o2 + STR FF32 o1 o2 -> op2 (text "\tfsw") o1 o2 + STR FF64 o1 o2 -> op2 (text "\tfsd") o1 o2 + + LDR _f o1 (OpImm (ImmIndex lbl off)) -> + lines_ [ text "\tla" <+> pprOp platform o1 <> comma <+> pprAsmLabel platform lbl + , text "\taddi" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> int off + ] + + LDR _f o1 (OpImm (ImmCLbl lbl)) -> + -- fixing this is _really_ annoyin we need to generate code like: + -- 1: auipc x16, %pcrel_hi(<lbl>) + -- addi x16, x16, %pcrel_lo(1b) + -- I really dislike this (refer back to label 1 syntax from the assembler.) + -- + -- So we'll go with pseudo ops. la and li it is. 
+ -- op_adrp o1 (text "%pcrel_hi(" <> pprAsmLabel platform lbl <> text ")") $$ + -- op_add o1 (text "%pcrel_lo(" <> pprAsmLabel platform lbl <> text ")") + line $ text "\tla" <+> pprOp platform o1 <> comma <+> pprAsmLabel platform lbl + + LDR II8 o1 o2 -> op2 (text "\tlb") o1 o2 + LDR II16 o1 o2 -> op2 (text "\tlh") o1 o2 + LDR II32 o1 o2 -> op2 (text "\tlw") o1 o2 + LDR II64 o1 o2 -> op2 (text "\tld") o1 o2 + LDR FF32 o1 o2 -> op2 (text "\tflw") o1 o2 + LDR FF64 o1 o2 -> op2 (text "\tfld") o1 o2 + + LDRU II8 o1 o2 -> op2 (text "\tlbu") o1 o2 + LDRU II16 o1 o2 -> op2 (text "\tlhu") o1 o2 + LDRU II32 o1 o2 -> op2 (text "\tlwu") o1 o2 + -- double words (64bit) cannot be sign extended by definition + LDRU II64 o1 o2 -> op2 (text "\tld") o1 o2 + LDRU FF32 o1 o2@(OpAddr (AddrReg _)) -> op2 (text "\tflw") o1 o2 + LDRU FF32 o1 o2@(OpAddr (AddrRegImm _ _)) -> op2 (text "\tflw") o1 o2 + LDRU FF64 o1 o2@(OpAddr (AddrReg _)) -> op2 (text "\tfld") o1 o2 + LDRU FF64 o1 o2@(OpAddr (AddrRegImm _ _)) -> op2 (text "\tfld") o1 o2 + LDRU f o1 o2 -> pprPanic "Unsupported unsigned load" ((text.show) f <+> pprOp platform o1 <+> pprOp platform o2) + + -- 8. Synchronization Instructions ------------------------------------------- + DMBSY r w -> line $ text "\tfence" <+> pprDmbType r <> char ',' <+> pprDmbType w + + -- 9. 
Floating Point Instructions -------------------------------------------- + FCVT o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.s.d") o1 o2 + FCVT o1@(OpReg W64 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.d.s") o1 o2 + FCVT o1 o2 -> pprPanic "RV64.pprInstr - impossible float conversion" $ + line (pprOp platform o1 <> text "->" <> pprOp platform o2) + + SCVTF o1@(OpReg W32 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.s.w") o1 o2 + SCVTF o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.s.l") o1 o2 + SCVTF o1@(OpReg W64 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.d.w") o1 o2 + SCVTF o1@(OpReg W64 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.d.l") o1 o2 + SCVTF o1 o2 -> pprPanic "RV64.pprInstr - impossible integer to float conversion" $ + line (pprOp platform o1 <> text "->" <> pprOp platform o2) + + FCVTZS o1@(OpReg W32 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.w.s") o1 o2 + FCVTZS o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.w.d") o1 o2 + FCVTZS o1@(OpReg W64 _) o2@(OpReg W32 _) -> op2 (text "\tfcvt.l.s") o1 o2 + FCVTZS o1@(OpReg W64 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.l.d") o1 o2 + FCVTZS o1 o2 -> pprPanic "RV64.pprInstr - impossible float to integer conversion" $ + line (pprOp platform o1 <> text "->" <> pprOp platform o2) + + FABS o1 o2 | isSingleOp o2 -> op2 (text "\tfabs.s") o1 o2 + FABS o1 o2 | isDoubleOp o2 -> op2 (text "\tfabs.d") o1 o2 + instr -> panic $ "RV64.pprInstr - Unknown instruction: " ++ instrCon instr + where op2 op o1 o2 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 + op3 op o1 o2 o3 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + pprDmbType DmbRead = text "r" + pprDmbType DmbWrite = text "w" + pprDmbType DmbReadWrite = text "rw" + +floatOpPrecision :: Platform -> Operand -> Operand -> String +floatOpPrecision _p l r | isFloatOp l && isFloatOp r && isSingleOp l && isSingleOp r = "s" -- single precision +floatOpPrecision _p l r | isFloatOp l && isFloatOp r && 
isDoubleOp l && isDoubleOp r = "d" -- double precision +floatOpPrecision p l r = pprPanic "Cannot determine floating point precission" (text "op1" <+> pprOp p l <+> text "op2" <+> pprOp p r) + +pprBcond :: (IsLine doc) => Cond -> doc +pprBcond c = text "b" <> pprCond c + where + pprCond :: (IsLine doc) => Cond -> doc + pprCond c = case c of + EQ -> text "eq" + NE -> text "ne" + SLT -> text "lt" + SLE -> text "le" + SGE -> text "ge" + SGT -> text "gt" + ULT -> text "ltu" + ULE -> text "leu" + UGE -> text "geu" + UGT -> text "gtu" + -- BCOND cannot handle floating point comparisons / registers + _ -> panic $ "RV64.ppr: unhandled BCOND conditional: " ++ show c Index: ghc-9.8.2/compiler/GHC/CmmToAsm/RV64/RegInfo.hs =================================================================== --- /dev/null +++ ghc-9.8.2/compiler/GHC/CmmToAsm/RV64/RegInfo.hs @@ -0,0 +1,31 @@ +module GHC.CmmToAsm.RV64.RegInfo where + +import GHC.Prelude + +import GHC.CmmToAsm.RV64.Instr +import GHC.Cmm.BlockId +import GHC.Cmm + +import GHC.Utils.Outputable + +data JumpDest = DestBlockId BlockId + +-- Debug Instance +instance Outputable JumpDest where + ppr (DestBlockId bid) = text "jd<blk>:" <> ppr bid + +-- TODO: documen what this does. See Ticket 19914 +getJumpDestBlockId :: JumpDest -> Maybe BlockId +getJumpDestBlockId (DestBlockId bid) = Just bid + +-- TODO: document what this does. See Ticket 19914 +canShortcut :: Instr -> Maybe JumpDest +canShortcut _ = Nothing + +-- TODO: document what this does. See Ticket 19914 +shortcutStatics :: (BlockId -> Maybe JumpDest) -> RawCmmStatics -> RawCmmStatics +shortcutStatics _ other_static = other_static + +-- TODO: document what this does. 
See Ticket 19914 +shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr +shortcutJump _ other = other \ No newline at end of file Index: ghc-9.8.2/compiler/GHC/CmmToAsm/RV64/Regs.hs =================================================================== --- /dev/null +++ ghc-9.8.2/compiler/GHC/CmmToAsm/RV64/Regs.hs @@ -0,0 +1,226 @@ +module GHC.CmmToAsm.RV64.Regs where + +import GHC.Prelude +import GHC.Data.FastString + +import GHC.Platform.Reg +import GHC.Platform.Reg.Class +import GHC.CmmToAsm.Format + +import GHC.Cmm +import GHC.Cmm.CLabel ( CLabel ) +import GHC.Types.Unique + +import GHC.Platform.Regs +import GHC.Utils.Outputable +import GHC.Utils.Panic +import GHC.Platform + +-- * Registers + +-- | First integer register number. @zero@ register. +x0RegNo :: RegNo +x0RegNo = 0 + +x5RegNo, t0RegNo :: RegNo +x5RegNo = 5 +t0RegNo = x5RegNo + +x7RegNo, t2RegNo :: RegNo +x7RegNo = 7 +t2RegNo = x7RegNo + +x28RegNo, t3RegNo :: RegNo +x28RegNo = 28 +t3RegNo = x28RegNo + +-- | Last integer register number. Used as IP register. +x31RegNo, t6RegNo, ipRegNo :: RegNo +x31RegNo = 31 +t6RegNo = x31RegNo +ipRegNo = x31RegNo + +-- | First floating point register. +d0RegNo, ft0RegNo :: RegNo +d0RegNo = 32 +ft0RegNo = d0RegNo + +d7RegNo, ft7RegNo :: RegNo +d7RegNo = 39 +ft7RegNo = d7RegNo + +-- | Last floating point register. +d31RegNo :: RegNo +d31RegNo = 63 + +a0RegNo, x10RegNo :: RegNo +x10RegNo = 10 +a0RegNo = x10RegNo + +a7RegNo, x17RegNo :: RegNo +x17RegNo = 17 +a7RegNo = x17RegNo + +fa0RegNo, d10RegNo :: RegNo +d10RegNo = 42 +fa0RegNo = d10RegNo + +fa7RegNo, d17RegNo :: RegNo +d17RegNo = 49 +fa7RegNo = d17RegNo + +-- Note [The made-up RISCV64 IP register] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- RISCV64 has no inter-procedural register in its ABI. However, we need one to +-- make register spills/loads to/from high number slots. I.e. slot numbers that +-- do not fit in a 12bit integer which is used as immediate in the arithmetic +-- operations. 
Thus, we're marking one additional register (x31) as permanently +-- non-free and call it IP. +-- +-- IP can be used as temporary register in all operations. Just be aware that it +-- may be clobbered as soon as you loose direct control over it (i.e. using IP +-- by-passes the register allocation/spilling mechanisms.) It should be fine to +-- use it as temporary register in a MachOp translation as long as you don't +-- rely on its value beyond this limited scope. +-- +-- X31 is a caller-saved register. I.e. there are no guarantees about what the +-- callee does with it. That's exactly what we want here. + +zeroReg, raReg, spMachReg, ipReg :: Reg +zeroReg = regSingle x0RegNo +raReg = regSingle 1 +-- | Not to be confused with the `CmmReg` `spReg` +spMachReg = regSingle 2 +ipReg = regSingle ipRegNo + +-- | All machine register numbers. +allMachRegNos :: [RegNo] +allMachRegNos = intRegs ++ fpRegs + where + intRegs = [x0RegNo .. x31RegNo] + fpRegs = [d0RegNo .. d31RegNo] + +-- | Registers available to the register allocator. +-- +-- These are all registers minus those with a fixed role in RISCV ABI (zero, lr, +-- sp, gp, tp, fp, ip) and GHC RTS (Base, Sp, Hp, HpLim, R1..R8, F1..F6, +-- D1..D6.) +allocatableRegs :: Platform -> [RealReg] +allocatableRegs platform = + let isFree = freeReg platform + in map RealRegSingle $ filter isFree allMachRegNos + +-- | Integer argument registers according to the calling convention +allGpArgRegs :: [Reg] +allGpArgRegs = map regSingle [a0RegNo .. a7RegNo] + +-- | Floating point argument registers according to the calling convention +allFpArgRegs :: [Reg] +allFpArgRegs = map regSingle [fa0RegNo .. fa7RegNo] + +-- * Addressing modes + +-- TODO: AddReg seems to be just a special case of AddrRegImm. Maybe we should +-- replace it with AddrRegImm having an Imm of 0. +-- | Addressing modes +data AddrMode + = -- | A register plus some integer, e.g. @8(sp)@ or @-16(sp)@. The offset + -- needs to fit into 12bits. 
+ AddrRegImm Reg Imm + | -- | A register + AddrReg Reg + deriving (Eq, Show) + +-- * Immediates + +data Imm + = ImmInt Int + | ImmInteger Integer -- Sigh. + | ImmCLbl CLabel -- AbstractC Label (with baggage) + | ImmLit FastString + | ImmIndex CLabel Int + | ImmFloat Rational + | ImmDouble Rational + | ImmConstantSum Imm Imm + | ImmConstantDiff Imm Imm + deriving (Eq, Show) + +litToImm :: CmmLit -> Imm +litToImm (CmmInt i w) = ImmInteger (narrowS w i) +-- narrow to the width: a CmmInt might be out of +-- range, but we assume that ImmInteger only contains +-- in-range values. A signed value should be fine here. +litToImm (CmmFloat f W32) = ImmFloat f +litToImm (CmmFloat f W64) = ImmDouble f +litToImm (CmmLabel l) = ImmCLbl l +litToImm (CmmLabelOff l off) = ImmIndex l off +litToImm (CmmLabelDiffOff l1 l2 off _) = + ImmConstantSum + (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2)) + (ImmInt off) +litToImm l = panic $ "RV64.Regs.litToImm: no match for " ++ show l + +-- == To satisfy GHC.CmmToAsm.Reg.Target ======================================= + +-- squeese functions for the graph allocator ----------------------------------- +-- | regSqueeze_class reg +-- Calculate the maximum number of register colors that could be +-- denied to a node of this class due to having this reg +-- as a neighbour. 
+-- +{-# INLINE virtualRegSqueeze #-} +virtualRegSqueeze :: RegClass -> VirtualReg -> Int +virtualRegSqueeze cls vr + = case cls of + RcInteger + -> case vr of + VirtualRegI{} -> 1 + VirtualRegHi{} -> 1 + _other -> 0 + + RcDouble + -> case vr of + VirtualRegD{} -> 1 + VirtualRegF{} -> 0 + _other -> 0 + + _other -> 0 + +{-# INLINE realRegSqueeze #-} +realRegSqueeze :: RegClass -> RealReg -> Int +realRegSqueeze cls rr = + case cls of + RcInteger -> + case rr of + RealRegSingle regNo + | regNo < d0RegNo -> 1 + | otherwise -> 0 + RcDouble -> + case rr of + RealRegSingle regNo + | regNo < d0RegNo -> 0 + | otherwise -> 1 + _other -> 0 + +mkVirtualReg :: Unique -> Format -> VirtualReg +mkVirtualReg u format + | not (isFloatFormat format) = VirtualRegI u + | otherwise + = case format of + FF32 -> VirtualRegD u + FF64 -> VirtualRegD u + _ -> panic "RV64.mkVirtualReg" + +{-# INLINE classOfRealReg #-} +classOfRealReg :: RealReg -> RegClass +classOfRealReg (RealRegSingle i) + | i < d0RegNo = RcInteger + | otherwise = RcDouble + +regDotColor :: RealReg -> SDoc +regDotColor reg + = case classOfRealReg reg of + RcInteger -> text "blue" + RcFloat -> text "red" + RcDouble -> text "green" Index: ghc-9.8.2/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs =================================================================== --- ghc-9.8.2.orig/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs +++ ghc-9.8.2/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs @@ -115,7 +115,7 @@ trivColorable platform virtualRegSqueeze ArchMipseb -> panic "trivColorable ArchMipseb" ArchMipsel -> panic "trivColorable ArchMipsel" ArchS390X -> panic "trivColorable ArchS390X" - ArchRISCV64 -> panic "trivColorable ArchRISCV64" + ArchRISCV64 -> 14 ArchLoongArch64->panic "trivColorable ArchLoongArch64" ArchJavaScript-> panic "trivColorable ArchJavaScript" ArchWasm32 -> panic "trivColorable ArchWasm32" @@ -150,7 +150,7 @@ trivColorable platform virtualRegSqueeze ArchMipseb -> panic "trivColorable ArchMipseb" 
ArchMipsel -> panic "trivColorable ArchMipsel" ArchS390X -> panic "trivColorable ArchS390X" - ArchRISCV64 -> panic "trivColorable ArchRISCV64" + ArchRISCV64 -> 0 ArchLoongArch64->panic "trivColorable ArchLoongArch64" ArchJavaScript-> panic "trivColorable ArchJavaScript" ArchWasm32 -> panic "trivColorable ArchWasm32" @@ -184,7 +184,7 @@ trivColorable platform virtualRegSqueeze ArchMipseb -> panic "trivColorable ArchMipseb" ArchMipsel -> panic "trivColorable ArchMipsel" ArchS390X -> panic "trivColorable ArchS390X" - ArchRISCV64 -> panic "trivColorable ArchRISCV64" + ArchRISCV64 -> 20 ArchLoongArch64->panic "trivColorable ArchLoongArch64" ArchJavaScript-> panic "trivColorable ArchJavaScript" ArchWasm32 -> panic "trivColorable ArchWasm32" Index: ghc-9.8.2/compiler/GHC/CmmToAsm/Reg/Linear.hs =================================================================== --- ghc-9.8.2.orig/compiler/GHC/CmmToAsm/Reg/Linear.hs +++ ghc-9.8.2/compiler/GHC/CmmToAsm/Reg/Linear.hs @@ -114,6 +114,7 @@ import qualified GHC.CmmToAsm.Reg.Linear import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 import qualified GHC.CmmToAsm.Reg.Linear.AArch64 as AArch64 +import qualified GHC.CmmToAsm.Reg.Linear.RV64 as RV64 import GHC.CmmToAsm.Reg.Target import GHC.CmmToAsm.Reg.Liveness import GHC.CmmToAsm.Reg.Utils @@ -223,7 +224,7 @@ linearRegAlloc config entry_ids block_li ArchAlpha -> panic "linearRegAlloc ArchAlpha" ArchMipseb -> panic "linearRegAlloc ArchMipseb" ArchMipsel -> panic "linearRegAlloc ArchMipsel" - ArchRISCV64 -> panic "linearRegAlloc ArchRISCV64" + ArchRISCV64 -> go $ (frInitFreeRegs platform :: RV64.FreeRegs) ArchLoongArch64-> panic "linearRegAlloc ArchLoongArch64" ArchJavaScript -> panic "linearRegAlloc ArchJavaScript" ArchWasm32 -> panic "linearRegAlloc ArchWasm32" Index: ghc-9.8.2/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs =================================================================== --- 
ghc-9.8.2.orig/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs +++ ghc-9.8.2/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs @@ -29,10 +29,12 @@ import qualified GHC.CmmToAsm.Reg.Linear import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 import qualified GHC.CmmToAsm.Reg.Linear.AArch64 as AArch64 +import qualified GHC.CmmToAsm.Reg.Linear.RV64 as RV64 import qualified GHC.CmmToAsm.PPC.Instr as PPC.Instr import qualified GHC.CmmToAsm.X86.Instr as X86.Instr import qualified GHC.CmmToAsm.AArch64.Instr as AArch64.Instr +import qualified GHC.CmmToAsm.RV64.Instr as RV64.Instr class Show freeRegs => FR freeRegs where frAllocateReg :: Platform -> RealReg -> freeRegs -> freeRegs @@ -64,6 +66,12 @@ instance FR AArch64.FreeRegs where frInitFreeRegs = AArch64.initFreeRegs frReleaseReg = \_ -> AArch64.releaseReg +instance FR RV64.FreeRegs where + frAllocateReg = \_ -> RV64.allocateReg + frGetFreeRegs = \_ -> RV64.getFreeRegs + frInitFreeRegs = RV64.initFreeRegs + frReleaseReg = \_ -> RV64.releaseReg + maxSpillSlots :: NCGConfig -> Int maxSpillSlots config = case platformArch (ncgPlatform config) of ArchX86 -> X86.Instr.maxSpillSlots config @@ -76,7 +84,7 @@ maxSpillSlots config = case platformArch ArchAlpha -> panic "maxSpillSlots ArchAlpha" ArchMipseb -> panic "maxSpillSlots ArchMipseb" ArchMipsel -> panic "maxSpillSlots ArchMipsel" - ArchRISCV64 -> panic "maxSpillSlots ArchRISCV64" + ArchRISCV64 -> RV64.Instr.maxSpillSlots config ArchLoongArch64->panic "maxSpillSlots ArchLoongArch64" ArchJavaScript-> panic "maxSpillSlots ArchJavaScript" ArchWasm32 -> panic "maxSpillSlots ArchWasm32" Index: ghc-9.8.2/compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs =================================================================== --- /dev/null +++ ghc-9.8.2/compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs @@ -0,0 +1,65 @@ +module GHC.CmmToAsm.Reg.Linear.RV64 where + +import GHC.Prelude + +import GHC.CmmToAsm.RV64.Regs +import GHC.Platform.Reg.Class +import 
GHC.Platform.Reg + +import GHC.Utils.Outputable +import GHC.Utils.Panic +import GHC.Platform + +import Data.Word + +import GHC.Stack + +data FreeRegs = FreeRegs !Word32 !Word32 + +instance Show FreeRegs where + show (FreeRegs g f) = "FreeRegs: " ++ showBits g ++ "; " ++ showBits f + +instance Outputable FreeRegs where + ppr (FreeRegs g f) = text " " <+> foldr (\i x -> pad_int i <+> x) (text "") [0..31] + $$ text "GPR" <+> foldr (\i x -> show_bit g i <+> x) (text "") [0..31] + $$ text "FPR" <+> foldr (\i x -> show_bit f i <+> x) (text "") [0..31] + where pad_int i | i < 10 = char ' ' <> int i + pad_int i = int i + -- remember bit = 1 means it's available. + show_bit bits bit | testBit bits bit = text " " + show_bit _ _ = text " x" + +noFreeRegs :: FreeRegs +noFreeRegs = FreeRegs 0 0 + +showBits :: Word32 -> String +showBits w = map (\i -> if testBit w i then '1' else '0') [0..31] + +-- FR instance implementation (See Linear.FreeRegs) +allocateReg :: HasCallStack => RealReg -> FreeRegs -> FreeRegs +allocateReg (RealRegSingle r) (FreeRegs g f) + | r > 31 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32)) + | r < 32 && testBit g r = FreeRegs (clearBit g r) f + | r > 31 = panic $ "Linear.RV64.allocReg: double allocation of float reg v" ++ show (r - 32) ++ "; " ++ showBits f + | otherwise = pprPanic "Linear.RV64.allocReg" $ text ("double allocation of gp reg x" ++ show r ++ "; " ++ showBits g) + +-- For LLVM Interop, see https://github.com/llvm/llvm-project/blob/6ab900f8746e7d8e24afafb5886a40801f6799f4/llvm/lib/Target/RISCV/RISCVISelLowering.cpp#L13638-L13685 +getFreeRegs :: RegClass -> FreeRegs -> [RealReg] +getFreeRegs cls (FreeRegs g f) + | RcFloat <- cls = [] -- For now we only support double and integer registers, floats will need to be promoted. + | RcDouble <- cls = go 32 f [0..31] + | RcInteger <- cls = go 0 g ([5..7] ++ [10..17] ++ [28..31]) + where + go _ _ [] = [] + go off x (i:is) | testBit x i = RealRegSingle (off + i) : (go off x $! 
is) + | otherwise = go off x $! is + +initFreeRegs :: Platform -> FreeRegs +initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform) + +releaseReg :: HasCallStack => RealReg -> FreeRegs -> FreeRegs +releaseReg (RealRegSingle r) (FreeRegs g f) + | r > 31 && testBit f (r - 32) = pprPanic "Linear.RV64.releaseReg" (text "can't release non-allocated reg v" <> int (r - 32)) + | r < 32 && testBit g r = pprPanic "Linear.RV64.releaseReg" (text "can't release non-allocated reg x" <> int r) + | r > 31 = FreeRegs g (setBit f (r - 32)) + | otherwise = FreeRegs (setBit g r) f \ No newline at end of file Index: ghc-9.8.2/compiler/GHC/CmmToAsm/Reg/Target.hs =================================================================== --- ghc-9.8.2.orig/compiler/GHC/CmmToAsm/Reg/Target.hs +++ ghc-9.8.2/compiler/GHC/CmmToAsm/Reg/Target.hs @@ -34,7 +34,7 @@ import qualified GHC.CmmToAsm.X86.Regs import qualified GHC.CmmToAsm.X86.RegInfo as X86 import qualified GHC.CmmToAsm.PPC.Regs as PPC import qualified GHC.CmmToAsm.AArch64.Regs as AArch64 - +import qualified GHC.CmmToAsm.RV64.Regs as RV64 targetVirtualRegSqueeze :: Platform -> RegClass -> VirtualReg -> Int targetVirtualRegSqueeze platform @@ -49,7 +49,7 @@ targetVirtualRegSqueeze platform ArchAlpha -> panic "targetVirtualRegSqueeze ArchAlpha" ArchMipseb -> panic "targetVirtualRegSqueeze ArchMipseb" ArchMipsel -> panic "targetVirtualRegSqueeze ArchMipsel" - ArchRISCV64 -> panic "targetVirtualRegSqueeze ArchRISCV64" + ArchRISCV64 -> RV64.virtualRegSqueeze ArchLoongArch64->panic "targetVirtualRegSqueeze ArchLoongArch64" ArchJavaScript-> panic "targetVirtualRegSqueeze ArchJavaScript" ArchWasm32 -> panic "targetVirtualRegSqueeze ArchWasm32" @@ -69,7 +69,7 @@ targetRealRegSqueeze platform ArchAlpha -> panic "targetRealRegSqueeze ArchAlpha" ArchMipseb -> panic "targetRealRegSqueeze ArchMipseb" ArchMipsel -> panic "targetRealRegSqueeze ArchMipsel" - ArchRISCV64 -> panic "targetRealRegSqueeze ArchRISCV64" + ArchRISCV64 
-> RV64.realRegSqueeze ArchLoongArch64->panic "targetRealRegSqueeze ArchLoongArch64" ArchJavaScript-> panic "targetRealRegSqueeze ArchJavaScript" ArchWasm32 -> panic "targetRealRegSqueeze ArchWasm32" @@ -88,7 +88,7 @@ targetClassOfRealReg platform ArchAlpha -> panic "targetClassOfRealReg ArchAlpha" ArchMipseb -> panic "targetClassOfRealReg ArchMipseb" ArchMipsel -> panic "targetClassOfRealReg ArchMipsel" - ArchRISCV64 -> panic "targetClassOfRealReg ArchRISCV64" + ArchRISCV64 -> RV64.classOfRealReg ArchLoongArch64->panic "targetClassOfRealReg ArchLoongArch64" ArchJavaScript-> panic "targetClassOfRealReg ArchJavaScript" ArchWasm32 -> panic "targetClassOfRealReg ArchWasm32" @@ -107,7 +107,7 @@ targetMkVirtualReg platform ArchAlpha -> panic "targetMkVirtualReg ArchAlpha" ArchMipseb -> panic "targetMkVirtualReg ArchMipseb" ArchMipsel -> panic "targetMkVirtualReg ArchMipsel" - ArchRISCV64 -> panic "targetMkVirtualReg ArchRISCV64" + ArchRISCV64 -> RV64.mkVirtualReg ArchLoongArch64->panic "targetMkVirtualReg ArchLoongArch64" ArchJavaScript-> panic "targetMkVirtualReg ArchJavaScript" ArchWasm32 -> panic "targetMkVirtualReg ArchWasm32" @@ -126,7 +126,7 @@ targetRegDotColor platform ArchAlpha -> panic "targetRegDotColor ArchAlpha" ArchMipseb -> panic "targetRegDotColor ArchMipseb" ArchMipsel -> panic "targetRegDotColor ArchMipsel" - ArchRISCV64 -> panic "targetRegDotColor ArchRISCV64" + ArchRISCV64 -> RV64.regDotColor ArchLoongArch64->panic "targetRegDotColor ArchLoongArch64" ArchJavaScript-> panic "targetRegDotColor ArchJavaScript" ArchWasm32 -> panic "targetRegDotColor ArchWasm32" Index: ghc-9.8.2/compiler/GHC/Driver/Backend.hs =================================================================== --- ghc-9.8.2.orig/compiler/GHC/Driver/Backend.hs +++ ghc-9.8.2/compiler/GHC/Driver/Backend.hs @@ -217,6 +217,7 @@ platformNcgSupported platform = if ArchPPC_64 {} -> True ArchAArch64 -> True ArchWasm32 -> True + ArchRISCV64 -> True _ -> False -- | Is the platform supported by the JS 
backend? Index: ghc-9.8.2/compiler/GHC/Driver/DynFlags.hs =================================================================== --- ghc-9.8.2.orig/compiler/GHC/Driver/DynFlags.hs +++ ghc-9.8.2/compiler/GHC/Driver/DynFlags.hs @@ -1330,6 +1330,11 @@ default_PIC platform = -- always generate PIC. See -- #10597 for more -- information. + + -- On RISC-V, we need to always have PIC enabled. Otherwise, the stack + -- protector emits relocations that cannot be resolved by our linker. The + -- addresses for the canary are too high to fit in 32bits. + (OSLinux, ArchRISCV64) -> [Opt_PIC] _ -> [] -- | The language extensions implied by the various language variants. Index: ghc-9.8.2/compiler/GHC/Platform.hs =================================================================== --- ghc-9.8.2.orig/compiler/GHC/Platform.hs +++ ghc-9.8.2/compiler/GHC/Platform.hs @@ -283,7 +283,6 @@ platformHasRTSLinker p = case archOS_arc ArchPPC_64 ELF_V1 -> False -- powerpc64 ArchPPC_64 ELF_V2 -> False -- powerpc64le ArchS390X -> False - ArchRISCV64 -> False ArchLoongArch64 -> False ArchJavaScript -> False ArchWasm32 -> False Index: ghc-9.8.2/compiler/ghc.cabal.in =================================================================== --- ghc-9.8.2.orig/compiler/ghc.cabal.in +++ ghc-9.8.2/compiler/ghc.cabal.in @@ -278,6 +278,7 @@ Library GHC.CmmToAsm.Reg.Linear.FreeRegs GHC.CmmToAsm.Reg.Linear.JoinToTargets GHC.CmmToAsm.Reg.Linear.PPC + GHC.CmmToAsm.Reg.Linear.RV64 GHC.CmmToAsm.Reg.Linear.StackMap GHC.CmmToAsm.Reg.Linear.State GHC.CmmToAsm.Reg.Linear.Stats @@ -286,6 +287,13 @@ Library GHC.CmmToAsm.Reg.Liveness GHC.CmmToAsm.Reg.Target GHC.CmmToAsm.Reg.Utils + GHC.CmmToAsm.RV64 + GHC.CmmToAsm.RV64.CodeGen + GHC.CmmToAsm.RV64.Cond + GHC.CmmToAsm.RV64.Instr + GHC.CmmToAsm.RV64.Ppr + GHC.CmmToAsm.RV64.RegInfo + GHC.CmmToAsm.RV64.Regs GHC.CmmToAsm.Types GHC.CmmToAsm.Utils GHC.CmmToAsm.X86 Index: ghc-9.8.2/hadrian/bindist/config.mk.in =================================================================== --- 
ghc-9.8.2.orig/hadrian/bindist/config.mk.in +++ ghc-9.8.2/hadrian/bindist/config.mk.in @@ -141,7 +141,7 @@ GhcWithSMP := $(strip $(if $(filter YESN # Whether to include GHCi in the compiler. Depends on whether the RTS linker # has support for this OS/ARCH combination. OsSupportsGHCi=$(strip $(patsubst $(TargetOS_CPP), YES, $(findstring $(TargetOS_CPP), mingw32 linux solaris2 freebsd dragonfly netbsd openbsd darwin kfreebsdgnu))) -ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc sparc64 arm aarch64))) +ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc sparc64 arm aarch64 riscv64))) ifeq "$(OsSupportsGHCi)$(ArchSupportsGHCi)" "YESYES" GhcWithInterpreter=YES Index: ghc-9.8.2/hadrian/src/Settings/Builders/RunTest.hs =================================================================== --- ghc-9.8.2.orig/hadrian/src/Settings/Builders/RunTest.hs +++ ghc-9.8.2/hadrian/src/Settings/Builders/RunTest.hs @@ -116,7 +116,14 @@ inTreeCompilerArgs stg = do os <- setting HostOs arch <- setting TargetArch - let codegen_arches = ["x86_64", "i386", "powerpc", "powerpc64", "powerpc64le", "aarch64", "wasm32"] + let codegen_arches = [ "x86_64" + , "i386" + , "powerpc" + , "powerpc64" + , "powerpc64le" + , "aarch64" + , "wasm32" + , "riscv64" ] let withNativeCodeGen | unregisterised = False | arch `elem` codegen_arches = True Index: ghc-9.8.2/rts/LinkerInternals.h =================================================================== --- ghc-9.8.2.orig/rts/LinkerInternals.h +++ ghc-9.8.2/rts/LinkerInternals.h @@ -208,7 +208,7 @@ typedef struct _Segment { int n_sections; } Segment; -#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) +#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || 
defined(riscv64_HOST_ARCH) #define NEED_SYMBOL_EXTRAS 1 #endif @@ -237,6 +237,8 @@ typedef struct { uint8_t jumpIsland[8]; #elif defined(arm_HOST_ARCH) uint8_t jumpIsland[16]; +#elif defined(riscv64_HOST_ARCH) + uint64_t addr; #endif } SymbolExtra; Index: ghc-9.8.2/rts/RtsSymbols.c =================================================================== --- ghc-9.8.2.orig/rts/RtsSymbols.c +++ ghc-9.8.2/rts/RtsSymbols.c @@ -974,6 +974,17 @@ extern char **environ; #define RTS_LIBGCC_SYMBOLS #endif +#if defined(riscv64_HOST_ARCH) +// See https://gcc.gnu.org/onlinedocs/gccint/Integer-library-routines.html as +// reference for the following built-ins. __clzdi2 and __ctzdi2 probably relate +// to __builtin-s in libraries/ghc-prim/cbits/ctz.c. +#define RTS_ARCH_LIBGCC_SYMBOLS \ + SymI_NeedsProto(__clzdi2) \ + SymI_NeedsProto(__ctzdi2) +#else +#define RTS_ARCH_LIBGCC_SYMBOLS +#endif + // Symbols defined by libgcc/compiler-rt for AArch64's outline atomics. #if defined(HAVE_ARM_OUTLINE_ATOMICS) #include "ARMOutlineAtomicsSymbols.h" @@ -1026,6 +1037,7 @@ RTS_DARWIN_ONLY_SYMBOLS RTS_OPENBSD_ONLY_SYMBOLS RTS_LIBC_SYMBOLS RTS_LIBGCC_SYMBOLS +RTS_ARCH_LIBGCC_SYMBOLS RTS_FINI_ARRAY_SYMBOLS RTS_LIBFFI_SYMBOLS RTS_ARM_OUTLINE_ATOMIC_SYMBOLS @@ -1068,6 +1080,7 @@ RtsSymbolVal rtsSyms[] = { RTS_DARWIN_ONLY_SYMBOLS RTS_OPENBSD_ONLY_SYMBOLS RTS_LIBGCC_SYMBOLS + RTS_ARCH_LIBGCC_SYMBOLS RTS_FINI_ARRAY_SYMBOLS RTS_LIBFFI_SYMBOLS RTS_ARM_OUTLINE_ATOMIC_SYMBOLS Index: ghc-9.8.2/rts/adjustor/LibffiAdjustor.c =================================================================== --- ghc-9.8.2.orig/rts/adjustor/LibffiAdjustor.c +++ ghc-9.8.2/rts/adjustor/LibffiAdjustor.c @@ -12,6 +12,7 @@ #include "Adjustor.h" #include "rts/ghc_ffi.h" +#include <stdint.h> #include <string.h> // Note that ffi_alloc_prep_closure is a non-standard libffi closure @@ -187,5 +188,20 @@ createAdjustor (int cconv, barf("createAdjustor: failed to allocate memory"); } - return (void*)code; +#if defined(riscv64_HOST_ARCH) + // 
Synchronize the memory and instruction cache to prevent illegal + // instruction exceptions. + + // We expect two instructions for address loading, one for the jump. + int instrCount = 3; + // On Linux the parameters of __builtin___clear_cache are currently unused. + // Add them anyways for future compatibility. (I.e. the parameters couldn't + // be checked during development.) + __builtin___clear_cache((void *)code, + (void *)code + instrCount * sizeof(uint64_t)); + // Memory barrier to ensure nothing circumvents the fence.i / cache flush. + SEQ_CST_FENCE(); +#endif + + return (void *)code; } Index: ghc-9.8.2/rts/linker/Elf.c =================================================================== --- ghc-9.8.2.orig/rts/linker/Elf.c +++ ghc-9.8.2/rts/linker/Elf.c @@ -101,7 +101,7 @@ # include <elf_abi.h> #endif -#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) +#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined (riscv64_HOST_ARCH) # define NEED_GOT # define NEED_PLT # include "elf_got.h" @@ -430,10 +430,7 @@ ocVerifyImage_ELF ( ObjectCode* oc ) case EM_AARCH64: IF_DEBUG(linker,debugBelch( "aarch64" )); break; #endif #if defined(EM_RISCV) - case EM_RISCV: IF_DEBUG(linker,debugBelch( "riscv" )); - errorBelch("%s: RTS linker not implemented on riscv", - oc->fileName); - return 0; + case EM_RISCV: IF_DEBUG(linker,debugBelch( "riscv" )); break; #endif #if defined(EM_LOONGARCH) case EM_LOONGARCH: IF_DEBUG(linker,debugBelch( "loongarch64" )); @@ -1995,7 +1992,7 @@ ocResolve_ELF ( ObjectCode* oc ) (void) shdr; #endif /* NEED_GOT */ -#if defined(aarch64_HOST_ARCH) +#if defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH) /* use new relocation design */ if(relocateObjectCode( oc )) return 0; @@ -2018,6 +2015,8 @@ ocResolve_ELF ( ObjectCode* oc ) #if defined(powerpc_HOST_ARCH) ocFlushInstructionCache( oc ); +#elif defined(riscv64_HOST_ARCH) + flushInstructionCache( oc ); #endif return ocMprotect_Elf(oc); Index: ghc-9.8.2/rts/linker/ElfTypes.h 
=================================================================== --- ghc-9.8.2.orig/rts/linker/ElfTypes.h +++ ghc-9.8.2/rts/linker/ElfTypes.h @@ -150,6 +150,7 @@ typedef struct _Stub { void * addr; void * target; + void* got_addr; /* flags can hold architecture specific information they are used during * lookup of stubs as well. Thus two stubs for the same target with * different flags are considered unequal. Index: ghc-9.8.2/rts/linker/SymbolExtras.c =================================================================== --- ghc-9.8.2.orig/rts/linker/SymbolExtras.c +++ ghc-9.8.2/rts/linker/SymbolExtras.c @@ -153,7 +153,7 @@ void ocProtectExtras(ObjectCode* oc) } -#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) +#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(riscv64_HOST_ARCH) SymbolExtra* makeSymbolExtra( ObjectCode const* oc, unsigned long symbolNumber, unsigned long target ) @@ -189,9 +189,12 @@ SymbolExtra* makeSymbolExtra( ObjectCode extra->addr = target; memcpy(extra->jumpIsland, jmp, 8); #endif /* x86_64_HOST_ARCH */ - +#if defined(riscv64_HOST_ARCH) + // Fake GOT entry (used like GOT, but located in symbol extras) + extra->addr = target; +#endif return extra; } -#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH */ +#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH || riscv64_HOST_ARCH */ #endif /* !x86_64_HOST_ARCH) || !mingw32_HOST_OS */ #endif // NEED_SYMBOL_EXTRAS Index: ghc-9.8.2/rts/linker/SymbolExtras.h =================================================================== --- ghc-9.8.2.orig/rts/linker/SymbolExtras.h +++ ghc-9.8.2/rts/linker/SymbolExtras.h @@ -16,7 +16,7 @@ SymbolExtra* makeArmSymbolExtra( ObjectC unsigned long target, bool fromThumb, bool toThumb ); -#elif defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) +#elif defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(riscv64_HOST_ARCH) SymbolExtra* makeSymbolExtra( ObjectCode const* oc, unsigned long symbolNumber, unsigned long target ); 
Index: ghc-9.8.2/rts/linker/elf_got.c =================================================================== --- ghc-9.8.2.orig/rts/linker/elf_got.c +++ ghc-9.8.2/rts/linker/elf_got.c @@ -9,22 +9,20 @@ * Check if we need a global offset table slot for a * given symbol */ -bool -needGotSlot(Elf_Sym * symbol) { - /* using global here should give an upper bound */ - /* I don't believe we need to relocate STB_LOCAL - * symbols via the GOT; however I'm unsure about - * STB_WEAK. - * - * Any more restrictive filter here would result - * in a smaller GOT, which is preferable. - */ - return ELF_ST_BIND(symbol->st_info) == STB_GLOBAL - || ELF_ST_BIND(symbol->st_info) == STB_WEAK - // Section symbols exist primarily for relocation - // and as such may need a GOT slot. - || ELF_ST_TYPE(symbol->st_info) == STT_SECTION; - +bool needGotSlot(Elf_Sym *symbol) { + /* using global here should give an upper bound */ + /* I don't believe we need to relocate STB_LOCAL + * symbols via the GOT; however I'm unsure about + * STB_WEAK. + * + * Any more restrictive filter here would result + * in a smaller GOT, which is preferable. + */ + return ELF_ST_BIND(symbol->st_info) == STB_GLOBAL || + ELF_ST_BIND(symbol->st_info) == STB_WEAK + // Section symbols exist primarily for relocation + // and as such may need a GOT slot. 
+ || ELF_ST_TYPE(symbol->st_info) == STT_SECTION; } bool Index: ghc-9.8.2/rts/linker/elf_plt.c =================================================================== --- ghc-9.8.2.orig/rts/linker/elf_plt.c +++ ghc-9.8.2/rts/linker/elf_plt.c @@ -5,7 +5,7 @@ #include <stdint.h> #include <stdlib.h> -#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) +#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH) #if defined(OBJFORMAT_ELF) #define STRINGIFY(x) #x @@ -49,11 +49,13 @@ findStub(Section * section, bool makeStub(Section * section, void* * addr, + void* got_addr, uint8_t flags) { Stub * s = calloc(1, sizeof(Stub)); ASSERT(s != NULL); s->target = *addr; + s->got_addr = got_addr; s->flags = flags; s->next = NULL; s->addr = (uint8_t *)section->info->stub_offset + 8 Index: ghc-9.8.2/rts/linker/elf_plt.h =================================================================== --- ghc-9.8.2.orig/rts/linker/elf_plt.h +++ ghc-9.8.2/rts/linker/elf_plt.h @@ -4,8 +4,9 @@ #include "elf_plt_arm.h" #include "elf_plt_aarch64.h" +#include "elf_plt_riscv64.h" -#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) +#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined (riscv64_HOST_ARCH) #if defined(OBJFORMAT_ELF) @@ -21,6 +22,8 @@ #define __suffix__ Arm #elif defined(__mips__) #define __suffix__ Mips +#elif defined(__riscv) +#define __suffix__ RISCV64 #else #error "unknown architecture" #endif @@ -34,10 +37,10 @@ unsigned numberOfStubsForSection( Objec #define STUB_SIZE ADD_SUFFIX(stubSize) bool findStub(Section * section, void* * addr, uint8_t flags); -bool makeStub(Section * section, void* * addr, uint8_t flags); +bool makeStub(Section * section, void* * addr, void* got_addr, uint8_t flags); void freeStubs(Section * section); #endif // OBJECTFORMAT_ELF -#endif // arm/aarch64_HOST_ARCH +#endif // arm/aarch64_HOST_ARCH/riscv64_HOST_ARCH Index: ghc-9.8.2/rts/linker/elf_plt_riscv64.c 
=================================================================== --- /dev/null +++ ghc-9.8.2/rts/linker/elf_plt_riscv64.c @@ -0,0 +1,84 @@ +#include "Rts.h" +#include "elf_compat.h" +#include "elf_plt_riscv64.h" +#include "rts/Messages.h" + +#include <stdint.h> +#include <stdlib.h> + +#if defined(riscv64_HOST_ARCH) + +#if defined(OBJFORMAT_ELF) + +const size_t instSizeRISCV64 = 4; +const size_t stubSizeRISCV64 = 3 * instSizeRISCV64; + +bool needStubForRelRISCV64(Elf_Rel *rel) { + switch (ELF64_R_TYPE(rel->r_info)) { + case R_RISCV_CALL: + case R_RISCV_CALL_PLT: + return true; + default: + return false; + } +} + +bool needStubForRelaRISCV64(Elf_Rela *rela) { + switch (ELF64_R_TYPE(rela->r_info)) { + case R_RISCV_CALL: + case R_RISCV_CALL_PLT: + return true; + default: + return false; + } +} + +// After the global offset table (GOT) has been set up, we can use these three +// instructions to jump to the target address / function: +// 1. AUIPC ip, %pcrel_hi(addr) +// 2. LD ip, %pcrel_lo(addr)(ip) +// 3. JARL x0, ip, 0 +// +// We could use the absolute address of the target (because we know it), but +// that would require loading a 64-bit constant which is a nightmare to do in +// riscv64 assembly. 
(See + https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/5ffe5b5aeedb37b1c1c0c3d94641267d9ad4795a/riscv-elf.adoc#procedure-linkage-table) +bool makeStubRISCV64(Stub *s) { + uint32_t *P = (uint32_t *)s->addr; + int32_t addr = (uint64_t)s->got_addr - (uint64_t)P; + + uint64_t hi = (addr + 0x800) >> 12; + uint64_t lo = addr - (hi << 12); + + IF_DEBUG( + linker, + debugBelch( + "makeStubRISCV64: P = %p, got_addr = %p, target = %p, addr = 0x%x " + ", hi = 0x%lx, lo = 0x%lx\n", + P, s->got_addr, s->target, addr, hi, lo)); + + // AUIPC ip, %pcrel_hi(addr) + uint32_t auipcInst = 0b0010111; // opcode + auipcInst |= 0x1f << 7; // rd = ip (x31) + auipcInst |= hi << 12; // imm[31:12] + + // LD ip, %pcrel_lo(addr)(ip) + uint32_t ldInst = 0b0000011; // opcode + ldInst |= 0x1f << 7; // rd = ip (x31) + ldInst |= 0x1f << 15; // rs = ip (x31) + ldInst |= 0b11 << 12; // funct3 = 0x3 (LD) + ldInst |= lo << 20; // imm[11:0] + + // JALR x0, ip, 0 + uint32_t jalrInst = 0b1100111; // opcode + jalrInst |= 0x1f << 15; // rs = ip (x31) + + P[0] = auipcInst; + P[1] = ldInst; + P[2] = jalrInst; + + return EXIT_SUCCESS; +} + +#endif +#endif Index: ghc-9.8.2/rts/linker/elf_plt_riscv64.h =================================================================== --- /dev/null +++ ghc-9.8.2/rts/linker/elf_plt_riscv64.h @@ -0,0 +1,12 @@ +#pragma once + +#include "LinkerInternals.h" + +#if defined(OBJFORMAT_ELF) + +extern const size_t stubSizeRISCV64; +bool needStubForRelRISCV64(Elf_Rel * rel); +bool needStubForRelaRISCV64(Elf_Rela * rel); +bool makeStubRISCV64(Stub * s); + +#endif Index: ghc-9.8.2/rts/linker/elf_reloc.c =================================================================== --- ghc-9.8.2.orig/rts/linker/elf_reloc.c +++ ghc-9.8.2/rts/linker/elf_reloc.c @@ -4,13 +4,18 @@ #if defined(OBJFORMAT_ELF) -/* we currently only use this abstraction for elf/aarch64 */ +/* we currently only use this abstraction for elf/aarch64 and elf/riscv64 */ -#if defined(aarch64_HOST_ARCH) +#if 
defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH) bool relocateObjectCode(ObjectCode * oc) { return ADD_SUFFIX(relocateObjectCode)(oc); } + + +void flushInstructionCache(ObjectCode * oc){ + return ADD_SUFFIX(flushInstructionCache)(oc); +} #endif #endif Index: ghc-9.8.2/rts/linker/elf_reloc.h =================================================================== --- ghc-9.8.2.orig/rts/linker/elf_reloc.h +++ ghc-9.8.2/rts/linker/elf_reloc.h @@ -5,9 +5,10 @@ #if defined(OBJFORMAT_ELF) #include "elf_reloc_aarch64.h" +#include "elf_reloc_riscv64.h" bool relocateObjectCode(ObjectCode * oc); - +void flushInstructionCache(ObjectCode *oc); #endif /* OBJETFORMAT_ELF */ Index: ghc-9.8.2/rts/linker/elf_reloc_aarch64.c =================================================================== --- ghc-9.8.2.orig/rts/linker/elf_reloc_aarch64.c +++ ghc-9.8.2/rts/linker/elf_reloc_aarch64.c @@ -240,7 +240,7 @@ computeAddend(Section * section, Elf_Rel /* check if we already have that stub */ if(findStub(section, (void**)&S, 0)) { /* did not find it. Crete a new stub. */ - if(makeStub(section, (void**)&S, 0)) { + if(makeStub(section, (void**)&S, NULL, 0)) { abort(/* could not find or make stub */); } } @@ -339,5 +339,10 @@ relocateObjectCodeAarch64(ObjectCode * o return EXIT_SUCCESS; } +void flushInstructionCacheAarch64(ObjectCode * oc) { + // Looks like we don't need this on Aarch64. 
+ /* no-op */ +} + #endif /* OBJECTFORMAT_ELF */ #endif /* aarch64_HOST_ARCH */ Index: ghc-9.8.2/rts/linker/elf_reloc_aarch64.h =================================================================== --- ghc-9.8.2.orig/rts/linker/elf_reloc_aarch64.h +++ ghc-9.8.2/rts/linker/elf_reloc_aarch64.h @@ -7,4 +7,5 @@ bool relocateObjectCodeAarch64(ObjectCode * oc); +void flushInstructionCacheAarch64(ObjectCode *oc); #endif /* OBJETFORMAT_ELF */ Index: ghc-9.8.2/rts/linker/elf_reloc_riscv64.c =================================================================== --- /dev/null +++ ghc-9.8.2/rts/linker/elf_reloc_riscv64.c @@ -0,0 +1,655 @@ +#include "elf_reloc_riscv64.h" +#include "LinkerInternals.h" +#include "Rts.h" +#include "Stg.h" +#include "SymbolExtras.h" +#include "elf.h" +#include "elf_plt.h" +#include "elf_util.h" +#include "rts/Messages.h" +#include "util.h" + +#include <stdint.h> +#include <stdlib.h> + +#if defined(riscv64_HOST_ARCH) + +#if defined(OBJFORMAT_ELF) + +typedef uint64_t addr_t; + +/* regular instructions are 32bit */ +typedef uint32_t inst_t; + +/* compressed instructions are 16bit */ +typedef uint16_t cinst_t; + +// TODO: These instances could be static. They are not yet, because we might +// need their debugging symbols. 
+char *relocationTypeToString(Elf64_Xword type); +int32_t decodeAddendRISCV64(Section *section, Elf_Rel *rel); +bool encodeAddendRISCV64(Section *section, Elf_Rel *rel, int32_t addend); +int32_t SignExtend32(uint32_t X, unsigned B); +void write8le(uint8_t *p, uint8_t v); +uint8_t read8le(const uint8_t *P); +void write16le(cinst_t *p, uint16_t v); +uint16_t read16le(const cinst_t *P); +uint32_t read32le(const inst_t *P); +void write32le(inst_t *p, uint32_t v); +uint64_t read64le(const uint64_t *P); +void write64le(uint64_t *p, uint64_t v); +uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end); +void setCJType(cinst_t *loc, uint32_t val); +void setCBType(cinst_t *loc, uint32_t val); +void setBType(inst_t *loc, uint32_t val); +void setSType(inst_t *loc, uint32_t val); +int32_t computeAddend(ElfRelocationATable * relaTab, unsigned relNo, Elf_Rel *rel, ElfSymbol *symbol, + int64_t addend, ObjectCode *oc); +void setJType(inst_t *loc, uint32_t val); +void setIType(inst_t *loc, int32_t val); +void checkInt(inst_t *loc, int32_t v, int n); +uint32_t setLO12_I(uint32_t insn, uint32_t imm); +uint32_t setLO12_S(uint32_t insn, uint32_t imm); +void setUType(inst_t *loc, int32_t val); + + +char *relocationTypeToString(Elf64_Xword type) { + switch (ELF64_R_TYPE(type)) { + case R_RISCV_NONE: + return "R_RISCV_NONE"; + case R_RISCV_32: + return "R_RISCV_32"; + case R_RISCV_64: + return "R_RISCV_64"; + case R_RISCV_RELATIVE: + return "R_RISCV_RELATIVE"; + case R_RISCV_COPY: + return "R_RISCV_COPY"; + case R_RISCV_JUMP_SLOT: + return "R_RISCV_JUMP_SLOT"; + case R_RISCV_TLS_DTPMOD32: + return "R_RISCV_TLS_DTPMOD32"; + case R_RISCV_TLS_DTPMOD64: + return "R_RISCV_TLS_DTPMOD64"; + case R_RISCV_TLS_DTPREL32: + return "R_RISCV_TLS_DTPREL32"; + case R_RISCV_TLS_DTPREL64: + return "R_RISCV_TLS_DTPREL64"; + case R_RISCV_TLS_TPREL32: + return "R_RISCV_TLS_TPREL32"; + case R_RISCV_TLS_TPREL64: + return "R_RISCV_TLS_TPREL64"; + case R_RISCV_BRANCH: + return "R_RISCV_BRANCH"; + case 
R_RISCV_JAL: + return "R_RISCV_JAL"; + case R_RISCV_CALL: + return "R_RISCV_CALL"; + case R_RISCV_CALL_PLT: + return "R_RISCV_CALL_PLT"; + case R_RISCV_GOT_HI20: + return "R_RISCV_GOT_HI20"; + case R_RISCV_PCREL_HI20: + return "R_RISCV_PCREL_HI20"; + case R_RISCV_LO12_I: + return "R_RISCV_LO12_I"; + case R_RISCV_PCREL_LO12_I: + return "R_RISCV_PCREL_LO12_I"; + case R_RISCV_HI20: + return "R_RISCV_HI20"; + case R_RISCV_LO12_S: + return "R_RISCV_LO12_S"; + case R_RISCV_PCREL_LO12_S: + return "R_RISCV_PCREL_LO12_S"; + case R_RISCV_RELAX: + return "R_RISCV_RELAX"; + case R_RISCV_RVC_BRANCH: + return "R_RISCV_RVC_BRANCH"; + case R_RISCV_RVC_JUMP: + return "R_RISCV_RVC_JUMP"; + default: + return "Unknown relocation type"; + } +} + +#define Page(x) ((x) & ~0xFFF) + +int32_t decodeAddendRISCV64(Section *section STG_UNUSED, + Elf_Rel *rel STG_UNUSED) { + debugBelch("decodeAddendRISCV64: Relocations with explicit addend are not " + "supported."); + abort(/* we don't support Rel locations yet. */); +} + +// Sign-extend the number in the bottom B bits of X to a 32-bit integer. +// Requires 0 < B <= 32. (32 bit is sufficient as we can only encode 20 + 12 = +// 32 bit in a relocation pair.) +int32_t SignExtend32(uint32_t X, unsigned B) { + assert(B > 0 && "Bit width can't be 0."); + assert(B <= 32 && "Bit width out of range."); + return (int32_t)(X << (32 - B)) >> (32 - B); +} + +// Make sure that V can be represented as an N bit signed integer. +void checkInt(inst_t *loc, int32_t v, int n) { + if (v != SignExtend32(v, n)) { + debugBelch("Relocation at 0x%x is out of range. value: 0x%x (%d), " + "sign-extended value: 0x%x (%d), max bits 0x%x (%d)\n", + *loc, v, v, SignExtend32(v, n), SignExtend32(v, n), n, n); + } +} + +// RISCV is little-endian by definition. +void write8le(uint8_t *p, uint8_t v) { *p = v; } + +// RISCV is little-endian by definition. +uint8_t read8le(const uint8_t *P) { return *P; } + +// RISCV is little-endian by definition. 
+void write16le(cinst_t *p, uint16_t v) { *p = v; } + +// RISCV is little-endian by definition. +uint16_t read16le(const cinst_t *P) { return *P; } + +// RISCV is little-endian by definition. +uint32_t read32le(const inst_t *P) { return *P; } + +// RISCV is little-endian by definition. +void write32le(inst_t *p, uint32_t v) { *p = v; } + +// RISCV is little-endian by definition. +uint64_t read64le(const uint64_t *P) { return *P; } + +// RISCV is little-endian by definition. +void write64le(uint64_t *p, uint64_t v) { *p = v; } + +uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { + return (v & ((1ULL << (begin + 1)) - 1)) >> end; +} + +uint32_t setLO12_I(uint32_t insn, uint32_t imm) { + IF_DEBUG(linker, debugBelch("setLO12_I: insn 0x%x imm 0x%x (insn & 0xfffff) " + "0x%x (imm << 20) 0x%x \n", + insn, imm, (insn & 0xfffff), (imm << 20))); + return (insn & 0xfffff) | (imm << 20); +} + +uint32_t setLO12_S(uint32_t insn, uint32_t imm) { + return (insn & 0x1fff07f) | (extractBits(imm, 11, 5) << 25) | + (extractBits(imm, 4, 0) << 7); +} + +void setUType(inst_t *loc, int32_t val) { + const unsigned bits = 32; + uint32_t hi = val + 0x800; + checkInt(loc, SignExtend32(hi, bits) >> 12, 20); + IF_DEBUG(linker, debugBelch("setUType: hi 0x%x val 0x%x\n", hi, val)); + write32le(loc, (read32le(loc) & 0xFFF) | (hi & 0xFFFFF000)); +} + +void setIType(inst_t *loc, int32_t val) { + uint64_t hi = (val + 0x800) >> 12; + uint64_t lo = val - (hi << 12); + IF_DEBUG(linker, debugBelch("setIType: hi 0x%lx lo 0x%lx\n", hi, lo)); + IF_DEBUG(linker, debugBelch("setIType: loc %p *loc 0x%x val 0x%x\n", loc, + *loc, val)); + uint32_t insn = setLO12_I(read32le(loc), lo & 0xfff); + IF_DEBUG(linker, debugBelch("setIType: insn 0x%x\n", insn)); + write32le(loc, insn); + IF_DEBUG(linker, debugBelch("setIType: loc %p *loc' 0x%x val 0x%x\n", loc, + *loc, val)); +} + +void setSType(inst_t *loc, uint32_t val) { + uint64_t hi = (val + 0x800) >> 12; + uint64_t lo = val - (hi << 12); + 
write32le(loc, setLO12_S(read32le(loc), lo)); +} + +void setJType(inst_t *loc, uint32_t val) { + checkInt(loc, val, 21); + + uint32_t insn = read32le(loc) & 0xFFF; + uint32_t imm20 = extractBits(val, 20, 20) << 31; + uint32_t imm10_1 = extractBits(val, 10, 1) << 21; + uint32_t imm11 = extractBits(val, 11, 11) << 20; + uint32_t imm19_12 = extractBits(val, 19, 12) << 12; + insn |= imm20 | imm10_1 | imm11 | imm19_12; + + write32le(loc, insn); +} + +void setBType(inst_t *loc, uint32_t val) { + checkInt(loc, val, 13); + + uint32_t insn = read32le(loc) & 0x1FFF07F; + uint32_t imm12 = extractBits(val, 12, 12) << 31; + uint32_t imm10_5 = extractBits(val, 10, 5) << 25; + uint32_t imm4_1 = extractBits(val, 4, 1) << 8; + uint32_t imm11 = extractBits(val, 11, 11) << 7; + insn |= imm12 | imm10_5 | imm4_1 | imm11; + + write32le(loc, insn); +} + +void setCBType(cinst_t *loc, uint32_t val) { + checkInt((inst_t *)loc, val, 9); + uint16_t insn = read16le(loc) & 0xE383; + uint16_t imm8 = extractBits(val, 8, 8) << 12; + uint16_t imm4_3 = extractBits(val, 4, 3) << 10; + uint16_t imm7_6 = extractBits(val, 7, 6) << 5; + uint16_t imm2_1 = extractBits(val, 2, 1) << 3; + uint16_t imm5 = extractBits(val, 5, 5) << 2; + insn |= imm8 | imm4_3 | imm7_6 | imm2_1 | imm5; + + write16le(loc, insn); +} + +void setCJType(cinst_t *loc, uint32_t val) { + checkInt((inst_t *)loc, val, 12); + uint16_t insn = read16le(loc) & 0xE003; + uint16_t imm11 = extractBits(val, 11, 11) << 12; + uint16_t imm4 = extractBits(val, 4, 4) << 11; + uint16_t imm9_8 = extractBits(val, 9, 8) << 9; + uint16_t imm10 = extractBits(val, 10, 10) << 8; + uint16_t imm6 = extractBits(val, 6, 6) << 7; + uint16_t imm7 = extractBits(val, 7, 7) << 6; + uint16_t imm3_1 = extractBits(val, 3, 1) << 3; + uint16_t imm5 = extractBits(val, 5, 5) << 2; + insn |= imm11 | imm4 | imm9_8 | imm10 | imm6 | imm7 | imm3_1 | imm5; + + write16le(loc, insn); +} + +bool encodeAddendRISCV64(Section *section, Elf_Rel *rel, int32_t addend) { + addr_t P = 
(addr_t)((uint8_t *)section->start + rel->r_offset); + IF_DEBUG(linker, + debugBelch( + "Relocation type %s 0x%lx (%lu) symbol 0x%lx addend 0x%x (%u / " + "%d) P 0x%lx\n", + relocationTypeToString(rel->r_info), ELF64_R_TYPE(rel->r_info), + ELF64_R_TYPE(rel->r_info), ELF64_R_SYM(rel->r_info), addend, + addend, addend, P)); + switch (ELF64_R_TYPE(rel->r_info)) { + case R_RISCV_32_PCREL: + case R_RISCV_32: + write32le((inst_t *)P, addend); + break; + case R_RISCV_64: + write64le((uint64_t *)P, addend); + break; + case R_RISCV_GOT_HI20: + case R_RISCV_PCREL_HI20: + case R_RISCV_HI20: { + setUType((inst_t *)P, addend); + break; + } + case R_RISCV_PCREL_LO12_I: + case R_RISCV_LO12_I: { + setIType((inst_t *)P, addend); + break; + } + case R_RISCV_RVC_JUMP: { + setCJType((cinst_t *)P, addend); + break; + } + case R_RISCV_RVC_BRANCH: { + setCBType((cinst_t *)P, addend); + break; + } + case R_RISCV_BRANCH: { + setBType((inst_t *)P, addend); + break; + } + case R_RISCV_CALL: + case R_RISCV_CALL_PLT: { + // We could relax more (in some cases) but right now most important is to + // make it work. 
+ setUType((inst_t *)P, addend); + setIType(((inst_t *)P) + 1, addend); + break; + } + case R_RISCV_JAL: { + setJType((inst_t *)P, addend); + break; + } + case R_RISCV_ADD8: + write8le((uint8_t *)P, read8le((uint8_t *)P) + addend); + break; + case R_RISCV_ADD16: + write16le((cinst_t *)P, read16le((cinst_t *)P) + addend); + break; + case R_RISCV_ADD32: + write32le((inst_t *)P, read32le((inst_t *)P) + addend); + break; + case R_RISCV_ADD64: + write64le((uint64_t *)P, read64le((uint64_t *)P) + addend); + break; + case R_RISCV_SUB6: { + uint8_t keep = *((uint8_t *)P) & 0xc0; + uint8_t imm = (((*(uint8_t *)P) & 0x3f) - addend) & 0x3f; + + write8le((uint8_t *)P, keep | imm); + break; + } + case R_RISCV_SUB8: + write8le((uint8_t *)P, read8le((uint8_t *)P) - addend); + break; + case R_RISCV_SUB16: + write16le((cinst_t *)P, read16le((cinst_t *)P) - addend); + break; + case R_RISCV_SUB32: + write32le((inst_t *)P, read32le((inst_t *)P) - addend); + break; + case R_RISCV_SUB64: + write64le((uint64_t *)P, read64le((uint64_t *)P) - addend); + break; + case R_RISCV_SET6: { + uint8_t keep = *((uint8_t *)P) & 0xc0; + uint8_t imm = (addend & 0x3f) & 0x3f; + + write8le((uint8_t *)P, keep | imm); + break; + } + case R_RISCV_SET8: + write8le((uint8_t *)P, addend); + break; + case R_RISCV_SET16: + write16le((cinst_t *)P, addend); + break; + case R_RISCV_SET32: + write32le((inst_t *)P, addend); + break; + case R_RISCV_PCREL_LO12_S: + case R_RISCV_TPREL_LO12_S: + case R_RISCV_LO12_S: { + setSType((inst_t *)P, addend); + break; + } + case R_RISCV_RELAX: + case R_RISCV_ALIGN: + // I guess we don't need to implement these relaxations (optimizations). + break; + default: + debugBelch("Missing relocation 0x%lx\n", ELF64_R_TYPE(rel->r_info)); + abort(); + } + return EXIT_SUCCESS; +} + +/** + * Compute the *new* addend for a relocation, given a pre-existing addend. + * @param section The section the relocation is in. + * @param rel The Relocation struct. + * @param symbol The target symbol. 
+ * @param addend The existing addend. Either explicit or implicit. + * @return The new computed addend. + */ +int32_t computeAddend(ElfRelocationATable * relaTab, unsigned relNo, Elf_Rel *rel, ElfSymbol *symbol, + int64_t addend, ObjectCode *oc) { + Section * section = &oc->sections[relaTab->targetSectionIndex]; + + /* Position where something is relocated */ + addr_t P = (addr_t)((uint8_t *)section->start + rel->r_offset); + + CHECK(0x0 != P); + CHECK((uint64_t)section->start <= P); + CHECK(P <= (uint64_t)section->start + section->size); + /* Address of the symbol */ + addr_t S = (addr_t)symbol->addr; + /* GOT slot for the symbol */ + addr_t GOT_S = (addr_t)symbol->got_addr; + + int64_t A = addend; + + IF_DEBUG(linker, debugBelch("%s: P 0x%lx S 0x%lx %s GOT_S 0x%lx A 0x%lx relNo %u\n", + relocationTypeToString(rel->r_info), P, S, + symbol->name, GOT_S, A, relNo)); + switch (ELF64_R_TYPE(rel->r_info)) { + case R_RISCV_32: + return S + A; + case R_RISCV_64: + return S + A; + case R_RISCV_HI20: + return S + A; + case R_RISCV_JUMP_SLOT: + return S; + case R_RISCV_JAL: + return S + A - P; + case R_RISCV_PCREL_HI20: + return S + A - P; + case R_RISCV_LO12_I: + return S + A; + // Quoting LLVM docs: For R_RISCV_PC_INDIRECT (R_RISCV_PCREL_LO12_{I,S}), + // the symbol actually points the corresponding R_RISCV_PCREL_HI20 + // relocation, and the target VA is calculated using PCREL_HI20's symbol. + case R_RISCV_PCREL_LO12_S: + FALLTHROUGH; + case R_RISCV_PCREL_LO12_I: { + // Lookup related HI20 relocation and use that value. I'm still confused why + // relocations aren't pure, but this is how LLVM does it. And, calculating + // the lower 12 bit without any relation ship to the GOT entry's address + // makes no sense either. 
+ for (unsigned i = relNo + 1; i-- > 0 ;) { + Elf_Rela *rel_prime = &relaTab->relocations[i]; + + addr_t P_prime = + (addr_t)((uint8_t *)section->start + rel_prime->r_offset); + + if (P_prime != S) { + // S points to the P of the corresponding *_HI20 relocation. + continue; + } + + ElfSymbol *symbol_prime = + findSymbol(oc, relaTab->sectionHeader->sh_link, + ELF64_R_SYM((Elf64_Xword)rel_prime->r_info)); + + CHECK(0x0 != symbol_prime); + + /* take explicit addend */ + int64_t addend_prime = rel_prime->r_addend; + + uint64_t type_prime = ELF64_R_TYPE(rel_prime->r_info); + + if (type_prime == R_RISCV_PCREL_HI20 || + type_prime == R_RISCV_GOT_HI20 || + type_prime == R_RISCV_TLS_GD_HI20 || + type_prime == R_RISCV_TLS_GOT_HI20) { + IF_DEBUG(linker, + debugBelch( + "Found matching relocation: %s (P: 0x%lx, S: 0x%lx, " + "sym-name: %s) -> %s (P: 0x%lx, S: 0x%lx, sym-name: %s, relNo: %u)", + relocationTypeToString(rel->r_info), P, S, symbol->name, + relocationTypeToString(rel_prime->r_info), P_prime, + symbol_prime->addr, symbol_prime->name, i)); + int32_t result = computeAddend(relaTab, i, (Elf_Rel *)rel_prime, + symbol_prime, addend_prime, oc); + IF_DEBUG(linker, debugBelch("Result of computeAddend: 0x%x (%d)\n", + result, result)); + return result; + } + } + debugBelch("Missing HI relocation for %s: P 0x%lx S 0x%lx %s\n", + relocationTypeToString(rel->r_info), P, S, symbol->name); + abort(); + } + + case R_RISCV_RVC_JUMP: + return S + A - P; + case R_RISCV_RVC_BRANCH: + return S + A - P; + case R_RISCV_BRANCH: + return S + A - P; + case R_RISCV_CALL: + case R_RISCV_CALL_PLT: { + addr_t GOT_Target; + if (GOT_S != 0) { + // 1. Public symbol with GOT entry. + GOT_Target = GOT_S; + } else { + // 2. Fake GOT entry with symbol extra entry.
+ SymbolExtra *symbolExtra = makeSymbolExtra(oc, ELF_R_SYM(rel->r_info), S); + addr_t* FAKE_GOT_S = &symbolExtra->addr; + IF_DEBUG(linker, debugBelch("R_RISCV_CALL_PLT w/ SymbolExtra = %p , " + "entry = 0x%lx\n", + symbolExtra, FAKE_GOT_S)); + GOT_Target = (addr_t) FAKE_GOT_S; + } + + if (findStub(section, (void **)&S, 0)) { + /* did not find it. Crete a new stub. */ + if (makeStub(section, (void **)&S, (void *)GOT_Target, 0)) { + abort(/* could not find or make stub */); + } + } + IF_DEBUG(linker, debugBelch("R_RISCV_CALL_PLT: S = 0x%lx A = 0x%lx P = " + "0x%lx (S + A) - P = 0x%lx \n", + S, A, P, (S + A) - P)); + return (S + A) - P; + } + case R_RISCV_ADD8: + FALLTHROUGH; + case R_RISCV_ADD16: + FALLTHROUGH; + case R_RISCV_ADD32: + FALLTHROUGH; + case R_RISCV_ADD64: + FALLTHROUGH; + return S + A; // Add V when the value is set + case R_RISCV_SUB6: + FALLTHROUGH; + case R_RISCV_SUB8: + FALLTHROUGH; + case R_RISCV_SUB16: + FALLTHROUGH; + case R_RISCV_SUB32: + FALLTHROUGH; + case R_RISCV_SUB64: + FALLTHROUGH; + return S + A; // Subtract from V when value is set + case R_RISCV_SET6: + FALLTHROUGH; + case R_RISCV_SET8: + FALLTHROUGH; + case R_RISCV_SET16: + FALLTHROUGH; + case R_RISCV_SET32: + FALLTHROUGH; + return S + A; + case R_RISCV_RELAX: + case R_RISCV_ALIGN: + // I guess we don't need to implement this relaxation. Otherwise, this + // should return the number of blank bytes to insert via NOPs. + return 0; + case R_RISCV_32_PCREL: + return S + A - P; + case R_RISCV_GOT_HI20: { + // TODO: Allocating extra memory for every symbol just to play this trick + // seems to be a bit obscene. (GOT relocations hitting local symbols + // happens, but not very often.) It would be better to allocate only what we + // really need. + + // There are two cases here: 1. The symbol is public and has an entry in the + // GOT. 2. It's local and has no corresponding GOT entry. The first case is + // easy: We simply calculate the addend with the GOT address. 
In the second + // case we create a symbol extra entry and pretend it's the GOT. + if (GOT_S != 0) { + // 1. Public symbol with GOT entry. + return GOT_S + A - P; + } else { + // 2. Fake GOT entry with symbol extra entry. + SymbolExtra *symbolExtra = makeSymbolExtra(oc, ELF_R_SYM(rel->r_info), S); + addr_t* FAKE_GOT_S = &symbolExtra->addr; + addr_t res = (addr_t) FAKE_GOT_S + A - P; + IF_DEBUG(linker, debugBelch("R_RISCV_GOT_HI20 w/ SymbolExtra = %p , " + "entry = 0x%lx , reloc-addend = 0x%lu ", + symbolExtra, FAKE_GOT_S, res)); + return res; + } + } + default: + debugBelch("Unimplemented relocation: 0x%lx\n (%lu)", + ELF64_R_TYPE(rel->r_info), ELF64_R_TYPE(rel->r_info)); + abort(/* unhandled rel */); + } + debugBelch("This should never happen!"); + abort(/* unhandled rel */); +} + +// TODO: This is duplicated from elf_reloc_aarch64.c +bool relocateObjectCodeRISCV64(ObjectCode *oc) { + for (ElfRelocationTable *relTab = oc->info->relTable; relTab != NULL; + relTab = relTab->next) { + /* only relocate interesting sections */ + if (SECTIONKIND_OTHER == oc->sections[relTab->targetSectionIndex].kind) + continue; + + Section *targetSection = &oc->sections[relTab->targetSectionIndex]; + + for (unsigned i = 0; i < relTab->n_relocations; i++) { + Elf_Rel *rel = &relTab->relocations[i]; + + ElfSymbol *symbol = findSymbol(oc, relTab->sectionHeader->sh_link, + ELF64_R_SYM((Elf64_Xword)rel->r_info)); + + CHECK(0x0 != symbol); + + // TODO: This always fails, because we don't support Rel locations: Do + // we need this case? 
+ /* decode implicit addend */ + int64_t addend = decodeAddendRISCV64(targetSection, rel); + + addend = computeAddend((ElfRelocationATable*) relTab, i, rel, symbol, addend, oc); + encodeAddendRISCV64(targetSection, rel, addend); + } + } + for (ElfRelocationATable *relaTab = oc->info->relaTable; relaTab != NULL; + relaTab = relaTab->next) { + /* only relocate interesting sections */ + if (SECTIONKIND_OTHER == oc->sections[relaTab->targetSectionIndex].kind) + continue; + + Section *targetSection = &oc->sections[relaTab->targetSectionIndex]; + + for (unsigned i = 0; i < relaTab->n_relocations; i++) { + + Elf_Rela *rel = &relaTab->relocations[i]; + + ElfSymbol *symbol = findSymbol(oc, relaTab->sectionHeader->sh_link, + ELF64_R_SYM((Elf64_Xword)rel->r_info)); + + CHECK(0x0 != symbol); + + /* take explicit addend */ + int64_t addend = rel->r_addend; + + addend = computeAddend(relaTab, i, (Elf_Rel *)rel, symbol, addend, oc); + encodeAddendRISCV64(targetSection, (Elf_Rel *)rel, addend); + } + } + return EXIT_SUCCESS; +} + +void flushInstructionCacheRISCV64(ObjectCode *oc) { + // Synchronize the memory and instruction cache to prevent illegal + // instruction exceptions. On Linux the parameters of + // __builtin___clear_cache are currently unused. Add them anyways for future + // compatibility. (I.e. the parameters couldn't be checked during + // development.) + + /* The main object code */ + void *codeBegin = oc->image + oc->misalignment; + __builtin___clear_cache(codeBegin, codeBegin + oc->fileSize); + + /* Jump Islands */ + __builtin___clear_cache((void *)oc->symbol_extras, + (void *)oc->symbol_extras + + sizeof(SymbolExtra) * oc->n_symbol_extras); + + // Memory barrier to ensure nothing circumvents the fence.i / cache flushes. 
+ SEQ_CST_FENCE(); +} + +#endif /* OBJFORMAT_ELF */ +#endif /* riscv64_HOST_ARCH */ Index: ghc-9.8.2/rts/linker/elf_reloc_riscv64.h =================================================================== --- /dev/null +++ ghc-9.8.2/rts/linker/elf_reloc_riscv64.h @@ -0,0 +1,11 @@ +#pragma once + +#include "LinkerInternals.h" + +#if defined(OBJFORMAT_ELF) + +bool +relocateObjectCodeRISCV64(ObjectCode * oc); + +void flushInstructionCacheRISCV64(ObjectCode *oc); +#endif /* OBJFORMAT_ELF */ Index: ghc-9.8.2/rts/linker/macho/plt.h =================================================================== --- ghc-9.8.2.orig/rts/linker/macho/plt.h +++ ghc-9.8.2/rts/linker/macho/plt.h @@ -25,7 +25,7 @@ unsigned numberOfStubsForSection( Object #define STUB_SIZE ADD_SUFFIX(stubSize) bool findStub(Section * section, void* * addr, uint8_t flags); -bool makeStub(Section * section, void* * addr, uint8_t flags); +bool makeStub(Section * section, void* * addr, void* got_addr, uint8_t flags); void freeStubs(Section * section); Index: ghc-9.8.2/rts/rts.cabal.in =================================================================== --- ghc-9.8.2.orig/rts/rts.cabal.in +++ ghc-9.8.2/rts/rts.cabal.in @@ -634,9 +634,11 @@ library linker/elf_got.c linker/elf_plt.c linker/elf_plt_aarch64.c + linker/elf_plt_riscv64.c linker/elf_plt_arm.c linker/elf_reloc.c linker/elf_reloc_aarch64.c + linker/elf_reloc_riscv64.c linker/elf_tlsgd.c linker/elf_util.c sm/BlockAlloc.c Index: ghc-9.8.2/tests/compiler/cmm/README.md =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/README.md @@ -0,0 +1,62 @@ +# The Cmm tests + +These tests are primarily targeted towards writing a code generation backend. The +`driver.sh` will pick up all `.cmm` files and run them by looking for the `RUN:` +marker in them. The commands will then be executed in sequence.
+ +A typical file will contain the following header: +``` +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +``` +this instructs the driver to compile the cmm file `$1`; and pipe the assembly +output into the `FileCheck` tool from the llvm project, which allows us to write +basic assertions about the generated assembly. The `FileCheck` tool will scan +the file for `CHECK-A64:` marker, as well as `CHECK-A64-NEXT:` marker and verify +those in the output (see the `-check-prefix` argument to `FileCheck`). The +`FileCheck` tool has many more options that might come in handy. + +Some files will also verify that they execute with a zero exit code, or print +some value to stdout. Again we can use the `RUN:` marker for this as the following +example demonstrates: +``` +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +// RUN: "$CC" "${1%%.*}.o" -o "${1%%.*}.exe" +// RUN: "$EXEC" "${1%%.cmm}.exe" +``` +this will not only compile the assembly, verify that some instructions are +contained, but also convert the produced object file into an executable using +a c compiler, and then run the executable. + +You will most likely also want to include the following: +``` +#include "Cmm.h" +#include "Types.h" +``` +to be able to copy the cmm dumps ghc produces more closely. Those will still +need quite a bit of munging, as the pretty-printer does not roundtrip. + +## Environment Variables: + +- `HC` the haskell compiler to use. +- `CC` the c compiler to use, should be targeting the same architecture as `HC`. +- `EXEC` if we are cross compiling, we might need a tool to execute the foreign + executable. E.g. `wine` or `qemu-<arch>`. + +That is, to execute the whole test-suite (when cross compiling) you may run +``` +EXEC=qemu-aarch64 CC="$TARGET_CC" bash ./driver.sh +``` + +## Missing features + +- [ ] Run selective tests.
+- [ ] Pipe command output into log, and cat log on failure. +- [x] Better reporting on which command failed if multiple commands are given. +- [ ] RUN commands should have pipefail set. + +## Cmm ppr/parser bugs +- [ ] [Parser] Cannot assign to a fixed value (F1) = ... fails to parse +- [ ] [Parser] F1 + F2, will produce MO_Add F1 F2 instead of MO_F_Add F1 F2 +- [ ] [Parser] Can not construct f(y(x)), the parser will turn that into z = y(x); f(z). +- [ ] [Ppr] keeps printing types all over the place even though the parser doesn't allow for those. +- [ ] [Ppr] will print MO_Add, ... instead of the symbols or primitives understood by the parser. \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/Types.h =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/Types.h @@ -0,0 +1,4 @@ +#define F32 float32 +#define F64 float64 +#define W32 bits32 +#define W64 bits64 Index: ghc-9.8.2/tests/compiler/cmm/assert.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/assert.cmm @@ -0,0 +1,26 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +// We need ghc to link this +// RUN: "$HC" -no-hs-main -debug "${1%%.*}.o" -o "${1%%.*}.exe" +// RUN: "$EXEC" "${1%%.cmm}.exe" | FileCheck "$1" -check-prefix=CHECK-RUN-A64 + +#define DEBUG + +#include "Cmm.h" + +main () { + // CHECK-RUN-A64: Sp: 0; SpLim: 0 + foreign "C" printf("Sp: %x; SpLim: %x\n", Sp, SpLim); + + Sp = 0x105368; + SpLim = 0x1050c0; + + // XXX: This might just work with qemu, which provides predictable memory + // locations.
+ // CHECK-RUN-A64: Sp: 105368; SpLim: 1050c0 + foreign "C" printf("Sp: %x; SpLim: %x\n", Sp, SpLim); + + // CHECK-A64: x18, x18, :lo12:_assertFail + ASSERT(SpLim - WDS(RESERVED_STACK_WORDS) <= Sp); + + foreign "C" exit(0::I64); +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/driver.sh =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/driver.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# +# A simple test driver for the cmm tests. Using FileCheck, a little less +# sofisticated than the LLVM regression test driver. + +# If __PRINT=echo, we'll see the output. +__PRINT="" + +function run_test() { + bash <<CODE +function _run () { + $__PRINT $1 +} +_run $2 +CODE +} + +# Colors! See https://stackoverflow.com/a/5947802 +# .---------- constant part! +# vvvv vvvv-- the code from above +RED='\033[0;31m' +GREEN='\033[0;32m' +GRAY='\033[1;30m' +NC='\033[0m' # No Color + +for cmm in *.cmm; do + if grep "RUN: " $cmm > /dev/null ; then + cmds=$(grep "RUN: " $cmm |sed "s|.*RUN: ||g") + if run_test "$cmds" "$cmm" > /dev/null 2>&1 ; then + echo -e "[${GREEN}\u2713${NC}] $cmm ${GREEN}succeded${NC}" + rm -f "${cmm%%.*}.s" "${cmm%%.*}.o" "${cmm%%.*}.exe" "${cmm%.*}.c" "${cmm%%.*}_stub.o" + else + echo -e "[${RED}\u2717${NC}] $cmm ${RED}failed${NC}" + IFS=$'\n'; arrCmds=($cmds); unset IFS; + for cmd in "${arrCmds[@]}"; do + expandedCmd=$(__PRINT=echo run_test "\"$cmd\"" "$cmm") + if run_test "$cmd" "$cmm" > /dev/null 2>&1 ; then + echo -e "\t[${GREEN}\u2713${NC}] $expandedCmd" + else + echo -e "\t[${RED}\u2717${NC}] $expandedCmd" + fi + done + fi + else + echo -e "[${GRAY}?${NC}] $cmm ${GRAY}has no RUN: marker${NC}" + fi +done \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/encode_values.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/encode_values.cmm @@ -0,0 +1,30 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat 
"${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +// RUN: "$CC" "${1%%.*}.o" -o "${1%%.*}.exe" +// RUN: "$EXEC" "${1%%.cmm}.exe" | FileCheck "$1" -check-prefix=CHECK-RUN-A64 + +#include "Cmm.h" +#include "Types.h" + +main () { + R1 = 123454321; + F1 = 0.12345::F32; + D1 = 3.123456789123456789; + D2 = 1.234567891234567890; + + // Let's see that the c string ends up as a symbol + // CHECK-A64: c1_str: + // CHECK-A64-NEXT: .string "%d\n%1.10f\n%.20f\n%.20f\n" + + // CHECK-RUN-A64: 123454321 + // The following is WRONG. We need to promote Float to Double when + // calling a C function. However for now, and for compatibility, we + // don't promote. + + // CHECK-RUN-A64-NEXT: 0.0000000000 + // CHECK-RUN-A64-NEXT: 3.12345678912345681155 + // CHECK-RUN-A64-NEXT: 1.23456789123456789348 + + foreign "C" printf("%d\n%1.10f\n%.20f\n%.20f\n", R1, F1, D1, D2); + + foreign "C" exit(0::I64); +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/immideate.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/immideate.cmm @@ -0,0 +1,65 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +// RUN: "$HC" -no-hs-main "${1%%.*}.o" -o "${1%%.*}.exe" +// RUN: "$EXEC" "${1%%.cmm}.exe" | FileCheck "$1" -check-prefix=CHECK-RUN-A64 + +#include "Cmm.h" + +// Too bad this won't work. We can't construct Cmm Immediates. +// Often ghc will just optimize it away. 
+ +// CHECK-A64: .globl main +// CHECK-A64: main: +main () { + + // CHECK-A64: mov w22, #1 + // CHECK-RUN-A64: 1 = 1 + R1 = 1 :: I64; foreign "C" printf("1 = %ld\n", R1); + + // CHECK-A64: mov w22, #-1 + // CHECK-RUN-A64: -1 = -1 + R1 = -1; foreign "C" printf("-1 = %ld\n", R1); + + // CHECK-A64: mov w22, #4096 + // CHECK-RUN-A64: 4096 = 4096 + R1 = 1 << 12 :: I64; foreign "C" printf("4096 = %ld\n", R1); + + // CHECK-A64: mov w22, #-4096 + // CHECK-RUN-A64: -4096 = -4096 + R1 = -1 << 12 :: I64; foreign "C" printf("-4096 = %ld\n", R1); + + // CHECK-A64: mov w22, #0 + // CHECK-A64-NEXT: movk w22, #1, lsl #16 + // CHECK-RUN-A64: 65536 = 65536 + R1 = 1 << 16 :: I64; foreign "C" printf("65536 = %ld\n", R1); + + // CHECK-A64: mov w22, #0 + // CHECK-A64-NEXT: movk w22, #65535, lsl #16 + // CHECK-RUN-A64: -65536 = -65536 + R1 = -1 << 16 :: I64; foreign "C" printf("-65536 = %ld\n", R1); + + // CHECK-A64: mov w22, #0 + // CHECK-A64-NEXT: movk w22, #256, lsl #16 + // CHECK-RUN-A64: 16777216 = 16777216 + R1 = 1 << 24 :: I64; foreign "C" printf("16777216 = %ld\n", R1); + + // CHECK-A64: mov w22, #0 + // CHECK-A64-NEXT: movk w22, #65280, lsl #16 + // CHECK-RUN-A64: -16777216 = -16777216 + R1 = -1 << 24 :: I64; foreign "C" printf("-16777216 = %ld\n", R1); + + // CHECK-A64: mov x22, #0 + // CHECK-A64-NEXT: movk x22, #0, lsl #16 + // CHECK-A64-NEXT: movk x22, #16, lsl #32 + // CHECK-A64-NEXT: movk x22, #0, lsl #48 + // CHECK-RUN-A64: 68719476736 = 68719476736 + R1 = 1 << 36 :: I64; foreign "C" printf("68719476736 = %ld\n", R1); + + // CHECK-A64: mov x22, #0 + // CHECK-A64-NEXT: movk x22, #0, lsl #16 + // CHECK-A64-NEXT: movk x22, #65520, lsl #32 + // CHECK-A64-NEXT: movk x22, #65535, lsl #48 + // CHECK-RUN-A64: -68719476736 = -68719476736 + R1 = -1 << 36 :: I64; foreign "C" printf("-68719476736 = %ld\n", R1); + + foreign "C" exit(0::I64); +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/load_large_immediate.cmm 
=================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/load_large_immediate.cmm @@ -0,0 +1,28 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +#include "Cmm.h" +#include "Types.h" + +large() { + // CHECK-A64: mov x23, #52428 + // CHECK-A64-NEXT: movk x23, #52428, lsl #16 + // CHECK-A64-NEXT: movk x23, #52428, lsl #32 + // CHECK-A64-NEXT: movk x23, #3276, lsl #48 + R2 = 922337203685477580; + + // CHECK-A64: mov x23, #52429 + R2 = 922337203685477581; + + // CHECK-A64: mov x18, #11544 + // CHECK-A64-NEXT: movk x18, #21572, lsl #16 + // CHECK-A64-NEXT: movk x18, #8699, lsl #32 + // CHECK-A64-NEXT: movk x18, #16393, lsl #48 + // CHECK-A64-NEXT: fmov d12, x18 + D1 = 3.141592653589793; + + // CHECK-A64: mov x18, #11544 + // CHECK-A64-NEXT: movk x18, #21572, lsl #16 + // CHECK-A64-NEXT: movk x18, #8699, lsl #32 + // CHECK-A64-NEXT: movk x18, #16377, lsl #48 + // CHECK-A64-NEXT: fmov d12, x18 + D1 = 1.5707963267948966; +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/load_store.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/load_store.cmm @@ -0,0 +1,9 @@ + +// XXX: todo +main () { + // test load_store + // write 64bit word, load lower 8bits, 16bits, 32bits, + // and ensure we get exactly what we asked for! + + // write byte, half, word, ... load 64bit, verify. 
+} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/load_sym_with_large_offset.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/load_sym_with_large_offset.cmm @@ -0,0 +1,9 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 + +test () { + // CHECK-A64: mov w18, #4273 + // CHECK-A64: adrp x23, stg_INTLIKE_closure + // CHECK-A64: add x23, x23, :lo12:stg_INTLIKE_closure + // CHECK-A64: add x23, x23, x18 + R2 = stg_INTLIKE_closure+4273; +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/main.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/main.cmm @@ -0,0 +1,27 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +// RUN: "$CC" "${1%%.*}.o" -o "${1%%.*}.exe" +// RUN: "$EXEC" "${1%%.cmm}.exe" +/* + * This is a very basic cmm example. For a more in-depth discussion see + * GHC/Cmm/Parser.y. This file emulates a main function in Cmm. Note + * that we can't use return. This will most of the time require a StackSlot, + * which isn't implemented in most code gen backends. + * + * To compile: + * + * $HC -cpp -dcmm-lint -keep-s-file -c main.cmm + * $CC main.o -o main + * + * Note: We use $CC to compile, the reson is that this allows us to depend on + * the default crt0, ..., files; and we get functions like exit. + * + * Note2: The Cmm.h include (which requires the -cpp flag to $HC), this + * allows us to use I64, I32, ... 
+ */ +#include "Cmm.h" + +// CHECK-A64: .type main, @function +// CHECK-A64-NEXT: main: +main ( I32 argc, I32 argv_ptr ) { + foreign "C" exit(0::I64); +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/mo_f_eq.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/mo_f_eq.cmm @@ -0,0 +1,10 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +#include "Cmm.h" +#include "Types.h" + +w64 () { + // CHECK-A64: fmov d31, xzr + // CHECK-A64-NEXT: fcmp d12, d31 + // CHECK-A64-NEXT: cset x22, eq + R1 = %feq(D1, 0.0 :: W64); +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/mo_f_gt.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/mo_f_gt.cmm @@ -0,0 +1,15 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 + +#include "Cmm.h" +#include "Types.h" + +w64 () { + // CHECK-A64: fmov d31, xzr + R1 = %fgt(D1, 0.0 :: W64); + + if (%fgt(D1, 0.0 :: W64)) { + R2 = 1 :: W64; + } else { + R2 = 0 :: W64; + } +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/mo_f_neg.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/mo_f_neg.cmm @@ -0,0 +1,14 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +#include "Cmm.h" +#include "Types.h" + +f_neg_same_reg () { + // CHECK-A64: neg x22, x22 + R1 = %neg(R1); + + // CHECK-A64: fneg s8, s8 + F1 = %fneg(F1); + + // CHECK-A64: fneg d12, d12 + D1 = %fneg(D1); +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/mo_ff_conv.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/mo_ff_conv.cmm @@ -0,0 +1,12 @@ +// RUN: "$HC" -cpp -dcmm-lint 
-keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +#include "Cmm.h" +#include "Types.h" + +w64 () { + F64 x; + F32 y; + // CHECK-A64: fmov d31, d12 + x = D1; + // CHECK-A64: fcvt s31, d31 + y = %f2f32(x); +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/mo_uu_conv.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/mo_uu_conv.cmm @@ -0,0 +1,20 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -dppr-debug -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +#include "Cmm.h" +#include "Types.h" + +w32_w64 () { + I64 m; + m = 1234 :: I64; + // lobits64 is MO_UU_Conv_W32_W64, when given a I32 + // zx32 is MO_UU_Conv_W64_W32 + // + // First we extract the lower 32 bits from x18 (32 bit value -> 64 bit value) + // CHECK-A64: ubfm x22, x18, #0, #31 + R1 = %lobits64(I32[m]); + + // then we extract the lower 32 bits from x22 (64 bit value -> 32 bit value) + // CHECK-A64-NEXT: ubfm x18, x22, #0, #31 + // and finally extend it again. 
+ // CHECK-A64-NEXT: ubfm x23, x18, #0, #31 + R2 = %lobits64(%zx32(R1)); +} Index: ghc-9.8.2/tests/compiler/cmm/nan.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/nan.cmm @@ -0,0 +1,51 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +// RUN: "$HC" -no-hs-main "${1%%.*}.o" -o "${1%%.*}.exe" +// RUN: "$EXEC" "${1%%.cmm}.exe" | FileCheck "$1" -check-prefix=CHECK-RUN-A64 + +#include "Cmm.h" +#include "Types.h" + +// CHECK-A64: .globl main +// CHECK-A64: main: + +main () { + F64 y; + I64 z; + + D1 = 0.0 - 7.0; + + (y) = foreign "C" sqrt(D1); + + (z) = foreign "C" isDoubleNaN(y); + + // CHECK-RUN-A64: sqrt(-7.000000) = nan + // CHECK-RUN-A64: isDoubleNaN(nan) = 1 + foreign "C" printf("sqrt(%f) = %f\n", D1, y); + foreign "C" printf("isDoubleNaN(%f) = %d\n", y, z); + + // CHECK-RUN-A64: nan < 0 = 0 + foreign "C" printf("%f < 0 = %d\n", y, y `flt` 0.0); + // CHECK-RUN-A64: nan > 0 = 0 + foreign "C" printf("%f > 0 = %d\n", y, y `fgt` 0.0); + // CHECK-RUN-A64: nan <= 0 = 0 + foreign "C" printf("%f <= 0 = %d\n", y, y `fle` 0.0); + // CHECK-RUN-A64: nan >= 0 = 0 + foreign "C" printf("%f >= 0 = %d\n", y, y `fge` 0.0); + // CHECK-RUN-A64: nan == 0 = 0 + foreign "C" printf("%f == 0 = %d\n", y, y `feq` 0.0); + // CHECK-RUN-A64: nan /= 0 = 1 + foreign "C" printf("%f /= 0 = %d\n", y, y `fne` 0.0); + + F64 x; + x = 0.0; y = 0.0; + foreign "C" printf("%f / %f = %d\n", x, y, x `fquot` y); + + // CHECK-RUN-A64: nan >= 0 + if((x `fquot` y) < x) { + foreign "C" printf("%f < %f\n", x `fquot` y, x); + } else { + foreign "C" printf("%f >= %f\n", x `fquot` y, x); + } + + foreign "C" exit(0::I64); +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/primops_math.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/primops_math.cmm @@ -0,0 +1,111 @@ +// RUN: "$HC" -cpp -dcmm-lint 
-keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +// RUN: "$HC" -no-hs-main "${1%%.*}.o" -o "${1%%.*}.exe" +// RUN: "$EXEC" "${1%%.cmm}.exe" | FileCheck "$1" -check-prefix=CHECK-RUN-A64 + +#include "Cmm.h" + +// TODO: most of the unsigned test don't really test unsigned. + +// CHECK-A64: .globl main +// CHECK-A64: main: +main () { + R1 = 7; R2 = 3; + R4 = -7; R5 = -3; + + // CHECK-RUN-A64: 7 + 3 => 10 + R3 = R1 `add` R2; + foreign "C" printf("%d + %d => %d\n", R1, R2, R3); + + // CHECK-RUN-A64: 7 - 3 => 4 + R3 = R1 `sub` R2; + foreign "C" printf("%d - %d => %d\n", R1, R2, R3); + + // CHECK-RUN-A64: 7 = 3 => 0 + R3 = R1 `eq` R2; + foreign "C" printf("%d = %d => %d\n", R1, R2, R3); + + // CHECK-RUN-A64: 7 * 3 => 21 + R3 = R1 `mul` R2; + foreign "C" printf("%d * %d => %d\n", R1, R2, R3); + + // CHECK-RUN-A64: -(7+3) => -10 + R3 = %neg(R1+R2); + foreign "C" printf("-(%d+%d) => %d\n", R1, R2, R3); + + // CHECK-RUN-A64: 7 / 3 => 2 + R3 = R1 `quot` R2; + foreign "C" printf("%d / %d => %d\n", R1, R2, R3); + + // CHECK-RUN-A64: 7 % 3 => 1 + R3 = R1 `rem` R2; + foreign "C" printf("%d %% %d => %d\n", R1, R2, R3); + + // CHECK-RUN-A64: 7 / 3 => 2 + R3 = R1 `divu` R2; + foreign "C" printf("(unsigned): %d / %d => %d\n", R1, R2, R3); + + // CHECK-RUN-A64: 7 % 3 => 1 + R3 = R1 `modu` R2; + foreign "C" printf("(unsigned): %d %% %d => %d\n", R1, R2, R3); + + // Signed >=, <=, >, < + // CHECK-RUN-A64: 7 >= 3 => 1 + R3 = R1 `ge` R2; + foreign "C" printf("%d >= %d => %d", R1, R2, R3); + + // CHECK-RUN-A64: 7 <= 3 => 0 + R3 = R1 `le` R2; + foreign "C" printf("%d <= %d => %d", R1, R2, R3); + + // CHECK-RUN-A64: 7 > 3 => 1 + R3 = R1 `gt` R2; + foreign "C" printf("%d > %d => %d", R1, R2, R3); + + // CHECK-RUN-A64: 7 < 3 => 0 + R3 = R1 `lt` R2; + foreign "C" printf("%d < %d => %d", R1, R2, R3); + + // Unsigned >=, <=, >, < + // CHECK-RUN-A64: 7 >= 3 => 1 + R3 = R1 `geu` R2; + foreign "C" printf("(unsigned): %d >= %d => %d", R1, R2, R3); + + // CHECK-RUN-A64: 
7 <= 3 => 0 + R3 = R1 `leu` R2; + foreign "C" printf("(unsigned): %d <= %d => %d", R1, R2, R3); + + // CHECK-RUN-A64: 7 > 3 => 1 + R3 = R1 `gtu` R2; + foreign "C" printf("(unsigned): %d > %d => %d", R1, R2, R3); + + // CHECK-RUN-A64: 7 < 3 => 0 + R3 = R1 `ltu` R2; + foreign "C" printf("(unsigned): %d < %d => %d", R1, R2, R3); + + // Logical ops: and, or, xor, com, shl, shrl, shra + R3 = R1 `and` R2; + foreign "C" printf("%d and %d => %d", R1, R2, R3); + + R3 = R1 `or` R2; + foreign "C" printf("%d or %d => %d", R1, R2, R3); + + R3 = R1 `xor` R2; + foreign "C" printf("%d xor %d => %d", R1, R2, R3); + + // R3 = R1 `com` R2; + // foreign "C" printf("%d com %d => %d", R1, R2, R3); + + R3 = R1 `shl` R2; + foreign "C" printf("%d shl %d => %d", R1, R2, R3); + + R3 = R1 `shrl` R2; + foreign "C" printf("%d shrl %d => %d", R1, R2, R3); + + R3 = R1 `shra` R2; + foreign "C" printf("%d shra %d => %d", R1, R2, R3); + + + + + foreign "C" exit(0::I64); +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/shift_left.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/shift_left.cmm @@ -0,0 +1,15 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +#include "Cmm.h" +#include "Types.h" + +shift_left () { + I64 a, b, c, d, e, f; + a = 1; + b = 1; + c = 1; + d = 1; + // CHECK-A64: lsl x18, x18, x14 + // CHECK-A64: lsl x18, x18, x17 + // CHECK-A64: lsl x18, x18, x17 + R2 = (((((a << 1 | b) << 1) | c) << 1) | d); +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/shift_left_conditional.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/shift_left_conditional.cmm @@ -0,0 +1,13 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +#include "Cmm.h" +#include "Types.h" + +// Original failing cmm: +// +// 
R1 = I64[(%MO_F_Gt_W32(F32[Sp + 8], F32[R1 + 7]) << 3) + ghczmprim_GHCziTypes_Bool_closure_tbl]; // CmmAssign +// +shift_left_conditional () { + // CHECK-A64: fcmp s31, s30 + // CHECK-A64-NEXT: cset w18, gt + R1 = %fgt(F32[Sp], F32[R1]) << 3; +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/shift_right.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/shift_right.cmm @@ -0,0 +1,24 @@ +// RUN: "$HC" -debug -dppr-debug -cpp -dcmm-lint -keep-s-file -O0 -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-RV64 +// RUN: "$CC" "${1%%.*}.o" -o "${1%%.*}.exe" +// RUN: "$EXEC" "${1%%.cmm}.exe" + +#include "Cmm.h" +#include "Types.h" + +main() { + I64 buffer; + I32 a, b, c, d; + + I64 arr; + (arr) = foreign "C" malloc(1024); + bits64[arr] = 2; + + a = I32[arr]; + b = %mul(a, 32 :: I32); + c = %neg(b); + d = %shra(c, 4::I64); + + foreign "C" printf("a: %hd b: %hd c: %hd d: %hd", a, b, c, d); + + foreign "C" exit(d == -4 :: I32); +} Index: ghc-9.8.2/tests/compiler/cmm/store_neg_float.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/store_neg_float.cmm @@ -0,0 +1,13 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +#include "Cmm.h" +#include "Types.h" + +store_neg_float_on_heap () { + // CHECK-A64: fneg s31, s8 + // CHECK-A64-NEXT: str s31, [ x21 ] + F32[Hp] = %fneg(F1); + + // CHECK-A64: fneg d31, d12 + // CHECK-A64-NEXT: str d31, [ x21 ] + F64[Hp] = %fneg(D1); +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/switch_expr.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/switch_expr.cmm @@ -0,0 +1,52 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +// RUN: "$CC" "${1%%.*}.o" -o "${1%%.*}.exe" 
+// RUN: "$EXEC" "${1%%.cmm}.exe" + +#include "Cmm.h" +#include "Types.h" + + +main (I64 argc) { + _switch: // global + switch [0 .. 7] argc { + case 0 : goto _a0; + case 1 : goto _a1; + case 2 : goto _a2; + case 3 : goto _a3; + case 4 : goto _a4; + case 5 : goto _a5; + case 6 : goto _a6; + case 7 : goto _a7; + default: {goto _c;} + } // CmmSwitch + _a0: // global + foreign "C" printf("A0\n"); + goto _end; + _a1: // global + foreign "C" printf("A1\n"); + goto _end; + _a2: // global + foreign "C" printf("A2\n"); + goto _end; + _a3: // global + foreign "C" printf("A3\n"); + goto _end; + _a4: // global + foreign "C" printf("A4\n"); + goto _end; + _a5: // global + foreign "C" printf("A5\n"); + goto _end; + _a6: // global + foreign "C" printf("A6\n"); + goto _end; + _a7: // global + foreign "C" printf("A7\n"); + goto _end; + _c: // global + foreign "C" printf("C\n"); + goto _end; + + _end: + foreign "C" exit(0::I64); +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/unsafe_foreign_calls.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/unsafe_foreign_calls.cmm @@ -0,0 +1,34 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +#include "Cmm.h" +#include "Types.h" + +// Original failing cmm: +// +// (_ckSs::F32) = call MO_F32_ExpM1(_shwV::F32); // CmmUnsafeForeignCall +// +// One issue with the cmm parser is, that we can't build the following expressions +// the parser will deconstruct them into arg0 <- arg0_expr; f(arg0); instead of f(arg0_expr) +// +// (_ckSz::F32) = call MO_F32_Log(%MO_F_Neg_W32(_ckSs::F32)); // CmmUnsafeForeignCall +// (_ckqE::F64) = call MO_F64_Log(2.0 :: W64); // CmmUnsafeForeignCall +// (_cjE1::F64) = call MO_F64_Atan(%MO_F_Quot_W64(_shFv::F64, _shFw::F64)); // CmmUnsafeForeignCall +// +// +foreign_calls () { + F32 x, y; + x = F1; + + // CHECK-A64: adrp x18, expm1f + // CHECK-A64: add x18, x18, 
:lo12:expm1f + // CHECK-A64: fmov s0, s30 + // CHECK-A64: blr x18 + (y) = prim %expM132f(F1); + // CHECK-A64: fmov s8, s30 + F1 = y; + F2 = y; + (y) = prim %log32f(%fneg(x)); + + prim %log64f(2.0); + + prim %atan64f(%fquot(D1,D2)); +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/unsafe_foreign_calls_excess_arguments.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/unsafe_foreign_calls_excess_arguments.cmm @@ -0,0 +1,36 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +// RUN: "$LN" -sf "$1" "${1%%.*}.c" +// RUN: "$CC" -DC -c "${1%%.*}.c" -o "${1%%.*}_stub.o" +// RUN: "$CC" "${1%%.*}.o" "${1%%.*}_stub.o" -o "${1%%.*}.exe" +// RUN: "$EXEC" "${1%%.*}.exe" + +#if defined(C) // the C part +int mega_add(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j, int k) { + return a + b + c + d + e + f + g + h + i + j + k; +} +#else // the CMM part +#include "Cmm.h" +#include "Types.h" + +// Let's ensure we can call functions with lots of arguments, and they are +// passed correctly. +main () { + + // create some fake stack space and assign to Sp. 
+ I64 sp; + (sp) = foreign "C" malloc(1024); + Sp = sp; + + I64 x, y; + I64 a, b, c, d, e, f, g, h, i, j, k; + a = 1; b = 2; c = 4; d = 8; e = 16; f = 32; g = 64; h = 128; i = 256; j = 512; k = 1024; + + // We have 3 excess arguments; thus we need 4 slots (32 bytes) + // CHECK-A64: sub sp, sp, #32 + // CHECK-A64: add sp, sp, #32 + (y) = foreign "C" mega_add(a,b,c,d,e,f,g,h,i,j,k); + x = a + b + c + d + e + f + g + h + i + j + k; + + foreign "C" exit(y != x); +} +#endif Index: ghc-9.8.2/tests/compiler/cmm/unsafe_foreign_calls_excess_float_arguments.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/unsafe_foreign_calls_excess_float_arguments.cmm @@ -0,0 +1,37 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +// RUN: "$LN" -sf "$1" "${1%%.*}.c" +// RUN: "$CC" -DC -c "${1%%.*}.c" -o "${1%%.*}_stub.o" +// RUN: "$CC" "${1%%.*}.o" "${1%%.*}_stub.o" -o "${1%%.*}.exe" +// RUN: "$EXEC" "${1%%.*}.exe" + +#if defined(C) // the C part +double mega_fadd(double a, double b, double c, double d, double e, double f, double g, double h, double i, double j, double k) { + return a + b + c + d + e + f + g + h + i + j + k; +} + +#else // the CMM part +#include "Cmm.h" +#include "Types.h" + +// Let's ensure we can call functions with lots of arguments, and they are +// passed correctly. +main () { + + // create some fake stack space and assign to Sp. 
+ I64 sp; + (sp) = foreign "C" malloc(1024); + Sp = sp; + + F64 fx, fy; + F64 fa, fb, fc, fd, fe, ff, fg, fh, fi, fj, fk; + fa = 1.0; fb = 2.0; fc = 4.0; fd = 8.0; fe = 16.0; ff = 32.0; fg = 64.0; fh = 128.0; fi = 256.0; fj = 512.0; fk = 1024.0; + + // We have 3 excess arguments; thus we need 4 slots (32 bytes) + // CHECK-A64: sub sp, sp, #32 + // CHECK-A64: add sp, sp, #32 + (fy) = foreign "C" mega_fadd(fa,fb,fc,fd,fe,ff,fg,fh,fi,fj,fk); + fx = fa + fb + fc + fd + fe + ff + fg + fh + fi + fj + fk; + + foreign "C" exit(fy != fx); +} +#endif Index: ghc-9.8.2/tests/compiler/cmm/unsafe_foreign_calls_excess_mixed_arguments.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/unsafe_foreign_calls_excess_mixed_arguments.cmm @@ -0,0 +1,57 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +// RUN: "$LN" -sf "$1" "${1%%.*}.c" +// RUN: "$CC" -DC -c "${1%%.*}.c" -o "${1%%.*}_stub.o" +// RUN: "$CC" "${1%%.*}.o" "${1%%.*}_stub.o" -o "${1%%.*}.exe" +// RUN: "$EXEC" "${1%%.*}.exe" + +#if defined(C) // the C part +#include <stdlib.h> +#include <stdio.h> + +void *mega_mixed_add(int a, double fa, int b, double fb, int c, double fc, + int d, double fd, int e, double fe, int f, double ff, + int g, double fg, int h, double fh, int i, double fi, + int j, double fj, int k, double fk) { + + void *r = (void *)calloc(1,16); + *(int*)r = a + b + c + d + e + f + g + h + i + j + k; + *(double*)(r+8) = fa + fb + fc + fd + fe + ff + fg + fh + fi + fj + fk; + return r; +} + +#else // the CMM part +#include "Cmm.h" +#include "Types.h" + +// Let's ensure we can call functions with lots of arguments, and they are +// passed correctly. +main () { + + // create some fake stack space and assign to Sp. 
+ I64 sp; + (sp) = foreign "C" malloc(1024); + Sp = sp; + + I64 x, y; + I64 a, b, c, d, e, f, g, h, i, j, k; + a = 1; b = 2; c = 4; d = 8; e = 16; f = 32; g = 64; h = 128; i = 256; j = 512; k = 1024; + + F64 fx, fy; + F64 fa, fb, fc, fd, fe, ff, fg, fh, fi, fj, fk; + fa = 1.0; fb = 2.0; fc = 4.0; fd = 8.0; fe = 16.0; ff = 32.0; fg = 64.0; fh = 128.0; fi = 256.0; fj = 512.0; fk = 1024.0; + + // We have 6 excess arguments; thus we need 6 slots (48 bytes) + // CHECK-A64: sub sp, sp, #48 + // CHECK-A64: add sp, sp, #48 + I64 rptr; + (rptr) = foreign "C" mega_mixed_add(a,fa,b,fb,c,fc,d,fd,e,fe,f,ff,g,fg,h,fh, i,fi,j,fj,k,fk); + + y = I64[rptr]; + fy = F64[rptr + 8]; + + x = a + b + c + d + e + f + g + h + i + j + k; + fx = fa + fb + fc + fd + fe + ff + fg + fh + fi + fj + fk; + + foreign "C" exit((fy != fx) == (f != y)); +} +#endif Index: ghc-9.8.2/tests/compiler/cmm/unsafe_foreign_calls_spill_regs.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/unsafe_foreign_calls_spill_regs.cmm @@ -0,0 +1,50 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-A64 +#include "Cmm.h" +#include "Types.h" + +// Original failing cmm: +// +// (_ckSs::F32) = call MO_F32_ExpM1(_shwV::F32); // CmmUnsafeForeignCall +// +// One issue with the cmm parser is, that we can't build the following expressions +// the parser will deconstruct them into arg0 <- arg0_expr; f(arg0); instead of f(arg0_expr) +// +// (_ckSz::F32) = call MO_F32_Log(%MO_F_Neg_W32(_ckSs::F32)); // CmmUnsafeForeignCall +// (_ckqE::F64) = call MO_F64_Log(2.0 :: W64); // CmmUnsafeForeignCall +// (_cjE1::F64) = call MO_F64_Atan(%MO_F_Quot_W64(_shFv::F64, _shFw::F64)); // CmmUnsafeForeignCall +// +// +foreign_calls () { + F32 x, y; + x = F1; + + // Some variables to ensure we use up all the registers. 
+ F32 a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t; + a = F1; b = F1; c = F1; d = F1; e = F1; f = F1; g = F1; h = F1; i = F1; + j = F1; k = F1; l = F1; m = F1; n = F1; o = F1; p = F1; q = F1; r = F1; + s = F1; t = F1; + // Some integer register + I64 _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, _n, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x, _y, _z; + _a = R1; _b = R1; _c = R1; _d = R1; _e = R1; _f = R1; _g = R1; _h = R1; + _i = R1; _j = R1; _k = R1; _l = R1; _m = R1; _n = R1; _o = R1; _p = R1; + _q = R1; _r = R1; _s = R1; _t = R1; _u = R1; _v = R1; _w = R1; _x = R1; _y = R1; _z = R1; + + // CHECK-A64: adrp x11, expm1f + // CHECK-A64: add x11, x11, :lo12:expm1f + // CHECK-A64: fmov s0, s2 + // CHECK-A64: blr x11 + (y) = prim %expM132f(F1); + // CHECK-A64: fmov s8, s30 + F1 = y; + F2 = y; + (y) = prim %log32f(%fneg(x)); + + prim %log64f(2.0); + + prim %atan64f(%fquot(D1,D2)); + + // make sure a..t stay alive! + F3 = a + b + c + d + e + f + g + h + i + j + k + l + m + n + o + p + q + r + s + t; + R2 = _a + _b + _c + _d + _e + _f + _g + _h + _i + _j + _k + _l + _m + _n + + _o + _p + _q + _r + _s + _t + _u + _v + _w + _x + _y + _z; +} \ No newline at end of file Index: ghc-9.8.2/tests/compiler/cmm/zero.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/tests/compiler/cmm/zero.cmm @@ -0,0 +1,14 @@ +// RUN: "$HC" -cpp -dcmm-lint -keep-s-file -c "$1" && cat "${1%%.*}.s" | FileCheck "$1" -check-prefix=CHECK-RV64 +// RUN: "$CC" "${1%%.*}.o" -o "${1%%.*}.exe" +// RUN: "$EXEC" "${1%%.cmm}.exe" + +#include "Cmm.h" +#include "Types.h" + +main(){ + I64 zero; + // Should refer to the zero register + // CHECK-RV64: addi t0, zero, 0 + zero = 0; + foreign "C" exit(zero); +} Index: ghc-9.8.2/testsuite/tests/codeGen/should_run/CCallConv.hs =================================================================== --- /dev/null +++ ghc-9.8.2/testsuite/tests/codeGen/should_run/CCallConv.hs @@ -0,0 +1,123 @@ +{-# LANGUAGE 
ForeignFunctionInterface #-} +{-# LANGUAGE GHCForeignImportPrim #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE UnliftedFFITypes #-} + +-- | This test ensures that sub-word signed and unsigned parameters are correctly +-- handed over to C functions. I.e. it asserts the calling-convention. +-- +-- The number of parameters is currently shaped for the RISCV64 calling-convention. +-- You may need to add more parameters to the C functions in case there are more +-- registers reserved for parameters in your architecture. +module Main where + +import Data.Word +import GHC.Exts +import GHC.Int + +foreign import ccall "fun8" + fun8 :: + Int8# -> -- a0 + Word8# -> -- a1 + Int8# -> -- a2 + Int8# -> -- a3 + Int8# -> -- a4 + Int8# -> -- a5 + Int8# -> -- a6 + Int8# -> -- a7 + Word8# -> -- s0 + Int8# -> -- s1 + Int64# -- result + +foreign import ccall "fun16" + fun16 :: + Int16# -> -- a0 + Word16# -> -- a1 + Int16# -> -- a2 + Int16# -> -- a3 + Int16# -> -- a4 + Int16# -> -- a5 + Int16# -> -- a6 + Int16# -> -- a7 + Word16# -> -- s0 + Int16# -> -- s1 + Int64# -- result + +foreign import ccall "fun32" + fun32 :: + Int32# -> -- a0 + Word32# -> -- a1 + Int32# -> -- a2 + Int32# -> -- a3 + Int32# -> -- a4 + Int32# -> -- a5 + Int32# -> -- a6 + Int32# -> -- a7 + Word32# -> -- s0 + Int32# -> -- s1 + Int64# -- result + +foreign import ccall "funFloat" + funFloat :: + Float# -> -- a0 + Float# -> -- a1 + Float# -> -- a2 + Float# -> -- a3 + Float# -> -- a4 + Float# -> -- a5 + Float# -> -- a6 + Float# -> -- a7 + Float# -> -- s0 + Float# -> -- s1 + Float# -- result + +foreign import ccall "funDouble" + funDouble :: + Double# -> -- a0 + Double# -> -- a1 + Double# -> -- a2 + Double# -> -- a3 + Double# -> -- a4 + Double# -> -- a5 + Double# -> -- a6 + Double# -> -- a7 + Double# -> -- s0 + Double# -> -- s1 + Double# -- result + +main :: IO () +main = + -- N.B. 
the values here aren't choosen by accident: -1 means all bits one in + -- twos-complement, which is the same as the max word value. + let i8 :: Int8# = intToInt8# (-1#) + w8 :: Word8# = wordToWord8# (255##) + res8 :: Int64# = fun8 i8 w8 i8 i8 i8 i8 i8 i8 w8 i8 + expected_res8 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word8) + 8 * (-1) + i16 :: Int16# = intToInt16# (-1#) + w16 :: Word16# = wordToWord16# (65535##) + res16 :: Int64# = fun16 i16 w16 i16 i16 i16 i16 i16 i16 w16 i16 + expected_res16 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word16) + 8 * (-1) + i32 :: Int32# = intToInt32# (-1#) + w32 :: Word32# = wordToWord32# (4294967295##) + res32 :: Int64# = fun32 i32 w32 i32 i32 i32 i32 i32 i32 w32 i32 + expected_res32 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word32) + 8 * (-1) + resFloat :: Float = F# (funFloat 1.0# 1.1# 1.2# 1.3# 1.4# 1.5# 1.6# 1.7# 1.8# 1.9#) + resDouble :: Double = D# (funDouble 1.0## 1.1## 1.2## 1.3## 1.4## 1.5## 1.6## 1.7## 1.8## 1.9##) + in do + print $ "fun8 result:" ++ show (I64# res8) + assertEqual expected_res8 (I64# res8) + print $ "fun16 result:" ++ show (I64# res16) + assertEqual expected_res16 (I64# res16) + print $ "fun32 result:" ++ show (I64# res32) + assertEqual expected_res32 (I64# res32) + print $ "funFloat result:" ++ show resFloat + assertEqual (14.5 :: Float) resFloat + print $ "funDouble result:" ++ show resDouble + assertEqual (14.5 :: Double) resDouble + +assertEqual :: (Eq a, Show a) => a -> a -> IO () +assertEqual a b = + if a == b + then pure () + else error $ show a ++ " =/= " ++ show b Index: ghc-9.8.2/testsuite/tests/codeGen/should_run/CCallConv.stdout =================================================================== --- /dev/null +++ ghc-9.8.2/testsuite/tests/codeGen/should_run/CCallConv.stdout @@ -0,0 +1,60 @@ +"fun8 result:502" +"fun16 result:131062" +"fun32 result:8589934582" +"funFloat result:14.5" +"funDouble result:14.5" +fun32: +a0: 0xffffffff -1 +a1: 0xffffffff 
4294967295 +a2: 0xffffffff -1 +a3: 0xffffffff -1 +a4: 0xffffffff -1 +a5: 0xffffffff -1 +a6: 0xffffffff -1 +a7: 0xffffffff -1 +s0: 0xffffffff -1 +s1: 0xffffffff 4294967295 +fun16: +a0: 0xffffffff -1 +a1: 0xffff 65535 +a2: 0xffffffff -1 +a3: 0xffffffff -1 +a4: 0xffffffff -1 +a5: 0xffffffff -1 +a6: 0xffffffff -1 +a7: 0xffffffff -1 +s0: 0xffffffff -1 +s1: 0xffff 65535 +fun8: +a0: 0xffffffff -1 +a1: 0xff 255 +a2: 0xffffffff -1 +a3: 0xffffffff -1 +a4: 0xffffffff -1 +a5: 0xffffffff -1 +a6: 0xffffffff -1 +a7: 0xffffffff -1 +s0: 0xffffffff -1 +s1: 0xff 255 +funFloat: +a0: 1.000000 +a1: 1.100000 +a2: 1.200000 +a3: 1.300000 +a4: 1.400000 +a5: 1.500000 +a6: 1.600000 +a7: 1.700000 +s0: 1.800000 +s1: 1.900000 +funDouble: +a0: 1.000000 +a1: 1.100000 +a2: 1.200000 +a3: 1.300000 +a4: 1.400000 +a5: 1.500000 +a6: 1.600000 +a7: 1.700000 +s0: 1.800000 +s1: 1.900000 Index: ghc-9.8.2/testsuite/tests/codeGen/should_run/CCallConv_c.c =================================================================== --- /dev/null +++ ghc-9.8.2/testsuite/tests/codeGen/should_run/CCallConv_c.c @@ -0,0 +1,91 @@ +#include "stdint.h" +#include "stdio.h" + +int64_t fun8(int8_t a0, uint8_t a1, int8_t a2, int8_t a3, int8_t a4, int8_t a5, + int8_t a6, int8_t a7, int8_t s0, uint8_t s1) { + printf("fun8:\n"); + printf("a0: %#x %hhd\n", a0, a0); + printf("a1: %#x %hhu\n", a1, a1); + printf("a2: %#x %hhd\n", a2, a2); + printf("a3: %#x %hhd\n", a3, a3); + printf("a4: %#x %hhd\n", a4, a4); + printf("a5: %#x %hhd\n", a5, a5); + printf("a6: %#x %hhd\n", a6, a6); + printf("a7: %#x %hhd\n", a7, a7); + printf("s0: %#x %hhd\n", s0, s0); + printf("s1: %#x %hhu\n", s1, s1); + + return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1; +} + +int64_t fun16(int16_t a0, uint16_t a1, int16_t a2, int16_t a3, int16_t a4, + int16_t a5, int16_t a6, int16_t a7, int16_t s0, uint16_t s1) { + printf("fun16:\n"); + printf("a0: %#x %hd\n", a0, a0); + printf("a1: %#x %hu\n", a1, a1); + printf("a2: %#x %hd\n", a2, a2); + printf("a3: %#x %hd\n", 
a3, a3); + printf("a4: %#x %hd\n", a4, a4); + printf("a5: %#x %hd\n", a5, a5); + printf("a6: %#x %hd\n", a6, a6); + printf("a7: %#x %hd\n", a7, a7); + printf("s0: %#x %hd\n", s0, s0); + printf("s1: %#x %hu\n", s1, s1); + + return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1; +} + +int64_t fun32(int32_t a0, uint32_t a1, int32_t a2, int32_t a3, int32_t a4, + int32_t a5, int32_t a6, int32_t a7, int32_t s0, uint32_t s1) { + printf("fun32:\n"); + printf("a0: %#x %d\n", a0, a0); + printf("a1: %#x %u\n", a1, a1); + printf("a2: %#x %d\n", a2, a2); + printf("a3: %#x %d\n", a3, a3); + printf("a4: %#x %d\n", a4, a4); + printf("a5: %#x %d\n", a5, a5); + printf("a6: %#x %d\n", a6, a6); + printf("a7: %#x %d\n", a7, a7); + printf("s0: %#x %d\n", s0, s0); + printf("s1: %#x %u\n", s1, s1); + + // Ensure the addition happens in long int (not just int) precission. + // Otherwise, the result is truncated during the operation. + int64_t force_int64_precission = 0; + return force_int64_precission + a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + + s1; +} + +float funFloat(float a0, float a1, float a2, float a3, float a4, float a5, + float a6, float a7, float s0, float s1) { + printf("funFloat:\n"); + printf("a0: %f\n", a0); + printf("a1: %f\n", a1); + printf("a2: %f\n", a2); + printf("a3: %f\n", a3); + printf("a4: %f\n", a4); + printf("a5: %f\n", a5); + printf("a6: %f\n", a6); + printf("a7: %f\n", a7); + printf("s0: %f\n", s0); + printf("s1: %f\n", s1); + + return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1; +} + +double funDouble(double a0, double a1, double a2, double a3, double a4, double a5, + double a6, double a7, double s0, double s1) { + printf("funDouble:\n"); + printf("a0: %f\n", a0); + printf("a1: %f\n", a1); + printf("a2: %f\n", a2); + printf("a3: %f\n", a3); + printf("a4: %f\n", a4); + printf("a5: %f\n", a5); + printf("a6: %f\n", a6); + printf("a7: %f\n", a7); + printf("s0: %f\n", s0); + printf("s1: %f\n", s1); + + return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1; 
+} Index: ghc-9.8.2/testsuite/tests/codeGen/should_run/MulMayOflo_cmm.cmm =================================================================== --- /dev/null +++ ghc-9.8.2/testsuite/tests/codeGen/should_run/MulMayOflo_cmm.cmm @@ -0,0 +1,89 @@ +// Suppress empty ASSERT() optimization +#define USE_ASSERTS_ALL_WAYS 1 + +#include "Cmm.h" + +runCmmzh() { +// BEWARE: Cmm isn't really type checked. I.e. you may construct +// 256::I8, which is obviously wrong and let's to strange behaviour. + +// N.B. the contract of '%mulmayoflo' is a bit weak: +// "Return non-zero if there is any possibility that the signed multiply +// of a and b might overflow. Return zero only if you are absolutely sure +// that it won't overflow. If in doubt, return non-zero." (Stg.h) +// So, this test might be a bit too strict for some architectures as it +// expects a perfect implementation. + + // --- I8 + ASSERT(%mulmayoflo(1::I8, 1::I8) == 0::I8); + ASSERT(%mulmayoflo(0::I8, 0::I8) == 0::I8); + ASSERT(%mulmayoflo(-1::I8, 1::I8) == 0::I8); + ASSERT(%mulmayoflo(1::I8, -1::I8) == 0::I8); + ASSERT(%mulmayoflo(127::I8, 1::I8) == 0::I8); + ASSERT(%mulmayoflo(1::I8, 127::I8) == 0::I8); + ASSERT(%mulmayoflo(63::I8, 2::I8) == 0::I8); + ASSERT(%mulmayoflo(2::I8, 63::I8) == 0::I8); + ASSERT(%mulmayoflo(64::I8, 2::I8) > 0::I8); + ASSERT(%mulmayoflo(2::I8, 64::I8) > 0::I8); + ASSERT(%mulmayoflo(127::I8, -1::I8) == 0::I8); + ASSERT(%mulmayoflo(-1::I8, 127::I8) == 0::I8); + ASSERT(%mulmayoflo(-128::I8, 1::I8) == 0::I8); + ASSERT(%mulmayoflo(-64::I8, 2::I8) == 0::I8); + ASSERT(%mulmayoflo(2::I8, -64::I8) == 0::I8); + ASSERT(%mulmayoflo(-65::I8, 2::I8) > 0::I8); + ASSERT(%mulmayoflo(2::I8, -65::I8) > 0::I8); + + // --- I16 + ASSERT(%mulmayoflo(1::I16, 1::I16) == 0::I16); + ASSERT(%mulmayoflo(0::I16, 0::I16) == 0::I16); + ASSERT(%mulmayoflo(-1::I16, 1::I16) == 0::I16); + ASSERT(%mulmayoflo(1::I16, -1::I16) == 0::I16); + ASSERT(%mulmayoflo(32767::I16, 1::I16) == 0::I16); + ASSERT(%mulmayoflo(1::I16,32767 ::I16) == 
0::I16); + ASSERT(%mulmayoflo(16383::I16, 2::I16) == 0::I16); + ASSERT(%mulmayoflo(2::I16, 16383::I16) == 0::I16); + ASSERT(%mulmayoflo(16384::I16, 2::I16) > 0::I16); + ASSERT(%mulmayoflo(2::I16, 16384::I16) > 0::I16); + ASSERT(%mulmayoflo(-16384::I16, 2::I16) == 0::I16); + ASSERT(%mulmayoflo(2::I16, -16384::I16) == 0::I16); + ASSERT(%mulmayoflo(-32768::I16, 1::I16) == 0::I16); + ASSERT(%mulmayoflo(1::I16, -32768::I16) == 0::I16); + + // -- I32 + ASSERT(%mulmayoflo(1::I32, 1::I32) == 0::I32); + ASSERT(%mulmayoflo(0::I32, 0::I32) == 0::I32); + ASSERT(%mulmayoflo(-1::I32, 1::I32) == 0::I32); + ASSERT(%mulmayoflo(1::I32, -1::I32) == 0::I32); + ASSERT(%mulmayoflo(2147483647::I32, 1::I32) == 0::I32); + ASSERT(%mulmayoflo(1::I32, 2147483647::I32) == 0::I32); + ASSERT(%mulmayoflo(-2147483648::I32, 1::I32) == 0::I32); + ASSERT(%mulmayoflo(1::I32, -2147483648::I32) == 0::I32); + ASSERT(%mulmayoflo(2::I32, 1073741823::I32) == 0::I32); + ASSERT(%mulmayoflo(1073741823::I32, 2::I32) == 0::I32); + ASSERT(%mulmayoflo(2::I32, 1073741824::I32) > 0::I32); + ASSERT(%mulmayoflo(1073741824::I32, 2::I32) > 0::I32); + ASSERT(%mulmayoflo(2::I32, -1073741824::I32) == 0::I32); + ASSERT(%mulmayoflo(-1073741824::I32, 2::I32) == 0::I32); + ASSERT(%mulmayoflo(2::I32, -1073741825::I32) > 0::I32); + ASSERT(%mulmayoflo(-1073741825::I32, 2::I32) > 0::I32); + + // -- I64 + ASSERT(%mulmayoflo(1::I64, 1::I64) == 0::I64); + ASSERT(%mulmayoflo(0::I64, 0::I64) == 0::I64); + ASSERT(%mulmayoflo(-1::I64, 1::I64) == 0::I64); + ASSERT(%mulmayoflo(1::I64, -1::I64) == 0::I64); + ASSERT(%mulmayoflo(9223372036854775807::I64, 1::I64) == 0::I64); + ASSERT(%mulmayoflo(1::I64, 9223372036854775807::I64) == 0::I64); + ASSERT(%mulmayoflo(-9223372036854775808::I64, 1::I64) == 0::I64); + ASSERT(%mulmayoflo(1::I64, -9223372036854775808::I64) == 0::I64); + ASSERT(%mulmayoflo(4611686018427387903::I64, 2::I64) == 0::I64); + ASSERT(%mulmayoflo(2::I64, 4611686018427387903::I64) == 0::I64); + 
ASSERT(%mulmayoflo(-4611686018427387904::I64, 2::I64) == 0::I64); + ASSERT(%mulmayoflo(2::I64, -4611686018427387904::I64) == 0::I64); + ASSERT(%mulmayoflo(-4611686018427387905::I64, 2::I64) > 0::I64); + ASSERT(%mulmayoflo(2::I64, -4611686018427387905::I64) > 0::I64); + ASSERT(%mulmayoflo(4611686018427387904::I64, 2::I64) > 0::I64); + ASSERT(%mulmayoflo(2::I64, 4611686018427387904::I64) > 0::I64); + + return(0); +}
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor