christianermann.dev-hugo

The Hugo source for my website
git clone git://git.christianermann.dev/christianermann.dev-hugo
Log | Files | Refs | Submodules | README

forth-rv32i-assembler.md (8173B)


      1 ---
      2 title: "Forth RV32I Assembler"
      3 date: 2024-11-13T17:33:47-08:00
      4 tags: [Forth, Assembly, RISC-V]
      5 draft: false
      6 ---
      7 
      8 Here's an assembler for RV32I that I wrote in Forth. I find the definitions of
      9 the instructions and instruction types especially elegant, and I think the
     10 assembler is a great demonstration of how concise and powerful Forth can be.
     11 
     12 All code in this post is licensed under the
     13 [AGPLv3](https://www.gnu.org/licenses/agpl-3.0.en.html#license-text).
     14 
     15 ```forth
     16 \ field encoders: each one masks a value taken from the stack and adds it
     17 \ into the instruction word being built, at that field's bit position.
        \ 'opcode' ( opcode -- instr ) starts the word, so all encoding
        \ sequences must begin with 'opcode'. every other encoder is
        \ ( value instr -- instr' ). fields are combined with '+', which relies
        \ on the masked fields never overlapping.
        \ NOTE(review): 'hex' and 'decimal' appear inside colon definitions, so
        \ the literals only parse as intended if this Forth switches base while
        \ compiling -- confirm before loading this on another Forth system.
     18 : opcode             hex     7F and                     ; \ bits  6:0
     19 : funct3        swap hex      7 and decimal 12 lshift + ; \ bits 14:12
     20 : funct7        swap hex     7F and decimal 25 lshift + ; \ bits 31:25
     21 : i-immed       swap hex    FFF and decimal 20 lshift + ; \ imm[11:0] -> bits 31:20
     22 : i-immed-shamt swap hex     1F and decimal 20 lshift + ; \ shamt[4:0] -> bits 24:20
     23 : u-immed       swap hex  FFFFF and decimal 12 lshift + ; \ low 20 bits -> bits 31:12
     24 : s-immed       over hex    FE0 and decimal 20 lshift +   \ imm[11:5] -> bits 31:25
     25                 swap hex     1F and decimal  7 lshift + ; \ imm[4:0]  -> bits 11:7
     26 : j-immed       over hex 100000 and decimal 11 lshift +   \ imm[20]    -> bit  31
     27                 over hex  FF000 and                   +   \ imm[19:12] stay in place
     28                 over hex    800 and decimal  9 lshift +   \ imm[11]    -> bit  20
     29                 swap hex    7FE and decimal 20 lshift + ; \ imm[10:1]  -> bits 30:21
     30 : b-immed       over hex   1000 and decimal 19 lshift +   \ imm[12]   -> bit  31
     31                 over hex    800 and decimal  4 rshift +   \ imm[11]   -> bit  7
     32                 over hex    7E0 and decimal 20 lshift +   \ imm[10:5] -> bits 30:25
     33                 swap hex     1E and decimal  7 lshift + ; \ imm[4:1]  -> bits 11:8
     34 : rd            swap hex     1F and decimal  7 lshift + ; \ bits 11:7
     35 : rs1           swap hex     1F and decimal 15 lshift + ; \ bits 19:15
     36 : rs2           swap hex     1F and decimal 20 lshift + ; \ bits 24:20
     37 
     38 \ instruction types. all instruction values should be pushed on the
     39 \ stack with the opcode last before calling. each type builds the word
        \ from the top of the stack downwards and appends it with ','.
        \   r-type       ( rd  rs1 rs2   funct7 funct3 opcode -- )
        \   i-type       ( rd  rs1 imm          funct3 opcode -- )
        \   i-type-shamt ( rd  rs1 shamt funct7 funct3 opcode -- )
        \   s-type       ( rs1 rs2 imm          funct3 opcode -- )
        \   b-type       ( rs1 rs2 imm          funct3 opcode -- )
        \   u-type       ( rd  imm                     opcode -- )
        \   j-type       ( rd  imm                     opcode -- )
     40 : r-type       opcode funct3 funct7               rs2 rs1 rd , ;
     41 : i-type       opcode funct3        i-immed           rs1 rd , ;
     42 : i-type-shamt opcode funct3 funct7 i-immed-shamt     rs1 rd , ;
     43 : s-type       opcode funct3        s-immed       rs2 rs1    , ;
     44 : b-type       opcode funct3        b-immed       rs2 rs1    , ;
     45 : u-type       opcode               u-immed               rd , ;
     46 : j-type       opcode               j-immed               rd , ;
     47 
     48 \ instructions. these are just simple encodings, no assembler
     49 \ niceties yet. each word pushes its fixed fields (funct7, funct3,
        \ opcode) and calls the matching instruction type, so the operands must
        \ already be on the stack in that type's order:
        \   i-type:        rd rs1 imm      i-type-shamt:  rd rs1 shamt
        \   r-type:        rd rs1 rs2      u-type/j-type: rd imm
        \   s-type/b-type: rs1 rs2 imm     (sw, sh, sb are re-wrapped below)
     50 \             funct7  funct3  opcode  encoding
     51 :   addi, hex              0      13  i-type       ;
     52 :   andi, hex              7      13  i-type       ;
     53 :    ori, hex              6      13  i-type       ;
     54 :   xori, hex              4      13  i-type       ;
     55 :   slli, hex     00       1      13  i-type-shamt ;
     56 :   srli, hex     00       5      13  i-type-shamt ;
     57 :   srai, hex     20       5      13  i-type-shamt ;
     58 :   slti, hex              2      13  i-type       ;
     59 :  sltiu, hex              3      13  i-type       ;
     60 :    lui, hex                     37  u-type       ;
     61 :  auipc, hex                     17  u-type       ;
     62 :    add, hex     00       0      33  r-type       ;
     63 :    sub, hex     20       0      33  r-type       ;
     64 :    and, hex     00       7      33  r-type       ;
     65 :     or, hex     00       6      33  r-type       ;
     66 :    xor, hex     00       4      33  r-type       ;
     67 :    sll, hex     00       1      33  r-type       ;
     68 :    srl, hex     00       5      33  r-type       ;
     69 :    sra, hex     20       5      33  r-type       ;
     70 :    slt, hex     00       2      33  r-type       ;
     71 :   sltu, hex     00       3      33  r-type       ;
     72 :    jal, hex                     6F  j-type       ;
     73 :   jalr, hex              0      67  i-type       ;
     74 :    beq, hex              0      63  b-type       ;
     75 :    bne, hex              1      63  b-type       ;
     76 :    blt, hex              4      63  b-type       ;
     77 :   bltu, hex              6      63  b-type       ;
     78 :    bge, hex              5      63  b-type       ;
     79 :   bgeu, hex              7      63  b-type       ;
     80 :     lw, hex              2      03  i-type       ;
     81 :     lh, hex              1      03  i-type       ;
     82 :    lhu, hex              5      03  i-type       ;
     83 :     lb, hex              0      03  i-type       ;
        :    lbu, hex              4      03  i-type       ; \ was missing from the RV32I set
     84 :     sw, hex              2      23  s-type       ;
     85 :     sh, hex              1      23  s-type       ;
     86 :     sb, hex              0      23  s-type       ;
     87 :  fence, hex              0      0F  i-type       ;
     88 :  ecall, hex              0      73  i-type       ;
     89 : ebreak, hex              0      73  i-type       ;
     90 
     91 \ some instructions, now with nicer usage. each word reuses the name of
        \ the raw encoding defined earlier; the call inside the body is to that
        \ earlier definition. the stores swap their two register operands so the
        \ register order reads like the loads (data register first, then base).
     92 : sw, >r swap r> sw, ;      \ usage: rs2 rs1 imm sw,
     93 : sh, >r swap r> sh, ;      \ usage: rs2 rs1 imm sh,
     94 : sb, >r swap r> sb, ;      \ usage: rs2 rs1 imm sb,
     95 : ecall,  0 0 0  ecall, ;   \ usage: ecall,      (rd=0 rs1=0 imm=0)
     96 : ebreak, 0 0 1 ebreak, ;   \ usage: ebreak,     (rd=0 rs1=0 imm=1)
     97 : fence, >r 0 0 r> fence, ; \ usage: imm fence,  (rd=0 rs1=0)
     98 
     99 \ registers: x0..x31 are constants holding the register numbers 0..31.
        \ the base is set to decimal first so that 10..31 parse as written.
    100 decimal
    101  0 constant x0     1 constant x1     2 constant x2     3 constant x3
    102  4 constant x4     5 constant x5     6 constant x6     7 constant x7
    103  8 constant x8     9 constant x9    10 constant x10   11 constant x11
    104 12 constant x12   13 constant x13   14 constant x14   15 constant x15
    105 16 constant x16   17 constant x17   18 constant x18   19 constant x19
    106 20 constant x20   21 constant x21   22 constant x22   23 constant x23
    107 24 constant x24   25 constant x25   26 constant x26   27 constant x27
    108 28 constant x28   29 constant x29   30 constant x30   31 constant x31
    109 
    110 \ registers (calling convention names; aliases of the x constants above)
    111 x0  constant zero \ zero constant
    112 x1  constant ra   \ return address
    113 x2  constant sp   \ stack pointer
    114 x3  constant gp   \ global pointer
    115 x4  constant tp   \ thread pointer
    116 x8  constant fp   \ frame pointer (same register as s0)
    117 \ function arguments a0..a7 (a0 and a1 also hold return values)
    118 x10 constant a0  x11 constant a1  x12 constant a2   x13 constant a3
    119 x14 constant a4  x15 constant a5  x16 constant a6   x17 constant a7
    120 \ saved registers
    121 x8  constant s0  x9  constant s1  x18 constant s2   x19 constant s3
    122 x20 constant s4  x21 constant s5  x22 constant s6   x23 constant s7
    123 x24 constant s8  x25 constant s9  x26 constant s10  x27 constant s11
    124 \ temporaries
    125 x5  constant t0  x6  constant t1  x7  constant t2   x28 constant t3
    126 x29 constant t4  x30 constant t5  x31 constant t6
    127 ```
    128 
    129 And here are some tests that verify the instruction encodings are generated
    130 correctly:
    131 ```forth
    132 : undo, -1 cells allot here @ ; \ ( -- x ) undoes the last ',' and leaves the cell it wrote
        \ each test assembles one instruction, takes it back off the dictionary
        \ with 'undo,' and compares it with the expected encoding after '->'.
        \ operands are given in hex; the register names are constants, so they
        \ are unaffected by the current base.
        \ NOTE(review): t{ -> }t is presumably the usual Forth test harness,
        \ loaded elsewhere -- it is not defined in this post.
    133 t{ a0 a1 hex   FF   addi, undo, -> hex 0FF58513 }t
    134 t{ a0 a1 hex   FF   andi, undo, -> hex 0FF5F513 }t
    135 t{ a0 a1 hex   FF    ori, undo, -> hex 0FF5E513 }t
    136 t{ a0 a1 hex   FF   xori, undo, -> hex 0FF5C513 }t
    137 t{ a0 a1 hex    F   slli, undo, -> hex 00F59513 }t
    138 t{ a0 a1 hex    F   srli, undo, -> hex 00F5D513 }t
    139 t{ a0 a1 hex    F   srai, undo, -> hex 40F5D513 }t
    140 t{ t0    hex FFFF    lui, undo, -> hex 0FFFF2B7 }t
    141 t{ t0    hex FFFF  auipc, undo, -> hex 0FFFF297 }t
    142 t{ a0 a1 hex   FF   slti, undo, -> hex 0FF5A513 }t
    143 t{ a0 a1 hex   FF  sltiu, undo, -> hex 0FF5B513 }t
    144 t{ a0 a1 a2          add, undo, -> hex 00C58533 }t
    145 t{ a0 a1 a2          sub, undo, -> hex 40C58533 }t
    146 t{ a0 a1 a2          and, undo, -> hex 00C5F533 }t
    147 t{ a0 a1 a2           or, undo, -> hex 00C5E533 }t
    148 t{ a0 a1 a2          xor, undo, -> hex 00C5C533 }t
    149 t{ a0 a1 a2          sll, undo, -> hex 00C59533 }t
    150 t{ a0 a1 a2          srl, undo, -> hex 00C5D533 }t
    151 t{ a0 a1 a2          sra, undo, -> hex 40C5D533 }t
    152 t{ a0 a1 a2          slt, undo, -> hex 00C5A533 }t
    153 t{ a0 a1 a2         sltu, undo, -> hex 00C5B533 }t
    154 t{ ra    hex FFFF    jal, undo, -> hex 7FF0F0EF }t
    155 t{ ra a0 hex   FF   jalr, undo, -> hex 0FF500E7 }t
    156 t{ a0 a1 hex    F    beq, undo, -> hex 00B50763 }t
    157 t{ a0 a1 hex    F    bne, undo, -> hex 00B51763 }t
    158 t{ a0 a1 hex    F    blt, undo, -> hex 00B54763 }t
    159 t{ a0 a1 hex    F   bltu, undo, -> hex 00B56763 }t
    160 t{ a0 a1 hex    F    bge, undo, -> hex 00B55763 }t
    161 t{ a0 a1 hex    F   bgeu, undo, -> hex 00B57763 }t
    162 t{ a0 a1 hex    F     lw, undo, -> hex 00F5A503 }t
    163 t{ a0 a1 hex    F     lh, undo, -> hex 00F59503 }t
    164 t{ a0 a1 hex    F    lhu, undo, -> hex 00F5D503 }t
    165 t{ a0 a1 hex    F     lb, undo, -> hex 00F58503 }t
    166 t{ a0 a1 hex    F     sw, undo, -> hex 00A5A7A3 }t
    167 t{ a0 a1 hex    F     sh, undo, -> hex 00A597A3 }t
    168 t{ a0 a1 hex    F     sb, undo, -> hex 00A587A3 }t
    169 t{       hex  0FF  fence, undo, -> hex 0FF0000F }t
    170 t{                 ecall, undo, -> hex 00000073 }t
    171 t{                ebreak, undo, -> hex 00100073 }t
    172 ```