commit cbee5318a1ccf055bfd099a58bf7ab69c79937c2
parent 56302df696f373955d894acabd53ba29d235cf43
Author: Christian Ermann <christianermann@gmail.com>
Date: Sat, 2 Dec 2023 11:27:21 -0500
Add EMIT and TYPE
Diffstat:
M | forth.asm | | | 289 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- |
1 file changed, 283 insertions(+), 6 deletions(-)
diff --git a/forth.asm b/forth.asm
@@ -10,20 +10,297 @@ section '.text' code executable readable
include 'efi.asm'
-main:
-; Initialize the system and print a version string.
+;------------------------------------------------------------------------------
+;
+; Foreword
+;
+; I am writing this as an alternative to the "jonesforth" implementation of
+; the Forth language for learning how to write a Forth. I'm attempting to
+; design my Forth in such a way that a user can interact with the system as
+; soon as possible, and further progress is broken into incremental steps. My
+; hope is that this also makes it easier to port to different architectures by
+; providing natural checkpoints to verify functionality as more features are
+; added.
+;
+;------------------------------------------------------------------------------
+;
+; Table of Contents
+; 1. The Basic Structure of Forth
+; 2. Executing this Program
+; 3. Hello, World!
+;
+;------------------------------------------------------------------------------
+;
+; Section 1: Basic Structure
+;
+; In this section, we'll learn how to represent words in the Forth language in
+; memory, and how to execute those words to do meaningful work.
+;
+; The Dictionary
+;
+; All words in Forth are stored in a dictionary, just like in English or in
+; French. Each entry in a Forth dictionary stores a reference to the previous
+; word (known as the link pointer), some flags to denote special properties of
+; certain words, the length of the word's name, the name of the word, and
+; finally the definition of the word. For memory alignment, there may be some
+; padding placed in between the name of the word and its definition. The
+; memory layout of an entry for soupforth can be seen below:
+;
+; Dictionary Entry
+; +-------------------------------+ <
+; | link pointer | 8 bytes | | Header
+; +-------------------------------+ |
+; | flags | 1 byte | |
+; +-------------------------------+ |
+; | length | 1 byte | |
+; +-------------------------------+ |
+; | name | upto 255 bytes | |
+; +-------------------------------+ |
+; | padding | upto 7 bytes | |
+; +-------------------------------+ <
+; | definition | |
+; +-------------------------------+
+;
+; All the words we define can be split into two main categories: primitive
+; words and Forth words. The terminology can be a bit confusing, as all the
+; words we define are part of our Forth, but what these terms actually compare
+; is the implementation of the words. Primitive words are implemented directly
+; in assembly, while Forth words are implemented in Forth itself.
+;
+; This distinction is mostly hidden from users of the language, however, the
+; choices you make affect the speed and portability of the implementation. As
+; the number of primitive words increases, so does the speed of the Forth
+; implementation. However, the fewer primitive words there are, the easier the
+; Forth is to port to new architectures.
+;
+; As implementers, we do need to be aware of the differences. For example, the
+; "definition" field, left blank in the diagram above, differs slightly between
+; primitive words and Forth words. Definitions always start with a codeword and
+; end with some sort of "return" statement, but the details of these codewords
+; and "return" statements differ between the two types of words.
+;
+; For primitive words, the codeword is the address of the assembly code that
+; implements the word. The return statement is called `NEXT` and takes care of
+; loading and jumping to the address of the **next** word.
+;
+; As we execute words, we'll use the RAX register to store the address of the
+; word that we're currently executing and we'll use the RSI register to store
+; the address of the next word we need to execute.
+;
+; This leads us to a fairly straightforward definition of a macro for NEXT:
+
+macro NEXT
+; Execute the next word in the current sequence.
+;
+; Expects RSI to point at a cell holding the address of a word's codeword.
+; Afterwards RAX holds that codeword address and RSI has been advanced to the
+; following cell. `lodsq` only moves RSI forwards while the direction flag is
+; clear, which is why `main` executes `cld` before the first NEXT.
+;
+{
+ lodsq ; 1. Load the qword at address RSI into RAX and advance RSI by 8.
+ jmp qword [rax] ; 2. Jump to the address stored in the codeword RAX points at.
+}
+
+; For Forth words, the codeword is called DOCOL, short for "DO COLON", as Forth
+; definitions are started with a ":". DOCOL takes care of storing the address
+; of the next word we need to execute, and starting execution of the current
+; word. The return statement is called EXIT and loads the address of the next
+; word that DOCOL stored earlier and calls NEXT.
+;
+; In order to store and load addresses of words, Forth uses what's known as the
+; return stack. It's called the return stack as it stores the addresses we
+; **return** to at the end of a definition. We'll use the RBP register as our
+; return stack pointer.
+
+macro push_rs reg
+; Push the value of `reg` onto the return stack.
+;
+; RBP is the return stack pointer. The stack grows towards lower addresses and
+; RBP points at the most recently pushed value. `lea` does the pointer
+; arithmetic, so the CPU flags are left untouched.
+;
+{
+ lea rbp, [rbp - 8] ; 1. Make room: move the return stack pointer down one cell.
+ mov [rbp], reg ; 2. Store the value of `reg` in the new top cell.
+}
+
+macro pop_rs reg
+; Pop the top value of the return stack into `reg`.
+;
+; Mirror image of `push_rs`: RBP is the return stack pointer and points at the
+; value to pop. `lea` does the pointer arithmetic, so the CPU flags are left
+; untouched.
+;
+{
+ mov reg, [rbp] ; 1. Read the top cell of the return stack into `reg`.
+ lea rbp, [rbp + 8] ; 2. Release that cell: move the return stack pointer up.
+}
+
+DOCOL:
+; Start execution of a Forth word.
+;
+; Reached through the `jmp qword [rax]` in NEXT when a word's codeword is
+; DOCOL. On entry RAX holds the address of that codeword and RSI points at the
+; cell that follows the word in the sequence that invoked it.
+;
+ push_rs rsi ; 1. Save the next word's address onto the return stack.
+ lea rsi, [rax + 8] ; 2. Skip the 8-byte codeword: RSI = first cell of the definition.
+ NEXT ; 3. Execute the word pointed to by RSI.
+
+; As EXIT is included in the definition of every Forth word, it has to have an
+; entry in the dictionary. EXIT will be the first primitive word we define.
;
-; Later on, this will start the interpreter as well.
+; Let's revisit what the memory layout of our entry will look like before we
+; start defining anything:
;
+; Memory layout of EXIT
+;
+; section '.data'
+; +-------------------------------+ <- name_EXIT
+; | link pointer | 8 bytes |
+; +-------------------------------+
+; | flags | 1 byte |
+; +-------------------------------+
+; | length | 1 byte |
+; +-------------------------------+
+; | name | upto 255 bytes |
+; +-------------------------------+
+; | padding | upto 7 bytes |
+; +-------------------------------+ <- EXIT
+; | code_EXIT | 8 bytes |
+; +-------------------------------+
+;
+; section '.text'
+; +-------------+ <- code_EXIT
+; | pop_rs rsi |
+; | NEXT |
+; +-------------+
+;
+; Some notes about the labels:
+; 1. name_EXIT is the label for the start of the dictionary entry.
+; 2. EXIT is the label for the location of the codeword.
+; 3. code_EXIT is the label of the assembly code implementing EXIT.
+;
+; We'll need to re-create this memory structure for every primitive word we
+; define so it's worthwhile to write a `defcode` macro that does it for you:
+
+macro defcode name, name_length, flags, label_
+; Define a primitive word.
+;
+; Parameters:
+;   name        - the word's name as a quoted string.
+;   name_length - the length of `name` in bytes; it is stored as given and is
+;                 not checked against `name`.
+;   flags       - the flags byte stored in the header.
+;   label_      - identifier used to build the labels `name_<label_>` (start of
+;                 the entry), `<label_>` (the codeword) and `code_<label_>`
+;                 (the assembly code).
+;
+; The macro ends inside the '.text' section, directly after `code_<label_>`,
+; so the instructions written after the invocation become the word's body.
+;
+; NOTE(review): every expansion issues two `section` directives; confirm the
+; output format handles repeated '.data'/'.text' declarations as intended.
+;
+{
+ section '.data' data readable
+ align 8
+name_#label_:
+ dq link ; 1. Set the link pointer to the previous entry.
+ link equ name_#label_ ; 2. Update `link` so the next entry points at this one.
+ db flags ; 3. Set the flags.
+ db name_length ; 4. Set the name length.
+ db name ; 5. Set the name.
+
+ align 8 ; 6. Add any padding we may need before the codeword.
+label_:
+ dq code_#label_ ; 7. Set the codeword to the address of the assembly code.
+ section '.text' code readable executable
+
+ align 1
+code_#label_: ; 8. This is where our assembly code will go.
+}
+
+; You'll have noticed that the macro expects the `link` variable to hold the
+; address of the previous word. As EXIT is our first word, we'll need to
+; initialize `link` to 0. The `defcode` macro will take care of updating `link`
+; for us as we define new words.
+;
+; After we set `link`, we can finally define our first primitive word:
+
+; `link` is a symbolic constant rather than a data label, so the `dq link` in
+; the first `defcode` expansion stores a literal 0 (the end-of-dictionary
+; marker) instead of the address of a variable, and no stray data is emitted
+; into the code section.
+link equ 0
+
+defcode "EXIT", 4, 0, EXIT
+; Return from a Forth word.
+;
+; Undoes the `push_rs rsi` performed by DOCOL when the word was entered, so
+; execution resumes in the sequence that invoked the word.
+;
+ pop_rs rsi ; 1. Load the next word's address back into RSI.
+ NEXT ; 2. Execute the word pointed to by RSI.
+
+; It will be a while before we get a chance to actually use EXIT, but it gives
+; us a taste of where we're heading.
+;
+;------------------------------------------------------------------------------
+;
+; Section 2 - Executing this Program
+;
+; In this section, we'll build some infrastructure to enable us to run small
+; demos at the end of each section without having to re-write any code.
+;
+; The Entry Point
+;
+; At the beginning of this file, we defined the entry point to be `main`. That
+; means we need to define what `main` is before we can run anything. I decided
+; that `main` should do 4 things:
+;
+; 1. Initialize the UEFI interface.
+; 2. Clear the screen, so we have a blank canvas.
+; 3. Store the version string for this Forth onto the stack.
+; 4. Start the execution of Forth code.
+;
+; These first two steps are handled by function calls provided by the UEFI
+; interface.
+;
+; The version string, and its length, are defined in the `data` section at the
+; bottom of this file.
+;
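+; If you remember from the previous section, we can use the macro NEXT to start
+; the execution of a primitive word. The only thing NEXT expects is for the RSI
+; register to point at a memory cell that holds the address of a word's
+; codeword.
+;
+; If we point RSI at a sequence of such cells, NEXT will execute the first
+; word in the sequence and leave RSI pointing at the second, so each word that
+; ends with NEXT hands control to the word that follows it.
+;
+; We'll use the variable `program` to store the address of the word sequence
+; we want to run.
+;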
+
+main:
+; Entry point: initialize the system, clear the screen, push the version
+; string (address first, then length) and start executing the word sequence
+; whose address is stored in `program`.
+;
+; `program` is a variable that holds the address of the sequence, so RSI must
+; be loaded with its contents. Loading the address of `program` itself would
+; add one level of indirection too many and make NEXT jump to a codeword cell
+; instead of to the code that the codeword points at.
+;
+ cld ; `lodsq` in NEXT must walk forwards through memory.
call sys_initialize
call sys_clear_screen
- mov rcx, version_string
- mov rdx, version_string.length
+ push version_string ; Address of the string, consumed by TYPE.
+ push version_string.length ; Length of the string, consumed by TYPE.
+ mov rsi, [program] ; RSI = address of the first cell of the word sequence.
+ NEXT
+
+;------------------------------------------------------------------------------
+;
+; Section 3 - Hello, World!
+;
+; In this section, we'll define a few more primitive words and print a message
+; to screen using them.
+;
+; Basic Output
+;
+; There are two main words for printing to the screen in Forth: EMIT and TYPE.
+; EMIT **emits** a single character to the screen, while TYPE outputs an entire
+; string to the screen. Unlike many other languages at the time, Forth does not
+; use null-terminated strings. Instead, the address and length of a string are
+; expected to travel as a pair.
+;
+; The implementations of these words are fairly simple, as I've defined the
+; `sys_print_string` function as part of the EFI abstraction and it does most
+; of the work for us.
+;
+
+defcode "EMIT", 4, 0, EMIT
+; Write a single character to the screen.
+;
+; The character is popped off the stack. `sys_print_string` is handed a buffer
+; address in RCX and a byte count in RDX, so the character is first parked in
+; `.char_buffer`, a 1-byte variable defined in the data section at the bottom
+; of this file, and then printed as a string of length 1.
+;
+ pop rax ; Take the character off the stack.
+ mov rdx, 1 ; Length: exactly one byte.
+ mov [.char_buffer], al ; Park the low byte somewhere that has an address.
+ mov rcx, .char_buffer ; Address of that byte.
+ call sys_print_string
+ NEXT
+
+defcode "TYPE", 4, 0, TYPE
+; Print an ASCII string.
+;
+; Expects the string's address to have been pushed first and its length
+; second, so the length is popped first.
+;
+ pop rdx ; Length of the string.
+ pop rcx ; Address of the string.
call sys_print_string
- call sys_read_char
+ NEXT ; Continue with the next word in the sequence.
+
+section '.rodata' readable
+
+; Demo for section 3: a sequence of words, where each cell holds the address
+; of a word's codeword.
+;
+; NOTE(review): nothing after TYPE ends the sequence, so the NEXT at the end
+; of TYPE will treat whatever follows in memory as the next word; confirm
+; this is intended.
+program_s1:
+ dq TYPE
+
+; Address of the word sequence that `main` starts executing.
+program dq program_s1
section '.data' readable writable
version_string db 'soup forth v0.1', 0xA
.length = $ - version_string
+; 1-byte scratch buffer used by EMIT, which refers to it as `.char_buffer`.
+code_EMIT.char_buffer db ?