token.c (7428B)
1 #include "token.h" 2 3 #include <stdbool.h> 4 #include <stdlib.h> 5 #include <errno.h> 6 #include <string.h> 7 8 #define MAX_FIELD_LEN 31 9 10 void cursor_init(Cursor* cursor, FILE* file) { 11 cursor->file = file; 12 cursor->line_idx = 1; 13 cursor->char_idx = 1; 14 } 15 16 char cursor_getc(Cursor* cursor) { 17 char c = fgetc(cursor->file); 18 switch (c) { 19 case EOF: 20 break; 21 case '\n': 22 cursor->line_idx += 1; 23 case '\r': 24 cursor->char_idx = 0; 25 break; 26 default: 27 cursor->char_idx += 1; 28 } 29 return c; 30 } 31 32 void cursor_ungetc(Cursor* cursor, char c) { 33 ungetc(c, cursor->file); 34 switch (c) { 35 case EOF: 36 break; 37 default: 38 cursor->char_idx -= 1; 39 } 40 } 41 42 typedef enum { 43 TOKEN_STATUS__FIELD_VALUE_TOO_LONG, 44 TOKEN_STATUS__INVALID_CHAR_INT, 45 TOKEN_STATUS__NO_DIGITS_INT, 46 TOKEN_STATUS__INVALID_CHAR_FLOAT, 47 TOKEN_STATUS__NO_DIGITS_FLOAT, 48 TOKEN_STATUS__VALUE_PARSING_NOT_SUPPORTED, 49 TOKEN_STATUS__UNRECOGNIZED, 50 TOKEN_STATUS__OKAY, 51 } TokenStatus; 52 53 void skip_whitespace(Cursor* cursor) { 54 for (;;) { 55 char c = cursor_getc(cursor); 56 switch (c) { 57 case ' ': 58 case '\t': 59 case '\n': 60 case '\r': 61 continue; 62 default: 63 cursor_ungetc(cursor, c); 64 return; 65 } 66 } 67 } 68 69 void skip_until_whitespace(Cursor* cursor) { 70 for (;;) { 71 char c = cursor_getc(cursor); 72 switch (c) { 73 case EOF: 74 case ' ': 75 case '\t': 76 case '\n': 77 case '\r': 78 return; 79 default: 80 continue; 81 } 82 } 83 } 84 85 void skip_until_newline(Cursor* cursor) { 86 for (;;) { 87 char c = cursor_getc(cursor); 88 switch (c) { 89 case EOF: 90 case '\n': 91 return; 92 default: 93 continue; 94 } 95 } 96 } 97 98 typedef struct { 99 int length; 100 TokenStatus status; 101 } ReadIntoBufferResult; 102 103 ReadIntoBufferResult read_into_buffer( 104 Cursor* cursor, 105 char* buffer, 106 int max_len 107 ) 108 { 109 ReadIntoBufferResult result; 110 int i; 111 char c; 112 for (i = 0; i < max_len; i += 1) { 113 c = cursor_getc(cursor); 114 switch (c) { 115 case EOF: 116 case ' ': 117 case '\t': 118 case '\n': 119 case '\r': 120 buffer[i] = 0; 121 result.length = i; 122 result.status = TOKEN_STATUS__OKAY; 123 return result; 124 default: 125 buffer[i] = c; 126 continue; 127 } 128 } 129 cursor_ungetc(cursor, c); 130 buffer[max_len - 1] = 0; 131 result.length = i; 132 result.status = TOKEN_STATUS__FIELD_VALUE_TOO_LONG; 133 return result; 134 } 135 136 typedef struct { 137 int value; 138 TokenStatus status; 139 } ParseIntResult; 140 141 ParseIntResult parse_int(char* str) { 142 ParseIntResult result; 143 char* end; 144 result.value = strtol(str, &end, 10); 145 if (end == str) { 146 result.status = TOKEN_STATUS__NO_DIGITS_INT; 147 } 148 else if (*end != '\0') { 149 result.status = TOKEN_STATUS__INVALID_CHAR_INT; 150 } 151 else { 152 result.status = TOKEN_STATUS__OKAY; 153 } 154 return result; 155 } 156 157 typedef struct { 158 float value; 159 TokenStatus status; 160 } ParseFloatResult; 161 162 ParseFloatResult parse_float(char* str) { 163 ParseFloatResult result; 164 char* end; 165 result.value = strtof(str, &end); 166 if (end == str) { 167 result.status = TOKEN_STATUS__NO_DIGITS_FLOAT; 168 } 169 else if (*end != '\0') { 170 result.status = TOKEN_STATUS__INVALID_CHAR_FLOAT; 171 } 172 else { 173 result.status = TOKEN_STATUS__OKAY; 174 } 175 return result; 176 } 177 178 Token parse_field(Cursor* cursor, TokenType type) { 179 Token t; 180 t.line_idx = cursor->line_idx; 181 t.char_idx = cursor->char_idx - 1; 182 183 char buffer[MAX_FIELD_LEN + 1]; 184 ReadIntoBufferResult result = read_into_buffer( 185 cursor, 186 buffer, 187 MAX_FIELD_LEN + 1 188 ); 189 if (result.status != TOKEN_STATUS__OKAY) { 190 t.type = TOKEN_ERROR; 191 t.value.error_value = result.status; 192 return t; 193 } 194 195 TokenValue value; 196 switch (type) { 197 case TOKEN_G_CODE: 198 case TOKEN_M_CODE: { 199 ParseIntResult result = parse_int(buffer); 200 if (result.status != TOKEN_STATUS__OKAY) { 201 t.type = TOKEN_ERROR; 202 t.value.error_value = result.status; 203 } 204 else { 205 t.type = type; 206 t.value.code_value = result.value; 207 } 208 break; 209 } 210 case TOKEN_S_PARAM: 211 case TOKEN_X_PARAM: 212 case TOKEN_Y_PARAM: 213 case TOKEN_Z_PARAM: 214 case TOKEN_F_PARAM: 215 case TOKEN_E_PARAM: { 216 ParseFloatResult result = parse_float(buffer); 217 if (result.status != TOKEN_STATUS__OKAY) { 218 t.type = TOKEN_ERROR; 219 t.value.error_value = result.status; 220 } 221 else { 222 t.type = type; 223 t.value.param_value = result.value; 224 } 225 break; 226 } 227 default: 228 t.type = TOKEN_ERROR; 229 t.value.error_value = TOKEN_STATUS__VALUE_PARSING_NOT_SUPPORTED; 230 break; 231 } 232 return t; 233 } 234 235 Token parse_comment(Cursor* cursor) { 236 Token t = { 237 .type = TOKEN_COMMENT, 238 .line_idx = cursor->line_idx, 239 .char_idx = cursor->char_idx - 1, 240 }; 241 skip_until_newline(cursor); 242 return t; 243 } 244 245 Token parse_token(Cursor* cursor) { 246 skip_whitespace(cursor); 247 248 char c = cursor_getc(cursor); 249 switch (c) { 250 case ';': 251 return parse_comment(cursor); 252 case 'g': 253 case 'G': 254 return parse_field(cursor, TOKEN_G_CODE); 255 case 'm': 256 case 'M': 257 return parse_field(cursor, TOKEN_M_CODE); 258 case 's': 259 case 'S': 260 return parse_field(cursor, TOKEN_S_PARAM); 261 case 'x': 262 case 'X': 263 return parse_field(cursor, TOKEN_X_PARAM); 264 case 'y': 265 case 'Y': 266 return parse_field(cursor, TOKEN_Y_PARAM); 267 case 'z': 268 case 'Z': 269 return parse_field(cursor, TOKEN_Z_PARAM); 270 case 'f': 271 case 'F': 272 return parse_field(cursor, TOKEN_F_PARAM); 273 case 'e': 274 case 'E': 275 return parse_field(cursor, TOKEN_E_PARAM); 276 case EOF: { 277 Token t = { 278 .type = TOKEN_EOF, 279 .line_idx = cursor->line_idx, 280 .char_idx = cursor->char_idx, 281 }; 282 return t; 283 } 284 default: { 285 cursor_ungetc(cursor, c); 286 Token t = { 287 .type = TOKEN_ERROR, 288 .line_idx = cursor->line_idx, 289 .char_idx = cursor->char_idx, 290 .value.error_value = TOKEN_STATUS__UNRECOGNIZED, 291 }; 292 return t; 293 } 294 } 295 } 296 297 void recover_from_error(Cursor* cursor, const Token* token) { 298 switch (token->value.error_value) { 299 case TOKEN_STATUS__FIELD_VALUE_TOO_LONG: 300 case TOKEN_STATUS__UNRECOGNIZED: 301 skip_until_whitespace(cursor); 302 break; 303 default: 304 // most errors don't require any action to get back to a valid 305 // parsing state. 306 break; 307 } 308 } 309