120 lines
3.3 KiB
C
120 lines
3.3 KiB
C
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "hydroforth/hydroforth.h"
|
|
|
|
void token_array_push(struct hf__token **arr, size_t *const len,
|
|
size_t *const size, struct hf__token item) {
|
|
if (*len > *size) {
|
|
return;
|
|
} else if (*len == *size) {
|
|
*size += 1 + (*size / 2);
|
|
*arr = realloc(*arr, sizeof(struct hf__token) * (*size));
|
|
}
|
|
|
|
(*arr)[*len] = item;
|
|
(*len)++;
|
|
}
|
|
|
|
void hf__lex(const char *const src, const size_t src_len,
|
|
struct hf__token **tokens, size_t *const len, size_t *const size) {
|
|
size_t i = 0;
|
|
while (i < src_len) {
|
|
if (hf__is_space_like(src[i]) || src[i] == '\n') {
|
|
i++;
|
|
continue;
|
|
}
|
|
|
|
size_t start = i;
|
|
struct hf__token token;
|
|
|
|
if (src[i] == '\'') {
|
|
const size_t char_start = start;
|
|
i++;
|
|
start = i;
|
|
while (src[i] != '\'') {
|
|
i++;
|
|
if (i >= src_len) {
|
|
start = char_start;
|
|
goto TOKEN_IS_WORD;
|
|
}
|
|
}
|
|
|
|
token.type = HF__TOKEN_TYPE__CHAR;
|
|
token.location.start = start;
|
|
token.location.end = i - 1;
|
|
|
|
i++;
|
|
} else {
|
|
while (!hf__is_space_like(src[i]) && src[i] != '\n' && i < src_len) {
|
|
i++;
|
|
}
|
|
const size_t str_len = i - start;
|
|
|
|
if (hf__is_numeric(src[start]) || (src[start] == '-' && str_len > 1 &&
|
|
hf__is_numeric(src[start + 1]))) {
|
|
token.type = HF__TOKEN_TYPE__NUMBER;
|
|
token.location.start = start;
|
|
token.location.end = i - 1;
|
|
} else if (str_len == 1 && src[start] == ':') {
|
|
token.type = HF__TOKEN_TYPE__COLON;
|
|
token.location.start = start;
|
|
token.location.end = i - 1;
|
|
} else if (str_len == 1 && src[start] == ';') {
|
|
token.type = HF__TOKEN_TYPE__SEMICOLON;
|
|
token.location.start = start;
|
|
token.location.end = i - 1;
|
|
} else if (str_len == 1 && src[start] == '(' &&
|
|
hf__is_space_like(src[i])) {
|
|
i++;
|
|
bool got_end = false;
|
|
while (i < src_len) {
|
|
if (src[i] == ')' && hf__is_space_like(src[i - 1])) {
|
|
got_end = true;
|
|
break;
|
|
}
|
|
i++;
|
|
}
|
|
|
|
if (got_end) {
|
|
token.type = HF__TOKEN_TYPE__PAREN_COMMENT;
|
|
token.location.start = start + 2;
|
|
token.location.end = i - 2;
|
|
i++;
|
|
} else {
|
|
i = start + 1;
|
|
goto TOKEN_IS_WORD;
|
|
}
|
|
} else if (str_len == 1 && src[start] == '\\' &&
|
|
hf__is_space_like(src[i])) {
|
|
token.type = HF__TOKEN_TYPE__BACKSLASH_COMMENT;
|
|
|
|
start = ++i;
|
|
while (src[i] != '\n' && i < src_len) {
|
|
i++;
|
|
}
|
|
|
|
token.location.start = start;
|
|
token.location.end = i - 1;
|
|
} else if (str_len == 2 && strncmp(src + start, "--", 2) == 0 &&
|
|
(hf__is_space_like(src[i]) || src[i] == '\0')) {
|
|
token.type = HF__TOKEN_TYPE__DASH_COMMENT;
|
|
|
|
start = ++i;
|
|
while (src[i] != '\n' && i < src_len) {
|
|
i++;
|
|
}
|
|
|
|
token.location.start = start;
|
|
token.location.end = i - 1;
|
|
} else {
|
|
TOKEN_IS_WORD:
|
|
token.type = HF__TOKEN_TYPE__WORD;
|
|
token.location.start = start;
|
|
token.location.end = i - 1;
|
|
}
|
|
}
|
|
|
|
token_array_push(tokens, len, size, token);
|
|
}
|
|
} |