hydroforth/src/hydroforth/parser.c
Dominic Grimm 7490439223
All checks were successful
continuous-integration/drone/push Build is passing
Add error call stack if call stack exceeds maximum capacity
2023-08-05 20:19:52 +02:00

369 lines
12 KiB
C

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "hydroforth/hydroforth.h"
void hf__parser__init_keyword_map(struct hf__hashmap *const map, size_t cap) {
if (cap == 0) {
cap = HF__PARSER__KEYWORD_MAP_CAP;
}
*map = (struct hf__hashmap){
.arr = calloc(cap, sizeof(struct hf__hashmap__node *)),
.cap = cap,
};
for (enum HF__KEYWORD k = 0; k < __HF__KEYWORD__N; k++) {
hf__hashmap__insert(map, hf__hash_str(HF__KEYWORD_STR[k]),
(void *)&HF__KEYWORD_NODE_TYPE[k]);
}
}
/*
 * Append `item` to the dynamic node array, growing it by ~1.5x when full.
 *
 * Fix: the original did `*arr = realloc(*arr, ...)` — on realloc failure that
 * overwrites the only pointer to the old buffer (leak) and then dereferences
 * NULL (UB). We now realloc into a temporary and, on failure, leave the array
 * untouched and silently drop the item (matching the function's existing
 * silent-failure contract for the `*len > *cap` case).
 */
void hf__parser__node_array_push(struct hf__node **arr, size_t *const len,
                                 size_t *const cap, struct hf__node item) {
  if (*len > *cap) {
    /* Inconsistent state (len should never exceed cap); bail out. */
    return;
  }
  if (*len == *cap) {
    const size_t new_cap = *cap + 1 + (*cap / 2);
    struct hf__node *tmp = realloc(*arr, sizeof(struct hf__node) * new_cap);
    if (tmp == NULL) {
      /* OOM: keep the old buffer valid; drop the item. */
      return;
    }
    *arr = tmp;
    *cap = new_cap;
  }
  (*arr)[*len] = item;
  (*len)++;
}
/*
 * Return a freshly-allocated copy of src[start..end] with leading and
 * trailing space-like characters (per hf__is_space_like) removed.
 * Caller owns and must free() the returned string.
 *
 * Fixes: the original allocated `raw_len` bytes but wrote the NUL at
 * stripped[raw_len] — a one-byte heap overflow; and on an all-whitespace
 * range `end--` could underflow below `start` (size_t wraparound). We now
 * allocate raw_len + 1 and bound the trailing scan by `start`.
 */
char *strip_whitespaces(const char *const str, size_t start, size_t end) {
  while (start < end && hf__is_space_like(str[start])) {
    start++;
  }
  while (end > start && hf__is_space_like(str[end])) {
    end--;
  }
  /* If the single remaining char is space-like, the result is empty. */
  size_t raw_len = end - start + 1;
  if (raw_len == 1 && hf__is_space_like(str[start])) {
    raw_len = 0;
  }
  char *stripped = malloc(raw_len + 1);
  if (stripped == NULL) {
    return NULL; /* OOM — caller must tolerate NULL. */
  }
  memcpy(stripped, str + start, raw_len);
  stripped[raw_len] = '\0';
  return stripped;
}
struct hf__result hf__parse(struct hf__parser *const parser,
const char *const src,
const struct hf__token *const tokens,
const size_t tokens_len, struct hf__node **nodes,
size_t *const len, size_t *const size) {
if (!parser->keyword_map_is_init) {
hf__parser__init_keyword_map(&parser->keyword_map, 0);
parser->keyword_map_is_init = true;
}
for (size_t i = 0; i < tokens_len; i++) {
switch (tokens[i].type) {
case HF__TOKEN_TYPE__NUMBER: {
size_t j = tokens[i].location.start;
bool negative = false;
long number = 0;
if (src[tokens[i].location.start] == '-') {
j++;
negative = true;
} else if (src[tokens[i].location.start] == '+') {
j++;
}
if (src[j] == '0') {
j++;
if (j < (tokens[i].location.end + 1)) {
switch (src[j]) {
case 'B':
case 'b':
j++;
for (; j < (tokens[i].location.end + 1); j++) {
if (src[j] != '0' && src[j] != '1') {
return HF__ERR_CUSTOM(
HF__ERROR__PARSER__INVALID_NUMBER,
hf__quote_mem_str(src, tokens[i].location.start,
tokens[i].location.end, true),
true);
}
number *= 2;
number += src[j] - '0';
}
break;
case 'X':
case 'x':
j++;
for (; j < (tokens[i].location.end + 1); j++) {
const bool is_alphabetical_high_case =
src[j] >= 'A' && src[j] <= 'F';
const bool is_alphabetical =
is_alphabetical_high_case || (src[j] >= 'a' && src[j] <= 'f');
if (!((src[j] >= '0' && src[j] <= '9') || is_alphabetical)) {
return HF__ERR_CUSTOM(
HF__ERROR__PARSER__INVALID_NUMBER,
hf__quote_mem_str(src, tokens[i].location.start,
tokens[i].location.end, true),
true);
}
number *= 16;
if (is_alphabetical_high_case) {
number += 10 + src[j] - 'A';
} else if (is_alphabetical) {
number += 10 + src[j] - 'a';
} else {
number += src[j] - '0';
}
}
break;
default:
goto PARSER_NUMBER_DEFAULT;
}
}
} else {
PARSER_NUMBER_DEFAULT:
for (; j < (tokens[i].location.end + 1); j++) {
if (src[j] < '0' || src[j] > '9') {
return HF__ERR_CUSTOM(
HF__ERROR__PARSER__INVALID_NUMBER,
hf__quote_mem_str(src, tokens[i].location.start,
tokens[i].location.end, true),
true);
}
number *= 10;
number += src[j] - '0';
}
}
hf__parser__node_array_push(
nodes, len, size,
(struct hf__node){
.type = HF__NODE_TYPE__NUMBER,
.value = {.number = negative ? -number : number},
.is_owner = true,
});
break;
}
case HF__TOKEN_TYPE__CHAR: {
const size_t char_len =
tokens[i].location.end - tokens[i].location.start + 1;
char ch = src[tokens[i].location.start + 1];
if (src[tokens[i].location.start] == '\\') {
size_t j = 1;
switch (ch) {
case 's':
case 't':
j++;
ch = ' ';
break;
case 'n':
j++;
ch = '\n';
break;
default:
j++;
break;
}
if (j != char_len) {
return HF__ERR_CUSTOM(HF__ERROR__PARSER__INVALID_CHAR,
hf__quote_mem_str(src, tokens[i].location.start,
tokens[i].location.end,
false),
true);
}
} else if (char_len != 1) {
return HF__ERR_CUSTOM(HF__ERROR__PARSER__INVALID_CHAR,
hf__quote_mem_str(src, tokens[i].location.start,
tokens[i].location.end, false),
true);
} else {
ch = src[tokens[i].location.start];
}
hf__parser__node_array_push(nodes, len, size,
(struct hf__node){
.type = HF__NODE_TYPE__CHAR,
.value = {.ch = ch},
.is_owner = true,
});
break;
}
case HF__TOKEN_TYPE__WORD: {
struct hf__node node;
const size_t diff = tokens[i].location.end - tokens[i].location.start;
const size_t word_len = diff + 1;
char *lower_s = malloc(sizeof(char) * word_len);
const char *const word_start = src + tokens[i].location.start;
for (size_t j = 0; j < word_len; j++) {
lower_s[j] = tolower(word_start[j]);
}
lower_s[word_len] = '\0';
const hf__hash_t hash = hf__hash_str(lower_s);
const enum hf__node_type *const *const type_ptr =
(const enum hf__node_type *const *const)hf__hashmap__get(
&parser->keyword_map, hash);
if (type_ptr == NULL) {
node = (struct hf__node){
.type = HF__NODE_TYPE__WORD,
.value = {.word = {.hash = hash, .value = lower_s}},
.is_owner = true,
};
} else {
node.type = **type_ptr;
free(lower_s);
}
hf__parser__node_array_push(nodes, len, size, node);
break;
}
case HF__TOKEN_TYPE__COLON: {
const size_t start = i++;
bool got_end = false;
size_t end;
unsigned char depth = 1;
for (; i < tokens_len; i++) {
switch (tokens[i].type) {
case HF__TOKEN_TYPE__COLON:
depth++;
break;
case HF__TOKEN_TYPE__SEMICOLON:
depth--;
break;
default:
break;
}
if (depth == 0) {
end = i;
got_end = true;
break;
}
}
const struct hf__token *const name_tok = tokens + start + 1;
if (!got_end || end - start <= 1) {
return HF__ERR(HF__ERROR__PARSER__WORD_DEF_INCOMPLETE);
} else if (name_tok->type != HF__TOKEN_TYPE__WORD) {
return HF__ERR(HF__ERROR__PARSER__WORD_DEF_INVALID_NAME);
}
const size_t name_len =
name_tok->location.end - name_tok->location.start + 1;
char *name = malloc(sizeof(char) * (name_len + 1));
for (size_t j = 0; j < name_len; j++) {
name[j] = tolower(src[name_tok->location.start + j]);
}
name[name_len] = '\0';
const hf__hash_t hash = hf__hash_str(name);
if (hf__hashmap__get(&parser->keyword_map, hash)) {
free(name);
return HF__ERR_CUSTOM(HF__ERROR__PARSER__WORD_DEF_IS_KEYWORD,
hf__quote_mem_str(src, name_tok->location.start,
name_tok->location.end, true),
true);
}
size_t body_len = 0;
size_t body_size = 0;
struct hf__node *body = NULL;
struct hf__result parse_res =
hf__parse(parser, src, tokens + 2, (end - start + 1) - 3, &body,
&body_len, &body_size);
struct hf__node_value__word_def *word_def =
malloc(sizeof(struct hf__node_value__word_def));
(*word_def) = (struct hf__node_value__word_def){
.name =
{
.hash = hash,
.value = name,
},
.body = body,
.body_len = body_len,
};
hf__parser__node_array_push(nodes, len, size,
(struct hf__node){
.type = HF__NODE_TYPE__WORD_DEF,
.value = {.word_def = word_def},
.is_owner = true,
});
break;
}
case HF__TOKEN_TYPE__SEMICOLON:
return HF__ERR_CUSTOM(HF__ERROR__PARSER__UNEXPECTED,
hf__quote_mem_str(src, tokens[i].location.start,
tokens[i].location.end, true),
true);
case HF__TOKEN_TYPE__DASH_COMMENT:
hf__parser__node_array_push(nodes, len, size,
(struct hf__node){
.type = HF__NODE_TYPE__DASH_COMMENT,
.value =
{
.comment = strip_whitespaces(
src, tokens[i].location.start,
tokens[i].location.end),
},
.is_owner = true,
});
break;
case HF__TOKEN_TYPE__BACKSLASH_COMMENT:
hf__parser__node_array_push(nodes, len, size,
(struct hf__node){
.type = HF__NODE_TYPE__DASH_COMMENT,
.value =
{
.comment = strip_whitespaces(
src, tokens[i].location.start,
tokens[i].location.end),
},
.is_owner = true,
});
break;
case HF__TOKEN_TYPE__PAREN_COMMENT:
hf__parser__node_array_push(nodes, len, size,
(struct hf__node){
.type = HF__NODE_TYPE__PAREN_COMMENT,
.value =
{
.comment = strip_whitespaces(
src, tokens[i].location.start,
tokens[i].location.end),
},
.is_owner = true,
});
break;
default:
break;
}
}
return HF__OK;
}