Dominic Grimm
7490439223
All checks were successful
continuous-integration/drone/push Build is passing
369 lines
12 KiB
C
369 lines
12 KiB
C
#include <ctype.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "hydroforth/hydroforth.h"
|
|
|
|
void hf__parser__init_keyword_map(struct hf__hashmap *const map, size_t cap) {
|
|
if (cap == 0) {
|
|
cap = HF__PARSER__KEYWORD_MAP_CAP;
|
|
}
|
|
*map = (struct hf__hashmap){
|
|
.arr = calloc(cap, sizeof(struct hf__hashmap__node *)),
|
|
.cap = cap,
|
|
};
|
|
|
|
for (enum HF__KEYWORD k = 0; k < __HF__KEYWORD__N; k++) {
|
|
hf__hashmap__insert(map, hf__hash_str(HF__KEYWORD_STR[k]),
|
|
(void *)&HF__KEYWORD_NODE_TYPE[k]);
|
|
}
|
|
}
|
|
|
|
void hf__parser__node_array_push(struct hf__node **arr, size_t *const len,
|
|
size_t *const cap, struct hf__node item) {
|
|
if (*len > *cap) {
|
|
return;
|
|
} else if (*len == *cap) {
|
|
*cap += 1 + (*cap / 2);
|
|
*arr = realloc(*arr, sizeof(struct hf__node) * (*cap));
|
|
}
|
|
|
|
(*arr)[*len] = item;
|
|
(*len)++;
|
|
}
|
|
|
|
/*
 * Return a heap-allocated copy of str[start..end] with leading and trailing
 * whitespace (per hf__is_space_like) trimmed.  Caller owns and frees the
 * result.  Returns NULL on allocation failure.
 *
 * NOTE(review): assumes the span holds at least one non-space character;
 * an all-space span would walk `start`/`end` past each other — confirm the
 * tokenizer guarantees this.
 */
char *strip_whitespaces(const char *const str, size_t start, size_t end) {
  while (hf__is_space_like(str[start])) {
    start++;
  }
  while (hf__is_space_like(str[end])) {
    end--;
  }
  const size_t raw_len = end - start + 1;
  /* Fixed: +1 for the terminator — the original allocated raw_len bytes
   * but wrote raw_len + 1 (one-byte heap overflow at stripped[raw_len]). */
  char *stripped = malloc(raw_len + 1);
  if (stripped == NULL) {
    return NULL;
  }
  /* memcpy instead of strncpy: the span length is known and the source is
   * not required to be NUL-terminated inside the token. */
  memcpy(stripped, str + start, raw_len);
  stripped[raw_len] = '\0';

  return stripped;
}
|
|
|
|
struct hf__result hf__parse(struct hf__parser *const parser,
|
|
const char *const src,
|
|
const struct hf__token *const tokens,
|
|
const size_t tokens_len, struct hf__node **nodes,
|
|
size_t *const len, size_t *const size) {
|
|
if (!parser->keyword_map_is_init) {
|
|
hf__parser__init_keyword_map(&parser->keyword_map, 0);
|
|
parser->keyword_map_is_init = true;
|
|
}
|
|
|
|
for (size_t i = 0; i < tokens_len; i++) {
|
|
switch (tokens[i].type) {
|
|
case HF__TOKEN_TYPE__NUMBER: {
|
|
size_t j = tokens[i].location.start;
|
|
bool negative = false;
|
|
long number = 0;
|
|
|
|
if (src[tokens[i].location.start] == '-') {
|
|
j++;
|
|
negative = true;
|
|
} else if (src[tokens[i].location.start] == '+') {
|
|
j++;
|
|
}
|
|
|
|
if (src[j] == '0') {
|
|
j++;
|
|
if (j < (tokens[i].location.end + 1)) {
|
|
switch (src[j]) {
|
|
case 'B':
|
|
case 'b':
|
|
j++;
|
|
for (; j < (tokens[i].location.end + 1); j++) {
|
|
if (src[j] != '0' && src[j] != '1') {
|
|
return HF__ERR_CUSTOM(
|
|
HF__ERROR__PARSER__INVALID_NUMBER,
|
|
hf__quote_mem_str(src, tokens[i].location.start,
|
|
tokens[i].location.end, true),
|
|
true);
|
|
}
|
|
|
|
number *= 2;
|
|
number += src[j] - '0';
|
|
}
|
|
break;
|
|
|
|
case 'X':
|
|
case 'x':
|
|
j++;
|
|
for (; j < (tokens[i].location.end + 1); j++) {
|
|
const bool is_alphabetical_high_case =
|
|
src[j] >= 'A' && src[j] <= 'F';
|
|
const bool is_alphabetical =
|
|
is_alphabetical_high_case || (src[j] >= 'a' && src[j] <= 'f');
|
|
if (!((src[j] >= '0' && src[j] <= '9') || is_alphabetical)) {
|
|
return HF__ERR_CUSTOM(
|
|
HF__ERROR__PARSER__INVALID_NUMBER,
|
|
hf__quote_mem_str(src, tokens[i].location.start,
|
|
tokens[i].location.end, true),
|
|
true);
|
|
}
|
|
|
|
number *= 16;
|
|
if (is_alphabetical_high_case) {
|
|
number += 10 + src[j] - 'A';
|
|
} else if (is_alphabetical) {
|
|
number += 10 + src[j] - 'a';
|
|
} else {
|
|
number += src[j] - '0';
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
goto PARSER_NUMBER_DEFAULT;
|
|
}
|
|
}
|
|
} else {
|
|
PARSER_NUMBER_DEFAULT:
|
|
for (; j < (tokens[i].location.end + 1); j++) {
|
|
if (src[j] < '0' || src[j] > '9') {
|
|
return HF__ERR_CUSTOM(
|
|
HF__ERROR__PARSER__INVALID_NUMBER,
|
|
hf__quote_mem_str(src, tokens[i].location.start,
|
|
tokens[i].location.end, true),
|
|
true);
|
|
}
|
|
|
|
number *= 10;
|
|
number += src[j] - '0';
|
|
}
|
|
}
|
|
|
|
hf__parser__node_array_push(
|
|
nodes, len, size,
|
|
(struct hf__node){
|
|
.type = HF__NODE_TYPE__NUMBER,
|
|
.value = {.number = negative ? -number : number},
|
|
.is_owner = true,
|
|
});
|
|
break;
|
|
}
|
|
|
|
case HF__TOKEN_TYPE__CHAR: {
|
|
const size_t char_len =
|
|
tokens[i].location.end - tokens[i].location.start + 1;
|
|
char ch = src[tokens[i].location.start + 1];
|
|
|
|
if (src[tokens[i].location.start] == '\\') {
|
|
size_t j = 1;
|
|
|
|
switch (ch) {
|
|
case 's':
|
|
case 't':
|
|
j++;
|
|
ch = ' ';
|
|
break;
|
|
|
|
case 'n':
|
|
j++;
|
|
ch = '\n';
|
|
break;
|
|
|
|
default:
|
|
j++;
|
|
break;
|
|
}
|
|
|
|
if (j != char_len) {
|
|
return HF__ERR_CUSTOM(HF__ERROR__PARSER__INVALID_CHAR,
|
|
hf__quote_mem_str(src, tokens[i].location.start,
|
|
tokens[i].location.end,
|
|
false),
|
|
true);
|
|
}
|
|
} else if (char_len != 1) {
|
|
return HF__ERR_CUSTOM(HF__ERROR__PARSER__INVALID_CHAR,
|
|
hf__quote_mem_str(src, tokens[i].location.start,
|
|
tokens[i].location.end, false),
|
|
true);
|
|
} else {
|
|
ch = src[tokens[i].location.start];
|
|
}
|
|
|
|
hf__parser__node_array_push(nodes, len, size,
|
|
(struct hf__node){
|
|
.type = HF__NODE_TYPE__CHAR,
|
|
.value = {.ch = ch},
|
|
.is_owner = true,
|
|
});
|
|
break;
|
|
}
|
|
|
|
case HF__TOKEN_TYPE__WORD: {
|
|
struct hf__node node;
|
|
|
|
const size_t diff = tokens[i].location.end - tokens[i].location.start;
|
|
const size_t word_len = diff + 1;
|
|
char *lower_s = malloc(sizeof(char) * word_len);
|
|
const char *const word_start = src + tokens[i].location.start;
|
|
for (size_t j = 0; j < word_len; j++) {
|
|
lower_s[j] = tolower(word_start[j]);
|
|
}
|
|
lower_s[word_len] = '\0';
|
|
const hf__hash_t hash = hf__hash_str(lower_s);
|
|
|
|
const enum hf__node_type *const *const type_ptr =
|
|
(const enum hf__node_type *const *const)hf__hashmap__get(
|
|
&parser->keyword_map, hash);
|
|
if (type_ptr == NULL) {
|
|
node = (struct hf__node){
|
|
.type = HF__NODE_TYPE__WORD,
|
|
.value = {.word = {.hash = hash, .value = lower_s}},
|
|
.is_owner = true,
|
|
};
|
|
} else {
|
|
node.type = **type_ptr;
|
|
free(lower_s);
|
|
}
|
|
|
|
hf__parser__node_array_push(nodes, len, size, node);
|
|
break;
|
|
}
|
|
|
|
case HF__TOKEN_TYPE__COLON: {
|
|
const size_t start = i++;
|
|
bool got_end = false;
|
|
size_t end;
|
|
unsigned char depth = 1;
|
|
for (; i < tokens_len; i++) {
|
|
switch (tokens[i].type) {
|
|
case HF__TOKEN_TYPE__COLON:
|
|
depth++;
|
|
break;
|
|
|
|
case HF__TOKEN_TYPE__SEMICOLON:
|
|
depth--;
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (depth == 0) {
|
|
end = i;
|
|
got_end = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
const struct hf__token *const name_tok = tokens + start + 1;
|
|
if (!got_end || end - start <= 1) {
|
|
return HF__ERR(HF__ERROR__PARSER__WORD_DEF_INCOMPLETE);
|
|
} else if (name_tok->type != HF__TOKEN_TYPE__WORD) {
|
|
return HF__ERR(HF__ERROR__PARSER__WORD_DEF_INVALID_NAME);
|
|
}
|
|
|
|
const size_t name_len =
|
|
name_tok->location.end - name_tok->location.start + 1;
|
|
char *name = malloc(sizeof(char) * (name_len + 1));
|
|
for (size_t j = 0; j < name_len; j++) {
|
|
name[j] = tolower(src[name_tok->location.start + j]);
|
|
}
|
|
name[name_len] = '\0';
|
|
const hf__hash_t hash = hf__hash_str(name);
|
|
|
|
if (hf__hashmap__get(&parser->keyword_map, hash)) {
|
|
free(name);
|
|
|
|
return HF__ERR_CUSTOM(HF__ERROR__PARSER__WORD_DEF_IS_KEYWORD,
|
|
hf__quote_mem_str(src, name_tok->location.start,
|
|
name_tok->location.end, true),
|
|
true);
|
|
}
|
|
|
|
size_t body_len = 0;
|
|
size_t body_size = 0;
|
|
struct hf__node *body = NULL;
|
|
struct hf__result parse_res =
|
|
hf__parse(parser, src, tokens + 2, (end - start + 1) - 3, &body,
|
|
&body_len, &body_size);
|
|
|
|
struct hf__node_value__word_def *word_def =
|
|
malloc(sizeof(struct hf__node_value__word_def));
|
|
(*word_def) = (struct hf__node_value__word_def){
|
|
.name =
|
|
{
|
|
.hash = hash,
|
|
.value = name,
|
|
},
|
|
.body = body,
|
|
.body_len = body_len,
|
|
};
|
|
|
|
hf__parser__node_array_push(nodes, len, size,
|
|
(struct hf__node){
|
|
.type = HF__NODE_TYPE__WORD_DEF,
|
|
.value = {.word_def = word_def},
|
|
.is_owner = true,
|
|
});
|
|
|
|
break;
|
|
}
|
|
|
|
case HF__TOKEN_TYPE__SEMICOLON:
|
|
return HF__ERR_CUSTOM(HF__ERROR__PARSER__UNEXPECTED,
|
|
hf__quote_mem_str(src, tokens[i].location.start,
|
|
tokens[i].location.end, true),
|
|
true);
|
|
|
|
case HF__TOKEN_TYPE__DASH_COMMENT:
|
|
hf__parser__node_array_push(nodes, len, size,
|
|
(struct hf__node){
|
|
.type = HF__NODE_TYPE__DASH_COMMENT,
|
|
.value =
|
|
{
|
|
.comment = strip_whitespaces(
|
|
src, tokens[i].location.start,
|
|
tokens[i].location.end),
|
|
},
|
|
.is_owner = true,
|
|
});
|
|
break;
|
|
|
|
case HF__TOKEN_TYPE__BACKSLASH_COMMENT:
|
|
hf__parser__node_array_push(nodes, len, size,
|
|
(struct hf__node){
|
|
.type = HF__NODE_TYPE__DASH_COMMENT,
|
|
.value =
|
|
{
|
|
.comment = strip_whitespaces(
|
|
src, tokens[i].location.start,
|
|
tokens[i].location.end),
|
|
},
|
|
.is_owner = true,
|
|
});
|
|
break;
|
|
|
|
case HF__TOKEN_TYPE__PAREN_COMMENT:
|
|
hf__parser__node_array_push(nodes, len, size,
|
|
(struct hf__node){
|
|
.type = HF__NODE_TYPE__PAREN_COMMENT,
|
|
.value =
|
|
{
|
|
.comment = strip_whitespaces(
|
|
src, tokens[i].location.start,
|
|
tokens[i].location.end),
|
|
},
|
|
.is_owner = true,
|
|
});
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
return HF__OK;
|
|
}
|