Add LPAREN, RPAREN, COMMA token types to the scanner

Extends the vec0 tokenizer to recognize '(', ')', and ',' as
single-character tokens, preparing for DiskANN index option parsing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Alex Garcia 2026-03-02 18:07:57 -08:00
parent aab9b37de2
commit 0bca960e9d
3 changed files with 96 additions and 0 deletions

View file

@@ -108,6 +108,24 @@ void test_vec0_token_next() {
assert(token.token_type == TOKEN_TYPE_DIGIT);
assert(token.end - token.start == 2);
// Left paren
input = "(";
rc = vec0_token_next(input, input + 1, &token);
assert(rc == VEC0_TOKEN_RESULT_SOME);
assert(token.token_type == TOKEN_TYPE_LPAREN);
// Right paren
input = ")";
rc = vec0_token_next(input, input + 1, &token);
assert(rc == VEC0_TOKEN_RESULT_SOME);
assert(token.token_type == TOKEN_TYPE_RPAREN);
// Comma
input = ",";
rc = vec0_token_next(input, input + 1, &token);
assert(rc == VEC0_TOKEN_RESULT_SOME);
assert(token.token_type == TOKEN_TYPE_COMMA);
printf(" All vec0_token_next tests passed.\n");
}
@@ -229,6 +247,60 @@ void test_vec0_scanner() {
assert(rc == VEC0_TOKEN_RESULT_EOF);
}
// Scan "diskann(k=v, k2=v2)"
{
const char *input = "diskann(k=v, k2=v2)";
vec0_scanner_init(&scanner, input, (int)strlen(input));
rc = vec0_scanner_next(&scanner, &token);
assert(rc == VEC0_TOKEN_RESULT_SOME);
assert(token.token_type == TOKEN_TYPE_IDENTIFIER);
assert(strncmp(token.start, "diskann", 7) == 0);
rc = vec0_scanner_next(&scanner, &token);
assert(rc == VEC0_TOKEN_RESULT_SOME);
assert(token.token_type == TOKEN_TYPE_LPAREN);
rc = vec0_scanner_next(&scanner, &token);
assert(rc == VEC0_TOKEN_RESULT_SOME);
assert(token.token_type == TOKEN_TYPE_IDENTIFIER);
assert(strncmp(token.start, "k", 1) == 0);
rc = vec0_scanner_next(&scanner, &token);
assert(rc == VEC0_TOKEN_RESULT_SOME);
assert(token.token_type == TOKEN_TYPE_EQ);
rc = vec0_scanner_next(&scanner, &token);
assert(rc == VEC0_TOKEN_RESULT_SOME);
assert(token.token_type == TOKEN_TYPE_IDENTIFIER);
assert(strncmp(token.start, "v", 1) == 0);
rc = vec0_scanner_next(&scanner, &token);
assert(rc == VEC0_TOKEN_RESULT_SOME);
assert(token.token_type == TOKEN_TYPE_COMMA);
rc = vec0_scanner_next(&scanner, &token);
assert(rc == VEC0_TOKEN_RESULT_SOME);
assert(token.token_type == TOKEN_TYPE_IDENTIFIER);
assert(strncmp(token.start, "k2", 2) == 0);
rc = vec0_scanner_next(&scanner, &token);
assert(rc == VEC0_TOKEN_RESULT_SOME);
assert(token.token_type == TOKEN_TYPE_EQ);
rc = vec0_scanner_next(&scanner, &token);
assert(rc == VEC0_TOKEN_RESULT_SOME);
assert(token.token_type == TOKEN_TYPE_IDENTIFIER);
assert(strncmp(token.start, "v2", 2) == 0);
rc = vec0_scanner_next(&scanner, &token);
assert(rc == VEC0_TOKEN_RESULT_SOME);
assert(token.token_type == TOKEN_TYPE_RPAREN);
rc = vec0_scanner_next(&scanner, &token);
assert(rc == VEC0_TOKEN_RESULT_EOF);
}
printf(" All vec0_scanner tests passed.\n");
}