diff options
| author | day <day@national.shitposting.agency> | 2026-03-16 16:25:49 +0100 |
|---|---|---|
| committer | day <day@national.shitposting.agency> | 2026-03-16 16:25:49 +0100 |
| commit | 7f85c9fc75bd62ac09ea4457d3b17f85988fca66 (patch) | |
| tree | 15248e42bfafc6bd19e50c9010b701057958ff3a /src/util/str_tokenizer.h | |
| parent | 872c39b24ecf4063f785ff3e8b2f940acd8c2d59 (diff) | |
| parent | 991352b0d2767e6bd1a46f554db4ac9d208c13ad (diff) | |
Merge remote-tracking branch 'origin/master' into obj
Diffstat (limited to 'src/util/str_tokenizer.h')
| -rw-r--r-- | src/util/str_tokenizer.h | 140 |
1 files changed, 140 insertions, 0 deletions
diff --git a/src/util/str_tokenizer.h b/src/util/str_tokenizer.h new file mode 100644 index 0000000..8a3aebf --- /dev/null +++ b/src/util/str_tokenizer.h @@ -0,0 +1,140 @@ +#pragma once + +#include "string.h" + +using TOKENIZER_COMPARE_FN = FN<U8( char )>; + +struct STR_TOKENIZER { + STR str; + STR ignored; + + I32 cur; + I32 last; +}; + +inline STR_TOKENIZER tok_init( STR str, STR whitespace_chars = " \t\n\r" ) { + return { + .str = str, + .ignored = { whitespace_chars }, + .cur = 0, + .last = 0, + }; +} + +inline STR tok_next( STR_TOKENIZER* t ) { + if( t->cur >= t->str.size ) + return ""; + + U8 start = 0; + for( I32 i = t->cur; i < t->str.size; i++ ) { + U8 c = t->str.data[i]; + if( i == t->str.size - 1 ) { + if( !start ) + return ""; + + STR ret = { (U32)i - t->cur + 1, t->str.data + t->cur }; + t->last = t->cur; + t->cur = i + 1; + return ret; + } + + for( auto& it : t->ignored ) { + if( c == it ) { + if( !start ) { + t->cur = i + 1; + continue; + } else { + STR ret = { (U32)i - t->cur, t->str.data + t->cur }; + t->last = t->cur; + t->cur = i + 1; + return ret; + } + } + } + + start = 1; + } + + return ""; +} + +inline STR tok_peek( STR_TOKENIZER* t ) { + if( t->cur >= t->str.size ) + return ""; + + U8 start = 0; + I32 cur = t->cur; + for( I32 i = cur; i < t->str.size; i++ ) { + U8 c = t->str.data[i]; + if( i == t->str.size - 1 ) { + if( !start ) + return ""; + + STR ret = { (U32)i - cur + 1, t->str.data + cur }; + return ret; + } + + for( auto& it : t->ignored ) { + if( c == it ) { + if( !start ) { + cur = i + 1; + continue; + } else { + STR ret = { (U32)i - cur, t->str.data + cur }; + return ret; + } + } + } + + start = 1; + } + + return ""; +} + + +inline STR tok_next( STR_TOKENIZER* t, STR what ) { + if( t->cur >= t->str.size ) + return ""; + + if( !what.size ) + return ""; + + for( I32 i = t->cur; i <= t->str.size - what.size; i++ ) { + STR slice = { what.size, t->str.data + i }; + if( slice == what ) { + STR ret = { (U32)i - t->cur, t->str.data + i + what.size }; + t->last = t->cur; + t->cur = i + what.size; + return ret; + } + } + + return ""; +} + +inline char tok_nextchar( STR_TOKENIZER* t ) { + if( t->cur >= t->str.size ) + return 0; + + U32 last = t->cur; + for( ; t->cur < t->str.size; t->cur++ ) { + U8 cont = 0; + for( auto& it : t->ignored ) { + if( t->str.data[t->cur] == it ) { + cont = 1; + break; + } + } + + if( cont ) + continue; + else { + t->last = last; + t->cur++; + return t->str.data[t->cur]; + } + } + + return 0; +} |
