summaryrefslogtreecommitdiff
path: root/src/util/str_tokenizer.h
diff options
context:
space:
mode:
author day <day@national.shitposting.agency> 2026-03-16 16:25:49 +0100
committer day <day@national.shitposting.agency> 2026-03-16 16:25:49 +0100
commit 7f85c9fc75bd62ac09ea4457d3b17f85988fca66 (patch)
tree 15248e42bfafc6bd19e50c9010b701057958ff3a /src/util/str_tokenizer.h
parent 872c39b24ecf4063f785ff3e8b2f940acd8c2d59 (diff)
parent 991352b0d2767e6bd1a46f554db4ac9d208c13ad (diff)
Merge remote-tracking branch 'origin/master' into obj
Diffstat (limited to 'src/util/str_tokenizer.h')
-rw-r--r-- src/util/str_tokenizer.h 140
1 files changed, 140 insertions, 0 deletions
diff --git a/src/util/str_tokenizer.h b/src/util/str_tokenizer.h
new file mode 100644
index 0000000..8a3aebf
--- /dev/null
+++ b/src/util/str_tokenizer.h
@@ -0,0 +1,140 @@
+#pragma once
+
+#include "string.h"
+
+using TOKENIZER_COMPARE_FN = FN<U8( char )>; // callable taking a char and returning U8; not referenced by the tokenizer functions in this header. NOTE(review): FN is declared elsewhere - presumably a function wrapper, confirm.
+
+struct STR_TOKENIZER { // cursor state used by the tok_* functions to walk over `str`
+ STR str; // text being tokenized
+ STR ignored; // separator characters; tok_init fills this from its whitespace_chars argument
+
+ I32 cur; // index into str where the next tok_* call starts scanning
+ I32 last; // cursor position saved by tok_next/tok_nextchar just before they advance cur
+};
+
+// Builds a tokenizer positioned at the start of `str`.
+// `whitespace_chars` lists the characters treated as separators
+// (space, tab, newline and carriage return by default).
+inline STR_TOKENIZER tok_init( STR str, STR whitespace_chars = " \t\n\r" ) {
+  STR_TOKENIZER tok = {
+    .str = str,
+    .ignored = { whitespace_chars },
+    .cur = 0,
+    .last = 0,
+  };
+
+  return tok;
+}
+
+// Returns the next token of t->str and advances the tokenizer.
+//
+// A token is a maximal run of characters that are not in t->ignored.
+// Leading separators are skipped. On success t->last is set to the index of
+// the token's first character and t->cur is moved past the token and the
+// single separator that ended it (or to the end of the string).
+// Returns "" when no token is left; in that case t->cur is still moved past
+// any separators that were skipped.
+//
+// The returned STR points into t->str; nothing is copied.
+inline STR tok_next( STR_TOKENIZER* t ) {
+  if( t->cur >= t->str.size )
+    return "";
+
+  // Skip the run of separators in front of the token. This is a separate
+  // loop on purpose: a `continue` inside the range-for over t->ignored only
+  // continues that inner loop, so it cannot be used to skip a character.
+  I32 begin = t->cur;
+  for( ; begin < t->str.size; begin++ ) {
+    U8 ignored = 0;
+    for( auto& it : t->ignored ) {
+      if( t->str.data[begin] == it ) {
+        ignored = 1;
+        break;
+      }
+    }
+
+    if( !ignored )
+      break;
+  }
+
+  t->cur = begin;
+  if( begin >= t->str.size )
+    return "";
+
+  // data[begin] is a token character; extend until a separator or the end.
+  // The final character gets no special treatment, so a one-character last
+  // token is returned and a trailing separator is never part of a token.
+  I32 end = begin + 1;
+  for( ; end < t->str.size; end++ ) {
+    U8 ignored = 0;
+    for( auto& it : t->ignored ) {
+      if( t->str.data[end] == it ) {
+        ignored = 1;
+        break;
+      }
+    }
+
+    if( ignored )
+      break;
+  }
+
+  STR ret = { (U32)( end - begin ), t->str.data + begin };
+  t->last = begin;
+  // Step over the terminating separator when there is one.
+  t->cur = end < t->str.size ? end + 1 : end;
+  return ret;
+}
+
+// Returns the token that the next tok_next( t ) call would return, without
+// modifying the tokenizer.
+//
+// A token is a maximal run of characters that are not in t->ignored;
+// leading separators are skipped. Returns "" when no token is left.
+// The returned STR points into t->str; nothing is copied.
+inline STR tok_peek( STR_TOKENIZER* t ) {
+  if( t->cur >= t->str.size )
+    return "";
+
+  // Skip the run of separators in front of the token. This is a separate
+  // loop on purpose: a `continue` inside the range-for over t->ignored only
+  // continues that inner loop, so it cannot be used to skip a character.
+  I32 begin = t->cur;
+  for( ; begin < t->str.size; begin++ ) {
+    U8 ignored = 0;
+    for( auto& it : t->ignored ) {
+      if( t->str.data[begin] == it ) {
+        ignored = 1;
+        break;
+      }
+    }
+
+    if( !ignored )
+      break;
+  }
+
+  if( begin >= t->str.size )
+    return "";
+
+  // data[begin] is a token character; extend until a separator or the end.
+  // The final character gets no special treatment, so a one-character last
+  // token is returned and a trailing separator is never part of a token.
+  I32 end = begin + 1;
+  for( ; end < t->str.size; end++ ) {
+    U8 ignored = 0;
+    for( auto& it : t->ignored ) {
+      if( t->str.data[end] == it ) {
+        ignored = 1;
+        break;
+      }
+    }
+
+    if( ignored )
+      break;
+  }
+
+  STR ret = { (U32)( end - begin ), t->str.data + begin };
+  return ret;
+}
+
+
+// Searches t->str for the first occurrence of `what` at or after t->cur.
+//
+// On a match, returns the text between the old cursor and the start of the
+// match, sets t->last to the old cursor and moves t->cur just past the match.
+// Returns "" and leaves the tokenizer untouched when `what` is empty, the
+// cursor is already at the end, or there is no match.
+//
+// The returned STR points into t->str; nothing is copied.
+inline STR tok_next( STR_TOKENIZER* t, STR what ) {
+  if( t->cur >= t->str.size )
+    return "";
+
+  if( !what.size )
+    return "";
+
+  // A needle longer than the whole string can never match. Rejecting it here
+  // also keeps `t->str.size - what.size` below from wrapping around.
+  if( what.size > t->str.size )
+    return "";
+
+  for( I32 i = t->cur; i <= t->str.size - what.size; i++ ) {
+    STR slice = { what.size, t->str.data + i };
+    if( slice == what ) {
+      // The length i - cur describes the span [cur, i), so the slice has to
+      // start at cur; starting it after the match could run past the buffer.
+      STR ret = { (U32)i - t->cur, t->str.data + t->cur };
+      t->last = t->cur;
+      t->cur = i + what.size;
+      return ret;
+    }
+  }
+
+  return "";
+}
+
+// Returns the next character of t->str that is not in t->ignored and moves
+// t->cur just past it. t->last is set to the cursor position at entry.
+// Returns 0 when only separators (or nothing) remain; t->cur then ends up at
+// the end of the string and t->last is left unchanged.
+inline char tok_nextchar( STR_TOKENIZER* t ) {
+  if( t->cur >= t->str.size )
+    return 0;
+
+  U32 last = t->cur;
+  for( ; t->cur < t->str.size; t->cur++ ) {
+    U8 ignored = 0;
+    for( auto& it : t->ignored ) {
+      if( t->str.data[t->cur] == it ) {
+        ignored = 1;
+        break;
+      }
+    }
+
+    if( ignored )
+      continue;
+
+    // Read the character before advancing: reading after the increment
+    // returns the following character and, when the match is the last
+    // character of the string, reads one element past the end.
+    char found = t->str.data[t->cur];
+    t->last = last;
+    t->cur++;
+    return found;
+  }
+
+  return 0;
+}