From 8329d42d3e592f4cd42cdfa586e2325ddc76c898 Mon Sep 17 00:00:00 2001 From: aura Date: Tue, 10 Mar 2026 01:35:50 +0100 Subject: perf profiler, simplify 2d render, string struct, many small things --- src/util/allocator.h | 37 ++++++++- src/util/math.h | 10 +-- src/util/profiler.cpp | 55 +++++++++++++ src/util/profiler.h | 139 +++++++++++++++++++++++++++++++++ src/util/string.h | 209 ++++++++++++++++++++++++++++++++++++++++++++++---- src/util/time.h | 21 +++++ 6 files changed, 448 insertions(+), 23 deletions(-) create mode 100644 src/util/profiler.cpp create mode 100644 src/util/profiler.h create mode 100644 src/util/time.h (limited to 'src/util') diff --git a/src/util/allocator.h b/src/util/allocator.h index 27b21c0..64bad23 100644 --- a/src/util/allocator.h +++ b/src/util/allocator.h @@ -5,10 +5,23 @@ #include #include "typedef.h" -template < typename T > +template +struct LIST_ITERATOR { + T* ptr; + + LIST_ITERATOR( T* ptr ) : ptr( ptr ) {} + T& operator*() { return *ptr; } + T* operator->() { return ptr; } + LIST_ITERATOR& operator++() { ptr++; return *this; } + LIST_ITERATOR operator+=( U8 n ) { ptr += n; return *this; } + bool operator==( const LIST_ITERATOR& other ) { return ptr == other.ptr; } + bool operator!=( const LIST_ITERATOR& other ) { return ptr != other.ptr; } +}; + +template using QSORT_FN = std::function< U8( T*, T* ) >; -template < typename T > +template static U8 qsort_basic_sort( T* t1, T* t2 ) { return (*t1 > *t2); } @@ -63,6 +76,13 @@ struct LIST { size = 0; } + LIST( U32 _size, const T* data ) { + data = (T*)malloc( sizeof( T ) * _size ); + memcpy( data, data, _size * sizeof( T ) ); + size = _size; + capacity = _size; + } + LIST( const LIST& other ) { if( !other.capacity || !other.size ) { capacity = 1; @@ -91,6 +111,7 @@ struct LIST { capacity = 1; size = 0; data = (T*)malloc( sizeof( T ) ); + memset( data, 0, sizeof( T ) ); return *this; } @@ -106,6 +127,10 @@ struct LIST { free( data ); } + T at( U32 index ) { + return data[index]; + } + void reserve( U32 count ) { if( capacity >= count ) return; @@ -320,4 +345,12 @@ struct LIST { ret.sort( fn ); return ret; } + + LIST_ITERATOR begin() { + return LIST_ITERATOR( data ); + } + + LIST_ITERATOR end() { + return LIST_ITERATOR( data + size ); + } }; diff --git a/src/util/math.h b/src/util/math.h index add705a..7bae125 100644 --- a/src/util/math.h +++ b/src/util/math.h @@ -57,18 +57,18 @@ inline F32 m_snap_to_grid( F32 x, F32 grid ) { } template -inline T m_min( T a, T b ) { - return a < b ? a : b; +T min( T a, T b ) { + return a < b? a : b; } template -inline T m_max( T a, T b ) { - return a > b ? a : b; +T max( T a, T b ) { + return a > b? a : b; } template inline T m_clamp( T x, T a, T b ) { - return m_min( m_max( x, a ), b ); + return min( max( x, a ), b ); } extern VEC2 m_screen_transform( const VEC3& world ); diff --git a/src/util/profiler.cpp b/src/util/profiler.cpp new file mode 100644 index 0000000..86c1810 --- /dev/null +++ b/src/util/profiler.cpp @@ -0,0 +1,55 @@ +#if defined(DEBUG) || defined(PROFILER) +#include "profiler.h" +#include "../render/gl_2d_font.h" +#include "../game/vars.h" +#include "string.h" + +CVAR* prof_overlay = var_new( "prof_overlay", 0 ); +PROFILER_GLOBAL gprof; + +void __profiler_intern_draw_tree( PROFILER_LIST_ENTRY* e, GL_FONT* font, I32* x, I32* y ) { + GL_DATA* gl = gl_instance(); + static GL_SHADER_PROGRAM** gl2d = gl->programs.where( fn( GL_SHADER_PROGRAM** p ) { + return STR( (*p)->name ) == "2d" ; + } ); + + if( !gl2d ) + return; + + STR line; + if( e->parent ) { + U64 parent_dur = e->parent->duration; + U64 percent = (F64)parent_dur / e->duration * 100.f; + line = STR( "%s -> duration: %.2fms [%.0f%%]", + e->name, + (F32)e->duration / TICK_RESOLUTION * 1000.f, + percent + ); + } else { + line = STR( "%s -> duration: %.2fms", + e->name, + (F32)e->duration / TICK_RESOLUTION * 1000.f + ); + } + + gl_font_draw( font, *gl2d, VEC2{ (F32)*x + 1, (F32)*y + 1 }, line, CLR::BLACK() ); + gl_font_draw( font, *gl2d, VEC2{ (F32)*x, (F32)*y }, line, CLR::WHITE() ); + + *y += font->size + 1; + *x += 20; + for( auto& it : e->children ) + __profiler_intern_draw_tree( &it, font, x, y ); + *x -= 20; +} + +void __profiler_intern_draw_overlay( struct GL_FONT* f ) { + if( !var_geti( prof_overlay ) ) + return; + + I32 x = 50, y = 50; + for( auto& it : gprof.frames ) + __profiler_intern_draw_tree( &it, f, &x, &y ); +} + + +#endif diff --git a/src/util/profiler.h b/src/util/profiler.h new file mode 100644 index 0000000..c8eea24 --- /dev/null +++ b/src/util/profiler.h @@ -0,0 +1,139 @@ +#pragma once + +#if defined(DEBUG) || defined(PROFILER) +#include "allocator.h" +#include "time.h" +#include "thread.h" +#include "fnv.h" + +static THREAD_MUTEX __profiler_intern_mutex; + +#ifndef __func__ + #define __func__ __FUNCTION__ +#endif + +#define PROFILE( x ) \ + PROFILER_LIST_ENTRY* __profiler_intern_id = __profiler_intern_start( x ); \ + defer( { __profiler_intern_end( __profiler_intern_id ); } ) + +#define _profiled PROFILE( __func__ ); + + +struct PROFILER_LIST_ENTRY { + LIST children; + const char* name; + U64 duration; + U64 start; + FNV1A hash; + + PROFILER_LIST_ENTRY* parent; +}; + +struct PROFILER_GLOBAL { + LIST frames; + LIST stack; + PROFILER_LIST_ENTRY* current = 0; +}; + +extern struct CVAR* prof_overlay; +extern PROFILER_GLOBAL gprof; + +inline PROFILER_LIST_ENTRY* __profiler_intern_is_root( FNV1A hash ) { + for( auto& it : gprof.stack ) { + if( it.hash == hash ) { + return ⁢ + } + } + return 0; +} + + +inline void __profiler_intern_clear_frame( PROFILER_LIST_ENTRY* entry ) { + entry->children.each( __profiler_intern_clear_frame ); + entry->children.clear(); +} + +inline void __profiler_intern_new_frame_child( PROFILER_LIST_ENTRY* entry ) { + for( auto& it : entry->children ) { + __profiler_intern_new_frame_child( &it ); + it.parent = entry; + } +} + +inline void __profiler_intern_new_frame( PROFILER_LIST_ENTRY* entry ) { + PROFILER_LIST_ENTRY ne = *entry; + U32 i = gprof.frames.idx_where( fn( PROFILER_LIST_ENTRY* pe ) { + return pe->hash == entry->hash; + } ); + + if( i != -1 ) { + __profiler_intern_clear_frame( &gprof.frames.data[i] ); + gprof.frames.erase( i ); + } + + PROFILER_LIST_ENTRY* pne = gprof.frames.push( ne ); + for( auto& it : pne->children ) { + it.parent = pne; + __profiler_intern_new_frame_child( &it ); + } +} + +inline PROFILER_LIST_ENTRY* __profiler_intern_start( const char* name ) { + PROFILER_LIST_ENTRY e; + FNV1A fnv = fnv1a( name ); + U64 tick = u_tick(); + PROFILER_LIST_ENTRY* ep; + + thread_mutex_lock( &__profiler_intern_mutex ); + defer( thread_mutex_unlock( &__profiler_intern_mutex ) ); + if( (ep = __profiler_intern_is_root( fnv )) != 0 ) { + __profiler_intern_new_frame( ep ); + ep->children.clear(); + ep->start = tick; + gprof.current = ep; + return ep; + } + + e.name = name; + e.children = LIST(); + memset( e.children.data, 0, sizeof(PROFILER_LIST_ENTRY) ); + e.duration = 0; + e.start = tick; + e.children = {}; + e.parent = gprof.current; + e.hash = fnv; + + if( gprof.current ) + ep = gprof.current->children.push( e ); + else + ep = gprof.stack.push( e ); + + gprof.current = ep; + return ep; +} + +inline void __profiler_intern_end( PROFILER_LIST_ENTRY* entry ) { + U64 tick = u_tick(); + + thread_mutex_lock( &__profiler_intern_mutex ); + entry->duration = tick - entry->start; + if( gprof.current ) + gprof.current = gprof.current->parent; + thread_mutex_unlock( &__profiler_intern_mutex ); +} + +inline void __profiler_intern_init() { + thread_mutex_init( &__profiler_intern_mutex ); +} + +extern void __profiler_intern_draw_overlay( struct GL_FONT* font ); + +#define profiler_init() __profiler_intern_init() +#define profiler_draw_tree( font ) __profiler_intern_draw_overlay( font ) + +#else +#define PROFILE( x ) +#define _profiled +#define profiler_init() +#define profiler_draw_tree( x ) +#endif diff --git a/src/util/string.h b/src/util/string.h index 47da1a0..0781fca 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -1,7 +1,9 @@ #pragma once +#include +#include #include -#include "typedef.h" +#include "allocator.h" constexpr U32 strlen_ct( const char* str ) { U32 len = 0; @@ -10,40 +12,215 @@ constexpr U32 strlen_ct( const char* str ) { } template -struct STR { +struct ARRSTR { char data[N]{ 0 }; enum { size = N }; - STR() { + ARRSTR() { memset( data, 0, N ); } - STR( const char* str ) { + ARRSTR( const char* str ) { memcpy( data, str, strlen_ct( str ) ); } - STR( const STR& str ) { + ARRSTR( const ARRSTR& str ) { memcpy( data, str.data, N ); } template - auto operator+( const STR& rhs ) { - constexpr U32 l1 = strlen_ct( data ); - constexpr U32 l2 = strlen_ct( rhs.data ); + auto operator+( const ARRSTR& rhs ) { + const U32 l1 = strlen_ct( data ); + const U32 l2 = strlen_ct( rhs.data ); - constexpr U32 high = N > other ? N : other; - constexpr U32 max = (l1 + l2 > high) ? l1 + l2 + 1 : high; + if( l1 + l2 >= N ) { + dlog( "STR::operator+(): string overflow" ); + abort(); + return *this; + } - STR result; - memcpy( result.data, data, l1 ); - memcpy( result.data + l1, rhs.data, l2 ); - result.data[l1 + l2] = '\0'; - return result; + memcpy( data + l1, rhs.data, l2 ); + data[l1 + l2] = '\0'; + return *this; } template - auto concat( const STR& str ) { + auto concat( const ARRSTR& str ) { return *this + str; } operator char*() { return data; } }; + +template +struct __str : public LIST { + __str() : LIST() {} + __str( const CT* fmt, ... ) : LIST() { + va_list args; + va_start( args, fmt ); + va_list args2; + va_copy( args2, args ); + U32 c = vsnprintf( 0, 0, fmt, args ); + va_end( args ); + this->data = 0; + this->reserve( c * 2 ); + vsnprintf( this->data, c + 1, fmt, args2 ); + this->data[c] = 0; + this->size = c; + va_end( args2 ); + } + + __str( const __str& rhs ) : LIST( rhs ) { + this->data[this->size] = 0; + } + + __str& operator=( const __str& other ) { + if( this == &other ) + return *this; + + if( this->data && this->data != other.data ) + free( this->data ); + + this->data = 0; + + if( !other.capacity || !other.size ) { + this->capacity = 1; + this->size = 0; + this->data = (CT*)malloc( sizeof(CT) ); + memset( this->data, 0, sizeof(CT) ); + return *this; + } + + this->data = (CT*)malloc( other.capacity * sizeof( CT ) ); + memcpy( this->data, other.data, (other.size + 1) * sizeof( CT ) ); + this->size = other.size; + this->capacity = other.capacity; + + return *this; + } + + const bool operator==( const __str& rhs ) { return this->equals( rhs ); } + const bool operator!=( const __str& rhs ) { return !(*this == rhs); } + const bool operator==( const CT* rhs ) { return this->equals( rhs ); } + const bool operator!=( const CT* rhs ) { return !(*this == rhs); } + __str operator+( const __str& rhs ) { __str ret = *this; return ret.append( rhs ); } + __str operator+( const CT* rhs ) { __str ret = *this; return ret.append( rhs ); } + __str operator+( const CT rhs ) { __str ret = *this; ret.push( rhs ); return ret; } + __str& operator+=( const __str& rhs ) { return this->append( rhs ); } + __str& operator+=( const CT* rhs ) { return this->append( rhs ); } + __str& operator+=( const CT rhs ) { this->push( rhs ); return this; } + + operator CT*() { return this->data; } + + CT operator[]( U32 i ) { + return this->data[i]; + } + + U8 equals( const __str& rhs ) { + if( rhs.size != this->size ) + return 0; + + for( U32 i = 0; i < this->size; ++i ) { + if( this->data[i] != rhs.data[i] ) + return 0; + } + + return 1; + } + + U8 equals( const CT* rhs ) { + for( U32 i = 0; i < this->size; ++i ) { + if( !rhs[i] || this->data[i] != rhs[i] ) + return 0; + } + + return 1; + } + + __str& fmt( const char* fmt, ... ) { + va_list args; + va_start( args, fmt ); + va_list args2; + va_copy( args2, args ); + U32 c = this->size + vsnprintf( 0, 0, fmt, args ); + va_end( args ); + if( c > this->capacity ) + this->reserve( c * 2 ); + vsnprintf( this->data + this->size, c + 1, fmt, args2 ); + this->data[c] = 0; + this->size = c; + va_end( args2 ); + } + + __str& append( const CT* str ) { + U32 len; + for( len = 0; !!str[len]; ++len ); + if( this->size + len > this->capacity ) + this->reserve( this->size * 2 ); + + memcpy( this->data + this->size, str, len * sizeof(CT) ); + this->size += len; + this->data[this->size] = 0; + return *this; + } + + __str& append( const __str& str ) { + U32 len = str.len; + if( this->size + len + 1 >= this->capacity ) + this->reserve( this->size + len + 1 ); + + memcpy( this->data + this->size, str, len * sizeof(CT) ); + this->size += len; + this->data[this->size] = 0; + return *this; + } + + CT* push( const CT& item ) { + if( this->capacity <= this->size + 1 ) + this->grow(); + this->data[this->size++] = item; + this->data[this->size] = 0; + + return &this->data[this->size - 1]; + } + + U32 idx_of( const CT* str ) { + return idx_of( str, 0 ); + } + + U32 idx_of( const CT* str, U32 offset ) { + for( U32 i = offset; i < this->size; ++i ) { + U8 found = 1; + for( U32 i2 = 0; !!str[i2] && i + i2 < this->size; ++i2 ) { + if( this->data[i + i2] != str[i2] ) { + found = 0; + break; + } + } + + if( found ) + return i; + } + } + + CT* find( const CT* str ) { + for( U32 i = 0; i < this->size; ++i ) { + U8 found = 1; + for( U32 i2 = 0; !!str[i2] && i + i2 < this->size; ++i2 ) { + if( this->data[i + i2] != str[i2] ) { + found = 0; + break; + } + } + if( found ) + return this->data + i; + } + return 0; + } + + LIST_ITERATOR end() { + return LIST_ITERATOR( this->data + this->size ); + } +}; + +using STR = __str; +using WSTR = __str; diff --git a/src/util/time.h b/src/util/time.h new file mode 100644 index 0000000..ae696c5 --- /dev/null +++ b/src/util/time.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include + +#include "typedef.h" + +const U32 TICK_RESOLUTION = 1000000; + +inline U64 u_tick() { + return (SDL_GetPerformanceCounter() * TICK_RESOLUTION / SDL_GetPerformanceFrequency()); +} + +inline F32 u_time() { + return (F32)((F64)u_tick() / TICK_RESOLUTION); +} + +inline F64 u_time64() { + return (F64)u_tick() / TICK_RESOLUTION; +} + -- cgit v1.2.3