diff --git a/include/camera.hpp b/include/camera.hpp index ebb227d..5be4508 100644 --- a/include/camera.hpp +++ b/include/camera.hpp @@ -1,11 +1,11 @@ #ifndef __CAMERA_HPP_ #define __CAMERA_HPP_ +#include "config.hpp" + #include #include -#include "config.hpp" - class OrbitCamera3D { friend class Renderer; diff --git a/include/config.hpp b/include/config.hpp index 72b658f..9bcd1bc 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -4,6 +4,7 @@ #include // #define WEB // Disables multithreading +// #define TRACY // Enable tracy profiling support // Window constexpr int INITIAL_WIDTH = 800; diff --git a/include/input.hpp b/include/input.hpp index 8ef34c0..5f6263e 100644 --- a/include/input.hpp +++ b/include/input.hpp @@ -1,6 +1,7 @@ #ifndef __INPUT_HPP_ #define __INPUT_HPP_ +#include "config.hpp" #include "state.hpp" class InputHandler { diff --git a/include/octree.hpp b/include/octree.hpp index 296c359..9793d56 100644 --- a/include/octree.hpp +++ b/include/octree.hpp @@ -1,6 +1,8 @@ #ifndef __OCTREE_HPP_ #define __OCTREE_HPP_ +#include "config.hpp" + #include #include #include diff --git a/include/physics.hpp b/include/physics.hpp index 2dd8b36..79e6ee9 100644 --- a/include/physics.hpp +++ b/include/physics.hpp @@ -1,6 +1,9 @@ #ifndef __PHYSICS_HPP_ #define __PHYSICS_HPP_ +#include "config.hpp" +#include "octree.hpp" + #include #include #include @@ -9,17 +12,18 @@ #include #include #include -#include #include #include -#include "octree.hpp" - #ifndef WEB #define BS_THREAD_POOL_NATIVE_EXTENSIONS #include #endif +#ifdef TRACY +#include +#endif + class Mass { public: Vector3 position; @@ -116,10 +120,18 @@ class ThreadedPhysics { using Command = std::variant; struct PhysicsState { +#ifdef TRACY TracyLockable(std::mutex, command_mtx); +#else + std::mutex command_mtx; +#endif std::queue pending_commands; +#ifdef TRACY TracyLockable(std::mutex, data_mtx); +#else + std::mutex data_mtx; +#endif std::condition_variable_any data_ready_cnd; std::condition_variable_any data_consumed_cnd; unsigned int ups = 0; diff --git a/include/presets.hpp b/include/presets.hpp index d2ebc5f..8e4d224 100644 --- a/include/presets.hpp +++ b/include/presets.hpp @@ -1,11 +1,12 @@ #ifndef __PRESETS_HPP_ #define __PRESETS_HPP_ +#include "config.hpp" +#include "puzzle.hpp" + #include #include -#include "puzzle.hpp" - using StateGenerator = std::function; inline auto state_simple_1r() -> State { diff --git a/include/puzzle.hpp b/include/puzzle.hpp index ea854aa..b4b0a9a 100644 --- a/include/puzzle.hpp +++ b/include/puzzle.hpp @@ -1,6 +1,8 @@ #ifndef __PUZZLE_HPP_ #define __PUZZLE_HPP_ +#include "config.hpp" + #include #include #include diff --git a/include/renderer.hpp b/include/renderer.hpp index 64a67dc..f9dfa34 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -1,14 +1,14 @@ #ifndef __RENDERER_HPP_ #define __RENDERER_HPP_ -#include -#include - #include "camera.hpp" #include "config.hpp" #include "input.hpp" #include "state.hpp" +#include +#include + class Renderer { private: const StateManager &state; diff --git a/include/state.hpp b/include/state.hpp index 83ae6f3..7a3c814 100644 --- a/include/state.hpp +++ b/include/state.hpp @@ -1,6 +1,7 @@ #ifndef __STATE_HPP_ #define __STATE_HPP_ +#include "config.hpp" #include "physics.hpp" #include "presets.hpp" #include "puzzle.hpp" diff --git a/include/tracy.hpp b/include/tracy.hpp index be7031c..5aa0e82 100644 --- a/include/tracy.hpp +++ b/include/tracy.hpp @@ -1,6 +1,10 @@ #ifndef __TRACY_HPP_ #define __TRACY_HPP_ +#include "config.hpp" + +#ifdef TRACY + #include void *operator new(std::size_t count); @@ -8,3 +12,5 @@ void operator delete(void *ptr) noexcept; void operator delete(void *ptr, std::size_t count) noexcept; #endif + +#endif diff --git a/include/util.hpp b/include/util.hpp index 205c87d..dcb0f3f 100644 --- a/include/util.hpp +++ b/include/util.hpp @@ -1,6 +1,8 @@ #ifndef __UTIL_HPP_ #define __UTIL_HPP_ +#include "config.hpp" + #include #include #include diff --git a/src/camera.cpp b/src/camera.cpp index b838f2a..97b076c 100644 --- a/src/camera.cpp +++ b/src/camera.cpp @@ -1,10 +1,14 @@ #include "camera.hpp" #include "config.hpp" -#include "tracy.hpp" #include #include +#ifdef TRACY +#include "tracy.hpp" +#include +#endif + auto OrbitCamera3D::HandleCameraInput() -> Vector2 { Vector2 mouse = GetMousePosition(); if (mouse.x >= GetScreenWidth() / 2.0 && mouse.y >= MENU_HEIGHT) { diff --git a/src/input.cpp b/src/input.cpp index 76fdd32..7a7e45d 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -1,10 +1,14 @@ #include "input.hpp" #include "config.hpp" -#include "tracy.hpp" #include #include +#ifdef TRACY +#include "tracy.hpp" +#include +#endif + auto InputHandler::HandleMouseHover() -> void { const int board_width = GetScreenWidth() / 2.0 - 2 * BOARD_PADDING; const int board_height = GetScreenHeight() - MENU_HEIGHT - 2 * BOARD_PADDING; diff --git a/src/main.cpp b/src/main.cpp index c98f9d5..775247a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,14 +1,17 @@ #include #include #include -#include #include "config.hpp" #include "input.hpp" #include "physics.hpp" #include "renderer.hpp" #include "state.hpp" + +#ifdef TRACY #include "tracy.hpp" +#include +#endif // TODO: Klotski state file loading // - File should contain a single state per line, multiple lines possible @@ -20,8 +23,6 @@ // - Click states to display them in the board // - Find shortest path to any winning state and mark it in the graph // - Also mark the next move along the path on the board -// TODO: Do I have a huge memory leak or is the memory just not reclaimed from -// the C++ runtime? auto main(int argc, char *argv[]) -> int { // if (argc < 2) { @@ -50,7 +51,9 @@ auto main(int argc, char *argv[]) -> int { // Game loop while (!WindowShouldClose()) { +#ifdef TRACY FrameMarkStart("MainThread"); +#endif // Input update state.previous_state = state.current_state; @@ -58,9 +61,15 @@ auto main(int argc, char *argv[]) -> int { state.UpdateGraph(); // Add state added after user input // Read positions from physics thread +#ifdef TRACY FrameMarkStart("MainThreadConsumeLock"); +#endif { +#ifdef TRACY std::unique_lock lock(physics.state.data_mtx); +#else + std::unique_lock lock(physics.state.data_mtx); +#endif ups = physics.state.ups; @@ -77,7 +86,9 @@ auto main(int argc, char *argv[]) -> int { physics.state.data_consumed_cnd.notify_all(); } } +#ifdef TRACY FrameMarkEnd("MainThreadConsumeLock"); +#endif // Update the camera after the physics, so target lock is smooth std::size_t current_index = state.CurrentMassIndex(); @@ -92,7 +103,10 @@ auto main(int argc, char *argv[]) -> int { renderer.DrawKlotski(); renderer.DrawMenu(masses, springs); renderer.DrawTextures(ups); +#ifdef TRACY + FrameMark; FrameMarkEnd("MainThread"); +#endif } CloseWindow(); diff --git a/src/octree.cpp b/src/octree.cpp index 2bbcd41..c2dd790 100644 --- a/src/octree.cpp +++ b/src/octree.cpp @@ -1,11 +1,15 @@ #include "octree.hpp" #include "config.hpp" -#include "tracy.hpp" #include "util.hpp" #include #include +#ifdef TRACY +#include "tracy.hpp" +#include +#endif + auto OctreeNode::ChildCount() const -> int { int child_count = 0; for (int child : children) { diff --git a/src/physics.cpp b/src/physics.cpp index fae51fd..ed56592 100644 --- a/src/physics.cpp +++ b/src/physics.cpp @@ -1,20 +1,21 @@ #include "physics.hpp" #include "config.hpp" -#include "tracy.hpp" #include #include #include #include #include -#include -#include #include #include -#include #include #include +#ifdef TRACY +#include "tracy.hpp" +#include +#endif + auto Mass::ClearForce() -> void { force = Vector3Zero(); } auto Mass::CalculateVelocity(const float delta_time) -> void { @@ -94,7 +95,9 @@ auto MassSpringSystem::Clear() -> void { } auto MassSpringSystem::ClearForces() -> void { +#ifdef TRACY ZoneScoped; +#endif for (auto &mass : masses) { mass.ClearForce(); @@ -102,7 +105,9 @@ auto MassSpringSystem::ClearForces() -> void { } auto MassSpringSystem::CalculateSpringForces() -> void { +#ifdef TRACY ZoneScoped; +#endif for (const auto spring : springs) { Mass &a = masses.at(spring.a); @@ -116,7 +121,9 @@ auto MassSpringSystem::SetThreadName(std::size_t idx) -> void { } auto MassSpringSystem::BuildOctree() -> void { +#ifdef TRACY ZoneScoped; +#endif octree.nodes.clear(); octree.nodes.reserve(masses.size() * 2); @@ -151,7 +158,9 @@ auto MassSpringSystem::BuildOctree() -> void { } auto MassSpringSystem::CalculateRepulsionForces() -> void { +#ifdef TRACY ZoneScoped; +#endif BuildOctree(); @@ -173,7 +182,9 @@ auto MassSpringSystem::CalculateRepulsionForces() -> void { } auto MassSpringSystem::VerletUpdate(float delta_time) -> void { +#ifdef TRACY ZoneScoped; +#endif for (auto &mass : masses) { mass.VerletUpdate(delta_time); @@ -198,7 +209,9 @@ auto ThreadedPhysics::PhysicsThread(ThreadedPhysics::PhysicsState &state) unsigned int updates = 0; while (state.running.load()) { +#ifdef TRACY FrameMarkStart("PhysicsThread"); +#endif // Time tracking std::chrono::time_point now = std::chrono::high_resolution_clock::now(); @@ -209,7 +222,11 @@ auto ThreadedPhysics::PhysicsThread(ThreadedPhysics::PhysicsState &state) // Handle queued commands { +#ifdef TRACY std::lock_guard lock(state.command_mtx); +#else + std::lock_guard lock(state.command_mtx); +#endif while (!state.pending_commands.empty()) { Command &cmd = state.pending_commands.front(); cmd.visit(visitor); @@ -234,9 +251,15 @@ auto ThreadedPhysics::PhysicsThread(ThreadedPhysics::PhysicsState &state) } // Publish the positions for the renderer (copy) +#ifdef TRACY FrameMarkStart("PhysicsThreadProduceLock"); +#endif { +#ifdef TRACY std::unique_lock lock(state.data_mtx); +#else + std::unique_lock lock(state.data_mtx); +#endif state.data_consumed_cnd.wait( lock, [&] { return state.data_consumed || !state.running.load(); }); if (!state.running.load()) { @@ -268,29 +291,43 @@ auto ThreadedPhysics::PhysicsThread(ThreadedPhysics::PhysicsState &state) } // Notify the rendering thread that new data is available state.data_ready_cnd.notify_all(); +#ifdef TRACY FrameMarkEnd("PhysicsThreadProduceLock"); FrameMarkEnd("PhysicsThread"); +#endif } } auto ThreadedPhysics::AddMassCmd() -> void { { +#ifdef TRACY std::lock_guard lock(state.command_mtx); +#else + std::lock_guard lock(state.command_mtx); +#endif state.pending_commands.push(AddMass{}); } } auto ThreadedPhysics::AddSpringCmd(std::size_t a, std::size_t b) -> void { { +#ifdef TRACY std::lock_guard lock(state.command_mtx); +#else + std::lock_guard lock(state.command_mtx); +#endif state.pending_commands.push(AddSpring{a, b}); } } auto ThreadedPhysics::ClearCmd() -> void { { +#ifdef TRACY std::lock_guard lock(state.command_mtx); +#else + std::lock_guard lock(state.command_mtx); +#endif state.pending_commands.push(ClearGraph{}); } } @@ -299,7 +336,11 @@ auto ThreadedPhysics::AddMassSpringsCmd( std::size_t num_masses, const std::vector> &springs) -> void { { +#ifdef TRACY std::lock_guard lock(state.command_mtx); +#else + std::lock_guard lock(state.command_mtx); +#endif for (std::size_t i = 0; i < num_masses; ++i) { state.pending_commands.push(AddMass{}); } diff --git a/src/puzzle.cpp b/src/puzzle.cpp index 84b3be1..cf58046 100644 --- a/src/puzzle.cpp +++ b/src/puzzle.cpp @@ -1,8 +1,13 @@ #include "puzzle.hpp" -#include "tracy.hpp" +#include "config.hpp" #include +#ifdef TRACY +#include "tracy.hpp" +#include +#endif + auto Block::Hash() const -> int { std::string s = std::format("{},{},{},{}", x, y, width, height); return std::hash{}(s); @@ -267,6 +272,10 @@ auto State::GetNextStates() const -> std::vector { auto State::Closure() const -> std::pair, std::vector>> { +#ifdef TRACY + ZoneScoped; +#endif + std::vector states; std::vector> links; diff --git a/src/renderer.cpp b/src/renderer.cpp index b66b1d0..efb0165 100644 --- a/src/renderer.cpp +++ b/src/renderer.cpp @@ -1,14 +1,17 @@ #include "renderer.hpp" #include "config.hpp" #include "puzzle.hpp" -#include "tracy.hpp" #include #include #include #include #include + +#ifdef TRACY +#include "tracy.hpp" #include +#endif #ifdef BATCHING #include @@ -59,11 +62,15 @@ auto Renderer::ReallocateGraphInstancingIfNecessary(std::size_t size) -> void { auto Renderer::DrawMassSprings( const std::vector &masses, const std::vector> &springs) -> void { +#ifdef TRACY ZoneScoped; +#endif // Prepare cube instancing { +#ifdef TRACY ZoneNamedN(prepare_masses, "PrepareMasses", true); +#endif if (masses.size() < DRAW_VERTICES_LIMIT) { if (transforms == nullptr) { AllocateGraphInstancing(masses.size()); @@ -85,7 +92,9 @@ auto Renderer::DrawMassSprings( // Draw springs (batched) { +#ifdef TRACY ZoneNamedN(draw_springs, "DrawSprings", true); +#endif rlBegin(RL_LINES); for (const auto &[from, to] : springs) { if (masses.size() > from && masses.size() > to) { @@ -101,7 +110,9 @@ auto Renderer::DrawMassSprings( // Draw masses (instanced) { +#ifdef TRACY ZoneNamedN(draw_masses, "DrawMasses", true); +#endif if (masses.size() < DRAW_VERTICES_LIMIT) { // NOTE: I don't know if drawing all this inside a shader would make it // much faster... The amount of data sent to the GPU would be @@ -171,7 +182,9 @@ auto Renderer::DrawMassSprings( } auto Renderer::DrawKlotski() -> void { +#ifdef TRACY ZoneScoped; +#endif BeginTextureMode(klotski_target); ClearBackground(RAYWHITE); @@ -262,7 +275,9 @@ auto Renderer::DrawKlotski() -> void { auto Renderer::DrawMenu( const std::vector &masses, const std::vector> &springs) -> void { +#ifdef TRACY ZoneScoped; +#endif BeginTextureMode(menu_target); ClearBackground(RAYWHITE); diff --git a/src/state.cpp b/src/state.cpp index d1e12ec..fc52a99 100644 --- a/src/state.cpp +++ b/src/state.cpp @@ -1,9 +1,14 @@ #include "state.hpp" +#include "config.hpp" #include "presets.hpp" -#include "tracy.hpp" #include +#ifdef TRACY +#include "tracy.hpp" +#include +#endif + auto StateManager::LoadPreset(int preset) -> void { current_preset = preset; current_state = CurrentGenerator()(); diff --git a/src/tracy.cpp b/src/tracy.cpp index 393b30e..37a0669 100644 --- a/src/tracy.cpp +++ b/src/tracy.cpp @@ -1,5 +1,8 @@ -#include "tracy.hpp" +#include "config.hpp" +#ifdef TRACY + +#include "tracy.hpp" #include void *operator new(std::size_t count) { @@ -15,3 +18,5 @@ void operator delete(void *ptr, std::size_t count) noexcept { TracyFreeS(ptr, 20); free(ptr); } + +#endif