Replace the OpenMP parallel loops in MassSpringSystem with a BS::thread_pool
member, split REPULSION_FORCE into BH_FORCE (octree) and GRID_FORCE (uniform
grid), document the config constants, and add Tracy ZoneScoped markers to the
physics and renderer entry points.

Review notes:
  * detach_blocks() is passed 256 / 512 where the removed OpenMP pragmas used a
    chunk size of 256. The BS::thread_pool documentation describes that
    argument as the number of blocks, not the block size; confirm intent.
  * CMakeLists.txt still has find_package(OpenMP REQUIRED) in its unchanged
    context although this patch removes the omp pragmas and the
    omp_get_max_threads() call; confirm whether it can be dropped.
  * The bare "#include" lines and "std::unordered_set" appear here without
    their angle-bracket text. It was lost before this copy of the patch was
    made and is left as found.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 332897e..8ecf4dc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,7 +8,7 @@ set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
 find_package(raylib REQUIRED)
 find_package(OpenMP REQUIRED)
 
-# TODO: Need to enable/disable this based on a variable for nix build
+# Need to enable/disable this based on a variable for nix build
 if(USE_TRACY)
   include(FetchContent)
   FetchContent_Declare(tracy
diff --git a/include/config.hpp b/include/config.hpp
index fc78bc6..03f1c48 100644
--- a/include/config.hpp
+++ b/include/config.hpp
@@ -4,8 +4,8 @@
 #include
 
 #define PRINT_TIMINGS
-// #define WEB
-#define BARNES_HUT
+// #define WEB // Disables multithreading
+#define BARNES_HUT // Use octree BH instead of uniform grid
 
 // Window
 constexpr int INITIAL_WIDTH = 800;
@@ -30,18 +30,19 @@ constexpr float PAN_MULTIPLIER = 10.0;
 constexpr float ROT_SPEED = 1.0;
 
 // Physics Engine
-constexpr float SIM_SPEED = 4.0;
-constexpr float TIMESTEP = 1.0 / 60; // Do 60 physics updates per second
-constexpr float MASS = 1.0;
-constexpr float SPRING_CONSTANT = 5.0;
-constexpr float DAMPENING_CONSTANT = 1.0;
-constexpr float REST_LENGTH = 2.0;
-constexpr float REPULSION_FORCE = 2.0;
-constexpr float REPULSION_RANGE = 5.0 * REST_LENGTH;
+constexpr float SIM_SPEED = 4.0;          // How large each update should be
+constexpr float TIMESTEP = 1.0 / 60;      // Do 60 physics updates per second
+constexpr float MASS = 1.0;               // Mass spring system
+constexpr float SPRING_CONSTANT = 5.0;    // Mass spring system
+constexpr float DAMPENING_CONSTANT = 1.0; // Mass spring system
+constexpr float REST_LENGTH = 2.0;        // Mass spring system
+constexpr float VERLET_DAMPENING = 0.05;  // [0, 1]
+constexpr float BH_FORCE = 2.0;           // BH: [1.0, 3.0]
 constexpr float THETA = 1.0;              // Barnes-Hut [0.5, ~]
 constexpr float SOFTENING = 0.01;         // Barnes-Hut [0.01, 1.0]
-constexpr int REPULSION_GRID_REFRESH = 5; // Updates between grid rebuilds
-constexpr float VERLET_DAMPENING = 0.05;  // [0, 1]
+constexpr float GRID_FORCE = 0.02;                   // Grid: [0.0, ~0.05]
+constexpr float REPULSION_RANGE = 5.0 * REST_LENGTH; // Grid
+constexpr int REPULSION_GRID_REFRESH = 5;            // Grid rebuild freq
 
 // Graph Drawing
 constexpr float VERTEX_SIZE = 0.5;
diff --git a/include/physics.hpp b/include/physics.hpp
index 153be68..410d436 100644
--- a/include/physics.hpp
+++ b/include/physics.hpp
@@ -13,6 +13,10 @@
 #include "octree.hpp"
 #endif
 
+#ifndef WEB
+#include
+#endif
+
 class Mass {
 public:
   const float mass;
@@ -71,6 +75,10 @@ private:
   int last_springs_count;
 #endif
 
+#ifndef WEB
+  BS::thread_pool threads;
+#endif
+
 public:
   // This is the main ownership of all the states/masses/springs.
   // TODO: Everything is stored multiple times but idc (currently).
@@ -82,6 +90,11 @@ public:
 #ifndef BARNES_HUT
     last_build = REPULSION_GRID_REFRESH;
 #endif
+
+#ifndef WEB
+    std::cout << "Thread-Pool: " << threads.get_thread_count() << " threads."
+              << std::endl;
+#endif
   };
 
   MassSpringSystem(const MassSpringSystem &copy) = delete;
diff --git a/src/main.cpp b/src/main.cpp
index 37b2873..254cd23 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -8,9 +8,6 @@
 #include "renderer.hpp"
 #include "state.hpp"
 
-#ifndef WEB
-#include
-#endif
 #ifdef PRINT_TIMINGS
 #include
 #include
@@ -33,10 +30,6 @@ auto main(int argc, char *argv[]) -> int {
   // return 1;
   // }
 
-#ifndef WEB
-  std::cout << "OpenMP: " << omp_get_max_threads() << " threads." << std::endl;
-#endif
-
   // RayLib window setup
   SetTraceLogLevel(LOG_ERROR);
   // SetConfigFlags(FLAG_VSYNC_HINT);
diff --git a/src/octree.cpp b/src/octree.cpp
index 4003f57..6c5a3d2 100644
--- a/src/octree.cpp
+++ b/src/octree.cpp
@@ -129,7 +129,7 @@ auto Octree::CalculateForce(int node_idx, const Vector3 &pos) -> Vector3 {
   // Barnes-Hut
   if (node.leaf || (size * size / dist_sq) < (THETA * THETA)) {
     float dist = std::sqrt(dist_sq);
-    float force_mag = REPULSION_FORCE * node.mass_total / dist_sq;
+    float force_mag = BH_FORCE * node.mass_total / dist_sq;
     return Vector3Scale(diff, force_mag / dist);
   }
 
diff --git a/src/physics.cpp b/src/physics.cpp
index 82d8813..313ca36 100644
--- a/src/physics.cpp
+++ b/src/physics.cpp
@@ -138,6 +138,8 @@ auto MassSpringSystem::ClearForces() -> void {
 }
 
 auto MassSpringSystem::CalculateSpringForces() -> void {
+  ZoneScoped;
+
   for (auto &[states, spring] : springs) {
     spring.CalculateSpringForce();
   }
@@ -145,6 +147,8 @@ auto MassSpringSystem::CalculateSpringForces() -> void {
 
 #ifdef BARNES_HUT
 auto MassSpringSystem::BuildOctree() -> void {
+  ZoneScoped;
+
   octree.nodes.clear();
   octree.nodes.reserve(masses.size() * 2);
 
@@ -228,17 +232,33 @@ auto MassSpringSystem::BuildUniformGrid() -> void {
 
 auto MassSpringSystem::CalculateRepulsionForces() -> void {
   ZoneScoped;
+
 #ifdef BARNES_HUT
   BuildOctree();
 
-// Calculate forces using Barnes-Hut
-#pragma omp parallel for schedule(dynamic, 256)
-  for (int i = 0; i < mass_pointers.size(); ++i) {
+  auto solve_octree = [&](int i) {
     int root = 0;
     Vector3 force = octree.CalculateForce(root, mass_pointers[i]->position);
 
     mass_pointers[i]->force = Vector3Add(mass_pointers[i]->force, force);
+  };
+
+// Calculate forces using Barnes-Hut
+#ifdef WEB
+  for (int i = 0; i < mass_pointers.size(); ++i) {
+    solve_octree(i);
   }
+#else
+  threads.detach_blocks(
+      0, mass_pointers.size(),
+      [&](int start, int end) {
+        for (int i = start; i < end; ++i) {
+          solve_octree(i);
+        }
+      },
+      256);
+  threads.wait();
+#endif
 
 #else
 
@@ -253,10 +273,7 @@ auto MassSpringSystem::CalculateRepulsionForces() -> void {
   }
   last_build++;
 
-  // Calculate forces using uniform grid
-#pragma omp parallel for schedule(dynamic, 256)
-  // Search the neighboring cells for each mass to calculate repulsion forces
-  for (int i = 0; i < masses.size(); ++i) {
+  auto solve_grid = [&](int i) {
     Mass *mass = mass_pointers[mass_indices[i]];
     int cell_x = (int)std::floor(mass->position.x / REPULSION_RANGE);
     int cell_y = (int)std::floor(mass->position.y / REPULSION_RANGE);
@@ -299,19 +316,40 @@ auto MassSpringSystem::CalculateRepulsionForces() -> void {
               continue;
             }
 
-            force = Vector3Add(force, Vector3Scale(Vector3Normalize(direction),
-                                                   REPULSION_FORCE));
+            force = Vector3Add(
+                force, Vector3Scale(Vector3Normalize(direction), GRID_FORCE));
           }
         }
       }
     }
 
     mass->force = Vector3Add(mass->force, force);
+  };
+
+  // Calculate forces using uniform grid
+#ifdef WEB
+  // Search the neighboring cells for each mass to calculate repulsion forces
+  for (int i = 0; i < mass_pointers.size(); ++i) {
+    solve_grid(i);
   }
+#else
+  threads.detach_blocks(
+      0, mass_pointers.size(),
+      [&](int start, int end) {
+        for (int i = start; i < end; ++i) {
+          solve_grid(i);
+        }
+      },
+      512);
+  threads.wait();
+#endif
+
 #endif
 }
 
 auto MassSpringSystem::VerletUpdate(float delta_time) -> void {
+  ZoneScoped;
+
   for (auto &[state, mass] : masses) {
     mass.VerletUpdate(delta_time);
   }
diff --git a/src/renderer.cpp b/src/renderer.cpp
index bfb303e..285ff5e 100644
--- a/src/renderer.cpp
+++ b/src/renderer.cpp
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #include "config.hpp"
@@ -64,6 +65,8 @@ auto Renderer::DrawMassSprings(const MassSpringSystem &mass_springs,
                                const State &current_state,
                                const std::unordered_set &winning_states)
     -> void {
+  ZoneScoped;
+
   // Prepare cube instancing
   if (transforms == nullptr) {
     AllocateGraphInstancing(mass_springs);
@@ -135,6 +138,8 @@ auto Renderer::DrawMassSprings(const MassSpringSystem &mass_springs,
 auto Renderer::DrawKlotski(const State &state, int hov_x, int hov_y, int sel_x,
                            int sel_y, int block_add_x, int block_add_y,
                            const WinCondition win_condition) -> void {
+  ZoneScoped;
+
   BeginTextureMode(klotski_target);
   ClearBackground(RAYWHITE);
 
@@ -220,6 +225,8 @@ auto Renderer::DrawMenu(const MassSpringSystem &mass_springs,
                         int current_preset, const State &current_state,
                         const std::unordered_set &winning_states)
     -> void {
+  ZoneScoped;
+
   BeginTextureMode(menu_target);
   ClearBackground(RAYWHITE);
 