Replace OpenMP with a thread-pool library because OpenMP has larger overhead at its fork/join boundaries

This commit is contained in:
2026-02-22 23:54:14 +01:00
parent 73b01f6af3
commit 443069f597
7 changed files with 82 additions and 30 deletions

View File

@ -8,7 +8,7 @@ set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
find_package(raylib REQUIRED) find_package(raylib REQUIRED)
find_package(OpenMP REQUIRED) find_package(OpenMP REQUIRED)
# TODO: Need to enable/disable this based on a variable for nix build # Need to enable/disable this based on a variable for nix build
if(USE_TRACY) if(USE_TRACY)
include(FetchContent) include(FetchContent)
FetchContent_Declare(tracy FetchContent_Declare(tracy

View File

@ -4,8 +4,8 @@
#include <raylib.h> #include <raylib.h>
#define PRINT_TIMINGS #define PRINT_TIMINGS
// #define WEB // #define WEB // Disables multithreading
#define BARNES_HUT #define BARNES_HUT // Use octree BH instead of uniform grid
// Window // Window
constexpr int INITIAL_WIDTH = 800; constexpr int INITIAL_WIDTH = 800;
@ -30,18 +30,19 @@ constexpr float PAN_MULTIPLIER = 10.0;
constexpr float ROT_SPEED = 1.0; constexpr float ROT_SPEED = 1.0;
// Physics Engine // Physics Engine
constexpr float SIM_SPEED = 4.0; constexpr float SIM_SPEED = 4.0; // How large each update should be
constexpr float TIMESTEP = 1.0 / 60; // Do 60 physics updates per second constexpr float TIMESTEP = 1.0 / 60; // Do 60 physics updates per second
constexpr float MASS = 1.0; constexpr float MASS = 1.0; // Mass spring system
constexpr float SPRING_CONSTANT = 5.0; constexpr float SPRING_CONSTANT = 5.0; // Mass spring system
constexpr float DAMPENING_CONSTANT = 1.0; constexpr float DAMPENING_CONSTANT = 1.0; // Mass spring system
constexpr float REST_LENGTH = 2.0; constexpr float REST_LENGTH = 2.0; // Mass spring system
constexpr float REPULSION_FORCE = 2.0; constexpr float VERLET_DAMPENING = 0.05; // [0, 1]
constexpr float REPULSION_RANGE = 5.0 * REST_LENGTH; constexpr float BH_FORCE = 2.0; // BH: [1.0, 3.0]
constexpr float THETA = 1.0; // Barnes-Hut [0.5, ~] constexpr float THETA = 1.0; // Barnes-Hut [0.5, ~]
constexpr float SOFTENING = 0.01; // Barnes-Hut [0.01, 1.0] constexpr float SOFTENING = 0.01; // Barnes-Hut [0.01, 1.0]
constexpr int REPULSION_GRID_REFRESH = 5; // Updates between grid rebuilds constexpr float GRID_FORCE = 0.02; // Grid: [0.0, ~0.05]
constexpr float VERLET_DAMPENING = 0.05; // [0, 1] constexpr float REPULSION_RANGE = 5.0 * REST_LENGTH; // Grid
constexpr int REPULSION_GRID_REFRESH = 5; // Grid rebuild freq
// Graph Drawing // Graph Drawing
constexpr float VERTEX_SIZE = 0.5; constexpr float VERTEX_SIZE = 0.5;

View File

@ -13,6 +13,10 @@
#include "octree.hpp" #include "octree.hpp"
#endif #endif
#ifndef WEB
#include <BS_thread_pool.hpp>
#endif
class Mass { class Mass {
public: public:
const float mass; const float mass;
@ -71,6 +75,10 @@ private:
int last_springs_count; int last_springs_count;
#endif #endif
#ifndef WEB
BS::thread_pool<BS::tp::none> threads;
#endif
public: public:
// This is the main ownership of all the states/masses/springs. // This is the main ownership of all the states/masses/springs.
// TODO: Everything is stored multiple times but idc (currently). // TODO: Everything is stored multiple times but idc (currently).
@ -82,6 +90,11 @@ public:
#ifndef BARNES_HUT #ifndef BARNES_HUT
last_build = REPULSION_GRID_REFRESH; last_build = REPULSION_GRID_REFRESH;
#endif #endif
#ifndef WEB
std::cout << "Thread-Pool: " << threads.get_thread_count() << " threads."
<< std::endl;
#endif
}; };
MassSpringSystem(const MassSpringSystem &copy) = delete; MassSpringSystem(const MassSpringSystem &copy) = delete;

View File

@ -8,9 +8,6 @@
#include "renderer.hpp" #include "renderer.hpp"
#include "state.hpp" #include "state.hpp"
#ifndef WEB
#include <omp.h>
#endif
#ifdef PRINT_TIMINGS #ifdef PRINT_TIMINGS
#include <chrono> #include <chrono>
#include <ratio> #include <ratio>
@ -33,10 +30,6 @@ auto main(int argc, char *argv[]) -> int {
// return 1; // return 1;
// } // }
#ifndef WEB
std::cout << "OpenMP: " << omp_get_max_threads() << " threads." << std::endl;
#endif
// RayLib window setup // RayLib window setup
SetTraceLogLevel(LOG_ERROR); SetTraceLogLevel(LOG_ERROR);
// SetConfigFlags(FLAG_VSYNC_HINT); // SetConfigFlags(FLAG_VSYNC_HINT);

View File

@ -129,7 +129,7 @@ auto Octree::CalculateForce(int node_idx, const Vector3 &pos) -> Vector3 {
// Barnes-Hut // Barnes-Hut
if (node.leaf || (size * size / dist_sq) < (THETA * THETA)) { if (node.leaf || (size * size / dist_sq) < (THETA * THETA)) {
float dist = std::sqrt(dist_sq); float dist = std::sqrt(dist_sq);
float force_mag = REPULSION_FORCE * node.mass_total / dist_sq; float force_mag = BH_FORCE * node.mass_total / dist_sq;
return Vector3Scale(diff, force_mag / dist); return Vector3Scale(diff, force_mag / dist);
} }

View File

@ -138,6 +138,8 @@ auto MassSpringSystem::ClearForces() -> void {
} }
auto MassSpringSystem::CalculateSpringForces() -> void { auto MassSpringSystem::CalculateSpringForces() -> void {
ZoneScoped;
for (auto &[states, spring] : springs) { for (auto &[states, spring] : springs) {
spring.CalculateSpringForce(); spring.CalculateSpringForce();
} }
@ -145,6 +147,8 @@ auto MassSpringSystem::CalculateSpringForces() -> void {
#ifdef BARNES_HUT #ifdef BARNES_HUT
auto MassSpringSystem::BuildOctree() -> void { auto MassSpringSystem::BuildOctree() -> void {
ZoneScoped;
octree.nodes.clear(); octree.nodes.clear();
octree.nodes.reserve(masses.size() * 2); octree.nodes.reserve(masses.size() * 2);
@ -228,17 +232,33 @@ auto MassSpringSystem::BuildUniformGrid() -> void {
auto MassSpringSystem::CalculateRepulsionForces() -> void { auto MassSpringSystem::CalculateRepulsionForces() -> void {
ZoneScoped; ZoneScoped;
#ifdef BARNES_HUT #ifdef BARNES_HUT
BuildOctree(); BuildOctree();
// Calculate forces using Barnes-Hut auto solve_octree = [&](int i) {
#pragma omp parallel for schedule(dynamic, 256)
for (int i = 0; i < mass_pointers.size(); ++i) {
int root = 0; int root = 0;
Vector3 force = octree.CalculateForce(root, mass_pointers[i]->position); Vector3 force = octree.CalculateForce(root, mass_pointers[i]->position);
mass_pointers[i]->force = Vector3Add(mass_pointers[i]->force, force); mass_pointers[i]->force = Vector3Add(mass_pointers[i]->force, force);
};
// Calculate forces using Barnes-Hut
#ifdef WEB
for (int i = 0; i < mass_pointers.size(); ++i) {
solve_octree(i);
} }
#else
threads.detach_blocks(
0, mass_pointers.size(),
[&](int start, int end) {
for (int i = start; i < end; ++i) {
solve_octree(i);
}
},
256);
threads.wait();
#endif
#else #else
@ -253,10 +273,7 @@ auto MassSpringSystem::CalculateRepulsionForces() -> void {
} }
last_build++; last_build++;
// Calculate forces using uniform grid auto solve_grid = [&](int i) {
#pragma omp parallel for schedule(dynamic, 256)
// Search the neighboring cells for each mass to calculate repulsion forces
for (int i = 0; i < masses.size(); ++i) {
Mass *mass = mass_pointers[mass_indices[i]]; Mass *mass = mass_pointers[mass_indices[i]];
int cell_x = (int)std::floor(mass->position.x / REPULSION_RANGE); int cell_x = (int)std::floor(mass->position.x / REPULSION_RANGE);
int cell_y = (int)std::floor(mass->position.y / REPULSION_RANGE); int cell_y = (int)std::floor(mass->position.y / REPULSION_RANGE);
@ -299,19 +316,40 @@ auto MassSpringSystem::CalculateRepulsionForces() -> void {
continue; continue;
} }
force = Vector3Add(force, Vector3Scale(Vector3Normalize(direction), force = Vector3Add(
REPULSION_FORCE)); force, Vector3Scale(Vector3Normalize(direction), GRID_FORCE));
} }
} }
} }
} }
mass->force = Vector3Add(mass->force, force); mass->force = Vector3Add(mass->force, force);
};
// Calculate forces using uniform grid
#ifdef WEB
// Search the neighboring cells for each mass to calculate repulsion forces
for (int i = 0; i < mass_pointers.size(); ++i) {
// Single-threaded path: run the same per-mass solve_grid lambda that the thread-pool path calls.
solve_grid(i);
} }
#else
threads.detach_blocks(
0, mass_pointers.size(),
[&](int start, int end) {
for (int i = start; i < end; ++i) {
solve_grid(i);
}
},
512);
threads.wait();
#endif
#endif #endif
} }
auto MassSpringSystem::VerletUpdate(float delta_time) -> void { auto MassSpringSystem::VerletUpdate(float delta_time) -> void {
ZoneScoped;
for (auto &[state, mass] : masses) { for (auto &[state, mass] : masses) {
mass.VerletUpdate(delta_time); mass.VerletUpdate(delta_time);
} }

View File

@ -5,6 +5,7 @@
#include <raylib.h> #include <raylib.h>
#include <raymath.h> #include <raymath.h>
#include <rlgl.h> #include <rlgl.h>
#include <tracy/Tracy.hpp>
#include <unordered_set> #include <unordered_set>
#include "config.hpp" #include "config.hpp"
@ -64,6 +65,8 @@ auto Renderer::DrawMassSprings(const MassSpringSystem &mass_springs,
const State &current_state, const State &current_state,
const std::unordered_set<State> &winning_states) const std::unordered_set<State> &winning_states)
-> void { -> void {
ZoneScoped;
// Prepare cube instancing // Prepare cube instancing
if (transforms == nullptr) { if (transforms == nullptr) {
AllocateGraphInstancing(mass_springs); AllocateGraphInstancing(mass_springs);
@ -135,6 +138,8 @@ auto Renderer::DrawMassSprings(const MassSpringSystem &mass_springs,
auto Renderer::DrawKlotski(const State &state, int hov_x, int hov_y, int sel_x, auto Renderer::DrawKlotski(const State &state, int hov_x, int hov_y, int sel_x,
int sel_y, int block_add_x, int block_add_y, int sel_y, int block_add_x, int block_add_y,
const WinCondition win_condition) -> void { const WinCondition win_condition) -> void {
ZoneScoped;
BeginTextureMode(klotski_target); BeginTextureMode(klotski_target);
ClearBackground(RAYWHITE); ClearBackground(RAYWHITE);
@ -220,6 +225,8 @@ auto Renderer::DrawMenu(const MassSpringSystem &mass_springs,
int current_preset, const State &current_state, int current_preset, const State &current_state,
const std::unordered_set<State> &winning_states) const std::unordered_set<State> &winning_states)
-> void { -> void {
ZoneScoped;
BeginTextureMode(menu_target); BeginTextureMode(menu_target);
ClearBackground(RAYWHITE); ClearBackground(RAYWHITE);