Replace OpenMP with a thread-pool library because OpenMP has a larger fork-boundary overhead

This commit is contained in:
2026-02-22 23:54:14 +01:00
parent 73b01f6af3
commit baf257758a
4 changed files with 43 additions and 8 deletions

View File

@ -13,6 +13,10 @@
#include "octree.hpp"
#endif
#ifndef WEB
#include <BS_thread_pool.hpp>
#endif
class Mass {
public:
const float mass;
@ -71,6 +75,10 @@ private:
int last_springs_count;
#endif
#ifndef WEB
// Thread pool for non-WEB builds; this member is compiled out when WEB is
// defined.
BS::thread_pool<BS::tp::none> threads;
#endif
public:
// This is the main ownership of all the states/masses/springs.
// TODO: Everything is stored multiple times but idc (currently).
@ -82,6 +90,11 @@ public:
#ifndef BARNES_HUT
last_build = REPULSION_GRID_REFRESH;
#endif
#ifndef WEB
std::cout << "Thread-Pool: " << threads.get_thread_count() << " threads."
<< std::endl;
#endif
};
MassSpringSystem(const MassSpringSystem &copy) = delete;

View File

@ -8,9 +8,6 @@
#include "renderer.hpp"
#include "state.hpp"
#ifndef WEB
#include <omp.h>
#endif
#ifdef PRINT_TIMINGS
#include <chrono>
#include <ratio>
@ -33,10 +30,6 @@ auto main(int argc, char *argv[]) -> int {
// return 1;
// }
#ifndef WEB
std::cout << "OpenMP: " << omp_get_max_threads() << " threads." << std::endl;
#endif
// RayLib window setup
SetTraceLogLevel(LOG_ERROR);
// SetConfigFlags(FLAG_VSYNC_HINT);

View File

@ -138,6 +138,8 @@ auto MassSpringSystem::ClearForces() -> void {
}
auto MassSpringSystem::CalculateSpringForces() -> void {
ZoneScoped;
for (auto &[states, spring] : springs) {
spring.CalculateSpringForce();
}
@ -145,6 +147,8 @@ auto MassSpringSystem::CalculateSpringForces() -> void {
#ifdef BARNES_HUT
auto MassSpringSystem::BuildOctree() -> void {
ZoneScoped;
octree.nodes.clear();
octree.nodes.reserve(masses.size() * 2);
@ -228,17 +232,33 @@ auto MassSpringSystem::BuildUniformGrid() -> void {
auto MassSpringSystem::CalculateRepulsionForces() -> void {
ZoneScoped;
#ifdef BARNES_HUT
BuildOctree();
// Calculate forces using Barnes-Hut
#pragma omp parallel for schedule(dynamic, 256)
#ifdef WEB
// WEB builds: plain sequential loop. For every mass, query the octree from
// node index 0 and add the resulting force to the mass's accumulated force.
for (int i = 0; i < mass_pointers.size(); ++i) {
int root = 0;
Vector3 force = octree.CalculateForce(root, mass_pointers[i]->position);
mass_pointers[i]->force = Vector3Add(mass_pointers[i]->force, force);
}
#else
// Non-WEB builds: split the index range [0, mass_pointers.size()) into blocks
// and run them on the thread pool. Each block performs the same per-mass
// update as the sequential loop above and writes only to the masses inside
// its own [start, end) range.
// NOTE(review): assumes octree.CalculateForce does not mutate shared state,
// since blocks run concurrently — confirm.
// NOTE(review): in BS::thread_pool the argument after the callable is the
// number of blocks, not a block size, so this presumably requests 256 blocks
// rather than 256-iteration chunks as in an OpenMP schedule(dynamic, 256) —
// confirm this is intended.
threads.detach_blocks(
0, mass_pointers.size(),
[&](int start, int end) {
for (int i = start; i < end; ++i) {
int root = 0;
Vector3 force =
octree.CalculateForce(root, mass_pointers[i]->position);
mass_pointers[i]->force = Vector3Add(mass_pointers[i]->force, force);
}
},
256);
// The lambda captures by reference and the tasks are detached, so block here
// until the submitted work has finished before continuing.
threads.wait();
#endif
#else
@ -312,6 +332,8 @@ auto MassSpringSystem::CalculateRepulsionForces() -> void {
}
auto MassSpringSystem::VerletUpdate(float delta_time) -> void {
ZoneScoped;
for (auto &[state, mass] : masses) {
mass.VerletUpdate(delta_time);
}

View File

@ -5,6 +5,7 @@
#include <raylib.h>
#include <raymath.h>
#include <rlgl.h>
#include <tracy/Tracy.hpp>
#include <unordered_set>
#include "config.hpp"
@ -64,6 +65,8 @@ auto Renderer::DrawMassSprings(const MassSpringSystem &mass_springs,
const State &current_state,
const std::unordered_set<State> &winning_states)
-> void {
ZoneScoped;
// Prepare cube instancing
if (transforms == nullptr) {
AllocateGraphInstancing(mass_springs);
@ -135,6 +138,8 @@ auto Renderer::DrawMassSprings(const MassSpringSystem &mass_springs,
auto Renderer::DrawKlotski(const State &state, int hov_x, int hov_y, int sel_x,
int sel_y, int block_add_x, int block_add_y,
const WinCondition win_condition) -> void {
ZoneScoped;
BeginTextureMode(klotski_target);
ClearBackground(RAYWHITE);
@ -220,6 +225,8 @@ auto Renderer::DrawMenu(const MassSpringSystem &mass_springs,
int current_preset, const State &current_state,
const std::unordered_set<State> &winning_states)
-> void {
ZoneScoped;
BeginTextureMode(menu_target);
ClearBackground(RAYWHITE);