Replace OpenMP with a thread-pool library because OpenMP has larger fork-boundary overhead
This commit is contained in:
@ -8,7 +8,7 @@ set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
|
|||||||
find_package(raylib REQUIRED)
|
find_package(raylib REQUIRED)
|
||||||
find_package(OpenMP REQUIRED)
|
find_package(OpenMP REQUIRED)
|
||||||
|
|
||||||
# TODO: Need to enable/disable this based on a variable for nix build
|
# Need to enable/disable this based on a variable for nix build
|
||||||
if(USE_TRACY)
|
if(USE_TRACY)
|
||||||
include(FetchContent)
|
include(FetchContent)
|
||||||
FetchContent_Declare(tracy
|
FetchContent_Declare(tracy
|
||||||
|
|||||||
@ -4,8 +4,8 @@
|
|||||||
#include <raylib.h>
|
#include <raylib.h>
|
||||||
|
|
||||||
#define PRINT_TIMINGS
|
#define PRINT_TIMINGS
|
||||||
// #define WEB
|
// #define WEB // Disables multithreading
|
||||||
#define BARNES_HUT
|
#define BARNES_HUT // Use octree BH instead of uniform grid
|
||||||
|
|
||||||
// Window
|
// Window
|
||||||
constexpr int INITIAL_WIDTH = 800;
|
constexpr int INITIAL_WIDTH = 800;
|
||||||
@ -30,18 +30,19 @@ constexpr float PAN_MULTIPLIER = 10.0;
|
|||||||
constexpr float ROT_SPEED = 1.0;
|
constexpr float ROT_SPEED = 1.0;
|
||||||
|
|
||||||
// Physics Engine
|
// Physics Engine
|
||||||
constexpr float SIM_SPEED = 4.0;
|
constexpr float SIM_SPEED = 4.0; // How large each update should be
|
||||||
constexpr float TIMESTEP = 1.0 / 60; // Do 60 physics updates per second
|
constexpr float TIMESTEP = 1.0 / 60; // Do 60 physics updates per second
|
||||||
constexpr float MASS = 1.0;
|
constexpr float MASS = 1.0; // Mass spring system
|
||||||
constexpr float SPRING_CONSTANT = 5.0;
|
constexpr float SPRING_CONSTANT = 5.0; // Mass spring system
|
||||||
constexpr float DAMPENING_CONSTANT = 1.0;
|
constexpr float DAMPENING_CONSTANT = 1.0; // Mass spring system
|
||||||
constexpr float REST_LENGTH = 2.0;
|
constexpr float REST_LENGTH = 2.0; // Mass spring system
|
||||||
constexpr float REPULSION_FORCE = 2.0;
|
constexpr float VERLET_DAMPENING = 0.05; // [0, 1]
|
||||||
constexpr float REPULSION_RANGE = 5.0 * REST_LENGTH;
|
constexpr float BH_FORCE = 2.0; // BH: [1.0, 3.0]
|
||||||
constexpr float THETA = 1.0; // Barnes-Hut [0.5, ~]
|
constexpr float THETA = 1.0; // Barnes-Hut [0.5, ~]
|
||||||
constexpr float SOFTENING = 0.01; // Barnes-Hut [0.01, 1.0]
|
constexpr float SOFTENING = 0.01; // Barnes-Hut [0.01, 1.0]
|
||||||
constexpr int REPULSION_GRID_REFRESH = 5; // Updates between grid rebuilds
|
constexpr float GRID_FORCE = 0.02; // Grid: [0.0, ~0.05]
|
||||||
constexpr float VERLET_DAMPENING = 0.05; // [0, 1]
|
constexpr float REPULSION_RANGE = 5.0 * REST_LENGTH; // Grid
|
||||||
|
constexpr int REPULSION_GRID_REFRESH = 5; // Grid rebuild freq
|
||||||
|
|
||||||
// Graph Drawing
|
// Graph Drawing
|
||||||
constexpr float VERTEX_SIZE = 0.5;
|
constexpr float VERTEX_SIZE = 0.5;
|
||||||
|
|||||||
@ -13,6 +13,10 @@
|
|||||||
#include "octree.hpp"
|
#include "octree.hpp"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef WEB
|
||||||
|
#include <BS_thread_pool.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
class Mass {
|
class Mass {
|
||||||
public:
|
public:
|
||||||
const float mass;
|
const float mass;
|
||||||
@ -71,6 +75,10 @@ private:
|
|||||||
int last_springs_count;
|
int last_springs_count;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef WEB
|
||||||
|
BS::thread_pool<BS::tp::none> threads;
|
||||||
|
#endif
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// This is the main ownership of all the states/masses/springs.
|
// This is the main ownership of all the states/masses/springs.
|
||||||
// TODO: Everything is stored multiple times but idc (currently).
|
// TODO: Everything is stored multiple times but idc (currently).
|
||||||
@ -82,6 +90,11 @@ public:
|
|||||||
#ifndef BARNES_HUT
|
#ifndef BARNES_HUT
|
||||||
last_build = REPULSION_GRID_REFRESH;
|
last_build = REPULSION_GRID_REFRESH;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef WEB
|
||||||
|
std::cout << "Thread-Pool: " << threads.get_thread_count() << " threads."
|
||||||
|
<< std::endl;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
MassSpringSystem(const MassSpringSystem &copy) = delete;
|
MassSpringSystem(const MassSpringSystem &copy) = delete;
|
||||||
|
|||||||
@ -8,9 +8,6 @@
|
|||||||
#include "renderer.hpp"
|
#include "renderer.hpp"
|
||||||
#include "state.hpp"
|
#include "state.hpp"
|
||||||
|
|
||||||
#ifndef WEB
|
|
||||||
#include <omp.h>
|
|
||||||
#endif
|
|
||||||
#ifdef PRINT_TIMINGS
|
#ifdef PRINT_TIMINGS
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <ratio>
|
#include <ratio>
|
||||||
@ -33,10 +30,6 @@ auto main(int argc, char *argv[]) -> int {
|
|||||||
// return 1;
|
// return 1;
|
||||||
// }
|
// }
|
||||||
|
|
||||||
#ifndef WEB
|
|
||||||
std::cout << "OpenMP: " << omp_get_max_threads() << " threads." << std::endl;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// RayLib window setup
|
// RayLib window setup
|
||||||
SetTraceLogLevel(LOG_ERROR);
|
SetTraceLogLevel(LOG_ERROR);
|
||||||
// SetConfigFlags(FLAG_VSYNC_HINT);
|
// SetConfigFlags(FLAG_VSYNC_HINT);
|
||||||
|
|||||||
@ -129,7 +129,7 @@ auto Octree::CalculateForce(int node_idx, const Vector3 &pos) -> Vector3 {
|
|||||||
// Barnes-Hut
|
// Barnes-Hut
|
||||||
if (node.leaf || (size * size / dist_sq) < (THETA * THETA)) {
|
if (node.leaf || (size * size / dist_sq) < (THETA * THETA)) {
|
||||||
float dist = std::sqrt(dist_sq);
|
float dist = std::sqrt(dist_sq);
|
||||||
float force_mag = REPULSION_FORCE * node.mass_total / dist_sq;
|
float force_mag = BH_FORCE * node.mass_total / dist_sq;
|
||||||
|
|
||||||
return Vector3Scale(diff, force_mag / dist);
|
return Vector3Scale(diff, force_mag / dist);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -138,6 +138,8 @@ auto MassSpringSystem::ClearForces() -> void {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto MassSpringSystem::CalculateSpringForces() -> void {
|
auto MassSpringSystem::CalculateSpringForces() -> void {
|
||||||
|
ZoneScoped;
|
||||||
|
|
||||||
for (auto &[states, spring] : springs) {
|
for (auto &[states, spring] : springs) {
|
||||||
spring.CalculateSpringForce();
|
spring.CalculateSpringForce();
|
||||||
}
|
}
|
||||||
@ -145,6 +147,8 @@ auto MassSpringSystem::CalculateSpringForces() -> void {
|
|||||||
|
|
||||||
#ifdef BARNES_HUT
|
#ifdef BARNES_HUT
|
||||||
auto MassSpringSystem::BuildOctree() -> void {
|
auto MassSpringSystem::BuildOctree() -> void {
|
||||||
|
ZoneScoped;
|
||||||
|
|
||||||
octree.nodes.clear();
|
octree.nodes.clear();
|
||||||
octree.nodes.reserve(masses.size() * 2);
|
octree.nodes.reserve(masses.size() * 2);
|
||||||
|
|
||||||
@ -228,17 +232,33 @@ auto MassSpringSystem::BuildUniformGrid() -> void {
|
|||||||
|
|
||||||
auto MassSpringSystem::CalculateRepulsionForces() -> void {
|
auto MassSpringSystem::CalculateRepulsionForces() -> void {
|
||||||
ZoneScoped;
|
ZoneScoped;
|
||||||
|
|
||||||
#ifdef BARNES_HUT
|
#ifdef BARNES_HUT
|
||||||
BuildOctree();
|
BuildOctree();
|
||||||
|
|
||||||
// Calculate forces using Barnes-Hut
|
auto solve_octree = [&](int i) {
|
||||||
#pragma omp parallel for schedule(dynamic, 256)
|
|
||||||
for (int i = 0; i < mass_pointers.size(); ++i) {
|
|
||||||
int root = 0;
|
int root = 0;
|
||||||
Vector3 force = octree.CalculateForce(root, mass_pointers[i]->position);
|
Vector3 force = octree.CalculateForce(root, mass_pointers[i]->position);
|
||||||
|
|
||||||
mass_pointers[i]->force = Vector3Add(mass_pointers[i]->force, force);
|
mass_pointers[i]->force = Vector3Add(mass_pointers[i]->force, force);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Calculate forces using Barnes-Hut
|
||||||
|
#ifdef WEB
|
||||||
|
for (int i = 0; i < mass_pointers.size(); ++i) {
|
||||||
|
solve_octree(i);
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
threads.detach_blocks(
|
||||||
|
0, mass_pointers.size(),
|
||||||
|
[&](int start, int end) {
|
||||||
|
for (int i = start; i < end; ++i) {
|
||||||
|
solve_octree(i);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
256);
|
||||||
|
threads.wait();
|
||||||
|
#endif
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
@ -253,10 +273,7 @@ auto MassSpringSystem::CalculateRepulsionForces() -> void {
|
|||||||
}
|
}
|
||||||
last_build++;
|
last_build++;
|
||||||
|
|
||||||
// Calculate forces using uniform grid
|
auto solve_grid = [&](int i) {
|
||||||
#pragma omp parallel for schedule(dynamic, 256)
|
|
||||||
// Search the neighboring cells for each mass to calculate repulsion forces
|
|
||||||
for (int i = 0; i < masses.size(); ++i) {
|
|
||||||
Mass *mass = mass_pointers[mass_indices[i]];
|
Mass *mass = mass_pointers[mass_indices[i]];
|
||||||
int cell_x = (int)std::floor(mass->position.x / REPULSION_RANGE);
|
int cell_x = (int)std::floor(mass->position.x / REPULSION_RANGE);
|
||||||
int cell_y = (int)std::floor(mass->position.y / REPULSION_RANGE);
|
int cell_y = (int)std::floor(mass->position.y / REPULSION_RANGE);
|
||||||
@ -299,19 +316,40 @@ auto MassSpringSystem::CalculateRepulsionForces() -> void {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
force = Vector3Add(force, Vector3Scale(Vector3Normalize(direction),
|
force = Vector3Add(
|
||||||
REPULSION_FORCE));
|
force, Vector3Scale(Vector3Normalize(direction), GRID_FORCE));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mass->force = Vector3Add(mass->force, force);
|
mass->force = Vector3Add(mass->force, force);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Calculate forces using uniform grid
|
||||||
|
#ifdef WEB
|
||||||
|
// Search the neighboring cells for each mass to calculate repulsion forces
|
||||||
|
for (int i = 0; i < mass_pointers.size(); ++i) {
|
||||||
|
calculate_grid(i);
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
threads.detach_blocks(
|
||||||
|
0, mass_pointers.size(),
|
||||||
|
[&](int start, int end) {
|
||||||
|
for (int i = start; i < end; ++i) {
|
||||||
|
solve_grid(i);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
512);
|
||||||
|
threads.wait();
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
auto MassSpringSystem::VerletUpdate(float delta_time) -> void {
|
auto MassSpringSystem::VerletUpdate(float delta_time) -> void {
|
||||||
|
ZoneScoped;
|
||||||
|
|
||||||
for (auto &[state, mass] : masses) {
|
for (auto &[state, mass] : masses) {
|
||||||
mass.VerletUpdate(delta_time);
|
mass.VerletUpdate(delta_time);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5,6 +5,7 @@
|
|||||||
#include <raylib.h>
|
#include <raylib.h>
|
||||||
#include <raymath.h>
|
#include <raymath.h>
|
||||||
#include <rlgl.h>
|
#include <rlgl.h>
|
||||||
|
#include <tracy/Tracy.hpp>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
|
|
||||||
#include "config.hpp"
|
#include "config.hpp"
|
||||||
@ -64,6 +65,8 @@ auto Renderer::DrawMassSprings(const MassSpringSystem &mass_springs,
|
|||||||
const State &current_state,
|
const State &current_state,
|
||||||
const std::unordered_set<State> &winning_states)
|
const std::unordered_set<State> &winning_states)
|
||||||
-> void {
|
-> void {
|
||||||
|
ZoneScoped;
|
||||||
|
|
||||||
// Prepare cube instancing
|
// Prepare cube instancing
|
||||||
if (transforms == nullptr) {
|
if (transforms == nullptr) {
|
||||||
AllocateGraphInstancing(mass_springs);
|
AllocateGraphInstancing(mass_springs);
|
||||||
@ -135,6 +138,8 @@ auto Renderer::DrawMassSprings(const MassSpringSystem &mass_springs,
|
|||||||
auto Renderer::DrawKlotski(const State &state, int hov_x, int hov_y, int sel_x,
|
auto Renderer::DrawKlotski(const State &state, int hov_x, int hov_y, int sel_x,
|
||||||
int sel_y, int block_add_x, int block_add_y,
|
int sel_y, int block_add_x, int block_add_y,
|
||||||
const WinCondition win_condition) -> void {
|
const WinCondition win_condition) -> void {
|
||||||
|
ZoneScoped;
|
||||||
|
|
||||||
BeginTextureMode(klotski_target);
|
BeginTextureMode(klotski_target);
|
||||||
ClearBackground(RAYWHITE);
|
ClearBackground(RAYWHITE);
|
||||||
|
|
||||||
@ -220,6 +225,8 @@ auto Renderer::DrawMenu(const MassSpringSystem &mass_springs,
|
|||||||
int current_preset, const State &current_state,
|
int current_preset, const State &current_state,
|
||||||
const std::unordered_set<State> &winning_states)
|
const std::unordered_set<State> &winning_states)
|
||||||
-> void {
|
-> void {
|
||||||
|
ZoneScoped;
|
||||||
|
|
||||||
BeginTextureMode(menu_target);
|
BeginTextureMode(menu_target);
|
||||||
ClearBackground(RAYWHITE);
|
ClearBackground(RAYWHITE);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user