build octree async and reuse tree from last frame (disabled as it breaks physics)

This commit is contained in:
2026-03-02 18:52:06 +01:00
parent d62d5c78bf
commit 9c48954a78
13 changed files with 329 additions and 273 deletions

View File

@ -9,8 +9,8 @@ if(POLICY CMP0167)
endif() endif()
option(DISABLE_BACKWARD "Disable backward stacktrace printer" OFF) option(DISABLE_BACKWARD "Disable backward stacktrace printer" OFF)
option(DISABLE_TRACY "Disable the Tracy profiler client" ON) option(DISABLE_TRACY "Disable the Tracy profiler client" OFF)
option(DISABLE_TESTS "Disable building and running tests" ON) option(DISABLE_TESTS "Disable building and running tests" OFF)
# Headers + Sources # Headers + Sources
set(SOURCES set(SOURCES

View File

@ -5,6 +5,22 @@
#define THREADPOOL // Enable physics threadpool #define THREADPOOL // Enable physics threadpool
#ifdef THREADPOOL
#if defined(_WIN32)
#define NOGDI // All GDI defines and routines
#define NOUSER // All USER defines and routines
#endif
#define BS_THREAD_POOL_NATIVE_EXTENSIONS
#include <BS_thread_pool.hpp>
#if defined(_WIN32) // raylib uses these names as function parameters
#undef near
#undef far
#endif
#endif
// TODO: Using the octree from the last frame completely breaks the physics :/
// #define ASYNC_OCTREE
// Gets set by CMake // Gets set by CMake
// #define BACKWARD // Enable pretty stack traces // #define BACKWARD // Enable pretty stack traces
// #define TRACY // Enable tracy profiling support // #define TRACY // Enable tracy profiling support

View File

@ -2,49 +2,13 @@
#define MASS_SPRING_SYSTEM_HPP_ #define MASS_SPRING_SYSTEM_HPP_
#include "octree.hpp" #include "octree.hpp"
#include "util.hpp"
#include "config.hpp" #include "config.hpp"
#include <raylib.h> #include <raylib.h>
#include <raymath.h>
#ifdef THREADPOOL
#if defined(_WIN32)
#define NOGDI // All GDI defines and routines
#define NOUSER // All USER defines and routines
#endif
#define BS_THREAD_POOL_NATIVE_EXTENSIONS
#include <BS_thread_pool.hpp>
#if defined(_WIN32) // raylib uses these names as function parameters
#undef near
#undef far
#endif
#endif
class mass_spring_system class mass_spring_system
{ {
public: public:
class mass
{
public:
Vector3 position = Vector3Zero();
Vector3 previous_position = Vector3Zero(); // for verlet integration
Vector3 velocity = Vector3Zero();
Vector3 force = Vector3Zero();
public:
mass() = delete;
explicit mass(const Vector3 _position)
: position(_position), previous_position(_position) {}
public:
auto clear_force() -> void;
auto calculate_velocity(float delta_time) -> void;
auto calculate_position(float delta_time) -> void;
auto verlet_update(float delta_time) -> void;
};
class spring class spring
{ {
public: public:
@ -54,9 +18,6 @@ public:
public: public:
spring(const size_t _a, const size_t _b) spring(const size_t _a, const size_t _b)
: a(_a), b(_b) {} : a(_a), b(_b) {}
public:
static auto calculate_spring_force(mass& _a, mass& _b) -> void;
}; };
private: private:
@ -65,26 +26,25 @@ private:
#endif #endif
public: public:
static constexpr int SMALL_TASK_BLOCK_SIZE = 256;
static constexpr int LARGE_TASK_BLOCK_SIZE = 256;
octree tree; octree tree;
// This is the main ownership of all the states/masses/springs. // This is the main ownership of all the states/masses/springs.
std::vector<mass> masses; std::vector<Vector3> positions;
std::vector<Vector3> previous_positions; // for verlet integration
std::vector<Vector3> velocities;
std::vector<Vector3> forces;
std::vector<spring> springs; std::vector<spring> springs;
public: public:
mass_spring_system() mass_spring_system()
#ifdef THREADPOOL #ifdef THREADPOOL
: threads(std::thread::hardware_concurrency() - 1, set_thread_name) : threads(std::thread::hardware_concurrency() - 2, set_mass_springs_pool_thread_name)
#endif #endif
{ {}
infoln("Using Barnes-Hut + Octree repulsion force calculation.");
#ifdef THREADPOOL
infoln("Thread-pool: {} threads.", threads.get_thread_count());
#else
infoln("Thread-pool: Disabled.");
#endif
}
mass_spring_system(const mass_spring_system& copy) = delete; mass_spring_system(const mass_spring_system& copy) = delete;
auto operator=(const mass_spring_system& copy) -> mass_spring_system& = delete; auto operator=(const mass_spring_system& copy) -> mass_spring_system& = delete;
@ -93,20 +53,22 @@ public:
private: private:
#ifdef THREADPOOL #ifdef THREADPOOL
static auto set_thread_name(size_t idx) -> void; static auto set_mass_springs_pool_thread_name(size_t idx) -> void;
#endif #endif
auto build_octree() -> void;
public: public:
auto clear() -> void; auto clear() -> void;
auto add_mass() -> void; auto add_mass() -> void;
auto add_spring(size_t a, size_t b) -> void; auto add_spring(size_t a, size_t b) -> void;
auto clear_forces() -> void; auto clear_forces() -> void;
auto calculate_spring_force(size_t s) -> void;
auto calculate_spring_forces() -> void; auto calculate_spring_forces() -> void;
auto calculate_repulsion_forces() -> void; auto calculate_repulsion_forces() -> void;
auto verlet_update(float delta_time) -> void; auto integrate_velocity(size_t m, float dt) -> void;
auto integrate_position(size_t m, float dt) -> void;
auto verlet_update(size_t m, float dt) -> void;
auto update(float dt) -> void;
auto center_masses() -> void; auto center_masses() -> void;
}; };

View File

@ -31,20 +31,18 @@ public:
public: public:
octree() = default; octree() = default;
octree(const octree& copy) = delete; // octree(const octree& copy) = delete;
auto operator=(const octree& copy) -> octree& = delete; // auto operator=(const octree& copy) -> octree& = delete;
octree(octree&& move) = delete; // octree(octree&& move) = delete;
auto operator=(octree&& move) -> octree& = delete; // auto operator=(octree&& move) -> octree& = delete;
public: public:
auto create_empty_leaf(const Vector3& box_min, const Vector3& box_max) -> int;
[[nodiscard]] auto get_octant(int node_idx, const Vector3& pos) const -> int; [[nodiscard]] auto get_octant(int node_idx, const Vector3& pos) const -> int;
[[nodiscard]] auto get_child_bounds(int node_idx, int octant) const [[nodiscard]] auto get_child_bounds(int node_idx, int octant) const
-> std::pair<Vector3, Vector3>; -> std::pair<Vector3, Vector3>;
auto create_empty_leaf(const Vector3& box_min, const Vector3& box_max) -> int;
auto insert(int node_idx, int mass_id, const Vector3& pos, float mass, int depth) -> void; auto insert(int node_idx, int mass_id, const Vector3& pos, float mass, int depth) -> void;
static auto build_octree(octree& t, const std::vector<Vector3>& positions) -> void;
[[nodiscard]] auto calculate_force(int node_idx, const Vector3& pos) const -> Vector3; [[nodiscard]] auto calculate_force(int node_idx, const Vector3& pos) const -> Vector3;
}; };

View File

@ -1,6 +1,8 @@
#ifndef PHYSICS_HPP_ #ifndef PHYSICS_HPP_
#define PHYSICS_HPP_ #define PHYSICS_HPP_
#include "config.hpp"
#include <atomic> #include <atomic>
#include <condition_variable> #include <condition_variable>
#include <mutex> #include <mutex>
@ -80,15 +82,16 @@ public:
} }
private: private:
#ifdef ASYNC_OCTREE
static auto set_octree_pool_thread_name(size_t idx) -> void;
#endif
static auto physics_thread(physics_state& state) -> void; static auto physics_thread(physics_state& state) -> void;
public: public:
auto add_mass_cmd() -> void;
auto add_spring_cmd(size_t a, size_t b) -> void;
auto clear_cmd() -> void; auto clear_cmd() -> void;
auto add_mass_cmd() -> void;
auto add_spring_cmd(size_t a, size_t b) -> void;
auto add_mass_springs_cmd(size_t num_masses, const std::vector<std::pair<size_t, size_t>>& springs) -> void; auto add_mass_springs_cmd(size_t num_masses, const std::vector<std::pair<size_t, size_t>>& springs) -> void;
}; };

View File

@ -140,6 +140,13 @@ inline auto ansi_reset() -> std::string
} }
// std::println doesn't work with mingw // std::println doesn't work with mingw
template <typename... Args>
auto traceln(std::format_string<Args...> fmt, Args&&... args) -> void
{
std::cout << std::format("[{}TRACE{}]: ", ansi_bold_fg(fg_cyan), ansi_reset()) << std::format(
fmt, std::forward<Args>(args)...) << std::endl;
}
template <typename... Args> template <typename... Args>
auto infoln(std::format_string<Args...> fmt, Args&&... args) -> void auto infoln(std::format_string<Args...> fmt, Args&&... args) -> void
{ {

View File

@ -64,6 +64,12 @@ auto main(int argc, char* argv[]) -> int
infoln("Tracy adapter disabled."); infoln("Tracy adapter disabled.");
#endif #endif
#ifdef THREADPOOL
infoln("Thread-pool enabled.");
#else
infoln("Thread-pool disabled.");
#endif
// RayLib window setup // RayLib window setup
SetTraceLogLevel(LOG_ERROR); SetTraceLogLevel(LOG_ERROR);
SetConfigFlags(FLAG_VSYNC_HINT); SetConfigFlags(FLAG_VSYNC_HINT);
@ -141,8 +147,8 @@ auto main(int argc, char* argv[]) -> int
// Update the camera after the physics, so target lock is smooth // Update the camera after the physics, so target lock is smooth
size_t current_index = state.get_current_index(); size_t current_index = state.get_current_index();
if (masses.size() > current_index) { if (masses.size() > current_index) {
const mass_spring_system::mass& current_mass = mass_spring_system::mass(masses.at(current_index)); const Vector3& current_mass = masses[current_index];
camera.update(current_mass.position, mass_center, input.camera_lock, input.camera_mass_center_lock); camera.update(current_mass, mass_center, input.camera_lock, input.camera_mass_center_lock);
} }
// Rendering // Rendering

View File

@ -1,67 +1,77 @@
#include "mass_spring_system.hpp" #include "mass_spring_system.hpp"
#include "config.hpp" #include "config.hpp"
#include "util.hpp"
#include <cfloat> #include <cfloat>
#include <cstring>
#ifdef TRACY #ifdef TRACY
#include <tracy/Tracy.hpp> #include <tracy/Tracy.hpp>
#endif #endif
auto mass_spring_system::mass::clear_force() -> void auto mass_spring_system::calculate_spring_force(const size_t s) -> void
{ {
force = Vector3Zero(); const spring _s = springs[s];
const Vector3 a_pos = positions[_s.a];
const Vector3 b_pos = positions[_s.b];
const Vector3 a_vel = velocities[_s.a];
const Vector3 b_vel = velocities[_s.b];
const Vector3 delta_pos = a_pos - b_pos;
const Vector3 delta_vel = a_vel - b_vel;
const float sq_len = Vector3DotProduct(delta_pos, delta_pos);
const float inv_len = rsqrt(sq_len);
const float len = sq_len * inv_len;
const float hooke = SPRING_CONSTANT * (len - REST_LENGTH);
const float dampening = DAMPENING_CONSTANT * Vector3DotProduct(delta_vel, delta_pos) * inv_len;
const Vector3 a_force = Vector3Scale(delta_pos, -(hooke + dampening) * inv_len);
const Vector3 b_force = a_force * -1.0f;
forces[_s.a] += a_force;
forces[_s.b] += b_force;
} }
auto mass_spring_system::mass::calculate_velocity(const float delta_time) -> void auto mass_spring_system::integrate_velocity(const size_t m, const float dt) -> void
{ {
const Vector3 acceleration = Vector3Scale(force, 1.0 / MASS); const Vector3 acc = forces[m] / MASS;
const Vector3 temp = Vector3Scale(acceleration, delta_time); velocities[m] += acc * dt;
velocity = Vector3Add(velocity, temp);
} }
auto mass_spring_system::mass::calculate_position(const float delta_time) -> void auto mass_spring_system::integrate_position(const size_t m, const float dt) -> void
{ {
previous_position = position; previous_positions[m] = positions[m];
positions[m] += velocities[m] * dt;
const Vector3 temp = Vector3Scale(velocity, delta_time);
position = Vector3Add(position, temp);
} }
auto mass_spring_system::mass::verlet_update(const float delta_time) -> void auto mass_spring_system::verlet_update(const size_t m, const float dt) -> void
{ {
const Vector3 acceleration = Vector3Scale(force, 1.0 / MASS); const Vector3 acc = (forces[m] / MASS) * dt * dt;
const Vector3 temp_position = position; const Vector3 pos = positions[m];
Vector3 displacement = Vector3Subtract(position, previous_position); Vector3 delta_pos = pos - previous_positions[m];
const Vector3 accel_term = Vector3Scale(acceleration, delta_time * delta_time); delta_pos *= 1.0 - VERLET_DAMPENING; // Minimal dampening
// Minimal dampening positions[m] += delta_pos + acc;
displacement = Vector3Scale(displacement, 1.0 - VERLET_DAMPENING); previous_positions[m] = pos;
position = Vector3Add(Vector3Add(position, displacement), accel_term);
previous_position = temp_position;
} }
auto mass_spring_system::spring::calculate_spring_force(mass& _a, mass& _b) -> void #ifdef THREADPOOL
auto mass_spring_system::set_mass_springs_pool_thread_name(size_t idx) -> void
{ {
// TODO: Use a bungee force here instead of springs, since we already have global repulsion? BS::this_thread::set_os_thread_name(std::format("repulsion-{}", idx));
const Vector3 delta_position = Vector3Subtract(_a.position, _b.position); traceln("Using thread \"{}\"", BS::this_thread::get_os_thread_name().value_or("INVALID NAME"));
const float current_length = Vector3Length(delta_position);
const Vector3 delta_velocity = Vector3Subtract(_a.velocity, _b.velocity);
const float hooke = SPRING_CONSTANT * (current_length - REST_LENGTH);
const float dampening = DAMPENING_CONSTANT * Vector3DotProduct(delta_velocity, delta_position) / current_length;
const Vector3 force_a = Vector3Scale(delta_position, -(hooke + dampening) / current_length);
const Vector3 force_b = Vector3Scale(force_a, -1.0);
_a.force = Vector3Add(_a.force, force_a);
_b.force = Vector3Add(_b.force, force_b);
} }
#endif
auto mass_spring_system::clear() -> void auto mass_spring_system::clear() -> void
{ {
masses.clear(); positions.clear();
previous_positions.clear();
velocities.clear();
forces.clear();
springs.clear(); springs.clear();
tree.nodes.clear(); tree.nodes.clear();
} }
@ -77,14 +87,17 @@ auto mass_spring_system::add_mass() -> void
// }; // };
// position = Vector3Scale(Vector3Normalize(position), REST_LENGTH * 2.0); // position = Vector3Scale(Vector3Normalize(position), REST_LENGTH * 2.0);
masses.emplace_back(Vector3Zero()); positions.emplace_back(Vector3Zero());
previous_positions.emplace_back(Vector3Zero());
velocities.emplace_back(Vector3Zero());
forces.emplace_back(Vector3Zero());
} }
auto mass_spring_system::add_spring(size_t a, size_t b) -> void auto mass_spring_system::add_spring(size_t a, size_t b) -> void
{ {
// Update masses to be located along a random walk when adding the springs // Update masses to be located along a random walk when adding the springs
const mass& mass_a = masses.at(a); const Vector3& mass_a = positions[a];
mass& mass_b = masses.at(b); const Vector3& mass_b = positions[b];
Vector3 offset{ Vector3 offset{
static_cast<float>(GetRandomValue(-100, 100)), static_cast<float>(GetRandomValue(-100, 100)), static_cast<float>(GetRandomValue(-100, 100)), static_cast<float>(GetRandomValue(-100, 100)),
@ -94,7 +107,7 @@ auto mass_spring_system::add_spring(size_t a, size_t b) -> void
// If the offset moves the mass closer to the current center of mass, flip it // If the offset moves the mass closer to the current center of mass, flip it
if (!tree.nodes.empty()) { if (!tree.nodes.empty()) {
const Vector3 mass_center_direction = Vector3Subtract(mass_a.position, tree.nodes.at(0).mass_center); const Vector3 mass_center_direction = Vector3Subtract(positions[a], tree.nodes[0].mass_center);
const float mass_center_distance = Vector3Length(mass_center_direction); const float mass_center_distance = Vector3Length(mass_center_direction);
if (mass_center_distance > 0 && Vector3DotProduct(offset, mass_center_direction) < 0.0f) { if (mass_center_distance > 0 && Vector3DotProduct(offset, mass_center_direction) < 0.0f) {
@ -102,8 +115,8 @@ auto mass_spring_system::add_spring(size_t a, size_t b) -> void
} }
} }
mass_b.position = mass_a.position + offset; positions[b] = mass_a + offset;
mass_b.previous_position = mass_b.position; previous_positions[b] = mass_b;
// infoln("Adding spring: ({}, {}, {})->({}, {}, {})", mass_a.position.x, mass_a.position.y, // infoln("Adding spring: ({}, {}, {})->({}, {}, {})", mass_a.position.x, mass_a.position.y,
// mass_a.position.z, // mass_a.position.z,
@ -118,9 +131,7 @@ auto mass_spring_system::clear_forces() -> void
ZoneScoped; ZoneScoped;
#endif #endif
for (auto& m : masses) { memset(forces.data(), 0, forces.size() * sizeof(Vector3));
m.clear_force();
}
} }
auto mass_spring_system::calculate_spring_forces() -> void auto mass_spring_system::calculate_spring_forces() -> void
@ -129,56 +140,18 @@ auto mass_spring_system::calculate_spring_forces() -> void
ZoneScoped; ZoneScoped;
#endif #endif
for (const auto s : springs) { const auto solve_spring_force = [&](const int i)
mass& a = masses.at(s.a); {
mass& b = masses.at(s.b); calculate_spring_force(i);
spring::calculate_spring_force(a, b); };
#ifdef THREADPOOL
threads.submit_loop(0, springs.size(), solve_spring_force, SMALL_TASK_BLOCK_SIZE).wait();
#else
for (size_t i = 0; i < springs.size(); ++i) {
solve_spring_force(i);
} }
}
#ifdef THREADPOOL
auto mass_spring_system::set_thread_name(size_t idx) -> void
{
BS::this_thread::set_os_thread_name(std::format("bh-worker-{}", idx));
}
#endif
auto mass_spring_system::build_octree() -> void
{
#ifdef TRACY
ZoneScoped;
#endif #endif
tree.nodes.clear();
tree.nodes.reserve(masses.size() * 2);
// Compute bounding box around all masses
Vector3 min{FLT_MAX, FLT_MAX, FLT_MAX};
Vector3 max{-FLT_MAX, -FLT_MAX, -FLT_MAX};
for (const auto& m : masses) {
min.x = std::min(min.x, m.position.x);
max.x = std::max(max.x, m.position.x);
min.y = std::min(min.y, m.position.y);
max.y = std::max(max.y, m.position.y);
min.z = std::min(min.z, m.position.z);
max.z = std::max(max.z, m.position.z);
}
// Pad the bounding box
constexpr float pad = 1.0;
min = Vector3Subtract(min, Vector3Scale(Vector3One(), pad));
max = Vector3Add(max, Vector3Scale(Vector3One(), pad));
// Make it cubic (so subdivisions are balanced)
const float max_extent = std::max({max.x - min.x, max.y - min.y, max.z - min.z});
max = Vector3Add(min, Vector3Scale(Vector3One(), max_extent));
// Root node spans the entire area
const int root = tree.create_empty_leaf(min, max);
for (size_t i = 0; i < masses.size(); ++i) {
tree.insert(root, static_cast<int>(i), masses[i].position, MASS, 0);
}
} }
auto mass_spring_system::calculate_repulsion_forces() -> void auto mass_spring_system::calculate_repulsion_forces() -> void
@ -187,45 +160,60 @@ auto mass_spring_system::calculate_repulsion_forces() -> void
ZoneScoped; ZoneScoped;
#endif #endif
build_octree(); const auto solve_octree = [&](const int i)
auto solve_octree = [&](const int i)
{ {
const Vector3 force = tree.calculate_force(0, masses[i].position); const Vector3 force = tree.calculate_force(0, positions[i]);
masses[i].force = Vector3Add(masses[i].force, force); forces[i] += force;
}; };
// Calculate forces using Barnes-Hut // Calculate forces using Barnes-Hut
#ifdef THREADPOOL #ifdef THREADPOOL
const BS::multi_future<void> loop_future = threads.submit_loop(0, masses.size(), solve_octree, 256); threads.submit_loop(0, positions.size(), solve_octree, LARGE_TASK_BLOCK_SIZE).wait();
loop_future.wait();
#else #else
for (size_t i = 0; i < masses.size(); ++i) { for (size_t i = 0; i < positions.size(); ++i) {
solve_octree(i); solve_octree(i);
} }
#endif #endif
} }
auto mass_spring_system::verlet_update(const float delta_time) -> void auto mass_spring_system::update(const float dt) -> void
{ {
#ifdef TRACY #ifdef TRACY
ZoneScoped; ZoneScoped;
#endif #endif
for (auto& m : masses) { const auto update = [&](const int i)
m.verlet_update(delta_time); {
verlet_update(i, dt);
};
#ifdef THREADPOOL
threads.submit_loop(0, positions.size(), update, SMALL_TASK_BLOCK_SIZE).wait();
#else
for (size_t i = 0; i < positions.size(); ++i) {
update(i);
} }
#endif
} }
auto mass_spring_system::center_masses() -> void auto mass_spring_system::center_masses() -> void
{ {
Vector3 mean = Vector3Zero(); Vector3 mean = Vector3Zero();
for (const auto& m : masses) { for (const Vector3& pos : positions) {
mean += m.position; mean += pos;
} }
mean /= static_cast<float>(masses.size()); mean /= static_cast<float>(positions.size());
for (auto& m : masses) { const auto center_mass = [&](const int i)
m.position -= mean; {
positions[i] -= mean;
};
#ifdef THREADPOOL
threads.submit_loop(0, positions.size(), center_mass, SMALL_TASK_BLOCK_SIZE).wait();
#else
for (size_t i = 0; i < positions.size(); ++i) {
center_mass(i);
} }
#endif
} }

View File

@ -1,7 +1,7 @@
#include "octree.hpp" #include "octree.hpp"
#include "config.hpp" #include "config.hpp"
#include "util.hpp"
#include <cfloat>
#include <raymath.h> #include <raymath.h>
#ifdef TRACY #ifdef TRACY
@ -19,16 +19,6 @@ auto octree::node::child_count() const -> int
return child_count; return child_count;
} }
auto octree::create_empty_leaf(const Vector3& box_min, const Vector3& box_max) -> int
{
node n;
n.box_min = box_min;
n.box_max = box_max;
nodes.emplace_back(n);
return static_cast<int>(nodes.size() - 1);
}
auto octree::get_octant(const int node_idx, const Vector3& pos) const -> int auto octree::get_octant(const int node_idx, const Vector3& pos) const -> int
{ {
const node& n = nodes[node_idx]; const node& n = nodes[node_idx];
@ -75,6 +65,16 @@ auto octree::get_child_bounds(const int node_idx, const int octant) const -> std
return std::make_pair(min, max); return std::make_pair(min, max);
} }
auto octree::create_empty_leaf(const Vector3& box_min, const Vector3& box_max) -> int
{
node n;
n.box_min = box_min;
n.box_max = box_max;
nodes.emplace_back(n);
return static_cast<int>(nodes.size() - 1);
}
auto octree::insert(const int node_idx, const int mass_id, const Vector3& pos, const float mass, auto octree::insert(const int node_idx, const int mass_id, const Vector3& pos, const float mass,
const int depth) -> void const int depth) -> void
{ {
@ -153,6 +153,44 @@ auto octree::insert(const int node_idx, const int mass_id, const Vector3& pos, c
nodes[node_idx].mass_total = new_mass; nodes[node_idx].mass_total = new_mass;
} }
auto octree::build_octree(octree& t, const std::vector<Vector3>& positions) -> void
{
#ifdef TRACY
ZoneScoped;
#endif
t.nodes.clear();
t.nodes.reserve(positions.size() * 2);
// Compute bounding box around all masses
Vector3 min{FLT_MAX, FLT_MAX, FLT_MAX};
Vector3 max{-FLT_MAX, -FLT_MAX, -FLT_MAX};
for (const auto& [x, y, z] : positions) {
min.x = std::min(min.x, x);
max.x = std::max(max.x, x);
min.y = std::min(min.y, y);
max.y = std::max(max.y, y);
min.z = std::min(min.z, z);
max.z = std::max(max.z, z);
}
// Pad the bounding box
constexpr float pad = 1.0;
min = Vector3Subtract(min, Vector3Scale(Vector3One(), pad));
max = Vector3Add(max, Vector3Scale(Vector3One(), pad));
// Make it cubic (so subdivisions are balanced)
const float max_extent = std::max({max.x - min.x, max.y - min.y, max.z - min.z});
max = Vector3Add(min, Vector3Scale(Vector3One(), max_extent));
// Root node spans the entire area
const int root = t.create_empty_leaf(min, max);
for (size_t i = 0; i < positions.size(); ++i) {
t.insert(root, static_cast<int>(i), positions[i], MASS, 0);
}
}
auto octree::calculate_force(const int node_idx, const Vector3& pos) const -> Vector3 auto octree::calculate_force(const int node_idx, const Vector3& pos) const -> Vector3
{ {
if (node_idx < 0) { if (node_idx < 0) {

View File

@ -542,7 +542,7 @@ auto puzzle::try_get_invalid_reason() const -> std::optional<std::string>
const auto [w, h, gx, gy, r, g] = unpack_meta(); const auto [w, h, gx, gy, r, g] = unpack_meta();
infoln("Validating puzzle {}", string_repr()); traceln("Validating puzzle \"{}\"", string_repr());
const std::optional<block>& b = try_get_target_block(); const std::optional<block>& b = try_get_target_block();
if (get_goal() && !b) { if (get_goal() && !b) {

View File

@ -6,7 +6,7 @@
#include <rlgl.h> #include <rlgl.h>
#ifdef TRACY #ifdef TRACY
#include <tracy/Tracy.hpp> #include <tracy/Tracy.hpp>
#endif #endif
auto renderer::update_texture_sizes() -> void auto renderer::update_texture_sizes() -> void
@ -29,9 +29,9 @@ auto renderer::update_texture_sizes() -> void
auto renderer::draw_mass_springs(const std::vector<Vector3>& masses) -> void auto renderer::draw_mass_springs(const std::vector<Vector3>& masses) -> void
{ {
#ifdef TRACY #ifdef TRACY
ZoneScoped; ZoneScoped;
#endif #endif
if (masses.size() != state.get_state_count()) { if (masses.size() != state.get_state_count()) {
// Because the physics run in a different thread, it might need time to catch up // Because the physics run in a different thread, it might need time to catch up
@ -40,16 +40,16 @@ auto renderer::draw_mass_springs(const std::vector<Vector3>& masses) -> void
// Prepare connection batching // Prepare connection batching
{ {
#ifdef TRACY #ifdef TRACY
ZoneNamedN(prepare_masses, "PrepareConnectionsBatching", true); ZoneNamedN(prepare_connections, "PrepareConnectionsBatching", true);
#endif #endif
connections.clear(); connections.clear();
connections.reserve(state.get_target_count()); connections.reserve(state.get_target_count());
if (input.connect_solutions) { if (input.connect_solutions) {
for (const size_t& _state : state.get_winning_indices()) { for (const size_t& _state : state.get_winning_indices()) {
const Vector3& current_mass = masses.at(state.get_current_index()); const Vector3& current_mass = masses[state.get_current_index()];
const Vector3& winning_mass = masses.at(_state); const Vector3& winning_mass = masses[_state];
connections.emplace_back(current_mass, winning_mass); connections.emplace_back(current_mass, winning_mass);
DrawLine3D(current_mass, winning_mass, Fade(TARGET_BLOCK_COLOR, 0.5)); DrawLine3D(current_mass, winning_mass, Fade(TARGET_BLOCK_COLOR, 0.5));
} }
@ -58,9 +58,9 @@ auto renderer::draw_mass_springs(const std::vector<Vector3>& masses) -> void
// Prepare cube instancing // Prepare cube instancing
{ {
#ifdef TRACY #ifdef TRACY
ZoneNamedN(prepare_masses, "PrepareMassInstancing", true); ZoneNamedN(prepare_masses, "PrepareMassInstancing", true);
#endif #endif
if (masses.size() < DRAW_VERTICES_LIMIT) { if (masses.size() < DRAW_VERTICES_LIMIT) {
// Don't have to reserve, capacity is already set to DRAW_VERTICES_LIMIT in constructor // Don't have to reserve, capacity is already set to DRAW_VERTICES_LIMIT in constructor
@ -73,12 +73,10 @@ auto renderer::draw_mass_springs(const std::vector<Vector3>& masses) -> void
// Normal vertex // Normal vertex
Color c = VERTEX_COLOR; Color c = VERTEX_COLOR;
if ((input.mark_solutions || input.mark_path) && if ((input.mark_solutions || input.mark_path) && state.get_winning_indices().contains(mass)) {
state.get_winning_indices().contains(mass)) {
// Winning vertex // Winning vertex
c = VERTEX_TARGET_COLOR; c = VERTEX_TARGET_COLOR;
} else if ((input.mark_solutions || input.mark_path) && } else if ((input.mark_solutions || input.mark_path) && state.get_path_indices().contains(mass)) {
state.get_path_indices().contains(mass)) {
// Path vertex // Path vertex
c = VERTEX_PATH_COLOR; c = VERTEX_PATH_COLOR;
} else if (mass == state.get_starting_index()) { } else if (mass == state.get_starting_index()) {
@ -104,15 +102,15 @@ auto renderer::draw_mass_springs(const std::vector<Vector3>& masses) -> void
// Draw springs (batched) // Draw springs (batched)
{ {
#ifdef TRACY #ifdef TRACY
ZoneNamedN(draw_springs, "DrawSprings", true); ZoneNamedN(draw_springs, "DrawSprings", true);
#endif #endif
rlBegin(RL_LINES); rlBegin(RL_LINES);
for (const auto& [from, to] : state.get_links()) { for (const auto& [from, to] : state.get_links()) {
if (masses.size() > from && masses.size() > to) { if (masses.size() > from && masses.size() > to) {
const auto& [ax, ay, az] = masses.at(from); const auto& [ax, ay, az] = masses[from];
const auto& [bx, by, bz] = masses.at(to); const auto& [bx, by, bz] = masses[to];
rlColor4ub(EDGE_COLOR.r, EDGE_COLOR.g, EDGE_COLOR.b, EDGE_COLOR.a); rlColor4ub(EDGE_COLOR.r, EDGE_COLOR.g, EDGE_COLOR.b, EDGE_COLOR.a);
rlVertex3f(ax, ay, az); rlVertex3f(ax, ay, az);
rlVertex3f(bx, by, bz); rlVertex3f(bx, by, bz);
@ -123,17 +121,16 @@ auto renderer::draw_mass_springs(const std::vector<Vector3>& masses) -> void
// Draw masses (instanced) // Draw masses (instanced)
{ {
#ifdef TRACY #ifdef TRACY
ZoneNamedN(draw_masses, "DrawMasses", true); ZoneNamedN(draw_masses, "DrawMasses", true);
#endif #endif
if (masses.size() < DRAW_VERTICES_LIMIT) { if (masses.size() < DRAW_VERTICES_LIMIT) {
// NOTE: I don't know if drawing all this inside a shader would make it // NOTE: I don't know if drawing all this inside a shader would make it
// much faster... The amount of data sent to the GPU would be // much faster... The amount of data sent to the GPU would be
// reduced (just positions instead of matrices), but is this // reduced (just positions instead of matrices), but is this
// noticable for < 100000 cubes? // noticable for < 100000 cubes?
DrawMeshInstanced(cube_instance, vertex_mat, transforms.data(), DrawMeshInstanced(cube_instance, vertex_mat, transforms.data(), masses.size()); // NOLINT(*-narrowing-conversions)
masses.size()); // NOLINT(*-narrowing-conversions)
} }
} }
@ -152,9 +149,8 @@ auto renderer::draw_mass_springs(const std::vector<Vector3>& masses) -> void
// Mark current state // Mark current state
const size_t current_index = state.get_current_index(); const size_t current_index = state.get_current_index();
if (masses.size() > current_index) { if (masses.size() > current_index) {
const Vector3& current_mass = masses.at(current_index); const Vector3& current_mass = masses[current_index];
DrawCube(current_mass, VERTEX_SIZE * 2, VERTEX_SIZE * 2, VERTEX_SIZE * 2, DrawCube(current_mass, VERTEX_SIZE * 2, VERTEX_SIZE * 2, VERTEX_SIZE * 2, VERTEX_CURRENT_COLOR);
VERTEX_CURRENT_COLOR);
} }
EndMode3D(); EndMode3D();
@ -163,9 +159,9 @@ auto renderer::draw_mass_springs(const std::vector<Vector3>& masses) -> void
auto renderer::draw_klotski() const -> void auto renderer::draw_klotski() const -> void
{ {
#ifdef TRACY #ifdef TRACY
ZoneScoped; ZoneScoped;
#endif #endif
BeginTextureMode(klotski_target); BeginTextureMode(klotski_target);
ClearBackground(RAYWHITE); ClearBackground(RAYWHITE);
@ -177,9 +173,9 @@ auto renderer::draw_klotski() const -> void
auto renderer::draw_menu() const -> void auto renderer::draw_menu() const -> void
{ {
#ifdef TRACY #ifdef TRACY
ZoneScoped; ZoneScoped;
#endif #endif
BeginTextureMode(menu_target); BeginTextureMode(menu_target);
ClearBackground(RAYWHITE); ClearBackground(RAYWHITE);
@ -194,32 +190,28 @@ auto renderer::draw_textures(const int fps, const int ups, const size_t mass_cou
{ {
BeginDrawing(); BeginDrawing();
DrawTextureRec(menu_target.texture, DrawTextureRec(menu_target.texture, Rectangle(0, 0, menu_target.texture.width, -menu_target.texture.height),
Rectangle(0, 0, menu_target.texture.width, -menu_target.texture.height),
Vector2(0, 0), WHITE); Vector2(0, 0), WHITE);
DrawTextureRec(klotski_target.texture, DrawTextureRec(klotski_target.texture,
Rectangle(0, 0, klotski_target.texture.width, -klotski_target.texture.height), Rectangle(0, 0, klotski_target.texture.width, -klotski_target.texture.height),
Vector2(0, MENU_HEIGHT), WHITE); Vector2(0, MENU_HEIGHT), WHITE);
DrawTextureRec(render_target.texture, DrawTextureRec(render_target.texture, Rectangle(0, 0, render_target.texture.width, -render_target.texture.height),
Rectangle(0, 0, render_target.texture.width, -render_target.texture.height),
Vector2(GetScreenWidth() / 2.0f, MENU_HEIGHT), WHITE); Vector2(GetScreenWidth() / 2.0f, MENU_HEIGHT), WHITE);
// Draw borders // Draw borders
DrawRectangleLinesEx(Rectangle(0, 0, GetScreenWidth(), MENU_HEIGHT), 1.0f, BLACK); DrawRectangleLinesEx(Rectangle(0, 0, GetScreenWidth(), MENU_HEIGHT), 1.0f, BLACK);
DrawRectangleLinesEx( DrawRectangleLinesEx(Rectangle(0, MENU_HEIGHT, GetScreenWidth() / 2.0f, GetScreenHeight() - MENU_HEIGHT), 1.0f,
Rectangle(0, MENU_HEIGHT, GetScreenWidth() / 2.0f, GetScreenHeight() - MENU_HEIGHT), 1.0f, BLACK);
BLACK);
DrawRectangleLinesEx(Rectangle(GetScreenWidth() / 2.0f, MENU_HEIGHT, GetScreenWidth() / 2.0f, DrawRectangleLinesEx(Rectangle(GetScreenWidth() / 2.0f, MENU_HEIGHT, GetScreenWidth() / 2.0f,
GetScreenHeight() - MENU_HEIGHT), GetScreenHeight() - MENU_HEIGHT), 1.0f, BLACK);
1.0f, BLACK);
gui.draw(fps, ups, mass_count, spring_count); gui.draw(fps, ups, mass_count, spring_count);
EndDrawing(); EndDrawing();
} }
auto renderer::render(const std::vector<Vector3>& masses, const int fps, const int ups, auto renderer::render(const std::vector<Vector3>& masses, const int fps, const int ups, const size_t mass_count,
const size_t mass_count, const size_t spring_count) -> void const size_t spring_count) -> void
{ {
update_texture_sizes(); update_texture_sizes();

View File

@ -12,7 +12,7 @@
auto state_manager::synced_try_insert_state(const puzzle& state) -> size_t auto state_manager::synced_try_insert_state(const puzzle& state) -> size_t
{ {
if (state_indices.contains(state)) { if (state_indices.contains(state)) {
return state_indices.at(state); return state_indices[state];
} }
const size_t index = state_pool.size(); const size_t index = state_pool.size();
@ -148,7 +148,7 @@ auto state_manager::append_preset_file(const std::string& preset_name) -> bool
auto state_manager::load_preset(const size_t preset) -> void auto state_manager::load_preset(const size_t preset) -> void
{ {
clear_graph_and_add_current(preset_states.at(preset)); clear_graph_and_add_current(preset_states[preset]);
current_preset = preset; current_preset = preset;
edited = false; edited = false;
} }
@ -270,8 +270,8 @@ auto state_manager::goto_most_distant_state() -> void
int max_distance = 0; int max_distance = 0;
size_t max_distance_index = 0; size_t max_distance_index = 0;
for (size_t i = 0; i < node_target_distances.distances.size(); ++i) { for (size_t i = 0; i < node_target_distances.distances.size(); ++i) {
if (node_target_distances.distances.at(i) > max_distance) { if (node_target_distances.distances[i] > max_distance) {
max_distance = node_target_distances.distances.at(i); max_distance = node_target_distances.distances[i];
max_distance_index = i; max_distance_index = i;
} }
} }
@ -285,7 +285,7 @@ auto state_manager::goto_closest_target_state() -> void
return; return;
} }
update_current_state(get_state(node_target_distances.nearest_targets.at(current_state_index))); update_current_state(get_state(node_target_distances.nearest_targets[current_state_index]));
} }
auto state_manager::populate_graph() -> void auto state_manager::populate_graph() -> void
@ -306,9 +306,9 @@ auto state_manager::populate_graph() -> void
const auto& [states, _links] = s.explore_state_space(); const auto& [states, _links] = s.explore_state_space();
synced_insert_statespace(states, _links); synced_insert_statespace(states, _links);
current_state_index = state_indices.at(p); current_state_index = state_indices[p];
previous_state_index = current_state_index; previous_state_index = current_state_index;
starting_state_index = state_indices.at(s); starting_state_index = state_indices[s];
// Search for cool stuff // Search for cool stuff
populate_winning_indices(); populate_winning_indices();
@ -393,7 +393,7 @@ auto state_manager::get_starting_index() const -> size_t
auto state_manager::get_state(const size_t index) const -> const puzzle& auto state_manager::get_state(const size_t index) const -> const puzzle&
{ {
return state_pool.at(index); return state_pool[index];
} }
auto state_manager::get_current_state() const -> const puzzle& auto state_manager::get_current_state() const -> const puzzle&
@ -468,7 +468,7 @@ auto state_manager::get_preset_count() const -> size_t
auto state_manager::get_current_preset_comment() const -> const std::string& auto state_manager::get_current_preset_comment() const -> const std::string&
{ {
return preset_comments.at(current_preset); return preset_comments[current_preset];
} }
auto state_manager::has_history() const -> bool auto state_manager::has_history() const -> bool

View File

@ -1,6 +1,7 @@
#include "threaded_physics.hpp" #include "threaded_physics.hpp"
#include "config.hpp" #include "config.hpp"
#include "mass_spring_system.hpp" #include "mass_spring_system.hpp"
#include "util.hpp"
#include <chrono> #include <chrono>
#include <raylib.h> #include <raylib.h>
@ -12,16 +13,30 @@
#include <tracy/Tracy.hpp> #include <tracy/Tracy.hpp>
#endif #endif
#ifdef ASYNC_OCTREE
auto threaded_physics::set_octree_pool_thread_name(size_t idx) -> void
{
BS::this_thread::set_os_thread_name(std::format("octree-{}", idx));
traceln("Using thread \"{}\"", BS::this_thread::get_os_thread_name().value_or("INVALID NAME"));
}
#endif
auto threaded_physics::physics_thread(physics_state& state) -> void auto threaded_physics::physics_thread(physics_state& state) -> void
{ {
#ifdef THREADPOOL
BS::this_thread::set_os_thread_name("physics");
#endif
mass_spring_system mass_springs; mass_spring_system mass_springs;
#ifdef ASYNC_OCTREE
BS::this_thread::set_os_thread_name("physics");
BS::thread_pool<> octree_thread(1, set_octree_pool_thread_name);
std::future<void> octree_future;
octree tree_buffer;
size_t last_mass_count = 0;
infoln("Using asynchronous octree builder.");
#endif
const auto visitor = overloads{ const auto visitor = overloads{
[&](const struct add_mass& am) [&](const struct add_mass&)
{ {
mass_springs.add_mass(); mass_springs.add_mass();
}, },
@ -29,7 +44,7 @@ auto threaded_physics::physics_thread(physics_state& state) -> void
{ {
mass_springs.add_spring(as.a, as.b); mass_springs.add_spring(as.a, as.b);
}, },
[&](const struct clear_graph& cg) [&](const struct clear_graph&)
{ {
mass_springs.clear(); mass_springs.clear();
}, },
@ -66,26 +81,58 @@ auto threaded_physics::physics_thread(physics_state& state) -> void
} }
} }
if (mass_springs.masses.empty()) { if (mass_springs.positions.empty()) {
std::this_thread::sleep_for(std::chrono::milliseconds(1)); std::this_thread::sleep_for(std::chrono::milliseconds(1));
continue; continue;
} }
// Physics update // Physics update
if (physics_accumulator.count() > TIMESTEP) { if (physics_accumulator.count() > TIMESTEP) {
#ifdef ASYNC_OCTREE
// Snapshot the positions so mass_springs is not mutating the vector while the octree is building
std::vector<Vector3> positions = mass_springs.positions;
// Start building the octree for the next physics update.
// Move the snapshot into the closure so it doesn't get captured by reference (don't use [&])
octree_future = octree_thread.submit_task([&tree_buffer, positions = std::move(positions)]()
{
octree::build_octree(tree_buffer, positions);
});
// Rebuild the tree synchronously if we changed the number of masses to not use
// an empty tree from the last frame in the frame where the graph was generated
if (last_mass_count != mass_springs.positions.size()) {
traceln("Rebuilding octree synchronously because graph size changed");
octree::build_octree(mass_springs.tree, mass_springs.positions);
last_mass_count = mass_springs.positions.size();
}
#else
octree::build_octree(mass_springs.tree, mass_springs.positions);
#endif
mass_springs.clear_forces(); mass_springs.clear_forces();
mass_springs.calculate_spring_forces(); mass_springs.calculate_spring_forces();
mass_springs.calculate_repulsion_forces(); mass_springs.calculate_repulsion_forces();
mass_springs.verlet_update(TIMESTEP * SIM_SPEED); mass_springs.update(TIMESTEP * SIM_SPEED);
// This is only helpful if we're drawing a grid at (0, 0, 0). Otherwise, it's just // This is only helpful if we're drawing a grid at (0, 0, 0). Otherwise, it's just
// expensive and yields no benefit since we can lock the camera to the center of mass // expensive and yields no benefit since we can lock the camera to the center of mass
// cheaply. mass_springs.center_masses(); // cheaply.
// mass_springs.center_masses();
++loop_iterations; ++loop_iterations;
physics_accumulator -= std::chrono::duration<double>(TIMESTEP); physics_accumulator -= std::chrono::duration<double>(TIMESTEP);
} }
#ifdef ASYNC_OCTREE
// Wait for the octree to be built
if (octree_future.valid()) {
octree_future.wait();
octree_future = std::future<void>{};
std::swap(mass_springs.tree, tree_buffer);
}
#endif
// Publish the positions for the renderer (copy) // Publish the positions for the renderer (copy)
#ifdef TRACY #ifdef TRACY
FrameMarkStart("PhysicsThreadProduceLock"); FrameMarkStart("PhysicsThreadProduceLock");
@ -114,16 +161,16 @@ auto threaded_physics::physics_thread(physics_state& state) -> void
if (mass_springs.tree.nodes.empty()) { if (mass_springs.tree.nodes.empty()) {
state.mass_center = Vector3Zero(); state.mass_center = Vector3Zero();
} else { } else {
state.mass_center = mass_springs.tree.nodes.at(0).mass_center; state.mass_center = mass_springs.tree.nodes[0].mass_center;
} }
state.masses.clear(); state.masses.clear();
state.masses.reserve(mass_springs.masses.size()); state.masses.reserve(mass_springs.positions.size());
for (const auto& mass : mass_springs.masses) { for (const Vector3& pos : mass_springs.positions) {
state.masses.emplace_back(mass.position); state.masses.emplace_back(pos);
} }
state.mass_count = mass_springs.masses.size(); state.mass_count = mass_springs.positions.size();
state.spring_count = mass_springs.springs.size(); state.spring_count = mass_springs.springs.size();
state.data_ready = true; state.data_ready = true;
@ -131,14 +178,25 @@ auto threaded_physics::physics_thread(physics_state& state) -> void
} }
// Notify the rendering thread that new data is available // Notify the rendering thread that new data is available
state.data_ready_cnd.notify_all(); state.data_ready_cnd.notify_all();
#ifdef TRACY
FrameMarkEnd("PhysicsThreadProduceLock");
FrameMarkEnd("PhysicsThread"); #ifdef TRACY
FrameMarkEnd("PhysicsThreadProduceLock"); FrameMarkEnd("PhysicsThread");
#endif #endif
} }
} }
auto threaded_physics::clear_cmd() -> void
{
{
#ifdef TRACY
std::lock_guard<LockableBase(std::mutex)> lock(state.command_mtx);
#else
std::lock_guard<std::mutex> lock(state.command_mtx);
#endif
state.pending_commands.emplace(clear_graph{});
}
}
auto threaded_physics::add_mass_cmd() -> void auto threaded_physics::add_mass_cmd() -> void
{ {
{ {
@ -163,18 +221,6 @@ auto threaded_physics::add_spring_cmd(const size_t a, const size_t b) -> void
} }
} }
auto threaded_physics::clear_cmd() -> void
{
{
#ifdef TRACY
std::lock_guard<LockableBase(std::mutex)> lock(state.command_mtx);
#else
std::lock_guard<std::mutex> lock(state.command_mtx);
#endif
state.pending_commands.emplace(clear_graph{});
}
}
auto threaded_physics::add_mass_springs_cmd(const size_t num_masses, auto threaded_physics::add_mass_springs_cmd(const size_t num_masses,
const std::vector<std::pair<size_t, size_t>>& springs) -> void const std::vector<std::pair<size_t, size_t>>& springs) -> void
{ {