diff --git a/CMakeLists.txt b/CMakeLists.txt index b6f9ebd..c33a962 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,8 +9,8 @@ if(POLICY CMP0167) endif() option(DISABLE_BACKWARD "Disable backward stacktrace printer" OFF) -option(DISABLE_TRACY "Disable the Tracy profiler client" ON) -option(DISABLE_TESTS "Disable building and running tests" ON) +option(DISABLE_TRACY "Disable the Tracy profiler client" OFF) +option(DISABLE_TESTS "Disable building and running tests" OFF) # Headers + Sources set(SOURCES diff --git a/include/config.hpp b/include/config.hpp index 53876da..95f793e 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -5,6 +5,22 @@ #define THREADPOOL // Enable physics threadpool +#ifdef THREADPOOL +#if defined(_WIN32) +#define NOGDI // All GDI defines and routines +#define NOUSER // All USER defines and routines +#endif +#define BS_THREAD_POOL_NATIVE_EXTENSIONS +#include +#if defined(_WIN32) // raylib uses these names as function parameters +#undef near +#undef far +#endif +#endif + +// TODO: Using the octree from the last frame completely breaks the physics :/ +// #define ASYNC_OCTREE + // Gets set by CMake // #define BACKWARD // Enable pretty stack traces // #define TRACY // Enable tracy profiling support diff --git a/include/mass_spring_system.hpp b/include/mass_spring_system.hpp index 25dc965..db77ee6 100644 --- a/include/mass_spring_system.hpp +++ b/include/mass_spring_system.hpp @@ -2,49 +2,13 @@ #define MASS_SPRING_SYSTEM_HPP_ #include "octree.hpp" -#include "util.hpp" #include "config.hpp" #include -#include - -#ifdef THREADPOOL -#if defined(_WIN32) -#define NOGDI // All GDI defines and routines -#define NOUSER // All USER defines and routines -#endif -#define BS_THREAD_POOL_NATIVE_EXTENSIONS -#include -#if defined(_WIN32) // raylib uses these names as function parameters -#undef near -#undef far -#endif -#endif class mass_spring_system { public: - class mass - { - public: - Vector3 position = Vector3Zero(); - Vector3 previous_position = Vector3Zero(); // for verlet integration - Vector3 velocity = Vector3Zero(); - Vector3 force = Vector3Zero(); - - public: - mass() = delete; - - explicit mass(const Vector3 _position) - : position(_position), previous_position(_position) {} - - public: - auto clear_force() -> void; - auto calculate_velocity(float delta_time) -> void; - auto calculate_position(float delta_time) -> void; - auto verlet_update(float delta_time) -> void; - }; - class spring { public: @@ -54,9 +18,6 @@ public: public: spring(const size_t _a, const size_t _b) : a(_a), b(_b) {} - - public: - static auto calculate_spring_force(mass& _a, mass& _b) -> void; }; private: @@ -65,26 +26,25 @@ private: #endif public: + static constexpr int SMALL_TASK_BLOCK_SIZE = 256; + static constexpr int LARGE_TASK_BLOCK_SIZE = 256; + octree tree; // This is the main ownership of all the states/masses/springs. - std::vector masses; + std::vector positions; + std::vector previous_positions; // for verlet integration + std::vector velocities; + std::vector forces; + std::vector springs; public: mass_spring_system() #ifdef THREADPOOL - : threads(std::thread::hardware_concurrency() - 1, set_thread_name) + : threads(std::thread::hardware_concurrency() - 2, set_mass_springs_pool_thread_name) #endif - { - infoln("Using Barnes-Hut + Octree repulsion force calculation."); - - #ifdef THREADPOOL - infoln("Thread-pool: {} threads.", threads.get_thread_count()); - #else - infoln("Thread-pool: Disabled."); - #endif - } + {} mass_spring_system(const mass_spring_system& copy) = delete; auto operator=(const mass_spring_system& copy) -> mass_spring_system& = delete; @@ -93,20 +53,22 @@ public: private: #ifdef THREADPOOL - static auto set_thread_name(size_t idx) -> void; + static auto set_mass_springs_pool_thread_name(size_t idx) -> void; #endif - auto build_octree() -> void; - public: auto clear() -> void; auto add_mass() -> void; auto add_spring(size_t a, size_t b) -> void; auto clear_forces() -> void; + auto calculate_spring_force(size_t s) -> void; auto calculate_spring_forces() -> void; auto calculate_repulsion_forces() -> void; - auto verlet_update(float delta_time) -> void; + auto integrate_velocity(size_t m, float dt) -> void; + auto integrate_position(size_t m, float dt) -> void; + auto verlet_update(size_t m, float dt) -> void; + auto update(float dt) -> void; auto center_masses() -> void; }; diff --git a/include/octree.hpp b/include/octree.hpp index 11573fc..defdf27 100644 --- a/include/octree.hpp +++ b/include/octree.hpp @@ -31,22 +31,20 @@ public: public: octree() = default; - octree(const octree& copy) = delete; - auto operator=(const octree& copy) -> octree& = delete; - octree(octree&& move) = delete; - auto operator=(octree&& move) -> octree& = delete; + // octree(const octree& copy) = delete; + // auto operator=(const octree& copy) -> octree& = delete; + // octree(octree&& move) = delete; + // auto operator=(octree&& move) -> octree& = delete; public: - auto create_empty_leaf(const Vector3& box_min, const Vector3& box_max) -> int; - [[nodiscard]] auto get_octant(int node_idx, const Vector3& pos) const -> int; - [[nodiscard]] auto get_child_bounds(int node_idx, int octant) const -> std::pair; - + auto create_empty_leaf(const Vector3& box_min, const Vector3& box_max) -> int; auto insert(int node_idx, int mass_id, const Vector3& pos, float mass, int depth) -> void; + static auto build_octree(octree& t, const std::vector& positions) -> void; [[nodiscard]] auto calculate_force(int node_idx, const Vector3& pos) const -> Vector3; }; -#endif +#endif \ No newline at end of file diff --git a/include/threaded_physics.hpp b/include/threaded_physics.hpp index 9fa6601..de9fb56 100644 --- a/include/threaded_physics.hpp +++ b/include/threaded_physics.hpp @@ -1,6 +1,8 @@ #ifndef PHYSICS_HPP_ #define PHYSICS_HPP_ +#include "config.hpp" + #include #include #include @@ -80,15 +82,16 @@ public: } private: + #ifdef ASYNC_OCTREE + static auto set_octree_pool_thread_name(size_t idx) -> void; + #endif + static auto physics_thread(physics_state& state) -> void; public: - auto add_mass_cmd() -> void; - - auto add_spring_cmd(size_t a, size_t b) -> void; - auto clear_cmd() -> void; - + auto add_mass_cmd() -> void; + auto add_spring_cmd(size_t a, size_t b) -> void; auto add_mass_springs_cmd(size_t num_masses, const std::vector>& springs) -> void; }; diff --git a/include/util.hpp b/include/util.hpp index 4d2ce84..1813287 100644 --- a/include/util.hpp +++ b/include/util.hpp @@ -140,6 +140,13 @@ inline auto ansi_reset() -> std::string } // std::println doesn't work with mingw +template +auto traceln(std::format_string fmt, Args&&... args) -> void +{ + std::cout << std::format("[{}TRACE{}]: ", ansi_bold_fg(fg_cyan), ansi_reset()) << std::format( + fmt, std::forward(args)...) << std::endl; +} + template auto infoln(std::format_string fmt, Args&&... args) -> void { diff --git a/src/main.cpp b/src/main.cpp index fe83b1c..6298a7a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -64,6 +64,12 @@ auto main(int argc, char* argv[]) -> int infoln("Tracy adapter disabled."); #endif + #ifdef THREADPOOL + infoln("Thread-pool enabled."); + #else + infoln("Thread-pool disabled."); + #endif + // RayLib window setup SetTraceLogLevel(LOG_ERROR); SetConfigFlags(FLAG_VSYNC_HINT); @@ -141,8 +147,8 @@ auto main(int argc, char* argv[]) -> int // Update the camera after the physics, so target lock is smooth size_t current_index = state.get_current_index(); if (masses.size() > current_index) { - const mass_spring_system::mass& current_mass = mass_spring_system::mass(masses.at(current_index)); - camera.update(current_mass.position, mass_center, input.camera_lock, input.camera_mass_center_lock); + const Vector3& current_mass = masses[current_index]; + camera.update(current_mass, mass_center, input.camera_lock, input.camera_mass_center_lock); } // Rendering @@ -164,4 +170,4 @@ auto main(int argc, char* argv[]) -> int CloseWindow(); return 0; -} +} \ No newline at end of file diff --git a/src/mass_spring_system.cpp b/src/mass_spring_system.cpp index bc065f6..b927067 100644 --- a/src/mass_spring_system.cpp +++ b/src/mass_spring_system.cpp @@ -1,67 +1,77 @@ #include "mass_spring_system.hpp" #include "config.hpp" +#include "util.hpp" #include +#include #ifdef TRACY #include #endif -auto mass_spring_system::mass::clear_force() -> void +auto mass_spring_system::calculate_spring_force(const size_t s) -> void { - force = Vector3Zero(); + const spring _s = springs[s]; + const Vector3 a_pos = positions[_s.a]; + const Vector3 b_pos = positions[_s.b]; + const Vector3 a_vel = velocities[_s.a]; + const Vector3 b_vel = velocities[_s.b]; + + const Vector3 delta_pos = a_pos - b_pos; + const Vector3 delta_vel = a_vel - b_vel; + + const float sq_len = Vector3DotProduct(delta_pos, delta_pos); + const float inv_len = rsqrt(sq_len); + const float len = sq_len * inv_len; + + const float hooke = SPRING_CONSTANT * (len - REST_LENGTH); + const float dampening = DAMPENING_CONSTANT * Vector3DotProduct(delta_vel, delta_pos) * inv_len; + + const Vector3 a_force = Vector3Scale(delta_pos, -(hooke + dampening) * inv_len); + const Vector3 b_force = a_force * -1.0f; + + forces[_s.a] += a_force; + forces[_s.b] += b_force; } -auto mass_spring_system::mass::calculate_velocity(const float delta_time) -> void +auto mass_spring_system::integrate_velocity(const size_t m, const float dt) -> void { - const Vector3 acceleration = Vector3Scale(force, 1.0 / MASS); - const Vector3 temp = Vector3Scale(acceleration, delta_time); - velocity = Vector3Add(velocity, temp); + const Vector3 acc = forces[m] / MASS; + velocities[m] += acc * dt; } -auto mass_spring_system::mass::calculate_position(const float delta_time) -> void +auto mass_spring_system::integrate_position(const size_t m, const float dt) -> void { - previous_position = position; - - const Vector3 temp = Vector3Scale(velocity, delta_time); - position = Vector3Add(position, temp); + previous_positions[m] = positions[m]; + positions[m] += velocities[m] * dt; } -auto mass_spring_system::mass::verlet_update(const float delta_time) -> void +auto mass_spring_system::verlet_update(const size_t m, const float dt) -> void { - const Vector3 acceleration = Vector3Scale(force, 1.0 / MASS); - const Vector3 temp_position = position; + const Vector3 acc = (forces[m] / MASS) * dt * dt; + const Vector3 pos = positions[m]; - Vector3 displacement = Vector3Subtract(position, previous_position); - const Vector3 accel_term = Vector3Scale(acceleration, delta_time * delta_time); + Vector3 delta_pos = pos - previous_positions[m]; + delta_pos *= 1.0 - VERLET_DAMPENING; // Minimal dampening - // Minimal dampening - displacement = Vector3Scale(displacement, 1.0 - VERLET_DAMPENING); - - position = Vector3Add(Vector3Add(position, displacement), accel_term); - previous_position = temp_position; + positions[m] += delta_pos + acc; + previous_positions[m] = pos; } -auto mass_spring_system::spring::calculate_spring_force(mass& _a, mass& _b) -> void +#ifdef THREADPOOL +auto mass_spring_system::set_mass_springs_pool_thread_name(size_t idx) -> void { - // TODO: Use a bungee force here instead of springs, since we already have global repulsion? - const Vector3 delta_position = Vector3Subtract(_a.position, _b.position); - const float current_length = Vector3Length(delta_position); - const Vector3 delta_velocity = Vector3Subtract(_a.velocity, _b.velocity); - - const float hooke = SPRING_CONSTANT * (current_length - REST_LENGTH); - const float dampening = DAMPENING_CONSTANT * Vector3DotProduct(delta_velocity, delta_position) / current_length; - - const Vector3 force_a = Vector3Scale(delta_position, -(hooke + dampening) / current_length); - const Vector3 force_b = Vector3Scale(force_a, -1.0); - - _a.force = Vector3Add(_a.force, force_a); - _b.force = Vector3Add(_b.force, force_b); + BS::this_thread::set_os_thread_name(std::format("repulsion-{}", idx)); + traceln("Using thread \"{}\"", BS::this_thread::get_os_thread_name().value_or("INVALID NAME")); } +#endif auto mass_spring_system::clear() -> void { - masses.clear(); + positions.clear(); + previous_positions.clear(); + velocities.clear(); + forces.clear(); springs.clear(); tree.nodes.clear(); } @@ -77,14 +87,17 @@ auto mass_spring_system::add_mass() -> void // }; // position = Vector3Scale(Vector3Normalize(position), REST_LENGTH * 2.0); - masses.emplace_back(Vector3Zero()); + positions.emplace_back(Vector3Zero()); + previous_positions.emplace_back(Vector3Zero()); + velocities.emplace_back(Vector3Zero()); + forces.emplace_back(Vector3Zero()); } auto mass_spring_system::add_spring(size_t a, size_t b) -> void { // Update masses to be located along a random walk when adding the springs - const mass& mass_a = masses.at(a); - mass& mass_b = masses.at(b); + const Vector3& mass_a = positions[a]; + const Vector3& mass_b = positions[b]; Vector3 offset{ static_cast(GetRandomValue(-100, 100)), static_cast(GetRandomValue(-100, 100)), @@ -94,7 +107,7 @@ auto mass_spring_system::add_spring(size_t a, size_t b) -> void // If the offset moves the mass closer to the current center of mass, flip it if (!tree.nodes.empty()) { - const Vector3 mass_center_direction = Vector3Subtract(mass_a.position, tree.nodes.at(0).mass_center); + const Vector3 mass_center_direction = Vector3Subtract(positions[a], tree.nodes[0].mass_center); const float mass_center_distance = Vector3Length(mass_center_direction); if (mass_center_distance > 0 && Vector3DotProduct(offset, mass_center_direction) < 0.0f) { @@ -102,8 +115,8 @@ auto mass_spring_system::add_spring(size_t a, size_t b) -> void } } - mass_b.position = mass_a.position + offset; - mass_b.previous_position = mass_b.position; + positions[b] = mass_a + offset; + previous_positions[b] = mass_b; // infoln("Adding spring: ({}, {}, {})->({}, {}, {})", mass_a.position.x, mass_a.position.y, // mass_a.position.z, @@ -118,9 +131,7 @@ auto mass_spring_system::clear_forces() -> void ZoneScoped; #endif - for (auto& m : masses) { - m.clear_force(); - } + memset(forces.data(), 0, forces.size() * sizeof(Vector3)); } auto mass_spring_system::calculate_spring_forces() -> void @@ -129,56 +140,18 @@ auto mass_spring_system::calculate_spring_forces() -> void ZoneScoped; #endif - for (const auto s : springs) { - mass& a = masses.at(s.a); - mass& b = masses.at(s.b); - spring::calculate_spring_force(a, b); + const auto solve_spring_force = [&](const int i) + { + calculate_spring_force(i); + }; + + #ifdef THREADPOOL + threads.submit_loop(0, springs.size(), solve_spring_force, SMALL_TASK_BLOCK_SIZE).wait(); + #else + for (size_t i = 0; i < springs.size(); ++i) { + solve_spring_force(i); } -} - -#ifdef THREADPOOL -auto mass_spring_system::set_thread_name(size_t idx) -> void -{ - BS::this_thread::set_os_thread_name(std::format("bh-worker-{}", idx)); -} -#endif - -auto mass_spring_system::build_octree() -> void -{ - #ifdef TRACY - ZoneScoped; #endif - - tree.nodes.clear(); - tree.nodes.reserve(masses.size() * 2); - - // Compute bounding box around all masses - Vector3 min{FLT_MAX, FLT_MAX, FLT_MAX}; - Vector3 max{-FLT_MAX, -FLT_MAX, -FLT_MAX}; - for (const auto& m : masses) { - min.x = std::min(min.x, m.position.x); - max.x = std::max(max.x, m.position.x); - min.y = std::min(min.y, m.position.y); - max.y = std::max(max.y, m.position.y); - min.z = std::min(min.z, m.position.z); - max.z = std::max(max.z, m.position.z); - } - - // Pad the bounding box - constexpr float pad = 1.0; - min = Vector3Subtract(min, Vector3Scale(Vector3One(), pad)); - max = Vector3Add(max, Vector3Scale(Vector3One(), pad)); - - // Make it cubic (so subdivisions are balanced) - const float max_extent = std::max({max.x - min.x, max.y - min.y, max.z - min.z}); - max = Vector3Add(min, Vector3Scale(Vector3One(), max_extent)); - - // Root node spans the entire area - const int root = tree.create_empty_leaf(min, max); - - for (size_t i = 0; i < masses.size(); ++i) { - tree.insert(root, static_cast(i), masses[i].position, MASS, 0); - } } auto mass_spring_system::calculate_repulsion_forces() -> void @@ -187,45 +160,60 @@ auto mass_spring_system::calculate_repulsion_forces() -> void ZoneScoped; #endif - build_octree(); - - auto solve_octree = [&](const int i) + const auto solve_octree = [&](const int i) { - const Vector3 force = tree.calculate_force(0, masses[i].position); - masses[i].force = Vector3Add(masses[i].force, force); + const Vector3 force = tree.calculate_force(0, positions[i]); + forces[i] += force; }; // Calculate forces using Barnes-Hut #ifdef THREADPOOL - const BS::multi_future loop_future = threads.submit_loop(0, masses.size(), solve_octree, 256); - loop_future.wait(); + threads.submit_loop(0, positions.size(), solve_octree, LARGE_TASK_BLOCK_SIZE).wait(); #else - for (size_t i = 0; i < masses.size(); ++i) { + for (size_t i = 0; i < positions.size(); ++i) { solve_octree(i); } #endif } -auto mass_spring_system::verlet_update(const float delta_time) -> void +auto mass_spring_system::update(const float dt) -> void { #ifdef TRACY ZoneScoped; #endif - for (auto& m : masses) { - m.verlet_update(delta_time); + const auto update = [&](const int i) + { + verlet_update(i, dt); + }; + + #ifdef THREADPOOL + threads.submit_loop(0, positions.size(), update, SMALL_TASK_BLOCK_SIZE).wait(); + #else + for (size_t i = 0; i < positions.size(); ++i) { + update(i); } + #endif } auto mass_spring_system::center_masses() -> void { Vector3 mean = Vector3Zero(); - for (const auto& m : masses) { - mean += m.position; + for (const Vector3& pos : positions) { + mean += pos; } - mean /= static_cast(masses.size()); + mean /= static_cast(positions.size()); - for (auto& m : masses) { - m.position -= mean; + const auto center_mass = [&](const int i) + { + positions[i] -= mean; + }; + + #ifdef THREADPOOL + threads.submit_loop(0, positions.size(), center_mass, SMALL_TASK_BLOCK_SIZE).wait(); + #else + for (size_t i = 0; i < positions.size(); ++i) { + center_mass(i); } + #endif } \ No newline at end of file diff --git a/src/octree.cpp b/src/octree.cpp index 3eaa893..86928c6 100644 --- a/src/octree.cpp +++ b/src/octree.cpp @@ -1,7 +1,7 @@ #include "octree.hpp" #include "config.hpp" -#include "util.hpp" +#include #include #ifdef TRACY @@ -19,16 +19,6 @@ auto octree::node::child_count() const -> int return child_count; } -auto octree::create_empty_leaf(const Vector3& box_min, const Vector3& box_max) -> int -{ - node n; - n.box_min = box_min; - n.box_max = box_max; - nodes.emplace_back(n); - - return static_cast(nodes.size() - 1); -} - auto octree::get_octant(const int node_idx, const Vector3& pos) const -> int { const node& n = nodes[node_idx]; @@ -75,6 +65,16 @@ auto octree::get_child_bounds(const int node_idx, const int octant) const -> std return std::make_pair(min, max); } +auto octree::create_empty_leaf(const Vector3& box_min, const Vector3& box_max) -> int +{ + node n; + n.box_min = box_min; + n.box_max = box_max; + nodes.emplace_back(n); + + return static_cast(nodes.size() - 1); +} + auto octree::insert(const int node_idx, const int mass_id, const Vector3& pos, const float mass, const int depth) -> void { @@ -153,6 +153,44 @@ auto octree::insert(const int node_idx, const int mass_id, const Vector3& pos, c nodes[node_idx].mass_total = new_mass; } +auto octree::build_octree(octree& t, const std::vector& positions) -> void +{ + #ifdef TRACY + ZoneScoped; + #endif + + t.nodes.clear(); + t.nodes.reserve(positions.size() * 2); + + // Compute bounding box around all masses + Vector3 min{FLT_MAX, FLT_MAX, FLT_MAX}; + Vector3 max{-FLT_MAX, -FLT_MAX, -FLT_MAX}; + for (const auto& [x, y, z] : positions) { + min.x = std::min(min.x, x); + max.x = std::max(max.x, x); + min.y = std::min(min.y, y); + max.y = std::max(max.y, y); + min.z = std::min(min.z, z); + max.z = std::max(max.z, z); + } + + // Pad the bounding box + constexpr float pad = 1.0; + min = Vector3Subtract(min, Vector3Scale(Vector3One(), pad)); + max = Vector3Add(max, Vector3Scale(Vector3One(), pad)); + + // Make it cubic (so subdivisions are balanced) + const float max_extent = std::max({max.x - min.x, max.y - min.y, max.z - min.z}); + max = Vector3Add(min, Vector3Scale(Vector3One(), max_extent)); + + // Root node spans the entire area + const int root = t.create_empty_leaf(min, max); + + for (size_t i = 0; i < positions.size(); ++i) { + t.insert(root, static_cast(i), positions[i], MASS, 0); + } +} + auto octree::calculate_force(const int node_idx, const Vector3& pos) const -> Vector3 { if (node_idx < 0) { diff --git a/src/puzzle.cpp b/src/puzzle.cpp index 1407e94..54d1f54 100644 --- a/src/puzzle.cpp +++ b/src/puzzle.cpp @@ -542,7 +542,7 @@ auto puzzle::try_get_invalid_reason() const -> std::optional const auto [w, h, gx, gy, r, g] = unpack_meta(); - infoln("Validating puzzle {}", string_repr()); + traceln("Validating puzzle \"{}\"", string_repr()); const std::optional& b = try_get_target_block(); if (get_goal() && !b) { diff --git a/src/renderer.cpp b/src/renderer.cpp index 70b92b8..47913fe 100644 --- a/src/renderer.cpp +++ b/src/renderer.cpp @@ -6,7 +6,7 @@ #include #ifdef TRACY - #include +#include #endif auto renderer::update_texture_sizes() -> void @@ -29,9 +29,9 @@ auto renderer::update_texture_sizes() -> void auto renderer::draw_mass_springs(const std::vector& masses) -> void { -#ifdef TRACY + #ifdef TRACY ZoneScoped; -#endif + #endif if (masses.size() != state.get_state_count()) { // Because the physics run in a different thread, it might need time to catch up @@ -40,16 +40,16 @@ auto renderer::draw_mass_springs(const std::vector& masses) -> void // Prepare connection batching { -#ifdef TRACY - ZoneNamedN(prepare_masses, "PrepareConnectionsBatching", true); -#endif + #ifdef TRACY + ZoneNamedN(prepare_connections, "PrepareConnectionsBatching", true); + #endif connections.clear(); connections.reserve(state.get_target_count()); if (input.connect_solutions) { for (const size_t& _state : state.get_winning_indices()) { - const Vector3& current_mass = masses.at(state.get_current_index()); - const Vector3& winning_mass = masses.at(_state); + const Vector3& current_mass = masses[state.get_current_index()]; + const Vector3& winning_mass = masses[_state]; connections.emplace_back(current_mass, winning_mass); DrawLine3D(current_mass, winning_mass, Fade(TARGET_BLOCK_COLOR, 0.5)); } @@ -58,9 +58,9 @@ auto renderer::draw_mass_springs(const std::vector& masses) -> void // Prepare cube instancing { -#ifdef TRACY + #ifdef TRACY ZoneNamedN(prepare_masses, "PrepareMassInstancing", true); -#endif + #endif if (masses.size() < DRAW_VERTICES_LIMIT) { // Don't have to reserve, capacity is already set to DRAW_VERTICES_LIMIT in constructor @@ -73,12 +73,10 @@ auto renderer::draw_mass_springs(const std::vector& masses) -> void // Normal vertex Color c = VERTEX_COLOR; - if ((input.mark_solutions || input.mark_path) && - state.get_winning_indices().contains(mass)) { + if ((input.mark_solutions || input.mark_path) && state.get_winning_indices().contains(mass)) { // Winning vertex c = VERTEX_TARGET_COLOR; - } else if ((input.mark_solutions || input.mark_path) && - state.get_path_indices().contains(mass)) { + } else if ((input.mark_solutions || input.mark_path) && state.get_path_indices().contains(mass)) { // Path vertex c = VERTEX_PATH_COLOR; } else if (mass == state.get_starting_index()) { @@ -104,15 +102,15 @@ auto renderer::draw_mass_springs(const std::vector& masses) -> void // Draw springs (batched) { -#ifdef TRACY + #ifdef TRACY ZoneNamedN(draw_springs, "DrawSprings", true); -#endif + #endif rlBegin(RL_LINES); for (const auto& [from, to] : state.get_links()) { if (masses.size() > from && masses.size() > to) { - const auto& [ax, ay, az] = masses.at(from); - const auto& [bx, by, bz] = masses.at(to); + const auto& [ax, ay, az] = masses[from]; + const auto& [bx, by, bz] = masses[to]; rlColor4ub(EDGE_COLOR.r, EDGE_COLOR.g, EDGE_COLOR.b, EDGE_COLOR.a); rlVertex3f(ax, ay, az); rlVertex3f(bx, by, bz); @@ -123,17 +121,16 @@ auto renderer::draw_mass_springs(const std::vector& masses) -> void // Draw masses (instanced) { -#ifdef TRACY + #ifdef TRACY ZoneNamedN(draw_masses, "DrawMasses", true); -#endif + #endif if (masses.size() < DRAW_VERTICES_LIMIT) { // NOTE: I don't know if drawing all this inside a shader would make it // much faster... The amount of data sent to the GPU would be // reduced (just positions instead of matrices), but is this // noticable for < 100000 cubes? - DrawMeshInstanced(cube_instance, vertex_mat, transforms.data(), - masses.size()); // NOLINT(*-narrowing-conversions) + DrawMeshInstanced(cube_instance, vertex_mat, transforms.data(), masses.size()); // NOLINT(*-narrowing-conversions) } } @@ -152,9 +149,8 @@ auto renderer::draw_mass_springs(const std::vector& masses) -> void // Mark current state const size_t current_index = state.get_current_index(); if (masses.size() > current_index) { - const Vector3& current_mass = masses.at(current_index); - DrawCube(current_mass, VERTEX_SIZE * 2, VERTEX_SIZE * 2, VERTEX_SIZE * 2, - VERTEX_CURRENT_COLOR); + const Vector3& current_mass = masses[current_index]; + DrawCube(current_mass, VERTEX_SIZE * 2, VERTEX_SIZE * 2, VERTEX_SIZE * 2, VERTEX_CURRENT_COLOR); } EndMode3D(); @@ -163,9 +159,9 @@ auto renderer::draw_mass_springs(const std::vector& masses) -> void auto renderer::draw_klotski() const -> void { -#ifdef TRACY + #ifdef TRACY ZoneScoped; -#endif + #endif BeginTextureMode(klotski_target); ClearBackground(RAYWHITE); @@ -177,9 +173,9 @@ auto renderer::draw_klotski() const -> void auto renderer::draw_menu() const -> void { -#ifdef TRACY + #ifdef TRACY ZoneScoped; -#endif + #endif BeginTextureMode(menu_target); ClearBackground(RAYWHITE); @@ -194,32 +190,28 @@ auto renderer::draw_textures(const int fps, const int ups, const size_t mass_cou { BeginDrawing(); - DrawTextureRec(menu_target.texture, - Rectangle(0, 0, menu_target.texture.width, -menu_target.texture.height), + DrawTextureRec(menu_target.texture, Rectangle(0, 0, menu_target.texture.width, -menu_target.texture.height), Vector2(0, 0), WHITE); DrawTextureRec(klotski_target.texture, Rectangle(0, 0, klotski_target.texture.width, -klotski_target.texture.height), Vector2(0, MENU_HEIGHT), WHITE); - DrawTextureRec(render_target.texture, - Rectangle(0, 0, render_target.texture.width, -render_target.texture.height), + DrawTextureRec(render_target.texture, Rectangle(0, 0, render_target.texture.width, -render_target.texture.height), Vector2(GetScreenWidth() / 2.0f, MENU_HEIGHT), WHITE); // Draw borders DrawRectangleLinesEx(Rectangle(0, 0, GetScreenWidth(), MENU_HEIGHT), 1.0f, BLACK); - DrawRectangleLinesEx( - Rectangle(0, MENU_HEIGHT, GetScreenWidth() / 2.0f, GetScreenHeight() - MENU_HEIGHT), 1.0f, - BLACK); + DrawRectangleLinesEx(Rectangle(0, MENU_HEIGHT, GetScreenWidth() / 2.0f, GetScreenHeight() - MENU_HEIGHT), 1.0f, + BLACK); DrawRectangleLinesEx(Rectangle(GetScreenWidth() / 2.0f, MENU_HEIGHT, GetScreenWidth() / 2.0f, - GetScreenHeight() - MENU_HEIGHT), - 1.0f, BLACK); + GetScreenHeight() - MENU_HEIGHT), 1.0f, BLACK); gui.draw(fps, ups, mass_count, spring_count); EndDrawing(); } -auto renderer::render(const std::vector& masses, const int fps, const int ups, - const size_t mass_count, const size_t spring_count) -> void +auto renderer::render(const std::vector& masses, const int fps, const int ups, const size_t mass_count, + const size_t spring_count) -> void { update_texture_sizes(); @@ -227,4 +219,4 @@ auto renderer::render(const std::vector& masses, const int fps, const i draw_klotski(); draw_menu(); draw_textures(fps, ups, mass_count, spring_count); -} +} \ No newline at end of file diff --git a/src/state_manager.cpp b/src/state_manager.cpp index 8a3d6e9..b1a1180 100644 --- a/src/state_manager.cpp +++ b/src/state_manager.cpp @@ -12,7 +12,7 @@ auto state_manager::synced_try_insert_state(const puzzle& state) -> size_t { if (state_indices.contains(state)) { - return state_indices.at(state); + return state_indices[state]; } const size_t index = state_pool.size(); @@ -148,7 +148,7 @@ auto state_manager::append_preset_file(const std::string& preset_name) -> bool auto state_manager::load_preset(const size_t preset) -> void { - clear_graph_and_add_current(preset_states.at(preset)); + clear_graph_and_add_current(preset_states[preset]); current_preset = preset; edited = false; } @@ -270,8 +270,8 @@ auto state_manager::goto_most_distant_state() -> void int max_distance = 0; size_t max_distance_index = 0; for (size_t i = 0; i < node_target_distances.distances.size(); ++i) { - if (node_target_distances.distances.at(i) > max_distance) { - max_distance = node_target_distances.distances.at(i); + if (node_target_distances.distances[i] > max_distance) { + max_distance = node_target_distances.distances[i]; max_distance_index = i; } } @@ -285,7 +285,7 @@ auto state_manager::goto_closest_target_state() -> void return; } - update_current_state(get_state(node_target_distances.nearest_targets.at(current_state_index))); + update_current_state(get_state(node_target_distances.nearest_targets[current_state_index])); } auto state_manager::populate_graph() -> void @@ -306,9 +306,9 @@ auto state_manager::populate_graph() -> void const auto& [states, _links] = s.explore_state_space(); synced_insert_statespace(states, _links); - current_state_index = state_indices.at(p); + current_state_index = state_indices[p]; previous_state_index = current_state_index; - starting_state_index = state_indices.at(s); + starting_state_index = state_indices[s]; // Search for cool stuff populate_winning_indices(); @@ -393,7 +393,7 @@ auto state_manager::get_starting_index() const -> size_t auto state_manager::get_state(const size_t index) const -> const puzzle& { - return state_pool.at(index); + return state_pool[index]; } auto state_manager::get_current_state() const -> const puzzle& @@ -468,7 +468,7 @@ auto state_manager::get_preset_count() const -> size_t auto state_manager::get_current_preset_comment() const -> const std::string& { - return preset_comments.at(current_preset); + return preset_comments[current_preset]; } auto state_manager::has_history() const -> bool diff --git a/src/threaded_physics.cpp b/src/threaded_physics.cpp index e2df1b3..859da31 100644 --- a/src/threaded_physics.cpp +++ b/src/threaded_physics.cpp @@ -1,6 +1,7 @@ #include "threaded_physics.hpp" #include "config.hpp" #include "mass_spring_system.hpp" +#include "util.hpp" #include #include @@ -12,16 +13,30 @@ #include #endif +#ifdef ASYNC_OCTREE +auto threaded_physics::set_octree_pool_thread_name(size_t idx) -> void +{ + BS::this_thread::set_os_thread_name(std::format("octree-{}", idx)); + traceln("Using thread \"{}\"", BS::this_thread::get_os_thread_name().value_or("INVALID NAME")); +} +#endif + auto threaded_physics::physics_thread(physics_state& state) -> void { - #ifdef THREADPOOL - BS::this_thread::set_os_thread_name("physics"); - #endif - mass_spring_system mass_springs; + #ifdef ASYNC_OCTREE + BS::this_thread::set_os_thread_name("physics"); + + BS::thread_pool<> octree_thread(1, set_octree_pool_thread_name); + std::future octree_future; + octree tree_buffer; + size_t last_mass_count = 0; + infoln("Using asynchronous octree builder."); + #endif + const auto visitor = overloads{ - [&](const struct add_mass& am) + [&](const struct add_mass&) { mass_springs.add_mass(); }, @@ -29,7 +44,7 @@ auto threaded_physics::physics_thread(physics_state& state) -> void { mass_springs.add_spring(as.a, as.b); }, - [&](const struct clear_graph& cg) + [&](const struct clear_graph&) { mass_springs.clear(); }, @@ -66,26 +81,58 @@ auto threaded_physics::physics_thread(physics_state& state) -> void } } - if (mass_springs.masses.empty()) { + if (mass_springs.positions.empty()) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } // Physics update if (physics_accumulator.count() > TIMESTEP) { + #ifdef ASYNC_OCTREE + // Snapshot the positions so mass_springs is not mutating the vector while the octree is building + std::vector positions = mass_springs.positions; + + // Start building the octree for the next physics update. + // Move the snapshot into the closure so it doesn't get captured by reference (don't use [&]) + octree_future = octree_thread.submit_task([&tree_buffer, positions = std::move(positions)]() + { + octree::build_octree(tree_buffer, positions); + }); + + // Rebuild the tree synchronously if we changed the number of masses to not use + // an empty tree from the last frame in the frame where the graph was generated + if (last_mass_count != mass_springs.positions.size()) { + traceln("Rebuilding octree synchronously because graph size changed"); + octree::build_octree(mass_springs.tree, mass_springs.positions); + last_mass_count = mass_springs.positions.size(); + } + #else + octree::build_octree(mass_springs.tree, mass_springs.positions); + #endif + mass_springs.clear_forces(); mass_springs.calculate_spring_forces(); mass_springs.calculate_repulsion_forces(); - mass_springs.verlet_update(TIMESTEP * SIM_SPEED); + mass_springs.update(TIMESTEP * SIM_SPEED); // This is only helpful if we're drawing a grid at (0, 0, 0). Otherwise, it's just // expensive and yields no benefit since we can lock the camera to the center of mass - // cheaply. mass_springs.center_masses(); + // cheaply. + // mass_springs.center_masses(); ++loop_iterations; physics_accumulator -= std::chrono::duration(TIMESTEP); } + #ifdef ASYNC_OCTREE + // Wait for the octree to be built + if (octree_future.valid()) { + octree_future.wait(); + octree_future = std::future{}; + std::swap(mass_springs.tree, tree_buffer); + } + #endif + // Publish the positions for the renderer (copy) #ifdef TRACY FrameMarkStart("PhysicsThreadProduceLock"); @@ -114,16 +161,16 @@ auto threaded_physics::physics_thread(physics_state& state) -> void if (mass_springs.tree.nodes.empty()) { state.mass_center = Vector3Zero(); } else { - state.mass_center = mass_springs.tree.nodes.at(0).mass_center; + state.mass_center = mass_springs.tree.nodes[0].mass_center; } state.masses.clear(); - state.masses.reserve(mass_springs.masses.size()); - for (const auto& mass : mass_springs.masses) { - state.masses.emplace_back(mass.position); + state.masses.reserve(mass_springs.positions.size()); + for (const Vector3& pos : mass_springs.positions) { + state.masses.emplace_back(pos); } - state.mass_count = mass_springs.masses.size(); + state.mass_count = mass_springs.positions.size(); state.spring_count = mass_springs.springs.size(); state.data_ready = true; @@ -131,14 +178,25 @@ auto threaded_physics::physics_thread(physics_state& state) -> void } // Notify the rendering thread that new data is available state.data_ready_cnd.notify_all(); - #ifdef TRACY - FrameMarkEnd("PhysicsThreadProduceLock"); - FrameMarkEnd("PhysicsThread"); + #ifdef TRACY + FrameMarkEnd("PhysicsThreadProduceLock"); FrameMarkEnd("PhysicsThread"); #endif } } +auto threaded_physics::clear_cmd() -> void +{ + { + #ifdef TRACY + std::lock_guard lock(state.command_mtx); + #else + std::lock_guard lock(state.command_mtx); + #endif + state.pending_commands.emplace(clear_graph{}); + } +} + auto threaded_physics::add_mass_cmd() -> void { { @@ -163,18 +221,6 @@ auto threaded_physics::add_spring_cmd(const size_t a, const size_t b) -> void } } -auto threaded_physics::clear_cmd() -> void -{ - { - #ifdef TRACY - std::lock_guard lock(state.command_mtx); - #else - std::lock_guard lock(state.command_mtx); - #endif - state.pending_commands.emplace(clear_graph{}); - } -} - auto threaded_physics::add_mass_springs_cmd(const size_t num_masses, const std::vector>& springs) -> void {