diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ae3f0a..9c93ef5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,16 +19,16 @@ option(DISABLE_BENCH "Disable building benchmarks" OFF) set(SOURCES src/backward.cpp src/bits.cpp + src/cpu_layout_engine.cpp + src/cpu_spring_system.cpp src/graph_distances.cpp src/input_handler.cpp src/load_save.cpp - src/mass_spring_system.cpp src/octree.cpp src/orbit_camera.cpp src/puzzle.cpp src/renderer.cpp src/state_manager.cpp - src/threaded_physics.cpp src/user_interface.cpp ) diff --git a/benchmark/state_space.cpp b/benchmark/state_space.cpp index f26beeb..62a7630 100644 --- a/benchmark/state_space.cpp +++ b/benchmark/state_space.cpp @@ -177,7 +177,7 @@ static auto explore_rush_hour_puzzle_space(benchmark::State& state) -> void constexpr std::tuple target_block_pos_range = { 0, goal_y, - board_width - 1, + goal_x, goal_y }; diff --git a/flake.nix b/flake.nix index 1a5dee1..205a0ed 100644 --- a/flake.nix +++ b/flake.nix @@ -113,7 +113,7 @@ rec { abbr -a run "${buildRelease} && ./cmake-build-release/masssprings" abbr -a run-clusters "${buildRelease} && ./cmake-build-release/masssprings --output=clusters.puzzle --space=rh --w=6 --h=6 --gx=4 --gy=2 --blocks=4" abbr -a runtests "${buildDebug} && ./cmake-build-debug/tests" - abbr -a runbenchs "mv benchs.json benchs.old.json; ${buildRelease} && sudo cpupower frequency-set --governor performance && ./cmake-build-release/benchmarks --benchmark_out=benchs.json --benchmark_out_format=console; sudo cpupower frequency-set --governor powersave" + abbr -a runbenchs "mv -f benchs.json benchs.old.json; ${buildRelease} && sudo cpupower frequency-set --governor performance && ./cmake-build-release/benchmarks --benchmark_out=benchs.json --benchmark_out_format=console; sudo cpupower frequency-set --governor powersave" abbr -a rungdb "${buildDebug} && gdb --tui ./cmake-build-debug/masssprings" abbr -a runvalgrind "${buildDebug} && valgrind --leak-check=full --show-reachable=no --show-leak-kinds=definite,indirect,possible --track-origins=no --suppressions=valgrind.supp --log-file=valgrind.log ./cmake-build-debug/masssprings && cat valgrind.log" abbr -a runperf "${buildRelease} && perf record -g ./cmake-build-release/masssprings && hotspot ./perf.data" diff --git a/include/threaded_physics.hpp b/include/cpu_layout_engine.hpp similarity index 85% rename from include/threaded_physics.hpp rename to include/cpu_layout_engine.hpp index 4bbb2a8..edfe346 100644 --- a/include/threaded_physics.hpp +++ b/include/cpu_layout_engine.hpp @@ -13,7 +13,7 @@ #include #include -class threaded_physics +class cpu_layout_engine { struct add_mass {}; @@ -64,17 +64,17 @@ public: physics_state state; public: - explicit threaded_physics( + explicit cpu_layout_engine( const std::optional* const> _thread_pool = std::nullopt) : thread_pool(_thread_pool), physics(physics_thread, std::ref(state), std::ref(thread_pool)) {} - threaded_physics(const threaded_physics& copy) = delete; - auto operator=(const threaded_physics& copy) -> threaded_physics& = delete; - threaded_physics(threaded_physics&& move) = delete; - auto operator=(threaded_physics&& move) -> threaded_physics& = delete; + cpu_layout_engine(const cpu_layout_engine& copy) = delete; + auto operator=(const cpu_layout_engine& copy) -> cpu_layout_engine& = delete; + cpu_layout_engine(cpu_layout_engine&& move) = delete; + auto operator=(cpu_layout_engine&& move) -> cpu_layout_engine& = delete; - ~threaded_physics() + ~cpu_layout_engine() { state.running = false; state.data_ready_cnd.notify_all(); @@ -98,4 +98,4 @@ public: const std::vector>& springs) -> void; }; -#endif +#endif \ No newline at end of file diff --git a/include/mass_spring_system.hpp b/include/cpu_spring_system.hpp similarity index 82% rename from include/mass_spring_system.hpp rename to include/cpu_spring_system.hpp index 58e8da3..e3be104 100644 --- a/include/mass_spring_system.hpp +++ b/include/cpu_spring_system.hpp @@ -7,7 +7,7 @@ #include #include -class mass_spring_system +class cpu_spring_system { public: class spring @@ -36,12 +36,12 @@ public: std::vector springs; public: - mass_spring_system() {} + cpu_spring_system() {} - mass_spring_system(const mass_spring_system& copy) = delete; - auto operator=(const mass_spring_system& copy) -> mass_spring_system& = delete; - mass_spring_system(mass_spring_system& move) = delete; - auto operator=(mass_spring_system&& move) -> mass_spring_system& = delete; + cpu_spring_system(const cpu_spring_system& copy) = delete; + auto operator=(const cpu_spring_system& copy) -> cpu_spring_system& = delete; + cpu_spring_system(cpu_spring_system& move) = delete; + auto operator=(cpu_spring_system&& move) -> cpu_spring_system& = delete; public: auto clear() -> void; diff --git a/include/octree.hpp b/include/octree.hpp index 2b569ef..be48c1d 100644 --- a/include/octree.hpp +++ b/include/octree.hpp @@ -1,6 +1,8 @@ #ifndef OCTREE_HPP_ #define OCTREE_HPP_ +#include "util.hpp" + #include #include @@ -21,7 +23,7 @@ class octree bool leaf = true; public: - [[nodiscard]] auto child_count() const -> int; + node(const Vector3& _box_min, const Vector3& _box_max) : box_min(_box_min), box_max(_box_max) {} }; public: @@ -29,23 +31,88 @@ public: std::vector nodes; + // This approach is actually slower than the array of nodes + // beacuse we access all the attributes in the same function + + // std::vector mass_centers; + // std::vector mass_totals; + // std::vector box_mins; + // std::vector box_maxs; + // std::vector> childrens; + // std::vector mass_ids; + // std::vector leafs; // bitpacked std::vector is a lot slower + public: octree() = default; - // octree(const octree& copy) = delete; - // auto operator=(const octree& copy) -> octree& = delete; - // octree(octree&& move) = delete; - // auto operator=(octree&& move) -> octree& = delete; + octree(const octree& copy) = delete; + auto operator=(const octree& copy) -> octree& = delete; + octree(octree&& move) = delete; + auto operator=(octree&& move) -> octree& = delete; public: - [[nodiscard]] auto get_octant(int node_idx, const Vector3& pos) const -> int; - [[nodiscard]] auto get_child_bounds(int node_idx, int octant) const - -> std::pair; - auto create_empty_leaf(const Vector3& box_min, const Vector3& box_max) -> int; + auto clear() -> void; + auto reserve(size_t count) -> void; + [[nodiscard]] auto empty() const -> bool; + [[nodiscard]] INLINE static inline auto get_octant(const Vector3& box_min, + const Vector3& box_max, + const Vector3& pos) -> int; + [[nodiscard]] INLINE static inline auto get_child_bounds(const Vector3& box_min, + const Vector3& box_max, + int octant) -> std::pair; + INLINE inline auto create_empty_leaf(const Vector3& box_min, const Vector3& box_max) -> int; + [[nodiscard]] auto get_child_count(int node_idx) const -> int; auto insert(int node_idx, int mass_id, const Vector3& pos, float mass, int depth) -> void; static auto build_octree(octree& t, const std::vector& positions) -> void; [[nodiscard]] auto calculate_force(int node_idx, const Vector3& pos) const -> Vector3; }; +INLINE inline auto octree::get_octant(const Vector3& box_min, const Vector3& box_max, const Vector3& pos) -> int +{ + auto [cx, cy, cz] = (box_min + box_max) / 2.0f; + + // The octant is encoded as a 3-bit integer "zyx". The node area is split + // along all 3 axes, if a position is right of an axis, this bit is set to 1. + // If a position is right of the x-axis and y-axis and left of the z-axis, the + // encoded octant is "011". + return (pos.x >= cx) | ((pos.y >= cy) << 1) | ((pos.z >= cz) << 2); +} + +INLINE inline auto octree::get_child_bounds(const Vector3& box_min, + const Vector3& box_max, + const int octant) -> std::pair +{ + auto [cx, cy, cz] = (box_min + box_max) / 2.0f; + + Vector3 min = Vector3Zero(); + Vector3 max = Vector3Zero(); + + // If (octant & 1), the octant is to the right of the node region's x-axis + // (see GetOctant). This means the left bound is the x-axis and the right + // bound the node's region max. + min.x = octant & 1 ? cx : box_min.x; + max.x = octant & 1 ? box_max.x : cx; + min.y = octant & 2 ? cy : box_min.y; + max.y = octant & 2 ? box_max.y : cy; + min.z = octant & 4 ? cz : box_min.z; + max.z = octant & 4 ? box_max.z : cz; + + return std::make_pair(min, max); +} + +INLINE inline auto octree::create_empty_leaf(const Vector3& box_min, const Vector3& box_max) -> int +{ + nodes.emplace_back(box_min, box_max); + return static_cast(nodes.size() - 1); + + // mass_centers.emplace_back(Vector3Zero()); + // mass_totals.emplace_back(0); + // box_mins.emplace_back(box_min); + // box_maxs.emplace_back(box_max); + // childrens.push_back({-1, -1, -1, -1, -1, -1, -1, -1}); + // mass_ids.emplace_back(-1); + // leafs.emplace_back(true); +} + #endif \ No newline at end of file diff --git a/include/state_manager.hpp b/include/state_manager.hpp index 7ce8954..3069749 100644 --- a/include/state_manager.hpp +++ b/include/state_manager.hpp @@ -3,7 +3,7 @@ #include "graph_distances.hpp" #include "load_save.hpp" -#include "threaded_physics.hpp" +#include "cpu_layout_engine.hpp" #include "puzzle.hpp" #include @@ -12,7 +12,7 @@ class state_manager { private: - threaded_physics& physics; + cpu_layout_engine& physics; std::string preset_file; size_t current_preset = 0; @@ -42,7 +42,7 @@ private: bool edited = false; public: - state_manager(threaded_physics& _physics, const std::string& _preset_file) + state_manager(cpu_layout_engine& _physics, const std::string& _preset_file) : physics(_physics), preset_file(_preset_file) { reload_preset_file(); diff --git a/src/threaded_physics.cpp b/src/cpu_layout_engine.cpp similarity index 92% rename from src/threaded_physics.cpp rename to src/cpu_layout_engine.cpp index e5414df..e37beae 100644 --- a/src/threaded_physics.cpp +++ b/src/cpu_layout_engine.cpp @@ -1,6 +1,6 @@ -#include "threaded_physics.hpp" +#include "cpu_layout_engine.hpp" #include "config.hpp" -#include "mass_spring_system.hpp" +#include "cpu_spring_system.hpp" #include "util.hpp" #include @@ -10,16 +10,16 @@ #include #ifdef ASYNC_OCTREE -auto threaded_physics::set_octree_pool_thread_name(size_t idx) -> void +auto cpu_layout_engine::set_octree_pool_thread_name(size_t idx) -> void { BS::this_thread::set_os_thread_name(std::format("octree-{}", idx)); // traceln("Using thread \"{}\"", BS::this_thread::get_os_thread_name().value_or("INVALID NAME")); } #endif -auto threaded_physics::physics_thread(physics_state& state, const std::optional* const> thread_pool) -> void +auto cpu_layout_engine::physics_thread(physics_state& state, const std::optional* const> thread_pool) -> void { - mass_spring_system mass_springs; + cpu_spring_system mass_springs; #ifdef ASYNC_OCTREE BS::this_thread::set_os_thread_name("physics"); @@ -154,7 +154,7 @@ auto threaded_physics::physics_thread(physics_state& state, const std::optional< loop_iterations = 0; ups_accumulator = std::chrono::duration(0); } - if (mass_springs.tree.nodes.empty()) { + if (mass_springs.tree.empty()) { state.mass_center = Vector3Zero(); } else { state.mass_center = mass_springs.tree.nodes[0].mass_center; @@ -181,7 +181,7 @@ auto threaded_physics::physics_thread(physics_state& state, const std::optional< } } -auto threaded_physics::clear_cmd() -> void +auto cpu_layout_engine::clear_cmd() -> void { { #ifdef TRACY @@ -193,7 +193,7 @@ auto threaded_physics::clear_cmd() -> void } } -auto threaded_physics::add_mass_cmd() -> void +auto cpu_layout_engine::add_mass_cmd() -> void { { #ifdef TRACY @@ -205,7 +205,7 @@ auto threaded_physics::add_mass_cmd() -> void } } -auto threaded_physics::add_spring_cmd(const size_t a, const size_t b) -> void +auto cpu_layout_engine::add_spring_cmd(const size_t a, const size_t b) -> void { { #ifdef TRACY @@ -217,7 +217,7 @@ auto threaded_physics::add_spring_cmd(const size_t a, const size_t b) -> void } } -auto threaded_physics::add_mass_springs_cmd(const size_t num_masses, +auto cpu_layout_engine::add_mass_springs_cmd(const size_t num_masses, const std::vector>& springs) -> void { { diff --git a/src/mass_spring_system.cpp b/src/cpu_spring_system.cpp similarity index 82% rename from src/mass_spring_system.cpp rename to src/cpu_spring_system.cpp index d2ca0ae..fa8dd15 100644 --- a/src/mass_spring_system.cpp +++ b/src/cpu_spring_system.cpp @@ -1,10 +1,78 @@ -#include "mass_spring_system.hpp" +#include "cpu_spring_system.hpp" #include "config.hpp" #include #include -auto mass_spring_system::calculate_spring_force(const size_t s) -> void +auto cpu_spring_system::clear() -> void +{ + positions.clear(); + previous_positions.clear(); + velocities.clear(); + forces.clear(); + springs.clear(); + tree.clear(); +} + +auto cpu_spring_system::add_mass() -> void +{ + // Adding all positions to (0, 0, 0) breaks the octree + + // Done when adding springs + // Vector3 position{ + // static_cast(GetRandomValue(-100, 100)), static_cast(GetRandomValue(-100, + // 100)), static_cast(GetRandomValue(-100, 100)) + // }; + // position = Vector3Scale(Vector3Normalize(position), REST_LENGTH * 2.0); + + positions.emplace_back(Vector3Zero()); + previous_positions.emplace_back(Vector3Zero()); + velocities.emplace_back(Vector3Zero()); + forces.emplace_back(Vector3Zero()); +} + +auto cpu_spring_system::add_spring(size_t a, size_t b) -> void +{ + // Update masses to be located along a random walk when adding the springs + const Vector3& mass_a = positions[a]; + const Vector3& mass_b = positions[b]; + + Vector3 offset{static_cast(GetRandomValue(-100, 100)), + static_cast(GetRandomValue(-100, 100)), + static_cast(GetRandomValue(-100, 100))}; + offset = Vector3Normalize(offset) * REST_LENGTH; + + // If the offset moves the mass closer to the current center of mass, flip it + if (!tree.empty()) { + const Vector3 mass_center_direction = + Vector3Subtract(positions[a], tree.nodes[0].mass_center); + const float mass_center_distance = Vector3Length(mass_center_direction); + + if (mass_center_distance > 0 && Vector3DotProduct(offset, mass_center_direction) < 0.0f) { + offset = Vector3Negate(offset); + } + } + + positions[b] = mass_a + offset; + previous_positions[b] = mass_b; + + // infoln("Adding spring: ({}, {}, {})->({}, {}, {})", mass_a.position.x, mass_a.position.y, + // mass_a.position.z, + // mass_b.position.x, mass_b.position.y, mass_b.position.z); + + springs.emplace_back(a, b); +} + +auto cpu_spring_system::clear_forces() -> void +{ + #ifdef TRACY + ZoneScoped; + #endif + + memset(forces.data(), 0, forces.size() * sizeof(Vector3)); +} + +auto cpu_spring_system::calculate_spring_force(const size_t s) -> void { const spring _s = springs[s]; const Vector3 a_pos = positions[_s.a]; @@ -29,104 +97,12 @@ auto mass_spring_system::calculate_spring_force(const size_t s) -> void forces[_s.b] += b_force; } -auto mass_spring_system::integrate_velocity(const size_t m, const float dt) -> void -{ - const Vector3 acc = forces[m] / MASS; - velocities[m] += acc * dt; -} - -auto mass_spring_system::integrate_position(const size_t m, const float dt) -> void -{ - previous_positions[m] = positions[m]; - positions[m] += velocities[m] * dt; -} - -auto mass_spring_system::verlet_update(const size_t m, const float dt) -> void -{ - const Vector3 acc = (forces[m] / MASS) * dt * dt; - const Vector3 pos = positions[m]; - - Vector3 delta_pos = pos - previous_positions[m]; - delta_pos *= 1.0 - VERLET_DAMPENING; // Minimal dampening - - positions[m] += delta_pos + acc; - previous_positions[m] = pos; -} - -auto mass_spring_system::clear() -> void -{ - positions.clear(); - previous_positions.clear(); - velocities.clear(); - forces.clear(); - springs.clear(); - tree.nodes.clear(); -} - -auto mass_spring_system::add_mass() -> void -{ - // Adding all positions to (0, 0, 0) breaks the octree - - // Done when adding springs - // Vector3 position{ - // static_cast(GetRandomValue(-100, 100)), static_cast(GetRandomValue(-100, - // 100)), static_cast(GetRandomValue(-100, 100)) - // }; - // position = Vector3Scale(Vector3Normalize(position), REST_LENGTH * 2.0); - - positions.emplace_back(Vector3Zero()); - previous_positions.emplace_back(Vector3Zero()); - velocities.emplace_back(Vector3Zero()); - forces.emplace_back(Vector3Zero()); -} - -auto mass_spring_system::add_spring(size_t a, size_t b) -> void -{ - // Update masses to be located along a random walk when adding the springs - const Vector3& mass_a = positions[a]; - const Vector3& mass_b = positions[b]; - - Vector3 offset{static_cast(GetRandomValue(-100, 100)), - static_cast(GetRandomValue(-100, 100)), - static_cast(GetRandomValue(-100, 100))}; - offset = Vector3Normalize(offset) * REST_LENGTH; - - // If the offset moves the mass closer to the current center of mass, flip it - if (!tree.nodes.empty()) { - const Vector3 mass_center_direction = - Vector3Subtract(positions[a], tree.nodes[0].mass_center); - const float mass_center_distance = Vector3Length(mass_center_direction); - - if (mass_center_distance > 0 && Vector3DotProduct(offset, mass_center_direction) < 0.0f) { - offset = Vector3Negate(offset); - } - } - - positions[b] = mass_a + offset; - previous_positions[b] = mass_b; - - // infoln("Adding spring: ({}, {}, {})->({}, {}, {})", mass_a.position.x, mass_a.position.y, - // mass_a.position.z, - // mass_b.position.x, mass_b.position.y, mass_b.position.z); - - springs.emplace_back(a, b); -} - -auto mass_spring_system::clear_forces() -> void -{ -#ifdef TRACY - ZoneScoped; -#endif - - memset(forces.data(), 0, forces.size() * sizeof(Vector3)); -} - -auto mass_spring_system::calculate_spring_forces( +auto cpu_spring_system::calculate_spring_forces( const std::optional* const> thread_pool) -> void { -#ifdef TRACY + #ifdef TRACY ZoneScoped; -#endif + #endif const auto solve_spring_force = [&](const int i) { calculate_spring_force(i); }; @@ -141,12 +117,12 @@ auto mass_spring_system::calculate_spring_forces( } } -auto mass_spring_system::calculate_repulsion_forces( +auto cpu_spring_system::calculate_repulsion_forces( const std::optional* const> thread_pool) -> void { -#ifdef TRACY + #ifdef TRACY ZoneScoped; -#endif + #endif const auto solve_octree = [&](const int i) { @@ -166,12 +142,36 @@ auto mass_spring_system::calculate_repulsion_forces( } } -auto mass_spring_system::update(const float dt, +auto cpu_spring_system::integrate_velocity(const size_t m, const float dt) -> void +{ + const Vector3 acc = forces[m] / MASS; + velocities[m] += acc * dt; +} + +auto cpu_spring_system::integrate_position(const size_t m, const float dt) -> void +{ + previous_positions[m] = positions[m]; + positions[m] += velocities[m] * dt; +} + +auto cpu_spring_system::verlet_update(const size_t m, const float dt) -> void +{ + const Vector3 acc = (forces[m] / MASS) * dt * dt; + const Vector3 pos = positions[m]; + + Vector3 delta_pos = pos - previous_positions[m]; + delta_pos *= 1.0 - VERLET_DAMPENING; // Minimal dampening + + positions[m] += delta_pos + acc; + previous_positions[m] = pos; +} + +auto cpu_spring_system::update(const float dt, const std::optional* const> thread_pool) -> void { -#ifdef TRACY + #ifdef TRACY ZoneScoped; -#endif + #endif const auto update = [&](const int i) { verlet_update(i, dt); }; @@ -184,7 +184,7 @@ auto mass_spring_system::update(const float dt, } } -auto mass_spring_system::center_masses(const std::optional* const> thread_pool) +auto cpu_spring_system::center_masses(const std::optional* const> thread_pool) -> void { Vector3 mean = Vector3Zero(); @@ -202,4 +202,4 @@ auto mass_spring_system::center_masses(const std::optional* co center_mass(i); } } -} +} \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index d84a1f7..63d9da7 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -4,7 +4,7 @@ #include "config.hpp" #include "input_handler.hpp" -#include "threaded_physics.hpp" +#include "cpu_layout_engine.hpp" #include "renderer.hpp" #include "state_manager.hpp" #include "user_interface.hpp" @@ -66,7 +66,7 @@ auto ui_mode() -> int InitWindow(INITIAL_WIDTH * 2, INITIAL_HEIGHT + MENU_HEIGHT, "MassSprings"); // Game setup - threaded_physics physics(thread_pool); + cpu_layout_engine physics(thread_pool); state_manager state(physics, preset_file); orbit_camera camera; input_handler input(state, camera); @@ -169,7 +169,12 @@ auto rush_hour_puzzle_space() -> int puzzle::block(0, 0, 1, 3, false, false) }; const puzzle::block target_block = puzzle::block(0, 0, 2, 1, true, false); - const std::tuple target_block_pos_range = {0, goal_y, board_width - 1, goal_y}; + const std::tuple target_block_pos_range = { + 0, + goal_y, + board_width - target_block.get_width() - 1, + goal_y + }; infoln("Exploring Rush-Hour puzzle space:"); infoln("- Size: {}x{}", board_width, board_height); diff --git a/src/octree.cpp b/src/octree.cpp index b3639a7..0c60ca7 100644 --- a/src/octree.cpp +++ b/src/octree.cpp @@ -4,10 +4,43 @@ #include #include -auto octree::node::child_count() const -> int +auto octree::clear() -> void +{ + nodes.clear(); + + // mass_centers.clear(); + // mass_totals.clear(); + // box_mins.clear(); + // box_maxs.clear(); + // childrens.clear(); + // mass_ids.clear(); + // leafs.clear(); +} + +auto octree::reserve(const size_t count) -> void +{ + nodes.reserve(count); + + // mass_centers.reserve(count); + // mass_totals.reserve(count); + // box_mins.reserve(count); + // box_maxs.reserve(count); + // childrens.reserve(count); + // mass_ids.reserve(count); + // leafs.reserve(count); +} + +auto octree::empty() const -> bool +{ + return nodes.empty(); + + // return mass_centers.empty(); +} + +auto octree::get_child_count(const int node_idx) const -> int { int child_count = 0; - for (const int child : children) { + for (const int child : nodes[node_idx].children) { if (child != -1) { ++child_count; } @@ -15,88 +48,33 @@ auto octree::node::child_count() const -> int return child_count; } -auto octree::get_octant(const int node_idx, const Vector3& pos) const -> int -{ - const node& n = nodes[node_idx]; - auto [cx, cy, cz] = Vector3((n.box_min.x + n.box_max.x) / 2.0f, (n.box_min.y + n.box_max.y) / 2.0f, - (n.box_min.z + n.box_max.z) / 2.0f); - - // The octant is encoded as a 3-bit integer "zyx". The node area is split - // along all 3 axes, if a position is right of an axis, this bit is set to 1. - // If a position is right of the x-axis and y-axis and left of the z-axis, the - // encoded octant is "011". - int octant = 0; - if (pos.x >= cx) { - octant |= 1; - } - if (pos.y >= cy) { - octant |= 2; - } - if (pos.z >= cz) { - octant |= 4; - } - - return octant; -} - -auto octree::get_child_bounds(const int node_idx, const int octant) const -> std::pair -{ - const node& n = nodes[node_idx]; - auto [cx, cy, cz] = Vector3((n.box_min.x + n.box_max.x) / 2.0f, (n.box_min.y + n.box_max.y) / 2.0f, - (n.box_min.z + n.box_max.z) / 2.0f); - - Vector3 min = Vector3Zero(); - Vector3 max = Vector3Zero(); - - // If (octant & 1), the octant is to the right of the node region's x-axis - // (see GetOctant). This means the left bound is the x-axis and the right - // bound the node's region max. - min.x = octant & 1 ? cx : n.box_min.x; - max.x = octant & 1 ? n.box_max.x : cx; - min.y = octant & 2 ? cy : n.box_min.y; - max.y = octant & 2 ? n.box_max.y : cy; - min.z = octant & 4 ? cz : n.box_min.z; - max.z = octant & 4 ? n.box_max.z : cz; - - return std::make_pair(min, max); -} - -auto octree::create_empty_leaf(const Vector3& box_min, const Vector3& box_max) -> int -{ - node n; - n.box_min = box_min; - n.box_max = box_max; - nodes.emplace_back(n); - - return static_cast(nodes.size() - 1); -} - -auto octree::insert(const int node_idx, const int mass_id, const Vector3& pos, const float mass, +auto octree::insert(const int node_idx, + const int mass_id, + const Vector3& pos, + const float mass, const int depth) -> void { - // infoln("Inserting position ({}, {}, {}) into octree at node {} (depth {})", pos.x, pos.y, - // pos.z, node_idx, depth); if (depth > MAX_DEPTH) { - throw std::runtime_error(std::format("MAX_DEPTH! node={} box_min=({},{},{}) box_max=({},{},{}) pos=({},{},{})", - node_idx, nodes[node_idx].box_min.x, nodes[node_idx].box_min.y, - nodes[node_idx].box_min.z, nodes[node_idx].box_max.x, - nodes[node_idx].box_max.y, nodes[node_idx].box_max.z, pos.x, pos.y, - pos.z)); + throw std::runtime_error("Octree insertion reachead max depth"); } // NOTE: Do not store a nodes[node_idx] reference as the nodes vector might reallocate during // this function // We can place the particle in the empty leaf - if (nodes[node_idx].leaf && nodes[node_idx].mass_id == -1) { + const bool leaf = nodes[node_idx].leaf; + if (leaf && nodes[node_idx].mass_id == -1) { nodes[node_idx].mass_id = mass_id; nodes[node_idx].mass_center = pos; nodes[node_idx].mass_total = mass; return; } + const Vector3& box_min = nodes[node_idx].box_min; + const Vector3& box_max = nodes[node_idx].box_max; + // The leaf is occupied, we need to subdivide - if (nodes[node_idx].leaf) { + if (leaf) { const int existing_id = nodes[node_idx].mass_id; const Vector3 existing_pos = nodes[node_idx].mass_center; const float existing_mass = nodes[node_idx].mass_total; @@ -119,13 +97,13 @@ auto octree::insert(const int node_idx, const int mass_id, const Vector3& pos, c // Convert the leaf to an internal node nodes[node_idx].mass_id = -1; nodes[node_idx].leaf = false; - nodes[node_idx].mass_total = 0.0; nodes[node_idx].mass_center = Vector3Zero(); + nodes[node_idx].mass_total = 0.0; // Re-insert the existing mass into a new empty leaf (see above) - const int oct = get_octant(node_idx, existing_pos); + const int oct = get_octant(box_min, box_max, existing_pos); if (nodes[node_idx].children[oct] == -1) { - const auto& [min, max] = get_child_bounds(node_idx, oct); + const auto& [min, max] = get_child_bounds(box_min, box_max, oct); const int child_idx = create_empty_leaf(min, max); nodes[node_idx].children[oct] = child_idx; } @@ -133,9 +111,9 @@ auto octree::insert(const int node_idx, const int mass_id, const Vector3& pos, c } // Insert the new mass - const int oct = get_octant(node_idx, pos); + const int oct = get_octant(box_min, box_max, pos); if (nodes[node_idx].children[oct] == -1) { - const auto& [min, max] = get_child_bounds(node_idx, oct); + const auto& [min, max] = get_child_bounds(box_min, box_max, oct); const int child_idx = create_empty_leaf(min, max); nodes[node_idx].children[oct] = child_idx; } @@ -143,9 +121,7 @@ auto octree::insert(const int node_idx, const int mass_id, const Vector3& pos, c // Update the center of mass const float new_mass = nodes[node_idx].mass_total + mass; - nodes[node_idx].mass_center.x = (nodes[node_idx].mass_center.x * nodes[node_idx].mass_total + pos.x) / new_mass; - nodes[node_idx].mass_center.y = (nodes[node_idx].mass_center.y * nodes[node_idx].mass_total + pos.y) / new_mass; - nodes[node_idx].mass_center.z = (nodes[node_idx].mass_center.z * nodes[node_idx].mass_total + pos.z) / new_mass; + nodes[node_idx].mass_center = (nodes[node_idx].mass_center * nodes[node_idx].mass_total + pos) / new_mass; nodes[node_idx].mass_total = new_mass; } @@ -155,8 +131,8 @@ auto octree::build_octree(octree& t, const std::vector& positions) -> v ZoneScoped; #endif - t.nodes.clear(); - t.nodes.reserve(positions.size() * 2); + t.clear(); + t.reserve(positions.size() * 2); // Compute bounding box around all masses Vector3 min{FLT_MAX, FLT_MAX, FLT_MAX}; @@ -194,28 +170,36 @@ auto octree::calculate_force(const int node_idx, const Vector3& pos) const -> Ve } const node& n = nodes[node_idx]; - if (std::abs(n.mass_total) <= 0.001f) { + + const float mass_total = n.mass_total; + const Vector3& mass_center = n.mass_center; + const Vector3& box_min = n.box_min; + const Vector3& box_max = n.box_max; + const std::array& children = n.children; + const bool leaf = n.leaf; + + if (std::abs(mass_total) <= 0.001f) { return Vector3Zero(); } - const Vector3 diff = Vector3Subtract(pos, n.mass_center); + const Vector3 diff = Vector3Subtract(pos, mass_center); float dist_sq = diff.x * diff.x + diff.y * diff.y + diff.z * diff.z; // Softening dist_sq += SOFTENING; // Barnes-Hut - const float size = n.box_max.x - n.box_min.x; - if (n.leaf || size * size / dist_sq < THETA * THETA) { + const float size = box_max.x - box_min.x; + if (leaf || size * size / dist_sq < THETA * THETA) { const float dist = std::sqrt(dist_sq); - const float force_mag = BH_FORCE * n.mass_total / dist_sq; + const float force_mag = BH_FORCE * mass_total / dist_sq; return Vector3Scale(diff, force_mag / dist); } // Collect child forces Vector3 force = Vector3Zero(); - for (const int child : n.children) { + for (const int child : children) { if (child >= 0) { const Vector3 child_force = calculate_force(child, pos); diff --git a/src/puzzle.cpp b/src/puzzle.cpp index 1066e05..3258530 100644 --- a/src/puzzle.cpp +++ b/src/puzzle.cpp @@ -1127,7 +1127,7 @@ auto puzzle::explore_puzzle_space(const boost::unordered_flat_set