diff --git a/include/config.hpp b/include/config.hpp index bfe1eb6..9a8f08e 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -91,4 +91,9 @@ constexpr Color BLOCK_COLOR = DARKBLUE; constexpr Color TARGET_BLOCK_COLOR = RED; constexpr Color WALL_COLOR = BLACK; +// Threadpool +static constexpr int SMALL_TASK_BLOCK_SIZE = 256; // Weirdly, larger blocks decrease performance... +static constexpr int LARGE_TASK_BLOCK_SIZE = 256; + + #endif \ No newline at end of file diff --git a/include/cpu_spring_system.hpp b/include/cpu_spring_system.hpp index e3be104..6707183 100644 --- a/include/cpu_spring_system.hpp +++ b/include/cpu_spring_system.hpp @@ -22,9 +22,6 @@ public: }; public: - static constexpr int SMALL_TASK_BLOCK_SIZE = 256; - static constexpr int LARGE_TASK_BLOCK_SIZE = 256; - octree tree; // This is the main ownership of all the states/masses/springs. diff --git a/include/octree.hpp b/include/octree.hpp index 3d39a46..39d3510 100644 --- a/include/octree.hpp +++ b/include/octree.hpp @@ -2,6 +2,7 @@ #define OCTREE_HPP_ #include "util.hpp" +#include "config.hpp" #include #include @@ -98,7 +99,9 @@ public: [[nodiscard]] auto root() const -> const node&; // Morton/linear octree implementation - static auto build_octree_morton(octree& t, const std::vector& positions) -> void; + static auto build_octree_morton(octree& t, + const std::vector& positions, + const std::optional*>& thread_pool) -> void; [[nodiscard]] auto calculate_force_morton(int node_idx, const Vector3& pos, int self_id) const -> Vector3; }; diff --git a/src/cpu_layout_engine.cpp b/src/cpu_layout_engine.cpp index 6a24040..60dda68 100644 --- a/src/cpu_layout_engine.cpp +++ b/src/cpu_layout_engine.cpp @@ -103,7 +103,7 @@ auto cpu_layout_engine::physics_thread(physics_state& state, const std::optional last_mass_count = mass_springs.positions.size(); } #else - octree::build_octree_morton(mass_springs.tree, mass_springs.positions); + octree::build_octree_morton(mass_springs.tree, mass_springs.positions, 
thread_pool); #endif mass_springs.clear_forces(); diff --git a/src/octree.cpp b/src/octree.cpp index de33648..87312be 100644 --- a/src/octree.cpp +++ b/src/octree.cpp @@ -24,8 +24,10 @@ auto octree::root() const -> const node& return nodes[0]; } -// Replaced the 50 line recursive octree insertion with this bitch to gain 5 UPS, FML -auto octree::build_octree_morton(octree& t, const std::vector& positions) -> void +// Replaced the 50-line recursive octree insertion with this Morton-code build to gain 5 UPS +auto octree::build_octree_morton(octree& t, + const std::vector& positions, + const std::optional*>& thread_pool) -> void { #ifdef TRACY ZoneScoped; @@ -65,10 +67,21 @@ auto octree::build_octree_morton(octree& t, const std::vector& position Vector3 pos; }; + // Calculate the morton code for each position std::vector sort_container; - sort_container.reserve(positions.size()); - for (uint32_t i = 0; i < positions.size(); ++i) { - sort_container.emplace_back(pos_to_morton(positions[i], root_min, root_max), i, positions[i]); + sort_container.resize(positions.size()); + + const auto calculate_morton = [&](const uint32_t i) + { + sort_container[i] = {pos_to_morton(positions[i], root_min, root_max), i, positions[i]}; + }; + + if (thread_pool) { + (*thread_pool)->submit_loop(0, positions.size(), calculate_morton, SMALL_TASK_BLOCK_SIZE).wait(); + } else { + for (uint32_t i = 0; i < positions.size(); ++i) { + calculate_morton(i); + } } // Sort the list by morton codes. 
Because positions close to each other have similar morten codes, @@ -123,7 +136,7 @@ auto octree::build_octree_morton(octree& t, const std::vector& position // Leaves at MAX_DEPTH: 1 particle per leaf in morton order (close particles close together) auto& leafs = tree_levels[MAX_DEPTH]; leafs.reserve(sort_container.size()); - const float leaf_size = root_extent / static_cast(MAX_DEPTH); + const float leaf_size = root_extent / static_cast(1u << MAX_DEPTH); for (const auto& [code, id, pos] : sort_container) { node leaf; leaf.leaf = true; @@ -198,12 +211,12 @@ auto octree::build_octree_morton(octree& t, const std::vector& position const node& child = tree_levels[child_depth][child_local]; // Which octant of this parent does it belong to? - // IMPORTANT: octant comes from the NEXT level after current_depth (i.e. current_depth+1), + // Octant comes from the NEXT level after current_depth, // but the child might skip levels due to compression. - // We must use the child's "first level under the parent" which is (current_depth+1). + // We must use the child's first level under the parent (current_depth+1). const int oct = octant_at_level(leaves[k].leaf_code, current_depth + 1, MAX_DEPTH); - // Store *global* child reference: we only have an int slot, so we need a single index space. + // Store global child reference: we only have an int slot, so we need a single index space. parent.children[oct] = (child_depth << 24) | (child_local & 0x00FFFFFF); mass_total += child.mass_total; @@ -277,7 +290,7 @@ auto octree::calculate_force_morton(const int node_idx, const Vector3& pos, cons float fz = 0.0f; std::vector stack; - stack.reserve(128); + stack.reserve(512); stack.push_back(node_idx); constexpr float theta2 = THETA * THETA;