Files
cpp-masssprings/src/octree.cpp

335 lines
11 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "octree.hpp"
#include "config.hpp"
#include <cfloat>
#include <raymath.h>
auto octree::clear() -> void
{
nodes.clear();
}
auto octree::reserve(const size_t count) -> void
{
nodes.reserve(count);
}
auto octree::empty() const -> bool
{
return nodes.empty();
}
auto octree::root() const -> const node&
{
return nodes[0];
}
// Replaced the 50 line recursive octree insertion with this morton bitch to gain 5 UPS, FML
auto octree::build_octree_morton(octree& t,
const std::vector<Vector3>& positions,
const std::optional<BS::thread_pool<>*>& thread_pool) -> void
{
#ifdef TRACY
ZoneScoped;
#endif
t.clear();
if (positions.empty()) {
return;
}
// Compute bounding box around all masses
Vector3 root_min{FLT_MAX, FLT_MAX, FLT_MAX};
Vector3 root_max{-FLT_MAX, -FLT_MAX, -FLT_MAX};
for (const auto& [x, y, z] : positions) {
root_min.x = std::min(root_min.x, x);
root_max.x = std::max(root_max.x, x);
root_min.y = std::min(root_min.y, y);
root_max.y = std::max(root_max.y, y);
root_min.z = std::min(root_min.z, z);
root_max.z = std::max(root_max.z, z);
}
constexpr float pad = 1.0f;
root_min = Vector3Subtract(root_min, Vector3Scale(Vector3One(), pad));
root_max = Vector3Add(root_max, Vector3Scale(Vector3One(), pad));
const float max_extent = std::max({root_max.x - root_min.x, root_max.y - root_min.y, root_max.z - root_min.z});
root_max = Vector3Add(root_min, Vector3Scale(Vector3One(), max_extent));
const float root_extent = root_max.x - root_min.x; // cubic
// Container for building the particle list before sorting by morton code
struct sort_node
{
uint64_t code;
uint32_t id;
Vector3 pos;
};
// Calculate morton code for each node
std::vector<sort_node> sort_container;
sort_container.resize(positions.size());
const auto calculate_morton = [&](const uint32_t i)
{
sort_container[i] = {pos_to_morton(positions[i], root_min, root_max), i, positions[i]};
};
if (thread_pool) {
(*thread_pool)->submit_loop(0, positions.size(), calculate_morton, SMALL_TASK_BLOCK_SIZE).wait();
} else {
for (uint32_t i = 0; i < positions.size(); ++i) {
calculate_morton(i);
}
}
// Sort the list by morton codes. Because positions close to each other have similar morten codes,
// this provides us with "spatial locality" in the datastructure.
auto sort_by_code = [&]()
{
std::ranges::sort(sort_container,
[](const sort_node& a, const sort_node& b)
{
if (a.code != b.code) {
return a.code < b.code;
}
return a.id < b.id;
});
};
sort_by_code();
// Resolve duplicates by jittering the later one deterministically and re-encoding.
for (int seed = 0; seed < 8; ++seed) {
bool had_dupes = false;
for (size_t i = 1; i < sort_container.size(); ++i) {
// Because elements are spatially ordered after sorting, we can scan for duplicates in O(n)
if (sort_container[i].code == sort_container[i - 1].code) {
had_dupes = true;
sort_container[i].pos = jitter_pos(sort_container[i].pos,
sort_container[i].id + seed * 0x9e3779b9u,
root_min,
root_max,
root_extent);
sort_container[i].code = pos_to_morton(sort_container[i].pos, root_min, root_max);
}
}
if (!had_dupes) {
break;
}
sort_by_code();
}
// Sanity check
for (size_t i = 1; i < sort_container.size(); ++i) {
if (sort_container[i].code == sort_container[i - 1].code) {
throw std::runtime_error("Duplicates remain after jittering");
}
}
std::vector<std::vector<node>> tree_levels;
tree_levels.assign(MAX_DEPTH + 1, {});
// Leaves at MAX_DEPTH: 1 particle per leaf in morton order (close particles close together)
auto& leafs = tree_levels[MAX_DEPTH];
leafs.reserve(sort_container.size());
const float leaf_size = root_extent / static_cast<float>(1u << MAX_DEPTH);
for (const auto& [code, id, pos] : sort_container) {
node leaf;
leaf.leaf = true;
leaf.mass_id = static_cast<int>(id);
leaf.depth = MAX_DEPTH;
leaf.size = leaf_size;
leaf.mass_total = MASS;
leaf.mass_center = pos; // force uses mass_center instead of jittered position
leaf.children.fill(-1);
leafs.push_back(leaf);
}
// We now have to group the particles (currently we have only sorted the leaves),
// but upwards subdivisions have to be created.
// For grouping, store a nodes local index in its level.
struct leaf
{
uint64_t leaf_code;
int depth;
int level_index;
};
std::vector<leaf> leaves;
leaves.reserve(tree_levels[MAX_DEPTH].size());
for (int i = 0; i < static_cast<int>(tree_levels[MAX_DEPTH].size()); ++i) {
leaves.emplace_back(sort_container[static_cast<size_t>(i)].code, MAX_DEPTH, i);
}
// Build internal levels from MAX_DEPTH-1 to 0
for (int current_depth = MAX_DEPTH - 1; current_depth >= 0; --current_depth) {
auto& current_level = tree_levels[current_depth];
current_level.clear();
std::vector<leaf> next_refs;
next_refs.reserve(leaves.size());
const float parent_size = root_extent / static_cast<float>(1u << current_depth);
size_t i = 0;
while (i < leaves.size()) {
const uint64_t key = path_to_ancestor(leaves[i].leaf_code, MAX_DEPTH, current_depth);
size_t j = i + 1;
while (j < leaves.size() && path_to_ancestor(leaves[j].leaf_code, MAX_DEPTH, current_depth) == key) {
++j;
}
const size_t group_size = j - i;
// Unary compression: just carry the child ref upward unchanged.
if (group_size == 1) {
next_refs.push_back(leaves[i]);
i = j;
continue;
}
node parent;
parent.leaf = false;
parent.mass_id = -1;
parent.depth = current_depth;
parent.size = parent_size;
parent.children.fill(-1);
float mass_total = 0.0f;
Vector3 mass_center_acc = Vector3Zero();
for (size_t k = i; k < j; ++k) {
const int child_depth = leaves[k].depth;
const int child_local = leaves[k].level_index;
// Read the child from its actual stored level.
const node& child = tree_levels[child_depth][child_local];
// Which octant of this parent does it belong to?
// Octant comes from the NEXT level after current_depth,
// but the child might skip levels due to compression.
// We must use the child's first level under the parent (current_depth+1).
const int oct = octant_at_level(leaves[k].leaf_code, current_depth + 1, MAX_DEPTH);
// Store global child reference: we only have an int slot, so we need a single index space.
parent.children[oct] = (child_depth << 24) | (child_local & 0x00FFFFFF);
mass_total += child.mass_total;
mass_center_acc = Vector3Add(mass_center_acc, Vector3Scale(child.mass_center, child.mass_total));
}
parent.mass_total = mass_total;
parent.mass_center = (mass_total > 0.0f) ? Vector3Scale(mass_center_acc, 1.0f / mass_total) : Vector3Zero();
const int parent_local = static_cast<int>(current_level.size());
current_level.push_back(parent);
next_refs.push_back({leaves[i].leaf_code, current_depth, parent_local});
i = j;
}
leaves.swap(next_refs);
}
// Flatten levels and fix child indices from local->global.
// All depth 0 nodes come first, then depth 1, last depth MAX_DEPTH.
std::vector<int> offsets(tree_levels.size(), 0);
int total = 0;
for (int d = 0; d <= MAX_DEPTH; ++d) {
offsets[d] = total;
total += static_cast<int>(tree_levels[d].size());
}
t.nodes.clear();
t.nodes.reserve(total);
for (int d = 0; d <= MAX_DEPTH; ++d) {
t.nodes.insert(t.nodes.end(), tree_levels[d].begin(), tree_levels[d].end());
}
// Fix child indices: convert local index in levels[d+1] to global index in t.nodes
for (int d = 0; d <= MAX_DEPTH; ++d) {
const int base = offsets[d];
for (int i2 = 0; i2 < static_cast<int>(tree_levels[d].size()); ++i2) {
node& n = t.nodes[base + i2];
if (!n.leaf) {
for (int c = 0; c < 8; ++c) {
int packed = n.children[c];
if (packed >= 0) {
const int child_depth = (packed >> 24) & 0xFF;
const int child_local = packed & 0x00FFFFFF;
n.children[c] = offsets[child_depth] + child_local;
}
}
}
}
}
// const size_t _leaves = tree_levels[MAX_DEPTH].size();
// const size_t _total = t.nodes.size();
// const size_t _internal = _total - _leaves;
// traceln("Morton octree nodes: {}, leaves: {}, ratio: {:.3} children per internal node.",
// _total,
// _leaves,
// static_cast<float>(_total - 1) / _internal);
}
auto octree::calculate_force_morton(const int node_idx, const Vector3& pos, const int self_id) const -> Vector3
{
if (node_idx < 0) {
return Vector3Zero();
}
// Force accumulator
float fx = 0.0f;
float fy = 0.0f;
float fz = 0.0f;
std::vector<int> stack;
stack.reserve(512);
stack.push_back(node_idx);
constexpr float theta2 = THETA * THETA;
while (!stack.empty()) {
const int idx = stack.back();
stack.pop_back();
const node& n = nodes[idx];
// No self-force for single-particle leafs
if (n.leaf && n.mass_id == self_id) {
continue;
}
const float dx = pos.x - n.mass_center.x;
const float dy = pos.y - n.mass_center.y;
const float dz = pos.z - n.mass_center.z;
const float dist_sq = dx * dx + dy * dy + dz * dz + SOFTENING;
// BarnesHut criterion
if (n.leaf || ((n.size * n.size) / dist_sq) < theta2) {
const float inv_dist = 1.0f / std::sqrt(dist_sq);
const float force_mag = (BH_FORCE * n.mass_total) / dist_sq; // ~ 1/r^2
const float s = force_mag * inv_dist; // scale by 1/r to get vector
fx += dx * s;
fy += dy * s;
fz += dz * s;
continue;
}
for (int c = 0; c < 8; ++c) {
const int child = n.children[c];
if (child >= 0) {
stack.push_back(child);
}
}
}
return Vector3{fx, fy, fz};
}