replace recursive octree implementation with morton code version (FML)
This commit is contained in:
@ -35,6 +35,7 @@ set(SOURCES
|
||||
# Libraries
|
||||
include(FetchContent)
|
||||
find_package(raylib REQUIRED)
|
||||
find_package(libmorton REQUIRED)
|
||||
find_package(Boost COMPONENTS program_options REQUIRED)
|
||||
set(LIBS raylib Boost::headers Boost::program_options)
|
||||
set(FLAGS "")
|
||||
@ -52,6 +53,8 @@ if(NOT DISABLE_BACKWARD)
|
||||
|
||||
list(APPEND LIBS Backward::Backward)
|
||||
list(APPEND FLAGS BACKWARD)
|
||||
|
||||
message("-- BACKWARD: Enabled")
|
||||
endif()
|
||||
|
||||
if(NOT DISABLE_TRACY)
|
||||
@ -67,6 +70,8 @@ if(NOT DISABLE_TRACY)
|
||||
|
||||
list(APPEND LIBS TracyClient)
|
||||
list(APPEND FLAGS TRACY)
|
||||
|
||||
message("-- TRACY: Enabled")
|
||||
endif()
|
||||
|
||||
# Set this after fetching tracy to hide tracy's warnings.
|
||||
@ -111,6 +116,8 @@ if(NOT DISABLE_TESTS AND NOT WIN32)
|
||||
|
||||
include(Catch)
|
||||
catch_discover_tests(tests)
|
||||
|
||||
message("-- TESTS: Enabled")
|
||||
endif()
|
||||
|
||||
# Benchmarking
|
||||
@ -124,14 +131,16 @@ if(NOT DISABLE_BENCH AND NOT WIN32)
|
||||
add_executable(benchmarks ${BENCH_SOURCES} ${SOURCES})
|
||||
target_include_directories(benchmarks PRIVATE include)
|
||||
target_link_libraries(benchmarks benchmark raylib)
|
||||
|
||||
message("-- BENCHMARKS: Enabled")
|
||||
endif()
|
||||
|
||||
# LTO
|
||||
include(CheckIPOSupported)
|
||||
check_ipo_supported(RESULT supported OUTPUT error)
|
||||
if(supported)
|
||||
message(STATUS "IPO / LTO enabled")
|
||||
message("-- IPO/LTO: Enabled")
|
||||
set_property(TARGET masssprings PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
|
||||
else()
|
||||
message(STATUS "IPO / LTO not supported")
|
||||
message("-- IPO/LTO: Disabled")
|
||||
endif()
|
||||
24
flake.nix
24
flake.nix
@ -135,7 +135,7 @@ rec {
|
||||
# Define custom dependencies
|
||||
# ===========================================================================================
|
||||
|
||||
raygui = stdenv.mkDerivation (finalAttrs: {
|
||||
raygui = stdenv.mkDerivation rec {
|
||||
pname = "raygui";
|
||||
version = "4.0-unstable-2026-02-24";
|
||||
|
||||
@ -163,13 +163,13 @@ rec {
|
||||
Name: raygui
|
||||
Description: Simple and easy-to-use immediate-mode gui library
|
||||
URL: https://github.com/raysan5/raygui
|
||||
Version: ${finalAttrs.version}
|
||||
Version: ${version}
|
||||
Cflags: -I"{includedir}"
|
||||
EOF
|
||||
|
||||
runHook postInstall
|
||||
'';
|
||||
});
|
||||
};
|
||||
|
||||
thread-pool = stdenv.mkDerivation {
|
||||
pname = "thread-pool";
|
||||
@ -202,11 +202,19 @@ rec {
|
||||
};
|
||||
|
||||
# Header-only library
|
||||
dontBuild = true;
|
||||
installPhase = ''
|
||||
mkdir -p $out
|
||||
mv ./include $out/include
|
||||
'';
|
||||
# dontBuild = true;
|
||||
# installPhase = ''
|
||||
# mkdir -p $out
|
||||
# mv ./include $out/include
|
||||
# '';
|
||||
|
||||
nativeBuildInputs = with pkgs; [cmake];
|
||||
|
||||
cmakeFlags = [
|
||||
"-DBUILD_TESTING=OFF"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
"-DCMAKE_INSTALL_DATADIR=share"
|
||||
];
|
||||
};
|
||||
|
||||
# ===========================================================================================
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
|
||||
#include <raylib.h>
|
||||
#include <raymath.h>
|
||||
#include <libmorton/morton.h>
|
||||
|
||||
class octree
|
||||
{
|
||||
@ -16,24 +17,21 @@ class octree
|
||||
public:
|
||||
Vector3 mass_center = Vector3Zero();
|
||||
float mass_total = 0.0;
|
||||
Vector3 box_min = Vector3Zero(); // area start
|
||||
Vector3 box_max = Vector3Zero(); // area end
|
||||
uint8_t depth = 0;
|
||||
float size = 0.0f; // Because our octree cells are cubic we don't need to store the bounds
|
||||
std::array<int, 8> children = {-1, -1, -1, -1, -1, -1, -1, -1};
|
||||
int mass_id = -1;
|
||||
bool leaf = true;
|
||||
|
||||
public:
|
||||
node(const Vector3& _box_min, const Vector3& _box_max) : box_min(_box_min), box_max(_box_max) {}
|
||||
};
|
||||
|
||||
public:
|
||||
static constexpr int MAX_DEPTH = 20;
|
||||
private:
|
||||
// 21 * 3 = 63, fits in uint64_t for combined x/y/z morton-code
|
||||
static constexpr int MAX_DEPTH = 21;
|
||||
|
||||
std::vector<node> nodes;
|
||||
|
||||
// This approach is actually slower than the array of nodes
|
||||
// beacuse we access all the attributes in the same function
|
||||
|
||||
// std::vector<Vector3> mass_centers;
|
||||
// std::vector<float> mass_totals;
|
||||
// std::vector<Vector3> box_mins;
|
||||
@ -50,22 +48,58 @@ public:
|
||||
octree(octree&& move) = delete;
|
||||
auto operator=(octree&& move) -> octree& = delete;
|
||||
|
||||
public:
|
||||
auto clear() -> void;
|
||||
auto reserve(size_t count) -> void;
|
||||
[[nodiscard]] auto empty() const -> bool;
|
||||
private:
|
||||
[[nodiscard]] INLINE static inline auto get_octant(const Vector3& box_min,
|
||||
const Vector3& box_max,
|
||||
const Vector3& pos) -> int;
|
||||
[[nodiscard]] INLINE static inline auto get_child_bounds(const Vector3& box_min,
|
||||
const Vector3& box_max,
|
||||
int octant) -> std::pair<Vector3, Vector3>;
|
||||
INLINE inline auto create_empty_leaf(const Vector3& box_min, const Vector3& box_max) -> int;
|
||||
[[nodiscard]] auto get_child_count(int node_idx) const -> int;
|
||||
auto insert(int node_idx, int mass_id, const Vector3& pos, float mass, int depth) -> void;
|
||||
static auto build_octree(octree& t, const std::vector<Vector3>& positions) -> void;
|
||||
|
||||
[[nodiscard]] auto calculate_force(int node_idx, const Vector3& pos) const -> Vector3;
|
||||
// Map a floating point coordinate to a discrete integer (so its morton-code can be computed)
|
||||
// The "bits" parameter determines the discrete axis resolution
|
||||
[[nodiscard]] INLINE static inline auto quantize_axis(float coordinate,
|
||||
float box_min,
|
||||
float box_max,
|
||||
int bits) -> uint32_t;
|
||||
|
||||
[[nodiscard]] INLINE static inline auto pos_to_morton(const Vector3& p,
|
||||
const Vector3& root_min,
|
||||
const Vector3& root_max) -> uint64_t;
|
||||
|
||||
[[nodiscard]] INLINE static inline auto jitter_pos(Vector3 p,
|
||||
uint32_t seed,
|
||||
const Vector3& root_min,
|
||||
const Vector3& root_max,
|
||||
float root_extent) -> Vector3;
|
||||
|
||||
// Use this to obtain an ancestor node of a leaf node (on any level).
|
||||
// Because the morton codes (interleaved coordinates) encode the octree path, we can take
|
||||
// the morten code of any leaf and only take the 3*n first interleaved bits to find the
|
||||
// leaf ancestor on level n.
|
||||
// Leaf Code: [101 110 100 001] -> Ancestors (from leaf to root):
|
||||
// - [101 110 100]
|
||||
// - [101 110]
|
||||
// - [101] (root)
|
||||
[[nodiscard]] INLINE static inline auto path_to_ancestor(uint64_t leaf_code, int leaf_depth, int depth) -> uint64_t;
|
||||
|
||||
// Use this to obtain the octant a leaf node is contained in (on any level).
|
||||
// The 3 interleaved bits in the morten code encode the octant [0, 7].
|
||||
// Leaf Code: [101 110 100 001] -> Octants:
|
||||
// - [100] (Level 2)
|
||||
// - [110] (Level 1)
|
||||
// - [101] (Level 0)
|
||||
[[nodiscard]] INLINE static inline auto octant_at_level(uint64_t leaf_code, int level, int leaf_depth) -> int;
|
||||
|
||||
public:
|
||||
auto clear() -> void;
|
||||
auto reserve(size_t count) -> void;
|
||||
[[nodiscard]] auto empty() const -> bool;
|
||||
[[nodiscard]] auto root() const -> const node&;
|
||||
|
||||
// Morton/linear octree implementation
|
||||
static auto build_octree_morton(octree& t, const std::vector<Vector3>& positions) -> void;
|
||||
[[nodiscard]] auto calculate_force_morton(int node_idx, const Vector3& pos, int self_id) const -> Vector3;
|
||||
};
|
||||
|
||||
INLINE inline auto octree::get_octant(const Vector3& box_min, const Vector3& box_max, const Vector3& pos) -> int
|
||||
@ -101,18 +135,75 @@ INLINE inline auto octree::get_child_bounds(const Vector3& box_min,
|
||||
return std::make_pair(min, max);
|
||||
}
|
||||
|
||||
INLINE inline auto octree::create_empty_leaf(const Vector3& box_min, const Vector3& box_max) -> int
|
||||
INLINE inline auto octree::quantize_axis(const float coordinate,
|
||||
const float box_min,
|
||||
const float box_max,
|
||||
const int bits) -> uint32_t
|
||||
{
|
||||
nodes.emplace_back(box_min, box_max);
|
||||
return static_cast<int>(nodes.size() - 1);
|
||||
const float extent = box_max - box_min;
|
||||
if (extent <= 0.0f) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// mass_centers.emplace_back(Vector3Zero());
|
||||
// mass_totals.emplace_back(0);
|
||||
// box_mins.emplace_back(box_min);
|
||||
// box_maxs.emplace_back(box_max);
|
||||
// childrens.push_back({-1, -1, -1, -1, -1, -1, -1, -1});
|
||||
// mass_ids.emplace_back(-1);
|
||||
// leafs.emplace_back(true);
|
||||
float normalized = (coordinate - box_min) / extent; // normalize to [0,1]
|
||||
normalized = std::max(0.0f, std::min(normalized, std::nextafter(1.0f, 0.0f))); // avoid exactly 1.0
|
||||
|
||||
// bits up to 21 => (1u << bits) safe in 32-bit
|
||||
const uint32_t grid_max = (1u << bits) - 1u;
|
||||
return static_cast<uint32_t>(normalized * static_cast<float>(grid_max));
|
||||
}
|
||||
|
||||
INLINE inline auto octree::pos_to_morton(const Vector3& p, const Vector3& root_min, const Vector3& root_max) -> uint64_t
|
||||
{
|
||||
const uint32_t x = quantize_axis(p.x, root_min.x, root_max.x, MAX_DEPTH);
|
||||
const uint32_t y = quantize_axis(p.y, root_min.y, root_max.y, MAX_DEPTH);
|
||||
const uint32_t z = quantize_axis(p.z, root_min.z, root_max.z, MAX_DEPTH);
|
||||
return libmorton::morton3D_64_encode(x, y, z);
|
||||
}
|
||||
|
||||
INLINE inline auto octree::jitter_pos(Vector3 p,
|
||||
const uint32_t seed,
|
||||
const Vector3& root_min,
|
||||
const Vector3& root_max,
|
||||
const float root_extent) -> Vector3
|
||||
{
|
||||
// Use a hash to calculate a deterministic jitter: The same position should always get the same jitter.
|
||||
// We want this to get stable physics, particles at the same position shouldn't get different jitters
|
||||
// across frames...
|
||||
uint32_t h = (seed ^ 61u) ^ (seed >> 16);
|
||||
h *= 9u;
|
||||
h = h ^ (h >> 4);
|
||||
h *= 0x27d4eb2du;
|
||||
h = h ^ (h >> 15);
|
||||
|
||||
// finest cell size at depth L
|
||||
const float finest_cell = root_extent / static_cast<float>(1u << MAX_DEPTH);
|
||||
const float s = finest_cell * 1e-4f; // small pp
|
||||
|
||||
p.x += (h & 1u) ? +s : -s;
|
||||
p.y += (h & 2u) ? +s : -s;
|
||||
p.z += (h & 4u) ? +s : -s;
|
||||
|
||||
// clamp back into bounds just in case
|
||||
p.x = std::max(root_min.x, std::min(p.x, root_max.x));
|
||||
p.y = std::max(root_min.y, std::min(p.y, root_max.y));
|
||||
p.z = std::max(root_min.z, std::min(p.z, root_max.z));
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
INLINE inline auto octree::path_to_ancestor(const uint64_t leaf_code, const int leaf_depth, const int depth) -> uint64_t
|
||||
{
|
||||
// keep top 3*depth bits; drop the rest
|
||||
const int drop = 3 * (leaf_depth - depth);
|
||||
return (drop > 0) ? (leaf_code >> drop) : leaf_code;
|
||||
}
|
||||
|
||||
INLINE inline auto octree::octant_at_level(const uint64_t leaf_code, const int level, const int leaf_depth) -> int
|
||||
{
|
||||
// level 1 => child of root => topmost 3 bits
|
||||
const int shift = 3 * (leaf_depth - level);
|
||||
return static_cast<int>((leaf_code >> shift) & 0x7ull);
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -7,6 +7,10 @@
|
||||
#define INLINE __attribute__((always_inline))
|
||||
#define PACKED __attribute__((packed))
|
||||
|
||||
#define STARTTIME const auto start = std::chrono::high_resolution_clock::now()
|
||||
#define ENDTIME(msg) const auto end = std::chrono::high_resolution_clock::now(); \
|
||||
infoln("{}. Took {}ms.", msg, std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count())
|
||||
|
||||
// std::variant visitor
|
||||
|
||||
// https://en.cppreference.com/w/cpp/utility/variant/visit
|
||||
@ -92,28 +96,32 @@ template <typename... Args>
|
||||
auto traceln(std::format_string<Args...> fmt, Args&&... args) -> void
|
||||
{
|
||||
std::cout << std::format("[{}TRACE{}]: ", ansi_bold_fg(fg::cyan), ansi_reset()) << std::format(
|
||||
fmt, std::forward<Args>(args)...) << std::endl;
|
||||
fmt,
|
||||
std::forward<Args>(args)...) << std::endl;
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
auto infoln(std::format_string<Args...> fmt, Args&&... args) -> void
|
||||
{
|
||||
std::cout << std::format("[{}INFO{}]: ", ansi_bold_fg(fg::blue), ansi_reset()) << std::format(
|
||||
fmt, std::forward<Args>(args)...) << std::endl;
|
||||
fmt,
|
||||
std::forward<Args>(args)...) << std::endl;
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
auto warnln(std::format_string<Args...> fmt, Args&&... args) -> void
|
||||
{
|
||||
std::cout << std::format("[{}WARNING{}]: ", ansi_bold_fg(fg::yellow), ansi_reset()) << std::format(
|
||||
fmt, std::forward<Args>(args)...) << std::endl;
|
||||
fmt,
|
||||
std::forward<Args>(args)...) << std::endl;
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
auto errln(std::format_string<Args...> fmt, Args&&... args) -> void
|
||||
{
|
||||
std::cout << std::format("[{}ERROR{}]: ", ansi_bold_fg(fg::red), ansi_reset()) << std::format(
|
||||
fmt, std::forward<Args>(args)...) << std::endl;
|
||||
fmt,
|
||||
std::forward<Args>(args)...) << std::endl;
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -103,7 +103,7 @@ auto cpu_layout_engine::physics_thread(physics_state& state, const std::optional
|
||||
last_mass_count = mass_springs.positions.size();
|
||||
}
|
||||
#else
|
||||
octree::build_octree(mass_springs.tree, mass_springs.positions);
|
||||
octree::build_octree_morton(mass_springs.tree, mass_springs.positions);
|
||||
#endif
|
||||
|
||||
mass_springs.clear_forces();
|
||||
@ -157,7 +157,7 @@ auto cpu_layout_engine::physics_thread(physics_state& state, const std::optional
|
||||
if (mass_springs.tree.empty()) {
|
||||
state.mass_center = Vector3Zero();
|
||||
} else {
|
||||
state.mass_center = mass_springs.tree.nodes[0].mass_center;
|
||||
state.mass_center = mass_springs.tree.root().mass_center;
|
||||
}
|
||||
|
||||
state.masses.clear();
|
||||
|
||||
@ -45,7 +45,7 @@ auto cpu_spring_system::add_spring(size_t a, size_t b) -> void
|
||||
// If the offset moves the mass closer to the current center of mass, flip it
|
||||
if (!tree.empty()) {
|
||||
const Vector3 mass_center_direction =
|
||||
Vector3Subtract(positions[a], tree.nodes[0].mass_center);
|
||||
Vector3Subtract(positions[a], tree.root().mass_center);
|
||||
const float mass_center_distance = Vector3Length(mass_center_direction);
|
||||
|
||||
if (mass_center_distance > 0 && Vector3DotProduct(offset, mass_center_direction) < 0.0f) {
|
||||
@ -126,7 +126,7 @@ auto cpu_spring_system::calculate_repulsion_forces(
|
||||
|
||||
const auto solve_octree = [&](const int i)
|
||||
{
|
||||
const Vector3 force = tree.calculate_force(0, positions[i]);
|
||||
const Vector3 force = tree.calculate_force_morton(0, positions[i], i);
|
||||
forces[i] += force;
|
||||
};
|
||||
|
||||
|
||||
@ -11,7 +11,7 @@
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
#ifndef WIN32
|
||||
#if not defined(_WIN32)
|
||||
#include <boost/program_options.hpp>
|
||||
namespace po = boost::program_options;
|
||||
#endif
|
||||
@ -235,7 +235,7 @@ enum class runmode
|
||||
|
||||
auto argparse(const int argc, char* argv[]) -> runmode
|
||||
{
|
||||
#ifndef WIN32
|
||||
#if not defined(_WIN32)
|
||||
po::options_description desc("Allowed options");
|
||||
desc.add_options() //
|
||||
("help", "produce help message") //
|
||||
|
||||
415
src/octree.cpp
415
src/octree.cpp
@ -7,205 +7,316 @@
|
||||
auto octree::clear() -> void
|
||||
{
|
||||
nodes.clear();
|
||||
|
||||
// mass_centers.clear();
|
||||
// mass_totals.clear();
|
||||
// box_mins.clear();
|
||||
// box_maxs.clear();
|
||||
// childrens.clear();
|
||||
// mass_ids.clear();
|
||||
// leafs.clear();
|
||||
}
|
||||
|
||||
auto octree::reserve(const size_t count) -> void
|
||||
{
|
||||
nodes.reserve(count);
|
||||
|
||||
// mass_centers.reserve(count);
|
||||
// mass_totals.reserve(count);
|
||||
// box_mins.reserve(count);
|
||||
// box_maxs.reserve(count);
|
||||
// childrens.reserve(count);
|
||||
// mass_ids.reserve(count);
|
||||
// leafs.reserve(count);
|
||||
}
|
||||
|
||||
auto octree::empty() const -> bool
|
||||
{
|
||||
return nodes.empty();
|
||||
|
||||
// return mass_centers.empty();
|
||||
}
|
||||
|
||||
auto octree::get_child_count(const int node_idx) const -> int
|
||||
auto octree::root() const -> const node&
|
||||
{
|
||||
int child_count = 0;
|
||||
for (const int child : nodes[node_idx].children) {
|
||||
if (child != -1) {
|
||||
++child_count;
|
||||
}
|
||||
}
|
||||
return child_count;
|
||||
return nodes[0];
|
||||
}
|
||||
|
||||
auto octree::insert(const int node_idx,
|
||||
const int mass_id,
|
||||
const Vector3& pos,
|
||||
const float mass,
|
||||
const int depth) -> void
|
||||
{
|
||||
if (depth > MAX_DEPTH) {
|
||||
throw std::runtime_error("Octree insertion reachead max depth");
|
||||
}
|
||||
|
||||
// NOTE: Do not store a nodes[node_idx] reference as the nodes vector might reallocate during
|
||||
// this function
|
||||
|
||||
// We can place the particle in the empty leaf
|
||||
const bool leaf = nodes[node_idx].leaf;
|
||||
if (leaf && nodes[node_idx].mass_id == -1) {
|
||||
nodes[node_idx].mass_id = mass_id;
|
||||
nodes[node_idx].mass_center = pos;
|
||||
nodes[node_idx].mass_total = mass;
|
||||
return;
|
||||
}
|
||||
|
||||
const Vector3& box_min = nodes[node_idx].box_min;
|
||||
const Vector3& box_max = nodes[node_idx].box_max;
|
||||
|
||||
// The leaf is occupied, we need to subdivide
|
||||
if (leaf) {
|
||||
const int existing_id = nodes[node_idx].mass_id;
|
||||
const Vector3 existing_pos = nodes[node_idx].mass_center;
|
||||
const float existing_mass = nodes[node_idx].mass_total;
|
||||
|
||||
// If positions are identical we jitter the particles
|
||||
const Vector3 diff = Vector3Subtract(pos, existing_pos);
|
||||
if (diff == Vector3Zero()) {
|
||||
// warnln("Trying to insert an identical partical into octree (jittering position)");
|
||||
|
||||
Vector3 jittered = pos;
|
||||
jittered.x += 0.001;
|
||||
jittered.y += 0.001;
|
||||
insert(node_idx, mass_id, jittered, mass, depth);
|
||||
return;
|
||||
|
||||
// Could also merge them, but that leads to the octree having less leafs than we have
|
||||
// masses nodes[node_idx].mass_total += mass; return;
|
||||
}
|
||||
|
||||
// Convert the leaf to an internal node
|
||||
nodes[node_idx].mass_id = -1;
|
||||
nodes[node_idx].leaf = false;
|
||||
nodes[node_idx].mass_center = Vector3Zero();
|
||||
nodes[node_idx].mass_total = 0.0;
|
||||
|
||||
// Re-insert the existing mass into a new empty leaf (see above)
|
||||
const int oct = get_octant(box_min, box_max, existing_pos);
|
||||
if (nodes[node_idx].children[oct] == -1) {
|
||||
const auto& [min, max] = get_child_bounds(box_min, box_max, oct);
|
||||
const int child_idx = create_empty_leaf(min, max);
|
||||
nodes[node_idx].children[oct] = child_idx;
|
||||
}
|
||||
insert(nodes[node_idx].children[oct], existing_id, existing_pos, existing_mass, depth + 1);
|
||||
}
|
||||
|
||||
// Insert the new mass
|
||||
const int oct = get_octant(box_min, box_max, pos);
|
||||
if (nodes[node_idx].children[oct] == -1) {
|
||||
const auto& [min, max] = get_child_bounds(box_min, box_max, oct);
|
||||
const int child_idx = create_empty_leaf(min, max);
|
||||
nodes[node_idx].children[oct] = child_idx;
|
||||
}
|
||||
insert(nodes[node_idx].children[oct], mass_id, pos, mass, depth + 1);
|
||||
|
||||
// Update the center of mass
|
||||
const float new_mass = nodes[node_idx].mass_total + mass;
|
||||
nodes[node_idx].mass_center = (nodes[node_idx].mass_center * nodes[node_idx].mass_total + pos) / new_mass;
|
||||
nodes[node_idx].mass_total = new_mass;
|
||||
}
|
||||
|
||||
auto octree::build_octree(octree& t, const std::vector<Vector3>& positions) -> void
|
||||
// Replaced the 50 line recursive octree insertion with this bitch to gain 5 UPS, FML
|
||||
auto octree::build_octree_morton(octree& t, const std::vector<Vector3>& positions) -> void
|
||||
{
|
||||
#ifdef TRACY
|
||||
ZoneScoped;
|
||||
#endif
|
||||
|
||||
t.clear();
|
||||
t.reserve(positions.size() * 2);
|
||||
if (positions.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Compute bounding box around all masses
|
||||
Vector3 min{FLT_MAX, FLT_MAX, FLT_MAX};
|
||||
Vector3 max{-FLT_MAX, -FLT_MAX, -FLT_MAX};
|
||||
Vector3 root_min{FLT_MAX, FLT_MAX, FLT_MAX};
|
||||
Vector3 root_max{-FLT_MAX, -FLT_MAX, -FLT_MAX};
|
||||
for (const auto& [x, y, z] : positions) {
|
||||
min.x = std::min(min.x, x);
|
||||
max.x = std::max(max.x, x);
|
||||
min.y = std::min(min.y, y);
|
||||
max.y = std::max(max.y, y);
|
||||
min.z = std::min(min.z, z);
|
||||
max.z = std::max(max.z, z);
|
||||
root_min.x = std::min(root_min.x, x);
|
||||
root_max.x = std::max(root_max.x, x);
|
||||
root_min.y = std::min(root_min.y, y);
|
||||
root_max.y = std::max(root_max.y, y);
|
||||
root_min.z = std::min(root_min.z, z);
|
||||
root_max.z = std::max(root_max.z, z);
|
||||
}
|
||||
|
||||
// Pad the bounding box
|
||||
constexpr float pad = 1.0;
|
||||
min = Vector3Subtract(min, Vector3Scale(Vector3One(), pad));
|
||||
max = Vector3Add(max, Vector3Scale(Vector3One(), pad));
|
||||
constexpr float pad = 1.0f;
|
||||
root_min = Vector3Subtract(root_min, Vector3Scale(Vector3One(), pad));
|
||||
root_max = Vector3Add(root_max, Vector3Scale(Vector3One(), pad));
|
||||
|
||||
// Make it cubic (so subdivisions are balanced)
|
||||
const float max_extent = std::max({max.x - min.x, max.y - min.y, max.z - min.z});
|
||||
max = Vector3Add(min, Vector3Scale(Vector3One(), max_extent));
|
||||
const float max_extent = std::max({root_max.x - root_min.x, root_max.y - root_min.y, root_max.z - root_min.z});
|
||||
root_max = Vector3Add(root_min, Vector3Scale(Vector3One(), max_extent));
|
||||
|
||||
// Root node spans the entire area
|
||||
const int root = t.create_empty_leaf(min, max);
|
||||
const float root_extent = root_max.x - root_min.x; // cubic
|
||||
|
||||
for (size_t i = 0; i < positions.size(); ++i) {
|
||||
t.insert(root, static_cast<int>(i), positions[i], MASS, 0);
|
||||
// Container for building the particle list before sorting by morton code
|
||||
struct sort_node
|
||||
{
|
||||
uint64_t code;
|
||||
uint32_t id;
|
||||
Vector3 pos;
|
||||
};
|
||||
|
||||
std::vector<sort_node> sort_container;
|
||||
sort_container.reserve(positions.size());
|
||||
for (uint32_t i = 0; i < positions.size(); ++i) {
|
||||
sort_container.emplace_back(pos_to_morton(positions[i], root_min, root_max), i, positions[i]);
|
||||
}
|
||||
|
||||
// Sort the list by morton codes. Because positions close to each other have similar morten codes,
|
||||
// this provides us with "spatial locality" in the datastructure.
|
||||
auto sort_by_code = [&]()
|
||||
{
|
||||
std::ranges::sort(sort_container,
|
||||
[](const sort_node& a, const sort_node& b)
|
||||
{
|
||||
if (a.code != b.code) {
|
||||
return a.code < b.code;
|
||||
}
|
||||
return a.id < b.id;
|
||||
});
|
||||
};
|
||||
|
||||
sort_by_code();
|
||||
|
||||
// Resolve duplicates by jittering the later one deterministically and re-encoding.
|
||||
for (int seed = 0; seed < 8; ++seed) {
|
||||
bool had_dupes = false;
|
||||
|
||||
for (size_t i = 1; i < sort_container.size(); ++i) {
|
||||
// Because elements are spatially ordered after sorting, we can scan for duplicates in O(n)
|
||||
if (sort_container[i].code == sort_container[i - 1].code) {
|
||||
had_dupes = true;
|
||||
sort_container[i].pos = jitter_pos(sort_container[i].pos,
|
||||
sort_container[i].id + seed * 0x9e3779b9u,
|
||||
root_min,
|
||||
root_max,
|
||||
root_extent);
|
||||
sort_container[i].code = pos_to_morton(sort_container[i].pos, root_min, root_max);
|
||||
}
|
||||
}
|
||||
|
||||
if (!had_dupes) {
|
||||
break;
|
||||
}
|
||||
sort_by_code();
|
||||
}
|
||||
|
||||
// Sanity check
|
||||
for (size_t i = 1; i < sort_container.size(); ++i) {
|
||||
if (sort_container[i].code == sort_container[i - 1].code) {
|
||||
throw std::runtime_error("Duplicates remain after jittering");
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::vector<node>> tree_levels;
|
||||
tree_levels.assign(MAX_DEPTH + 1, {});
|
||||
|
||||
// Leaves at MAX_DEPTH: 1 particle per leaf in morton order (close particles close together)
|
||||
auto& leafs = tree_levels[MAX_DEPTH];
|
||||
leafs.reserve(sort_container.size());
|
||||
const float leaf_size = root_extent / static_cast<float>(MAX_DEPTH);
|
||||
for (const auto& [code, id, pos] : sort_container) {
|
||||
node leaf;
|
||||
leaf.leaf = true;
|
||||
leaf.mass_id = static_cast<int>(id);
|
||||
leaf.depth = MAX_DEPTH;
|
||||
leaf.size = leaf_size;
|
||||
leaf.mass_total = MASS;
|
||||
leaf.mass_center = pos; // force uses mass_center instead of jittered position
|
||||
leaf.children.fill(-1);
|
||||
leafs.push_back(leaf);
|
||||
}
|
||||
|
||||
// We now have to group the particles (currently we have only sorted the leaves),
|
||||
// but upwards subdivisions have to be created.
|
||||
// For grouping, store a nodes local index in its level.
|
||||
struct leaf
|
||||
{
|
||||
uint64_t leaf_code;
|
||||
int depth;
|
||||
int level_index;
|
||||
};
|
||||
|
||||
std::vector<leaf> leaves;
|
||||
leaves.reserve(tree_levels[MAX_DEPTH].size());
|
||||
for (int i = 0; i < static_cast<int>(tree_levels[MAX_DEPTH].size()); ++i) {
|
||||
leaves.emplace_back(sort_container[static_cast<size_t>(i)].code, MAX_DEPTH, i);
|
||||
}
|
||||
|
||||
// Build internal levels from MAX_DEPTH-1 to 0
|
||||
for (int current_depth = MAX_DEPTH - 1; current_depth >= 0; --current_depth) {
|
||||
auto& current_level = tree_levels[current_depth];
|
||||
current_level.clear();
|
||||
|
||||
std::vector<leaf> next_refs;
|
||||
next_refs.reserve(leaves.size());
|
||||
|
||||
const float parent_size = root_extent / static_cast<float>(1u << current_depth);
|
||||
|
||||
size_t i = 0;
|
||||
while (i < leaves.size()) {
|
||||
const uint64_t key = path_to_ancestor(leaves[i].leaf_code, MAX_DEPTH, current_depth);
|
||||
|
||||
size_t j = i + 1;
|
||||
while (j < leaves.size() && path_to_ancestor(leaves[j].leaf_code, MAX_DEPTH, current_depth) == key) {
|
||||
++j;
|
||||
}
|
||||
|
||||
const size_t group_size = j - i;
|
||||
|
||||
// Unary compression: just carry the child ref upward unchanged.
|
||||
if (group_size == 1) {
|
||||
next_refs.push_back(leaves[i]);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
|
||||
node parent;
|
||||
parent.leaf = false;
|
||||
parent.mass_id = -1;
|
||||
parent.depth = current_depth;
|
||||
parent.size = parent_size;
|
||||
parent.children.fill(-1);
|
||||
|
||||
float mass_total = 0.0f;
|
||||
Vector3 mass_center_acc = Vector3Zero();
|
||||
|
||||
for (size_t k = i; k < j; ++k) {
|
||||
const int child_depth = leaves[k].depth;
|
||||
const int child_local = leaves[k].level_index;
|
||||
|
||||
// Read the child from its actual stored level.
|
||||
const node& child = tree_levels[child_depth][child_local];
|
||||
|
||||
// Which octant of this parent does it belong to?
|
||||
// IMPORTANT: octant comes from the NEXT level after current_depth (i.e. current_depth+1),
|
||||
// but the child might skip levels due to compression.
|
||||
// We must use the child's "first level under the parent" which is (current_depth+1).
|
||||
const int oct = octant_at_level(leaves[k].leaf_code, current_depth + 1, MAX_DEPTH);
|
||||
|
||||
// Store *global* child reference: we only have an int slot, so we need a single index space.
|
||||
parent.children[oct] = (child_depth << 24) | (child_local & 0x00FFFFFF);
|
||||
|
||||
mass_total += child.mass_total;
|
||||
mass_center_acc = Vector3Add(mass_center_acc, Vector3Scale(child.mass_center, child.mass_total));
|
||||
}
|
||||
|
||||
parent.mass_total = mass_total;
|
||||
parent.mass_center = (mass_total > 0.0f) ? Vector3Scale(mass_center_acc, 1.0f / mass_total) : Vector3Zero();
|
||||
|
||||
const int parent_local = static_cast<int>(current_level.size());
|
||||
current_level.push_back(parent);
|
||||
|
||||
next_refs.push_back({leaves[i].leaf_code, current_depth, parent_local});
|
||||
i = j;
|
||||
}
|
||||
|
||||
leaves.swap(next_refs);
|
||||
}
|
||||
|
||||
// Flatten levels and fix child indices from local->global.
|
||||
// All depth 0 nodes come first, then depth 1, last depth MAX_DEPTH.
|
||||
std::vector<int> offsets(tree_levels.size(), 0);
|
||||
int total = 0;
|
||||
for (int d = 0; d <= MAX_DEPTH; ++d) {
|
||||
offsets[d] = total;
|
||||
total += static_cast<int>(tree_levels[d].size());
|
||||
}
|
||||
|
||||
t.nodes.clear();
|
||||
t.nodes.reserve(total);
|
||||
for (int d = 0; d <= MAX_DEPTH; ++d) {
|
||||
t.nodes.insert(t.nodes.end(), tree_levels[d].begin(), tree_levels[d].end());
|
||||
}
|
||||
|
||||
// Fix child indices: convert local index in levels[d+1] to global index in t.nodes
|
||||
for (int d = 0; d <= MAX_DEPTH; ++d) {
|
||||
const int base = offsets[d];
|
||||
for (int i2 = 0; i2 < static_cast<int>(tree_levels[d].size()); ++i2) {
|
||||
node& n = t.nodes[base + i2];
|
||||
if (!n.leaf) {
|
||||
for (int c = 0; c < 8; ++c) {
|
||||
int packed = n.children[c];
|
||||
if (packed >= 0) {
|
||||
const int child_depth = (packed >> 24) & 0xFF;
|
||||
const int child_local = packed & 0x00FFFFFF;
|
||||
n.children[c] = offsets[child_depth] + child_local;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// const size_t _leaves = tree_levels[MAX_DEPTH].size();
|
||||
// const size_t _total = t.nodes.size();
|
||||
// const size_t _internal = _total - _leaves;
|
||||
// traceln("Morton octree nodes: {}, leaves: {}, ratio: {:.3} children per internal node.",
|
||||
// _total,
|
||||
// _leaves,
|
||||
// static_cast<float>(_total - 1) / _internal);
|
||||
}
|
||||
|
||||
auto octree::calculate_force(const int node_idx, const Vector3& pos) const -> Vector3
|
||||
auto octree::calculate_force_morton(const int node_idx, const Vector3& pos, const int self_id) const -> Vector3
|
||||
{
|
||||
if (node_idx < 0) {
|
||||
return Vector3Zero();
|
||||
}
|
||||
|
||||
const node& n = nodes[node_idx];
|
||||
// Force accumulator
|
||||
float fx = 0.0f;
|
||||
float fy = 0.0f;
|
||||
float fz = 0.0f;
|
||||
|
||||
const float mass_total = n.mass_total;
|
||||
const Vector3& mass_center = n.mass_center;
|
||||
const Vector3& box_min = n.box_min;
|
||||
const Vector3& box_max = n.box_max;
|
||||
const std::array<int, 8>& children = n.children;
|
||||
const bool leaf = n.leaf;
|
||||
std::vector<int> stack;
|
||||
stack.reserve(128);
|
||||
stack.push_back(node_idx);
|
||||
|
||||
if (std::abs(mass_total) <= 0.001f) {
|
||||
return Vector3Zero();
|
||||
constexpr float theta2 = THETA * THETA;
|
||||
|
||||
while (!stack.empty()) {
|
||||
const int idx = stack.back();
|
||||
stack.pop_back();
|
||||
|
||||
const node& n = nodes[idx];
|
||||
|
||||
// No self-force for single-particle leafs
|
||||
if (n.leaf && n.mass_id == self_id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const Vector3 diff = Vector3Subtract(pos, mass_center);
|
||||
float dist_sq = diff.x * diff.x + diff.y * diff.y + diff.z * diff.z;
|
||||
const float dx = pos.x - n.mass_center.x;
|
||||
const float dy = pos.y - n.mass_center.y;
|
||||
const float dz = pos.z - n.mass_center.z;
|
||||
|
||||
// Softening
|
||||
dist_sq += SOFTENING;
|
||||
const float dist_sq = dx * dx + dy * dy + dz * dz + SOFTENING;
|
||||
|
||||
// Barnes-Hut
|
||||
const float size = box_max.x - box_min.x;
|
||||
if (leaf || size * size / dist_sq < THETA * THETA) {
|
||||
const float dist = std::sqrt(dist_sq);
|
||||
const float force_mag = BH_FORCE * mass_total / dist_sq;
|
||||
|
||||
return Vector3Scale(diff, force_mag / dist);
|
||||
// Barnes–Hut criterion
|
||||
if (n.leaf || ((n.size * n.size) / dist_sq) < theta2) {
|
||||
const float inv_dist = 1.0f / std::sqrt(dist_sq);
|
||||
const float force_mag = (BH_FORCE * n.mass_total) / dist_sq; // ~ 1/r^2
|
||||
const float s = force_mag * inv_dist; // scale by 1/r to get vector
|
||||
fx += dx * s;
|
||||
fy += dy * s;
|
||||
fz += dz * s;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Collect child forces
|
||||
Vector3 force = Vector3Zero();
|
||||
for (const int child : children) {
|
||||
for (int c = 0; c < 8; ++c) {
|
||||
const int child = n.children[c];
|
||||
if (child >= 0) {
|
||||
const Vector3 child_force = calculate_force(child, pos);
|
||||
|
||||
force = Vector3Add(force, child_force);
|
||||
stack.push_back(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return force;
|
||||
return Vector3{fx, fy, fz};
|
||||
}
|
||||
Reference in New Issue
Block a user