parallelize repulsion forces using OpenMP

This commit is contained in:
2026-02-18 02:08:46 +01:00
parent 43c9a5b715
commit e2e75204ef
8 changed files with 84 additions and 62 deletions

View File

@ -1 +1 @@
./cmake-build-debug/.clangd ./cmake-build-release/.clangd

View File

@ -5,6 +5,7 @@ set(CMAKE_CXX_STANDARD 23)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
find_package(raylib REQUIRED) find_package(raylib REQUIRED)
find_package(OpenMP REQUIRED)
include_directories(include) include_directories(include)
@ -16,4 +17,4 @@ add_executable(masssprings
) )
target_include_directories(masssprings PUBLIC ${RAYLIB_CPP_INCLUDE_DIR}) target_include_directories(masssprings PUBLIC ${RAYLIB_CPP_INCLUDE_DIR})
target_link_libraries(masssprings PUBLIC raylib) target_link_libraries(masssprings PUBLIC raylib OpenMP::OpenMP_CXX)

View File

@ -1 +1 @@
./cmake-build-debug/compile_commands.json ./cmake-build-release/compile_commands.json

View File

@ -143,6 +143,7 @@ rec {
# boost # boost
# sfml # sfml
raylib raylib
llvmPackages.openmp
# raylib-cpp # raylib-cpp
# tinyobjloader # tinyobjloader
# gperftools # gperftools
@ -194,7 +195,7 @@ rec {
# ]; # ];
# Dynamic libraries from buildinputs: # Dynamic libraries from buildinputs:
# LD_LIBRARY_PATH = nixpkgs.lib.makeLibraryPath buildInputs; LD_LIBRARY_PATH = nixpkgs.lib.makeLibraryPath buildInputs;
# ========================================================================================= # =========================================================================================
# Define shell environment # Define shell environment
@ -208,6 +209,9 @@ rec {
pkgs.writers.writeFish "cmake-${typeLower}.fish" '' pkgs.writers.writeFish "cmake-${typeLower}.fish" ''
cd $FLAKE_PROJECT_ROOT cd $FLAKE_PROJECT_ROOT
# set -g -x CC ${clang}/bin/clang
# set -g -x CXX ${clang}/bin/clang++
echo "Removing build directory ./cmake-build-${typeLower}/" echo "Removing build directory ./cmake-build-${typeLower}/"
rm -rf ./cmake-build-${typeLower} rm -rf ./cmake-build-${typeLower}

View File

@ -4,15 +4,15 @@
#include <raylib.h> #include <raylib.h>
// Window // Window
constexpr int WIDTH = 1000; constexpr int WIDTH = 1300;
constexpr int HEIGHT = 1000; constexpr int HEIGHT = 1300;
// Camera Controls // Camera Controls
constexpr float SIM_SPEED = 4.0; constexpr float SIM_SPEED = 4.0;
constexpr float CAMERA_DISTANCE = 4.0; constexpr float CAMERA_DISTANCE = 4.0;
constexpr float MIN_CAMERA_DISTANCE = 2.0; constexpr float MIN_CAMERA_DISTANCE = 2.0;
constexpr float MAX_CAMERA_DISTANCE = 50.0; constexpr float MAX_CAMERA_DISTANCE = 150.0;
constexpr float ZOOM_SPEED = 1.0; constexpr float ZOOM_SPEED = 1.5;
constexpr float PAN_SPEED = 1.0; constexpr float PAN_SPEED = 1.0;
constexpr float ROT_SPEED = 1.0; constexpr float ROT_SPEED = 1.0;
@ -20,7 +20,7 @@ constexpr float ROT_SPEED = 1.0;
constexpr float SPRING_CONSTANT = 1.5; constexpr float SPRING_CONSTANT = 1.5;
constexpr float DAMPENING_CONSTANT = 0.8; constexpr float DAMPENING_CONSTANT = 0.8;
constexpr float REST_LENGTH = 1.0; constexpr float REST_LENGTH = 1.0;
constexpr float REPULSION_FORCE = 0.05; constexpr float REPULSION_FORCE = 0.1;
constexpr float REPULSION_RANGE = 3.0 * REST_LENGTH; constexpr float REPULSION_RANGE = 3.0 * REST_LENGTH;
constexpr float VERLET_DAMPENING = 0.01; // [0, 1] constexpr float VERLET_DAMPENING = 0.01; // [0, 1]

View File

@ -2,6 +2,7 @@
#include <chrono> #include <chrono>
#include <iostream> #include <iostream>
#include <omp.h>
#include <ratio> #include <ratio>
#include <raylib.h> #include <raylib.h>
#include <raymath.h> #include <raymath.h>
@ -17,12 +18,12 @@ auto klotski_a() -> State {
s.AddBlock(Block(1, 0, 2, 2, true)); s.AddBlock(Block(1, 0, 2, 2, true));
s.AddBlock(Block(3, 0, 1, 2, false)); s.AddBlock(Block(3, 0, 1, 2, false));
s.AddBlock(Block(0, 2, 1, 2, false)); s.AddBlock(Block(0, 2, 1, 2, false));
// s.AddBlock(Block(1, 2, 2, 1, false)); s.AddBlock(Block(1, 2, 2, 1, false));
// s.AddBlock(Block(3, 2, 1, 2, false)); s.AddBlock(Block(3, 2, 1, 2, false));
// s.AddBlock(Block(1, 3, 1, 1, false)); s.AddBlock(Block(1, 3, 1, 1, false));
// s.AddBlock(Block(2, 3, 1, 1, false)); s.AddBlock(Block(2, 3, 1, 1, false));
// s.AddBlock(Block(0, 4, 1, 1, false)); s.AddBlock(Block(0, 4, 1, 1, false));
// s.AddBlock(Block(3, 4, 1, 1, false)); s.AddBlock(Block(3, 4, 1, 1, false));
return s; return s;
} }
@ -33,6 +34,8 @@ auto main(int argc, char *argv[]) -> int {
// return 1; // return 1;
// } // }
std::cout << "OpenMP: " << omp_get_max_threads() << " threads." << std::endl;
SetTraceLogLevel(LOG_ERROR); SetTraceLogLevel(LOG_ERROR);
// SetTargetFPS(165); // SetTargetFPS(165);
@ -180,7 +183,7 @@ auto main(int argc, char *argv[]) -> int {
render_time_accumulator += re - rs; render_time_accumulator += re - rs;
time_measure_count++; time_measure_count++;
if (GetTime() - last_print_time > 3.0) { if (GetTime() - last_print_time > 10.0) {
std::cout << "\n - Physics time avg: " std::cout << "\n - Physics time avg: "
<< physics_time_accumulator / time_measure_count << "." << physics_time_accumulator / time_measure_count << "."
<< std::endl; << std::endl;

View File

@ -2,6 +2,7 @@
#include "config.hpp" #include "config.hpp"
#include <format> #include <format>
#include <numeric>
#include <raymath.h> #include <raymath.h>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
@ -123,68 +124,79 @@ auto MassSpringSystem::CalculateSpringForces() -> void {
} }
auto MassSpringSystem::CalculateRepulsionForces() -> void { auto MassSpringSystem::CalculateRepulsionForces() -> void {
const float INV_CELL = 1.0 / REPULSION_RANGE; const float INV_CELL = 1.0f / REPULSION_RANGE;
const int n = masses.size();
struct CellKey {
int x, y, z;
bool operator==(const CellKey &other) const {
return x == other.x && y == other.y && z == other.z;
}
};
struct CellHash {
size_t operator()(const CellKey &key) const {
return ((size_t)key.x * 73856093) ^ ((size_t)key.y * 19349663) ^
((size_t)key.z * 83492791);
}
};
// Accelerate with uniform grid
std::unordered_map<CellKey, std::vector<Mass *>, CellHash> grid;
grid.reserve(masses.size());
// Collect pointers
std::vector<Mass *> massVec;
massVec.reserve(n);
for (auto &[state, mass] : masses) { for (auto &[state, mass] : masses) {
CellKey key{ massVec.push_back(&mass);
(int)std::floor(mass.position.x * INV_CELL),
(int)std::floor(mass.position.y * INV_CELL),
(int)std::floor(mass.position.z * INV_CELL),
};
grid[key].push_back(&mass);
} }
for (auto &[state, mass] : masses) { // Assign each particle a cell index
int cx = (int)std::floor(mass.position.x * INV_CELL); auto cellID = [&](const Vector3 &p) -> int64_t {
int cy = (int)std::floor(mass.position.y * INV_CELL); int x = (int)std::floor(p.x * INV_CELL);
int cz = (int)std::floor(mass.position.z * INV_CELL); int y = (int)std::floor(p.y * INV_CELL);
int z = (int)std::floor(p.z * INV_CELL);
// Pack into a single int64 (assumes coords fit in 20 bits each)
return ((int64_t)(x & 0xFFFFF) << 40) | ((int64_t)(y & 0xFFFFF) << 20) |
(int64_t)(z & 0xFFFFF);
};
// Sort particles by cell
std::vector<int> indices(n);
std::iota(indices.begin(), indices.end(), 0);
std::sort(indices.begin(), indices.end(), [&](int a, int b) {
return cellID(massVec[a]->position) < cellID(massVec[b]->position);
});
// Build cell start/end table
std::vector<int64_t> cellIDs(n);
for (int i = 0; i < n; ++i) {
cellIDs[i] = cellID(massVec[indices[i]]->position);
}
#pragma omp parallel for
for (int i = 0; i < n; ++i) {
Mass *mass = massVec[indices[i]];
int cx = (int)std::floor(mass->position.x * INV_CELL);
int cy = (int)std::floor(mass->position.y * INV_CELL);
int cz = (int)std::floor(mass->position.z * INV_CELL);
Vector3 force = {0, 0, 0};
// Check all 27 neighboring cells (including own)
for (int dx = -1; dx <= 1; ++dx) { for (int dx = -1; dx <= 1; ++dx) {
for (int dy = -1; dy <= 1; ++dy) { for (int dy = -1; dy <= 1; ++dy) {
for (int dz = -1; dz <= 1; ++dz) { for (int dz = -1; dz <= 1; ++dz) {
CellKey neighbor{cx + dx, cy + dy, cz + dz}; int64_t nid = ((int64_t)((cx + dx) & 0xFFFFF) << 40) |
auto it = grid.find(neighbor); ((int64_t)((cy + dy) & 0xFFFFF) << 20) |
if (it == grid.end()) { (int64_t)((cz + dz) & 0xFFFFF);
// Binary search for this neighbor cell in sorted array
auto lo = std::lower_bound(cellIDs.begin(), cellIDs.end(), nid);
auto hi = std::upper_bound(cellIDs.begin(), cellIDs.end(), nid);
for (auto it = lo; it != hi; ++it) {
Mass *m = massVec[indices[it - cellIDs.begin()]];
if (m == mass) {
continue; continue;
} }
for (Mass *m : it->second) { Vector3 diff = Vector3Subtract(mass->position, m->position);
if (m == &mass) {
continue; // skip self
}
Vector3 diff = Vector3Subtract(mass.position, m->position);
float len = Vector3Length(diff); float len = Vector3Length(diff);
if (len == 0.0f || len >= REPULSION_RANGE) { if (len == 0.0f || len >= REPULSION_RANGE) {
continue; continue;
} }
mass.force = force = Vector3Add(
Vector3Add(mass.force, Vector3Scale(Vector3Normalize(diff), force, Vector3Scale(Vector3Normalize(diff), REPULSION_FORCE));
REPULSION_FORCE));
} }
} }
} }
} }
mass->force = Vector3Add(mass->force, force);
} }
// Old method // Old method

View File

@ -78,11 +78,13 @@ auto Renderer::DrawMassSprings(const MassSpringSystem &masssprings) -> void {
DrawLine3D(a.position, b.position, EDGE_COLOR); DrawLine3D(a.position, b.position, EDGE_COLOR);
} }
// Draw masses // Draw masses (high performance impact)
if (masssprings.masses.size() <= 5000) {
for (const auto &[state, mass] : masssprings.masses) { for (const auto &[state, mass] : masssprings.masses) {
DrawCube(mass.position, VERTEX_SIZE, VERTEX_SIZE, VERTEX_SIZE, DrawCube(mass.position, VERTEX_SIZE, VERTEX_SIZE, VERTEX_SIZE,
VERTEX_COLOR); VERTEX_COLOR);
} }
}
// DrawGrid(10, 1.0); // DrawGrid(10, 1.0);