parallelize repulsion forces using openmp

This commit is contained in:
2026-02-18 02:08:46 +01:00
parent 43c9a5b715
commit e2e75204ef
8 changed files with 84 additions and 62 deletions

View File

@ -1 +1 @@
./cmake-build-debug/.clangd
./cmake-build-release/.clangd

View File

@ -5,6 +5,7 @@ set(CMAKE_CXX_STANDARD 23)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
find_package(raylib REQUIRED)
find_package(OpenMP REQUIRED)
include_directories(include)
@ -16,4 +17,4 @@ add_executable(masssprings
)
target_include_directories(masssprings PUBLIC ${RAYLIB_CPP_INCLUDE_DIR})
target_link_libraries(masssprings PUBLIC raylib)
target_link_libraries(masssprings PUBLIC raylib OpenMP::OpenMP_CXX)

View File

@ -1 +1 @@
./cmake-build-debug/compile_commands.json
./cmake-build-release/compile_commands.json

View File

@ -143,6 +143,7 @@ rec {
# boost
# sfml
raylib
llvmPackages.openmp
# raylib-cpp
# tinyobjloader
# gperftools
@ -194,7 +195,7 @@ rec {
# ];
# Dynamic libraries from buildinputs:
# LD_LIBRARY_PATH = nixpkgs.lib.makeLibraryPath buildInputs;
LD_LIBRARY_PATH = nixpkgs.lib.makeLibraryPath buildInputs;
# =========================================================================================
# Define shell environment
@ -208,6 +209,9 @@ rec {
pkgs.writers.writeFish "cmake-${typeLower}.fish" ''
cd $FLAKE_PROJECT_ROOT
# set -g -x CC ${clang}/bin/clang
# set -g -x CXX ${clang}/bin/clang++
echo "Removing build directory ./cmake-build-${typeLower}/"
rm -rf ./cmake-build-${typeLower}

View File

@ -4,15 +4,15 @@
#include <raylib.h>
// Window
constexpr int WIDTH = 1000;
constexpr int HEIGHT = 1000;
constexpr int WIDTH = 1300;
constexpr int HEIGHT = 1300;
// Camera Controls
constexpr float SIM_SPEED = 4.0;
constexpr float CAMERA_DISTANCE = 4.0;
constexpr float MIN_CAMERA_DISTANCE = 2.0;
constexpr float MAX_CAMERA_DISTANCE = 50.0;
constexpr float ZOOM_SPEED = 1.0;
constexpr float MAX_CAMERA_DISTANCE = 150.0;
constexpr float ZOOM_SPEED = 1.5;
constexpr float PAN_SPEED = 1.0;
constexpr float ROT_SPEED = 1.0;
@ -20,7 +20,7 @@ constexpr float ROT_SPEED = 1.0;
constexpr float SPRING_CONSTANT = 1.5;
constexpr float DAMPENING_CONSTANT = 0.8;
constexpr float REST_LENGTH = 1.0;
constexpr float REPULSION_FORCE = 0.05;
constexpr float REPULSION_FORCE = 0.1;
constexpr float REPULSION_RANGE = 3.0 * REST_LENGTH;
constexpr float VERLET_DAMPENING = 0.01; // [0, 1]

View File

@ -2,6 +2,7 @@
#include <chrono>
#include <iostream>
#include <omp.h>
#include <ratio>
#include <raylib.h>
#include <raymath.h>
@ -17,12 +18,12 @@ auto klotski_a() -> State {
s.AddBlock(Block(1, 0, 2, 2, true));
s.AddBlock(Block(3, 0, 1, 2, false));
s.AddBlock(Block(0, 2, 1, 2, false));
// s.AddBlock(Block(1, 2, 2, 1, false));
// s.AddBlock(Block(3, 2, 1, 2, false));
// s.AddBlock(Block(1, 3, 1, 1, false));
// s.AddBlock(Block(2, 3, 1, 1, false));
// s.AddBlock(Block(0, 4, 1, 1, false));
// s.AddBlock(Block(3, 4, 1, 1, false));
s.AddBlock(Block(1, 2, 2, 1, false));
s.AddBlock(Block(3, 2, 1, 2, false));
s.AddBlock(Block(1, 3, 1, 1, false));
s.AddBlock(Block(2, 3, 1, 1, false));
s.AddBlock(Block(0, 4, 1, 1, false));
s.AddBlock(Block(3, 4, 1, 1, false));
return s;
}
@ -33,6 +34,8 @@ auto main(int argc, char *argv[]) -> int {
// return 1;
// }
std::cout << "OpenMP: " << omp_get_max_threads() << " threads." << std::endl;
SetTraceLogLevel(LOG_ERROR);
// SetTargetFPS(165);
@ -180,7 +183,7 @@ auto main(int argc, char *argv[]) -> int {
render_time_accumulator += re - rs;
time_measure_count++;
if (GetTime() - last_print_time > 3.0) {
if (GetTime() - last_print_time > 10.0) {
std::cout << "\n - Physics time avg: "
<< physics_time_accumulator / time_measure_count << "."
<< std::endl;

View File

@ -2,6 +2,7 @@
#include "config.hpp"
#include <format>
#include <numeric>
#include <raymath.h>
#include <unordered_map>
#include <vector>
@ -123,68 +124,79 @@ auto MassSpringSystem::CalculateSpringForces() -> void {
}
auto MassSpringSystem::CalculateRepulsionForces() -> void {
const float INV_CELL = 1.0 / REPULSION_RANGE;
struct CellKey {
int x, y, z;
bool operator==(const CellKey &other) const {
return x == other.x && y == other.y && z == other.z;
}
};
struct CellHash {
size_t operator()(const CellKey &key) const {
return ((size_t)key.x * 73856093) ^ ((size_t)key.y * 19349663) ^
((size_t)key.z * 83492791);
}
};
// Accelerate with uniform grid
std::unordered_map<CellKey, std::vector<Mass *>, CellHash> grid;
grid.reserve(masses.size());
const float INV_CELL = 1.0f / REPULSION_RANGE;
const int n = masses.size();
// Collect pointers
std::vector<Mass *> massVec;
massVec.reserve(n);
for (auto &[state, mass] : masses) {
CellKey key{
(int)std::floor(mass.position.x * INV_CELL),
(int)std::floor(mass.position.y * INV_CELL),
(int)std::floor(mass.position.z * INV_CELL),
};
grid[key].push_back(&mass);
massVec.push_back(&mass);
}
for (auto &[state, mass] : masses) {
int cx = (int)std::floor(mass.position.x * INV_CELL);
int cy = (int)std::floor(mass.position.y * INV_CELL);
int cz = (int)std::floor(mass.position.z * INV_CELL);
// Map a position to the packed 64-bit key of the grid cell that contains it
auto cellID = [&](const Vector3 &p) -> int64_t {
  // Only the low 20 bits of each axis' cell coordinate are kept, so cells
  // whose coordinates differ by a multiple of 2^20 end up with the same key
  constexpr int64_t AXIS_MASK = 0xFFFFF;
  const auto axisBits = [&](float coord) -> int64_t {
    const int cell = (int)std::floor(coord * INV_CELL);
    return (int64_t)cell & AXIS_MASK;
  };
  // Layout: x in bits 40..59, y in bits 20..39, z in bits 0..19
  return (axisBits(p.x) << 40) | (axisBits(p.y) << 20) | axisBits(p.z);
};
// Compute every particle's cell key exactly once. The sort comparator below
// then only compares two integers instead of re-deriving both keys (three
// floor() calls plus packing each) on every comparison.
std::vector<int64_t> keys(n);
for (int i = 0; i < n; ++i) {
  keys[i] = cellID(massVec[i]->position);
}
// Sort particle indices by cell key so particles sharing a cell are contiguous
std::vector<int> indices(n);
std::iota(indices.begin(), indices.end(), 0);
std::sort(indices.begin(), indices.end(),
          [&](int a, int b) { return keys[a] < keys[b]; });
// Sorted cell keys, parallel to `indices`: cellIDs[i] is the key of particle
// massVec[indices[i]]. The neighbor lookup binary-searches this array.
std::vector<int64_t> cellIDs(n);
for (int i = 0; i < n; ++i) {
  cellIDs[i] = keys[indices[i]];
}
#pragma omp parallel for
for (int i = 0; i < n; ++i) {
Mass *mass = massVec[indices[i]];
int cx = (int)std::floor(mass->position.x * INV_CELL);
int cy = (int)std::floor(mass->position.y * INV_CELL);
int cz = (int)std::floor(mass->position.z * INV_CELL);
Vector3 force = {0, 0, 0};
// Check all 27 neighboring cells (including own)
for (int dx = -1; dx <= 1; ++dx) {
for (int dy = -1; dy <= 1; ++dy) {
for (int dz = -1; dz <= 1; ++dz) {
CellKey neighbor{cx + dx, cy + dy, cz + dz};
auto it = grid.find(neighbor);
if (it == grid.end()) {
int64_t nid = ((int64_t)((cx + dx) & 0xFFFFF) << 40) |
((int64_t)((cy + dy) & 0xFFFFF) << 20) |
(int64_t)((cz + dz) & 0xFFFFF);
// Binary search for this neighbor cell in sorted array
auto lo = std::lower_bound(cellIDs.begin(), cellIDs.end(), nid);
auto hi = std::upper_bound(cellIDs.begin(), cellIDs.end(), nid);
for (auto it = lo; it != hi; ++it) {
Mass *m = massVec[indices[it - cellIDs.begin()]];
if (m == mass) {
continue;
}
for (Mass *m : it->second) {
if (m == &mass) {
continue; // skip self
}
Vector3 diff = Vector3Subtract(mass.position, m->position);
Vector3 diff = Vector3Subtract(mass->position, m->position);
float len = Vector3Length(diff);
if (len == 0.0f || len >= REPULSION_RANGE) {
continue;
}
mass.force =
Vector3Add(mass.force, Vector3Scale(Vector3Normalize(diff),
REPULSION_FORCE));
force = Vector3Add(
force, Vector3Scale(Vector3Normalize(diff), REPULSION_FORCE));
}
}
}
}
mass->force = Vector3Add(mass->force, force);
}
// Old method

View File

@ -78,11 +78,13 @@ auto Renderer::DrawMassSprings(const MassSpringSystem &masssprings) -> void {
DrawLine3D(a.position, b.position, EDGE_COLOR);
}
// Draw masses
// Draw masses (high performance impact)
if (masssprings.masses.size() <= 5000) {
for (const auto &[state, mass] : masssprings.masses) {
DrawCube(mass.position, VERTEX_SIZE, VERTEX_SIZE, VERTEX_SIZE,
VERTEX_COLOR);
}
}
// DrawGrid(10, 1.0);