parallelize repulsion forces using openmp

This commit is contained in:
2026-02-18 02:08:46 +01:00
parent 43c9a5b715
commit e2e75204ef
8 changed files with 84 additions and 62 deletions

View File

@ -2,6 +2,7 @@
#include <chrono>
#include <iostream>
#include <omp.h>
#include <ratio>
#include <raylib.h>
#include <raymath.h>
@ -17,12 +18,12 @@ auto klotski_a() -> State {
s.AddBlock(Block(1, 0, 2, 2, true));
s.AddBlock(Block(3, 0, 1, 2, false));
s.AddBlock(Block(0, 2, 1, 2, false));
// s.AddBlock(Block(1, 2, 2, 1, false));
// s.AddBlock(Block(3, 2, 1, 2, false));
// s.AddBlock(Block(1, 3, 1, 1, false));
// s.AddBlock(Block(2, 3, 1, 1, false));
// s.AddBlock(Block(0, 4, 1, 1, false));
// s.AddBlock(Block(3, 4, 1, 1, false));
s.AddBlock(Block(1, 2, 2, 1, false));
s.AddBlock(Block(3, 2, 1, 2, false));
s.AddBlock(Block(1, 3, 1, 1, false));
s.AddBlock(Block(2, 3, 1, 1, false));
s.AddBlock(Block(0, 4, 1, 1, false));
s.AddBlock(Block(3, 4, 1, 1, false));
return s;
}
@ -33,6 +34,8 @@ auto main(int argc, char *argv[]) -> int {
// return 1;
// }
std::cout << "OpenMP: " << omp_get_max_threads() << " threads." << std::endl;
SetTraceLogLevel(LOG_ERROR);
// SetTargetFPS(165);
@ -180,7 +183,7 @@ auto main(int argc, char *argv[]) -> int {
render_time_accumulator += re - rs;
time_measure_count++;
if (GetTime() - last_print_time > 3.0) {
if (GetTime() - last_print_time > 10.0) {
std::cout << "\n - Physics time avg: "
<< physics_time_accumulator / time_measure_count << "."
<< std::endl;

View File

@ -2,6 +2,7 @@
#include "config.hpp"
#include <format>
#include <numeric>
#include <raymath.h>
#include <unordered_map>
#include <vector>
@ -123,68 +124,79 @@ auto MassSpringSystem::CalculateSpringForces() -> void {
}
auto MassSpringSystem::CalculateRepulsionForces() -> void {
const float INV_CELL = 1.0 / REPULSION_RANGE;
struct CellKey {
int x, y, z;
bool operator==(const CellKey &other) const {
return x == other.x && y == other.y && z == other.z;
}
};
struct CellHash {
size_t operator()(const CellKey &key) const {
return ((size_t)key.x * 73856093) ^ ((size_t)key.y * 19349663) ^
((size_t)key.z * 83492791);
}
};
// Accelerate with uniform grid
std::unordered_map<CellKey, std::vector<Mass *>, CellHash> grid;
grid.reserve(masses.size());
const float INV_CELL = 1.0f / REPULSION_RANGE;
const int n = masses.size();
// Collect pointers
std::vector<Mass *> massVec;
massVec.reserve(n);
for (auto &[state, mass] : masses) {
CellKey key{
(int)std::floor(mass.position.x * INV_CELL),
(int)std::floor(mass.position.y * INV_CELL),
(int)std::floor(mass.position.z * INV_CELL),
};
grid[key].push_back(&mass);
massVec.push_back(&mass);
}
for (auto &[state, mass] : masses) {
int cx = (int)std::floor(mass.position.x * INV_CELL);
int cy = (int)std::floor(mass.position.y * INV_CELL);
int cz = (int)std::floor(mass.position.z * INV_CELL);
// Assign each particle a cell index
auto cellID = [&](const Vector3 &p) -> int64_t {
int x = (int)std::floor(p.x * INV_CELL);
int y = (int)std::floor(p.y * INV_CELL);
int z = (int)std::floor(p.z * INV_CELL);
// Pack into a single int64 (assumes coords fit in 20 bits each)
return ((int64_t)(x & 0xFFFFF) << 40) | ((int64_t)(y & 0xFFFFF) << 20) |
(int64_t)(z & 0xFFFFF);
};
// Sort particles by cell
std::vector<int> indices(n);
std::iota(indices.begin(), indices.end(), 0);
std::sort(indices.begin(), indices.end(), [&](int a, int b) {
return cellID(massVec[a]->position) < cellID(massVec[b]->position);
});
// Build cell start/end table
std::vector<int64_t> cellIDs(n);
for (int i = 0; i < n; ++i) {
cellIDs[i] = cellID(massVec[indices[i]]->position);
}
#pragma omp parallel for
for (int i = 0; i < n; ++i) {
Mass *mass = massVec[indices[i]];
int cx = (int)std::floor(mass->position.x * INV_CELL);
int cy = (int)std::floor(mass->position.y * INV_CELL);
int cz = (int)std::floor(mass->position.z * INV_CELL);
Vector3 force = {0, 0, 0};
// Check all 27 neighboring cells (including own)
for (int dx = -1; dx <= 1; ++dx) {
for (int dy = -1; dy <= 1; ++dy) {
for (int dz = -1; dz <= 1; ++dz) {
CellKey neighbor{cx + dx, cy + dy, cz + dz};
auto it = grid.find(neighbor);
if (it == grid.end()) {
continue;
}
int64_t nid = ((int64_t)((cx + dx) & 0xFFFFF) << 40) |
((int64_t)((cy + dy) & 0xFFFFF) << 20) |
(int64_t)((cz + dz) & 0xFFFFF);
for (Mass *m : it->second) {
if (m == &mass) {
continue; // skip self
// Binary search for this neighbor cell in sorted array
auto lo = std::lower_bound(cellIDs.begin(), cellIDs.end(), nid);
auto hi = std::upper_bound(cellIDs.begin(), cellIDs.end(), nid);
for (auto it = lo; it != hi; ++it) {
Mass *m = massVec[indices[it - cellIDs.begin()]];
if (m == mass) {
continue;
}
Vector3 diff = Vector3Subtract(mass.position, m->position);
Vector3 diff = Vector3Subtract(mass->position, m->position);
float len = Vector3Length(diff);
if (len == 0.0f || len >= REPULSION_RANGE) {
continue;
}
mass.force =
Vector3Add(mass.force, Vector3Scale(Vector3Normalize(diff),
REPULSION_FORCE));
force = Vector3Add(
force, Vector3Scale(Vector3Normalize(diff), REPULSION_FORCE));
}
}
}
}
mass->force = Vector3Add(mass->force, force);
}
// Old method

View File

@ -78,10 +78,12 @@ auto Renderer::DrawMassSprings(const MassSpringSystem &masssprings) -> void {
DrawLine3D(a.position, b.position, EDGE_COLOR);
}
// Draw masses
for (const auto &[state, mass] : masssprings.masses) {
DrawCube(mass.position, VERTEX_SIZE, VERTEX_SIZE, VERTEX_SIZE,
VERTEX_COLOR);
// Draw masses (high performance impact)
if (masssprings.masses.size() <= 5000) {
for (const auto &[state, mass] : masssprings.masses) {
DrawCube(mass.position, VERTEX_SIZE, VERTEX_SIZE, VERTEX_SIZE,
VERTEX_COLOR);
}
}
// DrawGrid(10, 1.0);