parallelize repulsion forces using openmp
This commit is contained in:
@ -5,6 +5,7 @@ set(CMAKE_CXX_STANDARD 23)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
find_package(raylib REQUIRED)
|
||||
find_package(OpenMP REQUIRED)
|
||||
|
||||
include_directories(include)
|
||||
|
||||
@ -16,4 +17,4 @@ add_executable(masssprings
|
||||
)
|
||||
|
||||
target_include_directories(masssprings PUBLIC ${RAYLIB_CPP_INCLUDE_DIR})
|
||||
target_link_libraries(masssprings PUBLIC raylib)
|
||||
target_link_libraries(masssprings PUBLIC raylib OpenMP::OpenMP_CXX)
|
||||
|
||||
@ -1 +1 @@
|
||||
./cmake-build-debug/compile_commands.json
|
||||
./cmake-build-release/compile_commands.json
|
||||
@ -143,6 +143,7 @@ rec {
|
||||
# boost
|
||||
# sfml
|
||||
raylib
|
||||
llvmPackages.openmp
|
||||
# raylib-cpp
|
||||
# tinyobjloader
|
||||
# gperftools
|
||||
@ -194,7 +195,7 @@ rec {
|
||||
# ];
|
||||
|
||||
# Dynamic libraries from buildinputs:
|
||||
# LD_LIBRARY_PATH = nixpkgs.lib.makeLibraryPath buildInputs;
|
||||
LD_LIBRARY_PATH = nixpkgs.lib.makeLibraryPath buildInputs;
|
||||
|
||||
# =========================================================================================
|
||||
# Define shell environment
|
||||
@ -208,6 +209,9 @@ rec {
|
||||
pkgs.writers.writeFish "cmake-${typeLower}.fish" ''
|
||||
cd $FLAKE_PROJECT_ROOT
|
||||
|
||||
# set -g -x CC ${clang}/bin/clang
|
||||
# set -g -x CXX ${clang}/bin/clang++
|
||||
|
||||
echo "Removing build directory ./cmake-build-${typeLower}/"
|
||||
rm -rf ./cmake-build-${typeLower}
|
||||
|
||||
|
||||
@ -4,15 +4,15 @@
|
||||
#include <raylib.h>
|
||||
|
||||
// Window
|
||||
constexpr int WIDTH = 1000;
|
||||
constexpr int HEIGHT = 1000;
|
||||
constexpr int WIDTH = 1300;
|
||||
constexpr int HEIGHT = 1300;
|
||||
|
||||
// Camera Controls
|
||||
constexpr float SIM_SPEED = 4.0;
|
||||
constexpr float CAMERA_DISTANCE = 4.0;
|
||||
constexpr float MIN_CAMERA_DISTANCE = 2.0;
|
||||
constexpr float MAX_CAMERA_DISTANCE = 50.0;
|
||||
constexpr float ZOOM_SPEED = 1.0;
|
||||
constexpr float MAX_CAMERA_DISTANCE = 150.0;
|
||||
constexpr float ZOOM_SPEED = 1.5;
|
||||
constexpr float PAN_SPEED = 1.0;
|
||||
constexpr float ROT_SPEED = 1.0;
|
||||
|
||||
@ -20,7 +20,7 @@ constexpr float ROT_SPEED = 1.0;
|
||||
constexpr float SPRING_CONSTANT = 1.5;
|
||||
constexpr float DAMPENING_CONSTANT = 0.8;
|
||||
constexpr float REST_LENGTH = 1.0;
|
||||
constexpr float REPULSION_FORCE = 0.05;
|
||||
constexpr float REPULSION_FORCE = 0.1;
|
||||
constexpr float REPULSION_RANGE = 3.0 * REST_LENGTH;
|
||||
constexpr float VERLET_DAMPENING = 0.01; // [0, 1]
|
||||
|
||||
|
||||
17
src/main.cpp
17
src/main.cpp
@ -2,6 +2,7 @@
|
||||
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <omp.h>
|
||||
#include <ratio>
|
||||
#include <raylib.h>
|
||||
#include <raymath.h>
|
||||
@ -17,12 +18,12 @@ auto klotski_a() -> State {
|
||||
s.AddBlock(Block(1, 0, 2, 2, true));
|
||||
s.AddBlock(Block(3, 0, 1, 2, false));
|
||||
s.AddBlock(Block(0, 2, 1, 2, false));
|
||||
// s.AddBlock(Block(1, 2, 2, 1, false));
|
||||
// s.AddBlock(Block(3, 2, 1, 2, false));
|
||||
// s.AddBlock(Block(1, 3, 1, 1, false));
|
||||
// s.AddBlock(Block(2, 3, 1, 1, false));
|
||||
// s.AddBlock(Block(0, 4, 1, 1, false));
|
||||
// s.AddBlock(Block(3, 4, 1, 1, false));
|
||||
s.AddBlock(Block(1, 2, 2, 1, false));
|
||||
s.AddBlock(Block(3, 2, 1, 2, false));
|
||||
s.AddBlock(Block(1, 3, 1, 1, false));
|
||||
s.AddBlock(Block(2, 3, 1, 1, false));
|
||||
s.AddBlock(Block(0, 4, 1, 1, false));
|
||||
s.AddBlock(Block(3, 4, 1, 1, false));
|
||||
|
||||
return s;
|
||||
}
|
||||
@ -33,6 +34,8 @@ auto main(int argc, char *argv[]) -> int {
|
||||
// return 1;
|
||||
// }
|
||||
|
||||
std::cout << "OpenMP: " << omp_get_max_threads() << " threads." << std::endl;
|
||||
|
||||
SetTraceLogLevel(LOG_ERROR);
|
||||
|
||||
// SetTargetFPS(165);
|
||||
@ -180,7 +183,7 @@ auto main(int argc, char *argv[]) -> int {
|
||||
render_time_accumulator += re - rs;
|
||||
|
||||
time_measure_count++;
|
||||
if (GetTime() - last_print_time > 3.0) {
|
||||
if (GetTime() - last_print_time > 10.0) {
|
||||
std::cout << "\n - Physics time avg: "
|
||||
<< physics_time_accumulator / time_measure_count << "."
|
||||
<< std::endl;
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
#include "config.hpp"
|
||||
|
||||
#include <format>
|
||||
#include <numeric>
|
||||
#include <raymath.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
@ -123,68 +124,79 @@ auto MassSpringSystem::CalculateSpringForces() -> void {
|
||||
}
|
||||
|
||||
auto MassSpringSystem::CalculateRepulsionForces() -> void {
|
||||
const float INV_CELL = 1.0 / REPULSION_RANGE;
|
||||
|
||||
struct CellKey {
|
||||
int x, y, z;
|
||||
bool operator==(const CellKey &other) const {
|
||||
return x == other.x && y == other.y && z == other.z;
|
||||
}
|
||||
};
|
||||
struct CellHash {
|
||||
size_t operator()(const CellKey &key) const {
|
||||
return ((size_t)key.x * 73856093) ^ ((size_t)key.y * 19349663) ^
|
||||
((size_t)key.z * 83492791);
|
||||
}
|
||||
};
|
||||
|
||||
// Accelerate with uniform grid
|
||||
std::unordered_map<CellKey, std::vector<Mass *>, CellHash> grid;
|
||||
grid.reserve(masses.size());
|
||||
const float INV_CELL = 1.0f / REPULSION_RANGE;
|
||||
const int n = masses.size();
|
||||
|
||||
// Collect pointers
|
||||
std::vector<Mass *> massVec;
|
||||
massVec.reserve(n);
|
||||
for (auto &[state, mass] : masses) {
|
||||
CellKey key{
|
||||
(int)std::floor(mass.position.x * INV_CELL),
|
||||
(int)std::floor(mass.position.y * INV_CELL),
|
||||
(int)std::floor(mass.position.z * INV_CELL),
|
||||
};
|
||||
grid[key].push_back(&mass);
|
||||
massVec.push_back(&mass);
|
||||
}
|
||||
|
||||
for (auto &[state, mass] : masses) {
|
||||
int cx = (int)std::floor(mass.position.x * INV_CELL);
|
||||
int cy = (int)std::floor(mass.position.y * INV_CELL);
|
||||
int cz = (int)std::floor(mass.position.z * INV_CELL);
|
||||
// Assign each particle a cell index
|
||||
auto cellID = [&](const Vector3 &p) -> int64_t {
|
||||
int x = (int)std::floor(p.x * INV_CELL);
|
||||
int y = (int)std::floor(p.y * INV_CELL);
|
||||
int z = (int)std::floor(p.z * INV_CELL);
|
||||
// Pack into a single int64 (assumes coords fit in 20 bits each)
|
||||
return ((int64_t)(x & 0xFFFFF) << 40) | ((int64_t)(y & 0xFFFFF) << 20) |
|
||||
(int64_t)(z & 0xFFFFF);
|
||||
};
|
||||
|
||||
// Sort particles by cell
|
||||
std::vector<int> indices(n);
|
||||
std::iota(indices.begin(), indices.end(), 0);
|
||||
std::sort(indices.begin(), indices.end(), [&](int a, int b) {
|
||||
return cellID(massVec[a]->position) < cellID(massVec[b]->position);
|
||||
});
|
||||
|
||||
// Build cell start/end table
|
||||
std::vector<int64_t> cellIDs(n);
|
||||
for (int i = 0; i < n; ++i) {
|
||||
cellIDs[i] = cellID(massVec[indices[i]]->position);
|
||||
}
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < n; ++i) {
|
||||
Mass *mass = massVec[indices[i]];
|
||||
int cx = (int)std::floor(mass->position.x * INV_CELL);
|
||||
int cy = (int)std::floor(mass->position.y * INV_CELL);
|
||||
int cz = (int)std::floor(mass->position.z * INV_CELL);
|
||||
|
||||
Vector3 force = {0, 0, 0};
|
||||
|
||||
// Check all 27 neighboring cells (including own)
|
||||
for (int dx = -1; dx <= 1; ++dx) {
|
||||
for (int dy = -1; dy <= 1; ++dy) {
|
||||
for (int dz = -1; dz <= 1; ++dz) {
|
||||
CellKey neighbor{cx + dx, cy + dy, cz + dz};
|
||||
auto it = grid.find(neighbor);
|
||||
if (it == grid.end()) {
|
||||
int64_t nid = ((int64_t)((cx + dx) & 0xFFFFF) << 40) |
|
||||
((int64_t)((cy + dy) & 0xFFFFF) << 20) |
|
||||
(int64_t)((cz + dz) & 0xFFFFF);
|
||||
|
||||
// Binary search for this neighbor cell in sorted array
|
||||
auto lo = std::lower_bound(cellIDs.begin(), cellIDs.end(), nid);
|
||||
auto hi = std::upper_bound(cellIDs.begin(), cellIDs.end(), nid);
|
||||
|
||||
for (auto it = lo; it != hi; ++it) {
|
||||
Mass *m = massVec[indices[it - cellIDs.begin()]];
|
||||
if (m == mass) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (Mass *m : it->second) {
|
||||
if (m == &mass) {
|
||||
continue; // skip self
|
||||
}
|
||||
|
||||
Vector3 diff = Vector3Subtract(mass.position, m->position);
|
||||
Vector3 diff = Vector3Subtract(mass->position, m->position);
|
||||
float len = Vector3Length(diff);
|
||||
|
||||
if (len == 0.0f || len >= REPULSION_RANGE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
mass.force =
|
||||
Vector3Add(mass.force, Vector3Scale(Vector3Normalize(diff),
|
||||
REPULSION_FORCE));
|
||||
force = Vector3Add(
|
||||
force, Vector3Scale(Vector3Normalize(diff), REPULSION_FORCE));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mass->force = Vector3Add(mass->force, force);
|
||||
}
|
||||
|
||||
// Old method
|
||||
|
||||
@ -78,11 +78,13 @@ auto Renderer::DrawMassSprings(const MassSpringSystem &masssprings) -> void {
|
||||
DrawLine3D(a.position, b.position, EDGE_COLOR);
|
||||
}
|
||||
|
||||
// Draw masses
|
||||
// Draw masses (high performance impact)
|
||||
if (masssprings.masses.size() <= 5000) {
|
||||
for (const auto &[state, mass] : masssprings.masses) {
|
||||
DrawCube(mass.position, VERTEX_SIZE, VERTEX_SIZE, VERTEX_SIZE,
|
||||
VERTEX_COLOR);
|
||||
}
|
||||
}
|
||||
|
||||
// DrawGrid(10, 1.0);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user