Add GPU-based visible chunk culling

This commit is contained in:
Elias Stepanik 2025-06-13 12:45:29 +02:00
parent 6c4b125ea9
commit 4589204097
4 changed files with 121 additions and 4 deletions

View File

@ -0,0 +1,29 @@
// Computes visible chunk keys based on camera centre and view radius.
// Input arrays must match in length and are processed per invocation.
// Per-dispatch culling parameters, bound as the uniform at group 0 / binding 4.
// Members are separated by commas: the current WGSL grammar no longer accepts
// `;` between struct members, and up-to-date parsers reject it.
struct Params {
    // Coordinate the distance test is measured from (same space as `occupied`).
    centre: vec3<i32>,
    // Inclusive radius of the test; `main` compares squared distances.
    radius: i32,
    // Number of leading entries of `occupied` / `spawned` to process.
    count: u32,
};
// Candidate keys to test; entry `i` is paired with `spawned[i]`.
// NOTE(review): WGSL gives `vec3<i32>` a 16-byte alignment, so elements of
// `array<vec3<i32>>` are 16 bytes apart — confirm the host uploads (and reads
// back) padded elements rather than tightly packed 12-byte triples.
@group(0) @binding(0) var<storage, read> occupied: array<vec3<i32>>;
// Per-candidate flag; any non-zero value makes `main` skip that candidate.
@group(0) @binding(1) var<storage, read> spawned: array<u32>;
// Output list of keys that passed the test. Slots are handed out by the
// atomic counter below, so the order need not follow the input order.
@group(0) @binding(2) var<storage, read_write> out_keys: array<vec3<i32>>;
// Number of keys written to `out_keys`; `main` only ever increments it, so
// the host is presumably expected to reset it before each dispatch.
@group(0) @binding(3) var<storage, read_write> out_count: atomic<u32>;
// Centre, radius and element count for this dispatch.
@group(0) @binding(4) var<uniform> params: Params;
// Entry point: each invocation examines one candidate key and appends it to
// `out_keys` when it is not flagged in `spawned` and lies within
// `params.radius` (inclusive) of `params.centre`.
@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) id: vec3<u32>) {
    let slot = id.x;
    // Invocations past the end of the input have nothing to do.
    if slot >= params.count {
        return;
    }

    let candidate = occupied[slot];
    // A non-zero flag excludes the candidate from the output.
    if spawned[slot] != 0u {
        return;
    }

    // Squared distances on both sides of the comparison avoid a square root.
    let offset = candidate - params.centre;
    let dist_sq = offset.x * offset.x + offset.y * offset.y + offset.z * offset.z;
    let limit_sq = params.radius * params.radius;
    if dist_sq > limit_sq {
        return;
    }

    // Reserve a unique output slot, then store the key there.
    let out_idx = atomicAdd(&out_count, 1u);
    out_keys[out_idx] = candidate;
}

View File

@ -5,8 +5,9 @@ use crate::plugins::environment::systems::voxels::meshing_gpu::{
};
use bevy_app_compute::prelude::{AppComputePlugin, AppComputeWorkerPlugin};
use crate::plugins::environment::systems::voxels::queue_systems;
use crate::plugins::environment::systems::voxels::queue_systems::{
enqueue_visible_chunks, process_chunk_queue,
use crate::plugins::environment::systems::voxels::queue_systems::process_chunk_queue;
use crate::plugins::environment::systems::voxels::visibility_gpu::{
enqueue_visible_chunks_gpu, GpuVisibilityWorker,
};
use crate::plugins::environment::systems::voxels::render_chunks::rebuild_dirty_chunks;
use crate::plugins::environment::systems::voxels::structure::{
@ -30,6 +31,7 @@ impl Plugin for EnvironmentPlugin {
);
app.add_plugins(AppComputePlugin);
app.add_plugins(AppComputeWorkerPlugin::<GpuMeshingWorker>::default());
app.add_plugins(AppComputeWorkerPlugin::<GpuVisibilityWorker>::default());
let view_distance_chunks = 100;
app.insert_resource(ChunkCullingCfg {
@ -52,8 +54,8 @@ impl Plugin for EnvironmentPlugin {
Update,
(
/* ---------- culling & streaming ---------- */
enqueue_visible_chunks,
process_chunk_queue.after(enqueue_visible_chunks),
enqueue_visible_chunks_gpu,
process_chunk_queue.after(enqueue_visible_chunks_gpu),
update_chunk_lods.after(process_chunk_queue),
rebuild_dirty_chunks.after(process_chunk_queue), // 4. (re)mesh dirty chunks
queue_gpu_meshing.after(rebuild_dirty_chunks),

View File

@ -8,5 +8,6 @@ pub mod culling;
pub mod lod;
mod meshing;
pub mod meshing_gpu;
pub mod visibility_gpu;
pub mod queue_systems;
pub mod render_chunks;

View File

@ -0,0 +1,85 @@
use bevy::prelude::*;
use bevy_app_compute::prelude::*;
use super::structure::{ChunkCullingCfg, ChunkQueue, PrevCameraChunk, SpawnedChunks, SparseVoxelOctree};
use crate::plugins::environment::systems::voxels::helper::world_to_chunk;
/// Parameters uploaded to the `"params"` uniform of the visibility pass.
///
/// The fields appear in the same order and with the same names as the
/// `Params` struct declared in the chunk-visibility shader.
#[repr(C)]
#[derive(ShaderType, Copy, Clone, Default)]
pub struct Params {
    /// Chunk coordinate of the camera, used as the centre of the distance test.
    pub centre: IVec3,
    /// Radius of the test; filled from `ChunkCullingCfg::view_distance_chunks`.
    pub radius: i32,
    /// Number of candidate entries uploaded for this dispatch.
    pub count: u32,
}
#[derive(TypePath)]
struct VisibilityShader;
impl ComputeShader for VisibilityShader {
fn shader() -> ShaderRef {
"shaders/chunk_visibility.wgsl".into()
}
}
/// Marker for the compute worker that runs the chunk-visibility pass.
#[derive(Resource)]
pub struct GpuVisibilityWorker;

impl ComputeWorker for GpuVisibilityWorker {
    /// Declares the buffers and the single pass of the visibility worker.
    ///
    /// Every buffer is created with a one-element placeholder; the real
    /// contents are written by the system that drives the worker.
    fn build(world: &mut World) -> AppComputeWorker<Self> {
        AppComputeWorkerBuilder::new(world)
            // Inputs: only ever written from the CPU.
            .add_storage::<[IVec3; 1]>("occupied", &[IVec3::ZERO; 1])
            .add_storage::<[u32; 1]>("spawned", &[0u32; 1])
            // Outputs: the driving system reads these back with
            // `read` / `read_vec`, which requires a staging buffer. A plain
            // `add_rw_storage` buffer is GPU-only and cannot be read on the CPU.
            .add_staging::<[IVec3; 1]>("out_keys", &[IVec3::ZERO; 1])
            .add_staging::<u32>("out_count", &0u32)
            .add_uniform("params", &Params::default())
            // The workgroup count given here is a placeholder; the driving
            // system sets the real count before each execution.
            .add_pass::<VisibilityShader>(
                [1, 1, 1],
                &["occupied", "spawned", "out_keys", "out_count", "params"],
            )
            // Run only when `execute()` is called, not every frame.
            .one_shot()
            .build()
    }
}
/// GPU-driven implementation of `enqueue_visible_chunks`.
///
/// When the camera enters a different chunk, uploads every occupied chunk key
/// together with a "already spawned" flag, dispatches the visibility pass and
/// replaces the contents of [`ChunkQueue`] with the keys the pass reported.
///
/// The system does nothing when there is not exactly one octree or one camera,
/// when the camera chunk is unchanged, or when the worker is not ready.
pub fn enqueue_visible_chunks_gpu(
    mut worker: ResMut<AppComputeWorker<GpuVisibilityWorker>>,
    tree_q: Query<&SparseVoxelOctree>,
    cam_q: Query<&GlobalTransform, With<Camera>>,
    spawned: Res<SpawnedChunks>,
    mut prev_cam: ResMut<PrevCameraChunk>,
    cfg: Res<ChunkCullingCfg>,
    mut queue: ResMut<ChunkQueue>,
) {
    let Ok(tree) = tree_q.get_single() else { return };
    let Ok(cam_tf) = cam_q.get_single() else { return };

    let centre = world_to_chunk(tree, cam_tf.translation());
    if prev_cam.0 == Some(centre) {
        return;
    }
    if !worker.ready() {
        return;
    }
    // Record the camera chunk only once the dispatch is certain to happen.
    // Recording it before the readiness check would mark this move as handled
    // even though no culling ran, and it would never be retried.
    prev_cam.0 = Some(centre);

    let occupied: Vec<IVec3> = tree.occupied_chunks.iter().copied().collect();
    // 1 = already spawned (the shader skips it), 0 = candidate.
    let spawned_flags: Vec<u32> = occupied
        .iter()
        .map(|key| u32::from(spawned.0.contains_key(key)))
        .collect();
    let count = occupied.len() as u32;

    // NOTE(review): the worker declares these buffers with a single element;
    // confirm that `write_slice` may upload more data than that.
    worker.write_slice("occupied", &occupied);
    worker.write_slice("spawned", &spawned_flags);
    worker.write_slice("out_keys", &vec![IVec3::ZERO; occupied.len()]);
    worker.write("out_count", &0u32);

    let params = Params { centre, radius: cfg.view_distance_chunks, count };
    worker.write("params", &params);

    // One workgroup covers 64 candidates; integer rounding-up avoids the
    // precision loss of going through `f32` for very large inputs.
    let workgroups = count.div_ceil(64);
    worker.pass(0).dispatch([workgroups, 1, 1]);
    worker.execute();

    // NOTE(review): the results are read immediately after `execute()`;
    // confirm they already reflect this dispatch rather than an earlier one.
    let count: u32 = worker.read("out_count");
    let keys: Vec<IVec3> = worker.read_vec("out_keys");

    queue.keys.clear();
    queue.set.clear();
    for key in keys.into_iter().take(count as usize) {
        queue.keys.push_back(key.into());
        queue.set.insert(key.into());
    }
}