// Kernel variants: each kernel is compiled with its own GROUPS define, which is
// used as the thread group size in the [numthreads] attribute of the entry point.
#pragma kernel Cull_32 GROUPS=32
#pragma kernel Cull_64 GROUPS=64
#pragma kernel Cull_128 GROUPS=128
#pragma kernel Cull_256 GROUPS=256
#pragma kernel Cull_512 GROUPS=512
#pragma kernel Cull_1024 GROUPS=1024

#pragma kernel Clear_32 GROUPS=32
#pragma kernel Clear_64 GROUPS=64
#pragma kernel Clear_128 GROUPS=128
#pragma kernel Clear_256 GROUPS=256
#pragma kernel Clear_512 GROUPS=512
#pragma kernel Clear_1024 GROUPS=1024

#pragma kernel Upload_32 GROUPS=32
#pragma kernel Upload_64 GROUPS=64
#pragma kernel Upload_128 GROUPS=128
#pragma kernel Upload_256 GROUPS=256
#pragma kernel Upload_512 GROUPS=512
#pragma kernel Upload_1024 GROUPS=1024

#pragma kernel Resize_32 GROUPS=32
#pragma kernel Resize_64 GROUPS=64
#pragma kernel Resize_128 GROUPS=128
#pragma kernel Resize_256 GROUPS=256
#pragma kernel Resize_512 GROUPS=512
#pragma kernel Resize_1024 GROUPS=1024

#include "UnityCG.cginc"

// One placed instance. The field order is part of the buffer layout and must
// stay in sync with whatever fills the instance buffers.
struct InstanceData
{
    // position.xyz : position.xyz
    // position.w   : rotation.x
    // scale.x      : scale.xz
    // scale.y      : scale.y
    // scale.zw     : rotation.yz
    float4 position;
    float4 scale;
    // Index into perCamPrefabs.
    uint prefabId;

    // Used to stream out new instances
    // Upload copies the instance into globalInstances[uploadId] when uploadAction is 1.
    int uploadAction;
    int uploadId;
    // 0 until Cull has processed the instance once; Cull then resets the
    // per-camera meta of the instance and sets age to 1.
    int age;
};

// Per-prefab settings used by Cull.
struct PrefabData
{
    uint batchIndex;
    // Start of this prefab's range in the visible index buffers. Each LOD owns
    // 'maxCount' consecutive slots inside that range.
    uint indexBufferStartOffset;
    uint maxCount;
    // Number of valid entries in lodData / indirectArgsIndexAndCountPerLod.
    uint lodCount;
    // Cross-fading between LODs is enabled when this is greater than 0.
    uint fadeLods; //Todo: Use Bitwise and save memory?

    // x: density threshold, y: density range start, z: density range length, w: shadow distance
    float4 densityInDistance;
    // Per LOD, as read by Cull: x = start distance, y = end distance,
    // z = bounds size used for frustum culling.
    float4 lodData[8];
    // Per LOD, as read by Cull: x = index of the first indirect argument set,
    // y = number of consecutive argument sets (5 uints each).
    uint2 indirectArgsIndexAndCountPerLod[8];
};

// Per-camera, per-instance state that Cull reads and writes back on every run.
struct InstanceMeta
{
    // LOD selected by the previous Cull run (1-based, 0 = not visible).
    uint visibleLod;
    // LOD that is currently fading out (1-based, 0 = none).
    uint fadeOutLod;
    // Remaining fade amount; set to 1 when a fade starts and counted down to 0.
    float fadeAnim;
    // The scale of the instance can be adjusted
    float Scale;
};

// Element types below follow from how Upload, Resize and Cull index these buffers.
StructuredBuffer<InstanceData> globalUploadInstances;
RWStructuredBuffer<InstanceData> globalInstances;
RWStructuredBuffer<InstanceData> tempGlobalInstances;

StructuredBuffer<PrefabData> perCamPrefabs;
StructuredBuffer<uint> perCamCullableIndexesBuffer;
RWStructuredBuffer<InstanceMeta> perCamMeta;
RWStructuredBuffer<uint> perCamIndirectArgumentsBuffer;
RWStructuredBuffer<uint> perCamVisibleIndexesBuffer;
RWStructuredBuffer<uint> perCamShadowVisibleIndexesBuffer;

// Number of threads that have work in the respective kernel; threads with a
// dispatch id at or beyond the count return immediately.
uint _CountClear;
uint _CountUpload;
uint _CountResize;
uint _CountCull;

float3 _CameraPosition;
float3 _ShadowDirection;
float4 _FrustumPlanes[6];

// Plane equation: {(a, b, c) = N, d = -dot(N, P)}.
// Returns the distance from the plane to the point 'p' along the normal.
// Positive -> in front (above), negative -> behind (below).
float DistanceFromPlane(float3 p, float4 plane)
{
    return dot(float4(p, 1.0), plane);
}

// Returns 'true' if the object is outside of the frustum.
// 'size' is the (negative) size of the object's bounds.
// Only the first 'numPlanes' entries of 'frustumPlanes' are tested.
bool CullFrustum(float3 center, float size, float4 frustumPlanes[6], int numPlanes)
{
    bool outside = false;

    [unroll(6)]
    for (int i = 0; i < numPlanes; i++)
        outside = outside || DistanceFromPlane(center, frustumPlanes[i]) < size;

    return outside;
}

// Returns 'true' if the shadow of the object is outside of the frustum.
// 'size' is the (negative) size of the object's bounds.
bool CullShadowFrustum(float3 center, float size, float3 lightDirection, float4 frustumPlanes[6], int numPlanes)
{
    bool outside = false;

    // Two points are tested per plane: 'center' and 'center + lightDirection'.
    // A plane only rejects the shadow when the nearer of the two is still
    // further behind the plane than 'size'.
    [unroll(6)]
    for (int i = 0; i < numPlanes; i++)
        outside = outside || max(DistanceFromPlane(center, frustumPlanes[i]), DistanceFromPlane(center + lightDirection, frustumPlanes[i])) < size;

    return outside;
}

// Calculates the adjusted scale of the instance based on the "Density in Distance"
// setting. Instances get scaled down if the density is reduced.
//   densityInDistance : x = density threshold, y = range start, z = range length
//   distance          : distance between the instance and the camera
// Returns 1 for instances that are kept at full size, otherwise a value that
// decreases with distance (it can become negative; the caller culls below 0.3).
float ReduceDensityScale(inout InstanceData instance, float4 densityInDistance, float distance)
{
    float rangeStart = densityInDistance.y;
    float rangeLength = densityInDistance.z;

    // Calculate a dither pattern with range [0..1]
    float dither = frac(dot(float3((instance.position.xz) * 16.0f, 0), uint3(2, 7, 23) / 17.0f));

    // The small epsilon keeps the division defined when the range or the dither is 0.
    float distanceNormalized = (distance - rangeStart) / (rangeLength * dither + 0.001f);

    if (dither > densityInDistance.x && distanceNormalized > 0)
        return 1 - distanceNormalized;
    else
        return 1;
}

// Resets the instance count of one indirect argument set.
// Every set is 5 uints wide and the count is stored in the second uint.
void ClearArgumentsBuffer(uint3 id : SV_DispatchThreadID)
{
    if (id.x >= _CountClear)
        return;

    uint indirectArgsCountOffset = id.x * 5 + 1;
    perCamIndirectArgumentsBuffer[indirectArgsCountOffset] = 0;
}

// Copies uploaded instances that are flagged with uploadAction == 1 into the
// global instance buffer at the slot given by their uploadId.
void Upload(uint3 id : SV_DispatchThreadID)
{
    if (id.x >= _CountUpload)
        return;

    InstanceData instance = globalUploadInstances[id.x];
    if (instance.uploadAction == 1)
    {
        globalInstances[instance.uploadId] = instance;
    }
}

// Copies the global instance buffer element by element into the temporary buffer.
void Resize(uint3 id : SV_DispatchThreadID)
{
    if (id.x >= _CountResize)
        return;

    tempGlobalInstances[id.x] = globalInstances[id.x];
}

// Registers an instance for drawing with the given LOD (1-based) of its prefab.
// Increments the instance count of every indirect argument set of that LOD and
// stores the instance index in the slot returned by the first increment.
// With 'shadowPass' set, the argument sets that follow the regular ones of the
// LOD and the shadow index buffer are used instead.
void AppendInstanceToLod(PrefabData prefab, uint lod, uint instanceIndex, bool shadowPass)
{
    uint lodIndex = lod - 1;
    uint batchOffset = prefab.indexBufferStartOffset + (lodIndex * prefab.maxCount);
    uint indirectArgsCount = prefab.indirectArgsIndexAndCountPerLod[lodIndex].y;
    // +1 selects the instance count inside the 5 uint wide argument set.
    uint indirectArgsCountOffset = prefab.indirectArgsIndexAndCountPerLod[lodIndex].x + 1;
    if (shadowPass)
        indirectArgsCountOffset += indirectArgsCount * 5;

    // The value before the increment is the slot of this instance inside the batch.
    uint slot = 0;
    InterlockedAdd(perCamIndirectArgumentsBuffer[indirectArgsCountOffset], 1, slot);

    uint i = 0;
    uint u = 0;
    for (i = 1, u = 5; i < indirectArgsCount; i++, u += 5)
    {
        InterlockedAdd(perCamIndirectArgumentsBuffer[indirectArgsCountOffset + u], 1);
    }

    if (shadowPass)
        perCamShadowVisibleIndexesBuffer[batchOffset + slot] = instanceIndex;
    else
        perCamVisibleIndexesBuffer[batchOffset + slot] = instanceIndex;
}

// Per-instance culling: selects the LOD by camera distance, applies the density
// reduction and frustum culling, appends the instance to the visible and shadow
// index buffers and updates the LOD cross-fade state stored in perCamMeta.
void Cull(uint3 id : SV_DispatchThreadID)
{
    if (id.x >= _CountCull)
        return;

    uint instanceIndex = perCamCullableIndexesBuffer[id.x];
    InstanceData instance = globalInstances[instanceIndex];
    InstanceMeta meta = perCamMeta[instanceIndex];
    PrefabData prefab = perCamPrefabs[instance.prefabId];

    // First time this instance is processed: start from a clean meta state.
    if (instance.age == 0)
    {
        meta = (InstanceMeta)0;
        globalInstances[instanceIndex].age = 1;
    }

    uint lodCount = prefab.lodCount;
    uint visibleLod = 0;
    uint visibleShadowLod = 0;
    uint i = 0;

    // Calculate active LOD
    float dist = distance(instance.position.xyz, _CameraPosition.xyz);

    [unroll(8)]
    for (i = 0; i < lodCount; i++)
    {
        // The end distance of the last LOD is not scaled with the instance.
        float maxDist = i < lodCount - 1 ? prefab.lodData[i].y * instance.scale.y : prefab.lodData[i].y;

        if (dist >= prefab.lodData[i].x * instance.scale.y && dist < maxDist)
        {
            visibleLod = i + 1;

            if (dist < prefab.densityInDistance.w) //prefab.densityInDistance.w = max shadow distance
                visibleShadowLod = i + 1;
        }
    }

    // Reduce density
    if (prefab.densityInDistance.x < 1)
    {
        meta.Scale = ReduceDensityScale(instance, prefab.densityInDistance, dist);

        if (meta.Scale < 0.3)
        {
            visibleLod = 0;
            visibleShadowLod = 0;
        }
    }
    else
    {
        meta.Scale = 1;
    }

    // Frustum Culling
    if (visibleLod > 0)
    {
        float size = -prefab.lodData[visibleLod - 1].z * length(instance.scale.xy);
        const int planeCount = 5; // Do not test near/far planes

        if (CullFrustum(instance.position.xyz, size, _FrustumPlanes, planeCount))
        {
            // Setting active LOD to 0 culls the instance. The LODs start from 1.
            visibleLod = 0;

            // The instance itself is off screen, but its shadow may still be visible.
            if (CullShadowFrustum(
                    instance.position.xyz,
                    size,
                    _ShadowDirection * 1000,
                    _FrustumPlanes,
                    planeCount))
            {
                visibleShadowLod = 0;
            }
        }
    }

    if (visibleLod > 0)
    {
        AppendInstanceToLod(prefab, visibleLod, instanceIndex, false);

        // The LOD changed since the last run: start fading out the previous one.
        if (meta.visibleLod != visibleLod && meta.visibleLod > 0 && prefab.fadeLods > 0)
        {
            meta.fadeOutLod = meta.visibleLod;
            meta.fadeAnim = 1;
        }
    }

    if (visibleShadowLod > 0)
    {
        AppendInstanceToLod(prefab, visibleShadowLod, instanceIndex, true);

        if (meta.visibleLod != visibleShadowLod && meta.visibleLod > 0 && prefab.fadeLods > 0)
        {
            meta.fadeOutLod = meta.visibleLod;
            meta.fadeAnim = 1;
        }
    }

    // Keep both fade fields consistent: no fading LOD means no animation and vice versa.
    if (meta.fadeOutLod == 0)
        meta.fadeAnim = 0;

    if (meta.fadeAnim == 0)
        meta.fadeOutLod = 0;

    //Add lod cross fade
    if (meta.fadeAnim > 0 && meta.fadeOutLod > 0)
    {
        meta.fadeAnim = max(0, meta.fadeAnim - unity_DeltaTime.z);

        //Normal
        if (visibleLod > 0 && meta.fadeAnim > 0)
        {
            AppendInstanceToLod(prefab, meta.fadeOutLod, instanceIndex, false);
        }

        //Shadow
        if (visibleShadowLod > 0 && meta.fadeAnim > 0)
        {
            AppendInstanceToLod(prefab, meta.fadeOutLod, instanceIndex, true);
        }
    }

    meta.visibleLod = max(visibleLod, visibleShadowLod);
    perCamMeta[instanceIndex] = meta;
}

// Kernel entry points. GROUPS is defined per kernel by the matching #pragma kernel line.

[numthreads(GROUPS,1,1)]
void Cull_32(uint3 id : SV_DispatchThreadID) { Cull(id); }

[numthreads(GROUPS,1,1)]
void Cull_64(uint3 id : SV_DispatchThreadID) { Cull(id); }

[numthreads(GROUPS,1,1)]
void Cull_128(uint3 id : SV_DispatchThreadID) { Cull(id); }

[numthreads(GROUPS,1,1)]
void Cull_256(uint3 id : SV_DispatchThreadID) { Cull(id); }

[numthreads(GROUPS,1,1)]
void Cull_512(uint3 id : SV_DispatchThreadID) { Cull(id); }

[numthreads(GROUPS,1,1)]
void Cull_1024(uint3 id : SV_DispatchThreadID) { Cull(id); }

[numthreads(GROUPS, 1, 1)]
void Clear_32(uint3 id : SV_DispatchThreadID) { ClearArgumentsBuffer(id); }

[numthreads(GROUPS, 1, 1)]
void Clear_64(uint3 id : SV_DispatchThreadID) { ClearArgumentsBuffer(id); }

[numthreads(GROUPS, 1, 1)]
void Clear_128(uint3 id : SV_DispatchThreadID) { ClearArgumentsBuffer(id); }

[numthreads(GROUPS, 1, 1)]
void Clear_256(uint3 id : SV_DispatchThreadID) { ClearArgumentsBuffer(id); }

[numthreads(GROUPS, 1, 1)]
void Clear_512(uint3 id : SV_DispatchThreadID) { ClearArgumentsBuffer(id); }

[numthreads(GROUPS, 1, 1)]
void Clear_1024(uint3 id : SV_DispatchThreadID) { ClearArgumentsBuffer(id); }

[numthreads(GROUPS, 1, 1)]
void Upload_32(uint3 id : SV_DispatchThreadID) { Upload(id); }

[numthreads(GROUPS, 1, 1)]
void Upload_64(uint3 id : SV_DispatchThreadID) { Upload(id); }

[numthreads(GROUPS, 1, 1)]
void Upload_128(uint3 id : SV_DispatchThreadID) { Upload(id); }

[numthreads(GROUPS, 1, 1)]
void Upload_256(uint3 id : SV_DispatchThreadID) { Upload(id); }

[numthreads(GROUPS, 1, 1)]
void Upload_512(uint3 id : SV_DispatchThreadID) { Upload(id); }

[numthreads(GROUPS, 1, 1)]
void Upload_1024(uint3 id : SV_DispatchThreadID) { Upload(id); }

[numthreads(GROUPS, 1, 1)]
void Resize_32(uint3 id : SV_DispatchThreadID) { Resize(id); }

[numthreads(GROUPS, 1, 1)]
void Resize_64(uint3 id : SV_DispatchThreadID) { Resize(id); }

[numthreads(GROUPS, 1, 1)]
void Resize_128(uint3 id : SV_DispatchThreadID) { Resize(id); }

[numthreads(GROUPS, 1, 1)]
void Resize_256(uint3 id : SV_DispatchThreadID) { Resize(id); }

[numthreads(GROUPS, 1, 1)]
void Resize_512(uint3 id : SV_DispatchThreadID) { Resize(id); }

[numthreads(GROUPS, 1, 1)]
void Resize_1024(uint3 id : SV_DispatchThreadID) { Resize(id); }