ThomagicRenderer/Runtime/Resources/ThoMagic Renderer Instancin...

419 lines
13 KiB
Plaintext

// Kernel declarations. Each of the four operations (Cull, Clear, Upload, Resize)
// is declared once per thread-group size. The GROUPS=<n> define given on each
// line is the value consumed by the [numthreads(GROUPS, 1, 1)] attribute of the
// entry point with the same name.
#pragma kernel Cull_32 GROUPS=32
#pragma kernel Cull_64 GROUPS=64
#pragma kernel Cull_128 GROUPS=128
#pragma kernel Cull_256 GROUPS=256
#pragma kernel Cull_512 GROUPS=512
#pragma kernel Cull_1024 GROUPS=1024
#pragma kernel Clear_32 GROUPS=32
#pragma kernel Clear_64 GROUPS=64
#pragma kernel Clear_128 GROUPS=128
#pragma kernel Clear_256 GROUPS=256
#pragma kernel Clear_512 GROUPS=512
#pragma kernel Clear_1024 GROUPS=1024
#pragma kernel Upload_32 GROUPS=32
#pragma kernel Upload_64 GROUPS=64
#pragma kernel Upload_128 GROUPS=128
#pragma kernel Upload_256 GROUPS=256
#pragma kernel Upload_512 GROUPS=512
#pragma kernel Upload_1024 GROUPS=1024
#pragma kernel Resize_32 GROUPS=32
#pragma kernel Resize_64 GROUPS=64
#pragma kernel Resize_128 GROUPS=128
#pragma kernel Resize_256 GROUPS=256
#pragma kernel Resize_512 GROUPS=512
#pragma kernel Resize_1024 GROUPS=1024
#include "UnityCG.cginc"
// Per-instance record stored in globalInstances, tempGlobalInstances and
// globalUploadInstances.
struct InstanceData
{
// Packed transform, as described by the original author:
// position.xyz : position.xyz
// position.w : rotation.x
// scale.x : scale.xz
// scale.y : scale.y
// scale.zw : rotation.yz
float4 position;
float4 scale;
// Index of the instance's prefab in perCamPrefabs.
uint prefabId;
//Used to stream out new instances
// Upload() copies the record to globalInstances[uploadId] when uploadAction == 1.
int uploadAction;
int uploadId;
// 0 until Cull() has processed the instance once; Cull() then clears the
// instance's InstanceMeta and stores 1 here.
int age;
};
// Per-prefab settings, read by Cull() from perCamPrefabs.
struct PrefabData
{
// Not read by any code visible in this file.
uint batchIndex;
// Base offset of this prefab's region in the visible-index buffers.
uint indexBufferStartOffset;
// Stride (in entries) between the regions of consecutive LODs of this prefab
// inside the visible-index buffers.
uint maxCount;
// Number of lodData entries Cull() evaluates.
uint lodCount;
// Values greater than 0 enable the LOD cross-fade in Cull().
uint fadeLods;//Todo: Use Bitwise and save memory?
//x: density threshold, y: density range start, z: density range length, w: shadow distance
float4 densityInDistance;
// Per LOD, as used by Cull(): x = start distance, y = end distance,
// z = bounds size used for frustum culling. w is not read in this file.
float4 lodData[8];
// Per LOD: x = offset (in uints) of the LOD's first argument entry in
// perCamIndirectArgumentsBuffer, y = number of argument entries for the LOD
// (presumably one per sub-mesh - confirm against the C# side).
uint2 indirectArgsIndexAndCountPerLod[8];
};
// Per-instance state kept in perCamMeta and carried from one Cull() dispatch
// to the next.
struct InstanceMeta
{
// LOD chosen by the previous Cull() (1-based, 0 = not visible).
uint visibleLod;
// LOD currently being faded out (1-based, 0 = none).
uint fadeOutLod;
// Remaining cross-fade amount: set to 1 when a fade starts and reduced by
// unity_DeltaTime.z on every Cull() until it reaches 0.
float fadeAnim;
// The scale of the instance can be adjusted
float Scale;
};
// Source records for Upload().
StructuredBuffer<InstanceData> globalUploadInstances;
// Instance storage: written by Upload(), read by Resize() and Cull().
RWStructuredBuffer<InstanceData> globalInstances;
// Destination of Resize(), which copies globalInstances into it element by element.
RWStructuredBuffer<InstanceData> tempGlobalInstances;
// Prefab settings, indexed by InstanceData.prefabId.
StructuredBuffer<PrefabData> perCamPrefabs;
// Maps a Cull() thread id to the index of the instance it processes.
StructuredBuffer<uint> perCamCullableIndexesBuffer;
// Per-instance LOD / fade state, indexed by instance index.
RWStructuredBuffer<InstanceMeta> perCamMeta;
// Indirect draw arguments, 5 uints per entry; the uint at offset 1 of each
// entry is the counter that Clear resets and Cull() increments.
RWStructuredBuffer<uint> perCamIndirectArgumentsBuffer;
// Instance indices that passed culling for normal rendering.
RWStructuredBuffer<uint> perCamVisibleIndexesBuffer;
// Instance indices that passed culling for shadow rendering.
RWStructuredBuffer<uint> perCamShadowVisibleIndexesBuffer;
// Number of valid threads for the Clear, Upload, Resize and Cull kernels.
uint _CountClear;
uint _CountUpload;
uint _CountResize;
uint _CountCull;
// Camera position used for the LOD distance.
float3 _CameraPosition;
// Light direction; Cull() scales it by 1000 to extend an instance for the shadow test.
float3 _ShadowDirection;
// Frustum planes in (normal, d) form; Cull() tests only the first 5.
float4 _FrustumPlanes[6];
// Signed distance of point 'p' from 'plane', measured along the plane normal.
// The plane is given as (a, b, c, d) with (a, b, c) = N and d = -dot(N, P).
// The result is positive in front of (above) the plane and negative behind
// (below) it.
float DistanceFromPlane(float3 p, float4 plane)
{
    // Extend the point with w = 1 so a single dot product also adds 'd'.
    float4 homogeneousPoint = float4(p, 1.0);
    return dot(homogeneousPoint, plane);
}
// Tests a bounding volume against the first 'numPlanes' frustum planes.
// 'size' is the negated size of the object's bounds, so a plane rejects the
// object once its centre lies further behind the plane than that size.
// Returns 'true' if the object is outside of the frustum.
bool CullFrustum(float3 center, float size, float4 frustumPlanes[6], int numPlanes)
{
    bool isOutside = false;
    [unroll(6)]
    for (int planeIndex = 0; planeIndex < numPlanes; planeIndex++)
    {
        // One rejecting plane is enough to place the object outside.
        if (DistanceFromPlane(center, frustumPlanes[planeIndex]) < size)
        {
            isOutside = true;
        }
    }
    return isOutside;
}
// Shadow variant of the frustum test: the object is checked at 'center' and
// at 'center + lightDirection', and a plane rejects it only when both points
// lie behind the plane by more than the bounds.
// 'size' is the negated size of the object's bounds.
// Returns 'true' if the shadow of the object is outside of the frustum.
bool CullShadowFrustum(float3 center, float size, float3 lightDirection, float4 frustumPlanes[6], int numPlanes)
{
    float3 shiftedCenter = center + lightDirection;
    bool isOutside = false;
    [unroll(6)]
    for (int planeIndex = 0; planeIndex < numPlanes; planeIndex++)
    {
        float distAtCenter = DistanceFromPlane(center, frustumPlanes[planeIndex]);
        float distAtShifted = DistanceFromPlane(shiftedCenter, frustumPlanes[planeIndex]);
        if (max(distAtCenter, distAtShifted) < size)
        {
            isOutside = true;
        }
    }
    return isOutside;
}
// Calculates the adjusted scale of the instance based on the "Density in Distance"
// setting. Instances get scaled down if the density is reduced.
//   instance          - instance whose position.xz seeds the dither value; it is
//                       not modified (the 'inout' qualifier is kept for callers).
//   densityInDistance - x: density threshold, y: range start, z: range length
//                       (w is not used here).
//   distance          - distance between the camera and the instance.
// Returns 1 for instances that keep their full size. For instances whose dither
// value exceeds the density threshold and that lie beyond the range start it
// returns 1 - normalized distance, which can drop below 0 far away.
float ReduceDensityScale(inout InstanceData instance, float4 densityInDistance, float distance)
{
    float rangeStart = densityInDistance.y;
    float rangeLength = densityInDistance.z;
    // Calculate a dither pattern with range [0..1]
    float dither = frac(dot(float3((instance.position.xz) * 16.0f, 0), uint3(2, 7, 23) / 17.0f));
    // The dither value also stretches the fade range per instance; the small
    // epsilon keeps the divisor away from zero.
    float distanceNormalized = (distance - rangeStart) / (rangeLength * dither + 0.001f);
    if (dither > densityInDistance.x && distanceNormalized > 0)
        return 1 - distanceNormalized;
    return 1;
}
// Resets the counter of one indirect-argument entry. Entries are 5 uints wide
// and the counter is the second uint of each entry. Threads at or beyond
// _CountClear do nothing.
void ClearArgumentsBuffer(uint3 id : SV_DispatchThreadID)
{
    const uint uintsPerEntry = 5;
    const uint counterSlot = 1;
    if (id.x < _CountClear)
    {
        perCamIndirectArgumentsBuffer[id.x * uintsPerEntry + counterSlot] = 0;
    }
}
// Copies one pending record from globalUploadInstances into globalInstances.
// Only records with uploadAction == 1 are copied; the destination slot is
// the record's own uploadId. Threads at or beyond _CountUpload do nothing.
void Upload (uint3 id : SV_DispatchThreadID)
{
    if (id.x < _CountUpload)
    {
        InstanceData pending = globalUploadInstances[id.x];
        if (pending.uploadAction == 1)
        {
            globalInstances[pending.uploadId] = pending;
        }
    }
}
// Copies one element of globalInstances to the same index of
// tempGlobalInstances. Threads at or beyond _CountResize do nothing.
void Resize(uint3 id : SV_DispatchThreadID)
{
    uint slot = id.x;
    if (slot < _CountResize)
    {
        tempGlobalInstances[slot] = globalInstances[slot];
    }
}
// Culling body shared by all Cull_* kernels. For the instance mapped to this
// thread it:
//   1. picks a LOD from the camera distance (and a shadow LOD inside the shadow distance),
//   2. applies the "Density in Distance" reduction,
//   3. frustum-culls the instance and, if it is outside, its shadow,
//   4. appends the instance to the visible / shadow-visible index lists and
//      increments the matching counters in perCamIndirectArgumentsBuffer,
//   5. maintains the LOD cross-fade state and writes it back to perCamMeta.
// LOD values are 1-based throughout; 0 means "not visible".
void Cull (uint3 id : SV_DispatchThreadID)
{
// Threads at or beyond the number of cullable instances have nothing to do.
if(id.x >= _CountCull)
return;
uint instanceIndex = perCamCullableIndexesBuffer[id.x];
InstanceData instance = globalInstances[instanceIndex];
InstanceMeta meta = perCamMeta[instanceIndex];
PrefabData prefab = perCamPrefabs[instance.prefabId];
// First time this instance is seen: start from cleared meta and mark the
// instance as processed in the global buffer.
if (instance.age == 0)
{
meta = (InstanceMeta)0;
globalInstances[instanceIndex].age = 1;
}
uint lodCount = prefab.lodCount;
uint visibleLod = 0;
uint visibleShadowLod = 0;
uint batchOffset = 0;
uint indirectArgsCount = 0;
uint indirectArgsCountOffset = 0;
uint i = 0;
uint u = 0;
// Calculate active LOD
float dist = distance(instance.position.xyz, _CameraPosition.xyz);
// LOD distances are scaled by instance.scale.y, except the end distance of
// the last LOD, which is used unscaled. If several ranges contain 'dist',
// the last matching LOD wins.
[unroll(8)]
for(i=0; i < lodCount; i++)
{
float maxDist =
i < lodCount - 1 ? prefab.lodData[i].y * instance.scale.y : prefab.lodData[i].y;
if(dist >= prefab.lodData[i].x * instance.scale.y && dist < maxDist)
{
visibleLod = i + 1;
if (dist < prefab.densityInDistance.w)//prefab.densityInDistance.w = max shadow distance
visibleShadowLod = i + 1;
}
}
// Reduce density
// Only applied when the density threshold is below 1. Instances whose
// adjusted scale falls below 0.3 are dropped for both passes.
if(prefab.densityInDistance.x < 1)
{
meta.Scale = ReduceDensityScale(instance, prefab.densityInDistance, dist);
if(meta.Scale < 0.3)
{
visibleLod = 0;
visibleShadowLod = 0;
}
}
else
{
meta.Scale = 1;
}
// Frustum Culling
if(visibleLod > 0)
{
// Negated bounds size of the selected LOD, scaled by the length of instance.scale.xy.
float size = -prefab.lodData[visibleLod - 1].z * length(instance.scale.xy);
const int planeCount = 5; // Do not test near/far planes
if (CullFrustum(instance.position.xyz, size, _FrustumPlanes, planeCount))
{
// Setting active LOD to 0 culls the instance. The LODs start from 1.
visibleLod = 0;
// The shadow is tested only when the instance itself is outside the
// frustum; the instance is extended by 1000 units along the light direction.
if (CullShadowFrustum(
instance.position.xyz,
size,
_ShadowDirection * 1000,
_FrustumPlanes,
planeCount))
{
visibleShadowLod = 0;
}
}
}
uint visibleCount = 0;
// Normal pass: register the instance under its visible LOD.
if (visibleLod > 0)
{
batchOffset = prefab.indexBufferStartOffset + ((visibleLod - 1) * prefab.maxCount);
indirectArgsCount = prefab.indirectArgsIndexAndCountPerLod[visibleLod - 1].y;
// +1 selects the counter slot inside the 5-uint argument entry.
indirectArgsCountOffset = prefab.indirectArgsIndexAndCountPerLod[visibleLod - 1].x + 1;
// visibleCount receives the counter value from before the add, which is
// the slot this instance occupies in the LOD's index range.
InterlockedAdd(perCamIndirectArgumentsBuffer[indirectArgsCountOffset], 1, visibleCount);
// Keep the counters of the LOD's remaining argument entries in step
// (entries are 5 uints apart).
for (i = 1, u = 5; i < indirectArgsCount; i++, u += 5)
{
InterlockedAdd(perCamIndirectArgumentsBuffer[indirectArgsCountOffset + u], 1);
}
perCamVisibleIndexesBuffer[batchOffset + visibleCount] = instanceIndex;
// LOD changed since the previous dispatch: start fading out the old LOD.
if (meta.visibleLod != visibleLod && meta.visibleLod > 0 && prefab.fadeLods > 0)
{
meta.fadeOutLod = meta.visibleLod;
meta.fadeAnim = 1;
}
}
// Shadow pass: same bookkeeping, using the shadow index list.
if (visibleShadowLod > 0)
{
batchOffset = prefab.indexBufferStartOffset + ((visibleShadowLod - 1) * prefab.maxCount);
indirectArgsCount = prefab.indirectArgsIndexAndCountPerLod[visibleShadowLod - 1].y;
// The shadow argument entries are addressed indirectArgsCount entries
// (5 uints each) after the LOD's first entry.
indirectArgsCountOffset = indirectArgsCount * 5 + prefab.indirectArgsIndexAndCountPerLod[visibleShadowLod - 1].x + 1;
InterlockedAdd(perCamIndirectArgumentsBuffer[indirectArgsCountOffset], 1, visibleCount);
for (i = 1, u = 5; i < indirectArgsCount; i++, u += 5)
{
InterlockedAdd(perCamIndirectArgumentsBuffer[indirectArgsCountOffset + u], 1);
}
perCamShadowVisibleIndexesBuffer[batchOffset + visibleCount] = instanceIndex;
if (meta.visibleLod != visibleShadowLod && meta.visibleLod > 0 && prefab.fadeLods > 0)
{
meta.fadeOutLod = meta.visibleLod;
meta.fadeAnim = 1;
}
}
// Keep the two fade fields consistent: no fading LOD means no animation
// and a finished animation means no fading LOD.
if (meta.fadeOutLod == 0)
meta.fadeAnim = 0;
if (meta.fadeAnim == 0)
meta.fadeOutLod = 0;
//Add lod cross fade
if (meta.fadeAnim > 0 && meta.fadeOutLod > 0)
{
// Advance the fade by unity_DeltaTime.z, clamped at 0.
meta.fadeAnim = max(0, meta.fadeAnim - unity_DeltaTime.z);
// While the fade is running, the instance is additionally registered
// under the LOD that is fading out.
//Normal
if (visibleLod > 0 && meta.fadeAnim > 0)
{
batchOffset = prefab.indexBufferStartOffset + ((meta.fadeOutLod - 1) * prefab.maxCount);
indirectArgsCount = prefab.indirectArgsIndexAndCountPerLod[meta.fadeOutLod - 1].y;
indirectArgsCountOffset = prefab.indirectArgsIndexAndCountPerLod[meta.fadeOutLod - 1].x + 1;
InterlockedAdd(perCamIndirectArgumentsBuffer[indirectArgsCountOffset], 1, visibleCount);
for (i = 1, u = 5; i < indirectArgsCount; i++, u += 5)
{
InterlockedAdd(perCamIndirectArgumentsBuffer[indirectArgsCountOffset + u], 1);
}
perCamVisibleIndexesBuffer[batchOffset + visibleCount] = instanceIndex;
}
//Shadow
if (visibleShadowLod > 0 && meta.fadeAnim > 0)
{
batchOffset = prefab.indexBufferStartOffset + ((meta.fadeOutLod - 1) * prefab.maxCount);
indirectArgsCount = prefab.indirectArgsIndexAndCountPerLod[meta.fadeOutLod - 1].y;
indirectArgsCountOffset = indirectArgsCount * 5 + prefab.indirectArgsIndexAndCountPerLod[meta.fadeOutLod - 1].x + 1;
InterlockedAdd(perCamIndirectArgumentsBuffer[indirectArgsCountOffset], 1, visibleCount);
for (i = 1, u = 5; i < indirectArgsCount; i++, u += 5)
{
InterlockedAdd(perCamIndirectArgumentsBuffer[indirectArgsCountOffset + u], 1);
}
perCamShadowVisibleIndexesBuffer[batchOffset + visibleCount] = instanceIndex;
}
}
// Remember the LOD for the next dispatch; the shadow LOD counts when the
// instance itself was culled.
meta.visibleLod = max(visibleLod, visibleShadowLod);
perCamMeta[instanceIndex] = meta;
}
// Cull kernel entry points, one per thread-group size. Every variant
// forwards its dispatch thread id to Cull().
[numthreads(GROUPS, 1, 1)]
void Cull_32(uint3 id : SV_DispatchThreadID)
{
    Cull(id);
}

[numthreads(GROUPS, 1, 1)]
void Cull_64(uint3 id : SV_DispatchThreadID)
{
    Cull(id);
}

[numthreads(GROUPS, 1, 1)]
void Cull_128(uint3 id : SV_DispatchThreadID)
{
    Cull(id);
}

[numthreads(GROUPS, 1, 1)]
void Cull_256(uint3 id : SV_DispatchThreadID)
{
    Cull(id);
}

[numthreads(GROUPS, 1, 1)]
void Cull_512(uint3 id : SV_DispatchThreadID)
{
    Cull(id);
}

[numthreads(GROUPS, 1, 1)]
void Cull_1024(uint3 id : SV_DispatchThreadID)
{
    Cull(id);
}
// Clear kernel entry points, one per thread-group size. Every variant
// forwards its dispatch thread id to ClearArgumentsBuffer().
[numthreads(GROUPS, 1, 1)]
void Clear_32(uint3 id : SV_DispatchThreadID)
{
    ClearArgumentsBuffer(id);
}

[numthreads(GROUPS, 1, 1)]
void Clear_64(uint3 id : SV_DispatchThreadID)
{
    ClearArgumentsBuffer(id);
}

[numthreads(GROUPS, 1, 1)]
void Clear_128(uint3 id : SV_DispatchThreadID)
{
    ClearArgumentsBuffer(id);
}

[numthreads(GROUPS, 1, 1)]
void Clear_256(uint3 id : SV_DispatchThreadID)
{
    ClearArgumentsBuffer(id);
}

[numthreads(GROUPS, 1, 1)]
void Clear_512(uint3 id : SV_DispatchThreadID)
{
    ClearArgumentsBuffer(id);
}

[numthreads(GROUPS, 1, 1)]
void Clear_1024(uint3 id : SV_DispatchThreadID)
{
    ClearArgumentsBuffer(id);
}
// Upload kernel entry points, one per thread-group size. Every variant
// forwards its dispatch thread id to Upload().
[numthreads(GROUPS, 1, 1)]
void Upload_32(uint3 id : SV_DispatchThreadID)
{
    Upload(id);
}

[numthreads(GROUPS, 1, 1)]
void Upload_64(uint3 id : SV_DispatchThreadID)
{
    Upload(id);
}

[numthreads(GROUPS, 1, 1)]
void Upload_128(uint3 id : SV_DispatchThreadID)
{
    Upload(id);
}

[numthreads(GROUPS, 1, 1)]
void Upload_256(uint3 id : SV_DispatchThreadID)
{
    Upload(id);
}

[numthreads(GROUPS, 1, 1)]
void Upload_512(uint3 id : SV_DispatchThreadID)
{
    Upload(id);
}

[numthreads(GROUPS, 1, 1)]
void Upload_1024(uint3 id : SV_DispatchThreadID)
{
    Upload(id);
}
// Resize kernel entry points, one per thread-group size. Every variant
// forwards its dispatch thread id to Resize().
[numthreads(GROUPS, 1, 1)]
void Resize_32(uint3 id : SV_DispatchThreadID)
{
    Resize(id);
}

[numthreads(GROUPS, 1, 1)]
void Resize_64(uint3 id : SV_DispatchThreadID)
{
    Resize(id);
}

[numthreads(GROUPS, 1, 1)]
void Resize_128(uint3 id : SV_DispatchThreadID)
{
    Resize(id);
}

[numthreads(GROUPS, 1, 1)]
void Resize_256(uint3 id : SV_DispatchThreadID)
{
    Resize(id);
}

[numthreads(GROUPS, 1, 1)]
void Resize_512(uint3 id : SV_DispatchThreadID)
{
    Resize(id);
}

[numthreads(GROUPS, 1, 1)]
void Resize_1024(uint3 id : SV_DispatchThreadID)
{
    Resize(id);
}