commit e2bc0ba7e6a2b82a3e3f498cb701e5ceca0f4050
parent 1494036a2c88de72e059e729880321db83593b55
Author: Christian Ermann <christianermann@gmail.com>
Date: Sat, 20 Jul 2024 10:45:21 -0400
Add meshlet generation algorithms
Diffstat:
7 files changed, 1320 insertions(+), 142 deletions(-)
diff --git a/src/forsyth_optimize.zig b/src/forsyth_optimize.zig
@@ -0,0 +1,278 @@
+const std = @import("std");
+const assert = std.debug.assert;
+
+const Mesh = @import("mesh.zig");
+
+const ScoreParameters = struct {
+ last_tri: f32 = 0.75,
+ cache_decay_power: f32 = 1.5,
+ valence_boost_scale: f32 = 2.0,
+ valence_boost_power: f32 = 0.5,
+};
+
+const Vertex = struct {
+ cache_idx: ?u32 = undefined,
+ score: f32 = 0,
+ num_triangles: u32 = 0,
+ max_triangles: u32 = 0,
+ triangles: []u32 = undefined,
+
+ fn scoreFn(
+ self: *const Vertex,
+ max_cache_idx: u32,
+ params: ScoreParameters,
+ ) f32 {
+ // Vertex will never be used again
+ const num_active = self.max_triangles - self.num_triangles;
+ if (num_active <= 0) {
+ return -1.0;
+ }
+
+ // Boost score for lonely vertices so they go away fast
+ const valence_boost = std.math.pow(
+ f32,
+ @floatFromInt(num_active),
+ -params.valence_boost_power,
+ );
+ const score = params.valence_boost_scale * valence_boost;
+
+ // No adjustments for vertices not in the cache yet
+ if (self.cache_idx == null) {
+ return score;
+ }
+
+ // Vertices from last triangle all get the same score
+ const cache_idx = self.cache_idx.?;
+ if (cache_idx < 3) {
+ return score + params.last_tri;
+ }
+
+ // For other vertices, score depends on cache position
+ assert(cache_idx < max_cache_idx);
+ const scale = 1.0 / @as(f32, @floatFromInt(max_cache_idx - 3));
+ const adjusted_idx: f32 = @floatFromInt(cache_idx - 3);
+ const cache_score = 1.0 - adjusted_idx * scale;
+ return score + std.math.pow(f32, cache_score, params.cache_decay_power);
+ }
+};
+
+const Triangle = struct {
+ added: bool = false,
+ score: f32 = 0,
+ vertices: [3]u32 = undefined,
+};
+
+fn LRUCache(comptime T: type) type {
+ return struct {
+ const Self = @This();
+ const List = std.DoublyLinkedList(T);
+
+ allocator: std.mem.Allocator,
+ list: List,
+ max_size: u32,
+
+ pub fn init(allocator: std.mem.Allocator, max_size: u32) Self {
+ return .{
+ .list = std.DoublyLinkedList(T){},
+ .allocator = allocator,
+ .max_size = max_size,
+ };
+ }
+
+ fn find(cache: *const Self, value: T) ?*List.Node {
+ var it = cache.list.first;
+ while (it) |node| : (it = node.next) {
+ if (node.data == value) {
+ return node;
+ }
+ }
+ return null;
+ }
+
+ pub fn insert(cache: *Self, value: T) !?*List.Node {
+ var node = cache.find(value);
+ if (node != null) {
+ cache.list.remove(node.?);
+ } else {
+ node = try cache.allocator.create(List.Node);
+ node.?.data = value;
+ }
+ cache.list.prepend(node.?);
+
+ if (cache.list.len > cache.max_size) {
+ return cache.list.pop();
+ }
+ return null;
+ }
+
+ pub fn first(cache: *Self) ?*List.Node {
+ return cache.list.first;
+ }
+ };
+}
+
+pub fn optimize(
+ allocator: std.mem.Allocator,
+ mesh: *Mesh,
+ max_cache_idx: u32,
+ params: ScoreParameters,
+) !void {
+ // Initialize vertex data structure
+ const vertices = try allocator.alloc(Vertex, mesh.positions.len);
+ @memset(vertices, Vertex{});
+ for (0..mesh.indices.len / 3) |i| {
+ const mesh_offset = i * 3;
+ const a = mesh.indices[mesh_offset];
+ const b = mesh.indices[mesh_offset + 1];
+ const c = mesh.indices[mesh_offset + 2];
+
+ vertices[a].max_triangles += 1;
+ vertices[b].max_triangles += 1;
+ vertices[c].max_triangles += 1;
+ }
+ for (vertices) |*vertex| {
+ vertex.triangles = try allocator.alloc(u32, vertex.max_triangles);
+ vertex.max_triangles = 0;
+ }
+ for (0..mesh.indices.len / 3) |i| {
+ const mesh_offset = i * 3;
+ const a = mesh.indices[mesh_offset];
+ const b = mesh.indices[mesh_offset + 1];
+ const c = mesh.indices[mesh_offset + 2];
+
+ const idx: u32 = @intCast(i);
+ vertices[a].triangles[vertices[a].max_triangles] = idx;
+ vertices[b].triangles[vertices[b].max_triangles] = idx;
+ vertices[c].triangles[vertices[c].max_triangles] = idx;
+
+ vertices[a].max_triangles += 1;
+ vertices[b].max_triangles += 1;
+ vertices[c].max_triangles += 1;
+ }
+ for (vertices) |*vertex| {
+ vertex.score = vertex.scoreFn(max_cache_idx, params);
+ }
+
+ // Initialize triangle data structure
+ const triangles = try allocator.alloc(Triangle, mesh.indices.len / 3);
+ @memset(triangles, Triangle{});
+ for (triangles, 0..) |*tri, i| {
+ const mesh_offset = i * 3;
+ const a = mesh.indices[mesh_offset];
+ const b = mesh.indices[mesh_offset + 1];
+ const c = mesh.indices[mesh_offset + 2];
+ tri.vertices[0] = a;
+ tri.vertices[1] = b;
+ tri.vertices[2] = c;
+ tri.score = vertices[a].score + vertices[b].score + vertices[c].score;
+ }
+
+ // Store new indices here
+ const indices = try allocator.alloc(u32, mesh.indices.len);
+
+ // Run algorithm
+ var cache = LRUCache(u32).init(allocator, max_cache_idx);
+ for (0..triangles.len) |i| {
+ var tri = bestTriangleFromCache(vertices, triangles, &cache);
+ if (tri == null) {
+ tri = bestTriangleOverall(triangles);
+ }
+ assert(tri != null);
+
+ const offset = i * 3;
+ indices[offset] = triangles[tri.?].vertices[0];
+ indices[offset + 1] = triangles[tri.?].vertices[1];
+ indices[offset + 2] = triangles[tri.?].vertices[2];
+
+ try addTriangleToCache(
+ allocator,
+ tri.?,
+ vertices,
+ triangles,
+ &cache,
+ params,
+ );
+ }
+
+ allocator.free(mesh.indices);
+ mesh.indices = indices;
+}
+
+fn addTriangleToCache(
+ allocator: std.mem.Allocator,
+ triangle_idx: u32,
+ vertices: []Vertex,
+ triangles: []Triangle,
+ cache: *LRUCache(u32),
+ params: ScoreParameters,
+) !void {
+ assert(!triangles[triangle_idx].added);
+
+ // Insert triangle
+ triangles[triangle_idx].added = true;
+ for (triangles[triangle_idx].vertices) |idx| {
+ const removed = try cache.insert(idx);
+ if (removed != null) {
+ const vtx = &vertices[removed.?.data];
+ vtx.cache_idx = null;
+ vtx.score = vtx.scoreFn(cache.max_size, params);
+ allocator.destroy(removed.?);
+ }
+ vertices[idx].num_triangles += 1;
+ }
+
+ // Update all other cache entries
+ var it = cache.first();
+ var cache_idx: u32 = 0;
+ while (it) |node| : (it = node.next) {
+ const vtx = &vertices[node.data];
+ if (cache_idx < cache.max_size) {
+ vtx.cache_idx = cache_idx;
+ } else {
+ vtx.cache_idx = null;
+ }
+ cache_idx += 1;
+ vtx.score = vtx.scoreFn(cache.max_size, params);
+ }
+}
+
+fn bestTriangleOverall(triangles: []Triangle) ?u32 {
+ var best_triangle: ?u32 = null;
+ var best_score: f32 = 0.0;
+ for (triangles, 0..) |tri, i| {
+ if (!tri.added and tri.score > best_score) {
+ best_triangle = @intCast(i);
+ best_score = tri.score;
+ }
+ }
+ return best_triangle;
+}
+
+fn bestTriangleFromCache(
+ vertices: []Vertex,
+ triangles: []Triangle,
+ cache: *LRUCache(u32),
+) ?u32 {
+ var best_score: f32 = 0.0;
+ var best_triangle: ?u32 = null;
+ var it = cache.first();
+ while (it) |node| : (it = node.next) {
+ const vtx = &vertices[node.data];
+ for (vtx.triangles) |idx| {
+ const tri = &triangles[idx];
+ if (tri.added) {
+ continue;
+ }
+ const a = vertices[tri.vertices[0]];
+ const b = vertices[tri.vertices[1]];
+ const c = vertices[tri.vertices[2]];
+ tri.score = a.score + b.score + c.score;
+
+ if (tri.score > best_score) {
+ best_triangle = idx;
+ best_score = tri.score;
+ }
+ }
+ }
+ return best_triangle;
+}
diff --git a/src/half_edge_map.zig b/src/half_edge_map.zig
@@ -0,0 +1,67 @@
+const std = @import("std");
+
+const HalfEdgeMap = @This();
+
+const HalfEdge = struct {
+ head: u32,
+ tail: u32,
+};
+
+map: std.AutoArrayHashMapUnmanaged(HalfEdge, u32),
+
+pub fn init(indices: []u32, allocator: std.mem.Allocator) !HalfEdgeMap {
+ const num_indices: u32 = @intCast(indices.len);
+ const num_triangles = num_indices / 3;
+
+ var map = std.AutoArrayHashMapUnmanaged(HalfEdge, u32){};
+ try map.ensureTotalCapacity(allocator, num_indices);
+
+ for (0..num_triangles) |triangle_idx| {
+ const i = @as(u32, @intCast(triangle_idx)) * 3;
+ const a = indices[i];
+ const b = indices[i + 1];
+ const c = indices[i + 2];
+ map.putAssumeCapacityNoClobber(.{ .head = a, .tail = b }, c);
+ map.putAssumeCapacityNoClobber(.{ .head = b, .tail = c }, a);
+ map.putAssumeCapacityNoClobber(.{ .head = c, .tail = a }, b);
+ }
+ return .{
+ .map = map,
+ };
+}
+
+pub fn insert(self: *HalfEdgeMap, tri: [3]u32) void {
+ const a = tri[0];
+ const b = tri[1];
+ const c = tri[2];
+ self.map.putAssumeCapacityNoClobber(.{ .head = a, .tail = b }, c);
+ self.map.putAssumeCapacityNoClobber(.{ .head = b, .tail = c }, a);
+ self.map.putAssumeCapacityNoClobber(.{ .head = c, .tail = a }, b);
+}
+
+pub fn remove(self: *HalfEdgeMap, tri: [3]u32) bool {
+ const success0 = self.map.swapRemove(.{ .head = tri[0], .tail = tri[1] });
+ const success1 = self.map.swapRemove(.{ .head = tri[1], .tail = tri[2] });
+ const success2 = self.map.swapRemove(.{ .head = tri[2], .tail = tri[0] });
+ return success0 and success1 and success2;
+}
+
+pub fn adjacentTo(self: *HalfEdgeMap, tri: [3]u32, side: u32) ?[3]u32 {
+ const a = tri[(side + 1) % 3];
+ const b = tri[side];
+ const c = self.map.get(.{ .head = a, .tail = b }) orelse {
+ return null;
+ };
+ return [3]u32{ a, b, c };
+}
+
+pub fn count(self: *const HalfEdgeMap) usize {
+ return self.map.count();
+}
+
+pub fn pop(self: *HalfEdgeMap) [3]u32 {
+ const kv = self.map.pop();
+ const tri = [3]u32{ kv.key.head, kv.key.tail, kv.value };
+ _ = self.remove(tri);
+ return tri;
+}
diff --git a/src/main.zig b/src/main.zig
@@ -15,6 +15,7 @@ const Mesh = @import("mesh.zig");
const Camera = @import("camera.zig");
const input = @import("input.zig");
+const ClusterMesh = @import("meshlets.zig").ClusterMesh;
const load_obj = @import("load_obj.zig");
@@ -298,114 +299,75 @@ pub const UniformData = struct {
proj_matrix: mat4,
};
-pub const Buffer = struct {
- data: *gpu.Buffer,
- size: u32,
- offset: u32,
- attrib_size: u32,
-
- pub fn init_vtx(n_vertices: u32, device: *gpu.Device) Buffer {
- std.log.info("buffer init: {} vertices", .{n_vertices});
- const attrib_size = @sizeOf(f32x4);
- const size = n_vertices * attrib_size;
- const descriptor = gpu.Buffer.Descriptor{
- .size = size,
- .usage = .{ .vertex = true, .copy_dst = true },
- .mapped_at_creation = .false,
- };
- const data = device.createBuffer(&descriptor);
-
- return .{
- .data = data,
- .size = size,
- .offset = 0,
- .attrib_size = attrib_size,
- };
- }
+pub fn Buffer(comptime T: type) type {
+ return struct {
+ const Self = @This();
+ const attrib_size = @sizeOf(T);
- pub fn init_idx(n_indices: u32, device: *gpu.Device) Buffer {
- std.log.info("buffer init: {} indices", .{n_indices});
- const attrib_size = @sizeOf(u32);
- const size = n_indices * attrib_size;
- const descriptor = gpu.Buffer.Descriptor{
- .size = size,
- .usage = .{ .index = true, .copy_dst = true },
- .mapped_at_creation = .false,
- };
- const data = device.createBuffer(&descriptor);
-
- return .{
- .data = data,
- .size = size,
- .offset = 0,
- .attrib_size = attrib_size,
- };
- }
-
- pub fn init_indirect(n_meshes: u32, device: *gpu.Device) Buffer {
- std.log.info("buffer init: {} indirect", .{n_meshes});
- const attrib_size = @sizeOf(u32) * 5;
- const size = n_meshes * attrib_size;
- const descriptor = gpu.Buffer.Descriptor{
- .size = size,
- .usage = .{ .indirect = true, .copy_dst = true },
- .mapped_at_creation = .false,
- };
- const data = device.createBuffer(&descriptor);
- return .{
- .data = data,
- .size = size,
- .offset = 0,
- .attrib_size = attrib_size,
- };
- }
+ data: *gpu.Buffer,
+ size: u32,
+ offset: u32,
+ attrib_size: u32,
+
+ pub fn init(
+ max_attribs: u32,
+ device: *gpu.Device,
+ usage: gpu.Buffer.UsageFlags,
+ mapped_at_creation: bool,
+ ) Self {
+ std.log.info("buffer init: {} {}", .{ max_attribs, T });
+ const size = max_attribs * attrib_size;
+ const descriptor = gpu.Buffer.Descriptor{
+ .size = size,
+ .usage = usage,
+ .mapped_at_creation = gpu.Bool32.from(mapped_at_creation),
+ };
+ const data = device.createBuffer(&descriptor);
+ return .{
+ .data = data,
+ .size = size,
+ .offset = 0,
+ .attrib_size = attrib_size,
+ };
+ }
- pub fn init_instance(n_instances: u32, device: *gpu.Device) Buffer {
- std.log.info("buffer init: {} instances", .{n_instances});
- const attrib_size = @sizeOf(InstanceData);
- const size = n_instances * attrib_size;
- const descriptor = gpu.Buffer.Descriptor{
- .size = size,
- .usage = .{ .storage = true, .copy_dst = true },
- .mapped_at_creation = .false,
- };
- const data = device.createBuffer(&descriptor);
- return .{
- .data = data,
- .size = size,
- .offset = 0,
- .attrib_size = attrib_size,
- };
- }
+ pub fn alloc(self: *Self, n_attribs: u32) !u32 {
+ const size = n_attribs * attrib_size;
+ const offset_start = self.offset;
+ const offset_after = self.offset + size;
+ std.log.info("buffer alloc: {} {}", .{ n_attribs, T });
+ if (offset_after > self.size) {
+ std.log.err("buffer out of memory", .{});
+ return error.BufferEmpty;
+ }
+ self.offset = offset_after;
+ return offset_start / attrib_size;
+ }
- pub fn alloc(self: *Buffer, n_elements: u32) !u32 {
- const size = n_elements * self.attrib_size;
- const offset_start = self.offset;
- const offset_after = self.offset + size;
- std.log.info("buffer alloc: {} elements", .{n_elements});
- if (offset_after > self.size) {
- std.log.err("buffer out of memory", .{});
- return error.BufferEmpty;
+ pub fn write(
+ self: *Self,
+ data_slice: anytype,
+ queue: *gpu.Queue,
+ offset: u32,
+ ) void {
+ queue.writeBuffer(self.data, offset * attrib_size, data_slice);
}
- self.offset = offset_after;
- return offset_start;
- }
- pub fn write(
- self: *Buffer,
- data_slice: anytype,
- queue: *gpu.Queue,
- offset: u32,
- ) void {
- queue.writeBuffer(self.data, offset, data_slice);
- }
+ pub fn allocWrite(
+ self: *Self,
+ data_slice: anytype,
+ queue: *gpu.Queue,
+ ) !u32 {
+ const offset = try self.alloc(@intCast(data_slice.len));
+ queue.writeBuffer(self.data, offset * attrib_size, data_slice);
+ return offset;
+ }
- pub fn allocWrite(self: *Buffer, data_slice: anytype, queue: *gpu.Queue) !u32 {
- const offset = try self.alloc(@intCast(data_slice.len));
- queue.writeBuffer(self.data, offset, data_slice);
- return offset;
- }
-};
+ pub fn count(self: *const Self) u32 {
+ return self.offset / attrib_size;
+ }
+ };
+}
pub const MeshBufferOptions = struct {
device: *gpu.Device,
@@ -422,19 +384,25 @@ pub const MeshOptions = struct {
data: *const Mesh,
};
+pub const ClusterMeshOptions = struct {
+ scale: f32 = 1.0,
+ offset: f32x3 = .{ 0, 0, 0 },
+ data: *const ClusterMesh,
+};
+
pub const MeshBuffer = struct {
device: *gpu.Device,
queue: *gpu.Queue,
- positions: Buffer,
- normals: Buffer,
- tex_coords: Buffer,
- tangents: Buffer,
- bitangents: Buffer,
- indices: Buffer,
+ positions: Buffer(f32x4),
+ normals: Buffer(f32x4),
+ tex_coords: Buffer(f32x4),
+ tangents: Buffer(f32x4),
+ bitangents: Buffer(f32x4),
+ indices: Buffer(u32),
- instances: Buffer,
- indirect: Buffer,
+ instances: Buffer(InstanceData),
+ indirect: Buffer([5]u32),
meshes: std.ArrayList(RenderMesh),
@@ -446,14 +414,54 @@ pub const MeshBuffer = struct {
return .{
.device = options.device,
.queue = options.queue,
- .positions = Buffer.init_vtx(n_vertices, device),
- .normals = Buffer.init_vtx(n_vertices, device),
- .tex_coords = Buffer.init_vtx(n_vertices, device),
- .tangents = Buffer.init_vtx(n_vertices, device),
- .bitangents = Buffer.init_vtx(n_vertices, device),
- .indices = Buffer.init_idx(n_indices, device),
- .instances = Buffer.init_instance(options.n_meshes, device),
- .indirect = Buffer.init_indirect(options.n_meshes, device),
+ .positions = Buffer(f32x4).init(
+ n_vertices,
+ device,
+ .{ .vertex = true, .copy_dst = true },
+ false,
+ ),
+ .normals = Buffer(f32x4).init(
+ n_vertices,
+ device,
+ .{ .vertex = true, .copy_dst = true },
+ false,
+ ),
+ .tex_coords = Buffer(f32x4).init(
+ n_vertices,
+ device,
+ .{ .vertex = true, .copy_dst = true },
+ false,
+ ),
+ .tangents = Buffer(f32x4).init(
+ n_vertices,
+ device,
+ .{ .vertex = true, .copy_dst = true },
+ false,
+ ),
+ .bitangents = Buffer(f32x4).init(
+ n_vertices,
+ device,
+ .{ .vertex = true, .copy_dst = true },
+ false,
+ ),
+ .indices = Buffer(u32).init(
+ n_indices,
+ device,
+ .{ .index = true, .copy_dst = true },
+ false,
+ ),
+ .instances = Buffer(InstanceData).init(
+ options.n_meshes,
+ device,
+ .{ .storage = true, .copy_dst = true },
+ false,
+ ),
+ .indirect = Buffer([5]u32).init(
+ options.n_meshes,
+ device,
+ .{ .indirect = true, .copy_dst = true },
+ false,
+ ),
.meshes = std.ArrayList(RenderMesh).init(options.allocator),
};
}
@@ -502,19 +510,67 @@ pub const MeshBuffer = struct {
return &self.meshes.items[0];
}
+ pub fn initClusterMesh(
+ self: *MeshBuffer,
+ options: *const ClusterMeshOptions,
+ allocator: std.mem.Allocator,
+ ) !*RenderMesh {
+ const mesh_data = options.data;
+ const num_vertices = mesh_data.positions.len;
+ const num_indices = mesh_data.indices.len;
+ const num_clusters = mesh_data.descriptors.len;
+ std.log.info(
+ "init mesh with {} vertices, {} indices, {} clusters",
+ .{ num_vertices, num_indices, num_clusters },
+ );
+
+ const offset_p = try self.positions.allocWrite(mesh_data.positions, self.queue);
+ _ = try self.normals.allocWrite(mesh_data.normals, self.queue);
+ _ = try self.tex_coords.allocWrite(mesh_data.tex_coords, self.queue);
+ _ = try self.tangents.allocWrite(mesh_data.tangents, self.queue);
+ _ = try self.bitangents.allocWrite(mesh_data.bitangents, self.queue);
+
+ const offset_i = try self.indices.allocWrite(mesh_data.indices, self.queue);
+
+ const mesh = RenderMesh{
+ .num_vertices = num_vertices,
+ .num_indices = num_indices,
+ .scale = options.scale,
+ .offset = options.offset,
+ };
+
+ const instances = [_]InstanceData{
+ mesh.instanceData(),
+ };
+ _ = try self.instances.allocWrite(&instances, self.queue);
+
+ const indirect_args = try allocator.alloc([5]u32, num_clusters);
+ defer allocator.free(indirect_args);
+
+ var mesh_id: u32 = @intCast(self.meshes.items.len);
+ for (mesh_data.descriptors, 0..) |descriptor, i| {
+ indirect_args[i] = .{
+ @intCast(descriptor.num_triangles * 3),
+ 1,
+ offset_i + descriptor.offset,
+ offset_p,
+ mesh_id,
+ };
+ mesh_id += 1;
+ }
+ _ = try self.indirect.allocWrite(indirect_args, self.queue);
+
+ try self.meshes.append(mesh);
+ return &self.meshes.items[0];
+ }
+
pub fn updateMeshes(self: *MeshBuffer) void {
- std.log.info("RUNNING!", .{});
for (self.meshes.items, 0..) |*mesh, i| {
if (!mesh.dirty) {
- std.log.info("{}", .{mesh.dirty});
continue;
}
- const offset = @sizeOf(InstanceData) * i;
- const instances = [_]InstanceData{
- mesh.instanceData(),
- };
- std.log.info("instance: {}", .{mesh.instanceData()});
- self.instances.write(&instances, self.queue, @intCast(offset));
+ const instances = [_]InstanceData{mesh.instanceData()};
+ self.instances.write(&instances, self.queue, @intCast(i));
mesh.dirty = false;
}
}
@@ -590,30 +646,36 @@ pub fn main() !void {
var mesh_buffer = MeshBuffer.init(.{
.device = app.device,
.queue = app.queue,
- .n_meshes = 10,
+ .n_meshes = 200,
.n_vertices = 60000,
.n_indices = 60000,
.allocator = allocator,
});
- const mesh = try load_obj.loadFile(.{
+ var mesh = try load_obj.loadFile(.{
.allocator = allocator,
.path = "Cottage_Clean/cottage.obj",
});
- var render_mesh = try mesh_buffer.initMesh(&.{
- .data = &mesh,
- .scale = 1,
- .offset = .{ 0, -5, 15 },
- });
- _ = try load_obj.loadFile(.{
- .allocator = allocator,
- .path = "teapot.obj",
- //.scale = 0.05,
- //.offset = .{ 0, -0.5, 0 },
- });
+ //var bunny_mesh = try load_obj.loadFile(.{
+ // .allocator = allocator,
+ // .path = "bunny-fixed.obj",
+ //});
+
+ const optimizeMesh = @import("forsyth_optimize.zig").optimize;
+ try optimizeMesh(allocator, &mesh, 32, .{});
+
+ const buildClusters = @import("meshlets.zig").buildClusters;
+ const cluster_mesh = try buildClusters(allocator, &mesh, 64, 126);
+ std.log.info("num clusters: {}", .{cluster_mesh.descriptors.len});
+
+ var render_mesh = try mesh_buffer.initClusterMesh(&.{
+ .data = &cluster_mesh,
+ .scale = 1,
+ .offset = .{ 0, 0, 0 },
+ }, allocator);
- var rp = try MeshRenderPipeline.init(&app, &mesh_buffer, &textures, "shaders/mesh_pbr.wgsl");
+ var rp = try MeshRenderPipeline.init(&app, &mesh_buffer, &textures, "shaders/mesh_clusters.wgsl");
const material = try Material.init(
&textures,
diff --git a/src/mesh_render_pipeline.zig b/src/mesh_render_pipeline.zig
@@ -173,8 +173,9 @@ pub fn frame(ptr: *anyopaque, pass: *gpu.RenderPassEncoder) void {
pass.setBindGroup(0, self.bind_group, null);
pass.setIndexBuffer(self.mesh_buffer.indices.data, .uint32, 0, self.mesh_buffer.indices.size);
- for (0..self.mesh_buffer.meshes.items.len) |mesh_idx| {
- const offset = mesh_idx * 5 * @sizeOf(u32);
+ const num_indirect = self.mesh_buffer.indirect.count();
+ for (0..num_indirect) |idx| {
+ const offset = idx * self.mesh_buffer.indirect.attrib_size;
pass.drawIndexedIndirect(self.mesh_buffer.indirect.data, offset);
}
}
diff --git a/src/meshlets.zig b/src/meshlets.zig
@@ -0,0 +1,120 @@
+const std = @import("std");
+const assert = std.debug.assert;
+
+const Mesh = @import("mesh.zig");
+
+const f32x3 = @Vector(3, f32);
+const f32x4 = @Vector(4, f32);
+
+pub const ClusterDescriptor = struct {
+ offset: u32,
+ num_vertices: u32,
+ num_triangles: u32,
+};
+
+pub const ClusterMesh = struct {
+ positions: []f32x4,
+ normals: []f32x3,
+ tex_coords: []f32x3,
+ tangents: []f32x3,
+ bitangents: []f32x3,
+ indices: []u32,
+ descriptors: []ClusterDescriptor,
+};
+
+pub fn buildClusters(
+ allocator: std.mem.Allocator,
+ mesh: *const Mesh,
+ max_vertices: u32,
+ max_triangles: u32,
+) !ClusterMesh {
+ // Only triangle meshes are supported
+ assert(mesh.indices.len % 3 == 0);
+ // Must have at least one triangle
+ assert(max_vertices >= 3);
+ assert(max_triangles >= 1);
+
+ // 'unused' tracks which vertices have not been used
+ // unused[vtx_idx] = 1 -> not used
+ // unused[vtx_idx] = 0 -> used
+ const unused = try allocator.alloc(u8, mesh.positions.len);
+ defer allocator.free(unused);
+ @memset(unused, 1);
+
+ const indices = try allocator.alloc(u32, mesh.indices.len);
+ var descriptors = std.ArrayList(ClusterDescriptor).init(allocator);
+ defer descriptors.deinit();
+
+ var num_vertices: u32 = 0;
+ var num_triangles: u32 = 0;
+ var cluster_offset: u32 = 0;
+
+ for (0..mesh.indices.len / 3) |i| {
+ const mesh_offset = i * 3;
+ const a = mesh.indices[mesh_offset];
+ const b = mesh.indices[mesh_offset + 1];
+ const c = mesh.indices[mesh_offset + 2];
+
+ // Triangles must be non-degenerate
+ assert(a != b and a != c and b != c);
+ // Vertices must be contained in the mesh
+ const a_valid = a < mesh.positions.len;
+ const b_valid = b < mesh.positions.len;
+ const c_valid = c < mesh.positions.len;
+ assert(a_valid and b_valid and c_valid);
+
+ const num_extra = unused[a] + unused[b] + unused[c];
+ const too_many_vertices = num_vertices + num_extra >= max_vertices;
+ const too_many_triangles = num_triangles >= max_triangles;
+ if (too_many_vertices or too_many_triangles) {
+ // Save cluster info
+ try descriptors.append(.{
+ .offset = cluster_offset,
+ .num_vertices = num_vertices,
+ .num_triangles = num_triangles,
+ });
+
+ // Reset state for next cluster
+ @memset(unused, 1);
+ cluster_offset += num_triangles * 3;
+ num_vertices = 0;
+ num_triangles = 0;
+ }
+
+ // Mark vertices as used
+ unused[a] = 0;
+ unused[b] = 0;
+ unused[c] = 0;
+
+ // Add triangle to cluster
+ const offset = cluster_offset + num_triangles * 3;
+ indices[offset] = a;
+ indices[offset + 1] = b;
+ indices[offset + 2] = c;
+
+ num_vertices += num_extra;
+ num_triangles += 1;
+ }
+
+ // Add final cluster
+ if (num_triangles > 0) {
+ try descriptors.append(.{
+ .offset = cluster_offset,
+ .num_vertices = num_vertices,
+ .num_triangles = num_triangles,
+ });
+ }
+
+ // Total size of mesh should not change
+ assert(mesh.indices.len == cluster_offset + num_triangles * 3);
+
+ return .{
+ .positions = mesh.positions,
+ .normals = mesh.normals,
+ .tex_coords = mesh.tex_coords,
+ .tangents = mesh.tangents,
+ .bitangents = mesh.bitangents,
+ .indices = indices,
+ .descriptors = try descriptors.toOwnedSlice(),
+ };
+}
diff --git a/src/meshlets_adjacency.zig b/src/meshlets_adjacency.zig
@@ -0,0 +1,446 @@
+const std = @import("std");
+const assert = std.debug.assert;
+
+const HalfEdgeMap = @import("half_edge_map.zig");
+const Mesh = @import("mesh.zig");
+
+const meshlets = @import("meshlets.zig");
+const ClusterDescriptor = meshlets.ClusterDescriptor;
+const ClusterMesh = meshlets.ClusterMesh;
+
+pub fn buildClusters(
+ allocator: std.mem.Allocator,
+ mesh: *const Mesh,
+ max_vertices: u32,
+ max_triangles: u32,
+) !ClusterMesh {
+ // Only triangle meshes are supported
+ assert(mesh.indices.len % 3 == 0);
+ // Must have at least one triangle
+ assert(max_vertices >= 3);
+ assert(max_triangles >= 1);
+
+ // 'unused' tracks which vertices have not been used
+ // unused[vtx_idx] = 1 -> not used
+ // unused[vtx_idx] = 0 -> used
+ const unused = try allocator.alloc(u8, mesh.positions.len);
+ defer allocator.free(unused);
+ @memset(unused, 1);
+
+ const indices = try allocator.alloc(u32, mesh.indices.len);
+ var descriptors = std.ArrayList(ClusterDescriptor).init(allocator);
+ defer descriptors.deinit();
+
+ var num_vertices: u32 = 0;
+ var num_triangles: u32 = 0;
+ var cluster_offset: u32 = 0;
+
+ var map = try HalfEdgeMap.init(mesh.indices, allocator);
+ var tri: [3]u32 = undefined;
+ var adjacent = std.ArrayList(u32).init(allocator);
+ const usage = try buildUsageMap(mesh, allocator);
+
+ for (0..mesh.indices.len / 3) |_| {
+ // Fetch next triangle
+ if (adjacent.items.len > 0) {
+ var min_extra: u32 = std.math.maxInt(u32);
+ var min_score: u32 = std.math.maxInt(u32);
+ var idx_best: u32 = 0;
+ const num_adjacent = adjacent.items.len / 3;
+ for (0..num_adjacent) |triangle_idx| {
+ const i = @as(u32, @intCast(triangle_idx)) * 3;
+ const adj = [3]u32{
+ adjacent.items[i],
+ adjacent.items[i + 1],
+ adjacent.items[i + 2],
+ };
+
+ var extra: u32 = 1;
+ if (inSlice(u32, indices[cluster_offset..], adj[2])) {
+ extra = 0;
+ }
+
+ if ((usage[adj[0]] == 1) or (usage[adj[1]] == 1) or (usage[adj[2]] == 1)) {
+ extra = 0;
+ }
+
+ if (extra > min_extra) {
+ continue;
+ }
+ const score = usage[adj[0]] + usage[adj[1]] + usage[adj[2]] - 3;
+
+ if ((extra < min_extra) or (score < min_score)) {
+ min_extra = extra;
+ min_score = score;
+ tri = adj;
+ idx_best = i;
+ }
+ }
+ tri = [3]u32{
+ adjacent.items[idx_best],
+ adjacent.items[idx_best + 1],
+ adjacent.items[idx_best + 2],
+ };
+ _ = adjacent.orderedRemove(idx_best);
+ _ = adjacent.orderedRemove(idx_best);
+ _ = adjacent.orderedRemove(idx_best);
+ } else {
+ tri = map.pop();
+ }
+ const a = tri[0];
+ const b = tri[1];
+ const c = tri[2];
+ usage[a] -= 1;
+ usage[b] -= 1;
+ usage[c] -= 1;
+
+ // Triangles must be non-degenerate
+ assert(a != b and a != c and b != c);
+
+ // Vertices must be contained in the mesh
+ const a_valid = a < mesh.positions.len;
+ const b_valid = b < mesh.positions.len;
+ const c_valid = c < mesh.positions.len;
+ assert(a_valid and b_valid and c_valid);
+
+ // Verify cluster has space
+ const num_extra = unused[a] + unused[b] + unused[c];
+ const too_many_vertices = num_vertices + num_extra >= max_vertices;
+ const too_many_triangles = num_triangles >= max_triangles;
+ if (too_many_vertices or too_many_triangles) {
+ // Save cluster info
+ try descriptors.append(.{
+ .offset = cluster_offset,
+ .num_vertices = num_vertices,
+ .num_triangles = num_triangles,
+ });
+
+ // Reset state for next cluster
+ @memset(unused, 1);
+ cluster_offset += num_triangles * 3;
+ num_vertices = 0;
+ num_triangles = 0;
+
+ // Add all adjacent triangles back to the half-edge map
+ for (0..adjacent.items.len / 3) |i| {
+ const idx = i * 3;
+ const adj = [3]u32{
+ adjacent.items[idx],
+ adjacent.items[idx + 1],
+ adjacent.items[idx + 2],
+ };
+ map.insert(adj);
+ }
+ adjacent.clearAndFree();
+ }
+
+ // Mark vertices as used
+ unused[a] = 0;
+ unused[b] = 0;
+ unused[c] = 0;
+
+ // Add triangle to cluster
+ const offset = cluster_offset + num_triangles * 3;
+ indices[offset] = a;
+ indices[offset + 1] = b;
+ indices[offset + 2] = c;
+
+ num_vertices += num_extra;
+ num_triangles += 1;
+
+ // Get newly adjacent triangles
+ for (0..3) |side| {
+ const triangle = map.adjacentTo(tri, @intCast(side)) orelse {
+ continue;
+ };
+ try adjacent.appendSlice(&triangle);
+ std.debug.assert(map.remove(triangle));
+ }
+
+ // If we're out of adjacent triangles, this cluster is done
+ const no_more_adjacent = adjacent.items.len <= 0;
+ if (no_more_adjacent) {
+ // Save cluster info
+ try descriptors.append(.{
+ .offset = cluster_offset,
+ .num_vertices = num_vertices,
+ .num_triangles = num_triangles,
+ });
+
+ // Reset state for next cluster
+ @memset(unused, 1);
+ cluster_offset += num_triangles * 3;
+ num_vertices = 0;
+ num_triangles = 0;
+ continue;
+ }
+ }
+
+ // Add final cluster
+ if (num_triangles > 0) {
+ try descriptors.append(.{
+ .offset = cluster_offset,
+ .num_vertices = num_vertices,
+ .num_triangles = num_triangles,
+ });
+ }
+
+ // Total size of mesh should not change
+ assert(mesh.indices.len == cluster_offset + num_triangles * 3);
+
+ return .{
+ .positions = mesh.positions,
+ .normals = mesh.normals,
+ .tex_coords = mesh.tex_coords,
+ .tangents = mesh.tangents,
+ .bitangents = mesh.bitangents,
+ .indices = indices,
+ .descriptors = try descriptors.toOwnedSlice(),
+ };
+}
+
+pub fn buildUsageMap(
+ mesh: *const Mesh,
+ allocator: std.mem.Allocator,
+) ![]u32 {
+ const num_vertices = mesh.positions.len;
+ const map = try allocator.alloc(u32, num_vertices);
+ @memset(map, 0);
+ for (mesh.indices) |idx| {
+ map[idx] += 1;
+ }
+ return map;
+}
+
+fn inSlice(comptime T: type, slice: []const T, item: T) bool {
+ for (slice) |iter_item| {
+ if (iter_item == item) {
+ return true;
+ }
+ }
+ return false;
+}
+
+//const ClusterOptions = struct {
+// max_verts: u32,
+// max_prims: u32,
+//};
+//
+//const ClusterDescriptor = struct {
+// offset: u32,
+// num_prims: u32,
+// num_verts: u32,
+//};
+//
+//pub fn buildClusters(mesh: *const Mesh, allocator: std.mem.Allocator) !void {
+// const max_prims = 126;
+// var offset: u32 = 0;
+//
+// var map = try HalfEdgeMap.init(mesh.indices, allocator);
+// var tri: ?[3]u32 = null;
+//
+// var descriptors = std.ArrayList(ClusterDescriptor).init(allocator);
+// // this holds offsets and counts for all clusters
+//
+// const indices: []u32 = try allocator.alloc(u32, mesh.indices.len);
+// // this holds the actual index data for all clusters
+// // should be free'd by caller (eventually)
+//
+// while (map.count() > 0) {
+// if (tri == null) {
+// tri = map.pop();
+// }
+// const cluster_end = @min(offset + max_prims * 3, mesh.indices.len);
+// const cluster = indices[offset..cluster_end];
+//
+// //result = try buildClusterFewest(allocator, &map, usage, tri.?);
+// const result = try buildClusterSimple(
+// allocator,
+// &map,
+// tri.?,
+// cluster,
+// );
+// offset += result.descriptor.num_prims * 3;
+// try descriptors.append(result.descriptor);
+// tri = result.next;
+// }
+// std.log.info("offset: {}", .{offset});
+// std.debug.assert(offset == indices.len);
+// std.log.info("num clusters: {}", .{descriptors.items.len});
+//}
+//
+//const ClusterResult = struct {
+// descriptor: ClusterDescriptor,
+// next: ?[3]u32,
+//};
+//
+//pub const Triangle = struct {
+// a: u32,
+// b: u32,
+// c: u32,
+//
+// pub fn permute(self: *const Triangle) Triangle {
+// return .{ .a = self.b, .b = self.c, .c = self.a };
+// }
+//
+// pub fn reverse(self: *const Triangle) Triangle {
+// return .{ .a = self.c, .b = self.b, .c = self.a };
+// }
+//};
+//
+//fn buildClusterSimple(
+// allocator: std.mem.Allocator,
+// map: *HalfEdgeMap,
+// initial_triangle: [3]u32,
+// cluster: []u32,
+//) !ClusterResult {
+// const max_prims = 126;
+//
+// var vertices = std.AutoHashMap(u32, void).init(allocator);
+// var adjacent = std.ArrayList(Triangle).init(allocator);
+// var tri: ?Triangle = .{
+// .a = initial_triangle[0],
+// .b = initial_triangle[1],
+// .c = initial_triangle[2],
+// };
+//
+// var idx: u32 = 0;
+// while (idx < max_prims * 3) {
+// // Add the most recent triangle to the cluster
+// cluster[idx] = tri.?.a;
+// cluster[idx + 1] = tri.?.b;
+// cluster[idx + 2] = tri.?.c;
+// //@memcpy(cluster[idx .. idx + 3], &tri.?);
+// try vertices.put(tri.?.a, {});
+// try vertices.put(tri.?.b, {});
+// try vertices.put(tri.?.c, {});
+// idx += 3;
+//
+// // Get newly adjacent triangles
+// for (0..3) |side| {
+// const triangle = map.adjacentTo(tri.?, @intCast(side)) orelse {
+// continue;
+// };
+// try adjacent.append(triangle);
+// std.debug.assert(map.remove(triangle));
+// }
+// if (adjacent.items.len <= 0) {
+// tri = null;
+// break;
+// }
+//
+// // Oldest adjacent triangle is up next
+// tri = [3]u32{
+// adjacent.items[0].a,
+// adjacent.items[0].b,
+// adjacent.items[0].c,
+// };
+// _ = adjacent.orderedRemove(0);
+// }
+//
+// // Add all adjacent triangles back to the half-edge map
+// for (adjacent.items) |adj_tri| {
+// //const adj = [3]u32{ adj_tri.a, adj_tri.b, adj_tri.c };
+// map.insert(adj_tri);
+// }
+// if (tri != null) {
+// const triangle = .{
+// .a = tri.?[0],
+// .b = tri.?[1],
+// .c = tri.?[2],
+// };
+// map.insert(triangle);
+// }
+//
+// return .{
+// .descriptor = .{
+// .offset = undefined,
+// .num_prims = idx / 3,
+// .num_verts = vertices.count(),
+// },
+// .next = null, //tri,
+// };
+//}
+//
+//fn buildClusterFewest(
+// allocator: std.mem.Allocator,
+// map: *HalfEdgeMap,
+// usage: []u32,
+// initial_triangle: [3]u32,
+//) !ClusterResult {
+// var cluster = std.ArrayList(u32).init(allocator);
+// var adjacent = std.ArrayList(u32).init(allocator);
+// var tri: ?[3]u32 = initial_triangle;
+// while (cluster.items.len < 126 * 3) {
+// // Add the most recent triangle to the cluster
+// try cluster.appendSlice(&tri.?);
+//
+// // Get newly adjacent triangles
+// for (0..3) |side| {
+// const triangle = map.adjacentTo(tri.?, @intCast(side)) orelse {
+// continue;
+// };
+// try adjacent.appendSlice(&triangle);
+// std.debug.assert(map.remove(triangle));
+// }
+// if (adjacent.items.len <= 0) {
+// tri = null;
+// break;
+// }
+//
+// var min_extra: u32 = std.math.maxInt(u32);
+// var min_score: u32 = std.math.maxInt(u32);
+// var idx_best: u32 = 0;
+// const num_adjacent = adjacent.items.len / 3;
+// for (0..num_adjacent) |triangle_idx| {
+// const i = @as(u32, @intCast(triangle_idx)) * 3;
+// const adj = [3]u32{
+// adjacent.items[i],
+// adjacent.items[i + 1],
+// adjacent.items[i + 2],
+// };
+//
+// var extra: u32 = 1;
+// if (inSlice(u32, cluster.items, adj[2])) {
+// extra = 0;
+// }
+//
+// if ((usage[adj[0]] == 1) or (usage[adj[1]] == 1) or (usage[adj[2]] == 1)) {
+// extra = 0;
+// }
+//
+// if (extra > min_extra) {
+// continue;
+// }
+// const score = usage[adj[0]] + usage[adj[1]] + usage[adj[2]] - 3;
+//
+// if ((extra < min_extra) or (score < min_score)) {
+// min_extra = extra;
+// min_score = score;
+// tri = adj;
+// idx_best = i;
+// }
+// }
+// _ = adjacent.orderedRemove(idx_best);
+// _ = adjacent.orderedRemove(idx_best);
+// _ = adjacent.orderedRemove(idx_best);
+// usage[tri.?[0]] -= 1;
+// usage[tri.?[1]] -= 1;
+// usage[tri.?[2]] -= 1;
+// }
+// const num_adjacent = adjacent.items.len / 3;
+// for (0..num_adjacent) |triangle_idx| {
+// const i = @as(u32, @intCast(triangle_idx)) * 3;
+// const adj = [3]u32{
+// adjacent.items[i],
+// adjacent.items[i + 1],
+// adjacent.items[i + 2],
+// };
+// map.insert(adj);
+// }
+// std.log.info("cluster size: {}", .{cluster.items.len / 3});
+// return .{
+// .cluster = cluster,
+// .next = tri,
+// };
+//}
diff --git a/src/shaders/mesh_clusters.wgsl b/src/shaders/mesh_clusters.wgsl
@@ -0,0 +1,204 @@
+struct VertexInput {
+ @location(0) position: vec4<f32>,
+ @location(1) normal: vec3<f32>,
+ @location(2) tex_coord: vec3<f32>,
+ @location(3) tangent: vec3<f32>,
+ @location(4) bitangent: vec3<f32>,
+};
+
+struct VertexOutput {
+ @builtin(position) clip_position: vec4<f32>,
+ @location(0) tex_coord: vec3<f32>,
+ @location(1) pt_fragment: vec3<f32>,
+ @location(2) pt_light: vec3<f32>,
+ @location(3) pt_camera: vec3<f32>,
+ @location(4) @interpolate(flat) instance_id: u32,
+};
+
+@group(0) @binding(0)
+var<storage, read> instances: array<Instance>;
+struct Instance {
+ model_matrix: mat4x4<f32>,
+ material: Material,
+};
+struct Material {
+ albedo: u32,
+ normal: u32,
+ roughness: u32,
+ metalness: u32,
+};
+
+@group(0) @binding(1)
+var<uniform> uniform_data: UniformData;
+struct UniformData {
+ pw_camera: vec3<f32>,
+ view_matrix: mat4x4<f32>,
+ proj_matrix: mat4x4<f32>,
+};
+
+@vertex fn vertex(
+ in: VertexInput,
+ @builtin(instance_index) idx: u32,
+) -> VertexOutput {
+ let model_matrix = instances[0].model_matrix;
+ let camera_matrix = uniform_data.proj_matrix * uniform_data.view_matrix;
+
+ let t = normalize((model_matrix * vec4(in.tangent, 0.0)).xyz);
+ let b = normalize((model_matrix * vec4(in.bitangent, 0.0)).xyz);
+ let n = normalize((model_matrix * vec4(in.normal, 0.0)).xyz);
+ let tangent_matrix = mat3x3<f32>(t, b, n);
+
+ let world_position = model_matrix * in.position;
+ let tangent_position = tangent_matrix * world_position.xyz;
+
+ let light_position = vec3<f32>(20, 80, 20);
+ let tangent_light_position = tangent_matrix * light_position;
+
+ let pt_camera = tangent_matrix * uniform_data.pw_camera;
+
+
+ return VertexOutput(
+ camera_matrix * world_position,
+ in.tex_coord,
+ tangent_position,
+ tangent_light_position,
+ pt_camera,
+ idx,
+ );
+}
+
+
+@group(0) @binding(2) var material_sampler: sampler;
+@group(0) @binding(3) var material_texture: texture_2d_array<f32>;
+
+fn diffuse_lambert(albedo: vec3<f32>) -> vec3<f32> {
+ return albedo * 0.31830988618;
+}
+
+fn distribution_trowbridge_reitz_ggx(
+ n_dot_h: f32,
+ alpha: f32,
+) -> f32 {
+ let alpha_squared = alpha * alpha;
+ let n_dot_h_squared = n_dot_h * n_dot_h;
+ let denom = n_dot_h_squared * (alpha_squared - 1) + 1;
+ let denom_squared = denom * denom;
+ return alpha_squared / (3.14159265359 * denom_squared);
+}
+
+fn geometry_schlick_ggx(
+ n_dot_x: f32,
+ alpha: f32,
+) -> f32 {
+ let k = 0.5 * alpha;
+ let denom = n_dot_x * (1.0 - k) + k;
+ return n_dot_x / denom;
+}
+
+fn geometry_smith(
+ n_dot_l: f32,
+ n_dot_v: f32,
+ alpha: f32,
+) -> f32 {
+ let g1 = geometry_schlick_ggx(n_dot_l, alpha);
+ let g2 = geometry_schlick_ggx(n_dot_v, alpha);
+ return g1 * g2;
+}
+
+fn fresnel_schlick(
+ f0: vec3<f32>,
+ v_dot_h: f32,
+) -> vec3<f32> {
+ return f0 + (1.0 - f0) * pow(1.0 - v_dot_h, 5.0);
+}
+
+fn cook_torrance(
+ n_dot_v: f32,
+ n_dot_l: f32,
+ n_dot_h: f32,
+ roughness: f32,
+) -> f32 {
+ let alpha = roughness * roughness;
+ let distribution = distribution_trowbridge_reitz_ggx(
+ n_dot_h,
+ alpha,
+ );
+ let geometry = geometry_smith(
+ n_dot_l,
+ n_dot_v,
+ alpha,
+ );
+ let denom = 4.0 * n_dot_v * n_dot_l + 0.000001;
+ return (distribution * geometry) / denom;
+}
+
+fn lighting(
+ normal: vec3<f32>,
+ p_fragment: vec3<f32>,
+ p_camera: vec3<f32>,
+ p_light: vec3<f32>,
+ c_light: vec3<f32>,
+ albedo: vec3<f32>,
+ roughness: f32,
+ metalness: f32,
+) -> vec3<f32> {
+ let v = normalize(p_camera - p_fragment);
+ let l = normalize(p_light - p_fragment);
+ let h = normalize(v + l);
+
+ let n_dot_v = max(dot(normal, v), 0.0);
+ let n_dot_l = max(dot(normal, l), 0.0);
+ let n_dot_h = max(dot(normal, h), 0.0);
+ let v_dot_h = max(dot(v, h), 0.0);
+
+ let f0 = mix(vec3<f32>(0.04), albedo, metalness);
+ let k_s = fresnel_schlick(f0, v_dot_h);
+ let k_d = 1.0 - k_s;
+
+ let diffuse = k_d * diffuse_lambert(albedo);
+ let specular = k_s * cook_torrance(n_dot_v, n_dot_l, n_dot_h, roughness);
+ let brdf = (diffuse + specular) * n_dot_l;
+ return brdf * c_light;
+}
+
+@fragment fn fragment(in: VertexOutput) -> @location(0) vec4<f32> {
+ let material = instances[in.instance_id].material;
+ let uv = vec2<f32>(in.tex_coord.x, 1 - in.tex_coord.y);
+ var albedo = textureSample(material_texture, material_sampler, uv, material.albedo).rgb;
+ albedo = pow(albedo, vec3<f32>(2.2));
+ var normal = textureSample(material_texture, material_sampler, uv, material.normal).rgb;
+ normal = normalize(normal * 2.0 - 1.0);
+ var roughness = textureSample(material_texture, material_sampler, uv, material.roughness).r;
+ var metalness = textureSample(material_texture, material_sampler, uv, material.metalness).r;
+
+ let light_color = normalize(vec3<f32>(23.47, 21.31, 20.79));
+ //let light_color = normalize(vec3<f32>(0.9, 0.9, 0.9));
+ var c = lighting(
+ normal,
+ in.pt_fragment,
+ in.pt_camera,
+ in.pt_light,
+ light_color,
+ albedo.rgb,
+ roughness,
+ metalness,
+ );
+
+ let ambient = vec3<f32>(0.03) * albedo;
+ c += ambient;
+
+ c = c / (c + 1.0);
+ c = pow(c, vec3<f32>(1.0/2.2));
+
+ let colors = array<vec3<f32>, 6>(
+ vec3<f32>(1, 1, 0),
+ vec3<f32>(0, 1, 1),
+ vec3<f32>(1, 0, 1),
+ vec3<f32>(1, 0, 0),
+ vec3<f32>(0, 1, 0),
+ vec3<f32>(0, 0, 1),
+ );
+ let color = colors[in.instance_id % 6];
+
+ return vec4<f32>(color, 1.0);
+}