Metal Kernel Functions / Compute Shaders in Swift
Join the DZone community and get the full member experience.
Join For Free
var device: MTLDevice! = nil
[...]
device = MTLCreateSystemDefaultDevice()
I also need to create a library and command queue:
defaultLibrary = device.newDefaultLibrary()
commandQueue = device.newCommandQueue()
Finally, I add a reference to my Metal function to the library and synchronously create and compile a compute pipeline state:
// Look up the kernel by name in the default library, then build the compute
// pipeline state from it.
let kernelFunction = defaultLibrary.newFunctionWithName("kernelShader")
// NOTE(review): the force-unwrap and `error: nil` mean a missing function or a
// failed pipeline build is not reported here — consider passing an error pointer.
pipelineState = device.newComputePipelineStateWithFunction(kernelFunction!, error: nil)
The kernelShader points to the saturation image processing function, written in Metal, that lives in my Shaders.metal file:
/// Adjusts the saturation of `inTexture` and writes the result to `outTexture`.
///
/// Each thread handles the pixel at `gid`: it computes a luma value from the
/// RGB channels, builds the matching gray color, and blends between that gray
/// and the original color by `uniforms.saturationFactor` (0 yields the gray
/// color, 1 yields the input color unchanged).
kernel void kernelShader(texture2d<float, access::read> inTexture [[texture(0)]],
                         texture2d<float, access::write> outTexture [[texture(1)]],
                         constant AdjustSaturationUniforms &uniforms [[buffer(0)]],
                         uint2 gid [[thread_position_in_grid]])
{
    // Threads outside the output texture must do nothing: a dispatch whose grid
    // is larger than the texture would otherwise read and write out of bounds.
    if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height())
    {
        return;
    }

    float4 inColor = inTexture.read(gid);
    // Weighted sum of R, G and B using the Rec. 601 luma coefficients.
    float value = dot(inColor.rgb, float3(0.299, 0.587, 0.114));
    float4 grayColor(value, value, value, 1.0);
    float4 outColor = mix(grayColor, inColor, uniforms.saturationFactor);
    outTexture.write(outColor, gid);
}
let image = UIImage(named: "grand_canyon.jpg")
// Get the backing CGImage and its pixel dimensions.
let imageRef = image.CGImage
let imageWidth = CGImageGetWidth(imageRef)
let imageHeight = CGImageGetHeight(imageRef)
let bytesPerRow = bytesPerPixel * imageWidth
// Zero-filled destination buffer sized at 4 bytes per pixel.
// NOTE(review): the literal 4 presumably equals bytesPerPixel (declared outside this snippet) — confirm.
var rawData = [UInt8](count: Int(imageWidth * imageHeight * 4), repeatedValue: 0)
// 32-bit big-endian byte order with premultiplied alpha stored last.
let bitmapInfo = CGBitmapInfo(CGBitmapInfo.ByteOrder32Big.toRaw() | CGImageAlphaInfo.PremultipliedLast.toRaw())
// Create a bitmap context backed by rawData and draw the image into it, so the
// array receives the decoded pixels.
let context = CGBitmapContextCreate(&rawData, imageWidth, imageHeight, bitsPerComponent, bytesPerRow, rgbColorSpace, bitmapInfo)
CGContextDrawImage(context, CGRectMake(0, 0, CGFloat(imageWidth), CGFloat(imageHeight)), imageRef)
Once all of those steps have executed, I can create a new texture and use its replaceRegion() method to write the image into it:
// Describe an RGBA8Unorm 2D texture with the same dimensions as the image.
// NOTE(review): mipmapped is true here, but only mipmap level 0 is filled below
// and the output texture's descriptor uses mipmapped: false — confirm a mip
// chain is actually needed.
let textureDescriptor = MTLTextureDescriptor.texture2DDescriptorWithPixelFormat(MTLPixelFormat.RGBA8Unorm, width: Int(imageWidth), height: Int(imageHeight), mipmapped: true)
texture = device.newTextureWithDescriptor(textureDescriptor)
// Copy the decoded pixels from rawData into level 0 of the texture, covering
// the full image area.
let region = MTLRegionMake2D(0, 0, Int(imageWidth), Int(imageHeight))
texture.replaceRegion(region, mipmapLevel: 0, withBytes: &rawData, bytesPerRow: Int(bytesPerRow))
I also create an empty texture which the kernel function will write into:
let outTextureDescriptor = MTLTextureDescriptor.texture2DDescriptorWithPixelFormat(texture.pixelFormat, width: texture.width, height: texture.height, mipmapped: false)
outTexture = device.newTextureWithDescriptor(outTextureDescriptor)
let commandBuffer = commandQueue.commandBuffer()
let commandEncoder = commandBuffer.computeCommandEncoder()
...then set the pipeline state (we got from device.newComputePipelineStateWithFunction() earlier) and textures on the command encoder:
// Bind the compiled kernel to the encoder.
commandEncoder.setComputePipelineState(pipelineState)
// Texture indices match the kernel's [[texture(0)]] (input) and
// [[texture(1)]] (output) arguments.
commandEncoder.setTexture(texture, atIndex: 0)
commandEncoder.setTexture(outTexture, atIndex: 1)
The filter requires an additional parameter that defines the saturation amount. This is passed into the shader via an MTLBuffer. To populate the buffer, I've created a small struct:
/// Uniform data handed to the saturation kernel through buffer index 0.
struct AdjustSaturationUniforms {
    /// Blend weight the kernel passes to `mix` between the gray color and the
    /// original color.
    var saturationFactor: Float
}
Then I use newBufferWithBytes() to pass in my saturationFactor float value:
// Wrap the current saturation value in the uniforms struct; it must be a `var`
// because it is passed by address below.
var uniforms = AdjustSaturationUniforms(saturationFactor: self.saturationFactor)
// Copy the struct's bytes into a new Metal buffer and bind it at buffer index 0.
let uniformsBuffer: MTLBuffer = device.newBufferWithBytes(&uniforms, length: sizeof(AdjustSaturationUniforms), options: nil)
commandEncoder.setBuffer(uniformsBuffer, offset: 0, atIndex: 0)
This is now accessible inside the shader as an argument to its kernel function:
constant AdjustSaturationUniforms &uniforms [[buffer(0)]]
Now I'm ready to invoke the function itself. Metal kernel functions use thread groups to break up their workload into chunks. In my example, I create thread groups of 64 threads each (8 × 8), work out how many groups are needed to cover the texture, then send them off to the GPU:
// Each threadgroup is an 8 x 8 tile of threads (64 threads per group).
let threadGroupCount = MTLSizeMake(8, 8, 1)
// Number of whole tiles that fit in the texture. Integer division truncates, so
// when the width or height is not a multiple of 8 the right/bottom edge pixels
// are not processed; rounding up instead would require the kernel to ignore
// thread positions outside the texture.
let threadGroups = MTLSizeMake(texture.width / threadGroupCount.width, texture.height / threadGroupCount.height, 1)
// The existing command queue is reused: the command buffer being encoded was
// already created from it, so allocating a fresh queue here was redundant.
commandEncoder.dispatchThreadgroups(threadGroups, threadsPerThreadgroup: threadGroupCount)
commandEncoder.endEncoding()
commandBuffer.commit()
commandBuffer.waitUntilCompleted()
let imageSize = CGSize(width: texture.width, height: texture.height)
let imageByteCount = Int(imageSize.width * imageSize.height * 4)
let bytesPerRow = bytesPerPixel * UInt(imageSize.width)
var imageBytes = [UInt8](count: imageByteCount, repeatedValue: 0)
let region = MTLRegionMake2D(0, 0, Int(imageSize.width), Int(imageSize.height))
outTexture.getBytes(&imageBytes, bytesPerRow: Int(bytesPerRow), fromRegion: region, mipmapLevel: 0)
Now that imageBytes holds the raw data, it's a few lines to create a CGImage:
let providerRef = CGDataProviderCreateWithCFData(
NSData(bytes: &imageBytes, length: imageBytes.count * sizeof(UInt8))
)
let bitmapInfo = CGBitmapInfo(CGBitmapInfo.ByteOrder32Big.toRaw() | CGImageAlphaInfo.PremultipliedLast.toRaw())
let renderingIntent = kCGRenderingIntentDefault
let imageRef = CGImageCreate(UInt(imageSize.width), UInt(imageSize.height), bitsPerComponent, bitsPerPixel, bytesPerRow, rgbColorSpace, bitmapInfo, providerRef, nil, false, renderingIntent)
imageView.image = UIImage(CGImage: imageRef)
Published at DZone with permission of Simon Gladman. See the original article here.
Opinions expressed by DZone contributors are their own.
Comments