Metal Kernel Functions / Compute Shaders in Swift
Join the DZone community and get the full member experience.
Join For Free
var device: MTLDevice! = nil
[...]
device = MTLCreateSystemDefaultDevice()
I also need to create a library and command queue:
// Default shader library of the device; the kernel function is looked up in it by name.
defaultLibrary = device.newDefaultLibrary()
// Queue from which command buffers for this device are created.
commandQueue = device.newCommandQueue()
Finally, I get a reference to my Metal function from the library and synchronously create and compile a compute pipeline state:
// Look up the compute function named "kernelShader" in the default library.
let kernelFunction = defaultLibrary.newFunctionWithName("kernelShader")
// Synchronously build the compute pipeline state for that function.
// The force-unwrap traps if the lookup above returned nil; passing `error: nil`
// means no diagnostic is captured if pipeline creation fails.
pipelineState = device.newComputePipelineStateWithFunction(kernelFunction!, error: nil)
The kernelShader points to the saturation image processing function, written in Metal, that lives in my Shaders.metal file:
/// Saturation adjustment: blends every pixel with its luminance gray.
///
/// inTexture  (texture 0): source image, read-only.
/// outTexture (texture 1): destination image, write-only.
/// uniforms   (buffer 0):  saturationFactor; 0 yields the gray value, 1 yields the
///                         unmodified source colour (it is the weight passed to mix()).
/// gid: this thread's position in the dispatch grid, used as the pixel coordinate.
kernel void kernelShader(texture2d<float, access::read> inTexture [[texture(0)]],
                         texture2d<float, access::write> outTexture [[texture(1)]],
                         constant AdjustSaturationUniforms &uniforms [[buffer(0)]],
                         uint2 gid [[thread_position_in_grid]])
{
    // Threads that fall outside the destination must not touch it; this keeps the
    // kernel safe when the dispatch grid is larger than the texture.
    if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height()) {
        return;
    }

    float4 inColor = inTexture.read(gid);

    // Luminance from the Rec. 601 luma weights.
    float value = dot(inColor.rgb, float3(0.299, 0.587, 0.114));

    // Carry the source alpha into the gray colour so that mix() only changes
    // rgb; a fixed alpha of 1.0 would alter the alpha of non-opaque pixels.
    float4 grayColor(value, value, value, inColor.a);

    float4 outColor = mix(grayColor, inColor, uniforms.saturationFactor);
    outTexture.write(outColor, gid);
}
// Load the source image and fetch its CGImage backing.
let image = UIImage(named: "grand_canyon.jpg")
let imageRef = image.CGImage

let imageWidth = CGImageGetWidth(imageRef)
let imageHeight = CGImageGetHeight(imageRef)
let bytesPerRow = bytesPerPixel * imageWidth

// Backing store the bitmap context draws into. It is sized from the same row
// stride that is handed to CGBitmapContextCreate, so the buffer and the
// context cannot disagree about the pixel size.
var rawData = [UInt8](count: Int(bytesPerRow * imageHeight), repeatedValue: 0)

// Big-endian 32-bit pixels with premultiplied alpha in the last component.
let bitmapInfo = CGBitmapInfo(CGBitmapInfo.ByteOrder32Big.toRaw() | CGImageAlphaInfo.PremultipliedLast.toRaw())

// Draw the image into rawData through a bitmap context.
let context = CGBitmapContextCreate(&rawData, imageWidth, imageHeight, bitsPerComponent, bytesPerRow, rgbColorSpace, bitmapInfo)
CGContextDrawImage(context, CGRectMake(0, 0, CGFloat(imageWidth), CGFloat(imageHeight)), imageRef)
Once all of those steps have executed, I can create a new texture and use its replaceRegion() method to write the image into it:
// Source texture descriptor. Only mip level 0 is ever written below, so no
// mip chain is requested.
let textureDescriptor = MTLTextureDescriptor.texture2DDescriptorWithPixelFormat(
    MTLPixelFormat.RGBA8Unorm,
    width: Int(imageWidth),
    height: Int(imageHeight),
    mipmapped: false)
texture = device.newTextureWithDescriptor(textureDescriptor)

// Copy the decoded pixels into level 0 of the texture, covering the whole image.
let region = MTLRegionMake2D(0, 0, Int(imageWidth), Int(imageHeight))
texture.replaceRegion(region, mipmapLevel: 0, withBytes: &rawData, bytesPerRow: Int(bytesPerRow))
I also create an empty texture which the kernel function will write into:
// Destination texture: same pixel format and dimensions as the source texture,
// without mipmaps.
let destinationDescriptor = MTLTextureDescriptor.texture2DDescriptorWithPixelFormat(
    texture.pixelFormat,
    width: texture.width,
    height: texture.height,
    mipmapped: false)
outTexture = device.newTextureWithDescriptor(destinationDescriptor)
// Command buffer obtained from the queue, and a compute encoder that records into it.
let commandBuffer = commandQueue.commandBuffer()
let commandEncoder = commandBuffer.computeCommandEncoder()
...then set the pipeline state (we got from device.newComputePipelineStateWithFunction() earlier) and textures on the command encoder:
// Select the compiled kernel for this encoder.
commandEncoder.setComputePipelineState(pipelineState)
// Texture indices match the kernel's [[texture(0)]] (input) and [[texture(1)]] (output).
commandEncoder.setTexture(texture, atIndex: 0)
commandEncoder.setTexture(outTexture, atIndex: 1)
The filter requires an additional parameter that defines the saturation amount. This is passed into the shader via an MTLBuffer. To populate the buffer, I've created a small struct:
/// Parameters handed to the saturation kernel through a buffer.
struct AdjustSaturationUniforms {
    /// Saturation amount read by the shader as `uniforms.saturationFactor`.
    var saturationFactor: Float
}
Then I use newBufferWithBytes() to pass in my saturationFactor float value:
// Wrap the current saturation value in the struct the kernel reads.
var uniforms = AdjustSaturationUniforms(saturationFactor: self.saturationFactor)

// Copy the struct's bytes into a new Metal buffer and bind it at buffer index 0,
// matching the kernel's [[buffer(0)]] argument.
let buffer: MTLBuffer = device.newBufferWithBytes(
    &uniforms,
    length: sizeof(AdjustSaturationUniforms),
    options: nil)
commandEncoder.setBuffer(buffer, offset: 0, atIndex: 0)
This is now accessible inside the shader as an argument to its kernel function:
constant AdjustSaturationUniforms &uniforms [[buffer(0)]]
Now I'm ready to invoke the function itself. Metal kernel functions use thread groups to break up their workload into chunks. In my example, each thread group contains 64 threads (8 × 8), and I create enough thread groups to cover the texture, then send them off to the GPU:
// Threads per threadgroup: 8 x 8 x 1 = 64.
let threadGroupCount = MTLSizeMake(8, 8, 1)

// Number of threadgroups along each axis.
// NOTE(review): integer division truncates, so when the texture width or height
// is not a multiple of 8 the right/bottom edge pixels are not processed. Rounding
// up is only safe if the kernel bounds-checks its thread position.
let threadGroups = MTLSizeMake(texture.width / threadGroupCount.width, texture.height / threadGroupCount.height, 1)

// The command buffer being encoded already belongs to the existing command
// queue, so no new queue is created here.
commandEncoder.dispatchThreadgroups(threadGroups, threadsPerThreadgroup: threadGroupCount)
commandEncoder.endEncoding()

commandBuffer.commit()
// Block until the GPU has finished so the output texture can be read back.
commandBuffer.waitUntilCompleted()
// Dimensions of the result, taken from the source texture.
let imageSize = CGSize(width: texture.width, height: texture.height)

// Row stride of the read-back buffer; the total byte count is derived from it
// so that the buffer always matches the stride passed to getBytes().
let bytesPerRow = bytesPerPixel * UInt(imageSize.width)
let imageByteCount = Int(bytesPerRow) * Int(imageSize.height)
var imageBytes = [UInt8](count: imageByteCount, repeatedValue: 0)

// Copy mip level 0 of the output texture into imageBytes.
let region = MTLRegionMake2D(0, 0, Int(imageSize.width), Int(imageSize.height))
outTexture.getBytes(&imageBytes, bytesPerRow: Int(bytesPerRow), fromRegion: region, mipmapLevel: 0)
Now that imageBytes holds the raw data, it's a few lines to create a CGImage:
// Wrap the read-back bytes in NSData and expose them through a data provider.
let pixelData = NSData(bytes: &imageBytes, length: imageBytes.count * sizeof(UInt8))
let providerRef = CGDataProviderCreateWithCFData(pixelData)

// Big-endian 32-bit pixels with premultiplied alpha in the last component.
let bitmapInfo = CGBitmapInfo(CGBitmapInfo.ByteOrder32Big.toRaw() | CGImageAlphaInfo.PremultipliedLast.toRaw())
let renderingIntent = kCGRenderingIntentDefault

// Build the CGImage from the provider: no decode array, no interpolation.
let imageRef = CGImageCreate(
    UInt(imageSize.width),
    UInt(imageSize.height),
    bitsPerComponent,
    bitsPerPixel,
    bytesPerRow,
    rgbColorSpace,
    bitmapInfo,
    providerRef,
    nil,
    false,
    renderingIntent)

// Show the filtered image.
imageView.image = UIImage(CGImage: imageRef)
Published at DZone with permission of Simon Gladman, DZone MVB. See the original article here.
Opinions expressed by DZone contributors are their own.
Comments