まずは、openCL.NETのDLLへの参照を追加し、using openCL; をコードの先頭に書きましょう!
1. プラットフォームとデバイスを列挙し情報を取得する
// Enumerate every available platform and print its properties, followed by
// the properties of each device that belongs to that platform.
Platform[] platforms = Platform.GetPlatforms ();
for (int i = 0; i < platforms.Length; i++) {
    Platform platform = platforms[i];
    Console.WriteLine ("Platform={0}", i);
    Console.WriteLine (" Name = {0}", platform.Name);
    Console.WriteLine (" Vendor = {0}", platform.Vendor);
    Console.WriteLine (" Version = {0}", platform.Version);
    Console.WriteLine (" Profile = {0}", platform.Profile);
    Console.WriteLine (" Extensions = {0}", platform.Extensions);

    // Query the devices of the platform currently being printed. Using a
    // fixed index 0 here would list the first platform's devices under
    // every platform heading.
    Device[] devices = platform.GetDevices (DeviceType.All);
    for (int j = 0; j < devices.Length; j++) {
        Device device = devices[j];
        Console.WriteLine (" Device={0}", j);
        Console.WriteLine (" Name = {0}", device.Name);
        Console.WriteLine (" Vendor = {0}", device.Vendor);
        Console.WriteLine (" MaxComputeUnits = {0}", device.MaxComputeUnits);
    }
}
2. カーネルを実行する
// Build a trivial kernel from source, bind a device buffer to its only
// argument, and submit it to a command queue on the first device of a
// GPU context. Every wrapper object is released when its using scope ends.
const string kernelSource = "__kernel void test (__global uint *state) {}";
const string kernelName = "test";
int memorySize = 1024;

using (Context ctx = new Context (DeviceType.GPU)) {
    using (CommandQueue commands = ctx.CreateCommandQueue (ctx.Devices[0], CommandQueueProperties.Default)) {
        using (Memory buffer = ctx.CreateBuffer (MemoryFlags.ReadWrite, memorySize)) {
            using (Program program = ctx.CreateProgram (kernelSource)) {
                // Build for all devices of the context; the second argument is passed as null.
                program.Build (ctx.Devices, null);

                using (Kernel entryPoint = program.CreateKernel (kernelName)) {
                    // Argument 0 is the kernel's __global uint* parameter.
                    entryPoint.SetArgument (0, buffer);
                    commands.Execute (entryPoint);
                }
            }
        }
    }
}
3. メモリを読み書きする (ついでにスループット計測)
Stopwatch を使用するため、コードの先頭に using System.Diagnostics; を追加してください。
// Measure host-to-device and device-to-host transfer throughput by copying
// a 64 MiB array to and from a device buffer 100 times in each direction.
// The reported figure divides by 1024^3 bytes.
// NOTE(review): this timing assumes WriteBuffer/ReadBuffer return only after
// the transfer has completed - confirm against the wrapper's implementation.
byte[] hostData = new byte[1024 * 1024 * 64];
new Random ().NextBytes (hostData);

using (Context ctx = new Context (DeviceType.GPU)) {
    using (CommandQueue commands = ctx.CreateCommandQueue (ctx.Devices[0], CommandQueueProperties.Default)) {
        using (Memory deviceBuffer = ctx.CreateBuffer (MemoryFlags.ReadWrite, hostData.Length)) {
            int passes = 100;
            Stopwatch timer = new Stopwatch ();

            // Host -> device.
            timer.Start ();
            for (int pass = 0; pass < passes; pass++) {
                commands.WriteBuffer (deviceBuffer, 0, hostData, 0, hostData.Length);
            }
            timer.Stop ();
            double writeSeconds = timer.Elapsed.TotalSeconds;
            Console.WriteLine ("write throughput: {0:f3}GB/s",
                hostData.Length / writeSeconds * passes / 1024.0 / 1024.0 / 1024.0);

            // Device -> host; the read-back overwrites the contents of the host array.
            timer.Reset ();
            timer.Start ();
            for (int pass = 0; pass < passes; pass++) {
                commands.ReadBuffer (deviceBuffer, 0, hostData, 0, hostData.Length);
            }
            timer.Stop ();
            double readSeconds = timer.Elapsed.TotalSeconds;
            Console.WriteLine ("read throughput: {0:f3}GB/s",
                hostData.Length / readSeconds * passes / 1024.0 / 1024.0 / 1024.0);
        }
    }
}