More Related Content
Similar to GPUをJavaで使う話(Java Casual Talks #1) (20)
GPUをJavaで使う話(Java Casual Talks #1)
- 12. Aparapiコード
public class AparapiKernel extends Kernel{
float[] inputA;
float[] inputB;
float[] output;
@Override
public void run() {
int gid = getGlobalId();
output[gid] = inputA[gid] * inputB[gid];
}
public static void main(String[] args) {
AparapiKernel kernel = new AparapiKernel();
int elementCount = 1_444_477;
kernel.inputA = new float[elementCount];
kernel.inputB = new float[elementCount];
kernel.output = new float[elementCount];
fillBuffer(kernel.inputA);
fillBuffer(kernel.inputB);
kernel.execute(elementCount);
}
}
- 13. バグがある・・・
• 三項演算子のカッコが反映されない
– (修正してプルリクなげてとりこまれてます)
• CPUとの結果と比較するテストを用意したほうがいい
– けど、丸めの違いを考慮するの面倒
void proc(int fxy) {
float d = (result[fxy] >= 0 ? 1 : 0) * delta[fxy];
tempBiasDelta[fxy] = learningRate * d;
}
void kishida_cnn_kernels_ConvolutionBackwordBiasKernel__proc(This
*this, int fxy){
float d = (float)(this->result[fxy]>=0.0f)?1:0 * this->delta[fxy];
this->tempBiasDelta[fxy] = this->learningRate * d;
return;
}
↓
- 15. JOCLのコード
String KERNEL_CODE =
"kernel void add(global const float* inputA,"
+ " global const float* inputB,"
+ " global float* output,"
+ " uint numElements){"
+ " size_t gid = get_global_id(0);"
+ " if(gid >= numElements){"
+ " return;"
+ " }"
+ " output[gid] = inputA[gid] + inputB[gid];"
+ "}";
CLContext ctx = CLContext.create();
CLDevice device = ctx.getMaxFlopsDevice();
CLCommandQueue queue = device.createCommandQueue();
CLProgram program = ctx.createProgram(KERNEL_CODE).build();
int elementCount = 1_444_477;
int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 256);
int globalWorkSize = ((elementCount + localWorkSize - 1) /
localWorkSize) * localWorkSize;
CLBuffer<FloatBuffer> clBufferA = ctx.createFloatBuffer(
elementCount, CLMemory.Mem.READ_ONLY);
CLBuffer<FloatBuffer> clBufferB = ctx.createFloatBuffer(
elementCount, CLMemory.Mem.READ_ONLY);
CLBuffer<FloatBuffer> clBufferC = ctx.createFloatBuffer(
elementCount, CLMemory.Mem.READ_WRITE);
fillBuffer(clBufferA.getBuffer());
fillBuffer(clBufferB.getBuffer());
CLKernel kernel = program.createCLKernel("add");
kernel
.putArgs(clBufferA, clBufferB, clBufferC)
.putArg(elementCount);
queue.putWriteBuffer(clBufferA, false)
.putWriteBuffer(clBufferB, false)
.put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize)
.putReadBuffer(clBufferC, true);
- 18. と思ったら
●
「Sumatra is not in active development for
now.(2015/5/1) 」
http://mail.openjdk.java.net/pipermail/sumatra-dev/2015-
May/000310.html