@@ -43,38 +43,47 @@ export class Im2ColPackedProgram implements GPGPUProgram {
4343 const itemsPerBlockRow = inChannels * filterWidth ;
4444 const glsl = getGlslDifferences ( ) ;
4545
46+ let unrolled = `` ;
47+
48+ for ( let row = 0 ; row <= 1 ; row ++ ) {
49+ for ( let col = 0 ; col <= 1 ; col ++ ) {
50+ unrolled += `
51+ blockIndex = rc.y + ${ col } ;
52+ pos = rc.x + ${ row } ;
53+
54+ if(blockIndex < ${ outputShape [ 1 ] } && pos < ${ outputShape [ 0 ] } ) {
55+ offsetY = int(blockIndex / (${ outWidth } )) * ${ strideHeight } - ${
56+ top } ;
57+ d0 = offsetY + ${ dilationHeight } * (pos / ${ itemsPerBlockRow } );
58+
59+ if(d0 < ${ inputShape [ 0 ] } && d0 >= 0) {
60+ offsetX = int(mod(float(blockIndex), ${ outWidth } .) * ${
61+ strideWidth } . - ${ left } .);
62+ d1 = offsetX + ${ dilationWidth } * (int(mod(float(pos), ${
63+ itemsPerBlockRow } .) / ${ inChannels } .));
64+
65+ if(d1 < ${ inputShape [ 1 ] } && d1 >= 0) {
66+ innerDims = vec2(d1, int(mod(float(pos), ${ inChannels } .)));
67+ result[${ row * 2 + col } ] = getChannel(getA(d0, int(innerDims.x),
68+ int(innerDims.y)), innerDims);
69+ }
70+ }
71+
72+ }
73+ ` ;
74+ }
75+ }
76+
4677 this . userCode = `
4778 void main() {
4879 ivec2 rc = getOutputCoords();
4980
5081 vec4 result = vec4(0);
5182
52- for(int row=0; row<=1; row++) {
53- for(int col=0; col<=1; col++) {
54- int blockIndex = rc.y + col;
55- int pos = rc.x + row;
56-
57- if(blockIndex >= ${ outputShape [ 1 ] } || pos >= ${
58- outputShape [ 0 ] } ) continue;
83+ int blockIndex, pos, offsetY, d0, offsetX, d1;
84+ vec2 innerDims;
5985
60- int offsetY = int(blockIndex / (${ outWidth } )) * ${ strideHeight } - ${
61- top } ;
62- int d0 = offsetY + ${ dilationHeight } * (pos / ${ itemsPerBlockRow } );
63-
64- if(d0 >= ${ inputShape [ 0 ] } || d0 < 0) continue;
65-
66- int offsetX = int(mod(float(blockIndex), ${ outWidth } .) * ${
67- strideWidth } . - ${ left } .);
68- int d1 = offsetX + ${ dilationWidth } * (int(mod(float(pos), ${
69- itemsPerBlockRow } .) / ${ inChannels } .));
70-
71- if(d1 >= ${ inputShape [ 1 ] } || d1 < 0) continue;
72-
73- vec2 innerDims = vec2(d1, int(mod(float(pos), ${ inChannels } .)));
74- result[row * 2 + col] = getChannel(getA(d0, int(innerDims.x),
75- int(innerDims.y)), innerDims);
76- }
77- }
86+ ${ unrolled }
7887
7988 ${ glsl . output } = result;
8089 }
0 commit comments