Skip to content
This repository was archived by the owner on Aug 15, 2019. It is now read-only.

Commit 29c48ac

Browse files
authored
Manually unroll loop to fix D3D compilation errors. (#1850)
BUG
1 parent 46bc471 commit 29c48ac

File tree

1 file changed

+34
-25
lines changed

src/backends/webgl/im2col_packed_gpu.ts

Lines changed: 34 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -43,38 +43,47 @@ export class Im2ColPackedProgram implements GPGPUProgram {
4343
const itemsPerBlockRow = inChannels * filterWidth;
4444
const glsl = getGlslDifferences();
4545

46+
let unrolled = ``;
47+
48+
for (let row = 0; row <= 1; row++) {
49+
for (let col = 0; col <= 1; col++) {
50+
unrolled += `
51+
blockIndex = rc.y + ${col};
52+
pos = rc.x + ${row};
53+
54+
if(blockIndex < ${outputShape[1]} && pos < ${outputShape[0]}) {
55+
offsetY = int(blockIndex / (${outWidth})) * ${strideHeight} - ${
56+
top};
57+
d0 = offsetY + ${dilationHeight} * (pos / ${itemsPerBlockRow});
58+
59+
if(d0 < ${inputShape[0]} && d0 >= 0) {
60+
offsetX = int(mod(float(blockIndex), ${outWidth}.) * ${
61+
strideWidth}. - ${left}.);
62+
d1 = offsetX + ${dilationWidth} * (int(mod(float(pos), ${
63+
itemsPerBlockRow}.) / ${inChannels}.));
64+
65+
if(d1 < ${inputShape[1]} && d1 >= 0) {
66+
innerDims = vec2(d1, int(mod(float(pos), ${inChannels}.)));
67+
result[${row * 2 + col}] = getChannel(getA(d0, int(innerDims.x),
68+
int(innerDims.y)), innerDims);
69+
}
70+
}
71+
72+
}
73+
`;
74+
}
75+
}
76+
4677
this.userCode = `
4778
void main() {
4879
ivec2 rc = getOutputCoords();
4980
5081
vec4 result = vec4(0);
5182
52-
for(int row=0; row<=1; row++) {
53-
for(int col=0; col<=1; col++) {
54-
int blockIndex = rc.y + col;
55-
int pos = rc.x + row;
56-
57-
if(blockIndex >= ${outputShape[1]} || pos >= ${
58-
outputShape[0]}) continue;
83+
int blockIndex, pos, offsetY, d0, offsetX, d1;
84+
vec2 innerDims;
5985
60-
int offsetY = int(blockIndex / (${outWidth})) * ${strideHeight} - ${
61-
top};
62-
int d0 = offsetY + ${dilationHeight} * (pos / ${itemsPerBlockRow});
63-
64-
if(d0 >= ${inputShape[0]} || d0 < 0) continue;
65-
66-
int offsetX = int(mod(float(blockIndex), ${outWidth}.) * ${
67-
strideWidth}. - ${left}.);
68-
int d1 = offsetX + ${dilationWidth} * (int(mod(float(pos), ${
69-
itemsPerBlockRow}.) / ${inChannels}.));
70-
71-
if(d1 >= ${inputShape[1]} || d1 < 0) continue;
72-
73-
vec2 innerDims = vec2(d1, int(mod(float(pos), ${inChannels}.)));
74-
result[row * 2 + col] = getChannel(getA(d0, int(innerDims.x),
75-
int(innerDims.y)), innerDims);
76-
}
77-
}
86+
${unrolled}
7887
7988
${glsl.output} = result;
8089
}

0 commit comments

Comments
 (0)