BEGIN
-- Inner: exchange bit fields between register w&x and register w&y, where y = x+d,
-- under control of the current value of mask.
-- NOTE(review): "w&x" / "w&y" appear to be template notation selecting one of the
-- registers w0..w7 by the value of x / y, and "&d" the literal value of d; confirm
-- against the template expander.
-- Parameters:
--   x: index of the first register of the pair.
--   d: distance to the partner register; also used as the bit-shift amount.
-- Effect, writing X and Y for the values of w&x and w&y on entry:
--   w&x = BITAND[mask, BITRSHIFT[X, d]] + BITAND[BITNOT[mask], Y]
--   w&y = BITAND[X, mask] + BITLSHIFT[BITAND[mask, Y], d]
-- The scratch word w is overwritten.
Inner:
PROC [x: [0..8), d: [0..8)] = {
-- Partner register index; the declared range [0..8) requires x+d to stay below 8.
y: [0..8) = x+d;
-- Save the masked part of the old w&x before w&x is overwritten below.
w ← Basics.BITAND[w&x, mask];
-- Move the other part of the old w&x down by d bits into the masked positions.
w&x ← Basics.BITAND[mask, Basics.BITRSHIFT[w&x, &d]];
-- Merge in the unmasked bits of w&y; the two operands occupy disjoint bit positions,
-- so the addition cannot carry.
w&x ← w&x + Basics.BITAND[Basics.BITNOT[mask], w&y];
-- w&y receives the saved bits plus its own masked bits moved up by d.
-- NOTE(review): the addition acts as an OR only when mask shifted left by d does not
-- overlap mask; this holds for the mask/d pairs used by the visible callers.
w&y ← w + Basics.BITLSHIFT[Basics.BITAND[mask, w&y], &d];
};
-- Working storage: w is a scratch word, w0..w7 are the eight assembled registers,
-- and mask is the bit-selection pattern read by Inner.
w, w0, w1, w2, w3, w4, w5, w6, w7, mask: WORD;
-- Running source offset; advanced by srcPitch before every fetch except the first.
off: INTEGER ← srcOff;

-- Fetch section: fetch 32 bytes and assemble into 8 registers
-- (8 fetches for each of the BYTES[WORD] passes; pass j lands at bit offset j*8).
-- NOTE(review): assumes each src[off] yields at most 8 significant bits, so the
-- additions below never carry between fields; confirm against the declaration of src.
FOR j: [0..BYTES[WORD]) IN [0..BYTES[WORD]) DO
  shift: NAT = j*8;
  FOR i: [0..8) IN [0..8) DO
    IF i # 0 OR j # 0 THEN { off ← off + srcPitch; };
    -- The first pass initialises register i; later passes add their shifted fetch.
    -- NOTE(review): the THEN arm was missing from the text under review and has been
    -- reconstructed; w0..w7 are declared without initial values, so the j = 0 pass
    -- must assign them before the ELSE arm reads them.
    IF j = 0
      THEN { w&i ← src[off]; }
      ELSE { w ← src[off]; w&i ← w&i + Basics.BITLSHIFT[w, &shift]; };
    ENDLOOP;
  ENDLOOP;

-- OR all eight registers together so the rotation can be skipped when every bit is zero.
-- nz is not declared in this fragment; presumably it belongs to the enclosing scope.
nz ← Basics.BITOR[Basics.BITOR[Basics.BITOR[Basics.BITOR[Basics.BITOR[Basics.BITOR[Basics.BITOR[w0, w1], w2], w3], w4], w5], w6], w7];
IF nz # 0 THEN {
  -- Rotate section: recursively rotate using 2*2, 4*4, 8*8 cells
  -- 2*2 cells: adjacent register pairs, single-bit fields.
  mask ← 55555555h;
  Inner[0, 1]; Inner[2, 1]; Inner[4, 1]; Inner[6, 1];
  -- 4*4 cells: registers two apart, two-bit fields.
  mask ← 33333333h;
  Inner[0, 2]; Inner[1, 2]; Inner[4, 2]; Inner[5, 2];
  -- 8*8 cells: registers four apart, four-bit fields.
  mask ← 0f0f0f0fh;
  Inner[0, 4]; Inner[1, 4]; Inner[2, 4]; Inner[3, 4];
  };

-- Store section: store the 8 registers
-- off is reused as a two-row stride. dst is advanced three times, so on exit it
-- addresses the row that received w6.
off ← dstPitch*2;
dst^ ← w0; (dst+dstPitch)^ ← w1; dst ← dst + off;
dst^ ← w2; (dst+dstPitch)^ ← w3; dst ← dst + off;
dst^ ← w4; (dst+dstPitch)^ ← w5; dst ← dst + off;
dst^ ← w6; (dst+dstPitch)^ ← w7;
END;