:TITLE[Halfastone];*quad read version of fast table lookup halftone
*Last edited by Fiala 24 November 1980
*Last edited by Maleson April 3, 1980;

* The code that follows is declared for task 0 and placed on page InitPage.
SetTask[0];
OnPage[InitPage];

* Registers for test program
* Numbers in this file are octal (see the 16-entry bit table 100000C .. 1C below).
* Available Temps: 40-53 56-57, 64, 66-67, 72-73
* Available Unused: 14, 70-71
* AddrTemp/AddrTempHi: base pair used by the PFetch4's that load the parameter
* block; AddrTemp is loaded from Stack and AddrTempHi is set to 0 at both entries.
RV[AddrTemp,10]; *same as Stack7
RV[AddrTempHi,11];

* 40-43 are loaded as one quad by PFetch4[AddrTemp,nInputPixels,0] in stuffParams.
RV[nInputPixels,40];
RV[blackVal,41];
RV[range,42];
RV[nOutputDots,43];

* 44-47 are loaded by PFetch4[AddrTemp,errorVec,4] at each entry point;
* 50-53 are loaded by PFetch4[AddrTemp,bitCount,10] in stuffParams.
* (RV4 presumably names four consecutive registers starting at the given number.)
RV4[errorVec,errorVecHi,screenWord,screenWordHi,44];
RV4[bitCount,bitmapWidth,pixelArray,pixelArrayHi,50];

* CascadeRight/CascadeDiag: running error terms of the slow halftone (see GetError, GetNext).
RV[CascadeRight,56];
RV[CascadeDiag,57];
* returnLoc: saved APCTask&APC of the caller, restored at fastExit and xDone.
RV[returnLoc,64];
* x: index of the current input pixel; jctr: counter that decides when to advance x.
RV[x,66];
RV[jctr,67];

* bitval: single-bit mask of the current output dot (slow modes).
* val: word or pixel value fetched from pixelArray.
* errorTemp: value computed in GetError and copied to CascadeDiag in GetNext.
RV[bitval,70];
RV[val,71];
RV[errorTemp,72];

*renames for fast halftone
* val1/val2 reuse registers 56/57 (CascadeRight/CascadeDiag), which the fast
* table-lookup path never references.
RV[val1,56];
RV[val2,57];
* Names for the four registers 70-73. Each register gets its own name
* (previously all four declarations used the name Q0, so the symbol was
* redefined three times and 71-73 had no name of their own).
* Note that 70, 71 and 72 are also declared as bitval, val and errorTemp.
RV[Q0,70];
RV[Q1,71];
RV[Q2,72];
RV[Q3,73];


*New names for standard registers
* (no declarations follow this heading in this file)
*At locations
* Symbols used in the At[...] clauses below; all are on InitPage.
* (PSet presumably binds a name to a page and an octal location on it -- confirm against its definition.)
*Starting location of the slow halftone (the instruction marked At[PrintLine])
PSet[PrintLine,InitPage,000];
*Starting location of the fast table-lookup halftone (the instruction marked At[FastLine])
PSet[FastLine,InitPage,001];
* Base of the four fetchX0..fetchX3 entries, placed At[modeLoc1,0..3]
PSet[modeLoc1,InitPage,060];
* Base of the fastTable entries, placed At[fastTableLoc,0/4/10/14]
PSet[fastTableLoc,InitPage,100];


*fast table lookup stuff
* Entry point FastLine. Stack supplies the low word of the parameter block
* address; the high word of the base pair is set to 0.
AddrTempHi ← 0C, At[FastLine];
T ← Stack;
AddrTemp ← T;
* Quad at displacement 4 -> errorVec, errorVecHi, screenWord, screenWordHi.
PFetch4[AddrTemp,errorVec,4];
* Read the caller's return linkage into T; stuffParams saves it in returnLoc
* and fetches the remaining two parameter quads.
UseCTask;
T ← APCTask&APC, Call[stuffParams];
* stuffParams returns T = LSh[pixelArrayHi,10]; complete pixelArrayHi the same
* way stuffParams completes errorVecHi, then initialise jctr and Stack4.
pixelArrayHi ← (pixelArrayHi) + T + 1, Call[initCounters];
* initCounters returns T = LdF[bitmapWidth,4,1]; it is added to nInputPixels
* and is also the starting pixel index x.
nInputPixels ← (nInputPixels) + T;
x ← T;
* Clear the two low bits of bitCount so the 4-bit dispatch in whileJ can only
* select 0, 4, 10 or 14.
bitCount ← (bitCount) and not (3C);
*per output quad: 9 mi
*per output word: 17 mi
*per input pixel: 7 (used) or 6 (unused)
*TOTAL: 9*3+17=44 mi/16 dots = .55usec/output dot
*+ 1usec/input pixel
*512x512 pixels ~ .35 sec
*512x512 dots ~ .15 sec
* Main loop of the fast path; each pass handles 4 output dots (bitCount
* advances by 4 at the bottom). The low 4 bits of bitCount select one of the
* four fastTable entries.
whileJ:
Dispatch[bitCount,14,4];*only 4 possible locations
whileJplus1:
* T = x/2 for the PFetch1[pixelArray,val] in the selected entry (presumably T
* is the displacement when PFetch1 is given none; two pixels per word, see Get4).
T ← RSh[x,1], Disp[fastTable];
fastTable:
* Every entry fetches the pixel word and calls Get4, which returns in T the
* top 4 bits of the byte selected by the parity of x.
* Entry 0: start val1 with the 4-bit value.
PFetch1[pixelArray,val], Call[Get4], At[fastTableLoc,0];
val1 ← T, Goto[fastSelectDone];
* Entry 4: shift val1 left 4 and add the new 4-bit value.
PFetch1[pixelArray,val], Call[Get4], At[fastTableLoc,4];
val1 ← (LSh[val1,4]) + T, Goto[fastSelectDone];
* Entry 10: start val2.
PFetch1[pixelArray,val], Call[Get4], At[fastTableLoc,10];
val2 ← T, Goto[fastSelectDone];
* Entry 14: complete val2, then fall through to build and store one output word.
PFetch1[pixelArray,val], Call[Get4], At[fastTableLoc,14];
val2 ← (LSh[val2,4]) + T;
* Replace val2 and val1 by words fetched relative to errorVec with T = 4*val2
* and T = 4*val1 respectively.
T ← LSh[val2,2];
PFetch1[errorVec,val2];
T ← LSh[val1,2];
PFetch1[errorVec,val1];
* Output word = (val1 shifted left 8) + val2.
T ← val2;
val1 ← (LSh[val1,10]) + T;
* T = high 12 bits of bitCount for the store relative to screenWord.
T ← LdF[bitCount,0,14];
PStore1[screenWord,val1];
fastSelectDone:
* jctr += 4*Stack4 (initCounters sets Stack4 from nInputPixels).
T ← LSh[Stack4,2];
jctr ← (jctr) + T;
* Step to the next group of 4 dots; repeat while negative (ALU<0 presumably
* reflects the jctr sum computed by the preceding instruction).
bitCount ← (bitCount) + (4C), Goto[whileJ,ALU<0];
* Advance to the next input pixel: subtract nOutputDots from jctr and step x.
jDone:
T ← nOutputDots;
jDoneA:
jctr ← (jctr) - T, Task;
x ← T ← (x) + 1;
* nInputPixels - x - 1; a negative result takes the fastExit branch on the
* next instruction (ALU<0 presumably refers to this result).
LU ← (nInputPixels) - T - 1;
LU ← jctr, Goto[fastExit,ALU<0];
* jctr still non-negative: go round through jDoneB to subtract nOutputDots
* again and step x once more.
T ← nOutputDots, Goto[jDoneB,ALU>=0];
* Otherwise rejoin the loop at whileJplus1; the Dispatch that whileJ would
* have issued is issued here instead.
Dispatch[bitCount,14,4], Goto[whileJplus1];
jDoneB:
Goto[jDoneA];

* Exit of the fast path: restore the linkage saved in returnLoc by stuffParams
* and return to the caller.
fastExit:
APCTask&APC ← returnLoc;
Return;

* Get4: return in T a 4-bit field of val chosen by the parity of x.
*   x even: LdF[val,0,4]  (bits 0-3, the top 4 bits of the left byte)
*   x odd:  LdF[val,10,4] (bits 10-13 octal, the top 4 bits of the right byte)
* val is the register loaded by the caller's PFetch1[pixelArray,val].
Get4:
LU ← x, Goto[fastxOdd,R Odd];
fastxEven:
T ← LdF[val,0,4], Return;
fastxOdd:
T ← LdF[val,10,4], Return;


*Halftone stuff
* Entry point PrintLine (slow, per-dot halftone). Stack supplies the low word
* of the parameter block address; the high word of the base pair is set to 0.
AddrTempHi ← 0C, At[PrintLine];
T ← Stack;
AddrTemp ← T;
* Quad at displacement 4 -> errorVec, errorVecHi, screenWord, screenWordHi.
PFetch4[AddrTemp,errorVec,4];
* Read the caller's return linkage into T; stuffParams saves it in returnLoc
* and fetches the remaining two parameter quads.
UseCTask;
T ← APCTask&APC, Call[stuffParams];
* Select the initial bit mask from the low 4 bits of bitCount (table follows).
Dispatch[bitCount,14,4];
* stuffParams returned T = LSh[pixelArrayHi,10]; complete pixelArrayHi while
* branching into the table.
pixelArrayHi ← (pixelArrayHi) + T + 1, Disp[.+1];
*bit selection dispatch table
* 20 (octal) entries selected by the preceding Dispatch[bitCount,14,4]:
* entry n sets bitval to 100000C shifted right n places, i.e. a single bit
* with index 0 giving the leftmost bit. All entries continue at b20.
bitval ← 100000C, Goto[b20], DispTable[20];
bitval ← 40000C, Goto[b20];
bitval ← 20000C, Goto[b20];
bitval ← 10000C, Goto[b20];
bitval ← 4000C, Goto[b20];
bitval ← 2000C, Goto[b20];
bitval ← 1000C, Goto[b20];
bitval ← 400C, Goto[b20];
bitval ← 200C, Goto[b20];
bitval ← 100C, Goto[b20];
bitval ← 40C, Goto[b20];
bitval ← 20C, Goto[b20];
bitval ← 10C, Goto[b20];
bitval ← 4C, Goto[b20];
bitval ← 2C, Goto[b20];
bitval ← 1C, Goto[b20];
* Mode setup: the top 2 bits of bitmapWidth select one of four modes
* (fetchX0..fetchX3). Each mode fetches the first error word into CascadeRight.
b20:
Dispatch[bitmapWidth,0,2];
* T = bitCount for the PFetch1's of modes 0 and 1, which give no explicit
* displacement (presumably T is then used as the displacement).
T ← bitCount, Disp[fetchX0];
fetchX0:
PFetch1[errorVec,CascadeRight], Goto[haveFetch], At[modeLoc1,0];
fetchX1:
PFetch1[errorVec,CascadeRight], Goto[haveFetch], At[modeLoc1,1];
* Modes 2 and 3 fetch with displacement 0, preset Stack5 from the low 13
* (octal) bits of bitmapWidth, and start bitCount at 0.
fetchX2:
PFetch1[errorVec,CascadeRight,0], At[modeLoc1,2];
T ← LdF[bitmapWidth,5,13];
* Mode 2 subtracts this width field from Stack5 on every dot, so start at +width.
Stack5 ← (Zero) + T;*initial decrement
bitCount ← Zero, Goto[haveFetch];
fetchX3:
PFetch1[errorVec,CascadeRight,0], At[modeLoc1,3];
T ← LdF[bitmapWidth,5,13];
* Mode 3 adds the width field to Stack5 on every dot, so start at -width.
Stack5 ← (Zero) - T;*initial increment
bitCount ← Zero, Goto[haveFetch];
* Common tail of the mode setup: clear CascadeDiag, initialise jctr and Stack4,
* scale the fetched error word, then enter the per-mode pixel loop.
haveFetch:
CascadeDiag ← Zero, Call[initCounters];
* CascadeRight = 4*CascadeRight - 1.
CascadeRight ← (LSh[CascadeRight,2]) - 1;
* Dispatch on the mode bits again to pick forX0..forX3.
Dispatch[bitmapWidth,0,2];
* initCounters returned T = LdF[bitmapWidth,4,1]; it is added to nInputPixels
* and becomes the starting pixel index x in every mode.
nInputPixels ← (nInputPixels) + T, Disp[.+1];
x ← T, Goto[forX0], DispTable[4];
x ← T, Goto[forX1];
x ← T, Goto[forX2];
x ← T, Goto[forX3];

*subroutines for fast and slow modes
* stuffParams: called from both entry points with T = the caller's
* APCTask&APC and with the errorVec quad already requested.
* Saves T in returnLoc, fetches the other two parameter quads, and adjusts
* the high base words. Returns T = LSh[pixelArrayHi,10]; the callers finish
* pixelArrayHi themselves with "+ T + 1".
stuffParams:
returnLoc ← T;
* Quad at displacement 10 -> bitCount, bitmapWidth, pixelArray, pixelArrayHi.
PFetch4[AddrTemp,bitCount,10];
* errorVecHi = errorVecHi + (errorVecHi shifted left 8) + 1.
T ← LSh[errorVecHi,10];
errorVecHi ← (errorVecHi) + T + 1;
* screenWordHi is built the same way but without the +1.
T ← LSh[screenWordHi,10];
screenWordHi ← (screenWordHi) + T;*no +1, for negative adds;
* Quad at displacement 0 -> nInputPixels, blackVal, range, nOutputDots.
PFetch4[AddrTemp,nInputPixels,0];
T ← LSh[pixelArrayHi,10], Return;
* initCounters: jctr = -nOutputDots, Stack4 = nInputPixels.
* Returns T = the 1-bit field at bit 4 of bitmapWidth, which the callers add
* to nInputPixels and use as the starting x.
initCounters:
T ← nOutputDots;
jctr ← (Zero) - T;
T ← nInputPixels;
Stack4 ← T;
T ← LdF[bitmapWidth,4,1], Return;
*subroutines: pieces used by all four modes
* GetPixel: called from forX0..forX3 on the instruction
* "LU ← (nInputPixels) - T - 1" with T = x. If that result is negative the
* line is finished and control goes to xDone instead of returning (ALU<0
* presumably tests the caller's result).
* Otherwise fetches the word containing pixel x (T = x/2), takes the left
* byte for even x or the right byte for odd x, subtracts blackVal, and leaves
* the result in val limited to the interval 0..range.
GetPixel:
T ← RSh[x,1], Goto[xDone,ALU<0];
PFetch1[pixelArray,val];
T ← blackVal;
LU ← x, Goto[xOdd,R Odd];
xEven:
val ← T ← (RSh[val,10]) - T, Goto[blackCheck];
xOdd:
val ← T ← (RHMask[val]) - T, Goto[blackCheck];
blackCheck:
* Pixel minus blackVal negative: return val = 0.
LU ← (range) - T, Goto[blackOK,ALU>=0];
val ← Zero, Return;
blackOK:
* range minus val negative: return val = range; otherwise val is unchanged.
T ← range, Goto[whiteOK,ALU>=0];
val ← T, Return;
whiteOK:
Return;

* OrBlack: fetch a screen word into Stack6 and OR bitVal into it. The caller
* sets T beforehand (presumably the displacement from screenWord, since the
* PFetch1 gives none).
OrBlack:
PFetch1[screenWord,Stack6];
T ← bitVal;
Stack6 ← (Stack6) or T, Return;
* StoreBlack: write Stack6 back relative to screenWord; the caller again sets T.
StoreBlack:
PStore1[screenWord,Stack6], Return;

* GetError: called with T = (CascadeRight) and (100000C), i.e. the sign bit
* of CascadeRight.
* Halves CascadeRight (right shift with the sign bit OR'ed back in), puts a
* further-halved copy in T/errorTemp, adds it to CascadeDiag, and stores
* CascadeDiag relative to errorVec with T = bitCount.
* The four paths a00/a01/a10/a11 differ only in a +1 applied either to
* CascadeRight or to errorTemp, selected by two R Odd / R Even tests on
* CascadeRight -- presumably rounding for the bits shifted out.
GetError:
CascadeRight ← (RSh[CascadeRight,1]) or T, Goto[a1,R Odd];
a0:
T ← (RSh[CascadeRight,1]) or T, Goto[a00,R Even];
a01:
CascadeRight ← (CascadeRight) + 1;
a00:
errorTemp ← T, Goto[haveError];

a1:
T ← (RSh[CascadeRight,1]) or T, Goto[a10,R Even];
a11:
errorTemp ← T ← (Zero) + T + 1, Goto[haveError];
a10:
CascadeRight ← (CascadeRight) + 1, Goto[a00];

haveError:
* T still holds the value just written to errorTemp.
CascadeDiag ← (CascadeDiag) + T;
T ← bitCount;
PStore1[errorVec,CascadeDiag], Return;

* GetNext: called with T = the updated bitCount. Fetches a word relative to
* errorVec into Stack6 (T presumably the displacement), sets
* CascadeDiag = errorTemp, adds the fetched word to CascadeRight, and returns
* T = Stack4 (set from nInputPixels by initCounters) for the caller to add to jctr.
GetNext:
PFetch1[errorVec,Stack6];
T ← errorTemp;
CascadeDiag ← T;
T ← Stack6;
CascadeRight ← (CascadeRight) + T;
T ← Stack4, Return;

* xDone: reached from GetPixel when the input pixels are exhausted. Restores
* the linkage saved in returnLoc by stuffParams and returns to the original caller.
xDone:
APCTask&APC ← returnLoc;
Return;

* Mode 0 pixel loop. Entered with T = x. bitCount counts up and bitVal
* rotates right one place per output dot.
forX0:
* The LU result is the end-of-line test consumed inside GetPixel, which
* leaves the pixel value in val.
LU ← (nInputPixels) - T - 1, Call[GetPixel]; *5 mi
* jctr already non-negative: no dots for this pixel.
T ← jctr, Goto[jDone0,R>=0];
whileJ0:
* Add the pixel value to the running error. A non-negative sum goes to
* White0; otherwise fall into Black0.
T ← val;
CascadeRight ← (CascadeRight) + T;
T←range, Goto[White0,ALU>=0];
* Black0: OR bitVal into the screen word selected by the high 12 bits of
* bitCount, then write it back.
Black0: T ← LdF[bitCount,0,14], Call[OrBlack];
*3 mi
T ← LdF[bitCount,0,14], Call[StoreBlack];*1 mi
EndDot0:
* Pass the sign bit of CascadeRight to GetError, step bitCount, and pick up
* the next error word.
T ← (CascadeRight) and (100000C), Call[GetError]; *6.5 mi
bitCount ← T ← (bitCount) + 1, Call[GetNext]; *6 mi
* GetNext returned T = Stack4; more dots for this pixel while jctr stays negative.
jctr ← (jctr) + T;
bitVal ← RCY[bitVal,1], Goto[whileJ0,ALU<0];
jDone0:
* Next input pixel: jctr -= nOutputDots, x += 1.
T ← nOutputDots;
jctr ← (jctr) - T;
x ← T ← (x) + 1, Goto[forX0]; *could save 7 mi if jctr still negative
* White0: T = range here; subtract it from the running error, write no bit.
White0: CascadeRight ← (CascadeRight) - T, Goto[EndDot0];

* Mode 1 pixel loop. Entered with T = x. bitCount counts down and bitVal
* rotates left one place per output dot.
forX1:
* The LU result is the end-of-line test consumed inside GetPixel, which
* leaves the pixel value in val.
LU ← (nInputPixels) - T - 1, Call[GetPixel]; *5 mi
* jctr already non-negative: no dots for this pixel.
T ← jctr, Goto[jDone1,R>=0];
whileJ1:
* Add the pixel value to the running error. A non-negative sum goes to
* White1; otherwise fall into Black1.
T ← val;
CascadeRight ← (CascadeRight) + T;
T←range, Goto[White1,ALU>=0];
* Black1: OR bitVal into the screen word selected by the high 12 bits of
* bitCount, then write it back.
Black1: T ← LdF[bitCount,0,14], Call[OrBlack];
*3 mi
T ← LdF[bitCount,0,14], Call[StoreBlack];*1 mi
EndDot1:
* Pass the sign bit of CascadeRight to GetError, step bitCount downward, and
* pick up the next error word.
T ← (CascadeRight) and (100000C), Call[GetError]; *6.5 mi
bitCount ← T ← (bitCount) - 1, Call[GetNext]; *6 mi
* GetNext returned T = Stack4; more dots for this pixel while jctr stays negative.
jctr ← (jctr) + T;
bitVal ← LCy[bitVal,1], Goto[whileJ1,ALU<0];
jDone1:
* Next input pixel: jctr -= nOutputDots, x += 1.
T ← nOutputDots;
jctr ← (jctr) - T;
x ← T ← (x) + 1, Goto[forX1]; *could save 7 mi if jctr still negative
* White1: T = range here; subtract it from the running error, write no bit.
White1: CascadeRight ← (CascadeRight) - T, Goto[EndDot1];

* Mode 2 pixel loop. Entered with T = x. bitVal is not rotated; instead
* Stack5 is decreased by LdF[bitmapWidth,5,13] for every dot (black or
* white) and is used as T for the screen-word fetch and store.
* bitCount counts up.
forX2:
* The LU result is the end-of-line test consumed inside GetPixel, which
* leaves the pixel value in val.
LU ← (nInputPixels) - T - 1, Call[GetPixel];
* jctr already non-negative: no dots for this pixel.
T ← jctr, Goto[jDone2,R>=0];
whileJ2:
* Add the pixel value to the running error. A non-negative sum goes to
* White2; otherwise fall into Black2.
T ← val;
CascadeRight ← (CascadeRight) + T;
T←LdF[bitmapWidth,5,13], Goto[White2,ALU>=0];
* Black2: step Stack5, OR bitVal into the screen word at that offset, write it back.
Black2: Stack5 ← T ← (Stack5) - T, Call[OrBlack];
T ← Stack5, Call[StoreBlack];
EndDot2:
* Pass the sign bit of CascadeRight to GetError, step bitCount, and pick up
* the next error word.
T ← (CascadeRight) and (100000C), Call[GetError];
bitCount ← T ← (bitCount) + 1, Call[GetNext];
* GetNext returned T = Stack4; more dots for this pixel while jctr stays negative.
jctr ← (jctr) + T;
Goto[whileJ2,ALU<0];
jDone2:
* Next input pixel: jctr -= nOutputDots, x += 1.
T ← nOutputDots;
jctr ← (jctr) - T;
x ← T ← (x) + 1, Goto[forX2]; *could save 7 mi if jctr still negative
* White2: step Stack5 without writing, then subtract range from the running error.
White2: Stack5 ← (Stack5) - T;
T ← range;
CascadeRight ← (CascadeRight) - T, Goto[EndDot2];

* Mode 3 pixel loop. Entered with T = x. bitVal is not rotated; instead
* Stack5 is increased by LdF[bitmapWidth,5,13] for every dot (black or
* white) and is used as T for the screen-word fetch and store.
* bitCount counts up.
forX3:
* The LU result is the end-of-line test consumed inside GetPixel, which
* leaves the pixel value in val.
LU ← (nInputPixels) - T - 1, Call[GetPixel];
* jctr already non-negative: no dots for this pixel.
T ← jctr, Goto[jDone3,R>=0];
whileJ3:
* Add the pixel value to the running error. A non-negative sum goes to
* White3; otherwise fall into Black3.
T ← val;
CascadeRight ← (CascadeRight) + T;
T←LdF[bitmapWidth,5,13], Goto[White3,ALU>=0];
* Black3: step Stack5, OR bitVal into the screen word at that offset, write it back.
Black3: Stack5 ← T ← (Stack5) + T, Call[OrBlack];
T ← Stack5, Call[StoreBlack];
EndDot3:
* Pass the sign bit of CascadeRight to GetError, step bitCount, and pick up
* the next error word.
T ← (CascadeRight) and (100000C), Call[GetError];
bitCount ← T ← (bitCount) + 1, Call[GetNext];
* GetNext returned T = Stack4; more dots for this pixel while jctr stays negative.
jctr ← (jctr) + T;
Goto[whileJ3,ALU<0];
jDone3:
* Next input pixel: jctr -= nOutputDots, x += 1.
T ← nOutputDots;
jctr ← (jctr) - T;
x ← T ← (x) + 1, Goto[forX3]; *could save 7 mi if jctr still negative
* White3: step Stack5 without writing, then subtract range from the running error.
White3: Stack5 ← (Stack5) + T;
T ← range;
CascadeRight ← (CascadeRight) - T, Goto[EndDot3];

:END;