/***********************************************************
Copyright 1987 by Digital Equipment Corporation, Maynard, Massachusetts,
and the Massachusetts Institute of Technology, Cambridge, Massachusetts.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its 
documentation for any purpose and without fee is hereby granted, 
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in 
supporting documentation, and that the names of Digital or MIT not be
used in advertising or publicity pertaining to distribution of the
software without specific, written prior permission.  

DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
SOFTWARE.

******************************************************************/
#include "X.h"
#include "Xmd.h"
/*
 * Mask lookup tables used by the macros in this file.  They are only
 * declared here; the notes below name maskbits.c as the companion file.
 *
 * starttab / endtab		indexed by maskbits() and mask32bits(), and by
 *				getbits()/putbits() when a span crosses a
 *				longword boundary.
 * startpartial / endpartial	ANDed together by maskpartialbits() for spans
 *				contained in a single longword.
 * rmask / mask			not referenced by any macro visible in this
 *				section.  NOTE(review): presumably per-bit
 *				masks -- confirm against their definition.
 */
extern int starttab[];
extern int endtab[];
extern int startpartial[];
extern int endpartial[];
extern int rmask[32];
extern int mask[32];


/* the following notes use the following conventions:
SCREEN LEFT				SCREEN RIGHT
in this file and maskbits.c, left and right refer to screen coordinates,
NOT bit numbering in registers.

starttab[n] 
	bits[0,n-1] = 0	bits[n,31] = 1
endtab[n] =
	bits[0,n-1] = 1	bits[n,31] = 0

startpartial[], endpartial[]
	these are used as accelerators for doing putbits and masking out
bits that are all contained between longword boundaries.  the extra
256 bytes of data seems a small price to pay -- code is smaller,
and narrow things (e.g. window borders) go faster.

the names may seem misleading; they are derived not from which end
of the word the bits are turned on, but at which end of a scanline
the table tends to be used.

look at the tables and macros to understand boundary conditions.
(careful readers will note that starttab[n] = ~endtab[n] for n != 0)

-----------------------------------------------------------------------
these two macros depend on the screen's bit ordering.
in both of them x is a screen position.  they are used to
combine bits collected from multiple longwords into a
single destination longword, and to unpack a single
source longword into multiple destinations.

SCRLEFT(dst, x)
	takes dst[x, 32] and moves them to dst[0, 32-x]
	the contents of the rest of dst are 0 ONLY IF
	dst is UNSIGNED.
	this is a right shift on LSBFirst (forward-thinking)
	machines like the VAX, and left shift on MSBFirst
	(backwards) machines like the 680x0 and pc/rt.

SCRRIGHT(dst, x)
	takes dst[0,x] and moves them to dst[32-x, 32]
	the contents of the rest of dst are 0 ONLY IF
	dst is UNSIGNED.
	this is a left shift on LSBFirst, right shift
	on MSBFirst.


the remaining macros are cpu-independent; all bit order dependencies
are built into the tables and the two macros above.

maskbits(x, w, startmask, endmask, nlw)
	for a span of width w starting at position x, returns
a mask for ragged bits at start, mask for ragged bits at end,
and the number of whole longwords between the ends.

maskpartialbits(x, w, mask)
	works like maskbits(), except all the bits are in the
	same longword (i.e. ((x&0x1f) + w) <= 32)

mask32bits(x, w, startmask, endmask)
	as maskbits, but does not calculate nlw.  it is used by
	mfbGlyphBlt to put down glyphs <= 32 bits wide.

-------------------------------------------------------------------

NOTE
	any pointers passed to the following 4 macros are
	guaranteed to be 32-bit aligned.

getbits(psrc, x, w, dst)
	starting at position x in psrc (x < 32), collect w
	bits and put them in the screen left portion of dst.
	psrc is a longword pointer.  this may span longword boundaries.
	it special-cases fetching all w bits from one longword.

	+--------+--------+		+--------+
	|    | m |n|      |	==> 	| m |n|  |
	+--------+--------+		+--------+
	    x      x+w			0     w
	psrc     psrc+1			dst
			m = 32 - x
			n = w - m

	implementation:
	get m bits, move to screen-left of dst, zeroing rest of dst;
	get n bits from next word, move screen-right by m, zeroing
		 lower m bits of word.
	OR the two things together.

	in the font code, psrc is NOT guaranteed to be 32-bit aligned.
	on  many machines this will cause problems.  there are
	several versions of this macro which do various alignments
	so that the macro converts this into one or two aligned fetches.

putbits(src, x, w, pdst)
	starting at position x in pdst, put down the screen-leftmost
	w bits of src.  pdst is a longword pointer.  this may
	span longword boundaries.
	it special-cases putting all w bits into the same longword.

	+--------+			+--------+--------+
	| m |n|  |		==>	|    | m |n|      |
	+--------+			+--------+--------+
	0     w				     x     x+w
	dst				pdst     pdst+1
			m = 32 - x
			n = w - m

	implementation:
	get m bits, shift screen-right by x, zero screen-leftmost x
		bits; zero rightmost m bits of *pdst and OR in stuff
		from before the semicolon.
	shift src screen-left by m, zero bits n-32;
		zero leftmost n bits of *(pdst+1) and OR in the
		stuff from before the semicolon.

putbitsrop(src, x, w, pdst, ROP)
	like putbits but calls DoRop with the rasterop ROP (see mfb.h for
	DoRop)

putbitsrrop(src, x, w, pdst, ROP)
	like putbits but calls DoRRop with the reduced rasterop ROP 
	(see mfb.h for DoRRop)

-----------------------------------------------------------------------

NOTE
getleftbits(psrc, w, dst)
	get the leftmost w (w<=32) bits from *psrc and put them
	in dst.  this is used by the mfbGlyphBlt code for glyphs
	<=32 bits wide.
	psrc is declared (unsigned char *)

WARNING:
   some C compilers object to ANDing a pointer with a constant.  silly
twits.  so, we cast psrc as an int before doing anything to it.  this
means we assume (int) and (unsigned char *) are the same size.

	this macro is called ONLY for getting bits from font glyphs,
	and assumes that the bits of the glyph have been
	written in memory as pieces of a byte stream.  if
	this is not true (i.e. if you have changed
	the server-natural font format definition)
	you may have to rewrite this.

	for blazing text performance, you want this macro
	to touch memory as infrequently as possible (e.g.
	fetch longwords) and as efficiently as possible
	(e.g. don't fetch misaligned longwords)

	YOU SHOULD TUNE THIS MACRO TO YOUR PROCESSOR!

*/

/*
 * SCRLEFT(lw, n) / SCRRIGHT(lw, n)
 *	shift lw by n bit positions toward the screen-left / screen-right
 *	edge.  Which machine shift that is depends on the bitmap bit order:
 *	MSBFirst maps screen-left to <<, anything else maps it to >>.
 *	Vacated bits are zero only if lw is unsigned.
 *
 *	BITMAP_BIT_ORDER and MSBFirst must be defined before this point
 *	(NOTE(review): presumably by the server/X headers -- confirm).  If
 *	neither is defined the preprocessor treats both as 0 and the
 *	MSBFirst branch is selected.
 */
#if (BITMAP_BIT_ORDER == MSBFirst)	/* pc/rt, 680x0 */
#define SCRLEFT(lw, n)	((lw) << (n))
#define SCRRIGHT(lw, n)	((lw) >> (n))
#else					/* vax, intel */
#define SCRLEFT(lw, n)	((lw) >> (n))
#define SCRRIGHT(lw, n)	((lw) << (n))
#endif


/*
 * maskbits(x, w, startmask, endmask, nlw)
 *	for a span of width w starting at position x, stores
 *	  startmask = starttab[x mod word]	(ragged bits at the start)
 *	  endmask   = endtab[(x+w) mod word]	(ragged bits at the end)
 *	  nlw       = number of whole longwords between the two ends
 *	When startmask is zero no partial first word is subtracted from w
 *	before the longword count is taken.
 *
 *	startmask, endmask and nlw are assigned to, so they must be lvalues;
 *	x and w are evaluated more than once.  BW, BWM and L2BP come from
 *	elsewhere (NOTE(review): presumably bits per longword, BW-1 and
 *	log2(BW) -- confirm).
 *
 *	The body is a single compound statement so that all three results
 *	are controlled together when the macro is the body of an unbraced
 *	if/for/while.
 */
#define maskbits(x, w, startmask, endmask, nlw) \
{ \
    startmask = starttab[(x)&BWM]; \
    endmask = endtab[((x)+(w)) & BWM]; \
    if (startmask) \
	nlw = (((w) - (BW - ((x)&BWM))) >> L2BP); \
    else \
	nlw = (w) >> L2BP; \
}

/*
 * maskpartialbits(x, w, mask)
 *	for a span of width w at position x that lies inside one longword,
 *	stores in mask the AND of the startpartial[] entry for x and the
 *	endpartial[] entry for x+w (both indices taken modulo the word size
 *	via BWM).  mask must be an lvalue; x is evaluated twice.
 */
#define maskpartialbits(x, w, mask) \
{ \
    int mpb_start = startpartial[(x) & BWM]; \
    int mpb_end = endpartial[((x) + (w)) & BWM]; \
    mask = mpb_start & mpb_end; \
}

/*
 * mask32bits(x, w, startmask, endmask)
 *	like maskbits() but without the longword count: stores
 *	  startmask = starttab[x mod word]
 *	  endmask   = endtab[(x+w) mod word]
 *	startmask and endmask must be lvalues; x is evaluated twice.
 *
 *	The two assignments are grouped in one compound statement so both
 *	are controlled together under an unbraced if/for/while.
 */
#define mask32bits(x, w, startmask, endmask) \
{ \
    startmask = starttab[(x)&BWM]; \
    endmask = endtab[((x)+(w)) & BWM]; \
}

#if GETBITS_ALIGNMENT == 1
/*
 * getbits(psrc, x, w, dst) -- variant that dereferences psrc as given,
 * with no alignment adjustment.
 *	collects w bits starting at bit position x of *psrc into the
 *	screen-left end of dst.
 *	- if x+w fits in one longword, dst is simply *psrc shifted
 *	  screen-left by x; bits past w are NOT cleared.
 *	- otherwise m = BW-x bits come from *psrc and the rest from
 *	  *(psrc+1), masked with endtab[m] / starttab[m] and ORed together.
 *	dst must be an lvalue; psrc and x are evaluated more than once.
 */
#define getbits(psrc, x, w, dst) \
if ( ((x) + (w)) <= BW) \
{ \
    dst = SCRLEFT(*(psrc), (x)); \
} \
else \
{ \
    int m; \
    m = BW-(x); \
    dst = (SCRLEFT(*(psrc), (x)) & endtab[m]) | \
	  (SCRRIGHT(*((psrc)+1), m) & starttab[m]); \
}
#endif /* GETBITS_ALIGNMENT == 1 */

#if GETBITS_ALIGNMENT == 2
/*
 * getbits(psrc, x, w, dst) -- variant that only dereferences addresses
 * that are a multiple of 2.
 *	The low address bit of psrc is folded into the bit offset
 *	(off = x + 8 * (address & 1)), the pointer is rounded down to an
 *	even address and then advanced by one 16-bit unit for every 16 bits
 *	of offset, leaving off in [0,15].  Advancing by off>>4 units (rather
 *	than by at most one) keeps the fetch correct when off reaches 32 or
 *	more, i.e. an odd address combined with x >= 24.
 *	The pointer is converted through unsigned long, never plain
 *	unsigned, so the address is not narrowed where pointers are wider
 *	than unsigned.
 *	After normalisation the fetch is the same as the plain variant:
 *	one shifted word if off+w fits in a longword, otherwise two words
 *	masked with endtab[m] / starttab[m] and ORed together.
 *	dst must be an lvalue.
 */
#define getbits(psrc, x, w, dst) \
{\
    register unsigned *ptr= (unsigned *)(psrc);\
    register unsigned off= (x)+((((unsigned long)ptr)&0x01)<<3);\
    ptr=(unsigned *)((((unsigned long)ptr)&(~0x1UL))+((off>>4)<<1));\
    off&=0xf;\
    if ( ((off) + (w)) <= BW) \
    { \
	dst = SCRLEFT(*ptr, (off)); \
    } \
    else \
    { \
	int m; \
	m = BW-(off); \
	dst = (SCRLEFT(*ptr, (off)) & endtab[m]) | \
	    (SCRRIGHT(*(ptr+1), m) & starttab[m]); \
    }\
}
#endif /* GETBITS_ALIGNMENT == 2 */

#if GETBITS_ALIGNMENT == 4
/*
 * getbits(psrc, x, w, dst) -- variant that only dereferences addresses
 * that are a multiple of 4.
 *	The low two address bits of psrc are folded into the bit offset
 *	(off = x + 8 * (address & 3)), the pointer is rounded down to a
 *	multiple of 4 and advanced by one longword if off reached 32 or
 *	more, and off is reduced modulo the word size (BWM).
 *	The pointer is converted through unsigned long, never plain
 *	unsigned, so the address is not truncated where pointers are wider
 *	than unsigned.
 *	After normalisation the fetch is the same as the plain variant:
 *	one shifted word if off+w fits in a longword, otherwise two words
 *	masked with endtab[m] / starttab[m] and ORed together.
 *	dst must be an lvalue.
 */
#define getbits(psrc, x, w, dst) \
{\
    register unsigned *ptr= (unsigned *)(psrc);\
    register unsigned off= (x)+((((unsigned long)ptr)&0x03)<<3);\
    ptr=(unsigned *)(((unsigned long)ptr)&(~0x3UL))+((off&0xe0)>0);\
    off&=BWM;\
    if ( ((off) + (w)) <= BW) \
    { \
	dst = SCRLEFT(*ptr, (off)); \
    } \
    else \
    { \
	int m; \
	m = BW-(off); \
	dst = (SCRLEFT(*ptr, (off)) & endtab[m]) | \
	    (SCRRIGHT(*(ptr+1), m) & starttab[m]); \
    }\
}
#endif /* GETBITS_ALIGNMENT == 4 */


/*
 * putbits(src, x, w, pdst)
 *	stores the screen-leftmost w bits of src at bit position x of
 *	*pdst, leaving every other destination bit untouched.
 *	- span crosses into the next longword (x+w > BW): the first BW-x
 *	  bits go to *pdst under starttab[x], the remaining bits go to
 *	  *(pdst+1) under endtab[] of that remainder.
 *	- span fits in one longword: a single read-modify-write of *pdst
 *	  under the mask produced by maskpartialbits().
 *	x, w and pdst are evaluated more than once.
 */
#define putbits(src, x, w, pdst) \
if ( ((x)+(w)) > BW) \
{ \
    int pb_head = BW-(x); \
    int pb_tail = (w) - pb_head; \
    *(pdst) = (SCRRIGHT(src, x) & starttab[x]) | (*(pdst) & endtab[x]); \
    *((pdst)+1) = (SCRLEFT(src, pb_head) & endtab[pb_tail]) | \
		  (*((pdst)+1) & starttab[pb_tail]); \
} \
else \
{ \
    int pb_mask; \
    maskpartialbits((x), (w), pb_mask); \
    *(pdst) = (SCRRIGHT(src, x) & pb_mask) | (*(pdst) & ~pb_mask); \
}

/*
 * putbitsrop(src, x, w, pdst, rop)
 *	same placement as putbits(), but the value written is
 *	DoRop(rop, shifted source, current destination word) instead of the
 *	raw source bits.  Destination bits outside the span are preserved.
 *	- span crosses into the next longword (x+w > BW): DoRop is applied
 *	  once per destination word; results are merged under
 *	  starttab[x] for *pdst and endtab[] of the remainder for *(pdst+1).
 *	- span fits in one longword: one DoRop, merged under the mask from
 *	  maskpartialbits().
 *	src, x, w and pdst are evaluated more than once.
 */
#define putbitsrop(src, x, w, pdst, rop) \
if ( ((x)+(w)) > BW) \
{ \
    int pr_head = BW-(x); \
    int pr_tail = (w) - pr_head; \
    int pr_bits, pr_res; \
    pr_bits = SCRRIGHT((src), (x)); \
    pr_res = DoRop(rop, pr_bits, *(pdst)); \
    *(pdst) = (pr_res & starttab[x]) | (*(pdst) & endtab[x]); \
    pr_bits = SCRLEFT((src), pr_head); \
    pr_res = DoRop(rop, pr_bits, *((pdst) + 1)); \
    *((pdst)+1) = (pr_res & endtab[pr_tail]) | \
		  (*((pdst)+1) & starttab[pr_tail]); \
} \
else \
{ \
    int pr_mask; \
    int pr_bits, pr_res; \
    maskpartialbits((x), (w), pr_mask); \
    pr_bits = SCRRIGHT((src), (x)); \
    pr_res = DoRop(rop, pr_bits, *(pdst)); \
    *(pdst) = (pr_res & pr_mask) | (*(pdst) & ~pr_mask); \
}

/*
 * putbitsrrop(src, x, w, pdst, rop)
 *	same placement as putbits(), but the value written is
 *	DoRRop(rop, shifted source, current destination word) -- the
 *	reduced-rasterop counterpart of putbitsrop().  Destination bits
 *	outside the span are preserved.
 *	- span crosses into the next longword (x+w > BW): DoRRop is applied
 *	  once per destination word; results are merged under
 *	  starttab[x] for *pdst and endtab[] of the remainder for *(pdst+1).
 *	- span fits in one longword: one DoRRop, merged under the mask from
 *	  maskpartialbits().
 *	src, x, w and pdst are evaluated more than once.
 */
#define putbitsrrop(src, x, w, pdst, rop) \
if ( ((x)+(w)) > BW) \
{ \
    int prr_head = BW-(x); \
    int prr_tail = (w) - prr_head; \
    int prr_bits, prr_res; \
    prr_bits = SCRRIGHT((src), (x)); \
    prr_res = DoRRop(rop, prr_bits, *(pdst)); \
    *(pdst) = (prr_res & starttab[x]) | (*(pdst) & endtab[x]); \
    prr_bits = SCRLEFT((src), prr_head); \
    prr_res = DoRRop(rop, prr_bits, *((pdst) + 1)); \
    *((pdst)+1) = (prr_res & endtab[prr_tail]) | \
		  (*((pdst)+1) & starttab[prr_tail]); \
} \
else \
{ \
    int prr_mask; \
    int prr_bits, prr_res; \
    maskpartialbits((x), (w), prr_mask); \
    prr_bits = SCRRIGHT((src), (x)); \
    prr_res = DoRRop(rop, prr_bits, *(pdst)); \
    *(pdst) = (prr_res & prr_mask) | (*(pdst) & ~prr_mask); \
}

/*
 * getleftbits(psrc, w, dst)
 *	fetches the screen-leftmost w bits at psrc into dst.  This is just
 *	getbits() at bit offset 0 with psrc reinterpreted as a pointer to
 *	unsigned int; any alignment handling is whatever the selected
 *	getbits() variant provides.  dst must be an lvalue.
 */
#define getleftbits(psrc, w, dst) \
	getbits((unsigned int *)(psrc), 0, (w), (dst))