(FILECREATED "12-Feb-86 12:09:16" {QV}<IDL>SOURCES>IDLLIBRARY.;1 18211  

      changes to:  (FNS ZSCORE)

      previous date: "14-FEB-82 13:33:28" {QV}<IDL>SOURCES>LIBRARY.;1)


(* Copyright (c) 1986 by Xerox Corporation. All rights reserved.)

(PRETTYCOMPRINT IDLLIBRARYCOMS)

(RPAQQ IDLLIBRARYCOMS [(FNS APOOL ARITHP FREQ FREQHIST FRIEDMAN HMEAN KRUSKAL-WALLIS MANN-WHITNEY 
			      NFROMS PCT REGRESS RMSE SIGNTEST PAIRORDER KENDALLS-TAU GAMMA GEOMEAN 
			      WILCOXON YHAT ZSCORE)
	(DECLARE: DOEVAL@COMPILE DONTCOPY (P (RESETSAVE DWIMIFYCOMPFLG T])
(DEFINEQ

(APOOL
  [ELAMBDA ((ATABLE MATRIX)
            LINES (TITLE <'POOL < " of lines " LINES> < " from " ATABLE>>))
                                                             (* rmk: "10-MAR-80 14:14")
                                                             (* Pools lines of an ANOVA table, producing a vector 
							     of the SS, df, and MS)
    (PROG (P (SS (RPLUS ATABLE@ <LINES '1 >))
	       (DF (RPLUS ATABLE@ <LINES '2 >)))
	    (P←(ADJOIN SS DF SS/DF))
	    (P@(LABEL 1 ALL)← '(SumSq df MS))
	    (RETURN P))])

(ARITHP
  [ELAMBDA ((X SCALAR))
                                                             (* rmk: " 2-AUG-80 15:36")
                                                             (* True if X is not missing)
    ~(EQP X NIL)])

(FREQ
  [LAMBDA (A WT)                                             (* rmk: "22-JAN-79 17:35" posted: "20-DEC-77 13:45")
                                                             (* Computes frequencies for arbitrary array A.
							     Not extended, cause there might be different numbers 
							     of values for different slices)
    (PROG [(F (COUNTS (GROUP (RESHAPE A)
				   WT]
	    (F@(TITLE)← <'FREQ A !(if WT
					then <WT>)
			    >)
	    (RETURN F])

(FREQHIST
  [LAMBDA (A FILE WT)                                        (* rmk: "23-JAN-79 21:42" posted: "30-NOV-77 22:04")

          (* Produces frequency histograms of the values of A, grouped according to A's KEEP properties.
	  The NIL value avoids a shape mismatch on the returns to the extension mechanism. -
	  Must open the file before the EAPPLY* so that different slices won't get printed on different file versions)


    (RESETLST [if FILE=NIL
		  elseif (OPENP FILE 'OUTPUT)
		    then (RESETSAVE (OUTPUT FILE))
		  else (RESETSAVE (OUTFILE FILE)
				      '(PROGN (CLOSEF (OUTPUT OLDVALUE]
		FILE←(OUTPUT)
		([ELAMBDA ((A ARRAY)
                           (WT ARRAY))
                   (HIST (FREQ A WT))
		   NIL]
		 A WT))
    FILE])

(FRIEDMAN
  [ELAMBDA ((M MATRIX))
                                                             (* rmk: " 5-JUL-79 17:43")

          (* Computes the Friedman 2-way analysis-of-variance by ranks. Columns are the repeated measure.
	  The sign test is essentially the special case where there are only 2 columns. -
	  Produces the Chi-square/r statistic, which may be looked up in Siegel, or referred to the chi-square distribution 
	  with C-1 degrees of freedom, for C the number of columns.)


    (PROG [(AN (ANOVA (MOMENTS (KEEP (RANK (KEEP M 1))
					     ALL]            (* The (KEEP -- ALL) makes both rows and columns be 
							     effects, as in a standard repeated measures ANOVA)

          (* SS for rows (line 2) is always zero, SS for treatments (columns, line 3) is the numerator in the quotient below.
	  The denominator is MS-within-rows, obtained by pooling the column and row*column effects. Note that ties are 
	  automatically accounted for.)


	    (RETURN AN@('(3 SumSq))/(APOOL AN '(3 4))@('MS)))])

(HMEAN
  [ELAMBDA ((A ARRAY))
                                                             (* rmk: "17-AUG-78 14:47")
                                                             (* Computes the harmonic mean of the non-NIL elements 
							     of A. The ELAMBDA is so that explicit KEEPS will be 
							     efficiently processed.)
    1.0/(MOMENTS 1/A NIL 1)@('Mean)])

(KRUSKAL-WALLIS
  [ELAMBDA ((VALUES NIL))
                                                             (* rmk: " 2-AUG-80 15:26" posted: "28-APR-78 23:04")

          (* Computes the Kruskal-Wallis analysis-of-variance by ranks. Mann-Whitney U is essentially the special case where 
	  there are only 2 groups. -
	  Produces the H statistic, which may be looked up in Siegel, or referred to the chi-square distribution with K-1 
	  degrees of freedom, for K the number of groups. -
	  VALUES should be a grouped array, with the treatment dimension the only kept dimension,)


    (PROG [(AN (ANOVA (MOMENTS (KEEP (RANK (LEAVE VALUES 'ALL))
					     (KEEP VALUES]
                                                             (* Line 2 is treatments, 3 is error.
							     The numerator in the quotient is thus SS-treatments, 
							     the denominator MS-total. Ties are automatically 
							     accounted for.)
	    (RETURN AN@('(2 SumSq))/(APOOL AN '(2 3))@('MS)))])

(MANN-WHITNEY
  [ELAMBDA ((V1 VECTOR)
            (V2 VECTOR))
                                                             (* rmk: " 2-AUG-80 15:36")
                                                             (* The two vectors are pasted into a classified matrix
							     and given to KRUSKAL-WALLIS)
    (PROG (VAL (N1 (INDEX (SEEK (FUNCTION ARITHP)
				      V1)
			      1 -1))
		 (N2 (INDEX (SEEK (FUNCTION ARITHP)
				      V2)
			      1 -1)))
	    [VAL←(ADJOIN N1 N2 N1*N2*.5*(1-(SQRT (N1+N2+1)/(N1*N2*3)*(KRUSKAL-WALLIS
						       (GROUP (ADJOIN (RESHAPE 1
										     (INDEX V1 1 -1)
										     )
									  (RESHAPE 2
										     (INDEX V2 1 -1)
										     ))
								(ADJOIN V1 V2]
	    (VAL@(LABEL 1 ALL)← '(N1 N2 U))
	    (RETURN VAL))])

(NFROMS
  [ELAMBDA ((M MATRIX))
                                                             (* rmk: "12-MAR-80 13:13")

          (* Returns the N on which a swept cross-product matrix M is based, no matter what the pattern of sweepin has been, 
	  by sweepin in all the variables that are swept out. Provided of course that nothing is swept out or in twice)


    [PROG [SEL NOUTS (OUTS (SEEK 'MINUSP (TRANSPOSE M '(1 1]
	    (NOUTS←(INDEX OUTS 1 -1))
	    (RETURN (if NOUTS
			  then SEL←(if (MINUSP M@('(-1 -1)))
					 then              (* The constant was swept out)
						OUTS
				       else (ADJOIN OUTS -1))
				 (SWEEP M@ <SEL SEL> NIL (GENVEC 1 NOUTS))
			else 

          (* Have to test for this case specially, cause the GENVEC does the wrong thing if given 1 and NIL.
	  We'd like the empty vector in that case.)


			       M)@('(-1 -1]])

(PCT
  [LAMBDA (A KEEPS)                                          (* rmk: "22-JAN-79 17:36")

          (* Returns an image of A with elements indicating the percentage that the corresponding element A is of the margin 
	  collapsing across the dimenension not in KEEPS. -
	  E.g. for a matrix, KEEPS=1 means row percentages, KEEPS=2 means column percentages)


    A←(KEEP A KEEPS)
    (PROG [(PCT (100*(A/(RPLUS A]
	    (PCT@(TITLE)← <'PERCENTAGES <'KEEP A KEEPS>>)
	    (RETURN PCT])

(REGRESS
  [ELAMBDA ((PRE MATRIX)
            (POST MATRIX)
            (DV NIL)
            (N SCALAR)
            (TITLE <'Regression PRE>))
                                                             (* rmk: "12-MAR-80 15:17")

          (* Produces a more-or-less standard regression table for the dependent variables in the selector DV, given a 
	  pre-swept matrix PRE and a POST-swept matrix POST. If DV=NIL, then tables for all unswept variables in POST are 
	  constructed. -
	  If N not NIL, then it is the total number of observations on which the regression should be based.
	  Otherwise, the N is computed by looking at the constant elements. The user should provide the N if the constant is 
	  not present in the array, e.g., because it was NORMed.)


    (PROG [POSTOUTS NPOSTOUTS PREOUTS MC RESDF REGDF (PREDIAG (TRANSPOSE PRE '(1 1)))
		      (POSTDIAG (TRANSPOSE POST '(1 1]
	    (POSTOUTS←(SEEK 'MINUSP POSTDIAG))
	    (PREOUTS←(SEEK 'MINUSP PREDIAG))
	    (if ~(EQP (INDEX PREOUTS 1 -1)
			  (INDEX (SEEK PREOUTS POSTOUTS)
				   1 -1))
		then (ERROR "PRE outs are not a subset of POST outs"))
	    (if NPOSTOUTS←(INDEX POSTOUTS 1 -1)=NIL
		then (ERROR "No variables have been swept out"))
                                                             (* Number of swept out variables in POST.)
	    (RESDF←(if (EQP N NIL)
		       then (NFROMS PRE)
		     else N)
	      -NPOSTOUTS)
	    (if ~(PLUSP RESDF)
		then (ERROR "No degrees of freedom left!"))
	    (REGDF←NPOSTOUTS-(INDEX PREOUTS 1 -1))
	    (MC←(-1.0/POSTDIAG@POSTOUTS))                    (* Multi-colinearity)

          (* * Have to convert labels to integers to get a vector for the extension below)


	    (if DV
		then DV←(INDEX PRE 1 DV)
		       (if (INDEX (SEEK 'MINUSP POSTDIAG@DV)
				      1 -1)
			   then (ERROR "Swept-out variable cannot be dependent" DV))
	      else DV←(SEEK 'PLUSP POSTDIAG))
	    (DV@(LABEL 1)← PRE@(LABEL 1))
	    (DV@(LABEL 1 ALL)← PRE@(LABEL 1 DV))         (* Labels on DV go to the leading dimension 
							     (if any) of the output)
	    (RETURN ([ELAMBDA ((DVSEL SCALAR))
                         (PROG [B SE REGSS RESMS REGMS TOTSS (RESSS (POSTDIAG@DVSEL))
				    (RESULT (RESHAPE 1.0 (ADJOIN 2+NPOSTOUTS 6]
			         (TOTSS←PREDIAG@DVSEL)
			         (RESMS←RESSS/RESDF)
			         (REGSS←PREDIAG@DVSEL-RESSS)
			         (REGMS←REGSS/REGDF)

          (* * Labeling: First the Independent variable dimension, then the coefficient dimension)


			         (RESULT@(LABEL 1)← PRE@(LABEL 1))
			         (RESULT@(LABEL 1 ALL)← < !! PRE@(LABEL 1 POSTOUTS) ! '(
							      Regression Residual)
							    >)
			         (RESULT@(LABEL 2)← 'Column)
			         (RESULT@(LABEL 2 ALL)← '(B SE EtaSq F df p))

          (* * Now fill in the parameters)


			         (RESULT@('B)← B←POST@
				   <POSTOUTS DVSEL>)
			         (RESULT@('SE)← SE←(SQRT RESMS/MC))
                                                             (* Standard error of coefficient)
			         (RESULT@('EtaSq)← B*B*MC/TOTSS)
                                                             (* Proportion of total variance 
							     (SS) uniquely explained by this variable)
			         (RESULT@('p)←(FPROB (RESULT@('F)←(B/SE)↑2)
						       1.0 RESDF))
			         (RESULT@('((Regression Residual)
				      B))← NIL)
			         (RESULT@('(Regression SE))←(SQRT REGMS))
			         (RESULT@('(Residual SE))←(SQRT RESMS))
			         (RESULT@('(Regression EtaSq))← REGSS/TOTSS)
                                                             (* Multiple R↑2)
			         (RESULT@('(Regression df))← REGDF)
			         (RESULT@('(Residual (F p)))← NIL)
			         (RESULT@('(Residual EtaSq))← RESSS/TOTSS)
			         (RESULT@('(Residual df))← RESDF)
			         (RESULT@('(Regression p))←(FPROB (RESULT@('(Regression F))← 
								      REGMS/RESMS)
								    REGDF RESDF))
			         (RETURN RESULT))]
		       DV)))])

(RMSE
  [ELAMBDA ((X ARRAY)
            (Y ARRAY))
                                                             (* rmk: "12-MAR-80 10:02")
                                                             (* Computes the root-mean-square-error of the 
							     conformable arrays X and Y)
    (SQRT (MOMENTS (X-Y)↑2 NIL 1)@'Mean)])

(SIGNTEST
  [ELAMBDA ((V1 VECTOR)
            (V2 VECTOR))
                                                             (* rmk: "12-MAR-80 10:02" posted: " 6-AUG-78 23:09")
                                                             (* Produces a frequency table which can be referred to
							     the binomial distribution for the sign-test)
    [COUNTS (GROUP (TRANSLATE V1-V2 '((0 0 NIL)
				     (NIL 0 -1)
				     (O NIL 1]])

(PAIRORDER
  [ELAMBDA ((M MATRIX))
                                                             (* rmk: "31-JUL-80 18:07")

          (* Returns a vector containing the Different and Same statistics used in the TAU and GAMMA statistics.
	  The vector also contains the number of ties and missing elements. M is a matrix whose first dimension represents 
	  individuals and whose second dimension consists of 2 ranking variables)


    (if ~(EQP 2 (INDEX M 2 -1))
	then (ERROR "PAIRORDER:  More than two rankings" M))
    (for I TOTAL SAME←0
	   DIFFERENT←0
	   TIE1←0
	   TIE2←0
	   MISSING←0
	   (F ←(RESHAPE 0 4)) from 1 to (INDEX M 1 -1) first (F@(LABEL 1 ALL)← '(
									   Different Order1 Order2 
										     Same))
       do (for J SUBS DIFF1 DIFF2 PROD from 1 to (IPLUS I -1)
	       do (SUBS← <I J>)
		    (DIFF1←(REDUCE M@ <SUBS 1> 'DIFFERENCE))
		    (DIFF2←(REDUCE M@ <SUBS 2> 'DIFFERENCE))
		    (PROD←DIFF1*DIFF2)
		    (if PROD=NIL
			then (add MISSING 1)
		      elseif (PLUSP PROD)
			then (add SAME 1)
		      elseif (MINUSP PROD)
			then (add DIFFERENT 1)
		      elseif (EQP 0 DIFF1)
			then (if (EQP 0 DIFF2)
				   then (add TIE2 1))
			       (add TIE1 1)
		      else (add TIE2 1)))
       finally (TOTAL←(INDEX M 1 -1))
		 (TOTAL←TOTAL*(TOTAL-1)/2-MISSING)
		 (F@ALL ← <DIFFERENT TOTAL-TIE1 TOTAL-TIE2 SAME>)
		 (RETURN F))])

(KENDALLS-TAU
  [ELAMBDA ((M MATRIX))
                                                             (* rmk: " 2-AUG-80 15:48")

          (* Computes Kendall's rank-order correlation Tau. M is a matrix of ranking variables on individuals along the first
	  dimensions. M can also be a pair-ordering vector for some other matrix.)


    [PROG [(PO (if (EQP 1 (INDEX M -1))
		     then M
		   else (PAIRORDER M]
	    (RETURN (PO@('Same)
			-PO@('Different))/(GEOMEAN PO@'((Order1 Order2]])

(GAMMA
  [ELAMBDA ((M MATRIX))
                                                             (* rmk: "31-JUL-80 13:26")
                                                             (* Comutes the Goodman-Kruskal Gamma statistic.
							     M is a matrix of ranking variables on individuals 
							     along the first dimension, or a pair-ordering vector.)
    (PROG [(PO ((if 1=(INDEX M -1)
		      then M
		    else (PAIRORDER M))@('((Same Different]
	    (RETURN (REDUCE PO 'DIFFERENCE)/(RPLUS PO)))])

(GEOMEAN
  [ELAMBDA ((A ARRAY))
                                                             (* rmk: " 2-AUG-80 15:45")
                                                             (* Computes geometric mean of elements in A)
    [PROG ((AA (RESHAPE A)))                             (* RESHAPE only necessary until SEEK is extended to 
							     higher-dimensional arrays.)
	    (AA←AA@(SEEK (FUNCTION ARITHP)
			   AA))                              (* Selection means that missing elements don't yield a
							     NIL mean.)
	    (RETURN (SELECTQ (INDEX AA 1 -1)
				 (0 1)
				 (1 AA@ ('(1)))
				 (2 (SQRT (RTIMES AA)))
				 ((RTIMES AA)↑(1.0/(INDEX AA 1 -1]])

(WILCOXON
  [ELAMBDA ((V1 VECTOR)
            (V2 VECTOR))
                                                             (* rmk: "12-MAR-80 10:03" posted: " 6-AUG-78 23:09")
                                                             (* Produces the Wilcoxon statistic to be looked up in 
							     tables (for N<25), or computes the correct probability
							     using ANOVA)
    [PROG (G (DIFF (V1-V2)))
	    [G←(GROUP (TRANSLATE DIFF '((0 0 NIL)
				      (NIL 0 -1)
				      (0 NIL 1)))
			(RANK (ABS DIFF]
	    (RETURN (if (SHAPE V1) LT 25
			  then (ADJOIN (SHAPE V1)
					   (REDUCE (COUNTS G)
						     'MIN))
			else (ANOVA (MOMENTS G]])

(YHAT
  [ELAMBDA ((M MATRIX)
            (SW MATRIX)
            (DV NIL))
                                                             (* rmk: " 2-AUG-80 15:34")

          (* Computes predicted values for variables in M, given the swept matrix SW and the dependent variable selector DV.
	  If DV is NIL, then predictors for all unswept variables in SW are computed. -
	  The TRANSPOSE produces the diagonal of SW -
	  The ADJOIN adds the Constant variable to M, to be multiplied by the Constant coefficient.)


    (PROG [IV (DIAG (TRANSPOSE SW '(1 1]
	    (IV←(SEEK (FUNCTION MINUSP)
			DIAG))
	    (if DV=NIL
		then DV←(SEEK (FUNCTION PLUSP)
				  DIAG))
	    (RETURN (MPROD (ADJOIN M 1)@ <ALL IV> SW@ <IV DV>)))])

(ZSCORE
  [ELAMBDA ((A ARRAY)
            (MOM VECTOR))
                                                             (* rmk: " 2-AUG-80 15:34")

          (* Computes the image of A under the Zscore mapping. MOM, if given, is the MOMENTS table for A.
	  Otherwise, the MOMENTS are computed here and discarded)


    (if MOM=NIL
	then MOM←(MOMENTS A))
    (A-MOM@('Mean))/(SQRT MOM@('Variance))])
)
(DECLARE: DOEVAL@COMPILE DONTCOPY 
(RESETSAVE DWIMIFYCOMPFLG T)
)
(PUTPROPS IDLLIBRARY COPYRIGHT ("Xerox Corporation" 1986))
(DECLARE: DONTCOPY
  (FILEMAP (NIL (568 18064 (APOOL 578 . 1150) (ARITHP 1152 . 1396) (FREQ 1398 . 1916) (FREQHIST 1918 . 
2753) (FRIEDMAN 2755 . 3853) (HMEAN 3855 . 4251) (KRUSKAL-WALLIS 4253 . 5307) (MANN-WHITNEY 5309 . 
6174) (NFROMS 6176 . 7118) (PCT 7120 . 7650) (REGRESS 7652 . 11883) (RMSE 11885 . 12238) (SIGNTEST 
12240 . 12702) (PAIRORDER 12704 . 14255) (KENDALLS-TAU 14257 . 14795) (GAMMA 14797 . 15358) (GEOMEAN 
15360 . 16099) (WILCOXON 16101 . 16841) (YHAT 16843 . 17633) (ZSCORE 17635 . 18062)))))
STOP