	TITLE	MATRIXM - MATRIX ALGRBRA IN ASSEMBLER

	COMMENT $

/* Contact: dstampe@sunee.waterloo.edu */

// Routines to do matrix math for the integer math library
// All code by Dave Stampe, last updated 23/12/93

/*
 This code is part of the VR-386 project, created by Dave Stampe.
 VR-386 is a descendant of REND386, created by Dave Stampe and
 Bernie Roehl.  Almost all the code has been rewritten by Dave
 Stampe for VR-386.

 Copyright (c) 1994 by Dave Stampe:
 May be freely used to write software for release into the public domain
 or for educational use; all commercial endeavours MUST contact Dave Stampe
 (dstampe@psych.toronto.edu) for permission to incorporate any part of
 this software or source code into their products!  Usually there is no
 charge for under 50-100 items for low-cost or shareware products, and terms
 are reasonable.  Any royalties are used for development, so equipment is
 often acceptable payment.

 ATTRIBUTION:  If you use any part of this source code or the libraries
 in your projects, you must give attribution to VR-386 and Dave Stampe,
 and any other authors in your documentation, source code, and at startup
 of your program.  Let's keep the freeware ball rolling!

 DEVELOPMENT: VR-386 is an effort to develop the process started by
 REND386, improving programmer access by rewriting the code and supplying
 a standard API.  If you write improvements, add new functions rather
 than rewriting current functions.  This will make it possible to
 include your improved code in the next API release.  YOU can help advance
 VR-386.  Comments on the API are welcome.

 CONTACT: dstampe@psych.toronto.edu
*/


NOTES ON MATRIX FORMATS:

Matrices are 32 bit [4][3] arrays, row major format.
They are a reduced homogeneous matrix, with a rotational
and translational part (translational part = position).
No scaling is allowed if you want to use the fast inverse
supplied here.

Matrix multiplies are generally quite expensive: the 36 32-bit
multiplies take 60 microseconds on a 486/33!  So use them sparingly.


OFFSETS IN MATRIX: (homogeneous)

[ 0  4  8  | 36 ]
[ 12 16 20 | 40 ]
[ 24 28 32 | 44 ]

		$

	.MODEL large		; large model: the procedures below are far and take far pointers


	.DATA			; nothing is defined between here and .CODE

;include 3dstruct.inc

; # define XFSC 536870912   /* 2**29 for shifting xform coeffs to long */
; (the macros below apply this scale as a 29-bit right shift)

	.CODE INTMATH		; code segment named INTMATH


; MULT29 a,b
;   Signed multiply of two dword memory operands; the 64-bit product is
;   shifted right 29 bits and rounded by adding the last bit shifted out.
;   Result in eax.  Destroys edx and the flags.
MULT29 	MACRO a,b                 ; multiply <3.29> -> eax
	mov	eax,DWORD PTR a
	imul	DWORD PTR b		; edx:eax = signed 64-bit product
	shrd	eax,edx,29		; drop 29 fraction bits; CF = last bit shifted out
	adc	eax,0			; round using that bit (must directly follow shrd)
	ENDM

; MMULT29 a,b,c
;   Product of three dword memory operands, evaluated as two chained
;   29-bit-shifted multiplies: first a*b is shifted and rounded, then that
;   result times c is shifted and rounded again.
;   Result in eax.  Destroys edx and the flags.
MMULT29 MACRO a,b,c               ; multiply 3 of <3.29> -> eax
	mov	eax,DWORD PTR a
	imul	DWORD PTR b		; edx:eax = a*b
	shrd	eax,edx,29		; CF = last bit shifted out
	adc	eax,0			; rounded intermediate a*b
	imul	DWORD PTR c		; edx:eax = intermediate * c
	shrd	eax,edx,29
	adc	eax,0			; rounded final result
	ENDM

; DOTPROD a,b,c,x,y,z,p
;   Computes a*x + b*y + c*z as a signed 64-bit sum, shifts it right 29
;   bits, then adds p plus the last bit shifted out (rounding).
;   a,b,c are loaded with "mov eax,..."; x,y,z are dword multiplicands;
;   p is the source operand of the final adc (0 when no offset is wanted).
;   Result in eax.  Destroys ebx, ecx, edx and the flags.
DOTPROD	MACRO a,b,c,x,y,z,p      ; dot product plus p, accum in ecx:ebx
	mov	eax,a            ; result in eax
	imul	DWORD PTR x
	mov	ecx,edx			; ecx:ebx = first product
	mov	ebx,eax
	mov	eax,b
	imul	DWORD PTR y
	add	ebx,eax			; ecx:ebx += second product (64-bit add)
	adc	ecx,edx
	mov	eax,c
	imul	DWORD PTR z
	add	eax,ebx			; edx:eax = third product + running sum
	adc	edx,ecx
	shrd	eax,edx,29		; scale back; CF = last bit shifted out
	adc	eax,p			; add offset p and the rounding bit together
	ENDM

; CROSSTERM a,x,b,y
;   Computes a*x - b*y as a signed 64-bit difference, shifts it right 29
;   bits and rounds by adding the last bit shifted out.
;   a and b are loaded with "mov eax,..."; x and y are dword multiplicands.
;   Result in eax.  Destroys ebx, ecx, edx and the flags.
CROSSTERM MACRO a,x,b,y      ; left-handed cross product (a*x-b*y) term
	mov	eax,b        ; b*y is held in ecx:ebx (ecx = high half), result in eax
	imul	DWORD PTR y
	mov	ecx,edx
	mov	ebx,eax
	mov	eax,a
	imul	DWORD PTR x		; edx:eax = a*x
	sub	eax,ebx			; edx:eax -= b*y (64-bit subtract)
	sbb	edx,ecx
	shrd	eax,edx,29		; scale back; CF = last bit shifted out
	adc	eax,0			; round
	ENDM

;/***************** MATRIX-VECTOR OPERATIONS **************/

;/* rotate/translate XYZ by matrix */
;
;void matrix_point(MATRIX m, long *xp, long *yp, long *zp)
;
; Replaces (*xp,*yp,*zp) with the 3x3 part of m applied to that vector
; plus the translation stored at offsets 36/40/44 of m.  The inputs are
; copied to locals first and all three results are computed before any is
; stored, so every output uses the original input values.
;
; Frame: [bp+0] saved ebp, [bp+4] far return address, arguments from [bp+8].
; edi, ecx and edx are saved and restored; eax, ebx and es are modified.


m	equ	[bp+8]          ; arguments
xp	equ	[bp+12]
yp	equ	[bp+16]
zp	equ	[bp+20]

x	equ	[bp-4]		; locals
y	equ	[bp-8]
z	equ	[bp-12]

	PUBLIC	_matrix_point

_matrix_point 	proc	far

	.386
	push	ebp
	mov	ebp,esp
	sub	esp,16			; room for locals (12 bytes are used)

	push	edi
	push	ecx
	push	edx

	les	bx,DWORD PTR xp		; copy the input coordinates to locals
	mov	eax,es:[bx]
	mov	x,eax
	les	bx,DWORD PTR yp
	mov	eax,es:[bx]
	mov	y,eax
	les	bx,DWORD PTR zp
	mov	eax,es:[bx]
	mov	z,eax

	les	di,DWORD PTR m		; es:di -> matrix

	; row 0 of m dotted with (x,y,z), plus translation at +36
	DOTPROD es:[di], es:[di+4], es:[di+8], x, y, z, es:[di+36]
	push	eax		; x result

	; row 1 of m dotted with (x,y,z), plus translation at +40
	DOTPROD es:[di+12], es:[di+16], es:[di+20], x, y, z, es:[di+40]
	push	eax		; y result

	; row 2 of m dotted with (x,y,z), plus translation at +44
	DOTPROD es:[di+24], es:[di+28], es:[di+32], x, y, z, es:[di+44]

	les	di,DWORD PTR zp
	mov	es:[di],eax		; store z result

	les	di,DWORD PTR yp
	pop	DWORD PTR es:[di]	; store y result

	les	di,DWORD PTR xp
	pop	DWORD PTR es:[di]	; store x result

	pop	edx
	pop	ecx
	pop	edi

	mov	esp,ebp			; release locals
	pop	ebp
	ret

_matrix_point	endp



;				/* rotate XYZ by matrix */
;
;void matrix_rotate(MATRIX m, long *xp, long *yp, long *zp)
;
; Replaces (*xp,*yp,*zp) with the 3x3 part of m (offsets 0..32) applied to
; that vector.  The translation entries of m are not read.  The inputs are
; copied to locals first and all three results are computed before any is
; stored.
;
; Frame: [bp+0] saved ebp, [bp+4] far return address, arguments from [bp+8].
; edi, ecx and edx are saved and restored; eax, ebx and es are modified.


m	equ	[bp+8]          ; arguments
xp	equ	[bp+12]
yp	equ	[bp+16]
zp	equ	[bp+20]

x	equ	[bp-4]		; locals
y	equ	[bp-8]
z	equ	[bp-12]

	PUBLIC	_matrix_rotate

_matrix_rotate 	proc	far

	.386
	push	ebp
	mov	ebp,esp
	sub	esp,16			; room for locals (12 bytes are used)

	push	edi
	push	ecx
	push	edx

	les	bx,DWORD PTR xp		; copy the input coordinates to locals
	mov	eax,es:[bx]
	mov	x,eax
	les	bx,DWORD PTR yp
	mov	eax,es:[bx]
	mov	y,eax
	les	bx,DWORD PTR zp
	mov	eax,es:[bx]
	mov	z,eax

	les	di,DWORD PTR m		; es:di -> matrix

	; row 0 of m dotted with (x,y,z)
	DOTPROD es:[di], es:[di+4], es:[di+8], x, y, z, 0
	push	eax		; x result

	; row 1 of m dotted with (x,y,z)
	DOTPROD es:[di+12], es:[di+16], es:[di+20], x, y, z, 0
	push	eax		; y result

	; row 2 of m dotted with (x,y,z)
	DOTPROD es:[di+24], es:[di+28], es:[di+32], x, y, z, 0

	les	di,DWORD PTR zp
	mov	es:[di],eax		; store z result

	les	di,DWORD PTR yp
	pop	DWORD PTR es:[di]	; store y result

	les	di,DWORD PTR xp
	pop	DWORD PTR es:[di]	; store x result

	pop	edx
	pop	ecx
	pop	edi

	mov	esp,ebp			; release locals
	pop	ebp
	ret

_matrix_rotate	endp



;/******************** MISC. VECTOR MATH ****************/
;
;		/* replaces column N of a matrix with cross of other 2 */
;		/* used to speed computations, repair matrix scaling   */
;               /* pointers into matrix: same segment! */

;void cross_product(long *c1, long *c2, long *c3);
;
; Each pointer addresses the first element of a matrix column; the other
; two elements are read or written 12 and 24 bytes further on.
; Writes c1 x c2 to the column at c3:
;   c3.x = y1*z2 - y2*z1
;   c3.y = z1*x2 - x1*z2
;   c3.z = x1*y2 - x2*y1
; Only the OFFSET of c2 is used; it is combined with the segment of c1, so
; c1 and c2 must lie in the same segment.  c3 is used as a full far pointer.
; All three terms are computed before anything is stored.
; edx, ecx, esi and edi are saved and restored; eax, ebx and es are modified.

c1	equ	[bp+8]          ; arguments
c2	equ	[bp+12]
c3	equ	[bp+16]

x1	equ	DWORD PTR es:[di]  ; parts of columns
x2	equ	DWORD PTR es:[si]
y1	equ	DWORD PTR es:[di+12]
y2	equ	DWORD PTR es:[si+12]
z1	equ	DWORD PTR es:[di+24]
z2	equ	DWORD PTR es:[si+24]

		PUBLIC	_cross_product

_cross_product	proc	far

	.386
	push	ebp
	mov	ebp,esp

	push	edx
	push	ecx
	push	esi
	push	edi

	les	di,DWORD PTR c1		; es:di -> first column
	mov	si,WORD PTR c2		; es:si -> second column (offset only)

	; x term
	CROSSTERM y1,z2,y2,z1
	push	eax

	; y term
	CROSSTERM z1,x2,x1,z2
	push	eax

	; z term, left in eax
	CROSSTERM x1,y2,x2,y1

	les	di,DWORD PTR c3     ; store new column
	; from here x1/y1/z1 address the destination column through es:di
	mov     z1,eax
	pop	y1
	pop	x1

	pop	edi
	pop	esi
	pop	ecx
	pop	edx

	mov	esp,ebp
	pop	ebp
	ret

_cross_product	endp



;/****************** MATRIX MANIPULATION ***************/


;              /* 3x3 section of matrices: A*B->C      */
;void matrix_mult(MATRIX a, MATRIX b, MATRIX c)
;
; Multiplies the 3x3 parts (offsets 0..32) of a and b and writes the nine
; products to offsets 0..32 of c.  Offsets 36..44 of c are not written.
; All nine results are collected in stack locals before c is touched, so c
; may be the same matrix as a or b.
; ds is pointed at b while computing and restored before returning; the
; arguments and locals are addressed through bp and so are unaffected.
; ds, esi, edi, ecx and edx are saved and restored; eax, ebx and es are
; modified.

a	equ	[bp+8]          ; arguments
b	equ	[bp+12]
c	equ	[bp+16]

r1	equ	DWORD PTR [bp-4]  ; locals
r2	equ	DWORD PTR [bp-8]  ; temp result allows A*B->A
r3	equ	DWORD PTR [bp-12]
r4	equ	DWORD PTR [bp-16]
r5	equ	DWORD PTR [bp-20]
r6	equ	DWORD PTR [bp-24]
r7	equ	DWORD PTR [bp-28]
r8	equ	DWORD PTR [bp-32]
r9	equ	DWORD PTR [bp-36]

	PUBLIC	_matrix_mult

_matrix_mult	proc	far

	.386
	push	ebp
	mov	ebp,esp
	sub	esp,40			; room for r1..r9 (36 bytes are used)

	push	ds
	push	esi
	push	edi
	push	ecx
	push	edx

	les	si,DWORD PTR a        ; pointers to source matrices
	lds	di,DWORD PTR b

	; row 0 of a times columns 0, 1, 2 of b
	DOTPROD es:[si],es:[si+4],es:[si+8],ds:[di],ds:[di+12],ds:[di+24],0
	mov	r1,eax

	DOTPROD es:[si],es:[si+4],es:[si+8],ds:[di+4],ds:[di+16],ds:[di+28],0
	mov	r2,eax

	DOTPROD es:[si],es:[si+4],es:[si+8],ds:[di+8],ds:[di+20],ds:[di+32],0
	mov	r3,eax

	; row 1 of a times columns 0, 1, 2 of b
	DOTPROD es:[si+12],es:[si+16],es:[si+20],ds:[di],ds:[di+12],ds:[di+24],0
	mov	r4,eax

	DOTPROD es:[si+12],es:[si+16],es:[si+20],ds:[di+4],ds:[di+16],ds:[di+28],0
	mov	r5,eax

	DOTPROD es:[si+12],es:[si+16],es:[si+20],ds:[di+8],ds:[di+20],ds:[di+32],0
	mov	r6,eax

	; row 2 of a times columns 0, 1, 2 of b
	DOTPROD es:[si+24],es:[si+28],es:[si+32],ds:[di],ds:[di+12],ds:[di+24],0
	mov	r7,eax

	DOTPROD es:[si+24],es:[si+28],es:[si+32],ds:[di+4],ds:[di+16],ds:[di+28],0
	mov	r8,eax

	DOTPROD es:[si+24],es:[si+28],es:[si+32],ds:[di+8],ds:[di+20],ds:[di+32],0
	mov	r9,eax

	les	di,DWORD PTR c     ; store matrix
	mov	eax,r1
	mov	es:[di],eax
	mov	eax,r2
	mov	es:[di+4],eax
	mov	eax,r3
	mov	es:[di+8],eax
	mov	eax,r4
	mov	es:[di+12],eax
	mov	eax,r5
	mov	es:[di+16],eax
	mov	eax,r6
	mov	es:[di+20],eax
	mov	eax,r7
	mov	es:[di+24],eax
	mov	eax,r8
	mov	es:[di+28],eax
	mov	eax,r9
	mov	es:[di+32],eax

	pop	edx
	pop	ecx
	pop	edi
	pop	esi
	pop	ds

	mov	esp,ebp			; release locals
	pop	ebp
	ret

_matrix_mult	endp


;		 /* full homogenous matrix multiply */
;
;void matrix_product(MATRIX a, MATRIX b, MATRIX c)
;
; Writes all twelve entries of c:
;   offsets 0..32  : 3x3 part of a times 3x3 part of b
;   offsets 36..44 : 3x3 part of a applied to b's translation (b+36..44),
;                    plus a's translation (a+36..44)
; All results are collected in stack locals before c is touched, so c may
; be the same matrix as a or b.
; ds is pointed at b while computing and restored before returning; the
; arguments and locals are addressed through bp and so are unaffected.
; ds, esi, edi, ecx and edx are saved and restored; eax, ebx and es are
; modified.

a	equ	[bp+8]          ; arguments
b	equ	[bp+12]
c	equ	[bp+16]

r1	equ	DWORD PTR [bp-4]  ; locals
r2	equ	DWORD PTR [bp-8]  ; temp result allows A*B->A
r3	equ	DWORD PTR [bp-12]
r4	equ	DWORD PTR [bp-16]
r5	equ	DWORD PTR [bp-20]
r6	equ	DWORD PTR [bp-24]
r7	equ	DWORD PTR [bp-28]
r8	equ	DWORD PTR [bp-32]
r9	equ	DWORD PTR [bp-36]

t1	equ	DWORD PTR [bp-40]  ; translation part of result
t2	equ	DWORD PTR [bp-44]
t3	equ	DWORD PTR [bp-48]

	PUBLIC	_matrix_product

_matrix_product	proc	far

	.386
	push	ebp
	mov	ebp,esp
	sub	esp,52			; room for r1..r9 and t1..t3 (48 bytes are used)

	push	ds
	push	esi
	push	edi
	push	ecx
	push	edx

	les	si,DWORD PTR a        ; pointers to source matrices
	lds	di,DWORD PTR b

	; row 0 of a times columns 0, 1, 2 of b
	DOTPROD es:[si],es:[si+4],es:[si+8],ds:[di],ds:[di+12],ds:[di+24],0
	mov	r1,eax

	DOTPROD es:[si],es:[si+4],es:[si+8],ds:[di+4],ds:[di+16],ds:[di+28],0
	mov	r2,eax

	DOTPROD es:[si],es:[si+4],es:[si+8],ds:[di+8],ds:[di+20],ds:[di+32],0
	mov	r3,eax

	; row 1 of a times columns 0, 1, 2 of b
	DOTPROD es:[si+12],es:[si+16],es:[si+20],ds:[di],ds:[di+12],ds:[di+24],0
	mov	r4,eax

	DOTPROD es:[si+12],es:[si+16],es:[si+20],ds:[di+4],ds:[di+16],ds:[di+28],0
	mov	r5,eax

	DOTPROD es:[si+12],es:[si+16],es:[si+20],ds:[di+8],ds:[di+20],ds:[di+32],0
	mov	r6,eax

	; row 2 of a times columns 0, 1, 2 of b
	DOTPROD es:[si+24],es:[si+28],es:[si+32],ds:[di],ds:[di+12],ds:[di+24],0
	mov	r7,eax

	DOTPROD es:[si+24],es:[si+28],es:[si+32],ds:[di+4],ds:[di+16],ds:[di+28],0
	mov	r8,eax

	DOTPROD es:[si+24],es:[si+28],es:[si+32],ds:[di+8],ds:[di+20],ds:[di+32],0
	mov	r9,eax
					; translational part

	; each row of a dotted with b's translation, plus a's own translation
	DOTPROD es:[si],es:[si+4],es:[si+8],ds:[di+36],ds:[di+40],ds:[di+44],es:[si+36]
	mov	t1,eax

	DOTPROD es:[si+12],es:[si+16],es:[si+20],ds:[di+36],ds:[di+40],ds:[di+44],es:[si+40]
	mov	t2,eax

	DOTPROD es:[si+24],es:[si+28],es:[si+32],ds:[di+36],ds:[di+40],ds:[di+44],es:[si+44]
	mov	t3,eax

	les	di,DWORD PTR c     ; store matrix
	mov	eax,r1
	mov	es:[di],eax
	mov	eax,r2
	mov	es:[di+4],eax
	mov	eax,r3
	mov	es:[di+8],eax
	mov	eax,r4
	mov	es:[di+12],eax
	mov	eax,r5
	mov	es:[di+16],eax
	mov	eax,r6
	mov	es:[di+20],eax
	mov	eax,r7
	mov	es:[di+24],eax
	mov	eax,r8
	mov	es:[di+28],eax
	mov	eax,r9
	mov	es:[di+32],eax
	mov	eax,t1
	mov	es:[di+36],eax
	mov	eax,t2
	mov	es:[di+40],eax
	mov	eax,t3
	mov	es:[di+44],eax

	pop	edx
	pop	ecx
	pop	edi
	pop	esi
	pop	ds

	mov	esp,ebp			; release locals
	pop	ebp
	ret

_matrix_product	endp



;void matrix_transpose(MATRIX a, MATRIX b)
;
; Writes the transpose of the 3x3 part of a (offsets 0..32) into b.
; For an orthogonal, unscaled rotation matrix the transpose is also the
; inverse; for any other matrix the result is only a transpose.
; Each mirrored pair is read completely before either element is written,
; so b may be the same matrix as a.  Offsets 36..44 of b are not written.
; ds, esi, edi and edx are saved and restored; eax and es are modified.

a	equ	[bp+8]          ; far pointer to source matrix
b	equ	[bp+12]         ; far pointer to destination matrix

	PUBLIC	_matrix_transpose

_matrix_transpose	proc	far

	.386
	push	ebp
	mov	ebp,esp

	push	edx
	push	edi
	push	esi
	push	ds

	lds	di,DWORD PTR b		; ds:di -> destination
	les	si,DWORD PTR a		; es:si -> source

	; mirrored pair [0][2] and [2][0]
	mov	edx,es:[si+8]
	mov	eax,es:[si+24]
	mov	ds:[di+24],edx
	mov	ds:[di+8],eax

	; mirrored pair [0][1] and [1][0]
	mov	edx,es:[si+4]
	mov	eax,es:[si+12]
	mov	ds:[di+12],edx
	mov	ds:[di+4],eax

	; diagonal entries are copied unchanged
	mov	eax,es:[si+32]
	mov	ds:[di+32],eax
	mov	eax,es:[si+16]
	mov	ds:[di+16],eax
	mov	eax,es:[si]
	mov	ds:[di],eax

	; mirrored pair [1][2] and [2][1]
	mov	edx,es:[si+20]
	mov	eax,es:[si+28]
	mov	ds:[di+20],eax
	mov	ds:[di+28],edx

	pop	ds
	pop	esi
	pop	edi
	pop	edx

	mov	esp,ebp
	pop	ebp
	ret

_matrix_transpose	endp



;void inverse_matrix(MATRIX a, MATRIX b)
;	/* old: Ax+b = c      */
;       /* b' = (1/A)(-b)     */
;       /* (1/A) = t(A)       */
;       /* new: (1/A)c+b' = x */

; basically, B = tr(A) | -At * tr(A)
;
; Writes into b the inverse of the rigid transform a:
;   offsets 0..32  : transpose of a's 3x3 part
;   offsets 36..44 : minus (that transpose applied to a's translation)
; The transpose is only the inverse for an orthogonal, unscaled 3x3 part.
;
; b may be the same matrix as a.  Each mirrored pair is read before it is
; written, and the new translation is held on the stack until every dot
; product has read the original translation; storing it element by element
; would feed already-replaced values into the later dot products.
;
; ds, ecx, esi, edi and edx are saved and restored; eax, ebx and es are
; modified.

a	equ	[bp+8]          ; arguments
b	equ	[bp+12]

	PUBLIC	_inverse_matrix

_inverse_matrix	proc	far

	.386
	push	ebp
	mov	ebp,esp

	push	ds
	push	ecx
	push	esi
	push	edi
	push	edx

	les	si,DWORD PTR a        ; pointers to source matrices
	lds	di,DWORD PTR b

	mov	eax,es:[si]           ; copy diagonals
	mov	ds:[di],eax
	mov	eax,es:[si+16]
	mov	ds:[di+16],eax
	mov	eax,es:[si+32]
	mov	ds:[di+32],eax

	mov	eax,es:[si+4]		; swap others
	mov	edx,es:[si+12]
	mov	ds:[di+4],edx
	mov	ds:[di+12],eax

	mov	eax,es:[si+8]
	mov	edx,es:[si+24]
	mov	ds:[di+8],edx
	mov	ds:[di+24],eax

	mov	eax,es:[si+28]
	mov	edx,es:[si+20]
	mov	ds:[di+28],edx
	mov	ds:[di+20],eax
				; now, use B to convert translational part

	; row 0 of b dotted with a's translation
	DOTPROD ds:[di],ds:[di+4],ds:[di+8],es:[si+36],es:[si+40],es:[si+44],0
	neg	eax
	push	eax			; new b+36, stored below

	; row 1 of b dotted with a's translation
	DOTPROD ds:[di+12],ds:[di+16],ds:[di+20],es:[si+36],es:[si+40],es:[si+44],0
	neg	eax
	push	eax			; new b+40, stored below

	; row 2 of b dotted with a's translation (last read of a+36..44)
	DOTPROD ds:[di+24],ds:[di+28],ds:[di+32],es:[si+36],es:[si+40],es:[si+44],0
	neg	eax
	mov	ds:[di+44],eax

	pop	DWORD PTR ds:[di+40]	; stores happen only after all reads
	pop	DWORD PTR ds:[di+36]

	pop	edx
	pop	edi
	pop	esi
	pop	ecx
	pop	ds

	mov	esp,ebp
	pop	ebp
	ret

_inverse_matrix	endp



;/*************** ANGLE/POSITION TO HOMOGENOUS MATRIX ************/


; Trig routines defined in another module.  The callers in this file push
; one dword argument, remove it themselves after the call, and read the
; result from eax.
extrn _isine   : PROC
extrn _icosine : PROC

; MATRIXDECL rname
;   Opens a public far procedure called rname with the argument list
;   (MATRIX m, long rx, long ry, long rz, long tx, long ty, long tz) and
;   emits the code shared by all angle-to-matrix routines:
;     - builds the stack frame and reserves the trig locals
;     - calls _isine and _icosine for rx, ry and rz and stores the six
;       results in sinx..cosz (each call takes one dword argument, the
;       caller removes it, and the result is taken from eax)
;     - saves esi and edi and loads es:di with m
;     - stores tx, ty, tz at offsets 36, 40, 44 of m
;   The routine body that follows fills offsets 0..32 and must finish with
;   MATRIXEND rname.
;
;   The frame is 32 bytes (24 are used).  A size that is a multiple of four
;   keeps the dword pushes on the same alignment as the dword locals.
;
;   esi and edi are saved only after the trig calls, and es:di is loaded
;   after them, so nothing here relies on the callees preserving es or di.
MATRIXDECL MACRO rname		; SETS UP MATRIX COMPUTE

m 	equ	[bp+8]          ; arguments
rx	equ	[bp+12]
ry	equ	[bp+16]
rz	equ	[bp+20]
tx	equ	[bp+24]
ty	equ	[bp+28]
tz	equ	[bp+32]

sinx	equ	DWORD PTR [bp-4]  ; locals: trig values of the three angles
siny	equ	DWORD PTR [bp-8]
sinz	equ	DWORD PTR [bp-12]
cosx	equ	DWORD PTR [bp-16]
cosy	equ	DWORD PTR [bp-20]
cosz	equ	DWORD PTR [bp-24]

	PUBLIC	rname

rname	proc	far

	.386
	push	ebp
	mov	ebp,esp
	sub	esp,32			; six dword locals, rounded up to a multiple of 4

	push	DWORD PTR rx        ; trig evaluate
	call	_isine
	add	esp,4
	mov	sinx,eax

	push	DWORD PTR ry
	call	_isine
	add	esp,4
	mov	siny,eax

	push	DWORD PTR rz
	call	_isine
	add	esp,4
	mov	sinz,eax

	push	DWORD PTR rx
	call	_icosine
	add	esp,4
	mov	cosx,eax

	push	DWORD PTR ry
	call	_icosine
	add	esp,4
	mov	cosy,eax

	push	DWORD PTR rz
	call	_icosine
	add	esp,4
	mov	cosz,eax

	push	esi			; esi is the scratch register of the routine bodies
	push	edi
	les	di,DWORD PTR m		; es:di = destination matrix

	mov	eax,tx			; translation column is copied unchanged
	mov	es:[di+36],eax
	mov	eax,ty
	mov	es:[di+40],eax
	mov	eax,tz
	mov	es:[di+44],eax

ENDM

; MATRIXEND rname
;   Closes a procedure opened with MATRIXDECL rname: restores edi and esi
;   in the reverse of the order MATRIXDECL pushed them, releases the stack
;   frame, returns, and ends the procedure.
MATRIXEND  MACRO rname
	pop	edi
	pop	esi

	mov	esp,ebp			; discard locals
	pop	ebp
	ret

rname	endp

ENDM


;void matrix_RXYZ(MATRIX m, long rx, long ry, long rz,
;	long tx, long ty, long tz)
; /* a fast standard matrix compute */
;
; MATRIXDECL has already evaluated the six trig values and stored the
; translation.  This body fills the 3x3 part (sx = sinx, cx = cosx, ...;
; every product goes through MULT29 or MMULT29):
;
;   [ cz*cy              -(cy*sz)             sy       ]
;   [ cx*sz + cz*sx*sy    cz*cx - sz*sx*sy   -(cy*sx)  ]
;   [ sz*sx - sy*cz*cx    cz*sx + cx*sz*sy    cx*cy    ]
;
; which is the product Rx*Ry*Rz with
;   Rx = [1 0 0; 0 cx -sx; 0 sx cx]
;   Ry = [cy 0 sy; 0 1 0; -sy 0 cy]
;   Rz = [cz -sz 0; sz cz 0; 0 0 1]
; esi holds the first term of each two-term entry.

MATRIXDECL _matrix_RXYZ

	; [0][0]
	MULT29	cosz,cosy
	mov	es:[di],eax

	; [0][1]
	MULT29	cosy,sinz
	neg	eax
	mov	es:[di+4],eax

	; [0][2]
	mov	eax,siny
	mov	es:[di+8],eax

	; [1][0]
	MULT29	cosx,sinz
	mov	esi,eax
	MMULT29 cosz,sinx,siny
	add	eax,esi
	mov	es:[di+12],eax

	; [1][1]
	MMULT29 sinz,sinx,siny
	mov	esi,eax
	MULT29	cosz,cosx
	sub	eax,esi
	mov	es:[di+16],eax

	; [1][2]
	MULT29	cosy,sinx
	neg	eax
	mov	es:[di+20],eax

	; [2][0]
	MMULT29 siny,cosz,cosx
	mov	esi,eax
	MULT29	sinz,sinx
	sub	eax,esi
	mov	es:[di+24],eax

	; [2][1]
	MULT29	cosz,sinx
	mov	esi,eax
	MMULT29 cosx,sinz,siny
	add	eax,esi
	mov	es:[di+28],eax

	; [2][2]
	MULT29	cosx,cosy
	mov	es:[di+32],eax

MATRIXEND _matrix_RXYZ


;void matrix_RYXZ(MATRIX m, long rx, long ry, long rz,
;	long tx, long ty, long tz)
; /* a fast standard matrix compute */
;
; MATRIXDECL has already evaluated the six trig values and stored the
; translation.  This body fills the 3x3 part (sx = sinx, cx = cosx, ...;
; every product goes through MULT29 or MMULT29):
;
;   [ cz*cy + sz*sx*sy    cz*sx*sy - cy*sz    cx*sy  ]
;   [ cx*sz               cz*cx              -sx     ]
;   [ cy*sz*sx - cz*sy    sz*sy + cz*cy*sx    cx*cy  ]
;
; which is the product Ry*Rx*Rz with
;   Rx = [1 0 0; 0 cx -sx; 0 sx cx]
;   Ry = [cy 0 sy; 0 1 0; -sy 0 cy]
;   Rz = [cz -sz 0; sz cz 0; 0 0 1]
; esi holds the first term of each two-term entry.

MATRIXDECL _matrix_RYXZ

	; [0][0]
	MULT29	cosz,cosy
	mov	esi,eax
	MMULT29 sinz,sinx,siny
	add	eax,esi
	mov	es:[di],eax

	; [0][1]
	MULT29	cosy,sinz
	mov	esi,eax
	MMULT29 cosz,sinx,siny
	sub	eax,esi
	mov	es:[di+4],eax

	; [0][2]
	MULT29	cosx,siny
	mov	es:[di+8],eax

	; [1][0]
	MULT29	cosx,sinz
	mov	es:[di+12],eax

	; [1][1]
	MULT29	cosz,cosx
	mov	es:[di+16],eax

	; [1][2]
	mov	eax,sinx
	neg	eax
	mov	es:[di+20],eax

	; [2][0]
	MULT29	cosz,siny
	mov	esi,eax
	MMULT29 cosy,sinz,sinx
	sub	eax,esi
	mov	es:[di+24],eax

	; [2][1]
	MULT29	sinz,siny
	mov	esi,eax
	MMULT29 cosz,cosy,sinx
	add	eax,esi
	mov	es:[di+28],eax

	; [2][2]
	MULT29	cosx,cosy
	mov	es:[di+32],eax

MATRIXEND _matrix_RYXZ



;void matrix_RXZY(MATRIX m, long rx, long ry, long rz,
;	long tx, long ty, long tz)
; /* a fast standard matrix compute */
;
; MATRIXDECL has already evaluated the six trig values and stored the
; translation.  This body fills the 3x3 part (sx = sinx, cx = cosx, ...;
; every product goes through MULT29 or MMULT29):
;
;   [ cz*cy              -sz       cz*sy             ]
;   [ sx*sy + cx*cy*sz    cz*cx    cx*sz*sy - cy*sx  ]
;   [ cy*sz*sx - cx*sy    cz*sx    cx*cy + sz*sx*sy  ]
;
; which is the product Rx*Rz*Ry with
;   Rx = [1 0 0; 0 cx -sx; 0 sx cx]
;   Ry = [cy 0 sy; 0 1 0; -sy 0 cy]
;   Rz = [cz -sz 0; sz cz 0; 0 0 1]
; esi holds the first term of each two-term entry.

MATRIXDECL _matrix_RXZY

	; [0][0]
	MULT29	cosz,cosy
	mov	es:[di],eax

	; [0][1]
	mov	eax,sinz
	neg	eax
	mov	es:[di+4],eax

	; [0][2]
	MULT29	cosz,siny
	mov	es:[di+8],eax

	; [1][0]
	MULT29	sinx,siny
	mov	esi,eax
	MMULT29 cosx,cosy,sinz
	add	eax,esi
	mov	es:[di+12],eax

	; [1][1]
	MULT29	cosz,cosx
	mov	es:[di+16],eax

	; [1][2]
	MULT29	cosy,sinx
	mov	esi,eax
	MMULT29 cosx,sinz,siny
	sub	eax,esi
	mov	es:[di+20],eax

	; [2][0]
	MULT29	cosx,siny
	mov	esi,eax
	MMULT29 cosy,sinz,sinx
	sub	eax,esi
	mov	es:[di+24],eax

	; [2][1]
	MULT29	cosz,sinx
	mov	es:[di+28],eax

	; [2][2]
	MULT29	cosx,cosy
	mov	esi,eax
	MMULT29 sinz,sinx,siny
	add	eax,esi
	mov	es:[di+32],eax

MATRIXEND _matrix_RXZY

	end			; end of this source module