	TITLE   FASTMULP - Fast multisided convex poly filling blitter


	COMMENT $

		Written  by Dave Stampe
		Copyright (c) 1993
		Not for commercial use, so get permission
		before marketing code using this stuff!

		$

include vdmodel.inc	; sets memory model

		.DATA

; Work lists used by _m_fastpoly.  Every entry is one dword holding a
; vertex as two words: x at offset 0, y at offset 2.  Each list has room
; for 40 entries and every dword starts out as the filler value shown.
; The three lists are declared back to back, so a write past the end of
; one list lands in the next.

sverts          dd 40 dup (0CCh)	; caller's vertices after the sideways swap
lverts          dd 40 dup (0CCh)	; same ring, rotated so the lowest-y vertex is first (left edge list)
rverts          dd 40 dup (0DDh)	; first vertex followed by the rest in reverse order (right edge list)

		.CODE vdriver
		.386

include videfs.inc

				; big table more eff. than masking
				; start byte lookup table
				; indexed by the left x coordinate (read through CS in
				; trapezoid); 160 groups of 4 = 640 entries, value
				; chosen by x mod 4
stmask:         REPT    160
		db      15,14,12,8
		ENDM
				; end byte lookup table
				; indexed by the right x coordinate, same size/layout
fnmask:         REPT    160
		db      1,3,7,15
		ENDM

	extrn   _dpaddr         ; page base address

; Everything below is a BP-relative alias into _m_fastpoly's stack frame.
; trapezoid is called with the same BP and uses the same aliases.

nv      equ     [bp+6]          ; arguments to _tpoly
				; nv    = vertex count (word)
verts   equ     [bp+8]		; far pointer to the x,y word pairs (loaded with LES)

vline   equ     [bp-2]          ; video base addr. of line
lines   equ     [bp-4]          ; number of lines to fill
l_incr  equ     [bp-8]		; dword: left edge x step per line, as returned by slope
r_incr  equ     [bp-12]		; dword: right edge x step per line, as returned by slope
aswap   equ     [bp-16]         ; plane mask reg. swap value
				; (holds 03C5h; exchanged with DX around each line)
lptr    equ     [bp-18]		; offset of the top vertex of the current left edge (in lverts)
rptr    equ     [bp-20]		; offset of the top vertex of the current right edge (in rverts)
cur_y   equ     [bp-22]		; y of the next line to be drawn

mcolor  equ     [bp+12]		; color argument; reduced in place to its 00F0h bits
gmask   equ     [bp+14]		; mask ANDed into every plane mask; XORed with cmask after each line
cmask   equ     [bp+16]		; bits toggled in gmask from one line to the next

orig_y1 equ     [bp-30]         ; original Y1 value before vertex rotation
				; (the lowest y found in sverts, i.e. the polygon's top line)
mtl_color equ   [bp-32]         ; color cycle number
   ;
   ;   m_fastpoly(int count, int far *vertices, int color, int gmask, int cmask)
   ;       vertices must be in CCW order, stored as x,y word pairs!

		PUBLIC  _m_fastpoly

;-----------------------------------------------------------------------
; _m_fastpoly - fill one convex polygon.
;
; C view:  m_fastpoly(int count, int far *vertices, int color,
;                     int gmask, int cmask)      (far call, caller pops)
;
; In:   nv     [bp+6]   vertex count; must be 2..39.  The 40-entry work
;                       lists also have to hold a stopper entry, so any
;                       other count returns without drawing.
;       verts  [bp+8]   far pointer to x,y word pairs, CCW order
;       mcolor [bp+12]  bits 4-7 = hue; bits 0-3 plus bit 8 = starting
;                       position in the 32-step shade cycle
;       gmask  [bp+14]  mask ANDed into every plane mask
;       cmask  [bp+16]  bits XORed into gmask after every line
; Out:  nothing
; Saves:    bp, si, di, dx, cx (16-bit halves only)
; Clobbers: ax, bx, es, flags, upper halves of eax/ebx/ecx/edx/esi;
;           clears the direction flag; overwrites the mcolor and gmask
;           argument slots; leaves the last plane mask in port 03C5h.
; Assumes DS addresses sverts/lverts/rverts and _dpaddr.
;-----------------------------------------------------------------------
_m_fastpoly     proc    far

	.386
	push    bp
	mov     bp,sp
	sub     sp,36
	push    si
	push    di
	push    dx
	push    cx

		; REJECT VERTEX COUNTS THE WORK LISTS CANNOT HOLD
		; (a count of 0 or 1 would also make a LOOP below run
		;  with CX = 0, i.e. 65536 times)

	mov     cx,nv
	cmp     cx,2
	jl      nv_bad
	cmp     cx,39           ; lists are 40 entries: 39 vertices + stopper
	jle     nv_ok
nv_bad:
	jmp     end_poly        ; nothing drawn, normal epilogue
nv_ok:
			   ; CREATE L, R SIDE LISTS OF VERTICES

		; DO THE SIDEWAYS SWAP

	les     si,verts
	mov     di,OFFSET sverts   ; cx still holds nv
	mov     ax,es:[si]    ; which window?
	cmp     ax,400          ; first vertex x >= 400 selects 2nd window
	jae     chloop
clloop:
	mov     ax,es:[si+2]    ; new x = y
	mov     bx,399          ; new y = 399-x
	sub     bx,es:[si]
	mov     ds:[di],ax
	mov     ds:[di+2],bx
	add     si,4
	add     di,4
	loop    clloop
	jmp     done_conv

chloop:
	mov     ax,es:[si]      ; new y = x - 400
	sub     ax,400
	mov     bx,319          ; new x = 319-y
	sub     bx,es:[si+2]
	mov     ds:[di+2],ax
	mov     ds:[di],bx
	add     si,4
	add     di,4
	loop    chloop

done_conv:
	mov     si,OFFSET sverts   ; find lowest y (any vertex on top of poly)
	mov     cx,nv
	mov     ax,30000
fmin:
	cmp     ax,ds:[si+2]
	jl      notlower           ; ties replace: the LAST lowest vertex wins
	mov     ax,ds:[si+2]
	mov     bx,si              ; bx = address of lowest vertex
	mov     dx,cx              ; dx = vertices from it to end of list
notlower:
	add     si,4
	loop    fmin

	mov     orig_y1,ax
	mov     cx,dx
	mov     si,bx
	mov     di,OFFSET lverts
clsloop:                            ; copy from lowest to end
	mov     eax,ds:[si]
	mov     ds:[di],eax
	add     si,4
	add     di,4
	loop    clsloop

	mov     si,OFFSET sverts
cleloop:                            ; copy from start to lowest
	mov     eax,ds:[si]         ; (always copies at least one entry, so
	mov     ds:[di],eax         ;  when the lowest vertex is sverts[0]
	add     si,4                ;  it is duplicated into lverts[nv])
	add     di,4
	cmp     si,bx
	jb      cleloop

	mov     si,OFFSET lverts    ; right list in reverse order
	mov     lptr,si
	mov     di,OFFSET rverts    ; except first element same
	mov     rptr,di
	mov     eax,ds:[si]
	mov     ds:[di],eax
	add     si,4
	add     di,4
	mov     cx,nv
	dec     cx
	add     di,cx               ; di = rverts + 4*nv
	add     di,cx
	add     di,cx
	add     di,cx
	mov     ax,-1
	mov     ds:[di+2],ax        ; "stopper": rverts[nv].y = -1
crrloop:                            ; copy remaining elements
	sub     di,4
	mov     eax,ds:[si]
	mov     ds:[di],eax
	add     si,4
	loop    crrloop
				    ; si = lverts + 4*nv here
	mov     WORD PTR ds:[si+2],-1   ; "stopper": lverts[nv].y = -1, the
				    ; same slot the right list uses, so
				    ; slope sees a negative line count.
				    ; (was [si+4]: the x of entry nv+1,
				    ;  which hits rverts[0].x at nv = 39)
			      ; VIDEO SETUP
	cld
	mov     ax,03c5h                ; setup DX adr. swap value
	mov     aswap,ax
	mov     ax,videoseg               ; set video segment
	mov     es,ax

	test    BYTE PTR orig_y1,1      ; make sure that gmask is aligned correctly
	jz      even
	mov     al,cmask                ; odd top line: start with flipped mask
	xor     gmask,al
even:                           ; make sure the starting color is correct...
	mov     al,mcolor       ; get offset
	and     al,0Fh
	test    WORD PTR mcolor,100h     ; extra bit of offset
	jz      firsthalf
	add     al,16
firsthalf:
	add     al,orig_y1              ; add line number
	mov     mtl_color,ax            ; only the low byte is used later
	test    al,10h                  ; second half of cycle counts down
	jz      cy1
	not     al
cy1:    and     al,0Fh
	and     WORD PTR mcolor,00F0h   ; keep only the hue bits from now on
	or      al,mcolor       ; combine hue and shade to get starting color
	mov     bx,colortable   ; low byte still in BL
	mov     bl,al           ; set starting color for this triangle
				; NOTE(review): this replaces colortable's low
				; byte while trapezoid ADDS the index; the two
				; agree only if that low byte is 0 - confirm
	mov     al,es:[bx]              ; set initial color

	mov     ax,WORD PTR [lverts+2]           ; set current line
	mov     cur_y,ax
	mov     bx,bypitch                   ; compute starting line adr
	mul     bx                      ; (clobbers dx; dx is loaded below)
	add     ax,WORD PTR ds:_dpaddr
	mov     vline,ax

	mov     cx,nv          ; FIND FIRST EDGES IN POLY
	dec     cx
	mov     bx,lptr
lslinit:
	push    cx                ; slope returns the line count in cx
	call    near ptr slope    ; find first left edge
	pop     cx                ; (pop leaves slope's flags intact)
	jnz     lfinit
	add     bx,4              ; horizontal edge: try the next pair
	loop    lslinit

	jmp     end_poly          ; unable to find next line: exit

lfinit:
	mov     l_incr,eax
	mov     lptr,bx

	mov     dx,[bx]           ; compute L start
	shl     edx,16
;       add     edx,08000h        ; force left side to round up

	mov     bx,rptr
rslinit:
	call    near ptr slope    ; find first right edge
	jnz     rfinit
	add     bx,4
	jmp     rslinit
rfinit:
	mov     r_incr,eax
	mov     rptr,bx

	mov     si,[bx]         ; compute R start
	shl     esi,16

				; POLY SLICE LOOP
				; edx = left x, esi = right x (16.16 fixed)
sliceloop:
	mov     bx,rptr
	mov     ax,[bx+6]       ; bottom of right edge
	mov     bx,lptr
	cmp     ax,[bx+6]       ; bottom of left edge
	jb      fbots           ; unsigned: a -1 stopper is never the minimum
	mov     ax,[bx+6]
fbots:
	mov     cx,ax
	sub     cx,cur_y                ; figure line count
	mov     cur_y,ax
	mov     lines,cx
	call    near ptr trapezoid      ; draw it

	mov     bx,lptr
	mov     ax,cur_y
	cmp     ax,[bx+6]               ; left edge ended?
	jnz     not_left

fll:
	add     bx,4
	call    near ptr slope
	jl      end_poly                ; no more left edges
	jnz     fleft
	jmp     fll                     ; horizontal edge: skip it
fleft:
	mov     l_incr,eax
	mov     lptr,bx
	mov     dx,[bx]           ; compute L start
	shl     edx,16
;       add     edx,08000h        ; force left side to round up

not_left:
	mov     bx,rptr
	mov     ax,cur_y
	cmp     ax,[bx+6]               ; right edge ended?
	jnz     not_right

frl:
	add     bx,4
	call    near ptr slope
	jl      end_poly          ; no more right edges
	jnz     freft
	jmp     frl               ; horizontal edge: skip it
freft:
	mov     r_incr,eax
	mov     rptr,bx
	mov     si,[bx]           ; compute R start
	shl     esi,16

not_right:
	jmp     sliceloop

end_poly:
	pop     cx
	pop     dx
	pop     di                ; exit code
	pop     si
	mov     sp,bp
	pop     bp
	ret

_m_fastpoly     endp





;-----------------------------------------------------------------------
; trapezoid - fill `lines` consecutive scan lines between the current
; left and right edges.  Near routine; it has no frame of its own and
; reaches vline, lines, l_incr, r_incr, aswap, gmask, cmask, mcolor and
; mtl_color through the caller's BP.
;
; In:   edx = left x  << 16 (16.16 fixed point)
;       esi = right x << 16 (16.16 fixed point)
;       es  = video segment, direction flag clear
; Out:  edx, esi advanced by l_incr / r_incr once per line drawn;
;       vline advanced one bypitch per line; lines counted down;
;       gmask XORed with cmask and mtl_color incremented once per line
; Clobbers: eax, ebx, ecx, di, flags
; At least one line is always processed (count is tested at the bottom).
;-----------------------------------------------------------------------
trapezoid:      ; call with edx = left<<16, esi = right<<16
		; vline, lines, l_incr, r_incr all set up

	mov     ebx,edx                 ; convert fixed pt to integer
	sar     ebx,16
	mov     ecx,esi
	sar     ecx,16

nextline:
		; start of fast h line blitter:
		;  bx=left side, cx=right side, vline=line start

	xchg    dx,aswap                    ; dx = 03C5h; parks the low word of edx in aswap

	mov     al,BYTE PTR cs:[bx+stmask]  ; left mask
	and     al,gmask
	shr     bx,2                        ; left address

	mov     di,cx
	mov     ah,BYTE PTR cs:[di+fnmask]  ; right mask
	and     ah,gmask
	shr     cx,2                        ; right address

	mov     di,vline                    ; start address
	add     di,bx
	sub     cx,bx                       ; number of bytes-1
	je      short onebyte               ; both ends share one byte
	jc      short doneline              ; clip trap
					    ; (right byte is left of the left byte: draw nothing)

	cmp     cx,8              ; test if big enough for word speedup
	jge     faster

		; short span: byte stores only.
		; NOTE(review): the value stored looks incidental (AL/AH
		; just hold plane masks) - presumably the VGA is in a
		; latch-copy write mode set up elsewhere; confirm.
	out     dx,al
	stosb                               ; mask first byte
	dec     cx                          ; mask rest
	jz      nomore                      ; no full bytes between the ends
	mov     al,gmask
	out     dx,al

	rep     stosb
nomore:
	mov     al,ah
	out     dx,al
	mov     es:[di],ah                  ; mask last byte
	jmp     short doneline

faster:
		; long span: middle filled with word stores
	out     dx,al
	stosb                               ; mask first byte
	dec     cx                          ; mask rest
	mov     al,gmask
	out     dx,al

	test    di,1                        ; bring di to an even address first
	jz      notodd
	stosb
	dec     cx
notodd:
	mov     bx,cx                       ; remember the byte count
	shr     cx,1
	rep     stosw
	test    bx,1                        ; odd byte left over?
	jz      noteodd
	stosb
noteodd:
	mov     al,ah
	out     dx,al
	mov     es:[di],al                  ; mask last byte
	jmp     short doneline

onebyte:
	and     al,ah                   ; only planes inside both ends
	out     dx,al
	mov     es:[di],al              ; single byte mask

doneline:
	xchg    dx,aswap                ; restore low word of edx, park 03C5h again
	mov     ax,bypitch                   ; next line address
	add     vline,ax

	add     edx,DWORD PTR l_incr    ; step left, right edges
	add     esi,DWORD PTR r_incr
	mov     ebx,edx                 ; convert fixed pt to integer
	sar     ebx,16
	mov     ecx,esi
	sar     ecx,16

	push    bx                      ; bx (next left x) is borrowed as scratch
	inc     BYTE PTR mtl_color      ; next color in sequence
	mov     bl,mtl_color
	test    bl,10h                  ; bit 4 set: inverted below, so the shade counts back down
	jz      cy2
	not     bl
cy2:    and     bl,0Fh
	or      bl,mcolor       ; combine hue and shade to get color
	xor     bh,bh
	add     bx,colortable   ; figure what entry to load
	mov     al,es:[bx]      ; read entry into latches

	mov     bl,cmask        ; get mask for flipping bits
	xor     gmask,bl        ; alter transparency mask
	pop     bx

	dec     WORD PTR lines          ; done lines?
	jg      nextline

donetri:                                ; finished all drawing
exit:
	retn



;-----------------------------------------------------------------------
; slope - set up one polygon edge.
;
; In:   ds:bx -> two consecutive vertices (x,y word pairs):
;                [bx],[bx+2] = top x,y    [bx+4],[bx+6] = bottom x,y
; Out:  cx    = bottom y - top y (upper half of ecx is zero)
;       flags = result of testing cx: ZF if the pair is on one line,
;               SF (so JL is taken) if the pair runs upward
;       eax   = x step per line in 16.16 fixed point when cx > 1:
;               (dx << 16) / cx truncated toward zero, then +1 if the
;               result is positive
;             = 0 when dx is 0
;             = dx sign-extended and NOT shifted when cx is 1
;             = unchanged when cx <= 0
; Preserves edx.  Near routine, uses no stack frame.
;-----------------------------------------------------------------------
slope:
	push    edx                     ; idiv needs edx:eax
	movzx   ecx,word ptr ds:[bx+6]
	sub     cx,ds:[bx+2]            ; cx = height of edge
	jle     slope_done              ; flat or upward: eax untouched

	mov     ax,ds:[bx+4]
	sub     ax,ds:[bx]              ; ax = dx, flags say whether it is 0
	cwde                            ; eax = dx (flags not affected)
	jz      slope_done              ; vertical edge: step is 0
	cmp     cx,1
	je      slope_done              ; one-line edge: skip the divide

	cdq                             ; edx = sign of dx
	shl     eax,16                  ; edx:eax = dx << 16
	idiv    ecx                     ; eax = step, truncated toward zero
	test    eax,eax
	jle     slope_done              ; negative steps are already rounded up
	inc     eax                     ; round positive steps up
slope_done:
	pop     edx
	test    cx,cx                   ; ZF/SF for the caller's JNZ / JL
	retn

	end
