
; Listing26b.s -
; code speed tester by ross for EAB members *

; Library vector offsets (negative offsets from the library base in a6).
; exec
_LVOOldOpenLibrary	equ     -408	; OldOpenLibrary(a1=name) -> d0=base or 0
_LVOCloseLibrary	equ	-414	; CloseLibrary(a1=base)
_LVORawDoFmt		equ	-522	; RawDoFmt(a0=fmt,a1=args,a2=PutChProc,a3=PutChData)
; dos
_LVOWrite		equ     -48	; Write(d1=handle,d2=buffer,d3=length)
_LVOOutput		equ     -60	; Output() -> d0=handle of the current output stream

	section	code,code

; start - program entry (run from the CLI).
; Times one call of `unpack` with the CIA-B TOD counter while interrupts are
; masked, then prints elapsed ms, byte count and KB/s to the CLI output.
;
; Register roles for the whole routine:
;   d2 = start time        d4 = execbase          d5 = DataStream (buffer)
;   d6 = output handle     d7 = dosbase           a2 = PutChProc (stuffChar)
;   a3 = PutChData         a4 = CIA-B TOD base    a5 = INTENA
; Out: d0 = 0 (always).
start	
	movea.l	$4.w,a6
	move.l	a6,d4			; d4=execbase
	lea	$dff09a,a5			; a5=intena
	lea	$bfd800,a4			; a4=ciab tod (base)
	lea	buffer,a3			; a3=PutChData (after DataStream init)
	move.l	a3,d5			; d5=DataStream
	lea	stuffChar(pc),a2	; a2=PutChProc

	lea	dosname(pc),a1
	jsr	_LVOOldOpenLibrary(a6)
	move.l	d0,d7			; d7=dosbase
	beq.b 	exit			; no dos.library: nothing to close, just leave
	movea.l d0,a6
	jsr 	_LVOOutput(a6)
	move.l 	d0,d6 			; d6=CLI handle
	beq.b 	close			; make sure we have a CLI handle

	move.w	#$4000,(a5)		; disable (clear the master interrupt enable bit)
.bwait	
	move.w	2-$9a(a5),d0	; blitter wait: read $dff002 relative to a5
	add.w	d0,d0			; $4000+$4000=$8000, i.e. move bit 14 into the sign bit
	bmi.b	.bwait			; loop while that bit is set (N flag)

	; if you need to init/move/setup packed data
	jsr	init

	bsr.b	ciabtod			; start time
	move.l	d0,d2			; d2=starttime

	jsr	unpack				; GO!	(returns d1=unpacked length)

	bsr.b	ciabtod			; end time (only d0 is touched, d1 survives)
	move.w	#$c000,(a5)		; enable
	sub.l	d2,d0			; elapsed (endtime-starttime), in 1/256 line units

							; 15734 for NTSC
	divu.w	#15625*256/1000,d0	; d0=ms (pal_hfreq*scale_down/granularity)
	bvs.b	close			; quotient > 65535 ms: out of range, print nothing
	bne.b	.inrng
	moveq	#1,d0			; 1 ms minimum (also avoids division by zero below)
.inrng	move.w	d0,(a3)+		; arg1, elapsed time (low word = quotient)

	move.l	d1,(a3)+		; arg2, unpacked raw data length
	divu.w	d0,d1			; bytes/ms: low word = quotient, high word = remainder
;	bvs.b	close			; overflow?!
	move.w	d1,(a3)+		; arg3, integer KB/sec
	swap	d1				; remainder into the low word
	mulu.w	#100,d1			; scale fraction
	divu.w	d0,d1			; remainder*100/ms -> two decimal places (0..99)
	move.w	d1,(a3)+		; arg4, fractional KB/sec (not KiB!)
	

	movea.l	d4,a6			; execbase
	movea.l	d5,a1			; args
	lea	text(pc),a0			; format string
	;a2=stuffChar
	;a3=buffer (now pointing just past the four arguments = text area)
	jsr	_LVORawDoFmt(a6)

	; Write only the characters RawDoFmt actually produced. The original
	; always wrote 66 bytes, which sent the terminating NUL and the unused,
	; zero-filled rest of the buffer to the console as well.
	movea.l	a3,a0			; a0 scans the formatted text
.strlen	tst.b	(a0)+
	bne.b	.strlen			; stop one byte past the terminating NUL
	move.l	a0,d3
	sub.l	a3,d3
	subq.l	#1,d3			; d3 = length without the NUL

	movea.l	d7,a6			; dosbase
	move.l	d6,d1 			; CLI handle
	move.l	a3,d2			; text buffer
	jsr	_LVOWrite(a6)

close	
	movea.l	d4,a6			; execbase
	movea.l d7,a1			; dosbase
	jsr	_LVOCloseLibrary(a6)
exit	
	moveq	#0,d0			; return code 0 on every path
	rts

; stuffChar - PutChProc callback handed to RawDoFmt in a2.
; In:  d0.b = character to emit, a3 = PutChData (current write position)
; Out: a3 advanced by one byte
stuffChar
	move.b  d0,(a3)+        ; Put data to output string
	rts

; ciabtod - read the 24-bit CIA-B time-of-day counter.
; In:  a4 = $bfd800 (TOD low byte register)
; Out: d0 = counter in bits 31-8, bits 7-0 zero
; Only d0 is modified. Whatever was in the upper bytes of d0 on entry is
; shifted out by the final lsl.l, so d0 does not need to be cleared first.
; NOTE(review): the high -> mid -> low read order presumably relies on the CIA
; latching the counter on the high-byte read until the low byte is read -
; confirm against the 8520 documentation.
ciabtod	
	move.b	$200(a4),d0		; $00bfda00 - todhi  - Horizontal sync event counter bits 23-16
	swap	d0				; (writing to todhi stops the clock)
	move.b	$100(a4),d0		; $00bfd900 - todmid - Horizontal sync event counter bits 15-8
	lsl.w	#8,d0
	move.b	(a4),d0			; $00bfd800 - todlo  - Horizontal sync event counter bits 7-0
							; (after a write to todhi the clock is only restarted by a write to the LSB event register)
	lsl.l	#8,d0			; scale up (counter wrap proof): a 32-bit subtract of two readings wraps correctly
	rts


; NUL-terminated library name for OldOpenLibrary.
dosname	dc.b	"dos.library",0
; RawDoFmt template for the four values stored at `buffer`:
; word ms, long bytes, word integer KB/s, word fractional KB/s.
; The fraction is a value 0..99 (hundredths), so it must be printed with two
; zero-padded digits: a plain %d would show 43.05 as "43,5".
text	dc.b	"Elapsed: %d ms, data: %ld bytes, speed: %d,%02d KB/s",$a,0

; Packed input file. unpack reads its first three longwords with movem.l
; (packed size, unpacked size, checksum), so the label must be word aligned:
; a longword access at an odd address raises an address error on a 68000.
; The preceding dc.b strings do not guarantee that on their own.
	even
crunched_data:
	incbin "Sources/loader.bk" ; from Solid Gold Source loader.asm

	section	bss,bss

; RawDoFmt argument stream (10 bytes, filled by start in this order),
; directly followed by the area the formatted text is written to.
buffer	ds.w	1	; time elapsed (ms)
	ds.l	1		; unpacked data length
	ds.w	1		; speed.i (integer part)
	ds.w	1		; speed.f (fractional part)
	ds.b	66		; PutChData: output text produced via stuffChar

; Destination for the unpacked data; unpack adds the unpacked length to this
; label to obtain the end pointer passed to decrunch.
buffer_decrunch:
	ds.l	10000	; 10000 longwords = 40000 bytes
buffer_decrunch_end:

*****************************************************************************
* bytekiller (from Solid Gold Source)
*****************************************************************************

	section	unpack,code_f

	; contract expected by the timing harness in start:
	; preserve all registers but d0/d1/a0/a1
	; input:  none in this listing (unpack sets up its own source and destination)
	; output: d1= unpacked data length

; init - optional setup hook, called once by start before the first
; timestamp is taken, so anything done here is not part of the measurement.
init	; insert your init code here (not timed)
	rts

; unpack - the timed routine: unpacks crunched_data into buffer_decrunch.
; In:  nothing
; Out: d1 = unpacked length taken from the file header
;      d0 = checksum value returned by decrunch
; d2-d7/a4 are saved and restored; a0/a1 are not preserved.
unpack:
	movem.l	d2-d7/a4,-(sp)		; keep the harness registers intact
	lea	buffer_decrunch,a4	; a4 = first byte of the output buffer
	lea	crunched_data,a0	; a0 = packed file, header first
	movem.l	(a0),d5-d7		; header: d5=packed size, d6=unpacked size, d7=checksum
	move.l	d6,d1			; result for the caller: d1 = unpacked length
	move.l	a4,d6			; decrunch wants the buffer start in d6 ...
	adda.l	d1,a4			; ... and the buffer end in a4 (it fills downwards)
	bsr	decrunch
	movem.l	(sp)+,d2-d7/a4
	rts

; decrunch - bytekiller unpacker. Reads the packed stream from its last
; longword towards its start and writes the output from the end of the
; destination area towards its start.
; In:
decrunch:
; a0 = crunched data start
; a4 = pointer to end of decrunched area
; d5 = crunched size
; d6 = pointer to start of decrunched area
; d7 = checksum value from the header (every stream longword is EORed into it)
; Out:
; d0 = final value of d7
; Modifies d0-d4, d7, a0, a1, a4.
;
; d0 always holds the current bit buffer. Bits are taken with lsr.l #1: the
; shifted-out bit lands in C/X, and a zero result means the buffer is used up
; (the bit just shifted out was the marker), so nextword is called to refill
; it and deliver the real bit in C/X.

	; init bytekiller decruncher, get first word
	lea	8(a0,d5.l),a0		; a0 ptr to current word to decrunch (last longword: 12-byte header + size - 4)
	move.l	(a0),d0			; first longword is used as stored, no marker bit is inserted here
	eor.l	d0,d7

bytekiller_decrunch:
	; first command bit: 0 -> cmd 00/01, 1 -> cmd 1xx
	lsr.l	#1,d0
	bne	.1
	bsr	nextword
.1:	bcs	.cmd1xx
	; presets for cmd 01: 8 offset bits, n=1 (two bytes copied)
	moveq	#8-1,d4
	moveq	#1,d3
	lsr.l	#1,d0			; second command bit
	bne	.2
	bsr	nextword
.2:	bcs	.copy_n_from_d

	; cmd 00: nnn [dddd dddd]  -  copy n+1 times next d to *dest
	moveq	#3-1,d4
	bsr	getbits
	move.w	d2,d3

.copy_d_from_stream:
; copy n+1 times next 8-bit word from stream to *dest
; d3 = n
	moveq	#8-1,d4
.3:	lsr.l	#1,d0
	bne	.4
	bsr	nextword
.4:	roxl.l	#1,d2			; shift the bit (in X) into d2; only the low byte is stored below
	dbf	d4,.3
	move.b	d2,-(a4)
	dbf	d3,.copy_d_from_stream
	bra	check_done

.cmd111:
	; cmd 111: nnnn nnnn [dddd dddd]  -  copy n+9 times next d to *dest
	moveq	#8-1,d4
	bsr	getbits
	move.w	d2,d3
	addq.w	#8,d3			; n+8 (the dbf loop then runs n+9 times)
	bra	.copy_d_from_stream

.cmd1xx:
	; two more command bits select between 100/101, 110 and 111
	moveq	#2-1,d4
	bsr	getbits
	cmp.b	#2,d2
	blt	.cmd10x
	cmp.b	#3,d2
	beq	.cmd111

	; cmd 110: nnnn nnnn dddd dddd dddd
	; copy n+1 times *(dest+d) to *dest
	moveq	#8-1,d4
	bsr	getbits			; n
	move.w	d2,d3
	moveq	#12-1,d4		; 12 d-bits
	bra	.copy_n_from_d

.cmd10x:
	; cmd 100: dddd dddd d  -  copy 3 times *(dest+d) to *dest
	; cmd 101: dddd dddd dd -  copy 4 times *(dest+d) to *dest
	moveq	#9-1,d4
	add.w	d2,d4			; 9 or 10 d-bits
	addq.w	#2,d2
	move.w	d2,d3			; n = cmd&3 + 2

.copy_n_from_d:
	; copy n+1 times from *(dest+d) to *dest
	; d4 = bitcount for d -1
	; d3 = n
	bsr	getbits			; get d -> d2
	lea	(a4,d2.w),a1		; source = already unpacked data d bytes above the write pointer
.copyloop:
	move.b	-(a1),-(a4)
	dbf	d3,.copyloop

check_done:
	; continue while the write pointer is still above the start of the area
	; NOTE(review): blt is a signed comparison of two addresses; it only
	; matches an unsigned compare while both pointers are on the same side
	; of $80000000 - confirm this is acceptable for the target machines.
	cmp.l	a4,d6
	blt	bytekiller_decrunch

	move.l	d7,d0			; return checksum
	rts

; getbits - read d4+1 bits from the stream, first bit read ends up as the
; most significant bit of the result.
; In:  d0 = bit buffer, d4.w = number of bits - 1, a0/d7 as for nextword
; Out: d2.w = value (only the low word is cleared beforehand, the upper word
;      of d2 is not defined), d4.w = -1 after the dbf loop,
;      d0/a0/d7 updated whenever a refill happened
getbits:
; d4 = bits to get - 1
; -> d2 = result, extended to 16 bits

	clr.w	d2
.1:	lsr.l	#1,d0			; next bit -> C/X, Z set when the buffer is used up
	bne	.2
	bsr	nextword		; refill; returns with the real bit in C/X
.2:	roxl.l	#1,d2			; shift the bit in from X
	dbf	d4,.1
	rts
	
; nextword - refill the bit buffer from the longword below the current one.
nextword:
; Get the next 32-bit word into the decrunching stream.
; (This listing reads from memory only; nothing is loaded from disk here.)
; a0 = pointer to current word in stream
; d7 = checksum
; -> a0 = moved down by 4
; -> d7 = new checksum
; -> d0 = next word, X/C = next bit

	move.l	-(a0),d0
	eor.l	d0,d7
	move.w	#$10,ccr		; set X (and clear the other flags)
	roxr.l	#1,d0			; X enters as marker in bit 31, bit 0 leaves as the next bit in X/C
	rts

	end


Erklärung:

Start: 
asmone: >r , >a , >WO	, >Filename:cstbk	
vasm: vasmm68k_mot -Fhunkexe -kick1hunks -nocase -quiet -o cstbk Listing26b.s
Amiga shell>cstbk		; Start des Programms über shell/cli

1. Ermittlung der Ausführungszeit
===============================================================================

; CIA B TOD : TOD 00008d (000000) ALARM 0000ac --
;nop									; D0 003007E0
move.b (a4,$0200) == $00bfda00,d0		; D0 00300700	; hi - stoppt
swap.w d0								; D0 07000030	; nach oben bringen
move.b (a4,$0100) == $00bfd900,d0		; D0 07000000	; mid			D0 070000xx
lsl.w #$08,d0							; D0 07000000	; verschieben	D0 0700xx00
move.b (a4),d0							; D0 0700008E
lsl.l #$08,d0							; D0 00008E00	; * 256
rts  == $00c3c800
;------------------------------------------------------------------------------
>g
Breakpoint 0 triggered.
Cycles: 2065374 Chip, 4130748 CPU. (V=142 H=57 -> V=163 H=185)
VPOS: 163 ($0a3) HPOS: 185 ($0b9) COP: $0001f6b4
  D0 00000000   D1 00006200   D2 00008E00   D3 00000FA8
  D4 00C00276   D5 00C60F70   D6 0030369D   D7 00C06290
  A0 00C3C8B4   A1 00C60FDA   A2 00C3C850   A3 00C60F70
  A4 00BFD800   A5 00DFF09A   A6 00C06290   A7 00C3FB98
USP  00C3FB98 ISP  00C80000
SR=0004 T=00 S=0 M=0 X=0 N=0 Z=1 V=0 C=0 IM=0 STP=0
Prefetch 4e71 (NOP) 102c (MOVE) Chip latch 00000000
00c3c854 4e71                     nop
Next PC: 00c3c856
>
;------------------------------------------------------------------------------
; CIA B TOD : TOD 002418 (00008e) ALARM 0000ac --
;nop									; D0 00000000
move.b (a4,$0200) == $00bfda00,d0		; D0 00000000	; hi - stoppt
swap.w d0								; D0 00000000	; nach oben bringen
move.b (a4,$0100) == $00bfd900,d0		; D0 00000024	; mid			D0 070000xx
lsl.w #$08,d0							; D0 00002400	; verschieben	D0 0700xx00
move.b (a4),d0							; D0 00002419
lsl.l #$08,d0							; D0 00241900	; * 256
rts  == $00c3c800

;------------------------------------------------------------------------------
>d c3c80e 4								; Differenz = Ausführungszeit
sub.l d2,d0								; D0 00241900   D1 00006200   D2 00008E00 -->  D0 00238B00
;>?$241900-$8E00
;$00238B00 = %00000000`00100011`10001011`00000000 = 2329344 = 2329344

divu.w #$0fa0,d0						; D0 05400246	-> REST.GANZ
;>?$238B00/!4000
;$00000246 = %00000000`00000000`00000010`01000110 = 582 = 582
;>		; 582ms siehe Ausgabe

divu.w	#15625*256/1000,d0	; d0=ms (pal_hfreq*scale_down/granularity)
-> divu.w #4000,d0		=>	elapsed time/4000=> x ms
; weil zuvor mit 256 erweitert wurde - Genauigkeit
>?1<<8
$00000100 = %00000000`00000000`00000001`00000000 = 256 = 256
PAL: 50 Halbbilder pro Sekunde a 312,5 Zeilen=15625 Zeilen/s
/1000 = 1s=1000ms

; das Ergebnis, d.h. dieser Wert wird als erstes Argument gespeichert

>m 00C60F70 1	; a3
00C60F70 0246 0000 0000 0000 0000 0000 0000 0000  .F..............
>

2. Bytekiller (from Solid Gold Source)
===============================================================================

Bei Aufruf der bytekiller decrunch Routine muss in a4 der Zeiger auf das Ende
des freien Speichers für die zu entpackenden Daten stehen. Der Algorithmus
entpackt dann 'rückwärts' zum Anfang hin.

Aus diesem Grund wird die Größe der zu entpackenden Daten der Anfangsadresse
hinzugefügt:

	lea	buffer_decrunch,a4
	add.l	d1,a4

Die gecrunchte Dateigröße erhalten wir durch das Auslesen der Kopfdaten:

	lea	crunched_data,a0	; Start gepackte Daten
	movem.l	(a0),d5-d7		; d5=crunched, d6=decrunched, d7=chk

Der Beginn der entpackten Daten wird aber in d6 benötigt. Deswegen wird dieser
Zeiger zuvor kopiert.
	lea	buffer_decrunch,a4	
	move.l	a4,d6			; Start entpackte Daten
	
decrunch:
; a0 = crunched data start
; a4 = pointer to end of decrunched area
; d5 = crunched size
; d6 = pointer to start of decrunched area

Die entpackten Daten werden vom Ende her aufgefüllt.