; Sseraf by Řrřola [web]
;-----------------------------------------------------------------------
; SSERAF // a 256b intro by Rrrola <[email protected]>
;   (the contact address and the ASCII-art logo did not survive in this
;    copy of the source; the bracketed text above is kept as found)
; greets to all sizecoders pushing the boundaries
;
; NASM syntax, one instruction per line, entry at offset 100h.
; The instruction stream is order- and encoding-sensitive (see the two
; `db` lines and the "after decompression" notes): do not reorder it.
;
; Outline:
;   init / palette_loop  int 0x10 video setup, float table built with
;                        pushes, one palette entry per pass
;   frame_loop           per frame: six rotation constants from the FPU
;   batch_loop           per 4-pixel batch: plot the previous batch,
;                        store XY, trace 24 steps, shade
;   MAPSTORE / MAP       distance estimate of the KIFS fractal, 4 lanes
;-----------------------------------------------------------------------

        org     100h                    ; assume ax=bx=0 cx=0xff si=0x100 sp=-2
        cpu     P4                      ; we need SSE2 for cvtdq2ps

;UNPACK: ; after decompression: ax=0x000f bx=0x154 cx=0 si=0x1cf di=0x357

;Prepare a table of float32(2 * 4^i)
;[0xfff0]=0x00000000 [0xffe0]=0x01000000 ... [0xf000]=0xff000000 [0xeff0]=0x00000000 ...
;Every pass of palette_loop pushes one dword (cx:di) and issues one
;int 0x10, so the table and the palette are produced by the same loop.

        pop     di                      ; sp: -2 -> 0 (di is presumably 0 here - confirm)
        mov     al, 0x13                ; ah=0
palette_loop:
        int     0x10                    ; first pass ax=0x0013, afterwards ax=0x1010
        imul    cx, sp, -16             ; cx = -16*sp; ch becomes the top byte of the entry
        mov     cl, 0                   ; store each value 4 times
        push    cx
        push    di                      ; sp-=4
        dec     bx                      ; bl=I = 0..255, J=I&7: .....jjj
        imul    ax, bx, 8
        and     al, 0x3f                ; add al,7
        mov     dh, al                  ; dh=R = 8*J: 00jjj000
        mul     bl
        mov     ch, ah                  ; ch=G = 8*J*I, cl=B = 0 (from storing values)
        mov     ax, 0x1010              ; set palette color: bl=index dh=R ch=G cl=B
        test    sp, sp
        js      palette_loop            ; ax=0x1010 bx=0xdfff cx=dx=0 sp=0x7ffc

        fninit
        fldz                            ;| t=t0

;Centering segments for the 0xcccd trick: mov ax,0xcccd | mul di | add dx,segment
; segment=0x9f??: error in pixels = (segment*16-0xa0000 - ((x&0xff)-128)/256*320) mod 320
; nice values:
;   0x9ff5  -2.25
; ->0x9fe0  +8     choose this one because it's divisible by 0x10
;   0x9fdf  -6.75
;   0x9fca  +3.5
;   0x9fb4  -1
;   ----------------- 0x9fa0 is the lowest segment that can access the whole screen
;   0x9f9e  -5.5
;   0x9f89  +4.75
;   0x9f73  +0.25

        mov     si, 0x9fe0              ; later we'll also use bx+si=0xa460 and bp+si=0xa300
        mov     es, si

;Table lookups: K(x) maps the wanted top 16 bits of a float32 to a
;nominal table address; the K_* names rewrite that address relative to
;the registers that hold known values at the point of use.
%define K(x) 0xa000 + (((~x)&0xff00) >> 4)

; Absolute constants
%define K_TIME_DELTA      si-0x9fe0+bx-0x480+K(0xbc00) ;0xa430 bx+si-0x30; -1/128
%define K_NEG_HALF_SCALE  si-0x9fe0+bx-0x480+K(0xbe00) ;0xa410 bx+si-0x50; -1/8
%define K_NEG_2           si-0x9fe0+bx-0x480+K(0xc000) ;0xa3f0 bx+si-0x70; -2 (also used for -abs)

; Lengths (scaled by L=2^31)
%define K_NEG_EPS         si-0x9fe0+bp-0x320+K(0xcd00) ;0xa320 bp+si+0x20; -L/16
%define K_TRANSLATION     si-0x9fe0+bp-0x320+K(0xce00) ;0xa310 bp+si+0x10; -L/4
%define K_NEG_Z0          si-0x9fe0+bp-0x320+K(0xcf00) ;0xa300 bp+si; -L
%define K_CVT_BRIGHTNESS  si-0x9fe0+bp-0x320+K(0xd700) ;0xa280 bp+si-0x80; -2^23 * EPS/8
%define K_CVT_HUE         si-0x9fe0+bp-0x320+K(0xd400) ;0xa2b0 bp+si-0x50; -2^23 * L/32 / 256

;For each frame: advance time, prepare rotation constants
frame_loop:
        mov     bx, 0x420               ; bh=4
        fld     st0                     ;| t t
        fsincos                         ;| C1 S1 t
        fldl2e
        fmul    st3                     ;| 1.4427*t C1 S1 t
        fsincos                         ;| C2 S2 C1 S1 t
        fldlg2
        fmul    st5                     ;| 0.30103*t C2 S2 C1 S1 t
        fsincos                         ;| C3 S3 C2 S2 C1 S1 t

;Store each rotation constant four times
STORE:                                  ; [0x420 30 40 50 60 70 80]
        mov     cl, 4                   ;  C3 S3 C2 S2 C1 S1 XY and SSE<->reg transfer
STORE4:
        fst     dword [bx]
        db      0x00, 0xfb              ;<- after decompression, ah = this 0 ;=add bl,bh
        loop    STORE4                  ; loop leaves the flags of the add untouched
        fstp    st0
        jns     STORE                   ; loop 6 times: bx=0x480

        fsub    dword [K_TIME_DELTA]    ;| t+=dt

%define COS bx
%define SIN bx+0x10

;For each 4-pixel batch:
batch_loop:
        mov     cl, 4                   ; bx=0x480

;Combine brightness and hue from the last batch
pixel_loop:
        shr     bp, 1                   ; background mask (bit shifted out lands in CF)
        mov     ax, [bx]                ; ah=hue = orbit trap: 8..<32 (floor(x) =~ round(x*256)>>8)
        salc                            ; al=0 (background) or 0xff (fractal)
        add     al, [bx+si]             ; al=brightness: -1 + 0..8
        cmovnc  ax, si                  ; if it was 0+x or -1+0, make it black
        aad     8
        stosb                           ; pixel color = hue*8 + brightness

;Store XY coordinates for this 4-pixel batch
        mov     ax, 0xcccd
        mul     di
        add     dx, si                  ; 0xcccd*pixel_address + 0x9fe00000: center X and Y (almost)
        inc     bx
        mov     [bx], ax                ; 0x0480: X = dl:ah:al:__
        inc     bx                      ;           [+3 +2 +1 +0]
        mov     [bx+si], dx             ; 0xa460: Y = dh:dl:__:__
        inc     bx
        mov     [bx], dx
        inc     bx
%define INT_X bx    ; x ~ 2^32 * -0.5..0.5
%define INT_Y bx+si ; y ~ 2^32 * -0.3906..0.3906 = 0xcccd * 320 * -100..100
        loop    pixel_loop              ; di+=4 bx=0x490
        dec     di

;Register aliases used by the tracer and by MAP
%define x xmm0 ; XYZ coordinates in the fractal iteration
%define y xmm1
%define z xmm2
%define o xmm3 ; output: orbit trap
%define a xmm4 ; scratch, output: estimated distance
%define b xmm5 ; scratch
%define c xmm6 ; translation [-c,-c/4,0]
%define d xmm7 ; depth (camera Z)

;Trace steps along a ray
        mov     bp, 0xa2e0-0x9fe0+0x5000+0x20 ; 0x5320
        mov     cl, 24

;Start of compressed code
        movaps  d, [K_NEG_Z0]           ; d=-1
        db      0x3d                    ; skip subps on the first pass: cmp ax,0x5c0f | cld
trace_step:
        subps   d, a                    ; d -= -map(X,Y,d)
        call    MAP
        loop    trace_step

;Compute normal.Z (scaled by ambient occlusion)
        addps   d, [K_NEG_EPS]
        call    MAPSTORE                ; [si] = map(X,Y,d), a = -map(X,Y,d+EPS)
        addps   d, [K_NEG_Z0]           ; d+=-1: d = -2..0
        subps   a, [si]                 ; a = -(map(X,Y,d+EPS) - map(X,Y,d))

;Clip by the far plane, reject normals pointing away
        andps   d, a                    ; a<0 and d<0? fractal : background or grazing hit

;Convert and store brightness and hue
        addps   a, [K_CVT_BRIGHTNESS]   ; put brightness into the lowest byte
        addps   o, [K_CVT_HUE]          ; put hue into the 2nd-lowest byte
        movmskps ebp, d                 ; bp = per-lane mask consumed by "shr bp,1" above
        movaps  [bx+si], a              ; 0xa460 (bx=0x480, si=0x9fe0)
        movaps  [bx], o                 ; 0x0480

;Next pixel
        inc     di
        jnz     batch_loop              ; di=0, ax=0 from the last "mul di"

;Esc test, next frame
        in      al, 0x60
        dec     ax                      ; ah was 0
        jnz     frame_loop
        ; fallthrough into MAPSTORE; NOTE(review): its final ret presumably
        ; doubles as the program exit - confirm

;-----------------------------------------------------------------------
; MAPSTORE / MAP - return the box distance to the KIFS fractal
; In:    d = depth for all 4 lanes; INT_X / INT_Y hold the integer pixel
;        coordinates once bl is reset to 0x80; ch = 0
;        MAPSTORE additionally saves the incoming a to [si] first
; Out:   a = -(length - 2*c)  (negated distance estimate)
;        o = orbit trap, bx = 0x480
; Clobb: x y z b c, bl, ch (wraps back to 0), flags
;-----------------------------------------------------------------------
MAPSTORE:                               ; bx=0x480 or 0x490
        movaps  [si], a                 ; store last step
MAP:
        mov     bl, 0x80
        cvtdq2ps x, [INT_X]
        cvtdq2ps y, [INT_Y]
        movaps  c, [K_TRANSLATION]      ; c=-L/4: translation=[-c,-c/4,0]
        movaps  o, c                    ; o=-L/4
        movaps  z, d

;Rotate in the XZ, YX and ZY planes
fold_loop:
        mov     bl, 0x20                ; ch=0 on init
rotate_loop:
        movaps  b, [COS]                ; b=C3 a=S3 | b=C2 a=S2 | b=C1 a=S1
        movaps  a, [SIN]
        mulps   b, z                    ; b=Cz
        mulps   z, a                    ; z=Sz
        mulps   a, x                    ; a=Sx
        mulps   x, [COS]                ; x=Cx
        subps   a, b                    ; a=x'=Sx-Cz
        addps   z, x                    ; z=z'=Sz+Cx
        movaps  x, y                    ; cycle x,y,z <- y,z,a
        movaps  y, z
        movaps  z, a
        add     bl, 0x20                ; 0x20 | 0x40 | 0x60
        jns     rotate_loop             ; bx=0x480 a=z

;Reflect along X and Y
        movaps  b, [K_NEG_2]
        orps    x, b                    ; x=-|x|
        orps    y, b                    ; y=-|y|

;Box-distance (L_inf) to the origin
        orps    a, b                    ; a=-|z|
        add     ch, 0x10                ; 16 iterations (moved up here to get ah=0 after decompression)
        minps   a, x
        minps   a, y                    ; a=-length = min(-|x|,-|y|,-|z|)

;Orbit trap
        minps   o, a                    ; orbit=min(orbit,-length)

;Translate by [-c,-c/4,0]
        mulps   b, [K_NEG_HALF_SCALE]   ; b=0.25 = -2 * -0.125
        mulps   b, c                    ; b=c/4
        subps   x, c                    ; x-=c
        subps   y, b                    ; y-=c/4

;Scale translation
        subps   c, b                    ; c-=c/4 (c*=3/4)

;Next iteration
        jnc     fold_loop               ; CF still comes from "add ch,0x10" (SSE ops leave flags alone)

        subps   a, c
        subps   a, c                    ; a=-(length-2*c)
        ret                             ; bx=0x480
; [ back to the prod ]