Vector Normalize

Pseudo Vec

Listing 14-14: ...\chap14\vsf3d\Vsf3D.cpp
image from book
 // vmp_VecNormalize
 //   Writes a normalized copy of the vector *pvA into *pvD.
 //
 //   pvD  destination vector (receives x, y, z)
 //   pvA  source vector (read only)
 //
 //   When the magnitude of *pvA is below 0.0000001f the source components
 //   are copied to the destination unscaled, so no division by (nearly)
 //   zero takes place. Otherwise every component is multiplied by
 //   1 / magnitude.
 void vmp_VecNormalize(vmp3DVector * const pvD,
                       const vmp3DVector * const pvA)
 {
   float fMag;

   // Magnitude = sqrt(x*x + y*y + z*z)
   fMag = sqrtf(pvA->x*pvA->x + pvA->y*pvA->y
                              + pvA->z*pvA->z);

   if (fMag < 0.0000001f)
   {                     // too close to zero
     pvD->x = pvA->x;
     pvD->y = pvA->y;
     pvD->z = pvA->z;
   }
   else
   {    // Ick, a division, to obtain a reciprocal!
     fMag = 1.0f / fMag; // one divide, then three multiplies
     pvD->x = pvA->x * fMag;
     pvD->y = pvA->y * fMag;
     pvD->z = pvA->z * fMag;
   }
 }
image from book
 

Pseudo Vec (x86)

The 3DNow! processor supports 64-bit operations, so two loads or two stores must be handled simultaneously, but it is a simple matter of adding the two pairs of floats to each other.

 ; Register setup: eax is loaded from vA and edx from vD.
 mov    eax,vA         ; Vector A
 mov    edx,vD         ; Vector destination

vmp_VecNormalize (3DNow!)

Listing 14-15: \chap14\vsf3d\Vsf3DX86M.asm
image from book
 ; vmp_VecNormalize (3DNow!) -- normalize the vector at [eax] into [edx].
 ;   In:    eax -> source vmp3DVector       edx -> destination vmp3DVector
 ;   Uses:  ecx, mm0-mm4, flags
 ;   Let r = AxAx + AyAy + AzAz. If r compares below FLOAT0001 the source
 ;   components are stored unscaled; otherwise each component is multiplied
 ;   by 1/sqrt(r), obtained from the pfrsqrt estimate plus the
 ;   pfrsqit1/pfrcpit2 refinement steps.
 ;   NOTE(review): no femms/emms appears in this listing -- confirm that the
 ;   enclosing routine clears the MMX state before any x87 code runs.

     movq      mm1,[eax]                       ; {Ay Ax}
     movd      mm0,(vmp3DVector PTR [eax]).z   ; {0  Az}
     movq      mm4,mm1                         ; {Ay Ax}  kept for the final scale
     movq      mm3,mm0                         ; {0  Az}  kept for the final scale
     pfmul     mm1,mm1                         ; {AyAy AxAx}
     pfmul     mm0,mm0                         ; {0    AzAz}
     pfacc     mm1,mm1                         ; {AyAy+AxAx AyAy+AxAx}
     pfadd     mm0,mm1                         ; {0+AyAy+AxAx AzAz+AyAy+AxAx}

         ; Calculate square root (pfrsqrt=15-bit accuracy)

         ; too close zero ...???   1.0 / 10000.0 ???
         ; r is a sum of squares, so the raw float bits in ecx are compared
         ; as an integer against FLOAT0001.
     movd      ecx,mm0                         ; ecx = bits of r (low element)
     cmp       ecx,FLOAT0001                   ; 0.0001
     jl        short zmag                      ; just set vD=vA!!!

         ; Not too close to zero, f= AzAz+AyAy+AxAx
         ; for Newton-Raphson 24-bit resolution
     pfrsqrt   mm1,mm0                         ; {1/sqrt(r) 1/sqrt(r)}  estimate
     movq      mm2,mm1                         ; {1/sqrt(r) 1/sqrt(r)}  saved estimate
     pfmul     mm1,mm1                         ; {1/r       1/r}
     pfrsqit1  mm1,mm0                         ; X2=f(x,x1) {1st step}

         ; *** mm1 (low element) = 1/Magnitude after the 2nd step ***
         ; Calculate 1/sqrt(r) = (1/mag)   24-bit
     pfrcpit2  mm1,mm2                         ; {2nd step}   {#   1/m}
     punpckldq mm1,mm1                         ; {1/m 1/m}
     pfmul     mm4,mm1                         ; {Ny Nx}= {Ay/m Ax/m}
     pfmul     mm3,mm1                         ; {0  Nz}= {0/m  Az/m}

 zmag:                                         ; Save Resulting {x y z} Normals
     movq      [edx+0],mm4                     ; {Ny Nx}
     movd      (vmp3DVector PTR [edx]).z,mm3   ; {0  Nz}
image from book
 

vmp_VecNormalize (SSE) Aligned

If the data is unaligned, change the MOVAPS instruction to MOVUPS.

Listing 14-16: \chap14\vsf3d\Vsf3DX86M.asm
image from book
  ; vmp_VecNormalize (SSE, aligned) -- normalize {x y z} at [eax] into [edx],
  ; merging the result with the destination's existing top element (Dw).
  ;   In:    eax -> source vector           edx -> destination vector
  ;          (movaps is used, so both addresses must be 16-byte aligned)
  ;   Uses:  xmm0-xmm2, xmm6, xmm7, the memory operand uflow, flags
  ;   Let r = Ax^2 + Ay^2 + Az^2. If r compares below FLOAT0001 the source
  ;   {x y z} is stored unscaled; otherwise each component is divided by
  ;   sqrt(r).
  ;   NOTE(review): lomsk96, himsk32 and ONEHIGH are defined outside this
  ;   listing; their effect is described here only as the original lane
  ;   comments show it -- confirm against their definitions.

      movaps  xmm0,[eax]                ; {#  Az Ay Ax}
      movaps  xmm7,[edx]                ; {Dw #  #  # }
      andps   xmm0,lomsk96              ; {0  Az Ay Ax}
      andps   xmm7,himsk32              ; {Dw 0  0  0 }
      movaps  xmm6,xmm0                 ; {0 Az   Ay   Ax  }  kept for the divide
      mulps   xmm0,xmm0                 ; {0 AzAz AyAy AxAx}
      movaps  xmm1,xmm0                 ; {0 AzAz AyAy AxAx}
      movaps  xmm2,xmm0                 ; {0 AzAz AyAy AxAx}

      orps    xmm1,ONEHIGH              ; {1 Az^2 Ay^2 Ax^2}
      shufps  xmm1,xmm1,11001001b       ; 3021 {1 Ax^2 Az^2 Ay^2}
      shufps  xmm2,xmm2,11010010b       ; 3102 {0 Ay^2 Ax^2 Az^2}

      addps   xmm1,xmm0                 ; {1+0 Az^2+Ax^2 Ay^2+Az^2 Ax^2+Ay^2}
      addps   xmm1,xmm2                 ; {1+0 r r r}, r = Ax^2+Ay^2+Az^2

          ; Too close zero?
      movss   uflow,xmm1                ; r= Ay^2+Az^2+Ax^2
      cmp     uflow,FLOAT0001           ; 0.0001f
      jl      short zmag                ; set vD=vA!!!

          ; Calculate square root
          ; The top lane holds 1, so it yields sqrt(1)=1 and then 0/1=0.
      sqrtps  xmm0,xmm1                 ; {1 sqrt(r) sqrt(r) sqrt(r)}
      divps   xmm6,xmm0                 ; {0 Nz Ny Nx}

  zmag:
      orps    xmm7,xmm6                 ; {Dw Nz Ny Nx}
      movaps  [edx],xmm7                ; Save
image from book
 
Question 

How would you upgrade the estimated precision version of the code to full 24-bit precision?



32/64-Bit 80x86 Assembly Language Architecture
32/64-Bit 80x86 Assembly Language Architecture
ISBN: 1598220020
EAN: 2147483647
Year: 2003
Pages: 191

Similar book on Amazon

flylib.com © 2008-2017.
If you have any questions, please contact us: flylib@qtcs.net