diff --git a/Sources/Engine/Base/Types.h b/Sources/Engine/Base/Types.h index a8c40ac..487b2bf 100644 --- a/Sources/Engine/Base/Types.h +++ b/Sources/Engine/Base/Types.h @@ -161,6 +161,11 @@ MY_STATIC_ASSERT(size_tSize, sizeof(size_t) == sizeof(void*)); #endif #endif + #if defined(__GNU_INLINE__) && defined(__i386__) + #define FPU_REGS "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" + #define MMX_REGS "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" + #endif + #ifndef PAGESIZE #define PAGESIZE 4096 #endif diff --git a/Sources/Engine/Graphics/Color.cpp b/Sources/Engine/Graphics/Color.cpp index 4e6b8c2..50e1bf6 100644 --- a/Sources/Engine/Graphics/Color.cpp +++ b/Sources/Engine/Graphics/Color.cpp @@ -426,7 +426,7 @@ COLOR MulColors( COLOR col1, COLOR col2) "orl %%eax, %%ebx \n\t" "movl %%ebx, %%ecx \n\t" "popl %%ebx \n\t" - : "=c" (colRet) + : "=&c" (colRet) : "S" (col1), "D" (col2) : "eax", "edx", "cc", "memory" ); @@ -536,18 +536,18 @@ COLOR AddColors( COLOR col1, COLOR col2) } #elif (defined __GNU_INLINE__) + ULONG tmp; __asm__ __volatile__ ( - "pushl %%ebx \n\t" - "pushl %%edi \n\t" - "pushl %%esi \n\t" - "xorl %%ebx, %%ebx \n\t" + // if xbx is "r", gcc runs out of regs in -fPIC + -fno-omit-fp :( + //"xorl %[xbx], %[xbx] \n\t" + "movl $0, %[xbx] \n\t" "mov $255, %%esi \n\t" // red - "movl (%%esp), %%eax \n\t" + "movl %[col1], %%eax \n\t" "andl $0xFF000000, %%eax \n\t" "shrl $24, %%eax \n\t" - "movl 4(%%esp), %%edx \n\t" + "movl %[col2], %%edx \n\t" "andl $0xFF000000, %%edx \n\t" "shrl $24, %%edx \n\t" "addl %%edx, %%eax \n\t" @@ -556,13 +556,13 @@ COLOR AddColors( COLOR col1, COLOR col2) "orl %%ecx, %%eax \n\t" "shll $24, %%eax \n\t" "andl $0xFF000000, %%eax \n\t" - "orl %%eax, %%ebx \n\t" + "orl %%eax, %[xbx] \n\t" // green - "movl (%%esp), %%eax \n\t" + "movl %[col1], %%eax \n\t" "andl $0x00FF0000, %%eax \n\t" "shrl $16, %%eax \n\t" - "movl 4(%%esp), %%edx \n\t" + "movl %[col2], %%edx \n\t" "andl $0x00FF0000, %%edx \n\t" "shrl $16, %%edx \n\t" "addl %%edx, %%eax \n\t" @@ -571,13 +571,13 @@ COLOR AddColors( COLOR col1, COLOR col2) "orl %%ecx, %%eax \n\t" "shll $16, %%eax \n\t" "andl $0x00FF0000, %%eax \n\t" - "orl %%eax, %%ebx \n\t" + "orl %%eax, %[xbx] \n\t" // blue - "movl (%%esp), %%eax \n\t" + "movl %[col1], %%eax \n\t" "andl $0x0000FF00, %%eax \n\t" "shrl $8, %%eax \n\t" - "movl 4(%%esp), %%edx \n\t" + "movl %[col2], %%edx \n\t" "andl $0x0000FF00, %%edx \n\t" "shrl $8, %%edx \n\t" "addl %%edx, %%eax \n\t" @@ -586,13 +586,13 @@ COLOR AddColors( COLOR col1, COLOR col2) "orl %%ecx, %%eax \n\t" "shll $8, %%eax \n\t" "andl $0x0000FF00, %%eax \n\t" - "orl %%eax, %%ebx \n\t" + "orl %%eax, %[xbx] \n\t" // alpha - "movl (%%esp), %%eax \n\t" + "movl %[col1], %%eax \n\t" "andl $0x000000FF, %%eax \n\t" "shrl $0, %%eax \n\t" - "movl 4(%%esp), %%edx \n\t" + "movl %[col2], %%edx \n\t" "andl $0x000000FF, %%edx \n\t" "shrl $0, %%edx \n\t" "addl %%edx, %%eax \n\t" @@ -601,15 +601,10 @@ COLOR AddColors( COLOR col1, COLOR col2) "orl %%ecx, %%eax \n\t" "shll $0, %%eax \n\t" "andl $0x000000FF, %%eax \n\t" - "orl %%eax, %%ebx \n\t" - "movl %%ebx, %%ecx \n\t" - - // done. - "addl $8, %%esp \n\t" - "popl %%ebx \n\t" - : "=c" (colRet) - : "S" (col1), "D" (col2) - : "eax", "edx", "cc", "memory" + "orl %[xbx], %%eax \n\t" + : "=&a" (colRet), [xbx] "=&g" (tmp) + : [col1] "g" (col1), [col2] "g" (col2) + : "ecx", "edx", "esi", "cc", "memory" ); #else diff --git a/Sources/Engine/Graphics/Color.h b/Sources/Engine/Graphics/Color.h index 4381669..4a0318b 100644 --- a/Sources/Engine/Graphics/Color.h +++ b/Sources/Engine/Graphics/Color.h @@ -332,7 +332,7 @@ inline void CopyLongs( ULONG *pulSrc, ULONG *pulDst, INDEX ctLongs) "cld \n\t" "rep \n\t" "movsd \n\t" - : // no outputs. + : "=S" (pulSrc), "=D" (pulDst), "=c" (ctLongs) : "S" (pulSrc), "D" (pulDst), "c" (ctLongs) : "cc", "memory" ); @@ -364,7 +364,7 @@ inline void StoreLongs( ULONG ulVal, ULONG *pulDst, INDEX ctLongs) "cld \n\t" "rep \n\t" "stosd \n\t" - : // no outputs. + : "=D" (pulDst), "=c" (ctLongs) : "a" (ulVal), "D" (pulDst), "c" (ctLongs) : "cc", "memory" ); diff --git a/Sources/Engine/Graphics/DrawPort_RenderScene.cpp b/Sources/Engine/Graphics/DrawPort_RenderScene.cpp index 7118e74..3412445 100644 --- a/Sources/Engine/Graphics/DrawPort_RenderScene.cpp +++ b/Sources/Engine/Graphics/DrawPort_RenderScene.cpp @@ -186,10 +186,10 @@ elemDone: } #elif (defined __GNU_INLINE__) __asm__ __volatile__ ( - "pushl %%ebx \n\t" // Save GCC's register. - "movl %%eax, %%ebx \n\t" - - "movd %%ebx, %%mm1 \n\t" + "movl %[ctElems], %%ecx \n\t" + "movl %[piDst], %%edi \n\t" + "movl %[piElements], %%esi \n\t" + "movd %[iVtx0Pass], %%mm1 \n\t" "movq %%mm1, %%mm0 \n\t" "psllq $32, %%mm1 \n\t" "por %%mm0, %%mm1 \n\t" @@ -205,17 +205,18 @@ elemDone: "jnz 0b \n\t" // elemLoop "1: \n\t" // elemRest "emms \n\t" - "testl $1, %%edx \n\t" + "testl $1, %[ctElems] \n\t" "jz 2f \n\t" // elemDone "movl (%%esi), %%eax \n\t" - "addl %%ebx, %%eax \n\t" + "addl %[iVtx0Pass], %%eax \n\t" "movl %%eax, (%%edi) \n\t" "2: \n\t" // elemDone - "popl %%ebx \n\t" // restore GCC's register. : // no outputs. - : "c" (ctElems), "d" (ctElems), "D" (piDst), - "S" (pspo->spo_piElements), "a" (pspo->spo_iVtx0Pass) - : "cc", "memory" + : [ctElems] "g" (ctElems), [piDst] "g" (piDst), + [piElements] "g" (pspo->spo_piElements), + [iVtx0Pass] "g" (pspo->spo_iVtx0Pass) + : FPU_REGS, "mm0", "mm1", "eax", "ecx", "esi", "edi", + "cc", "memory" ); #else @@ -506,12 +507,13 @@ static void RSBinToGroups( ScenePolygon *pspoFirst) #elif (defined __GNU_INLINE__) __asm__ __volatile__ ( + "movl $2, %%eax \n\t" "bsrl (%%esi), %%ecx \n\t" "shll %%cl, %%eax \n\t" "movl %%eax, (%%esi) \n\t" : // no outputs. - : "a" (2), "S" (&_ctGroupsCount) - : "ecx", "cc", "memory" + : "S" (&_ctGroupsCount) + : "eax", "ecx", "cc", "memory" ); #else diff --git a/Sources/Engine/Graphics/Fog.cpp b/Sources/Engine/Graphics/Fog.cpp index d085729..2d4b259 100644 --- a/Sources/Engine/Graphics/Fog.cpp +++ b/Sources/Engine/Graphics/Fog.cpp @@ -97,6 +97,8 @@ pixLoop: #elif (defined __GNU_INLINE__) __asm__ __volatile__ ( + "movl %[pubTexture], %%esi \n\t" + "movl %[pixTextureSize], %%ecx \n\t" "leal 0(%%esi, %%ecx), %%edi \n\t" "0: \n\t" // pixLoop "movzbl (%%esi), %%eax \n\t" @@ -108,8 +110,9 @@ pixLoop: "decl %%ecx \n\t" "jnz 0b \n\t" // pixLoop : // no outputs. - : "S" (pubTexture), "D" (pubTexture), "c" (pixTextureSize) - : "eax", "cc", "memory" + : [pubTexture] "g" (pubTexture), + [pixTextureSize] "g" (pixTextureSize) + : "eax", "ecx", "esi", "edi", "cc", "memory" ); #else diff --git a/Sources/Engine/Graphics/Gfx_OpenGL_Textures.cpp b/Sources/Engine/Graphics/Gfx_OpenGL_Textures.cpp index 8f357a2..d20f55b 100644 --- a/Sources/Engine/Graphics/Gfx_OpenGL_Textures.cpp +++ b/Sources/Engine/Graphics/Gfx_OpenGL_Textures.cpp @@ -219,6 +219,9 @@ void UploadTexture_OGL( ULONG *pulTexture, PIX pixSizeU, PIX pixSizeV, #elif (defined __GNU_INLINE__) __asm__ __volatile__ ( "pxor %%mm0,%%mm0 \n\t" + "movl %[pulSrc],%%esi \n\t" + "movl %[pulDst],%%edi \n\t" + "movl %[pixSize],%%ecx \n\t" "0: \n\t" // pixLoop "movd 0(%%esi), %%mm1 \n\t" "movd 4(%%esi), %%mm2 \n\t" @@ -234,8 +237,10 @@ void UploadTexture_OGL( ULONG *pulTexture, PIX pixSizeU, PIX pixSizeV, "jnz 0b \n\t" // pixLoop "emms \n\t" : - : "S" (pulSrc), "D" (pulDst), "c" (pixSize) - : "memory", "cc" + : [pulSrc] "g" (pulSrc), [pulDst] "g" (pulDst), + [pixSize] "g" (pixSize) + : FPU_REGS, "mm0", "mm1", "mm2", + "ecx", "esi", "edi", "memory", "cc" ); #else diff --git a/Sources/Engine/Graphics/Graphics.cpp b/Sources/Engine/Graphics/Graphics.cpp index 30e58e8..1373482 100644 --- a/Sources/Engine/Graphics/Graphics.cpp +++ b/Sources/Engine/Graphics/Graphics.cpp @@ -13,12 +13,6 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -// !!! FIXME: One of the GNU inline asm blocks has a bug that causes the -// !!! FIXME: title on the main menu to render incorrectly. (Generating an -// !!! FIXME: incorrect mipmap?) The intel compiler works fine with the -// !!! FIXME: MSVC inline asm, but GCC and Intel both have the problem when -// !!! FIXME: using the GNU inline asm. - #include "Engine/StdH.h" #include @@ -198,9 +192,9 @@ void FlipBitmap( UBYTE *pubSrc, UBYTE *pubDst, PIX pixWidth, PIX pixHeight, INDE // makes one level lower mipmap (bilinear or nearest-neighbour with border preservance) #if (defined __GNUC__) -static __int64 mmRounder = 0x0002000200020002ll; +__int64 mmRounder = 0x0002000200020002ll; #else -static __int64 mmRounder = 0x0002000200020002; +__int64 mmRounder = 0x0002000200020002; #endif static void MakeOneMipmap( ULONG *pulSrcMipmap, ULONG *pulDstMipmap, PIX pixWidth, PIX pixHeight, BOOL bBilinear) @@ -305,19 +299,19 @@ pixLoopN: #elif (defined __GNU_INLINE__) __asm__ __volatile__ ( - "pushl %%ebx \n\t" // Save GCC's register. - "movl %%ecx, %%ebx \n\t" - "pxor %%mm0, %%mm0 \n\t" + "movl %[pulSrcMipmap], %%esi \n\t" + "movl %[pulDstMipmap], %%edi \n\t" + "movl %[pixHeight], %%edx \n\t" "0: \n\t" // rowLoop - "movl %%ebx, %%ecx \n\t" + "movl %[pixWidth], %%ecx \n\t" "1: \n\t" // pixLoopN "movd 0(%%esi), %%mm1 \n\t" // up-left "movd 4(%%esi), %%mm2 \n\t" // up-right - "movd 0(%%esi, %%ebx, 8), %%mm3 \n\t" // down-left - "movd 4(%%esi, %%ebx, 8), %%mm4 \n\t" // down-right + "movd 0(%%esi, %[pixWidth], 8), %%mm3 \n\t" // down-left + "movd 4(%%esi, %[pixWidth], 8), %%mm4 \n\t" // down-right "punpcklbw %%mm0, %%mm1 \n\t" "punpcklbw %%mm0, %%mm2 \n\t" "punpcklbw %%mm0, %%mm3 \n\t" @@ -325,7 +319,7 @@ pixLoopN: "paddw %%mm2, %%mm1 \n\t" "paddw %%mm3, %%mm1 \n\t" "paddw %%mm4, %%mm1 \n\t" - "paddw (%%eax), %%mm1 \n\t" + "paddw (" ASMSYM(mmRounder) "), %%mm1 \n\t" "psrlw $2, %%mm1 \n\t" "packuswb %%mm0, %%mm1 \n\t" "movd %%mm1, (%%edi) \n\t" @@ -338,15 +332,17 @@ pixLoopN: // advance to next row // skip one row in source mip-map - "leal 0(%%esi, %%ebx, 8), %%esi \n\t" + "leal 0(%%esi, %[pixWidth], 8), %%esi \n\t" "decl %%edx \n\t" "jnz 0b \n\t" // rowLoop - "popl %%ebx \n\t" // restore GCC's register. "emms \n\t" : // no outputs. - : "a" (&mmRounder), "c" (pixWidth), "S" (pulSrcMipmap), - "D" (pulDstMipmap), "d" (pixHeight) - : "cc", "memory" + : [pixWidth] "r" (pixWidth), + [pulSrcMipmap] "g" (pulSrcMipmap), + [pulDstMipmap] "g" (pulDstMipmap), + [pixHeight] "g" (pixHeight) + : FPU_REGS, MMX_REGS, "ecx", "edx", "esi", "edi", + "cc", "memory" ); #else @@ -433,23 +429,22 @@ fullEnd: } #elif (defined __GNU_INLINE__) + ULONG tmp, tmp2; __asm__ __volatile__ ( - "pushl %%ebx \n\t" // Save GCC's register. - "movl %%ecx, %%ebx \n\t" - + "xorl %[xbx], %[xbx] \n\t" + "movl %[pulSrcMipmap], %%esi \n\t" + "movl %[pulDstMipmap], %%edi \n\t" // setup upper half - "pushl %%edx \n\t" // pixHeight - "pushl %%eax \n\t" // ulRowModulo - "pushl %%ebx \n\t" // pixWidth - "xorl %%ebx, %%ebx \n\t" - "shrl $1, %%edx \n\t" + "movl %[pixHeight], %%eax \n\t" + "movl %%eax, %[xdx] \n\t" + "shrl $1, %[xdx] \n\t" "0: \n\t" // halfLoop - "movl (%%esp), %%ecx \n\t" + "movl %[pixWidth], %%ecx \n\t" "shrl $1, %%ecx \n\t" "1: \n\t" // leftLoop - "movl 0(%%esi, %%ebx, 8), %%eax \n\t" // upper-left (or lower-left) + "movl 0(%%esi, %[xbx], 8), %%eax \n\t" // upper-left (or lower-left) "movl %%eax, (%%edi) \n\t" // advance to next pixel @@ -459,12 +454,12 @@ fullEnd: "jg 1b \n\t" // leftLoop // do right row half - "movl (%%esp), %%ecx \n\t" + "movl %[pixWidth], %%ecx \n\t" "shrl $1, %%ecx \n\t" "jz 3f \n\t" // halfEnd "2: \n\t" // rightLoop - "movl 4(%%esi, %%ebx, 8), %%eax \n\t" // upper-right (or lower-right) + "movl 4(%%esi, %[xbx], 8), %%eax \n\t" // upper-right (or lower-right) "movl %%eax, (%%edi) \n\t" // advance to next pixel @@ -475,25 +470,26 @@ fullEnd: "3: \n\t" // halfEnd // advance to next row - "addl 4(%%esp), %%esi \n\t" // skip one row in source mip-map - "subl $1, %%edx \n\t" + "addl %[ulRowModulo], %%esi \n\t" // skip one row in source mip-map + "subl $1, %[xdx] \n\t" "jg 0b \n\t" // halfLoop // do eventual lower half loop (if not yet done) - "movl 8(%%esp), %%edx \n\t" - "shrl $1, %%edx \n\t" + "movl %[pixHeight], %%eax \n\t" + "movl %%eax, %[xdx] \n\t" + "shrl $1, %[xdx] \n\t" "jz 4f \n\t" // fullEnd - "cmpl (%%esp), %%ebx \n\t" - "movl (%%esp), %%ebx \n\t" + "cmpl %[pixWidth], %[xbx] \n\t" + "movl %[pixWidth], %[xbx] \n\t" "jne 0b \n\t" // halfLoop "4: \n\t" // fullEnd - "addl $12, %%esp \n\t" - "popl %%ebx \n\t" // restore GCC's register. - : // no outputs. - : "S" (pulSrcMipmap), "D" (pulDstMipmap), "d" (pixHeight), - "c" (pixWidth), "a" (ulRowModulo) - : "cc", "memory" + : [xbx] "=&r" (tmp), [xdx] "=&g" (tmp2) + : [pulSrcMipmap] "g" (pulSrcMipmap), + [pulDstMipmap] "g" (pulDstMipmap), + [pixHeight] "g" (pixHeight), [pixWidth] "g" (pixWidth), + [ulRowModulo] "g" (ulRowModulo) + : "eax", "ecx", "esi", "edi", "cc", "memory" ); #else @@ -663,9 +659,6 @@ static inline void IncrementByteWithClip( UBYTE &ub, SLONG slAdd) #endif // performs dithering of a 32-bit bipmap (can be in-place) -#ifdef __GNUC__ -__attribute__((noinline)) // because of asm labels -#endif void DitherBitmap( INDEX iDitherType, ULONG *pulSrc, ULONG *pulDst, PIX pixWidth, PIX pixHeight, PIX pixCanvasWidth, PIX pixCanvasHeight) { @@ -860,26 +853,27 @@ nextRowO: } #elif (defined __GNU_INLINE__) + ULONG tmp; __asm__ __volatile__ ( + "movl %[pulSrc], %%esi \n\t" + "movl %[pulDst], %%edi \n\t" // reset dither line offset - "pushl %%ebx \n\t" // save GCC's register. - "movl (" ASMSYM(pulDitherTable) "), %%ebx \n\t" - "pushl %%ecx \n\t" // slModulo - "pushl %%eax \n\t" // pixWidth + "movl %[pixHeight], %%eax \n\t" + "movl %%eax, %[xdx] \n\t" "xorl %%eax, %%eax \n\t" - "rowLoopO: \n\t" + "0: \n\t" // rowLoopO // get horizontal dither patterns - "movq 0(%%ebx, %%eax, 4), %%mm4 \n\t" - "movq 8(%%ebx, %%eax, 4), %%mm5 \n\t" + "movq 0(%[pulDitherTable], %%eax, 4), %%mm4 \n\t" + "movq 8(%[pulDitherTable], %%eax, 4), %%mm5 \n\t" "psrlw (" ASMSYM(mmShifter) "), %%mm4 \n\t" "psrlw (" ASMSYM(mmShifter) "), %%mm5 \n\t" "pand (" ASMSYM(mmMask) "), %%mm4 \n\t" "pand (" ASMSYM(mmMask) "), %%mm5 \n\t" // process row - "movl (%%esp), %%ecx \n\t" - "pixLoopO: \n\t" + "movl %[pixWidth], %%ecx \n\t" + "1: \n\t" // pixLoopO "movq 0(%%esi), %%mm1 \n\t" "movq 8(%%esi), %%mm2 \n\t" "paddusb %%mm4, %%mm1 \n\t" @@ -891,30 +885,30 @@ nextRowO: "addl $16, %%esi \n\t" "addl $16, %%edi \n\t" "subl $4, %%ecx \n\t" - "jg pixLoopO \n\t" // !!!! possible memory leak? - "je nextRowO \n\t" + "jg 1b \n\t" // !!!! possible memory leak? + "je 2f \n\t" // nextRowO // backup couple of pixels "leal 0(%%esi, %%ecx, 4), %%esi \n\t" "leal 0(%%edi, %%ecx, 4), %%edi \n\t" - "nextRowO: \n\t" + "2: \n\t" // nextRowO // get next dither line patterns - "addl 4(%%esp), %%esi \n\t" - "addl 4(%%esp), %%edi \n\t" + "addl %[slModulo], %%esi \n\t" + "addl %[slModulo], %%edi \n\t" "addl $4, %%eax \n\t" "andl $15, %%eax \n\t" // advance to next row - "decl %%edx \n\t" - "jnz rowLoopO \n\t" + "decl %[xdx] \n\t" + "jnz 0b \n\t" // rowLoopO "emms \n\t" - "addl $8, %%esp \n\t" - "popl %%ebx \n\t" // restore GCC's register. - : // no outputs. - : "S" (pulSrc), "D" (pulDst), "d" (pixHeight), - "a" (pixWidth), "c" (slModulo) - : "cc", "memory" + : [xdx] "=&g" (tmp) + : [pulSrc] "g" (pulSrc), [pulDst] "g" (pulDst), + [pixHeight] "g" (pixHeight), [pixWidth] "g" (pixWidth), + [slModulo] "g" (slModulo), [pulDitherTable] "r" (pulDitherTable) + : FPU_REGS, MMX_REGS, "eax", "ecx", "esi", "edi", + "cc", "memory" ); #else @@ -1054,17 +1048,17 @@ allDoneE: #elif (defined __GNU_INLINE__) __asm__ __volatile__ ( - "pushl %%ebx \n\t" // Save GCC's register. - "movl %%ecx, %%ebx \n\t" "pxor %%mm0, %%mm0 \n\t" + "movl %[pulDst], %%esi \n\t" + "movl %[pixHeight], %%edx \n\t" "decl %%edx \n\t" // need not to dither last row - "rowLoopE: \n\t" + "0: \n\t" // rowLoopE // left to right - "movl %%eax, %%ecx \n\t" + "movl %[pixWidth], %%ecx \n\t" "decl %%ecx \n\t" - "pixLoopEL: \n\t" + "1: \n\t" // pixLoopEL "movd (%%esi), %%mm1 \n\t" "punpcklbw %%mm0, %%mm1 \n\t" "pand (" ASMSYM(mmErrDiffMask) "), %%mm1 \n\t" @@ -1089,29 +1083,29 @@ allDoneE: // spread errors "paddusb 4(%%esi), %%mm7 \n\t" - "paddusb -4(%%esi, %%ebx, 4), %%mm3 \n\t" - "paddusb 0(%%esi, %%ebx, 4), %%mm5 \n\t" - "paddusb 4(%%esi, %%ebx, 4), %%mm1 \n\t" // !!!! possible memory leak? + "paddusb -4(%%esi, %[pixCanvasWidth], 4), %%mm3 \n\t" + "paddusb 0(%%esi, %[pixCanvasWidth], 4), %%mm5 \n\t" + "paddusb 4(%%esi, %[pixCanvasWidth], 4), %%mm1 \n\t" // !!!! possible memory leak? "movd %%mm7, 4(%%esi) \n\t" - "movd %%mm3, -4(%%esi, %%ebx, 4) \n\t" - "movd %%mm5, 0(%%esi, %%ebx, 4) \n\t" - "movd %%mm1, 4(%%esi, %%ebx, 4) \n\t" + "movd %%mm3, -4(%%esi, %[pixCanvasWidth], 4) \n\t" + "movd %%mm5, 0(%%esi, %[pixCanvasWidth], 4) \n\t" + "movd %%mm1, 4(%%esi, %[pixCanvasWidth], 4) \n\t" // advance to next pixel "addl $4, %%esi \n\t" "decl %%ecx \n\t" - "jnz pixLoopEL \n\t" + "jnz 1b \n\t" // pixLoopEL // advance to next row - "addl %%edi, %%esi \n\t" + "addl %[slWidthModulo], %%esi \n\t" "decl %%edx \n\t" - "jz allDoneE \n\t" + "jz 3f \n\t" // allDoneE // right to left - "movl %%eax, %%ecx \n\t" + "movl %[pixWidth], %%ecx \n\t" "decl %%ecx \n\t" - "pixLoopER: \n\t" + "2: \n\t" // pixLoopER "movd (%%esi), %%mm1 \n\t" "punpcklbw %%mm0, %%mm1 \n\t" "pand (" ASMSYM(mmErrDiffMask) "), %%mm1 \n\t" @@ -1136,30 +1130,30 @@ allDoneE: // spread errors "paddusb -4(%%esi), %%mm7 \n\t" - "paddusb -4(%%esi, %%ebx, 4), %%mm1 \n\t" - "paddusb 0(%%esi, %%ebx, 4), %%mm5 \n\t" - "paddusb 4(%%esi, %%ebx, 4), %%mm3 \n\t" // !!!! possible memory leak? + "paddusb -4(%%esi, %[pixCanvasWidth], 4), %%mm1 \n\t" + "paddusb 0(%%esi, %[pixCanvasWidth], 4), %%mm5 \n\t" + "paddusb 4(%%esi, %[pixCanvasWidth], 4), %%mm3 \n\t" // !!!! possible memory leak? "movd %%mm7, -4(%%esi) \n\t" - "movd %%mm1, -4(%%esi, %%ebx, 4) \n\t" - "movd %%mm5, 0(%%esi, %%ebx, 4) \n\t" - "movd %%mm3, 4(%%esi, %%ebx, 4) \n\t" + "movd %%mm1, -4(%%esi, %[pixCanvasWidth], 4) \n\t" + "movd %%mm5, 0(%%esi, %[pixCanvasWidth], 4) \n\t" + "movd %%mm3, 4(%%esi, %[pixCanvasWidth], 4) \n\t" // revert to previous pixel "subl $4, %%esi \n\t" "decl %%ecx \n\t" - "jnz pixLoopER \n\t" + "jnz 2b \n\t" // pixLoopER // advance to next row - "leal 0(%%esi, %%ebx, 4), %%esi \n\t" + "leal 0(%%esi, %[pixCanvasWidth], 4), %%esi \n\t" "decl %%edx \n\t" - "jnz rowLoopE \n\t" - "allDoneE: \n\t" - "popl %%ebx \n\t" + "jnz 0b \n\t" // rowLoopE + "3: \n\t" // allDoneE "emms \n\t" : // no outputs. - : "S" (pulDst), "c" (pixCanvasWidth), "d" (pixHeight), "a" (pixWidth), - "D" (slWidthModulo) - : "cc", "memory" + : [pulDst] "g" (pulDst), [pixCanvasWidth] "r" (pixCanvasWidth), + [pixHeight] "g" (pixHeight), [pixWidth] "g" (pixWidth), + [slWidthModulo] "g" (slWidthModulo) + : FPU_REGS, MMX_REGS, "ecx", "edx", "esi", "cc", "memory" ); #else @@ -1271,7 +1265,7 @@ extern "C" { } -#if USE_PORTABLE_C +#ifdef USE_PORTABLE_C typedef SWORD ExtPix[4]; static inline void extpix_fromi64(ExtPix &pix, const __int64 i64) @@ -2538,7 +2532,8 @@ lowerLoop: "popl %%ebx \n\t" : // no outputs. : // inputs are all globals. - : "eax", "ecx", "edx", "edi", "esi", "cc", "memory" + : FPU_REGS, MMX_REGS, "eax", "ecx", "edx", "esi", "edi", + "cc", "memory" ); #else diff --git a/Sources/Engine/Graphics/TextureEffects.cpp b/Sources/Engine/Graphics/TextureEffects.cpp index 08f816e..994814e 100644 --- a/Sources/Engine/Graphics/TextureEffects.cpp +++ b/Sources/Engine/Graphics/TextureEffects.cpp @@ -1363,6 +1363,13 @@ pixLoop: _pixBaseWidth_renderWater = pixBaseWidth; __asm__ __volatile__ ( + // this sucks :( + "movl %[pixBaseHeight], %%eax \n\t" + "movl %[pswHeightMap], %%ecx \n\t" + "movl %[pulTexture], %%edx \n\t" + "movl %[pulTextureBase], %%esi \n\t" + "movl %[slHeightRowStep], %%edi \n\t" + "pushl %%ebx \n\t" // GCC needs this. "movl (" ASMSYM(_pixBaseWidth_renderWater) "),%%ebx \n\t" @@ -1444,9 +1451,13 @@ pixLoop: "popl %%ebx \n\t" // restore GCC's register. "emms \n\t" : // no outputs. - : "a" (pixBaseHeight), "c" (pswHeightMap), - "d" (pulTexture), "S" (pulTextureBase), "D" (slHeightRowStep) - : "cc", "memory" + : [pixBaseHeight] "g" (pixBaseHeight), + [pswHeightMap] "g" (pswHeightMap), + [pulTexture] "g" (pulTexture), + [pulTextureBase] "g" (pulTextureBase), + [slHeightRowStep] "g" (slHeightRowStep) + : FPU_REGS, MMX_REGS, "eax", "ecx", "edx", "esi", "edi", + "cc", "memory" ); #else @@ -1617,9 +1628,7 @@ pixLoop2: #elif (defined __GNU_INLINE__) __asm__ __volatile__ ( - "pushl %%ebx \n\t" // GCC's register. - "movl %%ecx, %%ebx \n\t" - "bsfl %%eax, %%eax \n\t" // pixBaseWidth + "bsfl %[pixBaseWidth], %%eax \n\t" "movl $32, %%edx \n\t" "subl %%eax, %%edx \n\t" "movl %%edx, (" ASMSYM(mmBaseWidthShift) ") \n\t" @@ -1631,11 +1640,11 @@ pixLoop2: "pxor %%mm6, %%mm6 \n\t" // MM6 = pixV|pixU - // (These registers were loaded here in the original version...) - //"movl (pswHeightMap), %%ebx \n\t" - //"movl (pulTextureBase), %%esi \n\t" - //"movl (pulTexture), %%edi \n\t" - + "movl %[pswHeightMap], %%edx \n\t" + "movl %[pulTextureBase], %%esi \n\t" + "movl %[pulTexture], %%edi \n\t" + "pushl %%ebx \n\t" // GCC's register. + "movl %%edx, %%ebx \n\t" "movl (" ASMSYM(_pixBufferHeight) "), %%edx \n\t" "0: \n\t" // rowLoop2 @@ -1753,9 +1762,12 @@ pixLoop2: "popl %%ebx \n\t" // GCC's value. "emms \n\t" : // no outputs. - : "a" (pixBaseWidth), "c" (pswHeightMap), - "S" (pulTextureBase), "D" (pulTexture) - : "edx", "cc", "memory" + : [pixBaseWidth] "g" (pixBaseWidth), + [pswHeightMap] "g" (pswHeightMap), + [pulTextureBase] "g" (pulTextureBase), + [pulTexture] "g" (pulTexture) + : FPU_REGS, MMX_REGS, "eax", "ecx", "edx", "esi", "edi", + "cc", "memory" ); #else @@ -2136,26 +2148,24 @@ pixLoop4: #elif (defined __GNU_INLINE__) __asm__ __volatile__ ( - "pushl %%ebx \n\t" // GCC's register. - "movl %%ecx, %%ebx \n\t" - "bsfl %%eax, %%eax \n\t" - "movl $32, %%edx \n\t" - "subl %%eax, %%edx \n\t" - "movl %%edx, (" ASMSYM(mmBaseWidthShift) ") \n\t" + "bsfl %[pixBaseWidth], %%eax \n\t" + "movl $32, %%edx \n\t" + "subl %%eax, %%edx \n\t" + "movl %%edx, (" ASMSYM(mmBaseWidthShift) ") \n\t" - "movq (" ASMSYM(mmBaseHeightMask) "), %%mm0 \n\t" - "psllq $32, %%mm0 \n\t" - "por (" ASMSYM(mmBaseWidthMask) "), %%mm0 \n\t" - "movq %%mm0, (" ASMSYM(mmBaseMasks) ") \n\t" + "movq (" ASMSYM(mmBaseHeightMask) "), %%mm0 \n\t" + "psllq $32, %%mm0 \n\t" + "por (" ASMSYM(mmBaseWidthMask) "), %%mm0 \n\t" + "movq %%mm0, (" ASMSYM(mmBaseMasks) ") \n\t" - "pxor %%mm6, %%mm6 \n\t" // MM6 = pixV|pixU + "pxor %%mm6, %%mm6 \n\t" // MM6 = pixV|pixU - // (These registers were loaded here in the original version...) - //"movl (pswHeightMap), %%ebx \n\t" - //"movl (pulTextureBase), %%esi \n\t" - //"movl (pulTexture), %%edi \n\t" - - "movl (" ASMSYM(_pixBufferHeight) "), %%edx \n\t" + "movl %[pswHeightMap], %%edx \n\t" + "movl %[pulTextureBase], %%esi \n\t" + "movl %[pulTexture], %%edi \n\t" + "pushl %%ebx \n\t" // GCC's register. + "movl %%edx, %%ebx \n\t" + "movl (" ASMSYM(_pixBufferHeight) "), %%edx \n\t" "0: \n\t" // rowLoop4 "pushl %%edx \n\t" "movl (" ASMSYM(_pixBufferWidth) "), %%ecx \n\t" @@ -2485,9 +2495,12 @@ pixLoop4: "popl %%ebx \n\t" // Restore GCC's value. "emms \n\t" : // no outputs. - : "a" (pixBaseWidth), "c" (pswHeightMap), - "S" (pulTextureBase), "D" (pulTexture) - : "edx", "cc", "memory" + : [pixBaseWidth] "g" (pixBaseWidth), + [pswHeightMap] "g" (pswHeightMap), + [pulTextureBase] "g" (pulTextureBase), + [pulTexture] "g" (pulTexture) + : FPU_REGS, MMX_REGS, "eax", "ecx", "edx", "esi", "edi", + "cc", "memory" ); @@ -2965,6 +2978,11 @@ pixDone: #elif (defined __GNU_INLINE__) __asm__ __volatile__ ( + "movl %[slColumnModulo], %%edx \n\t" + "movl %[slBufferMask], %%ecx \n\t" + "movl %[slDensity], %%eax \n\t" + "movl (" ASMSYM(ulRNDSeed) "), %%edi \n\t" + "pushl %%ebx \n\t" // GCC's register. "xorl %%ebx, %%ebx \n\t" "pushl %%edx \n\t" // slColumnModulo @@ -2977,7 +2995,7 @@ pixDone: "1: \n\t" // rowLoopFM "movl (" ASMSYM(_pixBufferWidth) "), %%edx \n\t" - "addl %%esi, %%edx \n\t" + "addl %[pubNew], %%edx \n\t" "movzbl (%%ebx, %%edx), %%eax \n\t" "addl (" ASMSYM(_pixBufferWidth) "), %%edx \n\t" "movzbl (%%ebx, %%edx), %%edx \n\t" @@ -2985,7 +3003,7 @@ pixDone: "shrl $1, %%eax \n\t" "cmpl (%%esp), %%eax \n\t" "jg doCalc_animateFire \n\t" - "movb $0, (%%esi, %%ebx) \n\t" + "movb $0, (%[pubNew], %%ebx) \n\t" "jmp pixDone_animateFire \n\t" "doCalc_animateFire: \n\t" @@ -2996,7 +3014,7 @@ pixDone: "movsbl " ASMSYM(asbMod3Sub1Table) "(%%edx), %%edx \n\t" "addl %%ebx, %%edx \n\t" "andl 4(%%esp), %%edx \n\t" // slBufferMask - "movb %%al, (%%esi, %%edx) \n\t" + "movb %%al, (%[pubNew], %%edx) \n\t" "imull $262147, %%edi \n\t" "pixDone_animateFire: \n\t" @@ -3015,9 +3033,10 @@ pixDone: "addl $12, %%esp \n\t" // lose our locals. "popl %%ebx \n\t" // Restore GCC's var. : // no outputs. - : "a" (slDensity), "c" (slBufferMask), - "d" (slColumnModulo), "D" (ulRNDSeed), "S" (pubNew) - : "cc", "memory" + : [slBufferMask] "g" (slBufferMask), + [slColumnModulo] "g" (slColumnModulo), + [pubNew] "r" (pubNew), [slDensity] "g" (slDensity) + : "eax", "ecx", "edx", "edi", "cc", "memory" ); #else @@ -3103,6 +3122,12 @@ pixLoopF: #elif (defined __GNU_INLINE__) _pubHeat_RenderPlasmaFire = pubHeat; // ran out of registers. :/ __asm__ __volatile__ ( + "movl %[slHeatRowStep], %%eax \n\t" + "movl %[slHeatMapStep], %%edx \n\t" + "movl %[slBaseMipShift], %%ecx \n\t" + "movl %[pulTextureBase], %%esi \n\t" + "movl %[pulTexture], %%edi \n\t" + "pushl %%ebx \n\t" "movl (" ASMSYM(_pubHeat_RenderPlasmaFire) "),%%ebx \n\t" "pushl %%eax \n\t" // slHeatRowStep @@ -3131,9 +3156,12 @@ pixLoopF: "addl $12, %%esp \n\t" // lose our locals. "popl %%ebx \n\t" // restore GCC's register. : // no outputs. - : "S" (pulTextureBase), "D" (pulTexture), - "c" (slBaseMipShift), "a" (slHeatRowStep), "d" (slHeatMapStep) - : "cc", "memory" + : [pulTextureBase] "g" (pulTextureBase), + [pulTexture] "g" (pulTexture), + [slBaseMipShift] "g" (slBaseMipShift), + [slHeatRowStep] "g" (slHeatRowStep), + [slHeatMapStep] "g" (slHeatMapStep) + : "eax", "ecx", "edx", "esi", "edi", "cc", "memory" ); #else diff --git a/Sources/Engine/Light/LayerMixer.cpp b/Sources/Engine/Light/LayerMixer.cpp index 6f1d5ad..d2aff1d 100755 --- a/Sources/Engine/Light/LayerMixer.cpp +++ b/Sources/Engine/Light/LayerMixer.cpp @@ -365,9 +365,9 @@ skipPixel: } #elif (defined __GNU_INLINE__) + ULONG tmp1, tmp2; __asm__ __volatile__ ( // prepare interpolants - "pushl %%ebx \n\t" "movd (" ASMSYM(_slL2Row) "), %%mm0 \n\t" "movd (" ASMSYM(_slDL2oDURow) "), %%mm1 \n\t" "psllq $32, %%mm1 \n\t" @@ -378,26 +378,25 @@ skipPixel: "por %%mm0, %%mm2 \n\t" // MM2 = slDDL2oDUoDV | slDL2oDV // prepare color "pxor %%mm0, %%mm0 \n\t" - "movd %%eax, %%mm7 \n\t" + "movd %[ulLightRGB], %%mm7 \n\t" "punpcklbw %%mm0, %%mm7 \n\t" "psllw $1, %%mm7 \n\t" // loop thru rows "movl (" ASMSYM(_pulLayer) "), %%edi \n\t" - "movl (" ASMSYM(_iRowCt) "), %%ebx \n\t" + "movl (" ASMSYM(_iRowCt) "), %[xbx] \n\t" "0: \n\t" // rowLoop - "pushl %%ebx \n\t" - "movd %%mm1, %%ebx \n\t" // EBX = slL2Point + "movd %%mm1, %[slL2Point] \n\t" "movq %%mm1, %%mm3 \n\t" "psrlq $32, %%mm3 \n\t" // MM3 = 0 | slDL2oDU // loop thru pixels in current row "movl (" ASMSYM(_iPixCt) "), %%ecx \n\t" "1: \n\t" // pixLoop // check if pixel need to be drawn - "cmpl $0x10000000, %%ebx \n\t" + "cmpl $0x10000000, %[slL2Point] \n\t" "jge 3f \n\t" // skipPixel // calculate intensities and do actual drawing of shadow pixel ARGB "movd %%ecx, %%mm4 \n\t" - "movl %%ebx, %%eax \n\t" + "movl %[slL2Point], %%eax \n\t" "sarl $15, %%eax \n\t" "andl $8191, %%eax \n\t" "movzbl " ASMSYM(aubSqrt) "(%%eax), %%eax \n\t" @@ -424,22 +423,20 @@ skipPixel: // advance to next pixel "addl $4, %%edi \n\t" "movd %%mm3, %%eax \n\t" - "addl %%eax, %%ebx \n\t" + "addl %%eax, %[slL2Point] \n\t" "paddd (" ASMSYM(mmDDL2oDU_AddAmbientPoint) "), %%mm3 \n\t" "decl %%ecx \n\t" "jnz 1b \n\t" // pixLoop // advance to the next row - "popl %%ebx \n\t" "addl (" ASMSYM(_slModulo) "), %%edi \n\t" "paddd %%mm2, %%mm1 \n\t" "paddd (" ASMSYM(mmDDL2oDV_AddAmbientPoint) "), %%mm2 \n\t" - "decl %%ebx \n\t" + "decl %[xbx] \n\t" "jnz 0b \n\t" // rowLoop - "popl %%ebx \n\t" "emms \n\t" - : // no outputs. - : "a" (ulLightRGB) - : "ecx", "edx", "edi", "esi", "cc", "memory" + : [xbx] "=&r" (tmp1), [slL2Point] "=&g" (tmp2) + : [ulLightRGB] "g" (ulLightRGB) + : FPU_REGS, MMX_REGS, "eax", "ecx", "edi", "cc", "memory" ); #else @@ -580,10 +577,9 @@ skipPixel: } #elif (defined __GNU_INLINE__) + ULONG tmp1, tmp2; __asm__ __volatile__ ( // prepare interpolants - "pushl %%ebx \n\t" - "movl %%ecx, %%ebx \n\t" "movd (" ASMSYM(_slL2Row) "), %%mm0 \n\t" "movd (" ASMSYM(_slDL2oDURow) "), %%mm1 \n\t" "psllq $32, %%mm1 \n\t" @@ -594,29 +590,30 @@ skipPixel: "por %%mm0, %%mm2 \n\t" // MM2 = slDDL2oDUoDV | slDL2oDV // prepare color "pxor %%mm0, %%mm0 \n\t" // MM0 = 0 | 0 (for unpacking purposes) - "movd %%eax, %%mm7 \n\t" // eax == ulLightRGB + "movd %[ulLightRGB], %%mm7 \n\t" "punpcklbw %%mm0, %%mm7 \n\t" "psllw $1, %%mm7 \n\t" // loop thru rows + "movl %[pubMask], %%esi \n\t" "movl (" ASMSYM(_pulLayer) "), %%edi \n\t" - "movzbl (%%ebx), %%edx \n\t" // ebx == &ubMask - "movl (" ASMSYM(_iRowCt) "), %%ebx \n\t" + "movzbl %[ubMask], %%edx \n\t" + "movl (" ASMSYM(_iRowCt) "), %%eax \n\t" + "movl %%eax, %[xbx] \n\t" "0: \n\t" // rowLoop - "pushl %%ebx \n\t" - "movd %%mm1, %%ebx \n\t" // EBX = slL2Point + "movd %%mm1, %[slL2Point] \n\t" "movq %%mm1, %%mm3 \n\t" "psrlq $32, %%mm3 \n\t" // MM3 = 0 | slDL2oDU // loop thru pixels in current row "movl (" ASMSYM(_iPixCt) "), %%ecx \n\t" "1: \n\t" // pixLoop // check if pixel need to be drawn; i.e. draw if( [esi] & ubMask && (slL2Pointlm_pixCanvasSizeU), "S" (this->lm_pixCanvasSizeV), "a" (colAmbient), "D" (this->lm_pulShadowMap) : "cc", "memory" @@ -1977,12 +1968,13 @@ __forceinline void CLayerMixer::CopyShadowLayer(void) rep movsd } #elif (defined __GNU_INLINE__) + ULONG clob1, clob2, clob3; __asm__ __volatile__ ( "cld \n\t" "imull %%eax, %%ecx \n\t" "rep \n\t" "movsl \n\t" - : // no outputs. + : "=c" (clob1), "=S" (clob2), "=D" (clob3) : "c" (this->lm_pixCanvasSizeU), "a" (this->lm_pixCanvasSizeV), "S" (this->lm_pulStaticShadowMap), "D" (this->lm_pulShadowMap) : "cc", "memory" @@ -2015,13 +2007,14 @@ __forceinline void CLayerMixer::FillShadowLayer( COLOR col) } #elif (defined __GNU_INLINE__) + ULONG clob1, clob2, clob3; __asm__ __volatile__ ( "cld \n\t" "imull %%edx, %%ecx \n\t" "bswapl %%eax \n\t" // convert to R,G,B,A memory format! "rep \n\t" "stosl \n\t" - : // no outputs. + : "=a" (clob1), "=c" (clob2), "=D" (clob3) : "c" (this->lm_pixCanvasSizeU), "d" (this->lm_pixCanvasSizeV), "a" (col), "D" (this->lm_pulShadowMap) : "cc", "memory" diff --git a/Sources/Engine/Rendering/RendMisc.cpp b/Sources/Engine/Rendering/RendMisc.cpp index 28b2f5e..91db0e1 100644 --- a/Sources/Engine/Rendering/RendMisc.cpp +++ b/Sources/Engine/Rendering/RendMisc.cpp @@ -125,6 +125,7 @@ static inline PIX PIXCoord(FLOAT f) // (f+0.9999f) or (ceil(f)) #elif (defined __GNU_INLINE__) PIX pixRet; + SLONG clobber; __asm__ __volatile__ ( "flds (%%eax) \n\t" "fistl (%%edx) \n\t" @@ -134,7 +135,7 @@ static inline PIX PIXCoord(FLOAT f) // (f+0.9999f) or (ceil(f)) "movl (%%ecx), %%edx \n\t" "addl $0x7FFFFFFF, %%edx \n\t" "adcl $0, %%eax \n\t" - : "=a" (pixRet) + : "=a" (pixRet), "=d" (clobber) : "a" (&f), "d" (&slTmp), "c" (&fDiff) : "cc", "memory" ); diff --git a/Sources/Engine/Sound/SoundMixer.cpp b/Sources/Engine/Sound/SoundMixer.cpp index 6a9175a..e24ace2 100644 --- a/Sources/Engine/Sound/SoundMixer.cpp +++ b/Sources/Engine/Sound/SoundMixer.cpp @@ -96,11 +96,12 @@ void ResetMixer( const SLONG *pslBuffer, const SLONG slBufferSize) } #elif (defined __GNU_INLINE__) // !!! FIXME : rcg12172001 Is this REALLY any faster than memset()? + ULONG clob1, clob2; __asm__ __volatile__ ( "cld \n\t" "rep \n\t" "stosl \n\t" - : // no outputs. + : "=D" (clob1), "=c" (clob2) : "a" (0), "D" (pvMixerBuffer), "c" (slMixerBufferSize*2) : "cc", "memory" ); @@ -132,11 +133,12 @@ void CopyMixerBuffer_stereo( const SLONG slSrcOffset, void *pDstBuffer, const SL } #elif (defined __GNU_INLINE__) // !!! FIXME : rcg12172001 Is this REALLY any faster than memcpy()? + ULONG clob1, clob2, clob3; __asm__ __volatile__ ( "cld \n\t" "rep \n\t" "movsl \n\t" - : // no outputs. + : "=S" (clob1), "=D" (clob2), "=c" (clob3) : "S" (((char *)pvMixerBuffer) + slSrcOffset), "D" (pDstBuffer), "c" (slBytes >> 2) @@ -184,6 +186,9 @@ copyLoop: #elif (defined __GNU_INLINE__) __asm__ __volatile__ ( + "movl %[pvMixerBuffer], %%esi \n\t" + "movl %[pDstBuffer], %%edi \n\t" + "movl %[slDW], %%ecx \n\t" "0: \n\t" // copyLoop "movzwl (%%esi), %%eax \n\t" "movw %%ax, (%%edi) \n\t" @@ -192,10 +197,10 @@ copyLoop: "decl %%ecx \n\t" "jnz 0b \n\t" // copyLoop : // no outputs. - : "S" (((char *)pvMixerBuffer) + slSrcOffset), - "D" (pDstBuffer), - "c" (slBytes >> 2) - : "cc", "memory", "eax" + : [pvMixerBuffer] "g" (((char *)pvMixerBuffer) + slSrcOffset), + [pDstBuffer] "g" (pDstBuffer), + [slDW] "g" (slBytes >> 2) + : "eax", "ecx", "esi", "edi", "cc", "memory" ); #else @@ -247,6 +252,9 @@ copyLoop: #elif (defined __GNU_INLINE__) __asm__ __volatile__ ( + "movl %[pvMixerBuffer], %%esi \n\t" + "movl %[pvMixerBuffer], %%edi \n\t" + "movl %[slDW], %%ecx \n\t" "cld \n\t" "0: \n\t" // copyLoop "movq (%%esi), %%mm0 \n\t" @@ -258,8 +266,8 @@ copyLoop: "jnz 0b \n\t" // copyLoop "emms \n\t" : // no outputs. - : "S" (pvMixerBuffer), "D" (pvMixerBuffer), "c" (slBytes >> 2) - : "cc", "memory" + : [pvMixerBuffer] "g" (pvMixerBuffer), [slDW] "g" (slBytes >> 2) + : FPU_REGS, "mm0", "ecx", "esi", "edi", "cc", "memory" ); #else diff --git a/Sources/Engine/Sound/SoundMixer386.asm b/Sources/Engine/Sound/SoundMixer386.asm index 9231c0c..bedc390 100644 --- a/Sources/Engine/Sound/SoundMixer386.asm +++ b/Sources/Engine/Sound/SoundMixer386.asm @@ -96,6 +96,8 @@ SEGMENT .text global MixMono_asm MixMono_asm: push ebx ; Save GCC register. + push esi + push edi ; convert from floats to fixints 32:16 fld D [fLeftOfs] fmul D [f65536] @@ -224,6 +226,8 @@ loopEnd_MixMono: shr edx,16 mov D [slLastLeftSample],eax mov D [slLastRightSample],edx + pop edi + pop esi pop ebx ; Restore GCC register. emms ret @@ -232,6 +236,8 @@ loopEnd_MixMono: global MixStereo_asm MixStereo_asm: push ebx ; Save GCC register. + push esi + push edi ; convert from floats to fixints 32:16 fld D [fLeftOfs] fmul D [f65536] @@ -363,6 +369,8 @@ loopEnd_MixStereo: mov D [slLastLeftSample],eax mov D [slLastRightSample],edx emms + pop edi + pop esi pop ebx ; Restore GCC register. ret