From dean-list-gcc-patches@arctic.org Sun Apr 20 11:53:05 2003 List-Archive: Date: Sun, 20 Apr 2003 11:52:36 -0700 (PDT) From: dean gaudet To: gcc-patches@gcc.gnu.org Subject: [PATCH][x86] movd support for mmx regs X-comment: visit http://arctic.org/~dean/legal for information regarding copyright and disclaimer. i need the more efficient "movd" to transfer the bottom 32-bits of mmx regs between mem/gpr. the existing code for _mm_cvtsi32_si64 and _mm_cvtsi64_si32 uses casts to long long, and this generates some pretty crappy code compared to the direct use of movd... there was already support for the xmm version of movd, used in the intrinsics _mm_cvtsi32_si128/_mm_cvtsi128_si32, so i cloned that with suitable modifications. -dean Index: gcc/config/i386/i386.c =================================================================== RCS file: /cvsroot/gcc/gcc/gcc/config/i386/i386.c,v retrieving revision 1.560 diff -u -r1.560 i386.c --- gcc/config/i386/i386.c 11 Apr 2003 21:27:13 -0000 1.560 +++ gcc/config/i386/i386.c 20 Apr 2003 18:37:45 -0000 @@ -13330,6 +13330,11 @@ tree void_ftype_pchar_v16qi = build_function_type_list (void_type_node, pchar_type_node, V16QI_type_node, NULL_TREE); + tree v2si_ftype_pcint + = build_function_type_list (V2SI_type_node, pcint_type_node, NULL_TREE); + tree void_ftype_pcint_v2si + = build_function_type_list (void_type_node, + pcint_type_node, V2SI_type_node, NULL_TREE); tree v4si_ftype_pcint = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE); tree void_ftype_pcint_v4si @@ -13435,6 +13440,9 @@ def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); + def_builtin (MASK_MMX, "__builtin_ia32_loadd_si64", v2si_ftype_pcint, IX86_BUILTIN_LOADD_SI64); + def_builtin (MASK_MMX, "__builtin_ia32_si64_stored", void_ftype_pcint_v2si, IX86_BUILTIN_SI64_STORED); + def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); @@ -14430,6 +14438,11 @@ return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist); case IX86_BUILTIN_STORED: return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist); + + case IX86_BUILTIN_LOADD_SI64: + return ix86_expand_unop_builtin (CODE_FOR_mmx_loadd, arglist, target, 1); + case IX86_BUILTIN_SI64_STORED: + return ix86_expand_store_builtin (CODE_FOR_mmx_stored, arglist); default: break; Index: gcc/config/i386/i386.h =================================================================== RCS file: /cvsroot/gcc/gcc/gcc/config/i386/i386.h,v retrieving revision 1.333 diff -u -r1.333 i386.h --- gcc/config/i386/i386.h 17 Apr 2003 23:18:55 -0000 1.333 +++ gcc/config/i386/i386.h 20 Apr 2003 18:37:46 -0000 @@ -2148,6 +2148,8 @@ IX86_BUILTIN_MOVQ, IX86_BUILTIN_LOADD, IX86_BUILTIN_STORED, + IX86_BUILTIN_LOADD_SI64, + IX86_BUILTIN_SI64_STORED, IX86_BUILTIN_CLRTI, Index: gcc/config/i386/i386.md =================================================================== RCS file: /cvsroot/gcc/gcc/gcc/config/i386/i386.md,v retrieving revision 1.455 diff -u -r1.455 i386.md --- gcc/config/i386/i386.md 15 Apr 2003 13:33:57 -0000 1.455 +++ gcc/config/i386/i386.md 20 Apr 2003 18:37:47 -0000 @@ -18860,6 +18860,28 @@ operands[2] = CONST0_RTX (V2DFmode); }) +(define_insn "mmx_loadd" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_merge:V2SI + (vec_duplicate:V2SI (match_operand:SI 1 "nonimmediate_operand" "mr")) + (const_vector:V2SI [(const_int 0) + (const_int 0)]) + (const_int 1)))] + "TARGET_MMX" + "movd\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxmov") + (set_attr "mode" "DI")]) + +(define_insn "mmx_stored" + [(set (match_operand:SI 0 "nonimmediate_operand" "=mr") + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "y") + (parallel [(const_int 0)])))] + "TARGET_MMX" + "movd\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxmov") + (set_attr "mode" "DI")]) + (define_insn "movv8qi_internal" [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m") (match_operand:V8QI 1 "vector_move_operand" "C,ym,y"))] Index: gcc/config/i386/mmintrin.h =================================================================== RCS file: /cvsroot/gcc/gcc/gcc/config/i386/mmintrin.h,v retrieving revision 1.6 diff -u -r1.6 mmintrin.h --- gcc/config/i386/mmintrin.h 22 Feb 2003 02:09:06 -0000 1.6 +++ gcc/config/i386/mmintrin.h 20 Apr 2003 18:37:47 -0000 @@ -49,11 +49,10 @@ } /* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */ -static __inline __m64 -_mm_cvtsi32_si64 (int __i) +static __inline __m64 +_mm_cvtsi32_si64 (int __A) { - long long __tmp = (unsigned int)__i; - return (__m64) __tmp; + return (__m64) __builtin_ia32_loadd_si64 (&__A); } #ifdef __x86_64__ @@ -76,7 +75,8 @@ static __inline int _mm_cvtsi64_si32 (__m64 __i) { - long long __tmp = (long long)__i; + int __tmp; + __builtin_ia32_si64_stored (&__tmp, (__v2si)__i); return __tmp; }