diff options
author | Kinsey Moore <kmoore@digium.com> | 2012-04-12 15:25:47 +0000 |
---|---|---|
committer | Kinsey Moore <kmoore@digium.com> | 2012-04-12 15:25:47 +0000 |
commit | 8696daadf8c129d92c52b85d471fcacca9e50a85 (patch) | |
tree | ee347b0c1323438d9fcc9e7662d1ced2eb87a401 /codecs/gsm | |
parent | 683eacb59a2d322d57c4c9bd9f6e22501c2c244b (diff) |
Simplify build system architecture optimization
This change to the build system rips out any usage of PROC along with
architecture-specific optimizations in favor of using -march=native where it is
supported. This fixes broken builds on 64-bit Intel systems and results in
better-optimized code on systems running GCC 4.2+.
Review: https://reviewboard.asterisk.org/r/1852/
(closes issue ASTERISK-19462)
........
Merged revisions 361955 from http://svn.asterisk.org/svn/asterisk/branches/1.8
........
Merged revisions 361956 from http://svn.asterisk.org/svn/asterisk/branches/10
git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@361968 65c4cc65-6c06-0410-ace0-fbb531ad65f3
Diffstat (limited to 'codecs/gsm')
-rw-r--r-- | codecs/gsm/Makefile | 48 | ||||
-rw-r--r-- | codecs/gsm/src/k6opt.s | 739 |
2 files changed, 0 insertions, 787 deletions
diff --git a/codecs/gsm/Makefile b/codecs/gsm/Makefile index a3dd20124..a072e6d2d 100644 --- a/codecs/gsm/Makefile +++ b/codecs/gsm/Makefile @@ -45,35 +45,6 @@ ifeq ($(shell $(CC) -v 2>&1 | awk '/^gcc version/ { split($$3, v, "."); printf " OPTIMIZE=-O2 endif -# If the compiler's '-march' flag has been specified already, then assume it's a value -# that is what the user wants (or has been determined by the configure script). If not, -# do some simple logic to set a decent value -ifeq ($(findstring -march,$(_ASTCFLAGS) $(ASTCFLAGS)),) - ifeq (,$(findstring $(shell uname -s),Darwin SunOS)) - ifeq (,$(strip $(findstring $(PROC) ,"x86_64 amd64 ultrasparc sparc64 arm armv5b armeb ppc powerpc ppc64 ia64 s390 bfin mipsel mips "))) - ifeq (,$(strip $(findstring $(shell uname -m) ,"ppc ppc64 alpha armv4l s390 "))) - OPTIMIZE+=-march=$(PROC) - endif - endif - else - ifneq (,$(findstring $(OSARCH),Darwin)) - ifeq ($(shell if test `/usr/bin/sw_vers -productVersion | cut -c4` -gt 5; then echo 6; else echo 0; fi),6) - # Snow Leopard/Lion reports i386, even though it's really x86_64 - OPTIMIZE+=-mtune=native - endif - endif - endif - - #The problem with sparc is the best stuff is in newer versions of gcc (post 3.0) only. - #This works for even old (2.96) versions of gcc and provides a small boost either way. - #A ultrasparc cpu is really v9 but the stock debian stable 3.0 gcc doesn't support it. - #So we go lowest common available by gcc and go a step down, still a step up from - #the default as we now have a better instruction set to work with. - Belgarath - ifeq ($(PROC),ultrasparc) - OPTIMIZE+=-mcpu=v8 -mtune=$(PROC) -O3 - endif -endif - PG = #PG = -g -pg ######### Profiling flags. If you don't know what that means, leave it blank. @@ -224,17 +195,6 @@ GSM_SOURCES = $(SRC)/add.c \ $(SRC)/short_term.c \ $(SRC)/table.c -# add k6-specific code only if not on a non-k6 hardware or proc. 
-# XXX Keep a space after each findstring argument -# XXX should merge with GSM_OBJECTS -ifeq ($(OSARCH),linux-gnu) -ifeq (,$(findstring $(shell uname -m) , x86_64 amd64 ppc ppc64 alpha armv4l sparc64 parisc s390 )) -ifeq (,$(findstring $(PROC) , arm armv5b armeb powerpc ia64 s390 bfin mipsel mips )) -GSM_SOURCES+= $(SRC)/k6opt.s -endif -endif -endif - TOAST_SOURCES = $(SRC)/toast.c \ $(SRC)/toast_lin.c \ $(SRC)/toast_ulaw.c \ @@ -279,14 +239,6 @@ GSM_OBJECTS = $(SRC)/add.o \ $(SRC)/short_term.o \ $(SRC)/table.o -ifeq ($(OSARCH),linux-gnu) -ifeq (,$(findstring $(shell uname -m) , x86_64 amd64 ppc ppc64 alpha armv4l sparc64 parisc )) -ifeq (,$(findstring $(PROC) , arm armv5b armeb powerpc ia64 bfin mipsel mips )) -GSM_OBJECTS+= $(SRC)/k6opt.o -endif -endif -endif - TOAST_OBJECTS = $(SRC)/toast.o \ $(SRC)/toast_lin.o \ $(SRC)/toast_ulaw.o \ diff --git a/codecs/gsm/src/k6opt.s b/codecs/gsm/src/k6opt.s index d84d54cbf..e69de29bb 100644 --- a/codecs/gsm/src/k6opt.s +++ b/codecs/gsm/src/k6opt.s @@ -1,739 +0,0 @@ - .file "k6opt.s" - .version "01.01" -/* gcc2_compiled.: */ -.section .rodata - .align 4 - .type coefs,@object - .size coefs,24 -coefs: - .value -134 - .value -374 - .value 0 - .value 2054 - .value 5741 - .value 8192 - .value 5741 - .value 2054 - .value 0 - .value -374 - .value -134 - .value 0 -.text - .align 4 -/* void Weighting_filter (const short *e, short *x) */ -.globl Weighting_filter - .type Weighting_filter,@function -Weighting_filter: - pushl %ebp - movl %esp,%ebp - pushl %edi - pushl %esi - pushl %ebx - movl 12(%ebp),%edi - movl 8(%ebp),%ebx - addl $-10,%ebx - emms - movl $0x1000,%eax; movd %eax,%mm5 /* for rounding */ - movq coefs,%mm1 - movq coefs+8,%mm2 - movq coefs+16,%mm3 - xorl %esi,%esi - .p2align 2 -.L21: - movq (%ebx,%esi,2),%mm0 - pmaddwd %mm1,%mm0 - - movq 8(%ebx,%esi,2),%mm4 - pmaddwd %mm2,%mm4 - paddd %mm4,%mm0 - - movq 16(%ebx,%esi,2),%mm4 - pmaddwd %mm3,%mm4 - paddd %mm4,%mm0 - - movq %mm0,%mm4 - punpckhdq %mm0,%mm4 /* mm4 has high int32 
of mm0 dup'd */ - paddd %mm4,%mm0; - - paddd %mm5,%mm0 /* add for roundoff */ - psrad $13,%mm0 - packssdw %mm0,%mm0 - movd %mm0,%eax /* ax has result */ - movw %ax,(%edi,%esi,2) - incl %esi - cmpl $39,%esi - jle .L21 - emms - popl %ebx - popl %esi - popl %edi - leave - ret -.Lfe1: - .size Weighting_filter,.Lfe1-Weighting_filter - -.macro ccstep n -.if \n - movq \n(%edi),%mm1 - movq \n(%esi),%mm2 -.else - movq (%edi),%mm1 - movq (%esi),%mm2 -.endif - pmaddwd %mm2,%mm1 - paddd %mm1,%mm0 -.endm - - .align 4 -/* long k6maxcc(const short *wt, const short *dp, short *Nc_out) */ -.globl k6maxcc - .type k6maxcc,@function -k6maxcc: - pushl %ebp - movl %esp,%ebp - pushl %edi - pushl %esi - pushl %ebx - emms - movl 8(%ebp),%edi - movl 12(%ebp),%esi - movl $0,%edx /* will be maximum inner-product */ - movl $40,%ebx - movl %ebx,%ecx /* will be index of max inner-product */ - subl $80,%esi - .p2align 2 -.L41: - movq (%edi),%mm0 - movq (%esi),%mm2 - pmaddwd %mm2,%mm0 - ccstep 8 - ccstep 16 - ccstep 24 - ccstep 32 - ccstep 40 - ccstep 48 - ccstep 56 - ccstep 64 - ccstep 72 - - movq %mm0,%mm1 - punpckhdq %mm0,%mm1 /* mm1 has high int32 of mm0 dup'd */ - paddd %mm1,%mm0; - movd %mm0,%eax /* eax has result */ - - cmpl %edx,%eax - jle .L40 - movl %eax,%edx - movl %ebx,%ecx - .p2align 2 -.L40: - subl $2,%esi - incl %ebx - cmpl $120,%ebx - jle .L41 - movl 16(%ebp),%eax - movw %cx,(%eax) - movl %edx,%eax - emms - popl %ebx - popl %esi - popl %edi - leave - ret -.Lfe2: - .size k6maxcc,.Lfe2-k6maxcc - - - .align 4 -/* long k6iprod (const short *p, const short *q, int n) */ -.globl k6iprod - .type k6iprod,@function -k6iprod: - pushl %ebp - movl %esp,%ebp - pushl %edi - pushl %esi - emms - pxor %mm0,%mm0 - movl 8(%ebp),%esi - movl 12(%ebp),%edi - movl 16(%ebp),%eax - leal -32(%esi,%eax,2),%edx /* edx = top - 32 */ - - cmpl %edx,%esi; ja .L202 - - .p2align 2 -.L201: - ccstep 0 - ccstep 8 - ccstep 16 - ccstep 24 - - addl $32,%esi - addl $32,%edi - cmpl %edx,%esi; jbe .L201 - - .p2align 2 
-.L202: - addl $24,%edx /* now edx = top-8 */ - cmpl %edx,%esi; ja .L205 - - .p2align 2 -.L203: - ccstep 0 - - addl $8,%esi - addl $8,%edi - cmpl %edx,%esi; jbe .L203 - - .p2align 2 -.L205: - addl $4,%edx /* now edx = top-4 */ - cmpl %edx,%esi; ja .L207 - - movd (%edi),%mm1 - movd (%esi),%mm2 - pmaddwd %mm2,%mm1 - paddd %mm1,%mm0 - - addl $4,%esi - addl $4,%edi - - .p2align 2 -.L207: - addl $2,%edx /* now edx = top-2 */ - cmpl %edx,%esi; ja .L209 - - movswl (%edi),%eax - movd %eax,%mm1 - movswl (%esi),%eax - movd %eax,%mm2 - pmaddwd %mm2,%mm1 - paddd %mm1,%mm0 - - .p2align 2 -.L209: - movq %mm0,%mm1 - punpckhdq %mm0,%mm1 /* mm1 has high int32 of mm0 dup'd */ - paddd %mm1,%mm0; - movd %mm0,%eax /* eax has result */ - - emms - popl %esi - popl %edi - leave - ret -.Lfe3: - .size k6iprod,.Lfe3-k6iprod - - - .align 4 -/* void k6vsraw P3((short *p, int n, int bits) */ -.globl k6vsraw - .type k6vsraw,@function -k6vsraw: - pushl %ebp - movl %esp,%ebp - pushl %esi - movl 8(%ebp),%esi - movl 16(%ebp),%ecx - andl %ecx,%ecx; jle .L399 - movl 12(%ebp),%eax - leal -16(%esi,%eax,2),%edx /* edx = top - 16 */ - emms - movd %ecx,%mm3 - movq ones,%mm2 - psllw %mm3,%mm2; psrlw $1,%mm2 - cmpl %edx,%esi; ja .L306 - - .p2align 2 -.L302: /* 8 words per iteration */ - movq (%esi),%mm0 - movq 8(%esi),%mm1 - paddsw %mm2,%mm0 - psraw %mm3,%mm0; - paddsw %mm2,%mm1 - psraw %mm3,%mm1; - movq %mm0,(%esi) - movq %mm1,8(%esi) - addl $16,%esi - cmpl %edx,%esi - jbe .L302 - - .p2align 2 -.L306: - addl $12,%edx /* now edx = top-4 */ - cmpl %edx,%esi; ja .L310 - - .p2align 2 -.L308: /* do up to 6 words, two at a time */ - movd (%esi),%mm0 - paddsw %mm2,%mm0 - psraw %mm3,%mm0; - movd %mm0,(%esi) - addl $4,%esi - cmpl %edx,%esi - jbe .L308 - - .p2align 2 -.L310: - addl $2,%edx /* now edx = top-2 */ - cmpl %edx,%esi; ja .L315 - - movzwl (%esi),%eax - movd %eax,%mm0 - paddsw %mm2,%mm0 - psraw %mm3,%mm0; - movd %mm0,%eax - movw %ax,(%esi) - - .p2align 2 -.L315: - emms -.L399: - popl %esi - leave - ret 
-.Lfe4: - .size k6vsraw,.Lfe4-k6vsraw - - .align 4 -/* void k6vsllw P3((short *p, int n, int bits) */ -.globl k6vsllw - .type k6vsllw,@function -k6vsllw: - pushl %ebp - movl %esp,%ebp - pushl %esi - movl 8(%ebp),%esi - movl 16(%ebp),%ecx - andl %ecx,%ecx; jle .L499 - movl 12(%ebp),%eax - leal -16(%esi,%eax,2),%edx /* edx = top - 16 */ - emms - movd %ecx,%mm3 - cmpl %edx,%esi; ja .L406 - - .p2align 2 -.L402: /* 8 words per iteration */ - movq (%esi),%mm0 - movq 8(%esi),%mm1 - psllw %mm3,%mm0; - psllw %mm3,%mm1; - movq %mm0,(%esi) - movq %mm1,8(%esi) - addl $16,%esi - cmpl %edx,%esi - jbe .L402 - - .p2align 2 -.L406: - addl $12,%edx /* now edx = top-4 */ - cmpl %edx,%esi; ja .L410 - - .p2align 2 -.L408: /* do up to 6 words, two at a time */ - movd (%esi),%mm0 - psllw %mm3,%mm0; - movd %mm0,(%esi) - addl $4,%esi - cmpl %edx,%esi - jbe .L408 - - .p2align 2 -.L410: - addl $2,%edx /* now edx = top-2 */ - cmpl %edx,%esi; ja .L415 - - movzwl (%esi),%eax - movd %eax,%mm0 - psllw %mm3,%mm0; - movd %mm0,%eax - movw %ax,(%esi) - - .p2align 2 -.L415: - emms -.L499: - popl %esi - leave - ret -.Lfe5: - .size k6vsllw,.Lfe5-k6vsllw - - -.section .rodata - .align 4 - .type extremes,@object - .size extremes,8 -extremes: - .long 0x80008000 - .long 0x7fff7fff - .type ones,@object - .size ones,8 -ones: - .long 0x00010001 - .long 0x00010001 - -.text - .align 4 -/* long k6maxmin (const short *p, int n, short *out) */ -.globl k6maxmin - .type k6maxmin,@function -k6maxmin: - pushl %ebp - movl %esp,%ebp - pushl %esi - emms - movl 8(%ebp),%esi - movl 12(%ebp),%eax - leal -8(%esi,%eax,2),%edx - - cmpl %edx,%esi - jbe .L52 - movd extremes,%mm0 - movd extremes+4,%mm1 - jmp .L58 - - .p2align 2 -.L52: - movq (%esi),%mm0 /* mm0 will be max's */ - movq %mm0,%mm1 /* mm1 will be min's */ - addl $8,%esi - cmpl %edx,%esi - ja .L56 - - .p2align 2 -.L54: - movq (%esi),%mm2 - - movq %mm2,%mm3 - pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */ - movq %mm3,%mm4 - pand %mm2,%mm3 /* mm3 is mm2 
masked to new max's */ - pandn %mm0,%mm4 /* mm4 is mm0 masked to its max's */ - por %mm3,%mm4 - movq %mm4,%mm0 /* now mm0 is updated max's */ - - movq %mm1,%mm3 - pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */ - pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */ - pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */ - por %mm3,%mm2 - movq %mm2,%mm1 /* now mm1 is updated min's */ - - addl $8,%esi - cmpl %edx,%esi - jbe .L54 - - .p2align 2 -.L56: /* merge down the 4-word max/mins to lower 2 words */ - - movq %mm0,%mm2 - psrlq $32,%mm2 - movq %mm2,%mm3 - pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */ - pand %mm3,%mm2 /* mm2 is mm2 masked to new max's */ - pandn %mm0,%mm3 /* mm3 is mm0 masked to its max's */ - por %mm3,%mm2 - movq %mm2,%mm0 /* now mm0 is updated max's */ - - movq %mm1,%mm2 - psrlq $32,%mm2 - movq %mm1,%mm3 - pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */ - pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */ - pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */ - por %mm3,%mm2 - movq %mm2,%mm1 /* now mm1 is updated min's */ - - .p2align 2 -.L58: - addl $4,%edx /* now dx = top-4 */ - cmpl %edx,%esi - ja .L62 - /* here, there are >= 2 words of input remaining */ - movd (%esi),%mm2 - - movq %mm2,%mm3 - pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */ - movq %mm3,%mm4 - pand %mm2,%mm3 /* mm3 is mm2 masked to new max's */ - pandn %mm0,%mm4 /* mm4 is mm0 masked to its max's */ - por %mm3,%mm4 - movq %mm4,%mm0 /* now mm0 is updated max's */ - - movq %mm1,%mm3 - pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */ - pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */ - pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */ - por %mm3,%mm2 - movq %mm2,%mm1 /* now mm1 is updated min's */ - - addl $4,%esi - - .p2align 2 -.L62: - /* merge down the 2-word max/mins to 1 word */ - - movq %mm0,%mm2 - psrlq $16,%mm2 - movq %mm2,%mm3 - pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 
> mm0 */ - pand %mm3,%mm2 /* mm2 is mm2 masked to new max's */ - pandn %mm0,%mm3 /* mm3 is mm0 masked to its max's */ - por %mm3,%mm2 - movd %mm2,%ecx /* cx is max so far */ - - movq %mm1,%mm2 - psrlq $16,%mm2 - movq %mm1,%mm3 - pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */ - pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */ - pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */ - por %mm3,%mm2 - movd %mm2,%eax /* ax is min so far */ - - addl $2,%edx /* now dx = top-2 */ - cmpl %edx,%esi - ja .L65 - - /* here, there is one word of input left */ - cmpw (%esi),%cx - jge .L64 - movw (%esi),%cx - .p2align 2 -.L64: - cmpw (%esi),%ax - jle .L65 - movw (%esi),%ax - - .p2align 2 -.L65: /* (finally!) cx is the max, ax the min */ - movswl %cx,%ecx - movswl %ax,%eax - - movl 16(%ebp),%edx /* ptr to output max,min vals */ - andl %edx,%edx; jz .L77 - movw %cx,(%edx) /* max */ - movw %ax,2(%edx) /* min */ - .p2align 2 -.L77: - /* now calculate max absolute val */ - negl %eax - cmpl %ecx,%eax - jge .L81 - movl %ecx,%eax - .p2align 2 -.L81: - emms - popl %esi - leave - ret -.Lfe6: - .size k6maxmin,.Lfe6-k6maxmin - -/* void Short_term_analysis_filtering (short *u0, const short *rp0, int kn, short *s) */ - .equiv pm_u0,8 - .equiv pm_rp0,12 - .equiv pm_kn,16 - .equiv pm_s,20 - .equiv lv_u_top,-4 - .equiv lv_s_top,-8 - .equiv lv_rp,-40 /* local version of rp0 with each word twice */ - .align 4 -.globl Short_term_analysis_filteringx - .type Short_term_analysis_filteringx,@function -Short_term_analysis_filteringx: - pushl %ebp - movl %esp,%ebp - subl $40,%esp - pushl %edi - pushl %esi - - movl pm_rp0(%ebp),%esi; - leal lv_rp(%ebp),%edi; - cld - lodsw; stosw; stosw - lodsw; stosw; stosw - lodsw; stosw; stosw - lodsw; stosw; stosw - lodsw; stosw; stosw - lodsw; stosw; stosw - lodsw; stosw; stosw - lodsw; stosw; stosw - emms - movl $0x4000,%eax; - movd %eax,%mm4; - punpckldq %mm4,%mm4 /* (0x00004000,0x00004000) for rounding dword product pairs */ - - movl 
pm_u0(%ebp),%eax - addl $16,%eax - movl %eax,lv_u_top(%ebp) /* UTOP */ - movl pm_s(%ebp),%edx /* edx is local s ptr throughout below */ - movl pm_kn(%ebp),%eax - leal (%edx,%eax,2),%eax - movl %eax,lv_s_top(%ebp) - cmpl %eax,%edx - jae .L179 - .p2align 2 -.L181: - leal lv_rp(%ebp),%esi /* RP */ - movl pm_u0(%ebp),%edi /* U */ - movw (%edx),%ax /* (0,DI) */ - roll $16,%eax - movw (%edx),%ax /* (DI,DI) */ - .p2align 2 -.L185: /* RP is %esi */ - movl %eax,%ecx - movw (%edi),%ax /* (DI,U) */ - movd (%esi),%mm3 /* mm3 is (0,0,RP,RP) */ - movw %cx,(%edi) - - movd %eax,%mm2 /* mm2 is (0,0,DI,U) */ - rorl $16,%eax - movd %eax,%mm1 /* mm1 is (0,0,U,DI) */ - - movq %mm1,%mm0 - pmullw %mm3,%mm0 - pmulhw %mm3,%mm1 - punpcklwd %mm1,%mm0 /* mm0 is (RP*U,RP*DI) */ - paddd %mm4,%mm0 /* mm4 is 0x00004000,0x00004000 */ - psrad $15,%mm0 /* (RP*U,RP*DI) adjusted */ - packssdw %mm0,%mm0 /* (*,*,RP*U,RP*DI) adjusted and saturated to word */ - paddsw %mm2,%mm0 /* mm0 is (?,?, DI', U') */ - movd %mm0,%eax /* (DI,U') */ - - addl $2,%edi - addl $4,%esi - cmpl lv_u_top(%ebp),%edi - jb .L185 - - rorl $16,%eax - movw %ax,(%edx) /* last DI goes to *s */ - addl $2,%edx /* next s */ - cmpl lv_s_top(%ebp),%edx - jb .L181 - .p2align 2 -.L179: - emms - popl %esi - popl %edi - leave - ret -.Lfe7: - .size Short_term_analysis_filteringx,.Lfe7-Short_term_analysis_filteringx - -.end - -/* 'as' macro's seem to be case-insensitive */ -.macro STEP n -.if \n - movd \n(%esi),%mm3 /* mm3 is (0,0,RP,RP) */ -.else - movd (%esi),%mm3 /* mm3 is (0,0,RP,RP) */ -.endif - movq %mm5,%mm1; - movd %mm4,%ecx; movw %cx,%ax /* (DI,U) */ - psllq $48,%mm1; psrlq $16,%mm4; por %mm1,%mm4 - psllq $48,%mm0; psrlq $16,%mm5; por %mm0,%mm5 - - movd %eax,%mm2 /* mm2 is (0,0,DI,U) */ - rorl $16,%eax - movd %eax,%mm1 /* mm1 is (0,0,U,DI) */ - - movq %mm1,%mm0 - pmullw %mm3,%mm0 - pmulhw %mm3,%mm1 - punpcklwd %mm1,%mm0 /* mm0 is (RP*U,RP*DI) */ - paddd %mm6,%mm0 /* mm6 is 0x00004000,0x00004000 */ - psrad $15,%mm0 /* (RP*U,RP*DI) 
adjusted */ - packssdw %mm0,%mm0 /* (*,*,RP*U,RP*DI) adjusted and saturated to word */ - paddsw %mm2,%mm0 /* mm0 is (?,?, DI', U') */ - movd %mm0,%eax /* (DI,U') */ -.endm - -/* void Short_term_analysis_filtering (short *u0, const short *rp0, int kn, short *s) */ - .equiv pm_u0,8 - .equiv pm_rp0,12 - .equiv pm_kn,16 - .equiv pm_s,20 - .equiv lv_rp_top,-4 - .equiv lv_s_top,-8 - .equiv lv_rp,-40 /* local version of rp0 with each word twice */ - .align 4 -.globl Short_term_analysis_filteringx - .type Short_term_analysis_filteringx,@function -Short_term_analysis_filteringx: - pushl %ebp - movl %esp,%ebp - subl $56,%esp - pushl %edi - pushl %esi - pushl %ebx - - movl pm_rp0(%ebp),%esi; - leal lv_rp(%ebp),%edi; - cld - lodsw; stosw; stosw - lodsw; stosw; stosw - lodsw; stosw; stosw - lodsw; stosw; stosw - lodsw; stosw; stosw - lodsw; stosw; stosw - lodsw; stosw; stosw - lodsw; stosw; stosw - movl %edi,lv_rp_top(%ebp) - emms - - movl $0x4000,%eax; - movd %eax,%mm6; - punpckldq %mm6,%mm6 /* (0x00004000,0x00004000) for rounding dword product pairs */ - - movl pm_u0(%ebp),%ebx - movq (%ebx),%mm4; movq 8(%ebx),%mm5 /* the 8 u's */ - movl pm_s(%ebp),%edx /* edx is local s ptr throughout below */ - movl pm_kn(%ebp),%eax - leal (%edx,%eax,2),%eax - movl %eax,lv_s_top(%ebp) - cmpl %eax,%edx - jae .L179 - .p2align 2 -.L181: - leal lv_rp(%ebp),%esi /* RP */ - movw (%edx),%ax /* (0,DI) */ - roll $16,%eax - movw (%edx),%ax /* (DI,DI) */ - movd %eax,%mm0 - .p2align 2 -.L185: /* RP is %esi */ - step 0 - step 4 - step 8 - step 12 -/* - step 16 - step 20 - step 24 - step 28 -*/ - addl $16,%esi - cmpl lv_rp_top(%ebp),%esi - jb .L185 - - rorl $16,%eax - movw %ax,(%edx) /* last DI goes to *s */ - addl $2,%edx /* next s */ - cmpl lv_s_top(%ebp),%edx - jb .L181 -.L179: - movq %mm4,(%ebx); movq %mm5,8(%ebx) /* the 8 u's */ - emms - popl %ebx - popl %esi - popl %edi - leave - ret -.Lfe7: - .size Short_term_analysis_filteringx,.Lfe7-Short_term_analysis_filteringx - .ident "GCC: (GNU) 2.95.2 
19991109 (Debian GNU/Linux)" |