@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
@
@*void Convolve (
@*    Word16 x[],        /* (i) : input vector        */
@*    Word16 h[],        /* (i) : impulse response    */
@*    Word16 y[],        /* (o) : output vector       */
@*    Word16 L           /* (i) : vector size         */
@*)
@
@ r0 --- x[]
@ r1 --- h[]
@ r2 --- y[]
@ r3 --- L   (note: this implementation hardcodes L = 64 and overwrites r3)
@
@ (a hedged C reference sketch of the computation appears at the end of
@  this file)

          .section  .text
          .global   Convolve_asm

Convolve_asm:

          STMFD     r13!, {r4 - r12, r14}
          MOV       r3, #0                         @ n = 0
          MOV       r11, #0x8000                   @ rounding constant

@ Each pass of LOOP emits four outputs y[n] .. y[n+3]. For each output,
@ 0 to 3 leading products are computed with scalar MUL/MLA so that the
@ remaining term count is a multiple of 4, which the NEON loops
@ (LOOP1 .. LOOP4) consume four multiply-accumulates at a time.

LOOP:
@ y[n]: one scalar product, then n terms remain (n is a multiple of 4 here)
          @MOV      r8, #0                         @ s = 0
          ADD       r4, r1, r3, LSL #1             @ tmpH address
          ADD       r5, r3, #1                     @ i = n + 1
          MOV       r6, r0
          LDRSH     r9, [r6], #2                   @ *tmpX++
          LDRSH     r10, [r4]                      @ *tmpH--
          SUB       r5, r5, #1
          VMOV.S32  Q10, #0
          MUL       r8, r9, r10

LOOP1:
          CMP       r5, #0
          BLE       L1
          SUB       r4, r4, #8                     @ step tmpH back four taps
          MOV       r9, r4
          VLD1.S16  D0, [r6]!                      @ four x samples
          VLD1.S16  D1, [r9]!                      @ four h taps, ascending
          VREV64.16 D1, D1                         @ reverse taps to match x
          SUBS      r5, r5, #4
          VMLAL.S16 Q10, D0, D1                    @ four 32-bit MACs
          B         LOOP1
L1:
          VADD.S32  D20, D20, D21                  @ reduce the four lanes
          VPADD.S32 D20, D20, D20
          VMOV.S32  r5, D20[0]
          ADD       r5, r5, r8                     @ add scalar head
          ADD       r5, r11, r5, LSL #1            @ (s << 1) + 0x8000
          MOV       r5, r5, LSR #16                @ extract_h(s)
          ADD       r3, r3, #1
          STRH      r5, [r2], #2                   @ y[n]

@ y[n+1]: two scalar products, then a multiple of 4 remains
          @MOV      r8, #0
          ADD       r4, r1, r3, LSL #1             @ tmpH address
          ADD       r5, r3, #1
          MOV       r6, r0
          LDRSH     r9, [r6], #2                   @ *tmpX++
          LDRSH     r10, [r4], #-2
          LDRSH     r12, [r6], #2
          LDRSH     r14, [r4]

          MUL       r8, r9, r10
          SUB       r5, r5, #2
          MLA       r8, r12, r14, r8

          VMOV.S32  Q10, #0
LOOP2:
          CMP       r5, #0
          BLE       L2
          SUB       r4, r4, #8
          MOV       r9, r4
          VLD1.S16  D0, [r6]!
          VLD1.S16  D1, [r9]!
          SUBS      r5, r5, #4
          VREV64.16 D1, D1
          VMLAL.S16 Q10, D0, D1
          B         LOOP2
L2:
          VADD.S32  D20, D20, D21
          VPADD.S32 D20, D20, D20
          VMOV.S32  r5, D20[0]
          ADD       r8, r8, r5
          ADD       r8, r11, r8, LSL #1            @ (s << 1) + 0x8000
          MOV       r8, r8, LSR #16                @ extract_h(s)
          ADD       r3, r3, #1
          STRH      r8, [r2], #2                   @ y[n+1]

@ y[n+2]: three scalar products, then a multiple of 4 remains
          @MOV      r8, #0
          ADD       r4, r1, r3, LSL #1             @ tmpH address
          ADD       r5, r3, #1
          MOV       r6, r0
          LDRSH     r9, [r6], #2                   @ *tmpX++
          LDRSH     r10, [r4], #-2
          LDRSH     r12, [r6], #2
          LDRSH     r14, [r4], #-2
          MUL       r8, r9, r10
          LDRSH     r9, [r6], #2
          LDRSH     r10, [r4]
          MLA       r8, r12, r14, r8
          SUB       r5, r5, #3
          MLA       r8, r9, r10, r8

          VMOV.S32  Q10, #0
LOOP3:
          CMP       r5, #0
          BLE       L3
          SUB       r4, r4, #8
          MOV       r9, r4
          VLD1.S16  D0, [r6]!
          VLD1.S16  D1, [r9]!
          VREV64.16 D1, D1
          SUBS      r5, r5, #4
          VMLAL.S16 Q10, D0, D1
          B         LOOP3

L3:
          VADD.S32  D20, D20, D21
          VPADD.S32 D20, D20, D20
          VMOV.S32  r5, D20[0]
          ADD       r8, r8, r5
          ADD       r8, r11, r8, LSL #1            @ (s << 1) + 0x8000
          MOV       r8, r8, LSR #16                @ extract_h(s)
          ADD       r3, r3, #1
          STRH      r8, [r2], #2                   @ y[n+2]

@ y[n+3]: n + 4 terms, already a multiple of 4, so no scalar head
          ADD       r5, r3, #1                     @ i = n + 1
          ADD       r4, r1, r5, LSL #1             @ tmpH address
          MOV       r6, r0
          VMOV.S32  Q10, #0
LOOP4:
          CMP       r5, #0
          BLE       L4
          SUB       r4, r4, #8
          MOV       r9, r4
          VLD1.S16  D0, [r6]!
          VLD1.S16  D1, [r9]!
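          @ D0 now holds the next four x samples and D1 four h taps loaded
          @ in ascending order; the VREV64.16 below reverses D1's 16-bit
          @ lanes so that lane k pairs x[i+k] with h[n-i-k] in the
          @ multiply-accumulate, as in LOOP1 .. LOOP3 above.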
          VREV64.16 D1, D1
          SUBS      r5, r5, #4
          VMLAL.S16 Q10, D0, D1
          B         LOOP4
L4:
          VADD.S32  D20, D20, D21
          VPADD.S32 D20, D20, D20
          VMOV.S32  r5, D20[0]
          ADD       r5, r11, r5, LSL #1            @ (s << 1) + 0x8000
          MOV       r5, r5, LSR #16                @ extract_h(s)
          ADD       r3, r3, #1
          STRH      r5, [r2], #2                   @ y[n+3]

          CMP       r3, #64                        @ L is hardcoded to 64
          BLT       LOOP

Convolve_asm_end:

          LDMFD     r13!, {r4 - r12, r15}

          @ENDFUNC
          .end
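
@ For reference, a minimal C sketch (an illustrative reimplementation, not
@ the project's own C source) of the computation performed above, assuming
@ the usual 16/32-bit fixed-point typedefs. Each output y[n] is the
@ convolution sum over i = 0..n of x[i] * h[n-i]; the
@ "(s << 1) + 0x8000" then ">> 16" steps mirror the rounding and
@ extract_h(s) sequence at L1 .. L4. Note the assembly hardcodes L = 64
@ and ignores the r3 argument; overflow/saturation behavior of the codec's
@ fixed-point basic ops is not modeled here.
@
@*  typedef short Word16;
@*  typedef int   Word32;
@*
@*  void Convolve_ref(Word16 x[], Word16 h[], Word16 y[], Word16 L)
@*  {
@*      for (Word16 n = 0; n < L; n++) {
@*          Word32 s = 0;
@*          for (Word16 i = 0; i <= n; i++)
@*              s += (Word32)x[i] * (Word32)h[n - i];   /* MAC          */
@*          s = (s << 1) + 0x8000;                      /* scale, round */
@*          y[n] = (Word16)(s >> 16);                   /* extract_h(s) */
@*      }
@*  }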