@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
@
@**********************************************************************/
@Word32 Dot_product12(          /* (o) Q31: normalized result (-1 <= val < 1) */
@       Word16 x[],             /* (i) 12bits: x vector                       */
@       Word16 y[],             /* (i) 12bits: y vector                       */
@       Word16 lg,              /* (i)    : vector length                     */
@       Word16 * exp            /* (o)    : exponent of result (0..+30)       */
@)
@************************************************************************
@ Syntax : GNU as, ARM (A32) state, NEON.
@
@ In     : r0 = x[]   r1 = y[]   r2 = lg   r3 = exp
@ Out    : r0 = (2 * sum(x[i] * y[i]) + 1) << sft
@          halfword at [r3] = 30 - sft
@ Clobber: r0, r1, r12, flags, Q0-Q3, Q8-Q13, Q15
@          (r4-r11 and D8-D15 are saved on entry and restored on exit)
@
@ Length : only "lg == 64" is tested.  With lg == 64 exactly 64 elements
@          are accumulated; with ANY other value exactly 80 elements are
@          read and accumulated.
@ Aliasing: when r0 == r1 the vector is loaded once and squared.

        .section    .text
        .global     Dot_product12_asm

Dot_product12_asm:

        STMFD       r13!, {r4 - r12, r14}
        VPUSH       {D8 - D15}                  @ Q4-Q7 are loaded below; D8-D15 are callee-saved (AAPCS)
        CMP         r0, r1
        BEQ         LOOP_EQ                     @ same pointer: sum of squares, single load stream

        @ ---- x != y : first 64 elements --------------------------------
        VLD1.S16    {Q0, Q1}, [r0]!             @ x[0..15]
        VLD1.S16    {Q2, Q3}, [r0]!             @ x[16..31]
        VLD1.S16    {Q4, Q5}, [r0]!             @ x[32..47]
        VLD1.S16    {Q6, Q7}, [r0]!             @ x[48..63]
        VLD1.S16    {Q8, Q9}, [r1]!             @ y[0..15]
        VLD1.S16    {Q10, Q11}, [r1]!           @ y[16..31]
        VLD1.S16    {Q12, Q13}, [r1]!           @ y[32..47]

        VMULL.S16   Q15, D16, D0                @ Q15 = four 32-bit partial sums
        VMLAL.S16   Q15, D17, D1
        VMLAL.S16   Q15, D18, D2
        VMLAL.S16   Q15, D19, D3
        VLD1.S16    {Q0, Q1}, [r1]!             @ y[48..63] reuses Q0/Q1 (x[0..15] already consumed)
        VMLAL.S16   Q15, D20, D4
        VMLAL.S16   Q15, D21, D5
        VMLAL.S16   Q15, D22, D6
        VMLAL.S16   Q15, D23, D7
        VMLAL.S16   Q15, D24, D8
        VMLAL.S16   Q15, D25, D9
        VMLAL.S16   Q15, D26, D10
        VMLAL.S16   Q15, D27, D11
        VMLAL.S16   Q15, D0, D12
        VMLAL.S16   Q15, D1, D13
        VMLAL.S16   Q15, D2, D14
        VMLAL.S16   Q15, D3, D15

        CMP         r2, #64
        BEQ         Lable1

        @ ---- x != y : elements 64..79 ----------------------------------
        VLD1.S16    {Q0, Q1}, [r0]!             @ x[64..79]
        VLD1.S16    {Q2, Q3}, [r1]!             @ y[64..79]
        VMLAL.S16   Q15, D4, D0
        VMLAL.S16   Q15, D5, D1
        VMLAL.S16   Q15, D6, D2
        VMLAL.S16   Q15, D7, D3
        B           Lable1                      @ plain jump over the x == y path; no link needed

        @ ---- x == y : first 64 elements --------------------------------
LOOP_EQ:
        VLD1.S16    {Q0, Q1}, [r0]!             @ x[0..15]
        VLD1.S16    {Q2, Q3}, [r0]!             @ x[16..31]
        VLD1.S16    {Q4, Q5}, [r0]!             @ x[32..47]
        VLD1.S16    {Q6, Q7}, [r0]!             @ x[48..63]
        VMULL.S16   Q15, D0, D0
        VMLAL.S16   Q15, D1, D1
        VMLAL.S16   Q15, D2, D2
        VMLAL.S16   Q15, D3, D3
        VMLAL.S16   Q15, D4, D4
        VMLAL.S16   Q15, D5, D5
        VMLAL.S16   Q15, D6, D6
        VMLAL.S16   Q15, D7, D7
        VMLAL.S16   Q15, D8, D8
        VMLAL.S16   Q15, D9, D9
        VMLAL.S16   Q15, D10, D10
        VMLAL.S16   Q15, D11, D11
        VMLAL.S16   Q15, D12, D12
        VMLAL.S16   Q15, D13, D13
        VMLAL.S16   Q15, D14, D14
        VMLAL.S16   Q15, D15, D15

        CMP         r2, #64
        BEQ         Lable1

        @ ---- x == y : elements 64..79 ----------------------------------
        VLD1.S16    {Q0, Q1}, [r0]!             @ x[64..79]
        VMLAL.S16   Q15, D0, D0
        VMLAL.S16   Q15, D1, D1
        VMLAL.S16   Q15, D2, D2
        VMLAL.S16   Q15, D3, D3

        @ ---- horizontal reduction and normalization --------------------
Lable1:

        VQADD.S32   D30, D30, D31               @ fold upper two lanes onto lower two (saturating)
        VPADD.S32   D30, D30, D30               @ lane0 = lane0 + lane1 (wrapping)
        VMOV.S32    r12, D30[0]                 @ r12 = L_sum

        ADD         r12, r12, r12
        ADD         r12, r12, #1                @ L_sum = (L_sum << 1) + 1  (always odd, never 0)
        MOV         r4, r12
        CMP         r12, #0
        RSBLT       r4, r12, #0                 @ r4 = |L_sum|
        @ NOTE(review): negation gives sft = 30 for L_sum == -1; a norm_l that
        @ complements negative inputs would give 31 - confirm which is intended.
        CLZ         r10, r4
        SUB         r10, r10, #1                @ sft = norm_l(L_sum)
        MOV         r0, r12, LSL r10            @ L_sum = L_sum << sft
        RSB         r11, r10, #30               @ *exp = 30 - sft
        STRH        r11, [r3]

Dot_product12_end:

        VPOP        {D8 - D15}                  @ undo the VPUSH from the prologue
        LDMFD       r13!, {r4 - r12, r15}       @ restore and return (saved lr popped into pc)

        .end
