@@ -219,25 +219,26 @@ class mat_33 : public mat_x3_template<vec_3> {
219219 vec128_t result, temp0, temp1, temp2, ones;
220220
221221 asm (" ### mat_33 trans_mult vec_3 ### \n "
222- " vmul %[temp0], col0], %[vec] \n "
223- " vmaxw %[ones], vf00, vf00 \n "
224- " vmul %[temp1], %[col1], %[vec] \n "
225- " vmul %[temp2], %[col2], %[vec] \n "
226- " vadday.x ACC, %[temp0], %[temp0] \n "
227- " vmaddz.x %[result], %[ones], %[temp0] \n "
228- " vaddax.y ACC, %[temp1], %[temp1] \n "
229- " vmaddz.y %[result], %[ones], %[temp1] \n "
230- " vaddax.z ACC, %[temp2], %[temp2] \n "
231- " vmaddy.z %[result], %[ones], %[temp2] \n "
232- : [result] " =&j" (result),
233- [temp0] " =&j" (temp0),
234- [temp1] " =&j" (temp1),
235- [temp2] " =&j" (temp2),
236- [ones] " =&j" (ones), " =r" (vu0_ACC)
237- : " j col0" (col0),
238- " j col1" (col1),
239- " j col2" (col2),
240- " j vec" (vec));
222+ " qmtc2 %[col0], $vf10 \n "
223+ " qmtc2 %[col1], $vf11 \n "
224+ " qmtc2 %[col2], $vf12 \n "
225+ " qmtc2 %[vec], $vf13 \n "
226+ " vmul $vf14, $vf10, $vf13 \n "
227+ " vmaxw $vf15, $vf0, $vf0 \n "
228+ " vmul $vf16, $vf11, $vf13 \n "
229+ " vmul $vf17, $vf12, $vf13 \n "
230+ " vadday.x $ACC, $vf14, $vf14 \n "
231+ " vmaddz.x $vf14, $vf15, $vf14 \n "
232+ " vaddax.y $ACC, $vf16, $vf16 \n "
233+ " vmaddz.y $vf14, $vf15, $vf16 \n "
234+ " vaddax.z $ACC, $vf17, $vf17 \n "
235+ " vmaddy.z $vf14, $vf15, $vf17 \n "
236+ " qmfc2 %[result], $vf14 \n "
237+ : [result] " =&r" (result), " =r" (vu0_ACC)
238+ : [col0] " r" (col0.vec128 ),
239+ [col1] " r" (col1.vec128 ),
240+ [col2] " r" (col2.vec128 ),
241+ [vec] " r" (vec.vec128 ));
241242
242243 return vec_3 (result);
243244 }
@@ -422,7 +423,7 @@ class mat_43 : public mat_x3_template<vec_4> {
422423 " vadday.x ACC, %[temp0], %[temp0] \n "
423424 " vmaddz.x %[result], %[ones], %[temp0] \n "
424425 " vaddax.y ACC, %[temp1], %[temp1] \n "
425- " vmaddz.y %[result, %[ones], %[temp1] \n "
426+ " vmaddz.y %[result] , %[ones], %[temp1] \n "
426427 " vaddax.z ACC, %[temp2], %[temp2] \n "
427428 " vmaddy.z %[result], %[ones], %[temp2] \n "
428429 : [result] " =&j" (result),
@@ -1377,7 +1378,7 @@ class transform_t {
13771378 " ### transform_t * vector_t ### \n "
13781379 " vmulax ACC, %[col0], %[vec] \n "
13791380 " vmadday ACC, %[col1], %[vec] \n "
1380- " vmaddz %[result], %[col2, %[vec] \n "
1381+ " vmaddz %[result], %[col2] , %[vec] \n "
13811382 : [result] " =&j" (result), " =r" (vu0_ACC)
13821383 : [vec] " j" (vec),
13831384 [col0] " j" (col0), [col1] " j" (col1), [col2] " j" (col2));
@@ -1761,7 +1762,7 @@ mat_33::inverse() const
17611762 " vaddx.y %[temp], vf00], %[inv2] # Do an in-place transpose, produces determinant(R)*Rinv \n "
17621763 " vadd.xz %[temp], vf00], %[inv1] \n "
17631764 " vaddy.x %[inv1], vf00], %[inv0] \n "
1764- " vdiv Q, vf00w, %[determinantx] # Q = 1/determinant(R) \n "
1765+ " vdiv Q, vf00w, %[determinant]x # Q = 1/determinant(R) \n "
17651766 " vaddy.z %[inv1], vf00, %[inv2] \n "
17661767 " vaddz.x %[inv2], vf00, %[inv0] \n "
17671768 " vaddy.z %[inv0], vf00, %[temp] \n "
@@ -1844,10 +1845,10 @@ mat_33::mult_tilde(vec_3 vec) const
18441845 mat_33 result;
18451846 asm (" ### mat_33 mult_tilde vec_3 ### \n "
18461847 " vmulaz ACC, %[col1], %[vec] \n "
1847- " vmsuby %[res0], %[col2, %[vec] \n "
1848- " vmulax ACC, %[col2, %[vec] \n "
1848+ " vmsuby %[res0], %[col2] , %[vec] \n "
1849+ " vmulax ACC, %[col2] , %[vec] \n "
18491850 " vmsubz %[res1], %[col0], %[vec] \n "
1850- " vmulay ACC, %[col0, %[vec] \n "
1851+ " vmulay ACC, %[col0] , %[vec] \n "
18511852 " vmsubx %[res2], %[col1], %[vec] \n "
18521853 : [res0] " =&j" (result.col0 ), [res1] " =&j" (result.col1 ), [res2] " =&j" (result.col2 ), " =r" (vu0_ACC)
18531854 : [col0] " j" (col0), [col1] " j" (col1), [col2] " j" (col2), [vec] " j" (vec));
@@ -2648,7 +2649,7 @@ transform_t::inverse() const
26482649 [inv1] " =&j" (result.col1 ),
26492650 [inv2] " =&j" (result.col2 ),
26502651 [temp] " =&j" (temp),
2651- [determinant] " =&j" (determinant), " =r" (vu0_ACC), " =j " (vu0_Q)
2652+ [determinant] " =&j" (determinant), " =r" (vu0_ACC)
26522653 : [col0] " j" (col0),
26532654 [col1] " j" (col1),
26542655 [col2] " j" (col2));
@@ -2669,7 +2670,7 @@ transform_t::inverse() const
26692670 [inv2] " +j" (result.col2 ),
26702671 [inv3] " =&j" (result.col3 ),
26712672 " =r" (vu0_ACC)
2672- : [col3] " j" (col3), " j " (vu0_Q) );
2673+ : [col3] " j" (col3));
26732674 return result;
26742675}
26752676
0 commit comments