Skip to content

Commit b4b84ba

Browse files
committed
Use VU0_VECTORs and ASM code
1 parent d67a4b8 commit b4b84ba

File tree

13 files changed, with 2347 additions (+2347) and 831 deletions (-831).

.github/workflows/compilation.yml

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,4 +19,12 @@ jobs:
1919
2020
- name: Compile project
2121
run: |
22-
make clean all install
22+
make -j $(getconf _NPROCESSORS_ONLN) clean
23+
make -j $(getconf _NPROCESSORS_ONLN) all
24+
make -j $(getconf _NPROCESSORS_ONLN) install
25+
26+
- name: Compile tests
27+
run: |
28+
cd tests
29+
make -j $(getconf _NPROCESSORS_ONLN) clean
30+
make -j $(getconf _NPROCESSORS_ONLN) all

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,3 +3,4 @@ objs_*
33
prebuilddone
44
*.o
55
*.a
6+
*.elf

Makefile

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,9 +11,6 @@ endif
1111
# Disabling warnings
1212
WARNING_FLAGS = -Wno-strict-aliasing -Wno-conversion-null
1313

14-
# VU0 code is broken so disable for now
15-
EE_CFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM
16-
EE_CXXFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM
1714

1815
EE_OBJS = \
1916
src/core.o \

Makefile.builds

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,6 @@ perf_INCDIRS := $(incdirs)
4141

4242
### native, no vu0 vectors ###
4343

44-
defines := NO_VU0_VECTORS
4544
optflags := -ffast-math -O2
4645

4746
# debug_no_vu0
@@ -69,7 +68,7 @@ release_no_vu0_INCDIRS := $(incdirs)
6968
incdirs := $(PS2STUFF)/linux/kernel_module
7069
libdirs := /usr/lib
7170
debug_flags := -D_DEBUG -g
72-
defines := PS2_LINUX NO_VU0_VECTORS
71+
defines := PS2_LINUX
7372
optflags := -ffast-math -O2
7473

7574
# linux
@@ -104,7 +103,7 @@ linux_release_PLATFORM := linux
104103
# cross_linux
105104
BUILDNAMES += cross_linux
106105
cross_linux_INCDIRS := $(incdirs)
107-
cross_linux_DEFINES := PS2_LINUX NO_VU0_VECTORS
106+
cross_linux_DEFINES := PS2_LINUX
108107
cross_linux_DEBUGFLAGS := -D_DEBUG -g
109108
cross_linux_OPTFLAGS := -ffast-math -O2
110109
cross_linux_PLATFORM := linux_cross

include/ps2s/cpu_vector.h

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -70,8 +70,8 @@ class cpu_vec_3 {
7070
"mtc1 _temp0, _z # z = value.z \n"
7171

7272
".endif \n"
73-
: "=&r,f _x"(x), "=&r,f _y"(y), "=&r,f _z"(z), "=&r,&r _temp0"(temp0)
74-
: "r,r _vec"(vec));
73+
: "=&r,&f"(x), "=&r,&f"(y), "=&r,&f"(z), "=&r"(temp0)
74+
: "r"(vec));
7575
}
7676

7777
explicit inline cpu_vec_3(const vec_3 vec) { set(vec.vec128); }
@@ -215,8 +215,8 @@ class cpu_vec_4 {
215215
"mtc1 _temp0, _w # w = value.w \n"
216216

217217
".endif \n"
218-
: "=&r,f _x"(x), "=&r,f _y"(y), "=&r,f _z"(z), "=&r,f _w"(w), "=&r,&r _temp0"(temp0)
219-
: "r,r _vec"(vec));
218+
: "=&r,&f"(x), "=&r,&f"(y), "=&r,&f"(z), "=&r,&f"(w), "=&r"(temp0)
219+
: "r"(vec));
220220
}
221221

222222
explicit inline cpu_vec_4(const vec_4 vec) { set(vec.vec128); }
@@ -335,12 +335,12 @@ cpu_vec_3::operator+(const cpu_vec_3& vec)
335335
#else
336336

337337
asm(" ### cpu_vec_3 + cpu_vec_3 ### \n"
338-
"add.s rx, v0x, v1x \n"
339-
"add.s ry, v0y, v1y \n"
340-
"add.s rz, v0z, v1z \n"
341-
: "=&f rx"(result.x), "=&f ry"(result.y), "=&f rz"(result.z)
342-
: "f v0x"(x), "f v0y"(y), "f v0z"(z),
343-
"f v1x"(vec.x), "f v1y"(vec.y), "f v1z"(vec.z));
338+
"add.s %[rx], %[v0x], %[v1x] \n"
339+
"add.s %[ry], %[v0y], %[v1y] \n"
340+
"add.s %[rz], %[v0z], %[v1z] \n"
341+
: [rx] "=&f"(result.x), [ry] "=&f"(result.y), [rz] "=&f"(result.z)
342+
: [v0x] "f"(x), [v0y] "f"(y), [v0z] "f"(z),
343+
[v1x] "f"(vec.x), [v1y] "f"(vec.y), [v1z] "f"(vec.z));
344344

345345
#endif
346346
return result;
@@ -359,12 +359,12 @@ cpu_vec_3::operator-(const cpu_vec_3& vec)
359359
#else
360360

361361
asm(" ### cpu_vec_3 - cpu_vec_3 ### \n"
362-
"sub.s rx, v0x, v1x \n"
363-
"sub.s ry, v0y, v1y \n"
364-
"sub.s rz, v0z, v1z \n"
365-
: "=&f rx"(result.x), "=&f ry"(result.y), "=&f rz"(result.z)
366-
: "f v0x"(x), "f v0y"(y), "f v0z"(z),
367-
"f v1x"(vec.x), "f v1y"(vec.y), "f v1z"(vec.z));
362+
"sub.s %[rx], %[v0x], %[v1x] \n"
363+
"sub.s %[ry], %[v0y], %[v1y] \n"
364+
"sub.s %[rz], %[v0z], %[v1z] \n"
365+
: [rx] "=&f"(result.x), [ry] "=&f"(result.y), [rz] "=&f"(result.z)
366+
: [v0x] "f"(x), [v0y] "f"(y), [v0z] "f"(z),
367+
[v1x] "f"(vec.x), [v1y] "f"(vec.y), [v1z] "f"(vec.z));
368368

369369
#endif
370370
return result;

include/ps2s/matrix.h

Lines changed: 28 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -219,25 +219,26 @@ class mat_33 : public mat_x3_template<vec_3> {
219219
vec128_t result, temp0, temp1, temp2, ones;
220220

221221
asm("### mat_33 trans_mult vec_3 ### \n"
222-
"vmul %[temp0], col0], %[vec] \n"
223-
"vmaxw %[ones], vf00, vf00 \n"
224-
"vmul %[temp1], %[col1], %[vec] \n"
225-
"vmul %[temp2], %[col2], %[vec] \n"
226-
"vadday.x ACC, %[temp0], %[temp0] \n"
227-
"vmaddz.x %[result], %[ones], %[temp0] \n"
228-
"vaddax.y ACC, %[temp1], %[temp1] \n"
229-
"vmaddz.y %[result], %[ones], %[temp1] \n"
230-
"vaddax.z ACC, %[temp2], %[temp2] \n"
231-
"vmaddy.z %[result], %[ones], %[temp2] \n"
232-
: [result] "=&j"(result),
233-
[temp0] "=&j"(temp0),
234-
[temp1] "=&j"(temp1),
235-
[temp2] "=&j"(temp2),
236-
[ones] "=&j"(ones), "=r"(vu0_ACC)
237-
: "j col0"(col0),
238-
"j col1"(col1),
239-
"j col2"(col2),
240-
"j vec"(vec));
222+
"qmtc2 %[col0], $vf10 \n"
223+
"qmtc2 %[col1], $vf11 \n"
224+
"qmtc2 %[col2], $vf12 \n"
225+
"qmtc2 %[vec], $vf13 \n"
226+
"vmul $vf14, $vf10, $vf13 \n"
227+
"vmaxw $vf15, $vf0, $vf0 \n"
228+
"vmul $vf16, $vf11, $vf13 \n"
229+
"vmul $vf17, $vf12, $vf13 \n"
230+
"vadday.x $ACC, $vf14, $vf14 \n"
231+
"vmaddz.x $vf14, $vf15, $vf14 \n"
232+
"vaddax.y $ACC, $vf16, $vf16 \n"
233+
"vmaddz.y $vf14, $vf15, $vf16 \n"
234+
"vaddax.z $ACC, $vf17, $vf17 \n"
235+
"vmaddy.z $vf14, $vf15, $vf17 \n"
236+
"qmfc2 %[result], $vf14 \n"
237+
: [result] "=&r"(result), "=r"(vu0_ACC)
238+
: [col0] "r"(col0.vec128),
239+
[col1] "r"(col1.vec128),
240+
[col2] "r"(col2.vec128),
241+
[vec] "r"(vec.vec128));
241242

242243
return vec_3(result);
243244
}
@@ -422,7 +423,7 @@ class mat_43 : public mat_x3_template<vec_4> {
422423
"vadday.x ACC, %[temp0], %[temp0] \n"
423424
"vmaddz.x %[result], %[ones], %[temp0] \n"
424425
"vaddax.y ACC, %[temp1], %[temp1] \n"
425-
"vmaddz.y %[result, %[ones], %[temp1] \n"
426+
"vmaddz.y %[result], %[ones], %[temp1] \n"
426427
"vaddax.z ACC, %[temp2], %[temp2] \n"
427428
"vmaddy.z %[result], %[ones], %[temp2] \n"
428429
: [result] "=&j"(result),
@@ -1377,7 +1378,7 @@ class transform_t {
13771378
" ### transform_t * vector_t ### \n"
13781379
"vmulax ACC, %[col0], %[vec] \n"
13791380
"vmadday ACC, %[col1], %[vec] \n"
1380-
"vmaddz %[result], %[col2, %[vec] \n"
1381+
"vmaddz %[result], %[col2], %[vec] \n"
13811382
: [result] "=&j"(result), "=r"(vu0_ACC)
13821383
: [vec] "j"(vec),
13831384
[col0] "j"(col0), [col1] "j"(col1), [col2] "j"(col2));
@@ -1761,7 +1762,7 @@ mat_33::inverse() const
17611762
"vaddx.y %[temp], vf00], %[inv2] # Do an in-place transpose, produces determinant(R)*Rinv \n"
17621763
"vadd.xz %[temp], vf00], %[inv1] \n"
17631764
"vaddy.x %[inv1], vf00], %[inv0] \n"
1764-
"vdiv Q, vf00w, %[determinantx] # Q = 1/determinant(R) \n"
1765+
"vdiv Q, vf00w, %[determinant]x # Q = 1/determinant(R) \n"
17651766
"vaddy.z %[inv1], vf00, %[inv2] \n"
17661767
"vaddz.x %[inv2], vf00, %[inv0] \n"
17671768
"vaddy.z %[inv0], vf00, %[temp] \n"
@@ -1844,10 +1845,10 @@ mat_33::mult_tilde(vec_3 vec) const
18441845
mat_33 result;
18451846
asm("### mat_33 mult_tilde vec_3 ### \n"
18461847
"vmulaz ACC, %[col1], %[vec] \n"
1847-
"vmsuby %[res0], %[col2, %[vec] \n"
1848-
"vmulax ACC, %[col2, %[vec] \n"
1848+
"vmsuby %[res0], %[col2], %[vec] \n"
1849+
"vmulax ACC, %[col2], %[vec] \n"
18491850
"vmsubz %[res1], %[col0], %[vec] \n"
1850-
"vmulay ACC, %[col0, %[vec] \n"
1851+
"vmulay ACC, %[col0], %[vec] \n"
18511852
"vmsubx %[res2], %[col1], %[vec] \n"
18521853
: [res0] "=&j"(result.col0), [res1] "=&j"(result.col1), [res2] "=&j"(result.col2), "=r"(vu0_ACC)
18531854
: [col0] "j"(col0), [col1] "j"(col1), [col2] "j"(col2), [vec] "j"(vec));
@@ -2648,7 +2649,7 @@ transform_t::inverse() const
26482649
[inv1] "=&j"(result.col1),
26492650
[inv2] "=&j"(result.col2),
26502651
[temp] "=&j"(temp),
2651-
[determinant] "=&j"(determinant), "=r"(vu0_ACC), "=j"(vu0_Q)
2652+
[determinant] "=&j"(determinant), "=r"(vu0_ACC)
26522653
: [col0] "j"(col0),
26532654
[col1] "j"(col1),
26542655
[col2] "j"(col2));
@@ -2669,7 +2670,7 @@ transform_t::inverse() const
26692670
[inv2] "+j"(result.col2),
26702671
[inv3] "=&j"(result.col3),
26712672
"=r"(vu0_ACC)
2672-
: [col3] "j"(col3), "j"(vu0_Q));
2673+
: [col3] "j"(col3));
26732674
return result;
26742675
}
26752676

0 commit comments

Comments
 (0)