From 979af9a6cbdb8a29ade17d1939ce73feaecbcb51 Mon Sep 17 00:00:00 2001 From: Dave <83719612+DaveJWalker@users.noreply.github.com> Date: Tue, 10 Sep 2024 18:17:49 -0700 Subject: [PATCH 01/11] Added cordic demo program. --- programs/cordic.as | 410 ++++++++++++++++++++++++++++++++++++++++++ programs/cordic.mc | 212 ++++++++++++++++++++++ programs/cordic.schem | Bin 0 -> 5605 bytes 3 files changed, 622 insertions(+) create mode 100644 programs/cordic.as create mode 100644 programs/cordic.mc create mode 100644 programs/cordic.schem diff --git a/programs/cordic.as b/programs/cordic.as new file mode 100644 index 0000000..cd39d45 --- /dev/null +++ b/programs/cordic.as @@ -0,0 +1,410 @@ +// Cordic Demo by Dave Walker + +// A basic implementation of a CORDIC function operating in rotation mode. A CORDIC can be +// used to iteratively calculate sine and cosine of an angle. Due to the limitations of this +// 8-bit computer, the CORDIC isn't particularly accurate. A number of values in this code are +// represented as fixed point representations. Therefore, you'll see notations like u2.5 and +// s1.6. These notations denote signed/unsigned, the number of integer bits, and the number of +// fractional bits. For example, the sine/cosine outputs are all s1.6. +// +// In addition to the CORDIC, a draw_line function is included based on Bresenham's Algorithm. 
+ +// Memory mapped IO port mapping offsets +// from memory_mapped_io_addr (248) +define memory_mapped_io_addr 248 +define pixel_x_offset -8 +define pixel_y_offset -7 +define draw_pixel_offset -6 +define clear_pixel_offset -5 +define load_pixel_offset -4 +define buffer_screen_offset -3 +define clear_screen_buffer_offset -2 +define write_char_offset -1 +define buffer_chars_offset 0 +define clear_chars_buffer_offset 1 +define show_number_offset 2 +define clear_number_offset 3 +define signed_mode_offset 4 +define unsigned_mode_offset 5 +define rng_offset 6 +define controller_input_offset 7 + +// Various RAM addresses +define x2_coord 0 +define y2_coord 1 +define register_stack_pointer 100 +define atan_LUT_strt_addr 232 + +// Load the arctan LUT into RAM. +CAL .load_atan_lut + +// Clear the screen and number display +LDI r15 memory_mapped_io_addr +STR r15 r0 clear_screen_buffer_offset +STR r15 r0 buffer_screen_offset +STR r15 r0 unsigned_mode_offset +STR r15 r0 clear_chars_buffer_offset +STR r15 r0 buffer_chars_offset + +// Write "CORDICDEMO" +STR r15 r0 clear_chars_buffer_offset +LDI r14 "C" +STR r15 r14 write_char_offset +LDI r14 "O" +STR r15 r14 write_char_offset +LDI r14 "R" +STR r15 r14 write_char_offset +LDI r14 "D" +STR r15 r14 write_char_offset +LDI r14 "I" +STR r15 r14 write_char_offset +LDI r14 "C" +STR r15 r14 write_char_offset +LDI r14 "D" +STR r15 r14 write_char_offset +LDI r14 "E" +STR r15 r14 write_char_offset +LDI r14 "M" +STR r15 r14 write_char_offset +LDI r14 "O" +STR r15 r14 write_char_offset +STR r15 r0 buffer_chars_offset + +// Store initial point of circle in RAM +LDI r15 x2_coord +LDI r14 31 +STR r15 r14 x2_coord +LDI r14 16 +STR r15 r14 y2_coord + +// Draw a circle using the CORDIC function +// as a simple demonstration. The CORDIC +// is used to generate points on the circle +// and lines are drawn between each point. 
+
+// Go through angles from 0 to 200 (0 to 2*pi radians)
+LDI r13 0 // Starting angle
+
+.circle_loop
+    MOV r13 r1 // Pass the angle to .cordic in r1; r13 keeps the loop's copy because .cordic modifies r1
+
+    // Use the CORDIC to calculate sine and cosine of angle (r1)
+    CAL .cordic
+
+    // Scale sine/cosine values and center on screen
+    // also move them to r3/r4 for use in draw_line function.
+    // Each value is shifted right twice (divide by 4). RSH does not shift in
+    // the sign bit, so the saved sign bit is added back after every shift.
+    // y (r3) is moved into r4 first because r3 is then reused for x.
+    LDI r10 128 // Sign bit mask
+    AND r3 r10 r5 // Grab the sign bit for y
+    RSH r3 r4
+    ADD r4 r5 r4
+    RSH r4 r4
+    ADD r4 r5 r4
+    AND r2 r10 r5 // Grab the sign bit for x
+    RSH r2 r3
+    ADD r3 r5 r3
+    RSH r3 r3
+    ADD r3 r5 r3
+    ADI r3 16 // Center x on the screen
+    ADI r4 16 // Center y on the screen
+    // Grab xy coordinates from previous iteration from RAM
+    LDI r15 x2_coord
+    LOD r15 r1 x2_coord
+    LOD r15 r2 y2_coord
+
+    // Push the r13 angle value to RAM since it gets modified inside the draw_line function
+    LDI r15 register_stack_pointer
+    STR r15 r13
+    CAL .draw_line
+    // And pop it back off when finished
+    LDI r15 register_stack_pointer
+    LOD r15 r13
+
+    // Store the new endpoint (r3/r4, draw_line's x2/y2) to RAM so it becomes
+    // the start point (x1/y1) of the next segment
+    LDI r15 x2_coord
+    STR r15 r3 x2_coord
+    STR r15 r4 y2_coord
+
+    // Display current angle
+    LDI r15 memory_mapped_io_addr
+    STR r15 r13 show_number_offset
+
+    // Increment the angle and loop
+    ADI r13 10
+    LDI r14 201 // Ending angle (loop continues while r13 < 201, so 200 is the last angle drawn)
+    CMP r13 r14
+    BRH lt .circle_loop
+HLT
+
+
+.cordic
+// CORDIC function computes sine and cosine of angle.
+// Input:
+//   r1 = angle in radians (fixed point in the form u2.5)
+//        (Values between 0 and 2*pi are supported.)
+// Outputs:
+//   r2 = cosine(r1)  (x, s1.6)
+//   r3 = sine(r1)    (y, s1.6)
+//   (x is seeded with the pre-scaled gain constant and y with 0 in
+//    .cordic_setup, and the caller treats r2 as x and r3 as y.)
+// Register usage
+//   r1  - angle in radians (s1.6)
+//   r2  - x (s1.6)
+//   r3  - y (s1.6)
+//   r4  - iteration counter (i)
+//   r5  - temp iteration counter/scratch
+//   r6  - shifted x
+//   r7  - shifted y
+//   r8  - total iterations
+//   r9  - holds current iteration atan value
+//   r10 - sign bit mask/scratch
+//   r11 - pointer to arctan table
+//   r12 - quadrant flag (determines whether to negate x and/or y result)
+//
+// The input angle comes in the form u2.5 with a
+// range of 0 to 2*pi. CORDICs only work for
+// +pi/2 to -pi/2 angles. To keep things simple,
+// we'll only operate in one quadrant of the unit
+// circle (0 to +pi/2). For the other quadrants,
+// we'll modify the angle and outputs appropriately.
+
+    // Each test branches when the threshold in r5 is >= the angle in r1.
+    LDI r5 50 // Load +pi/2 (1.5708*2^5 = ~50)
+    CMP r5 r1
+    BRH ge .quadrant_0
+    LDI r5 100 // Load +pi (3.1416*2^5 = ~100)
+    CMP r5 r1
+    BRH ge .quadrant_1
+    LDI r5 150 // Load +3/2*pi (4.712*2^5 = ~150)
+    CMP r5 r1
+    BRH ge .quadrant_2
+    JMP .quadrant_3
+
+    // For each quadrant, set the quadrant flag, which will be used
+    // at the end to negate the sine/cosine outputs accordingly.
+    // Also, adjust the input angle so all calculations are performed
+    // as if in quadrant 0.
+    .quadrant_0
+        LDI r12 0b00 // Set quadrant flag to leave xy untouched
+        JMP .cordic_setup
+    .quadrant_1
+        LDI r12 0b10 // Set quadrant flag to negate x
+        SUB r5 r1 r1 // angle = pi - angle (r5 still holds 100 from the quadrant test)
+        JMP .cordic_setup
+    .quadrant_2
+        LDI r12 0b11 // Set quadrant flag to negate x&y
+        LDI r5 100 // Load +pi (3.1416*2^5 = ~100)
+        SUB r1 r5 r1 // angle = angle - pi
+        JMP .cordic_setup
+    .quadrant_3
+        LDI r12 0x01 // Set quadrant flag to negate y
+        LDI r5 200 // Load +2*pi (6.2832*2^5 = ~200)
+        SUB r5 r1 r1 // angle = 2*pi - angle
+
+    .cordic_setup
+        LSH r1 r1 // adjust input angle from u2.5 to s1.6 (one extra fractional bit); this step is needed because negative angles are needed during CORDIC operation
+        LDI r2 38 // x = 0.6072 (s1.6) = ~38/2^6 (this value has scaling factor K pre-applied)
+        LDI r3 0 // y = 0
+        LDI r4 0 // iteration counter (i)
+        LDI r8 7 // total iterations
+        LDI r10 128 // angle sign bit mask
+        LDI r11 atan_LUT_strt_addr // Point to start of atan LUT
+
+    .cordic_loop
+        // Make temporary copies of the i,x,y values for shifting
+        MOV r4 r5
+        MOV r2 r6
+        MOV r3 r7
+
+        // The current computer ALU does not support arithmetic shifting
+        // with RSH instruction, which presents a problem for negative
+        // numbers. When shifting a negative value right, ones should
+        // get shifted into the sign bit. That doesn't happen so
+        // negative shifts aren't handled properly. To get around this problem,
+        // for now, I'll ADD in the sign bit after shifting.
+ AND r6 r10 r14 // Grab the sign bit for x + AND r7 r10 r10 // Grab the sign bit for y + .shift_loop + CMP r5 r0 + BRH eq .shift_done + RSH r6 r6 + ADD r6 r14 r6 // Add the sign bit after shifting + RSH r7 r7 + ADD r7 r10 r7 // Add the sign bit after shifting + DEC r5 + JMP .shift_loop + .shift_done + LOD r11 r9 // Load atan value for current iteration + + // Determine rotation direction + LDI r10 128 // Sign bit mask + AND r1 r10 r0 // Check the sign bit + BRH z .positive_rotation + + .negative_rotation // Clockwise + ADD r2 r7 r2 + SUB r3 r6 r3 + ADD r1 r9 r1 + JMP .next_iteration + + .positive_rotation // Counter clockwise + SUB r2 r7 r2 + ADD r3 r6 r3 + SUB r1 r9 r1 + + .next_iteration + INC r11 // Point to next atan value + INC r4 // Increment iteration (i) + CMP r4 r8 // Check against total iterations + BRH nz .cordic_loop + + // Adjust xy outputs accordingly based on quadrant + + // Adjust x? + .check_x_negate + LDI r5 0b10 + AND r12 r5 r0 + BRH nz .negate_x + JMP .check_y_negate + .negate_x + SUB r0 r2 r2 + + .check_y_negate + LDI r5 0b01 + AND r12 r5 r0 + BRH nz .negate_y + JMP .cordic_done + .negate_y + SUB r0 r3 r3 + .cordic_done + RET + + +.draw_line +// This function draws a line between two points +// utilizing Bresenham's Algorithm. I ported the +// algorithm to assembly using MattBatWing's python +// implementation as a guide (a.k.a. I swiped it). +// Inputs: +// r1 = x1 +// r2 = y1 +// r3 = x2 +// r4 = y2 +// Register Usage: +// r5 = dx = abs(x2 - x1) +// r6 = dy = abs(y2 - y1) +// r7 = sx = sign(x2 - x1) +// r8 = sy = sign(y2 - y1) +// r9 = Error = 2*dy - dx +// r10 = scratch +// r11 = A = 2*dy +// r12 = B = 2*dy - 2*dx +// r13 = interchange flag + + // Set sx/sy slope bits to 1 for positive slope (default) + LDI r7 1 + LDI r8 1 + + // Calculate x values dx and sx. 
+    .calc_x
+        LDI r10 128 // Sign bit mask
+        SUB r3 r1 r5 // x2 - x1
+        AND r5 r10 r0 // Test the sign of (x2 - x1)
+        BRH nz .negate_x_dl
+        JMP .calc_y
+    .negate_x_dl
+        SUB r0 r5 r5 // dx = abs(x2 - x1)
+        LDI r7 -1 // sx = -1 (negative slope)
+
+    // Calculate y values dy and sy.
+    .calc_y
+        SUB r4 r2 r6 // y2 - y1
+        AND r6 r10 r0 // Test the sign of (y2 - y1)
+        BRH nz .negate_y_dl
+        JMP .calc_interchange
+    .negate_y_dl
+        SUB r0 r6 r6 // dy = abs(y2 - y1)
+        LDI r8 -1 // sy = -1 (negative slope)
+
+    .calc_interchange
+        LDI r13 0 // Set interchange flag to 0 (false)
+        SUB r5 r6 r0 // Which of dx and dy is greater?
+        BRH ge .calc_err // If dx >= dy, proceed to calc error, A and B
+        MOV r5 r10 // If dx < dy, swap dx and dy (r10 is the temporary)
+        MOV r6 r5
+        MOV r10 r6
+        LDI r13 1 // and set interchange flag to 1 (true)
+
+    // From here on r5 (dx) is the longer axis and r6 (dy) the shorter one.
+    .calc_err
+        LSH r6 r11 // A = 2*dy
+        LSH r5 r12 // 2*dx
+        SUB r0 r12 r12 // -2*dx
+        ADD r11 r12 r12 // B = 2*dy - 2*dx
+        SUB r0 r5 r9 // -dx
+        ADD r11 r9 r9 // Error = 2*dy - dx
+
+        // Draw first pixel
+        LDI r15 memory_mapped_io_addr
+        STR r15 r1 pixel_x_offset
+        STR r15 r2 pixel_y_offset
+        STR r15 r0 draw_pixel_offset
+
+        // NOTE(review): the loop below takes one step before the counter is
+        // compared with dx (see .next_pixel), so a call with identical
+        // endpoints (dx == 0) still plots one extra pixel - confirm that
+        // callers never pass a zero-length line.
+        LDI r14 0 // Set i to 0 for loop
+    .draw_line_loop
+        LDI r10 128 // Sign bit mask
+        AND r9 r10 r0 // Is Error < 0?
+        BRH z .error_ge_zero
+    .error_lt_zero
+        ADD r9 r11 r9 // Error += A
+        CMP r13 r0 // Check interchange flag
+        BRH eq .inc_x // ... and step along the longer axis only (x unless interchanged)
+    .inc_y
+        ADD r2 r8 r2 // y += sy
+        JMP .draw_pixel
+    .inc_x
+        ADD r1 r7 r1 // x += sx
+        JMP .draw_pixel
+    .error_ge_zero
+        ADD r2 r8 r2 // y += sy
+        ADD r1 r7 r1 // x += sx
+        ADD r9 r12 r9 // Error += B
+
+    .draw_pixel
+        // Make sure we're in the range of the screen before
+        // drawing a pixel.
+ LDI r10 32 + CMP r1 r10 + BRH ge .next_pixel + CMP r2 r10 + BRH ge .next_pixel + STR r15 r1 pixel_x_offset + STR r15 r2 pixel_y_offset + STR r15 r0 draw_pixel_offset + + .next_pixel + INC r14 // Increment loop counter + CMP r14 r5 // Exit loop when i > dx + BRH ge .buffer_screen + JMP .draw_line_loop + + .buffer_screen + STR r15 r0 buffer_screen_offset + RET + + + +// Load the arctangent look-up table into RAM +// values are in the form S1.6 +.load_atan_lut + LDI r15 atan_LUT_strt_addr + LDI r14 50 // arctan(2^0) = ~50/2^6 + STR r15 r14 0 + LDI r14 30 // arctan(2^-1) = ~30/2^6 + STR r15 r14 1 + LDI r14 16 // arctan(2^-2) = ~16/2^6 + STR r15 r14 2 + LDI r14 8 // arctan(2^-3) = ~8/2^6 + STR r15 r14 3 + LDI r14 4 // arctan(2^-4) = ~4/2^6 + STR r15 r14 4 + LDI r14 2 // arctan(2^-5) = ~2/2^6 + STR r15 r14 5 + LDI r14 1 // arctan(2^-6) = ~1/2^6 + STR r15 r14 6 + RET diff --git a/programs/cordic.mc b/programs/cordic.mc new file mode 100644 index 0000000..f983ad6 --- /dev/null +++ b/programs/cordic.mc @@ -0,0 +1,212 @@ +1100000011000100 +1000111111111000 +1111111100001110 +1111111100001101 +1111111100000101 +1111111100000001 +1111111100000000 +1111111100000001 +1000111000000011 +1111111111101111 +1000111000001111 +1111111111101111 +1000111000010010 +1111111111101111 +1000111000000100 +1111111111101111 +1000111000001001 +1111111111101111 +1000111000000011 +1111111111101111 +1000111000000100 +1111111111101111 +1000111000000101 +1111111111101111 +1000111000001101 +1111111111101111 +1000111000001111 +1111111111101111 +1111111100000000 +1000111100000000 +1000111000011111 +1111111111100000 +1000111000010000 +1111111111100001 +1000110100000000 +0010110100000001 +1100000001000100 +1000101010000000 +0101001110100101 +0111001100000100 +0010010001010100 +0111010000000100 +0010010001010100 +0101001010100101 +0111001000000011 +0010001101010011 +0111001100000011 +0010001101010011 +1001001100010000 +1001010000010000 +1000111100000000 +1110111100010000 +1110111100100001 
+1000111101100100 +1111111111010000 +1100000010001000 +1000111101100100 +1110111111010000 +1000111100000000 +1111111100110000 +1111111101000001 +1000111111111000 +1111111111010010 +1001110100001010 +1000111011001001 +0011110111100000 +1011110000100011 +0001000000000000 +1000010100110010 +0011010100010000 +1011100001001110 +1000010101100100 +0011010100010000 +1011100001010000 +1000010110010110 +0011010100010000 +1011100001010011 +1010000001010111 +1000110000000000 +1010000001011010 +1000110000000010 +0011010100010001 +1010000001011010 +1000110000000011 +1000010101100100 +0011000101010001 +1010000001011010 +1000110000000001 +1000010111001000 +0011010100010001 +0010000100010001 +1000001000100110 +1000001100000000 +1000010000000000 +1000100000000111 +1000101010000000 +1000101111101000 +0010010000000101 +0010001000000110 +0010001100000111 +0101011010101110 +0101011110101010 +0011010100000000 +1011000001101110 +0111011000000110 +0010011011100110 +0111011100000111 +0010011110100111 +1001010111111111 +1010000001100110 +1110101110010000 +1000101010000000 +0101000110100000 +1011000001110110 +0010001001110010 +0011001101100011 +0010000110010001 +1010000001111001 +0011001001110010 +0010001101100011 +0011000110010001 +1001101100000001 +1001010000000001 +0011010010000000 +1011010001100001 +1000010100000010 +0101110001010000 +1011010010000001 +1010000010000010 +0011000000100010 +1000010100000001 +0101110001010000 +1011010010000110 +1010000010000111 +0011000000110011 +1101000000000000 +1000011100000001 +1000100000000001 +1000101010000000 +0011001100010101 +0101010110100000 +1011010010001111 +1010000010010001 +0011000001010101 +1000011111111111 +0011010000100110 +0101011010100000 +1011010010010101 +1010000010010111 +0011000001100110 +1000100011111111 +1000110100000000 +0011010101100000 +1011100010011110 +0010010100001010 +0010011000000101 +0010101000000110 +1000110100000001 +0010011001101011 +0010010101011100 +0011000011001100 +0010101111001100 +0011000001011001 +0010101110011001 
+1000111111111000 +1111111100011000 +1111111100101001 +1111111100001010 +1000111000000000 +1000101010000000 +0101100110100000 +1011000010110011 +0010100110111001 +0011110100000000 +1011000010110001 +0010001010000010 +1010000010110110 +0010000101110001 +1010000010110110 +0010001010000010 +0010000101110001 +0010100111001001 +1000101000100000 +0011000110100000 +1011100010111110 +0011001010100000 +1011100010111110 +1111111100011000 +1111111100101001 +1111111100001010 +1001111000000001 +0011111001010000 +1011100011000010 +1010000010101001 +1111111100001101 +1101000000000000 +1000111111101000 +1000111000110010 +1111111111100000 +1000111000011110 +1111111111100001 +1000111000010000 +1111111111100010 +1000111000001000 +1111111111100011 +1000111000000100 +1111111111100100 +1000111000000010 +1111111111100101 +1000111000000001 +1111111111100110 +1101000000000000 diff --git a/programs/cordic.schem b/programs/cordic.schem new file mode 100644 index 0000000000000000000000000000000000000000..28caac0383e07c8269e02876ee0cf9d017eb5153 GIT binary patch literal 5605 zcmb_gXH-*Zx5hFe16WW|KyVy{C`|!L6cA8Qh7wSb8brl_gcza}A(SA)5Oiot2qmGY z7>YE7&`TTvkr0YhO$b3sq$NNCLTLGddWV_y&HZt|@17rL?Q_;y`#k4)_kN!Jo4O1`Rq! 
z4{q9)JS!@Zd0x8lEvYKP9R{rp^G9zrdNDH@ zm!od^hXw)l*Vyd>4xg7)y~+#~tS$)FdS?y$HsBZn3YA8IGp_LEPMtN5#&6+%){-?enX(gcf}7d5>$G|GnnP78Ycm-mDE zK9g%!hQ17askUxEzHr>%zP&wzT{m#mu)1O(gIT*sh{|KkuW^@FN*L^(QO+06-o9&7 zMH<4)fvYz|QQ5wWi~Flvj$fOKG$bz%ANTLO#S0vMHlBuqE%p z0fn_bfrQAP$NjpariZ}6LDctusJb-%ZiG-8ag9{sNo!LB8>MU@N`>zz@hP$s)Yg7< z-gCrunXLS$%6B%YGGv@`cdN8sA;tj6TPg82IGyeUUG1LNA!d%uH@06mU6pa${(Q&x z;JQ3TkNnAC)I)zM$1*6W;r(uV=V!5Yoj?o~;;4nxtUuVD=zCHS*^=+CF_mjpnyLv} zsP*ChxSLq;n?y;Da?46$N|ADba=k=r(HliljM$gN3Y6ZLqrU2=9k4P%M#3C76F8|m z@_erq{{o>YJRN6zyfQ!d zxojK4iayH=nMvd4W~}bBpqgBn7?X4{g#~p~POJH0#&7&=4zHi<*zOkN11ujPUJbPm zTHwM{gT&;SG#&d|0{Q$##_!Qd^HTj6+q8@bEzOo)($ewC8B3L{8n=eK+uY4GezAlP zEmKuT#q25Z)Ca@*6^RLDaBo*TZ_9vt54(UDaBb%NtebC6?BPk`9tUNLp+GW64eyI#ctWwbuhcFk~-0+~iI1MfJb4tHMEXeBg0(BpYi;jw^8%kbP7jV5> zJaGlyxJCN5Z8#)tL~Hv->Ti)S|EQ@dtmPdWl2E$u@!mdDaAF1CvZXotAu%=Cf{+l6 zBA4AZ-Va_HscD?*ZjV@%7T!HcO+bmOK2Pm#f9Mzw3woXG9Pt$1{0tpsrgv2-sSJn4 zB2}2atTkoh3#l!#aj!k&|N4a@&~Ttf^>^Im_^Wi37NYZehOhdNB&Zw z!dfK++iXBEI)S)grx4l*vy<>ku=`zATrF()_|a>S6d+Ot_bSI1Q*q(wxOP9~XPpKa zn)4GE;!h2ivzg|J9CLt%m~FDOxp1-F->(6?HA&qm1oO%BA1P1r@}Elfn=TYeZoKAO z)S=3MjO*coxA2(CXn)^(DzuSk66toe)CmyQM@yR&!{ccf`!u+|+1yltf+Seb`~6Gf zHWewA`3){~9H+2b+93x^;x(Hp(5e5Dg)d9r63P)z|{Jx`EGxs7m`-R%JG{9Ct@3Kp*auz@k>&C1Ca!vb(X4$rz>v=q0C zQGXa=T-<3yP=*uod>?cHe{()JmFw;4UHCL?^zIZqDmo>UHTn8*p7gzCyG*9+BBSF|N615F z&^)SZPsqQ^i2?G*4R`^^Z%wa6u@f?9$1=6p?BSIgoIW6COe2zngA&7l;9j6T>|vMP zIgO{krK@Bsf0lI`q6~lNla|qTgyiauy-9I;u6|RxDIcF&Aqw4Db!VOcMRsql$x+tE z*K|9~?^(FNCF#+3l%D-QKlF|?pv0>~^4=qdmw0wN7)ieQi2j)x`3}r|)9X5Bf0aGb z``&Qi22vw?ODl@=OueZ*xMt|2Dn{4*v(u#^{bOSLyQ_iMDng_t86Edfq+%^#pEQwK ztZN%dJE@My8~^nCmT7T47ahHH>5XLHBMz~K7d2)R`}UaW9;U%$&OxErkFAAgE?XZg z@_n|?Mq}&@&DDJHL&;nb!WEpDk5{u5J+ldM^A|>pA1UT8?TtpxL;OKVUNfsV{#AG> zG#dXYm7W2Fds`};WV;tSfq@;!;Ve?+@#bF4zC-`kGxWfnc8S(77jJ=9|$XIGp(G5dYfOaKE2}ddWu1M zZ|5i%h}znV+;E()(N+CjhWX1Em#$a0)Bge0cdSoOh=_dHertN`A)>skEGvP8dz2YY z)509f)<-r8?lgDWZ2**-ae4w4B|z6sM6n9GFX9btOy?lYxqbXT!}h5LTQU>|=|(Cw 
zb>!@cyE_T}uy!%GfGxyVmI$2VZqH|KxW(6~D&VKYh7(xR)TzrfMXMJZl%?qOV?SvE z^hWA=Z<`Z|j4os<$;t^(fc3~>a{2FS{$i#gBHN9n>Ma#~id*o(D`>zWg~jPsoIKh- z&&pu&b?5?lX>ifi23tiCt_#%1s+Y#>y~C|YQ@tK)F<4JFE~ubVOOVqAOv z80UjuF^xz6asA1BY*lrspOgg)Tn7^@Ll~dSb;kJozslEO_oIf_W>$4WGt-{)3v_3s z9!fCn8LcI zSa0HIu?h^!76sP545r8v-O=>SPh)*M(p6XmRHKQalxB>Em5EC+XaI60qvJgFO_45%nb^`s(5hlFCoa)dRujuap;USUUV_ecEnHlT z0;@h)sL{@z1GGV?Yrw+y$S|7SM`o;!FaNelE2M^FUcTM5-}+vD0WPRe9cIi`e2IVl zw^76cejXx!MDtEOw&;=2(69LW2)nZX8I}tkCG^T$)xk2}u_tZt!8ZIXF1P?YHe7e) z5I>84wl3G2uzBo$@~IZmO8I$D%%dBf+_S2qgl*C$M3t$Kccv zy?w-AS zgc4Dy0~jSxKs^i{Rt#3X?|E>S!DAuXZe3L1gL6i?$fH^YQdG3zbo$q&bx8(^-s- zP${T`%L9&z@u(q%6Yel^)d#64@%}AC$V2BQy>QW!ul>9)ZSz2rki?o;+hWLQ{%sSv zH~%hngy60lne_+m5)DPU)ZYZw>5W*NAU?r8eo0hodDLzw1#*T&#Y!`p}eXc3IBQRo+<2uCKTM25&Fm;d$^2$p zZy*|8bedN-g;C`B`qX`6M->W>uq0kH+*{?M`JEmyf^C6T>It_?5*M)HB+$ zNPoc9#&LG;HML@})8c0aaROgz|H!4_RN9^~EN`s+t& zx90`%$cdm=ah-{2=MKC;q}pL`(s@9T^eY_x;2!t85;4v2mrQ)i9;+^3Z*KElG;uUv zX?STB2(xJ&FMk;1A8ffel0U*)LO2(TMt_PJNsj{Zv$7ZeY1iR(t3uLZ7&>!jeU&G? z)*Tq)yDn^9RrSpv_MXa+5c99qDikxRkp83*MW(ObJJ3ALMnwK1&~Lm^ZHIWt0sCgK z+PGo)(J)ZQQe}A7=tP#zMYQxy&J)OvT?mac-tX|`B%Py~zo|L~%ye1=9vwenY)*2H=Gv>U=h6d)tOsLAWbhY8swp4G%Z(_ ziW@y9($hO%le&U;o^iUKI<`^@?eBJqPf$q7MC6x=Bfu}OtGdv^Jk#`R*DbphFuTNL zP8g62!i4)8c^mdJ?j~27PNg~RItLu~__WWO$`;qNZL3kIh{T9RTvbw3SGVwXCNtm8 z!?}YsF{kJ8C4tXj1k%jt-sNZdpUfwrTlkD&SzclyKp%4y;iE35M-i)$OfA=h4|IBJ zRTmg9gWD)#9>&MxUN8OKj)eQh@rnOfI8R-o#0hwuYw~_4?fwWI;{EW>(22PTF+V(UpDiQ z0cG<~^@NV{^#K2Oq#jEaPDcIjO;n}b@4uyjuU~L_sFIeG)4i?u2;d9EsqE@Ks9Wjn zYEp1lIM2>AVQmEs>5vAG5DJS$8|>G&9;C$lPNW&JF6lc2x+VGz!YWiAR2l>XV+|EKkfnZaH4?}WCRJ$@u{X16#nJlG}!_`{uNLYFi! 
zul_)?QK$Tp_q^b8B6O{P?h)NycUSVI&7x6iSwGt6b{HXWECfW{0RvE)T(VH)C1$KR zPvmRE!V~jRaD65z%w_9W;*4~@`uYDdRhz^e6$-1=V`SB0&L6>D$*EK+*~-&y18;Rm zwv>2@>b2$Ng4zB$%{*pjU0#S~QHN&Xk6pz_nsjL@t@HyRBynQ1bd~5<_xl$PIQb70 zV>9-YG+Xr6w3ZmKG1*P4;JRFi6j@qFqvA{SpZ{OJ&DM~>xq!vyTlOI9&;{P)>gbyH T0(V|qAU+5C_3V+iq9XqSPtQo; literal 0 HcmV?d00001 From 5402078b97c97f1a6cc888594c2ddc3a3c158cec Mon Sep 17 00:00:00 2001 From: Dave <83719612+DaveJWalker@users.noreply.github.com> Date: Thu, 12 Sep 2024 21:35:57 -0700 Subject: [PATCH 02/11] Initial version of wireframe program. --- programs/wireframe.as | 754 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 754 insertions(+) create mode 100644 programs/wireframe.as diff --git a/programs/wireframe.as b/programs/wireframe.as new file mode 100644 index 0000000..920d9de --- /dev/null +++ b/programs/wireframe.as @@ -0,0 +1,754 @@ +// Cordic Demo by Dave Walker + +// A basic implementation of a CORDIC function operating in rotation mode. A CORDIC can be +// used to iteratively calculate sine and cosine of an angle. Due to the limitations of this +// 8-bit computer, the CORDIC isn't particularly accurate. A number of values in this code are +// represented as fixed point representations. Therefore, you'll see notations like u2.5 and +// s1.6. These notations denote signed/unsigned, the number of integer bits, and the number of +// fractional bits. For example, the sine/cosine outputs are all s1.6. +// +// In addition to the CORDIC, a draw_line function is included based on Bresenham's Algorithm. +// +// (Note: I'm not crazy with how I pass inputs to functions in this code using registers. I think +// I'd prefer to pass them via memory (i.e. a stack). I'd also possibly like to dedicate a +// register as a stack pointer. I haven't written assembly in years so I'm not accustomed +// to dealing with this stuff directly. Perhaps I'll change it later... perhaps not.) 
+ + +// Memory mapped IO port mapping offsets +// from memory_mapped_io_addr (248) +define memory_mapped_io_addr 248 +define pixel_x_offset -8 +define pixel_y_offset -7 +define draw_pixel_offset -6 +define clear_pixel_offset -5 +define load_pixel_offset -4 +define buffer_screen_offset -3 +define clear_screen_buffer_offset -2 +define write_char_offset -1 +define buffer_chars_offset 0 +define clear_chars_buffer_offset 1 +define show_number_offset 2 +define clear_number_offset 3 +define signed_mode_offset 4 +define unsigned_mode_offset 5 +define rng_offset 6 +define controller_input_offset 7 + +// Various RAM addresses +define x2_coord 0 +define y2_coord 1 +define register_stack_pointer 100 +define atan_LUT_strt_addr 232 + +//LDI r1 -8 +//LDI r2 2 +//CAL .mult + + +LDI r2 18 +LDI r1 101 +LDI r4 0 +LDI r3 -45 +CAL .div +STR r15 r1 4 +STR r15 r2 5 + +LDI r2 127 +LDI r1 255 +LDI r4 0 +LDI r3 3 +CAL .div +STR r15 r1 0 +STR r15 r2 1 + +HLT + +LDI r1 0 +LDI r2 81 +LDI r3 3 +CAL .div +STR r15 r1 2 +STR r15 r2 3 + +LDI r1 68 +LDI r2 214 +LDI r3 99 +CAL .div +STR r15 r1 6 +STR r15 r2 7 + +LDI r1 64 +LDI r2 16 +LDI r3 32 +LDI r4 64 +CAL .pixel_projection + +// Load the arctan LUT into RAM. 
+CAL .load_atan_lut
+
+// Clear the screen and character display, and show numbers as unsigned
+LDI r15 memory_mapped_io_addr
+STR r15 r0 clear_screen_buffer_offset
+STR r15 r0 buffer_screen_offset
+STR r15 r0 unsigned_mode_offset
+STR r15 r0 clear_chars_buffer_offset
+STR r15 r0 buffer_chars_offset
+
+// Write "CORDICDEMO"
+STR r15 r0 clear_chars_buffer_offset
+LDI r14 "C"
+STR r15 r14 write_char_offset
+LDI r14 "O"
+STR r15 r14 write_char_offset
+LDI r14 "R"
+STR r15 r14 write_char_offset
+LDI r14 "D"
+STR r15 r14 write_char_offset
+LDI r14 "I"
+STR r15 r14 write_char_offset
+LDI r14 "C"
+STR r15 r14 write_char_offset
+LDI r14 "D"
+STR r15 r14 write_char_offset
+LDI r14 "E"
+STR r15 r14 write_char_offset
+LDI r14 "M"
+STR r15 r14 write_char_offset
+LDI r14 "O"
+STR r15 r14 write_char_offset
+STR r15 r0 buffer_chars_offset
+
+// Store initial point of circle in RAM.
+// .circle_loop loads both x2_coord and y2_coord as the start of the first
+// line segment, so both must be written here.
+LDI r15 x2_coord
+LDI r14 31
+STR r15 r14 x2_coord
+LDI r14 16
+STR r15 r14 y2_coord // Previously missing: the y value was loaded but never stored
+
+// Draw a circle using the CORDIC function
+// as a simple demonstration. The CORDIC
+// is used to generate points on the circle
+// and lines are drawn between each point.
+
+// Go through angles from 0 to 200 (0 to 2*pi radians)
+LDI r13 0 // Starting angle
+
+.circle_loop
+    MOV r13 r1 // Pass the angle to .cordic in r1; r13 keeps the loop's copy because .cordic modifies r1
+
+    // Use the CORDIC to calculate sine and cosine of angle (r1)
+    CAL .cordic
+
+    // Scale sine/cosine values and center on screen
+    // also move them to r3/r4 for use in draw_line function.
+    // Each value is shifted right twice (divide by 4). RSH does not shift in
+    // the sign bit, so the saved sign bit is added back after every shift.
+    // y (r3) is moved into r4 first because r3 is then reused for x.
+    LDI r10 128 // Sign bit mask
+    AND r3 r10 r5 // Grab the sign bit for y
+    RSH r3 r4
+    ADD r4 r5 r4
+    RSH r4 r4
+    ADD r4 r5 r4
+    AND r2 r10 r5 // Grab the sign bit for x
+    RSH r2 r3
+    ADD r3 r5 r3
+    RSH r3 r3
+    ADD r3 r5 r3
+    ADI r3 16 // Center x on the screen
+    ADI r4 16 // Center y on the screen
+
+    // Grab xy coordinates from previous iteration from RAM
+    LDI r15 x2_coord
+    LOD r15 r1 x2_coord
+    LOD r15 r2 y2_coord
+
+    // Push the r13 angle value to RAM since it gets modified inside the draw_line function
+    LDI r15 register_stack_pointer
+    STR r15 r13
+    CAL .draw_line
+    // And pop it back off when finished
+    LDI r15 register_stack_pointer
+    LOD r15 r13
+
+    // Store the new endpoint (r3/r4, draw_line's x2/y2) to RAM so it becomes
+    // the start point (x1/y1) of the next segment
+    LDI r15 x2_coord
+    STR r15 r3 x2_coord
+    STR r15 r4 y2_coord
+
+    // Display current angle
+    LDI r15 memory_mapped_io_addr
+    STR r15 r13 show_number_offset
+
+    // Increment the angle and loop
+    ADI r13 10
+    LDI r14 201 // Ending angle (loop continues while r13 < 201)
+    CMP r13 r14
+    BRH lt .circle_loop
+    // Past the end: wrap the angle and keep drawing forever.
+    // NOTE(review): this subtracts 201, not 200 (2*pi in u2.5), so every
+    // revolution starts one unit earlier (210 -> 9, 209 -> 8, ...). Confirm
+    // whether that drift is intentional.
+    SUB r13 r14 r13
+    JMP .circle_loop
+HLT // NOTE(review): unreachable after the unconditional JMP above
+
+
+.cordic
+// CORDIC function computes sine and cosine of angle.
+// Input:
+//   r1 = angle in radians (fixed point in the form u2.5)
+//        (Values between 0 and 2*pi are supported.)
+// Outputs:
+//   r2 = cosine(r1)  (x, s1.6)
+//   r3 = sine(r1)    (y, s1.6)
+//   (x is seeded with the pre-scaled gain constant and y with 0 in
+//    .cordic_setup, and the caller treats r2 as x and r3 as y.)
+// Register usage
+//   r1  - angle in radians (s1.6)
+//   r2  - x (s1.6)
+//   r3  - y (s1.6)
+//   r4  - iteration counter (i)
+//   r5  - temp iteration counter/scratch
+//   r6  - shifted x
+//   r7  - shifted y
+//   r8  - total iterations
+//   r9  - holds current iteration atan value
+//   r10 - sign bit mask/scratch
+//   r11 - pointer to arctan table
+//   r12 - quadrant flag (determines whether to negate x and/or y result)
+//
+// The input angle comes in the form u2.5 with a
+// range of 0 to 2*pi. CORDICs only work for
+// +pi/2 to -pi/2 angles. To keep things simple,
+// we'll only operate in one quadrant of the unit
+// circle (0 to +pi/2). For the other quadrants,
+// we'll modify the angle and outputs appropriately.
+
+    // Each test branches when the threshold in r5 is >= the angle in r1.
+    LDI r5 50 // Load +pi/2 (1.5708*2^5 = ~50)
+    CMP r5 r1
+    BRH ge .quadrant_0
+    LDI r5 100 // Load +pi (3.1416*2^5 = ~100)
+    CMP r5 r1
+    BRH ge .quadrant_1
+    LDI r5 150 // Load +3/2*pi (4.7124*2^5 = ~150)
+    CMP r5 r1
+    BRH ge .quadrant_2
+    JMP .quadrant_3
+
+    // For each quadrant, set the quadrant flag, which will be used
+    // at the end to negate the sine/cosine outputs accordingly.
+    // Also, adjust the input angle so all calculations are performed
+    // as if in quadrant 0.
+    .quadrant_0
+        LDI r12 0b00 // Set quadrant flag to leave xy untouched
+        JMP .cordic_setup
+    .quadrant_1
+        LDI r12 0b10 // Set quadrant flag to negate x
+        SUB r5 r1 r1 // angle = pi - angle (r5 still holds 100 from the quadrant test)
+        JMP .cordic_setup
+    .quadrant_2
+        LDI r12 0b11 // Set quadrant flag to negate x&y
+        LDI r5 100 // Load +pi (3.1416*2^5 = ~100)
+        SUB r1 r5 r1 // angle = angle - pi
+        JMP .cordic_setup
+    .quadrant_3
+        LDI r12 0x01 // Set quadrant flag to negate y
+        LDI r5 200 // Load +2*pi (6.2832*2^5 = ~200)
+        SUB r5 r1 r1 // angle = 2*pi - angle
+
+    .cordic_setup
+        LSH r1 r1 // adjust input angle from u2.5 to s1.6 (one extra fractional bit); this step is needed because negative angles are needed during CORDIC operation
+        LDI r2 38 // x = 0.6072 (s1.6) = ~38/2^6 (this value has scaling factor K pre-applied)
+        LDI r3 0 // y = 0
+        LDI r4 0 // iteration counter (i)
+        LDI r8 7 // total iterations
+        LDI r10 128 // angle sign bit mask
+        LDI r11 atan_LUT_strt_addr // Point to start of atan LUT
+
+    .cordic_loop
+        // Make temporary copies of the i,x,y values for shifting
+        MOV r4 r5
+        MOV r2 r6
+        MOV r3 r7
+
+        // The current computer ALU does not support arithmetic shifting
+        // with RSH instruction, which presents a problem for negative
+        // numbers. When shifting a negative value right, ones should
+        // get shifted into the sign bit. That doesn't happen so
+        // negative shifts aren't handled properly. To get around this
+        // problem, I'll ADD in the sign bit after shifting.
+        // Note: The fact that the ALU only supports single bit shifts
+        // necessitates a loop and therefore slows down the CORDIC
+        // significantly. Multi-bit shifts are certainly possible but
+        // would make the ALU much larger... as always, tradeoffs.
:) + AND r6 r10 r14 // Grab the sign bit for x + AND r7 r10 r10 // Grab the sign bit for y + .shift_loop + CMP r5 r0 + BRH eq .shift_done + RSH r6 r6 + ADD r6 r14 r6 // Add the sign bit after shifting + RSH r7 r7 + ADD r7 r10 r7 // Add the sign bit after shifting + DEC r5 + JMP .shift_loop + .shift_done + LOD r11 r9 // Load atan value for current iteration + + // Determine rotation direction + LDI r10 128 // Sign bit mask + AND r1 r10 r0 // Check the sign bit + BRH z .positive_rotation + + .negative_rotation // Clockwise + ADD r2 r7 r2 + SUB r3 r6 r3 + ADD r1 r9 r1 + JMP .next_iteration + + .positive_rotation // Counter clockwise + SUB r2 r7 r2 + ADD r3 r6 r3 + SUB r1 r9 r1 + + .next_iteration + INC r11 // Point to next atan value + INC r4 // Increment iteration (i) + CMP r4 r8 // Check against total iterations + BRH nz .cordic_loop + + // Adjust xy outputs accordingly based on quadrant + + // Adjust x? + .check_x_negate + LDI r5 0b10 + AND r12 r5 r0 + BRH nz .negate_x + JMP .check_y_negate + .negate_x + SUB r0 r2 r2 + + .check_y_negate + LDI r5 0b01 + AND r12 r5 r0 + BRH nz .negate_y + JMP .cordic_done + .negate_y + SUB r0 r3 r3 + .cordic_done + RET + + +.draw_line +// This function draws a line between two points +// utilizing Bresenham's Algorithm. I ported the +// algorithm to assembly using MattBatWing's python +// implementation as a guide (a.k.a. I swiped it). +// Inputs: +// r1 = x1 +// r2 = y1 +// r3 = x2 +// r4 = y2 +// Register Usage: +// r5 = dx = abs(x2 - x1) +// r6 = dy = abs(y2 - y1) +// r7 = sx = sign(x2 - x1) +// r8 = sy = sign(y2 - y1) +// r9 = Error = 2*dy - dx +// r10 = scratch +// r11 = A = 2*dy +// r12 = B = 2*dy - 2*dx +// r13 = interchange flag + + // Set sx/sy slope bits to 1 for positive slope (default) + LDI r7 1 + LDI r8 1 + + // Calculate x values dx and sx. 
+    .calc_x
+        LDI r10 128 // Sign bit mask
+        SUB r3 r1 r5 // x2 - x1
+        AND r5 r10 r0 // Test the sign of (x2 - x1)
+        BRH nz .negate_x_dl
+        JMP .calc_y
+    .negate_x_dl
+        SUB r0 r5 r5 // dx = abs(x2 - x1)
+        LDI r7 -1 // sx = -1 (negative slope)
+
+    // Calculate y values dy and sy.
+    .calc_y
+        SUB r4 r2 r6 // y2 - y1
+        AND r6 r10 r0 // Test the sign of (y2 - y1)
+        BRH nz .negate_y_dl
+        JMP .calc_interchange
+    .negate_y_dl
+        SUB r0 r6 r6 // dy = abs(y2 - y1)
+        LDI r8 -1 // sy = -1 (negative slope)
+
+    .calc_interchange
+        LDI r13 0 // Set interchange flag to 0 (false)
+        SUB r5 r6 r0 // Which of dx and dy is greater?
+        BRH ge .calc_err // If dx >= dy, proceed to calc error, A and B
+        MOV r5 r10 // If dx < dy, swap dx and dy (r10 is the temporary)
+        MOV r6 r5
+        MOV r10 r6
+        LDI r13 1 // and set interchange flag to 1 (true)
+
+    // From here on r5 (dx) is the longer axis and r6 (dy) the shorter one.
+    .calc_err
+        LSH r6 r11 // A = 2*dy
+        LSH r5 r12 // 2*dx
+        SUB r0 r12 r12 // -2*dx
+        ADD r11 r12 r12 // B = 2*dy - 2*dx
+        SUB r0 r5 r9 // -dx
+        ADD r11 r9 r9 // Error = 2*dy - dx
+
+        // Draw first pixel
+        LDI r15 memory_mapped_io_addr
+        STR r15 r1 pixel_x_offset
+        STR r15 r2 pixel_y_offset
+        STR r15 r0 draw_pixel_offset
+
+        // NOTE(review): the loop below takes one step before the counter is
+        // compared with dx (see .next_pixel), so a call with identical
+        // endpoints (dx == 0) still plots one extra pixel - confirm that
+        // callers never pass a zero-length line.
+        LDI r14 0 // Set i to 0 for loop
+    .draw_line_loop
+        LDI r10 128 // Sign bit mask
+        AND r9 r10 r0 // Is Error < 0?
+        BRH z .error_ge_zero
+    .error_lt_zero
+        ADD r9 r11 r9 // Error += A
+        CMP r13 r0 // Check interchange flag
+        BRH eq .inc_x // ... and step along the longer axis only (x unless interchanged)
+    .inc_y
+        ADD r2 r8 r2 // y += sy
+        JMP .draw_pixel
+    .inc_x
+        ADD r1 r7 r1 // x += sx
+        JMP .draw_pixel
+    .error_ge_zero
+        ADD r2 r8 r2 // y += sy
+        ADD r1 r7 r1 // x += sx
+        ADD r9 r12 r9 // Error += B
+
+    .draw_pixel
+        // Make sure we're in the range of the screen before
+        // drawing a pixel.
+        LDI r10 32 // First out-of-range coordinate value
+        CMP r1 r10
+        BRH ge .next_pixel // Skip the pixel when x >= 32
+        CMP r2 r10
+        BRH ge .next_pixel // Skip the pixel when y >= 32
+        STR r15 r1 pixel_x_offset
+        STR r15 r2 pixel_y_offset
+        STR r15 r0 draw_pixel_offset
+
+    .next_pixel
+        INC r14 // Increment loop counter
+        CMP r14 r5 // Exit loop when i >= dx
+        BRH ge .buffer_screen
+        JMP .draw_line_loop
+
+    .buffer_screen
+        STR r15 r0 buffer_screen_offset
+        RET
+
+
+.mult
+// This function multiplies two 8-bit numbers together. The result is a 16-bit
+// product, which gets stored in two registers. Negative (2's complement)
+// inputs are converted to positive first and the product is negated at the
+// end when exactly one input was negative.
+// Inputs:
+//   r1 = multiplicand
+//   r2 = multiplier
+// Outputs:
+//   r5:r4 = 16-bit product (r5 = high byte, r4 = low byte)
+//
+// Register usage:
+//   r3 = Upper bits of multiplicand (as it gets shifted left)
+//   r6 = LSB mask
+//   r7 = carry flag (need separate flag because of oddity with LSH pseudo instruction)
+//   r8 = loop counter
+//   r9 = product sign flag
+// Clobbers: r1, r2 and r3 are modified in addition to r6-r9.
+
+    // First things first... convert multiplicand and multiplier to positive
+    // values since this routine doesn't handle negative 2's complement values
+    // properly.
+ + LDI r3 128 // Sign bit mask + LDI r9 0 // Set product sign to 0 (positive) + LDI r6 1 // LSB mask + AND r1 r3 r0 // Determine if r1 is negative + BRH z .r1_pos + XOR r9 r6 r9 // Toggle the product sign flag + SUB r0 r1 r1 // And negate it + .r1_pos + AND r2 r3 r0 // Determine if r2 is negative + BRH z .r2_pos + XOR r9 r6 r9 // Toggle the product sign flag + SUB r0 r2 r2 // And negate it + .r2_pos + + LDI r3 0 // Clear upper 8-bit of multiplicand (for later shifting) + LDI r4 0 // Clear the product registers + LDI r5 0 + LDI r8 8 // Loop counter (8-bits) + + .mult_loop + AND r2 r6 r0 // Check least significant bit of multiplier + RSH r2 r2 // and shift it to the right by 1 + BRH zero .mult_no_add // If least significant bit is 0, skip addition + .mult_add + ADD r1 r4 r4 // otherwise add multiplicand to product + BRH nc .prod_nc + ADI r5 1 // And handle carries into the upper 8-bits if needed + .prod_nc + ADD r3 r5 r5 + .mult_no_add + LDI r7 0 // Initialize carry flag to 0 + LSH r1 r1 // Shift multiplicand left to prep for next round + BRH nc .multiplicand_nc + LDI r7 1 // If a carry occurs, flag it + .multiplicand_nc + LSH r3 r3 // Now shift the upper 8-bits of the multiplicand + ADD r3 r7 r3 // And add back the carry bit + DEC r8 // Decrement the loop counter + BRH nz .mult_loop + + CMP r9 r0 // Determine if product sign flag is set + BRH z .mult_done + LDI r3 0xFF + XOR r3 r4 r4 // Negate the product + XOR r3 r5 r5 + INC r4 // And add 1 to low byte after negation (2's complement) + BRH nc .mult_done + INC r5 // Handle carry into high byte + .mult_done + RET + + +.div +// This function divides a 16-bit dividend by an 8-bit divisor. +// It results in an 8-bit quotient and remainder. +// Inputs: +// r2:r1 = Dividend (numerator) +// r4:r3 = Divisor (denominator) +// Outputs: +// r2:r1 = Quotient +// r3 = Remainder +// Register usage: +// r6:r5 = Remainder temp +// r8:r7 = Quotient temp (TBD... 
can use dividend register to be more efficient +// r9-10 = scratch +// r11 = loop counter +// r12 = quotient_sign +// +// The algorithm implemented below is detailed in the following +// video: +// www.youtube.com/watch?v=7m6I7_3XdZ8 +// + // Check to see if both bytes of divisor are zero; if so, + // a divide-by-zero error occurred. Use of the NOR instruction + // here necessitates a comparison to all ones. + LDI r8 0xFF + NOR r3 r4 r6 + CMP r8 r6 + BRH z .div_by_zero + + // This divide algorithm only works for unsigned values. Therefore, + // if the dividend and/or divisor are negative, convert them + // to positive numbers and set a flag to convert the results + // appropriately at the end. + LDI r9 128 // Sign bit mask + LDI r12 0 // Set quotient sign to 0 (positive by default) + LDI r6 1 // LSB mask + + // abs(Dividend) + AND r2 r9 r0 // Check dividend sign bit + BRH z .dividend_pos + XOR r12 r6 r12 // Toggle the quotient sign flag + XOR r8 r1 r1 // Negate the dividend by inverting all bits... + XOR r8 r2 r2 + INC r1 // ...and add 1 to low byte (2's complement) + BRH nc .dividend_pos + INC r2 // Handle carry into high byte + .dividend_pos + + // abs(Divisor) + AND r4 r9 r0 // Check divisor sign bit + BRH z .divisor_pos + XOR r12 r6 r12 // Toggle the quotient sign flag + XOR r8 r3 r3 // Negate the divisor by inverting all bits... 
+ XOR r8 r4 r4 + INC r3 // ...and add 1 to low byte (2's complement) + BRH nc .divisor_pos + INC r4 // Handle carry into high byte + .divisor_pos + + // Initialize + LDI r5 0 // Clear registers utilized for calculations + LDI r6 0 + LDI r7 0 + LDI r8 0 + LDI r9 0 // Carry flag for low byte shifts + LDI r10 0 // Carry flag for high byte shifts + LDI r11 16 // Initialize loop counter to 16 (TBD) + + .div_loop + // Shift dividend left + LDI r9 0 // Clear low byte carry flag + LSH r1 r1 // Shift dividend low byte left + BRH nc .dividend_lh_nc + LDI r9 1 // If a carry occurs, flag it + .dividend_lh_nc + LDI r10 0 // Clear high byte carry flag + LSH r2 r2 // Shift dividend high byte left + BRH nc .dividend_h_nc + LDI r10 1 // If a carry occurs, flag it + .dividend_h_nc + ADD r2 r9 r2 // And add the carry bit + + // Shift the remainder left with carry out from dividend shift + LDI r9 0 // Clear carry flag + LSH r5 r5 // Shift remainder low byte left + BRH nc .remainder_lh_nc + LDI r9 1 // If a carry occurs, flag it + .remainder_lh_nc + ADD r5 r10 r5 // And add carry from dividend high byte + LDI r10 0 + LSH r6 r6 // Shift remainder high byte left + ADD r6 r9 r6 // And add the carry bit + + // Compare the remainder with the divisor to determine if + // a subtraction is possible. First, compare the high + // bytes + CMP r6 r4 + BRH lt .div_no_subtract // If remainder high byte < divisor high byte, skip subtract + BRH eq .cmp_low // If equal, compare the low bytes + JMP .div_subtract + + // The upper bytes are equal so the lower bytes need to be compared + // Note: I've seen code that skips the low byte comparison and proceeds + // with subtraction anyway potentially resulting in a negative remainder. + // Apparently, this negative remainder handles itself later but I had + // trouble getting it to work (and it's confusing). I'll take a small + // performance hit with the low byte comparison to keep things simple. 
+ .cmp_low + CMP r5 r3 + BRH lt .div_no_subtract // If remainder low byte < divisor low byte, skip subtract + + // Perform remainder - divisor 16-bit subtraction (emulating SBB) + .div_subtract + LDI r9 0 // Clear borrow flag + SUB r5 r3 r5 // Subtract low bytes + BRH c .div_no_borrow + LDI r9 1 // If a borrow occurs, flag it + .div_no_borrow + SUB r6 r4 r6 // Subtract high bytes + SUB r6 r9 r6 // Handle borrow + + LDI R9 0 + INC r7 // Increment the quotient + // Note: it'll never carry into the higher byte + // because there will always be 'room' to add 1 due to + // the shift from the previous cycle. + + .div_no_subtract + // Shift Quotient left (TBD - again... I'll likely get rid of this since I'll use dividend instead but... + LDI r9 1 // Skip quotient shift if final iteration (i.e. i=1) + CMP r11 r9 + BRH z .no_quotient_shift // Skip quotient shift if final iteration (i.e. i=1) + LDI r9 0 // Clear low byte carry flag + LSH r7 r7 // Shift quotient low byte left + BRH nc .quotient_lh_nc + LDI r9 1 // If a carry occurs, flag it + .quotient_lh_nc + LSH r8 r8 // Shift quotient high byte left + ADD r8 r9 r8 + .no_quotient_shift + + // Next iteration + DEC r11 // Decrement loop counter + BRH nz .div_loop + + // Move results to appropriate output registers + MOV r8 r2 + MOV r7 r1 + MOV r5 r3 + + // Determine if the final quotient result should be negative. + // (We'll go ahead and leave the remainder as an unsigned value) + CMP r12 r0 // Determine if quotient sign flag is set + BRH z .div_done_done + LDI r9 0xFF + XOR r9 r2 r2 // Negate the quotient + XOR r9 r1 r1 + INC r1 // And add 1 to low byte after negation (2's complement) + BRH nc .div_done + INC r2 // Handle carry into high byte + .div_done + + RET + + // Halt when divide by zero encountered + .div_by_zero + HLT + + +.pixel_projection +// The following function is used to project a 3D point in space onto a 2D plane. 
+// Given a 3D coordinate [x,y,z], it'll return x_projected and y_projected by +// calculating the following: +// +// x_projected = (focal_length * x)/(focal length + z) +// y_projected = (focal_length * y)/(focal length + z) +// +// Inputs: +// r1 = x +// r2 = y +// r3 = z +// r4 = focal_length +// +// Outputs: +// r1 = x_projected +// r2 = y_projected +// +// Registers: +// r7:r6 = focal_length + z +// r9:r8 = focal_length * [x,y] + + LDI r7 0 // TODO - I probably should handle carries here... though it's unlikely + ADD r4 r3 r6 // r6 = focal_length + z + + // Push registers on the stack + LDI r15 register_stack_pointer + STR r15 r1 0 + STR r15 r2 1 + STR r15 r3 2 + STR r15 r4 3 + STR r15 r6 4 + STR r15 r7 5 + + // The mult function expects multiplier and multiplicand to be in r1 and r2 + // so move stuff around as required. + MOV r4 r2 // Move focal_length to r2 + CAL .mult + // Move results for div function. + MOV r5 r2 + MOV r4 r1 + LOD r15 r3 4 + LOD r15 r4 5 + CAL .div + NOP + +HLT +RET + + +// Load the arctangent look-up table into RAM. +// Values are in the form S1.6 +.load_atan_lut + LDI r15 atan_LUT_strt_addr + LDI r14 50 // arctan(2^0) = ~50/2^6 + STR r15 r14 0 + LDI r14 30 // arctan(2^-1) = ~30/2^6 + STR r15 r14 1 + LDI r14 16 // arctan(2^-2) = ~16/2^6 + STR r15 r14 2 + LDI r14 8 // arctan(2^-3) = ~8/2^6 + STR r15 r14 3 + LDI r14 4 // arctan(2^-4) = ~4/2^6 + STR r15 r14 4 + LDI r14 2 // arctan(2^-5) = ~2/2^6 + STR r15 r14 5 + LDI r14 1 // arctan(2^-6) = ~1/2^6 + STR r15 r14 6 + RET From f77d1c057786d587733be43a839baac6a981d085 Mon Sep 17 00:00:00 2001 From: Dave <83719612+DaveJWalker@users.noreply.github.com> Date: Fri, 13 Sep 2024 13:23:14 -0700 Subject: [PATCH 03/11] Fixed some comments. 
--- programs/cordic.as | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/programs/cordic.as b/programs/cordic.as index cd39d45..eb244dd 100644 --- a/programs/cordic.as +++ b/programs/cordic.as @@ -188,7 +188,7 @@ HLT SUB r5 r1 r1 JMP .cordic_setup .quadrant_2 - LDI r12 0b11 // Set quadrant flag to negative x&y + LDI r12 0b11 // Set quadrant flag to negate x&y LDI r5 100 // Load +pi (3.1416*2^5 = ~100) SUB r1 r5 r1 JMP .cordic_setup @@ -216,8 +216,12 @@ HLT // with RSH instruction, which presents a problem for negative // numbers. When shifting a negative value right, ones should // get shifted into the sign bit. That doesn't happen so - // negative shifts aren't handled properly. To get around this problem, - // for now, I'll OR in the sign bit after shifting. + // negative shifts aren't handled properly. To get around this + // problem, I'll ADD in the sign bit after shifting. + // Note: The fact that the ALU only supports single bit shifts + // necessitates a loop and therefore slows down the CORDIC + // significantly. Multi-bit shifts are certainly possible but + // would make the ALU much larger... as always, tradeoffs. :) AND r6 r10 r14 // Grab the sign bit for x AND r7 r10 r10 // Grab the sign bit for y .shift_loop From 77293481232f53e57248b2807c2b8a04877c562a Mon Sep 17 00:00:00 2001 From: Dave <83719612+DaveJWalker@users.noreply.github.com> Date: Sat, 14 Sep 2024 23:05:32 -0700 Subject: [PATCH 04/11] Initial wireframe animation working The code is still quite messy and several optimizations can be made, but I wanted to capture the working copy. 
--- programs/wireframe.as | 805 +++++++++++++++++++++++++++++++++--------- 1 file changed, 637 insertions(+), 168 deletions(-) diff --git a/programs/wireframe.as b/programs/wireframe.as index 920d9de..2c11a09 100644 --- a/programs/wireframe.as +++ b/programs/wireframe.as @@ -1,4 +1,4 @@ -// Cordic Demo by Dave Walker +// Wireframe Demo by Dave Walker // A basic implementation of a CORDIC function operating in rotation mode. A CORDIC can be // used to iteratively calculate sine and cosine of an angle. Due to the limitations of this @@ -38,55 +38,24 @@ define controller_input_offset 7 // Various RAM addresses define x2_coord 0 define y2_coord 1 -define register_stack_pointer 100 +define register_stack_pointer 50 +define shape_vertices_edges_addr 100 +define projected_points_addr 150 define atan_LUT_strt_addr 232 -//LDI r1 -8 -//LDI r2 2 -//CAL .mult +// Other constants +define y_axis 0 +define x_axis 1 +define z_axis 2 +define focal_length 127 -LDI r2 18 -LDI r1 101 -LDI r4 0 -LDI r3 -45 -CAL .div -STR r15 r1 4 -STR r15 r2 5 - -LDI r2 127 -LDI r1 255 -LDI r4 0 -LDI r3 3 -CAL .div -STR r15 r1 0 -STR r15 r2 1 - -HLT - -LDI r1 0 -LDI r2 81 -LDI r3 3 -CAL .div -STR r15 r1 2 -STR r15 r2 3 - -LDI r1 68 -LDI r2 214 -LDI r3 99 -CAL .div -STR r15 r1 6 -STR r15 r2 7 - -LDI r1 64 -LDI r2 16 -LDI r3 32 -LDI r4 64 -CAL .pixel_projection - // Load the arctan LUT into RAM. CAL .load_atan_lut +// Load shape data into RAM. 
+CAL .load_shape_vertices_edges + // Clear the screen and number display LDI r15 memory_mapped_io_addr STR r15 r0 clear_screen_buffer_offset @@ -95,95 +64,119 @@ STR r15 r0 unsigned_mode_offset STR r15 r0 clear_chars_buffer_offset STR r15 r0 buffer_chars_offset -// Write "CORDICDEMO" +// Write " ROTATION " STR r15 r0 clear_chars_buffer_offset -LDI r14 "C" -STR r15 r14 write_char_offset -LDI r14 "O" +LDI r14 " " STR r15 r14 write_char_offset LDI r14 "R" STR r15 r14 write_char_offset -LDI r14 "D" -STR r15 r14 write_char_offset -LDI r14 "I" +LDI r14 "O" STR r15 r14 write_char_offset -LDI r14 "C" +LDI r14 "T" STR r15 r14 write_char_offset -LDI r14 "D" +LDI r14 "A" STR r15 r14 write_char_offset -LDI r14 "E" +LDI r14 "T" STR r15 r14 write_char_offset -LDI r14 "M" +LDI r14 "I" STR r15 r14 write_char_offset LDI r14 "O" STR r15 r14 write_char_offset +LDI r14 "N" +STR r15 r14 write_char_offset +LDI r14 " " +STR r15 r14 write_char_offset STR r15 r0 buffer_chars_offset -// Store initial point of circle in RAM -LDI r15 x2_coord -LDI r14 31 -STR r15 r14 x2_coord -LDI r14 16 -// Draw a circle using the CORDIC function -// as a simple demonstration. The CORDIC -// is used to generate points on the circle -// and lines are drawn between each point. +// Initialize rotation angle +LDI r1 28 +.main_loop + + // Point to 3D shape vertex/edge table + LDI r15 shape_vertices_edges_addr + LOD r15 r14 0 // Load number of vertices in r14 + INC r15 // And point to first vertex + + // Load 2D projected points table address + LDI r13 projected_points_addr + + // Now loop through all of the 3D vertices in memory to + // rotate and project them onto a 2D plane for display. + .vertice_loop + LOD r15 r2 0 + LOD r15 r3 1 + LOD r15 r4 2 + ADI r15 3 + LDI r5 y_axis // Rotation axis + + LDI r12 0 + STR r12 r1 0 // Store angle in RAM + STR r12 r14 1 // Store number of vertices in RAM + STR r12 r15 2 // Store vertex pointer in RAM + // TODO: Change rotation function so it doesn't call CORDIC + // i.e. 
move CORDIC call outside of vertice loop since I don't + // need to calculate it again each time + CAL .rotation + LDI r7 focal_length + CAL .pixel_projection + ADI r1 16 + ADI r2 16 + + LDI r15 memory_mapped_io_addr + STR r15 r1 pixel_x_offset + STR r15 r2 pixel_y_offset + STR r15 r0 draw_pixel_offset + + LDI r12 0 + LOD r12 r1 0 // Restore angle from RAM + LOD r12 r14 1 // Restore number of vertices from RAM + LOD r12 r15 2 // Restore vertex pointer from RAM + + DEC r14 + BRH nz .vertice_loop + + LDI r12 0 + STR r12 r1 0 // Store angle in RAM + STR r12 r14 1 // Store number of vertices in RAM + STR r12 r15 2 // Store vertex pointer in RAM -// Go through angles from 0 to 200 (0 to 2*pi radians) -LDI r13 0 // Starting angle + LDI r15 memory_mapped_io_addr + STR r15 r0 buffer_screen_offset + STR r15 r0 clear_screen_buffer_offset -.circle_loop - MOV r13 r1 // Store the angle in r13 since r1 is modified by the CORDIC function + LDI r12 0 + LOD r12 r1 0 // Restore angle from RAM + LOD r12 r14 1 // Restore number of vertices from RAM + LOD r12 r15 2 // Restore vertex pointer from RAM - // Use the CORDIC to calculate sine and cosine of angle (r1) - CAL .cordic - // Scale sine/cosine values and center on screen - // also move them to r3/r4 for use in draw_line function - LDI r10 128 // Sign bit mask - AND r3 r10 r5 // Grab the sign bit for y - RSH r3 r4 - ADD r4 r5 r4 - RSH r4 r4 - ADD r4 r5 r4 - AND r2 r10 r5 // Grab the sign bit for x - RSH r2 r3 - ADD r3 r5 r3 - RSH r3 r3 - ADD r3 r5 r3 - ADI r3 16 - ADI r4 16 - - // Grab xy coordinates from previous iteration from RAM - LDI r15 x2_coord - LOD r15 r1 x2_coord - LOD r15 r2 y2_coord - - // Push the r13 angle value to RAM since it gets modified inside the draw_line function - LDI r15 register_stack_pointer - STR r15 r13 - CAL .draw_line - // And pop it back off when finished - LDI r15 register_stack_pointer - LOD r15 r13 - // Store the x1/y1 coordinates to RAM so they can be x2/y2 next iteration - LDI r15 x2_coord - STR r15 r3 x2_coord - STR r15 r4 
y2_coord + +// // Push the r13 angle value to RAM since it gets modified inside the draw_line function +// LDI r15 register_stack_pointer +// STR r15 r13 +// CAL .draw_line +// // And pop it back off when finished +// LDI r15 register_stack_pointer +// LOD r15 r13 + +// // Store the x1/y1 coordinates to RAM so they can be x2/y2 next iteration +// LDI r15 x2_coord +// STR r15 r3 x2_coord +// STR r15 r4 y2_coord // Display current angle LDI r15 memory_mapped_io_addr - STR r15 r13 show_number_offset + STR r15 r1 show_number_offset // Increment the angle and loop - ADI r13 10 + ADI r1 1 LDI r14 201 // Ending angle - CMP r13 r14 - BRH lt .circle_loop - SUB r13 r14 r13 - JMP .circle_loop + CMP r1 r14 + BRH lt .main_loop + SUB r1 r14 r1 + JMP .main_loop HLT @@ -208,7 +201,7 @@ HLT // r10 - sign bit mask/scatch // r11 - pointer to arctan table // r12 - quadrant flag (determines whether to negate x and/or y result) -// +// r13 - scratch // The input angle comes in the form u2.5 with a // range of 0 to 2*pi. CORDICs only work for // +pi/2 to -pi/2 angles. To keep things simple, @@ -273,13 +266,13 @@ HLT // necessitates a loop and therefore slows down the CORDIC // significantly. Multi-bit shifts are certainly possible but // would make the ALU much larger... as always, tradeoffs. :) - AND r6 r10 r14 // Grab the sign bit for x + AND r6 r10 r13 // Grab the sign bit for x AND r7 r10 r10 // Grab the sign bit for y .shift_loop CMP r5 r0 BRH eq .shift_done RSH r6 r6 - ADD r6 r14 r6 // Add the sign bit after shifting + ADD r6 r13 r6 // Add the sign bit after shifting RSH r7 r7 ADD r7 r10 r7 // Add the sign bit after shifting DEC r5 @@ -444,16 +437,15 @@ HLT .mult -// This function multiplies two 8-bit numbers together. The result is a 16-bit -// product, which gets stored in two registers. +// This function multiplies a 16-bit multiplicand with an +// 8-bit multiplier resulting in a 16-bit product. 
// Inputs: -// r1 = multiplicand -// r2 = multiplier +// r2:r1 = multiplicand +// r3 = multiplier // Outputs: // r5:r4 = 16-bit product // // Register usage: -// r3 = Upper bits of multiplicand (as it gets shifted left) // r6 = LSB mask // r7 = carry flag (need separate flag because of oddity with LSH pseudo instruction) // r8 = loop counter @@ -462,52 +454,59 @@ HLT // First things first... convert multiplicand and multiplier to positive // values since this routinee doesn't handle negative 2's complement values // properly. +// (TODO... add flag for signed vs unsigned operation) - LDI r3 128 // Sign bit mask + LDI r4 128 // Sign bit mask + LDI r5 0xFF // All ones mask LDI r9 0 // Set product sign to 0 (positive) LDI r6 1 // LSB mask - AND r1 r3 r0 // Determine if r1 is negative - BRH z .r1_pos + AND r2 r4 r0 // Determine if r2 is negative + BRH z .multiplicand_pos XOR r9 r6 r9 // Toggle the product sign flag - SUB r0 r1 r1 // And negate it - .r1_pos - AND r2 r3 r0 // Determine if r2 is negative - BRH z .r2_pos + XOR r5 r2 r2 // Invert all bits + XOR r5 r1 r1 + INC r1 // And add 1 to low byte after negation (2's complement) + BRH nc .multiplicand_pos + INC r2 // Handle carry into high byte + .multiplicand_pos + AND r3 r4 r0 // Determine if r3 is negative + BRH z .multiplier_pos XOR r9 r6 r9 // Toggle the product sign flag - SUB r0 r2 r2 // And negate it - .r2_pos + SUB r0 r3 r3 // And negate it + .multiplier_pos - LDI r3 0 // Clear upper 8-bit of multiplicand (for later shifting) LDI r4 0 // Clear the product registers LDI r5 0 - LDI r8 8 // Loop counter (8-bits) + LDI r8 8 // Initialize loop counter .mult_loop - AND r2 r6 r0 // Check least significant bit of multiplier - RSH r2 r2 // and shift it to the right by 1 + AND r3 r6 r0 // Check least significant bit of multiplier + RSH r3 r3 // and shift it to the right by 1 BRH zero .mult_no_add // If least significant bit is 0, skip addition .mult_add ADD r1 r4 r4 // otherwise add multiplicand to product BRH nc 
.prod_nc ADI r5 1 // And handle carries into the upper 8-bits if needed .prod_nc - ADD r3 r5 r5 + ADD r2 r5 r5 .mult_no_add LDI r7 0 // Initialize carry flag to 0 LSH r1 r1 // Shift multiplicand left to prep for next round BRH nc .multiplicand_nc LDI r7 1 // If a carry occurs, flag it .multiplicand_nc - LSH r3 r3 // Now shift the upper 8-bits of the multiplicand - ADD r3 r7 r3 // And add back the carry bit + LSH r2 r2 // Now shift the upper 8-bits of the multiplicand + ADD r2 r7 r2 // And add back the carry bit DEC r8 // Decrement the loop counter BRH nz .mult_loop + // TODO - add logic to handle multiplicand saturation + CMP r9 r0 // Determine if product sign flag is set BRH z .mult_done - LDI r3 0xFF - XOR r3 r4 r4 // Negate the product - XOR r3 r5 r5 + LDI r2 0xFF + XOR r2 r4 r4 // Negate the product + XOR r2 r5 r5 INC r4 // And add 1 to low byte after negation (2's complement) BRH nc .mult_done INC r5 // Handle carry into high byte @@ -516,8 +515,8 @@ HLT .div -// This function divides a 16-bit dividend by an 8-bit divisor. -// It results in an 8-bit quotient and remainder. +// This function divides a 16-bit dividend by a 16-bit divisor. +// It results in an 16-bit quotient and 8-bit remainder. // Inputs: // r2:r1 = Dividend (numerator) // r4:r3 = Divisor (denominator) @@ -526,14 +525,32 @@ HLT // r3 = Remainder // Register usage: // r6:r5 = Remainder temp -// r8:r7 = Quotient temp (TBD... can use dividend register to be more efficient +// r8:r7 = Quotient temp (TODO... 
can use dividend register to use less resources) // r9-10 = scratch // r11 = loop counter // r12 = quotient_sign // -// The algorithm implemented below is detailed in the following -// video: +// This code implements a non-restoring division algorithm, which is detailed in the +// following video: // www.youtube.com/watch?v=7m6I7_3XdZ8 +// +// Below is a rough block diagram: +// +// +---------------------+ +// | 16-bit Divisor | (r4:r3) +// +---------------------+ +// | +// | +------------+ +// +--->| ALU | +// +--------------->| (subtract) | +// | +------------+ +// | | +// | | +// | +-----------------------------------------------+ +// +---| Remainder Reg (r6:r5) | Dividend Reg (r2:r1) |<-- Quotient (shifted in) +// +-----------------------------------------------+ +// <-- shifted left +// // // Check to see if both bytes of divisor are zero; if so, // a divide-by-zero error occurred. Use of the NOR instruction @@ -580,7 +597,7 @@ HLT LDI r8 0 LDI r9 0 // Carry flag for low byte shifts LDI r10 0 // Carry flag for high byte shifts - LDI r11 16 // Initialize loop counter to 16 (TBD) + LDI r11 16 // Initialize loop counter to 16 .div_loop // Shift dividend left @@ -667,7 +684,7 @@ HLT // Determine if the final quotient result should be negative. // (We'll go ahead and leave the remainder as an unsigned value) CMP r12 r0 // Determine if quotient sign flag is set - BRH z .div_done_done + BRH z .div_done LDI r9 0xFF XOR r9 r2 r2 // Negate the quotient XOR r9 r1 r1 @@ -685,6 +702,7 @@ HLT .pixel_projection // The following function is used to project a 3D point in space onto a 2D plane. 
+// // Given a 3D coordinate [x,y,z], it'll return x_projected and y_projected by // calculating the following: // @@ -692,45 +710,392 @@ HLT // y_projected = (focal_length * y)/(focal length + z) // // Inputs: -// r1 = x -// r2 = y -// r3 = z -// r4 = focal_length +// r2:r1 = x +// r4:r3 = y +// r6:r5 = z +// r7 = focal_length // // Outputs: -// r1 = x_projected -// r2 = y_projected +// r1 = x_projected +// r2 = y_projected // // Registers: -// r7:r6 = focal_length + z -// r9:r8 = focal_length * [x,y] - - LDI r7 0 // TODO - I probably should handle carries here... though it's unlikely - ADD r4 r3 r6 // r6 = focal_length + z +// r9:r8 = focal_length + z +// r14 = stack pointer +// r15 = stack pointer + + // RAM offsets for temporary RAM storage (from r15) + define pp_x_low -8 // x_low + define pp_x_high -7 // x_high + define pp_y_low -6 // y_low + define pp_y_high -5 // y_high + define pp_z_low -4 // z_low + define pp_z_high -3 // z_high + define pp_x_projected -2 // x projected + define pp_y_projected -1 // y projected + define pp_fl 0 // focal_length + define pp_fl_plus_z_low 1 // focal_length + z (low) + define pp_fl_plus_z_high 2 // focal_length + z (high) + + MOV r6 r9 // Move z high byte to r9 + ADD r7 r5 r8 // r8 = focal_length + z + BRH nc .fl_z_nc // Handle carries into upper byte (TODO) + INC r9 + .fl_z_nc // Push registers on the stack LDI r15 register_stack_pointer - STR r15 r1 0 - STR r15 r2 1 - STR r15 r3 2 - STR r15 r4 3 - STR r15 r6 4 - STR r15 r7 5 - - // The mult function expects multiplier and multiplicand to be in r1 and r2 - // so move stuff around as required. - MOV r4 r2 // Move focal_length to r2 + ADI r15 8 + STR r15 r1 pp_x_low + STR r15 r2 pp_x_high + STR r15 r3 pp_y_low + STR r15 r4 pp_y_high + STR r15 r7 pp_fl + STR r15 r8 pp_fl_plus_z_low + STR r15 r9 pp_fl_plus_z_high + + // Calculate x_projected + LOD r15 r3 pp_fl CAL .mult - // Move results for div function. 
MOV r5 r2 MOV r4 r1 - LOD r15 r3 4 - LOD r15 r4 5 + LOD r15 r3 pp_fl_plus_z_low + LOD r15 r4 pp_fl_plus_z_high CAL .div - NOP -HLT -RET + // Trunc the results + LDI r3 2 + CAL .trunc + STR r15 r1 pp_x_projected + + // Calculate y_projected + LOD r15 r1 pp_y_low + LOD r15 r2 pp_y_high + LOD r15 r3 pp_fl + CAL .mult + MOV r5 r2 + MOV r4 r1 + LOD r15 r3 pp_fl_plus_z_low + LOD r15 r4 pp_fl_plus_z_high + CAL .div + + // Now truncate the results + LDI r3 2 + CAL .trunc + STR r15 r1 pp_y_projected + + // Move x_projected and y_projected to output registers + MOV r1 r2 // y_projected -> r2 + LOD r15 r1 pp_x_projected // x_projected -> r1 + RET + + +.rotation +// This function applies the following rotation matrix to +// a set of two coordinates: +// +// +- -+ +// | cos(A) -sin(A) | +// Rz = | | +// | sin(A) cos(A) | +// +- -+ +// +// Given a 3D coordinate (composed of xyz values), the +// coordinate pair utilized can be selected based on the +// coordinate pair selection input as follows: +// 0 = [x,y] +// 1 = [x,z] +// 2 = [y,z] +// +// Inputs: +// r1 = rotation angle in radians (fixed point in the form u2.5) +// (Values between 0 and 2*pi are supported.) // TODO... 
make input and outputs consistent +// r2 = x +// r3 = y +// r4 = z +// r5 = rotation axis +// +// Outputs: +// r2:r1 = rotated_x +// r4:r3 = rotated_y +// r6:r5 = rotated_z +// Registers: +// r1 - r7 = scratch +// r14 = stack pointer (offset 8 from r15) +// r15 = stack pointer + + // RAM offsets for temporary storage (offset from r15) + define rot_coord1 0 // 1st rotation coordinate + define rot_coord2 1 // 2nd rotation coordinate + define rot_coord3 2 // 3rd rotation coordinate (fixed) + define rot_axis_sel 3 // rotation axis + define rot_cosine_high 4 // Cosine + define rot_cosine_low 5 + define rot_sine_high 6 // Sine + define rot_sine_low 7 + + define coord1_x_cosine_low -1 + define coord1_x_cosine_high -2 + define coord2_x_sine_low -3 + define coord2_x_sine_high -4 + define coord1_x_sine_low -5 + define coord1_x_sine_high -6 + define coord2_x_cosine_low -7 + define coord2_x_cosine_high -8 + + // RAM offset for temp storage (offset from r14) + define rot_x_high -8 + define rot_x_low -7 + define rot_y_high -6 + define rot_y_low -5 + define rot_z_high -4 + define rot_z_low -3 + define rot_coord1_high -2 + define rot_coord1_low -1 + define rot_coord2_high 0 + define rot_coord2_low 1 + define rot_coord3_high 2 + define rot_coord3_low 3 + + // Set up pointer for temporary storage + LDI r15 register_stack_pointer + ADI r15 8 + MOV r15 r14 + ADI r14 8 + ADI r14 8 + + // Organize input coordinates in RAM according to the + // rotation axis selection input (r5) + STR r15 r5 rot_axis_sel + LDI r6 x_axis + CMP r5 r6 + BRH z .go_x_axis_sel + LDI r6 z_axis + CMP r5 r6 + BRH z .go_z_axis_sel + .go_y_axis_sel // Default + STR r15 r2 rot_coord1 + STR r15 r4 rot_coord2 + STR r15 r3 rot_coord3 + JMP .calc_cordic + .go_z_axis_sel + STR r15 r2 rot_coord1 + STR r15 r3 rot_coord2 + STR r15 r4 rot_coord3 + JMP .calc_cordic + .go_x_axis_sel + STR r15 r3 rot_coord1 + STR r15 r4 rot_coord2 + STR r15 r2 rot_coord3 + + .calc_cordic + // Call the CORDIC function to calculate sine and 
cosine for the + // rotation angle + CAL .cordic + + // Convert them to 16-bit values for the multiplications below. + LDI r12 128 + LDI r1 0 + AND r12 r2 r0 // Check sign bit + BRH z .cosine_pos + LDI r1 0xFF + .cosine_pos + STR r15 r1 rot_cosine_high + STR r15 r2 rot_cosine_low + LDI r1 0 + AND r12 r3 r0 // Check sign bit + BRH z .sine_pos + LDI r1 0xFF + .sine_pos + STR r15 r1 rot_sine_high + STR r15 r3 rot_sine_low + + // Now... calculate the four terms in the rotation and store in RAM + // Calculate coord1 * cosine + LOD r15 r1 rot_cosine_low + LOD r15 r2 rot_cosine_high + LOD r15 r3 rot_coord1 + CAL .mult + STR r15 r4 coord1_x_cosine_low + STR r15 r5 coord1_x_cosine_high + + // Calculate coord1 * sine + LOD r15 r1 rot_sine_low + LOD r15 r2 rot_sine_high + LOD r15 r3 rot_coord1 + CAL .mult + STR r15 r4 coord1_x_sine_low + STR r15 r5 coord1_x_sine_high + + // Calculate coord2 * sine + LOD r15 r1 rot_sine_low + LOD r15 r2 rot_sine_high + LOD r15 r3 rot_coord2 + CAL .mult + STR r15 r4 coord2_x_sine_low + STR r15 r5 coord2_x_sine_high + + // Calculate coord2 * cosine + LOD r15 r1 rot_cosine_low + LOD r15 r2 rot_cosine_high + LOD r15 r3 rot_coord2 + CAL .mult + STR r15 r4 coord2_x_cosine_low + STR r15 r5 coord2_x_cosine_high + + // Calculate coord1_rotation = coord1*cosine(A) +/- coord2*sine(A) + LOD r15 r3 coord1_x_cosine_low + LOD r15 r4 coord1_x_cosine_high + LOD r15 r5 coord2_x_sine_low + LOD r15 r6 coord2_x_sine_high + + // Perform 16-bit addition/subtraction (emulating SBB) + // Addition/subtraction depends on the axis of rotation TODO better comments + LOD r15 r8 rot_axis_sel // Restore axis selection + LDI r9 y_axis + CMP r8 r9 + BRH z .coord1_add + .coord1_sub // For rotations around axis x & z, subtraction is performed + LDI r7 0 + SUB r3 r5 r3 // Subtract low bytes + BRH c .rotx_no_borrow + LDI r7 1 + .rotx_no_borrow + SUB r4 r6 r4 // Subtract high bytes + SUB r4 r7 r4 // Handle borrow + JMP .coord1_trunc + .coord1_add // For rotations around y, the two 
terms are added together + LDI r7 0 + ADD r3 r5 r3 // Add low bytes + BRH nc .rotx_no_carry + LDI r7 1 + .rotx_no_carry + ADD r4 r6 r4 // Add the high bytes + ADD r4 r7 r4 // Handle carry + + .coord1_trunc + // Move and truncate the results (TODO remove truncation here) + MOV r3 r1 + MOV r4 r2 + LDI r3 6 + CAL .trunc + STR r14 r1 rot_coord1_low + STR r14 r2 rot_coord1_high + + // Calculate coord2_rotation = coord2*cosine(A) +- coord1*sine(A) + LOD r15 r3 coord1_x_sine_low + LOD r15 r4 coord1_x_sine_high + LOD r15 r5 coord2_x_cosine_low + LOD r15 r6 coord2_x_cosine_high + + // Perform 16-bit addition/subtraction TODO better comments + LOD r15 r8 rot_axis_sel // Restore axis selection + LDI r9 y_axis + CMP r8 r9 + BRH z .coord2_sub + .coord2_add // For rotations around axis x & z, adding is performed + LDI r7 0 + ADD r3 r5 r3 // Add low bytes + BRH nc .roty_no_carry + LDI r7 1 + .roty_no_carry + ADD r4 r6 r4 // Add high bytes + ADD r4 r7 r4 // And handle carry + JMP .coord2_trunc + .coord2_sub // For rotations around axis y, subtraction is performed + LDI r7 0 + SUB r5 r3 r3 // Subtract low bytes + BRH c .roty_no_borrow + LDI r7 1 + .roty_no_borrow + SUB r6 r4 r4 // Subtract high bytes + SUB r4 r7 r4 // Handle borrow + .coord2_trunc + + // Move and truncate the results (TODO remove truncation here) + MOV r3 r1 + MOV r4 r2 + LDI r3 6 + CAL .trunc + STR r14 r1 rot_coord2_low + STR r14 r2 rot_coord2_high + + // Multiply the fixed coordinate by 2^6 to match bit growth of + // rotated coordinates TODO FIX +// LDI r1 64 + LDI r1 1 + LDI r2 0 + LOD r15 r3 rot_coord3 + CAL .mult + STR r14 r4 rot_coord3_low + STR r14 r5 rot_coord3_high + + // Organize outputs coordinates based on the rotation axis selection. 
+ LOD r15 r5 rot_axis_sel // Restore axis selection + LDI r6 y_axis + CMP r5 r6 + BRH z .rstr_xz_sel + LDI r6 x_axis + CMP r5 r6 + BRH z .rstr_yz_sel + .rstr_x_axis // Default selection + LOD r14 r2 rot_coord1_high + LOD r14 r1 rot_coord1_low + LOD r14 r4 rot_coord2_high + LOD r14 r3 rot_coord2_low + LOD r14 r6 rot_coord3_high + LOD r14 r5 rot_coord3_low + JMP .rot_exit + .rstr_xz_sel + LOD r14 r2 rot_coord1_high + LOD r14 r1 rot_coord1_low + LOD r14 r4 rot_coord3_high + LOD r14 r3 rot_coord3_low + LOD r14 r6 rot_coord2_high + LOD r14 r5 rot_coord2_low + JMP .rot_exit + .rstr_yz_sel + LOD r14 r2 rot_coord2_high + LOD r14 r1 rot_coord2_low + LOD r14 r6 rot_coord3_high + LOD r14 r5 rot_coord3_low + LOD r14 r4 rot_coord1_high + LOD r14 r3 rot_coord1_low + .rot_exit + RET + + +.trunc +// The following function performs simple truncation of a +// 16-bit input value. The number of bits truncated off +// is configurable. +// Inputs: +// r2:r1 = 16-bit input +// r3 = Number of bits to truncate +// Registers: +// r4 = Bit shifted from high to low +// r5 = Sign bit +// r6 = scratch +// TODO consider adding rounding to this logic and change name from trunc to round if so + + LDI r6 128 + AND r2 r6 r5 // Grab the sign bit + LDI r6 1 + .trunc_loop + CMP r3 r0 + BRH eq .trunc_done + AND r2 r6 r4 // Grab bit shifted from high to low byte + RSH r2 r2 + ADD r2 r5 r2 // Add the sign bit after shifting + RSH r1 r1 + CMP r4 r6 // Did a '1' move from high to low byte? + BRH ne .next_trunc_iteration + ADI r1 128 // If so, add it back + .next_trunc_iteration + DEC r3 + JMP .trunc_loop + .trunc_done + RET // Load the arctangent look-up table into RAM. 
@@ -752,3 +1117,107 @@ RET LDI r14 1 // arctan(2^-6) = ~1/2^6 STR r15 r14 6 RET + +.load_shape_vertices_edges + // Square pyramid shape + LDI r15 shape_vertices_edges_addr + LDI r14 8 // Number of vertices + STR r15 r14 0 + LDI r14 32 // x0 + STR r15 r14 1 + LDI r14 32 // y0 + STR r15 r14 2 + LDI r14 32 // z0 + STR r15 r14 3 + LDI r14 32 // x1 + STR r15 r14 4 + LDI r14 32 // y1 + STR r15 r14 5 + LDI r14 -32 // z1 + STR r15 r14 6 + LDI r14 32 // x2 + STR r15 r14 7 + + ADI r15 8 + LDI r14 -32 // y2 + STR r15 r14 0 + LDI r14 32 // z2 + STR r15 r14 1 + LDI r14 32 // x3 + STR r15 r14 2 + LDI r14 -32 // y3 + STR r15 r14 3 + LDI r14 -32 // z3 + STR r15 r14 4 + LDI r14 -32 // x4 + STR r15 r14 5 + LDI r14 32 // y4 + STR r15 r14 6 + LDI r14 32 // z4 + STR r15 r14 7 + + ADI r15 8 + LDI r14 -32 // x5 + STR r15 r14 0 + LDI r14 32 // y5 + STR r15 r14 1 + LDI r14 -32 // z5 + STR r15 r14 2 + LDI r14 -32 // x6 + STR r15 r14 3 + LDI r14 -32 // y6 + STR r15 r14 4 + LDI r14 32 // z6 + STR r15 r14 5 + LDI r14 -32 // x7 + STR r15 r14 6 + LDI r14 -32 // y7 + STR r15 r14 7 + + ADI r15 8 + LDI r14 -32 // z7 + STR r15 r14 0 + +// +// +// +// ADI r15 8 +// LDI r14 8 // Number of edges +// STR r15 r14 0 +// LDI r14 0 // Edge 0 (vertices 0,1) +// STR r15 r14 1 +// LDI r14 1 +// STR r15 r14 2 +// LDI r14 1 // Edge 1 (vertices 1,2) +// STR r15 r14 3 +// LDI r14 2 +// STR r15 r14 4 +// LDI r14 2 // Edge 2 (vertices 2,3) +// STR r15 r14 5 +// LDI r14 3 +// STR r15 r14 6 +// LDI r14 3 // Edge 3 (vertices 3,0) +// STR r15 r14 7 +// +// ADI r15 8 +// LDI r14 0 +// STR r15 r14 0 +// LDI r14 0 // Edge 4 (vertices 0,4) +// STR r15 r14 1 +// LDI r14 4 +// STR r15 r14 2 +// LDI r14 1 // Edge 5 (vertices 1,4) +// STR r15 r14 3 +// LDI r14 4 +// STR r15 r14 4 +// LDI r14 2 // Edge 6 (vertices 2,4) +// STR r15 r14 5 +// LDI r14 4 +// STR r15 r14 6 +// LDI r14 3 // Edge 7 (vertices 3,4) +// STR r15 r14 7 +// +// ADI r15 8 +// LDI r14 4 +// STR r15 r14 0 + RET From b5ab06b81fb699b63ac0b5a1facf754e9fd0b778 Mon 
Sep 17 00:00:00 2001 From: Dave <83719612+DaveJWalker@users.noreply.github.com> Date: Sun, 15 Sep 2024 13:06:30 -0700 Subject: [PATCH 05/11] Added edge line draw functionality. The code still requires a lot of cleanup and some optimizations. In particular, I need to move the CORDIC function call outside of the vertice loop to improve performance. --- programs/wireframe.as | 333 +++++++++++++++++++++++++++--------------- 1 file changed, 215 insertions(+), 118 deletions(-) diff --git a/programs/wireframe.as b/programs/wireframe.as index 2c11a09..ca42932 100644 --- a/programs/wireframe.as +++ b/programs/wireframe.as @@ -47,7 +47,7 @@ define atan_LUT_strt_addr 232 define y_axis 0 define x_axis 1 define z_axis 2 -define focal_length 127 +define focal_length 127 // Load the arctan LUT into RAM. @@ -110,10 +110,12 @@ LDI r1 28 ADI r15 3 LDI r5 y_axis // Rotation axis - LDI r12 0 - STR r12 r1 0 // Store angle in RAM - STR r12 r14 1 // Store number of vertices RAM - STR r12 r15 2 // Store vertice pointer in RAM +// TODO define all of these offsets as constants + STR r0 r1 0 // Store angle in RAM + STR r0 r14 1 // Store number of vertices RAM + STR r0 r15 2 // Store vertice pointer in RAM + STR r0 r13 3 // Store the projected points pointer in RAM + // TODO: Change rotation function so it doesn't call CORDIC // i.e. move CORDIC call outside of vertice loop since I don't // need to calculate it again each time @@ -123,34 +125,83 @@ LDI r1 28 ADI r1 16 ADI r2 16 - LDI r15 memory_mapped_io_addr - STR r15 r1 pixel_x_offset - STR r15 r2 pixel_y_offset - STR r15 r0 draw_pixel_offset + // Store the projected points in RAM + LDI r12 0 + LOD r12 r13 3 + STR r13 r1 0 + STR r13 r2 1 + ADI r13 2 + STR r12 r13 3 + + // TODO get rid of this... 
+// LDI r15 memory_mapped_io_addr +// STR r15 r1 pixel_x_offset +// STR r15 r2 pixel_y_offset +// STR r15 r0 draw_pixel_offset +// STR r15 r0 buffer_screen_offset LDI r12 0 - LOD r12 r1 0 // Store angle in RAM - LOD r12 r14 1 // Store number of vertices RAM - LOD r12 r15 2 // Store vertice pointer in RAM + LOD r12 r1 0 // Recall angle from RAM + LOD r12 r14 1 // Restore number of vertices from RAM + LOD r12 r15 2 // Restore vertice pointer from RAM DEC r14 BRH nz .vertice_loop - LDI r12 0 - STR r12 r1 0 // Store angle in RAM - STR r12 r14 1 // Store number of vertices RAM - STR r12 r15 2 // Store vertice pointer in RAM - - LDI r15 memory_mapped_io_addr - STR r15 r0 buffer_screen_offset - STR r15 r0 clear_screen_buffer_offset - - LDI r12 0 - LOD r12 r1 0 // Store angle in RAM - LOD r12 r14 1 // Store number of vertices RAM - LOD r12 r15 2 // Store vertice pointer in RAM - + // Now loop through all shape edges to draw lines + // First grab the number of edges + // r15 - pointer to shape table + // r6:r5 - vertice pair for each edge + // r12 - pointer to projected points table + // r13 - number of vertices + // r11 - number of projected points left + // r10 - Index into projected points table + // r14 - Number of shape edges + LOD r15 r14 0 + INC r15 + .edge_loop + +//CAL .wait_for_user + // Load vertice pair index for edge + LOD r15 r5 0 + LOD r15 r6 1 + ADI r15 2 + LOD r0 r13 1 // Get number of vertices + STR r0 r15 2 + STR r0 r14 7 + LDI r11 2 // Keep track of number of projected points to grab + LDI r10 0 + LDI r12 projected_points_addr + .get_projected_xy + CMP r5 r10 + BRH eq .store_projected_x0y0 + CMP r6 r10 + BRH eq .store_projected_x1y1 + JMP .next_projected_xy + .store_projected_x0y0 + LOD r12 r1 0 + LOD r12 r2 1 + DEC r11 + JMP .next_projected_xy + .store_projected_x1y1 + LOD r12 r3 0 + LOD r12 r4 1 + DEC r11 + .next_projected_xy + ADI r12 2 + INC r10 + CMP r11 r0 + BRH nz .get_projected_xy + + CAL .draw_line +// LDI r12 0 + LOD r0 r15 2 // TODO... 
everywhere I use zero as an address needs r0 + LOD r0 r14 7 + DEC r14 + BRH nz .edge_loop + LDI r12 0 + LOD r12 r15 2 // // Push the r13 angle value to RAM since it gets modified inside the draw_line function @@ -169,8 +220,13 @@ LDI r1 28 // Display current angle LDI r15 memory_mapped_io_addr STR r15 r1 show_number_offset + STR r15 r0 buffer_screen_offset + STR r15 r0 clear_screen_buffer_offset + +//CAL .wait_for_user // Increment the angle and loop + LOD r0 r1 0 // Load angle ADI r1 1 LDI r14 201 // Ending angle CMP r1 r14 @@ -180,6 +236,18 @@ LDI r1 28 HLT +.wait_for_user +// This function waits until the user presses one of the controller inputs. +// Since the current VM doesn't have breakpoints, I use this function to +// effectively add breakpoints to the code. + LDI r10 memory_mapped_io_addr + .wait_for_user_loop + LOD r10 r9 controller_input_offset + CMP r9 r0 + BRH eq .wait_for_user_loop + RET + + .cordic // CORDIC function computes sine and cosine of angle. // Input: @@ -303,8 +371,6 @@ HLT BRH nz .cordic_loop // Adjust xy outputs accordingly based on quadrant - - // Adjust x? .check_x_negate LDI r5 0b10 AND r12 r5 r0 @@ -428,12 +494,11 @@ HLT .next_pixel INC r14 // Increment loop counter CMP r14 r5 // Exit loop when i > dx - BRH ge .buffer_screen + BRH ge .draw_line_exit JMP .draw_line_loop - .buffer_screen - STR r15 r0 buffer_screen_offset - RET + .draw_line_exit + RET .mult @@ -457,7 +522,7 @@ HLT // (TODO... 
add flag for signed vs unsigned operation) LDI r4 128 // Sign bit mask - LDI r5 0xFF // All ones mask + LDI r5 0xFF // All ones mask LDI r9 0 // Set product sign to 0 (positive) LDI r6 1 // LSB mask AND r2 r4 r0 // Determine if r2 is negative @@ -544,13 +609,13 @@ HLT // +--->| ALU | // +--------------->| (subtract) | // | +------------+ -// | | -// | | +// | | +// | | // | +-----------------------------------------------+ // +---| Remainder Reg (r6:r5) | Dividend Reg (r2:r1) |<-- Quotient (shifted in) // +-----------------------------------------------+ -// <-- shifted left -// +// <-- shifted left +// // // Check to see if both bytes of divisor are zero; if so, // a divide-by-zero error occurred. Use of the NOR instruction @@ -763,9 +828,9 @@ HLT LOD r15 r4 pp_fl_plus_z_high CAL .div - // Trunc the results + // round the results LDI r3 2 - CAL .trunc + CAL .round STR r15 r1 pp_x_projected // Calculate y_projected @@ -779,9 +844,9 @@ HLT LOD r15 r4 pp_fl_plus_z_high CAL .div - // Now truncate the results + // Now round the results LDI r3 2 - CAL .trunc + CAL .round STR r15 r1 pp_y_projected // Move x_projected and y_projected to output registers @@ -815,7 +880,7 @@ HLT // r4 = z // r5 = rotation axis // -// Outputs: +// Outputs: // r2:r1 = rotated_x // r4:r3 = rotated_y // r6:r5 = rotated_z @@ -874,17 +939,17 @@ HLT CMP r5 r6 BRH z .go_z_axis_sel .go_y_axis_sel // Default - STR r15 r2 rot_coord1 + STR r15 r2 rot_coord1 STR r15 r4 rot_coord2 - STR r15 r3 rot_coord3 + STR r15 r3 rot_coord3 JMP .calc_cordic .go_z_axis_sel - STR r15 r2 rot_coord1 + STR r15 r2 rot_coord1 STR r15 r3 rot_coord2 STR r15 r4 rot_coord3 JMP .calc_cordic .go_x_axis_sel - STR r15 r3 rot_coord1 + STR r15 r3 rot_coord1 STR r15 r4 rot_coord2 STR r15 r2 rot_coord3 @@ -900,14 +965,14 @@ HLT BRH z .cosine_pos LDI r1 0xFF .cosine_pos - STR r15 r1 rot_cosine_high + STR r15 r1 rot_cosine_high STR r15 r2 rot_cosine_low LDI r1 0 AND r12 r3 r0 // Check sign bit BRH z .sine_pos LDI r1 0xFF .sine_pos - STR 
r15 r1 rot_sine_high + STR r15 r1 rot_sine_high STR r15 r3 rot_sine_low // Now... calculate the four terms in the rotation and store in RAM @@ -963,7 +1028,7 @@ HLT .rotx_no_borrow SUB r4 r6 r4 // Subtract high bytes SUB r4 r7 r4 // Handle borrow - JMP .coord1_trunc + JMP .coord1_round .coord1_add // For rotations around y, the two terms are added together LDI r7 0 ADD r3 r5 r3 // Add low bytes @@ -973,12 +1038,12 @@ HLT ADD r4 r6 r4 // Add the high bytes ADD r4 r7 r4 // Handle carry - .coord1_trunc - // Move and truncate the results (TODO remove truncation here) + .coord1_round + // Move and round the results MOV r3 r1 MOV r4 r2 LDI r3 6 - CAL .trunc + CAL .round STR r14 r1 rot_coord1_low STR r14 r2 rot_coord1_high @@ -1001,7 +1066,7 @@ HLT .roty_no_carry ADD r4 r6 r4 // Add high bytes ADD r4 r7 r4 // And handle carry - JMP .coord2_trunc + JMP .coord2_round .coord2_sub // For rotations around axis y, subtraction is performed LDI r7 0 SUB r5 r3 r3 // Subtract low bytes @@ -1010,13 +1075,13 @@ HLT .roty_no_borrow SUB r6 r4 r4 // Subtract high bytes SUB r4 r7 r4 // Handle borrow - .coord2_trunc + .coord2_round - // Move and truncate the results (TODO remove truncation here) + // Move and round the results MOV r3 r1 MOV r4 r2 LDI r3 6 - CAL .trunc + CAL .round STR r14 r1 rot_coord2_low STR r14 r2 rot_coord2_high @@ -1043,14 +1108,14 @@ HLT LOD r14 r1 rot_coord1_low LOD r14 r4 rot_coord2_high LOD r14 r3 rot_coord2_low - LOD r14 r6 rot_coord3_high - LOD r14 r5 rot_coord3_low + LOD r14 r6 rot_coord3_high + LOD r14 r5 rot_coord3_low JMP .rot_exit .rstr_xz_sel LOD r14 r2 rot_coord1_high LOD r14 r1 rot_coord1_low - LOD r14 r4 rot_coord3_high - LOD r14 r3 rot_coord3_low + LOD r14 r4 rot_coord3_high + LOD r14 r3 rot_coord3_low LOD r14 r6 rot_coord2_high LOD r14 r5 rot_coord2_low JMP .rot_exit @@ -1059,42 +1124,61 @@ HLT LOD r14 r1 rot_coord2_low LOD r14 r6 rot_coord3_high LOD r14 r5 rot_coord3_low - LOD r14 r4 rot_coord1_high - LOD r14 r3 rot_coord1_low + LOD r14 r4 
rot_coord1_high + LOD r14 r3 rot_coord1_low .rot_exit RET -.trunc -// The following function performs simple truncation of a -// 16-bit input value. The number of bits truncated off -// is configurable. +.round +// The following function performs rounding of the 16-bit input value. +// Round is done by right shifting the value for one minus the total +// number of bits to drop (held in r3). Then, 1 is added/subtracted to +// the value before truncating off the final bit, which effectively rounds +// to the nearest integer. // Inputs: // r2:r1 = 16-bit input -// r3 = Number of bits to truncate +// r3 = Number of bits to round off // Registers: // r4 = Bit shifted from high to low // r5 = Sign bit -// r6 = scratch -// TODO consider adding rounding to this logic and change name from trunc to round if so +// r7:r6 = scratch LDI r6 128 AND r2 r6 r5 // Grab the sign bit LDI r6 1 - .trunc_loop + .round_loop CMP r3 r0 - BRH eq .trunc_done + BRH eq .round_done + CMP r3 r6 + BRH ne .round_shift + .round_final_bit + CMP r5 r6 + BRH eq .round_neg + .round_pos + INC r1 + BRH nc .round_no_carry + INC r2 + .round_no_carry + JMP .round_shift + .round_neg + DEC r1 + BRH c .round_no_borrow + DEC r2 + .round_no_borrow + JMP .round_shift + .round_shift AND r2 r6 r4 // Grab bit shifted from high to low byte RSH r2 r2 ADD r2 r5 r2 // Add the sign bit after shifting RSH r1 r1 CMP r4 r6 // Did a '1' move from high to low byte? 
- BRH ne .next_trunc_iteration + BRH ne .next_round_iteration ADI r1 128 // If so, add it back - .next_trunc_iteration + .next_round_iteration DEC r3 - JMP .trunc_loop - .trunc_done + JMP .round_loop + .round_done RET @@ -1122,7 +1206,7 @@ HLT // Square pyramid shape LDI r15 shape_vertices_edges_addr LDI r14 8 // Number of vertices - STR r15 r14 0 + STR r15 r14 0 LDI r14 32 // x0 STR r15 r14 1 LDI r14 32 // y0 @@ -1177,47 +1261,60 @@ HLT ADI r15 8 LDI r14 -32 // z7 STR r15 r14 0 + LDI r14 12 // Number of edges + STR r15 r14 1 + LDI r14 0 // Edge 0 (vertices 0,1) + STR r15 r14 2 + LDI r14 1 + STR r15 r14 3 + LDI r14 1 // Edge 1 (vertices 1,3) + STR r15 r14 4 + LDI r14 3 + STR r15 r14 5 + LDI r14 2 // Edge 2 (vertices 2,3) + STR r15 r14 6 + LDI r14 3 + STR r15 r14 7 + + ADI r15 8 + LDI r14 2 // Edge 3 (vertices 2,0) + STR r15 r14 0 + LDI r14 0 + STR r15 r14 1 + LDI r14 4 // Edge 4 (vertices 4,5) + STR r15 r14 2 + LDI r14 5 + STR r15 r14 3 + LDI r14 5 // Edge 5 (vertices 5,7) + STR r15 r14 4 + LDI r14 7 + STR r15 r14 5 + LDI r14 6 // Edge 6 (vertices 6,7) + STR r15 r14 6 + LDI r14 7 + STR r15 r14 7 -// -// -// -// ADI r15 8 -// LDI r14 8 // Number of edges -// STR r15 r14 0 -// LDI r14 0 // Edge 0 (vertices 0,1) -// STR r15 r14 1 -// LDI r14 1 -// STR r15 r14 2 -// LDI r14 1 // Edge 1 (vertices 1,2) -// STR r15 r14 3 -// LDI r14 2 -// STR r15 r14 4 -// LDI r14 2 // Edge 2 (vertices 2,3) -// STR r15 r14 5 -// LDI r14 3 -// STR r15 r14 6 -// LDI r14 3 // Edge 3 (vertices 3,0) -// STR r15 r14 7 -// -// ADI r15 8 -// LDI r14 0 -// STR r15 r14 0 -// LDI r14 0 // Edge 4 (vertices 0,4) -// STR r15 r14 1 -// LDI r14 4 -// STR r15 r14 2 -// LDI r14 1 // Edge 5 (vertices 1,4) -// STR r15 r14 3 -// LDI r14 4 -// STR r15 r14 4 -// LDI r14 2 // Edge 6 (vertices 2,4) -// STR r15 r14 5 -// LDI r14 4 -// STR r15 r14 6 -// LDI r14 3 // Edge 7 (vertices 3,4) -// STR r15 r14 7 -// -// ADI r15 8 -// LDI r14 4 -// STR r15 r14 0 + ADI r15 8 + LDI r14 4 // Edge 7 (vertices 4,6) + STR r15 
r14 0 + LDI r14 6 + STR r15 r14 1 + LDI r14 0 // Edge 8 (vertices 0,4) + STR r15 r14 2 + LDI r14 4 + STR r15 r14 3 + LDI r14 1 // Edge 9 (vertices 1,5) + STR r15 r14 4 + LDI r14 5 + STR r15 r14 5 + LDI r14 2 // Edge 10 (vertices 2,6) + STR r15 r14 6 + LDI r14 6 + STR r15 r14 7 + + ADI r15 8 + LDI r14 3 // Edge 11 (vertices 3,7) + STR r15 r14 0 + LDI r14 7 + STR r15 r14 1 RET From fb4190154d76ba83c47d5555e8524d8231f0b2f6 Mon Sep 17 00:00:00 2001 From: Dave <83719612+DaveJWalker@users.noreply.github.com> Date: Sun, 15 Sep 2024 23:15:21 -0700 Subject: [PATCH 06/11] Added some comments. --- programs/wireframe.as | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/programs/wireframe.as b/programs/wireframe.as index ca42932..1bb6f40 100644 --- a/programs/wireframe.as +++ b/programs/wireframe.as @@ -1,18 +1,27 @@ // Wireframe Demo by Dave Walker -// A basic implementation of a CORDIC function operating in rotation mode. A CORDIC can be -// used to iteratively calculate sine and cosine of an angle. Due to the limitations of this -// 8-bit computer, the CORDIC isn't particularly accurate. A number of values in this code are -// represented as fixed point representations. Therefore, you'll see notations like u2.5 and -// s1.6. These notations denote signed/unsigned, the number of integer bits, and the number of -// fractional bits. For example, the sine/cosine outputs are all s1.6. +// This program is an implementation of a 3D wireframe renderer on MattBatWing's BatPU-2 +// Minecraft computer. I created it because I wanted to try to reproduce the one Matt created +// here: // -// In addition to the CORDIC, a draw_line function is included based on Bresenham's Algorithm. +// https://www.youtube.com/watch?v=hFRlnNci3Rs // -// (Note: I'm not crazy with how I pass inputs to functions in this code using registers. I think -// I'd prefer to pass them via memory (i.e. a stack). 
I'd also possibly like to dedicate a -// register as a stack pointer. I haven't written assembly in years so I'm not accustomed -// to dealing with this stuff directly. Perhaps I'll change it later... perhaps not.) +// It began as a simple demo of a CORDIC function operating in rotation mode. A CORDIC can be +// used to iteratively calculate sine and cosine of an angle. After the CORDIC, I "simply" had +// to add Bresenham's line drawing algorithm, a 16-bit multiplier, 16-bit divider, a 3D rotation +// function, a 3D-to-2D project function, and some other bits and bobs. :) +// +// It was fun to make, and I'm reasonably happy with it. However, it's SLOW. Way slower than +// Matt's hardware implementation shown in the video above, which is totally expected. +// +// Note: The code is still pretty messy. I need to do a clean-up pass on it and make a few +// more performance improvements (like moving the CORDIC function call outside of the vertice +// loop). I'm also considering modifying the CORDIC to use 16-bit math to improve its +// accuracy, which should make the cube animation a bit smoother. Also, I'm not crazy with +// how I pass inputs to functions in this code using registers. I think I'd prefer to pass +// them via memory (i.e. a stack). I'd also possibly like to dedicate a register as a +// stack pointer. I haven't written assembly in years so I'm not accustomed to dealing with +// this stuff directly. Perhaps I'll change it later... perhaps not.) 
// Memory mapped IO port mapping offsets @@ -36,8 +45,6 @@ define rng_offset 6 define controller_input_offset 7 // Various RAM addresses -define x2_coord 0 -define y2_coord 1 define register_stack_pointer 50 define shape_vertices_edges_addr 100 define projected_points_addr 150 @@ -90,7 +97,7 @@ STR r15 r0 buffer_chars_offset // Initialze rotation angle -LDI r1 28 +LDI r1 0 .main_loop // Point to 3D share vertice/edge table @@ -216,6 +223,7 @@ LDI r1 28 // LDI r15 x2_coord // STR r15 r3 x2_coord // STR r15 r4 y2_coord + LOD r0 r1 0 // Load angle // Display current angle LDI r15 memory_mapped_io_addr @@ -226,8 +234,7 @@ LDI r1 28 //CAL .wait_for_user // Increment the angle and loop - LOD r0 r1 0 // Load angle - ADI r1 1 + ADI r1 5 LDI r14 201 // Ending angle CMP r1 r14 BRH lt .main_loop From b97953ad3e89ccc33745d85ac1c2c23cbcb22d87 Mon Sep 17 00:00:00 2001 From: Dave <83719612+DaveJWalker@users.noreply.github.com> Date: Mon, 16 Sep 2024 00:26:29 -0700 Subject: [PATCH 07/11] Cleaned up main loop. --- programs/wireframe.as | 179 +++++++++++++++++++++--------------------- 1 file changed, 90 insertions(+), 89 deletions(-) diff --git a/programs/wireframe.as b/programs/wireframe.as index 1bb6f40..454eecd 100644 --- a/programs/wireframe.as +++ b/programs/wireframe.as @@ -1,18 +1,20 @@ // Wireframe Demo by Dave Walker -// This program is an implementation of a 3D wireframe renderer on MattBatWing's BatPU-2 -// Minecraft computer. I created it because I wanted to try to reproduce the one Matt created -// here: +// This program is an implementation of a 3D wireframe renderer running on MattBatWing's +// BatPU-2 Minecraft computer. I created it because I wanted to try to reproduce renderer +// Matt created here: // // https://www.youtube.com/watch?v=hFRlnNci3Rs // // It began as a simple demo of a CORDIC function operating in rotation mode. A CORDIC can be -// used to iteratively calculate sine and cosine of an angle. 
After the CORDIC, I "simply" had -// to add Bresenham's line drawing algorithm, a 16-bit multiplier, 16-bit divider, a 3D rotation -// function, a 3D-to-2D project function, and some other bits and bobs. :) +// used to iteratively calculate sine and cosine of an angle. After the CORDIC, I "only" had +// to add Bresenham's line drawing algorithm, a 16-bit multiplier, a 16-bit divider, a 3D +// rotation function, a 3D-to-2D project function, and some other bits and bobs. :) // -// It was fun to make, and I'm reasonably happy with it. However, it's SLOW. Way slower than -// Matt's hardware implementation shown in the video above, which is totally expected. +// It was fun to make, and I'm reasonably happy with it. However, it's SLOW... way slower than +// Matt's hardware implementation shown in the video above, which is totally expected. I mean... +// dedicated hardware is always going to be much faster than software running on a general +// purpose processor. // // Note: The code is still pretty messy. I need to do a clean-up pass on it and make a few // more performance improvements (like moving the CORDIC function call outside of the vertice @@ -44,10 +46,25 @@ define unsigned_mode_offset 5 define rng_offset 6 define controller_input_offset 7 +// Shape Table Offsets +define x_offset 0 +define y_offset 1 +define z_offset 2 +define number_of_edges 0 +define edge_vertice_0 0 +define edge_vertice_1 1 + +// Various addresses for storing values. All are offset from r0. 
+define rotation_angle 0 +define number_of_vertices 1 +define shape_table_pointer 2 +define projected_xy_pointer 3 +define edges_remaining 4 + // Various RAM addresses define register_stack_pointer 50 -define shape_vertices_edges_addr 100 -define projected_points_addr 150 +define projected_points_addr 100 +define shape_vertices_edges_addr 150 define atan_LUT_strt_addr 232 // Other constants @@ -55,6 +72,8 @@ define y_axis 0 define x_axis 1 define z_axis 2 define focal_length 127 +define rotation_angle_increment 5 +define rotation_angle_max 201 // Load the arctan LUT into RAM. @@ -71,7 +90,7 @@ STR r15 r0 unsigned_mode_offset STR r15 r0 clear_chars_buffer_offset STR r15 r0 buffer_chars_offset -// Write "3DROTATION" +// Write "ROTATION" STR r15 r0 clear_chars_buffer_offset LDI r14 " " STR r15 r14 write_char_offset @@ -100,7 +119,7 @@ STR r15 r0 buffer_chars_offset LDI r1 0 .main_loop - // Point to 3D share vertice/edge table + // Point to 3D shape vertice/edge table LDI r15 shape_vertices_edges_addr LOD r15 r14 0 // Load number of vertices in r14 INC r15 // And point to first vertice @@ -111,136 +130,118 @@ LDI r1 0 // Now loop through all of the 3D vertices in memory to // rotate and project them onto a 2D plane for display. 
.vertice_loop - LOD r15 r2 0 - LOD r15 r3 1 - LOD r15 r4 2 - ADI r15 3 - LDI r5 y_axis // Rotation axis - -// TODO define all of these offsets as constants - STR r0 r1 0 // Store angle in RAM - STR r0 r14 1 // Store number of vertices RAM - STR r0 r15 2 // Store vertice pointer in RAM - STR r0 r13 3 // Store the projected points pointer in RAM + // Load the 3D x,y,z coordinates from RAM + LOD r15 r2 x_offset + LOD r15 r3 y_offset + LOD r15 r4 z_offset + ADI r15 3 // Point to the next set of coordinates + LDI r5 y_axis // Set the rotation axis + + // Push variables into RAM + STR r0 r1 rotation_angle + STR r0 r14 number_of_vertices + STR r0 r15 shape_table_pointer + STR r0 r13 projected_xy_pointer // TODO: Change rotation function so it doesn't call CORDIC // i.e. move CORDIC call outside of vertice loop since I don't - // need to calculate it again each time + // need to recalculate it for each vertice. Doing so should + // significantly improve performance. CAL .rotation LDI r7 focal_length CAL .pixel_projection + // Center the projected points on the screen ADI r1 16 ADI r2 16 // Store the projected points in RAM - LDI r12 0 - LOD r12 r13 3 - STR r13 r1 0 - STR r13 r2 1 + LOD r0 r13 projected_xy_pointer + STR r13 r1 x_offset + STR r13 r2 y_offset ADI r13 2 - STR r12 r13 3 - - // TODO get rid of this... 
-// LDI r15 memory_mapped_io_addr -// STR r15 r1 pixel_x_offset -// STR r15 r2 pixel_y_offset -// STR r15 r0 draw_pixel_offset -// STR r15 r0 buffer_screen_offset + STR r0 r13 projected_xy_pointer - LDI r12 0 - LOD r12 r1 0 // Recall angle from RAM - LOD r12 r14 1 // Restore number of vertices from RAM - LOD r12 r15 2 // Restore vertice pointer from RAM + // Recall variables from RAM + LOD r0 r1 rotation_angle + LOD r0 r14 number_of_vertices + LOD r0 r15 shape_table_pointer - DEC r14 + DEC r14 // Decrement the vertice counter and loop if more exist BRH nz .vertice_loop - // Now loop through all shape edges to draw lines - // First grab the number of edges + // After calculating projected xy for all vertices, loop through all shape edges to + // draw lines between them. Each edge is defined as a pair of vertices. // r15 - pointer to shape table - // r6:r5 - vertice pair for each edge + // r14 - Number of shape edges // r12 - pointer to projected points table // r13 - number of vertices - // r11 - number of projected points left - // r10 - Index into projected points table - // r14 - Number of shape edges - LOD r15 r14 0 + // r11 - number of projected points left for the current edge + // r10 - Projected points table index + // r6:r5 - vertice pair for each edge + + // First grab the number of edges from the shape table + LOD r15 r14 number_of_edges INC r15 .edge_loop -//CAL .wait_for_user // Load vertice pair index for edge - LOD r15 r5 0 - LOD r15 r6 1 + LOD r15 r5 edge_vertice_0 + LOD r15 r6 edge_vertice_1 + // And point to the next edge vertice pair ADI r15 2 - LOD r0 r13 1 // Get number of vertices - STR r0 r15 2 - STR r0 r14 7 - LDI r11 2 // Keep track of number of projected points to grab - LDI r10 0 + STR r0 r15 shape_table_pointer + STR r0 r14 edges_remaining LDI r12 projected_points_addr + LDI r11 2 // Number of projected xy points to grab for each edge + LDI r10 0 // Projected xy table index .get_projected_xy + // Check both edge vertices again the 
projected xy table index CMP r5 r10 BRH eq .store_projected_x0y0 CMP r6 r10 BRH eq .store_projected_x1y1 JMP .next_projected_xy .store_projected_x0y0 - LOD r12 r1 0 - LOD r12 r2 1 + LOD r12 r1 x_offset + LOD r12 r2 y_offset DEC r11 JMP .next_projected_xy .store_projected_x1y1 - LOD r12 r3 0 - LOD r12 r4 1 + LOD r12 r3 x_offset + LOD r12 r4 y_offset DEC r11 .next_projected_xy - ADI r12 2 - INC r10 - CMP r11 r0 + ADI r12 2 // Point to the next project xy point + INC r10 // Increment the table index + CMP r11 r0 // ... and check if done BRH nz .get_projected_xy - CAL .draw_line -// LDI r12 0 - LOD r0 r15 2 // TODO... everywhere I use zero as an address needs r0 - LOD r0 r14 7 + // Draw a line between the two projected xy points + CAL .draw_line + + LOD r0 r15 shape_table_pointer + LOD r0 r14 edges_remaining DEC r14 BRH nz .edge_loop - LDI r12 0 - LOD r12 r15 2 - - -// // Push the r13 angle value to RAM since it gets modified inside the draw_line function -// LDI r15 register_stack_pointer -// STR r15 r13 -// CAL .draw_line -// // And pop it back off when finished -// LDI r15 register_stack_pointer -// LOD r15 r13 - -// // Store the x1/y1 coordinates to RAM so they can be x2/y2 next iteration -// LDI r15 x2_coord -// STR r15 r3 x2_coord -// STR r15 r4 y2_coord - LOD r0 r1 0 // Load angle + // Load the rotation angle from RAM + LOD r0 r1 rotation_angle // Display current angle LDI r15 memory_mapped_io_addr STR r15 r1 show_number_offset + + // Update the screen STR r15 r0 buffer_screen_offset STR r15 r0 clear_screen_buffer_offset -//CAL .wait_for_user - // Increment the angle and loop - ADI r1 5 - LDI r14 201 // Ending angle + ADI r1 rotation_angle_increment + LDI r14 rotation_angle_max CMP r1 r14 BRH lt .main_loop SUB r1 r14 r1 JMP .main_loop -HLT .wait_for_user From 161608f689c77b50b4728d91194fc7dbed5f1656 Mon Sep 17 00:00:00 2001 From: Dave <83719612+DaveJWalker@users.noreply.github.com> Date: Mon, 16 Sep 2024 01:55:16 -0700 Subject: [PATCH 08/11] Moved CORDIC 
function call outside of vertice loop. Moving the CORDIC improves performance but not nearly as much as I expected. Most of the time is spent in the multiply and divide functions I suppose. --- programs/wireframe.as | 97 ++++++++++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 39 deletions(-) diff --git a/programs/wireframe.as b/programs/wireframe.as index 454eecd..20b78a0 100644 --- a/programs/wireframe.as +++ b/programs/wireframe.as @@ -60,6 +60,8 @@ define number_of_vertices 1 define shape_table_pointer 2 define projected_xy_pointer 3 define edges_remaining 4 +define cosine 5 +define sine 6 // Various RAM addresses define register_stack_pointer 50 @@ -72,7 +74,7 @@ define y_axis 0 define x_axis 1 define z_axis 2 define focal_length 127 -define rotation_angle_increment 5 +define rotation_angle_increment 1 define rotation_angle_max 201 @@ -115,9 +117,20 @@ STR r15 r14 write_char_offset STR r15 r0 buffer_chars_offset -// Initialze rotation angle +// Initialze rotation angle and store it in RAM LDI r1 0 + .main_loop + // Save the rotation angle at the start of each loop (since the + // CORDIC function modifies it) + STR r0 r1 rotation_angle + + // Call the CORDIC function to calculate sine and cosine for the + // rotation angle + CAL .cordic + LOD r0 r1 rotation_angle + STR r0 r2 cosine + STR r0 r3 sine // Point to 3D shape vertice/edge table LDI r15 shape_vertices_edges_addr @@ -131,11 +144,11 @@ LDI r1 0 // rotate and project them onto a 2D plane for display. 
.vertice_loop // Load the 3D x,y,z coordinates from RAM - LOD r15 r2 x_offset - LOD r15 r3 y_offset - LOD r15 r4 z_offset + LOD r15 r3 x_offset + LOD r15 r4 y_offset + LOD r15 r5 z_offset ADI r15 3 // Point to the next set of coordinates - LDI r5 y_axis // Set the rotation axis + LDI r6 y_axis // Set the rotation axis // Push variables into RAM STR r0 r1 rotation_angle @@ -143,10 +156,9 @@ LDI r1 0 STR r0 r15 shape_table_pointer STR r0 r13 projected_xy_pointer - // TODO: Change rotation function so it doesn't call CORDIC - // i.e. move CORDIC call outside of vertice loop since I don't - // need to recalculate it for each vertice. Doing so should - // significantly improve performance. + LOD r0 r1 cosine + LOD r0 r2 sine + CAL .rotation LDI r7 focal_length CAL .pixel_projection @@ -211,7 +223,7 @@ LDI r1 0 LOD r12 r4 y_offset DEC r11 .next_projected_xy - ADI r12 2 // Point to the next project xy point + ADI r12 2 // Point to the next projected xy point INC r10 // Increment the table index CMP r11 r0 // ... and check if done BRH nz .get_projected_xy @@ -262,8 +274,8 @@ LDI r1 0 // r1 = angle in radians (fixed point in the form u2.5) // (Values between 0 and 2*pi are supported.) // Outputs: -// r2 = sine (r1) -// r3 = cosine(r1) +// r2 = cosine(r1) +// r3 = sine (r1) // Register usage // r1 - angle in radians (s1.6) // r2 - x (s1.6) @@ -881,12 +893,12 @@ LDI r1 0 // 2 = [y,z] // // Inputs: -// r1 = rotation angle in radians (fixed point in the form u2.5) -// (Values between 0 and 2*pi are supported.) // TODO... 
make input and outputs consistent -// r2 = x -// r3 = y -// r4 = z -// r5 = rotation axis +// r1 = cosine(A) (fixed point in the form s1.6) +// r2 = sine (A) (fixed point in the form s1.6) +// r3 = x +// r4 = y +// r5 = z +// r6 = rotation axis // // Outputs: // r2:r1 = rotated_x @@ -938,35 +950,42 @@ LDI r1 0 ADI r14 8 // Organize input coordinates in RAM according to the - // rotation axis selection input (r5) - STR r15 r5 rot_axis_sel - LDI r6 x_axis - CMP r5 r6 + // rotation axis selection input (r6) + STR r15 r6 rot_axis_sel + LDI r7 x_axis + CMP r6 r7 BRH z .go_x_axis_sel - LDI r6 z_axis - CMP r5 r6 + LDI r7 z_axis + CMP r6 r7 BRH z .go_z_axis_sel .go_y_axis_sel // Default - STR r15 r2 rot_coord1 - STR r15 r4 rot_coord2 - STR r15 r3 rot_coord3 - JMP .calc_cordic - .go_z_axis_sel - STR r15 r2 rot_coord1 - STR r15 r3 rot_coord2 + STR r15 r3 rot_coord1 + STR r15 r5 rot_coord2 STR r15 r4 rot_coord3 JMP .calc_cordic - .go_x_axis_sel + .go_z_axis_sel STR r15 r3 rot_coord1 STR r15 r4 rot_coord2 - STR r15 r2 rot_coord3 + STR r15 r5 rot_coord3 + JMP .calc_cordic + .go_x_axis_sel + STR r15 r4 rot_coord1 + STR r15 r5 rot_coord2 + STR r15 r3 rot_coord3 .calc_cordic - // Call the CORDIC function to calculate sine and cosine for the - // rotation angle - CAL .cordic - - // Convert them to 16-bit values for the multiplications below. + // Move cosine/sine inputs to match output of cordic function + // previous called below (so I don't have to juggle around a bunch + // of registers) + MOV r2 r3 + MOV r1 r2 + // NOTE: This call was moved outside of the main vertice loop to + // improve performance. +// // Call the CORDIC function to calculate sine and cosine for the +// // rotation angle +// CAL .cordic + + // Convert sine and cosine to 16-bit values for the multiplication`s below. 
LDI r12 128 LDI r1 0 AND r12 r2 r0 // Check sign bit From eb36339813cf44311e07af352655a2cb9fcbc794 Mon Sep 17 00:00:00 2001 From: Dave <83719612+DaveJWalker@users.noreply.github.com> Date: Mon, 16 Sep 2024 01:58:00 -0700 Subject: [PATCH 09/11] Set angle increment back to 5. --- programs/wireframe.as | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/wireframe.as b/programs/wireframe.as index 20b78a0..d2c5d62 100644 --- a/programs/wireframe.as +++ b/programs/wireframe.as @@ -74,7 +74,7 @@ define y_axis 0 define x_axis 1 define z_axis 2 define focal_length 127 -define rotation_angle_increment 1 +define rotation_angle_increment 5 define rotation_angle_max 201 From 2df03b1635a60fa1d26effbbefefd195765b121e Mon Sep 17 00:00:00 2001 From: Dave <83719612+DaveJWalker@users.noreply.github.com> Date: Mon, 16 Sep 2024 02:49:23 -0700 Subject: [PATCH 10/11] More comment changes. --- programs/wireframe.as | 9 --------- 1 file changed, 9 deletions(-) diff --git a/programs/wireframe.as b/programs/wireframe.as index d2c5d62..fa1a3b4 100644 --- a/programs/wireframe.as +++ b/programs/wireframe.as @@ -15,15 +15,6 @@ // Matt's hardware implementation shown in the video above, which is totally expected. I mean... // dedicated hardware is always going to be much faster than software running on a general // purpose processor. -// -// Note: The code is still pretty messy. I need to do a clean-up pass on it and make a few -// more performance improvements (like moving the CORDIC function call outside of the vertice -// loop). I'm also considering modifying the CORDIC to use 16-bit math to improve its -// accuracy, which should make the cube animation a bit smoother. Also, I'm not crazy with -// how I pass inputs to functions in this code using registers. I think I'd prefer to pass -// them via memory (i.e. a stack). I'd also possibly like to dedicate a register as a -// stack pointer. 
I haven't written assembly in years so I'm not accustomed to dealing with -// this stuff directly. Perhaps I'll change it later... perhaps not.) // Memory mapped IO port mapping offsets From 80b42eaa1e74f9178323e133473dd72a7141b492 Mon Sep 17 00:00:00 2001 From: Dave <83719612+DaveJWalker@users.noreply.github.com> Date: Mon, 16 Sep 2024 12:33:10 -0700 Subject: [PATCH 11/11] Fixed x_axis rotation bug. --- programs/wireframe.as | 115 ++++++++++++++++++++++++------------------ 1 file changed, 66 insertions(+), 49 deletions(-) diff --git a/programs/wireframe.as b/programs/wireframe.as index fa1a3b4..e35489f 100644 --- a/programs/wireframe.as +++ b/programs/wireframe.as @@ -139,7 +139,7 @@ LDI r1 0 LOD r15 r4 y_offset LOD r15 r5 z_offset ADI r15 3 // Point to the next set of coordinates - LDI r6 y_axis // Set the rotation axis + LDI r6 x_axis // Set the rotation axis // Push variables into RAM STR r0 r1 rotation_angle @@ -147,8 +147,8 @@ LDI r1 0 STR r0 r15 shape_table_pointer STR r0 r13 projected_xy_pointer - LOD r0 r1 cosine - LOD r0 r2 sine + LOD r0 r1 cosine + LOD r0 r2 sine CAL .rotation LDI r7 focal_length @@ -165,9 +165,9 @@ LDI r1 0 STR r0 r13 projected_xy_pointer // Recall variables from RAM - LOD r0 r1 rotation_angle - LOD r0 r14 number_of_vertices - LOD r0 r15 shape_table_pointer + LOD r0 r1 rotation_angle + LOD r0 r14 number_of_vertices + LOD r0 r15 shape_table_pointer DEC r14 // Decrement the vertice counter and loop if more exist BRH nz .vertice_loop @@ -778,6 +778,9 @@ LDI r1 0 .pixel_projection // The following function is used to project a 3D point in space onto a 2D plane. 
+// It utilizes "weak" perspective projection as described below: +// +// https://en.wikipedia.org/wiki/3D_projection // // Given a 3D coordinate [x,y,z], it'll return x_projected and y_projected by // calculating the following: @@ -862,26 +865,38 @@ LDI r1 0 // Move x_projected and y_projected to output registers MOV r1 r2 // y_projected -> r2 - LOD r15 r1 pp_x_projected // x_projected -> r1 + LOD r15 r1 pp_x_projected // x_projected -> r1 RET .rotation -// This function applies the following rotation matrix to -// a set of two coordinates: +// This function rotates a 3D xyz coordinate around a +// selected axis of rotation. It does so by multiplying +// the [x,y,z] vector by one of three rotation matrices R. +// +// +- -+ +// | cos(A) -sin(A) | +// Rx = | | +// | sin(A) cos(A) | +// +- -+ +// +// +- -+ +// | cos(A) sin(A) | +// Ry = | | +// | -sin(A) cos(A) | +// +- -+ // -// +- -+ -// | cos(A) -sin(A) | -// Rz = | | -// | sin(A) cos(A) | -// +- -+ +// +- -+ +// | cos(A) -sin(A) | +// Rz = | | +// | sin(A) cos(A) | +// +- -+ // -// Given a 3D coordinate (composed of xyz values), the -// coordinate pair utilized can be selected based on the -// coordinate pair selection input as follows: -// 0 = [x,y] -// 1 = [x,z] -// 2 = [y,z] +// In all cases, the coordinate for the axis of rotation +// does not change. 
Refer to the following Wikipedia +// article for details: +// +// https://en.wikipedia.org/wiki/Rotation_matrix // // Inputs: // r1 = cosine(A) (fixed point in the form s1.6) @@ -905,9 +920,9 @@ LDI r1 0 define rot_coord2 1 // 2nd rotation coordinate define rot_coord3 2 // 3rd rotation coordinate (fixed) define rot_axis_sel 3 // rotation axis - define rot_cosine_high 4 // Cosine + define rot_cosine_high 4 // cosine define rot_cosine_low 5 - define rot_sine_high 6 // Sine + define rot_sine_high 6 // sine define rot_sine_low 7 define coord1_x_cosine_low -1 @@ -933,7 +948,7 @@ LDI r1 0 define rot_coord3_high 2 define rot_coord3_low 3 - // Set up pointer for temporary storage + // Set up pointers for temporary storage LDI r15 register_stack_pointer ADI r15 8 MOV r15 r14 @@ -953,18 +968,18 @@ LDI r1 0 STR r15 r3 rot_coord1 STR r15 r5 rot_coord2 STR r15 r4 rot_coord3 - JMP .calc_cordic + JMP .adjst_trig .go_z_axis_sel STR r15 r3 rot_coord1 STR r15 r4 rot_coord2 STR r15 r5 rot_coord3 - JMP .calc_cordic + JMP .adjst_trig .go_x_axis_sel STR r15 r4 rot_coord1 STR r15 r5 rot_coord2 STR r15 r3 rot_coord3 - .calc_cordic + .adjst_trig // Move cosine/sine inputs to match output of cordic function // previous called below (so I don't have to juggle around a bunch // of registers) @@ -1026,14 +1041,15 @@ LDI r1 0 STR r15 r4 coord2_x_cosine_low STR r15 r5 coord2_x_cosine_high + // Calculate coord1_rotation = coord1*cosine(A) +/- coord2*sine(A) LOD r15 r3 coord1_x_cosine_low LOD r15 r4 coord1_x_cosine_high LOD r15 r5 coord2_x_sine_low LOD r15 r6 coord2_x_sine_high - // Perform 16-bit addition/subtraction (emulating SBB) - // Addition/subtraction depends on the axis of rotation TODO better comments + // Perform 16-bit addition/subtraction, depending on the axis + // of rotation. 
LOD r15 r8 rot_axis_sel // Restore axis selection LDI r9 y_axis CMP r8 r9 @@ -1071,12 +1087,13 @@ LDI r1 0 LOD r15 r5 coord2_x_cosine_low LOD r15 r6 coord2_x_cosine_high - // Perform 16-bit addition/subtraction TODO better comments + // Perform 16-bit addition/subtraction, again depending on the axis + // of rotation. LOD r15 r8 rot_axis_sel // Restore axis selection LDI r9 y_axis CMP r8 r9 BRH z .coord2_sub - .coord2_add // For rotations around axis x & z, adding is performed + .coord2_add // For rotations around axis x & z, addition is performed LDI r7 0 ADD r3 r5 r3 // Add low bytes BRH nc .roty_no_carry @@ -1093,8 +1110,8 @@ LDI r1 0 .roty_no_borrow SUB r6 r4 r4 // Subtract high bytes SUB r4 r7 r4 // Handle borrow - .coord2_round + .coord2_round // Move and round the results MOV r3 r1 MOV r4 r2 @@ -1115,21 +1132,13 @@ LDI r1 0 // Organize outputs coordinates based on the rotation axis selection. LOD r15 r5 rot_axis_sel // Restore axis selection - LDI r6 y_axis - CMP r5 r6 - BRH z .rstr_xz_sel LDI r6 x_axis CMP r5 r6 - BRH z .rstr_yz_sel - .rstr_x_axis // Default selection - LOD r14 r2 rot_coord1_high - LOD r14 r1 rot_coord1_low - LOD r14 r4 rot_coord2_high - LOD r14 r3 rot_coord2_low - LOD r14 r6 rot_coord3_high - LOD r14 r5 rot_coord3_low - JMP .rot_exit - .rstr_xz_sel + BRH z .rstr_x_axis + LDI r6 z_axis + CMP r5 r6 + BRH z .rstr_z_axis + .rstr_y_axis // Default selection LOD r14 r2 rot_coord1_high LOD r14 r1 rot_coord1_low LOD r14 r4 rot_coord3_high @@ -1137,13 +1146,21 @@ LDI r1 0 LOD r14 r6 rot_coord2_high LOD r14 r5 rot_coord2_low JMP .rot_exit - .rstr_yz_sel - LOD r14 r2 rot_coord2_high - LOD r14 r1 rot_coord2_low - LOD r14 r6 rot_coord3_high - LOD r14 r5 rot_coord3_low + .rstr_x_axis + LOD r14 r2 rot_coord3_high + LOD r14 r1 rot_coord3_low LOD r14 r4 rot_coord1_high LOD r14 r3 rot_coord1_low + LOD r14 r6 rot_coord2_high + LOD r14 r5 rot_coord2_low + JMP .rot_exit + .rstr_z_axis + LOD r14 r2 rot_coord1_high + LOD r14 r1 rot_coord1_low + LOD r14 r4 
rot_coord2_high + LOD r14 r3 rot_coord2_low + LOD r14 r6 rot_coord3_high + LOD r14 r5 rot_coord3_low .rot_exit RET