diff --git a/libs/estdlib/src/code_server.erl b/libs/estdlib/src/code_server.erl index dc2d51207b..84e0168002 100644 --- a/libs/estdlib/src/code_server.erl +++ b/libs/estdlib/src/code_server.erl @@ -39,6 +39,7 @@ atom_resolver/2, literal_resolver/2, type_resolver/2, + import_resolver/2, set_native_code/3 ]). @@ -135,6 +136,14 @@ literal_resolver(_Module, _Index) -> type_resolver(_Module, _Index) -> erlang:nif_error(undefined). +%% @doc Get an imported function triplet from its index +%% @return The imported function as {Module, Function, Arity} +%% @param Module module to get the imported function from +%% @param Index imported function index in the module +-spec import_resolver(Module :: module(), Index :: non_neg_integer()) -> {atom(), atom(), non_neg_integer()}. +import_resolver(_Module, _Index) -> + erlang:nif_error(undefined). + %% @doc Associate a native code stream with a module %% @return ok %% @param Module module to set the native code of @@ -164,6 +173,9 @@ load(Module) -> code_server:literal_resolver(Module, Index) end, TypeResolver = fun(Index) -> code_server:type_resolver(Module, Index) end, + ImportResolver = fun(Index) -> + code_server:import_resolver(Module, Index) + end, {StreamModule, Stream0} = jit:stream(jit_mmap_size(byte_size(Code))), {BackendModule, BackendState0} = jit:backend(StreamModule, Stream0), {LabelsCount, BackendState1} = jit:compile( @@ -171,6 +183,7 @@ load(Module) -> AtomResolver, LiteralResolver, TypeResolver, + ImportResolver, BackendModule, BackendState0 ), diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index c67105b3bb..0fe89368fc 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -24,7 +24,8 @@ stream/1, backend/2, beam_chunk_header/3, - compile/6 + compile/7, + decode_value64/1 ]). 
% NIFs @@ -101,6 +102,7 @@ atom_resolver :: fun((integer()) -> atom()), literal_resolver :: fun((integer()) -> any()), type_resolver :: fun((integer()) -> any()), + import_resolver :: fun((integer()) -> {atom(), atom(), non_neg_integer()}), tail_cache :: [{tuple(), non_neg_integer()}] }). @@ -114,6 +116,14 @@ -define(ASSERT_ALL_NATIVE_FREE(St), ok). -define(ASSERT(Expr), ok). +%-define(JIT_INSTRUMENT, true). + +-ifdef(JIT_INSTRUMENT). +-define(INSTRUMENT(Tag, State, MSt), instrument(Tag, State, MSt)). +-else. +-define(INSTRUMENT(Tag, State, MSt), ok). +-endif. + %%----------------------------------------------------------------------------- %% @param LabelsCount number of labels %% @param Arch code for the architecture @@ -134,32 +144,40 @@ compile( AtomResolver, LiteralResolver, TypeResolver, + ImportResolver, MMod, MSt0 ) when OpcodeMax =< ?OPCODE_MAX -> - MSt1 = MMod:jump_table(MSt0, LabelsCount), State0 = #state{ line_offsets = [], labels_count = LabelsCount, atom_resolver = AtomResolver, literal_resolver = LiteralResolver, type_resolver = TypeResolver, + import_resolver = ImportResolver, tail_cache = [] }, + ?INSTRUMENT("compile_start", State0, MSt0), + MSt1 = MMod:jump_table(MSt0, LabelsCount), + ?INSTRUMENT("after_jump_table", State0, MSt1), {State1, MSt2} = first_pass(Opcodes, MMod, MSt1, State0), + ?INSTRUMENT("after_first_pass", State1, MSt2), MSt3 = second_pass(MMod, MSt2, State1), + ?INSTRUMENT("after_second_pass", State1, MSt3), MSt4 = MMod:flush(MSt3), + ?INSTRUMENT("after_flush", State1, MSt4), {LabelsCount, MSt4}; compile( <<16:32, 0:32, OpcodeMax:32, _LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>>, _AtomResolver, _LiteralResolver, _TypeResolver, + _ImportResolver, _MMod, _MSt ) -> error(badarg, [OpcodeMax]); -compile(CodeChunk, _AtomResolver, _LiteralResolver, _TypeResolver, _MMod, _MSt) -> +compile(CodeChunk, _AtomResolver, _LiteralResolver, _TypeResolver, _ImportResolver, _MMod, _MSt) -> error(badarg, [CodeChunk]). 
% 1 @@ -553,16 +571,10 @@ first_pass(<>, MMod, MSt0, State0) -> first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), - {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), - {MSt2, Arg2, Rest3} = decode_compact_term(Rest2, MMod, MSt1, State0), + {MSt1, Arg1, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), + {MSt2, Arg2, Rest3} = decode_typed_compact_term(Rest2, MMod, MSt1, State0), ?TRACE("OP_IS_GE ~p, ~p, ~p\n", [Label, Arg1, Arg2]), - {MSt3, ResultReg} = MMod:call_primitive(MSt2, ?PRIM_TERM_COMPARE, [ - ctx, jit_state, {free, Arg1}, {free, Arg2}, ?TERM_COMPARE_NO_OPTS - ]), - MSt4 = handle_error_if({'(int)', ResultReg, '==', ?TERM_COMPARE_MEMORY_ALLOC_FAIL}, MMod, MSt3), - MSt5 = cond_jump_to_label( - {'(int)', {free, ResultReg}, '==', ?TERM_LESS_THAN}, Label, MMod, MSt4 - ), + MSt5 = op_is_ge(MMod, MSt2, Label, Arg1, Arg2), ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 41 @@ -1472,31 +1484,21 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt7), first_pass(Rest5, MMod, MSt7, State0); % 125 -first_pass(<>, MMod, MSt0, State0) -> +first_pass( + <>, MMod, MSt0, #state{import_resolver = ImportResolver} = State0 +) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {FailLabel, Rest1} = decode_label(Rest0), {Live, Rest2} = decode_literal(Rest1), - {MSt1, TrimResultReg} = MMod:call_primitive(MSt0, ?PRIM_TRIM_LIVE_REGS, [ctx, Live]), - MSt2 = MMod:free_native_registers(MSt1, [TrimResultReg]), - CappedLive = - if - Live > ?MAX_REG -> ?MAX_REG; - true -> Live - end, {Bif, Rest3} = decode_literal(Rest2), - {MSt3, FuncPtr} = MMod:call_primitive(MSt2, ?PRIM_GET_IMPORTED_BIF, [ - jit_state, Bif - ]), - {MSt4, Arg1, Rest4} = decode_compact_term(Rest3, MMod, MSt3, State0), - {MSt5, Arg2, Rest5} = decode_compact_term(Rest4, MMod, MSt4, State0), - {MSt6, Dest, Rest6} = decode_dest(Rest5, MMod, MSt5), + {MSt1, Arg1, Rest4} = decode_typed_compact_term(Rest3, MMod, MSt0, 
State0), + {MSt2, Arg2, Rest5} = decode_typed_compact_term(Rest4, MMod, MSt1, State0), + {MSt3, Dest, Rest6} = decode_dest(Rest5, MMod, MSt2), + {BifModule, BifFunName, 2} = ImportResolver(Bif), ?TRACE("OP_GC_BIF2 ~p, ~p, ~p, ~p, ~p, ~p\n", [FailLabel, Live, Bif, Arg1, Arg2, Dest]), - {MSt7, ResultReg} = MMod:call_func_ptr(MSt6, {free, FuncPtr}, [ - ctx, FailLabel, CappedLive, {free, Arg1}, {free, Arg2} - ]), - MSt8 = bif_faillabel_test(FailLabel, MMod, MSt7, {free, ResultReg}, {free, Dest}), - ?ASSERT_ALL_NATIVE_FREE(MSt8), - first_pass(Rest6, MMod, MSt8, State0); + MSt4 = op_gc_bif2(MMod, MSt3, FailLabel, Live, Bif, BifModule, BifFunName, Arg1, Arg2, Dest), + ?ASSERT_ALL_NATIVE_FREE(MSt4), + first_pass(Rest6, MMod, MSt4, State0); % 129 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -3187,6 +3189,255 @@ first_pass_bs_match_skip(MatchState, BSOffsetReg, J0, Rest0, MMod, MSt0) -> ?TRACE("{skip,~p},", [Stride]), {J0 - 1, Rest1, MatchState, BSOffsetReg, MSt1}. +op_gc_bif2( + MMod, + MSt0, + FailLabel, + Live, + Bif, + erlang, + '+', + {typed, Arg1, {t_integer, Range1}}, + {typed, Arg2, {t_integer, Range2}}, + Dest +) -> + op_gc_bif2_add(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2); +op_gc_bif2( + MMod, MSt0, FailLabel, Live, Bif, erlang, '+', {typed, Arg1, {t_integer, Range1}}, Arg2, Dest +) when is_integer(Arg2), Arg2 band ?TERM_IMMED_TAG_MASK =:= ?TERM_INTEGER_TAG -> + % Arg2 is a small integer literal, extract its value and create a range + Arg2Value = Arg2 bsr 4, + Range2 = {Arg2Value, Arg2Value}, + op_gc_bif2_add(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2); +op_gc_bif2( + MMod, + MSt0, + FailLabel, + Live, + Bif, + erlang, + '-', + {typed, Arg1, {t_integer, Range1}}, + {typed, Arg2, {t_integer, Range2}}, + Dest +) -> + op_gc_bif2_sub(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2); +op_gc_bif2( + MMod, MSt0, FailLabel, Live, Bif, erlang, '-', {typed, Arg1, {t_integer, 
Range1}}, Arg2, Dest +) when is_integer(Arg2), Arg2 band ?TERM_IMMED_TAG_MASK =:= ?TERM_INTEGER_TAG -> + % Arg2 is a small integer literal, extract its value and create a range + Arg2Value = Arg2 bsr 4, + Range2 = {Arg2Value, Arg2Value}, + op_gc_bif2_sub(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2); +% Default case +op_gc_bif2( + MMod, MSt0, FailLabel, Live, Bif, _Module, _Function, {typed, Arg1, _}, {typed, Arg2, _}, Dest +) -> + op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest); +op_gc_bif2(MMod, MSt0, FailLabel, Live, Bif, _Module, _Function, {typed, Arg1, _}, Arg2, Dest) -> + op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest); +op_gc_bif2(MMod, MSt0, FailLabel, Live, Bif, _Module, _Function, Arg1, {typed, Arg2, _}, Dest) -> + op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest); +op_gc_bif2(MMod, MSt0, FailLabel, Live, Bif, _Module, _Function, Arg1, Arg2, Dest) -> + op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest). + +op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest) -> + {MSt1, TrimResultReg} = MMod:call_primitive(MSt0, ?PRIM_TRIM_LIVE_REGS, [ctx, Live]), + MSt2 = MMod:free_native_registers(MSt1, [TrimResultReg]), + CappedLive = + if + Live > ?MAX_REG -> ?MAX_REG; + true -> Live + end, + {MSt3, FuncPtr} = MMod:call_primitive(MSt2, ?PRIM_GET_IMPORTED_BIF, [ + jit_state, Bif + ]), + {MSt4, ResultReg} = MMod:call_func_ptr(MSt3, {free, FuncPtr}, [ + ctx, FailLabel, CappedLive, {free, Arg1}, {free, Arg2} + ]), + bif_faillabel_test(FailLabel, MMod, MSt4, {free, ResultReg}, {free, Dest}). 
+ +% Check if addition can overflow based on type ranges +% Returns true if the result is guaranteed to fit in a small integer +can_inline_add(Range1, Range2, MMod) -> + % Platform-specific bounds + {MinSafe, MaxSafe} = + case MMod:word_size() of + % 32-bit + 4 -> {-(1 bsl 27), (1 bsl 27) - 1}; + % 64-bit + 8 -> {-(1 bsl 59), (1 bsl 59) - 1} + end, + + case {Range1, Range2} of + {{Min1, Max1}, {Min2, Max2}} when + is_integer(Min1), + is_integer(Max1), + is_integer(Min2), + is_integer(Max2) + -> + % Calculate min and max possible results + MinResult = Min1 + Min2, + MaxResult = Max1 + Max2, + % Check if both are in safe range + MinResult >= MinSafe andalso MaxResult =< MaxSafe; + _ -> + % Unbounded range (has '-inf' or '+inf'), cannot optimize + false + end. + +% Optimized addition with compile-time range checking +op_gc_bif2_add(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2) when + is_integer(Arg2) +-> + case can_inline_add(Range1, Range2, MMod) of + true -> + % Safe to inline - no overflow possible + {MSt1, Reg} = MMod:move_to_native_register(MSt0, Arg1), + MSt2 = MMod:add(MSt1, Reg, Arg2 band (bnot (?TERM_IMMED_TAG_MASK))), + MSt3 = MMod:move_to_vm_register(MSt2, Reg, Dest), + MMod:free_native_registers(MSt3, [Reg]); + false -> + % Cannot prove safety, use default BIF call + op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest) + end; +op_gc_bif2_add(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2) -> + case can_inline_add(Range1, Range2, MMod) of + true -> + % Safe to inline both arguments + {MSt1, Reg1} = MMod:move_to_native_register(MSt0, Arg1), + {MSt2, Reg2} = MMod:move_to_native_register(MSt1, Arg2), + % Strip tag from Reg2 using AND, then add to Reg1 (Reg1 keeps its tag) + {MSt3, Reg2Stripped} = MMod:and_(MSt2, {free, Reg2}, bnot (?TERM_IMMED_TAG_MASK)), + MSt4 = MMod:add(MSt3, Reg1, Reg2Stripped), + MSt5 = MMod:move_to_vm_register(MSt4, Reg1, Dest), + MMod:free_native_registers(MSt5, [Reg1, 
Reg2Stripped]); + false -> + % Cannot prove safety, use default BIF call + op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest) + end. + +% Check if subtraction can overflow based on type ranges +% Returns true if the result is guaranteed to fit in a small integer +can_inline_sub(Range1, Range2, MMod) -> + % Platform-specific bounds + {MinSafe, MaxSafe} = + case MMod:word_size() of + 4 -> {-(1 bsl 27), (1 bsl 27) - 1}; + % 32-bit + 8 -> {-(1 bsl 59), (1 bsl 59) - 1} + % 64-bit + end, + + case {Range1, Range2} of + {{Min1, Max1}, {Min2, Max2}} when + is_integer(Min1), + is_integer(Max1), + is_integer(Min2), + is_integer(Max2) + -> + % Calculate min and max possible results + % Min result: Min1 - Max2 (smallest value minus largest value) + % Max result: Max1 - Min2 (largest value minus smallest value) + MinResult = Min1 - Max2, + MaxResult = Max1 - Min2, + % Check if both are in safe range + MinResult >= MinSafe andalso MaxResult =< MaxSafe; + _ -> + % Unbounded range (has '-inf' or '+inf'), cannot optimize + false + end. 
+ +% Optimized subtraction with compile-time range checking +op_gc_bif2_sub(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2) when + is_integer(Arg2) +-> + case can_inline_sub(Range1, Range2, MMod) of + true -> + % Safe to inline - no overflow possible + {MSt1, Reg} = MMod:move_to_native_register(MSt0, Arg1), + MSt2 = MMod:sub(MSt1, Reg, Arg2 band (bnot (?TERM_IMMED_TAG_MASK))), + MSt3 = MMod:move_to_vm_register(MSt2, Reg, Dest), + MMod:free_native_registers(MSt3, [Reg]); + false -> + % Cannot prove safety, use default BIF call + op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest) + end; +op_gc_bif2_sub(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2) -> + case can_inline_sub(Range1, Range2, MMod) of + true -> + % Safe to inline both arguments + {MSt1, Reg1} = MMod:move_to_native_register(MSt0, Arg1), + {MSt2, Reg2} = MMod:move_to_native_register(MSt1, Arg2), + % Strip tag from Reg2 using AND, then subtract from Reg1 (Reg1 keeps its tag) + {MSt3, Reg2Stripped} = MMod:and_(MSt2, {free, Reg2}, bnot (?TERM_IMMED_TAG_MASK)), + MSt4 = MMod:sub(MSt3, Reg1, Reg2Stripped), + MSt5 = MMod:move_to_vm_register(MSt4, Reg1, Dest), + MMod:free_native_registers(MSt5, [Reg1, Reg2Stripped]); + false -> + % Cannot prove safety, use default BIF call + op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest) + end. + +% Helper to unwrap typed arguments +unwrap_typed({typed, Arg, _Type}) -> Arg; +unwrap_typed(Arg) -> Arg. 
+ +% Optimized >= comparison for typed integers +% Test if Arg1 >= Arg2, jump to Label if false (i.e., if Arg1 < Arg2) +op_is_ge(MMod, MSt0, Label, Arg1, {typed, Arg2, {t_integer, _Range}}) when is_integer(Arg1) -> + % Arg1 is integer literal (already tagged by decode_compact_term), Arg2 is typed integer + % If Arg2 is boxed (bignum), the comparison result depends on the sign + {MSt1, Arg2Reg} = MMod:move_to_native_register(MSt0, Arg2), + % Check if Arg2 is a small integer (tagged with 0xF) + MSt2 = MMod:if_block(MSt1, {Arg2Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun( + BSt0 + ) -> + % Arg2 is boxed (bignum) - need to determine comparison result + % For small Arg1, if Arg2 is positive bignum -> Arg1 < Arg2 (fail) + % For small Arg1, if Arg2 is negative bignum -> Arg1 > Arg2 (pass) + % We need to check the sign of the boxed integer + {BSt1, BoxedReg} = MMod:and_(BSt0, Arg2Reg, bnot (?TERM_PRIMARY_MASK)), + BSt2 = MMod:move_array_element(BSt1, BoxedReg, 0, BoxedReg), + {BSt3, TagReg} = MMod:and_(BSt2, {free, BoxedReg}, ?TERM_BOXED_TAG_MASK), + % Jump to label if it's a positive bignum (tag = 0x8) + % For negative bignum (tag = 0x28), Arg1 >= Arg2 is true, so don't jump + cond_jump_to_label({{free, TagReg}, '==', ?TERM_BOXED_POSITIVE_INTEGER}, Label, MMod, BSt3) + end), + % If we're here, Arg2 is a small integer - do inline comparison + % is_ge tests Arg1 >= Arg2, jump to Label if Arg1 < Arg2 + % Arg1 is already tagged, use it directly + cond_jump_to_label({Arg1, '<', {free, Arg2Reg}}, Label, MMod, MSt2); +op_is_ge(MMod, MSt0, Label, {typed, Arg1, {t_integer, _Range}}, Arg2) when is_integer(Arg2) -> + % Arg1 is typed integer, Arg2 is integer literal (already tagged by decode_compact_term) + % If Arg1 is boxed (bignum), the comparison result depends on the sign + {MSt1, Arg1Reg} = MMod:move_to_native_register(MSt0, Arg1), + % Check if Arg1 is a small integer (tagged with 0xF) + MSt2 = MMod:if_block(MSt1, {Arg1Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', 
?TERM_INTEGER_TAG}, fun( + BSt0 + ) -> + % Arg1 is boxed (bignum) - need to determine comparison result + % For small Arg2, if Arg1 is positive bignum -> Arg1 > Arg2 (pass), don't jump + % For small Arg2, if Arg1 is negative bignum -> Arg1 < Arg2 (fail), jump + {BSt1, BoxedReg} = MMod:and_(BSt0, Arg1Reg, bnot (?TERM_PRIMARY_MASK)), + BSt2 = MMod:move_array_element(BSt1, BoxedReg, 0, BoxedReg), + {BSt3, TagReg} = MMod:and_(BSt2, {free, BoxedReg}, ?TERM_BOXED_TAG_MASK), + % Jump to label if it's a negative bignum (tag = 0x28) + % For positive bignum (tag = 0x8), Arg1 >= Arg2 is true, so don't jump + cond_jump_to_label({{free, TagReg}, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, Label, MMod, BSt3) + end), + % If we're here, Arg1 is a small integer - do inline comparison + % is_ge tests Arg1 >= Arg2, jump to Label if Arg1 < Arg2 + % Arg2 is already tagged, use it directly + cond_jump_to_label({{free, Arg1Reg}, '<', Arg2}, Label, MMod, MSt2); +% Fallback: use term_compare +op_is_ge(MMod, MSt0, Label, Arg1, Arg2) -> + {MSt1, ResultReg} = MMod:call_primitive(MSt0, ?PRIM_TERM_COMPARE, [ + ctx, jit_state, {free, unwrap_typed(Arg1)}, {free, unwrap_typed(Arg2)}, ?TERM_COMPARE_NO_OPTS + ]), + MSt2 = handle_error_if({'(int)', ResultReg, '==', ?TERM_COMPARE_MEMORY_ALLOC_FAIL}, MMod, MSt1), + cond_jump_to_label({'(int)', {free, ResultReg}, '==', ?TERM_LESS_THAN}, Label, MMod, MSt2). + term_alloc_bin_match_state(Live, Src, Dest, MMod, MSt0) -> {MSt1, TrimResultReg} = MMod:call_primitive(MSt0, ?PRIM_TRIM_LIVE_REGS, [ctx, Live]), MSt2 = MMod:free_native_registers(MSt1, [TrimResultReg]), @@ -4016,3 +4267,37 @@ backend(StreamModule, Stream) -> Variant = ?MODULE:variant(), BackendState = BackendModule:new(Variant, StreamModule, Stream), {BackendModule, BackendState}. + +-ifdef(JIT_INSTRUMENT). 
+instrument(Tag, #state{line_offsets = Lines, tail_cache = TC}, MSt) -> + StateSize = erts_debug:flat_size({Lines, TC}), + MStSize = erts_debug:flat_size(MSt), + LinesCount = length(Lines), + TCCount = length(TC), + + % Extract branches count from backend state + % state record: {state, stream_module, stream, offset, branches, jump_table_start, ...} + BranchesCount = + case element(1, MSt) of + state -> length(element(5, MSt)); + _ -> unknown + end, + + {heap_size, HeapSize} = process_info(self(), heap_size), + {total_heap_size, TotalHeapSize} = process_info(self(), total_heap_size), + + io:format( + "~s: mst=~p words, state=~p words (lines=~p, tc=~p, br=~p), " + "heap=~p, total_heap=~p~n", + [ + Tag, + MStSize, + StateSize, + LinesCount, + TCCount, + BranchesCount, + HeapSize, + TotalHeapSize + ] + ). +-endif. diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index cb6504c485..52c28ad465 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -154,6 +154,7 @@ -type condition() :: {aarch64_register(), '<', integer()} | {maybe_free_aarch64_register(), '<', aarch64_register()} + | {integer(), '<', maybe_free_aarch64_register()} | {maybe_free_aarch64_register(), '==', integer()} | {maybe_free_aarch64_register(), '!=', aarch64_register() | integer()} | {'(int)', maybe_free_aarch64_register(), '==', integer()} @@ -369,10 +370,67 @@ jump_table0( N, LabelsCount ) -> - BranchInstr = jit_aarch64_asm:b(0), + % Placeholder jumps to next entry (1 instruction forward = 4 bytes) + BranchInstr = jit_aarch64_asm:b(1), Stream1 = StreamModule:append(Stream0, BranchInstr), jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount). 
+%%----------------------------------------------------------------------------- +%% @doc Patch a single branch in the stream +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param Offset offset of the branch to patch +%% @param Type type of the branch +%% @param LabelOffset target label offset +%% @return Updated stream +%%----------------------------------------------------------------------------- +-spec patch_branch(module(), stream(), non_neg_integer(), any(), non_neg_integer()) -> stream(). +patch_branch(StreamModule, Stream, Offset, Type, LabelOffset) -> + Rel = LabelOffset - Offset, + NewInstr = + case Type of + {bcc, CC} -> jit_aarch64_asm:bcc(CC, Rel); + {adr, Reg} -> jit_aarch64_asm:adr(Reg, Rel); + b -> jit_aarch64_asm:b(Rel) + end, + StreamModule:replace(Stream, Offset, NewInstr). + +%%----------------------------------------------------------------------------- +%% @doc Patch all branches targeting a specific label and return remaining branches +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param TargetLabel label to patch branches for +%% @param LabelOffset offset of the target label +%% @param Branches list of pending branches +%% @return {UpdatedStream, RemainingBranches} +%%----------------------------------------------------------------------------- +-spec patch_branches_for_label( + module(), + stream(), + integer(), + non_neg_integer(), + [{integer(), non_neg_integer(), any()}] +) -> {stream(), [{integer(), non_neg_integer(), any()}]}. +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches, []). 
+ +patch_branches_for_label(_StreamModule, Stream, _TargetLabel, _LabelOffset, [], Acc) -> + {Stream, lists:reverse(Acc)}; +patch_branches_for_label( + StreamModule, + Stream0, + TargetLabel, + LabelOffset, + [{Label, Offset, Type} | Rest], + Acc +) when Label =:= TargetLabel -> + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), + patch_branches_for_label(StreamModule, Stream1, TargetLabel, LabelOffset, Rest, Acc); +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, [Branch | Rest], Acc) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Rest, [Branch | Acc]). + %%----------------------------------------------------------------------------- %% @doc Rewrite stream to update all branches for labels. %% @end @@ -391,14 +449,7 @@ update_branches( } = State ) -> {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), - Rel = LabelOffset - Offset, - NewInstr = - case Type of - {bcc, CC} -> jit_aarch64_asm:bcc(CC, Rel); - {adr, Reg} -> jit_aarch64_asm:adr(Reg, Rel); - b -> jit_aarch64_asm:b(Rel) - end, - Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), update_branches(State#state{stream = Stream1, branches = BranchesT}). %%----------------------------------------------------------------------------- @@ -707,15 +758,47 @@ if_else_block( jit_aarch64_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()}, non_neg_integer() }. 
-if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) -> +if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '<', 0}) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, I = jit_aarch64_asm:tbz(Reg, 63, 0), Stream1 = StreamModule:append(Stream0, I), - State1 = State0#state{stream = Stream1}, - {State1, {tbz, Reg, 63}, 0}; + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {tbz, Reg, 63}, 0}; +% Handle {Val, '<', Reg} - means Val < Reg, jump if false (i.e., if Val >= Reg or Reg <= Val) +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {Val, '<', RegOrTuple} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_aarch64_asm:cmp(Reg, Val), + % le = less than or equal + I2 = jit_aarch64_asm:bcc(le, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, le, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, - {Reg, '<', Val} -) when is_atom(Reg), is_integer(Val) -> + {RegOrTuple, '<', Val} +) when is_integer(Val), Val =/= 0 -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, I1 = jit_aarch64_asm:cmp(Reg, Val), % ge = greater than or equal I2 = jit_aarch64_asm:bcc(ge, 0), @@ -724,8 +807,9 @@ if_block_cond( I2/binary >>, Stream1 = StreamModule:append(Stream0, Code), - State1 = State0#state{stream = Stream1}, - {State1, ge, byte_size(I1)}; + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, ge, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '<', RegB} @@ -1855,17 
+1939,30 @@ set_continuation_to_label( stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _], - branches = Branches + branches = Branches, + labels = Labels } = State, Label ) -> Offset = StreamModule:offset(Stream0), - I1 = jit_aarch64_asm:adr(Temp, 0), - Reloc = {Label, Offset, {adr, Temp}}, - I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1, branches = [Reloc | Branches]}. + case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct adr without relocation + Rel = LabelOffset - Offset, + I1 = jit_aarch64_asm:adr(Temp, Rel), + I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; + false -> + % Label not yet known, emit placeholder and add relocation + I1 = jit_aarch64_asm:adr(Temp, 0), + Reloc = {Label, Offset, {adr, Temp}}, + I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1, branches = [Reloc | Branches]} + end. %%----------------------------------------------------------------------------- %% @doc Set the continuation address to the current offset, creating a @@ -2010,8 +2107,10 @@ add(State, Reg, Val) -> %% @param Val immediate value to subtract %% @return Updated backend state %%----------------------------------------------------------------------------- --spec sub(state(), aarch64_register(), integer()) -> state(). -sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> +-spec sub(state(), aarch64_register(), integer() | aarch64_register()) -> state(). 
+sub(State, Reg, Val) when is_integer(Val) -> + op_imm(State, sub, Reg, Reg, Val); +sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when is_atom(Val) -> I1 = jit_aarch64_asm:sub(Reg, Reg, Val), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}. @@ -2152,6 +2251,7 @@ call_only_or_schedule_next( stream_module = StreamModule, stream = Stream0, branches = Branches, + labels = Labels, available_regs = [Temp | _] } = State0, Label @@ -2164,11 +2264,22 @@ call_only_or_schedule_next( I3 = jit_aarch64_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT), Stream1 = StreamModule:append(Stream0, <>), BNEOffset = StreamModule:offset(Stream1), - % Branch to label if reduction count is not zero - I4 = jit_aarch64_asm:bcc(ne, 0), - Reloc1 = {Label, BNEOffset, {bcc, ne}}, - Stream2 = StreamModule:append(Stream1, I4), - State1 = State0#state{stream = Stream2, branches = [Reloc1 | Branches]}, + + case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct branch with calculated offset + % Calculate relative offset (must be 4-byte aligned) + Rel = LabelOffset - BNEOffset, + I4 = jit_aarch64_asm:bcc(ne, Rel), + Stream2 = StreamModule:append(Stream1, I4), + State1 = State0#state{stream = Stream2}; + false -> + % Label not yet known, emit placeholder and add relocation + I4 = jit_aarch64_asm:bcc(ne, 0), + Reloc1 = {Label, BNEOffset, {bcc, ne}}, + Stream2 = StreamModule:append(Stream1, I4), + State1 = State0#state{stream = Stream2, branches = [Reloc1 | Branches]} + end, State2 = set_continuation_to_label(State1, Label), call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). 
@@ -2349,6 +2460,7 @@ add_label( stream_module = StreamModule, stream = Stream0, jump_table_start = JumpTableStart, + branches = Branches, labels = Labels } = State, Label, @@ -2360,6 +2472,18 @@ add_label( RelativeOffset = LabelOffset - JumpTableEntryOffset, BranchInstr = jit_aarch64_asm:b(RelativeOffset), Stream1 = StreamModule:replace(Stream0, JumpTableEntryOffset, BranchInstr), - State#state{stream = Stream1, labels = [{Label, LabelOffset} | Labels]}; + + % Eagerly patch any branches targeting this label + {Stream2, RemainingBranches} = patch_branches_for_label( + StreamModule, + Stream1, + Label, + LabelOffset, + Branches + ), + + State#state{ + stream = Stream2, branches = RemainingBranches, labels = [{Label, LabelOffset} | Labels] + }; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl index 6237294614..277a97d9ea 100644 --- a/libs/jit/src/jit_aarch64_asm.erl +++ b/libs/jit/src/jit_aarch64_asm.erl @@ -948,6 +948,8 @@ sub(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm = RdNum = reg_to_num(Rd), RnNum = reg_to_num(Rn), <<(16#D1000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>; +sub(_Rd, _Rn, Imm) when is_integer(Imm) -> + error({unencodable_immediate, Imm}); sub(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> sub(Rd, Rn, Rm, {lsl, 0}). 
diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 602339f0a3..37b067aaf1 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -155,6 +155,7 @@ -type condition() :: {armv6m_register(), '<', integer()} | {maybe_free_armv6m_register(), '<', armv6m_register()} + | {integer(), '<', maybe_free_armv6m_register()} | {maybe_free_armv6m_register(), '==', integer()} | {maybe_free_armv6m_register(), '!=', armv6m_register() | integer()} | {'(int)', maybe_free_armv6m_register(), '==', integer()} @@ -405,23 +406,17 @@ jump_table0( jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount). %%----------------------------------------------------------------------------- -%% @doc Rewrite stream to update all branches for labels. +%% @doc Patch a single branch in the stream %% @end -%% @param State current backend state -%% @return Updated backend state +%% @param StreamModule stream module +%% @param Stream stream state +%% @param Offset offset of the branch to patch +%% @param Type type of the branch +%% @param LabelOffset target label offset +%% @return Updated stream %%----------------------------------------------------------------------------- --spec update_branches(state()) -> state(). -update_branches(#state{branches = []} = State) -> - State; -update_branches( - #state{ - stream_module = StreamModule, - stream = Stream0, - branches = [{Label, Offset, Type} | BranchesT], - labels = Labels - } = State -) -> - {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), +-spec patch_branch(module(), stream(), non_neg_integer(), any(), non_neg_integer()) -> stream(). +patch_branch(StreamModule, Stream, Offset, Type, LabelOffset) -> Rel = LabelOffset - Offset, NewInstr = case Type of @@ -497,7 +492,62 @@ update_branches( end end end, - Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), + StreamModule:replace(Stream, Offset, NewInstr). 
+ +%%----------------------------------------------------------------------------- +%% @doc Patch all branches targeting a specific label and return remaining branches +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param TargetLabel label to patch branches for +%% @param LabelOffset offset of the target label +%% @param Branches list of pending branches +%% @return {UpdatedStream, RemainingBranches} +%%----------------------------------------------------------------------------- +-spec patch_branches_for_label( + module(), + stream(), + integer(), + non_neg_integer(), + [{integer(), non_neg_integer(), any()}] +) -> {stream(), [{integer(), non_neg_integer(), any()}]}. +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches, []). + +patch_branches_for_label(_StreamModule, Stream, _TargetLabel, _LabelOffset, [], Acc) -> + {Stream, lists:reverse(Acc)}; +patch_branches_for_label( + StreamModule, + Stream0, + TargetLabel, + LabelOffset, + [{Label, Offset, Type} | Rest], + Acc +) when Label =:= TargetLabel -> + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), + patch_branches_for_label(StreamModule, Stream1, TargetLabel, LabelOffset, Rest, Acc); +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, [Branch | Rest], Acc) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Rest, [Branch | Acc]). + +%%----------------------------------------------------------------------------- +%% @doc Rewrite stream to update all branches for labels. +%% @end +%% @param State current backend state +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec update_branches(state()) -> state(). 
+update_branches(#state{branches = []} = State) -> + State; +update_branches( + #state{ + stream_module = StreamModule, + stream = Stream0, + branches = [{Label, Offset, Type} | BranchesT], + labels = Labels + } = State +) -> + {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), update_branches(State#state{stream = Stream1, branches = BranchesT}). %%----------------------------------------------------------------------------- @@ -1068,19 +1118,26 @@ if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, { CC = pl, ?ASSERT(byte_size(jit_armv6m_asm:bcc(pl, 0)) =:= 2), Stream1 = StreamModule:append(Stream0, <>), - State1 = State0#state{stream = Stream1}, - {State1, CC, byte_size(I1)}; + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, - {Reg, '<', Val} -) when is_atom(Reg), is_integer(Val), Val >= 0, Val =< 255 -> + {RegOrTuple, '<', Val} +) when is_integer(Val), Val >= 0, Val =< 255 -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, I1 = jit_armv6m_asm:cmp(Reg, Val), % ge = greater than or equal CC = ge, ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), Stream1 = StreamModule:append(Stream0, <>), - State1 = State0#state{stream = Stream1}, - {State1, CC, byte_size(I1)}; + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, {Reg, '<', Val} @@ -1096,6 +1153,25 @@ if_block_cond( Stream2 = StreamModule:append(Stream1, <>), State2 = State1#state{stream = Stream2}, {State2, CC, Offset1 - Offset0 + byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, available_regs = [Temp | _]} = State0, 
+ {Val, '<', RegOrTuple} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + State1 = mov_immediate(State0, Temp, Val), + Stream0 = State1#state.stream, + I1 = jit_armv6m_asm:cmp(Reg, Temp), + % le = less than or equal (branch when Val >= Reg, i.e., NOT Val < Reg) + CC = le, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), + State2 = if_block_free_reg(RegOrTuple, State1), + State3 = State2#state{stream = Stream1}, + {State3, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '<', RegB} @@ -2038,33 +2114,95 @@ move_array_element( Reg, Index, {x_reg, X} -) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> +) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 124 -> I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}), I2 = jit_armv6m_asm:str(Temp, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; +move_array_element( + #state{stream_module = StreamModule, available_regs = [Temp1, Temp2 | _]} = + State, + Reg, + Index, + {x_reg, X} +) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> + % For large offsets, use max offset (124) in ldr + remainder in temp register + Offset = Index * 4, + LdrOffset = 124, + Remainder = Offset - LdrOffset, + % Load offset remainder into temp register and add to base + State1 = mov_immediate(State, Temp1, Remainder), + Stream1 = State1#state.stream, + % add Temp1, Reg (Temp1 = Temp1 + Reg) + I1 = jit_armv6m_asm:add(Temp1, Reg), + % ldr Temp2, [Temp1, #124] + I2 = jit_armv6m_asm:ldr(Temp2, {Temp1, LdrOffset}), + % str Temp2, [r0, #X*4] + I3 = jit_armv6m_asm:str(Temp2, ?X_REG(X)), + Stream2 = StreamModule:append(Stream1, <>), + State1#state{stream = Stream2}; move_array_element( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = 
State, Reg, Index, {ptr, Dest} -) when is_atom(Reg) andalso is_integer(Index) -> +) when is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 124 -> I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}), I2 = jit_armv6m_asm:str(Temp, {Dest, 0}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; +move_array_element( + #state{stream_module = StreamModule, available_regs = [Temp | _]} = + State, + Reg, + Index, + {ptr, Dest} +) when is_atom(Reg) andalso is_integer(Index) -> + % For large offsets, use max offset (124) in ldr + remainder in temp register + Offset = Index * 4, + LdrOffset = 124, + Remainder = Offset - LdrOffset, + % Load offset remainder into temp register and add to base + State1 = mov_immediate(State, Temp, Remainder), + Stream1 = State1#state.stream, + I1 = jit_armv6m_asm:add(Temp, Reg), + I2 = jit_armv6m_asm:ldr(Temp, {Temp, LdrOffset}), + I3 = jit_armv6m_asm:str(Temp, {Dest, 0}), + Stream2 = StreamModule:append(Stream1, <>), + State1#state{stream = Stream2}; move_array_element( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | AT]} = State, Reg, Index, {y_reg, Y} -) when is_atom(Reg) andalso is_integer(Index) -> +) when is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 124 -> I1 = jit_armv6m_asm:ldr(Temp2, {Reg, Index * 4}), YCode = str_y_reg(Temp2, Y, Temp1, AT), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; +move_array_element( + #state{ + stream_module = StreamModule, available_regs = [Temp1, Temp2 | AT] + } = + State, + Reg, + Index, + {y_reg, Y} +) when is_atom(Reg) andalso is_integer(Index) -> + % For large offsets, use max offset (124) in ldr + remainder in temp register + Offset = Index * 4, + LdrOffset = 124, + Remainder = Offset - LdrOffset, + State1 = mov_immediate(State, Temp2, Remainder), + Stream1 = State1#state.stream, + I1 = jit_armv6m_asm:add(Temp2, Reg), + I2 = jit_armv6m_asm:ldr(Temp2, {Temp2, LdrOffset}), + YCode = 
str_y_reg(Temp2, Y, Temp1, AT), + Code = <>, + Stream2 = StreamModule:append(Stream1, Code), + State1#state{stream = Stream2}; move_array_element( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | AT]} = State, @@ -2164,10 +2302,32 @@ get_array_element( } = State, {free, Reg}, Index -) -> +) when Index * 4 =< 124 -> I1 = jit_armv6m_asm:ldr(Reg, {Reg, Index * 4}), Stream1 = StreamModule:append(Stream0, <>), {State#state{stream = Stream1}, Reg}; +get_array_element( + #state{ + stream_module = StreamModule, + available_regs = [Temp | _] + } = State, + {free, Reg}, + Index +) -> + % For large offsets, split into ldr immediate (max 124) + remainder in temp register + Offset = Index * 4, + LdrOffset = (Offset div 4) * 4, + LdrOffset1 = min(LdrOffset, 124), + Remainder = Offset - LdrOffset1, + % Load offset remainder into temp register and add to Reg + State1 = mov_immediate(State, Temp, Remainder), + Stream1 = State1#state.stream, + % add Reg, Temp (Reg = Reg + Temp) + I1 = jit_armv6m_asm:add(Reg, Temp), + % ldr Reg, [Reg, #LdrOffset1] + I2 = jit_armv6m_asm:ldr(Reg, {Reg, LdrOffset1}), + Stream2 = StreamModule:append(Stream1, <>), + {State1#state{stream = Stream2}, Reg}; get_array_element( #state{ stream_module = StreamModule, @@ -2177,7 +2337,7 @@ get_array_element( } = State, Reg, Index -) -> +) when Index * 4 =< 124 -> I1 = jit_armv6m_asm:ldr(ElemReg, {Reg, Index * 4}), Stream1 = StreamModule:append(Stream0, <>), { @@ -2185,6 +2345,32 @@ get_array_element( stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0] }, ElemReg + }; +get_array_element( + #state{ + stream_module = StreamModule, + available_regs = [ElemReg, Temp | AvailableT], + used_regs = UsedRegs0 + } = State, + Reg, + Index +) -> + % For large offsets, split into ldr immediate (max 124) + remainder in temp register + Offset = Index * 4, + Remainder = Offset - 124, + % Load offset remainder into temp register + State1 = mov_immediate(State, Temp, 
Remainder), + Stream1 = State1#state.stream, + I1 = jit_armv6m_asm:add(Temp, Reg), + I2 = jit_armv6m_asm:ldr(ElemReg, {Temp, 124}), + Stream2 = StreamModule:append(Stream1, <>), + { + State1#state{ + stream = Stream2, + available_regs = [Temp | AvailableT], + used_regs = [ElemReg | UsedRegs0] + }, + ElemReg }. %% @doc move an integer, a vm or native register to reg[x] @@ -2196,10 +2382,26 @@ move_to_array_element( ValueReg, Reg, Index -) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) -> +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) andalso Index < 32 -> I1 = jit_armv6m_asm:str(ValueReg, {Reg, Index * 4}), Stream1 = StreamModule:append(Stream0, I1), State0#state{stream = Stream1}; +move_to_array_element( + #state{stream_module = StreamModule, available_regs = [Temp | _]} = State0, + ValueReg, + Reg, + Index +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) -> + % For large offsets, split into str immediate (max 124) + remainder in temp register + Offset = Index * 4, + Remainder = Offset - 124, + % Load offset remainder into temp register + State1 = mov_immediate(State0, Temp, Remainder), + Stream1 = State1#state.stream, + I1 = jit_armv6m_asm:add(Temp, Reg), + I2 = jit_armv6m_asm:str(ValueReg, {Temp, 124}), + Stream2 = StreamModule:append(Stream1, <>), + State1#state{stream = Stream2}; move_to_array_element( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, ValueReg, @@ -2216,7 +2418,7 @@ move_to_array_element( Value, Reg, Index -) -> +) when not ?IS_GPR(Value) andalso ?IS_GPR(Reg) -> {State1, Temp} = copy_to_native_register(State0, Value), State2 = move_to_array_element(State1, Temp, Reg, Index), free_native_register(State2, Temp). 
@@ -3282,6 +3484,7 @@ add_label( stream_module = StreamModule, stream = Stream0, jump_table_start = JumpTableStart, + branches = Branches, labels = Labels } = State, Label, @@ -3305,6 +3508,18 @@ add_label( DataBytes = <>, Stream1 = StreamModule:replace(Stream0, DataOffset, DataBytes), - State#state{stream = Stream1, labels = [{Label, LabelOffset} | Labels]}; + + % Eagerly patch any branches targeting this label + {Stream2, RemainingBranches} = patch_branches_for_label( + StreamModule, + Stream1, + Label, + LabelOffset, + Branches + ), + + State#state{ + stream = Stream2, branches = RemainingBranches, labels = [{Label, LabelOffset} | Labels] + }; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl index 930b79dc37..5d91690498 100644 --- a/libs/jit/src/jit_precompile.erl +++ b/libs/jit/src/jit_precompile.erl @@ -19,7 +19,7 @@ % -module(jit_precompile). --export([start/0, compile/3, atom_resolver/1, type_resolver/1]). +-export([start/0, compile/3, atom_resolver/1, type_resolver/1, import_resolver/2]). -include_lib("jit.hrl"). 
@@ -84,6 +84,15 @@ compile(Target, Dir, Path) -> end, TypeResolver = type_resolver(TypesChunk), + ImportedFunctionsChunk = + case lists:keyfind("ImpT", 1, InitialChunks) of + {"ImpT", ImportedFunctionsChunk0} -> + ImportedFunctionsChunk0; + false -> + <<>> + end, + ImportedFunctionResolver = import_resolver(ImportedFunctionsChunk, AtomResolver), + % Parse target to extract arch and variant {BaseTarget, RequestedVariant} = parse_target(Target), Backend = list_to_atom("jit_" ++ BaseTarget), @@ -107,7 +116,7 @@ compile(Target, Dir, Path) -> Stream2 = Backend:new(RequestedVariant, jit_stream_binary, Stream1), {LabelsCount, Stream3} = jit:compile( - CodeChunk, AtomResolver, LiteralResolver, TypeResolver, Backend, Stream2 + CodeChunk, AtomResolver, LiteralResolver, TypeResolver, ImportedFunctionResolver, Backend, Stream2 ), NativeCode = Backend:stream(Stream3), UpdatedChunks = FilteredChunks ++ [{"avmN", NativeCode}], @@ -175,6 +184,26 @@ parse_literals_chunk0(N, <>, Term = binary_to_term(TermBin), parse_literals_chunk0(N - 1, Rest, [Term | Acc]). +import_resolver(FunctionChunks, AtomResolver) -> + ImportedFunctions = parse_imported_functions_chunk(FunctionChunks, AtomResolver), + fun(Index) -> lists:nth(Index + 1, ImportedFunctions) end. + +%% @doc Parse imported functions chunk to extract {Module, Function, Arity} triplets +parse_imported_functions_chunk(<>, AtomResolver) -> + parse_imported_functions_chunk0(FunctionsCount, Rest, AtomResolver, []); +parse_imported_functions_chunk(<<>>, _AtomResolver) -> + []. + +parse_imported_functions_chunk0(0, <<>>, _AtomResolver, Acc) -> + lists:reverse(Acc); +parse_imported_functions_chunk0( + N, <>, AtomResolver, Acc +) -> + Module = AtomResolver(ModuleIndex), + Function = AtomResolver(FunctionIndex), + ImportedFunction = {Module, Function, Arity}, + parse_imported_functions_chunk0(N - 1, Rest, AtomResolver, [ImportedFunction | Acc]). + %% Version (from beam_types.hrl) -define(BEAM_TYPES_VERSION, 3). 
diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl index 4cfb9a8f65..4a57d91d3e 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -164,6 +164,7 @@ stream :: stream(), offset :: non_neg_integer(), branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], + jump_table_start :: non_neg_integer(), available_regs :: [riscv32_register()], used_regs :: [riscv32_register()], labels :: [{integer() | reference(), integer()}], @@ -183,6 +184,7 @@ -type condition() :: {riscv32_register(), '<', integer()} | {maybe_free_riscv32_register(), '<', riscv32_register()} + | {integer(), '<', maybe_free_riscv32_register()} | {maybe_free_riscv32_register(), '==', integer()} | {maybe_free_riscv32_register(), '!=', riscv32_register() | integer()} | {'(int)', maybe_free_riscv32_register(), '==', integer()} @@ -271,6 +273,7 @@ new(Variant, StreamModule, Stream) -> stream_module = StreamModule, stream = Stream, branches = [], + jump_table_start = 0, offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, used_regs = [], @@ -404,46 +407,35 @@ assert_all_native_free(#state{ %% @return Updated backend state %%----------------------------------------------------------------------------- -spec jump_table(state(), pos_integer()) -> state(). -jump_table(State, LabelsCount) -> - jump_table0(State, 0, LabelsCount). +jump_table(#state{stream_module = StreamModule, stream = Stream0} = State, LabelsCount) -> + JumpTableStart = StreamModule:offset(Stream0), + jump_table0(State#state{jump_table_start = JumpTableStart}, 0, LabelsCount). 
jump_table0(State, N, LabelsCount) when N > LabelsCount -> State; jump_table0( - #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, + #state{stream_module = StreamModule, stream = Stream0} = State, N, LabelsCount ) -> % Create jump table entry: AUIPC + JALR (8 bytes total) - % This will be patched later in update_branches/2 - Offset = StreamModule:offset(Stream0), + % This will be patched in add_label when the label offset is known JumpEntry = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>, Stream1 = StreamModule:append(Stream0, JumpEntry), - - % Record both AUIPC and JALR offsets for patching - Reloc = {N, Offset, jump_table_auipc_jalr}, - UpdatedState = State#state{stream = Stream1, branches = [Reloc | Branches]}, - - jump_table0(UpdatedState, N + 1, LabelsCount). + jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount). %%----------------------------------------------------------------------------- -%% @doc Rewrite stream to update all branches for labels. +%% @doc Patch a single branch in the stream %% @end -%% @param State current backend state -%% @return Updated backend state +%% @param StreamModule stream module +%% @param Stream stream state +%% @param Offset offset of the branch to patch +%% @param Type type of the branch +%% @param LabelOffset target label offset +%% @return Updated stream %%----------------------------------------------------------------------------- --spec update_branches(state()) -> state(). -update_branches(#state{branches = []} = State) -> - State; -update_branches( - #state{ - stream_module = StreamModule, - stream = Stream0, - branches = [{Label, Offset, Type} | BranchesT], - labels = Labels - } = State -) -> - {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), +-spec patch_branch(module(), stream(), non_neg_integer(), any(), non_neg_integer()) -> stream(). 
+patch_branch(StreamModule, Stream, Offset, Type, LabelOffset) -> Rel = LabelOffset - Offset, NewInstr = case Type of @@ -490,37 +482,64 @@ update_branches( 6 -> <>; 8 -> Entry end - end; - jump_table_auipc_jalr -> - % Calculate PC-relative offset from AUIPC instruction to target - % AUIPC is at Offset, JALR is at Offset+4 - % Target is at LabelOffset - % Offset from AUIPC PC to target - PCRelOffset = LabelOffset - Offset, - - % Split into upper 20 bits and lower 12 bits - % RISC-V encodes: target = PC + (upper20 << 12) + sign_ext(lower12) - % If lower12 >= 0x800, it's negative when sign-extended, so add 1 to upper - Upper20 = (PCRelOffset + 16#800) bsr 12, - Lower12 = PCRelOffset band 16#FFF, - % Sign-extend lower 12 bits for JALR immediate - Lower12Signed = - if - Lower12 >= 16#800 -> Lower12 - 16#1000; - true -> Lower12 - end, - - % Encode AUIPC and JALR with computed offsets - I1 = jit_riscv32_asm:auipc(a3, Upper20), - I2 = jit_riscv32_asm:jalr(zero, a3, Lower12Signed), - % Map to 8 bytes - JumpTableEntry = <>, - case byte_size(JumpTableEntry) of - 6 -> <>; - 8 -> JumpTableEntry end end, - Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), + StreamModule:replace(Stream, Offset, NewInstr). + +%%----------------------------------------------------------------------------- +%% @doc Patch all branches targeting a specific label and return remaining branches +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param TargetLabel label to patch branches for +%% @param LabelOffset offset of the target label +%% @param Branches list of pending branches +%% @return {UpdatedStream, RemainingBranches} +%%----------------------------------------------------------------------------- +-spec patch_branches_for_label( + module(), + stream(), + integer(), + non_neg_integer(), + [{integer(), non_neg_integer(), any()}] +) -> {stream(), [{integer(), non_neg_integer(), any()}]}. 
+patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches, []). + +patch_branches_for_label(_StreamModule, Stream, _TargetLabel, _LabelOffset, [], Acc) -> + {Stream, lists:reverse(Acc)}; +patch_branches_for_label( + StreamModule, + Stream0, + TargetLabel, + LabelOffset, + [{Label, Offset, Type} | Rest], + Acc +) when Label =:= TargetLabel -> + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), + patch_branches_for_label(StreamModule, Stream1, TargetLabel, LabelOffset, Rest, Acc); +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, [Branch | Rest], Acc) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Rest, [Branch | Acc]). + +%%----------------------------------------------------------------------------- +%% @doc Rewrite stream to update all branches for labels. +%% @end +%% @param State current backend state +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec update_branches(state()) -> state(). +update_branches(#state{branches = []} = State) -> + State; +update_branches( + #state{ + stream_module = StreamModule, + stream = Stream0, + branches = [{Label, Offset, Type} | BranchesT], + labels = Labels + } = State +) -> + {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), update_branches(State#state{stream = Stream1, branches = BranchesT}). %%----------------------------------------------------------------------------- @@ -937,16 +956,29 @@ if_else_block( {beq | bne | blt | bge, atom(), atom() | integer()}, non_neg_integer() }. 
-if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) -> +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '<', 0} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, %% RISC-V: bge Reg, zero, offset (branch if Reg >= 0, i.e., NOT negative/NOT less than 0) BranchInstr = <<16#FFFFFFFF:32/little>>, Stream1 = StreamModule:append(Stream0, BranchInstr), - State1 = State0#state{stream = Stream1}, - {State1, {bge, Reg, zero}, 0}; + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {bge, Reg, zero}, 0}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, - {Reg, '<', Val} -) when is_atom(Reg), is_integer(Val), Val >= 0, Val =< 255 -> + {RegOrTuple, '<', Val} +) when is_integer(Val), Val >= 0, Val =< 255 -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, % RISC-V: bge Reg, Val, offset (branch if Reg >= Val, i.e., NOT less than) % Load immediate into a temp register for comparison [Temp | _] = State0#state.available_regs, @@ -956,12 +988,18 @@ if_block_cond( BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), - State2 = State1#state{stream = Stream2}, - {State2, {bge, Reg, Temp}, BranchDelta}; + State2 = if_block_free_reg(RegOrTuple, State1), + State3 = State2#state{stream = Stream2}, + {State3, {bge, Reg, Temp}, BranchDelta}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, - {Reg, '<', Val} -) when is_atom(Reg), is_integer(Val) -> + {RegOrTuple, '<', Val} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, % RISC-V: bge Reg, Temp, offset (branch if Reg >= Temp, i.e., NOT less than) OffsetBefore = 
StreamModule:offset(Stream0), State1 = mov_immediate(State0, Temp, Val), @@ -969,8 +1007,47 @@ if_block_cond( BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), - State2 = State1#state{stream = Stream2}, - {State2, {bge, Reg, Temp}, BranchDelta}; + State2 = if_block_free_reg(RegOrTuple, State1), + State3 = State2#state{stream = Stream2}, + {State3, {bge, Reg, Temp}, BranchDelta}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + {Val, '<', RegOrTuple} +) when is_integer(Val), Val >= 0, Val =< 255 -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % RISC-V: bge Temp, Reg, offset (branch if Val >= Reg, i.e., NOT Val < Reg) + OffsetBefore = StreamModule:offset(Stream0), + State1 = mov_immediate(State0, Temp, Val), + Stream1 = State1#state.stream, + BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, + BranchInstr = <<16#FFFFFFFF:32/little>>, + Stream2 = StreamModule:append(Stream1, BranchInstr), + State2 = if_block_free_reg(RegOrTuple, State1), + State3 = State2#state{stream = Stream2}, + {State3, {bge, Temp, Reg}, BranchDelta}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + {Val, '<', RegOrTuple} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % RISC-V: bge Temp, Reg, offset (branch if Val >= Reg, i.e., NOT Val < Reg) + OffsetBefore = StreamModule:offset(Stream0), + State1 = mov_immediate(State0, Temp, Val), + Stream1 = State1#state.stream, + BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, + BranchInstr = <<16#FFFFFFFF:32/little>>, + Stream2 = StreamModule:append(Stream1, BranchInstr), + State2 = if_block_free_reg(RegOrTuple, State1), + State3 = State2#state{stream = Stream2}, + {State3, {bge, Temp, Reg}, 
BranchDelta}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '<', RegB} @@ -2355,23 +2432,32 @@ set_continuation_to_label( stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _], - branches = Branches + branches = Branches, + labels = Labels } = State, Label ) -> - % Similar to AArch64: use pc_relative_address with a relocation that will be - % resolved to point directly to the label's actual address (not the jump table entry) Offset = StreamModule:offset(Stream0), - % Emit placeholder for pc_relative_address (auipc + addi) - % Reserve 8 bytes (2 x 32-bit instructions) with all-1s placeholder for flash programming - % The relocation will replace these with the correct offset - I1 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>, - Reloc = {Label, Offset, {adr, Temp}}, - % Store continuation (jit_state is in a1) - I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1, branches = [Reloc | Branches]}. 
+ case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct pc-relative address without relocation + Rel = LabelOffset - Offset, + I1 = pc_relative_address(Temp, Rel), + I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; + false -> + % Label not yet known, emit placeholder and add relocation + % Reserve 8 bytes (2 x 32-bit instructions) with all-1s placeholder for flash programming + % The relocation will replace these with the correct offset + I1 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>, + Reloc = {Label, Offset, {adr, Temp}}, + I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1, branches = [Reloc | Branches]} + end. %% @doc Set the contination to a given offset %% Return a reference so the offset will be updated with update_branches @@ -3062,5 +3148,60 @@ add_label(#state{stream_module = StreamModule, stream = Stream0} = State0, Label %% @return Updated backend state %%----------------------------------------------------------------------------- -spec add_label(state(), integer() | reference(), integer()) -> state(). 
+add_label( + #state{ + stream_module = StreamModule, + stream = Stream0, + jump_table_start = JumpTableStart, + branches = Branches, + labels = Labels + } = State, + Label, + LabelOffset +) when is_integer(Label) -> + % Patch the jump table entry immediately + % Each jump table entry is AUIPC + JALR (8 bytes) + JumpTableEntryOffset = JumpTableStart + Label * 8, + + % Calculate PC-relative offset from AUIPC instruction to target + PCRelOffset = LabelOffset - JumpTableEntryOffset, + + % Split into upper 20 bits and lower 12 bits + % RISC-V encodes: target = PC + (upper20 << 12) + sign_ext(lower12) + % If lower12 >= 0x800, it's negative when sign-extended, so add 1 to upper + Upper20 = (PCRelOffset + 16#800) bsr 12, + Lower12 = PCRelOffset band 16#FFF, + % Sign-extend lower 12 bits for JALR immediate + Lower12Signed = + if + Lower12 >= 16#800 -> Lower12 - 16#1000; + true -> Lower12 + end, + + % Encode AUIPC and JALR with computed offsets + I1 = jit_riscv32_asm:auipc(a3, Upper20), + I2 = jit_riscv32_asm:jalr(zero, a3, Lower12Signed), + % Create 8-byte jump table entry + JumpTableEntry = <>, + PaddedEntry = + case byte_size(JumpTableEntry) of + 6 -> <>; + 8 -> JumpTableEntry + end, + + Stream1 = StreamModule:replace(Stream0, JumpTableEntryOffset, PaddedEntry), + + % Eagerly patch any branches targeting this label + {Stream2, RemainingBranches} = patch_branches_for_label( + StreamModule, + Stream1, + Label, + LabelOffset, + Branches + ), + + State#state{ + stream = Stream2, branches = RemainingBranches, labels = [{Label, LabelOffset} | Labels] + }; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. 
diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index 815dc40d95..8344b9c023 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -137,6 +137,7 @@ -type condition() :: {x86_64_register(), '<', integer()} | {maybe_free_x86_64_register(), '<', x86_64_register()} + | {integer(), '<', maybe_free_x86_64_register()} | {maybe_free_x86_64_register(), '==', integer()} | {maybe_free_x86_64_register(), '!=', x86_64_register() | integer()} | {'(int)', maybe_free_x86_64_register(), '==', integer()} @@ -358,6 +359,58 @@ jump_table0( Stream1 = StreamModule:append(Stream0, I1), jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount). +%%----------------------------------------------------------------------------- +%% @doc Patch a single branch in the stream +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param Offset offset of the branch to patch +%% @param Size size of the branch in bits +%% @param LabelOffset target label offset +%% @return Updated stream +%%----------------------------------------------------------------------------- +-spec patch_branch(module(), stream(), non_neg_integer(), non_neg_integer(), non_neg_integer()) -> + stream(). +patch_branch(StreamModule, Stream, Offset, Size, LabelOffset) -> + StreamModule:map(Stream, Offset, Size div 8, fun(<>) -> + <<(Delta + LabelOffset - Offset):Size/little>> + end). 
+ +%%----------------------------------------------------------------------------- +%% @doc Patch all branches targeting a specific label and return remaining branches +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param TargetLabel label to patch branches for +%% @param LabelOffset offset of the target label +%% @param Branches list of pending branches +%% @return {UpdatedStream, RemainingBranches} +%%----------------------------------------------------------------------------- +-spec patch_branches_for_label( + module(), + stream(), + integer(), + non_neg_integer(), + [{integer(), non_neg_integer(), non_neg_integer()}] +) -> {stream(), [{integer(), non_neg_integer(), non_neg_integer()}]}. +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches, []). + +patch_branches_for_label(_StreamModule, Stream, _TargetLabel, _LabelOffset, [], Acc) -> + {Stream, lists:reverse(Acc)}; +patch_branches_for_label( + StreamModule, + Stream0, + TargetLabel, + LabelOffset, + [{Label, Offset, Size} | Rest], + Acc +) when Label =:= TargetLabel -> + Stream1 = patch_branch(StreamModule, Stream0, Offset, Size, LabelOffset), + patch_branches_for_label(StreamModule, Stream1, TargetLabel, LabelOffset, Rest, Acc); +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, [Branch | Rest], Acc) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Rest, [Branch | Acc]). + %%----------------------------------------------------------------------------- %% @doc Rewrite stream to update all branches for labels. 
%% @end @@ -376,9 +429,7 @@ update_branches( } = State ) -> {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), - Stream1 = StreamModule:map(Stream0, Offset, Size div 8, fun(<>) -> - <<(Delta + LabelOffset - Offset):Size/little>> - end), + Stream1 = patch_branch(StreamModule, Stream0, Offset, Size, LabelOffset), update_branches(State#state{stream = Stream1, branches = BranchesT}). %%----------------------------------------------------------------------------- @@ -689,20 +740,73 @@ if_block_cond(#state{stream_module = StreamModule} = State0, Cond) -> {State2, ReplaceDelta}. -spec if_block_cond0(state(), condition()) -> {state(), binary(), non_neg_integer()}. -if_block_cond0(State0, {Reg, '<', 0}) when is_atom(Reg) -> +if_block_cond0(State0, {RegOrTuple, '<', 0}) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, I1 = jit_x86_64_asm:testq(Reg, Reg), {RelocJGEOffset, I2} = jit_x86_64_asm:jge_rel8(1), - {State0, <>, byte_size(I1) + RelocJGEOffset}; -if_block_cond0(State0, {RegOrTuple, '<', Value}) -> + State1 = if_block_free_reg(RegOrTuple, State0), + {State1, <>, byte_size(I1) + RelocJGEOffset}; +% Handle {Value, '<', Reg} - means Value < Reg, jump if false (i.e., if Value >= Reg or Reg <= Value) +if_block_cond0(State0, {Value, '<', RegOrTuple}) when ?IS_SINT32_T(Value) -> Reg = case RegOrTuple of {free, Reg0} -> Reg0; RegOrTuple -> RegOrTuple end, I1 = jit_x86_64_asm:cmpq(Value, Reg), + {RelocJLEOffset, I2} = jit_x86_64_asm:jle_rel8(1), + State1 = if_block_free_reg(RegOrTuple, State0), + {State1, <>, byte_size(I1) + RelocJLEOffset}; +% Catch-all for large values outside SINT32_T range +if_block_cond0(State0, {Value, '<', RegOrTuple}) when is_integer(Value) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Load large value into a temporary register + I1 = jit_x86_64_asm:movabsq(Value, r11), + I2 = jit_x86_64_asm:cmpq(r11, Reg), + {RelocJLEOffset, I3} = jit_x86_64_asm:jle_rel8(1), + 
State1 = if_block_free_reg(RegOrTuple, State0), + {State1, <>, byte_size(I1) + byte_size(I2) + RelocJLEOffset}; +if_block_cond0(State0, {RegOrTuple, '<', Value}) when ?IS_SINT32_T(Value) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_x86_64_asm:cmpq(Value, Reg), + {RelocJGEOffset, I2} = jit_x86_64_asm:jge_rel8(1), + State1 = if_block_free_reg(RegOrTuple, State0), + {State1, <>, byte_size(I1) + RelocJGEOffset}; +if_block_cond0(State0, {RegOrTuple, '<', RegB}) when is_atom(RegB) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_x86_64_asm:cmpq(RegB, Reg), {RelocJGEOffset, I2} = jit_x86_64_asm:jge_rel8(1), State1 = if_block_free_reg(RegOrTuple, State0), {State1, <>, byte_size(I1) + RelocJGEOffset}; +% Catch-all for large values outside SINT32_T range +if_block_cond0(State0, {RegOrTuple, '<', Value}) when is_integer(Value) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Load large value into a temporary register + I1 = jit_x86_64_asm:movabsq(Value, r11), + I2 = jit_x86_64_asm:cmpq(r11, Reg), + {RelocJGEOffset, I3} = jit_x86_64_asm:jge_rel8(1), + State1 = if_block_free_reg(RegOrTuple, State0), + {State1, <>, byte_size(I1) + byte_size(I2) + RelocJGEOffset}; if_block_cond0(State0, {RegOrTuple, '==', 0}) -> Reg = case RegOrTuple of @@ -1795,17 +1899,31 @@ set_continuation_to_label( stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _], - branches = Branches + branches = Branches, + labels = Labels } = State, Label ) -> Offset = StreamModule:offset(Stream0), - {RewriteLEAOffset, I1} = jit_x86_64_asm:leaq_rel32({-4, rip}, Temp), - Reloc = {Label, Offset + RewriteLEAOffset, 32}, - I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1, branches = [Reloc | Branches]}. 
+ case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct leaq without relocation + % leaq instruction is 7 bytes, RIP points to next instruction + RelOffset = LabelOffset - (Offset + 7), + I1 = jit_x86_64_asm:leaq({rip, RelOffset}, Temp), + I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; + false -> + % Label not yet known, emit placeholder and add relocation + {RewriteLEAOffset, I1} = jit_x86_64_asm:leaq_rel32({-4, rip}, Temp), + Reloc = {Label, Offset + RewriteLEAOffset, 32}, + I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1, branches = [Reloc | Branches]} + end. set_continuation_to_offset( #state{ @@ -1890,6 +2008,22 @@ add(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}. 
+sub( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [TempReg | _] + } = State, + Reg, + Val +) when is_integer(Val), Val < -16#80000000 orelse Val > 16#7FFFFFFF -> + % Immediate too large for 32-bit, load into temporary register + I1 = jit_x86_64_asm:movabsq(Val, TempReg), + I2 = jit_x86_64_asm:subq(TempReg, Reg), + Stream1 = StreamModule:append(Stream0, I1), + Stream2 = StreamModule:append(Stream1, I2), + % Free temporary register immediately + State#state{stream = Stream2}; sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> I1 = jit_x86_64_asm:subq(Val, Reg), Stream1 = StreamModule:append(Stream0, I1), @@ -1950,19 +2084,38 @@ call_only_or_schedule_next( #state{ stream_module = StreamModule, stream = Stream0, - branches = Branches + branches = Branches, + labels = Labels } = State0, Label ) -> Offset = StreamModule:offset(Stream0), I1 = jit_x86_64_asm:decl(?JITSTATE_REMAINING_REDUCTIONS), - {RewriteJMPOffset, I3} = jit_x86_64_asm:jmp_rel32(1), - I2 = jit_x86_64_asm:jz(byte_size(I3) + 2), - Sz = byte_size(I1) + byte_size(I2), - Reloc1 = {Label, Offset + Sz + RewriteJMPOffset, 32}, - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State1 = State0#state{stream = Stream1, branches = [Reloc1 | Branches]}, + I1Size = byte_size(I1), + + case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct jmp with calculated offset + % jz is 2 bytes, jmp_rel32 is 5 bytes + JmpSize = 5, + I2 = jit_x86_64_asm:jz(JmpSize + 2), + I2Size = byte_size(I2), + % Calculate relative offset: target - current + RelOffset = LabelOffset - (Offset + I1Size + I2Size), + {_RewriteJMPOffset, I3} = jit_x86_64_asm:jmp_rel32(RelOffset), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = State0#state{stream = Stream1}; + false -> + % Label not yet known, emit placeholder and add relocation + {RewriteJMPOffset, I3} = jit_x86_64_asm:jmp_rel32(1), + I2 = 
jit_x86_64_asm:jz(byte_size(I3) + 2), + Sz = I1Size + byte_size(I2), + Reloc1 = {Label, Offset + Sz + RewriteJMPOffset, 32}, + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = State0#state{stream = Stream1, branches = [Reloc1 | Branches]} + end, State2 = set_continuation_to_label(State1, Label), call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). @@ -2093,6 +2246,7 @@ add_label( stream_module = StreamModule, stream = Stream0, jump_table_start = JumpTableStart, + branches = Branches, labels = Labels } = State, Label, @@ -2104,6 +2258,18 @@ add_label( RelativeOffset = LabelOffset - JumpTableEntryOffset, {_RelocOffset, JmpInstruction} = jit_x86_64_asm:jmp_rel32(RelativeOffset), Stream1 = StreamModule:replace(Stream0, JumpTableEntryOffset, JmpInstruction), - State#state{stream = Stream1, labels = [{Label, LabelOffset} | Labels]}; + + % Eagerly patch any branches targeting this label + {Stream2, RemainingBranches} = patch_branches_for_label( + StreamModule, + Stream1, + Label, + LabelOffset, + Branches + ), + + State#state{ + stream = Stream2, branches = RemainingBranches, labels = [{Label, LabelOffset} | Labels] + }; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. diff --git a/libs/jit/src/jit_x86_64_asm.erl b/libs/jit/src/jit_x86_64_asm.erl index c3b54fbe06..f5867bc5c0 100644 --- a/libs/jit/src/jit_x86_64_asm.erl +++ b/libs/jit/src/jit_x86_64_asm.erl @@ -35,6 +35,8 @@ jnz_rel8/1, jge/1, jge_rel8/1, + jle/1, + jle_rel8/1, jmp/1, jmp_rel8/1, jmp_rel32/1, @@ -360,6 +362,14 @@ jge(Offset) when Offset >= -126 andalso Offset =< 129 -> jge_rel8(Offset) when Offset >= -126 andalso Offset =< 129 -> {1, jge(Offset)}. +jle(Offset) when Offset >= -126 andalso Offset =< 129 -> + % Use short jump (matches assembler behavior) + AdjustedOffset = Offset - 2, + <<16#7E, AdjustedOffset>>. + +jle_rel8(Offset) when Offset >= -126 andalso Offset =< 129 -> + {1, jle(Offset)}. 
+ jmp(Offset) when Offset >= -126 andalso Offset =< 129 -> % Use short jump (matches assembler behavior) AdjustedOffset = Offset - 2, @@ -483,6 +493,17 @@ addq(SrcReg, DestReg) when is_atom(SrcReg), is_atom(DestReg) -> {REX_B, MODRM_RM} = x86_64_x_reg(DestReg), <>. +subq(Imm, Reg) when ?IS_SINT8_T(Imm), is_atom(Reg) -> + case x86_64_x_reg(Reg) of + {0, Index} -> <<16#48, 16#83, (16#E8 + Index), Imm>>; + {1, Index} -> <<16#49, 16#83, (16#E8 + Index), Imm>> + end; +subq(Imm, rax) when ?IS_SINT32_T(Imm) -> + % Special short encoding for sub imm32, %rax + <<16#48, 16#2D, Imm:32/little>>; +subq(Imm, Reg) when ?IS_SINT32_T(Imm), is_atom(Reg) -> + {REX_B, MODRM_RM} = x86_64_x_reg(Reg), + <>; subq(RegA, RegB) when is_atom(RegA), is_atom(RegB) -> {REX_R, MODRM_REG} = x86_64_x_reg(RegA), {REX_B, MODRM_RM} = x86_64_x_reg(RegB), diff --git a/src/libAtomVM/context.c b/src/libAtomVM/context.c index 5e68bf888f..91199e4d73 100644 --- a/src/libAtomVM/context.c +++ b/src/libAtomVM/context.c @@ -1300,7 +1300,7 @@ COLD_FUNC void context_dump(Context *ctx) fprintf(stderr, "process_count = %zu\n", process_count); fprintf(stderr, "ports_count = %zu\n", ports_count); fprintf(stderr, "atoms_count = %zu\n", atom_table_count(glb->atom_table)); - fprintf(stderr, "refc_binary_total_size = %zu\n", refc_binary_total_size(ctx)); + refc_binary_dump_info(ctx); } fprintf(stderr, "\n\n**End Of Crash Report**\n"); } diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c index d3c7f2f44a..3a695dfde6 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -319,13 +319,11 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary return NULL; } -#ifdef ENABLE_ADVANCED_TRACE - mod->import_table = beam_file + offsets[IMPT]; -#endif if (offsets[CODE]) { mod->code = (CodeChunk *) (beam_file + offsets[CODE]); } mod->export_table = beam_file + offsets[EXPT]; + mod->import_table = beam_file + offsets[IMPT]; mod->local_table = beam_file + offsets[LOCT]; mod->atom_table 
= beam_file + offsets[AT8U]; mod->fun_table = beam_file + offsets[FUNT]; diff --git a/src/libAtomVM/module.h b/src/libAtomVM/module.h index d65b09cc58..5a388534a9 100644 --- a/src/libAtomVM/module.h +++ b/src/libAtomVM/module.h @@ -109,12 +109,9 @@ struct Module { int module_index; -#ifdef ENABLE_ADVANCED_TRACE - void *import_table; -#endif - CodeChunk *code; void *export_table; + void *import_table; void *local_table; void *atom_table; void *fun_table; diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index d011776a73..7a1691130e 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -205,6 +205,7 @@ static term nif_code_server_code_chunk(Context *ctx, int argc, term argv[]); static term nif_code_server_atom_resolver(Context *ctx, int argc, term argv[]); static term nif_code_server_literal_resolver(Context *ctx, int argc, term argv[]); static term nif_code_server_type_resolver(Context *ctx, int argc, term argv[]); +static term nif_code_server_import_resolver(Context *ctx, int argc, term argv[]); static term nif_code_server_set_native_code(Context *ctx, int argc, term argv[]); #endif static term nif_erlang_module_loaded(Context *ctx, int argc, term argv[]); @@ -777,6 +778,10 @@ static const struct Nif code_server_type_resolver_nif = { .base.type = NIFFunctionType, .nif_ptr = nif_code_server_type_resolver }; +static const struct Nif code_server_import_resolver_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_code_server_import_resolver +}; static const struct Nif code_server_set_native_code_nif = { .base.type = NIFFunctionType, .nif_ptr = nif_code_server_set_native_code @@ -5633,10 +5638,60 @@ static term nif_code_server_type_resolver(Context *ctx, int argc, term argv[]) if (IS_NULL_PTR(mod)) { RAISE_ERROR(BADARG_ATOM); } + int type_index = term_to_int(argv[1]); return module_get_type_by_index(mod, type_index, ctx); } +static term nif_code_server_import_resolver(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + VALIDATE_VALUE(argv[0], 
term_is_atom); + VALIDATE_VALUE(argv[1], term_is_integer); + + term module_name = argv[0]; + Module *mod = globalcontext_get_module(ctx->global, term_to_atom_index(module_name)); + if (IS_NULL_PTR(mod)) { + RAISE_ERROR(BADARG_ATOM); + } + int import_index = term_to_int(argv[1]); + + // Get the imported function entry at the given index + if (IS_NULL_PTR(mod->imported_funcs) || import_index < 0) { + RAISE_ERROR(BADARG_ATOM); + } + + // Parse the import table to get the module, function, and arity + // Import table format: each entry is 12 bytes (module_atom_index, function_atom_index, arity) + const uint8_t *import_table = mod->import_table; + if (IS_NULL_PTR(import_table)) { + RAISE_ERROR(BADARG_ATOM); + } + + int functions_count = READ_32_UNALIGNED(import_table + 8); + if (import_index >= functions_count) { + RAISE_ERROR(BADARG_ATOM); + } + + int local_module_atom_index = READ_32_UNALIGNED(import_table + import_index * 12 + 12); + int local_function_atom_index = READ_32_UNALIGNED(import_table + import_index * 12 + 4 + 12); + uint32_t arity = READ_32_UNALIGNED(import_table + import_index * 12 + 8 + 12); + + term module_atom = module_get_atom_term_by_id(mod, local_module_atom_index); + term function_atom = module_get_atom_term_by_id(mod, local_function_atom_index); + term arity_term = term_from_int(arity); + + if (UNLIKELY(memory_ensure_free(ctx, TUPLE_SIZE(3)) != MEMORY_GC_OK)) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } + + term result = term_alloc_tuple(3, &ctx->heap); + term_put_tuple_element(result, 0, module_atom); + term_put_tuple_element(result, 1, function_atom); + term_put_tuple_element(result, 2, arity_term); + + return result; +} static term nif_code_server_set_native_code(Context *ctx, int argc, term argv[]) { UNUSED(argc); diff --git a/src/libAtomVM/nifs.gperf b/src/libAtomVM/nifs.gperf index 5cf304b88b..8d1a0d5172 100644 --- a/src/libAtomVM/nifs.gperf +++ b/src/libAtomVM/nifs.gperf @@ -183,6 +183,7 @@ code_server:code_chunk/1, 
IF_HAVE_JIT(&code_server_code_chunk_nif) code_server:atom_resolver/2, IF_HAVE_JIT(&code_server_atom_resolver_nif) code_server:literal_resolver/2, IF_HAVE_JIT(&code_server_literal_resolver_nif) code_server:type_resolver/2, IF_HAVE_JIT(&code_server_type_resolver_nif) +code_server:import_resolver/2, IF_HAVE_JIT(&code_server_import_resolver_nif) code_server:set_native_code/3, IF_HAVE_JIT(&code_server_set_native_code_nif) console:print/1, &console_print_nif base64:encode/1, &base64_encode_nif diff --git a/src/libAtomVM/refc_binary.c b/src/libAtomVM/refc_binary.c index 9d579fb2f9..f5af02ef17 100644 --- a/src/libAtomVM/refc_binary.c +++ b/src/libAtomVM/refc_binary.c @@ -146,3 +146,86 @@ size_t refc_binary_total_size(Context *ctx) synclist_unlock(&ctx->global->refc_binaries); return size; } + +COLD_FUNC void refc_binary_dump_info(Context *ctx) +{ + struct ListHead *item; + struct ListHead *refc_binaries = synclist_rdlock(&ctx->global->refc_binaries); + + // Note: This only counts non-const refc binaries (ones that allocate memory). + // Const binaries (created by term_from_const_binary) point to existing data + // and are never added to the global refc_binaries list, so they don't appear here. 
+ + // First pass: count and calculate total size + size_t count = 0; + size_t total_size = 0; + LIST_FOR_EACH (item, refc_binaries) { + struct RefcBinary *refc = GET_LIST_ENTRY(item, struct RefcBinary, head); + count++; + total_size += refc->size; + } + + fprintf(stderr, "refc_binary_count = %d\n", (int) count); + fprintf(stderr, "refc_binary_total_size = %d\n", (int) total_size); + + if (count == 0) { + synclist_unlock(&ctx->global->refc_binaries); + return; + } + +// Find top 5 largest binaries +#define TOP_N 5 + struct RefcBinary *top[TOP_N] = { NULL }; + size_t top_indices[TOP_N] = { 0 }; + + size_t index = 0; + LIST_FOR_EACH (item, refc_binaries) { + struct RefcBinary *refc = GET_LIST_ENTRY(item, struct RefcBinary, head); + + // Try to insert into top 5 + for (size_t i = 0; i < TOP_N; i++) { + if (top[i] == NULL || refc->size > top[i]->size) { + // Shift down + for (size_t j = TOP_N - 1; j > i; j--) { + top[j] = top[j - 1]; + top_indices[j] = top_indices[j - 1]; + } + top[i] = refc; + top_indices[i] = index; + break; + } + } + index++; + } + + // Display top binaries + fprintf(stderr, "\nTop %d largest refc binaries:\n", TOP_N); + for (size_t i = 0; i < TOP_N && top[i] != NULL; i++) { + struct RefcBinary *refc = top[i]; + fprintf(stderr, " [%zu] size=%d bytes (%.1f%%), refcount=%d", + top_indices[i], + (int) refc->size, + (double) refc->size * 100.0 / (double) total_size, + (int) refc->ref_count); + + if (refc->resource_type) { + fprintf(stderr, " [resource]"); + } + + // Print first 32 bytes as hex + fprintf(stderr, "\n data: "); + size_t print_size = refc->size < 32 ? 
refc->size : 32; + for (size_t j = 0; j < print_size; j++) { + fprintf(stderr, "%02x", refc->data[j]); + if (j % 4 == 3 && j < print_size - 1) { + fprintf(stderr, " "); + } + } + if (refc->size > 32) { + fprintf(stderr, "..."); + } + fprintf(stderr, "\n"); + } + + synclist_unlock(&ctx->global->refc_binaries); +} diff --git a/src/libAtomVM/refc_binary.h b/src/libAtomVM/refc_binary.h index 3fc1784bd8..7ff38f545e 100644 --- a/src/libAtomVM/refc_binary.h +++ b/src/libAtomVM/refc_binary.h @@ -142,6 +142,16 @@ term refc_binary_create_binary_info(Context *ctx); */ size_t refc_binary_total_size(Context *ctx); +/** + * @brief Dump detailed information about reference counted binaries + * + * @details This function prints diagnostic information including the count, + * total size, and details about the top 5 largest binaries including + * their first bytes. Used for debugging memory issues. + * @param ctx the context + */ +COLD_FUNC void refc_binary_dump_info(Context *ctx); + #ifdef __cplusplus } #endif diff --git a/src/platforms/esp32/CMakeLists.txt b/src/platforms/esp32/CMakeLists.txt index 4ddc362924..4dc81e0aad 100644 --- a/src/platforms/esp32/CMakeLists.txt +++ b/src/platforms/esp32/CMakeLists.txt @@ -51,6 +51,11 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) +# By default, JIT is disabled +set(AVM_DISABLE_JIT OFF) + +project(atomvm-esp32) + # JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4) # Configuration comes from idf.py menuconfig (KConfig), not CMake options if(CONFIG_JIT_ENABLED) @@ -67,8 +72,6 @@ else() message(STATUS "JIT compilation disabled") endif() -project(atomvm-esp32) - # esp-idf does not use compile_feature but instead sets version in # c_compile_options # Ensure project is compiled with at least C11 diff --git a/src/platforms/esp32/components/avm_sys/CMakeLists.txt b/src/platforms/esp32/components/avm_sys/CMakeLists.txt index 465e0d4b6d..e416400415 100644 --- 
a/src/platforms/esp32/components/avm_sys/CMakeLists.txt +++ b/src/platforms/esp32/components/avm_sys/CMakeLists.txt @@ -25,7 +25,8 @@ set(AVM_SYS_COMPONENT_SRCS "sys.c" "platform_nifs.c" "platform_defaultatoms.c" - "jit_stream_flash.c" + "jit_stream_flash_platform.c" + "../../../../libAtomVM/jit_stream_flash.c" "../../../../libAtomVM/inet.c" "../../../../libAtomVM/otp_crypto.c" "../../../../libAtomVM/otp_net.c" diff --git a/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c new file mode 100644 index 0000000000..bfaed52215 --- /dev/null +++ b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c @@ -0,0 +1,141 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef AVM_NO_JIT + +#include "jit_stream_flash.h" + +#include +#include + +#include "esp32_sys.h" + +#if ESP_IDF_VERSION_MAJOR >= 5 +#include +#endif + +#ifdef CONFIG_IDF_TARGET_ARCH_RISCV +#include +#endif + +struct JSFlashPlatformContext +{ + const esp_partition_t *partition; +}; + +struct JSFlashPlatformContext *jit_stream_flash_platform_init(void) +{ + const esp_partition_t *partition = esp_partition_find_first( + ESP_PARTITION_TYPE_DATA, ESP_PARTITION_SUBTYPE_ANY, JIT_PARTITION_NAME); + if (IS_NULL_PTR(partition)) { + fprintf(stderr, "Failed to find partition '%s' for JIT cache\n", JIT_PARTITION_NAME); + return NULL; + } + + struct JSFlashPlatformContext *pf_ctx = malloc(sizeof(struct JSFlashPlatformContext)); + if (IS_NULL_PTR(pf_ctx)) { + return NULL; + } + + pf_ctx->partition = partition; + return pf_ctx; +} + +void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *ctx) +{ + free(ctx); +} + +bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *ctx, uintptr_t addr) +{ + if (UNLIKELY(!ctx || !ctx->partition)) { + return false; + } + + size_t flash_offset = spi_flash_cache2phys((const void *) addr); + if (UNLIKELY(flash_offset == SPI_FLASH_CACHE2PHYS_FAIL)) { + fprintf(stderr, "Failed to convert cache address 0x%lx to physical address\n", (unsigned long) addr); + return false; + } + + esp_err_t err = esp_partition_erase_range(ctx->partition, + flash_offset - ctx->partition->address, FLASH_SECTOR_SIZE); + if (UNLIKELY(err != ESP_OK)) { + fprintf(stderr, "Failed to erase sector at offset 0x%lx: %d\n", (unsigned long) flash_offset, err); + return false; + } + + return true; +} + +bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *ctx, uintptr_t addr, const uint8_t *data) +{ + if (UNLIKELY(!ctx || !ctx->partition)) { + return false; + } + + size_t flash_offset = spi_flash_cache2phys((const void *) addr); + if (UNLIKELY(flash_offset 
== SPI_FLASH_CACHE2PHYS_FAIL)) { + fprintf(stderr, "Failed to convert cache address 0x%lx to physical address\n", (unsigned long) addr); + return false; + } + + esp_err_t err = esp_partition_write(ctx->partition, + flash_offset - ctx->partition->address, data, FLASH_PAGE_SIZE); + if (UNLIKELY(err != ESP_OK)) { + fprintf(stderr, "Failed to write page at offset 0x%lx: %d\n", (unsigned long) flash_offset, err); + return false; + } + + return true; +} + +uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t addr) +{ + // Convert data cache address to instruction cache address for RISC-V targets + // On ESP32-C3/C6/H2, flash is mapped to both DBUS (0x3C...) and IBUS (0x42...) + // but only IBUS addresses are executable +#ifdef CONFIG_IDF_TARGET_ARCH_RISCV + if ((addr & ~SOC_MMU_VADDR_MASK) == SOC_MMU_DBUS_VADDR_BASE) { + return (addr & SOC_MMU_VADDR_MASK) | SOC_MMU_IBUS_VADDR_BASE; + } + return addr; +#else + return addr; +#endif +} + +uintptr_t jit_stream_flash_platform_executable_to_ptr(uintptr_t addr) +{ + // Convert instruction cache address to data cache address for RISC-V targets + // This is the reverse of ptr_to_executable +#ifdef CONFIG_IDF_TARGET_ARCH_RISCV + if ((addr & ~SOC_MMU_VADDR_MASK) == SOC_MMU_IBUS_VADDR_BASE) { + return (addr & SOC_MMU_VADDR_MASK) | SOC_MMU_DBUS_VADDR_BASE; + } + return addr; +#else + return addr; +#endif +} + +REGISTER_NIF_COLLECTION(jit_stream_flash, jit_stream_flash_init, NULL, jit_stream_flash_get_nif) + +#endif // AVM_NO_JIT diff --git a/src/platforms/esp32/components/avm_sys/jit_stream_flash.c b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.h similarity index 64% rename from src/platforms/esp32/components/avm_sys/jit_stream_flash.c rename to src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.h index 77dfcca908..6f8d9bffc5 100644 --- a/src/platforms/esp32/components/avm_sys/jit_stream_flash.c +++ b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.h @@ -18,17 +18,26 @@ * 
SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later */ -#ifndef AVM_NO_JIT +#ifndef _JIT_STREAM_FLASH_PLATFORM_H_ +#define _JIT_STREAM_FLASH_PLATFORM_H_ -#include "context.h" -#include "jit.h" -#include "term.h" +#include +#include +#include -ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) -{ - UNUSED(ctx); - UNUSED(jit_stream); - return NULL; -} +#ifdef __cplusplus +extern "C" { +#endif + +// ESP32 flash constants +#define FLASH_SECTOR_SIZE 4096 +#define FLASH_PAGE_SIZE 256 +// JIT code is stored in main.avm partition +#define JIT_PARTITION_NAME "main.avm" + +#ifdef __cplusplus +} #endif + +#endif // _JIT_STREAM_FLASH_PLATFORM_H_ diff --git a/tests/erlang_tests/CMakeLists.txt b/tests/erlang_tests/CMakeLists.txt index ed8f4ac8fc..67fe67c438 100644 --- a/tests/erlang_tests/CMakeLists.txt +++ b/tests/erlang_tests/CMakeLists.txt @@ -620,6 +620,8 @@ compile_erlang(test_lists_member) compile_erlang(test_lists_keymember) compile_erlang(test_lists_keyfind) +compile_erlang(test_inline_arith) + if(Erlang_VERSION VERSION_GREATER_EQUAL "23") set(OTP23_OR_GREATER_TESTS test_op_bs_start_match_asm.beam @@ -1142,6 +1144,8 @@ set(erlang_test_beams test_lists_keymember.beam test_lists_keyfind.beam + test_inline_arith.beam + test_code_server_nifs.beam test_op_bs_start_match.beam diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 9fdc65f403..2d53ddd930 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -829,6 +829,82 @@ if_block_test_() -> >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {100, '<', RegA}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f10190ff cmp x7, #0x64\n" + " c: 5400004d b.le 0x14\n" + " 10: 
91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {100, '<', {free, RegA}}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f10190ff cmp x7, #0x64\n" + " c: 5400004d b.le 0x14\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 100}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f10190ff cmp x7, #0x64\n" + " c: 5400004a b.ge 0x14 // b.tcont\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '<', 100}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f10190ff cmp x7, #0x64\n" + " c: 5400004a b.ge 0x14 // b.tcont\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) end) ] end}. @@ -933,6 +1009,38 @@ call_only_or_schedule_next_and_label_relocation_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). 
+call_only_or_schedule_next_known_label_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:add_label(State2, 2, 16#2c), + State4 = ?BACKEND:call_only_or_schedule_next(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), + % OP_INT_CALL_END + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), + Dump = + << + " 0: 1400000d b 0x34\n" + " 4: 14000002 b 0xc\n" + " 8: 14000009 b 0x2c\n" + " c: b9401027 ldr w7, [x1, #16]\n" + " 10: f10004e7 subs x7, x7, #0x1\n" + " 14: b9001027 str w7, [x1, #16]\n" + " 18: 540000a1 b.ne 0x2c // b.any\n" + " 1c: 10000087 adr x7, 0x2c\n" + " 20: f9000427 str x7, [x1, #8]\n" + " 24: f9400847 ldr x7, [x2, #16]\n" + " 28: d61f00e0 br x7\n" + " 2c: f9400047 ldr x7, [x2]\n" + " 30: d61f00e0 br x7\n" + " 34: f9400447 ldr x7, [x2, #8]\n" + " 38: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ call_bif_with_large_literal_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]), @@ -1223,16 +1331,45 @@ wait_test() -> Label = 2, State3 = ?BACKEND:set_continuation_to_label(State2, Label), State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), - Stream = ?BACKEND:stream(State4), + Stream = ?BACKEND:stream(State6), Dump = << " 0: 14000000 b 0x0\n" " 4: 14000005 b 0x18\n" - " 8: 14000000 b 0x8\n" + " 8: 1400003e b 0x100\n" + " c: 14000000 b 0xc\n" + " 10: 14000000 b 0x10\n" + " 14: 14000000 b 0x14\n" + " 18: 10000747 adr x7, 0x100\n" + " 1c: f9000427 str x7, [x1, #8]\n" + " 20: f9407447 ldr x7, [x2, #232]\n" + " 24: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test set_continuation_to_label with known label +wait_known_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:add_label(State2, Label, 16#100), + State4 = ?BACKEND:set_continuation_to_label(State3, Label), + State5 = ?BACKEND:call_primitive_last(State4, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State6 = ?BACKEND:update_branches(State5), + + Stream = ?BACKEND:stream(State6), + Dump = << + " 0: 14000000 b 0x0\n" + " 4: 14000005 b 0x18\n" + " 8: 1400003e b 0x100\n" " c: 14000000 b 0xc\n" " 10: 14000000 b 0x10\n" " 14: 14000000 b 0x14\n" - " 18: 10000007 adr x7, 0x18\n" + " 18: 10000747 adr x7, 0x100\n" " 1c: f9000427 str x7, [x1, #8]\n" " 20: f9407447 ldr x7, [x2, #232]\n" " 24: d61f00e0 br x7" diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 9c7d3632d2..c91a65ce5d 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ 
b/tests/libs/jit/jit_armv6m_tests.erl @@ -2852,6 +2852,34 @@ move_array_element_test_() -> " 2: 68be ldr r6, [r7, #8]\n" " 4: 62c6 str r6, [r0, #44] ; 0x2c" >>) + end), + %% move_array_element: reg[32] to x_reg (large offset, index 32, offset 128) + ?_test(begin + move_array_element_test0(State0, r3, 32, {x_reg, 0}, << + " 0: 2704 movs r7, #4\n" + " 2: 441f add r7, r3\n" + " 4: 6ffe ldr r6, [r7, #124] ; 0x7c\n" + " 6: 6186 str r6, [r0, #24]" + >>) + end), + %% move_array_element: reg[32] to ptr (large offset) + ?_test(begin + move_array_element_test0(State0, r3, 32, {ptr, r5}, << + " 0: 2704 movs r7, #4\n" + " 2: 441f add r7, r3\n" + " 4: 6fff ldr r7, [r7, #124] ; 0x7c\n" + " 6: 602f str r7, [r5, #0]" + >>) + end), + %% move_array_element: reg[32] to y_reg (large offset) + ?_test(begin + move_array_element_test0(State0, r3, 32, {y_reg, 2}, << + " 0: 2604 movs r6, #4\n" + " 2: 441e add r6, r3\n" + " 4: 6ff6 ldr r6, [r6, #124] ; 0x7c\n" + " 6: 6947 ldr r7, [r0, #20]\n" + " 8: 60be str r6, [r7, #8]" + >>) end) ] end}. @@ -2872,6 +2900,19 @@ get_array_element_test_() -> >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual(r7, Reg) + end), + %% get_array_element: reg[x] with large offset (index 32, offset 128) + %% For offset 128, we use ldr with max offset 124 + temp register for remainder (4) + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, r4, 32), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 2604 movs r6, #4\n" + " 2: 4426 add r6, r4\n" + " 4: 6ff7 ldr r7, [r6, #124] ; 0x7c" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(r7, Reg) end) ] end}. 
@@ -2893,6 +2934,18 @@ move_to_array_element_test_() -> >>, ?assertEqual(dump_to_bin(Dump), Stream) end), + %% move_to_array_element/4: x_reg to reg[x], larger immediate offset + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, 32), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 2604 movs r6, #4\n" + " 4: 441e add r6, r3\n" + " 6: 67f7 str r7, [r6, #124] ; 0x7c" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), %% move_to_array_element/4: x_reg to reg[reg] ?_test(begin State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, r4), diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl index 21ce325526..ab13f91c28 100644 --- a/tests/libs/jit/jit_riscv32_tests.erl +++ b/tests/libs/jit/jit_riscv32_tests.erl @@ -1076,6 +1076,82 @@ if_block_test_() -> >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {100, '<', RegA}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 06400e93 li t4,100\n" + " c: 01fed363 bge t4,t6,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {100, '<', {free, RegA}}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 06400e93 li t4,100\n" + " c: 01fed363 bge t4,t6,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {1024, '<', RegA}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) 
+ end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 40000e93 li t4,1024\n" + " c: 01fed363 bge t4,t6,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {1024, '<', {free, RegA}}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 40000e93 li t4,1024\n" + " c: 01fed363 bge t4,t6,0x12\n" + " 10: 0f09 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) end) ] end}. @@ -1188,6 +1264,42 @@ call_only_or_schedule_next_and_label_relocation_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +call_only_or_schedule_next_known_label_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:add_label(State2, 2, 16#36), + State4 = ?BACKEND:call_only_or_schedule_next(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), + % OP_INT_CALL_END + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), + Dump = + << + " 0: 00000697 auipc a3,0x0\n" + " 4: 03c68067 jr 60(a3) # 0x3c\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 01068067 jr 16(a3) # 0x18\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 02668067 jr 38(a3) # 0x36\n" + " 18: 0085af83 lw t6,8(a1)\n" + " 1c: 1ffd addi t6,t6,-1\n" + " 1e: 01f5a423 sw t6,8(a1)\n" + " 22: 000f9a63 bnez t6,0x36\n" + " 26: 00000f97 auipc t6,0x0\n" + " 2a: 0fc1 addi t6,t6,16 # 0x36\n" + " 2c: 01f5a223 sw t6,4(a1)\n" + " 30: 00862f83 lw 
t6,8(a2)\n" + " 34: 8f82 jr t6\n" + " 36: 00062f83 lw t6,0(a2)\n" + " 3a: 8f82 jr t6\n" + " 3c: 00462f83 lw t6,4(a2)\n" + " 40: 8f82 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + %% Test with large gap (256+ bytes) to force mov_immediate path call_only_or_schedule_next_and_label_relocation_large_gap_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), @@ -1333,11 +1445,12 @@ get_list_test() -> is_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, Arg1 = {x_reg, 0}, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), - State2 = ?BACKEND:if_block( - State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1), + State3 = ?BACKEND:if_block( + State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> MSt1 = ?BACKEND:if_block( MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> ?BACKEND:jump_to_label(BSt0, Label) @@ -1354,36 +1467,42 @@ is_integer_test() -> ) end ), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - State4 = ?BACKEND:add_label(State3, Label, 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: ffffcf13 not t5,t6\n" - " 8: 0f72 slli t5,t5,0x1c\n" - " a: 020f0f63 beqz t5,0x48\n" - " e: 8f7e mv t5,t6\n" - " 10: 4e8d li t4,3\n" - " 12: 01df7f33 and t5,t5,t4\n" - " 16: 4e89 li t4,2\n" - " 18: 01df0663 beq t5,t4,0x24\n" - " 1c: a0d5 j 0x100\n" - " 1e: 0001 nop\n" - " 20: 00000013 nop\n" - " 
24: 4f0d li t5,3\n" - " 26: ffff4f13 not t5,t5\n" - " 2a: 01efffb3 and t6,t6,t5\n" - " 2e: 000faf83 lw t6,0(t6)\n" - " 32: 03f00f13 li t5,63\n" - " 36: 01efffb3 and t6,t6,t5\n" - " 3a: 4f21 li t5,8\n" - " 3c: 01ef8663 beq t6,t5,0x48\n" - " 40: a0c1 j 0x100\n" - " 42: 0001 nop\n" - " 44: 00000013 nop" + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 0f868067 jr 248(a3) # 0x100\n" + " 10: 01852f83 lw t6,24(a0)\n" + " 14: ffffcf13 not t5,t6\n" + " 18: 0f72 slli t5,t5,0x1c\n" + " 1a: 020f0f63 beqz t5,0x58\n" + " 1e: 8f7e mv t5,t6\n" + " 20: 4e8d li t4,3\n" + " 22: 01df7f33 and t5,t5,t4\n" + " 26: 4e89 li t4,2\n" + " 28: 01df0663 beq t5,t4,0x34\n" + " 2c: a8d1 j 0x100\n" + " 2e: 0001 nop\n" + " 30: 00000013 nop\n" + " 34: 4f0d li t5,3\n" + " 36: ffff4f13 not t5,t5\n" + " 3a: 01efffb3 and t6,t6,t5\n" + " 3e: 000faf83 lw t6,0(t6)\n" + " 42: 03f00f13 li t5,63\n" + " 46: 01efffb3 and t6,t6,t5\n" + " 4a: 4f21 li t5,8\n" + " 4c: 01ef8663 beq t6,t5,0x58\n" + " 50: a845 j 0x100\n" + " 52: 0001 nop\n" + " 54: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1395,11 +1514,12 @@ cond_jump_to_label(Cond, Label, MMod, MSt0) -> %% Keep the unoptimized version to test the and case. 
is_number_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, Arg1 = {x_reg, 0}, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), - State2 = ?BACKEND:if_block( - State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1), + State3 = ?BACKEND:if_block( + State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> BSt1 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 ), @@ -1416,119 +1536,146 @@ is_number_test() -> ) end ), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - State4 = ?BACKEND:add_label(State3, Label, 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: ffffcf13 not t5,t6\n" - " 8: 0f72 slli t5,t5,0x1c\n" - " a: 040f0763 beqz t5,0x58\n" - " e: 8f7e mv t5,t6\n" - " 10: 4e8d li t4,3\n" - " 12: 01df7f33 and t5,t5,t4\n" - " 16: 4e89 li t4,2\n" - " 18: 01df0663 beq t5,t4,0x24\n" - " 1c: a0d5 j 0x100\n" - " 1e: 0001 nop\n" - " 20: 00000013 nop\n" - " 24: 4f0d li t5,3\n" - " 26: ffff4f13 not t5,t5\n" - " 2a: 01efffb3 and t6,t6,t5\n" - " 2e: 000faf83 lw t6,0(t6)\n" - " 32: 8f7e mv t5,t6\n" - " 34: 03f00e93 li t4,63\n" - " 38: 01df7f33 and t5,t5,t4\n" - " 3c: 4ea1 li t4,8\n" - " 3e: 01df0d63 beq t5,t4,0x58\n" - " 42: 03f00f13 li t5,63\n" - " 46: 01efffb3 and t6,t6,t5\n" - " 4a: 4f61 li t5,24\n" - " 4c: 01ef8663 beq t6,t5,0x58\n" - " 50: a845 j 0x100\n" - " 52: 0001 nop\n" - " 54: 00000013 nop" + " 0: ffff .insn 2, 0xffff\n" 
+ " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 0f868067 jr 248(a3) # 0x100\n" + " 10: 01852f83 lw t6,24(a0)\n" + " 14: ffffcf13 not t5,t6\n" + " 18: 0f72 slli t5,t5,0x1c\n" + " 1a: 040f0763 beqz t5,0x68\n" + " 1e: 8f7e mv t5,t6\n" + " 20: 4e8d li t4,3\n" + " 22: 01df7f33 and t5,t5,t4\n" + " 26: 4e89 li t4,2\n" + " 28: 01df0663 beq t5,t4,0x34\n" + " 2c: a8d1 j 0x100\n" + " 2e: 0001 nop\n" + " 30: 00000013 nop\n" + " 34: 4f0d li t5,3\n" + " 36: ffff4f13 not t5,t5\n" + " 3a: 01efffb3 and t6,t6,t5\n" + " 3e: 000faf83 lw t6,0(t6)\n" + " 42: 8f7e mv t5,t6\n" + " 44: 03f00e93 li t4,63\n" + " 48: 01df7f33 and t5,t5,t4\n" + " 4c: 4ea1 li t4,8\n" + " 4e: 01df0d63 beq t5,t4,0x68\n" + " 52: 03f00f13 li t5,63\n" + " 56: 01efffb3 and t6,t6,t5\n" + " 5a: 4f61 li t5,24\n" + " 5c: 01ef8663 beq t6,t5,0x68\n" + " 60: a045 j 0x100\n" + " 62: 0001 nop\n" + " 64: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). is_boolean_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - State4 = ?BACKEND:add_label(State3, Label, 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), 
+ Stream = ?BACKEND:stream(State6), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8963 beq t6,t5,0x1a\n" - " c: 4f2d li t5,11\n" - " e: 01ef8663 beq t6,t5,0x1a\n" - " 12: a0fd j 0x100\n" - " 14: 0001 nop\n" - " 16: 00000013 nop" + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 0f868067 jr 248(a3) # 0x100\n" + " 10: 01852f83 lw t6,24(a0)\n" + " 14: 04b00f13 li t5,75\n" + " 18: 01ef8963 beq t6,t5,0x2a\n" + " 1c: 4f2d li t5,11\n" + " 1e: 01ef8663 beq t6,t5,0x2a\n" + " 22: a8f9 j 0x100\n" + " 24: 0001 nop\n" + " 26: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). is_boolean_far_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), Label = 1, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + State1 = ?BACKEND:jump_table(State0, 1), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - State4 = ?BACKEND:add_label(State3, Label, 16#1000), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#1000), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8963 beq t6,t5,0x1a\n" - " c: 4f2d li t5,11\n" - " e: 01ef8663 beq t6,t5,0x1a\n" - " 12: 7ef0006f j 0x1000\n" - " 16: 00000013 nop" + " 0: ffff .insn 
2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00001697 auipc a3,0x1\n" + " c: ff868067 jr -8(a3) # 0x1000\n" + " 10: 01852f83 lw t6,24(a0)\n" + " 14: 04b00f13 li t5,75\n" + " 18: 01ef8963 beq t6,t5,0x2a\n" + " 1c: 4f2d li t5,11\n" + " 1e: 01ef8663 beq t6,t5,0x2a\n" + " 22: 7df0006f j 0x1000\n" + " 26: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). is_boolean_far_known_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - State1 = ?BACKEND:add_label(State0, Label, 16#1000), - {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), - State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + State2 = ?BACKEND:add_label(State1, Label, 16#1000), + {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + State4 = ?BACKEND:if_block(State3, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State4 = ?BACKEND:free_native_registers(State3, [Reg]), - ?BACKEND:assert_all_native_free(State4), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State5 = ?BACKEND:free_native_registers(State4, [Reg]), + ?BACKEND:assert_all_native_free(State5), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8963 beq t6,t5,0x1a\n" - " c: 4f2d li t5,11\n" - " e: 01ef8663 beq t6,t5,0x1a\n" - " 12: 00001f17 auipc t5,0x1\n" - " 16: feef0067 jr -18(t5) # 0x1000" + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00001697 auipc a3,0x1\n" + " c: ff868067 jr -8(a3) # 0x1000\n" + " 10: 01852f83 lw t6,24(a0)\n" + " 14: 04b00f13 li t5,75\n" + " 18: 01ef8963 beq 
t6,t5,0x2a\n" + " 1c: 4f2d li t5,11\n" + " 1e: 01ef8663 beq t6,t5,0x2a\n" + " 22: 00001f17 auipc t5,0x1\n" + " 26: fdef0067 jr -34(t5) # 0x1000" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1618,70 +1765,130 @@ wait_test() -> Label = 2, State3 = ?BACKEND:set_continuation_to_label(State2, Label), State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), - Stream = ?BACKEND:stream(State4), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: ffffffff .insn 4, 0xffffffff\n" - " 4: ffffffff .insn 4, 0xffffffff\n" - " 6: ffffffff .insn 4, 0xffffffff\n" - " a: ffffffff .insn 4, 0xffffffff\n" - " c: ffffffff .insn 4, 0xffffffff\n" - " 10: ffffffff .insn 4, 0xffffffff\n" - " 12: ffffffff .insn 4, 0xffffffff\n" - " 16: ffffffff .insn 4, 0xffffffff\n" - " 18: ffffffff .insn 4, 0xffffffff\n" - " 1c: ffffffff .insn 4, 0xffffffff\n" - " 1e: ffffffff .insn 4, 0xffffffff\n" - " 22: ffffffff .insn 4, 0xffffffff\n" - " 24: ffffffff .insn 4, 0xffffffff\n" - " 28: ffffffff .insn 4, 0xffffffff\n" - " 2c: 01f5a223 sw t6,4(a1)\n" - " 30: 07462f83 lw t6,116(a2)\n" - " 34: 8f82 jr t6" + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 02868067 jr 40(a3) # 0x30\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 0f068067 jr 240(a3) # 0x100\n" + " 18: ffff .insn 2, 0xffff\n" + " 1a: ffff .insn 2, 0xffff\n" + " 1c: ffff .insn 2, 0xffff\n" + " 1e: ffff .insn 2, 0xffff\n" + " 20: ffff .insn 2, 0xffff\n" + " 22: ffff .insn 2, 0xffff\n" + " 24: ffff .insn 2, 0xffff\n" + " 26: ffff .insn 2, 0xffff\n" + " 28: ffff .insn 2, 0xffff\n" + " 2a: ffff .insn 2, 0xffff\n" + " 2c: ffff .insn 2, 0xffff\n" + " 2e: ffff .insn 2, 0xffff\n" + " 30: 00000f97 auipc t6,0x0\n" + " 34: 0d0f8f93 addi t6,t6,208 # 0x100\n" + " 38: 01f5a223 sw t6,4(a1)\n" + " 3c: 07462f83 lw 
t6,116(a2)\n" + " 40: 8f82 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test set_continuation_to_label with known label +wait_known_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:add_label(State2, Label, 16#100), + State4 = ?BACKEND:set_continuation_to_label(State3, Label), + State5 = ?BACKEND:call_primitive_last(State4, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State6 = ?BACKEND:update_branches(State5), + + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 02868067 jr 40(a3) # 0x30\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 0f068067 jr 240(a3) # 0x100\n" + " 18: ffff .insn 2, 0xffff\n" + " 1a: ffff .insn 2, 0xffff\n" + " 1c: ffff .insn 2, 0xffff\n" + " 1e: ffff .insn 2, 0xffff\n" + " 20: ffff .insn 2, 0xffff\n" + " 22: ffff .insn 2, 0xffff\n" + " 24: ffff .insn 2, 0xffff\n" + " 26: ffff .insn 2, 0xffff\n" + " 28: ffff .insn 2, 0xffff\n" + " 2a: ffff .insn 2, 0xffff\n" + " 2c: ffff .insn 2, 0xffff\n" + " 2e: ffff .insn 2, 0xffff\n" + " 30: 00000f97 auipc t6,0x0\n" + " 34: 0d0f8f93 addi t6,t6,208 # 0x100\n" + " 38: 01f5a223 sw t6,4(a1)\n" + " 3c: 07462f83 lw t6,116(a2)\n" + " 40: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
%% Test return_labels_and_lines/2 function return_labels_and_lines_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), % Test return_labels_and_lines with some sample labels and lines - State1 = ?BACKEND:add_label(State0, 2, 32), - State2 = ?BACKEND:add_label(State1, 1, 16), + State2 = ?BACKEND:add_label(State1, 2, 32), + State3 = ?BACKEND:add_label(State2, 1, 16), % {Line, Offset} pairs SortedLines = [{10, 16}, {20, 32}], - State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines), - Stream = ?BACKEND:stream(State3), + State4 = ?BACKEND:return_labels_and_lines(State3, SortedLines), + Stream = ?BACKEND:stream(State4), - % Should have generated auipc + addi + ret + labels table + lines table - % auipc = 4 bytes, addi = 2 bytes (compressed), ret = 2 bytes, labels table = 3*2*2 = 12 bytes, lines table = 3*2*2 = 12 bytes - % Total: 4 + 2 + 2 + 12 + 12 = 32 bytes + % Should have jump table + generated code with label/line tables ?assert(byte_size(Stream) >= 32), - % Expected: auipc a0, 0 + addi a0, a0, 10 + ret + padding + labels table + lines table - % The data tables start at offset 0xa (10) because of alignment padding + % Expected: jump table (3 entries, 24 bytes) + auipc + addi + ret + padding + labels table + lines table Dump = << - " 0: 00000517 auipc a0,0x0\n" - " 4: 0529 addi a0,a0,10 # 0xa\n" - " 6: 8082 ret\n" - " 8: 0200ffff .insn 4, 0x0200ffff\n" - " c: 0100 addi s0,sp,128\n" - " e: 0000 unimp\n" - " 10: 1000 addi s0,sp,32\n" - " 12: 0200 addi s0,sp,256\n" - " 14: 0000 unimp\n" - " 16: 2000 fld fs0,0(s0)\n" - " 18: 0200 addi s0,sp,256\n" - " 1a: 0a00 addi s0,sp,272\n" - " 1c: 0000 unimp\n" - " 1e: 1000 addi s0,sp,32\n" - " 20: 1400 addi s0,sp,544\n" - " 22: 0000 unimp\n" - " 24: 2000 fld fs0,0(s0)" + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 00868067 
jr 8(a3) # 0x10\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 01068067 jr 16(a3) # 0x20\n" + " 18: 00000517 auipc a0,0x0\n" + " 1c: 0529 addi a0,a0,10 # 0x22\n" + " 1e: 8082 ret\n" + " 20: ffff .insn 2, 0xffff\n" + " 22: 0200 addi s0,sp,256\n" + " 24: 0100 addi s0,sp,128\n" + " 26: 0000 unimp\n" + " 28: 1000 addi s0,sp,32\n" + " 2a: 0200 addi s0,sp,256\n" + " 2c: 0000 unimp\n" + " 2e: 2000 fld fs0,0(s0)\n" + " 30: 0200 addi s0,sp,256\n" + " 32: 0a00 addi s0,sp,272\n" + " 34: 0000 unimp\n" + " 36: 1000 addi s0,sp,32\n" + " 38: 1400 addi s0,sp,544\n" + " 3a: 0000 unimp\n" + " 3c: 2000 fld fs0,0(s0)" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2335,8 +2542,8 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: x_reg to reg[x+offset] ?_test(begin - State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), - State2 = setelement(7, State1, [a3, t3]), + State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), + State2 = setelement(8, State1, [a3, t3]), [a3, t3] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, a3, t3, 1), Stream = ?BACKEND:stream(State3), @@ -2351,8 +2558,8 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: imm to reg[x+offset] ?_test(begin - State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), - State2 = setelement(7, State1, [a3, t3]), + State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), + State2 = setelement(8, State1, [a3, t3]), [a3, t3] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, 42, a3, t3, 1), Stream = ?BACKEND:stream(State3), diff --git a/tests/libs/jit/jit_tests.erl b/tests/libs/jit/jit_tests.erl index 2328522753..849313cbdc 100644 --- a/tests/libs/jit/jit_tests.erl +++ b/tests/libs/jit/jit_tests.erl @@ -112,6 +112,7 @@ compile_minimal_x86_64_test() -> fun(_) -> undefined end, fun(_) -> undefined end, fun(_) -> any end, + fun(_) -> undefined end, 
jit_x86_64, Stream2 ), @@ -159,10 +160,11 @@ compile_stream_for_backend(Backend, CodeChunk, AtomChunk, TypeChunk) -> AtomResolver = jit_precompile:atom_resolver(AtomChunk), LiteralResolver = fun(_) -> test_literal end, TypeResolver = jit_precompile:type_resolver(TypeChunk), + ImportResolver = fun(_) -> test_function end, % Compile with typed register support {LabelsCount, Stream3} = jit:compile( - CodeChunk, AtomResolver, LiteralResolver, TypeResolver, Backend, Stream2 + CodeChunk, AtomResolver, LiteralResolver, TypeResolver, ImportResolver, Backend, Stream2 ), Backend:stream(Stream3). diff --git a/tests/libs/jit/jit_x86_64_asm_tests.erl b/tests/libs/jit/jit_x86_64_asm_tests.erl index 797ed9077c..a1c9bb949f 100644 --- a/tests/libs/jit/jit_x86_64_asm_tests.erl +++ b/tests/libs/jit/jit_x86_64_asm_tests.erl @@ -866,6 +866,19 @@ jge_rel8_test_() -> ) ]. +jle_test_() -> + [ + ?_assertAsmEqual(<<16#7e, 16#f4>>, "jle .-10", jit_x86_64_asm:jle(-10)) + ]. + +jle_rel8_test_() -> + [ + ?_assertEqual( + {1, jit_tests_common:asm(x86_64, <<16#7e, 16#05>>, "jle .+7")}, + jit_x86_64_asm:jle_rel8(7) + ) + ]. 
+ jmp_rel8_test_() -> [ ?_assertEqual( @@ -914,9 +927,50 @@ andb_test_() -> subq_test_() -> [ + % Register-register forms ?_assertAsmEqual(<<16#48, 16#29, 16#c1>>, "subq %rax, %rcx", jit_x86_64_asm:subq(rax, rcx)), ?_assertAsmEqual(<<16#49, 16#29, 16#c2>>, "subq %rax, %r10", jit_x86_64_asm:subq(rax, r10)), - ?_assertAsmEqual(<<16#4c, 16#29, 16#c1>>, "subq %r8, %rcx", jit_x86_64_asm:subq(r8, rcx)) + ?_assertAsmEqual(<<16#4c, 16#29, 16#c1>>, "subq %r8, %rcx", jit_x86_64_asm:subq(r8, rcx)), + % 8-bit immediate forms + ?_assertAsmEqual( + <<16#48, 16#83, 16#e8, 16#0a>>, "subq $10, %rax", jit_x86_64_asm:subq(10, rax) + ), + ?_assertAsmEqual( + <<16#48, 16#83, 16#e9, 16#05>>, "subq $5, %rcx", jit_x86_64_asm:subq(5, rcx) + ), + ?_assertAsmEqual( + <<16#49, 16#83, 16#ea, 16#08>>, "subq $8, %r10", jit_x86_64_asm:subq(8, r10) + ), + ?_assertAsmEqual( + <<16#49, 16#83, 16#eb, 16#7f>>, "subq $127, %r11", jit_x86_64_asm:subq(127, r11) + ), + % 32-bit immediate, special short form for %rax + ?_assertAsmEqual( + <<16#48, 16#2d, 16#00, 16#01, 16#00, 16#00>>, + "subq $256, %rax", + jit_x86_64_asm:subq(256, rax) + ), + ?_assertAsmEqual( + <<16#48, 16#2d, 16#00, 16#04, 16#00, 16#00>>, + "subq $1024, %rax", + jit_x86_64_asm:subq(1024, rax) + ), + % 32-bit immediate forms for other registers + ?_assertAsmEqual( + <<16#48, 16#81, 16#e9, 16#00, 16#01, 16#00, 16#00>>, + "subq $256, %rcx", + jit_x86_64_asm:subq(256, rcx) + ), + ?_assertAsmEqual( + <<16#49, 16#81, 16#ea, 16#00, 16#04, 16#00, 16#00>>, + "subq $1024, %r10", + jit_x86_64_asm:subq(1024, r10) + ), + ?_assertAsmEqual( + <<16#49, 16#81, 16#eb, 16#00, 16#10, 16#00, 16#00>>, + "subq $4096, %r11", + jit_x86_64_asm:subq(4096, r11) + ) ]. 
decl_test_() -> diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl index c4a94678e7..a95f631b7f 100644 --- a/tests/libs/jit/jit_x86_64_tests.erl +++ b/tests/libs/jit/jit_x86_64_tests.erl @@ -789,6 +789,166 @@ if_block_test_() -> >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {100, '<', RegA}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 48 83 f8 64 cmp $0x64,%rax\n" + " c: 7e 04 jle 0x12\n" + " e: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {100, '<', {free, RegA}}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 48 83 f8 64 cmp $0x64,%rax\n" + " c: 7e 04 jle 0x12\n" + " e: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 100}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 48 83 f8 64 cmp $0x64,%rax\n" + " c: 7d 04 jge 0x12\n" + " e: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '<', 100}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 
0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 48 83 f8 64 cmp $0x64,%rax\n" + " c: 7d 04 jge 0x12\n" + " e: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 16#100000000}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 49 bb 00 00 00 00 01 movabs $0x100000000,%r11\n" + " f: 00 00 00 \n" + " 12: 4c 39 d8 cmp %r11,%rax\n" + " 15: 7d 04 jge 0x1b\n" + " 17: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '<', 16#100000000}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 49 bb 00 00 00 00 01 movabs $0x100000000,%r11\n" + " f: 00 00 00 \n" + " 12: 4c 39 d8 cmp %r11,%rax\n" + " 15: 7d 04 jge 0x1b\n" + " 17: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {16#100000000, '<', RegA}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 49 bb 00 00 00 00 01 movabs $0x100000000,%r11\n" + " f: 00 00 00 \n" + " 12: 4c 39 d8 cmp %r11,%rax\n" + " 15: 7e 04 jle 0x1b\n" + " 17: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + 
{16#100000000, '<', {free, RegA}}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 49 bb 00 00 00 00 01 movabs $0x100000000,%r11\n" + " f: 00 00 00 \n" + " 12: 4c 39 d8 cmp %r11,%rax\n" + " 15: 7e 04 jle 0x1b\n" + " 17: 49 83 c3 02 add $0x2,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) end) ] end}. @@ -893,6 +1053,37 @@ call_only_or_schedule_next_and_label_relocation_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +call_only_or_schedule_next_known_label_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:add_label(State2, 2, 16#2a), + State4 = ?BACKEND:call_only_or_schedule_next(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), + % OP_INT_CALL_END: label 0 is added only after the call to the already-placed label 2 (16#2a) has been emitted + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), + Dump = + << + " 0: e9 2a 00 00 00 jmpq 0x2f\n" + " 5: e9 05 00 00 00 jmpq 0xf\n" + " a: e9 1b 00 00 00 jmpq 0x2a\n" + " f: ff 4e 10 decl 0x10(%rsi)\n" + " 12: 74 05 je 0x19\n" + " 14: e9 11 00 00 00 jmpq 0x2a\n" + " 19: 48 8d 05 0a 00 00 00 lea 0xa(%rip),%rax # 0x2a\n" + " 20: 48 89 46 08 mov %rax,0x8(%rsi)\n" + " 24: 48 8b 42 10 mov 0x10(%rdx),%rax\n" + " 28: ff e0 jmpq *%rax\n" + " 2a: 48 8b 02 mov (%rdx),%rax\n" + " 2d: ff e0 jmpq *%rax\n" + " 2f: 48 8b 42 08 mov 0x8(%rdx),%rax\n" + " 33: ff e0 jmpq *%rax\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream).
+ call_bif_with_large_literal_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]), @@ -1607,6 +1798,62 @@ jump_to_continuation_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +%% Test set_continuation_to_label with unknown label: label 2 is given its offset (16#100) by add_label/3 only after the continuation has been set +wait_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:set_continuation_to_label(State2, Label), + State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), + + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: e9 ff ff ff ff jmpq 0x4\n" + " 5: e9 14 00 00 00 jmpq 0x1e\n" + " a: e9 f1 00 00 00 jmpq 0x100\n" + " f: e9 ff ff ff ff jmpq 0x13\n" + " 14: e9 ff ff ff ff jmpq 0x18\n" + " 19: e9 ff ff ff ff jmpq 0x1d\n" + " 1e: 48 8d 05 db 00 00 00 lea 0xdb(%rip),%rax\n" + " 25: 48 89 46 08 mov %rax,0x8(%rsi)\n" + " 29: 48 8b 82 e8 00 00 00 mov 0xe8(%rdx),%rax\n" + " 30: ff e0 jmpq *%rax" + >>, + ?assertEqual(dump_to_bin(Dump), Stream).
+ +%% Test set_continuation_to_label with known label: label 2 is placed at 16#100 by add_label/3 before the continuation is set; the expected dump is the same as in wait_test +wait_known_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:add_label(State2, Label, 16#100), + State4 = ?BACKEND:set_continuation_to_label(State3, Label), + State5 = ?BACKEND:call_primitive_last(State4, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State6 = ?BACKEND:update_branches(State5), + + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: e9 ff ff ff ff jmpq 0x4\n" + " 5: e9 14 00 00 00 jmpq 0x1e\n" + " a: e9 f1 00 00 00 jmpq 0x100\n" + " f: e9 ff ff ff ff jmpq 0x13\n" + " 14: e9 ff ff ff ff jmpq 0x18\n" + " 19: e9 ff ff ff ff jmpq 0x1d\n" + " 1e: 48 8d 05 db 00 00 00 lea 0xdb(%rip),%rax\n" + " 25: 48 89 46 08 mov %rax,0x8(%rsi)\n" + " 29: 48 8b 82 e8 00 00 00 mov 0xe8(%rdx),%rax\n" + " 30: ff e0 jmpq *%rax" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + dump_to_bin(Dump) -> dump_to_bin0(Dump, addr, []). diff --git a/tests/test.c b/tests/test.c index f1024f27ed..0ebe2bf79f 100644 --- a/tests/test.c +++ b/tests/test.c @@ -610,6 +610,8 @@ struct Test tests[] = { TEST_CASE(test_lists_keymember), TEST_CASE(test_lists_keyfind), + TEST_CASE(test_inline_arith), + // TEST CRASHES HERE: TEST_CASE(memlimit), { NULL, 0, false, false }