Skip to content

Commit 6b4d772

Browse files
committed
Add generator for capstone
1 parent cbcb87e commit 6b4d772

File tree

3 files changed

+305
-2
lines changed

3 files changed

+305
-2
lines changed

backends/generators/tasks.rake

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
1+
# typed: false
12
# frozen_string_literal: true
23

34
require "udb/resolver"
4-
require 'json'
5-
require 'tempfile'
5+
require "json"
6+
require "tempfile"
67

78
directory "#{$root}/gen/go"
89
directory "#{$root}/gen/c_header"
910
directory "#{$root}/gen/sverilog"
1011

12+
UDB_CAPSTONE_DIR = "#{$root}/tools/python-packages/udb-capstone"
13+
directory UDB_CAPSTONE_DIR
14+
1115
def with_resolved_exception_codes(cfg_arch)
1216
# Process ERB templates in exception codes using Ruby ERB processing
1317
resolved_exception_codes = []
@@ -127,4 +131,28 @@ namespace :gen do
127131
"--output=#{output_dir}riscv_decode_package.svh --include-all"
128132
end
129133
end
134+
135+
# The task below lists the default output directory as a prerequisite, so a
# task for that path must exist or rake aborts on a clean checkout.
# NOTE(review): no `directory` declaration for this path is visible in this
# part of the file; declaring it again is harmless if one already exists.
directory "#{$root}/gen/capstone/"

desc <<~DESC
  Generate Capstone CSR switch from RISC-V CSR definitions

  Options:
   * CONFIG - Configuration name (defaults to "_")
   * OUTPUT_DIR - Output directory for generated Capstone code (defaults to "#{$root}/gen/capstone/")
DESC
task capstone: "#{$root}/gen/capstone/" do
  config_name = ENV.fetch("CONFIG", "_")
  output_dir = ENV.fetch("OUTPUT_DIR", "#{$root}/gen/capstone/")

  # OUTPUT_DIR may point somewhere other than the prerequisite directory.
  FileUtils.mkdir_p output_dir

  # File.join yields a correct path whether or not OUTPUT_DIR ends in "/".
  output_file = File.join(output_dir, "csr_switch.c")

  # Locate the CSR definitions for the selected configuration.
  cfg_arch = Udb::Resolver.new.cfg_arch_for(config_name)
  csr_dir = cfg_arch.path / "csr"

  # Run the Capstone CSR switch generator Python script. Passing the command
  # as separate arguments bypasses the shell, so paths containing spaces or
  # shell metacharacters are handed over intact.
  sh "#{$root}/.home/.venv/bin/python3",
     "#{UDB_CAPSTONE_DIR}/generate_csr_switch.py",
     "--csr-dir=#{csr_dir}",
     "--arch=BOTH",
     "--output=#{output_file}"
end
130158
end
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright (c) Salil Mittal
4+
# SPDX-License-Identifier: BSD-3-Clause-Clear
5+
"""
6+
Generate a C function mapping RISC-V CSR numbers to names using a switch statement.
7+
"""
8+
9+
import sys
10+
import os
11+
12+
# Import functions from generator.py
13+
generator_dir = os.path.abspath(
14+
os.path.join(os.path.dirname(__file__), "../../../backends/generators")
15+
)
16+
sys.path.append(generator_dir)
17+
from generator import load_csrs
18+
19+
20+
def generate_csr_switch(csrs, output_file):
    """Write a C function that maps CSR numbers to lowercase CSR names.

    The generated file defines ``getCSRSystemRegisterName``, a ``switch`` with
    one ``case`` per CSR (ordered by address) that returns ``NULL`` for any
    address not listed.

    Args:
        csrs: Mapping of integer CSR address to CSR name.
        output_file: Path of the C file to create or overwrite.
    """
    # The license line used to be assigned and then immediately overwritten,
    # so it never reached the generated file; it is now part of the header.
    # The identifier is split across two literals, presumably to keep license
    # scanners from attributing it to this script -- TODO confirm.
    header = (
        "/* SP" + "DX-License-Identifier: BSD-3-Clause */\n"
        "/* Copyright (c) 2025 RISC-V International */\n"
        "/*\n"
        " * This file is auto-generated by riscv-unified-db\n"
        " */\n"
        "static const char *getCSRSystemRegisterName(unsigned CsrNo)\n"
        "{\n"
        "\tswitch (CsrNo) {\n"
    )
    cases = "".join(
        f'\tcase 0x{addr:04x}:\n\t\treturn "{name.lower()}";\n'
        for addr, name in sorted(csrs.items())
    )
    footer = "\t}\n\treturn NULL;\n}\n"

    # Build the whole text first so a formatting error cannot leave a
    # truncated file behind.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(header + cases + footer)
40+
41+
42+
def main():
    """Command-line entry point: load the CSRs and write the C switch file.

    Options are ``--csr-dir``, ``--extensions``, ``--arch`` and ``--output``.
    An empty ``--extensions`` value means every CSR is included.
    """
    import argparse

    script_dir = os.path.dirname(__file__)

    # (flag, add_argument keyword arguments), registered in this order.
    option_specs = (
        (
            "--csr-dir",
            {
                "default": os.path.abspath(
                    os.path.join(script_dir, "../../../arch/csr/")
                ),
                "help": "Directory containing CSR YAML files",
            },
        ),
        (
            "--extensions",
            {
                "default": "",
                "help": "Comma-separated list of enabled extensions (default: all)",
            },
        ),
        (
            "--arch",
            {
                "default": "BOTH",
                "choices": ["RV32", "RV64", "BOTH"],
                "help": "Target architecture (RV32, RV64, BOTH)",
            },
        ),
        (
            "--output",
            {
                "default": os.path.join(script_dir, "csr_switch.c"),
                "help": "Output C file name",
            },
        ),
    )

    parser = argparse.ArgumentParser(description="Generate C switch for RISC-V CSRs")
    for flag, settings in option_specs:
        parser.add_argument(flag, **settings)
    args = parser.parse_args()

    # Blank entries (including a wholly empty option value) are discarded.
    enabled_extensions = []
    for raw_name in args.extensions.split(","):
        cleaned = raw_name.strip()
        if cleaned:
            enabled_extensions.append(cleaned)

    # With no explicit extension list, ask the loader for everything.
    include_all = len(enabled_extensions) == 0
    csrs = load_csrs(args.csr_dir, enabled_extensions, include_all, args.arch)

    generate_csr_switch(csrs, args.output)
    print(f"Generated: {args.output}")
81+
82+
83+
if __name__ == "__main__":
84+
main()
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright (c) Salil Mittal
4+
# SPDX-License-Identifier: BSD-3-Clause-Clear
5+
6+
import re
7+
import os
8+
import argparse
9+
import yaml
10+
import sys
11+
12+
from capstone import Cs, CS_ARCH_RISCV, CS_MODE_32
13+
14+
15+
# Parse CSRs from the switch case file
16+
def parse_cases(file_path):
    """Collect ``(csr_name, address)`` pairs from a generated C switch file.

    A pair is recorded when a ``case 0x...:`` line is followed, before the
    next ``case`` line, by a ``return "<name>";`` line.

    Args:
        file_path: Path of the C file to scan.

    Returns:
        A set of ``(name, address)`` tuples, with ``address`` as an int.
    """
    case_re = re.compile(r"\s*case\s+(0x[0-9a-fA-F]+):")
    return_re = re.compile(r'\s*return\s+"([^"]+)";')

    csrs = set()
    with open(file_path, encoding="utf-8") as f:
        pending_addr = None
        for line in f:
            case_match = case_re.match(line)
            if case_match:
                pending_addr = int(case_match.group(1), 0)  # convert to int
                continue

            # Compare against None explicitly: address 0 is a valid CSR
            # number and must not be mistaken for "no case seen yet".
            if pending_addr is None:
                continue

            return_match = return_re.match(line)
            if return_match:
                csrs.add((return_match.group(1), pending_addr))
                pending_addr = None
    return csrs
32+
33+
34+
# Retrieve CSRs present in the Capstone package
35+
# Adds (CSR name / pseudo-instruction, corresponding address) to result set
36+
def get_capstone_csrs():
    """Probe the Capstone disassembler for every CSR address it can name.

    For each 12-bit CSR address a CSR-read instruction is encoded and
    disassembled. An entry is recorded when Capstone prints either a dedicated
    mnemonic (anything other than ``csrr``) or a ``csrr`` whose second operand
    is not purely numeric.

    Returns:
        A set of ``(name_or_mnemonic, address)`` tuples.
    """
    csrs = set()

    md = Cs(CS_ARCH_RISCV, CS_MODE_32)
    # Cover the whole 12-bit space, 0x000 through 0xfff inclusive; the upper
    # bound used to stop one short and never probed 0xfff.
    for csr in range(2**12):
        # CSR number in the top 12 bits over the fixed low 20 bits 0x020f3,
        # emitted least-significant byte first.
        csrr_bytes = ((csr << 20) | 0x020F3).to_bytes(4, "little")

        for insn in md.disasm(csrr_bytes, 0x1000):
            # Case 1: CSRs having pseudo-instructions
            # Example: rdinstreth ra
            if insn.mnemonic != "csrr":
                csrs.add((insn.mnemonic, csr))
                continue

            # Case 2: named CSR operand
            # Example: csrr ra, sstatus
            operands = insn.op_str.split(",")
            if len(operands) == 2:
                csr_name = operands[1].strip()
                if not csr_name.isnumeric():
                    csrs.add((csr_name, csr))
    return csrs
71+
72+
73+
# Extract CSR address from pseudo-instructions which are in the form:
74+
# xs1 == 0 && csr == <addr>
75+
# Returns the CSR address if the condition is in the above format else None
76+
def extract_csr_addr(cond):
    """Return the CSR address named by a ``xs1 == 0 && csr == <addr>`` condition.

    The two equality clauses may appear in either order. ``None`` is returned
    when the condition does not consist of exactly two ``&&``-joined
    equalities, when ``xs1`` is compared with anything other than ``0``, when
    the ``csr`` value is not an integer literal, or when any other identifier
    appears on a left-hand side.
    """
    clauses = [clause.strip() for clause in cond.split("&&")]
    if len(clauses) != 2:
        return None

    saw_zero_xs1 = False
    address = None

    for clause in clauses:
        lhs, separator, rhs = clause.partition("==")
        if not separator:
            return None
        lhs, rhs = lhs.strip(), rhs.strip()

        if lhs == "xs1":
            if rhs != "0":
                return None
            saw_zero_xs1 = True
        elif lhs == "csr":
            try:
                address = int(rhs, 0)  # base 0 accepts decimal and 0x-prefixed hex
            except ValueError:
                return None
        else:
            # unknown left-hand identifier
            return None

    return address if saw_zero_xs1 else None
113+
114+
115+
# Get pseudo-instructions for `csrrs` to read specific CSRs
116+
def get_pseudo_instr():
    """Load the ``csrrs`` pseudo-instructions that read one specific CSR.

    Reads ``spec/std/isa/inst/Zicsr/csrrs.yaml`` (located relative to this
    script) and keeps each ``pseudoinstructions`` entry whose ``when``
    condition ``extract_csr_addr`` can resolve to an address.

    Returns:
        A set of ``(address, target)`` tuples, where ``target`` is the entry's
        ``to`` value.

    Raises:
        OSError: If the YAML file cannot be opened.
        KeyError: If the file has no ``pseudoinstructions`` key, or an entry
            lacks ``when`` or ``to``.
    """
    # Normalise the path so the lookup does not depend on how the script was
    # invoked.
    csrrs_path = os.path.abspath(
        os.path.join(
            os.path.dirname(__file__),
            "../../../spec/std/isa/inst/Zicsr/csrrs.yaml",
        )
    )

    with open(csrrs_path, encoding="utf-8") as f:
        data = yaml.safe_load(f)

    res = set()
    for entry in data["pseudoinstructions"]:
        addr = extract_csr_addr(entry["when"])
        # Address 0 is valid, so test for None rather than truthiness.
        if addr is not None:
            res.add((addr, entry["to"]))
    return res
133+
134+
135+
def main():
    """Check that the generated CSR switch covers the CSRs Capstone names.

    Every ``(name, address)`` pair reported by Capstone must appear in the
    generated switch file, except for addresses covered by a ``csrrs``
    pseudo-instruction and names in the explicit ignore list below.

    Exits with status 0 when nothing is missing. Otherwise the missing pairs
    are listed on stderr and the exit status is 1.
    """
    parser = argparse.ArgumentParser(
        description="Check the generated CSR switch against the CSRs known to Capstone."
    )
    parser.add_argument(
        "--csr_switch",
        help="Path to C file containing CSR switch case",
        default=f"{os.path.dirname(__file__)}/../../../gen/capstone/csr_switch.c",
    )
    args = parser.parse_args()

    cases_gen = parse_cases(args.csr_switch)  # cases generated using Capstone generator
    capstone_csrs = get_capstone_csrs()

    # Only the address matters when deciding whether a pseudo-instruction
    # accounts for a Capstone entry.
    pseudo_instr_addrs = {addr for addr, _ in get_pseudo_instr()}

    unhandled_cases = {
        # from the removed N extension
        "utvec",
        "sedeleg",
        "uip",
        "uepc",
        "ustatus",
        "ucause",
        "sideleg",
        "uie",
        "utval",
        "uscratch",
        "dscratch",  # defined as dscratch0, dscratch1 in UDB
    }

    # Build the filtered result in one pass instead of removing items from the
    # set while looping over it.
    missing = {
        (name, addr)
        for name, addr in capstone_csrs - cases_gen
        if addr not in pseudo_instr_addrs and name not in unhandled_cases
    }

    if not missing:
        sys.exit(0)  # pass

    # Report what is missing so a failing run can be diagnosed.
    for name, addr in sorted(missing, key=lambda pair: (pair[1], pair[0])):
        print(
            f"Capstone CSR '{name}' at 0x{addr:03x} is missing from {args.csr_switch}",
            file=sys.stderr,
        )
    sys.exit(1)  # fail
188+
189+
190+
if __name__ == "__main__":
191+
main()

0 commit comments

Comments
 (0)