Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion igzip/riscv64/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,5 @@
# riscv64 igzip sources: multibinary dispatcher/stubs plus the RVV Adler-32
# kernels. Every entry except the last must end with a backslash so the
# whole list stays part of the same assignment.
lsrc_riscv64 += \
	igzip/riscv64/igzip_multibinary_riscv64_dispatcher.c \
	igzip/riscv64/igzip_multibinary_riscv64.S \
	igzip/riscv64/igzip_isal_adler32_rvv.S \
	igzip/riscv64/igzip_isal_adler32_rvv128.S
125 changes: 125 additions & 0 deletions igzip/riscv64/igzip_isal_adler32_rvv128.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/**********************************************************************
Copyright (c) 2025 ZTE Corporation.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of ZTE Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
// Everything down to the matching #endif is assembled only when HAVE_RVV
// is non-zero.
#if HAVE_RVV
.text
.align 3 // power-of-two alignment: 2^3 = 8 bytes
.option arch, +v // let the assembler accept vector (V) instructions
.globl adler32_rvv128
.type adler32_rvv128, @function
/*
 * uint32_t adler32_rvv128(uint32_t adler32, uint8_t *buf, uint64_t len)
 *
 * Adler-32 update using the RISC-V vector extension. Whole 32-byte blocks
 * are handled with e8/m2 vector operations (AVL = 32); the remaining
 * len % 32 bytes are handled by a scalar loop.
 *
 * In:    a0 = running checksum (B << 16 | A)
 *        a1 = buf
 *        a2 = len in bytes
 * Out:   a0 = updated checksum, sign-extended from 32 bits as the RV64
 *             calling convention requires for 32-bit scalars
 * Roles: t0 = 32 (block size / AVL)     t1 = whole blocks still to process
 *        t2 = blocks in current chunk   t3 = reciprocal constant for % 65521
 *        t4 = 65521                     t5 = A, t6 = B
 *        a7 = table address, then end pointer of the current chunk
 *        a3-a6 = scratch
 *        v0-v1 = weights, v2-v3 = data, v4-v5 = zero,
 *        v12 = byte sum, v16-v19 = weighted products, v20 = weighted sum
 * Stack: not used. Only caller-saved registers are written.
 */
adler32_rvv128:
        li      t3, 0x80078071          // (x * t3) >> 47 == x / 65521 while x < 2^32
        li      t4, 65521               // Adler-32 modulus
        slli    t5, a0, 48
        srli    t5, t5, 48              // t5: A = adler32 & 0xffff
        srliw   t6, a0, 16              // t6: B = adler32 >> 16
        li      t0, 32
        bltu    a2, t0, .Ltail_bytes    // less than one block: scalar path only

        vsetvli zero, t0, e8, m2, ta, ma
        la      a7, factors
        vle8.v  v0, (a7)                // v0: per-byte weights 32, 31, ..., 1
        vmv.v.i v4, 0                   // v4: zero start value for both reductions
        srli    t1, a2, 5               // t1 = length / 32

.Louter_loop:
        beqz    t1, .Ltail_bytes
        // Process at most 173 blocks between modular reductions. With A and B
        // starting below 2^16, B stays below 2^32 for 173 blocks, which is the
        // range in which the multiply/shift reduction is exact.
        li      t2, 173
        bgeu    t1, t2, 1f
        mv      t2, t1                  // fewer than 173 blocks left: take them all
1:
        slli    a7, t2, 5
        add     a7, a1, a7              // a7 = end address of this chunk

.Linner_loop:
        vle8.v  v2, (a1)                // v2: next 32 input bytes
        addi    a1, a1, 32
        slli    a5, t5, 5
        add     t6, t6, a5              // B += 32 * A (A as it was before this block)
        vwredsumu.vs v12, v2, v4        // v12[0] = 16-bit sum of the 32 bytes
        vwmulu.vv v16, v2, v0           // v16: 16-bit products byte[i] * weight[i]

        vsetvli zero, t0, e16, m4, ta, ma
        vmv.x.s a6, v12                 // sum <= 32 * 255, so sign extension is harmless
        add     t5, t5, a6              // A += sum of bytes
        vwredsumu.vs v20, v16, v4       // v20[0] = 32-bit sum of the products

        vsetvli zero, t0, e32, m4, ta, ma
        vmv.x.s a6, v20
        add     t6, t6, a6              // B += weighted_sum

        vsetvli zero, t0, e8, m2, ta, ma // back to byte elements for the next load
        bne     a1, a7, .Linner_loop

        mul     a3, t5, t3
        srli    a3, a3, 47              // a3 = A / 65521
        mul     a4, a3, t4
        sub     t5, t5, a4              // A %= 65521
        mul     a3, t6, t3
        srli    a3, a3, 47              // a3 = B / 65521
        mul     a4, a3, t4
        sub     t6, t6, a4              // B %= 65521
        sub     t1, t1, t2              // blocks left after this chunk
        j       .Louter_loop

.Ltail_bytes:
        andi    a2, a2, 31              // bytes left after the whole blocks
        beqz    a2, .Lfinalize
        add     a2, a1, a2              // a2 = end address of the tail

.Ltail_loop:
        beq     a1, a2, .Lfinalize
        lbu     a3, 0(a1)
        add     t5, t5, a3              // A += byte
        add     t6, t6, t5              // B += A
        addi    a1, a1, 1
        j       .Ltail_loop

.Lfinalize:
        mul     a3, t5, t3
        srli    a3, a3, 47
        mul     a4, a3, t4
        sub     t5, t5, a4              // A %= 65521
        mul     a3, t6, t3
        srli    a3, a3, 47
        mul     a4, a3, t4
        sub     t6, t6, a4              // B %= 65521
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can directly copy the same 8 lines with the logic above. Not changing the temporary registers makes it a bit clearer. Then combine these commits into one, and it should be ready to be merged.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks for the review!

        // slliw (not slli) sign-extends the 32-bit result, so a0 is a properly
        // sign-extended 32-bit value even when bit 31 of the checksum is set.
        slliw   t6, t6, 16
        or      a0, t5, t6              // a0 = (B << 16) | A

        ret
        .size adler32_rvv128, .-adler32_rvv128

.section .rodata
.align 4 // power-of-two alignment: 2^4 = 16 bytes
// Per-byte weights that adler32_rvv128 loads once and multiplies
// element-wise with every 32-byte input block: the first byte of a block
// is weighted 32 and the last byte is weighted 1.
factors:
.byte 32, 31, 30, 29, 28, 27, 26, 25
.byte 24, 23, 22, 21, 20, 19, 18, 17
.byte 16, 15, 14, 13, 12, 11, 10, 9
.byte 8, 7, 6, 5, 4, 3, 2, 1
#endif
13 changes: 10 additions & 3 deletions igzip/riscv64/igzip_multibinary_riscv64_dispatcher.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,22 @@
/*
 * Candidate Adler-32 implementations referenced by the isal_adler32
 * dispatcher. All share one signature: three arguments of type uint32_t,
 * uint8_t * and uint64_t, returning uint32_t.
 */
extern uint32_t
adler32_rvv(uint32_t, uint8_t *, uint64_t);
extern uint32_t
adler32_rvv128(uint32_t, uint8_t *, uint64_t);
extern uint32_t
adler32_base(uint32_t, uint8_t *, uint64_t);

/*
 * Select the isal_adler32 implementation for the running CPU.
 *
 * When built with HAVE_RVV and the kernel reports the V extension in
 * AT_HWCAP, the vlenb CSR (vector register length in bytes) is read:
 * 16 bytes selects adler32_rvv128, any other length selects adler32_rvv.
 * Otherwise adler32_base is returned.
 */
DEFINE_INTERFACE_DISPATCHER(isal_adler32)
{
#if HAVE_RVV
        const unsigned long hwcap = getauxval(AT_HWCAP);

        if (hwcap & HWCAP_RV('V')) {
                unsigned long vlenb;

                /* Only executed once the V extension is known to be present. */
                __asm__ volatile("csrr %0, vlenb" : "=r"(vlenb));
                if (vlenb == 16)
                        return adler32_rvv128;
                return adler32_rvv;
        }
#endif
        return adler32_base;
}
Loading