Skip to content

Commit 8d2b1c4

Browse files
committed
make tryMerge and friends close to safe
1 parent a3c2cec commit 8d2b1c4

File tree

1 file changed

+87
-109
lines changed

1 file changed

+87
-109
lines changed

lib/dictBuilder/zdict.rs

Lines changed: 87 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use libc::{free, malloc, memcpy, size_t};
55
use crate::lib::common::bits::{ZSTD_NbCommonBytes, ZSTD_highbit32};
66
use crate::lib::common::error_private::{ERR_getErrorName, ERR_isError, Error};
77
use crate::lib::common::huf::{HUF_CElt, HUF_CTABLE_WORKSPACE_SIZE_U32, HUF_WORKSPACE_SIZE};
8-
use crate::lib::common::mem::{MEM_read64, MEM_readLE32, MEM_readST, MEM_writeLE32};
8+
use crate::lib::common::mem::{MEM_readLE32, MEM_readST, MEM_writeLE32};
99
use crate::lib::common::xxhash::ZSTD_XXH64;
1010
use crate::lib::common::zstd_internal::{
1111
repStartValue, LLFSELog, MLFSELog, MaxLL, MaxML, OffFSELog, ZSTD_REP_NUM,
@@ -391,142 +391,120 @@ unsafe fn ZDICT_analyzePos(
391391
solution
392392
}
393393

// isIncluded: reports whether the first `length` bytes of `ip` equal the first
// `length` bytes of `into`.
// The removed (`-`) version walks two raw pointers and dereferences exactly
// `length` positions on each side. The added (`+`) version takes byte slices and
// caps both iterators with `take(length)`, so a slice shorter than `length`
// ends the comparison early instead of being read past its end.
394-
unsafe fn isIncluded(
395-
ip: *const core::ffi::c_char,
396-
into: *const core::ffi::c_char,
397-
length: size_t,
398-
) -> bool {
399-
for u in 0..length {
400-
if *ip.add(u) != *into.add(u) {
401-
return false;
402-
}
403-
}
394+
fn isIncluded(ip: &[u8], into: &[u8], length: size_t) -> bool {
395+
// NOTE: the slices may not actually have `length` elements,
396+
// that is OK if there is an unequal value before that.
397+
let a = ip.iter().take(length);
398+
let b = into.iter().take(length);
404399

405-
true
// `Iterator::eq` is true only when both capped runs yield the same number of
// elements and every pair of elements matches.
400+
a.eq(b)
406401
}
407402

// ZDICT_tryMerge: tries to fold `elt` into an entry of `table` that overlaps or
// contains it, and returns the index where the merged entry ends up, or 0 when
// nothing was merged.
// - `table[0].pos` is read as the element count; candidates are scanned from
//   index 1 upward.
// - `eltNbToSkip` is an index that is never considered as a merge target.
// - `buffer` is the byte buffer that `pos` values index into (used by the final
//   content comparison).
// The added (`+`) lines replace raw-pointer arithmetic with slice indexing, so an
// out-of-range index now panics instead of touching memory outside the slice.
408-
unsafe fn ZDICT_tryMerge(
409-
table: *mut DictItem,
403+
fn ZDICT_tryMerge(
404+
table: &mut [DictItem],
410405
mut elt: DictItem,
411406
eltNbToSkip: u32,
412-
buffer: *const core::ffi::c_void,
407+
buffer: &[u8],
413408
) -> u32 {
414-
let tableSize = (*table).pos;
409+
let tableSize = table[0].pos;
415410
let eltEnd = (elt.pos).wrapping_add(elt.length);
416-
let buf = buffer as *const core::ffi::c_char;
417-
let mut u: u32 = 0;
418-
u = 1;
419-
while u < tableSize {
420-
if (u != eltNbToSkip)
421-
&& (*table.offset(u as isize)).pos > elt.pos
422-
&& (*table.offset(u as isize)).pos <= eltEnd
411+
let buf = buffer;
412+
413+
/* tail overlap */
414+
let mut u = 1usize;
415+
while u < tableSize as usize {
416+
if (u as u32 != eltNbToSkip)
417+
&& table[u].pos > elt.pos
418+
&& table[u].pos <= eltEnd
423419
{
424-
let addedLength = ((*table.offset(u as isize)).pos).wrapping_sub(elt.pos);
425-
let fresh2 = &mut (*table.offset(u as isize)).length;
426-
*fresh2 = (*fresh2).wrapping_add(addedLength);
427-
(*table.offset(u as isize)).pos = elt.pos;
428-
let fresh3 = &mut (*table.offset(u as isize)).savings;
429-
*fresh3 = (*fresh3).wrapping_add(elt.savings * addedLength / elt.length);
430-
let fresh4 = &mut (*table.offset(u as isize)).savings;
431-
*fresh4 = (*fresh4).wrapping_add(elt.length / 8);
432-
elt = *table.offset(u as isize);
433-
while u > 1 && (*table.offset(u.wrapping_sub(1) as isize)).savings < elt.savings {
434-
*table.offset(u as isize) = *table.offset(u.wrapping_sub(1) as isize);
435-
u = u.wrapping_sub(1);
420+
/* append */
// The subtraction below cannot underflow: this branch requires
// `table[u].pos > elt.pos`.
421+
let addedLength = table[u].pos - elt.pos;
422+
table[u].length += addedLength;
423+
table[u].pos = elt.pos;
424+
table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
425+
table[u].savings += elt.length / 8; /* rough approx bonus */
426+
elt = table[u];
427+
/* sort : improve rank */
428+
while (u > 1) && (table[u - 1].savings < elt.savings) {
429+
table[u] = table[u - 1];
430+
u -= 1;
436431
}
437-
*table.offset(u as isize) = elt;
438-
return u;
432+
table[u] = elt;
433+
return u as u32;
439434
}
440435
u = u.wrapping_add(1);
441436
}
442-
u = 1;
443-
while u < tableSize {
444-
if u != eltNbToSkip {
445-
if ((*table.offset(u as isize)).pos).wrapping_add((*table.offset(u as isize)).length)
446-
>= elt.pos
447-
&& (*table.offset(u as isize)).pos < elt.pos
448-
{
449-
let addedLength_0 = eltEnd as core::ffi::c_int
450-
- ((*table.offset(u as isize)).pos)
451-
.wrapping_add((*table.offset(u as isize)).length)
452-
as core::ffi::c_int;
453-
let fresh5 = &mut (*table.offset(u as isize)).savings;
454-
*fresh5 = (*fresh5).wrapping_add(elt.length / 8);
455-
if addedLength_0 > 0 {
456-
let fresh6 = &mut (*table.offset(u as isize)).length;
457-
*fresh6 = (*fresh6 as core::ffi::c_uint)
458-
.wrapping_add(addedLength_0 as core::ffi::c_uint);
459-
let fresh7 = &mut (*table.offset(u as isize)).savings;
460-
*fresh7 = (*fresh7 as core::ffi::c_uint).wrapping_add(
461-
(elt.savings)
462-
.wrapping_mul(addedLength_0 as core::ffi::c_uint)
463-
.wrapping_div(elt.length),
464-
);
465-
}
466-
elt = *table.offset(u as isize);
467-
while u > 1 && (*table.offset(u.wrapping_sub(1) as isize)).savings < elt.savings {
468-
*table.offset(u as isize) = *table.offset(u.wrapping_sub(1) as isize);
469-
u = u.wrapping_sub(1);
470-
}
471-
*table.offset(u as isize) = elt;
472-
return u;
437+
438+
/* front overlap */
439+
let mut u = 1usize;
440+
while u < tableSize as usize {
441+
if u == eltNbToSkip as usize {
442+
u = u.wrapping_add(1);
443+
continue;
444+
}
445+
446+
/* overlap, existing < new */
// NOTE(review): `table[u].pos + table[u].length` uses plain `+` here and two
// lines below, where the removed code used `wrapping_add`; an overflowing sum
// would now panic in builds with overflow checks enabled. Confirm the sum
// always fits in the field type.
447+
if (table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos) {
448+
/* append */
449+
let addedLength = eltEnd as i32 - (table[u].pos + table[u].length) as i32; /* note: can be negative */
450+
table[u].savings += elt.length / 8; /* rough approx bonus */
451+
if addedLength > 0 {
452+
/* otherwise, elt fully included into existing */
// `addedLength` is strictly positive on this path, so `unsigned_abs()` is
// simply its value as an unsigned integer.
453+
table[u].length += addedLength.unsigned_abs();
454+
/* rough approx */
455+
table[u].savings += elt.savings * addedLength.unsigned_abs() / elt.length;
473456
}
474-
if MEM_read64(
475-
buf.offset((*table.offset(u as isize)).pos as isize) as *const core::ffi::c_void
476-
) == MEM_read64(buf.offset(elt.pos as isize).add(1) as *const core::ffi::c_void)
477-
&& isIncluded(
478-
buf.offset((*table.offset(u as isize)).pos as isize),
479-
buf.offset(elt.pos as isize).add(1),
480-
(*table.offset(u as isize)).length as size_t,
481-
)
482-
{
483-
let addedLength_1 = Ord::max(
484-
(elt.length).wrapping_sub((*table.offset(u as isize)).length),
485-
1,
486-
) as size_t;
487-
(*table.offset(u as isize)).pos = elt.pos;
488-
let fresh8 = &mut (*table.offset(u as isize)).savings;
489-
*fresh8 = (*fresh8).wrapping_add(
490-
(elt.savings as size_t * addedLength_1 / elt.length as size_t) as u32,
491-
);
492-
(*table.offset(u as isize)).length = Ord::min(
493-
elt.length,
494-
((*table.offset(u as isize)).length).wrapping_add(1),
495-
);
496-
return u;
457+
/* sort : improve rank */
458+
elt = table[u];
459+
while (u > 1) && (table[u - 1].savings < elt.savings) {
460+
table[u] = table[u - 1];
461+
u -= 1;
497462
}
463+
table[u] = elt;
464+
return u as u32;
498465
}
// Compare the 8 bytes at the entry's position with the 8 bytes starting one
// past `elt.pos` (the removed code did this with two `MEM_read64` loads), then
// confirm with `isIncluded` over the entry's full length.
// NOTE(review): `[..8]` panics if fewer than 8 bytes remain after either start
// offset; confirm `buffer` always leaves that much room.
466+
467+
if buf[table[u].pos as usize..][..8] == buf[elt.pos as usize + 1..][..8] {
468+
if isIncluded(
469+
&buf[table[u].pos as usize..],
470+
&buf[elt.pos as usize + 1..],
471+
table[u].length as usize,
472+
) {
// NOTE(review): `checked_sub(..).unwrap_or(1)` yields 1 only when the
// subtraction would underflow; for equal lengths it yields 0, whereas the
// removed code computed `Ord::max(wrapping_sub(..), 1)`, which yields 1 in
// that case. Confirm the change is intended.
473+
let addedLength = elt.length.checked_sub(table[u].length).unwrap_or(1);
474+
table[u].pos = elt.pos;
475+
table[u].savings += elt.savings * addedLength / elt.length;
476+
table[u].length = Ord::min(elt.length, table[u].length + 1);
// Unlike the two overlap branches, this path returns without re-ranking
// the entry by `savings`.
477+
return u as u32;
478+
}
479+
}
480+
499481
u = u.wrapping_add(1);
500482
}
483+
501484
0
502485
}
503486

// ZDICT_removeDictItem: deletes entry `id` from `table` by copying each later
// entry one slot down, then decrements the element count kept in `table[0].pos`.
// `id == 0` is rejected (debug assertion plus an early return); slot 0 carries
// the count rather than an entry.
504-
unsafe fn ZDICT_removeDictItem(table: *mut DictItem, id: u32) {
487+
fn ZDICT_removeDictItem(table: &mut [DictItem], id: u32) {
505488
debug_assert_ne!(id, 0);
506489
if id == 0 {
507490
return; // protection, should never happen
508491
}
509-
let max = (*table).pos as isize; // convention: table[0].pos stores the number of elements
510-
for u in id as isize..max.wrapping_sub(1) {
511-
*table.offset(u) = *table.offset(u.wrapping_add(1));
492+
let max = table[0].pos as usize; // convention: table[0].pos stores the number of elements
// Shift entries id+1 .. max-1 down by one slot. The copy left behind at index
// max-1 falls outside the live range once the count is decremented below.
493+
for u in id as usize..max.wrapping_sub(1) {
494+
table[u] = table[u + 1];
512495
}
513-
(*table).pos = ((*table).pos).wrapping_sub(1);
496+
table[0].pos -= 1;
514497
}
515498

// ZDICT_insertDictItem: adds `elt` to `table`. It first attempts a merge through
// `ZDICT_tryMerge`; the `// insert` section places `elt` directly after the last
// entry whose `savings` is not smaller, shifting smaller-savings entries up one
// slot, and then updates the element count in `table[0].pos`.
// `table.len()` is used as the capacity (`maxSize`).
// NOTE(review): the lines between the merge loop and the `// insert` section are
// not shown in this hunk, so how the merge path exits is not visible here.
516-
unsafe fn ZDICT_insertDictItem(
517-
table: &mut [DictItem],
518-
elt: DictItem,
519-
buffer: *const core::ffi::c_void,
520-
) {
499+
fn ZDICT_insertDictItem(table: &mut [DictItem], elt: DictItem, buffer: &[u8]) {
521500
let maxSize = table.len() as u32;
522-
let table = table.as_mut_ptr();
523501

524502
// merge if possible
525503
let mut mergeId = ZDICT_tryMerge(table, elt, 0, buffer);
526504
if mergeId != 0 {
527505
let mut newMerge = 1;
528506
while newMerge != 0 {
// Re-run the merge on the entry that just absorbed `elt`, skipping its own
// index; if it merges into another entry, slot `mergeId` is removed.
529-
newMerge = ZDICT_tryMerge(table, *table.offset(mergeId as isize), mergeId, buffer);
507+
newMerge = ZDICT_tryMerge(table, table[mergeId as usize], mergeId, buffer);
530508
if newMerge != 0 {
531509
ZDICT_removeDictItem(table, mergeId);
532510
}
@@ -537,17 +515,17 @@ unsafe fn ZDICT_insertDictItem(
537515

538516
// insert
539517
let mut current: u32 = 0;
540-
let mut nextElt = (*table).pos;
518+
let mut nextElt = table[0].pos;
// When the table is full, `nextElt` is clamped to the last slot, so whatever
// is stored there gets overwritten by the shift or by `elt` itself.
541519
if nextElt >= maxSize {
542520
nextElt = maxSize.wrapping_sub(1);
543521
}
544522
current = nextElt.wrapping_sub(1);
545-
while (*table.offset(current as isize)).savings < elt.savings {
546-
*table.offset(current.wrapping_add(1) as isize) = *table.offset(current as isize);
// NOTE(review): the scan has no explicit lower bound on `current`; presumably
// `table[0].savings` acts as a sentinel that stops it. Confirm against the
// code that initialises the table.
523+
while (table[current as usize]).savings < elt.savings {
524+
table[current.wrapping_add(1) as usize] = table[current as usize];
547525
current = current.wrapping_sub(1);
548526
}
549-
*table.offset(current.wrapping_add(1) as isize) = elt;
550-
(*table).pos = nextElt.wrapping_add(1);
527+
table[current as usize + 1] = elt;
528+
table[0].pos = nextElt.wrapping_add(1);
551529
}
552530

553531
unsafe fn ZDICT_dictSize(dictList: &[DictItem]) -> u32 {
@@ -655,7 +633,7 @@ unsafe fn ZDICT_trainBuffer_legacy(
655633
continue;
656634
}
657635

658-
ZDICT_insertDictItem(dictList, solution, buffer.as_ptr().cast());
636+
ZDICT_insertDictItem(dictList, solution, buffer);
659637
cursor += solution.length as usize;
660638

661639
if notificationLevel >= 2 && displayClock.elapsed() > refresh_rate {

0 commit comments

Comments
 (0)