Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 79 additions & 38 deletions harmony_model_checker/charm/charm.c
Original file line number Diff line number Diff line change
Expand Up @@ -2654,61 +2654,102 @@ static void do_work_b(struct worker *w){
}
}

static inline void do_work2_single_state(struct worker *w, struct shard *shard, struct state_header *sh){

struct state *sc = (struct state *) &sh[1];
unsigned int size = state_size(sc);

// See if this state has been computed before by looking up the node,
// or allocate if not.
bool new;
struct dict_assoc *hn = sdict_find_new(shard->states, &w->allocator,
sc, size, sh->noutgoing * sizeof(struct edge), &new, sh->hash);
// sc, size, sh->noutgoing * sizeof(struct edge), &new, meiyan3(sc));
struct node *next = (struct node *) &hn[1];
struct edge *edge = &node_edges(sh->node)[sh->edge_index];
edge->dst = next;

if (new) {
next->failed = edge->failed;
next->initial = false;
next->parent = sh->node;
next->len = sh->node->len + 1;
if (next->len > w->diameter) {
w->diameter = next->len;
}
next->nedges = sh->noutgoing;

assert(shard->tb_tail->nresults == shard->tb_size % NRESULTS);
assert(shard->tb_tail->next == NULL);
shard->tb_size++;
shard->tb_tail->results[shard->tb_tail->nresults++] = next;
if (shard->tb_tail->nresults == NRESULTS) {
struct results_block *rb = walloc_fast(w, sizeof(*shard->tb_tail));
rb->nresults = 0;
rb->next = NULL;
shard->tb_tail->next = rb;
shard->tb_tail = rb;
}
assert(shard->tb_tail->nresults == shard->tb_size % NRESULTS);
}

// See if the node points sideways or backwards, in which
// case cycles in the graph are possible
else {
w->miss++;
if (next != sh->node && next->len <= sh->node->len) {
w->cycles_possible = true;
}
}
}

static void do_work2(struct worker *w){
struct shard *shard = &w->shard;

// printf("WORK 2: %u: %u %lu\n", w->index, shard->sb_index, sizeof(shard->state_buffer));

int pre_fetch_buf_size = 20;
struct state_header **pre_fetch_buf = malloc(sizeof(struct state_header *) * pre_fetch_buf_size);
int pre_fetch_buf_cnt = 0;

for (unsigned int i = 0; i < w->nworkers; i++) {
struct shard *s2 = &global.workers[i].shard;
for (struct state_header *sh = s2->peers[w->index].first;
sh != NULL; sh = sh->next) {
struct state *sc = (struct state *) &sh[1];
unsigned int size = state_size(sc);

if (pre_fetch_buf_cnt >= pre_fetch_buf_size) {

// See if this state has been computed before by looking up the node,
// or allocate if not.
bool new;
struct dict_assoc *hn = sdict_find_new(shard->states, &w->allocator,
sc, size, sh->noutgoing * sizeof(struct edge), &new, sh->hash);
// sc, size, sh->noutgoing * sizeof(struct edge), &new, meiyan3(sc));
struct node *next = (struct node *) &hn[1];
struct edge *edge = &node_edges(sh->node)[sh->edge_index];
edge->dst = next;
struct state_header *tmp_sh = sh;

if (new) {
next->failed = edge->failed;
next->initial = false;
next->parent = sh->node;
next->len = sh->node->len + 1;
if (next->len > w->diameter) {
w->diameter = next->len;
for (int i = 0; i < pre_fetch_buf_cnt; i++) {
sh = pre_fetch_buf[i];
sdict_prefetch_beginning_assoc(shard->states, sh->hash);
}
next->nedges = sh->noutgoing;

assert(shard->tb_tail->nresults == shard->tb_size % NRESULTS);
assert(shard->tb_tail->next == NULL);
shard->tb_size++;
shard->tb_tail->results[shard->tb_tail->nresults++] = next;
if (shard->tb_tail->nresults == NRESULTS) {
struct results_block *rb = walloc_fast(w, sizeof(*shard->tb_tail));
rb->nresults = 0;
rb->next = NULL;
shard->tb_tail->next = rb;
shard->tb_tail = rb;
for (int i = 0; i < pre_fetch_buf_cnt; i++) {
sh = pre_fetch_buf[i];
do_work2_single_state(w, shard, sh);
}
assert(shard->tb_tail->nresults == shard->tb_size % NRESULTS);
}
sh = tmp_sh;

// See if the node points sideways or backwards, in which
// case cycles in the graph are possible
pre_fetch_buf_cnt = 0;
pre_fetch_buf[pre_fetch_buf_cnt++] = sh;
sdict_prefetch_base_ptr(shard->states, sh->hash);
}
else {
w->miss++;
if (next != sh->node && next->len <= sh->node->len) {
w->cycles_possible = true;
}
pre_fetch_buf[pre_fetch_buf_cnt++] = sh;
sdict_prefetch_base_ptr(shard->states, sh->hash);
}
}

for (int i = 0; i < pre_fetch_buf_cnt; i++) {
struct state_header *sh = pre_fetch_buf[i];
sdict_prefetch_beginning_assoc(shard->states, sh->hash);
}

for (int i = 0; i < pre_fetch_buf_cnt; i++) {
struct state_header *sh = pre_fetch_buf[i];
do_work2_single_state(w, shard, sh);
}
}

assert(shard->tb_index <= shard->tb_size);
Expand Down
88 changes: 88 additions & 0 deletions harmony_model_checker/charm/fastrange.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/**
* Fair maps to intervals without division.
* Reference : http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
*
* (c) Daniel Lemire
* Apache License 2.0
*/
#ifndef INCLUDE_FASTRANGE_H
#define INCLUDE_FASTRANGE_H
#include <iso646.h> // mostly for Microsoft compilers
#include <stdint.h> // part of Visual Studio 2010 and better
#include <stddef.h> // for size_t in C
#include <limits.h> // for size_t in C

/**
* Given a value "word", produces an integer in [0,p) without division.
* The function is as fair as possible in the sense that if you iterate
* through all possible values of "word", then you will generate all
* possible outputs as uniformly as possible.
*/
static inline uint32_t fastrange32(uint32_t word, uint32_t p) {
return (uint32_t)(((uint64_t)word * (uint64_t)p) >> 32);
}

#if defined(_MSC_VER) && defined (_WIN64)
#include <intrin.h>// should be part of all recent Visual Studio
#pragma intrinsic(_umul128)
#endif // defined(_MSC_VER) && defined (_WIN64)


/**
* Given a value "word", produces an integer in [0,p) without division.
* The function is as fair as possible in the sense that if you iterate
* through all possible values of "word", then you will generate all
* possible outputs as uniformly as possible.
*/
static inline uint64_t fastrange64(uint64_t word, uint64_t p) {
#ifdef __SIZEOF_INT128__ // then we know we have a 128-bit int
return (uint64_t)(((__uint128_t)word * (__uint128_t)p) >> 64);
#elif defined(_MSC_VER) && defined(_WIN64)
// supported in Visual Studio 2005 and better
uint64_t highProduct;
_umul128(word, p, &highProduct); // ignore output
return highProduct;
unsigned __int64 _umul128(
unsigned __int64 Multiplier,
unsigned __int64 Multiplicand,
unsigned __int64 *HighProduct
);
#else
return word % p; // fallback
#endif // __SIZEOF_INT128__
}


#ifndef UINT32_MAX
#define UINT32_MAX (0xffffffff)
#endif // UINT32_MAX

/**
* Given a value "word", produces an integer in [0,p) without division.
* The function is as fair as possible in the sense that if you iterate
* through all possible values of "word", then you will generate all
* possible outputs as uniformly as possible.
*/
static inline size_t fastrangesize(size_t word, size_t p) {
#if (SIZE_MAX == UINT32_MAX)
return (size_t)fastrange32(word, p);
#else // assume 64-bit
return (size_t)fastrange64(word, p);
#endif // SIZE_MAX == UINT32_MAX
}

/**
* Given a value "word", produces an integer in [0,p) without division.
* The function is as fair as possible in the sense that if you iterate
* through all possible values of "word", then you will generate all
* possible outputs as uniformly as possible.
*/
static inline int fastrangeint(int word, int p) {
#if (SIZE_MAX == UINT32_MAX)
return (int)fastrange32(word, p);
#else // assume 64-bit
return (int)fastrange64(word, p);
#endif // (SIZE_MAX == UINT32_MAX)
}

#endif// INCLUDE_FASTRANGE_H
15 changes: 15 additions & 0 deletions harmony_model_checker/charm/prefetcher.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#ifndef PREFETCHER_H
#define PREFETCHER_H

#include <stdint.h>

#if defined(__GNUC__) || defined(__clang__)
#define PREFETCH(addr, rw, locality) __builtin_prefetch((addr), (rw), (locality))
#elif defined(_WIN32)
#include <xmmintrin.h>
#define PREFETCH(addr, rw, locality) _mm_prefetch((const char*)(addr), _MM_HINT_T0)
#else
#define PREFETCH(addr, rw, locality) // No prefetch available
#endif

#endif // PREFETCHER_H
1 change: 1 addition & 0 deletions harmony_model_checker/charm/sdict.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "fastrange.h"
#include "head.h"

#include <assert.h>
Expand Down
9 changes: 9 additions & 0 deletions harmony_model_checker/charm/sdict.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,18 @@
#include <string.h> /* memcpy/memcmp */
#include <stdbool.h>

#include "fastrange.h"
#include "prefetcher.h"
#include "hashdict.h"
#include "thread.h"


#define sdict_prefetch_base_ptr(dict, hash) \
PREFETCH((dict)->stable + (hash) % (dict)->length, 1, 1)

#define sdict_prefetch_beginning_assoc(dict, hash) \
PREFETCH(&(dict)->stable[(hash) % (dict)->length], 1, 1)

struct sdict {
char *whoami;
unsigned int value_len;
Expand Down