Skip to content

Commit

Permalink
Initial version
Browse files Browse the repository at this point in the history
  • Loading branch information
Jan-Willem Maessen committed Dec 23, 2021
0 parents commit a45a0b8
Show file tree
Hide file tree
Showing 4 changed files with 376 additions and 0 deletions.
9 changes: 9 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[package]
name = "bkk_hash"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
# Pinned to the 0.8 series rather than "*": a wildcard requirement accepts any
# future breaking release and is rejected by crates.io when publishing.
rand = "0.8"
256 changes: 256 additions & 0 deletions src/hash_set.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
use std::{cmp::min, collections::hash_map::HashMap, mem::size_of};

/// Number of key bits used to select a slot; the table has `1 << SHIFT` slots.
const SHIFT: usize = 10;
/// Number of slots in the fixed-size table.
pub const N: usize = 1 << SHIFT;
/// Right shift that leaves only the top `SHIFT` bits of a `usize` key
/// (see `bucket`).
const RSHIFT: usize = size_of::<usize>() * 8 - SHIFT;

/// Element type stored in the set: a plain machine word.
pub type Key = usize;

/// Histogram of probe lengths: maps a probe distance to the number of entries
/// currently recorded at that distance. (The map is keyed by `Key` only
/// because both are `usize`; the keys here are distances, not set elements.)
pub type ProbeHist = HashMap<Key, usize>;

/// State of one slot in the table.
///
/// For `Full` and `Tombstone` the first field is the home bucket that was
/// stored alongside the key (callers pass `bucket(key)`), the second is the
/// key itself.
#[derive(Copy, Clone, Debug)]
enum Entry {
/// Slot has never been occupied, or was cleared by `remove_tombstone`.
Empty,
/// Slot holds a live key; only these are counted in `HashSet::size`.
Full(usize, Key),
/// Slot holds a placeholder key written by `insert_tombstone`; probes treat
/// it like an occupied slot, while `insert_loop` may overwrite it.
Tombstone(usize, Key),
}

/// Fixed-capacity (`N` slots) linear-probing set of `Key`s that records
/// probe-length statistics as it is filled.
#[derive(Debug)]
pub struct HashSet {
// When true, `insert` runs a tombstone-insertion pass every time `size`
// reaches `next_clean_size`.
bkk: bool,
// Number of `Entry::Full` slots, maintained by `register_insert` /
// `register_remove`.
size: usize,
/// Sum of the probe distances of all registered entries.
pub sum_probe_len: usize,
// Live histogram: probe distance -> number of registered entries.
probe_hist: ProbeHist,
/// Snapshot of `probe_hist` pushed at the end of every `insert` call.
pub probe_hists: Vec<ProbeHist>,
/// One distance per `insert` call, as computed by `register_insert_len`.
pub insert_lens: Vec<usize>,
// The slot array itself.
set: [Entry; N],
/// Every key passed to `insert`, in call order.
pub set_order: Vec<Key>,
// Value of `size` at which the next tombstone pass is triggered.
next_clean_size: usize,
// `N / (N - size)` as computed by the most recent tombstone pass (starts at 1).
curr_x: usize,
}

/// Returns the home slot of key `k`: its top `SHIFT` bits.
///
/// Keys are therefore laid out in the table in increasing numeric order of
/// their home slot.
pub fn bucket(k: Key) -> usize {
    // Dividing by 2^RSHIFT discards the low RSHIFT bits, leaving the top bits.
    k / (1 << RSHIFT)
}

/// Decides whether a probe for key `n` (home bucket `bn`) should stop at slot
/// `i`, which currently holds key `k` with home bucket `bk`.
///
/// A home bucket greater than `i` means that key reached slot `i` by wrapping
/// around the end of the table.
///
/// * Both wrapped, or neither did: stop iff `n <= k`.
/// * Only the resident wrapped: never stop here.
/// * Only the probing key wrapped: always stop here.
fn entry_later(i: usize, bk: usize, k: Key, bn: usize, n: Key) -> bool {
    let probe_unwrapped = bn <= i;
    let resident_unwrapped = bk <= i;
    match (probe_unwrapped, resident_unwrapped) {
        (true, false) => false,
        (false, true) => true,
        // Same wrap state on both sides: fall back to plain key order.
        (true, true) | (false, false) => n <= k,
    }
}

impl HashSet {
pub fn new(bkk: bool) -> Self {
HashSet {
bkk,
size: 0,
sum_probe_len: 0,
probe_hist: ProbeHist::default(),
probe_hists: Vec::with_capacity(N),
insert_lens: Vec::with_capacity(N),
set: [Entry::Empty; N],
set_order: Vec::with_capacity(N + 1),
next_clean_size: N / 4,
curr_x: 1,
}
}

/// Returns the number of live keys (tombstones are not counted).
pub fn size(&self) -> usize {
self.size
}

/// Places key `n` (home bucket `bn`) starting at unmasked position `b`,
/// evicting and re-placing resident keys as necessary.
///
/// * An empty or tombstone slot takes the key and ends the loop.
/// * A slot already holding `n` ends the loop without changing anything.
/// * Any other full slot is overwritten with the key in hand; the evicted key
///   then becomes the key in hand and the walk continues at the next slot.
///
/// Returns the unmasked position where the loop stopped **plus one** (the
/// position itself may exceed `N` after wrapping); `insert` subtracts the one
/// again before recording the insert length.
fn insert_loop(&mut self, mut bn: usize, mut n: Key, mut b: usize) -> usize {
    loop {
        let slot = b & (N - 1);
        match self.set[slot] {
            // Key already present: nothing to register.
            Entry::Full(_, resident) if resident == n => return b + 1,
            Entry::Full(home, resident) => {
                self.set[slot] = Entry::Full(bn, n);
                self.register_remove(resident, b);
                self.register_insert(bn, n, b);
                // Carry the evicted key forward.
                n = resident;
                bn = home;
                // Re-express `b` relative to the evicted key's home bucket so
                // later distance computations see a position at or after it.
                if b >= N + bn {
                    b -= N;
                } else if bn > b {
                    b += N;
                }
            }
            Entry::Empty | Entry::Tombstone(..) => {
                self.set[slot] = Entry::Full(bn, n);
                self.register_insert(bn, n, b);
                return b + 1;
            }
        }
        b += 1;
    }
}

pub fn insert(&mut self, n: Key) {
self.set_order.push(n);
let b = self.probe_loc(n);
let bn = bucket(n);
let b = self.insert_loop(bn, n, b);
if self.bkk && self.size == self.next_clean_size {
self.curr_x = N / (N - self.size());
let x4 = 4 * self.curr_x;
let x2 = 2 * self.curr_x;
let nx2 = N / x2;
for ii in 1..=nx2 {
let i = ii * x2;
let k = (i << RSHIFT) - 1;
self.insert_tombstone(k);
}
self.next_clean_size += N / x4;
}
self.register_insert_len(b - 1, bn);
}

/// Removes the tombstone with key `n`, if there is one, and closes the gap.
///
/// Does nothing unless the slot where a probe for `n` stops holds exactly
/// `Tombstone(_, n)`. Otherwise every following entry that is not sitting in
/// its own home slot is moved one slot back, until an empty slot or an entry
/// at its home slot is reached; the last vacated slot becomes `Empty`.
///
/// Moving a `Full` entry one slot back shortens its probe distance, so the
/// probe statistics (`sum_probe_len`, `probe_hist`) are updated for each such
/// move to stay consistent with the table contents.
pub fn remove_tombstone(&mut self, n: Key) {
    let mut i = self.probe_loc(n) & (N - 1);
    if !matches!(self.set[i], Entry::Tombstone(_, k) if k == n) {
        return;
    }
    loop {
        let next = (i + 1) & (N - 1);
        match self.set[next] {
            Entry::Full(bk, k) if bk != next => {
                // Re-register the live key at its new, shorter distance.
                self.register_remove(k, next);
                self.set[i] = Entry::Full(bk, k);
                self.register_insert(bk, k, i);
            }
            Entry::Tombstone(bk, k) if bk != next => {
                // Tombstones are not counted in the statistics.
                self.set[i] = Entry::Tombstone(bk, k);
            }
            _ => {
                self.set[i] = Entry::Empty;
                return;
            }
        }
        i = next;
    }
}

/// Offers a tombstone with key `n` at the slot where a probe for `n` stops.
///
/// * Existing tombstone there: replaced by the new one.
/// * Live key other than `n` there: that key is evicted, the tombstone takes
///   the slot, and the evicted key is re-placed starting at the next slot.
/// * Empty slot, or `n` itself is stored there: nothing happens.
fn insert_tombstone(&mut self, n: Key) {
    let slot = self.probe_loc(n) & (N - 1);
    let marker = Entry::Tombstone(bucket(n), n);
    match self.set[slot] {
        Entry::Tombstone(..) => {
            self.set[slot] = marker;
        }
        Entry::Full(home, resident) if resident != n => {
            self.register_remove(resident, slot);
            self.set[slot] = marker;
            self.insert_loop(home, resident, slot + 1);
        }
        // Empty slot, or the key itself is stored here: leave untouched.
        Entry::Empty | Entry::Full(..) => {}
    }
}

/// Removes from the statistics a key `k` that was registered at position `b`.
///
/// The probe distance is `b - bucket(k)`, taken modulo the table length for
/// positions that wrapped. Decrements `sum_probe_len`, the matching histogram
/// bin and `size`.
///
/// Panics if the histogram has no bin for that distance.
fn register_remove(&mut self, k: Key, b: usize) {
    let home = bucket(k);
    let raw = if b < home { b + N - home } else { b - home };
    // `b` may be an unmasked position at least one full lap past the home slot.
    let dist = if raw >= N { raw - N } else { raw };
    self.sum_probe_len -= dist;
    let bin = self.probe_hist.get_mut(&dist).unwrap();
    *bin -= 1;
    self.size -= 1;
}

/// Adds to the statistics a key with home bucket `b0` that was placed at
/// position `b` (the key itself is not needed).
///
/// The probe distance is `b - b0`, with one table length added when the
/// position lies before the home bucket. Increments `sum_probe_len`, the
/// matching histogram bin and `size`.
fn register_insert(&mut self, b0: usize, _: Key, b: usize) {
    let dist = match b.checked_sub(b0) {
        Some(gap) => gap,
        // Position is before the home bucket: the probe wrapped around.
        None => b + N - b0,
    };
    self.sum_probe_len += dist;
    *self.probe_hist.entry(dist).or_default() += 1;
    self.size += 1;
}

/// Records the outcome of one `insert` call: pushes the distance from home
/// bucket `b0` to final position `b` (wrap-adjusted) onto `insert_lens`, and a
/// copy of the current probe histogram onto `probe_hists`.
fn register_insert_len(&mut self, b: usize, b0: usize) {
    let dist = b.checked_sub(b0).unwrap_or_else(|| b + N - b0);
    self.insert_lens.push(dist);
    let snapshot = self.probe_hist.clone();
    self.probe_hists.push(snapshot);
}

/// Walks forward from the home bucket of `n` and returns the first position
/// whose slot is empty, already holds key `n`, or holds an entry for which
/// `entry_later` says the probe must stop.
///
/// The returned position is *not* masked: it may be `>= N` when the walk
/// wrapped around the table. Callers mask it with `N - 1` to index the table.
fn probe_loc(&self, n: Key) -> usize {
    let home = bucket(n);
    let mut pos = home;
    loop {
        let slot = pos & (N - 1);
        match self.set[slot] {
            Entry::Empty => return pos,
            Entry::Full(bk, k) | Entry::Tombstone(bk, k)
                if k == n || entry_later(slot, bk, k, home, n) =>
            {
                return pos;
            }
            _ => pos += 1,
        }
    }
}

/// Returns how many slots past its home bucket a probe for `n` travels before
/// `probe_loc` stops (0 when it stops at the home bucket itself).
pub fn probe_len(&self, n: Key) -> usize {
    // `probe_loc` starts at the home bucket and only moves forward, so the
    // difference cannot underflow.
    self.probe_loc(n) - bucket(n)
}

/// Returns an iterator over the live keys, visiting the slots in table order
/// starting from slot 0.
pub fn iter(&'_ self) -> HashSetIter<'_> {
    let first_slot = 0;
    HashSetIter {
        next_bucket: first_slot,
        set: self,
    }
}
}

/// Borrowing iterator over the live keys of a `HashSet`, in slot order.
pub struct HashSetIter<'a> {
// The set being iterated.
set: &'a HashSet,
// Index of the next slot to inspect; iteration ends when it reaches `N`.
next_bucket: usize,
}

impl<'a> Iterator for HashSetIter<'a> {
type Item = &'a Key;

fn size_hint(&self) -> (usize, Option<usize>) {
let upper = min(N - self.next_bucket, self.set.size);
let lower = if self.next_bucket <= self.set.size {
self.set.size - self.next_bucket
} else {
0
};
(lower, Some(upper))
}

fn next(&mut self) -> Option<Self::Item> {
while self.next_bucket < N {
let b = self.next_bucket;
self.next_bucket += 1;
match &self.set.set[b] {
Entry::Full(_, k) => return Some(k),
_ => {}
}
}
return None;
}
}
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// NOTE(review): dead_code is silenced crate-wide with no stated reason —
// presumably some fields/helpers in `hash_set` exist only for experiments.
// Confirm, and narrow the allow to the specific items if possible.
#![allow(dead_code)]
/// Instrumented fixed-size linear-probing hash set used by the experiment binary.
pub mod hash_set;
109 changes: 109 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
use bkk_hash::hash_set::{bucket, HashSet, Key, ProbeHist, N};
use rand::random;

/// Prints one comma/tab-separated row per histogram in `hists`.
///
/// Each row holds: `desc`, the histogram's index `i`, `x = N / (N - i)`, the
/// mean and standard deviation of the recorded lengths, both again divided by
/// `x`, and then the count for every length from 0 up to the largest length
/// present (blank where a length has no bin).
fn dump_hists(desc: &str, hists: Vec<ProbeHist>) {
    for (i, hist) in hists.into_iter().enumerate() {
        let x = N as f64 / (N - i) as f64;
        print!("{}, {:3}, {:.3},", desc, i, x);

        // One pass gathers the sample count, sum and sum of squares.
        let (count, total, total_sq) = hist.iter().fold(
            (0usize, 0usize, 0usize),
            |(c, s, s2), (&len, &occ)| (c + occ, s + len * occ, s2 + len * len * occ),
        );
        let count = count as f64;
        let mean = total as f64 / count;
        let stddev = ((total_sq as f64 / count) - (mean * mean)).sqrt();
        print!("\t{:3.3},\t{:3.3},", mean, stddev);
        print!("\t{:3.3},\t{:3.3},", mean / x, stddev / x);

        let longest = hist.keys().copied().max().unwrap_or(0);
        for len in 0..=longest {
            match hist.get(&len) {
                Some(occ) => print!("\t{},", occ),
                None => print!("\t,"),
            }
        }
        println!();
    }
}

/// Adds each histogram in `new_hists` into the histogram at the same index in
/// `hists`, summing the counts bin by bin.
///
/// Takes a slice rather than `&mut Vec`, since the aggregate is never resized;
/// existing callers passing `&mut vec` keep working through deref coercion.
///
/// # Panics
///
/// Panics if `new_hists` is longer than `hists`.
fn agg_to_hists(hists: &mut [ProbeHist], new_hists: Vec<ProbeHist>) {
    // Checked up front so a length mismatch cannot leave a partial merge.
    assert!(
        new_hists.len() <= hists.len(),
        "agg_to_hists: {} new histograms but only {} aggregate slots",
        new_hists.len(),
        hists.len()
    );
    for (hist, new_hist) in hists.iter_mut().zip(new_hists) {
        for (len, count) in new_hist {
            *hist.entry(len).or_default() += count;
        }
    }
}

/// Runs the experiment for one mode (`bkk` on or off) and prints the results.
///
/// Each trial fills a fresh set with random keys until it holds `N` keys.
/// After every insert the probe-length statistics kept by the set are checked
/// against values recomputed from the table. Per-trial mean probe length is
/// printed, and after all trials the aggregated probe-length and insert-length
/// histograms are dumped.
///
/// Panics if any consistency check fails, or if a completely full table
/// contains an entry with probe length `N - 1`.
fn do_one(bkk: bool) {
    const TRIALS: usize = 1000;
    let mut agg_probe_hists = vec![ProbeHist::default(); N];
    let mut agg_insert_hists = vec![ProbeHist::default(); N];
    for trial in 0..TRIALS {
        let mut set = HashSet::new(bkk);
        while set.size() < N {
            let i: Key = random();
            set.insert(i);

            // Recompute the probe-length histogram and sum from scratch.
            let mut h = ProbeHist::new();
            let mut sum_probe_len = 0;
            for &j in set.iter() {
                let l = set.probe_len(j);
                sum_probe_len += l;
                *h.entry(l).or_insert(0) += 1;
            }
            if sum_probe_len != set.sum_probe_len {
                // Dump every key's probe length before the assertion fires.
                for &j in set.iter() {
                    println!("pl {:3} {} = {:3}", bucket(j), j, set.probe_len(j));
                }
            }
            assert_eq!(sum_probe_len, set.sum_probe_len, "{:#?}", set);

            // The set leaves zero-count bins behind; drop them before comparing.
            set.probe_hists.last_mut().unwrap().retain(|_, v| *v != 0);
            let latest = set.probe_hists.last().unwrap();
            assert_eq!(&h, latest);
            // This check previously discarded the result of `all`, so it
            // verified nothing; it is now actually asserted.
            assert!(
                latest.keys().all(|&k| k <= set.size()),
                "probe length exceeds set size {}",
                set.size()
            );
            assert_eq!(latest.values().copied().sum::<usize>(), set.size());
            assert_eq!(
                latest.iter().map(|(l, v)| *l * *v).sum::<usize>(),
                set.sum_probe_len
            );
            assert_eq!(set.size(), set.insert_lens.len());
        }
        println!(
            "{:3},\t{:.3}",
            trial,
            (set.sum_probe_len as f64 / set.size() as f64),
        );
        if set.probe_hists[N - 1].get(&(N - 1)).copied().unwrap_or(0) > 0 {
            for (i, &v) in set.iter().enumerate() {
                println!("{:3}: {:3} {}", i, bucket(v), v);
            }
            panic!("Full probe len!\n{:#?}", set.set_order);
        }
        agg_to_hists(&mut agg_probe_hists, set.probe_hists);
        for (i, &l) in set.insert_lens.iter().enumerate() {
            *agg_insert_hists[i].entry(l).or_insert(0) += 1;
        }
    }
    println!("what, i, x,\tmean,\tstddev,\tmean/x,\tstddev/x");
    dump_hists("pr", agg_probe_hists);
    dump_hists("in", agg_insert_hists);
}

fn main() {
do_one(true);
do_one(false);
}

0 comments on commit a45a0b8

Please sign in to comment.