chore: move from tools/dsa (#1)
Moved the implementation of this hash table from `tools/dsa` #1. Reviewed-on: #1 Co-authored-by: M.V. Hutz <git@maximhutz.me> Co-committed-by: M.V. Hutz <git@maximhutz.me>
This commit was merged in pull request #1.
This commit is contained in:
62
.gitea/workflows/lint.yml
Normal file
62
.gitea/workflows/lint.yml
Normal file
@@ -0,0 +1,62 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
|
||||
- uses: golangci/golangci-lint-action@v7
|
||||
with:
|
||||
version: latest
|
||||
|
||||
unit-test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
|
||||
- name: Run unit tests
|
||||
run: go test ./... -cover -v
|
||||
|
||||
fuzz-test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
|
||||
- name: Run fuzz tests
|
||||
run: |
|
||||
for func in $(grep -r --include='*_test.go' -oh 'func Fuzz\w*' . | sed 's/func //'); do
|
||||
go test ./... -fuzz="^${func}$" -fuzztime=30s
|
||||
done
|
||||
|
||||
mutation-test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
|
||||
- name: Install gremlins
|
||||
run: go install github.com/go-gremlins/gremlins/cmd/gremlins@latest
|
||||
|
||||
- name: Run mutation tests
|
||||
run: gremlins unleash
|
||||
235
.golangci.yml
Normal file
235
.golangci.yml
Normal file
@@ -0,0 +1,235 @@
|
||||
---
|
||||
# golangci-lint configuration file made by @ccoVeille
|
||||
# Source: https://github.com/ccoVeille/golangci-lint-config-examples/
|
||||
# Author: @ccoVeille
|
||||
# License: MIT
|
||||
# Variant: 03-safe
|
||||
# Version: v2.0.0
|
||||
#
|
||||
version: "2"
|
||||
|
||||
formatters:
|
||||
enable:
|
||||
# format the code
|
||||
- gofmt
|
||||
# format the block of imports
|
||||
- gci
|
||||
|
||||
settings:
|
||||
# format the code with Go standard library
|
||||
gofmt:
|
||||
# simplify the code
|
||||
# https://pkg.go.dev/cmd/gofmt#hdr-The_simplify_command
|
||||
simplify: true
|
||||
rewrite-rules:
|
||||
# replace `interface{}` with `any` in the code on format
|
||||
- pattern: 'interface{}'
|
||||
replacement: 'any'
|
||||
|
||||
# make sure imports are always in a deterministic order
|
||||
# https://github.com/daixiang0/gci/
|
||||
gci: # define the section orders for imports
|
||||
sections:
|
||||
# Standard section: captures all standard packages.
|
||||
- standard
|
||||
# Default section: catchall that is not standard or custom
|
||||
- default
|
||||
# imports from the local module, kept in their own separate section
|
||||
- localmodule
|
||||
|
||||
linters:
|
||||
exclusions:
|
||||
# these presets were present in the v1 version of golangci-lint
|
||||
# it's interesting to keep them when migrating, but removing them should be the goal
|
||||
presets:
|
||||
# exclude check on comments format in godoc
|
||||
# These are common false positives in poor code
|
||||
# you should not use this on recent code you write from scratch
|
||||
# More information: https://golangci-lint.run/usage/false-positives/#comments
|
||||
#
|
||||
# Please uncomment the following line if your code is not using the godoc format
|
||||
# - comments
|
||||
|
||||
# Common false positives
|
||||
# feel free to remove this if you don't have any false positives
|
||||
# More information: https://golangci-lint.run/usage/false-positives/#common-false-positives
|
||||
- common-false-positives
|
||||
|
||||
# Legacy preset is not recommended anymore
|
||||
# More information: https://golangci-lint.run/usage/false-positives/#legacy
|
||||
- legacy
|
||||
|
||||
# std-error-handling is a set of rules that avoid reporting unhandled errors on common functions/methods
|
||||
# More information: https://golangci-lint.run/usage/false-positives/#std-error-handling
|
||||
- std-error-handling
|
||||
|
||||
# some linters are enabled by default
|
||||
# https://golangci-lint.run/usage/linters/
|
||||
#
|
||||
# enable some extra linters
|
||||
enable:
|
||||
# Errcheck is a program for checking for unchecked errors in Go code.
|
||||
- errcheck
|
||||
|
||||
# Vet examines Go source code and reports suspicious constructs.
|
||||
- govet
|
||||
|
||||
# Detects when assignments to existing variables are not used.
|
||||
- ineffassign
|
||||
|
||||
# It's a set of rules from staticcheck. See https://staticcheck.io/
|
||||
- staticcheck
|
||||
|
||||
# Checks Go code for unused constants, variables, functions and types.
|
||||
- unused
|
||||
|
||||
# Fast, configurable, extensible, flexible, and beautiful linter for Go.
|
||||
# Drop-in replacement of golint.
|
||||
- revive
|
||||
|
||||
# make sure to use t.Helper() when needed
|
||||
- thelper
|
||||
|
||||
# mirror suggests rewrites to avoid unnecessary []byte/string conversion
|
||||
- mirror
|
||||
|
||||
# detect the possibility to use variables/constants from the Go standard library.
|
||||
- usestdlibvars
|
||||
|
||||
# Finds commonly misspelled English words.
|
||||
- misspell
|
||||
|
||||
# Checks for duplicate words in the source code.
|
||||
- dupword
|
||||
|
||||
# linter to detect errors invalid key values count
|
||||
- loggercheck
|
||||
|
||||
# detect when a package or method could be replaced by one from the standard library
|
||||
- exptostd
|
||||
|
||||
# detects nested contexts in loops or function literals
|
||||
- fatcontext
|
||||
|
||||
# Reports uses of functions with replacement inside the testing package.
|
||||
- usetesting
|
||||
|
||||
settings:
|
||||
revive:
|
||||
rules:
|
||||
# these are the default revive rules
|
||||
# you can remove the whole "rules" node if you want
|
||||
# BUT
|
||||
# ! /!\ they all need to be present when you want to add more rules than the default ones
|
||||
# otherwise, you won't have the default rules, but only the ones you define in the "rules" node
|
||||
|
||||
# Blank import should be only in a main or test package, or have a comment justifying it.
|
||||
- name: blank-imports
|
||||
|
||||
# Packages should have comments of the form "Package x ...".
|
||||
- name: package-comments
|
||||
|
||||
# context.Context() should be the first parameter of a function when provided as argument.
|
||||
- name: context-as-argument
|
||||
arguments:
|
||||
- allowTypesBefore: "*testing.T"
|
||||
|
||||
# Basic types should not be used as a key in `context.WithValue`
|
||||
- name: context-keys-type
|
||||
|
||||
# Importing with `.` makes the programs much harder to understand
|
||||
- name: dot-imports
|
||||
|
||||
# Empty blocks make code less readable and could be a symptom of a bug or unfinished refactoring.
|
||||
- name: empty-block
|
||||
|
||||
# for better readability, variables of type `error` must be named with the prefix `err`.
|
||||
- name: error-naming
|
||||
|
||||
# for better readability, the errors should be last in the list of returned values by a function.
|
||||
- name: error-return
|
||||
|
||||
# for better readability, error messages should not be capitalized or end with punctuation or a newline.
|
||||
- name: error-strings
|
||||
|
||||
# report when replacing `errors.New(fmt.Sprintf())` with `fmt.Errorf()` is possible
|
||||
- name: errorf
|
||||
|
||||
# check naming and commenting conventions on exported symbols.
|
||||
- name: exported
|
||||
arguments:
|
||||
# make error messages clearer
|
||||
- "sayRepetitiveInsteadOfStutters"
|
||||
# require comments on public interface methods
|
||||
- "checkPublicInterface"
|
||||
|
||||
# incrementing an integer variable by 1 is recommended to be done using the `++` operator
|
||||
- name: increment-decrement
|
||||
|
||||
# highlights redundant else-blocks that can be eliminated from the code
|
||||
# - name: indent-error-flow
|
||||
|
||||
# This rule suggests a shorter way of writing ranges that do not use the second value.
|
||||
- name: range
|
||||
|
||||
# receiver names in a method should reflect the struct name (p for Person, for example)
|
||||
- name: receiver-naming
|
||||
|
||||
# redefining built in names (true, false, append, make) can lead to bugs very difficult to detect.
|
||||
- name: redefines-builtin-id
|
||||
|
||||
# redundant else-blocks that can be eliminated from the code.
|
||||
# - name: superfluous-else
|
||||
|
||||
# prevent confusing name for variables when using `time` package
|
||||
- name: time-naming
|
||||
|
||||
# warns when an exported function or method returns a value of an un-exported type.
|
||||
- name: unexported-return
|
||||
|
||||
# spots and proposes to remove unreachable code. also helps to spot errors
|
||||
- name: unreachable-code
|
||||
|
||||
# Functions or methods with unused parameters can be a symptom of an unfinished refactoring or a bug.
|
||||
- name: unused-parameter
|
||||
|
||||
# report when a variable declaration can be simplified
|
||||
- name: var-declaration
|
||||
|
||||
# warns when initialism, variable or package naming conventions are not followed.
|
||||
- name: var-naming
|
||||
|
||||
misspell:
|
||||
# Correct spellings using locale preferences for US or UK.
|
||||
# Setting locale to US will correct the British spelling of 'colour' to 'color'.
|
||||
# Default ("") is to use a neutral variety of English.
|
||||
locale: US
|
||||
|
||||
# List of words to ignore
|
||||
# among the one defined in https://github.com/golangci/misspell/blob/master/words.go
|
||||
ignore-rules: []
|
||||
# - valor
|
||||
# - and
|
||||
|
||||
# Extra word corrections.
|
||||
extra-words: []
|
||||
# - typo: "whattever"
|
||||
# correction: "whatever"
|
||||
|
||||
output:
|
||||
# Order to use when sorting results.
|
||||
# Possible values: `file`, `linter`, and `severity`.
|
||||
#
|
||||
# If the severity values are inside the following list, they are ordered in this order:
|
||||
# 1. error
|
||||
# 2. warning
|
||||
# 3. high
|
||||
# 4. medium
|
||||
# 5. low
|
||||
# Otherwise, they are sorted alphabetically.
|
||||
#
|
||||
# Default: ["file"]
|
||||
sort-order:
|
||||
- linter
|
||||
- severity
|
||||
- file # filepath, line, and column.
|
||||
7
.gremlins.yaml
Normal file
7
.gremlins.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
# yaml-language-server: $schema=https://gremlins.dev/0.6/schema/configuration.json
|
||||
|
||||
unleash:
|
||||
timeout-coefficient: 50
|
||||
|
||||
workers: 4
|
||||
dry-run: false
|
||||
12
Makefile
Normal file
12
Makefile
Normal file
@@ -0,0 +1,12 @@
|
||||
# None of these targets produce a file of the same name, so mark them phony;
# otherwise a stray `docs/` or `unit` path would make them silently no-ops.
.PHONY: unit mutation fuzz docs

# Run the unit tests (and examples) with coverage.
unit:
	go test ./... -cover -v

# Run mutation testing; settings are read from .gremlins.yaml.
mutation:
	gremlins unleash

# Run a single fuzz target: `make fuzz FN=FuzzInsertLookup`.
fuzz:
	go test ./... -fuzz=$(FN)

# Serve the package documentation locally. The URL matches the module path
# declared in go.mod.
docs:
	@echo ">>> Visit: http://localhost:6060/pkg/git.maximhutz.com/tools/go-cuckoo/"
	godoc -http=:6060
|
||||
73
bucket.go
Normal file
73
bucket.go
Normal file
@@ -0,0 +1,73 @@
|
||||
package cuckoo
|
||||
|
||||
// entry is a single key-value pair held by the table.
type entry[K, V any] struct {
	// key identifies the pair.
	key K
	// value is the payload stored under key.
	value V
}
|
||||
|
||||
type slot[K, V any] struct {
|
||||
entry[K, V]
|
||||
occupied bool
|
||||
}
|
||||
|
||||
type bucket[K, V any] struct {
|
||||
hash Hash[K]
|
||||
slots []slot[K, V]
|
||||
capacity, size uint64
|
||||
compare EqualFunc[K]
|
||||
}
|
||||
|
||||
func (b bucket[K, V]) location(key K) uint64 {
|
||||
return b.hash(key) % b.capacity
|
||||
}
|
||||
|
||||
func (b bucket[K, V]) get(key K) (value V, found bool) {
|
||||
slot := b.slots[b.location(key)]
|
||||
return slot.value, slot.occupied && b.compare(slot.key, key)
|
||||
}
|
||||
|
||||
func (b *bucket[K, V]) resize(capacity uint64) {
|
||||
b.slots = make([]slot[K, V], capacity)
|
||||
b.capacity = capacity
|
||||
b.size = 0
|
||||
}
|
||||
|
||||
func (b bucket[K, V]) update(key K, value V) (updated bool) {
|
||||
slot := &b.slots[b.location(key)]
|
||||
|
||||
if slot.occupied && b.compare(slot.key, key) {
|
||||
slot.value = value
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (b *bucket[K, V]) evict(insertion entry[K, V]) (evicted entry[K, V], eviction bool) {
|
||||
slot := &b.slots[b.location(insertion.key)]
|
||||
|
||||
if !slot.occupied {
|
||||
slot.entry = insertion
|
||||
slot.occupied = true
|
||||
b.size++
|
||||
return
|
||||
}
|
||||
|
||||
if b.compare(slot.key, insertion.key) {
|
||||
slot.value = insertion.value
|
||||
return
|
||||
}
|
||||
|
||||
insertion, slot.entry = slot.entry, insertion
|
||||
return insertion, true
|
||||
}
|
||||
|
||||
func newBucket[K, V any](capacity uint64, hash Hash[K], compare EqualFunc[K]) bucket[K, V] {
|
||||
return bucket[K, V]{
|
||||
hash: hash,
|
||||
capacity: capacity,
|
||||
compare: compare,
|
||||
size: 0,
|
||||
slots: make([]slot[K, V], capacity),
|
||||
}
|
||||
}
|
||||
17
compare.go
Normal file
17
compare.go
Normal file
@@ -0,0 +1,17 @@
|
||||
package cuckoo
|
||||
|
||||
// An EqualFunc determines whethers two keys are 'equal'. Keys that are 'equal'
|
||||
// are teated as the same by the [Table]. A good EqualFunc is pure,
|
||||
// deterministic, and fast. By default, [NewTable] uses [DefaultEqualFunc].
|
||||
//
|
||||
// This function MUST NOT return true if the [Hash] digest of two keys
|
||||
// are different: the [Table] will not work.
|
||||
type EqualFunc[K any] = func(a, b K) bool
|
||||
|
||||
// DefaultEqualFunc is the [EqualFunc] used by [NewTable]. It reports whether
// a and b have [equal values] under Go's == operator.
//
// [equal values]: https://go.dev/ref/spec#Comparison_operators
func DefaultEqualFunc[K comparable](a, b K) bool {
	same := a == b
	return same
}
|
||||
37
compare_example_test.go
Normal file
37
compare_example_test.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package cuckoo_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"git.maximhutz.com/tools/go-cuckoo"
|
||||
)
|
||||
|
||||
// This example demonstrates what happens when EqualFunc and Hash disagree on
|
||||
// equality. Although 'isEqual' only compares user IDs, but the hashes use the
|
||||
// entire 'User' object. So, two objects with the same ID but different names
|
||||
// hash to different slots, so the table cannot find them.
|
||||
func ExampleEqualFunc_badEqualFunc() {
|
||||
type User struct{ ID, Name string }
|
||||
|
||||
// Two users with the same ID are equal.
|
||||
isEqual := func(a, b User) bool { return a.ID == b.ID }
|
||||
|
||||
hashA, hashB := cuckoo.NewDefaultHash[User](), cuckoo.NewDefaultHash[User]()
|
||||
userbase := cuckoo.NewCustomTable[User, bool](hashA, hashB, isEqual)
|
||||
|
||||
(userbase.Put(User{"1", "Robert Doe"}, true))
|
||||
|
||||
fmt.Println("Has Robert?", userbase.Has(User{"1", "Robert Doe"}))
|
||||
fmt.Println("Has Johanna?", userbase.Has(User{"2", "Johanna Smith"}))
|
||||
|
||||
// The hashes are different, so even though the equal function returns true,
|
||||
// the table does not recognize it.
|
||||
fmt.Println("Equal?", isEqual(User{"1", "Rob Doe"}, User{"1", "Robert Doe"}))
|
||||
fmt.Println("Has Rob?", userbase.Has(User{"1", "Rob Doe"}))
|
||||
|
||||
// Output:
|
||||
// Has Robert? true
|
||||
// Has Johanna? false
|
||||
// Equal? true
|
||||
// Has Rob? false
|
||||
}
|
||||
49
cuckoo_fuzz_test.go
Normal file
49
cuckoo_fuzz_test.go
Normal file
@@ -0,0 +1,49 @@
|
||||
package cuckoo_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"git.maximhutz.com/tools/go-cuckoo"
|
||||
)
|
||||
|
||||
func offsetHash(seed uint32) cuckoo.Hash[uint32] {
|
||||
return func(x uint32) uint64 {
|
||||
v := uint64(x) ^ uint64(seed)
|
||||
v = (v ^ (v >> 30)) * 0xbf58476d1ce4e5b9
|
||||
v = (v ^ (v >> 27)) * 0x94d049bb133111eb
|
||||
return v ^ (v >> 31)
|
||||
}
|
||||
}
|
||||
|
||||
func FuzzInsertLookup(f *testing.F) {
|
||||
f.Fuzz(func(t *testing.T, data []byte, seedA, seedB uint32) {
|
||||
assert := assert.New(t)
|
||||
|
||||
table := cuckoo.NewCustomTable[uint32, uint32](
|
||||
offsetHash(seedA),
|
||||
offsetHash(seedB),
|
||||
func(a, b uint32) bool { return a == b },
|
||||
)
|
||||
|
||||
if seedA == seedB {
|
||||
return
|
||||
}
|
||||
|
||||
r := bytes.NewReader(data)
|
||||
var key, value uint32
|
||||
for binary.Read(r, binary.LittleEndian, &key) == nil &&
|
||||
binary.Read(r, binary.LittleEndian, &value) == nil {
|
||||
|
||||
err := table.Put(key, value)
|
||||
assert.NoError(err)
|
||||
|
||||
found, err := table.Get(key)
|
||||
assert.NoError(err)
|
||||
assert.Equal(value, found)
|
||||
}
|
||||
})
|
||||
}
|
||||
30
cuckoo_internal_test.go
Normal file
30
cuckoo_internal_test.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package cuckoo
|
||||
|
||||
import (
|
||||
"math"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestMaxEvictions(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
for i := 16; i < 116; i++ {
|
||||
table := NewTable[int, bool](Capacity(i / 2))
|
||||
expectedEvictions := 3 * math.Floor(math.Log2(float64(i)))
|
||||
|
||||
assert.Equal(table.maxEvictions(), int(expectedEvictions))
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoad(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
table := NewTable[int, bool](Capacity(8))
|
||||
|
||||
for i := range 16 {
|
||||
err := table.Put(i, true)
|
||||
assert.NoError(err)
|
||||
assert.Equal(float64(table.Size())/float64(table.Capacity()), table.load())
|
||||
}
|
||||
}
|
||||
130
cuckoo_test.go
Normal file
130
cuckoo_test.go
Normal file
@@ -0,0 +1,130 @@
|
||||
package cuckoo_test
|
||||
|
||||
import (
|
||||
"maps"
|
||||
"math/rand/v2"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"git.maximhutz.com/tools/go-cuckoo"
|
||||
)
|
||||
|
||||
func TestNewTable(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
table := cuckoo.NewTable[int, bool]()
|
||||
|
||||
assert.NotNil(table)
|
||||
assert.Zero(table.Size())
|
||||
}
|
||||
|
||||
func TestAddItem(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
key, value := 0, true
|
||||
table := cuckoo.NewTable[int, bool]()
|
||||
|
||||
err := table.Put(key, value)
|
||||
|
||||
assert.NoError(err)
|
||||
assert.Equal(1, table.Size())
|
||||
assert.True(table.Has(key))
|
||||
}
|
||||
|
||||
func TestPutOverwrite(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
key, value, newValue := 0, 1, 2
|
||||
table := cuckoo.NewTable[int, int]()
|
||||
(table.Put(key, value))
|
||||
|
||||
err := table.Put(key, newValue)
|
||||
|
||||
assert.NoError(err)
|
||||
assert.Equal(1, table.Size())
|
||||
assert.True(table.Has(key))
|
||||
found, _ := table.Get(key)
|
||||
assert.Equal(newValue, found)
|
||||
}
|
||||
|
||||
func TestSameHash(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
hash := func(int) uint64 { return 0 }
|
||||
table := cuckoo.NewCustomTable[int, bool](hash, hash, cuckoo.DefaultEqualFunc[int])
|
||||
|
||||
errA := table.Put(0, true)
|
||||
errB := table.Put(1, true)
|
||||
errC := table.Put(2, true)
|
||||
|
||||
assert.NoError(errA)
|
||||
assert.NoError(errB)
|
||||
assert.ErrorContains(errC, "bad hash")
|
||||
}
|
||||
|
||||
func TestStartingCapacity(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
table := cuckoo.NewTable[int, bool](cuckoo.Capacity(64))
|
||||
|
||||
assert.Equal(uint64(128), table.Capacity())
|
||||
}
|
||||
|
||||
func TestResizeCapacity(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
table := cuckoo.NewTable[int, bool](
|
||||
cuckoo.Capacity(8),
|
||||
cuckoo.GrowthFactor(2),
|
||||
)
|
||||
|
||||
for table.Capacity() == 16 {
|
||||
err := table.Put(rand.Int(), true)
|
||||
assert.NoError(err)
|
||||
}
|
||||
|
||||
assert.Equal(uint64(32), table.Capacity())
|
||||
}
|
||||
|
||||
func TestPutMany(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
expected, actual := map[int]bool{}, cuckoo.NewTable[int, bool]()
|
||||
|
||||
for i := range 1_000 {
|
||||
expected[i] = true
|
||||
err := actual.Put(i, true)
|
||||
|
||||
assert.NoError(err)
|
||||
}
|
||||
|
||||
assert.Equal(maps.Collect(actual.Entries()), expected)
|
||||
assert.Equal(len(expected), actual.Size())
|
||||
}
|
||||
|
||||
func TestGetMany(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
table := cuckoo.NewTable[int, bool]()
|
||||
|
||||
for i := range 1_000 {
|
||||
err := table.Put(i, true)
|
||||
assert.NoError(err)
|
||||
}
|
||||
|
||||
for i := range 2_000 {
|
||||
value, err := table.Get(i)
|
||||
if i < 1_000 {
|
||||
assert.NoError(err)
|
||||
assert.Equal(value, true)
|
||||
} else {
|
||||
assert.Error(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemove(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
table := cuckoo.NewTable[int, bool]()
|
||||
|
||||
assert.False(table.Has(0))
|
||||
|
||||
err := table.Put(0, true)
|
||||
assert.NoError(err)
|
||||
|
||||
assert.True(table.Has(0))
|
||||
}
|
||||
9
doc.go
Normal file
9
doc.go
Normal file
@@ -0,0 +1,9 @@
|
||||
// Package cuckoo provides a hash table that uses cuckoo hashing to achieve
|
||||
// a worst-case O(1) lookup time.
|
||||
//
|
||||
// While a [NewTable] only supports comparable keys by default, you can create
|
||||
// a table with any key type using [NewCustomTable]. Custom [Hash] functions and
|
||||
// key comparison are also supported.
|
||||
//
|
||||
// See more: https://en.wikipedia.org/wiki/Cuckoo_hashing
|
||||
package cuckoo
|
||||
32
doc_example_test.go
Normal file
32
doc_example_test.go
Normal file
@@ -0,0 +1,32 @@
|
||||
// This example shows basic usage of a table: storing a value, then looking up a present and a missing key.
|
||||
package cuckoo_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"git.maximhutz.com/tools/go-cuckoo"
|
||||
)
|
||||
|
||||
func Example_basic() {
|
||||
table := cuckoo.NewTable[int, string]()
|
||||
|
||||
if err := table.Put(1, "Hello, World!"); err != nil {
|
||||
fmt.Println("Put error:", err)
|
||||
}
|
||||
|
||||
if item, err := table.Get(1); err != nil {
|
||||
fmt.Println("Error:", err)
|
||||
} else {
|
||||
fmt.Println("Found 1:", item)
|
||||
}
|
||||
|
||||
if item, err := table.Get(0); err != nil {
|
||||
fmt.Println("Error:", err)
|
||||
} else {
|
||||
fmt.Println("Found 0:", item)
|
||||
}
|
||||
|
||||
// Output:
|
||||
// Found 1: Hello, World!
|
||||
// Error: key '0' not found
|
||||
}
|
||||
11
go.mod
Normal file
11
go.mod
Normal file
@@ -0,0 +1,11 @@
|
||||
module git.maximhutz.com/tools/go-cuckoo
|
||||
|
||||
go 1.25.6
|
||||
|
||||
require github.com/stretchr/testify v1.11.1
|
||||
|
||||
require (
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
9
go.sum
Normal file
9
go.sum
Normal file
@@ -0,0 +1,9 @@
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
27
hash.go
Normal file
27
hash.go
Normal file
@@ -0,0 +1,27 @@
|
||||
package cuckoo
|
||||
|
||||
import (
|
||||
"hash/maphash"
|
||||
)
|
||||
|
||||
// A Hash function maps any data to a fixed-length value (in this case, a
|
||||
// [uint64]).
|
||||
//
|
||||
// It is used by the [Table] to evenly distribute values
|
||||
// amongst its slots. A good hash function is uniform, [chaotic], and
|
||||
// deterministic. [Table] uses [NewDefaultHash] by default, which is built on
|
||||
// [maphash.Comparable].
|
||||
//
|
||||
// [chaotic]: https://en.wikipedia.org/wiki/Avalanche_effect
|
||||
type Hash[K any] = func(key K) (digest uint64)
|
||||
|
||||
// NewDefaultHash returns a new [Hash] which uses [maphash.Comparable].
|
||||
//
|
||||
// Each hash has a random seed, so calling this function again will return a new
|
||||
// hash. Do not use this for testing.
|
||||
func NewDefaultHash[K comparable]() Hash[K] {
|
||||
seed := maphash.MakeSeed()
|
||||
return func(key K) (digest uint64) {
|
||||
return maphash.Comparable(seed, key)
|
||||
}
|
||||
}
|
||||
34
hash_example_test.go
Normal file
34
hash_example_test.go
Normal file
@@ -0,0 +1,34 @@
|
||||
package cuckoo_test
|
||||
|
||||
import (
|
||||
"crypto/hmac"
|
||||
"crypto/sha256"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"git.maximhutz.com/tools/go-cuckoo"
|
||||
)
|
||||
|
||||
func NewStringHash(seed uint64) cuckoo.Hash[string] {
|
||||
key := binary.LittleEndian.AppendUint64(nil, seed)
|
||||
hash := hmac.New(sha256.New, key)
|
||||
|
||||
return func(key string) uint64 {
|
||||
hash.Reset()
|
||||
// This will never return an error, as part of the [hash.Hash] contract. We
|
||||
// can safely ignore it.
|
||||
(io.WriteString(hash, key))
|
||||
return binary.LittleEndian.Uint64(hash.Sum(nil))
|
||||
}
|
||||
}
|
||||
|
||||
func ExampleHash_stringHash() {
|
||||
hash := NewStringHash(1)
|
||||
fmt.Printf("Digest 'Hello, world!': %x\n", hash("Hello, world!"))
|
||||
fmt.Printf("Digest 'Hello, world?': %x\n", hash("Hello, world?"))
|
||||
|
||||
// Output:
|
||||
// Digest 'Hello, world!': dc6602d6edcdf549
|
||||
// Digest 'Hello, world?': 432c2ddc1ae9f14b
|
||||
}
|
||||
45
settings.go
Normal file
45
settings.go
Normal file
@@ -0,0 +1,45 @@
|
||||
package cuckoo
|
||||
|
||||
// DefaultCapacity is the initial capacity of each bucket of a [Table]. It is inspired by
|
||||
// Java's [HashMap] implementation, which also uses 16.
|
||||
//
|
||||
// [HashMap]: https://docs.oracle.com/javase/8/docs/api/java/util/HashMap.html#HashMap--
|
||||
const DefaultCapacity uint64 = 16
|
||||
|
||||
// DefaultGrowthFactor is the standard resize multiplier for a [Table]. Most
|
||||
// hash table implementations use 2.
|
||||
const DefaultGrowthFactor uint64 = 2
|
||||
|
||||
// DefaultMinimumLoad is the default lowest acceptable occupancy of a [Table].
|
||||
// The value of 5% is taken from [libcuckoo].
|
||||
//
|
||||
// [libcuckoo]: https://github.com/efficient/libcuckoo/blob/656714705a055df2b7a605eb3c71586d9da1e119/libcuckoo/cuckoohash_config.hh#L21
|
||||
const DefaultMinimumLoad float64 = 0.05
|
||||
|
||||
// settings collects the tunable parameters of a [Table] while its options are
// being applied.
type settings struct {
	// growthFactor is the multiplier applied to each bucket's capacity on resize.
	growthFactor uint64
	// minLoadFactor is the lowest load at which a resize is still attempted.
	minLoadFactor float64
	// bucketSize is the starting number of slots in each bucket.
	bucketSize uint64
}

// An Option modifies the settings of a [Table]. It is used in its constructors
// like [NewTable], for example.
type Option func(*settings)

// Capacity modifies the starting capacity of each bucket of the [Table]. The
// value must be greater than 0.
//
// A value that is not greater than 0 is ignored and the previous setting is
// kept: a capacity of zero would make slot lookup divide by zero, and a
// negative value would wrap around to an enormous unsigned capacity.
func Capacity(value int) Option {
	return func(s *settings) {
		if value > 0 {
			s.bucketSize = uint64(value)
		}
	}
}

// MinimumLoad modifies the [DefaultMinimumLoad] of the [Table]. The value must
// be between 0.00 and 1.00.
//
// A value outside that range (including NaN) is ignored and the previous
// setting is kept.
func MinimumLoad(value float64) Option {
	return func(s *settings) {
		// Written so that NaN, which fails every comparison, is rejected.
		if value >= 0 && value <= 1 {
			s.minLoadFactor = value
		}
	}
}

// GrowthFactor controls how much the capacity of the [Table] multiplies when
// it must resize. The value must be greater than 1.
//
// A value that is not greater than 1 is ignored and the previous setting is
// kept: with such a factor a resize would never enlarge the table.
func GrowthFactor(value int) Option {
	return func(s *settings) {
		if value > 1 {
			s.growthFactor = uint64(value)
		}
	}
}
|
||||
203
table.go
Normal file
203
table.go
Normal file
@@ -0,0 +1,203 @@
|
||||
package cuckoo
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"iter"
|
||||
"math/bits"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// A Table is hash table that uses cuckoo hashing to resolve collision. Create
|
||||
// one with [NewTable]. Or if you want more granularity, use [NewTableBy] or
|
||||
// [NewCustomTable].
|
||||
type Table[K, V any] struct {
|
||||
bucketA, bucketB bucket[K, V]
|
||||
growthFactor uint64
|
||||
minLoadFactor float64
|
||||
}
|
||||
|
||||
// Capacity returns the number of slots allocated for the [Table]. To get the
|
||||
// number of slots filled, look at [Table.Size].
|
||||
func (t Table[K, V]) Capacity() uint64 {
|
||||
return t.bucketA.capacity + t.bucketB.capacity
|
||||
}
|
||||
|
||||
// Size returns how many slots are filled in the [Table].
|
||||
func (t Table[K, V]) Size() int {
|
||||
return int(t.bucketA.size + t.bucketB.size)
|
||||
}
|
||||
|
||||
// log2 returns floor(log2(n)), i.e. the index of the highest set bit of n.
// For n == 0 it returns -1.
func log2(n uint64) (m int) {
	// A 64-bit value with z leading zeros has its top bit at index 63-z.
	return 63 - bits.LeadingZeros64(n)
}
|
||||
|
||||
func (t Table[K, V]) maxEvictions() int {
|
||||
return 3 * log2(t.Capacity())
|
||||
}
|
||||
|
||||
func (t Table[K, V]) load() float64 {
|
||||
return float64(t.Size()) / float64(t.Capacity())
|
||||
}
|
||||
|
||||
func (t *Table[K, V]) resize() error {
|
||||
entries := make([]entry[K, V], 0, t.Size())
|
||||
for k, v := range t.Entries() {
|
||||
entries = append(entries, entry[K, V]{k, v})
|
||||
}
|
||||
|
||||
t.bucketA.resize(t.growthFactor * t.bucketA.capacity)
|
||||
t.bucketB.resize(t.growthFactor * t.bucketB.capacity)
|
||||
|
||||
for _, entry := range entries {
|
||||
if err := t.Put(entry.key, entry.value); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get fetches the value for a key in the [Table]. Returns an error if no value
|
||||
// is found.
|
||||
func (t Table[K, V]) Get(key K) (value V, err error) {
|
||||
if item, ok := t.bucketA.get(key); ok {
|
||||
return item, nil
|
||||
}
|
||||
|
||||
if item, ok := t.bucketB.get(key); ok {
|
||||
return item, nil
|
||||
}
|
||||
|
||||
return value, fmt.Errorf("key '%v' not found", key)
|
||||
}
|
||||
|
||||
// Has returns true if a key has a value in the table.
|
||||
func (t Table[K, V]) Has(key K) (exists bool) {
|
||||
_, err := t.Get(key)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// Put sets the value for a key. Returns error if its value cannot be set.
|
||||
func (t *Table[K, V]) Put(key K, value V) (err error) {
|
||||
if t.bucketA.update(key, value) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if t.bucketB.update(key, value) {
|
||||
return nil
|
||||
}
|
||||
|
||||
entry, eviction := entry[K, V]{key, value}, false
|
||||
for range t.maxEvictions() {
|
||||
if entry, eviction = t.bucketA.evict(entry); !eviction {
|
||||
return nil
|
||||
}
|
||||
|
||||
if entry, eviction = t.bucketB.evict(entry); !eviction {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
if t.load() < t.minLoadFactor {
|
||||
return fmt.Errorf("bad hash: resize on load %d/%d = %f", t.Size(), t.Capacity(), t.load())
|
||||
}
|
||||
|
||||
if err := t.resize(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return t.Put(entry.key, entry.value)
|
||||
}
|
||||
|
||||
// Drop removes a value for a key in the table. Returns an error if its value
|
||||
// cannot be removed.
|
||||
//
|
||||
// Deprecated: Do not use.
|
||||
func (t Table[K, V]) Drop(_ K) {
|
||||
panic("Not implemented")
|
||||
}
|
||||
|
||||
// Entries returns an unordered sequence of all key-value pairs in the table.
|
||||
func (t Table[K, V]) Entries() iter.Seq2[K, V] {
|
||||
return func(yield func(K, V) bool) {
|
||||
for _, slot := range t.bucketA.slots {
|
||||
if slot.occupied {
|
||||
if !yield(slot.key, slot.value) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, slot := range t.bucketB.slots {
|
||||
if slot.occupied {
|
||||
if !yield(slot.key, slot.value) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// String returns the entries of the table as a string in the format:
|
||||
// "table[k1:v1 h2:v2 ...]".
|
||||
func (t Table[K, V]) String() string {
|
||||
var sb strings.Builder
|
||||
sb.WriteString("table[")
|
||||
|
||||
first := true
|
||||
for k, v := range t.Entries() {
|
||||
if !first {
|
||||
sb.WriteString(" ")
|
||||
}
|
||||
|
||||
fmt.Fprintf(&sb, "%v:%v", k, v)
|
||||
first = false
|
||||
}
|
||||
|
||||
sb.WriteString("]")
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
// NewCustomTable creates a [Table] with custom [Hash] and [EqualFunc]
|
||||
// functions, along with any [Option] the user provides.
|
||||
func NewCustomTable[K, V any](hashA, hashB Hash[K], compare EqualFunc[K], options ...Option) *Table[K, V] {
|
||||
settings := &settings{
|
||||
growthFactor: DefaultGrowthFactor,
|
||||
bucketSize: DefaultCapacity,
|
||||
minLoadFactor: DefaultMinimumLoad,
|
||||
}
|
||||
|
||||
for _, option := range options {
|
||||
option(settings)
|
||||
}
|
||||
|
||||
return &Table[K, V]{
|
||||
growthFactor: settings.growthFactor,
|
||||
minLoadFactor: settings.minLoadFactor,
|
||||
bucketA: newBucket[K, V](settings.bucketSize, hashA, compare),
|
||||
bucketB: newBucket[K, V](settings.bucketSize, hashB, compare),
|
||||
}
|
||||
}
|
||||
|
||||
// pipe composes two functions: the result applies a first and feeds its
// output to b.
func pipe[X, Y, Z any](a func(X) Y, b func(Y) Z) func(X) Z {
	return func(x X) Z {
		intermediate := a(x)
		return b(intermediate)
	}
}
|
||||
|
||||
// NewTableBy creates a [Table] for any key type by using keyFunc to derive a
|
||||
// comparable key. Two keys with the same derived key are treated as equal.
|
||||
func NewTableBy[K, V any, C comparable](keyFunc func(K) C, options ...Option) *Table[K, V] {
|
||||
return NewCustomTable[K, V](
|
||||
pipe(keyFunc, NewDefaultHash[C]()),
|
||||
pipe(keyFunc, NewDefaultHash[C]()),
|
||||
func(a, b K) bool { return keyFunc(a) == keyFunc(b) },
|
||||
options...,
|
||||
)
|
||||
}
|
||||
|
||||
// NewTable creates a [Table] using the default [Hash] and [EqualFunc]. Use
|
||||
// the [Option] functions to configure its behavior. Note that this constructor
|
||||
// is only provided for comparable keys. For arbitrary keys, consider
|
||||
// [NewTableBy] or [NewCustomTable].
|
||||
func NewTable[K comparable, V any](options ...Option) *Table[K, V] {
|
||||
return NewCustomTable[K, V](NewDefaultHash[K](), NewDefaultHash[K](), DefaultEqualFunc[K], options...)
|
||||
}
|
||||
Reference in New Issue
Block a user