4 Commits

Author SHA1 Message Date
e2ba398a62 fix: Table.Drop() was not a pointer receiver 2026-03-19 20:05:35 -04:00
c1314f8a3c fix: resize of Table.Drop() now decreases size
- Generalized Table.resize() to accept a capacity size for each bucket.
2026-03-19 20:01:50 -04:00
74ed81761c fix: drop() decrements bucket size 2026-03-19 20:01:14 -04:00
d4acdda95b feat: implement drop functionality
- Added `drop()` function in buckets.
- Implemented `Drop()` function for Table.
2026-03-19 19:54:16 -04:00
28 changed files with 250 additions and 1173 deletions

View File

@@ -1,26 +0,0 @@
# yaml-language-server: $schema=https://www.schemastore.org/gitea-issue-forms.json
name: 🐛 Bug Report
about: Report a bug in this project
title: "[BUG]: "
body:
- type: textarea
id: context
attributes:
label: Context
placeholder: What circumstances led to the bug?
validations:
required: true
- type: textarea
id: expected-behavior
attributes:
label: Expected Behavior
placeholder: What did you expect would happen?
validations:
required: true
- type: textarea
id: actual-behavior
attributes:
label: Actual Behavior
placeholder: What happened, and why was it unexpected?
validations:
required: true

View File

@@ -1,37 +0,0 @@
# 'Feature Request Template' By @cheehwatang
# https://github.com/cheehwatang/.github/blob/master/.github/ISSUE_TEMPLATE/feature_request.yml
#
# yaml-language-server: $schema=https://www.schemastore.org/gitea-issue-forms.json
name: ✨ Feature Request
about: Suggest an idea for this project
title: "[FEATURE]: "
body:
- type: dropdown
attributes:
multiple: false
label: Feature Type
options:
- "✨ New Feature"
- "📝 Documentation"
- "🎨 Style and UI"
- "🔨 Code Refactor"
- "⚡ Performance Improvements"
- "✅ New Test"
validations:
required: true
- type: textarea
id: description
attributes:
label: Description
placeholder: |
Give us a brief description of the feature or enhancement you would
like!
validations:
required: true
- type: textarea
id: additional-information
attributes:
label: Additional Information
placeholder: |
Give us some additional information on the feature request like proposed
solutions, links, screenshots, etc.

View File

@@ -1,2 +0,0 @@
# yaml-language-server: $schema=https://www.schemastore.org/gitea-issue-config.json
blank_issues_enabled: false

View File

@@ -1,17 +0,0 @@
---
name: "New Pull Request"
about: "Standard PR template"
title: ""
ref: "main"
---
## Description
## Changes
### Design Decisions
## Checklist
- [ ] Tests pass
- [ ] Docs updated

View File

@@ -6,19 +6,6 @@ on:
pull_request:
jobs:
check-pr-title:
name: Check PR Title
runs-on: ubuntu-latest
if: github.event_name == 'pull_request'
env:
TITLE: ${{ gitea.event.pull_request.title }}
steps:
- run: |
if ! echo "$TITLE" | grep -qE '^(WIP: )?(feat|fix|docs|chore|ci|test|refactor|perf|build|style|revert)(\(.+\))?(!)?: .+'; then
echo "::error::Pull Request title must follow conventional commits"
exit 1
fi
lint-go:
name: Go Lint
runs-on: ubuntu-latest
@@ -52,14 +39,6 @@ jobs:
- name: Run mutation tests
run: make lint-makefile
lint-markdown:
name: Markdown Lint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: DavidAnson/markdownlint-cli2-action@v19
test-unit:
name: Unit Tests
runs-on: ubuntu-latest

1
.gitignore vendored
View File

@@ -24,3 +24,4 @@ go.work.sum
# env file
.env

View File

@@ -114,9 +114,6 @@ linters:
# Reports uses of functions with replacement inside the testing package.
- usetesting
# Reports mixed receiver types in structs/interfaces.
- recvcheck
settings:
revive:
rules:

View File

@@ -5,5 +5,3 @@ unleash:
workers: 4
dry-run: false
threshold:
efficacy: 1.0

View File

@@ -1,17 +0,0 @@
default: true
heading-style:
style: atx
ul-indent:
indent: 2
line-length: false
no-duplicate-heading:
siblings_only: true
no-inline-html:
allowed_elements:
- br
- details
- summary
- img
- picture
- source
first-line-heading: true

View File

@@ -1,4 +1,4 @@
.PHONY: all help install clean test-unit test-mutation test-fuzz test docs lint-go lint-makefile lint-markdown lint
.PHONY: all help install clean test-unit test-mutation test-fuzz test docs lint-go lint-makefile lint
help: ## Show this help
@grep -E '^[a-zA-Z_-]+:.*##' $(MAKEFILE_LIST) | awk -F ':.*## ' '{printf " %-15s %s\n", $$1, $$2}'
@@ -16,12 +16,10 @@ test-unit: ## Run unit tests with coverage
test-mutation: ## Run mutation tests with gremlins
gremlins unleash
FUZZ_TIME ?= 30
test-fuzz: ## Run all fuzz tests for 30s each
@for func in $$(grep -r --include='*_test.go' -oh 'func Fuzz\w*' . | sed 's/func //'); do \
echo "Fuzzing $$func..."; \
go test ./... -fuzz="^$$func$$" -fuzztime=$(FUZZ_TIME)s; \
go test ./... -fuzz="^$$func$$" -fuzztime=30s; \
done
test: test-unit test-mutation test-fuzz ## Run all tests
@@ -32,10 +30,7 @@ lint-go: ## Lint Go code
lint-makefile: ## Lint the Makefile
checkmake Makefile
lint-markdown: ## Lint Markdown files
docker run --rm -v $(CURDIR):/workdir davidanson/markdownlint-cli2 "**/*.md"
lint: lint-go lint-makefile lint-markdown ## Lint all code
lint: lint-go lint-makefile ## Lint all code
docs: ## Serve godoc locally
@echo ">>> Visit: http://localhost:6060/pkg/$$(go list -m)"

View File

@@ -1,3 +1,3 @@
# <img height="30" src="assets/logo.svg" alt="Go Cuckoo, by `mvhutz`."> Go Cuckoo
# go-cuckoo
A hash table that uses cuckoo hashing to achieve a worst-case O(1) lookup time. Read more about it in [the package documentation](https://pkg.go.dev/git.maximhutz.com/tools/go-cuckoo).
A hash table that uses cuckoo hashing to achieve a worst-case O(1) lookup time.

View File

@@ -1,542 +0,0 @@
# Designing an Idiomatic API Interface
We (the maintainers) built `go-cuckoo`'s API interface without design intent.
Up until now, we paid more attention implementing the underlying functionality of the cuckoo hashing.
With the fundamentals of the algorithm built, we should revisit the interface.
It should align closer to the following principles:
- **Congruency**
A `go-cuckoo` table should have the same core functionality as Go's built-in map.
- **Familiarity**
A `go-cuckoo` table should behave similarly to Go's standard map, so users will intuitively know how to use it.
In effect, its users will carry less cognitive load.
## Current State
### Interface of the built-in Map
Listed below is every interface provided by Go to the built-in map object.
Also included, are the functions from the package `maps` in the standard library.
<details>
<summary>Interfaces</summary>
| # | built-in Interface | Description |
| --- | ---------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
| 1 | `m := make(map[K]V)` | Returns an empty map using the built-in `make()` function. |
| 2 | `m := make(map[K]V, hint)` | Returns an empty map using `make()`, with a capacity 'hint'. This hint is how many items the map expects to hold, _not_ a measure of how large it is. |
| 3 | `m := map[K]V{...}` | Returns a map, which may be filled with entries in the ellipsis (optional). |
| 4 | `var m map[K]V` | Defines an empty _variable_ that holds a map. This differs from #1 because `m` is uninitialized (nil) here. |
| 5 | `m[k] = v` | Assigns the value `v` to the key `k`. |
| 6 | `v := m[k]` | Returns the value of `k` if it exists. Otherwise, `v` is uninitialized. |
| 7 | `v, ok := m[k]` | Similar to #6, except `ok` is equal to whether `v` is initialized. This is comma-ok notation. |
| 8 | `for k, v := range m` | Iterates over every key-value pair in `m`. The order is random. |
| 9 | `delete(m, k)` | Unassigns the value `k`. Returns no value. |
| 10 | `clear(m)` | Unassigns all keys in `m`. Returns no value. |
| 11 | `n := len(m)` | Returns the number of entries in `m`. If nil, `m` returns 0. |
| 12 | `m2 := maps.Clone(m)` | Returns a copy of `m`. |
| 13 | `maps.Copy(dst, src)` | Assigns every entry of `src` in `dst`. |
| 14 | `ok := maps.Equal(m1, m2)` | Returns true iff `m1` and `m2` have the same entries. |
| 15 | `ok := maps.EqualFunc(m1, m2, fn)` | Like #14, but with a custom comparator for non-comparable values. |
| 16 | `maps.DeleteFunc(m, fn)` | Removes every entry in `m` which satisfies `fn`. Returns no value. |
| 17 | `it2 := maps.All(m)` | Returns a 2D iterator over every key-value pair. |
| 18 | `it := maps.Keys(m)` | Returns an iterator over every key. |
| 19 | `it := maps.Values(m)` | Returns an iterator over every value. There can be duplicates. |
| 20 | `m := maps.Collect(seq)` | Returns a map, with every entry defined in a 2D iterator over key-value pairs. |
| 21 | `maps.Insert(m, seq)` | Assigns to `m` all key-value pairs in 2D iterator `seq`. Returns no value. |
</details>
### Interface of `go-cuckoo`
On the other hand, here is the current contract for `go-cuckoo`.
<details>
<summary>Interfaces</summary>
| # | `go-cuckoo` Interface | Description |
| --- | -------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- |
| 1 | `m := New(opts...)` | Creates a table using the default hash and equal function. The options configure its behavior. Confined to comparable keys. |
| 2 | `m := NewBy(keyFunc, opts...)` | Like #1, but allows any key type. A `keyFunc` is used to derive a comparable key. |
| 3 | `m := NewCustom(hashA, hashB, equalFunc, opts...)` | Like #1, but allows control over the hashes used to allow any key type. An `equalFunc` determines key equality. |
| 4 | `seq := m.Entries()` | Returns an unordered 2D iterator of all key-value pairs in the table. |
| 5 | `v := m.Find(k)` | Returns the value for `k`, without reporting whether `k` exists. |
| 6 | `v, ok := m.Get(k)` | Returns the value for `k` in the table. Also, returns true if the `k` exists, otherwise false. When false, `v` is undefined. |
| 7 | `ok := m.Has(k)` | Returns true if `k` is in the table. |
| 8 | `err := m.Put(k, v)` | Sets value `v` for key `k`. Otherwise, returns error. |
| 9 | `n := m.Size()` | Returns the number of items in `m`. |
| 10 | `str := m.String()` | Returns `m` as a string in the format "table[k1:v1 k2:v2 ...]". |
| 11 | `cap := m.TotalCapacity()` | Returns how many slots `m` has allocated. |
| 12 | `ok := m.Drop(k)` | Removes `k` from the table. Returns whether the key had existed. |
</details>
### Determining Congruency
So, how does the core functionality compare?
Listed below is an analysis of every interface in Go's standard map.
Each is compared against what `go-cuckoo` offers, and categorized into the following groups:
- ✅ Covered: an analog exists.
- ⚠️ Partial: workaround available.
- ❌ Gap: no analog yet; addressed in [Target State](#solving-congruency).
Specifically, here we are checking for functionality.
Is there functionality that this offers which `go-cuckoo` does not?
We are checking accessibility, but not discoverability.
The latter will be considered later.
<details>
<summary>✅ <code>m := make(map[K]V)</code></summary>
The analog is `m := New()`.
</details>
<details>
<summary>⚠️ <code>m := make(map[K]V, hint)</code></summary>
This has no simple analog.
It is close to `m := New(Capacity(hint))`, but it assigns starting capacity, not expected size.
For the built-in map, these are two separate things.
- Capacity is an internal measure, used to optimize space/speed.
It is hidden from the user because it depends on the underlying implementation, which may change.
- Expected size is the number of items the map must be able to hold before resizing.
This is tangible and agnostic to implementation, which is why it is given to the user.
In short, this interface defines expected size, but `Capacity()` defines capacity.
</details>
<details>
<summary>❌ <code>m := map[K]V{...}</code></summary>
This has no simple analog, the closest being:
```go
m := New[K, V]()
for k, v := range startingEntries {
m.Put(k, v)
}
```
It is idiomatic, but far less ergonomic.
</details>
<details>
<summary>✅ <code>var m map[K]V</code></summary>
The analog is `var m Table[K, V]`.
</details>
<details>
<summary>✅ <code>m[k] := v</code></summary>
The analog is `err := m.Put(k, v)`.
</details>
<details>
<summary>✅ <code>v := m[k]</code></summary>
The analog is `v := m.Find(k)`.
</details>
<details>
<summary>✅ <code>v, ok := m[k]</code></summary>
The analog is `v, ok := m.Get(k)`.
</details>
<details>
<summary>✅ <code>for k, v := range m</code></summary>
The analog is `for k, v := range m.Entries()`.
</details>
<details>
<summary>✅ <code>delete(m, k)</code></summary>
The analog is `ok := m.Drop(k)`.
</details>
<details>
<summary>❌ <code>clear(m)</code></summary>
There is no analog.
The easiest way to do this is to delete all items individually:
```go
for k := range m.Entries() {
m.Drop(k)
}
```
</details>
<details>
<summary>✅ <code>n := len(m)</code></summary>
The analog is `n := m.Size()`.
</details>
<details>
<summary>❌ <code>m2 := maps.Clone(m)</code></summary>
There is no analog.
The easiest way to do this currently is to make a new map, and manually add the items.
```go
m2 := cuckoo.New[K, V]()
for k, v := range m.Entries() {
m2.Put(k, v)
}
```
This gets complicated by the various options available to the user.
Furthermore, any custom `EqualFunc`, `keyFunc` or `Hash` is not transferred.
</details>
<details>
<summary>❌ <code>maps.Copy(dst, src)</code></summary>
There is no analog.
The simplest way to do this is with a for-loop.
```go
for k, v := range src.Entries() {
dst.Put(k, v)
}
```
</details>
<details>
<summary>❌ <code>ok := maps.Equal(m1, m2)</code></summary>
There is no analog.
Users have to manually check the key-value pairs to determine equality.
</details>
<details>
<summary>❌ <code>ok := maps.EqualFunc(m1, m2, fn)</code></summary>
There is no analog.
Users have to manually check the key-value pairs to determine equality.
</details>
<details>
<summary>❌ <code>maps.DeleteFunc(m, fn)</code></summary>
There is no analog.
Users have to manually delete keys.
</details>
<details>
<summary>✅ <code>it2 := maps.All(m)</code></summary>
The analog is `it2 := m.Entries()`.
</details>
<details>
<summary>⚠️ <code>it := maps.Keys(m)</code></summary>
There is no simple analog.
A close neighbor is `it2 := m.Entries()`.
Users can use this in a for-loop, and pick out just the keys:
```go
for k := range m.Entries() {
// ...
}
```
</details>
<details>
<summary>⚠️ <code>it := maps.Values(m)</code></summary>
There is no simple analog.
A close neighbor is `it2 := m.Entries()`.
Users can use this in a for-loop, and pick out just the values:
```go
for _, v := range m.Entries() {
// ...
}
```
</details>
<details>
<summary>❌ <code>m := maps.Collect(seq)</code></summary>
There is no analog.
</details>
<details>
<summary>❌ <code>maps.Insert(m, seq)</code></summary>
There is no analog.
</details>
## Target State
### Solving Congruency
We should make the following changes to accommodate congruency:
<details>
<summary><code>ok := maps.EqualFunc(m1, m2, fn)</code></summary>
We should implement a new function:
```go
func EqualFunc[K, V1, V2 any](t1 *Table[K, V1], t2 *Table[K, V2], eq func(V1, V2) bool) bool
```
This function is free, and not bound as a receiver function.
(It is called `cuckoo.Equal(t1, t2)`, not `t1.Equals(t2)`.)
The latter implies `t1` has authority, when in fact neither do.
We define equality as:
1. Neither table has a key the other doesn't.
2. Each key has the same value in each table.
Parameter `eq` determines this equality.
Custom `EqualFunc`'s complicate this, as they modulate key identity in tables.
If two tables may differ on whether two keys are different, this function might break.
So, we must assume that:
- Both tables have `EqualFunc`'s which 'agree' on the identity of the keys present in the tables.
Agreement is defined as: if two keys are distinct in one table, they are distinct in the other.
The name `EqualFunc` is already taken by `EqualFunc[K, V]`: an alias for `func(a, b K) bool`.
Inlining `EqualFunc[K, V]` would solve this problem.
We will move the documentation attached to it to `DefaultEqualFunc`.
</details>
<details>
<summary><code>ok := maps.Equal(m1, m2)</code></summary>
We should implement a new function, to conform with the standard library:
```go
func Equal[K any, V comparable](t1, t2 *Table[K, V]) bool
```
It uses the same equality check as in `EqualFunc`.
Once again, the function is free because it is symmetric.
</details>
<details>
<summary><code>maps.Insert(m, seq)</code></summary>
We should implement a new receiver for the table:
```go
func (t *Table[K, V]) Insert(seq iter.Seq2[K, V]) error
```
A receiver fits better even though `maps.Insert` is a free function, because copying it is asymmetric.
Map `dst` receives entries from map `src`.
It's only free because Go's standard map is built into the language, and so cannot have receivers.
In terms of naming, `t.Extend` is more accurate, and has precedent in [Python](https://docs.python.org/3/tutorial/datastructures.html#more-on-lists) and [Rust](https://doc.rust-lang.org/std/iter/trait.Extend.html).
When [adding iterator function](https://github.com/golang/go/issues/61900) to the `maps` package, the Go team chose to frame it as 'sources' and 'sinks'.
With this model, `maps.Insert` made more sense than `maps.Extend`.
Ultimately, `t.Insert()` is a better choice to be consistent with `maps`.
</details>
<details>
<summary><code>maps.Copy(dst, src)</code></summary>
We should implement a new receiver for the table:
```go
func (t *Table[K, V]) Copy(src *Table[K, V]) error
```
Its functionality should match that of `t.Insert()`.
A receiver fits better even though `maps.Copy` is a free function, because copying is asymmetric: `dst` is written into by `src`.
It is only free because Go's standard map is built into the language, and so cannot have receivers.
The name `t.Merge()` might be more accurate, but it does not work because:
- `t.Copy()` matches Go's built-in `copy()`, and `io.Copy()`. The Go team used [the same logic](https://github.com/golang/go/discussions/47330#discussioncomment-1167799) to name `maps.Copy()`.
In this case, `t.Merge()` would be an outlier.
- `t.Merge()` implies some sort of conflict-resolution, when there is not.
It simply overwrites the values.
</details>
<details>
<summary><code>maps.DeleteFunc(m, fn)</code></summary>
We should implement a new receiver for the table:
```go
func (t *Table[K, V]) DeleteFunc(del func(K, V) bool)
```
It would have the same functionality as `maps.DeleteFunc`.
A free function could work here, but `t` has clear authority over `del`.
Other than being consistent with the `maps` package, `t.DeleteFunc` follows the Go convention of appending `Func` to higher-order equivalents of functions.
This trumps names like `t.DeleteIf`, which lend more to [Java](https://docs.oracle.com/javase/8/docs/api/java/util/ArrayList.html#removeIf-java.util.function.Predicate-) or [C++](https://en.cppreference.com/cpp/algorithm/remove).
The word `Delete` is also convention, tying back to the built-in `delete()`.
</details>
<details>
<summary><code>m := maps.Collect(seq)</code></summary>
We should implement a new constructor.
```go
func Collect[K comparable, V any](seq iter.Seq2[K, V]) (*Table[K, V], error)
```
It would create a `New()` table, and insert all entries in `seq`.
This function only supports the standard table constructor, with comparable keys.
It is tempting to add `CollectBy` or `CollectCustom` to support all table types, but doing so would pollute the public interface.
It would be just one more line to initialize the table and then call `t.Insert` directly:
```go
t := // ...
err := t.Insert(seq)
```
</details>
<details>
<summary><code>m := map[K]V{...}</code></summary>
We should make a new constructor, because entries are generic.
So, creating an option with initialized entries doesn't work.
With the previous additions, users have a few options.
If they want to use a `New()` table, `t.Collect` matches well:
```go
t, err := cuckoo.Collect(func(yield func(K, V) bool) {
yield(key1, val1)
yield(key2, val2)
})
```
For `NewCustom()` or `NewBy()` tables, users can call `t.Insert` after initialization:
```go
t := // ...
err := t.Insert(func(yield func(K, V) bool) {
yield(key1, val1)
yield(key2, val2)
})
```
It is one more line.
But, the alternative is polluting the public interface with corresponding `*WithEntries` constructors.
</details>
<details>
<summary><code>m := make(map[K]V, hint)</code></summary>
We should add a new option:
```go
func ExpectedSize(n int) Option
```
When fed to a table, it will allocate enough space to hold `n` entries without a resize.
</details>
<details>
<summary><code>clear(m)</code></summary>
We should implement a new receiver:
```go
func (t *Table[K, V]) Clear()
```
It will remove all entries from the table.
</details>
<details>
<summary><code>m2 := maps.Clone(m)</code></summary>
We should implement a matching function:
```go
func (t *Table[K, V]) Clone() *Table[K, V]
```
Also, it will copy the hash, equality function, and options used in the table.
</details>
<details>
<summary><code>it := maps.Keys(m)</code></summary>
We should implement a matching function:
```go
func (t *Table[K, V]) Keys() iter.Seq[K]
```
It is tempting to just have `All()`, but it returns a `Seq2`, not a `Seq`.
There is no iterator adaptor between `Seq` and `Seq2`, and will not be for the foreseeable future.
This function, while it feels superfluous, is required.
</details>
<details>
<summary><code>it := maps.Values(m)</code></summary>
We should implement a matching function:
```go
func (t *Table[K, V]) Values() iter.Seq[V]
```
For the same reason we need `Keys()`, we also need `Values()`.
</details>

View File

@@ -1 +0,0 @@
<svg width="16" height="16" xmlns="http://www.w3.org/2000/svg" shape-rendering="crispEdges"><path fill="#2a1512" d="M3,6h1v1h-1v-1M2,7h1v1h-1v-1M12,7h1v1h-1v-1M1,8h1v1h-1v-1M4,8h1v2h-1v-2M14,8h1v1h-1v-1M11,9h1v2h-2v-1h1v-1M5,10h1v1h-1v-1M1,13h2v1h1v1h-2v-1h-1v-1M13,13h2v1h-1v1h-1v1h-1v-2h1v-1M4,15h1v1h-1v-1"/><path fill="#38231f" d="M4,6h1v1h-1v-1M11,6h1v1h-1v-1M13,7h1v1h-1v-1M1,9h1v2h-1v-2M14,9h1v1h-1v-1M14,11h1v2h-1v-2M1,12h1v1h-1v-1M5,15h3v1h-3v-1M10,15h2v1h-2v-1"/><path fill="#3f3f74" d="M7,3h2v1h-2v-1M6,4h1v1h-1v-1M9,4h1v1h-1v-1M5,5h1v5h-1v-5M10,5h1v5h-1v-5M6,10h4v1h-4v-1"/><path fill="#6262ab" d="M9,5h1v1h-1v-1M6,9h1v1h-1v-1M9,9h1v1h-1v-1"/><path fill="#663931" d="M11,8h1v1h-1v-1M2,9h1v1h-1v-1M14,10h1v1h-1v-1M1,11h1v1h-1v-1M3,11h2v1h-2v-1M13,11h1v1h-1v-1M2,12h1v1h-1v-1M5,12h1v1h-1v-1M10,12h3v2h-1v-1h-2v-1M3,13h1v1h-1v-1M8,13h2v1h-2v-1M4,14h3v1h-3v-1M10,14h2v1h-2v-1M8,15h2v1h-2v-1"/><path fill="#8d8dcb" d="M9,6h1v3h-1v1h-2v-1h1v-1h1v-2M6,8h1v1h-1v-1"/><path fill="#8f563b" d="M2,8h1v1h-1v-1M12,9h1v1h-1v-1M2,10h1v1h-1v-1M13,10h1v1h-1v-1M7,11h1v1h-1v-1M11,11h2v1h-2v-1M4,12h1v1h1v-1h1v2h-3v-2M13,12h1v1h-1v-1M10,13h2v1h-2v-1M7,14h3v1h-3v-1"/><path fill="#a4a4d5" d="M8,4h1v4h-1v1h-1v-1h-1v-2h1v-1h1v-1"/><path fill="#ab764a" d="M4,7h1v1h-1v-1M3,8h1v2h-1v-2M12,10h1v1h-1v-1M2,11h1v1h-1v-1M8,11h1v1h-1v-1M3,12h1v1h-1v-1M7,12h1v2h-1v-2"/><path fill="#cacaea" d="M7,4h1v1h-1v-1M6,5h1v1h-1v-1"/><path fill="#d9a066" d="M3,7h1v1h-1v-1M11,7h1v1h-1v-1M12,8h2v2h-1v-1h-1v-1M3,10h2v1h-2v-1M5,11h2v1h-2v-1M9,11h2v1h-1v1h-2v-1h1v-1"/></svg>

Before

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 963 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 418 B

85
bucket.go Normal file
View File

@@ -0,0 +1,85 @@
package cuckoo
// entry pairs a key with the value stored under it.
type entry[K, V any] struct {
	// key identifies the entry.
	key K
	// value is the payload associated with key.
	value V
}
// slot is a single position inside a bucket. The embedded entry holds the
// key and value; occupied reports whether that entry is currently in use.
type slot[K, V any] struct {
	entry[K, V]

	// occupied is false for a slot that has never been filled or whose entry
	// has been dropped.
	occupied bool
}
// bucket is one of the slot arrays of the cuckoo table. A key maps to exactly
// one slot of the bucket: the slot at index hash(key) modulo capacity.
type bucket[K, V any] struct {
	// hash produces the digest used to pick a key's slot.
	hash Hash[K]
	// slots is the backing storage of the bucket.
	slots []slot[K, V]
	// capacity is the modulus applied to a digest to find a slot index.
	capacity uint64
	// size counts the occupied slots.
	size uint64
	// compare decides whether two keys are the same key.
	compare EqualFunc[K]
}
// location returns the index of the only slot in which key may live: the
// key's digest reduced modulo the bucket's capacity.
//
// The capacity must be non-zero; a zero capacity makes the modulo panic.
func (b bucket[K, V]) location(key K) uint64 {
	digest := b.hash(key)
	return digest % b.capacity
}
// get looks key up in the bucket.
//
// It returns the stored value and true when the key's slot is occupied by an
// equal key. Otherwise it returns the zero value of V and false; this covers
// an empty slot, a slot holding a different key, and a bucket with zero
// capacity (which has no slots to inspect).
func (b bucket[K, V]) get(key K) (value V, found bool) {
	// A zero-capacity bucket cannot hold anything, and computing a location
	// for it would divide by zero.
	if b.capacity == 0 {
		return value, false
	}

	s := &b.slots[b.location(key)]
	if !s.occupied || !b.compare(s.key, key) {
		// Never hand back a stale value or another key's value on a miss.
		return value, false
	}

	return s.value, true
}
// drop removes key from the bucket.
//
// It reports true when the key's slot was occupied by an equal key; in that
// case the slot is reset and the bucket's size is decremented. It reports
// false, and changes nothing, when the slot is empty, holds a different key,
// or the bucket has zero capacity.
func (b *bucket[K, V]) drop(key K) (occupied bool) {
	// A zero-capacity bucket holds nothing, and computing a location for it
	// would divide by zero.
	if b.capacity == 0 {
		return false
	}

	s := &b.slots[b.location(key)]
	if !s.occupied || !b.compare(s.key, key) {
		return false
	}

	// Reset the whole slot, not just the flag, so the dropped key and value
	// no longer keep anything they reference alive.
	*s = slot[K, V]{}
	b.size--

	return true
}
// resize replaces the bucket's storage with capacity fresh, unoccupied slots.
// Every entry previously held by the bucket is discarded and the size is
// reset to zero; the hash and compare functions are kept.
func (b *bucket[K, V]) resize(capacity uint64) {
	fresh := make([]slot[K, V], capacity)

	b.slots, b.capacity, b.size = fresh, capacity, 0
}
// update overwrites the value stored under key, but only if key is already
// present in the bucket. It never inserts.
//
// It reports true when the key's slot was occupied by an equal key and the
// value was replaced. It reports false when the slot is empty, holds a
// different key, or the bucket has zero capacity.
//
// The value receiver is sufficient here: the copied bucket shares the slots
// backing array with the original, and the size does not change.
func (b bucket[K, V]) update(key K, value V) (updated bool) {
	// A zero-capacity bucket holds nothing, and computing a location for it
	// would divide by zero.
	if b.capacity == 0 {
		return false
	}

	s := &b.slots[b.location(key)]
	if !s.occupied || !b.compare(s.key, key) {
		return false
	}

	s.value = value

	return true
}
// evict places insertion into the slot its key maps to, pushing out whatever
// different entry currently lives there.
//
// There are four outcomes:
//   - The bucket has zero capacity: nothing can be stored, so insertion is
//     handed straight back as the evicted entry and eviction is true.
//   - The slot is empty: insertion is stored, the size grows by one, and
//     eviction is false.
//   - The slot holds an equal key: only the value is replaced, and eviction
//     is false.
//   - The slot holds a different key: insertion takes the slot, the previous
//     occupant is returned as evicted, and eviction is true. The size is
//     unchanged.
//
// When eviction is false, evicted is the zero entry.
func (b *bucket[K, V]) evict(insertion entry[K, V]) (evicted entry[K, V], eviction bool) {
	// Computing a location for a zero-capacity bucket would divide by zero.
	// NOTE(review): this assumes the caller re-homes or grows on eviction —
	// confirm against the table's insertion loop.
	if b.capacity == 0 {
		return insertion, true
	}

	s := &b.slots[b.location(insertion.key)]

	switch {
	case !s.occupied:
		s.entry = insertion
		s.occupied = true
		b.size++

		return evicted, false
	case b.compare(s.key, insertion.key):
		s.value = insertion.value

		return evicted, false
	default:
		evicted, s.entry = s.entry, insertion

		return evicted, true
	}
}
// newBucket builds an empty bucket with capacity unoccupied slots. The hash
// selects a key's slot and compare decides whether two keys are the same.
func newBucket[K, V any](capacity uint64, hash Hash[K], compare EqualFunc[K]) bucket[K, V] {
	var b bucket[K, V]

	b.hash, b.compare = hash, compare
	b.capacity = capacity
	b.size = 0
	b.slots = make([]slot[K, V], capacity)

	return b
}

View File

@@ -2,7 +2,7 @@ package cuckoo
// An EqualFunc determines whether two keys are 'equal'. Keys that are 'equal'
// are treated as the same by the [Table]. A good EqualFunc is pure,
// deterministic, and fast. By default, [New] uses [DefaultEqualFunc].
// deterministic, and fast. By default, [NewTable] uses [DefaultEqualFunc].
//
// This function MUST NOT return true if the [Hash] digest of two keys
// are different: the [Table] will not work.

View File

@@ -28,7 +28,7 @@ func ExampleEqualFunc_badEqualFunc() {
// Two users with the same ID are equal.
isEqual := func(a, b User) bool { return a.ID == b.ID }
userbase := cuckoo.NewCustom[User, bool](makeHash(1), makeHash(2), isEqual)
userbase := cuckoo.NewCustomTable[User, bool](makeHash(1), makeHash(2), isEqual)
(userbase.Put(User{"1", "Robert Doe"}, true))

View File

@@ -1,13 +1,11 @@
package cuckoo_test
import (
"fmt"
"maps"
"os"
"bytes"
"encoding/binary"
"testing"
"github.com/stretchr/testify/assert"
go_fuzz_utils "github.com/trailofbits/go-fuzz-utils"
"git.maximhutz.com/tools/go-cuckoo"
)
@@ -21,73 +19,31 @@ func offsetHash(seed uint32) cuckoo.Hash[uint32] {
}
}
type fuzzStep struct {
drop bool
key, value uint32
}
type fuzzScenario struct {
seedA, seedB uint32
capacity, growthFactor uint8
steps []fuzzStep
}
func FuzzInsertLookup(f *testing.F) {
f.Fuzz(func(t *testing.T, data []byte) {
var scenario fuzzScenario
f.Fuzz(func(t *testing.T, data []byte, seedA, seedB uint32) {
assert := assert.New(t)
if tp, err := go_fuzz_utils.NewTypeProvider(data); err != nil {
return
} else if err := tp.Fill(&scenario); err != nil {
return
}
seedA, seedB := scenario.seedA, scenario.seedB
growthFactor := max(2, int(scenario.growthFactor))
capacity := int(scenario.capacity)
// If they are the same number, the hashes will clash, always causing an
// error.
if seedA == seedB {
t.Skip()
}
fmt.Fprintf(os.Stderr, "seedA=%d seedB=%d capacity=%d growthFactor=%d\n",
seedA, seedB, capacity, growthFactor)
actual := cuckoo.NewCustom[uint32, uint32](
table := cuckoo.NewCustomTable[uint32, uint32](
offsetHash(seedA),
offsetHash(seedB),
func(a, b uint32) bool { return a == b },
cuckoo.Capacity(capacity),
cuckoo.GrowthFactor(growthFactor),
)
expected := map[uint32]uint32{}
for _, step := range scenario.steps {
if step.drop {
ok := actual.Drop(step.key)
_, has := expected[step.key]
assert.Equal(ok, has)
delete(expected, step.key)
_, ok = actual.Get(step.key)
assert.False(ok)
} else {
err := actual.Put(step.key, step.value)
assert.NoError(err)
expected[step.key] = step.value
found, ok := actual.Get(step.key)
assert.True(ok)
assert.Equal(step.value, found)
if seedA == seedB {
return
}
assert.Equal(expected, maps.Collect(actual.Entries()))
r := bytes.NewReader(data)
var key, value uint32
for binary.Read(r, binary.LittleEndian, &key) == nil &&
binary.Read(r, binary.LittleEndian, &value) == nil {
err := table.Put(key, value)
assert.NoError(err)
found, err := table.Get(key)
assert.NoError(err)
assert.Equal(value, found)
}
})
}

View File

@@ -11,7 +11,7 @@ func TestMaxEvictions(t *testing.T) {
assert := assert.New(t)
for i := 16; i < 116; i++ {
table := New[int, bool](Capacity(i / 2))
table := NewTable[int, bool](Capacity(i / 2))
expectedEvictions := 3 * math.Floor(math.Log2(float64(i)))
assert.Equal(table.maxEvictions(), int(expectedEvictions))
@@ -20,7 +20,7 @@ func TestMaxEvictions(t *testing.T) {
func TestLoad(t *testing.T) {
assert := assert.New(t)
table := New[int, bool](Capacity(8))
table := NewTable[int, bool](Capacity(8))
for i := range 16 {
err := table.Put(i, true)

View File

@@ -1,7 +1,6 @@
package cuckoo_test
import (
"errors"
"maps"
"math/rand/v2"
"testing"
@@ -14,7 +13,7 @@ import (
func TestNewTable(t *testing.T) {
assert := assert.New(t)
table := cuckoo.New[int, bool]()
table := cuckoo.NewTable[int, bool]()
assert.NotNil(table)
assert.Zero(table.Size())
@@ -23,7 +22,7 @@ func TestNewTable(t *testing.T) {
func TestAddItem(t *testing.T) {
assert := assert.New(t)
key, value := 0, true
table := cuckoo.New[int, bool]()
table := cuckoo.NewTable[int, bool]()
err := table.Put(key, value)
@@ -35,7 +34,7 @@ func TestAddItem(t *testing.T) {
func TestPutOverwrite(t *testing.T) {
assert := assert.New(t)
key, value, newValue := 0, 1, 2
table := cuckoo.New[int, int]()
table := cuckoo.NewTable[int, int]()
(table.Put(key, value))
err := table.Put(key, newValue)
@@ -50,7 +49,7 @@ func TestPutOverwrite(t *testing.T) {
func TestSameHash(t *testing.T) {
assert := assert.New(t)
hash := func(int) uint64 { return 0 }
table := cuckoo.NewCustom[int, bool](hash, hash, cuckoo.DefaultEqualFunc[int])
table := cuckoo.NewCustomTable[int, bool](hash, hash, cuckoo.DefaultEqualFunc[int])
errA := table.Put(0, true)
errB := table.Put(1, true)
@@ -63,14 +62,14 @@ func TestSameHash(t *testing.T) {
func TestStartingCapacity(t *testing.T) {
assert := assert.New(t)
table := cuckoo.New[int, bool](cuckoo.Capacity(64))
table := cuckoo.NewTable[int, bool](cuckoo.Capacity(64))
assert.Equal(uint64(128), table.TotalCapacity())
}
func TestResizeCapacity(t *testing.T) {
assert := assert.New(t)
table := cuckoo.New[int, bool](
table := cuckoo.NewTable[int, bool](
cuckoo.Capacity(8),
cuckoo.GrowthFactor(2),
)
@@ -85,7 +84,7 @@ func TestResizeCapacity(t *testing.T) {
func TestPutMany(t *testing.T) {
assert := assert.New(t)
expected, actual := map[int]bool{}, cuckoo.New[int, bool]()
expected, actual := map[int]bool{}, cuckoo.NewTable[int, bool]()
for i := range 1_000 {
expected[i] = true
@@ -100,7 +99,7 @@ func TestPutMany(t *testing.T) {
func TestGetMany(t *testing.T) {
assert := assert.New(t)
table := cuckoo.New[int, bool]()
table := cuckoo.NewTable[int, bool]()
for i := range 1_000 {
err := table.Put(i, true)
@@ -108,119 +107,37 @@ func TestGetMany(t *testing.T) {
}
for i := range 2_000 {
value, ok := table.Get(i)
value, err := table.Get(i)
if i < 1_000 {
assert.True(ok)
assert.NoError(err)
assert.Equal(value, true)
} else {
assert.False(ok)
assert.Error(err)
}
}
}
func TestDropExistingItem(t *testing.T) {
func TestRemove(t *testing.T) {
assert := assert.New(t)
table := cuckoo.NewTable[int, bool]()
assert.False(table.Has(0))
err := table.Put(0, true)
assert.NoError(err)
assert.True(table.Has(0))
}
func TestDropItem(t *testing.T) {
assert := assert.New(t)
key, value := 0, true
table := cuckoo.New[int, bool]()
table := cuckoo.NewTable[int, bool]()
(table.Put(key, value))
had := table.Drop(key)
err := table.Drop(key)
assert.True(had)
assert.NoError(err)
assert.Equal(0, table.Size())
assert.False(table.Has(key))
}
func TestDropNoItem(t *testing.T) {
assert := assert.New(t)
key := 0
table := cuckoo.New[int, bool]()
had := table.Drop(key)
assert.False(had)
assert.Equal(0, table.Size())
assert.False(table.Has(key))
}
func TestDropItemCapacity(t *testing.T) {
assert := assert.New(t)
key := 0
table := cuckoo.New[int, bool](
cuckoo.Capacity(64),
cuckoo.GrowthFactor(2),
)
startingCapacity := table.TotalCapacity()
table.Drop(key)
endingCapacity := table.TotalCapacity()
assert.Equal(0, table.Size())
assert.Equal(uint64(128), startingCapacity)
assert.Equal(uint64(64), endingCapacity)
}
func TestPutNoCapacity(t *testing.T) {
assert := assert.New(t)
key, value := 0, true
table := cuckoo.New[int, bool](
cuckoo.Capacity(0),
)
err := table.Put(key, value)
assert.NoError(err)
assert.Equal(1, table.Size())
assert.True(table.Has(key))
}
func TestBadHashCapacity(t *testing.T) {
assert := assert.New(t)
table := cuckoo.NewCustom[int, bool](
func(int) uint64 { return 0 },
func(int) uint64 { return 0 },
func(a, b int) bool { return a == b },
cuckoo.Capacity(20),
)
err1 := table.Put(0, true)
err2 := table.Put(1, true)
err3 := table.Put(2, true)
assert.NoError(err1)
assert.NoError(err2)
assert.Error(err3)
assert.Equal(uint64(80), table.TotalCapacity())
}
func TestDropResizeCapacity(t *testing.T) {
assert := assert.New(t)
table := cuckoo.New[int, bool](
cuckoo.Capacity(10),
)
err1 := table.Put(0, true)
err2 := table.Put(1, true)
table.Drop(1)
assert.NoError(errors.Join(err1, err2))
assert.Equal(uint64(20), table.TotalCapacity())
}
func TestNewTableBy(t *testing.T) {
type User struct {
_ func()
id string
name string
}
assert := assert.New(t)
table := cuckoo.NewBy[User, bool](func(u User) string { return u.id })
err := table.Put(User{nil, "1", "Robert"}, true)
assert.NoError(err)
assert.Equal(1, table.Size())
assert.True(table.Has(User{nil, "1", "Robbie"}))
}

7
doc.go
View File

@@ -1,12 +1,9 @@
// Package cuckoo provides a hash table that uses cuckoo hashing to achieve
// a worst-case O(1) lookup time.
//
// While a [New] only supports comparable keys by default, you can create
// a table with any key type using [NewCustom]. Custom [Hash] functions and
// While [NewTable] supports only comparable keys by default, you can create
// a table with any key type using [NewCustomTable]. Custom [Hash] functions and
// key comparison are also supported.
//
// NOTE: The [Table] is a look-up structure, and not a source of truth. If
// [ErrBadHash] occurs, the data cannot be restored.
//
// See more: https://en.wikipedia.org/wiki/Cuckoo_hashing
package cuckoo

View File

@@ -8,25 +8,25 @@ import (
)
func Example_basic() {
table := cuckoo.New[int, string]()
table := cuckoo.NewTable[int, string]()
if err := table.Put(1, "Hello, World!"); err != nil {
fmt.Println("Put error:", err)
}
if item, ok := table.Get(1); !ok {
fmt.Println("Not Found 1!")
if item, err := table.Get(1); err != nil {
fmt.Println("Error:", err)
} else {
fmt.Println("Found 1:", item)
}
if item, ok := table.Get(0); !ok {
fmt.Println("Not Found 0!")
if item, err := table.Get(0); err != nil {
fmt.Println("Error:", err)
} else {
fmt.Println("Found 0:", item)
}
// Output:
// Found 1: Hello, World!
// Not Found 0!
// Error: key '0' not found
}

3
go.mod
View File

@@ -4,11 +4,8 @@ go 1.25.6
require github.com/stretchr/testify v1.11.1
require github.com/kr/pretty v0.3.1 // indirect
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/trailofbits/go-fuzz-utils v0.0.0-20260318143407-0907cafe7589
gopkg.in/yaml.v3 v3.0.1 // indirect
)

13
go.sum
View File

@@ -1,21 +1,10 @@
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/trailofbits/go-fuzz-utils v0.0.0-20260318143407-0907cafe7589 h1:UmBZCTPdDYore2IEHN+U4eIqEaRq6METh9pKiPumkqc=
github.com/trailofbits/go-fuzz-utils v0.0.0-20260318143407-0907cafe7589/go.mod h1:zh+T+w9XT/3o4E0WLEGCdmLJ8Yqx/zY3o538tQY3OjY=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -1,7 +1,5 @@
package cuckoo
import "fmt"
// DefaultCapacity is the initial capacity of a [Table]. It is inspired from
// Java's [HashMap] implementation, which also uses 16.
//
@@ -9,20 +7,14 @@ import "fmt"
const DefaultCapacity uint64 = 16
// DefaultGrowthFactor is the standard resize multiplier for a [Table]. Most
// implementations use 2.
// hash table implementations use 2.
const DefaultGrowthFactor uint64 = 2
// defaultMinimumLoad is the default lowest acceptable occupancy of a [Table].
// The higher the minimum load, the more likely that a [Table.Put] will not
// succeed. The value of 5% is taken from [libcuckoo].
// DefaultMinimumLoad is the default lowest acceptable occupancy of a [Table].
// The value of 5% is taken from [libcuckoo].
//
// [libcuckoo]: https://github.com/efficient/libcuckoo/blob/656714705a055df2b7a605eb3c71586d9da1e119/libcuckoo/cuckoohash_config.hh#L21
const defaultMinimumLoad float64 = 0.05
// defaultGrowthLimit is the maximum number of times a [Table] can grow in a
// single [Table.Put], before the library infers it will lead to a stack
// overflow. The value of '64' was chosen arbitrarily.
const defaultGrowthLimit uint64 = 64
const DefaultMinimumLoad float64 = 0.05
type settings struct {
growthFactor uint64
@@ -31,25 +23,23 @@ type settings struct {
}
// An Option modifies the settings of a [Table]. It is used in its constructors
// like [New], for example.
// like [NewTable], for example.
type Option func(*settings)
// Capacity modifies the starting capacity of each subtable of the [Table]. The
// value must be non-negative.
// Capacity modifies the starting capacity of each bucket of the [Table]. The
// value must be greater than 0.
func Capacity(value int) Option {
if value < 0 {
panic(fmt.Sprintf("go-cuckoo: Capacity must be non-negative, got %d", value))
}
return func(s *settings) { s.bucketSize = uint64(value) }
}
// MinimumLoad modifies the [DefaultMinimumLoad] of the [Table]. The value must
// be between 0.00 and 1.00.
func MinimumLoad(value float64) Option {
return func(s *settings) { s.minLoadFactor = value }
}
// GrowthFactor controls how much the capacity of the [Table] multiplies when
// it must resize. The value must be greater than 1.
func GrowthFactor(value int) Option {
if value < 2 {
panic(fmt.Sprintf("go-cuckoo: GrowthFactor must be greater than 1, got %d", value))
}
return func(s *settings) { s.growthFactor = uint64(value) }
}

View File

@@ -1,107 +0,0 @@
package cuckoo
// An entry is a key-value pair. It is the unit stored in a slot, and the
// unit handed back when an insertion evicts an existing pair.
type entry[K, V any] struct {
	// key is what the pair is hashed and compared by.
	key K
	// value is the payload stored for key.
	value V
}
// A slot is one position in a subtable. It embeds the entry stored at that
// position, together with a flag saying whether that entry is live.
type slot[K, V any] struct {
	entry[K, V]
	// occupied is true while the embedded entry holds a stored pair. The
	// zero value (false) marks an empty slot.
	occupied bool
}
// A subtable is a fixed-capacity array of slots addressed by a single hash
// function. Each key maps to exactly one slot in a given subtable.
type subtable[K, V any] struct {
	// hash maps a key to a number, which is reduced modulo capacity to pick
	// the key's slot.
	hash Hash[K]
	// slots is the backing storage; it is allocated with capacity elements.
	slots []slot[K, V]
	// capacity is the number of slots; size is how many are occupied.
	capacity, size uint64
	// compare reports whether two keys are equal.
	compare EqualFunc[K]
}
// location returns the index of the slot that a key maps to in this
// subtable: the key's hash reduced modulo the capacity. It panics if the
// capacity is 0, because the reduction divides by zero.
func (t *subtable[K, V]) location(key K) uint64 {
	digest := t.hash(key)
	return digest % t.capacity
}
// get looks up key in the subtable. It returns the stored value and true
// when the slot the key maps to is occupied by an equal key. Otherwise it
// returns the zero value of V and false, so a miss never exposes a value
// that belongs to a different key.
func (t *subtable[K, V]) get(key K) (value V, found bool) {
	// An empty subtable holds nothing, and location would divide by zero.
	if t.capacity == 0 {
		return value, false
	}

	// Read through a pointer so the slot is not copied on every lookup.
	candidate := &t.slots[t.location(key)]
	if !candidate.occupied || !t.compare(candidate.key, key) {
		return value, false
	}

	return candidate.value, true
}
// drop removes key from the subtable. It reports whether the slot the key
// maps to was occupied by an equal key, and therefore whether anything was
// removed. A removal decrements the subtable's size.
func (t *subtable[K, V]) drop(key K) (occupied bool) {
	// An empty subtable holds nothing, and location would divide by zero.
	if t.capacity == 0 {
		return false
	}

	target := &t.slots[t.location(key)]
	if !target.occupied || !t.compare(target.key, key) {
		return false
	}

	// Reset the whole slot rather than only the flag, so the dropped key
	// and value are no longer referenced and can be garbage-collected.
	*target = slot[K, V]{}
	t.size--

	return true
}
// resized returns a new, empty subtable with the given capacity that uses
// the same hash and comparison functions as t. The receiver is not modified
// and none of its entries are copied.
func (t *subtable[K, V]) resized(capacity uint64) *subtable[K, V] {
	return newSubtable[K, V](capacity, t.hash, t.compare)
}
// update overwrites the value stored for key, but only if key is already
// present in the subtable. It reports whether a value was overwritten. It
// never adds a new entry.
func (t *subtable[K, V]) update(key K, value V) (updated bool) {
	// An empty subtable holds nothing, and location would divide by zero.
	if t.capacity == 0 {
		return false
	}

	target := &t.slots[t.location(key)]
	if !target.occupied || !t.compare(target.key, key) {
		return false
	}

	target.value = value
	return true
}
// insert places an entry into the slot its key maps to. There are three
// outcomes:
//
//   - The slot is empty: the entry is stored and the size grows by one.
//   - The slot holds an equal key: only its value is replaced.
//   - The slot holds a different key: the entry takes the slot, and the
//     previous occupant is returned with eviction set to true.
//
// A subtable with no capacity cannot hold anything, so the insertion itself
// is returned as the evicted entry.
func (t *subtable[K, V]) insert(insertion entry[K, V]) (evicted entry[K, V], eviction bool) {
	if t.capacity == 0 {
		return insertion, true
	}

	target := &t.slots[t.location(insertion.key)]
	switch {
	case !target.occupied:
		target.entry = insertion
		target.occupied = true
		t.size++
		return evicted, false
	case t.compare(target.key, insertion.key):
		target.value = insertion.value
		return evicted, false
	default:
		// Swap: the newcomer takes the slot and the old occupant is evicted.
		previous := target.entry
		target.entry = insertion
		return previous, true
	}
}
// newSubtable allocates an empty subtable with capacity slots, using hash to
// place keys and compare to test them for equality.
func newSubtable[K, V any](capacity uint64, hash Hash[K], compare EqualFunc[K]) *subtable[K, V] {
	table := new(subtable[K, V])
	table.slots = make([]slot[K, V], capacity)
	table.capacity = capacity
	table.hash = hash
	table.compare = compare
	// size is left at its zero value: a fresh subtable holds no entries.
	return table
}

235
table.go
View File

@@ -1,206 +1,131 @@
package cuckoo
import (
"errors"
"fmt"
"iter"
"math/bits"
"strings"
)
// ErrBadHash occurs when the hashes given to a [Table] cause too many key
// collisions. Discard the old table, rebuild it from your source data, and try:
//
// 1. Different hash seeds. Equal seeds produce equal hash functions, which
// always cycle.
// 2. A different [Hash] algorithm.
var ErrBadHash = errors.New("bad hash")
// A Table which uses cuckoo hashing to resolve collision. Create
// one with [New]. Or if you want more granularity, use [NewBy] or
// [NewCustom].
// A Table is a hash table that uses cuckoo hashing to resolve collisions. Create
// one with [NewTable]. Or if you want more granularity, use [NewTableBy] or
// [NewCustomTable].
type Table[K, V any] struct {
tableA, tableB *subtable[K, V]
bucketA, bucketB bucket[K, V]
growthFactor uint64
minLoadFactor float64
}
// TotalCapacity returns the number of slots allocated for the [Table]. To get the
// number of slots filled, look at [Table.Size].
func (t *Table[K, V]) TotalCapacity() uint64 {
return t.tableA.capacity + t.tableB.capacity
func (t Table[K, V]) TotalCapacity() uint64 {
return t.bucketA.capacity + t.bucketB.capacity
}
// Size returns how many slots are filled in the [Table].
func (t *Table[K, V]) Size() int {
return int(t.tableA.size + t.tableB.size)
func (t Table[K, V]) Size() int {
return int(t.bucketA.size + t.bucketB.size)
}
func log2(n uint64) (m int) {
return max(0, bits.Len64(n)-1)
return bits.Len64(n) - 1
}
func (t *Table[K, V]) maxEvictions() int {
func (t Table[K, V]) maxEvictions() int {
return 3 * log2(t.TotalCapacity())
}
func (t *Table[K, V]) load() float64 {
// When there are no slots in the table, we still treat the load as 100%.
// Every slot in the table is full.
if t.TotalCapacity() == 0 {
return 1.0
}
func (t Table[K, V]) load() float64 {
return float64(t.Size()) / float64(t.TotalCapacity())
}
// insert puts or updates an entry without ever changing the capacity of the
// table. A key that already exists in either subtable has its value
// overwritten in place. Otherwise the entry is inserted by alternating
// between the two subtables, carrying each evicted entry over to the other
// one, for at most maxEvictions rounds.
//
// If an entry is still left without a slot after the last round, it is
// returned as displaced with homeless set to true. Note that it may not be
// the entry that was originally passed in.
func (t *Table[K, V]) insert(entry entry[K, V]) (displaced entry[K, V], homeless bool) {
	if t.tableA.update(entry.key, entry.value) || t.tableB.update(entry.key, entry.value) {
		return displaced, false
	}

	for round, limit := 0, t.maxEvictions(); round < limit; round++ {
		if entry, homeless = t.tableA.insert(entry); !homeless {
			return displaced, false
		}

		if entry, homeless = t.tableB.insert(entry); !homeless {
			return displaced, false
		}
	}

	return entry, true
}
// resized returns a new, empty table that keeps this table's settings and
// hash functions, but gives each of its two subtables the given capacity.
// The receiver is not modified and none of its entries are copied.
func (t *Table[K, V]) resized(capacity uint64) *Table[K, V] {
	clone := new(Table[K, V])
	clone.tableA = t.tableA.resized(capacity)
	clone.tableB = t.tableB.resized(capacity)
	clone.growthFactor = t.growthFactor
	clone.minLoadFactor = t.minLoadFactor
	return clone
}
// resize creates a new [Table.resized] with 'capacity', inserts all items into
// the array, and replaces the current table. It is a helper function for
// [Table.grow] and [Table.shrink]; use them instead.
func (t *Table[K, V]) resize(capacity uint64) bool {
updated := t.resized(capacity)
func (t *Table[K, V]) resize(capacity uint64) error {
entries := make([]entry[K, V], 0, t.Size())
for k, v := range t.Entries() {
if _, failed := updated.insert(entry[K, V]{k, v}); failed {
return false
entries = append(entries, entry[K, V]{k, v})
}
t.bucketA.resize(capacity)
t.bucketB.resize(capacity)
for _, entry := range entries {
if err := t.Put(entry.key, entry.value); err != nil {
return err
}
}
*t = *updated
return true
return nil
}
// grow increases the table's capacity by the growth factor. If the
// capacity is 0, it increases it to 1.
func (t *Table[K, V]) grow() bool {
var newCapacity uint64
if t.TotalCapacity() == 0 {
newCapacity = 1
} else {
newCapacity = t.tableA.capacity * t.growthFactor
// Get fetches the value for a key in the [Table]. Returns an error if no value
// is found.
func (t Table[K, V]) Get(key K) (value V, err error) {
if item, ok := t.bucketA.get(key); ok {
return item, nil
}
return t.resize(newCapacity)
}
// shrink reduces the table's capacity by the growth factor. It may
// reduce it down to 0.
func (t *Table[K, V]) shrink() bool {
return t.resize(t.tableA.capacity / t.growthFactor)
}
// Get fetches the value for a key in the [Table]. Matches the comma-ok pattern
// of a builtin map; see [Table.Find] for plain indexing.
func (t *Table[K, V]) Get(key K) (value V, ok bool) {
if item, ok := t.tableA.get(key); ok {
return item, true
if item, ok := t.bucketB.get(key); ok {
return item, nil
}
if item, ok := t.tableB.get(key); ok {
return item, true
}
return
}
// Find fetches the value of a key. Matches direct indexing of a builtin map;
// see [Table.Get] for a comma-ok pattern.
func (t *Table[K, V]) Find(key K) (value V) {
value, _ = t.Get(key)
return
return value, fmt.Errorf("key '%v' not found", key)
}
// Has returns true if a key has a value in the table.
func (t *Table[K, V]) Has(key K) (exists bool) {
_, exists = t.Get(key)
return
func (t Table[K, V]) Has(key K) (exists bool) {
_, err := t.Get(key)
return err == nil
}
// Put sets the value for a key. If it cannot be set, an error is returned.
// Put sets the value for a key. Returns error if its value cannot be set.
func (t *Table[K, V]) Put(key K, value V) (err error) {
var (
entry = entry[K, V]{key, value}
homeless bool
)
for range defaultGrowthLimit {
if entry, homeless = t.insert(entry); !homeless {
return
if t.bucketA.update(key, value) {
return nil
}
if t.bucketB.update(key, value) {
return nil
}
entry, eviction := entry[K, V]{key, value}, false
for range t.maxEvictions() {
if entry, eviction = t.bucketA.evict(entry); !eviction {
return nil
}
if entry, eviction = t.bucketB.evict(entry); !eviction {
return nil
}
}
// Both this and the growth limit are necessary: this catches bad hashes
// early when the table is sparse, while the latter catches cases where
// growing never helps.
if t.load() < t.minLoadFactor {
return fmt.Errorf("hash functions produced a cycle at load %d/%d: %w", t.Size(), t.TotalCapacity(), ErrBadHash)
return fmt.Errorf("bad hash: resize on load %d/%d = %f", t.Size(), t.TotalCapacity(), t.load())
}
// It is theoretically possible to have a table with a larger capacity
// that is valid. But this chance is astronomically small, so we ignore
// it in this implementation.
if grew := t.grow(); !grew {
return fmt.Errorf("could not redistribute entries into larger table: %w", ErrBadHash)
}
if err := t.resize(t.growthFactor * t.bucketA.capacity); err != nil {
return err
}
return fmt.Errorf("could not place entry after %d resizes: %w", defaultGrowthLimit, ErrBadHash)
return t.Put(entry.key, entry.value)
}
// Drop removes a value for a key in the table. Returns whether the key had
// existed.
func (t *Table[K, V]) Drop(key K) bool {
occupied := t.tableA.drop(key) || t.tableB.drop(key)
// Drop removes a value for a key in the table. Returns an error if its value
// cannot be removed.
func (t *Table[K, V]) Drop(key K) (err error) {
t.bucketA.drop(key)
t.bucketB.drop(key)
if t.load() < t.minLoadFactor {
// The error is not handled here, because table-shrinking is an internal
// optimization.
t.shrink()
return t.resize(t.bucketA.capacity / t.growthFactor)
}
return occupied
return nil
}
// Entries returns an unordered sequence of all key-value pairs in the table.
func (t *Table[K, V]) Entries() iter.Seq2[K, V] {
func (t Table[K, V]) Entries() iter.Seq2[K, V] {
return func(yield func(K, V) bool) {
for _, slot := range t.tableA.slots {
for _, slot := range t.bucketA.slots {
if slot.occupied {
if !yield(slot.key, slot.value) {
return
@@ -208,7 +133,7 @@ func (t *Table[K, V]) Entries() iter.Seq2[K, V] {
}
}
for _, slot := range t.tableB.slots {
for _, slot := range t.bucketB.slots {
if slot.occupied {
if !yield(slot.key, slot.value) {
return
@@ -219,8 +144,8 @@ func (t *Table[K, V]) Entries() iter.Seq2[K, V] {
}
// String returns the entries of the table as a string in the format:
// "table[k1:v1 k2:v2 ...]".
func (t *Table[K, V]) String() string {
// "table[k1:v1 k2:v2 ...]".
func (t Table[K, V]) String() string {
var sb strings.Builder
sb.WriteString("table[")
@@ -238,13 +163,13 @@ func (t *Table[K, V]) String() string {
return sb.String()
}
// NewCustom creates a [Table] with custom [Hash] and [EqualFunc]
// NewCustomTable creates a [Table] with custom [Hash] and [EqualFunc]
// functions, along with any [Option] the user provides.
func NewCustom[K, V any](hashA, hashB Hash[K], compare EqualFunc[K], options ...Option) *Table[K, V] {
func NewCustomTable[K, V any](hashA, hashB Hash[K], compare EqualFunc[K], options ...Option) *Table[K, V] {
settings := &settings{
growthFactor: DefaultGrowthFactor,
bucketSize: DefaultCapacity,
minLoadFactor: defaultMinimumLoad,
minLoadFactor: DefaultMinimumLoad,
}
for _, option := range options {
@@ -254,8 +179,8 @@ func NewCustom[K, V any](hashA, hashB Hash[K], compare EqualFunc[K], options ...
return &Table[K, V]{
growthFactor: settings.growthFactor,
minLoadFactor: settings.minLoadFactor,
tableA: newSubtable[K, V](settings.bucketSize, hashA, compare),
tableB: newSubtable[K, V](settings.bucketSize, hashB, compare),
bucketA: newBucket[K, V](settings.bucketSize, hashA, compare),
bucketB: newBucket[K, V](settings.bucketSize, hashB, compare),
}
}
@@ -263,10 +188,10 @@ func pipe[X, Y, Z any](a func(X) Y, b func(Y) Z) func(X) Z {
return func(x X) Z { return b(a(x)) }
}
// NewBy creates a [Table] for any key type by using keyFunc to derive a
// NewTableBy creates a [Table] for any key type by using keyFunc to derive a
// comparable key. Two keys with the same derived key are treated as equal.
func NewBy[K, V any, C comparable](keyFunc func(K) C, options ...Option) *Table[K, V] {
return NewCustom[K, V](
func NewTableBy[K, V any, C comparable](keyFunc func(K) C, options ...Option) *Table[K, V] {
return NewCustomTable[K, V](
pipe(keyFunc, NewDefaultHash[C]()),
pipe(keyFunc, NewDefaultHash[C]()),
func(a, b K) bool { return keyFunc(a) == keyFunc(b) },
@@ -274,10 +199,10 @@ func NewBy[K, V any, C comparable](keyFunc func(K) C, options ...Option) *Table[
)
}
// New creates a [Table] using the default [Hash] and [EqualFunc]. Use
// NewTable creates a [Table] using the default [Hash] and [EqualFunc]. Use
// the [Option] functions to configure its behavior. Note that this constructor
// is only provided for comparable keys. For arbitrary keys, consider
// [NewBy] or [NewCustom].
func New[K comparable, V any](options ...Option) *Table[K, V] {
return NewCustom[K, V](NewDefaultHash[K](), NewDefaultHash[K](), DefaultEqualFunc[K], options...)
// [NewTableBy] or [NewCustomTable].
func NewTable[K comparable, V any](options ...Option) *Table[K, V] {
return NewCustomTable[K, V](NewDefaultHash[K](), NewDefaultHash[K](), DefaultEqualFunc[K], options...)
}