2 Commits

Author SHA1 Message Date
9726670dea feat: only one CI job at a time per PR
All checks were successful
CI / lint (pull_request) Successful in 52s
CI / unit-test (pull_request) Successful in 26s
CI / fuzz-test (pull_request) Successful in 1m3s
CI / mutation-test (pull_request) Successful in 43s
This prevents the Gitea runner from getting clogged up with stale CI checks.
2026-03-17 19:51:19 -04:00
62bc15a979 refactor: rename lint.yml to ci.yml
The `lint.yml` workflow contains more than just linting jobs. Renaming it to `ci.yml`, which is more general, and common practice.
2026-03-17 19:49:21 -04:00
28 changed files with 247 additions and 1247 deletions

View File

@@ -1,26 +0,0 @@
# yaml-language-server: $schema=https://www.schemastore.org/gitea-issue-forms.json
name: 🐛 Bug Report
about: Report a bug in this project
title: "[BUG]: "
body:
- type: textarea
id: context
attributes:
label: Context
placeholder: What circumstances led to the bug?
validations:
required: true
- type: textarea
id: expected-behavior
attributes:
label: Expected Behavior
placeholder: What did you expect would happen?
validations:
required: true
- type: textarea
id: actual-behavior
attributes:
label: Actual Behavior
placeholder: What happened, and why was it unexpected?
validations:
required: true

View File

@@ -1,37 +0,0 @@
# 'Feature Request Template' By @cheehwatang
# https://github.com/cheehwatang/.github/blob/master/.github/ISSUE_TEMPLATE/feature_request.yml
#
# yaml-language-server: $schema=https://www.schemastore.org/gitea-issue-forms.json
name: ✨ Feature Request
about: Suggest an idea for this project
title: "[FEATURE]: "
body:
- type: dropdown
attributes:
multiple: false
label: Feature Type
options:
- "✨ New Feature"
- "📝 Documentation"
- "🎨 Style and UI"
- "🔨 Code Refactor"
- "⚡ Performance Improvements"
- "✅ New Test"
validations:
required: true
- type: textarea
id: description
attributes:
label: Description
placeholder: |
Give us a brief description of the feature or enhancement you would
like!
validations:
required: true
- type: textarea
id: additional-information
attributes:
label: Additional Information
placeholder: |
Give us some additional information on the feature request like proposed
solutions, links, screenshots, etc.

View File

@@ -1,2 +0,0 @@
# yaml-language-server: $schema=https://www.schemastore.org/gitea-issue-config.json
blank_issues_enabled: false

View File

@@ -1,17 +0,0 @@
---
name: "New Pull Request"
about: "Standard PR template"
title: ""
ref: "main"
---
## Description
## Changes
### Design Decisions
## Checklist
- [ ] Tests pass
- [ ] Docs updated

View File

@@ -1,26 +1,16 @@
name: CI name: CI
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
on: on:
push: push:
branches: [main] branches: [main]
pull_request: pull_request:
jobs: jobs:
check-pr-title: lint:
name: Check PR Title
runs-on: ubuntu-latest
if: github.event_name == 'pull_request'
env:
TITLE: ${{ gitea.event.pull_request.title }}
steps:
- run: |
if ! echo "$TITLE" | grep -qE '^(WIP: )?(feat|fix|docs|chore|ci|test|refactor|perf|build|style|revert)(\(.+\))?(!)?: .+'; then
echo "::error::Pull Request title must follow conventional commits"
exit 1
fi
lint-go:
name: Go Lint
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -29,39 +19,11 @@ jobs:
with: with:
go-version-file: go.mod go-version-file: go.mod
- name: Check go mod tidy
run: go mod tidy && git diff --exit-code go.mod go.sum
- uses: golangci/golangci-lint-action@v7 - uses: golangci/golangci-lint-action@v7
with: with:
version: latest version: latest
lint-makefile: unit-test:
name: Makefile Lint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
- name: Install gremlins
run: go install github.com/checkmake/checkmake/cmd/checkmake@latest
- name: Run mutation tests
run: make lint-makefile
lint-markdown:
name: Markdown Lint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: DavidAnson/markdownlint-cli2-action@v19
test-unit:
name: Unit Tests
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -71,10 +33,9 @@ jobs:
go-version-file: go.mod go-version-file: go.mod
- name: Run unit tests - name: Run unit tests
run: make test-unit run: go test ./... -cover -v
test-fuzz: fuzz-test:
name: Fuzz Tests
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -84,10 +45,12 @@ jobs:
go-version-file: go.mod go-version-file: go.mod
- name: Run fuzz tests - name: Run fuzz tests
run: make test-fuzz run: |
for func in $(grep -r --include='*_test.go' -oh 'func Fuzz\w*' . | sed 's/func //'); do
go test ./... -fuzz="^${func}$" -fuzztime=30s
done
test-mutation: mutation-test:
name: Mutation Tests
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -100,4 +63,4 @@ jobs:
run: go install github.com/go-gremlins/gremlins/cmd/gremlins@latest run: go install github.com/go-gremlins/gremlins/cmd/gremlins@latest
- name: Run mutation tests - name: Run mutation tests
run: make test-mutation run: gremlins unleash

1
.gitignore vendored
View File

@@ -24,3 +24,4 @@ go.work.sum
# env file # env file
.env .env

View File

@@ -114,9 +114,6 @@ linters:
# Reports uses of functions with replacement inside the testing package. # Reports uses of functions with replacement inside the testing package.
- usetesting - usetesting
# Reports mixed receiver types in structs/interfaces.
- recvcheck
settings: settings:
revive: revive:
rules: rules:

View File

@@ -5,5 +5,3 @@ unleash:
workers: 4 workers: 4
dry-run: false dry-run: false
threshold:
efficacy: 1.0

View File

@@ -1,17 +0,0 @@
default: true
heading-style:
style: atx
ul-indent:
indent: 2
line-length: false
no-duplicate-heading:
siblings_only: true
no-inline-html:
allowed_elements:
- br
- details
- summary
- img
- picture
- source
first-line-heading: true

View File

@@ -1,47 +1,12 @@
.PHONY: all help install clean test-unit test-mutation test-fuzz test docs lint-go lint-makefile lint-markdown lint unit:
help: ## Show this help
@grep -E '^[a-zA-Z_-]+:.*##' $(MAKEFILE_LIST) | awk -F ':.*## ' '{printf " %-15s %s\n", $$1, $$2}'
install: ## Install dev tools
go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
go install github.com/checkmake/checkmake/cmd/checkmake@latest
go install github.com/go-gremlins/gremlins/cmd/gremlins@latest
go install golang.org/x/tools/cmd/godoc@latest
go install golang.org/x/vuln/cmd/govulncheck@latest
test-unit: ## Run unit tests with coverage
go test ./... -cover -v go test ./... -cover -v
test-mutation: ## Run mutation tests with gremlins mutation:
gremlins unleash gremlins unleash
FUZZ_TIME ?= 30 fuzz:
go test ./... -fuzz=$(FN)
test-fuzz: ## Run all fuzz tests for 30s each docs:
@for func in $$(grep -r --include='*_test.go' -oh 'func Fuzz\w*' . | sed 's/func //'); do \ @echo ">>> Visit: http://localhost:6060/pkg/git.maximhutz.com/tools/dsa/"
echo "Fuzzing $$func..."; \
go test ./... -fuzz="^$$func$$" -fuzztime=$(FUZZ_TIME)s; \
done
test: test-unit test-mutation test-fuzz ## Run all tests
lint-go: ## Lint Go code
golangci-lint run ./...
lint-makefile: ## Lint the Makefile
checkmake Makefile
lint-markdown: ## Lint Markdown files
docker run --rm -v $(CURDIR):/workdir davidanson/markdownlint-cli2 "**/*.md"
lint: lint-go lint-makefile lint-markdown ## Lint all code
docs: ## Serve godoc locally
@echo ">>> Visit: http://localhost:6060/pkg/$$(go list -m)"
godoc -http=:6060 godoc -http=:6060
clean: ## Clean build and test caches
go clean -cache -testcache
all: lint test ## Run all checks and tests

View File

@@ -1,3 +1,3 @@
# <img height="30" src="assets/logo.svg" alt="Go Cuckoo, by `mvhutz`."> Go Cuckoo # go-cuckoo
A hash table that uses cuckoo hashing to achieve a worst-case O(1) lookup time. Read more about it in [the package documentation](https://pkg.go.dev/git.maximhutz.com/tools/go-cuckoo). A hash table that uses cuckoo hashing to achieve a worst-case O(1) lookup time.

View File

@@ -1,542 +0,0 @@
# Designing an Idiomatic API Interface
We (the maintainers) built `go-cuckoo`'s API interface without design intent.
Up until now, we paid more attention implementing the underlying functionality of the cuckoo hashing.
With the fundamentals of the algorithm built, we should revisit the interface.
It should align closer to the following principles:
- **Congruency**
A `go-cuckoo` table should have the same core functionality as Go's built-in map.
- **Familiarity**
A `go-cuckoo` table should behave similarly to Go's standard map, so users will intuitively know how to use it.
In effect, its users will carry less cognitive load.
## Current State
### Interface of the built-in Map
Listed below is every interface provided by Go to the built-in map object.
Also included, are the functions from the package `maps` in the standard library.
<details>
<summary>Interfaces</summary>
| # | built-in Interface | Description |
| --- | ---------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
| 1 | `m := make(map[K]V)` | Returns an empty map using the built-in `make()` function. |
| 2 | `m := make(map[K]V, hint)` | Returns an empty map using `make()`, with a capacity 'hint'. This hint is how many items the map expects to hold, _not_ a measure of how large it is. |
| 3 | `m := map[K]V{...}` | Returns a map, which may be filled with entries in the ellipsis (optional). |
| 4 | `var m map[K]V` | Defines an empty _variable_ that holds a map. This differs from #1 because `m` is uninitialized (nil) here. |
| 5 | `m[k] = v` | Assigns the value `v` to the key `k`. |
| 6 | `v := m[k]` | Returns the value of `k` if it exists. Otherwise, `v` is the zero value of `V`. |
| 7 | `v, ok := m[k]` | Similar to #6, except `ok` reports whether `k` is present in `m`. This is comma-ok notation. |
| 8 | `for k, v := range m` | Iterates over every key-value pair in `m`. The order is random. |
| 9 | `delete(m, k)` | Unassigns the value `k`. Returns no value. |
| 10 | `clear(m)` | Unassigns all keys in `m`. Returns no value. |
| 11 | `n := len(m)` | Returns the number of entries in `m`. If nil, `m` returns 0. |
| 12 | `m2 := maps.Clone(m)` | Returns a copy of `m`. |
| 13 | `maps.Copy(dst, src)` | Assigns every entry of `src` in `dst`. |
| 14 | `ok := maps.Equal(m1, m2)` | Returns true iff `m1` and `m2` have the same entries. |
| 15 | `ok := maps.EqualFunc(m1, m2, fn)` | Like #14, but with a custom comparator for non-comparable values. |
| 16 | `maps.DeleteFunc(m, fn)` | Removes every entry in `m` which satisfies `fn`. Returns no value. |
| 17 | `it2 := maps.All(m)` | Returns a 2D iterator over every key-value pair. |
| 18 | `it := maps.Keys(m)` | Returns an iterator over every key. |
| 19 | `it := maps.Values(m)` | Returns an iterator over every value. There can be duplicates. |
| 20 | `m := maps.Collect(seq)` | Returns a map, with every entry defined in a 2D iterator over key-value pairs. |
| 21 | `maps.Insert(m, seq)` | Assigns to `m` all key-value pairs in 2D iterator `seq`. Returns no value. |
</details>
### Interface of `go-cuckoo`
On the other hand, here is the current contract for `go-cuckoo`.
<details>
<summary>Interfaces</summary>
| # | `go-cuckoo` Interface | Description |
| --- | -------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- |
| 1 | `m := New(opts...)` | Creates a table using the default hash and equal function. The options configure its behavior. Confined to comparable keys. |
| 2 | `m := NewBy(keyFunc, opts...)` | Like #1, but allows any key type. A `keyFunc` is used to derive a comparable key. |
| 3 | `m := NewCustom(hashA, hashB, equalFunc, opts...)` | Like #1, but allows control over the hashes used to allow any key type. An `equalFunc` determines key equality. |
| 4 | `seq := m.Entries()` | Returns an unordered 2D iterator of all key-value pairs in the table. |
| 5 | `v := m.Find(k)` | Returns the value for `k` in the table, without reporting whether `k` exists. |
| 6 | `v, ok := m.Get(k)` | Returns the value for `k` in the table. Also, returns true if the `k` exists, otherwise false. When false, `v` is undefined. |
| 7 | `ok := m.Has(k)` | Returns true if `k` is in the table. |
| 8 | `err := m.Put(k, v)` | Sets value `v` for key `k`. Otherwise, returns error. |
| 9 | `n := m.Size()` | Returns the number of items in `m`. |
| 10 | `str := m.String()` | Returns `m` as a string in the format "table[k1:v1 k2:v2 ...]". |
| 11 | `cap := m.TotalCapacity()` | Returns how many slots `m` has allocated. |
| 12 | `ok := m.Drop(k)` | Removes `k` from the table. Returns whether the key had existed. |
</details>
### Determining Congruency
So, how does the core functionality compare?
Listed below is an analysis of every interface in Go's standard map.
Each is compared against what `go-cuckoo` offers, and categorized into the following groups:
- ✅ Covered: an analog exists.
- ⚠️ Partial: workaround available.
- ❌ Gap: no analog yet; addressed in [Target State](#solving-congruency).
Specifically, here we are checking for functionality.
Is there functionality that this offers which `go-cuckoo` does not?
We are checking accessibility, but not discoverability.
The latter will be considered later.
<details>
<summary>✅ <code>m := make(map[K]V)</code></summary>
The analog is `m := New()`.
</details>
<details>
<summary>⚠️ <code>m := make(map[K]V, hint)</code></summary>
This has no simple analog.
It is close to `m := New(Capacity(hint))`, but it assigns starting capacity, not expected size.
For the built-in map, these are two separate things.
- Capacity is an internal measure, used to optimize space/speed.
It is hidden from the user because it depends on the underlying implementation, which may change.
- Expected size is the number of items the map must be able to hold before resizing.
This is tangible and agnostic to implementation, hence why it is given to the user.
In short, this interface defines expected size, but `Capacity()` defines capacity.
</details>
<details>
<summary>❌ <code>m := map[K]V{...}</code></summary>
This has no simple analog, the closest being:
```go
m := New[K, V]()
for k, v := range startingEntries {
m.Put(k, v)
}
```
It is idiomatic, but far less ergonomic.
</details>
<details>
<summary>✅ <code>var m map[K]V</code></summary>
The analog is `var m Table[K, V]`.
</details>
<details>
<summary>✅ <code>m[k] = v</code></summary>
The analog is `err := m.Put(k, v)`.
</details>
<details>
<summary>✅ <code>v := m[k]</code></summary>
The analog is `v := m.Find(k)`.
</details>
<details>
<summary>✅ <code>v, ok := m[k]</code></summary>
The analog is `v, ok := m.Get(k)`.
</details>
<details>
<summary>✅ <code>for k, v := range m</code></summary>
The analog is `for k, v := range m.Entries()`.
</details>
<details>
<summary>✅ <code>delete(m, k)</code></summary>
The analog is `ok := m.Drop(k)`.
</details>
<details>
<summary>❌ <code>clear(m)</code></summary>
There is no analog.
The easiest way to do this is to delete all items individually:
```go
for k := range m.Entries() {
m.Drop(k)
}
```
</details>
<details>
<summary>✅ <code>n := len(m)</code></summary>
The analog is `n := m.Size()`.
</details>
<details>
<summary>❌ <code>m2 := maps.Clone(m)</code></summary>
There is no analog.
The easiest way to do this currently is to make a new map, and manually add the items.
```go
m2 := cuckoo.New[K, V]()
for k, v := range m.Entries() {
m2.Put(k, v)
}
```
This gets complicated by the various options available to the user.
Furthermore, any custom `EqualFunc`, `keyFunc` or `Hash` is not transferred.
</details>
<details>
<summary>❌ <code>maps.Copy(dst, src)</code></summary>
There is no analog.
The simplest way to do this is with a for-loop.
```go
for k, v := range src.Entries() {
dst.Put(k, v)
}
```
</details>
<details>
<summary>❌ <code>ok := maps.Equal(m1, m2)</code></summary>
There is no analog.
Users have to manually check the key-value pairs to determine equality.
</details>
<details>
<summary>❌ <code>ok := maps.EqualFunc(m1, m2, fn)</code></summary>
There is no analog.
Users have to manually check the key-value pairs to determine equality.
</details>
<details>
<summary>❌ <code>maps.DeleteFunc(m, fn)</code></summary>
There is no analog.
Users have to manually delete keys.
</details>
<details>
<summary>✅ <code>it2 := maps.All(m)</code></summary>
The analog is `it2 := m.Entries()`.
</details>
<details>
<summary>⚠️ <code>it := maps.Keys(m)</code></summary>
There is no simple analog.
A close neighbor is `it2 := m.Entries()`.
Users can use this in a for-loop, and pick out just the keys:
```go
for k := range m.Entries() {
// ...
}
```
</details>
<details>
<summary>⚠️ <code>it := maps.Values(m)</code></summary>
There is no simple analog.
A close neighbor is `it2 := m.Entries()`.
Users can use this in a for-loop, and pick out just the values:
```go
for _, v := range m.Entries() {
// ...
}
```
</details>
<details>
<summary>❌ <code>m := maps.Collect(seq)</code></summary>
There is no analog.
</details>
<details>
<summary>❌ <code>maps.Insert(m, seq)</code></summary>
There is no analog.
</details>
## Target State
### Solving Congruency
We should make the following changes to accommodate congruency:
<details>
<summary><code>ok := maps.EqualFunc(m1, m2, fn)</code></summary>
We should implement a new function:
```go
func EqualFunc[K, V1, V2 any](t1 *Table[K, V1], t2 *Table[K, V2], eq func(V1, V2) bool) bool
```
This function is free, and not bound as a receiver function.
(It is called as `cuckoo.EqualFunc(t1, t2, eq)`, not `t1.EqualFunc(t2, eq)`.)
The latter implies `t1` has authority, when in fact neither table does.
We define equality as:
1. Neither table has a key the other doesn't.
2. Each key has the same value in each table.
Parameter `eq` determines this equality.
Custom `EqualFunc`'s complicate this, as they modulate key identity in tables.
If two tables may differ on whether two keys are different, this function might break.
So, we must assume that:
- Both tables have `EqualFunc`'s which 'agree' on the identity of the keys present in the tables.
Agreement is defined as: if two keys are distinct in one table, they are distinct in the other.
The name `EqualFunc` is already taken by `EqualFunc[K, V]`: an alias for `func(a, b K) bool`.
Inlining `EqualFunc[K, V]` would solve this problem.
We will move the documentation attached to it to `DefaultEqualFunc`.
</details>
<details>
<summary><code>ok := maps.Equal(m1, m2)</code></summary>
We should implement a new function, to conform with the standard library:
```go
func Equal[K any, V comparable](t1, t2 *Table[K, V]) bool
```
It uses the same equality check as in `EqualFunc`.
Once again, the function is free because it is symmetric.
</details>
<details>
<summary><code>maps.Insert(m, seq)</code></summary>
We should implement a new receiver for the table:
```go
func (t *Table[K, V]) Insert(seq iter.Seq2[K, V]) error
```
A receiver fits better even though `maps.Insert` is a free function, because inserting is asymmetric.
Table `t` receives entries from the sequence `seq`.
It's only free because Go's standard map is built into the language, and so cannot have receivers.
In terms of naming, `t.Extend` is more accurate, and has precedent in [Python](https://docs.python.org/3/tutorial/datastructures.html#more-on-lists) and [Rust](https://doc.rust-lang.org/std/iter/trait.Extend.html).
When [adding iterator functions](https://github.com/golang/go/issues/61900) to the `maps` package, the Go team chose to frame them as 'sources' and 'sinks'.
With this model, `maps.Insert` made more sense than `maps.Extend`.
Ultimately, `t.Insert()` is a better choice to be consistent with `maps`.
</details>
<details>
<summary><code>maps.Copy(dst, src)</code></summary>
We should implement a new receiver for the table:
```go
func (t *Table[K, V]) Copy(src *Table[K, V]) error
```
Its functionality should match that of `t.Insert()`.
A receiver fits better even though `maps.Copy` is a free function, because copying is asymmetric: `src` is written into `dst`.
It is only free because Go's standard map is built into the language, and so cannot have receivers.
The name `t.Merge()` might be more accurate, but it does not work because:
- `t.Copy()` matches Go's built-in `copy()`, and `io.Copy()`. The Go team used [the same logic](https://github.com/golang/go/discussions/47330#discussioncomment-1167799) to name `maps.Copy()`.
In this case, `t.Merge()` would be an outlier.
- `t.Merge()` implies some sort of conflict-resolution, when there is not.
It simply overwrites the values.
</details>
<details>
<summary><code>maps.DeleteFunc(m, fn)</code></summary>
We should implement a new receiver for the table:
```go
func (t *Table[K, V]) DeleteFunc(del func(K, V) bool)
```
It would have the same functionality as `maps.DeleteFunc`.
A free function could work here, but `t` has clear authority over `del`.
Other than being consistent with the `maps` package, `t.DeleteFunc` follows the Go convention of appending `Func` to higher-order equivalents of functions.
This trumps names like `t.DeleteIf`, which lend more to [Java](https://docs.oracle.com/javase/8/docs/api/java/util/ArrayList.html#removeIf-java.util.function.Predicate-) or [C++](https://en.cppreference.com/cpp/algorithm/remove).
The word `Delete` is also convention, tying back to the built-in `delete()`.
</details>
<details>
<summary><code>m := maps.Collect(seq)</code></summary>
We should implement a new constructor.
```go
func Collect[K comparable, V any](seq iter.Seq2[K, V]) (*Table[K, V], error)
```
It would create a `New()` table, and insert all entries in `seq`.
This constructor only supports the standard table constructor, with comparable keys.
It is tempting to add `CollectBy` or `CollectCustom` to support all table types, but doing so would pollute the public interface.
It would be just one more line to initialize the table and then call `t.Insert` directly:
```go
t := // ...
err := t.Insert(seq)
```
</details>
<details>
<summary><code>m := map[K]V{...}</code></summary>
We should make a new constructor, because entries are generic.
So, creating an option with initialized entries doesn't work.
With the previous additions, users have a few options.
If they want to use a `New()` table, `cuckoo.Collect` matches well:
```go
t, err := cuckoo.Collect(func(yield func(K, V) bool) {
yield(key1, val1)
yield(key2, val2)
})
```
For `NewCustom()` or `NewBy()` tables, users can call `t.Insert` after initialization:
```go
t := // ...
err := t.Insert(func(yield func(K, V) bool) {
yield(key1, val1)
yield(key2, val2)
})
```
It is one more line.
But, the alternative is polluting the public interface with corresponding `*WithEntries` constructors.
</details>
<details>
<summary><code>m := make(map[K]V, hint)</code></summary>
We should add a new option:
```go
func ExpectedSize(n int) Option
```
When fed to a table, it will allocate enough space to hold `n` entries without a resize.
</details>
<details>
<summary><code>clear(m)</code></summary>
We should implement a new receiver:
```go
func (t *Table[K, V]) Clear()
```
It will remove all entries from the table.
</details>
<details>
<summary><code>m2 := maps.Clone(m)</code></summary>
We should implement a matching function:
```go
func (t *Table[K, V]) Clone() *Table[K, V]
```
Also, it will copy the hash, equality function, and options used in the table.
</details>
<details>
<summary><code>it := maps.Keys(m)</code></summary>
We should implement a matching function:
```go
func (t *Table[K, V]) Keys() iter.Seq[K]
```
It is tempting to just have `All()`, but it returns a `Seq2`, not a `Seq`.
There is no iterator adaptor between `Seq` and `Seq2`, and will not be for the foreseeable future.
This function, while it feels superfluous, is required.
</details>
<details>
<summary><code>it := maps.Values(m)</code></summary>
We should implement a matching function:
```go
func (t *Table[K, V]) Values() iter.Seq[V]
```
For the same reason we need `Keys()`, we also need `Values()`.
</details>

View File

@@ -1 +0,0 @@
<svg width="16" height="16" xmlns="http://www.w3.org/2000/svg" shape-rendering="crispEdges"><path fill="#2a1512" d="M3,6h1v1h-1v-1M2,7h1v1h-1v-1M12,7h1v1h-1v-1M1,8h1v1h-1v-1M4,8h1v2h-1v-2M14,8h1v1h-1v-1M11,9h1v2h-2v-1h1v-1M5,10h1v1h-1v-1M1,13h2v1h1v1h-2v-1h-1v-1M13,13h2v1h-1v1h-1v1h-1v-2h1v-1M4,15h1v1h-1v-1"/><path fill="#38231f" d="M4,6h1v1h-1v-1M11,6h1v1h-1v-1M13,7h1v1h-1v-1M1,9h1v2h-1v-2M14,9h1v1h-1v-1M14,11h1v2h-1v-2M1,12h1v1h-1v-1M5,15h3v1h-3v-1M10,15h2v1h-2v-1"/><path fill="#3f3f74" d="M7,3h2v1h-2v-1M6,4h1v1h-1v-1M9,4h1v1h-1v-1M5,5h1v5h-1v-5M10,5h1v5h-1v-5M6,10h4v1h-4v-1"/><path fill="#6262ab" d="M9,5h1v1h-1v-1M6,9h1v1h-1v-1M9,9h1v1h-1v-1"/><path fill="#663931" d="M11,8h1v1h-1v-1M2,9h1v1h-1v-1M14,10h1v1h-1v-1M1,11h1v1h-1v-1M3,11h2v1h-2v-1M13,11h1v1h-1v-1M2,12h1v1h-1v-1M5,12h1v1h-1v-1M10,12h3v2h-1v-1h-2v-1M3,13h1v1h-1v-1M8,13h2v1h-2v-1M4,14h3v1h-3v-1M10,14h2v1h-2v-1M8,15h2v1h-2v-1"/><path fill="#8d8dcb" d="M9,6h1v3h-1v1h-2v-1h1v-1h1v-2M6,8h1v1h-1v-1"/><path fill="#8f563b" d="M2,8h1v1h-1v-1M12,9h1v1h-1v-1M2,10h1v1h-1v-1M13,10h1v1h-1v-1M7,11h1v1h-1v-1M11,11h2v1h-2v-1M4,12h1v1h1v-1h1v2h-3v-2M13,12h1v1h-1v-1M10,13h2v1h-2v-1M7,14h3v1h-3v-1"/><path fill="#a4a4d5" d="M8,4h1v4h-1v1h-1v-1h-1v-2h1v-1h1v-1"/><path fill="#ab764a" d="M4,7h1v1h-1v-1M3,8h1v2h-1v-2M12,10h1v1h-1v-1M2,11h1v1h-1v-1M8,11h1v1h-1v-1M3,12h1v1h-1v-1M7,12h1v2h-1v-2"/><path fill="#cacaea" d="M7,4h1v1h-1v-1M6,5h1v1h-1v-1"/><path fill="#d9a066" d="M3,7h1v1h-1v-1M11,7h1v1h-1v-1M12,8h2v2h-1v-1h-1v-1M3,10h2v1h-2v-1M5,11h2v1h-2v-1M9,11h2v1h-1v1h-2v-1h1v-1"/></svg>

Before

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 963 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 418 B

73
bucket.go Normal file
View File

@@ -0,0 +1,73 @@
package cuckoo
// Storage types backing a single bucket of the table.
type (
	// entry pairs a key with the value stored under it.
	entry[K, V any] struct {
		key   K
		value V
	}

	// slot is one cell of a bucket: an embedded entry plus a flag recording
	// whether the cell currently holds live data. The zero slot is empty.
	slot[K, V any] struct {
		entry[K, V]
		occupied bool
	}

	// bucket is a fixed-size array of slots addressed by a single hash
	// function. Each key maps to exactly one slot (hash modulo capacity).
	bucket[K, V any] struct {
		// hash produces the digest used to pick a key's slot.
		hash Hash[K]
		// slots is the backing storage; newBucket and resize allocate it with
		// length equal to capacity.
		slots []slot[K, V]
		// capacity is the modulus applied to the hash digest.
		capacity uint64
		// size counts the occupied slots.
		size uint64
		// compare decides whether two keys are the same key.
		compare EqualFunc[K]
	}
)
// location returns the index of the one slot that key may occupy: the key's
// hash digest reduced modulo the bucket's capacity. It panics (integer divide
// by zero) if capacity is 0.
func (b bucket[K, V]) location(key K) uint64 {
	digest := b.hash(key)
	return digest % b.capacity
}
// get looks key up in the single slot it hashes to.
//
// It returns the stored value and true when that slot is occupied by a key
// that compare reports equal to key. On a miss (the slot is empty, holds a
// different key, or the bucket has zero capacity) it returns the zero value
// of V and false, so callers never observe a value belonging to another key.
func (b bucket[K, V]) get(key K) (value V, found bool) {
	// An empty bucket holds nothing; return before location divides by zero.
	if b.capacity == 0 {
		return value, false
	}

	candidate := &b.slots[b.location(key)]
	if !candidate.occupied || !b.compare(candidate.key, key) {
		return value, false
	}

	return candidate.value, true
}
// resize discards every stored entry and re-allocates the bucket with the
// given capacity. The hash and compare functions are kept; size drops to 0.
// Re-inserting the old entries is left to the caller.
func (b *bucket[K, V]) resize(capacity uint64) {
	fresh := make([]slot[K, V], capacity)
	b.slots, b.capacity, b.size = fresh, capacity, 0
}
// update overwrites the value stored for key, but only if key is already
// present in its slot. It reports whether an overwrite happened and never
// inserts a new entry. The write goes through the shared slots slice, so it
// is visible to the caller despite the value receiver.
func (b bucket[K, V]) update(key K, value V) (updated bool) {
	target := &b.slots[b.location(key)]

	updated = target.occupied && b.compare(target.key, key)
	if updated {
		target.value = value
	}

	return updated
}
// evict places insertion into the slot its key hashes to and reports what, if
// anything, had to be pushed out to make room:
//
//   - the slot was empty: the entry is stored, size grows by one, and no
//     eviction is reported;
//   - the slot already holds an equal key: only the value is replaced, and no
//     eviction is reported;
//   - the slot holds a different key: the entries are swapped, and the former
//     occupant is returned with eviction set to true.
func (b *bucket[K, V]) evict(insertion entry[K, V]) (evicted entry[K, V], eviction bool) {
	home := &b.slots[b.location(insertion.key)]

	switch {
	case !home.occupied:
		home.entry = insertion
		home.occupied = true
		b.size++
		return evicted, false

	case b.compare(home.key, insertion.key):
		home.value = insertion.value
		return evicted, false

	default:
		evicted = home.entry
		home.entry = insertion
		return evicted, true
	}
}
// newBucket builds an empty bucket with capacity slots, using hash to place
// keys and compare to decide key equality. The returned bucket has size 0.
func newBucket[K, V any](capacity uint64, hash Hash[K], compare EqualFunc[K]) bucket[K, V] {
	var b bucket[K, V]

	b.hash = hash
	b.compare = compare
	b.capacity = capacity
	b.slots = make([]slot[K, V], capacity)

	return b
}

View File

@@ -2,7 +2,7 @@ package cuckoo
// An EqualFunc determines whether two keys are 'equal'. Keys that are 'equal' // An EqualFunc determines whether two keys are 'equal'. Keys that are 'equal'
// are treated as the same by the [Table]. A good EqualFunc is pure, // are treated as the same by the [Table]. A good EqualFunc is pure,
// deterministic, and fast. By default, [New] uses [DefaultEqualFunc]. // deterministic, and fast. By default, [NewTable] uses [DefaultEqualFunc].
// //
// This function MUST NOT return true if the [Hash] digest of two keys // This function MUST NOT return true if the [Hash] digest of two keys
// are different: the [Table] will not work. // are different: the [Table] will not work.

View File

@@ -28,7 +28,7 @@ func ExampleEqualFunc_badEqualFunc() {
// Two users with the same ID are equal. // Two users with the same ID are equal.
isEqual := func(a, b User) bool { return a.ID == b.ID } isEqual := func(a, b User) bool { return a.ID == b.ID }
userbase := cuckoo.NewCustom[User, bool](makeHash(1), makeHash(2), isEqual) userbase := cuckoo.NewCustomTable[User, bool](makeHash(1), makeHash(2), isEqual)
(userbase.Put(User{"1", "Robert Doe"}, true)) (userbase.Put(User{"1", "Robert Doe"}, true))

View File

@@ -1,13 +1,11 @@
package cuckoo_test package cuckoo_test
import ( import (
"fmt" "bytes"
"maps" "encoding/binary"
"os"
"testing" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
go_fuzz_utils "github.com/trailofbits/go-fuzz-utils"
"git.maximhutz.com/tools/go-cuckoo" "git.maximhutz.com/tools/go-cuckoo"
) )
@@ -21,73 +19,31 @@ func offsetHash(seed uint32) cuckoo.Hash[uint32] {
} }
} }
type fuzzStep struct {
drop bool
key, value uint32
}
type fuzzScenario struct {
seedA, seedB uint32
capacity, growthFactor uint8
steps []fuzzStep
}
func FuzzInsertLookup(f *testing.F) { func FuzzInsertLookup(f *testing.F) {
f.Fuzz(func(t *testing.T, data []byte) { f.Fuzz(func(t *testing.T, data []byte, seedA, seedB uint32) {
var scenario fuzzScenario
assert := assert.New(t) assert := assert.New(t)
if tp, err := go_fuzz_utils.NewTypeProvider(data); err != nil { table := cuckoo.NewCustomTable[uint32, uint32](
return
} else if err := tp.Fill(&scenario); err != nil {
return
}
seedA, seedB := scenario.seedA, scenario.seedB
growthFactor := max(2, int(scenario.growthFactor))
capacity := int(scenario.capacity)
// If they are the same number, the hashes will clash, always causing an
// error.
if seedA == seedB {
t.Skip()
}
fmt.Fprintf(os.Stderr, "seedA=%d seedB=%d capacity=%d growthFactor=%d\n",
seedA, seedB, capacity, growthFactor)
actual := cuckoo.NewCustom[uint32, uint32](
offsetHash(seedA), offsetHash(seedA),
offsetHash(seedB), offsetHash(seedB),
func(a, b uint32) bool { return a == b }, func(a, b uint32) bool { return a == b },
cuckoo.Capacity(capacity),
cuckoo.GrowthFactor(growthFactor),
) )
expected := map[uint32]uint32{} if seedA == seedB {
return
}
for _, step := range scenario.steps { r := bytes.NewReader(data)
if step.drop { var key, value uint32
ok := actual.Drop(step.key) for binary.Read(r, binary.LittleEndian, &key) == nil &&
_, has := expected[step.key] binary.Read(r, binary.LittleEndian, &value) == nil {
assert.Equal(ok, has)
delete(expected, step.key) err := table.Put(key, value)
assert.NoError(err)
_, ok = actual.Get(step.key) found, err := table.Get(key)
assert.False(ok) assert.NoError(err)
} else { assert.Equal(value, found)
err := actual.Put(step.key, step.value)
assert.NoError(err)
expected[step.key] = step.value
found, ok := actual.Get(step.key)
assert.True(ok)
assert.Equal(step.value, found)
}
assert.Equal(expected, maps.Collect(actual.Entries()))
} }
}) })
} }

View File

@@ -11,7 +11,7 @@ func TestMaxEvictions(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
for i := 16; i < 116; i++ { for i := 16; i < 116; i++ {
table := New[int, bool](Capacity(i / 2)) table := NewTable[int, bool](Capacity(i / 2))
expectedEvictions := 3 * math.Floor(math.Log2(float64(i))) expectedEvictions := 3 * math.Floor(math.Log2(float64(i)))
assert.Equal(table.maxEvictions(), int(expectedEvictions)) assert.Equal(table.maxEvictions(), int(expectedEvictions))
@@ -20,11 +20,11 @@ func TestMaxEvictions(t *testing.T) {
func TestLoad(t *testing.T) { func TestLoad(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
table := New[int, bool](Capacity(8)) table := NewTable[int, bool](Capacity(8))
for i := range 16 { for i := range 16 {
err := table.Put(i, true) err := table.Put(i, true)
assert.NoError(err) assert.NoError(err)
assert.Equal(float64(table.Size())/float64(table.TotalCapacity()), table.load()) assert.Equal(float64(table.Size())/float64(table.Capacity()), table.load())
} }
} }

View File

@@ -1,7 +1,6 @@
package cuckoo_test package cuckoo_test
import ( import (
"errors"
"maps" "maps"
"math/rand/v2" "math/rand/v2"
"testing" "testing"
@@ -14,7 +13,7 @@ import (
func TestNewTable(t *testing.T) { func TestNewTable(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
table := cuckoo.New[int, bool]() table := cuckoo.NewTable[int, bool]()
assert.NotNil(table) assert.NotNil(table)
assert.Zero(table.Size()) assert.Zero(table.Size())
@@ -23,7 +22,7 @@ func TestNewTable(t *testing.T) {
func TestAddItem(t *testing.T) { func TestAddItem(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
key, value := 0, true key, value := 0, true
table := cuckoo.New[int, bool]() table := cuckoo.NewTable[int, bool]()
err := table.Put(key, value) err := table.Put(key, value)
@@ -35,7 +34,7 @@ func TestAddItem(t *testing.T) {
func TestPutOverwrite(t *testing.T) { func TestPutOverwrite(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
key, value, newValue := 0, 1, 2 key, value, newValue := 0, 1, 2
table := cuckoo.New[int, int]() table := cuckoo.NewTable[int, int]()
(table.Put(key, value)) (table.Put(key, value))
err := table.Put(key, newValue) err := table.Put(key, newValue)
@@ -50,7 +49,7 @@ func TestPutOverwrite(t *testing.T) {
func TestSameHash(t *testing.T) { func TestSameHash(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
hash := func(int) uint64 { return 0 } hash := func(int) uint64 { return 0 }
table := cuckoo.NewCustom[int, bool](hash, hash, cuckoo.DefaultEqualFunc[int]) table := cuckoo.NewCustomTable[int, bool](hash, hash, cuckoo.DefaultEqualFunc[int])
errA := table.Put(0, true) errA := table.Put(0, true)
errB := table.Put(1, true) errB := table.Put(1, true)
@@ -63,29 +62,29 @@ func TestSameHash(t *testing.T) {
func TestStartingCapacity(t *testing.T) { func TestStartingCapacity(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
table := cuckoo.New[int, bool](cuckoo.Capacity(64)) table := cuckoo.NewTable[int, bool](cuckoo.Capacity(64))
assert.Equal(uint64(128), table.TotalCapacity()) assert.Equal(uint64(128), table.Capacity())
} }
func TestResizeCapacity(t *testing.T) { func TestResizeCapacity(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
table := cuckoo.New[int, bool]( table := cuckoo.NewTable[int, bool](
cuckoo.Capacity(8), cuckoo.Capacity(8),
cuckoo.GrowthFactor(2), cuckoo.GrowthFactor(2),
) )
for table.TotalCapacity() == 16 { for table.Capacity() == 16 {
err := table.Put(rand.Int(), true) err := table.Put(rand.Int(), true)
assert.NoError(err) assert.NoError(err)
} }
assert.Equal(uint64(32), table.TotalCapacity()) assert.Equal(uint64(32), table.Capacity())
} }
func TestPutMany(t *testing.T) { func TestPutMany(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
expected, actual := map[int]bool{}, cuckoo.New[int, bool]() expected, actual := map[int]bool{}, cuckoo.NewTable[int, bool]()
for i := range 1_000 { for i := range 1_000 {
expected[i] = true expected[i] = true
@@ -100,7 +99,7 @@ func TestPutMany(t *testing.T) {
func TestGetMany(t *testing.T) { func TestGetMany(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
table := cuckoo.New[int, bool]() table := cuckoo.NewTable[int, bool]()
for i := range 1_000 { for i := range 1_000 {
err := table.Put(i, true) err := table.Put(i, true)
@@ -108,119 +107,24 @@ func TestGetMany(t *testing.T) {
} }
for i := range 2_000 { for i := range 2_000 {
value, ok := table.Get(i) value, err := table.Get(i)
if i < 1_000 { if i < 1_000 {
assert.True(ok) assert.NoError(err)
assert.Equal(value, true) assert.Equal(value, true)
} else { } else {
assert.False(ok) assert.Error(err)
} }
} }
} }
func TestDropExistingItem(t *testing.T) { func TestRemove(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
key, value := 0, true table := cuckoo.NewTable[int, bool]()
table := cuckoo.New[int, bool]()
(table.Put(key, value))
had := table.Drop(key) assert.False(table.Has(0))
assert.True(had)
assert.Equal(0, table.Size())
assert.False(table.Has(key))
}
func TestDropNoItem(t *testing.T) {
assert := assert.New(t)
key := 0
table := cuckoo.New[int, bool]()
had := table.Drop(key)
assert.False(had)
assert.Equal(0, table.Size())
assert.False(table.Has(key))
}
func TestDropItemCapacity(t *testing.T) {
assert := assert.New(t)
key := 0
table := cuckoo.New[int, bool](
cuckoo.Capacity(64),
cuckoo.GrowthFactor(2),
)
startingCapacity := table.TotalCapacity()
table.Drop(key)
endingCapacity := table.TotalCapacity()
assert.Equal(0, table.Size())
assert.Equal(uint64(128), startingCapacity)
assert.Equal(uint64(64), endingCapacity)
}
func TestPutNoCapacity(t *testing.T) {
assert := assert.New(t)
key, value := 0, true
table := cuckoo.New[int, bool](
cuckoo.Capacity(0),
)
err := table.Put(key, value)
err := table.Put(0, true)
assert.NoError(err) assert.NoError(err)
assert.Equal(1, table.Size())
assert.True(table.Has(key)) assert.True(table.Has(0))
}
func TestBadHashCapacity(t *testing.T) {
assert := assert.New(t)
table := cuckoo.NewCustom[int, bool](
func(int) uint64 { return 0 },
func(int) uint64 { return 0 },
func(a, b int) bool { return a == b },
cuckoo.Capacity(20),
)
err1 := table.Put(0, true)
err2 := table.Put(1, true)
err3 := table.Put(2, true)
assert.NoError(err1)
assert.NoError(err2)
assert.Error(err3)
assert.Equal(uint64(80), table.TotalCapacity())
}
func TestDropResizeCapacity(t *testing.T) {
assert := assert.New(t)
table := cuckoo.New[int, bool](
cuckoo.Capacity(10),
)
err1 := table.Put(0, true)
err2 := table.Put(1, true)
table.Drop(1)
assert.NoError(errors.Join(err1, err2))
assert.Equal(uint64(20), table.TotalCapacity())
}
func TestNewTableBy(t *testing.T) {
type User struct {
_ func()
id string
name string
}
assert := assert.New(t)
table := cuckoo.NewBy[User, bool](func(u User) string { return u.id })
err := table.Put(User{nil, "1", "Robert"}, true)
assert.NoError(err)
assert.Equal(1, table.Size())
assert.True(table.Has(User{nil, "1", "Robbie"}))
} }

7
doc.go
View File

@@ -1,12 +1,9 @@
// Package cuckoo provides a hash table that uses cuckoo hashing to achieve // Package cuckoo provides a hash table that uses cuckoo hashing to achieve
// a worst-case O(1) lookup time. // a worst-case O(1) lookup time.
// //
// While a [New] only supports comparable keys by default, you can create // While a [NewTable] only supports comparable keys by default, you can create
// a table with any key type using [NewCustom]. Custom [Hash] functions and // a table with any key type using [NewCustomTable]. Custom [Hash] functions and
// key comparison are also supported. // key comparison are also supported.
// //
// NOTE: The [Table] is a look-up structure, and not a source of truth. If
// [ErrBadHash] occurs, the data cannot be restored.
//
// See more: https://en.wikipedia.org/wiki/Cuckoo_hashing // See more: https://en.wikipedia.org/wiki/Cuckoo_hashing
package cuckoo package cuckoo

View File

@@ -8,25 +8,25 @@ import (
) )
func Example_basic() { func Example_basic() {
table := cuckoo.New[int, string]() table := cuckoo.NewTable[int, string]()
if err := table.Put(1, "Hello, World!"); err != nil { if err := table.Put(1, "Hello, World!"); err != nil {
fmt.Println("Put error:", err) fmt.Println("Put error:", err)
} }
if item, ok := table.Get(1); !ok { if item, err := table.Get(1); err != nil {
fmt.Println("Not Found 1!") fmt.Println("Error:", err)
} else { } else {
fmt.Println("Found 1:", item) fmt.Println("Found 1:", item)
} }
if item, ok := table.Get(0); !ok { if item, err := table.Get(0); err != nil {
fmt.Println("Not Found 0!") fmt.Println("Error:", err)
} else { } else {
fmt.Println("Found 0:", item) fmt.Println("Found 0:", item)
} }
// Output: // Output:
// Found 1: Hello, World! // Found 1: Hello, World!
// Not Found 0! // Error: key '0' not found
} }

3
go.mod
View File

@@ -4,11 +4,8 @@ go 1.25.6
require github.com/stretchr/testify v1.11.1 require github.com/stretchr/testify v1.11.1
require github.com/kr/pretty v0.3.1 // indirect
require ( require (
github.com/davecgh/go-spew v1.1.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/trailofbits/go-fuzz-utils v0.0.0-20260318143407-0907cafe7589
gopkg.in/yaml.v3 v3.0.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect
) )

12
go.sum
View File

@@ -1,21 +1,9 @@
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/trailofbits/go-fuzz-utils v0.0.0-20260318143407-0907cafe7589 h1:UmBZCTPdDYore2IEHN+U4eIqEaRq6METh9pKiPumkqc=
github.com/trailofbits/go-fuzz-utils v0.0.0-20260318143407-0907cafe7589/go.mod h1:zh+T+w9XT/3o4E0WLEGCdmLJ8Yqx/zY3o538tQY3OjY=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -1,7 +1,5 @@
package cuckoo package cuckoo
import "fmt"
// DefaultCapacity is the initial capacity of a [Table]. It is inspired by // DefaultCapacity is the initial capacity of a [Table]. It is inspired by
// Java's [HashMap] implementation, which also uses 16. // Java's [HashMap] implementation, which also uses 16.
// //
@@ -9,20 +7,14 @@ import "fmt"
const DefaultCapacity uint64 = 16 const DefaultCapacity uint64 = 16
// DefaultGrowthFactor is the standard resize multiplier for a [Table]. Most // DefaultGrowthFactor is the standard resize multiplier for a [Table]. Most
// implementations use 2. // hash table implementations use 2.
const DefaultGrowthFactor uint64 = 2 const DefaultGrowthFactor uint64 = 2
// defaultMinimumLoad is the default lowest acceptable occupancy of a [Table]. // DefaultMinimumLoad is the default lowest acceptable occupancy of a [Table].
// The higher the minimum load, the more likely that a [Table.Put] will not // The value of 5% is taken from [libcuckoo].
// succeed. The value of 5% is taken from [libcuckoo].
// //
// [libcuckoo]: https://github.com/efficient/libcuckoo/blob/656714705a055df2b7a605eb3c71586d9da1e119/libcuckoo/cuckoohash_config.hh#L21 // [libcuckoo]: https://github.com/efficient/libcuckoo/blob/656714705a055df2b7a605eb3c71586d9da1e119/libcuckoo/cuckoohash_config.hh#L21
const defaultMinimumLoad float64 = 0.05 const DefaultMinimumLoad float64 = 0.05
// defaultGrowthLimit is the maximum number of times a [Table] can grow in a
// single [Table.Put], before the library infers it will lead to a stack
// overflow. The value of '64' was chosen arbitrarily.
const defaultGrowthLimit uint64 = 64
type settings struct { type settings struct {
growthFactor uint64 growthFactor uint64
@@ -31,25 +23,23 @@ type settings struct {
} }
// An Option modifies the settings of a [Table]. It is used in its constructors // An Option modifies the settings of a [Table]. It is used in its constructors
// like [New], for example. // like [NewTable], for example.
type Option func(*settings) type Option func(*settings)
// Capacity modifies the starting capacity of each subtable of the [Table]. The // Capacity modifies the starting capacity of each bucket of the [Table]. The
// value must be non-negative. // value must be greater than 0.
func Capacity(value int) Option { func Capacity(value int) Option {
if value < 0 {
panic(fmt.Sprintf("go-cuckoo: Capacity must be non-negative, got %d", value))
}
return func(s *settings) { s.bucketSize = uint64(value) } return func(s *settings) { s.bucketSize = uint64(value) }
} }
// MinimumLoad modifies the [DefaultMinimumLoad] of the [Table]. The value must
// be between 0.00 and 1.00.
func MinimumLoad(value float64) Option {
return func(s *settings) { s.minLoadFactor = value }
}
// GrowthFactor controls how much the capacity of the [Table] multiplies when // GrowthFactor controls how much the capacity of the [Table] multiplies when
// it must resize. The value must be greater than 1. // it must resize. The value must be greater than 1.
func GrowthFactor(value int) Option { func GrowthFactor(value int) Option {
if value < 2 {
panic(fmt.Sprintf("go-cuckoo: GrowthFactor must be greater than 1, got %d", value))
}
return func(s *settings) { s.growthFactor = uint64(value) } return func(s *settings) { s.growthFactor = uint64(value) }
} }

View File

@@ -1,107 +0,0 @@
package cuckoo
// An entry is a key-value pair.
type entry[K, V any] struct {
key K
value V
}
type slot[K, V any] struct {
entry[K, V]
occupied bool
}
type subtable[K, V any] struct {
hash Hash[K]
slots []slot[K, V]
capacity, size uint64
compare EqualFunc[K]
}
// location determines where in the subtable a certain key would be placed. If
// the capacity is 0, this will panic.
func (t *subtable[K, V]) location(key K) uint64 {
return t.hash(key) % t.capacity
}
func (t *subtable[K, V]) get(key K) (value V, found bool) {
if t.capacity == 0 {
return
}
slot := t.slots[t.location(key)]
return slot.value, slot.occupied && t.compare(slot.key, key)
}
func (t *subtable[K, V]) drop(key K) (occupied bool) {
if t.capacity == 0 {
return
}
slot := &t.slots[t.location(key)]
if slot.occupied && t.compare(slot.key, key) {
slot.occupied = false
t.size--
return true
}
return false
}
func (t *subtable[K, V]) resized(capacity uint64) *subtable[K, V] {
return &subtable[K, V]{
slots: make([]slot[K, V], capacity),
capacity: capacity,
hash: t.hash,
compare: t.compare,
}
}
func (t *subtable[K, V]) update(key K, value V) (updated bool) {
if t.capacity == 0 {
return
}
slot := &t.slots[t.location(key)]
if slot.occupied && t.compare(slot.key, key) {
slot.value = value
return true
}
return false
}
func (t *subtable[K, V]) insert(insertion entry[K, V]) (evicted entry[K, V], eviction bool) {
if t.capacity == 0 {
return insertion, true
}
slot := &t.slots[t.location(insertion.key)]
if !slot.occupied {
slot.entry = insertion
slot.occupied = true
t.size++
return
}
if t.compare(slot.key, insertion.key) {
slot.value = insertion.value
return
}
insertion, slot.entry = slot.entry, insertion
return insertion, true
}
func newSubtable[K, V any](capacity uint64, hash Hash[K], compare EqualFunc[K]) *subtable[K, V] {
return &subtable[K, V]{
hash: hash,
capacity: capacity,
compare: compare,
size: 0,
slots: make([]slot[K, V], capacity),
}
}

252
table.go
View File

@@ -1,206 +1,126 @@
package cuckoo package cuckoo
import ( import (
"errors"
"fmt" "fmt"
"iter" "iter"
"math/bits" "math/bits"
"strings" "strings"
) )
// ErrBadHash occurs when the hashes given to a [Table] cause too many key // A Table is a hash table that uses cuckoo hashing to resolve collisions. Create
// collisions. Discard the old table, rebuild it from your source data, and try: // one with [NewTable]. Or if you want more granularity, use [NewTableBy] or
// // [NewCustomTable].
// 1. Different hash seeds. Equal seeds produce equal hash functions, which
// always cycle.
// 2. A different [Hash] algorithm.
var ErrBadHash = errors.New("bad hash")
// A Table which uses cuckoo hashing to resolve collision. Create
// one with [New]. Or if you want more granularity, use [NewBy] or
// [NewCustom].
type Table[K, V any] struct { type Table[K, V any] struct {
tableA, tableB *subtable[K, V] bucketA, bucketB bucket[K, V]
growthFactor uint64 growthFactor uint64
minLoadFactor float64 minLoadFactor float64
} }
// TotalCapacity returns the number of slots allocated for the [Table]. To get the // Capacity returns the number of slots allocated for the [Table]. To get the
// number of slots filled, look at [Table.Size]. // number of slots filled, look at [Table.Size].
func (t *Table[K, V]) TotalCapacity() uint64 { func (t Table[K, V]) Capacity() uint64 {
return t.tableA.capacity + t.tableB.capacity return t.bucketA.capacity + t.bucketB.capacity
} }
// Size returns how many slots are filled in the [Table]. // Size returns how many slots are filled in the [Table].
func (t *Table[K, V]) Size() int { func (t Table[K, V]) Size() int {
return int(t.tableA.size + t.tableB.size) return int(t.bucketA.size + t.bucketB.size)
} }
func log2(n uint64) (m int) { func log2(n uint64) (m int) {
return max(0, bits.Len64(n)-1) return bits.Len64(n) - 1
} }
func (t *Table[K, V]) maxEvictions() int { func (t Table[K, V]) maxEvictions() int {
return 3 * log2(t.TotalCapacity()) return 3 * log2(t.Capacity())
} }
func (t *Table[K, V]) load() float64 { func (t Table[K, V]) load() float64 {
// When there are no slots in the table, we still treat the load as 100%. return float64(t.Size()) / float64(t.Capacity())
// Every slot in the table is full.
if t.TotalCapacity() == 0 {
return 1.0
}
return float64(t.Size()) / float64(t.TotalCapacity())
} }
// insert attempts to put/update an entry in the table, without modifying the func (t *Table[K, V]) resize() error {
// size of the table. Returns a displaced entry and 'homeless = true' if an entries := make([]entry[K, V], 0, t.Size())
// entry could not be placed after exhausting evictions.
func (t *Table[K, V]) insert(entry entry[K, V]) (displaced entry[K, V], homeless bool) {
if t.tableA.update(entry.key, entry.value) {
return
}
if t.tableB.update(entry.key, entry.value) {
return
}
for range t.maxEvictions() {
if entry, homeless = t.tableA.insert(entry); !homeless {
return
}
if entry, homeless = t.tableB.insert(entry); !homeless {
return
}
}
return entry, true
}
// resized creates an empty copy of the table, with a new capacity for each
// bucket.
func (t *Table[K, V]) resized(capacity uint64) *Table[K, V] {
return &Table[K, V]{
growthFactor: t.growthFactor,
minLoadFactor: t.minLoadFactor,
tableA: t.tableA.resized(capacity),
tableB: t.tableB.resized(capacity),
}
}
// resize creates a new [Table.resized] with 'capacity', inserts all items into
// the array, and replaces the current table. It is a helper function for
// [Table.grow] and [Table.shrink]; use them instead.
func (t *Table[K, V]) resize(capacity uint64) bool {
updated := t.resized(capacity)
for k, v := range t.Entries() { for k, v := range t.Entries() {
if _, failed := updated.insert(entry[K, V]{k, v}); failed { entries = append(entries, entry[K, V]{k, v})
return false }
t.bucketA.resize(t.growthFactor * t.bucketA.capacity)
t.bucketB.resize(t.growthFactor * t.bucketB.capacity)
for _, entry := range entries {
if err := t.Put(entry.key, entry.value); err != nil {
return err
} }
} }
*t = *updated return nil
return true
} }
// grow increases the table's capacity by the growth factor. If the // Get fetches the value for a key in the [Table]. Returns an error if no value
// capacity is 0, it increases it to 1. // is found.
func (t *Table[K, V]) grow() bool { func (t Table[K, V]) Get(key K) (value V, err error) {
var newCapacity uint64 if item, ok := t.bucketA.get(key); ok {
return item, nil
if t.TotalCapacity() == 0 {
newCapacity = 1
} else {
newCapacity = t.tableA.capacity * t.growthFactor
} }
return t.resize(newCapacity) if item, ok := t.bucketB.get(key); ok {
} return item, nil
// shrink reduces the table's capacity by the growth factor. It may
// reduce it down to 0.
func (t *Table[K, V]) shrink() bool {
return t.resize(t.tableA.capacity / t.growthFactor)
}
// Get fetches the value for a key in the [Table]. Matches the comma-ok pattern
// of a builtin map; see [Table.Find] for plain indexing.
func (t *Table[K, V]) Get(key K) (value V, ok bool) {
if item, ok := t.tableA.get(key); ok {
return item, true
} }
if item, ok := t.tableB.get(key); ok { return value, fmt.Errorf("key '%v' not found", key)
return item, true
}
return
}
// Find fetches the value of a key. Matches direct indexing of a builtin map;
// see [Table.Get] for a comma-ok pattern.
func (t *Table[K, V]) Find(key K) (value V) {
value, _ = t.Get(key)
return
} }
// Has returns true if a key has a value in the table. // Has returns true if a key has a value in the table.
func (t *Table[K, V]) Has(key K) (exists bool) { func (t Table[K, V]) Has(key K) (exists bool) {
_, exists = t.Get(key) _, err := t.Get(key)
return return err == nil
} }
// Put sets the value for a key. If it cannot be set, an error is returned. // Put sets the value for a key. Returns error if its value cannot be set.
func (t *Table[K, V]) Put(key K, value V) (err error) { func (t *Table[K, V]) Put(key K, value V) (err error) {
var ( if t.bucketA.update(key, value) {
entry = entry[K, V]{key, value} return nil
homeless bool
)
for range defaultGrowthLimit {
if entry, homeless = t.insert(entry); !homeless {
return
}
// Both this and the growth limit are necessary: this catches bad hashes
// early when the table is sparse, while the latter catches cases where
// growing never helps.
if t.load() < t.minLoadFactor {
return fmt.Errorf("hash functions produced a cycle at load %d/%d: %w", t.Size(), t.TotalCapacity(), ErrBadHash)
}
// It is theoretically possible to have a table with a larger capacity
// that is valid. But this chance is astronomically small, so we ignore
// it in this implementation.
if grew := t.grow(); !grew {
return fmt.Errorf("could not redistribute entries into larger table: %w", ErrBadHash)
}
} }
return fmt.Errorf("could not place entry after %d resizes: %w", defaultGrowthLimit, ErrBadHash) if t.bucketB.update(key, value) {
} return nil
}
// Drop removes a value for a key in the table. Returns whether the key had entry, eviction := entry[K, V]{key, value}, false
// existed. for range t.maxEvictions() {
func (t *Table[K, V]) Drop(key K) bool { if entry, eviction = t.bucketA.evict(entry); !eviction {
occupied := t.tableA.drop(key) || t.tableB.drop(key) return nil
}
if entry, eviction = t.bucketB.evict(entry); !eviction {
return nil
}
}
if t.load() < t.minLoadFactor { if t.load() < t.minLoadFactor {
// The error is not handled here, because table-shrinking is an internal return fmt.Errorf("bad hash: resize on load %d/%d = %f", t.Size(), t.Capacity(), t.load())
// optimization.
t.shrink()
} }
return occupied if err := t.resize(); err != nil {
return err
}
return t.Put(entry.key, entry.value)
}
// Drop removes a value for a key in the table. Returns an error if its value
// cannot be removed.
//
// Deprecated: Do not use.
func (t Table[K, V]) Drop(_ K) {
panic("Not implemented")
} }
// Entries returns an unordered sequence of all key-value pairs in the table. // Entries returns an unordered sequence of all key-value pairs in the table.
func (t *Table[K, V]) Entries() iter.Seq2[K, V] { func (t Table[K, V]) Entries() iter.Seq2[K, V] {
return func(yield func(K, V) bool) { return func(yield func(K, V) bool) {
for _, slot := range t.tableA.slots { for _, slot := range t.bucketA.slots {
if slot.occupied { if slot.occupied {
if !yield(slot.key, slot.value) { if !yield(slot.key, slot.value) {
return return
@@ -208,7 +128,7 @@ func (t *Table[K, V]) Entries() iter.Seq2[K, V] {
} }
} }
for _, slot := range t.tableB.slots { for _, slot := range t.bucketB.slots {
if slot.occupied { if slot.occupied {
if !yield(slot.key, slot.value) { if !yield(slot.key, slot.value) {
return return
@@ -219,8 +139,8 @@ func (t *Table[K, V]) Entries() iter.Seq2[K, V] {
} }
// String returns the entries of the table as a string in the format: // String returns the entries of the table as a string in the format:
// "table[k1:v1 k2:v2 ...]". // "table[k1:v1 k2:v2 ...]".
func (t *Table[K, V]) String() string { func (t Table[K, V]) String() string {
var sb strings.Builder var sb strings.Builder
sb.WriteString("table[") sb.WriteString("table[")
@@ -238,13 +158,13 @@ func (t *Table[K, V]) String() string {
return sb.String() return sb.String()
} }
// NewCustom creates a [Table] with custom [Hash] and [EqualFunc] // NewCustomTable creates a [Table] with custom [Hash] and [EqualFunc]
// functions, along with any [Option] the user provides. // functions, along with any [Option] the user provides.
func NewCustom[K, V any](hashA, hashB Hash[K], compare EqualFunc[K], options ...Option) *Table[K, V] { func NewCustomTable[K, V any](hashA, hashB Hash[K], compare EqualFunc[K], options ...Option) *Table[K, V] {
settings := &settings{ settings := &settings{
growthFactor: DefaultGrowthFactor, growthFactor: DefaultGrowthFactor,
bucketSize: DefaultCapacity, bucketSize: DefaultCapacity,
minLoadFactor: defaultMinimumLoad, minLoadFactor: DefaultMinimumLoad,
} }
for _, option := range options { for _, option := range options {
@@ -254,8 +174,8 @@ func NewCustom[K, V any](hashA, hashB Hash[K], compare EqualFunc[K], options ...
return &Table[K, V]{ return &Table[K, V]{
growthFactor: settings.growthFactor, growthFactor: settings.growthFactor,
minLoadFactor: settings.minLoadFactor, minLoadFactor: settings.minLoadFactor,
tableA: newSubtable[K, V](settings.bucketSize, hashA, compare), bucketA: newBucket[K, V](settings.bucketSize, hashA, compare),
tableB: newSubtable[K, V](settings.bucketSize, hashB, compare), bucketB: newBucket[K, V](settings.bucketSize, hashB, compare),
} }
} }
@@ -263,10 +183,10 @@ func pipe[X, Y, Z any](a func(X) Y, b func(Y) Z) func(X) Z {
return func(x X) Z { return b(a(x)) } return func(x X) Z { return b(a(x)) }
} }
// NewBy creates a [Table] for any key type by using keyFunc to derive a // NewTableBy creates a [Table] for any key type by using keyFunc to derive a
// comparable key. Two keys with the same derived key are treated as equal. // comparable key. Two keys with the same derived key are treated as equal.
func NewBy[K, V any, C comparable](keyFunc func(K) C, options ...Option) *Table[K, V] { func NewTableBy[K, V any, C comparable](keyFunc func(K) C, options ...Option) *Table[K, V] {
return NewCustom[K, V]( return NewCustomTable[K, V](
pipe(keyFunc, NewDefaultHash[C]()), pipe(keyFunc, NewDefaultHash[C]()),
pipe(keyFunc, NewDefaultHash[C]()), pipe(keyFunc, NewDefaultHash[C]()),
func(a, b K) bool { return keyFunc(a) == keyFunc(b) }, func(a, b K) bool { return keyFunc(a) == keyFunc(b) },
@@ -274,10 +194,10 @@ func NewBy[K, V any, C comparable](keyFunc func(K) C, options ...Option) *Table[
) )
} }
// New creates a [Table] using the default [Hash] and [EqualFunc]. Use // NewTable creates a [Table] using the default [Hash] and [EqualFunc]. Use
// the [Option] functions to configure its behavior. Note that this constructor // the [Option] functions to configure its behavior. Note that this constructor
// is only provided for comparable keys. For arbitrary keys, consider // is only provided for comparable keys. For arbitrary keys, consider
// [NewBy] or [NewCustom]. // [NewTableBy] or [NewCustomTable].
func New[K comparable, V any](options ...Option) *Table[K, V] { func NewTable[K comparable, V any](options ...Option) *Table[K, V] {
return NewCustom[K, V](NewDefaultHash[K](), NewDefaultHash[K](), DefaultEqualFunc[K], options...) return NewCustomTable[K, V](NewDefaultHash[K](), NewDefaultHash[K](), DefaultEqualFunc[K], options...)
} }