// Forked from mirrors/kingfisher.
use std::{
|
||
collections::{hash_map::Entry, HashMap},
|
||
ops::Range,
|
||
};
|
||
|
||
use bstr::{BStr, BString};
|
||
|
||
/// A symbolic range pointing into a `BStringTable` buffer.
///
/// The two fields are the bounds of a half-open byte range `i..j`. The
/// integer type `T` controls how compactly those bounds are stored.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Symbol<T> {
    /// Start of the range (inclusive).
    i: T,
    /// End of the range (exclusive).
    j: T,
}

/// Convert between `Symbol` and `Range<usize>`.
///
/// Implementors are cheap `Copy` handles that can be hashed and compared.
/// `from_range(r).to_range()` is expected to give back `r` whenever
/// `from_range` succeeds.
pub trait SymbolType: Copy + Eq + std::hash::Hash {
    /// Returns the range this symbol denotes.
    fn to_range(self) -> Range<usize>;

    /// Builds the symbol that denotes `range`.
    fn from_range(range: Range<usize>) -> Self;
}

impl SymbolType for Symbol<usize> {
|
||
#[inline]
|
||
fn to_range(self) -> Range<usize> {
|
||
self.i..self.j
|
||
}
|
||
|
||
#[inline]
|
||
fn from_range(range: Range<usize>) -> Self {
|
||
Symbol { i: range.start, j: range.end }
|
||
}
|
||
}
|
||
|
||
impl SymbolType for Symbol<u32> {
|
||
#[inline]
|
||
fn to_range(self) -> Range<usize> {
|
||
(self.i as usize)..(self.j as usize)
|
||
}
|
||
|
||
#[inline]
|
||
fn from_range(range: Range<usize>) -> Self {
|
||
Symbol {
|
||
i: range.start.try_into().expect("start fits in u32"),
|
||
j: range.end.try_into().expect("end fits in u32"),
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Interns `BString` values in one contiguous byte buffer.
|
||
#[derive(Default)]
|
||
pub struct BStringTable<S = Symbol<u32>> {
|
||
storage: Vec<u8>,
|
||
mapping: HashMap<BString, S>,
|
||
}
|
||
|
||
impl<S: SymbolType> BStringTable<S> {
|
||
/// Default: ~32 KB symbols, ~1 MB bytes.
|
||
#[inline]
|
||
pub fn new() -> Self {
|
||
Self::with_capacity(32 * 1024, 1024 * 1024)
|
||
}
|
||
|
||
/// Reserve space for `num_symbols` entries and `total_bytes` of storage.
|
||
pub fn with_capacity(num_symbols: usize, total_bytes: usize) -> Self {
|
||
BStringTable {
|
||
storage: Vec::with_capacity(total_bytes),
|
||
mapping: HashMap::with_capacity(num_symbols),
|
||
}
|
||
}
|
||
|
||
/// Interns `s`, returning an existing or new symbol.
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn get_or_intern(&mut self, s: BString) -> S {
|
||
match self.mapping.entry(s) {
|
||
Entry::Occupied(e) => *e.get(),
|
||
Entry::Vacant(e) => {
|
||
let key = e.key();
|
||
let start = self.storage.len();
|
||
let end = start + key.len();
|
||
self.storage.extend_from_slice(key.as_slice());
|
||
*e.insert(S::from_range(start..end))
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Look up the `BStr` slice for `symbol`.
|
||
#[inline]
|
||
#[must_use]
|
||
pub fn resolve(&self, symbol: S) -> &BStr {
|
||
let range = symbol.to_range();
|
||
BStr::new(&self.storage[range])
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    use pretty_assertions::{assert_eq, assert_ne};

    use super::*;

    /// Interning the same bytes twice yields the same symbol, different bytes
    /// yield different symbols, and every symbol resolves to its bytes.
    #[test]
    fn simple_roundtrip() {
        let mut table: BStringTable = BStringTable::new();
        let a = BStr::new("foo");
        let b = BStr::new("bar");

        let s1 = table.get_or_intern(a.into());
        let s1a = table.get_or_intern(a.into());
        assert_eq!(s1, s1a);

        let s2 = table.get_or_intern(b.into());
        assert_ne!(s1, s2);
        assert_eq!(a, table.resolve(s1));
        assert_eq!(b, table.resolve(s2));
    }

    /// The empty string is a valid entry: it gets its own zero-length symbol
    /// and resolves back to an empty slice.
    #[test]
    fn empty_string_roundtrip() {
        let mut table: BStringTable = BStringTable::new();
        let foo = BStr::new("foo");
        let empty = BStr::new("");

        let s_foo = table.get_or_intern(foo.into());
        let s_empty = table.get_or_intern(empty.into());
        let s_empty_again = table.get_or_intern(empty.into());
        assert_eq!(s_empty, s_empty_again);
        assert_ne!(s_foo, s_empty);
        assert_eq!(empty, table.resolve(s_empty));
        assert_eq!(foo, table.resolve(s_foo));
    }
}