From 2bc939479c41739544e7e0601653e025be124ef7 Mon Sep 17 00:00:00 2001 From: appflowy Date: Thu, 16 Mar 2023 17:42:55 +0800 Subject: [PATCH] feat: document crud --- Cargo.lock | 10 +-- build/init_redis.sh | 1 - crates/revdb/Cargo.toml | 2 + crates/revdb/src/db.rs | 55 +++++++++++----- crates/revdb/src/document.rs | 112 +++++++++++++++++++++++++++++++++ crates/revdb/src/error.rs | 3 + crates/revdb/src/lib.rs | 2 + crates/revdb/src/range.rs | 48 ++++++++++++++ crates/revdb/tests/document.rs | 2 + crates/revdb/tests/main.rs | 1 + 10 files changed, 214 insertions(+), 22 deletions(-) create mode 100644 crates/revdb/src/document.rs create mode 100644 crates/revdb/src/range.rs create mode 100644 crates/revdb/tests/document.rs create mode 100644 crates/revdb/tests/main.rs diff --git a/Cargo.lock b/Cargo.lock index 6786e6ef..5d3aec97 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2250,6 +2250,8 @@ dependencies = [ name = "revdb" version = "0.1.0" dependencies = [ + "bincode", + "serde", "sled", "thiserror", ] @@ -2400,9 +2402,9 @@ checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" [[package]] name = "serde" -version = "1.0.154" +version = "1.0.156" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cdd151213925e7f1ab45a9bbfb129316bd00799784b174b7cc7bcd16961c49e" +checksum = "314b5b092c0ade17c00142951e50ced110ec27cea304b1037c6969246c2469a4" dependencies = [ "serde_derive", ] @@ -2420,9 +2422,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.154" +version = "1.0.156" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fc80d722935453bcafdc2c9a73cd6fac4dc1938f0346035d84bf99fa9e33217" +checksum = "d7e29c4601e36bcec74a223228dce795f4cd3616341a4af93520ca1a837c087d" dependencies = [ "proc-macro2", "quote", diff --git a/build/init_redis.sh b/build/init_redis.sh index 44d97b4c..d54096f9 100755 --- a/build/init_redis.sh +++ b/build/init_redis.sh @@ -2,7 +2,6 @@ set -x set -eo pipefail -# if a redis container is running, print instructions to kill it and exit RUNNING_CONTAINER=$(docker ps --filter 'name=redis' --format '{{.ID}}') if [[ -n $RUNNING_CONTAINER ]]; then echo >&2 "there is a redis container already running, kill it with" diff --git a/crates/revdb/Cargo.toml b/crates/revdb/Cargo.toml index 172fae99..6e17eb18 100644 --- a/crates/revdb/Cargo.toml +++ b/crates/revdb/Cargo.toml @@ -8,3 +8,5 @@ edition = "2021" [dependencies] sled = "0.34.7" thiserror = "1.0.30" +serde = { version = "1.0", features = ["derive"] } +bincode = "1.3.3" \ No newline at end of file diff --git a/crates/revdb/src/db.rs b/crates/revdb/src/db.rs index bfd4df92..fb4fcb61 100644 --- a/crates/revdb/src/db.rs +++ b/crates/revdb/src/db.rs @@ -1,5 +1,6 @@ +use crate::document::Document; use crate::error::RevDBError; -use sled::Db; +use sled::{Batch, Db, IVec}; use std::path::Path; pub struct RevDB { @@ -12,24 +13,44 @@ impl RevDB { Ok(Self { db }) } - pub fn insert(&self, uid: i64, rev_id: i64, data: &[u8]) -> Result<(), RevDBError> { - let key = make_seq_key(uid, rev_id); - let _ = self.db.insert(key, data)?; + pub fn document(&self) -> Document { + Document { db: self } + } + + pub fn get>(&self, key: K) -> Result, RevDBError> { + let value = self.db.get(key)?; + Ok(value) + } + + pub fn batch_get>( + &self, + from_key: K, + to_key: K, + ) -> Result, RevDBError> { + let iter = self.db.range(from_key..to_key); + let mut items = vec![]; + for item in iter { + let (_, value) = item?; + items.push(value) + } + Ok(items) + } + + pub fn insert>(&self, key: K, value: &[u8]) -> Result<(), RevDBError> { + let _ = self.db.insert(key, value)?; Ok(()) } - pub fn get(&self, uid: i64, rev_id: i64) -> Result>, RevDBError> { - let key = make_seq_key(uid, rev_id); - let value = self.db.get(key)?; - Ok(value.map(|value| value.to_vec())) + pub fn batch_insert<'a, K: AsRef<[u8]>>( + &self, + items: impl IntoIterator, + ) -> Result<(), RevDBError> { + let mut batch = Batch::default(); + let items = items.into_iter(); + items.for_each(|(key, value)| { + batch.insert(key.as_ref(), value); + }); + let _ = self.db.apply_batch(batch)?; + Ok(()) } } - -// Optimize your data layout: Sled's B-Tree implementation works best when the keys are sequential, -// so try to organize the data in a way that maximizes sequential access. -fn make_seq_key(uid: i64, rev_id: i64) -> [u8; 16] { - let mut key = [0; 16]; - key[0..8].copy_from_slice(&uid.to_be_bytes()); - key[8..16].copy_from_slice(&rev_id.to_be_bytes()); - key -} diff --git a/crates/revdb/src/document.rs b/crates/revdb/src/document.rs new file mode 100644 index 00000000..7fdeac85 --- /dev/null +++ b/crates/revdb/src/document.rs @@ -0,0 +1,112 @@ +use crate::db::RevDB; +use crate::error::RevDBError; +use crate::range::RevRange; +use serde::{Deserialize, Serialize}; + +pub struct Document<'a> { + pub(crate) db: &'a RevDB, +} + +impl<'a> Document<'a> { + pub fn insert( + &self, + uid: i64, + document_id: i64, + rev_id: i64, + data: &[u8], + ) -> Result<(), RevDBError> { + let key = make_document_key(uid, document_id, rev_id); + let _ = self.db.insert(key, data)?; + Ok(()) + } + + pub fn get( + &self, + uid: i64, + document_id: i64, + rev_id: i64, + ) -> Result>, RevDBError> { + let key = make_document_key(uid, document_id, rev_id); + let value = self.db.get(key)?; + Ok(value.map(|value| value.to_vec())) + } + + pub fn get_with_range( + &self, + uid: i64, + document_id: i64, + range: RevRange, + ) -> Result, RevDBError> { + let from = make_document_key(uid, document_id, range.start); + let to = make_document_key(uid, document_id, range.end); + self.batch_get(from, to) + } + + pub fn get_after( + &self, + uid: i64, + document_id: i64, + rev_id: i64, + ) -> Result, RevDBError> { + let from = make_document_key(uid, document_id, rev_id); + let to = make_document_key(uid, document_id, i64::MAX); + self.batch_get(from, to) + } + + pub fn get_before( + &self, + uid: i64, + document_id: i64, + rev_id: i64, + ) -> Result, RevDBError> { + let from = make_document_key(uid, document_id, 0); + let to = make_document_key(uid, document_id, rev_id); + self.batch_get(from, to) + } + + pub fn batch_get>( + &self, + from: K, + to: K, + ) -> Result, RevDBError> { + let items = self.db.batch_get(from, to)?; + let mut document_revs = vec![]; + for item in items { + let rev_data = DocumentRevData::from_vec(item.as_ref())?; + document_revs.push(rev_data); + } + Ok(document_revs) + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct DocumentRevData { + #[serde(rename = "rid")] + pub rev_id: i64, + + #[serde(rename = "bid")] + pub base_rev_id: i64, + + #[serde(rename = "data")] + pub content: String, +} + +impl DocumentRevData { + pub fn from_vec(data: &[u8]) -> Result { + bincode::deserialize::(data).map_err(|_e| RevDBError::SerdeError) + } + + pub fn to_vec(&self) -> Result, RevDBError> { + bincode::serialize(self).map_err(|_e| RevDBError::SerdeError) + } +} + +// Optimize your data layout: Sled's B-Tree implementation works best when the keys are sequential, +// so try to organize the data in a way that maximizes sequential access. +fn make_document_key(uid: i64, document_id: i64, rev_id: i64) -> [u8; 24] { + let mut key = [0; 24]; + key[0..8].copy_from_slice(&uid.to_be_bytes()); + key[8..16].copy_from_slice(&document_id.to_be_bytes()); + key[16..24].copy_from_slice(&rev_id.to_be_bytes()); + key +} diff --git a/crates/revdb/src/error.rs b/crates/revdb/src/error.rs index 2a48e59b..f3a2b9e6 100644 --- a/crates/revdb/src/error.rs +++ b/crates/revdb/src/error.rs @@ -3,6 +3,9 @@ pub enum RevDBError { #[error(transparent)] Db(#[from] sled::Error), + #[error("Serde error")] + SerdeError, + #[error("invalid data")] InvalidData, } diff --git a/crates/revdb/src/lib.rs b/crates/revdb/src/lib.rs index 7f097aa1..75ae3967 100644 --- a/crates/revdb/src/lib.rs +++ b/crates/revdb/src/lib.rs @@ -1,2 +1,4 @@ mod db; +pub mod document; pub mod error; +pub mod range; diff --git a/crates/revdb/src/range.rs b/crates/revdb/src/range.rs new file mode 100644 index 00000000..0e46e3ef --- /dev/null +++ b/crates/revdb/src/range.rs @@ -0,0 +1,48 @@ +use std::ops::{Range, RangeInclusive, RangeToInclusive}; + +#[derive(Clone)] +pub struct RevRange { + pub(crate) start: i64, + pub(crate) end: i64, +} + +impl RevRange { + /// Construct a new `RevRange` representing the range [start..end). + /// It is an invariant that `start <= end`. + pub fn new(start: i64, end: i64) -> RevRange { + debug_assert!(start <= end); + RevRange { start, end } + } +} + +impl From> for RevRange { + fn from(src: RangeInclusive) -> RevRange { + RevRange::new(*src.start(), src.end().saturating_add(1)) + } +} + +impl From> for RevRange { + fn from(src: RangeToInclusive) -> RevRange { + RevRange::new(0, src.end.saturating_add(1)) + } +} + +impl From> for RevRange { + fn from(src: Range) -> RevRange { + let Range { start, end } = src; + RevRange { start, end } + } +} + +impl Iterator for RevRange { + type Item = i64; + + fn next(&mut self) -> Option { + if self.start > self.end { + return None; + } + let val = self.start; + self.start += 1; + Some(val) + } +} diff --git a/crates/revdb/tests/document.rs b/crates/revdb/tests/document.rs new file mode 100644 index 00000000..b61455dd --- /dev/null +++ b/crates/revdb/tests/document.rs @@ -0,0 +1,2 @@ +#[test] +fn read_document() {} diff --git a/crates/revdb/tests/main.rs b/crates/revdb/tests/main.rs new file mode 100644 index 00000000..10331894 --- /dev/null +++ b/crates/revdb/tests/main.rs @@ -0,0 +1 @@ +mod document;