feat: try to restore from snapshot when failing to initialize the collab in CollabStoragePlugin (#222)
This commit is contained in:
parent
80f72b91b5
commit
453329dc0c
|
|
@ -22,6 +22,8 @@ services:
|
||||||
- 5433:5432
|
- 5433:5432
|
||||||
volumes:
|
volumes:
|
||||||
- ./migrations/before:/docker-entrypoint-initdb.d
|
- ./migrations/before:/docker-entrypoint-initdb.d
|
||||||
|
# comment out the following line if you want to persist data when restarting docker
|
||||||
|
#- postgres_data:/var/lib/postgresql/data
|
||||||
|
|
||||||
redis:
|
redis:
|
||||||
restart: on-failure
|
restart: on-failure
|
||||||
|
|
@ -95,3 +97,5 @@ services:
|
||||||
volumes:
|
volumes:
|
||||||
- ./docker/pgadmin/servers.json:/pgadmin4/servers.json
|
- ./docker/pgadmin/servers.json:/pgadmin4/servers.json
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
postgres_data:
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ use database_entity::dto::{
|
||||||
InsertCollabParams, QueryCollabResult, RawData,
|
InsertCollabParams, QueryCollabResult, RawData,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use crate::collab::SNAPSHOT_PER_HOUR;
|
||||||
use app_error::AppError;
|
use app_error::AppError;
|
||||||
use chrono::{Duration, Utc};
|
use chrono::{Duration, Utc};
|
||||||
use database_entity::pg_row::AFCollabMemerAccessLevelRow;
|
use database_entity::pg_row::AFCollabMemerAccessLevelRow;
|
||||||
|
|
@ -288,8 +289,6 @@ pub async fn create_snapshot(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
const SNAPSHOT_PER_HOUR: i64 = 3;
|
|
||||||
|
|
||||||
/// Determines whether a new snapshot should be created for the given `oid`.
|
/// Determines whether a new snapshot should be created for the given `oid`.
|
||||||
///
|
///
|
||||||
/// This asynchronous function checks the most recent snapshot creation time for the specified `oid`.
|
/// This asynchronous function checks the most recent snapshot creation time for the specified `oid`.
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,8 @@ use std::sync::{Arc, Weak};
|
||||||
use tracing::{debug, warn};
|
use tracing::{debug, warn};
|
||||||
use validator::Validate;
|
use validator::Validate;
|
||||||
|
|
||||||
pub const COLLAB_SNAPSHOT_LIMIT: i64 = 10;
|
pub const COLLAB_SNAPSHOT_LIMIT: i64 = 15;
|
||||||
|
pub const SNAPSHOT_PER_HOUR: i64 = 6;
|
||||||
pub type DatabaseResult<T, E = AppError> = core::result::Result<T, E>;
|
pub type DatabaseResult<T, E = AppError> = core::result::Result<T, E>;
|
||||||
|
|
||||||
/// [CollabStorageAccessControl] is a trait that provides access control when accessing the storage
|
/// [CollabStorageAccessControl] is a trait that provides access control when accessing the storage
|
||||||
|
|
@ -61,7 +62,9 @@ pub trait CollabStorage: Send + Sync + 'static {
|
||||||
/// * `bool` - `true` if the collaboration exists, `false` otherwise.
|
/// * `bool` - `true` if the collaboration exists, `false` otherwise.
|
||||||
async fn is_exist(&self, object_id: &str) -> bool;
|
async fn is_exist(&self, object_id: &str) -> bool;
|
||||||
|
|
||||||
async fn cache_collab(&self, _object_id: &str, _collab: Weak<MutexCollab>);
|
async fn cache_collab(&self, object_id: &str, collab: Weak<MutexCollab>);
|
||||||
|
|
||||||
|
async fn remove_collab_cache(&self, object_id: &str);
|
||||||
|
|
||||||
async fn is_collab_exist(&self, oid: &str) -> DatabaseResult<bool>;
|
async fn is_collab_exist(&self, oid: &str) -> DatabaseResult<bool>;
|
||||||
|
|
||||||
|
|
@ -129,8 +132,12 @@ where
|
||||||
self.as_ref().is_exist(object_id).await
|
self.as_ref().is_exist(object_id).await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn cache_collab(&self, _object_id: &str, _collab: Weak<MutexCollab>) {
|
async fn cache_collab(&self, object_id: &str, collab: Weak<MutexCollab>) {
|
||||||
self.as_ref().cache_collab(_object_id, _collab).await
|
self.as_ref().cache_collab(object_id, collab).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn remove_collab_cache(&self, object_id: &str) {
|
||||||
|
self.as_ref().remove_collab_cache(object_id).await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn is_collab_exist(&self, oid: &str) -> DatabaseResult<bool> {
|
async fn is_collab_exist(&self, oid: &str) -> DatabaseResult<bool> {
|
||||||
|
|
@ -220,6 +227,8 @@ impl CollabStorage for CollabStoragePgImpl {
|
||||||
|
|
||||||
async fn cache_collab(&self, _object_id: &str, _collab: Weak<MutexCollab>) {}
|
async fn cache_collab(&self, _object_id: &str, _collab: Weak<MutexCollab>) {}
|
||||||
|
|
||||||
|
async fn remove_collab_cache(&self, _object_id: &str) {}
|
||||||
|
|
||||||
async fn is_collab_exist(&self, oid: &str) -> DatabaseResult<bool> {
|
async fn is_collab_exist(&self, oid: &str) -> DatabaseResult<bool> {
|
||||||
let is_exist = is_collab_exists(oid, &self.pg_pool).await?;
|
let is_exist = is_collab_exists(oid, &self.pg_pool).await?;
|
||||||
Ok(is_exist)
|
Ok(is_exist)
|
||||||
|
|
|
||||||
|
|
@ -29,11 +29,8 @@ pub struct CollabBroadcast {
|
||||||
object_id: String,
|
object_id: String,
|
||||||
collab: MutexCollab,
|
collab: MutexCollab,
|
||||||
sender: Sender<CollabMessage>,
|
sender: Sender<CollabMessage>,
|
||||||
|
awareness_sub: Mutex<Option<awareness::UpdateSubscription>>,
|
||||||
#[allow(dead_code)]
|
doc_sub: Mutex<Option<UpdateSubscription>>,
|
||||||
awareness_sub: awareness::UpdateSubscription,
|
|
||||||
#[allow(dead_code)]
|
|
||||||
doc_sub: UpdateSubscription,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CollabBroadcast {
|
impl CollabBroadcast {
|
||||||
|
|
@ -47,28 +44,43 @@ impl CollabBroadcast {
|
||||||
let object_id = object_id.to_owned();
|
let object_id = object_id.to_owned();
|
||||||
// broadcast channel
|
// broadcast channel
|
||||||
let (sender, _) = channel(buffer_capacity);
|
let (sender, _) = channel(buffer_capacity);
|
||||||
|
CollabBroadcast {
|
||||||
|
object_id,
|
||||||
|
collab,
|
||||||
|
sender,
|
||||||
|
awareness_sub: Default::default(),
|
||||||
|
doc_sub: Default::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn observe_collab_changes(&self) {
|
||||||
let (doc_sub, awareness_sub) = {
|
let (doc_sub, awareness_sub) = {
|
||||||
let mut mutex_collab = collab.lock();
|
let mut mutex_collab = self.collab.lock();
|
||||||
|
|
||||||
// Observe the document's updates and broadcast them to all subscribers.
|
// Observe the document's updates and broadcast them to all subscribers.
|
||||||
let cloned_oid = object_id.clone();
|
let cloned_oid = self.object_id.clone();
|
||||||
let broadcast_sink = sender.clone();
|
let broadcast_sink = self.sender.clone();
|
||||||
let doc_sub = mutex_collab
|
let doc_sub = mutex_collab
|
||||||
.get_mut_awareness()
|
.get_mut_awareness()
|
||||||
.doc_mut()
|
.doc_mut()
|
||||||
.observe_update_v1(move |txn, event| {
|
.observe_update_v1(move |txn, event| {
|
||||||
trace!("broadcast doc update with len:{}", event.update.len());
|
let update_len = event.update.len();
|
||||||
let origin = CollabOrigin::from(txn);
|
let origin = CollabOrigin::from(txn);
|
||||||
let payload = gen_update_message(&event.update);
|
let payload = gen_update_message(&event.update);
|
||||||
let msg = CollabBroadcastData::new(origin, cloned_oid.clone(), payload);
|
let msg = CollabBroadcastData::new(origin, cloned_oid.clone(), payload);
|
||||||
if let Err(e) = broadcast_sink.send(msg.into()) {
|
|
||||||
error!("broadcast sink fail: {}", e);
|
match broadcast_sink.send(msg.into()) {
|
||||||
|
Ok(_) => trace!("observe doc update with len:{}", update_len),
|
||||||
|
Err(e) => error!(
|
||||||
|
"observe doc update with len:{} - broadcast sink fail: {}",
|
||||||
|
update_len, e
|
||||||
|
),
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let broadcast_sink = sender.clone();
|
let broadcast_sink = self.sender.clone();
|
||||||
let cloned_oid = object_id.clone();
|
let cloned_oid = self.object_id.clone();
|
||||||
|
|
||||||
// Observe the awareness updates and broadcast them to all subscribers.
|
// Observe the awareness updates and broadcast them to all subscribers.
|
||||||
let awareness_sub = mutex_collab
|
let awareness_sub = mutex_collab
|
||||||
|
|
@ -84,13 +96,9 @@ impl CollabBroadcast {
|
||||||
});
|
});
|
||||||
(doc_sub, awareness_sub)
|
(doc_sub, awareness_sub)
|
||||||
};
|
};
|
||||||
CollabBroadcast {
|
|
||||||
object_id,
|
*self.doc_sub.lock().await = Some(doc_sub);
|
||||||
collab,
|
*self.awareness_sub.lock().await = Some(awareness_sub);
|
||||||
sender,
|
|
||||||
awareness_sub,
|
|
||||||
doc_sub,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns a reference to an underlying [MutexCollab] instance.
|
/// Returns a reference to an underlying [MutexCollab] instance.
|
||||||
|
|
|
||||||
|
|
@ -80,6 +80,7 @@ where
|
||||||
match self.group_by_object_id.try_write() {
|
match self.group_by_object_id.try_write() {
|
||||||
Ok(mut group_by_object_id) => {
|
Ok(mut group_by_object_id) => {
|
||||||
group_by_object_id.remove(object_id);
|
group_by_object_id.remove(object_id);
|
||||||
|
// self.storage.remove_collab_cache(object_id).await;
|
||||||
},
|
},
|
||||||
Err(err) => error!("Failed to acquire write lock to remove group: {:?}", err),
|
Err(err) => error!("Failed to acquire write lock to remove group: {:?}", err),
|
||||||
}
|
}
|
||||||
|
|
@ -127,12 +128,7 @@ where
|
||||||
let collab = Arc::new(collab.clone());
|
let collab = Arc::new(collab.clone());
|
||||||
|
|
||||||
// The lifecycle of the collab is managed by the group.
|
// The lifecycle of the collab is managed by the group.
|
||||||
let group = Arc::new(CollabGroup {
|
let group = Arc::new(CollabGroup::new(collab.clone(), broadcast));
|
||||||
collab: collab.clone(),
|
|
||||||
broadcast,
|
|
||||||
subscribers: Default::default(),
|
|
||||||
});
|
|
||||||
|
|
||||||
let plugin = CollabStoragePlugin::new(
|
let plugin = CollabStoragePlugin::new(
|
||||||
uid,
|
uid,
|
||||||
workspace_id,
|
workspace_id,
|
||||||
|
|
@ -148,6 +144,7 @@ where
|
||||||
.storage
|
.storage
|
||||||
.cache_collab(object_id, Arc::downgrade(&collab))
|
.cache_collab(object_id, Arc::downgrade(&collab))
|
||||||
.await;
|
.await;
|
||||||
|
group.observe_collab().await;
|
||||||
group
|
group
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -169,6 +166,18 @@ impl<U> CollabGroup<U>
|
||||||
where
|
where
|
||||||
U: RealtimeUser,
|
U: RealtimeUser,
|
||||||
{
|
{
|
||||||
|
pub fn new(collab: Arc<MutexCollab>, broadcast: CollabBroadcast) -> Self {
|
||||||
|
Self {
|
||||||
|
collab,
|
||||||
|
broadcast,
|
||||||
|
subscribers: Default::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn observe_collab(&self) {
|
||||||
|
self.broadcast.observe_collab_changes().await;
|
||||||
|
}
|
||||||
|
|
||||||
/// Mutate the [Collab] by the given closure
|
/// Mutate the [Collab] by the given closure
|
||||||
pub fn get_mut_collab<F>(&self, f: F)
|
pub fn get_mut_collab<F>(&self, f: F)
|
||||||
where
|
where
|
||||||
|
|
|
||||||
|
|
@ -174,9 +174,11 @@ where
|
||||||
};
|
};
|
||||||
|
|
||||||
match self.storage.get_collab_encoded_v1(&self.uid, params).await {
|
match self.storage.get_collab_encoded_v1(&self.uid, params).await {
|
||||||
Ok(encoded_collab) => match init_collab_with_raw_data(&encoded_collab, doc).await {
|
Ok(encoded_collab_v1) => match init_collab_with_raw_data(&encoded_collab_v1, doc).await {
|
||||||
Ok(_) => {
|
Ok(_) => {
|
||||||
// Try to create a snapshot for the collab object
|
// Attempt to create a snapshot for the collaboration object. When creating this snapshot, it is
|
||||||
|
// assumed that the 'encoded_collab_v1' is already in a valid format. Therefore, there is no need
|
||||||
|
// to verify the outcome of the 'encode_to_bytes' operation.
|
||||||
if self.storage.should_create_snapshot(object_id).await {
|
if self.storage.should_create_snapshot(object_id).await {
|
||||||
let cloned_workspace_id = self.workspace_id.clone();
|
let cloned_workspace_id = self.workspace_id.clone();
|
||||||
let cloned_object_id = object_id.to_string();
|
let cloned_object_id = object_id.to_string();
|
||||||
|
|
@ -184,23 +186,39 @@ where
|
||||||
let _ = tokio::task::spawn_blocking(move || {
|
let _ = tokio::task::spawn_blocking(move || {
|
||||||
let params = InsertSnapshotParams {
|
let params = InsertSnapshotParams {
|
||||||
object_id: cloned_object_id,
|
object_id: cloned_object_id,
|
||||||
encoded_collab_v1: encoded_collab.encode_to_bytes().unwrap(),
|
encoded_collab_v1: encoded_collab_v1.encode_to_bytes().unwrap(),
|
||||||
workspace_id: cloned_workspace_id,
|
workspace_id: cloned_workspace_id,
|
||||||
};
|
};
|
||||||
|
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
|
// FIXME(nathan): There is a potential issue when concurrently spawning tasks to create snapshots. A subsequent
|
||||||
|
// task for creating a snapshot might write to the database before a previous task completes. To address
|
||||||
|
// this, consider using `stream!` to queue these tasks, ensuring they are executed in the order they were
|
||||||
|
// spawned.
|
||||||
if let Err(err) = storage.create_snapshot(params).await {
|
if let Err(err) = storage.create_snapshot(params).await {
|
||||||
error!("Create snapshot {:?}", err);
|
error!("create snapshot {:?}", err);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
Err(e) => error!("🔴Init collab failed: {:?}", e),
|
Err(err) => {
|
||||||
|
// When initializing a collaboration object, if the 'init_collab_with_raw_data' operation fails, attempt to
|
||||||
|
// restore the collaboration object from the latest snapshot.
|
||||||
|
if let Some(encoded_collab_v1) = get_latest_snapshot(object_id, &self.storage).await {
|
||||||
|
if let Err(err) = init_collab_with_raw_data(&encoded_collab_v1, doc).await {
|
||||||
|
error!("restore collab with snapshot failed: {:?}", err);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
error!("init collab failed: {:?}", err)
|
||||||
|
},
|
||||||
},
|
},
|
||||||
Err(err) => match &err {
|
Err(err) => match &err {
|
||||||
AppError::RecordNotFound(_) => {
|
AppError::RecordNotFound(_) => {
|
||||||
|
// When attempting to retrieve collaboration data from the disk and a 'Record Not Found' error is returned,
|
||||||
|
// this indicates that the collaboration is new. Therefore, the current collaboration data should be saved to disk.
|
||||||
if let Err(err) = self.insert_new_collab(doc, object_id).await {
|
if let Err(err) = self.insert_new_collab(doc, object_id).await {
|
||||||
error!("Insert collab {:?}", err);
|
error!("Insert collab {:?}", err);
|
||||||
}
|
}
|
||||||
|
|
@ -223,7 +241,7 @@ where
|
||||||
self.edit_state.flush_edit();
|
self.edit_state.flush_edit();
|
||||||
trace!("number of updates reach flush_per_update, start flushing");
|
trace!("number of updates reach flush_per_update, start flushing");
|
||||||
match self.group.upgrade() {
|
match self.group.upgrade() {
|
||||||
None => error!("🔴Group is dropped, skip flush collab"),
|
None => error!("Group is dropped, skip flush collab"),
|
||||||
Some(group) => group.flush_collab(),
|
Some(group) => group.flush_collab(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -260,11 +278,21 @@ where
|
||||||
|
|
||||||
fn encoded_v1_from_doc(doc: &Doc) -> EncodedCollabV1 {
|
fn encoded_v1_from_doc(doc: &Doc) -> EncodedCollabV1 {
|
||||||
let txn = doc.transact();
|
let txn = doc.transact();
|
||||||
let doc_state = txn.encode_state_as_update_v1(&StateVector::default());
|
|
||||||
let state_vector = txn.state_vector().encode_v1();
|
let state_vector = txn.state_vector().encode_v1();
|
||||||
|
let doc_state = txn.encode_state_as_update_v1(&StateVector::default());
|
||||||
EncodedCollabV1::new(state_vector, doc_state)
|
EncodedCollabV1::new(state_vector, doc_state)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn get_latest_snapshot<S>(object_id: &str, storage: &S) -> Option<EncodedCollabV1>
|
||||||
|
where
|
||||||
|
S: CollabStorage,
|
||||||
|
{
|
||||||
|
let metas = storage.get_collab_snapshot_list(object_id).await.ok()?;
|
||||||
|
let meta = metas.0.first()?;
|
||||||
|
let snapshot_data = storage.get_collab_snapshot(&meta.snapshot_id).await.ok()?;
|
||||||
|
EncodedCollabV1::decode_from_bytes(&snapshot_data.encoded_collab_v1).ok()
|
||||||
|
}
|
||||||
|
|
||||||
struct CollabEditState {
|
struct CollabEditState {
|
||||||
edit_count: AtomicU32,
|
edit_count: AtomicU32,
|
||||||
flush_edit_count: AtomicU32,
|
flush_edit_count: AtomicU32,
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ use std::{
|
||||||
sync::{Arc, Weak},
|
sync::{Arc, Weak},
|
||||||
};
|
};
|
||||||
use tokio::sync::RwLock;
|
use tokio::sync::RwLock;
|
||||||
use tracing::{event, info, instrument};
|
use tracing::{event, instrument};
|
||||||
use validator::Validate;
|
use validator::Validate;
|
||||||
|
|
||||||
pub type CollabPostgresDBStorage = CollabStorageWrapper<
|
pub type CollabPostgresDBStorage = CollabStorageWrapper<
|
||||||
|
|
@ -75,7 +75,7 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn cache_collab(&self, object_id: &str, collab: Weak<MutexCollab>) {
|
async fn cache_collab(&self, object_id: &str, collab: Weak<MutexCollab>) {
|
||||||
tracing::trace!("Cache collab:{} in memory", object_id);
|
tracing::trace!("cache collab:{}", object_id);
|
||||||
self
|
self
|
||||||
.collab_by_object_id
|
.collab_by_object_id
|
||||||
.write()
|
.write()
|
||||||
|
|
@ -83,6 +83,11 @@ where
|
||||||
.insert(object_id.to_string(), collab);
|
.insert(object_id.to_string(), collab);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn remove_collab_cache(&self, object_id: &str) {
|
||||||
|
tracing::trace!("remove collab:{} cache", object_id);
|
||||||
|
self.collab_by_object_id.write().await.remove(object_id);
|
||||||
|
}
|
||||||
|
|
||||||
async fn is_collab_exist(&self, oid: &str) -> DatabaseResult<bool> {
|
async fn is_collab_exist(&self, oid: &str) -> DatabaseResult<bool> {
|
||||||
self.inner.is_collab_exist(oid).await
|
self.inner.is_collab_exist(oid).await
|
||||||
}
|
}
|
||||||
|
|
@ -156,9 +161,20 @@ where
|
||||||
.and_then(|collab| collab.upgrade());
|
.and_then(|collab| collab.upgrade());
|
||||||
|
|
||||||
match collab {
|
match collab {
|
||||||
None => self.inner.get_collab_encoded_v1(uid, params).await,
|
None => {
|
||||||
|
event!(
|
||||||
|
tracing::Level::DEBUG,
|
||||||
|
"Get collab data:{} from disk",
|
||||||
|
params.object_id
|
||||||
|
);
|
||||||
|
self.inner.get_collab_encoded_v1(uid, params).await
|
||||||
|
},
|
||||||
Some(collab) => {
|
Some(collab) => {
|
||||||
info!("Get collab data:{} from memory", params.object_id);
|
event!(
|
||||||
|
tracing::Level::DEBUG,
|
||||||
|
"Get collab data:{} from memory",
|
||||||
|
params.object_id
|
||||||
|
);
|
||||||
let data = collab.encode_collab_v1();
|
let data = collab.encode_collab_v1();
|
||||||
Ok(data)
|
Ok(data)
|
||||||
},
|
},
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue