chore: make collab storage switch between s3 and postgres configurable (#1035)

This commit is contained in:
Bartosz Sypytkowski 2024-12-03 07:09:09 +01:00 committed by GitHub
parent 9ff6f1c744
commit 51bf9a85c7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 32 additions and 13 deletions

View File

@ -22,10 +22,6 @@ use validator::Validate;
/// The default compression level of ZSTD-compressed collabs.
pub const ZSTD_COMPRESSION_LEVEL: i32 = 3;
/// The threshold used to determine whether collab data should land
/// in S3 or Postgres. The check is a strict `len() > S3_COLLAB_THRESHOLD`, so only
/// collabs whose encoded length is greater than this value are put into S3;
/// collabs at or below this value will land into Postgres.
pub const S3_COLLAB_THRESHOLD: usize = 2000;
#[derive(Debug, Clone, Validate, Serialize, Deserialize)]
pub struct CreateCollabParams {
#[validate(custom = "validate_not_empty_str")]

View File

@ -19,6 +19,7 @@ pub struct CollabCache {
mem_cache: CollabMemCache,
success_attempts: Arc<AtomicU64>,
total_attempts: Arc<AtomicU64>,
s3_collab_threshold: usize,
}
impl CollabCache {
@ -26,12 +27,14 @@ impl CollabCache {
redis_conn_manager: redis::aio::ConnectionManager,
pg_pool: PgPool,
s3: AwsS3BucketClientImpl,
s3_collab_threshold: usize,
) -> Self {
let mem_cache = CollabMemCache::new(redis_conn_manager.clone());
let disk_cache = CollabDiskCache::new(pg_pool.clone(), s3);
let disk_cache = CollabDiskCache::new(pg_pool.clone(), s3, s3_collab_threshold);
Self {
disk_cache,
mem_cache,
s3_collab_threshold,
success_attempts: Arc::new(AtomicU64::new(0)),
total_attempts: Arc::new(AtomicU64::new(0)),
}
@ -168,8 +171,15 @@ impl CollabCache {
let object_id = params.object_id.clone();
let encode_collab_data = params.encoded_collab_v1.clone();
let s3 = self.disk_cache.s3_client();
CollabDiskCache::upsert_collab_with_transaction(workspace_id, uid, params, transaction, s3)
.await?;
CollabDiskCache::upsert_collab_with_transaction(
workspace_id,
uid,
params,
transaction,
s3,
self.s3_collab_threshold,
)
.await?;
// when the data is written to the disk cache but fails to be written to the memory cache
// we log the error and continue.

View File

@ -21,19 +21,23 @@ use crate::file::{BucketClient, ResponseBlob};
use crate::index::upsert_collab_embeddings;
use app_error::AppError;
use database_entity::dto::{
CollabParams, PendingCollabWrite, QueryCollab, QueryCollabResult, S3_COLLAB_THRESHOLD,
ZSTD_COMPRESSION_LEVEL,
CollabParams, PendingCollabWrite, QueryCollab, QueryCollabResult, ZSTD_COMPRESSION_LEVEL,
};
/// Storage handle for collabs that pairs a Postgres connection pool with an
/// S3 bucket client, plus the size threshold used to choose between the two.
#[derive(Clone)]
pub struct CollabDiskCache {
/// Postgres connection pool.
pg_pool: PgPool,
/// S3 bucket client; cloned and handed to the upsert routines.
s3: AwsS3BucketClientImpl,
/// Size threshold for routing collab data: an encoded collab whose `len()` is
/// strictly greater than this value is put into S3.
/// NOTE(review): presumably measured in bytes of the encoded collab — confirm
/// against the type of `encoded_collab_v1`.
s3_collab_threshold: usize,
}
impl CollabDiskCache {
pub fn new(pg_pool: PgPool, s3: AwsS3BucketClientImpl) -> Self {
Self { pg_pool, s3 }
pub fn new(pg_pool: PgPool, s3: AwsS3BucketClientImpl, s3_collab_threshold: usize) -> Self {
Self {
pg_pool,
s3,
s3_collab_threshold,
}
}
pub async fn is_exist(&self, workspace_id: &str, object_id: &str) -> AppResult<bool> {
@ -67,6 +71,7 @@ impl CollabDiskCache {
params,
&mut transaction,
self.s3.clone(),
self.s3_collab_threshold,
)
.await?;
@ -91,10 +96,11 @@ impl CollabDiskCache {
mut params: CollabParams,
transaction: &mut Transaction<'_, sqlx::Postgres>,
s3: AwsS3BucketClientImpl,
s3_collab_threshold: usize,
) -> AppResult<()> {
let mut delete_from_s3 = Vec::new();
let key = collab_key(workspace_id, &params.object_id);
if params.encoded_collab_v1.len() > S3_COLLAB_THRESHOLD {
if params.encoded_collab_v1.len() > s3_collab_threshold {
// put collab into S3
let encoded_collab = std::mem::take(&mut params.encoded_collab_v1);
tokio::spawn(Self::insert_blob_with_retries(
@ -222,7 +228,7 @@ impl CollabDiskCache {
let mut blobs = HashMap::new();
for param in params_list.iter_mut() {
let key = collab_key(workspace_id, &param.object_id);
if param.encoded_collab_v1.len() > S3_COLLAB_THRESHOLD {
if param.encoded_collab_v1.len() > self.s3_collab_threshold {
let blob = std::mem::take(&mut param.encoded_collab_v1);
blobs.insert(key, blob);
} else {
@ -277,6 +283,7 @@ impl CollabDiskCache {
params,
&mut transaction,
s3.clone(),
self.s3_collab_threshold,
)
.await
{

View File

@ -130,6 +130,7 @@ pub async fn init_state(config: &Config, rt_cmd_tx: CLCommandSender) -> Result<A
redis_conn_manager.clone(),
pg_pool.clone(),
s3_client.clone(),
config.collab.s3_collab_threshold as usize,
);
let collab_storage_access_control = CollabStorageAccessControlImpl {

View File

@ -128,6 +128,7 @@ pub struct CollabSetting {
pub group_persistence_interval_secs: u64,
pub edit_state_max_count: u32,
pub edit_state_max_secs: i64,
pub s3_collab_threshold: u64,
}
pub fn get_env_var(key: &str, default: &str) -> String {
@ -191,6 +192,7 @@ pub fn get_configuration() -> Result<Config, anyhow::Error> {
.parse()?,
edit_state_max_count: get_env_var("APPFLOWY_COLLAB_EDIT_STATE_MAX_COUNT", "100").parse()?,
edit_state_max_secs: get_env_var("APPFLOWY_COLLAB_EDIT_STATE_MAX_SECS", "60").parse()?,
s3_collab_threshold: get_env_var("APPFLOWY_COLLAB_S3_THRESHOLD", "8000").parse()?,
},
redis_uri: get_env_var("APPFLOWY_REDIS_URI", "redis://localhost:6379").into(),
ai: AISettings {

View File

@ -285,6 +285,7 @@ pub async fn init_state(config: &Config, rt_cmd_tx: CLCommandSender) -> Result<A
redis_conn_manager.clone(),
pg_pool.clone(),
s3_client.clone(),
config.collab.s3_collab_threshold as usize,
);
let collab_storage_access_control = CollabStorageAccessControlImpl {

View File

@ -145,6 +145,7 @@ pub struct CollabSetting {
pub group_persistence_interval_secs: u64,
pub edit_state_max_count: u32,
pub edit_state_max_secs: i64,
pub s3_collab_threshold: u64,
}
#[derive(Clone, Debug)]
@ -251,6 +252,7 @@ pub fn get_configuration() -> Result<Config, anyhow::Error> {
.parse()?,
edit_state_max_count: get_env_var("APPFLOWY_COLLAB_EDIT_STATE_MAX_COUNT", "100").parse()?,
edit_state_max_secs: get_env_var("APPFLOWY_COLLAB_EDIT_STATE_MAX_SECS", "60").parse()?,
s3_collab_threshold: get_env_var("APPFLOWY_COLLAB_S3_THRESHOLD", "60").parse()?,
},
published_collab: PublishedCollabSetting {
storage_backend: get_env_var("APPFLOWY_PUBLISHED_COLLAB_STORAGE_BACKEND", "postgres")