chore: retry with exponential backoff (#437)

This commit is contained in:
Nathan.fooo 2024-04-02 09:24:23 +08:00 committed by GitHub
parent a18b3d7886
commit fbadca759a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 66 additions and 102 deletions

View File

@ -1,8 +1,9 @@
use crate::af_spawn;
use crate::collab_sync::collab_stream::SeqNumCounter;
use crate::collab_sync::ping::PingSyncRunner;
use crate::collab_sync::sink_queue::{QueueItem, SinkQueue};
use crate::collab_sync::{SinkConfig, SyncError, SyncObject};
use collab::core::origin::{CollabClient, CollabOrigin};
use collab_rt_entity::{ClientCollabMessage, MsgId, ServerCollabMessage, SinkMessage};
use futures_util::SinkExt;
@ -11,6 +12,7 @@ use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::{Arc, Weak};
use std::time::{Duration, Instant};
use crate::collab_sync::ping::PingSyncRunner;
use tokio::sync::{broadcast, watch, Mutex};
use tokio::time::{interval, sleep};
use tracing::{error, trace, warn};
@ -95,14 +97,13 @@ where
let last_sync = Arc::new(SyncTimestamp::new());
let mut interval = interval(SEND_INTERVAL);
let weak_notifier = Arc::downgrade(&notifier);
let weak_flying_messages = Arc::downgrade(&flying_messages);
let weak_notifier = Arc::downgrade(&notifier);
let origin = CollabOrigin::Client(CollabClient {
uid,
device_id: object.device_id.clone(),
});
PingSyncRunner::run(
origin,
object.object_id.clone(),

View File

@ -332,7 +332,6 @@ impl SeqNumCounter {
if ack_seq_num > broadcast_seq_num + 3 {
self.store_broadcast_seq_num(ack_seq_num);
return Err(SyncError::MissUpdates(format!(
"missing {} updates, start init sync",
ack_seq_num - broadcast_seq_num,
@ -341,7 +340,6 @@ impl SeqNumCounter {
if self.equal_counter.load(Ordering::SeqCst) >= 5 {
self.equal_counter.store(0, Ordering::SeqCst);
return Err(SyncError::MissUpdates(
"ping exceeds, start init sync".to_string(),
));

View File

@ -9,9 +9,11 @@ use tokio::sync::watch;
use tokio::time::{sleep_until, Instant};
use tracing::warn;
#[allow(dead_code)]
pub struct PingSyncRunner;
impl PingSyncRunner {
#[allow(dead_code)]
pub(crate) fn run(
origin: CollabOrigin,
object_id: String,
@ -21,7 +23,12 @@ impl PingSyncRunner {
weak_notify: Weak<watch::Sender<SinkSignal>>,
sync_timestamp: Arc<SyncTimestamp>,
) {
let duration = Duration::from_secs(10);
let duration = if cfg!(feature = "test_fast_sync") {
Duration::from_secs(10)
} else {
Duration::from_secs(20)
};
let mut next_tick = Instant::now() + duration;
tokio::spawn(async move {
loop {
@ -51,7 +58,7 @@ impl PingSyncRunner {
if is_not_empty {
#[cfg(feature = "sync_verbose_log")]
tracing::trace!("{} slow down ping", object_id);
next_tick = Instant::now() + Duration::from_secs(20);
next_tick = Instant::now() + Duration::from_secs(30);
}
let msg_id = msg_id_counter.next();

View File

@ -27,6 +27,7 @@ use parking_lot::RwLock;
use reqwest::{header, StatusCode};
use collab_entity::CollabType;
use reqwest::header::HeaderValue;
use reqwest::Method;
use reqwest::RequestBuilder;

View File

@ -1,9 +1,10 @@
use crate::http::log_request_id;
use crate::native::GetCollabAction;
use crate::ws::{WSClientHttpSender, WSError};
use crate::{spawn_blocking_brotli_compress, Client};
use crate::{RefreshTokenAction, RefreshTokenRetryCondition};
use anyhow::anyhow;
use app_error::{AppError, ErrorCode};
use app_error::AppError;
use async_trait::async_trait;
use collab_rt_entity::EncodedCollab;
use collab_rt_entity::HttpRealtimeMessage;
@ -15,8 +16,8 @@ use shared_entity::response::{AppResponse, AppResponseError};
use std::future::Future;
use std::sync::atomic::Ordering;
use std::time::Duration;
use tokio_retry::strategy::FixedInterval;
use tokio_retry::RetryIf;
use tokio_retry::strategy::{ExponentialBackoff, FixedInterval};
use tokio_retry::{Retry, RetryIf};
use tracing::{event, instrument};
impl Client {
@ -25,42 +26,10 @@ impl Client {
&self,
params: QueryCollabParams,
) -> Result<EncodedCollab, AppResponseError> {
let url = format!(
"{}/api/workspace/{}/collab/{}",
self.base_url, &params.workspace_id, &params.object_id
);
let mut retries = 3; // Maximum number of retries
let retry_delay = Duration::from_secs(2);
while retries > 0 {
let resp = self
.http_client_with_auth(Method::GET, &url)
.await?
.json(&params)
.send()
.await?;
log_request_id(&resp);
let response = AppResponse::<EncodedCollab>::from_response(resp).await?;
// Retry if the record is not found
if response.code == ErrorCode::RecordNotFound {
retries -= 1;
if retries > 0 {
tokio::time::sleep(retry_delay).await;
continue;
} else {
return response.into_data();
}
} else {
return response.into_data();
}
}
// this part is unreachable by logic
Err(AppResponseError::new(
ErrorCode::Unhandled,
"Exhausted retries to fetch collaboration data.",
))
// 2 seconds, 4 seconds, 8 seconds
let retry_strategy = ExponentialBackoff::from_millis(2).factor(1000).take(3);
let action = GetCollabAction::new(self.clone(), params);
Retry::spawn(retry_strategy, action).await
}
#[instrument(level = "debug", skip_all, err)]

View File

@ -1,12 +1,18 @@
use crate::http::log_request_id;
use crate::notify::ClientToken;
use crate::ws::{
ConnectInfo, ConnectState, ConnectStateNotify, CurrentConnInfo, StateNotify, WSError,
};
use crate::Client;
use app_error::gotrue::GoTrueError;
use client_websocket::{connect_async, WebSocketStream};
use collab_rt_entity::EncodedCollab;
use database_entity::dto::QueryCollabParams;
use gotrue::grant::{Grant, RefreshTokenGrant};
use parking_lot::RwLock;
use reqwest::header::HeaderMap;
use reqwest::Method;
use shared_entity::response::{AppResponse, AppResponseError};
use std::future::Future;
use std::pin::Pin;
use std::sync::{Arc, Weak};
@ -150,3 +156,41 @@ impl Condition<WSError> for RetryCondition {
should_retry
}
}
/// Retryable request for fetching an encoded collab over HTTP.
///
/// Bundles everything a single attempt needs so the request can be re-issued
/// from scratch on every call to `Action::run`.
pub(crate) struct GetCollabAction {
// Client whose `base_url` and authenticated request builder are used for each attempt.
client: Client,
// Query parameters; supply the workspace/object ids for the URL and are sent as the JSON body.
params: QueryCollabParams,
}
impl GetCollabAction {
pub fn new(client: Client, params: QueryCollabParams) -> Self {
Self { client, params }
}
}
// One attempt at downloading a collab; a retry driver may call `run` repeatedly.
impl Action for GetCollabAction {
    type Future = Pin<Box<dyn Future<Output = Result<Self::Item, Self::Error>> + Send + Sync>>;
    type Item = EncodedCollab;
    type Error = AppResponseError;

    /// Issues a single authenticated `GET` for the collab and decodes the reply.
    ///
    /// Returns the decoded `EncodedCollab`, or the `AppResponseError` produced
    /// while building the request, sending it, or unwrapping the response.
    fn run(&mut self) -> Self::Future {
        // The boxed future has to own its inputs, so each attempt works on
        // fresh clones and leaves `self` reusable for the next attempt.
        let (client, params) = (self.client.clone(), self.params.clone());
        Box::pin(async move {
            let endpoint = format!(
                "{}/api/workspace/{}/collab/{}",
                client.base_url, &params.workspace_id, &params.object_id
            );
            let http_resp = client
                .http_client_with_auth(Method::GET, &endpoint)
                .await?
                .json(&params)
                .send()
                .await?;
            log_request_id(&http_resp);
            AppResponse::<EncodedCollab>::from_response(http_resp)
                .await?
                .into_data()
        })
    }
}

View File

@ -275,59 +275,3 @@ async fn edit_document_with_both_clients_offline_then_online_sync_test() {
.await
.unwrap();
}
// Scenario: client_2 has message receiving disabled while client_1 edits the
// shared collab; after receiving is re-enabled, client_2's copy must still end
// up containing client_1's edit.
#[tokio::test]
async fn second_client_missing_broadcast_and_then_pull_missing_updates_test() {
let collab_type = CollabType::Empty;
let mut client_1 = TestClient::new_user().await;
let mut client_2 = TestClient::new_user().await;
// Create a collab object with client_1 and add client_2 as a member with
// read-and-write access.
let workspace_id = client_1.workspace_id().await;
let object_id = client_1
.create_and_edit_collab(&workspace_id, collab_type.clone())
.await;
client_1
.add_collab_member(
&workspace_id,
&object_id,
&client_2,
AFAccessLevel::ReadAndWrite,
)
.await;
// Let client_2 open the collab and wait for its sync to complete, then
// disable message receiving on its websocket client.
client_2
.open_collab(&workspace_id, &object_id, collab_type.clone())
.await;
client_2
.wait_object_sync_complete(&object_id)
.await
.unwrap();
client_2.ws_client.disable_receive_message();
// client_1 edits the collab by inserting the key "content" with the value
// "hello world", then waits until that edit has been synced.
client_1
.collabs
.get_mut(&object_id)
.unwrap()
.collab
.lock()
.insert("content", "hello world");
client_1
.wait_object_sync_complete(&object_id)
.await
.unwrap();
// Sleep for two seconds while client_2 is not receiving, so that it misses
// the server broadcast for the edit above.
sleep(Duration::from_secs(2)).await;
// Re-enable receiving; client_2 is then expected to recover the missed
// update (presumably via an init sync — confirm against the sync logic).
client_2.ws_client.enable_receive_message();
let expected_json = json!({
"content": "hello world"
});
// Passes only if client_2's collab includes the value written by client_1.
assert_client_collab_include_value(&mut client_2, &object_id, expected_json)
.await
.unwrap();
}