From 37a759ead65464ea6bad956bbd1d6ac5761a80fe Mon Sep 17 00:00:00 2001 From: Zack <33050391+speed2exe@users.noreply.github.com> Date: Fri, 3 Nov 2023 16:57:05 +0800 Subject: [PATCH] feat: Request Id (#144) * feat: added basic monitoring stack * docs(grafana): import steps and recommended imports * feat(metric): api metrics * feat(prometheus): add appflowy cloud metric scraping job * fix: put back tracing logger * feat: middleware for path counter * feat: added gotrue to prometheus metrics * feat: added nginx to prometheus metrics * feat: added latency and result to metrics * feat(nginx): set request id to header * feat: added request id in nginx * feat: simplify metric collection * feat: added simple grafana dashboard for appflowy cloud * feat(logging): added logging stack * docs: improve monitoring docs * docs: added known common issues * feat: give persistence * feat: add exemplars * fix: docker compose volumes * feat(grafana): add trace id to prometheus datasource * feat(grafana): improve dashboard to include exemplars * feat: request id middleware * docs: move doc to more relevant place * chore: clean up logs * feat: try json fmt * feat: remove pretty * feat: logging use json in prod but pretty in local --- doc/logging.md | 16 +++++++++ doc/monitoring.md | 19 +--------- src/api/file_storage.rs | 3 -- src/api/user.rs | 5 --- src/api/workspace.rs | 12 ------- src/application.rs | 5 ++- src/config/config.rs | 13 +++---- src/main.rs | 17 +++++---- src/middleware/metrics_mw.rs | 15 +------- src/middleware/mod.rs | 1 + src/middleware/request_id.rs | 69 ++++++++++++++++++++++++++++++++++++ src/telemetry.rs | 61 +++++++++++++++++-------------- 12 files changed, 138 insertions(+), 98 deletions(-) create mode 100644 src/middleware/request_id.rs diff --git a/doc/logging.md b/doc/logging.md index de875a75..eb60d40a 100644 --- a/doc/logging.md +++ b/doc/logging.md @@ -10,6 +10,22 @@ A simple guide to deploy the logging infrastructure for AppFlowy Cloud ``` docker compose --file docker-compose-logging.yml up -d ``` +- Quick check: `docker ps -a` + +## Common deployment problems +### Filebeat did not start successfully +``` +$ docker logs appflowy-cloud-filebeat-1 +Exiting: error loading config file: config file ("filebeat.yml") can only be writable by the owner but the permissions are "-rw-rw-r--" (to fix the permissions use: 'chmod go-w /usr/share/filebeat/filebeat.yml') +``` +- Solution: remove write permission on the file: `chmod -w docker/filebeat/filebeat.yml` + +### No Logs +``` +$ docker logs appflowy-cloud-filebeat-1 +...Non-zero metrics in the last 30s... +``` +- Solution: give read permission to docker logs: `chmod -R a+r /var/lib/docker/containers` ## Credentials - After deployment, when you go to localhost:5601, both username and password will be `admin` diff --git a/doc/monitoring.md b/doc/monitoring.md index a35ca3f9..05505221 100644 --- a/doc/monitoring.md +++ b/doc/monitoring.md @@ -12,24 +12,7 @@ A simple guide to deploy the monitoring tools for AppFlowy Cloud ``` docker compose --file docker-compose-monitor.yml up -d ``` -- Check deployment -``` -docker ps -a -``` -## Common problems -### Filebeat did not start successfully -``` -$ docker logs appflowy-cloud-filebeat-1 -Exiting: error loading config file: config file ("filebeat.yml") can only be writable by the owner but the permissions are "-rw-rw-r--" (to fix the permissions use: 'chmod go-w /usr/share/filebeat/filebeat.yml') -``` -- Solution: remove write permission on the file: `chmod -w docker/filebeat/filebeat.yml` - -### No Logs -``` -$ docker logs appflowy-cloud-filebeat-1 -...Non-zero metrics in the last 30s... -``` -- Solution: give read permission to docker logs: `chmod -R a+r /var/lib/docker/containers` +- Quick check: `docker ps -a` ## Web UI Setup - After deployment, you will have a Grafana dashboard server, sign in with your username and password defined previously diff --git a/src/api/file_storage.rs b/src/api/file_storage.rs index c50f2bdb..a0e59450 100644 --- a/src/api/file_storage.rs +++ b/src/api/file_storage.rs @@ -24,7 +24,6 @@ use tokio::io::AsyncRead; use tokio_stream::StreamExt; use tokio_util::io::StreamReader; use tracing::{event, instrument}; -use tracing_actix_web::RequestId; use crate::state::AppState; @@ -62,7 +61,6 @@ async fn put_blob_handler( content_type: web::Header, content_length: web::Header, workspace_id: web::Path, - request_id: RequestId, ) -> Result> { let content_length = content_length.into_inner().into_inner(); // Check content length, if it's too large, return error. @@ -117,7 +115,6 @@ async fn delete_blob_handler( async fn get_blob_handler( state: Data, path: web::Path, - request_id: RequestId, req: HttpRequest, ) -> Result> { let PathInfo { diff --git a/src/api/user.rs b/src/api/user.rs index 955ece57..e3937e1b 100644 --- a/src/api/user.rs +++ b/src/api/user.rs @@ -20,7 +20,6 @@ use actix_web::{web, HttpResponse, Scope}; use database_entity::dto::{AFUserProfile, AFUserWorkspaceInfo}; use shared_entity::response::AppResponseError; -use tracing_actix_web::RequestId; pub fn user_scope() -> Scope { web::scope("/api/user") @@ -41,7 +40,6 @@ pub fn user_scope() -> Scope { async fn verify_user_handler( path: web::Path, state: Data, - request_id: RequestId, ) -> Result> { let access_token = path.into_inner(); let is_new = biz::user::verify_token( @@ -60,7 +58,6 @@ async fn verify_user_handler( async fn get_user_profile_handler( uuid: UserUuid, state: Data, - request_id: RequestId, ) -> Result> { let profile = biz::user::get_profile(&state.pg_pool, &uuid) .await @@ -72,7 +69,6 @@ async fn get_user_profile_handler( async fn get_user_workspace_info_handler( uuid: UserUuid, state: Data, - request_id: RequestId, ) -> Result> { let info = biz::user::get_user_workspace_info(&state.pg_pool, &uuid).await?; Ok(AppResponse::Ok().with_data(info).into()) @@ -83,7 +79,6 @@ async fn update_user_handler( auth: Authorization, payload: Json, state: Data, - request_id: RequestId, ) -> Result> { let params = payload.into_inner(); biz::user::update_user(&state.pg_pool, auth.uuid()?, params).await?; diff --git a/src/api/workspace.rs b/src/api/workspace.rs index c5db6751..2d4dc153 100644 --- a/src/api/workspace.rs +++ b/src/api/workspace.rs @@ -14,7 +14,6 @@ use shared_entity::response::AppResponseError; use shared_entity::response::{AppResponse, JsonAppResponse}; use sqlx::types::uuid; use tracing::{debug, event, instrument}; -use tracing_actix_web::RequestId; use uuid::Uuid; pub const WORKSPACE_ID_PATH: &str = "workspace_id"; @@ -81,7 +80,6 @@ async fn list_handler( #[instrument(skip(payload, state), err)] async fn add_workspace_members_handler( - request_id: RequestId, user_uuid: UserUuid, workspace_id: web::Path, payload: Json, @@ -198,7 +196,6 @@ async fn update_workspace_member_handler( #[instrument(skip(state, payload), err)] async fn create_collab_handler( user_uuid: UserUuid, - request_id: RequestId, payload: Json, state: Data, ) -> Result>> { @@ -209,7 +206,6 @@ async fn create_collab_handler( #[instrument(skip(payload, state), err)] async fn get_collab_handler( user_uuid: UserUuid, - request_id: RequestId, payload: Json, state: Data, ) -> Result>> { @@ -229,7 +225,6 @@ async fn get_collab_handler( #[instrument(skip(payload, state), err)] async fn batch_get_collab_handler( user_uuid: UserUuid, - request_id: RequestId, state: Data, payload: Json, ) -> Result>> { @@ -248,7 +243,6 @@ async fn batch_get_collab_handler( #[instrument(skip(state, payload), err)] async fn update_collab_handler( user_uuid: UserUuid, - request_id: RequestId, payload: Json, state: Data, ) -> Result>> { @@ -259,7 +253,6 @@ async fn update_collab_handler( #[instrument(level = "info", skip(state, payload), err)] async fn delete_collab_handler( user_uuid: UserUuid, - request_id: RequestId, payload: Json, state: Data, ) -> Result>> { @@ -292,7 +285,6 @@ async fn retrieve_snapshots_handler( #[instrument(skip(state, payload), err)] async fn add_collab_member_handler( - request_id: RequestId, payload: Json, state: Data, ) -> Result>> { @@ -308,7 +300,6 @@ async fn add_collab_member_handler( #[instrument(skip(state, payload), err)] async fn update_collab_member_handler( user_uuid: UserUuid, - request_id: RequestId, payload: Json, state: Data, ) -> Result>> { @@ -325,7 +316,6 @@ async fn update_collab_member_handler( #[instrument(skip(state, payload), err)] async fn get_collab_member_handler( user_uuid: UserUuid, - request_id: RequestId, payload: Json, state: Data, ) -> Result>> { @@ -337,7 +327,6 @@ async fn get_collab_member_handler( #[instrument(skip(state, payload), err)] async fn remove_collab_member_handler( user_uuid: UserUuid, - request_id: RequestId, payload: Json, state: Data, ) -> Result>> { @@ -354,7 +343,6 @@ async fn remove_collab_member_handler( #[instrument(skip(state, payload), err)] async fn get_collab_member_list_handler( user_uuid: UserUuid, - request_id: RequestId, payload: Json, state: Data, ) -> Result>> { diff --git a/src/application.rs b/src/application.rs index 1a40ed88..5c03647c 100644 --- a/src/application.rs +++ b/src/application.rs @@ -2,13 +2,13 @@ use crate::api::metrics::{metrics_registry, metrics_scope}; use crate::component::auth::HEADER_TOKEN; use crate::config::config::{Config, DatabaseSetting, GoTrueSetting, S3Setting, TlsConfig}; use crate::middleware::cors_mw::default_cors; +use crate::middleware::request_id::RequestIdMiddleware; use crate::self_signed::create_self_signed_certificate; use crate::state::AppState; use actix_identity::IdentityMiddleware; use actix_session::storage::RedisSessionStore; use actix_session::SessionMiddleware; use actix_web::cookie::Key; -use actix_web::middleware::Compat; use actix_web::{dev::Server, web, web::Data, App, HttpServer}; use actix::Actor; @@ -20,7 +20,6 @@ use snowflake::Snowflake; use sqlx::{postgres::PgPoolOptions, PgPool}; use std::net::TcpListener; use std::sync::Arc; -use tracing_actix_web::TracingLogger; use tokio::sync::RwLock; @@ -107,6 +106,7 @@ pub async fn run( let mut server = HttpServer::new(move || { App::new() + .wrap(RequestIdMiddleware) .wrap(MetricsMiddleware) .wrap(IdentityMiddleware::default()) .wrap( @@ -116,7 +116,6 @@ pub async fn run( ) .wrap(default_cors()) .wrap(access_control.clone()) - .wrap(Compat::new(TracingLogger::default())) .app_data(web::JsonConfig::default().limit(4096)) .service(user_scope()) .service(workspace_scope()) diff --git a/src/config/config.rs b/src/config/config.rs index ce9a39f2..a23e6051 100644 --- a/src/config/config.rs +++ b/src/config/config.rs @@ -1,8 +1,9 @@ use config::{Config as InnerConfig, FileFormat}; use secrecy::Secret; +use serde::Deserialize; use serde_aux::field_attributes::deserialize_number_from_string; use sqlx::postgres::{PgConnectOptions, PgSslMode}; -use std::convert::{TryFrom, TryInto}; +use std::convert::TryFrom; use std::path::PathBuf; #[derive(serde::Deserialize, Clone, Debug)] @@ -106,15 +107,10 @@ impl DatabaseSetting { } } -pub fn get_configuration() -> Result { +pub fn get_configuration(app_env: &Environment) -> Result { let base_path = std::env::current_dir().expect("Failed to determine the current directory"); let configuration_dir = base_path.join("configuration"); - let environment: Environment = std::env::var("APP_ENVIRONMENT") - .unwrap_or_else(|_| "local".to_string()) - .try_into() - .expect("Failed to parse APP_ENVIRONMENT."); - let builder = InnerConfig::builder() .set_default("default", "1")? .add_source( @@ -123,7 +119,7 @@ pub fn get_configuration() -> Result { .format(FileFormat::Yaml), ) .add_source( - config::File::from(configuration_dir.join(environment.as_str())) + config::File::from(configuration_dir.join(app_env.as_str())) .required(true) .format(FileFormat::Yaml), ) @@ -137,6 +133,7 @@ pub fn get_configuration() -> Result { } /// The possible runtime environment for our application. +#[derive(Clone, Debug, Deserialize)] pub enum Environment { Local, Production, diff --git a/src/main.rs b/src/main.rs index 6a2f2f78..49a0ae1f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ use appflowy_cloud::application::{init_state, Application}; -use appflowy_cloud::config::config::get_configuration; -use appflowy_cloud::telemetry::{get_subscriber, init_subscriber}; +use appflowy_cloud::config::config::{get_configuration, Environment}; +use appflowy_cloud::telemetry::init_subscriber; #[actix_web::main] async fn main() -> anyhow::Result<()> { @@ -17,14 +17,13 @@ async fn main() -> anyhow::Result<()> { filters.push(format!("database={}", level)); filters.push(format!("storage={}", level)); - let subscriber = get_subscriber( - "appflowy_cloud".to_string(), - Some(filters.join(",")), - std::io::stdout, - ); - init_subscriber(subscriber); + let app_env: Environment = std::env::var("APP_ENVIRONMENT") + .unwrap_or_else(|_| "local".to_string()) + .try_into() + .expect("Failed to parse APP_ENVIRONMENT."); - let configuration = get_configuration().expect("The configuration should be configured."); + init_subscriber(&app_env, filters); + let configuration = get_configuration(&app_env).expect("The configuration should be configured."); let state = init_state(&configuration) .await .expect("The AppState should be initialized"); diff --git a/src/middleware/metrics_mw.rs b/src/middleware/metrics_mw.rs index 0682a2e1..a70933d2 100644 --- a/src/middleware/metrics_mw.rs +++ b/src/middleware/metrics_mw.rs @@ -1,4 +1,3 @@ -use actix_http::header::HeaderName; use actix_service::{forward_ready, Service, Transform}; use actix_web::dev::{ServiceRequest, ServiceResponse}; use actix_web::web::Data; @@ -7,6 +6,7 @@ use futures_util::future::LocalBoxFuture; use std::future::{ready, Ready}; use std::sync::Arc; +use super::request_id::get_request_id; use crate::api::metrics::AppFlowyCloudMetrics; pub struct MetricsMiddleware; @@ -77,16 +77,3 @@ where }) } } - -fn get_request_id(req: &ServiceRequest) -> Option { - match req.headers().get(HeaderName::from_static("x-request-id")) { - Some(h) => match h.to_str() { - Ok(s) => Some(s.to_owned()), - Err(e) => { - tracing::error!("Failed to get request id from header: {}", e); - None - }, - }, - None => None, - } -} diff --git a/src/middleware/mod.rs b/src/middleware/mod.rs index ccda4ccc..c56622be 100644 --- a/src/middleware/mod.rs +++ b/src/middleware/mod.rs @@ -1,3 +1,4 @@ pub mod access_control_mw; pub mod cors_mw; pub mod metrics_mw; +pub mod request_id; diff --git a/src/middleware/request_id.rs b/src/middleware/request_id.rs new file mode 100644 index 00000000..59d637c1 --- /dev/null +++ b/src/middleware/request_id.rs @@ -0,0 +1,69 @@ +use actix_http::header::HeaderName; +use std::future::{ready, Ready}; +use tracing::{Instrument, Level}; + +use actix_service::{forward_ready, Service, Transform}; +use actix_web::dev::{ServiceRequest, ServiceResponse}; +use futures_util::future::LocalBoxFuture; + +pub struct RequestIdMiddleware; + +impl Transform for RequestIdMiddleware +where + S: Service, Error = actix_web::Error>, + S::Future: 'static, + B: 'static, +{ + type Response = ServiceResponse; + type Error = actix_web::Error; + type Transform = RequestIdMiddlewareService; + type InitError = (); + type Future = Ready>; + + fn new_transform(&self, service: S) -> Self::Future { + ready(Ok(RequestIdMiddlewareService { service })) + } +} + +pub struct RequestIdMiddlewareService { + service: S, +} + +impl Service for RequestIdMiddlewareService +where + S: Service, Error = actix_web::Error>, + S::Future: 'static, + B: 'static, +{ + type Response = ServiceResponse; + type Error = actix_web::Error; + type Future = LocalBoxFuture<'static, Result>; + + forward_ready!(service); + + fn call(&self, req: ServiceRequest) -> Self::Future { + let request_id = get_request_id(&req).unwrap_or({ + let new_id = uuid::Uuid::new_v4().to_string(); + tracing::warn!("No request id found in header, generating one: {}", new_id); + new_id + }); + + // Call the next service + let span = tracing::span!(Level::INFO, "request_id", request_id = %request_id); + let res = self.service.call(req); + Box::pin(res.instrument(span)) + } +} + +pub fn get_request_id(req: &ServiceRequest) -> Option { + match req.headers().get(HeaderName::from_static("x-request-id")) { + Some(h) => match h.to_str() { + Ok(s) => Some(s.to_owned()), + Err(e) => { + tracing::error!("Failed to get request id from header: {}", e); + None + }, + }, + None => None, + } +} diff --git a/src/telemetry.rs b/src/telemetry.rs index f38e67db..174a66b9 100644 --- a/src/telemetry.rs +++ b/src/telemetry.rs @@ -1,21 +1,20 @@ use actix_web::rt::task::JoinHandle; use tracing::subscriber::set_global_default; -use tracing::Subscriber; use tracing_bunyan_formatter::{BunyanFormattingLayer, JsonStorageLayer}; use tracing_log::LogTracer; -use tracing_subscriber::fmt::MakeWriter; use tracing_subscriber::{layer::SubscriberExt, EnvFilter}; -/// Compose multiple layers into a `tracing`'s subscriber. -pub fn get_subscriber( - name: String, - env_filter: Option, - sink: Sink, -) -> impl Subscriber + Sync + Send -where - Sink: for<'a> MakeWriter<'a> + Send + Sync + 'static, -{ +use crate::config::config::Environment; + +/// Register a subscriber as global default to process span data. +/// +/// It should only be called once! +pub fn init_subscriber(app_env: &Environment, filters: Vec) { + let name = "appflowy_cloud".to_string(); + let env_filter = Some(filters.join(",")); + let sink = std::io::stdout; + let env_filter = match env_filter { None => { dbg!("Using default env filter"); @@ -25,25 +24,35 @@ where }; let formatting_layer = BunyanFormattingLayer::new(name, sink); - tracing_subscriber::fmt() - .with_ansi(true) + let builder = tracing_subscriber::fmt() + .json() .with_target(true) .with_max_level(tracing::Level::TRACE) .with_thread_ids(false) - .with_file(false) - .pretty() - .finish() - .with(env_filter) - .with(JsonStorageLayer) - .with(formatting_layer) -} + .with_file(false); -/// Register a subscriber as global default to process span data. -/// -/// It should only be called once! -pub fn init_subscriber(subscriber: impl Subscriber + Sync + Send) { - LogTracer::init().expect("Failed to set logger"); - set_global_default(subscriber).expect("Failed to set subscriber"); + match app_env { + Environment::Local => { + let subscriber = builder + .pretty() + .with_ansi(true) + .finish() + .with(env_filter) + .with(JsonStorageLayer) + .with(formatting_layer); + LogTracer::init().unwrap(); + set_global_default(subscriber).unwrap(); + }, + Environment::Production => { + let subscriber = builder + .finish() + .with(env_filter) + .with(JsonStorageLayer) + .with(formatting_layer); + LogTracer::init().unwrap(); + set_global_default(subscriber).unwrap(); + }, + } } pub fn spawn_blocking_with_tracing(f: F) -> JoinHandle