feat: Request Id (#144)

* feat: added basic monitoring stack

* docs(grafana): import steps and recommended imports

* feat(metric): api metrics

* feat(prometheus): add appflowy cloud metric scraping job

* fix: put back tracing logger

* feat: middleware for path counter

* feat: added gotrue to prometheus metrics

* feat: added nginx to prometheus metrics

* feat: added latency and result to metrics

* feat(nginx): set request id to header

* feat: added request id in nginx

* feat: simplify metric collection

* feat: added simple grafana dashboard for appflowy cloud

* feat(logging): added logging stack

* docs: improve monitoring docs

* docs: added known common issues

* feat: give persistence

* feat: add exemplars

* fix: docker compose volumes

* feat(grafana): add trace id to prometheus datasource

* feat(grafana): improve dashboard to include exemplars

* feat: request id middleware

* docs: move doc to more relevant place

* chore: clean up logs

* feat: try json fmt

* feat: remove pretty

* feat: logging use json in prod but pretty in local
This commit is contained in:
Zack 2023-11-03 16:57:05 +08:00 committed by GitHub
parent 937e3bd9b2
commit 37a759ead6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 138 additions and 98 deletions

View File

@ -10,6 +10,22 @@ A simple guide to deploy the logging infrastructure for AppFlowy Cloud
```
docker compose --file docker-compose-logging.yml up -d
```
- Quick check: `docker ps -a`
## Common deployment problems
### Filebeat did not start successfully
```
$ docker logs appflowy-cloud-filebeat-1
Exiting: error loading config file: config file ("filebeat.yml") can only be writable by the owner but the permissions are "-rw-rw-r--" (to fix the permissions use: 'chmod go-w /usr/share/filebeat/filebeat.yml')
```
- Solution: remove write permission on the file: `chmod -w docker/filebeat/filebeat.yml`
### No Logs
```
$ docker logs appflowy-cloud-filebeat-1
...Non-zero metrics in the last 30s...
```
- Solution: give read permission to docker logs: `chmod -R a+r /var/lib/docker/containers`
## Credentials
- After deployment, when you go to localhost:5601, both username and password will be `admin`

View File

@ -12,24 +12,7 @@ A simple guide to deploy the monitoring tools for AppFlowy Cloud
```
docker compose --file docker-compose-monitor.yml up -d
```
- Check deployment
```
docker ps -a
```
## Common problems
### Filebeat did not start successfully
```
$ docker logs appflowy-cloud-filebeat-1
Exiting: error loading config file: config file ("filebeat.yml") can only be writable by the owner but the permissions are "-rw-rw-r--" (to fix the permissions use: 'chmod go-w /usr/share/filebeat/filebeat.yml')
```
- Solution: remove write permission on the file: `chmod -w docker/filebeat/filebeat.yml`
### No Logs
```
$ docker logs appflowy-cloud-filebeat-1
...Non-zero metrics in the last 30s...
```
- Solution: give read permission to docker logs: `chmod -R a+r /var/lib/docker/containers`
- Quick check: `docker ps -a`
## Web UI Setup
- After deployment, you will have a Grafana dashboard server, sign in with your username and password defined previously

View File

@ -24,7 +24,6 @@ use tokio::io::AsyncRead;
use tokio_stream::StreamExt;
use tokio_util::io::StreamReader;
use tracing::{event, instrument};
use tracing_actix_web::RequestId;
use crate::state::AppState;
@ -62,7 +61,6 @@ async fn put_blob_handler(
content_type: web::Header<ContentType>,
content_length: web::Header<ContentLength>,
workspace_id: web::Path<Uuid>,
request_id: RequestId,
) -> Result<JsonAppResponse<AFBlobRecord>> {
let content_length = content_length.into_inner().into_inner();
// Check content length, if it's too large, return error.
@ -117,7 +115,6 @@ async fn delete_blob_handler(
async fn get_blob_handler(
state: Data<AppState>,
path: web::Path<PathInfo>,
request_id: RequestId,
req: HttpRequest,
) -> Result<HttpResponse<BoxBody>> {
let PathInfo {

View File

@ -20,7 +20,6 @@ use actix_web::{web, HttpResponse, Scope};
use database_entity::dto::{AFUserProfile, AFUserWorkspaceInfo};
use shared_entity::response::AppResponseError;
use tracing_actix_web::RequestId;
pub fn user_scope() -> Scope {
web::scope("/api/user")
@ -41,7 +40,6 @@ pub fn user_scope() -> Scope {
async fn verify_user_handler(
path: web::Path<String>,
state: Data<AppState>,
request_id: RequestId,
) -> Result<JsonAppResponse<SignInTokenResponse>> {
let access_token = path.into_inner();
let is_new = biz::user::verify_token(
@ -60,7 +58,6 @@ async fn verify_user_handler(
async fn get_user_profile_handler(
uuid: UserUuid,
state: Data<AppState>,
request_id: RequestId,
) -> Result<JsonAppResponse<AFUserProfile>> {
let profile = biz::user::get_profile(&state.pg_pool, &uuid)
.await
@ -72,7 +69,6 @@ async fn get_user_profile_handler(
async fn get_user_workspace_info_handler(
uuid: UserUuid,
state: Data<AppState>,
request_id: RequestId,
) -> Result<JsonAppResponse<AFUserWorkspaceInfo>> {
let info = biz::user::get_user_workspace_info(&state.pg_pool, &uuid).await?;
Ok(AppResponse::Ok().with_data(info).into())
@ -83,7 +79,6 @@ async fn update_user_handler(
auth: Authorization,
payload: Json<UpdateUserParams>,
state: Data<AppState>,
request_id: RequestId,
) -> Result<JsonAppResponse<()>> {
let params = payload.into_inner();
biz::user::update_user(&state.pg_pool, auth.uuid()?, params).await?;

View File

@ -14,7 +14,6 @@ use shared_entity::response::AppResponseError;
use shared_entity::response::{AppResponse, JsonAppResponse};
use sqlx::types::uuid;
use tracing::{debug, event, instrument};
use tracing_actix_web::RequestId;
use uuid::Uuid;
pub const WORKSPACE_ID_PATH: &str = "workspace_id";
@ -81,7 +80,6 @@ async fn list_handler(
#[instrument(skip(payload, state), err)]
async fn add_workspace_members_handler(
request_id: RequestId,
user_uuid: UserUuid,
workspace_id: web::Path<Uuid>,
payload: Json<CreateWorkspaceMembers>,
@ -198,7 +196,6 @@ async fn update_workspace_member_handler(
#[instrument(skip(state, payload), err)]
async fn create_collab_handler(
user_uuid: UserUuid,
request_id: RequestId,
payload: Json<InsertCollabParams>,
state: Data<AppState>,
) -> Result<Json<AppResponse<()>>> {
@ -209,7 +206,6 @@ async fn create_collab_handler(
#[instrument(skip(payload, state), err)]
async fn get_collab_handler(
user_uuid: UserUuid,
request_id: RequestId,
payload: Json<QueryCollabParams>,
state: Data<AppState>,
) -> Result<Json<AppResponse<RawData>>> {
@ -229,7 +225,6 @@ async fn get_collab_handler(
#[instrument(skip(payload, state), err)]
async fn batch_get_collab_handler(
user_uuid: UserUuid,
request_id: RequestId,
state: Data<AppState>,
payload: Json<BatchQueryCollabParams>,
) -> Result<Json<AppResponse<BatchQueryCollabResult>>> {
@ -248,7 +243,6 @@ async fn batch_get_collab_handler(
#[instrument(skip(state, payload), err)]
async fn update_collab_handler(
user_uuid: UserUuid,
request_id: RequestId,
payload: Json<InsertCollabParams>,
state: Data<AppState>,
) -> Result<Json<AppResponse<()>>> {
@ -259,7 +253,6 @@ async fn update_collab_handler(
#[instrument(level = "info", skip(state, payload), err)]
async fn delete_collab_handler(
user_uuid: UserUuid,
request_id: RequestId,
payload: Json<DeleteCollabParams>,
state: Data<AppState>,
) -> Result<Json<AppResponse<()>>> {
@ -292,7 +285,6 @@ async fn retrieve_snapshots_handler(
#[instrument(skip(state, payload), err)]
async fn add_collab_member_handler(
request_id: RequestId,
payload: Json<InsertCollabMemberParams>,
state: Data<AppState>,
) -> Result<Json<AppResponse<()>>> {
@ -308,7 +300,6 @@ async fn add_collab_member_handler(
#[instrument(skip(state, payload), err)]
async fn update_collab_member_handler(
user_uuid: UserUuid,
request_id: RequestId,
payload: Json<UpdateCollabMemberParams>,
state: Data<AppState>,
) -> Result<Json<AppResponse<()>>> {
@ -325,7 +316,6 @@ async fn update_collab_member_handler(
#[instrument(skip(state, payload), err)]
async fn get_collab_member_handler(
user_uuid: UserUuid,
request_id: RequestId,
payload: Json<CollabMemberIdentify>,
state: Data<AppState>,
) -> Result<Json<AppResponse<AFCollabMember>>> {
@ -337,7 +327,6 @@ async fn get_collab_member_handler(
#[instrument(skip(state, payload), err)]
async fn remove_collab_member_handler(
user_uuid: UserUuid,
request_id: RequestId,
payload: Json<CollabMemberIdentify>,
state: Data<AppState>,
) -> Result<Json<AppResponse<()>>> {
@ -354,7 +343,6 @@ async fn remove_collab_member_handler(
#[instrument(skip(state, payload), err)]
async fn get_collab_member_list_handler(
user_uuid: UserUuid,
request_id: RequestId,
payload: Json<QueryCollabMembers>,
state: Data<AppState>,
) -> Result<Json<AppResponse<AFCollabMembers>>> {

View File

@ -2,13 +2,13 @@ use crate::api::metrics::{metrics_registry, metrics_scope};
use crate::component::auth::HEADER_TOKEN;
use crate::config::config::{Config, DatabaseSetting, GoTrueSetting, S3Setting, TlsConfig};
use crate::middleware::cors_mw::default_cors;
use crate::middleware::request_id::RequestIdMiddleware;
use crate::self_signed::create_self_signed_certificate;
use crate::state::AppState;
use actix_identity::IdentityMiddleware;
use actix_session::storage::RedisSessionStore;
use actix_session::SessionMiddleware;
use actix_web::cookie::Key;
use actix_web::middleware::Compat;
use actix_web::{dev::Server, web, web::Data, App, HttpServer};
use actix::Actor;
@ -20,7 +20,6 @@ use snowflake::Snowflake;
use sqlx::{postgres::PgPoolOptions, PgPool};
use std::net::TcpListener;
use std::sync::Arc;
use tracing_actix_web::TracingLogger;
use tokio::sync::RwLock;
@ -107,6 +106,7 @@ pub async fn run(
let mut server = HttpServer::new(move || {
App::new()
.wrap(RequestIdMiddleware)
.wrap(MetricsMiddleware)
.wrap(IdentityMiddleware::default())
.wrap(
@ -116,7 +116,6 @@ pub async fn run(
)
.wrap(default_cors())
.wrap(access_control.clone())
.wrap(Compat::new(TracingLogger::default()))
.app_data(web::JsonConfig::default().limit(4096))
.service(user_scope())
.service(workspace_scope())

View File

@ -1,8 +1,9 @@
use config::{Config as InnerConfig, FileFormat};
use secrecy::Secret;
use serde::Deserialize;
use serde_aux::field_attributes::deserialize_number_from_string;
use sqlx::postgres::{PgConnectOptions, PgSslMode};
use std::convert::{TryFrom, TryInto};
use std::convert::TryFrom;
use std::path::PathBuf;
#[derive(serde::Deserialize, Clone, Debug)]
@ -106,15 +107,10 @@ impl DatabaseSetting {
}
}
pub fn get_configuration() -> Result<Config, config::ConfigError> {
pub fn get_configuration(app_env: &Environment) -> Result<Config, config::ConfigError> {
let base_path = std::env::current_dir().expect("Failed to determine the current directory");
let configuration_dir = base_path.join("configuration");
let environment: Environment = std::env::var("APP_ENVIRONMENT")
.unwrap_or_else(|_| "local".to_string())
.try_into()
.expect("Failed to parse APP_ENVIRONMENT.");
let builder = InnerConfig::builder()
.set_default("default", "1")?
.add_source(
@ -123,7 +119,7 @@ pub fn get_configuration() -> Result<Config, config::ConfigError> {
.format(FileFormat::Yaml),
)
.add_source(
config::File::from(configuration_dir.join(environment.as_str()))
config::File::from(configuration_dir.join(app_env.as_str()))
.required(true)
.format(FileFormat::Yaml),
)
@ -137,6 +133,7 @@ pub fn get_configuration() -> Result<Config, config::ConfigError> {
}
/// The possible runtime environment for our application.
#[derive(Clone, Debug, Deserialize)]
pub enum Environment {
Local,
Production,

View File

@ -1,6 +1,6 @@
use appflowy_cloud::application::{init_state, Application};
use appflowy_cloud::config::config::get_configuration;
use appflowy_cloud::telemetry::{get_subscriber, init_subscriber};
use appflowy_cloud::config::config::{get_configuration, Environment};
use appflowy_cloud::telemetry::init_subscriber;
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
@ -17,14 +17,13 @@ async fn main() -> anyhow::Result<()> {
filters.push(format!("database={}", level));
filters.push(format!("storage={}", level));
let subscriber = get_subscriber(
"appflowy_cloud".to_string(),
Some(filters.join(",")),
std::io::stdout,
);
init_subscriber(subscriber);
let app_env: Environment = std::env::var("APP_ENVIRONMENT")
.unwrap_or_else(|_| "local".to_string())
.try_into()
.expect("Failed to parse APP_ENVIRONMENT.");
let configuration = get_configuration().expect("The configuration should be configured.");
init_subscriber(&app_env, filters);
let configuration = get_configuration(&app_env).expect("The configuration should be configured.");
let state = init_state(&configuration)
.await
.expect("The AppState should be initialized");

View File

@ -1,4 +1,3 @@
use actix_http::header::HeaderName;
use actix_service::{forward_ready, Service, Transform};
use actix_web::dev::{ServiceRequest, ServiceResponse};
use actix_web::web::Data;
@ -7,6 +6,7 @@ use futures_util::future::LocalBoxFuture;
use std::future::{ready, Ready};
use std::sync::Arc;
use super::request_id::get_request_id;
use crate::api::metrics::AppFlowyCloudMetrics;
pub struct MetricsMiddleware;
@ -77,16 +77,3 @@ where
})
}
}
fn get_request_id(req: &ServiceRequest) -> Option<String> {
match req.headers().get(HeaderName::from_static("x-request-id")) {
Some(h) => match h.to_str() {
Ok(s) => Some(s.to_owned()),
Err(e) => {
tracing::error!("Failed to get request id from header: {}", e);
None
},
},
None => None,
}
}

View File

@ -1,3 +1,4 @@
pub mod access_control_mw;
pub mod cors_mw;
pub mod metrics_mw;
pub mod request_id;

View File

@ -0,0 +1,69 @@
use actix_http::header::HeaderName;
use std::future::{ready, Ready};
use tracing::{Instrument, Level};
use actix_service::{forward_ready, Service, Transform};
use actix_web::dev::{ServiceRequest, ServiceResponse};
use futures_util::future::LocalBoxFuture;
pub struct RequestIdMiddleware;
impl<S, B> Transform<S, ServiceRequest> for RequestIdMiddleware
where
S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error>,
S::Future: 'static,
B: 'static,
{
type Response = ServiceResponse<B>;
type Error = actix_web::Error;
type Transform = RequestIdMiddlewareService<S>;
type InitError = ();
type Future = Ready<Result<Self::Transform, Self::InitError>>;
fn new_transform(&self, service: S) -> Self::Future {
ready(Ok(RequestIdMiddlewareService { service }))
}
}
pub struct RequestIdMiddlewareService<S> {
service: S,
}
impl<S, B> Service<ServiceRequest> for RequestIdMiddlewareService<S>
where
S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error>,
S::Future: 'static,
B: 'static,
{
type Response = ServiceResponse<B>;
type Error = actix_web::Error;
type Future = LocalBoxFuture<'static, Result<Self::Response, Self::Error>>;
forward_ready!(service);
fn call(&self, req: ServiceRequest) -> Self::Future {
let request_id = get_request_id(&req).unwrap_or({
let new_id = uuid::Uuid::new_v4().to_string();
tracing::warn!("No request id found in header, generating one: {}", new_id);
new_id
});
// Call the next service
let span = tracing::span!(Level::INFO, "request_id", request_id = %request_id);
let res = self.service.call(req);
Box::pin(res.instrument(span))
}
}
pub fn get_request_id(req: &ServiceRequest) -> Option<String> {
match req.headers().get(HeaderName::from_static("x-request-id")) {
Some(h) => match h.to_str() {
Ok(s) => Some(s.to_owned()),
Err(e) => {
tracing::error!("Failed to get request id from header: {}", e);
None
},
},
None => None,
}
}

View File

@ -1,21 +1,20 @@
use actix_web::rt::task::JoinHandle;
use tracing::subscriber::set_global_default;
use tracing::Subscriber;
use tracing_bunyan_formatter::{BunyanFormattingLayer, JsonStorageLayer};
use tracing_log::LogTracer;
use tracing_subscriber::fmt::MakeWriter;
use tracing_subscriber::{layer::SubscriberExt, EnvFilter};
/// Compose multiple layers into a `tracing`'s subscriber.
pub fn get_subscriber<Sink>(
name: String,
env_filter: Option<String>,
sink: Sink,
) -> impl Subscriber + Sync + Send
where
Sink: for<'a> MakeWriter<'a> + Send + Sync + 'static,
{
use crate::config::config::Environment;
/// Register a subscriber as global default to process span data.
///
/// It should only be called once!
pub fn init_subscriber(app_env: &Environment, filters: Vec<String>) {
let name = "appflowy_cloud".to_string();
let env_filter = Some(filters.join(","));
let sink = std::io::stdout;
let env_filter = match env_filter {
None => {
dbg!("Using default env filter");
@ -25,25 +24,35 @@ where
};
let formatting_layer = BunyanFormattingLayer::new(name, sink);
tracing_subscriber::fmt()
.with_ansi(true)
let builder = tracing_subscriber::fmt()
.json()
.with_target(true)
.with_max_level(tracing::Level::TRACE)
.with_thread_ids(false)
.with_file(false)
.pretty()
.finish()
.with(env_filter)
.with(JsonStorageLayer)
.with(formatting_layer)
}
.with_file(false);
/// Register a subscriber as global default to process span data.
///
/// It should only be called once!
pub fn init_subscriber(subscriber: impl Subscriber + Sync + Send) {
LogTracer::init().expect("Failed to set logger");
set_global_default(subscriber).expect("Failed to set subscriber");
match app_env {
Environment::Local => {
let subscriber = builder
.pretty()
.with_ansi(true)
.finish()
.with(env_filter)
.with(JsonStorageLayer)
.with(formatting_layer);
LogTracer::init().unwrap();
set_global_default(subscriber).unwrap();
},
Environment::Production => {
let subscriber = builder
.finish()
.with(env_filter)
.with(JsonStorageLayer)
.with(formatting_layer);
LogTracer::init().unwrap();
set_global_default(subscriber).unwrap();
},
}
}
pub fn spawn_blocking_with_tracing<F, R>(f: F) -> JoinHandle<R>