Merge pull request #608 from AppFlowy-IO/search-tokens-metrics

Let Prometheus track OpenAI tokens used on search
2024-06-12 09:06:30 +02:00 · 2024-06-12 09:06:30 +02:00 · 64baf9ea00
parent 0109597330 d02c7e4ea7
commit 64baf9ea00
3 changed files with 27 additions and 1 deletions
--- a/src/api/metrics.rs
+++ b/src/api/metrics.rs
@ -9,6 +9,7 @@ use prometheus_client::metrics::exemplar::CounterWithExemplar;
 use prometheus_client::metrics::family::Family;
 use prometheus_client::registry::Registry;
 use std::sync::Arc;
+use uuid::Uuid;

 pub fn metrics_scope() -> Scope {
  web::scope("/metrics").service(web::resource("").route(web::get().to(metrics_handler)))
@ -38,6 +39,11 @@ pub struct ResultLabel {
  pub status_code: u16,
 }

+#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
+pub struct WorkspaceLabel { // label set keying the per-workspace `openai_token_usage` counter family
+  pub workspace: String, // workspace id as a string; `record_search_tokens_used` fills it via `Uuid::to_string()`
+}
+
 // Metrics contains the list of metrics that are collected by the application.
 // Metric types: https://prometheus.io/docs/concepts/metric_types
 // Application handlers should call the corresponding methods to update the metrics.
@ -46,6 +52,7 @@ pub struct RequestMetrics {
  requests_count: Family<PathLabel, Counter>,
  requests_latency: Family<PathLabel, CounterWithExemplar<TraceLabel>>,
  requests_result: Family<ResultLabel, CounterWithExemplar<TraceLabel>>,
+  openai_token_usage: Family<WorkspaceLabel, Counter>,
 }

 #[derive(Clone, Hash, PartialEq, Eq, EncodeLabelSet, Debug, Default)]
@ -59,6 +66,7 @@ impl RequestMetrics {
      requests_count: Family::default(),
      requests_latency: Family::default(),
      requests_result: Family::default(),
+      openai_token_usage: Family::default(),
    }
  }

@ -81,9 +89,23 @@ impl RequestMetrics {
      "status code of response",
      af_metrics.requests_result.clone(),
    );
+    af_registry.register(
+      "search_tokens_used",
+      "OpenAI API tokens used for search requests",
+      af_metrics.openai_token_usage.clone(),
+    );
    af_metrics
  }

+  // Adds `tokens` to the OpenAI token counter of the given workspace (the family registered
+  // as "search_tokens_used"). The workspace id is rendered as a string and used as the label
+  // value, so every distinct workspace id gets its own counter within the family.
+  pub fn record_search_tokens_used(&self, workspace_id: &Uuid, tokens: u32) {
+    let label = WorkspaceLabel {
+      workspace: workspace_id.to_string(),
+    };
+    // u32 -> u64 is lossless; `u64::from` keeps that guaranteed by the compiler, whereas an
+    // `as` cast would silently truncate if the parameter type were ever widened.
+    self
+      .openai_token_usage
+      .get_or_create(&label)
+      .inc_by(u64::from(tokens));
+  }
+
  // app services/middleware should call this method to increase the request count for the path
  pub fn record_request(&self, trace_id: Option<String>, path: String, ms: u64, status_code: u16) {
    self
--- a/src/api/search.rs
+++ b/src/api/search.rs
@ -37,6 +37,7 @@ async fn document_search(
      )
    },
  };
-  let resp = search_document(&state.pg_pool, openai, uid, workspace_id, request).await?;
+  let metrics = &*state.metrics.request_metrics;
+  let resp = search_document(&state.pg_pool, openai, uid, workspace_id, request, metrics).await?;
  Ok(AppResponse::Ok().with_data(resp).into())
 }
--- a/src/biz/search/ops.rs
+++ b/src/biz/search/ops.rs
@ -1,3 +1,4 @@
+use crate::api::metrics::RequestMetrics;
 use app_error::ErrorCode;
 use database::index::{search_documents, SearchDocumentParams};
 use openai_dive::v1::models::EmbeddingsEngine;
@ -17,6 +18,7 @@ pub async fn search_document(
  uid: i64,
  workspace_id: Uuid,
  request: SearchDocumentRequest,
+  metrics: &RequestMetrics,
 ) -> Result<Vec<SearchDocumentResponseItem>, AppResponseError> {
  let embeddings = openai
    .embeddings()
@ -31,6 +33,7 @@ pub async fn search_document(
    .map_err(|e| AppResponseError::new(ErrorCode::Internal, e.to_string()))?;

  let tokens_used = if let Some(usage) = embeddings.usage {
+    metrics.record_search_tokens_used(&workspace_id, usage.total_tokens);
    tracing::info!(
      "workspace {} OpenAI API search tokens used: {}",
      workspace_id,