fix: index all of the document blocks
This commit is contained in:
parent
9d3d28ad89
commit
7ad105ee34
|
|
@ -58,12 +58,19 @@ impl StreamGroup {
|
|||
|
||||
/// Ensures the consumer group exists, creating it if necessary.
|
||||
pub async fn ensure_consumer_group(&mut self) -> Result<(), StreamError> {
|
||||
let _: RedisResult<()> = self
|
||||
.connection_manager
|
||||
//Use '$' if you want new messages or '0' to read from the beginning.
|
||||
.xgroup_create_mkstream(&self.stream_key, &self.group_name, "0")
|
||||
.await;
|
||||
|
||||
let result: RedisResult<()> = self
|
||||
.connection_manager
|
||||
//Use '$' if you want new messages or '0' to read from the beginning.
|
||||
.xgroup_create_mkstream(&self.stream_key, &self.group_name, "0")
|
||||
.await;
|
||||
if let Err(e) = result {
|
||||
tracing::warn!(
|
||||
"error when creating consumer group `{}` `{}`: {:?}",
|
||||
self.stream_key,
|
||||
self.group_name,
|
||||
e
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -17,8 +17,8 @@ use app_error::AppError;
|
|||
use database_entity::dto::{CollabParams, QueryCollab, QueryCollabResult};
|
||||
|
||||
use crate::collab::queue_redis_ops::{
|
||||
get_pending_meta, remove_all_pending_meta, remove_pending_meta, storage_cache_key, PendingWrite,
|
||||
WritePriority, PENDING_WRITE_META_EXPIRE_SECS,
|
||||
get_pending_meta, remove_pending_meta, storage_cache_key, PendingWrite, WritePriority,
|
||||
PENDING_WRITE_META_EXPIRE_SECS,
|
||||
};
|
||||
use crate::collab::RedisSortedSet;
|
||||
use crate::metrics::CollabMetrics;
|
||||
|
|
@ -170,7 +170,8 @@ impl StorageQueue {
|
|||
#[cfg(debug_assertions)]
|
||||
pub async fn clear(&self) -> Result<(), AppError> {
|
||||
self.pending_write_set.clear().await?;
|
||||
remove_all_pending_meta(self.connection_manager.clone()).await?;
|
||||
crate::collab::queue_redis_ops::remove_all_pending_meta(self.connection_manager.clone())
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ use serde_repr::{Deserialize_repr, Serialize_repr};
|
|||
|
||||
pub(crate) const PENDING_WRITE_META_EXPIRE_SECS: u64 = 604800; // 7 days in seconds
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub(crate) async fn remove_all_pending_meta(
|
||||
mut connection_manager: RedisConnectionManager,
|
||||
) -> Result<(), AppError> {
|
||||
|
|
|
|||
|
|
@ -178,8 +178,9 @@ impl CollabHandle {
|
|||
Err(err) => tracing::error!("failed to decode update event: {}", err),
|
||||
}
|
||||
}
|
||||
txn.commit();
|
||||
}
|
||||
} else {
|
||||
tracing::warn!("failed to obtain a collab lock");
|
||||
};
|
||||
update_stream.ack_messages(&messages).await?;
|
||||
|
||||
Ok(())
|
||||
|
|
|
|||
|
|
@ -123,12 +123,6 @@ impl OpenCollabConsumer {
|
|||
let fragment = {
|
||||
match &collab.collab_type {
|
||||
CollabType::Document => {
|
||||
tracing::trace!(
|
||||
"indexing document {}/{}",
|
||||
collab.workspace_id,
|
||||
collab.object_id
|
||||
);
|
||||
|
||||
let document = Document::from_doc_state(
|
||||
CollabOrigin::Empty,
|
||||
DataSource::DocStateV1(collab.collab.doc_state.to_vec()),
|
||||
|
|
@ -137,6 +131,9 @@ impl OpenCollabConsumer {
|
|||
)?;
|
||||
let data = document.get_document_data()?;
|
||||
let content = crate::extract::document_to_plain_text(&data);
|
||||
if content.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
Fragment {
|
||||
fragment_id: collab.object_id.clone(),
|
||||
object_id: collab.object_id.clone(),
|
||||
|
|
@ -156,6 +153,11 @@ impl OpenCollabConsumer {
|
|||
},
|
||||
}
|
||||
};
|
||||
tracing::trace!(
|
||||
"indexing collab {}/{}",
|
||||
collab.workspace_id,
|
||||
collab.object_id
|
||||
);
|
||||
indexer
|
||||
.update_index(&collab.workspace_id, vec![fragment])
|
||||
.await?;
|
||||
|
|
|
|||
|
|
@ -8,23 +8,17 @@ pub fn document_to_plain_text(document: &DocumentData) -> String {
|
|||
// do a depth-first scan of the document blocks
|
||||
while let Some(block_id) = stack.pop() {
|
||||
if let Some(block) = document.blocks.get(block_id) {
|
||||
if block.external_type.as_deref() == Some("text") {
|
||||
if let Some(text_id) = block.external_id.as_deref() {
|
||||
if let Some(json) = text_map.get(text_id) {
|
||||
match serde_json::from_str::<Vec<TextDelta>>(json) {
|
||||
Ok(deltas) => {
|
||||
for delta in deltas {
|
||||
if let TextDelta::Inserted(text, _) = delta {
|
||||
buf.push_str(&text);
|
||||
}
|
||||
}
|
||||
},
|
||||
Err(err) => {
|
||||
tracing::error!("text_id `{}` is not a valid delta array: {}", text_id, err)
|
||||
},
|
||||
if let Some(delta) = block.data.get("delta") {
|
||||
if let Ok(deltas) = serde_json::from_value::<Vec<TextDelta>>(delta.clone()) {
|
||||
for delta in deltas {
|
||||
if let TextDelta::Inserted(text, _) = delta {
|
||||
let trimmed = text.trim();
|
||||
if !trimmed.is_empty() {
|
||||
buf.push_str(&trimmed);
|
||||
buf.push(' ');
|
||||
}
|
||||
}
|
||||
}
|
||||
buf.push('\n');
|
||||
}
|
||||
}
|
||||
if let Some(children) = document.meta.children_map.get(&block.children) {
|
||||
|
|
@ -36,6 +30,7 @@ pub fn document_to_plain_text(document: &DocumentData) -> String {
|
|||
}
|
||||
}
|
||||
}
|
||||
//tracing::trace!("Document plain text: `{}`", buf);
|
||||
buf
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ use crate::indexer::Fragment;
|
|||
pub struct DocumentWatcher {
|
||||
object_id: String,
|
||||
content: Document,
|
||||
receiver: tokio::sync::watch::Receiver<DashMap<String, DeltaType>>,
|
||||
receiver: tokio::sync::watch::Receiver<u64>,
|
||||
}
|
||||
|
||||
unsafe impl Send for DocumentWatcher {}
|
||||
|
|
@ -31,7 +31,7 @@ impl DocumentWatcher {
|
|||
mut content: Document,
|
||||
index_initial_content: bool,
|
||||
) -> Result<Self> {
|
||||
let (tx, receiver) = tokio::sync::watch::channel(DashMap::new());
|
||||
let (tx, receiver) = tokio::sync::watch::channel(0);
|
||||
if index_initial_content {
|
||||
Self::index_initial_content(&mut content, &tx)?;
|
||||
}
|
||||
|
|
@ -43,35 +43,16 @@ impl DocumentWatcher {
|
|||
})
|
||||
}
|
||||
|
||||
fn attach_listener(document: &mut Document, notifier: Sender<DashMap<String, DeltaType>>) {
|
||||
document.subscribe_block_changed(move |blocks, _| {
|
||||
let changes: Vec<_> = blocks
|
||||
.iter()
|
||||
.flat_map(|block| {
|
||||
block
|
||||
.iter()
|
||||
.map(|payload| (payload.id.clone(), payload.command.clone()))
|
||||
})
|
||||
.collect();
|
||||
notifier.send_modify(|map| {
|
||||
for (id, command) in changes {
|
||||
map.insert(id, command);
|
||||
}
|
||||
})
|
||||
fn attach_listener(document: &mut Document, notifier: Sender<u64>) {
|
||||
document.subscribe_block_changed(move |_, _| {
|
||||
notifier.send_modify(|i| *i += 1);
|
||||
});
|
||||
}
|
||||
|
||||
fn index_initial_content(
|
||||
document: &mut Document,
|
||||
notifier: &Sender<DashMap<String, DeltaType>>,
|
||||
) -> Result<()> {
|
||||
fn index_initial_content(document: &mut Document, notifier: &Sender<u64>) -> Result<()> {
|
||||
let data = document.get_document_data()?;
|
||||
if let Some(text_map) = data.meta.text_map.as_ref() {
|
||||
notifier.send_modify(|map| {
|
||||
for text_id in text_map.keys() {
|
||||
map.insert(text_id.clone(), DeltaType::Inserted);
|
||||
}
|
||||
});
|
||||
if let Some(_) = data.meta.text_map.as_ref() {
|
||||
notifier.send_modify(|i| *i += 1);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -83,7 +64,6 @@ impl DocumentWatcher {
|
|||
Box::pin(stream! {
|
||||
while let Ok(()) = receiver.changed().await {
|
||||
if let Some(collab) = collab.upgrade() {
|
||||
receiver.borrow().clear();
|
||||
match Self::get_document_content(collab) {
|
||||
Ok(content) => {
|
||||
yield FragmentUpdate::Update(Fragment {
|
||||
|
|
|
|||
Loading…
Reference in New Issue