-
-
Notifications
You must be signed in to change notification settings - Fork 139
import_batch panics in ensure_vv_for when DAG has shared dependency #929
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
kimjoar
wants to merge
1
commit into
loro-dev:main
Choose a base branch
from
kimjoar:kim/import-batch-panic-min-repro
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+178
−0
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,178 @@ | ||
| use loro_common::ContainerID; | ||
| use loro_internal::{ | ||
| json::{JsonChange, JsonOp, JsonOpContent, JsonSchema, MapOp}, | ||
| loro::ExportMode, | ||
| ContainerType, LoroDoc, LoroValue, ID, | ||
| }; | ||
| use std::mem::ManuallyDrop; | ||
| use std::panic::{self, AssertUnwindSafe}; | ||
|
|
||
| // Each synthetic change is a single map insert. The container/op type does not matter for this | ||
| // bug; we only need a tiny legal change graph with explicit deps. | ||
| fn map_insert_change(peer: u64, deps: Vec<ID>, lamport: u32, key: &str) -> JsonChange { | ||
| JsonChange { | ||
| id: ID::new(peer, 0), | ||
| timestamp: 0, | ||
| deps, | ||
| lamport, | ||
| msg: None, | ||
| ops: vec![JsonOp { | ||
| content: JsonOpContent::Map(MapOp::Insert { | ||
| key: key.into(), | ||
| value: LoroValue::from(key), | ||
| }), | ||
| container: ContainerID::new_root("map", ContainerType::Map), | ||
| counter: 0, | ||
| }], | ||
| } | ||
| } | ||
|
|
||
| fn doc_from_json_changes(changes: Vec<JsonChange>) -> LoroDoc { | ||
| let doc = LoroDoc::new(); | ||
| doc.import_json_updates(JsonSchema { | ||
| schema_version: 1, | ||
| start_version: Default::default(), | ||
| peers: None, | ||
| changes, | ||
| }) | ||
| .unwrap(); | ||
| doc | ||
| } | ||
|
|
||
| // Construct the smallest DAG that can double-push a shared dependency during ensure_vv_for: | ||
| // | ||
| // A | ||
| // / \ | ||
| // C B | ||
| // | ||
| // where: | ||
| // - C depends on A | ||
| // - B depends on both A and C | ||
| // | ||
| // When B's deps iterate as [A, C], the DFS pushes A, then C, and then C pushes A again. | ||
| fn make_changes(peer_a: u64, peer_b: u64, peer_c: u64) -> [JsonChange; 3] { | ||
| let a = map_insert_change(peer_a, vec![], 0, "a"); | ||
| let c = map_insert_change(peer_c, vec![ID::new(peer_a, 0)], 1, "c"); | ||
| let b = map_insert_change(peer_b, vec![ID::new(peer_a, 0), ID::new(peer_c, 0)], 2, "b"); | ||
| [a, c, b] | ||
| } | ||
|
|
||
| fn find_peer_ids_for_duplicate_stack_shape() -> (u64, u64, u64) { | ||
| // The duplicate push only happens when B iterates deps as [A, C]. | ||
| // Frontiers with 2+ deps are backed by FxHashMap, so choose peers dynamically | ||
| // instead of hard-coding a hasher-dependent order. | ||
| for peer_a in 1..16 { | ||
| for peer_c in 1..16 { | ||
| if peer_c == peer_a { | ||
| continue; | ||
| } | ||
|
|
||
| for peer_b in 1..16 { | ||
| if peer_b == peer_a || peer_b == peer_c { | ||
| continue; | ||
| } | ||
|
|
||
| let [a, c, b] = make_changes(peer_a, peer_b, peer_c); | ||
| let doc = doc_from_json_changes(vec![a, c, b]); | ||
| let deps = doc | ||
| .oplog() | ||
| .lock() | ||
| .unwrap() | ||
| .get_change_at(ID::new(peer_b, 0)) | ||
| .unwrap() | ||
| .deps() | ||
| .to_vec(); | ||
|
|
||
| if deps == vec![ID::new(peer_a, 0), ID::new(peer_c, 0)] { | ||
| return (peer_a, peer_b, peer_c); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| panic!("failed to find peer ids where B iterates deps as [A, C]"); | ||
| } | ||
|
|
||
| fn export_two_blob_minimal_graph(peer_a: u64, peer_b: u64, peer_c: u64) -> (Vec<u8>, Vec<u8>) { | ||
| let [a, c, b] = make_changes(peer_a, peer_b, peer_c); | ||
|
|
||
| // Blob 1 is a snapshot that already contains the shared dep A and the sibling C -> A. | ||
| // Blob 2 is a plain update that adds only B -> [A, C]. | ||
| // | ||
| // This is the smallest batch shape that still reaches the buggy detached import path: | ||
| // one blob is not enough because import_batch([single_blob]) delegates to import(). | ||
| let snapshot_doc = doc_from_json_changes(vec![a.clone(), c.clone()]); | ||
| let snapshot = snapshot_doc.export(ExportMode::Snapshot).unwrap(); | ||
| let snapshot_vv = snapshot_doc.oplog_vv(); | ||
|
|
||
| let full_doc = doc_from_json_changes(vec![a, c, b]); | ||
| let update = full_doc.export(ExportMode::updates(&snapshot_vv)).unwrap(); | ||
|
|
||
| (snapshot, update) | ||
| } | ||
|
|
||
/// Runs `import_batch` on a throwaway doc, converting a panic into an error string.
///
/// Returns `None` when the batch import succeeds and `Some(message)` when it panics.
fn import_batch_error(bytes: &[Vec<u8>]) -> Option<String> {
    // The current bug panics inside import_batch and would otherwise poison the doc on drop,
    // which aborts the test process. Catch the panic and suppress the drop path so the test can
    // report a normal assertion failure instead.
    //
    // NOTE(review): the panic hook is process-global; while it is swapped out here, any
    // concurrently running test that panics has its backtrace silenced too — confirm this
    // test isn't racing others that rely on panic output.
    let hook = panic::take_hook();
    panic::set_hook(Box::new(|_| {}));

    // ManuallyDrop so that on the panic path the doc is deliberately leaked rather than
    // dropped while in a half-imported state.
    let mut doc = ManuallyDrop::new(LoroDoc::new());
    let result = panic::catch_unwind(AssertUnwindSafe(|| {
        (&*doc).import_batch(bytes).unwrap();
    }));

    // Restore the original hook before reporting so later panics print normally.
    panic::set_hook(hook);
    match result {
        Ok(()) => {
            // SAFETY: `doc` has not been dropped yet and the import completed without
            // panicking, so dropping it exactly once here is sound.
            unsafe { ManuallyDrop::drop(&mut doc) };
            None
        }
        // The doc is leaked on this path on purpose — see the ManuallyDrop note above.
        Err(_) => Some("import_batch panicked while computing version vectors".to_string()),
    }
}
|
|
||
/// Minimal repro for the `ensure_vv_for` double-push bug:
///
/// - 3 changes total: `A`, `C -> A`, `B -> [A, C]`
/// - 2 blobs total: `Snapshot(A, C)` and `Update(B)`
///
/// `import_batch` needs at least 2 blobs because the 1-blob fast path delegates to `import`.
/// Sequential `import()` works because importing the snapshot computes/caches VVs for the partial
/// graph before `B` exists. `import_batch` imports both blobs while detached, so the final
/// checkout sees the full 3-node DAG with every `OnceCell` still empty.
#[test]
fn import_batch_panic_shared_dep_on_dag_node() {
    let (peer_a, peer_b, peer_c) = find_peer_ids_for_duplicate_stack_shape();
    let (snapshot, update) = export_two_blob_minimal_graph(peer_a, peer_b, peer_c);

    // The batch really is "2 blobs / 3 changes": the snapshot carries A and C, and the update
    // carries only B.
    let snapshot_meta = LoroDoc::decode_import_blob_meta(&snapshot, false).unwrap();
    assert_eq!(snapshot_meta.change_num, 2);
    let update_meta = LoroDoc::decode_import_blob_meta(&update, false).unwrap();
    assert_eq!(update_meta.change_num, 1);

    // Sequential import works because the first import attaches state to the A <- C partial graph,
    // which computes VVs before B is added by the second import.
    {
        let doc = LoroDoc::new();
        doc.import(&snapshot).unwrap();
        doc.import(&update).unwrap();
    }

    // Batch import should also work. Without the ensure_vv_for guard, the final checkout traverses
    // B -> [A, C], pushes A twice, and panics on the second OnceCell::set().
    if let Some(err) = import_batch_error(&[snapshot, update]) {
        panic!("{err}");
    }
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This commit adds a regression test for the known `import_batch`/`ensure_vv_for` panic, but it does not modify the production code that panics. On the current implementation, `import_batch_error(&[snapshot, update])` will return `Some(...)`, so this new test immediately fails and turns `cargo test -p loro-internal` red if the commit is merged on its own. If the fix is intended to arrive in a later commit, the reproducer needs to stay with that fix rather than landing independently.

Useful? React with 👍 / 👎.