diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index d099964a383..15f519615af 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -7591,7 +7591,6 @@ dependencies = [ "arrow", "async-trait", "once_cell", - "parquet", "prost 0.14.3", "quickwit-common", "quickwit-config", @@ -7611,10 +7610,14 @@ dependencies = [ name = "quickwit-parquet-engine" version = "0.8.0" dependencies = [ + "anyhow", "arrow", + "chrono", "parquet", "proptest", + "prost 0.14.3", "quickwit-common", + "quickwit-proto", "sea-query", "serde", "serde_json", diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs index 9f79032d7c5..e29d4af2540 100644 --- a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs +++ b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs @@ -68,7 +68,7 @@ pub struct IndexingSchedulerState { /// /// Scheduling executes the following steps: /// 1. Builds a [`PhysicalIndexingPlan`] from the list of logical indexing tasks. See -/// [`build_physical_indexing_plan`] for the implementation details. +/// `build_physical_indexing_plan` for the implementation details. /// 2. Apply the [`PhysicalIndexingPlan`]: for each indexer, the scheduler send the indexing tasks /// by gRPC. An indexer immediately returns an Ok and apply asynchronously the received plan. Any /// errors (network) happening in this step are ignored. The scheduler runs a control loop that @@ -98,7 +98,7 @@ pub struct IndexingSchedulerState { /// Concretely, it will send the faulty nodes of the plan they are supposed to follow. // /// Finally, in order to give the time for each indexer to run their indexing tasks, the control -/// plane will wait at least [`MIN_DURATION_BETWEEN_SCHEDULING`] before comparing the desired +/// plane will wait at least `MIN_DURATION_BETWEEN_SCHEDULING` before comparing the desired /// plan with the running plan. 
pub struct IndexingScheduler { cluster_id: String, diff --git a/quickwit/quickwit-indexing/Cargo.toml b/quickwit/quickwit-indexing/Cargo.toml index 677b95e32aa..5d3a6504d29 100644 --- a/quickwit/quickwit-indexing/Cargo.toml +++ b/quickwit/quickwit-indexing/Cargo.toml @@ -123,6 +123,7 @@ sqlx = { workspace = true, features = ["runtime-tokio", "postgres"] } tempfile = { workspace = true } quickwit-actors = { workspace = true, features = ["testsuite"] } +quickwit-parquet-engine = { workspace = true, features = ["testsuite"] } quickwit-cluster = { workspace = true, features = ["testsuite"] } quickwit-common = { workspace = true, features = ["testsuite"] } quickwit-config = { workspace = true, features = ["testsuite"] } diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index 16daf18102f..cfb2ade9361 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -637,10 +637,8 @@ impl IndexingPipeline { .spawn(parquet_uploader); // ParquetPackager - let parquet_schema = quickwit_parquet_engine::schema::ParquetSchema::new(); let writer_config = quickwit_parquet_engine::storage::ParquetWriterConfig::default(); let split_writer = quickwit_parquet_engine::storage::ParquetSplitWriter::new( - parquet_schema, writer_config, self.params.indexing_directory.path(), ); diff --git a/quickwit/quickwit-indexing/src/actors/parquet_doc_processor.rs b/quickwit/quickwit-indexing/src/actors/parquet_doc_processor.rs index 2a83c9610c9..eb51621a30f 100644 --- a/quickwit/quickwit-indexing/src/actors/parquet_doc_processor.rs +++ b/quickwit/quickwit-indexing/src/actors/parquet_doc_processor.rs @@ -24,7 +24,6 @@ use quickwit_common::rate_limited_tracing::rate_limited_warn; use quickwit_common::runtimes::RuntimeType; use quickwit_metastore::checkpoint::SourceCheckpointDelta; use quickwit_parquet_engine::ingest::{IngestError, 
ParquetIngestProcessor}; -use quickwit_parquet_engine::schema::ParquetSchema; use quickwit_proto::types::{IndexId, SourceId}; use serde::Serialize; use tokio::runtime::Handle; @@ -143,8 +142,7 @@ impl ParquetDocProcessor { source_id: SourceId, indexer_mailbox: Mailbox, ) -> Self { - let schema = ParquetSchema::new(); - let processor = ParquetIngestProcessor::new(schema); + let processor = ParquetIngestProcessor; let counters = ParquetDocProcessorCounters::new(index_id.clone(), source_id.clone()); info!( @@ -306,7 +304,7 @@ impl Handler for ParquetDocProcessor { // forever. if !checkpoint_forwarded && !checkpoint_delta.is_empty() { let empty_batch = - RecordBatch::new_empty(self.processor.schema().arrow_schema().clone()); + RecordBatch::new_empty(std::sync::Arc::new(arrow::datatypes::Schema::empty())); let processed_batch = ProcessedParquetBatch::new(empty_batch, checkpoint_delta, force_commit); ctx.send_message(&self.indexer_mailbox, processed_batch) @@ -399,14 +397,8 @@ mod tests { #[tokio::test] async fn test_metrics_doc_processor_valid_arrow_ipc() { - use std::sync::Arc as StdArc; + use quickwit_parquet_engine::test_helpers::create_test_batch_with_tags; - use arrow::array::{ - ArrayRef, BinaryViewArray, DictionaryArray, Float64Array, Int32Array, StringArray, - StructArray, UInt8Array, UInt64Array, - }; - use arrow::datatypes::{DataType, Field, Int32Type}; - use arrow::record_batch::RecordBatch; let universe = Universe::with_accelerated_time(); let (indexer_mailbox, _indexer_inbox) = universe.create_test_mailbox::(); @@ -419,103 +411,7 @@ mod tests { let (metrics_doc_processor_mailbox, metrics_doc_processor_handle) = universe.spawn_builder().spawn(metrics_doc_processor); - // Create a test batch matching the metrics schema - let schema = ParquetSchema::new(); - let num_rows = 3; - - // Helper to create dictionary arrays - fn create_dict_array(values: &[&str]) -> ArrayRef { - let keys: Vec = (0..values.len()).map(|i| i as i32).collect(); - let string_array = 
StringArray::from(values.to_vec()); - StdArc::new( - DictionaryArray::::try_new( - Int32Array::from(keys), - StdArc::new(string_array), - ) - .unwrap(), - ) - } - - fn create_nullable_dict_array(values: &[Option<&str>]) -> ArrayRef { - let keys: Vec> = values - .iter() - .enumerate() - .map(|(i, v)| v.map(|_| i as i32)) - .collect(); - let string_values: Vec<&str> = values.iter().filter_map(|v| *v).collect(); - let string_array = StringArray::from(string_values); - StdArc::new( - DictionaryArray::::try_new( - Int32Array::from(keys), - StdArc::new(string_array), - ) - .unwrap(), - ) - } - - let metric_name: ArrayRef = create_dict_array(&vec!["cpu.usage"; num_rows]); - let metric_type: ArrayRef = StdArc::new(UInt8Array::from(vec![0u8; num_rows])); - let metric_unit: ArrayRef = StdArc::new(StringArray::from(vec![Some("bytes"); num_rows])); - let timestamp_secs: ArrayRef = StdArc::new(UInt64Array::from(vec![100u64, 101u64, 102u64])); - let start_timestamp_secs: ArrayRef = - StdArc::new(UInt64Array::from(vec![None::; num_rows])); - let value: ArrayRef = StdArc::new(Float64Array::from(vec![42.0, 43.0, 44.0])); - let tag_service: ArrayRef = create_nullable_dict_array(&vec![Some("web"); num_rows]); - let tag_env: ArrayRef = create_nullable_dict_array(&vec![Some("prod"); num_rows]); - let tag_datacenter: ArrayRef = - create_nullable_dict_array(&vec![Some("us-east-1"); num_rows]); - let tag_region: ArrayRef = create_nullable_dict_array(&vec![None; num_rows]); - let tag_host: ArrayRef = create_nullable_dict_array(&vec![Some("host-001"); num_rows]); - - // Create empty Variant (Struct with metadata and value BinaryView fields) - let metadata_array = StdArc::new(BinaryViewArray::from(vec![b"" as &[u8]; num_rows])); - let value_array = StdArc::new(BinaryViewArray::from(vec![b"" as &[u8]; num_rows])); - let attributes: ArrayRef = StdArc::new(StructArray::from(vec![ - ( - StdArc::new(Field::new("metadata", DataType::BinaryView, false)), - metadata_array.clone() as ArrayRef, - ), - 
( - StdArc::new(Field::new("value", DataType::BinaryView, false)), - value_array.clone() as ArrayRef, - ), - ])); - - let service_name: ArrayRef = create_dict_array(&vec!["my-service"; num_rows]); - - let resource_attributes: ArrayRef = StdArc::new(StructArray::from(vec![ - ( - StdArc::new(Field::new("metadata", DataType::BinaryView, false)), - metadata_array as ArrayRef, - ), - ( - StdArc::new(Field::new("value", DataType::BinaryView, false)), - value_array as ArrayRef, - ), - ])); - - let batch = RecordBatch::try_new( - schema.arrow_schema().clone(), - vec![ - metric_name, - metric_type, - metric_unit, - timestamp_secs, - start_timestamp_secs, - value, - tag_service, - tag_env, - tag_datacenter, - tag_region, - tag_host, - attributes, - service_name, - resource_attributes, - ], - ) - .unwrap(); - - // Serialize to Arrow IPC + let batch = create_test_batch_with_tags(3, &["service"]); let ipc_bytes = record_batch_to_ipc(&batch).unwrap(); // Create RawDocBatch with the IPC bytes @@ -624,13 +520,8 @@ mod tests { async fn test_metrics_doc_processor_with_indexer() { use std::sync::Arc as StdArc; - use arrow::array::{ - ArrayRef, BinaryViewArray, DictionaryArray, Float64Array, Int32Array, StringArray, - StructArray, UInt8Array, UInt64Array, - }; - use arrow::datatypes::{DataType, Field, Int32Type}; - use arrow::record_batch::RecordBatch; use quickwit_parquet_engine::storage::{ParquetSplitWriter, ParquetWriterConfig}; + use quickwit_parquet_engine::test_helpers::create_test_batch_with_tags; use quickwit_proto::metastore::MockMetastoreService; use quickwit_storage::RamStorage; @@ -657,9 +548,8 @@ mod tests { let (uploader_mailbox, _uploader_handle) = universe.spawn_builder().spawn(uploader); // Create ParquetPackager - let parquet_schema = ParquetSchema::new(); let writer_config = ParquetWriterConfig::default(); - let split_writer = ParquetSplitWriter::new(parquet_schema, writer_config, temp_dir.path()); + let split_writer = ParquetSplitWriter::new(writer_config, 
temp_dir.path()); let packager = ParquetPackager::new(split_writer, uploader_mailbox); let (packager_mailbox, packager_handle) = universe.spawn_builder().spawn(packager); @@ -681,104 +571,7 @@ mod tests { let (metrics_doc_processor_mailbox, metrics_doc_processor_handle) = universe.spawn_builder().spawn(metrics_doc_processor); - // Create a test batch - let schema = ParquetSchema::new(); - let num_rows = 5; - - fn create_dict_array(values: &[&str]) -> ArrayRef { - let keys: Vec = (0..values.len()).map(|i| i as i32).collect(); - let string_array = StringArray::from(values.to_vec()); - StdArc::new( - DictionaryArray::::try_new( - Int32Array::from(keys), - StdArc::new(string_array), - ) - .unwrap(), - ) - } - - fn create_nullable_dict_array(values: &[Option<&str>]) -> ArrayRef { - let keys: Vec> = values - .iter() - .enumerate() - .map(|(i, v)| v.map(|_| i as i32)) - .collect(); - let string_values: Vec<&str> = values.iter().filter_map(|v| *v).collect(); - let string_array = StringArray::from(string_values); - StdArc::new( - DictionaryArray::::try_new( - Int32Array::from(keys), - StdArc::new(string_array), - ) - .unwrap(), - ) - } - - let metric_name: ArrayRef = create_dict_array(&vec!["cpu.usage"; num_rows]); - let metric_type: ArrayRef = StdArc::new(UInt8Array::from(vec![0u8; num_rows])); - let metric_unit: ArrayRef = StdArc::new(StringArray::from(vec![Some("bytes"); num_rows])); - let timestamps: Vec = (0..num_rows).map(|i| 100 + i as u64).collect(); - let timestamp_secs: ArrayRef = StdArc::new(UInt64Array::from(timestamps)); - let start_timestamp_secs: ArrayRef = - StdArc::new(UInt64Array::from(vec![None::; num_rows])); - let values: Vec = (0..num_rows).map(|i| 42.0 + i as f64).collect(); - let value: ArrayRef = StdArc::new(Float64Array::from(values)); - let tag_service: ArrayRef = create_nullable_dict_array(&vec![Some("web"); num_rows]); - let tag_env: ArrayRef = create_nullable_dict_array(&vec![Some("prod"); num_rows]); - let tag_datacenter: ArrayRef = - 
create_nullable_dict_array(&vec![Some("us-east-1"); num_rows]); - let tag_region: ArrayRef = create_nullable_dict_array(&vec![None; num_rows]); - let tag_host: ArrayRef = create_nullable_dict_array(&vec![Some("host-001"); num_rows]); - - // Create empty Variant (Struct with metadata and value BinaryView fields) - let metadata_array = StdArc::new(BinaryViewArray::from(vec![b"" as &[u8]; num_rows])); - let value_array = StdArc::new(BinaryViewArray::from(vec![b"" as &[u8]; num_rows])); - let attributes: ArrayRef = StdArc::new(StructArray::from(vec![ - ( - StdArc::new(Field::new("metadata", DataType::BinaryView, false)), - metadata_array.clone() as ArrayRef, - ), - ( - StdArc::new(Field::new("value", DataType::BinaryView, false)), - value_array.clone() as ArrayRef, - ), - ])); - - let service_name: ArrayRef = create_dict_array(&vec!["my-service"; num_rows]); - - let resource_attributes: ArrayRef = StdArc::new(StructArray::from(vec![ - ( - StdArc::new(Field::new("metadata", DataType::BinaryView, false)), - metadata_array as ArrayRef, - ), - ( - StdArc::new(Field::new("value", DataType::BinaryView, false)), - value_array as ArrayRef, - ), - ])); - - let batch = RecordBatch::try_new( - schema.arrow_schema().clone(), - vec![ - metric_name, - metric_type, - metric_unit, - timestamp_secs, - start_timestamp_secs, - value, - tag_service, - tag_env, - tag_datacenter, - tag_region, - tag_host, - attributes, - service_name, - resource_attributes, - ], - ) - .unwrap(); - - // Serialize to Arrow IPC + let batch = create_test_batch_with_tags(5, &["service"]); let ipc_bytes = record_batch_to_ipc(&batch).unwrap(); // Create RawDocBatch with force_commit to trigger split production diff --git a/quickwit/quickwit-indexing/src/actors/parquet_e2e_test.rs b/quickwit/quickwit-indexing/src/actors/parquet_e2e_test.rs index 0823ce4ad04..d613fc96003 100644 --- a/quickwit/quickwit-indexing/src/actors/parquet_e2e_test.rs +++ b/quickwit/quickwit-indexing/src/actors/parquet_e2e_test.rs @@ -24,15 
+24,13 @@ use std::time::Duration; use arrow::array::{ ArrayRef, DictionaryArray, Float64Array, Int32Array, StringArray, UInt8Array, UInt64Array, }; -use arrow::datatypes::Int32Type; +use arrow::datatypes::{DataType, Field, Int32Type, Schema as ArrowSchema}; use arrow::record_batch::RecordBatch; use bytes::Bytes; -use parquet::variant::{VariantArrayBuilder, VariantBuilderExt}; use quickwit_actors::{ActorHandle, Universe}; use quickwit_common::test_utils::wait_until_predicate; use quickwit_metastore::checkpoint::SourceCheckpointDelta; use quickwit_parquet_engine::ingest::record_batch_to_ipc; -use quickwit_parquet_engine::schema::ParquetSchema; use quickwit_parquet_engine::storage::{ParquetSplitWriter, ParquetWriterConfig}; use quickwit_proto::metastore::{EmptyResponse, MockMetastoreService}; use quickwit_proto::types::IndexUid; @@ -65,48 +63,6 @@ async fn wait_for_published_splits( .map_err(|_| anyhow::anyhow!("Timeout waiting for {} published splits", expected_splits)) } -fn create_dict_array(values: &[&str]) -> ArrayRef { - let keys: Vec = (0..values.len()).map(|i| i as i32).collect(); - let string_array = StringArray::from(values.to_vec()); - Arc::new( - DictionaryArray::::try_new(Int32Array::from(keys), Arc::new(string_array)) - .unwrap(), - ) -} - -fn create_nullable_dict_array(values: &[Option<&str>]) -> ArrayRef { - let keys: Vec> = values - .iter() - .enumerate() - .map(|(i, v)| v.map(|_| i as i32)) - .collect(); - let string_values: Vec<&str> = values.iter().filter_map(|v| *v).collect(); - let string_array = StringArray::from(string_values); - Arc::new( - DictionaryArray::::try_new(Int32Array::from(keys), Arc::new(string_array)) - .unwrap(), - ) -} - -fn create_variant_array(num_rows: usize, fields: Option<&[(&str, &str)]>) -> ArrayRef { - let mut builder = VariantArrayBuilder::new(num_rows); - for _ in 0..num_rows { - match fields { - Some(kv_pairs) => { - let mut obj = builder.new_object(); - for &(key, value) in kv_pairs { - obj = obj.with_field(key, 
value); - } - obj.finish(); - } - None => { - builder.append_null(); - } - } - } - ArrayRef::from(builder.build()) -} - fn create_test_batch( num_rows: usize, metric_name: &str, @@ -114,44 +70,46 @@ fn create_test_batch( base_timestamp: u64, base_value: f64, ) -> RecordBatch { - let schema = ParquetSchema::new(); - - let metric_names: Vec<&str> = vec![metric_name; num_rows]; - let metric_name_arr: ArrayRef = create_dict_array(&metric_names); + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new( + "metric_name", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + false, + ), + Field::new("metric_type", DataType::UInt8, false), + Field::new("timestamp_secs", DataType::UInt64, false), + Field::new("value", DataType::Float64, false), + Field::new( + "service", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + true, + ), + ])); + + let metric_name_arr: ArrayRef = { + let keys = Int32Array::from(vec![0i32; num_rows]); + let vals = StringArray::from(vec![metric_name]); + Arc::new(DictionaryArray::::try_new(keys, Arc::new(vals)).unwrap()) + }; let metric_type: ArrayRef = Arc::new(UInt8Array::from(vec![0u8; num_rows])); - let metric_unit: ArrayRef = Arc::new(StringArray::from(vec![Some("count"); num_rows])); let timestamps: Vec = (0..num_rows).map(|i| base_timestamp + i as u64).collect(); let timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(timestamps)); - let start_timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(vec![None::; num_rows])); let values: Vec = (0..num_rows).map(|i| base_value + i as f64).collect(); let value: ArrayRef = Arc::new(Float64Array::from(values)); - let tag_service: ArrayRef = create_nullable_dict_array(&vec![Some(service); num_rows]); - let tag_env: ArrayRef = create_nullable_dict_array(&vec![Some("prod"); num_rows]); - let tag_datacenter: ArrayRef = create_nullable_dict_array(&vec![Some("us-east-1"); num_rows]); - let tag_region: ArrayRef = 
create_nullable_dict_array(&vec![None; num_rows]); - let tag_host: ArrayRef = create_nullable_dict_array(&vec![Some("host-001"); num_rows]); - let attributes: ArrayRef = create_variant_array(num_rows, None); - let service_names: Vec<&str> = vec![service; num_rows]; - let service_name: ArrayRef = create_dict_array(&service_names); - let resource_attributes: ArrayRef = create_variant_array(num_rows, None); + let service_arr: ArrayRef = { + let keys = Int32Array::from(vec![0i32; num_rows]); + let vals = StringArray::from(vec![service]); + Arc::new(DictionaryArray::::try_new(keys, Arc::new(vals)).unwrap()) + }; RecordBatch::try_new( - schema.arrow_schema().clone(), + schema, vec![ metric_name_arr, metric_type, - metric_unit, timestamp_secs, - start_timestamp_secs, value, - tag_service, - tag_env, - tag_datacenter, - tag_region, - tag_host, - attributes, - service_name, - resource_attributes, + service_arr, ], ) .unwrap() @@ -213,9 +171,8 @@ async fn test_metrics_pipeline_e2e() { let (uploader_mailbox, _uploader_handle) = universe.spawn_builder().spawn(uploader); // ParquetPackager between indexer and uploader - let parquet_schema = ParquetSchema::new(); let writer_config = ParquetWriterConfig::default(); - let split_writer = ParquetSplitWriter::new(parquet_schema, writer_config, temp_dir.path()); + let split_writer = ParquetSplitWriter::new(writer_config, temp_dir.path()); let packager = ParquetPackager::new(split_writer, uploader_mailbox); let (packager_mailbox, packager_handle) = universe.spawn_builder().spawn(packager); diff --git a/quickwit/quickwit-indexing/src/actors/parquet_indexer.rs b/quickwit/quickwit-indexing/src/actors/parquet_indexer.rs index 2738a42163f..254ff2dc719 100644 --- a/quickwit/quickwit-indexing/src/actors/parquet_indexer.rs +++ b/quickwit/quickwit-indexing/src/actors/parquet_indexer.rs @@ -530,16 +530,10 @@ mod tests { use std::sync::atomic::Ordering; use std::time::Duration; - use arrow::array::{ - ArrayRef, DictionaryArray, Float64Array, 
Int32Array, StringArray, UInt8Array, UInt64Array, - }; - use arrow::datatypes::Int32Type; - use arrow::record_batch::RecordBatch; - use parquet::variant::{VariantArrayBuilder, VariantBuilderExt}; use quickwit_actors::{ActorHandle, Universe}; use quickwit_common::test_utils::wait_until_predicate; - use quickwit_parquet_engine::schema::ParquetSchema; use quickwit_parquet_engine::storage::{ParquetSplitWriter, ParquetWriterConfig}; + use quickwit_parquet_engine::test_helpers::create_test_batch; use quickwit_proto::metastore::{EmptyResponse, MockMetastoreService}; use quickwit_storage::RamStorage; @@ -598,9 +592,8 @@ mod tests { temp_dir: &std::path::Path, uploader_mailbox: Mailbox, ) -> (Mailbox, ActorHandle) { - let schema = ParquetSchema::new(); let writer_config = ParquetWriterConfig::default(); - let split_writer = ParquetSplitWriter::new(schema, writer_config, temp_dir); + let split_writer = ParquetSplitWriter::new(writer_config, temp_dir); let packager = ParquetPackager::new(split_writer, uploader_mailbox); universe.spawn_builder().spawn(packager) @@ -624,97 +617,6 @@ mod tests { .map_err(|_| anyhow::anyhow!("Timeout waiting for {} staged splits", expected_splits)) } - /// Create dictionary array for string fields with Int32 keys. - fn create_dict_array(values: &[&str]) -> ArrayRef { - let keys: Vec = (0..values.len()).map(|i| i as i32).collect(); - let string_array = StringArray::from(values.to_vec()); - Arc::new( - DictionaryArray::::try_new(Int32Array::from(keys), Arc::new(string_array)) - .unwrap(), - ) - } - - /// Create nullable dictionary array for optional string fields. 
- fn create_nullable_dict_array(values: &[Option<&str>]) -> ArrayRef { - let keys: Vec> = values - .iter() - .enumerate() - .map(|(i, v)| v.map(|_| i as i32)) - .collect(); - let string_values: Vec<&str> = values.iter().filter_map(|v| *v).collect(); - let string_array = StringArray::from(string_values); - Arc::new( - DictionaryArray::::try_new(Int32Array::from(keys), Arc::new(string_array)) - .unwrap(), - ) - } - - /// Create a VARIANT array for testing with specified number of rows. - fn create_variant_array(num_rows: usize, fields: Option<&[(&str, &str)]>) -> ArrayRef { - let mut builder = VariantArrayBuilder::new(num_rows); - for _ in 0..num_rows { - match fields { - Some(kv_pairs) => { - let mut obj = builder.new_object(); - for &(key, value) in kv_pairs { - obj = obj.with_field(key, value); - } - obj.finish(); - } - None => { - builder.append_null(); - } - } - } - ArrayRef::from(builder.build()) - } - - /// Create a test batch matching the metrics schema. - fn create_test_batch(num_rows: usize) -> RecordBatch { - let schema = ParquetSchema::new(); - - let metric_names: Vec<&str> = vec!["cpu.usage"; num_rows]; - let metric_name: ArrayRef = create_dict_array(&metric_names); - let metric_type: ArrayRef = Arc::new(UInt8Array::from(vec![0u8; num_rows])); - let metric_unit: ArrayRef = Arc::new(StringArray::from(vec![Some("bytes"); num_rows])); - let timestamps: Vec = (0..num_rows).map(|i| 100 + i as u64).collect(); - let timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(timestamps)); - let start_timestamp_secs: ArrayRef = - Arc::new(UInt64Array::from(vec![None::; num_rows])); - let values: Vec = (0..num_rows).map(|i| 42.0 + i as f64).collect(); - let value: ArrayRef = Arc::new(Float64Array::from(values)); - let tag_service: ArrayRef = create_nullable_dict_array(&vec![Some("web"); num_rows]); - let tag_env: ArrayRef = create_nullable_dict_array(&vec![Some("prod"); num_rows]); - let tag_datacenter: ArrayRef = - create_nullable_dict_array(&vec![Some("us-east-1"); 
num_rows]); - let tag_region: ArrayRef = create_nullable_dict_array(&vec![None; num_rows]); - let tag_host: ArrayRef = create_nullable_dict_array(&vec![Some("host-001"); num_rows]); - let attributes: ArrayRef = create_variant_array(num_rows, None); - let service_name: ArrayRef = create_dict_array(&vec!["my-service"; num_rows]); - let resource_attributes: ArrayRef = create_variant_array(num_rows, None); - - RecordBatch::try_new( - schema.arrow_schema().clone(), - vec![ - metric_name, - metric_type, - metric_unit, - timestamp_secs, - start_timestamp_secs, - value, - tag_service, - tag_env, - tag_datacenter, - tag_region, - tag_host, - attributes, - service_name, - resource_attributes, - ], - ) - .unwrap() - } - #[tokio::test] async fn test_metrics_indexer_receives_batch() { let universe = Universe::with_accelerated_time(); diff --git a/quickwit/quickwit-indexing/src/actors/parquet_packager.rs b/quickwit/quickwit-indexing/src/actors/parquet_packager.rs index a23b88789e4..b0950141b10 100644 --- a/quickwit/quickwit-indexing/src/actors/parquet_packager.rs +++ b/quickwit/quickwit-indexing/src/actors/parquet_packager.rs @@ -235,110 +235,17 @@ mod tests { use std::sync::atomic::Ordering as AtomicOrdering; use std::time::Duration; - use arrow::array::{ - ArrayRef, DictionaryArray, Float64Array, Int32Array, StringArray, UInt8Array, UInt64Array, - }; - use arrow::datatypes::Int32Type; - use arrow::record_batch::RecordBatch; - use parquet::variant::{VariantArrayBuilder, VariantBuilderExt}; use quickwit_actors::{ActorHandle, Universe}; use quickwit_common::test_utils::wait_until_predicate; use quickwit_metastore::checkpoint::{IndexCheckpointDelta, SourceCheckpointDelta}; - use quickwit_parquet_engine::schema::ParquetSchema; use quickwit_parquet_engine::storage::ParquetWriterConfig; + use quickwit_parquet_engine::test_helpers::create_test_batch; use quickwit_proto::metastore::{EmptyResponse, MockMetastoreService}; use quickwit_storage::RamStorage; use super::*; use 
crate::actors::{ParquetPublisher, SplitsUpdateMailbox, UploaderType}; - fn create_dict_array(values: &[&str]) -> ArrayRef { - let keys: Vec = (0..values.len()).map(|i| i as i32).collect(); - let string_array = StringArray::from(values.to_vec()); - Arc::new( - DictionaryArray::::try_new(Int32Array::from(keys), Arc::new(string_array)) - .unwrap(), - ) - } - - fn create_nullable_dict_array(values: &[Option<&str>]) -> ArrayRef { - let keys: Vec> = values - .iter() - .enumerate() - .map(|(i, v)| v.map(|_| i as i32)) - .collect(); - let string_values: Vec<&str> = values.iter().filter_map(|v| *v).collect(); - let string_array = StringArray::from(string_values); - Arc::new( - DictionaryArray::::try_new(Int32Array::from(keys), Arc::new(string_array)) - .unwrap(), - ) - } - - fn create_variant_array(num_rows: usize, fields: Option<&[(&str, &str)]>) -> ArrayRef { - let mut builder = VariantArrayBuilder::new(num_rows); - for _ in 0..num_rows { - match fields { - Some(kv_pairs) => { - let mut obj = builder.new_object(); - for &(key, value) in kv_pairs { - obj = obj.with_field(key, value); - } - obj.finish(); - } - None => { - builder.append_null(); - } - } - } - ArrayRef::from(builder.build()) - } - - fn create_test_batch(num_rows: usize) -> RecordBatch { - let schema = ParquetSchema::new(); - - let metric_names: Vec<&str> = vec!["cpu.usage"; num_rows]; - let metric_name: ArrayRef = create_dict_array(&metric_names); - let metric_type: ArrayRef = Arc::new(UInt8Array::from(vec![0u8; num_rows])); - let metric_unit: ArrayRef = Arc::new(StringArray::from(vec![Some("bytes"); num_rows])); - let timestamps: Vec = (0..num_rows).map(|i| 100 + i as u64).collect(); - let timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(timestamps)); - let start_timestamp_secs: ArrayRef = - Arc::new(UInt64Array::from(vec![None::; num_rows])); - let values: Vec = (0..num_rows).map(|i| 42.0 + i as f64).collect(); - let value: ArrayRef = Arc::new(Float64Array::from(values)); - let tag_service: ArrayRef = 
create_nullable_dict_array(&vec![Some("web"); num_rows]); - let tag_env: ArrayRef = create_nullable_dict_array(&vec![Some("prod"); num_rows]); - let tag_datacenter: ArrayRef = - create_nullable_dict_array(&vec![Some("us-east-1"); num_rows]); - let tag_region: ArrayRef = create_nullable_dict_array(&vec![None; num_rows]); - let tag_host: ArrayRef = create_nullable_dict_array(&vec![Some("host-001"); num_rows]); - let attributes: ArrayRef = create_variant_array(num_rows, None); - let service_name: ArrayRef = create_dict_array(&vec!["my-service"; num_rows]); - let resource_attributes: ArrayRef = create_variant_array(num_rows, None); - - RecordBatch::try_new( - schema.arrow_schema().clone(), - vec![ - metric_name, - metric_type, - metric_unit, - timestamp_secs, - start_timestamp_secs, - value, - tag_service, - tag_env, - tag_datacenter, - tag_region, - tag_host, - attributes, - service_name, - resource_attributes, - ], - ) - .unwrap() - } - fn create_test_uploader( universe: &Universe, ) -> (Mailbox, ActorHandle) { @@ -366,9 +273,8 @@ mod tests { temp_dir: &std::path::Path, uploader_mailbox: Mailbox, ) -> (Mailbox, ActorHandle) { - let schema = ParquetSchema::new(); let writer_config = ParquetWriterConfig::default(); - let split_writer = ParquetSplitWriter::new(schema, writer_config, temp_dir); + let split_writer = ParquetSplitWriter::new(writer_config, temp_dir); let packager = ParquetPackager::new(split_writer, uploader_mailbox); universe.spawn_builder().spawn(packager) diff --git a/quickwit/quickwit-indexing/src/actors/uploader.rs b/quickwit/quickwit-indexing/src/actors/uploader.rs index 1827af7a153..448b7a4e312 100644 --- a/quickwit/quickwit-indexing/src/actors/uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/uploader.rs @@ -66,7 +66,7 @@ pub enum UploaderType { /// [`SplitsUpdateMailbox`] wraps either a [`Mailbox>`] or [`Mailbox

`]. /// /// It makes it possible to send a splits update either to the [`Sequencer`] or directly -/// to the publisher actor `P`. It is used in combination with [`SplitsUpdateSender`] that +/// to the publisher actor `P`. It is used in combination with `SplitsUpdateSender` that /// will do the send. /// /// This is useful as we have different requirements between the indexing pipeline and diff --git a/quickwit/quickwit-indexing/src/models/processed_parquet_batch.rs b/quickwit/quickwit-indexing/src/models/processed_parquet_batch.rs index 0db83abcc02..c70afe35976 100644 --- a/quickwit/quickwit-indexing/src/models/processed_parquet_batch.rs +++ b/quickwit/quickwit-indexing/src/models/processed_parquet_batch.rs @@ -96,112 +96,10 @@ impl fmt::Debug for ProcessedParquetBatch { #[cfg(test)] mod tests { - use std::sync::Arc; - - use arrow::array::{ - ArrayRef, BinaryViewArray, DictionaryArray, Float64Array, Int32Array, StringArray, - StructArray, UInt8Array, UInt64Array, - }; - use arrow::datatypes::{DataType, Field, Int32Type}; - use quickwit_parquet_engine::schema::ParquetSchema; + use quickwit_parquet_engine::test_helpers::create_test_batch; use super::*; - /// Create dictionary array for string fields with Int32 keys. - fn create_dict_array(values: &[&str]) -> ArrayRef { - let keys: Vec = (0..values.len()).map(|i| i as i32).collect(); - let string_array = StringArray::from(values.to_vec()); - Arc::new( - DictionaryArray::::try_new(Int32Array::from(keys), Arc::new(string_array)) - .unwrap(), - ) - } - - /// Create nullable dictionary array for optional string fields. 
- fn create_nullable_dict_array(values: &[Option<&str>]) -> ArrayRef { - let keys: Vec> = values - .iter() - .enumerate() - .map(|(i, v)| v.map(|_| i as i32)) - .collect(); - let string_values: Vec<&str> = values.iter().filter_map(|v| *v).collect(); - let string_array = StringArray::from(string_values); - Arc::new( - DictionaryArray::::try_new(Int32Array::from(keys), Arc::new(string_array)) - .unwrap(), - ) - } - - /// Create a test batch matching the metrics schema. - fn create_test_batch(num_rows: usize) -> RecordBatch { - let schema = ParquetSchema::new(); - - let metric_names: Vec<&str> = vec!["cpu.usage"; num_rows]; - let metric_name: ArrayRef = create_dict_array(&metric_names); - let metric_type: ArrayRef = Arc::new(UInt8Array::from(vec![0u8; num_rows])); - let metric_unit: ArrayRef = Arc::new(StringArray::from(vec![Some("bytes"); num_rows])); - let timestamps: Vec = (0..num_rows).map(|i| 100 + i as u64).collect(); - let timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(timestamps)); - let start_timestamp_secs: ArrayRef = - Arc::new(UInt64Array::from(vec![None::; num_rows])); - let values: Vec = (0..num_rows).map(|i| 42.0 + i as f64).collect(); - let value: ArrayRef = Arc::new(Float64Array::from(values)); - let tag_service: ArrayRef = create_nullable_dict_array(&vec![Some("web"); num_rows]); - let tag_env: ArrayRef = create_nullable_dict_array(&vec![Some("prod"); num_rows]); - let tag_datacenter: ArrayRef = - create_nullable_dict_array(&vec![Some("us-east-1"); num_rows]); - let tag_region: ArrayRef = create_nullable_dict_array(&vec![None; num_rows]); - let tag_host: ArrayRef = create_nullable_dict_array(&vec![Some("host-001"); num_rows]); - - // Create empty Variant (Struct with metadata and value BinaryView fields) - let metadata_array = Arc::new(BinaryViewArray::from(vec![b"" as &[u8]; num_rows])); - let value_array = Arc::new(BinaryViewArray::from(vec![b"" as &[u8]; num_rows])); - let attributes: ArrayRef = Arc::new(StructArray::from(vec![ - ( - 
Arc::new(Field::new("metadata", DataType::BinaryView, false)), - metadata_array.clone() as ArrayRef, - ), - ( - Arc::new(Field::new("value", DataType::BinaryView, false)), - value_array.clone() as ArrayRef, - ), - ])); - - let service_name: ArrayRef = create_dict_array(&vec!["my-service"; num_rows]); - - let resource_attributes: ArrayRef = Arc::new(StructArray::from(vec![ - ( - Arc::new(Field::new("metadata", DataType::BinaryView, false)), - metadata_array as ArrayRef, - ), - ( - Arc::new(Field::new("value", DataType::BinaryView, false)), - value_array as ArrayRef, - ), - ])); - - RecordBatch::try_new( - schema.arrow_schema().clone(), - vec![ - metric_name, - metric_type, - metric_unit, - timestamp_secs, - start_timestamp_secs, - value, - tag_service, - tag_env, - tag_datacenter, - tag_region, - tag_host, - attributes, - service_name, - resource_attributes, - ], - ) - .unwrap() - } - #[test] fn test_processed_parquet_batch_new() { let batch = create_test_batch(10); diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs index 6fd5ce244be..e35618b99f8 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! [`FileBackedIndex`] module. It is public so that the crate `quickwit-backward-compat` can -//! import [`FileBackedIndex`] and run backward-compatibility tests. You should not have to import +//! `FileBackedIndex` module. It is public so that the crate `quickwit-backward-compat` can +//! import `FileBackedIndex` and run backward-compatibility tests. You should not have to import //! anything from here directly. 
mod serialize; diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/lazy_file_backed_index.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/lazy_file_backed_index.rs index c13711de0b2..43979334f55 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/lazy_file_backed_index.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/lazy_file_backed_index.rs @@ -24,7 +24,7 @@ use tracing::error; use super::file_backed_index::FileBackedIndex; use super::store_operations::{METASTORE_FILE_NAME, load_index}; -/// Lazy [`FileBackedIndex`]. It loads a `FileBackedIndex` on demand. When the index is first +/// Lazy `FileBackedIndex`. It loads a `FileBackedIndex` on demand. When the index is first /// loaded, it optionally spawns a task to periodically poll the storage and update the index. pub(crate) struct LazyFileBackedIndex { index_id: IndexId, diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs index 00791488e65..74e0f05abae 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs @@ -13,7 +13,7 @@ // limitations under the License. //! Module for [`FileBackedMetastore`]. It is public so that the crate `quickwit-backward-compat` -//! can import [`FileBackedIndex`] and run backward-compatibility tests. You should not have to +//! can import `FileBackedIndex` and run backward-compatibility tests. You should not have to //! import anything from here directly. pub mod file_backed_index; @@ -116,9 +116,9 @@ impl From for MutationOccurred<()> { /// into as many files and stores a map of indexes /// (index_id, index_status) in a dedicated file `manifest.json`. 
/// -/// A [`LazyIndexStatus`] describes the lifecycle of an index: [`LazyIndexStatus::Creating`] and -/// [`LazyIndexStatus::Deleting`] are transitioning states that indicates that the index is not -/// yet available. On the contrary, the [`LazyIndexStatus::Active`] status indicates the index is +/// A `LazyIndexStatus` describes the lifecycle of an index: `LazyIndexStatus::Creating` and +/// `LazyIndexStatus::Deleting` are transitioning states that indicate that the index is not +/// yet available. On the contrary, the `LazyIndexStatus::Active` status indicates the index is /// ready to be fetched and updated. /// /// Transitioning states are useful to track inconsistencies between the in-memory and on-disk data diff --git a/quickwit/quickwit-metastore/src/metastore_resolver.rs b/quickwit/quickwit-metastore/src/metastore_resolver.rs index 7793fdbbf45..265d9069f3c 100644 --- a/quickwit/quickwit-metastore/src/metastore_resolver.rs +++ b/quickwit/quickwit-metastore/src/metastore_resolver.rs @@ -45,7 +45,7 @@ impl fmt::Debug for MetastoreResolver { } impl MetastoreResolver { - /// Creates an empty [`MetastoreResolverBuilder`]. + /// Creates an empty `MetastoreResolverBuilder`. 
pub fn builder() -> MetastoreResolverBuilder { MetastoreResolverBuilder::default() } diff --git a/quickwit/quickwit-opentelemetry/Cargo.toml b/quickwit/quickwit-opentelemetry/Cargo.toml index d6e897990a2..a7432ac5403 100644 --- a/quickwit/quickwit-opentelemetry/Cargo.toml +++ b/quickwit/quickwit-opentelemetry/Cargo.toml @@ -15,7 +15,6 @@ anyhow = { workspace = true } arrow = { workspace = true } async-trait = { workspace = true } once_cell = { workspace = true } -parquet = { workspace = true } prost = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } diff --git a/quickwit/quickwit-opentelemetry/src/otlp/arrow_metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/arrow_metrics.rs index 1811a63a909..d36c2f1b98b 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/arrow_metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/arrow_metrics.rs @@ -12,272 +12,138 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Arrow-based batch building for metrics with dictionary encoding. +//! Arrow-based batch building for metrics with dynamic schema discovery. //! //! This module provides Arrow RecordBatch construction with dictionary-encoded -//! string columns for efficient storage of metrics with low cardinality tags. +//! string columns for efficient storage of metrics with dynamic tag keys. +//! The schema is discovered at `finish()` time by scanning all accumulated +//! data points for the union of tag keys. 
+use std::collections::BTreeSet; use std::io::Cursor; use std::sync::Arc; use arrow::array::{ - ArrayBuilder, ArrayRef, Float64Builder, RecordBatch, StringBuilder, StringDictionaryBuilder, - UInt8Builder, UInt64Builder, + ArrayRef, Float64Builder, RecordBatch, StringDictionaryBuilder, UInt8Builder, UInt64Builder, }; -use arrow::datatypes::{DataType, Field, Fields, Int32Type, Schema as ArrowSchema}; +use arrow::datatypes::{DataType, Field, Int32Type, Schema as ArrowSchema}; use arrow::ipc::reader::StreamReader; use arrow::ipc::writer::StreamWriter; -use parquet::variant::{VariantArrayBuilder, VariantBuilderExt, VariantType}; use quickwit_proto::bytes::Bytes; use quickwit_proto::ingest::{DocBatchV2, DocFormat}; use quickwit_proto::types::DocUid; use super::otel_metrics::{MetricDataPoint, MetricType}; -/// Creates the Arrow schema for metrics with dictionary-encoded string columns. -/// -/// Dictionary encoding stores unique string values once and references them by -/// integer index, providing significant compression for low cardinality tag values. -pub fn metrics_arrow_schema() -> ArrowSchema { - ArrowSchema::new(vec![ - // Dictionary-encoded string columns for low cardinality fields - Field::new( - "metric_name", - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - false, - ), - // MetricType enum stored as UInt8 (only ~5 possible values) - Field::new("metric_type", DataType::UInt8, false), - Field::new("metric_unit", DataType::Utf8, true), - // Measurement timestamp in seconds since Unix epoch. 
- Field::new("timestamp_secs", DataType::UInt64, false), - Field::new("start_timestamp_secs", DataType::UInt64, true), - Field::new("value", DataType::Float64, false), - // Dictionary-encoded tag columns (low cardinality expected) - Field::new( - "tag_service", - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - true, - ), - Field::new( - "tag_env", - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - true, - ), - Field::new( - "tag_datacenter", - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - true, - ), - Field::new( - "tag_region", - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - true, - ), - Field::new( - "tag_host", - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - true, - ), - // VARIANT fields for semi-structured attributes - // VariantArrayBuilder produces BinaryView fields, not Binary - Field::new( - "attributes", - DataType::Struct(Fields::from(vec![ - Field::new("metadata", DataType::BinaryView, false), - Field::new("value", DataType::BinaryView, false), - ])), - true, - ) - .with_extension_type(VariantType), - // Service name (low cardinality) - Field::new( - "service_name", - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), - false, - ), - Field::new( - "resource_attributes", - DataType::Struct(Fields::from(vec![ - Field::new("metadata", DataType::BinaryView, false), - Field::new("value", DataType::BinaryView, false), - ])), - true, - ) - .with_extension_type(VariantType), - ]) -} - /// Builder for creating Arrow RecordBatch from MetricDataPoints. /// -/// Uses dictionary encoding for low cardinality string columns -/// (tags, service names, metric names) to achieve significant compression. -/// Uses VARIANT encoding for semi-structured attributes. +/// Accumulates data points and discovers the schema dynamically at `finish()` +/// time. 
Uses dictionary encoding for string columns (metric_name, all tags). pub struct ArrowMetricsBatchBuilder { - metric_name: StringDictionaryBuilder, - metric_type: UInt8Builder, - metric_unit: StringBuilder, - timestamp_secs: UInt64Builder, - start_timestamp_secs: UInt64Builder, - value: Float64Builder, - tag_service: StringDictionaryBuilder, - tag_env: StringDictionaryBuilder, - tag_datacenter: StringDictionaryBuilder, - tag_region: StringDictionaryBuilder, - tag_host: StringDictionaryBuilder, - attributes: VariantArrayBuilder, - service_name: StringDictionaryBuilder, - resource_attributes: VariantArrayBuilder, + data_points: Vec, } impl ArrowMetricsBatchBuilder { /// Creates a new builder with pre-allocated capacity. pub fn with_capacity(capacity: usize) -> Self { Self { - metric_name: StringDictionaryBuilder::new(), - metric_type: UInt8Builder::with_capacity(capacity), - metric_unit: StringBuilder::with_capacity(capacity, capacity * 8), - timestamp_secs: UInt64Builder::with_capacity(capacity), - start_timestamp_secs: UInt64Builder::with_capacity(capacity), - value: Float64Builder::with_capacity(capacity), - tag_service: StringDictionaryBuilder::new(), - tag_env: StringDictionaryBuilder::new(), - tag_datacenter: StringDictionaryBuilder::new(), - tag_region: StringDictionaryBuilder::new(), - tag_host: StringDictionaryBuilder::new(), - attributes: VariantArrayBuilder::new(capacity), - service_name: StringDictionaryBuilder::new(), - resource_attributes: VariantArrayBuilder::new(capacity), + data_points: Vec::with_capacity(capacity), } } /// Appends a MetricDataPoint to the batch. 
- pub fn append(&mut self, data_point: &MetricDataPoint) { - self.metric_name.append_value(&data_point.metric_name); - self.metric_type.append_value(data_point.metric_type as u8); + pub fn append(&mut self, data_point: MetricDataPoint) { + self.data_points.push(data_point); + } - match &data_point.metric_unit { - Some(unit) => self.metric_unit.append_value(unit), - None => self.metric_unit.append_null(), + /// Finalizes and returns the RecordBatch. + /// + /// Performs two passes: + /// 1. Schema discovery: scans all data points to collect the union of tag keys. + /// 2. Array building: creates per-column builders and populates them. + pub fn finish(self) -> RecordBatch { + let num_rows = self.data_points.len(); + + // Pass 1: discover all tag keys across all data points. + let mut tag_keys: BTreeSet<&str> = BTreeSet::new(); + for dp in &self.data_points { + for key in dp.tags.keys() { + tag_keys.insert(key.as_str()); + } } + let sorted_tag_keys: Vec<&str> = tag_keys.into_iter().collect(); - self.timestamp_secs.append_value(data_point.timestamp_secs); - match data_point.start_timestamp_secs { - Some(ts) => self.start_timestamp_secs.append_value(ts), - None => self.start_timestamp_secs.append_null(), + // Build the Arrow schema dynamically + let mut fields = Vec::with_capacity(4 + sorted_tag_keys.len()); + fields.push(Field::new( + "metric_name", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + false, + )); + fields.push(Field::new("metric_type", DataType::UInt8, false)); + fields.push(Field::new("timestamp_secs", DataType::UInt64, false)); + fields.push(Field::new("value", DataType::Float64, false)); + + for &tag_key in &sorted_tag_keys { + fields.push(Field::new( + tag_key, + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + true, + )); } - self.value.append_value(data_point.value); - - append_optional_dict(&mut self.tag_service, &data_point.tag_service); - append_optional_dict(&mut self.tag_env, 
&data_point.tag_env); - append_optional_dict(&mut self.tag_datacenter, &data_point.tag_datacenter); - append_optional_dict(&mut self.tag_region, &data_point.tag_region); - append_optional_dict(&mut self.tag_host, &data_point.tag_host); - - if data_point.attributes.is_empty() { - self.attributes.append_null(); - } else { - append_variant_object(&mut self.attributes, &data_point.attributes); + + let schema = Arc::new(ArrowSchema::new(fields)); + + // Pass 2: build arrays + let mut metric_name_builder: StringDictionaryBuilder = + StringDictionaryBuilder::new(); + let mut metric_type_builder = UInt8Builder::with_capacity(num_rows); + let mut timestamp_secs_builder = UInt64Builder::with_capacity(num_rows); + let mut value_builder = Float64Builder::with_capacity(num_rows); + + let mut tag_builders: Vec> = sorted_tag_keys + .iter() + .map(|_| StringDictionaryBuilder::new()) + .collect(); + + for dp in &self.data_points { + metric_name_builder.append_value(&dp.metric_name); + metric_type_builder.append_value(dp.metric_type as u8); + timestamp_secs_builder.append_value(dp.timestamp_secs); + value_builder.append_value(dp.value); + + for (tag_idx, tag_key) in sorted_tag_keys.iter().enumerate() { + match dp.tags.get(*tag_key) { + Some(tag_val) => tag_builders[tag_idx].append_value(tag_val), + None => tag_builders[tag_idx].append_null(), + } + } } - self.service_name.append_value(&data_point.service_name); + let mut arrays: Vec = Vec::with_capacity(4 + sorted_tag_keys.len()); + arrays.push(Arc::new(metric_name_builder.finish())); + arrays.push(Arc::new(metric_type_builder.finish())); + arrays.push(Arc::new(timestamp_secs_builder.finish())); + arrays.push(Arc::new(value_builder.finish())); - if data_point.resource_attributes.is_empty() { - self.resource_attributes.append_null(); - } else { - append_variant_object( - &mut self.resource_attributes, - &data_point.resource_attributes, - ); + for tag_builder in &mut tag_builders { + arrays.push(Arc::new(tag_builder.finish())); } - } 
- /// Finalizes and returns the RecordBatch. - pub fn finish(mut self) -> RecordBatch { - // Build variant arrays and convert to ArrayRef - let attributes_array = self.attributes.build(); - let resource_attributes_array = self.resource_attributes.build(); - - let arrays: Vec = vec![ - Arc::new(self.metric_name.finish()), - Arc::new(self.metric_type.finish()), - Arc::new(self.metric_unit.finish()), - Arc::new(self.timestamp_secs.finish()), - Arc::new(self.start_timestamp_secs.finish()), - Arc::new(self.value.finish()), - Arc::new(self.tag_service.finish()), - Arc::new(self.tag_env.finish()), - Arc::new(self.tag_datacenter.finish()), - Arc::new(self.tag_region.finish()), - Arc::new(self.tag_host.finish()), - ArrayRef::from(attributes_array), - Arc::new(self.service_name.finish()), - ArrayRef::from(resource_attributes_array), - ]; - - RecordBatch::try_new(Arc::new(metrics_arrow_schema()), arrays) - .expect("record batch should match Arrow schema") + RecordBatch::try_new(schema, arrays).expect("record batch should match Arrow schema") } /// Returns the number of rows appended so far. pub fn len(&self) -> usize { - self.timestamp_secs.len() + self.data_points.len() } /// Returns true if no rows have been appended. pub fn is_empty(&self) -> bool { - self.len() == 0 + self.data_points.is_empty() } } -/// Helper to append optional string values to dictionary builder. -fn append_optional_dict(builder: &mut StringDictionaryBuilder, value: &Option) { - match value { - Some(s) => builder.append_value(s), - None => builder.append_null(), - } -} - -/// Helper to append a HashMap as a VARIANT object to the builder. 
-fn append_variant_object( - builder: &mut VariantArrayBuilder, - map: &std::collections::HashMap, -) { - // Use a macro-like approach with fold to build the object - // We need to chain with_field calls which consume and return the builder - let obj_builder = builder.new_object(); - - // Build object by folding over the map entries - let final_builder = map.iter().fold(obj_builder, |b, (key, value)| { - match value { - serde_json::Value::Null => b.with_field(key.as_str(), ()), - serde_json::Value::Bool(v) => b.with_field(key.as_str(), *v), - serde_json::Value::Number(n) => { - if let Some(i) = n.as_i64() { - b.with_field(key.as_str(), i) - } else if let Some(f) = n.as_f64() { - b.with_field(key.as_str(), f) - } else { - b.with_field(key.as_str(), ()) - } - } - serde_json::Value::String(s) => b.with_field(key.as_str(), s.as_str()), - serde_json::Value::Array(arr) => { - // For arrays, serialize to JSON string as fallback - let json_str = serde_json::to_string(arr).unwrap_or_default(); - b.with_field(key.as_str(), json_str.as_str()) - } - serde_json::Value::Object(obj) => { - // For nested objects, serialize to JSON string as fallback - let json_str = serde_json::to_string(obj).unwrap_or_default(); - b.with_field(key.as_str(), json_str.as_str()) - } - } - }); - - final_builder.finish(); -} - /// Error type for Arrow IPC operations. 
#[derive(Debug, thiserror::Error)] pub enum ArrowIpcError { @@ -462,32 +328,26 @@ impl ArrowDocBatchV2Builder { mod tests { use std::collections::HashMap; - use serde_json::Value as JsonValue; - use super::*; fn make_test_data_point() -> MetricDataPoint { + let mut tags = HashMap::new(); + tags.insert("service".to_string(), "api".to_string()); + tags.insert("env".to_string(), "prod".to_string()); + tags.insert("datacenter".to_string(), "us-east-1a".to_string()); + tags.insert("region".to_string(), "us-east-1".to_string()); + tags.insert("host".to_string(), "server-001".to_string()); + tags.insert("endpoint".to_string(), "/health".to_string()); + tags.insert("metric_unit".to_string(), "%".to_string()); + tags.insert("start_timestamp_secs".to_string(), "1704067190".to_string()); + tags.insert("service_name".to_string(), "api-service".to_string()); + MetricDataPoint { metric_name: "cpu.usage".to_string(), metric_type: MetricType::Gauge, - metric_unit: Some("%".to_string()), timestamp_secs: 1704067200, - start_timestamp_secs: Some(1704067190), value: 85.5, - tag_service: Some("api".to_string()), - tag_env: Some("prod".to_string()), - tag_datacenter: Some("us-east-1a".to_string()), - tag_region: Some("us-east-1".to_string()), - tag_host: Some("server-001".to_string()), - attributes: HashMap::from([( - "endpoint".to_string(), - JsonValue::String("/health".to_string()), - )]), - service_name: "api-service".to_string(), - resource_attributes: HashMap::from([( - "k8s.pod".to_string(), - JsonValue::String("pod-123".to_string()), - )]), + tags, } } @@ -495,38 +355,36 @@ mod tests { fn test_arrow_batch_builder_single_row() { let dp = make_test_data_point(); let mut builder = ArrowMetricsBatchBuilder::with_capacity(1); - builder.append(&dp); + builder.append(dp); assert_eq!(builder.len(), 1); assert!(!builder.is_empty()); let batch = builder.finish(); assert_eq!(batch.num_rows(), 1); - assert_eq!(batch.num_columns(), 14); + // 4 fixed columns + 9 tag columns + 
assert_eq!(batch.num_columns(), 13); } #[test] fn test_arrow_batch_builder_multiple_rows() { let mut builder = ArrowMetricsBatchBuilder::with_capacity(100); - for i in 0..100 { + for idx in 0..100 { + let mut tags = HashMap::new(); + tags.insert("service".to_string(), format!("service-{}", idx % 10)); + tags.insert("env".to_string(), "prod".to_string()); + tags.insert("host".to_string(), format!("host-{}", idx % 5)); + tags.insert("service_name".to_string(), "test-service".to_string()); + let dp = MetricDataPoint { metric_name: "test.metric".to_string(), metric_type: MetricType::Gauge, - metric_unit: None, - timestamp_secs: 1704067200 + i as u64, - start_timestamp_secs: None, - value: i as f64 * 0.1, - tag_service: Some(format!("service-{}", i % 10)), // 10 unique values - tag_env: Some("prod".to_string()), // 1 unique value - tag_datacenter: None, - tag_region: None, - tag_host: Some(format!("host-{}", i % 5)), // 5 unique values - attributes: HashMap::new(), - service_name: "test-service".to_string(), - resource_attributes: HashMap::new(), + timestamp_secs: 1704067200 + idx as u64, + value: idx as f64 * 0.1, + tags, }; - builder.append(&dp); + builder.append(dp); } assert_eq!(builder.len(), 100); @@ -539,89 +397,108 @@ mod tests { let mut builder = ArrowMetricsBatchBuilder::with_capacity(1000); // Create 1000 data points with only 10 unique service values - for i in 0..1000 { + for idx in 0..1000 { + let mut tags = HashMap::new(); + tags.insert("service".to_string(), format!("service-{}", idx % 10)); + tags.insert("env".to_string(), "prod".to_string()); + tags.insert("datacenter".to_string(), format!("dc-{}", idx % 4)); + tags.insert("service_name".to_string(), format!("svc-{}", idx % 5)); + let dp = MetricDataPoint { metric_name: "test.metric".to_string(), metric_type: MetricType::Gauge, - metric_unit: None, - timestamp_secs: 1704067200 + i as u64, - start_timestamp_secs: None, - value: i as f64, - tag_service: Some(format!("service-{}", i % 10)), - tag_env: 
Some("prod".to_string()), - tag_datacenter: Some(format!("dc-{}", i % 4)), - tag_region: None, - tag_host: None, - attributes: HashMap::new(), - service_name: format!("svc-{}", i % 5), - resource_attributes: HashMap::new(), + timestamp_secs: 1704067200 + idx as u64, + value: idx as f64, + tags, }; - builder.append(&dp); + builder.append(dp); } let batch = builder.finish(); assert_eq!(batch.num_rows(), 1000); // Verify the batch was created successfully with dictionary encoding - // The dictionary arrays should have far fewer unique values than rows let schema = batch.schema(); - // Check that tag_service uses dictionary encoding - let tag_service_field = schema.field_with_name("tag_service").unwrap(); + // Check that the service tag uses dictionary encoding + let service_field = schema.field_with_name("service").unwrap(); assert!(matches!( - tag_service_field.data_type(), + service_field.data_type(), DataType::Dictionary(_, _) )); } #[test] fn test_null_handling() { + let mut tags = HashMap::new(); + tags.insert("service_name".to_string(), "unknown".to_string()); + let dp = MetricDataPoint { metric_name: "minimal.metric".to_string(), metric_type: MetricType::Gauge, - metric_unit: None, // null timestamp_secs: 1704067200, - start_timestamp_secs: None, // null value: 0.0, - tag_service: None, // null - tag_env: None, // null - tag_datacenter: None, - tag_region: None, - tag_host: None, - attributes: HashMap::new(), // empty -> null - service_name: "unknown".to_string(), - resource_attributes: HashMap::new(), // empty -> null + tags, }; let mut builder = ArrowMetricsBatchBuilder::with_capacity(1); - builder.append(&dp); + builder.append(dp); let batch = builder.finish(); assert_eq!(batch.num_rows(), 1); - // The batch should handle nulls correctly + // 4 fixed columns + 1 tag column (service_name) + assert_eq!(batch.num_columns(), 5); } #[test] - fn test_schema_field_count() { - let schema = metrics_arrow_schema(); - assert_eq!(schema.fields().len(), 14); + fn 
test_dynamic_schema_discovery() { + let mut builder = ArrowMetricsBatchBuilder::with_capacity(2); - // Verify field names + // First data point has tags: env, host + let mut tags1 = HashMap::new(); + tags1.insert("env".to_string(), "prod".to_string()); + tags1.insert("host".to_string(), "server-1".to_string()); + + builder.append(MetricDataPoint { + metric_name: "metric.a".to_string(), + metric_type: MetricType::Gauge, + timestamp_secs: 1704067200, + value: 1.0, + tags: tags1, + }); + + // Second data point has tags: env, region (different set) + let mut tags2 = HashMap::new(); + tags2.insert("env".to_string(), "staging".to_string()); + tags2.insert("region".to_string(), "us-west".to_string()); + + builder.append(MetricDataPoint { + metric_name: "metric.b".to_string(), + metric_type: MetricType::Sum, + timestamp_secs: 1704067201, + value: 2.0, + tags: tags2, + }); + + let batch = builder.finish(); + assert_eq!(batch.num_rows(), 2); + // 4 fixed + 3 tag columns (env, host, region) - sorted alphabetically + assert_eq!(batch.num_columns(), 7); + + let schema = batch.schema(); let field_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect(); - assert!(field_names.contains(&"metric_name")); - assert!(field_names.contains(&"metric_type")); - assert!(field_names.contains(&"metric_unit")); - assert!(field_names.contains(&"timestamp_secs")); - assert!(field_names.contains(&"start_timestamp_secs")); - assert!(field_names.contains(&"value")); - assert!(field_names.contains(&"tag_service")); - assert!(field_names.contains(&"tag_env")); - assert!(field_names.contains(&"tag_datacenter")); - assert!(field_names.contains(&"tag_region")); - assert!(field_names.contains(&"tag_host")); - assert!(field_names.contains(&"attributes")); - assert!(field_names.contains(&"service_name")); - assert!(field_names.contains(&"resource_attributes")); + assert_eq!( + field_names, + vec![ + "metric_name", + "metric_type", + "timestamp_secs", + "value", + "env", + "host", + 
"region", + ] + ); } #[test] @@ -638,24 +515,20 @@ mod tests { fn test_ipc_round_trip() { // Build a RecordBatch let mut builder = ArrowMetricsBatchBuilder::with_capacity(10); - for i in 0..10 { + for idx in 0..10 { + let mut tags = HashMap::new(); + tags.insert("service".to_string(), format!("service-{}", idx % 3)); + tags.insert("env".to_string(), "prod".to_string()); + tags.insert("service_name".to_string(), "test-service".to_string()); + let dp = MetricDataPoint { metric_name: "test.metric".to_string(), metric_type: MetricType::Gauge, - metric_unit: None, - timestamp_secs: 1704067200 + i as u64, - start_timestamp_secs: None, - value: i as f64 * 0.1, - tag_service: Some(format!("service-{}", i % 3)), - tag_env: Some("prod".to_string()), - tag_datacenter: None, - tag_region: None, - tag_host: None, - attributes: HashMap::new(), - service_name: "test-service".to_string(), - resource_attributes: HashMap::new(), + timestamp_secs: 1704067200 + idx as u64, + value: idx as f64 * 0.1, + tags, }; - builder.append(&dp); + builder.append(dp); } let original_batch = builder.finish(); @@ -682,24 +555,18 @@ mod tests { let mut doc_uid_generator = DocUidGenerator::default(); let mut doc_uids = Vec::new(); - for i in 0..5 { + for idx in 0..5 { + let mut tags = HashMap::new(); + tags.insert("service_name".to_string(), "test".to_string()); + let dp = MetricDataPoint { metric_name: "test.metric".to_string(), metric_type: MetricType::Gauge, - metric_unit: None, - timestamp_secs: 1704067200 + i as u64, - start_timestamp_secs: None, - value: i as f64, - tag_service: None, - tag_env: None, - tag_datacenter: None, - tag_region: None, - tag_host: None, - attributes: HashMap::new(), - service_name: "test".to_string(), - resource_attributes: HashMap::new(), + timestamp_secs: 1704067200 + idx as u64, + value: idx as f64, + tags, }; - builder.append(&dp); + builder.append(dp); doc_uids.push(doc_uid_generator.next_doc_uid()); } let batch = builder.finish(); diff --git 
a/quickwit/quickwit-opentelemetry/src/otlp/mod.rs b/quickwit/quickwit-opentelemetry/src/otlp/mod.rs index 67abd6c1d68..9927c4fd800 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/mod.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/mod.rs @@ -37,7 +37,7 @@ mod traces; pub use arrow_metrics::{ ArrowDocBatchV2Builder, ArrowIpcError, ArrowMetricsBatchBuilder, ipc_to_json_values, - ipc_to_record_batch, metrics_arrow_schema, record_batch_to_ipc, + ipc_to_record_batch, record_batch_to_ipc, }; pub use logs::{ JsonLogIterator, OTEL_LOGS_INDEX_ID, OtlpGrpcLogsService, OtlpLogsError, parse_otlp_logs_json, diff --git a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs index bcb67c5fadb..994bf7cb324 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs @@ -134,59 +134,14 @@ impl From for tonic::Status { } } -/// Represents a single metric data point document +/// Represents a single metric data point document. 
#[derive(Debug, Clone)] pub struct MetricDataPoint { - // Metric identity pub metric_name: String, pub metric_type: MetricType, - pub metric_unit: Option, - - // Timestamps (seconds granularity) pub timestamp_secs: u64, - pub start_timestamp_secs: Option, - - // Value (f64 only) pub value: f64, - - // Explicit tag columns - pub tag_service: Option, - pub tag_env: Option, - pub tag_datacenter: Option, - pub tag_region: Option, - pub tag_host: Option, - - // Dynamic tags (remaining attributes) - pub attributes: HashMap, - - // Resource metadata - pub service_name: String, - pub resource_attributes: HashMap, -} - -struct ExplicitTags { - service: Option, - env: Option, - datacenter: Option, - region: Option, - host: Option, -} - -fn extract_string_tag(attributes: &mut HashMap, key: &str) -> Option { - attributes.remove(key).and_then(|v| match v { - JsonValue::String(s) => Some(s), - _ => None, - }) -} - -fn extract_explicit_tags(attributes: &mut HashMap) -> ExplicitTags { - ExplicitTags { - service: extract_string_tag(attributes, "service"), - env: extract_string_tag(attributes, "env"), - datacenter: extract_string_tag(attributes, "datacenter"), - region: extract_string_tag(attributes, "region"), - host: extract_string_tag(attributes, "host"), - } + pub tags: HashMap, } /// Convert nanoseconds to seconds @@ -194,6 +149,16 @@ fn nanos_to_secs(nanos: u64) -> u64 { nanos / 1_000_000_000 } +/// Convert a `serde_json::Value` to a plain `String`. 
+fn json_value_to_string(value: JsonValue) -> String { + match value { + JsonValue::String(s) => s, + JsonValue::Number(n) => n.to_string(), + JsonValue::Bool(b) => b.to_string(), + other => serde_json::to_string(&other).unwrap_or_default(), + } +} + struct ParsedMetrics { doc_batch: DocBatchV2, num_data_points: u64, @@ -243,7 +208,7 @@ impl OtlpGrpcMetricsService { Status::internal("failed to parse metric records") })??; - if num_data_points == num_parse_errors { + if num_data_points > 0 && num_data_points == num_parse_errors { return Err(tonic::Status::internal(error_message)); } @@ -274,15 +239,18 @@ impl OtlpGrpcMetricsService { request: ExportMetricsServiceRequest, parent_span: RuntimeSpan, ) -> tonic::Result { - let data_points = parse_otlp_metrics(request)?; - let num_data_points = data_points.len() as u64; - - // Build Arrow RecordBatch from data points - let mut arrow_builder = ArrowMetricsBatchBuilder::with_capacity(num_data_points as usize); + let ParseOtlpResult { + data_points, + num_rejected, + } = parse_otlp_metrics(request); + let num_data_points = data_points.len() as u64 + num_rejected; + + // Build Arrow RecordBatch from valid data points + let mut arrow_builder = ArrowMetricsBatchBuilder::with_capacity(data_points.len()); let mut doc_uid_generator = DocUidGenerator::default(); - let mut doc_uids = Vec::with_capacity(num_data_points as usize); + let mut doc_uids = Vec::with_capacity(data_points.len()); - for data_point in &data_points { + for data_point in data_points { arrow_builder.append(data_point); doc_uids.push(doc_uid_generator.next_doc_uid()); } @@ -300,13 +268,22 @@ impl OtlpGrpcMetricsService { let current_span = RuntimeSpan::current(); current_span.record("num_data_points", num_data_points); current_span.record("num_bytes", doc_batch.num_bytes()); - current_span.record("num_parse_errors", 0u64); + current_span.record("num_parse_errors", num_rejected); + + let error_message = if num_rejected > 0 { + format!( + "{num_rejected} data 
point(s) rejected (unsupported temporality or missing \ + required fields)" + ) + } else { + String::new() + }; let parsed_metrics = ParsedMetrics { doc_batch, num_data_points, - num_parse_errors: 0, - error_message: String::new(), + num_parse_errors: num_rejected, + error_message, }; Ok(parsed_metrics) } @@ -381,16 +358,19 @@ impl MetricsService for OtlpGrpcMetricsService { } } -fn parse_otlp_metrics( - request: ExportMetricsServiceRequest, -) -> Result, OtlpMetricsError> { +struct ParseOtlpResult { + data_points: Vec, + num_rejected: u64, +} + +fn parse_otlp_metrics(request: ExportMetricsServiceRequest) -> ParseOtlpResult { let mut data_points = Vec::new(); + let mut num_rejected: u64 = 0; for resource_metrics in request.resource_metrics { let mut resource_attributes = extract_attributes( resource_metrics .resource - .clone() .map(|rsrc| rsrc.attributes) .unwrap_or_default(), ); @@ -401,25 +381,23 @@ fn parse_otlp_metrics( for scope_metrics in resource_metrics.scope_metrics { for metric in scope_metrics.metrics { - parse_metric( - &metric, - &service_name, - &resource_attributes, - &mut data_points, - )?; + parse_metric(&metric, &service_name, &mut data_points, &mut num_rejected); } } } - Ok(data_points) + ParseOtlpResult { + data_points, + num_rejected, + } } fn parse_metric( metric: &Metric, service_name: &str, - resource_attributes: &HashMap, data_points: &mut Vec, -) -> Result<(), OtlpMetricsError> { + num_rejected: &mut u64, +) { let metric_name = metric.name.clone(); let metric_unit = if metric.unit.is_empty() { None @@ -430,36 +408,47 @@ fn parse_metric( match &metric.data { Some(metric::Data::Gauge(gauge)) => { for dp in &gauge.data_points { - let data_point = create_number_data_point( + match create_number_data_point( &metric_name, MetricType::Gauge, &metric_unit, dp, service_name, - resource_attributes, - )?; - data_points.push(data_point); + ) { + Ok(Some(data_point)) => data_points.push(data_point), + Ok(None) => *num_rejected += 1, + Err(err) => { + 
warn!(error = %err, metric_name, "skipping invalid gauge data point"); + *num_rejected += 1; + } + } } } Some(metric::Data::Sum(sum)) => { - // Only support DELTA temporality if sum.aggregation_temporality == AggregationTemporality::Cumulative as i32 { - return Err(OtlpMetricsError::InvalidArgument( - "cumulative aggregation temporality is not supported, only delta is supported" - .to_string(), - )); + warn!( + metric_name, + "skipping sum metric with cumulative temporality (only delta is supported)" + ); + *num_rejected += sum.data_points.len() as u64; + return; } for dp in &sum.data_points { - let data_point = create_number_data_point( + match create_number_data_point( &metric_name, MetricType::Sum, &metric_unit, dp, service_name, - resource_attributes, - )?; - data_points.push(data_point); + ) { + Ok(Some(data_point)) => data_points.push(data_point), + Ok(None) => *num_rejected += 1, + Err(err) => { + warn!(error = %err, metric_name, "skipping invalid sum data point"); + *num_rejected += 1; + } + } } } Some(metric::Data::Histogram(_)) => { @@ -475,8 +464,6 @@ fn parse_metric( warn!("metric has no data, skipping"); } } - - Ok(()) } fn create_number_data_point( @@ -485,8 +472,20 @@ fn create_number_data_point( metric_unit: &Option, dp: &NumberDataPoint, service_name: &str, - resource_attributes: &HashMap, -) -> Result { +) -> Result, OtlpMetricsError> { + // Convert timestamps to seconds + let timestamp_secs = nanos_to_secs(dp.time_unix_nano); + + // Validate: skip data points with empty metric_name or zero timestamp + if metric_name.is_empty() { + warn!("skipping data point with empty metric_name"); + return Ok(None); + } + if timestamp_secs == 0 { + warn!("skipping data point with zero timestamp_secs"); + return Ok(None); + } + // Extract value as f64 let value = match &dp.value { Some( @@ -500,34 +499,39 @@ fn create_number_data_point( None => 0.0, }; - // Extract attributes and explicit tags - let mut attributes = extract_attributes(dp.attributes.clone()); - 
let explicit_tags = extract_explicit_tags(&mut attributes); + // Extract attributes and convert all values to strings for tags + let attributes = extract_attributes(dp.attributes.clone()); + let mut tags = HashMap::with_capacity(attributes.len() + 3); - // Convert timestamps to seconds - let timestamp_secs = nanos_to_secs(dp.time_unix_nano); - let start_timestamp_secs = if dp.start_time_unix_nano != 0 { - Some(nanos_to_secs(dp.start_time_unix_nano)) - } else { - None - }; + for (key, json_val) in attributes { + tags.insert(key, json_value_to_string(json_val)); + } + + // Add metric_unit and start_timestamp_secs using or_insert_with so a + // data-point attribute with the same name is not silently overwritten. + if let Some(unit) = metric_unit { + tags.entry("metric_unit".to_string()) + .or_insert_with(|| unit.clone()); + } - Ok(MetricDataPoint { + if dp.start_time_unix_nano != 0 { + let start_ts = nanos_to_secs(dp.start_time_unix_nano); + tags.entry("start_timestamp_secs".to_string()) + .or_insert_with(|| start_ts.to_string()); + } + + // Fall back to the resource-level service.name if no data-point-level + // "service" tag was set. Data-point attributes take precedence. 
+ tags.entry("service".to_string()) + .or_insert_with(|| service_name.to_string()); + + Ok(Some(MetricDataPoint { metric_name: metric_name.to_string(), metric_type, - metric_unit: metric_unit.clone(), timestamp_secs, - start_timestamp_secs, value, - tag_service: explicit_tags.service, - tag_env: explicit_tags.env, - tag_datacenter: explicit_tags.datacenter, - tag_region: explicit_tags.region, - tag_host: explicit_tags.host, - attributes, - service_name: service_name.to_string(), - resource_attributes: resource_attributes.clone(), - }) + tags, + })) } #[cfg(test)] @@ -562,42 +566,6 @@ mod tests { assert_eq!(nanos_to_secs(2_000_000_000), 2); } - #[test] - fn test_extract_explicit_tags() { - let mut attributes = HashMap::from([ - ("service".to_string(), JsonValue::String("api".to_string())), - ("env".to_string(), JsonValue::String("prod".to_string())), - ( - "datacenter".to_string(), - JsonValue::String("us-east".to_string()), - ), - ( - "region".to_string(), - JsonValue::String("us-east-1".to_string()), - ), - ( - "host".to_string(), - JsonValue::String("server-1".to_string()), - ), - ( - "custom_tag".to_string(), - JsonValue::String("custom_value".to_string()), - ), - ]); - - let explicit_tags = extract_explicit_tags(&mut attributes); - - assert_eq!(explicit_tags.service, Some("api".to_string())); - assert_eq!(explicit_tags.env, Some("prod".to_string())); - assert_eq!(explicit_tags.datacenter, Some("us-east".to_string())); - assert_eq!(explicit_tags.region, Some("us-east-1".to_string())); - assert_eq!(explicit_tags.host, Some("server-1".to_string())); - - // custom_tag should remain in attributes - assert_eq!(attributes.len(), 1); - assert!(attributes.contains_key("custom_tag")); - } - fn make_test_gauge_request() -> ExportMetricsServiceRequest { use quickwit_proto::opentelemetry::proto::common::v1::{AnyValue, KeyValue, any_value}; use quickwit_proto::opentelemetry::proto::metrics::v1::{ @@ -747,35 +715,41 @@ mod tests { #[test] fn test_parse_gauge_metrics() { let 
request = make_test_gauge_request(); - let data_points = parse_otlp_metrics(request).unwrap(); + let data_points = parse_otlp_metrics(request).data_points; assert_eq!(data_points.len(), 1); let dp = &data_points[0]; assert_eq!(dp.metric_name, "cpu.usage"); assert_eq!(dp.metric_type, MetricType::Gauge); - assert_eq!(dp.metric_unit, Some("%".to_string())); + assert_eq!(dp.tags.get("metric_unit").map(|s| s.as_str()), Some("%")); assert_eq!(dp.timestamp_secs, 2); - assert_eq!(dp.start_timestamp_secs, Some(1)); + assert_eq!( + dp.tags.get("start_timestamp_secs").map(|s| s.as_str()), + Some("1") + ); assert_eq!(dp.value, 85.5); - assert_eq!(dp.tag_service, Some("api".to_string())); - assert_eq!(dp.tag_env, Some("prod".to_string())); - assert_eq!(dp.service_name, "test-service"); + // Data-point attribute "service" takes precedence over resource-level service.name. + assert_eq!(dp.tags.get("service").map(|s| s.as_str()), Some("api")); + assert_eq!(dp.tags.get("env").map(|s| s.as_str()), Some("prod")); } #[test] fn test_parse_sum_delta_metrics() { let request = make_test_sum_delta_request(); - let data_points = parse_otlp_metrics(request).unwrap(); + let data_points = parse_otlp_metrics(request).data_points; assert_eq!(data_points.len(), 1); let dp = &data_points[0]; assert_eq!(dp.metric_name, "http.requests"); assert_eq!(dp.metric_type, MetricType::Sum); - assert_eq!(dp.metric_unit, Some("1".to_string())); + assert_eq!(dp.tags.get("metric_unit").map(|s| s.as_str()), Some("1")); assert_eq!(dp.timestamp_secs, 2); assert_eq!(dp.value, 100.0); // int converted to f64 - assert_eq!(dp.tag_host, Some("server-1".to_string())); - assert_eq!(dp.service_name, "counter-service"); + assert_eq!(dp.tags.get("host").map(|s| s.as_str()), Some("server-1")); + assert_eq!( + dp.tags.get("service").map(|s| s.as_str()), + Some("counter-service") + ); } #[test] @@ -783,11 +757,10 @@ mod tests { let request = make_test_sum_cumulative_request(); let result = parse_otlp_metrics(request); - 
assert!(result.is_err()); - match result.unwrap_err() { - OtlpMetricsError::InvalidArgument(_) => {} - err => panic!("unexpected error type: {:?}", err), - } + // Cumulative sums are skipped (not a hard error) so other metrics in the same + // request can still be processed. The rejected count is incremented instead. + assert_eq!(result.data_points.len(), 0); + assert_eq!(result.num_rejected, 1); } /// Test parsing metrics with various attribute types @@ -860,19 +833,14 @@ mod tests { }], }; - let data_points = parse_otlp_metrics(request).unwrap(); + let data_points = parse_otlp_metrics(request).data_points; assert_eq!(data_points.len(), 1); let dp = &data_points[0]; - assert_eq!(dp.service_name, "test"); - - // Verify resource attributes contain the non-service.name attributes - assert!(dp.resource_attributes.contains_key("int_attr")); - assert!(dp.resource_attributes.contains_key("bool_attr")); - assert!(dp.resource_attributes.contains_key("double_attr")); + assert_eq!(dp.tags.get("service").map(|s| s.as_str()), Some("test")); - // Verify data point attributes - assert!(dp.attributes.contains_key("string_tag")); + // Verify data point attributes are in tags as strings + assert_eq!(dp.tags.get("string_tag").map(|s| s.as_str()), Some("value")); } /// Test metrics with empty and missing values @@ -908,17 +876,94 @@ mod tests { }], }; - let data_points = parse_otlp_metrics(request).unwrap(); + let data_points = parse_otlp_metrics(request).data_points; assert_eq!(data_points.len(), 1); let dp = &data_points[0]; assert_eq!(dp.metric_name, "minimal.metric"); - assert_eq!(dp.service_name, "unknown_service"); // Default value - assert!(dp.metric_unit.is_none()); - assert!(dp.start_timestamp_secs.is_none()); - assert!(dp.tag_service.is_none()); - assert!(dp.tag_env.is_none()); - assert!(dp.attributes.is_empty()); - assert!(dp.resource_attributes.is_empty()); + assert_eq!( + dp.tags.get("service").map(|s| s.as_str()), + Some("unknown_service") + ); + // No metric_unit tag 
when unit is empty + assert!(!dp.tags.contains_key("metric_unit")); + // No start_timestamp_secs tag when start time is 0 + assert!(!dp.tags.contains_key("start_timestamp_secs")); + // Only "service" should be in tags (no attributes, no unit, no start time) + assert_eq!(dp.tags.len(), 1); + } + + /// Test that data points with empty metric_name are skipped + #[test] + fn test_skip_empty_metric_name() { + use quickwit_proto::opentelemetry::proto::metrics::v1::{ + Gauge, ResourceMetrics, ScopeMetrics, number_data_point, + }; + + let request = ExportMetricsServiceRequest { + resource_metrics: vec![ResourceMetrics { + resource: None, + scope_metrics: vec![ScopeMetrics { + scope: None, + metrics: vec![Metric { + name: String::new(), // Empty name + description: String::new(), + unit: String::new(), + data: Some(metric::Data::Gauge(Gauge { + data_points: vec![NumberDataPoint { + attributes: Vec::new(), + start_time_unix_nano: 0, + time_unix_nano: 1_000_000_000, + exemplars: Vec::new(), + flags: 0, + value: Some(number_data_point::Value::AsDouble(1.0)), + }], + })), + }], + schema_url: String::new(), + }], + schema_url: String::new(), + }], + }; + + let data_points = parse_otlp_metrics(request).data_points; + assert_eq!(data_points.len(), 0); + } + + /// Test that data points with zero timestamp are skipped + #[test] + fn test_skip_zero_timestamp() { + use quickwit_proto::opentelemetry::proto::metrics::v1::{ + Gauge, ResourceMetrics, ScopeMetrics, number_data_point, + }; + + let request = ExportMetricsServiceRequest { + resource_metrics: vec![ResourceMetrics { + resource: None, + scope_metrics: vec![ScopeMetrics { + scope: None, + metrics: vec![Metric { + name: "test.metric".to_string(), + description: String::new(), + unit: String::new(), + data: Some(metric::Data::Gauge(Gauge { + data_points: vec![NumberDataPoint { + attributes: Vec::new(), + start_time_unix_nano: 0, + time_unix_nano: 0, // Zero timestamp + exemplars: Vec::new(), + flags: 0, + value: 
Some(number_data_point::Value::AsDouble(1.0)), + }], + })), + }], + schema_url: String::new(), + }], + schema_url: String::new(), + }], + }; + + let data_points = parse_otlp_metrics(request).data_points; + assert_eq!(data_points.len(), 0); } } diff --git a/quickwit/quickwit-parquet-engine/Cargo.toml b/quickwit/quickwit-parquet-engine/Cargo.toml index f886083eded..9744dbcc2bc 100644 --- a/quickwit/quickwit-parquet-engine/Cargo.toml +++ b/quickwit/quickwit-parquet-engine/Cargo.toml @@ -11,9 +11,12 @@ authors.workspace = true license.workspace = true [dependencies] +anyhow = { workspace = true } arrow = { workspace = true } +chrono = { workspace = true } parquet = { workspace = true } quickwit-common = { workspace = true } +quickwit-proto = { workspace = true } sea-query = { workspace = true, optional = true } serde = { workspace = true } serde_json = { workspace = true } diff --git a/quickwit/quickwit-parquet-engine/src/index/accumulator.rs b/quickwit/quickwit-parquet-engine/src/index/accumulator.rs index 29065847136..817f3f16d93 100644 --- a/quickwit/quickwit-parquet-engine/src/index/accumulator.rs +++ b/quickwit/quickwit-parquet-engine/src/index/accumulator.rs @@ -14,15 +14,18 @@ //! Batch accumulator for producing splits from RecordBatches. +use std::collections::BTreeMap; +use std::sync::Arc; use std::time::Instant; +use arrow::array::{ArrayRef, new_null_array}; use arrow::compute::concat_batches; +use arrow::datatypes::{DataType, Field, Schema as ArrowSchema, SchemaRef}; use arrow::record_batch::RecordBatch; use tracing::{debug, info}; use super::config::ParquetIndexingConfig; use crate::metrics::PARQUET_ENGINE_METRICS; -use crate::schema::ParquetSchema; /// Error type for index operations. 
#[derive(Debug, thiserror::Error)] @@ -38,11 +41,14 @@ pub enum IndexingError { /// Batches are accumulated until either `max_rows` or `max_bytes` threshold is reached, /// at which point they are concatenated and returned for downstream processing (writing to /// Parquet by the ParquetPackager actor). +/// +/// Consecutive batches may have different column sets. The accumulator tracks the union +/// schema incrementally and aligns all batches to it on flush. pub struct ParquetBatchAccumulator { /// Configuration for accumulation thresholds. config: ParquetIndexingConfig, - /// Metrics schema for concatenation. - schema: ParquetSchema, + /// Union of all fields seen across pending batches. + union_fields: BTreeMap, /// Pending batches waiting to be flushed. pending_batches: Vec, /// Total rows in pending batches. @@ -57,11 +63,9 @@ impl ParquetBatchAccumulator { /// # Arguments /// * `config` - Configuration for accumulation thresholds pub fn new(config: ParquetIndexingConfig) -> Self { - let schema = ParquetSchema::new(); - Self { config, - schema, + union_fields: BTreeMap::new(), pending_batches: Vec::new(), pending_rows: 0, pending_bytes: 0, @@ -83,6 +87,13 @@ impl ParquetBatchAccumulator { .index_rows_total .inc_by(batch_rows as u64); + // Merge fields into union schema before pushing (we need the schema reference) + for field in batch.schema().fields() { + self.union_fields + .entry(field.name().clone()) + .or_insert_with(|| (field.data_type().clone(), field.is_nullable())); + } + self.pending_batches.push(batch); self.pending_rows += batch_rows; self.pending_bytes += batch_bytes; @@ -92,7 +103,7 @@ impl ParquetBatchAccumulator { batch_bytes, total_pending_rows = self.pending_rows, total_pending_bytes = self.pending_bytes, - "Added batch to accumulator" + "added batch to accumulator" ); let flushed = if self.should_flush() { @@ -101,7 +112,7 @@ impl ParquetBatchAccumulator { pending_bytes = self.pending_bytes, max_rows = self.config.max_rows, max_bytes = 
self.config.max_bytes, - "Threshold exceeded, triggering flush" + "threshold exceeded, triggering flush" ); self.flush_internal()? } else { @@ -119,6 +130,7 @@ impl ParquetBatchAccumulator { /// Discard all pending data without producing output. pub fn discard(&mut self) { self.pending_batches.clear(); + self.union_fields.clear(); self.pending_rows = 0; self.pending_bytes = 0; } @@ -136,14 +148,33 @@ impl ParquetBatchAccumulator { return Ok(None); } - // Concatenate all pending batches into one - let combined = concat_batches(self.schema.arrow_schema(), self.pending_batches.iter())?; + // Build the union schema from accumulated fields. + // All fields are marked nullable=true regardless of their source schema: + // any field that appears in some batches but not others will be null-filled + // for the missing batches, so non-nullable would cause Arrow to reject the concat. + let fields: Vec = self + .union_fields + .iter() + .map(|(name, (data_type, _nullable))| Field::new(name, data_type.clone(), true)) + .collect(); + let union_schema: SchemaRef = Arc::new(ArrowSchema::new(fields)); + + // Align each pending batch to the union schema + let aligned: Vec = self + .pending_batches + .iter() + .map(|batch| align_batch_to_schema(batch, &union_schema)) + .collect::, _>>()?; + + // Concatenate all aligned batches + let combined = concat_batches(&union_schema, aligned.iter())?; let num_rows = combined.num_rows(); - info!(num_rows, "Flushed accumulated batches"); + info!(num_rows, "flushed accumulated batches"); // Reset state self.pending_batches.clear(); + self.union_fields.clear(); self.pending_rows = 0; self.pending_bytes = 0; @@ -173,6 +204,24 @@ impl ParquetBatchAccumulator { } } +/// Align a RecordBatch to a target schema, inserting null columns where needed. 
+fn align_batch_to_schema( + batch: &RecordBatch, + target_schema: &SchemaRef, +) -> Result { + let num_rows = batch.num_rows(); + let batch_schema = batch.schema(); + let columns: Vec = target_schema + .fields() + .iter() + .map(|field| match batch_schema.index_of(field.name()) { + Ok(idx) => Arc::clone(batch.column(idx)), + Err(_) => new_null_array(field.data_type(), num_rows), + }) + .collect(); + RecordBatch::try_new(target_schema.clone(), columns) +} + /// Estimate the memory size of a RecordBatch. fn estimate_batch_bytes(batch: &RecordBatch) -> usize { batch @@ -184,106 +233,8 @@ fn estimate_batch_bytes(batch: &RecordBatch) -> usize { #[cfg(test)] mod tests { - use std::sync::Arc; - - use arrow::array::{ - ArrayRef, DictionaryArray, Float64Array, Int32Array, StringArray, UInt8Array, UInt64Array, - }; - use arrow::datatypes::Int32Type; - use parquet::variant::{VariantArrayBuilder, VariantBuilderExt}; - use super::*; - - /// Create dictionary array for string fields with Int32 keys. - fn create_dict_array(values: &[&str]) -> ArrayRef { - let keys: Vec = (0..values.len()).map(|i| i as i32).collect(); - let string_array = StringArray::from(values.to_vec()); - Arc::new( - DictionaryArray::::try_new(Int32Array::from(keys), Arc::new(string_array)) - .unwrap(), - ) - } - - /// Create nullable dictionary array for optional string fields. - fn create_nullable_dict_array(values: &[Option<&str>]) -> ArrayRef { - let keys: Vec> = values - .iter() - .enumerate() - .map(|(i, v)| v.map(|_| i as i32)) - .collect(); - let string_values: Vec<&str> = values.iter().filter_map(|v| *v).collect(); - let string_array = StringArray::from(string_values); - Arc::new( - DictionaryArray::::try_new(Int32Array::from(keys), Arc::new(string_array)) - .unwrap(), - ) - } - - /// Create a VARIANT array for testing with specified number of rows. 
- fn create_variant_array(num_rows: usize, fields: Option<&[(&str, &str)]>) -> ArrayRef { - let mut builder = VariantArrayBuilder::new(num_rows); - for _ in 0..num_rows { - match fields { - Some(kv_pairs) => { - let mut obj = builder.new_object(); - for (key, value) in kv_pairs { - obj = obj.with_field(key, *value); - } - obj.finish(); - } - None => { - builder.append_null(); - } - } - } - ArrayRef::from(builder.build()) - } - - /// Create a test batch matching the metrics schema. - fn create_test_batch(num_rows: usize) -> RecordBatch { - let schema = ParquetSchema::new(); - - let metric_names: Vec<&str> = vec!["cpu.usage"; num_rows]; - let metric_name: ArrayRef = create_dict_array(&metric_names); - let metric_type: ArrayRef = Arc::new(UInt8Array::from(vec![0u8; num_rows])); - let metric_unit: ArrayRef = Arc::new(StringArray::from(vec![Some("bytes"); num_rows])); - let timestamps: Vec = (0..num_rows).map(|i| 100 + i as u64).collect(); - let timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(timestamps)); - let start_timestamp_secs: ArrayRef = - Arc::new(UInt64Array::from(vec![None::; num_rows])); - let values: Vec = (0..num_rows).map(|i| 42.0 + i as f64).collect(); - let value: ArrayRef = Arc::new(Float64Array::from(values)); - let tag_service: ArrayRef = create_nullable_dict_array(&vec![Some("web"); num_rows]); - let tag_env: ArrayRef = create_nullable_dict_array(&vec![Some("prod"); num_rows]); - let tag_datacenter: ArrayRef = - create_nullable_dict_array(&vec![Some("us-east-1"); num_rows]); - let tag_region: ArrayRef = create_nullable_dict_array(&vec![None; num_rows]); - let tag_host: ArrayRef = create_nullable_dict_array(&vec![Some("host-001"); num_rows]); - let attributes: ArrayRef = create_variant_array(num_rows, None); - let service_name: ArrayRef = create_dict_array(&vec!["my-service"; num_rows]); - let resource_attributes: ArrayRef = create_variant_array(num_rows, None); - - RecordBatch::try_new( - schema.arrow_schema().clone(), - vec![ - metric_name, - 
metric_type, - metric_unit, - timestamp_secs, - start_timestamp_secs, - value, - tag_service, - tag_env, - tag_datacenter, - tag_region, - tag_host, - attributes, - service_name, - resource_attributes, - ], - ) - .unwrap() - } + use crate::test_helpers::{create_test_batch, create_test_batch_with_tags}; #[test] fn test_accumulator_below_threshold() { @@ -351,6 +302,93 @@ mod tests { assert_eq!(combined.num_rows(), 50); } + #[test] + fn test_accumulator_merges_different_tag_sets() { + let config = ParquetIndexingConfig::default().with_max_rows(1000); + let mut accumulator = ParquetBatchAccumulator::new(config); + + // First batch has "service" tag + let batch1 = create_test_batch_with_tags(3, &["service"]); + let _ = accumulator.add_batch(batch1).unwrap(); + + // Second batch has "host" tag + let batch2 = create_test_batch_with_tags(2, &["host"]); + let _ = accumulator.add_batch(batch2).unwrap(); + + let combined = accumulator.flush().unwrap().unwrap(); + assert_eq!(combined.num_rows(), 5); + + // Union schema should have all 4 required fields + both tags + let schema = combined.schema(); + assert!(schema.index_of("metric_name").is_ok()); + assert!(schema.index_of("metric_type").is_ok()); + assert!(schema.index_of("timestamp_secs").is_ok()); + assert!(schema.index_of("value").is_ok()); + assert!(schema.index_of("service").is_ok()); + assert!(schema.index_of("host").is_ok()); + assert_eq!(schema.fields().len(), 6); + + // First 3 rows should have null "host", last 2 rows should have null "service" + let host_idx = schema.index_of("host").unwrap(); + let host_col = combined.column(host_idx); + assert_eq!(host_col.null_count(), 3); // first batch had no host + + let service_idx = schema.index_of("service").unwrap(); + let service_col = combined.column(service_idx); + assert_eq!(service_col.null_count(), 2); // second batch had no service + + // No duplicate column names — each name appears exactly once. 
+ let field_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect(); + let unique_count = field_names + .iter() + .collect::>() + .len(); + assert_eq!( + unique_count, + field_names.len(), + "duplicate columns in union schema: {field_names:?}" + ); + } + + #[test] + fn test_accumulator_no_duplicates_with_overlapping_tags() { + let config = ParquetIndexingConfig::default().with_max_rows(1000); + let mut accumulator = ParquetBatchAccumulator::new(config); + + // Both batches share "service"; second also has "host". + // "service" must appear exactly once in the flushed schema. + let batch1 = create_test_batch_with_tags(3, &["service"]); + let batch2 = create_test_batch_with_tags(2, &["service", "host"]); + let _ = accumulator.add_batch(batch1).unwrap(); + let _ = accumulator.add_batch(batch2).unwrap(); + + let combined = accumulator.flush().unwrap().unwrap(); + assert_eq!(combined.num_rows(), 5); + + let schema = combined.schema(); + let field_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect(); + let unique_count = field_names + .iter() + .collect::>() + .len(); + assert_eq!( + unique_count, + field_names.len(), + "duplicate columns in union schema: {field_names:?}" + ); + + // 4 required + service + host = 6 + assert_eq!(schema.fields().len(), 6); + + // Rows from batch1 have no "host" → 3 nulls; batch2 has "host" for all 2 rows → 0 nulls. + let host_idx = schema.index_of("host").unwrap(); + assert_eq!(combined.column(host_idx).null_count(), 3); + + // "service" present in both batches → 0 nulls total. 
+ let service_idx = schema.index_of("service").unwrap(); + assert_eq!(combined.column(service_idx).null_count(), 0); + } + #[test] fn test_estimate_batch_bytes() { let batch = create_test_batch(100); diff --git a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs index 80dd4f322e5..21412414b2b 100644 --- a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs +++ b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs @@ -21,7 +21,7 @@ use arrow::record_batch::RecordBatch; use tracing::{debug, instrument, warn}; use crate::metrics::PARQUET_ENGINE_METRICS; -use crate::schema::ParquetSchema; +use crate::schema::validate_required_fields; /// Error type for ingest operations. #[derive(Debug, thiserror::Error)] @@ -34,9 +34,9 @@ pub enum IngestError { #[error("IPC decode error: {0}")] IpcDecode(#[from] arrow::error::ArrowError), - /// RecordBatch schema doesn't match expected metrics schema. - #[error("Schema mismatch: expected {expected} fields, got {actual}")] - SchemaMismatch { expected: usize, actual: usize }, + /// RecordBatch schema validation failed. + #[error("Schema validation failed: {0}")] + SchemaValidation(String), /// Expected exactly one RecordBatch in IPC stream. #[error("Expected 1 RecordBatch in IPC stream, got {0}")] @@ -53,17 +53,10 @@ pub enum IngestError { /// Processor that converts Arrow IPC bytes to RecordBatch. /// -/// Validates that the decoded batch matches the expected metrics schema. -pub struct ParquetIngestProcessor { - schema: ParquetSchema, -} +/// Validates that the decoded batch contains all required fields. +pub struct ParquetIngestProcessor; impl ParquetIngestProcessor { - /// Create a new ParquetIngestProcessor. - pub fn new(schema: ParquetSchema) -> Self { - Self { schema } - } - /// Convert Arrow IPC bytes to RecordBatch. /// /// Returns error if IPC is malformed or schema doesn't match. 
@@ -95,65 +88,15 @@ impl ParquetIngestProcessor { debug!( num_rows = batch.num_rows(), - "Successfully decoded IPC to RecordBatch" + "successfully decoded IPC to RecordBatch" ); Ok(batch) } - /// Validate that the RecordBatch schema matches expected metrics schema. + /// Validate that the RecordBatch schema contains all required fields. fn validate_schema(&self, batch: &RecordBatch) -> Result<(), IngestError> { - let expected_fields = self.schema.num_fields(); - let actual_fields = batch.schema().fields().len(); - - if expected_fields != actual_fields { - warn!( - expected = expected_fields, - actual = actual_fields, - "Schema mismatch: field count differs" - ); - return Err(IngestError::SchemaMismatch { - expected: expected_fields, - actual: actual_fields, - }); - } - - // Verify field names match (in order) - let schema_fields = self.schema.arrow_schema().fields(); - let batch_schema = batch.schema(); - let batch_fields = batch_schema.fields(); - - for (expected, actual) in schema_fields.iter().zip(batch_fields.iter()) { - if expected.name() != actual.name() { - warn!( - expected_name = expected.name(), - actual_name = actual.name(), - "Schema mismatch: field name differs" - ); - return Err(IngestError::SchemaMismatch { - expected: expected_fields, - actual: actual_fields, - }); - } - if expected.data_type() != actual.data_type() { - warn!( - field = expected.name(), - expected_type = ?expected.data_type(), - actual_type = ?actual.data_type(), - "Schema mismatch: field type differs" - ); - return Err(IngestError::SchemaMismatch { - expected: expected_fields, - actual: actual_fields, - }); - } - } - - Ok(()) - } - - /// Get a reference to the schema. 
- pub fn schema(&self) -> &ParquetSchema { - &self.schema + validate_required_fields(batch.schema().as_ref()) + .map_err(|e| IngestError::SchemaValidation(e.to_string())) } } @@ -168,11 +111,10 @@ fn ipc_to_record_batch(ipc_bytes: &[u8]) -> Result { return Err(IngestError::UnexpectedBatchCount(batches.len())); } - // Safe: we verified exactly 1 batch above, but use ok_or for defensive programming - batches + Ok(batches .into_iter() .next() - .ok_or(IngestError::UnexpectedBatchCount(0)) + .expect("len verified to be 1 above")) } /// Serialize a RecordBatch to Arrow IPC stream format. @@ -190,100 +132,12 @@ pub fn record_batch_to_ipc(batch: &RecordBatch) -> Result, IngestError> #[cfg(test)] mod tests { - use std::sync::Arc; - - use arrow::array::{ - ArrayRef, DictionaryArray, Float64Array, Int32Array, StringArray, UInt8Array, UInt64Array, - }; - use arrow::datatypes::Int32Type; - use parquet::variant::VariantArrayBuilder; - use super::*; - - /// Create dictionary array for string fields with Int32 keys. - fn create_dict_array(values: &[&str]) -> ArrayRef { - let keys: Vec = (0..values.len()).map(|i| i as i32).collect(); - let string_array = StringArray::from(values.to_vec()); - Arc::new( - DictionaryArray::::try_new(Int32Array::from(keys), Arc::new(string_array)) - .unwrap(), - ) - } - - /// Create nullable dictionary array for optional string fields. - fn create_nullable_dict_array(values: &[Option<&str>]) -> ArrayRef { - let keys: Vec> = values - .iter() - .enumerate() - .map(|(i, v)| v.map(|_| i as i32)) - .collect(); - let string_values: Vec<&str> = values.iter().filter_map(|v| *v).collect(); - let string_array = StringArray::from(string_values); - Arc::new( - DictionaryArray::::try_new(Int32Array::from(keys), Arc::new(string_array)) - .unwrap(), - ) - } - - /// Create a VARIANT array for testing with specified number of rows (all nulls). 
- fn create_variant_array(num_rows: usize) -> ArrayRef { - let mut builder = VariantArrayBuilder::new(num_rows); - for _ in 0..num_rows { - builder.append_null(); - } - ArrayRef::from(builder.build()) - } - - /// Create a test batch matching the metrics schema. - fn create_test_batch(num_rows: usize) -> RecordBatch { - let schema = ParquetSchema::new(); - - let metric_names: Vec<&str> = vec!["cpu.usage"; num_rows]; - let metric_name: ArrayRef = create_dict_array(&metric_names); - let metric_type: ArrayRef = Arc::new(UInt8Array::from(vec![0u8; num_rows])); - let metric_unit: ArrayRef = Arc::new(StringArray::from(vec![Some("bytes"); num_rows])); - let timestamps: Vec = (0..num_rows).map(|i| 100 + i as u64).collect(); - let timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(timestamps)); - let start_timestamp_secs: ArrayRef = - Arc::new(UInt64Array::from(vec![None::; num_rows])); - let values: Vec = (0..num_rows).map(|i| 42.0 + i as f64).collect(); - let value: ArrayRef = Arc::new(Float64Array::from(values)); - let tag_service: ArrayRef = create_nullable_dict_array(&vec![Some("web"); num_rows]); - let tag_env: ArrayRef = create_nullable_dict_array(&vec![Some("prod"); num_rows]); - let tag_datacenter: ArrayRef = - create_nullable_dict_array(&vec![Some("us-east-1"); num_rows]); - let tag_region: ArrayRef = create_nullable_dict_array(&vec![None; num_rows]); - let tag_host: ArrayRef = create_nullable_dict_array(&vec![Some("host-001"); num_rows]); - let attributes: ArrayRef = create_variant_array(num_rows); - let service_name: ArrayRef = create_dict_array(&vec!["my-service"; num_rows]); - let resource_attributes: ArrayRef = create_variant_array(num_rows); - - RecordBatch::try_new( - schema.arrow_schema().clone(), - vec![ - metric_name, - metric_type, - metric_unit, - timestamp_secs, - start_timestamp_secs, - value, - tag_service, - tag_env, - tag_datacenter, - tag_region, - tag_host, - attributes, - service_name, - resource_attributes, - ], - ) - .unwrap() - } + use 
crate::test_helpers::create_test_batch; #[test] fn test_process_ipc() { - let schema = ParquetSchema::new(); - let processor = ParquetIngestProcessor::new(schema); + let processor = ParquetIngestProcessor; // Create a valid batch let batch = create_test_batch(10); @@ -295,13 +149,12 @@ mod tests { let recovered = result.unwrap(); assert_eq!(recovered.num_rows(), 10); - assert_eq!(recovered.num_columns(), 14); + assert_eq!(recovered.num_columns(), 6); // 4 required + 2 tags } #[test] fn test_process_ipc_invalid_bytes() { - let schema = ParquetSchema::new(); - let processor = ParquetIngestProcessor::new(schema); + let processor = ParquetIngestProcessor; let result = processor.process_ipc(&[0u8; 10]); assert!(result.is_err()); diff --git a/quickwit/quickwit-parquet-engine/src/lib.rs b/quickwit/quickwit-parquet-engine/src/lib.rs index eac11bfa8e9..d34c67c665d 100644 --- a/quickwit/quickwit-parquet-engine/src/lib.rs +++ b/quickwit/quickwit-parquet-engine/src/lib.rs @@ -24,5 +24,10 @@ pub mod index; pub mod ingest; pub mod metrics; pub mod schema; +pub mod sort_fields; pub mod split; pub mod storage; +pub mod table_config; + +#[cfg(any(test, feature = "testsuite"))] +pub mod test_helpers; diff --git a/quickwit/quickwit-parquet-engine/src/schema/fields.rs b/quickwit/quickwit-parquet-engine/src/schema/fields.rs index b6a52c738aa..9f46dcf3b8c 100644 --- a/quickwit/quickwit-parquet-engine/src/schema/fields.rs +++ b/quickwit/quickwit-parquet-engine/src/schema/fields.rs @@ -12,151 +12,57 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Parquet field definitions with column metadata. +//! Parquet field definitions with sort order constants and validation. -use arrow::datatypes::{DataType, Field, Fields}; -use parquet::variant::VariantType; +use anyhow::{Result, bail}; +use arrow::datatypes::DataType; -/// All fields in the parquet schema. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum ParquetField { - MetricName, - MetricType, - MetricUnit, - TimestampSecs, - StartTimestampSecs, - Value, - TagService, - TagEnv, - TagDatacenter, - TagRegion, - TagHost, - Attributes, - ServiceName, - ResourceAttributes, -} +/// Required field names that must exist in every batch. +pub const REQUIRED_FIELDS: &[&str] = &["metric_name", "metric_type", "timestamp_secs", "value"]; -impl ParquetField { - /// Field name as stored in Parquet. - pub fn name(&self) -> &'static str { - match self { - Self::MetricName => "metric_name", - Self::MetricType => "metric_type", - Self::MetricUnit => "metric_unit", - Self::TimestampSecs => "timestamp_secs", - Self::StartTimestampSecs => "start_timestamp_secs", - Self::Value => "value", - Self::TagService => "tag_service", - Self::TagEnv => "tag_env", - Self::TagDatacenter => "tag_datacenter", - Self::TagRegion => "tag_region", - Self::TagHost => "tag_host", - Self::Attributes => "attributes", - Self::ServiceName => "service_name", - Self::ResourceAttributes => "resource_attributes", - } - } +/// Sort order column names. Columns not present in a batch are skipped. +pub const SORT_ORDER: &[&str] = &[ + "metric_name", + "service", + "env", + "datacenter", + "region", + "host", + "timestamp_secs", +]; - /// Whether this field is nullable. - pub fn is_nullable(&self) -> bool { - matches!( - self, - Self::MetricUnit - | Self::StartTimestampSecs - | Self::TagService - | Self::TagEnv - | Self::TagDatacenter - | Self::TagRegion - | Self::TagHost - | Self::Attributes - | Self::ResourceAttributes - ) +/// Arrow type for required fields by name. 
+pub fn required_field_type(name: &str) -> Option { + match name { + "metric_name" => Some(DataType::Dictionary( + Box::new(DataType::Int32), + Box::new(DataType::Utf8), + )), + "metric_type" => Some(DataType::UInt8), + "timestamp_secs" => Some(DataType::UInt64), + "value" => Some(DataType::Float64), + _ => None, } +} - /// Arrow DataType for this field. - /// Use dictionary encoding for high-cardinality strings. - pub fn arrow_type(&self) -> DataType { - match self { - // Dictionary-encoded strings for high cardinality - Self::MetricName | Self::ServiceName => { - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)) +/// Validate that a batch schema contains all required fields with correct types. +pub fn validate_required_fields(schema: &arrow::datatypes::Schema) -> Result<()> { + for &name in REQUIRED_FIELDS { + match schema.index_of(name) { + Ok(idx) => { + let expected_type = required_field_type(name).unwrap(); + let actual_type = schema.field(idx).data_type(); + if *actual_type != expected_type { + bail!( + "field '{}' has type {:?}, expected {:?}", + name, + actual_type, + expected_type + ); + } } - // Dictionary-encoded optional tags - Self::TagService - | Self::TagEnv - | Self::TagDatacenter - | Self::TagRegion - | Self::TagHost => { - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)) - } - // Enum stored as UInt8 - Self::MetricType => DataType::UInt8, - // Timestamps as UInt64 seconds - Self::TimestampSecs | Self::StartTimestampSecs => DataType::UInt64, - // Metric value - Self::Value => DataType::Float64, - // Plain string for metric unit - Self::MetricUnit => DataType::Utf8, - // VARIANT type for semi-structured attributes - // Uses the Parquet Variant binary encoding format - Self::Attributes | Self::ResourceAttributes => { - // VARIANT is stored as a struct with metadata and value BinaryView fields - // VariantArrayBuilder produces BinaryView, not Binary - DataType::Struct(Fields::from(vec![ - 
Field::new("metadata", DataType::BinaryView, false), - Field::new("value", DataType::BinaryView, false), - ])) - } - } - } - - /// Convert to Arrow Field. - pub fn to_arrow_field(&self) -> Field { - let field = Field::new(self.name(), self.arrow_type(), self.is_nullable()); - - // Add VARIANT extension type metadata for attributes fields - match self { - Self::Attributes | Self::ResourceAttributes => field.with_extension_type(VariantType), - _ => field, + Err(_) => bail!("missing required field '{}'", name), } } - - /// All fields in schema order. - pub fn all() -> &'static [ParquetField] { - &[ - Self::MetricName, - Self::MetricType, - Self::MetricUnit, - Self::TimestampSecs, - Self::StartTimestampSecs, - Self::Value, - Self::TagService, - Self::TagEnv, - Self::TagDatacenter, - Self::TagRegion, - Self::TagHost, - Self::Attributes, - Self::ServiceName, - Self::ResourceAttributes, - ] - } - - /// Sort order for metrics data (used for pruning). - /// Order: metric_name, common tags (service, env, datacenter, region, host), timestamp. - pub fn sort_order() -> &'static [ParquetField] { - &[ - Self::MetricName, - Self::TagService, - Self::TagEnv, - Self::TagDatacenter, - Self::TagRegion, - Self::TagHost, - Self::TimestampSecs, - ] - } - - /// Get the column index in the schema. 
- pub fn column_index(&self) -> usize { - Self::all().iter().position(|f| f == self).unwrap() - } + Ok(()) } diff --git a/quickwit/quickwit-parquet-engine/src/schema/mod.rs b/quickwit/quickwit-parquet-engine/src/schema/mod.rs index 4cbe31d357d..f9b5c06d9c4 100644 --- a/quickwit/quickwit-parquet-engine/src/schema/mod.rs +++ b/quickwit/quickwit-parquet-engine/src/schema/mod.rs @@ -20,5 +20,5 @@ mod fields; mod parquet; -pub use fields::ParquetField; +pub use fields::{REQUIRED_FIELDS, SORT_ORDER, required_field_type, validate_required_fields}; pub use parquet::ParquetSchema; diff --git a/quickwit/quickwit-parquet-engine/src/schema/parquet.rs b/quickwit/quickwit-parquet-engine/src/schema/parquet.rs index 83ef99e351c..d0991f89807 100644 --- a/quickwit/quickwit-parquet-engine/src/schema/parquet.rs +++ b/quickwit/quickwit-parquet-engine/src/schema/parquet.rs @@ -14,13 +14,7 @@ //! Parquet schema construction for metrics. -use std::sync::Arc; - -use arrow::datatypes::{Schema as ArrowSchema, SchemaRef}; -use parquet::arrow::ArrowSchemaConverter; -use parquet::schema::types::SchemaDescriptor; - -use super::fields::ParquetField; +use arrow::datatypes::SchemaRef; /// Parquet schema for storage. #[derive(Debug, Clone)] @@ -29,15 +23,11 @@ pub struct ParquetSchema { } impl ParquetSchema { - /// Create a new ParquetSchema. - pub fn new() -> Self { - let fields: Vec<_> = ParquetField::all() - .iter() - .map(|f| f.to_arrow_field()) - .collect(); - - let arrow_schema = Arc::new(ArrowSchema::new(fields)); - Self { arrow_schema } + /// Create a ParquetSchema from an Arrow schema. + pub fn from_arrow_schema(schema: SchemaRef) -> Self { + Self { + arrow_schema: schema, + } } /// Get the Arrow schema. @@ -45,11 +35,6 @@ impl ParquetSchema { &self.arrow_schema } - /// Convert to Parquet schema descriptor. - pub fn parquet_schema(&self) -> Result { - ArrowSchemaConverter::new().convert(&self.arrow_schema) - } - /// Get field by name. 
pub fn field(&self, name: &str) -> Option<&arrow::datatypes::Field> { self.arrow_schema.field_with_name(name).ok() @@ -61,33 +46,42 @@ impl ParquetSchema { } } -impl Default for ParquetSchema { - fn default() -> Self { - Self::new() - } -} - #[cfg(test)] mod tests { + use std::sync::Arc; + + use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; + use super::*; + fn create_test_schema() -> SchemaRef { + Arc::new(ArrowSchema::new(vec![ + Field::new( + "metric_name", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + false, + ), + Field::new("metric_type", DataType::UInt8, false), + Field::new("timestamp_secs", DataType::UInt64, false), + Field::new("value", DataType::Float64, false), + Field::new( + "service", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + true, + ), + ])) + } + #[test] fn test_schema_creation() { - let schema = ParquetSchema::new(); - assert_eq!(schema.num_fields(), 14); + let schema = ParquetSchema::from_arrow_schema(create_test_schema()); + assert_eq!(schema.num_fields(), 5); } #[test] fn test_field_lookup() { - let schema = ParquetSchema::new(); + let schema = ParquetSchema::from_arrow_schema(create_test_schema()); let field = schema.field("metric_name").unwrap(); assert!(!field.is_nullable()); } - - #[test] - fn test_parquet_conversion() { - let schema = ParquetSchema::new(); - let parquet_schema = schema.parquet_schema(); - assert!(parquet_schema.is_ok()); - } } diff --git a/quickwit/quickwit-parquet-engine/src/sort_fields/column_type.rs b/quickwit/quickwit-parquet-engine/src/sort_fields/column_type.rs new file mode 100644 index 00000000000..21690f01e0e --- /dev/null +++ b/quickwit/quickwit-parquet-engine/src/sort_fields/column_type.rs @@ -0,0 +1,240 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Column type identification from name suffixes and string names. +//! +//! Type can be specified via Husky-convention suffixes (`__s`, `__i`, `__nf`) +//! or inferred from well-known bare names. The discriminant values match +//! the Go iota exactly for cross-system interoperability. + +use std::str::FromStr; + +use super::SortFieldsError; + +/// Well-known column name for timestamps. +pub const TIMESTAMP: &str = "timestamp"; + +/// Well-known column name for tiebreaker. +pub const TIEBREAKER: &str = "tiebreaker"; + +/// Well-known column name for timeseries ID hash. +pub const TIMESERIES_ID: &str = "timeseries_id"; + +/// Well-known column name for metric value. +pub const METRIC_VALUE: &str = "metric_value"; + +/// Column type IDs matching Go `types.TypeID` iota values. +/// +/// Only the types that appear in sort schemas are included here. +/// The discriminant values MUST match Go exactly for cross-system interop. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u64)] +pub enum ColumnTypeId { + Int64 = 2, + Float64 = 10, + String = 14, + Sketch = 17, + CpcSketch = 20, + ItemSketch = 22, +} + +impl ColumnTypeId { + /// The Husky-convention suffix for this column type. + /// + /// Used when serializing back to the string format with explicit types. + pub fn suffix(self) -> &'static str { + match self { + Self::Int64 => "__i", + Self::Float64 => "__nf", + Self::String => "__s", + Self::Sketch => "__sk", + Self::CpcSketch => "__cpcsk", + Self::ItemSketch => "__isk", + } + } + + /// Human-readable type name matching Go `TypeID.String()`. 
+ pub fn as_str(self) -> &'static str { + match self { + Self::Int64 => "dense-int64", + Self::Float64 => "dense-float64", + Self::String => "dense-string", + Self::Sketch => "dense-sketch", + Self::CpcSketch => "dense-cpc-sketch", + Self::ItemSketch => "dense-item-sketch", + } + } + + /// Resolve column type from a column name, stripping any type suffix. + /// + /// Returns `(bare_name, type)`. Type resolution order: + /// 1. Explicit suffix (`__s`, `__i`, `__nf`, etc.) — stripped, type from suffix + /// 2. Well-known bare name defaults: + /// - `timestamp`, `tiebreaker`, `timeseries_id` → Int64 + /// - `metric_value` → Float64 + /// - everything else → String + pub fn from_column_name(name: &str) -> Result<(&str, Self), SortFieldsError> { + // Try explicit suffixes first (longest match first to avoid ambiguity). + if let Some(bare) = name.strip_suffix("__isk") { + return Ok((bare, Self::ItemSketch)); + } + if let Some(bare) = name.strip_suffix("__cpcsk") { + return Ok((bare, Self::CpcSketch)); + } + if let Some(bare) = name.strip_suffix("__sk") { + return Ok((bare, Self::Sketch)); + } + if let Some(bare) = name.strip_suffix("__nf") { + return Ok((bare, Self::Float64)); + } + if let Some(bare) = name.strip_suffix("__i") { + return Ok((bare, Self::Int64)); + } + if let Some(bare) = name.strip_suffix("__s") { + return Ok((bare, Self::String)); + } + + // No suffix — use well-known name defaults. + Ok((name, default_type_for_name(name))) + } +} + +/// Default column type and sort direction for a bare column name. +/// +/// This is the single source of truth for well-known column defaults. +/// Used by the parser (type inference, default direction), display +/// (suffix omission, direction omission), and validation. +pub struct ColumnDefaults { + pub column_type: ColumnTypeId, + /// True if the default sort direction is descending. + pub descending: bool, +} + +/// Well-known name → default type and sort direction lookup table. 
+/// +/// Columns not in this table default to String, ascending. +static WELL_KNOWN_COLUMNS: &[(&str, ColumnDefaults)] = &[ + ( + TIMESTAMP, + ColumnDefaults { + column_type: ColumnTypeId::Int64, + descending: true, + }, + ), + ( + "timestamp_secs", + ColumnDefaults { + column_type: ColumnTypeId::Int64, + descending: true, + }, + ), + ( + TIEBREAKER, + ColumnDefaults { + column_type: ColumnTypeId::Int64, + descending: false, + }, + ), + ( + TIMESERIES_ID, + ColumnDefaults { + column_type: ColumnTypeId::Int64, + descending: false, + }, + ), + ( + METRIC_VALUE, + ColumnDefaults { + column_type: ColumnTypeId::Float64, + descending: false, + }, + ), + ( + "value", + ColumnDefaults { + column_type: ColumnTypeId::Float64, + descending: false, + }, + ), +]; + +const DEFAULT_COLUMN: ColumnDefaults = ColumnDefaults { + column_type: ColumnTypeId::String, + descending: false, +}; + +/// Look up default type and direction for a bare column name. +pub fn column_defaults(name: &str) -> &'static ColumnDefaults { + WELL_KNOWN_COLUMNS + .iter() + .find(|(n, _)| *n == name) + .map(|(_, d)| d) + .unwrap_or(&DEFAULT_COLUMN) +} + +/// Default column type for a bare name (convenience wrapper). +pub fn default_type_for_name(name: &str) -> ColumnTypeId { + column_defaults(name).column_type +} + +/// Whether this bare name defaults to descending sort. +pub fn default_is_descending(name: &str) -> bool { + column_defaults(name).descending +} + +impl std::fmt::Display for ColumnTypeId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.as_str()) + } +} + +/// Parse a type name string (e.g., "dense-int64") into a `ColumnTypeId`. 
+impl FromStr for ColumnTypeId {
+    type Err = SortFieldsError;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "dense-int64" => Ok(Self::Int64),
+            "dense-float64" => Ok(Self::Float64),
+            "dense-string" => Ok(Self::String),
+            "dense-sketch" => Ok(Self::Sketch),
+            "dense-cpc-sketch" => Ok(Self::CpcSketch),
+            "dense-item-sketch" => Ok(Self::ItemSketch),
+            _ => Err(SortFieldsError::UnknownColumnType(format!(
+                "unknown column type '{}'",
+                s
+            ))),
+        }
+    }
+}
+
+/// Convert a proto `column_type` u64 back to a `ColumnTypeId`.
+impl TryFrom<u64> for ColumnTypeId {
+    type Error = SortFieldsError;
+
+    fn try_from(value: u64) -> Result<Self, Self::Error> {
+        match value {
+            2 => Ok(Self::Int64),
+            10 => Ok(Self::Float64),
+            14 => Ok(Self::String),
+            17 => Ok(Self::Sketch),
+            20 => Ok(Self::CpcSketch),
+            22 => Ok(Self::ItemSketch),
+            _ => Err(SortFieldsError::UnknownColumnType(format!(
+                "unknown column type id: {}",
+                value
+            ))),
+        }
+    }
+}
diff --git a/quickwit/quickwit-parquet-engine/src/sort_fields/display.rs b/quickwit/quickwit-parquet-engine/src/sort_fields/display.rs
new file mode 100644
index 00000000000..8a6d74f482a
--- /dev/null
+++ b/quickwit/quickwit-parquet-engine/src/sort_fields/display.rs
@@ -0,0 +1,124 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Sort schema to string serialization -- direct port of Go `SchemaToString` and
+//! `SchemaToStringShort`.
+//!
+//!
The proto `SortColumn.name` stores the bare Parquet column name (no type suffix). +//! These functions reconstruct the Husky-format suffixed name for serialization +//! using `SortColumn.column_type` to determine the suffix. + +use quickwit_proto::sortschema::{SortColumn, SortColumnDirection, SortSchema}; + +use super::column_type::{ColumnTypeId, default_is_descending, default_type_for_name}; + +fn direction_str(sort_direction: i32) -> &'static str { + match SortColumnDirection::try_from(sort_direction) { + Ok(SortColumnDirection::SortDirectionAscending) => ":+", + Ok(SortColumnDirection::SortDirectionDescending) => ":-", + _ => ":???", + } +} + +fn type_str(column_type: u64) -> &'static str { + match ColumnTypeId::try_from(column_type) { + Ok(ct) => ct.as_str(), + Err(_) => "unknown", + } +} + +/// Reconstruct the column name for the Husky string format. +/// +/// Only appends the type suffix when the column's type differs from the +/// default for its bare name. This keeps the string short and readable: +/// `metric_name` (default String) → no suffix needed +/// `timestamp` (default Int64) → no suffix needed +/// `my_counter__i` → suffix needed (Int64 differs from default String) +fn display_name(col: &SortColumn) -> String { + let col_type = match ColumnTypeId::try_from(col.column_type) { + Ok(ct) => ct, + Err(_) => return col.name.clone(), + }; + let default_type = default_type_for_name(&col.name); + if col_type == default_type { + col.name.clone() + } else { + format!("{}{}", col.name, col_type.suffix()) + } +} + +/// Convert a `SortSchema` to its full string representation. +/// +/// Format: `[name=]column__suffix:type:+/-[|...][/V#]` +/// +/// Direct port of Go `SchemaToString`. +pub fn schema_to_string(schema: &SortSchema) -> String { + schema_to_string_inner(schema, true) +} + +/// Convert a `SortSchema` to its short string representation. 
+/// +/// Format: `[name=]column__suffix[|...][/V#]` +/// +/// Omits the explicit type and skips the sort direction when it matches the +/// default (ascending for non-timestamp, descending for timestamp). +/// +/// Direct port of Go `SchemaToStringShort`. +pub fn schema_to_string_short(schema: &SortSchema) -> String { + schema_to_string_inner(schema, false) +} + +/// Shared implementation for both full and short schema string formats. +/// +/// When `verbose` is true, includes the explicit type and always emits direction. +/// When `verbose` is false, omits type and skips direction when it matches the default. +fn schema_to_string_inner(schema: &SortSchema, verbose: bool) -> String { + let mut rv = String::new(); + + if !schema.name.is_empty() { + rv.push_str(&schema.name); + rv.push('='); + } + + for (i, col) in schema.column.iter().enumerate() { + if i > 0 { + rv.push('|'); + } + if schema.lsm_comparison_cutoff > 0 && i == schema.lsm_comparison_cutoff as usize { + rv.push('&'); + } + rv.push_str(&display_name(col)); + + if verbose { + rv.push(':'); + rv.push_str(type_str(col.column_type)); + rv.push_str(direction_str(col.sort_direction)); + } else { + let is_default_direction = if default_is_descending(&col.name) { + col.sort_direction == SortColumnDirection::SortDirectionDescending as i32 + } else { + col.sort_direction == SortColumnDirection::SortDirectionAscending as i32 + }; + if !is_default_direction { + rv.push_str(direction_str(col.sort_direction)); + } + } + } + + if schema.sort_version > 0 { + rv.push_str(&format!("/V{}", schema.sort_version)); + } + + rv +} diff --git a/quickwit/quickwit-parquet-engine/src/sort_fields/equivalence.rs b/quickwit/quickwit-parquet-engine/src/sort_fields/equivalence.rs new file mode 100644 index 00000000000..403ec9fd135 --- /dev/null +++ b/quickwit/quickwit-parquet-engine/src/sort_fields/equivalence.rs @@ -0,0 +1,62 @@ +// Copyright 2021-Present Datadog, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Sort schema equivalence comparison -- direct port of Go `EquivalentSchemas` +//! and `EquivalentSchemasForCompaction`. + +use quickwit_proto::sortschema::SortSchema; + +/// Base comparison: checks sort_version and all columns match (name, type, direction). +/// +/// Hand-rolled comparison (not proto equality) because the Go compactor calls this +/// in tight loops on 10s-100s of thousands of fragments and proto.Equal allocates. +fn equivalent_schemas_base(a: &SortSchema, b: &SortSchema) -> bool { + if a.sort_version != b.sort_version { + return false; + } + if a.column.len() != b.column.len() { + return false; + } + for (a_col, b_col) in a.column.iter().zip(b.column.iter()) { + if a_col.name != b_col.name { + return false; + } + if a_col.column_type != b_col.column_type { + return false; + } + if a_col.sort_direction != b_col.sort_direction { + return false; + } + } + true +} + +/// Check if two schemas are equivalent, ignoring names and versioned schema. +/// +/// Compares columns, sort_version, and `lsm_comparison_cutoff`. +/// +/// Direct port of Go `EquivalentSchemas`. +pub fn equivalent_schemas(a: &SortSchema, b: &SortSchema) -> bool { + equivalent_schemas_base(a, b) && a.lsm_comparison_cutoff == b.lsm_comparison_cutoff +} + +/// Check if two schemas are equivalent for compaction purposes. 
+/// +/// Same as `equivalent_schemas` but ignores `lsm_comparison_cutoff`, providing +/// backward compatibility when old cplanners send steps without LSM cutoff info. +/// +/// Direct port of Go `EquivalentSchemasForCompaction`. +pub fn equivalent_schemas_for_compaction(a: &SortSchema, b: &SortSchema) -> bool { + equivalent_schemas_base(a, b) +} diff --git a/quickwit/quickwit-parquet-engine/src/sort_fields/mod.rs b/quickwit/quickwit-parquet-engine/src/sort_fields/mod.rs new file mode 100644 index 00000000000..3043531c557 --- /dev/null +++ b/quickwit/quickwit-parquet-engine/src/sort_fields/mod.rs @@ -0,0 +1,52 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Sort fields types, parsing, and time-window arithmetic for metrics compaction. +//! +//! The sort fields define how rows are ordered within a Parquet split, +//! which determines merge and compaction behavior. The parser is a direct +//! port of Husky's Go `schemautils.go` for cross-system interoperability. + +// TableConfig.effective_sort_fields() is wired into ParquetWriter at construction +// time. The writer resolves sort field names to physical ParquetField columns; +// columns not yet in the schema (e.g., timeseries_id) are skipped during sort +// but recorded in metadata. +// +// TODO(Phase 32): Wire per-index TableConfig into IndexConfig so each index can +// override the default sort fields. 
Currently all metrics indexes use +// ProductType::Metrics default. +// +// When accepting user-supplied sort_fields for metrics indexes, validation MUST +// reject schemas that do not include timeseries_id__i immediately before timestamp. +// The timeseries_id tiebreaker is mandatory for metrics to ensure deterministic +// sort order across splits with identical tag combinations. + +pub mod column_type; +pub mod display; +pub mod equivalence; +pub mod parser; +pub mod validation; +pub mod window; + +#[cfg(test)] +mod tests; + +// Public API re-exports. +pub use column_type::ColumnTypeId; +pub use display::{schema_to_string, schema_to_string_short}; +pub use equivalence::{equivalent_schemas, equivalent_schemas_for_compaction}; +pub use parser::parse_sort_fields; +pub use quickwit_proto::SortFieldsError; +pub use validation::validate_schema; +pub use window::{validate_window_duration, window_start}; diff --git a/quickwit/quickwit-parquet-engine/src/sort_fields/parser.rs b/quickwit/quickwit-parquet-engine/src/sort_fields/parser.rs new file mode 100644 index 00000000000..77b11757b29 --- /dev/null +++ b/quickwit/quickwit-parquet-engine/src/sort_fields/parser.rs @@ -0,0 +1,338 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Sort schema string parser -- direct port of Go `StringToSchema`. +//! +//! Parses Husky-style sort schema strings like: +//! `"metric_name|host|env|timeseries_id|timestamp/V2"` +//! 
into a `SortSchema` proto with correct column names, types, directions, and version. + +use quickwit_proto::sortschema::{SortColumn, SortColumnDirection, SortSchema}; + +use super::SortFieldsError; +use super::column_type::{ColumnTypeId, default_is_descending}; +use super::validation::validate_schema; + +/// The minimum sort version we accept. V0 (INCORRECT_TRIM) and V1 (TRIMMED_WITH_BUDGET) +/// are rejected per the strict V2-only decision. +const MINIMUM_SORT_VERSION: i32 = 2; + +/// Parse a sort schema string into a `SortSchema` proto. +/// +/// Direct port of Go `StringToSchema`. Accepts the format: +/// `[name=]column[|column...][/V#]` +/// +/// Each column can be: +/// - `name` (1-part): infer type from suffix, default direction +/// - `+name` or `name+` (1-part with prefix/suffix direction) +/// - `name:+/-` (2-part): infer type from suffix, explicit direction +/// - `name:type:+/-` (3-part): explicit type + verify matches suffix +/// +/// Direction (`+`/`-`) may appear as a prefix (`+name`), suffix (`name+`), +/// or after a colon (`name:+`). It is an error for direction to appear in +/// more than one position for a given column (e.g., `+name+` or `+name:+`). +/// +/// The `&` marker before a column name indicates the LSM comparison cutoff. +/// +/// **V2-only enforcement**: Only sort_version >= 2 is accepted. Unversioned +/// strings (default to 0), V0, and V1 are rejected. +pub fn parse_sort_fields(s: &str) -> Result { + let mut schema = SortSchema::default(); + let mut input = s; + + // Split on `=` for optional name prefix. + if let Some((name, rest)) = split_once_max2(input, '=', s)? { + schema.name = name.to_string(); + input = rest; + } + + // Split on `/` for version suffix. 
+ let sort_version = parse_version_suffix(&mut input, s)?; + if sort_version < MINIMUM_SORT_VERSION { + return Err(SortFieldsError::UnsupportedVersion { + version: sort_version, + minimum: MINIMUM_SORT_VERSION, + }); + } + schema.sort_version = sort_version; + + // Parse columns. + let mut cutoff_marker_count = 0; + + for (i, col_str) in input.split('|').enumerate() { + let col_remaining = parse_cutoff_marker(col_str, i, &mut cutoff_marker_count, &mut schema)?; + + let (prefix_dir, after_prefix) = strip_direction_prefix(col_remaining); + let parts: Vec<&str> = after_prefix.split(':').collect(); + + let column = match parts.len() { + 3 => parse_3part(parts, prefix_dir, col_str)?, + 2 => parse_2part(parts, prefix_dir, col_str)?, + 1 => { + schema + .column + .push(parse_1part(parts[0], prefix_dir, col_str)?); + continue; + } + _ => { + return Err(SortFieldsError::InvalidColumnFormat(format!( + "columns should be of the form 'name:type:+/-' or 'name:+/-' or 'name', \ + found: {}", + col_str + ))); + } + }; + + schema.column.push(column); + } + + if cutoff_marker_count > 0 && schema.column.len() < 2 { + return Err(SortFieldsError::InvalidCutoffPlacement( + "LSM cutoff marker (&) requires at least 2 columns".to_string(), + )); + } + + validate_schema(&schema)?; + Ok(schema) +} + +/// Parse the `/V#` version suffix, updating `input` to point at the columns portion. +/// Returns 0 if no version suffix is present. +fn parse_version_suffix(input: &mut &str, original: &str) -> Result { + let Some((columns, version_str)) = split_once_max2(input, '/', original)? 
else { + return Ok(0); + }; + let version_str = version_str.strip_prefix('V').ok_or_else(|| { + SortFieldsError::BadSortVersion(format!( + "mal-formatted sort schema '{}' -- bad sort version", + *input + )) + })?; + let version = version_str.parse::().map_err(|_| { + SortFieldsError::BadSortVersion(format!( + "mal-formatted sort schema '{}' parsing sort version", + *input + )) + })?; + *input = columns; + Ok(version) +} + +/// Handle the `&` LSM cutoff marker at the start of a column string. +/// Returns the remaining column string after stripping `&`. +fn parse_cutoff_marker<'a>( + col_str: &'a str, + column_index: usize, + cutoff_count: &mut usize, + schema: &mut SortSchema, +) -> Result<&'a str, SortFieldsError> { + let Some(rest) = col_str.strip_prefix('&') else { + if col_str.contains('&') { + return Err(SortFieldsError::MalformedSchema(format!( + "LSM cutoff marker (&) must appear at the beginning of column name, found in \ + middle of: {}", + col_str + ))); + } + return Ok(col_str); + }; + + *cutoff_count += 1; + if *cutoff_count > 1 { + return Err(SortFieldsError::InvalidCutoffPlacement( + "only one LSM cutoff marker (&) is allowed per schema".to_string(), + )); + } + schema.lsm_comparison_cutoff = column_index as i32; + if rest.is_empty() { + return Err(SortFieldsError::InvalidCutoffPlacement( + "LSM cutoff marker (&) must be followed by a valid column name".to_string(), + )); + } + if column_index == 0 { + return Err(SortFieldsError::InvalidCutoffPlacement( + "LSM cutoff marker (&) cannot be used on the first column as it would ignore all \ + columns" + .to_string(), + )); + } + if rest.contains('&') { + return Err(SortFieldsError::MalformedSchema(format!( + "LSM cutoff marker (&) must appear at the beginning of column name, found in middle \ + of: {}", + rest + ))); + } + Ok(rest) +} + +/// Resolve bare name and type from a column name string via `ColumnTypeId::from_column_name`. 
+fn resolve_name_type(name: &str) -> Result<(&str, ColumnTypeId), SortFieldsError> { + ColumnTypeId::from_column_name(name).map_err(|_| { + SortFieldsError::UnknownColumnType(format!( + "error determining type for column {} from suffix", + name + )) + }) +} + +/// Parse a 3-part column: `name__suffix:type:+/-`. +fn parse_3part( + parts: Vec<&str>, + prefix_dir: Option, + col_str: &str, +) -> Result { + let explicit_type: ColumnTypeId = parts[1].parse().map_err(|_| { + SortFieldsError::UnknownColumnType(format!( + "error determining type for column {}: unknown type '{}'", + parts[0], parts[1] + )) + })?; + let (bare_name, suffix_type) = resolve_name_type(parts[0])?; + if explicit_type != suffix_type { + return Err(SortFieldsError::TypeMismatch { + column: parts[0].to_string(), + from_suffix: suffix_type.to_string(), + explicit: explicit_type.to_string(), + }); + } + let colon_dir = parse_direction(parts[2])?; + if prefix_dir.is_some() { + return Err(SortFieldsError::DuplicateDirection(col_str.to_string())); + } + Ok(SortColumn { + name: bare_name.to_string(), + column_type: explicit_type as u64, + sort_direction: colon_dir, + }) +} + +/// Parse a 2-part column: `name__suffix:+/-`. +fn parse_2part( + parts: Vec<&str>, + prefix_dir: Option, + col_str: &str, +) -> Result { + // Reject direction suffix embedded in the name part: `name-:-` has + // direction in both the name suffix and the colon position. 
+ let (embedded_suffix_dir, name_without_suffix) = strip_direction_suffix(parts[0]); + if embedded_suffix_dir.is_some() { + return Err(SortFieldsError::DuplicateDirection(col_str.to_string())); + } + let (bare_name, col_type) = resolve_name_type(name_without_suffix)?; + let colon_dir = parse_direction(parts[1])?; + if prefix_dir.is_some() { + return Err(SortFieldsError::DuplicateDirection(col_str.to_string())); + } + Ok(SortColumn { + name: bare_name.to_string(), + column_type: col_type as u64, + sort_direction: colon_dir, + }) +} + +/// Parse a 1-part column: `name__suffix` with optional direction prefix/suffix. +fn parse_1part( + part: &str, + prefix_dir: Option, + col_str: &str, +) -> Result { + let (suffix_dir, suffixed_name) = strip_direction_suffix(part); + if prefix_dir.is_some() && suffix_dir.is_some() { + return Err(SortFieldsError::DuplicateDirection(col_str.to_string())); + } + let (bare_name, col_type) = resolve_name_type(suffixed_name)?; + let direction = prefix_dir.or(suffix_dir).unwrap_or_else(|| { + if default_is_descending(bare_name) { + SortColumnDirection::SortDirectionDescending as i32 + } else { + SortColumnDirection::SortDirectionAscending as i32 + } + }); + Ok(SortColumn { + name: bare_name.to_string(), + column_type: col_type as u64, + sort_direction: direction, + }) +} + +/// Split `input` on the first `sep`, returning None if no separator. +/// Errors if there are more than 2 parts (i.e., multiple separators). 
+fn split_once_max2<'a>( + input: &'a str, + sep: char, + original: &str, +) -> Result, SortFieldsError> { + let mut iter = input.splitn(3, sep); + let first = iter.next().unwrap(); // always present + let second = match iter.next() { + Some(s) => s, + None => return Ok(None), + }; + if iter.next().is_some() { + return Err(SortFieldsError::MalformedSchema(format!( + "mal-formatted sort schema '{}'", + original + ))); + } + Ok(Some((first, second))) +} + +/// Strip a leading `+` or `-` from a string, returning the direction and remainder. +fn strip_direction_prefix(s: &str) -> (Option, &str) { + if let Some(rest) = s.strip_prefix('+') { + ( + Some(SortColumnDirection::SortDirectionAscending as i32), + rest, + ) + } else if let Some(rest) = s.strip_prefix('-') { + ( + Some(SortColumnDirection::SortDirectionDescending as i32), + rest, + ) + } else { + (None, s) + } +} + +/// Strip a trailing `+` or `-` from a string, returning the direction and trimmed name. +fn strip_direction_suffix(s: &str) -> (Option, &str) { + if s.len() > 1 { + if let Some(rest) = s.strip_suffix('+') { + return ( + Some(SortColumnDirection::SortDirectionAscending as i32), + rest, + ); + } + if let Some(rest) = s.strip_suffix('-') { + return ( + Some(SortColumnDirection::SortDirectionDescending as i32), + rest, + ); + } + } + (None, s) +} + +/// Parse a sort direction string ("+" or "-") into the proto enum value. 
+fn parse_direction(s: &str) -> Result { + match s { + "+" => Ok(SortColumnDirection::SortDirectionAscending as i32), + "-" => Ok(SortColumnDirection::SortDirectionDescending as i32), + _ => Err(SortFieldsError::UnknownSortDirection(format!( + "unknown sort direction '{}'", + s + ))), + } +} diff --git a/quickwit/quickwit-parquet-engine/src/sort_fields/tests.rs b/quickwit/quickwit-parquet-engine/src/sort_fields/tests.rs new file mode 100644 index 00000000000..ee8e188916f --- /dev/null +++ b/quickwit/quickwit-parquet-engine/src/sort_fields/tests.rs @@ -0,0 +1,963 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Test suite ported from Go `schemautils_test.go` plus strict V2-only enforcement tests. 
+ +use quickwit_proto::sortschema::{SortColumnDirection, SortSchema}; + +use super::column_type::ColumnTypeId; +use super::display::{schema_to_string, schema_to_string_short}; +use super::equivalence::{equivalent_schemas, equivalent_schemas_for_compaction}; +use super::parser::parse_sort_fields; + +// --------------------------------------------------------------------------- +// Helper +// --------------------------------------------------------------------------- + +fn must_parse(s: &str) -> SortSchema { + parse_sort_fields(s).unwrap_or_else(|e| panic!("failed to parse '{}': {}", s, e)) +} + +// --------------------------------------------------------------------------- +// Strict V2-only enforcement tests +// --------------------------------------------------------------------------- + +#[test] +fn test_v2_only_rejects_unversioned() { + // No version suffix defaults to version 0 -> rejected. + let err = parse_sort_fields("timestamp").unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("unsupported") || msg.contains("version"), + "expected unsupported version error, got: {}", + msg + ); +} + +#[test] +fn test_v2_only_rejects_v0() { + let err = parse_sort_fields("timestamp/V0").unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("unsupported") || msg.contains("version"), + "expected unsupported version error, got: {}", + msg + ); +} + +#[test] +fn test_v2_only_rejects_v1() { + let err = parse_sort_fields("timestamp/V1").unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("unsupported") || msg.contains("version"), + "expected unsupported version error, got: {}", + msg + ); +} + +#[test] +fn test_v2_only_accepts_v2() { + let schema = must_parse("timestamp/V2"); + assert_eq!(schema.sort_version, 2); + assert_eq!(schema.column.len(), 1); + assert_eq!(schema.column[0].name, "timestamp"); +} + +#[test] +fn test_v2_only_accepts_v3() { + let schema = must_parse("timestamp/V3"); + assert_eq!(schema.sort_version, 3); +} + 
+// --------------------------------------------------------------------------- +// Port of Go TestStringToSchema -- error paths +// --------------------------------------------------------------------------- + +#[test] +fn test_string_to_schema_mangled() { + // Mangled schema with multiple `=` + assert!(parse_sort_fields("a=b=c/V2").is_err(), "must disallow >1 ="); +} + +#[test] +fn test_string_to_schema_invalid_column_format() { + assert!( + parse_sort_fields("name:dense-int64:+:what-is-this?/V2").is_err(), + "must disallow invalid column formats" + ); + assert!( + parse_sort_fields("name:dense-int64:+:what-is-this?:really?/V2").is_err(), + "must disallow invalid column formats" + ); +} + +#[test] +fn test_string_to_schema_3part_errors() { + // Invalid type name. + assert!( + parse_sort_fields("name__i:invalid-type:+|timestamp/V2").is_err(), + "must disallow an invalid type" + ); + // Type mismatch: suffix says int64 but explicit says string. + assert!( + parse_sort_fields("name__i:dense-string:+|timestamp/V2").is_err(), + "must disallow mismatch between type suffix and explicit type" + ); + // Invalid sort direction. + assert!( + parse_sort_fields("name__i:dense-int64:invalid-sort-direction|timestamp/V2").is_err(), + "must disallow an invalid sort direction" + ); +} + +#[test] +fn test_string_to_schema_2part_errors() { + // `name__x` no longer errors: unknown suffixes are treated as bare column names + // with default type (String), so `name__x` is a valid column name. + assert!( + parse_sort_fields("name__x:+|timestamp/V2").is_ok(), + "bare column names with unknown suffix-like patterns are now valid" + ); + // Invalid sort direction. + assert!( + parse_sort_fields("name__i:invalid-sort-direction|timestamp/V2").is_err(), + "must disallow an invalid sort direction" + ); +} + +#[test] +fn test_string_to_schema_missing_timestamp() { + // Semantically invalid: missing timestamp column. 
+ assert!( + parse_sort_fields("name__i:dense-int64:+/V2").is_err(), + "must disallow schema with missing timestamp column" + ); +} + +#[test] +fn test_string_to_schema_bad_version() { + // `/X` is not a valid version specification. + assert!( + parse_sort_fields("timestamp/X").is_err(), + "/X isn't a valid version specification" + ); + // `/VX` -- V followed by non-numeric. + assert!( + parse_sort_fields("timestamp/VX").is_err(), + "/VX isn't a valid version specification" + ); +} + +// --------------------------------------------------------------------------- +// LSM cutoff marker error paths (from Go TestStringToSchema) +// --------------------------------------------------------------------------- + +#[test] +fn test_lsm_cutoff_multiple_markers() { + assert!( + parse_sort_fields("service__s|&env__s|&source__s|timestamp/V2").is_err(), + "must disallow multiple LSM cutoff markers" + ); +} + +#[test] +fn test_lsm_cutoff_double_ampersand() { + assert!( + parse_sort_fields("service__s|&&env__s|timestamp/V2").is_err(), + "must disallow multiple consecutive LSM cutoff markers" + ); +} + +#[test] +fn test_lsm_cutoff_empty_after_marker() { + assert!( + parse_sort_fields("service__s|&|timestamp/V2").is_err(), + "must disallow empty column name after LSM cutoff marker" + ); +} + +#[test] +fn test_lsm_cutoff_in_middle() { + assert!( + parse_sort_fields("service__s|env&__s|timestamp/V2").is_err(), + "must disallow LSM cutoff marker in middle of column name" + ); +} + +#[test] +fn test_lsm_cutoff_single_column() { + assert!( + parse_sort_fields("×tamp/V2").is_err(), + "must disallow LSM cutoff marker on single column schema" + ); +} + +#[test] +fn test_lsm_cutoff_first_column() { + assert!( + parse_sort_fields("&service__s|env__s|timestamp/V2").is_err(), + "must disallow LSM cutoff marker on first column" + ); +} + +// --------------------------------------------------------------------------- +// Port of Go TestStringToSchema -- valid schemas +// 
--------------------------------------------------------------------------- + +#[test] +fn test_string_to_schema_timestamp_only() { + let s = must_parse("timestamp/V2"); + assert_eq!(s.column.len(), 1); + assert_eq!(s.column[0].name, "timestamp"); + assert_eq!(s.column[0].column_type, ColumnTypeId::Int64 as u64); + assert_eq!( + s.column[0].sort_direction, + SortColumnDirection::SortDirectionDescending as i32 + ); +} + +#[test] +fn test_string_to_schema_named_timestamp() { + let s = must_parse("defaultTimestampSchema=timestamp/V2"); + assert_eq!(s.name, "defaultTimestampSchema"); + assert_eq!(s.column.len(), 1); + assert_eq!(s.column[0].name, "timestamp"); + assert_eq!( + s.column[0].sort_direction, + SortColumnDirection::SortDirectionDescending as i32 + ); +} + +#[test] +fn test_string_to_schema_named_timestamp_explicit_direction() { + let s = must_parse("defaultTimestampSchema=timestamp:-/V2"); + assert_eq!(s.name, "defaultTimestampSchema"); + assert_eq!(s.column[0].name, "timestamp"); + assert_eq!( + s.column[0].sort_direction, + SortColumnDirection::SortDirectionDescending as i32 + ); +} + +#[test] +fn test_string_to_schema_named_timestamp_explicit_type() { + let s = must_parse("defaultTimestampSchema=timestamp:dense-int64:-/V2"); + assert_eq!(s.name, "defaultTimestampSchema"); + assert_eq!(s.column[0].name, "timestamp"); + assert_eq!(s.column[0].column_type, ColumnTypeId::Int64 as u64); + assert_eq!( + s.column[0].sort_direction, + SortColumnDirection::SortDirectionDescending as i32 + ); +} + +#[test] +fn test_string_to_schema_multi_column() { + let s = must_parse("testSchema=columnA__s|columnB__i:-|timestamp/V2"); + assert_eq!(s.name, "testSchema"); + assert_eq!(s.sort_version, 2); + assert_eq!(s.column.len(), 3); + + // columnA__s: string, ascending (default) + assert_eq!(s.column[0].name, "columnA"); + assert_eq!(s.column[0].column_type, ColumnTypeId::String as u64); + assert_eq!( + s.column[0].sort_direction, + SortColumnDirection::SortDirectionAscending 
as i32 + ); + + // columnB__i: int64, descending (explicit) + assert_eq!(s.column[1].name, "columnB"); + assert_eq!(s.column[1].column_type, ColumnTypeId::Int64 as u64); + assert_eq!( + s.column[1].sort_direction, + SortColumnDirection::SortDirectionDescending as i32 + ); + + // timestamp: int64, descending (default) + assert_eq!(s.column[2].name, "timestamp"); + assert_eq!(s.column[2].column_type, ColumnTypeId::Int64 as u64); + assert_eq!( + s.column[2].sort_direction, + SortColumnDirection::SortDirectionDescending as i32 + ); +} + +#[test] +fn test_string_to_schema_multi_column_explicit_type() { + let s = must_parse("testSchema=columnA__s:dense-string:+|columnB__i:-|timestamp/V2"); + assert_eq!(s.column[0].column_type, ColumnTypeId::String as u64); + assert_eq!( + s.column[0].sort_direction, + SortColumnDirection::SortDirectionAscending as i32 + ); +} + +// --------------------------------------------------------------------------- +// SchemaToString and SchemaToStringShort +// --------------------------------------------------------------------------- + +#[test] +fn test_schema_to_string_full() { + let s = must_parse("testSchema=columnA__s|columnB__i:-|timestamp/V2"); + let full = schema_to_string(&s); + // columnA has type String == default for a generic name, so no suffix in output. + // columnB has type Int64 != default String, so __i suffix is preserved. + assert_eq!( + full, + "testSchema=columnA:dense-string:+|columnB__i:dense-int64:-|timestamp:dense-int64:-/V2" + ); +} + +#[test] +fn test_schema_to_string_short() { + let s = must_parse("testSchema=columnA__s|columnB__i:-|timestamp/V2"); + let short = schema_to_string_short(&s); + // columnA has type String == default, so no suffix. columnB keeps __i (Int64 != default). 
+ assert_eq!(short, "testSchema=columnA|columnB__i:-|timestamp/V2"); +} + +// --------------------------------------------------------------------------- +// Round-trip tests +// --------------------------------------------------------------------------- + +#[test] +fn test_round_trip_short_form() { + // Inputs that round-trip to themselves: bare names (no suffix) and non-default + // typed columns already serialize without a suffix change. + let exact_round_trip_cases = [ + "timestamp/V2", + // `service` with no suffix: default String, serializes as `service` (no suffix). + "service|timestamp/V2", + "service|env|timestamp/V2", + // columnB__i: Int64 != default String, keeps __i suffix. + "testSchema=columnA|columnB__i:-|timestamp/V2", + ]; + for input in exact_round_trip_cases { + let parsed = must_parse(input); + let short = schema_to_string_short(&parsed); + assert_eq!(short, input, "short round-trip failed for '{}'", input); + + // Also verify full round-trip: parse(to_string(schema)) == schema. + let full = schema_to_string(&parsed); + let reparsed = must_parse(&full); + assert_eq!( + parsed.column.len(), + reparsed.column.len(), + "column count mismatch after full round-trip for '{}'", + input + ); + for (a, b) in parsed.column.iter().zip(reparsed.column.iter()) { + assert_eq!(a.name, b.name); + assert_eq!(a.column_type, b.column_type); + assert_eq!(a.sort_direction, b.sort_direction); + } + assert_eq!(parsed.sort_version, reparsed.sort_version); + assert_eq!(parsed.lsm_comparison_cutoff, reparsed.lsm_comparison_cutoff); + } + + // Inputs with explicit __s suffix on default-String columns: the short form drops + // the suffix (it is not needed), but the semantic content is preserved. 
+ let semantic_round_trip_cases = [ + ("service__s|timestamp/V2", "service|timestamp/V2"), + ("service__s|env__s|timestamp/V2", "service|env|timestamp/V2"), + ( + "testSchema=columnA__s|columnB__i:-|timestamp/V2", + "testSchema=columnA|columnB__i:-|timestamp/V2", + ), + ]; + for (input, expected_short) in semantic_round_trip_cases { + let parsed = must_parse(input); + let short = schema_to_string_short(&parsed); + assert_eq!(short, expected_short, "short form mismatch for '{}'", input); + + // Verify semantic round-trip: parse the short output and compare protos. + let reparsed = must_parse(&short); + assert_eq!( + parsed.column.len(), + reparsed.column.len(), + "column count mismatch after semantic round-trip for '{}'", + input + ); + for (a, b) in parsed.column.iter().zip(reparsed.column.iter()) { + assert_eq!(a.name, b.name); + assert_eq!(a.column_type, b.column_type); + assert_eq!(a.sort_direction, b.sort_direction); + } + assert_eq!(parsed.sort_version, reparsed.sort_version); + assert_eq!(parsed.lsm_comparison_cutoff, reparsed.lsm_comparison_cutoff); + } +} + +// --------------------------------------------------------------------------- +// Port of Go TestEquivalentSchemas +// --------------------------------------------------------------------------- + +#[test] +fn test_equivalent_schemas_identical() { + let a = must_parse("timestamp/V2"); + let b = must_parse("timestamp/V2"); + assert!(equivalent_schemas(&a, &b)); + assert!(equivalent_schemas_for_compaction(&a, &b)); +} + +#[test] +fn test_equivalent_schemas_different_column_counts() { + let a = must_parse("service__s|timestamp/V2"); + let b = must_parse("timestamp/V2"); + assert!(!equivalent_schemas(&a, &b)); + assert!(!equivalent_schemas_for_compaction(&a, &b)); +} + +#[test] +fn test_equivalent_schemas_same_columns_different_names() { + let a = must_parse("service__s|timestamp/V2"); + let b = must_parse("serviceSchema=service__s|timestamp/V2"); + assert!(equivalent_schemas(&a, &b)); + 
assert!(equivalent_schemas_for_compaction(&a, &b)); +} + +#[test] +fn test_equivalent_schemas_different_column_order() { + let a = must_parse("env__s|service__s|timestamp/V2"); + let b = must_parse("serviceSchema=service__s|timestamp/V2"); + assert!(!equivalent_schemas(&a, &b)); + assert!(!equivalent_schemas_for_compaction(&a, &b)); +} + +#[test] +fn test_equivalent_schemas_different_versions() { + let a = must_parse("service__s|timestamp/V2"); + let b = must_parse("serviceSchema=service__s|timestamp/V3"); + assert!(!equivalent_schemas(&a, &b)); + assert!(!equivalent_schemas_for_compaction(&a, &b)); +} + +#[test] +fn test_equivalent_schemas_different_lsm_cutoffs() { + // Different LSM cutoffs: affects EquivalentSchemas but NOT EquivalentSchemasForCompaction. + let a = must_parse("service__s|&env__s|timestamp/V2"); + let b = must_parse("service__s|env__s|timestamp/V2"); + assert!( + !equivalent_schemas(&a, &b), + "different LSM cutoffs should not be equivalent" + ); + assert!( + equivalent_schemas_for_compaction(&a, &b), + "different LSM cutoffs should be equivalent for compaction" + ); +} + +#[test] +fn test_equivalent_schemas_different_cutoff_positions() { + let a = must_parse("service__s|&env__s|timestamp/V2"); + let b = must_parse("service__s|env__s|×tamp/V2"); + assert!(!equivalent_schemas(&a, &b)); + assert!(equivalent_schemas_for_compaction(&a, &b)); +} + +#[test] +fn test_equivalent_schemas_identical_lsm_cutoffs() { + let a = must_parse("service__s|&env__s|timestamp/V2"); + let b = must_parse("service__s|&env__s|timestamp/V2"); + assert!(equivalent_schemas(&a, &b)); + assert!(equivalent_schemas_for_compaction(&a, &b)); +} + +// --------------------------------------------------------------------------- +// Port of Go TestStringToSchemaWithLSMCutoff +// --------------------------------------------------------------------------- + +#[test] +fn test_lsm_cutoff_after_first_column() { + let s = must_parse("service__s|&env__s|timestamp/V2"); + 
assert_eq!(s.lsm_comparison_cutoff, 1); + assert_eq!(s.column[1].name, "env"); // "&" stripped +} + +#[test] +fn test_lsm_cutoff_after_second_column() { + let s = must_parse("service__s|env__s|&source__s|timestamp/V2"); + assert_eq!(s.lsm_comparison_cutoff, 2); + assert_eq!(s.column[2].name, "source"); +} + +#[test] +fn test_lsm_cutoff_before_timestamp() { + let s = must_parse("service__s|env__s|source__s|×tamp/V2"); + assert_eq!(s.lsm_comparison_cutoff, 3); + assert_eq!(s.column[3].name, "timestamp"); +} + +#[test] +fn test_lsm_cutoff_named_schema() { + let s = must_parse("testSchema=service__s|&env__s|timestamp/V2"); + assert_eq!(s.name, "testSchema"); + assert_eq!(s.lsm_comparison_cutoff, 1); +} + +#[test] +fn test_lsm_cutoff_with_version() { + let s = must_parse("service__s|&env__s|timestamp/V2"); + assert_eq!(s.lsm_comparison_cutoff, 1); + assert_eq!(s.sort_version, 2); +} + +#[test] +fn test_lsm_cutoff_with_explicit_type_direction() { + let s = must_parse("service__s:dense-string:+|&env__s:dense-string:+|timestamp/V2"); + assert_eq!(s.lsm_comparison_cutoff, 1); + assert_eq!( + s.column[0].sort_direction, + SortColumnDirection::SortDirectionAscending as i32 + ); +} + +#[test] +fn test_no_lsm_cutoff() { + let s = must_parse("service__s|env__s|timestamp/V2"); + assert_eq!(s.lsm_comparison_cutoff, 0); +} + +// --------------------------------------------------------------------------- +// Port of Go TestSchemaToStringWithLSMCutoff +// --------------------------------------------------------------------------- + +#[test] +fn test_schema_to_string_preserves_cutoff_marker_short() { + let s = must_parse("service__s|&env__s|timestamp/V2"); + // String columns with default type serialize without the __s suffix. 
+ assert_eq!(schema_to_string_short(&s), "service|&env|timestamp/V2"); +} + +#[test] +fn test_schema_to_string_preserves_cutoff_marker_full() { + let s = must_parse("service__s|&env__s|timestamp/V2"); + // String columns with default type serialize without the __s suffix in full form too. + let expected = "service:dense-string:+|&env:dense-string:+|timestamp:dense-int64:-/V2"; + assert_eq!(schema_to_string(&s), expected); +} + +#[test] +fn test_schema_to_string_named_with_cutoff() { + let s = must_parse("testSchema=service__s|&env__s|timestamp/V2"); + // String columns with default type serialize without the __s suffix. + assert_eq!( + schema_to_string_short(&s), + "testSchema=service|&env|timestamp/V2" + ); +} + +#[test] +fn test_schema_to_string_no_cutoff() { + let s = must_parse("service__s|env__s|timestamp/V2"); + // String columns with default type serialize without the __s suffix. + assert_eq!(schema_to_string_short(&s), "service|env|timestamp/V2"); +} + +// --------------------------------------------------------------------------- +// Port of Go TestLSMCutoffRoundTrip +// --------------------------------------------------------------------------- + +#[test] +fn test_lsm_cutoff_round_trip() { + // Pairs of (input, expected_short_output). + // String columns with default type serialize without the __s suffix, so inputs + // using __s produce shorter output that is semantically equivalent. 
+ let test_cases = [ + ( + "service__s|&env__s|timestamp/V2", + "service|&env|timestamp/V2", + ), + ( + "service__s|env__s|&source__s|timestamp/V2", + "service|env|&source|timestamp/V2", + ), + ( + "service__s|env__s|source__s|×tamp/V2", + "service|env|source|×tamp/V2", + ), + ( + "testSchema=service__s|&env__s|timestamp/V2", + "testSchema=service|&env|timestamp/V2", + ), + ("service__s|env__s|timestamp/V2", "service|env|timestamp/V2"), + ]; + for (input, expected_short) in test_cases { + let parsed = must_parse(input); + let result = schema_to_string_short(&parsed); + assert_eq!(result, expected_short, "round-trip failed for '{}'", input); + + // Verify the output parses back to the same proto (semantic round-trip). + let reparsed = must_parse(&result); + assert_eq!( + parsed.column.len(), + reparsed.column.len(), + "column count mismatch after round-trip for '{}'", + input + ); + for (a, b) in parsed.column.iter().zip(reparsed.column.iter()) { + assert_eq!(a.name, b.name); + assert_eq!(a.column_type, b.column_type); + assert_eq!(a.sort_direction, b.sort_direction); + } + assert_eq!( + parsed.lsm_comparison_cutoff, reparsed.lsm_comparison_cutoff, + "LSM cutoff mismatch for '{}'", + input + ); + } +} + +// --------------------------------------------------------------------------- +// Direction prefix/suffix tests +// --------------------------------------------------------------------------- + +#[test] +fn test_direction_prefix_ascending() { + let s = must_parse("+service__s|timestamp/V2"); + assert_eq!(s.column[0].name, "service"); + assert_eq!( + s.column[0].sort_direction, + SortColumnDirection::SortDirectionAscending as i32 + ); +} + +#[test] +fn test_direction_prefix_descending() { + let s = must_parse("-service__s|timestamp/V2"); + assert_eq!(s.column[0].name, "service"); + assert_eq!( + s.column[0].sort_direction, + SortColumnDirection::SortDirectionDescending as i32 + ); +} + +#[test] +fn test_direction_suffix_ascending() { + let s = 
must_parse("service__s+|timestamp/V2"); + assert_eq!(s.column[0].name, "service"); + assert_eq!( + s.column[0].sort_direction, + SortColumnDirection::SortDirectionAscending as i32 + ); +} + +#[test] +fn test_direction_suffix_descending() { + let s = must_parse("service__s-|timestamp/V2"); + assert_eq!(s.column[0].name, "service"); + assert_eq!( + s.column[0].sort_direction, + SortColumnDirection::SortDirectionDescending as i32 + ); +} + +#[test] +fn test_direction_prefix_descending_on_timestamp() { + // Explicit descending prefix on timestamp matches the default and is accepted. + let s = must_parse("service__s|-timestamp/V2"); + assert_eq!(s.column[1].name, "timestamp"); + assert_eq!( + s.column[1].sort_direction, + SortColumnDirection::SortDirectionDescending as i32 + ); +} + +#[test] +fn test_direction_prefix_ascending_on_timestamp_rejected() { + // Ascending timestamp is rejected by validation (timestamp must be descending). + assert!( + parse_sort_fields("service__s|+timestamp/V2").is_err(), + "ascending timestamp must be rejected by validation" + ); +} + +#[test] +fn test_direction_suffix_on_timestamp() { + let s = must_parse("service__s|timestamp-/V2"); + assert_eq!(s.column[1].name, "timestamp"); + assert_eq!( + s.column[1].sort_direction, + SortColumnDirection::SortDirectionDescending as i32 + ); +} + +#[test] +fn test_direction_prefix_and_suffix_is_error() { + // +name+ : direction on both sides + assert!( + parse_sort_fields("+service__s+|timestamp/V2").is_err(), + "must reject direction on both prefix and suffix" + ); + // -name- : same direction both sides, still error + assert!( + parse_sort_fields("-service__s-|timestamp/V2").is_err(), + "must reject direction on both prefix and suffix (same direction)" + ); + // +name- : conflicting directions + assert!( + parse_sort_fields("+service__s-|timestamp/V2").is_err(), + "must reject conflicting direction prefix and suffix" + ); +} + +#[test] +fn test_direction_prefix_with_colon_direction_is_error() { + // 
+name:+ : prefix direction + colon direction + assert!( + parse_sort_fields("+service__s:+|timestamp/V2").is_err(), + "must reject direction in both prefix and colon form" + ); +} + +#[test] +fn test_direction_suffix_with_colon_direction_is_error() { + // name-:- : suffix direction + colon direction + assert!( + parse_sort_fields("service__s-:-|timestamp/V2").is_err(), + "must reject direction in both suffix and colon form" + ); +} + +#[test] +fn test_direction_prefix_multi_column() { + let s = must_parse("-metric_name__s|+host__s|timestamp/V2"); + assert_eq!( + s.column[0].sort_direction, + SortColumnDirection::SortDirectionDescending as i32 + ); + assert_eq!( + s.column[1].sort_direction, + SortColumnDirection::SortDirectionAscending as i32 + ); + assert_eq!( + s.column[2].sort_direction, + SortColumnDirection::SortDirectionDescending as i32 // default for timestamp + ); +} + +#[test] +fn test_direction_prefix_with_lsm_cutoff() { + // &+env__s : cutoff marker then direction prefix + let s = must_parse("service__s|&+env__s|timestamp/V2"); + assert_eq!(s.lsm_comparison_cutoff, 1); + assert_eq!(s.column[1].name, "env"); + assert_eq!( + s.column[1].sort_direction, + SortColumnDirection::SortDirectionAscending as i32 + ); +} + +#[test] +fn test_direction_suffix_with_lsm_cutoff() { + let s = must_parse("service__s|&env__s-|timestamp/V2"); + assert_eq!(s.lsm_comparison_cutoff, 1); + assert_eq!(s.column[1].name, "env"); + assert_eq!( + s.column[1].sort_direction, + SortColumnDirection::SortDirectionDescending as i32 + ); +} + +// --------------------------------------------------------------------------- +// Bare name parsing (no type suffix — uses defaults) +// --------------------------------------------------------------------------- + +#[test] +fn test_bare_names_default_to_string() { + let s = must_parse("service|env|host|timestamp/V2"); + assert_eq!(s.column[0].name, "service"); + assert_eq!(s.column[0].column_type, ColumnTypeId::String as u64); + 
assert_eq!(s.column[1].name, "env"); + assert_eq!(s.column[1].column_type, ColumnTypeId::String as u64); + assert_eq!(s.column[2].name, "host"); + assert_eq!(s.column[2].column_type, ColumnTypeId::String as u64); +} + +#[test] +fn test_bare_timestamp_defaults_to_int64() { + let s = must_parse("service|timestamp/V2"); + assert_eq!(s.column[1].name, "timestamp"); + assert_eq!(s.column[1].column_type, ColumnTypeId::Int64 as u64); +} + +#[test] +fn test_bare_timeseries_id_defaults_to_int64() { + let s = must_parse("service|timeseries_id|timestamp/V2"); + assert_eq!(s.column[1].name, "timeseries_id"); + assert_eq!(s.column[1].column_type, ColumnTypeId::Int64 as u64); +} + +#[test] +fn test_bare_tiebreaker_defaults_to_int64() { + let s = must_parse("service|timestamp|tiebreaker/V2"); + assert_eq!(s.column[2].name, "tiebreaker"); + assert_eq!(s.column[2].column_type, ColumnTypeId::Int64 as u64); +} + +#[test] +fn test_bare_metric_value_defaults_to_float64() { + let s = must_parse("metric_value|timestamp/V2"); + assert_eq!(s.column[0].name, "metric_value"); + assert_eq!(s.column[0].column_type, ColumnTypeId::Float64 as u64); +} + +#[test] +fn test_bare_and_suffixed_produce_same_proto() { + let bare = must_parse("metric_name|host|timeseries_id|timestamp/V2"); + let suffixed = must_parse("metric_name__s|host__s|timeseries_id__i|timestamp/V2"); + assert_eq!(bare.column.len(), suffixed.column.len()); + for (a, b) in bare.column.iter().zip(suffixed.column.iter()) { + assert_eq!(a.name, b.name, "names should match"); + assert_eq!( + a.column_type, b.column_type, + "types should match for {}", + a.name + ); + assert_eq!( + a.sort_direction, b.sort_direction, + "directions should match for {}", + a.name + ); + } +} + +#[test] +fn test_suffix_overrides_default() { + // metric_value defaults to Float64, but __i suffix forces Int64 + let s = must_parse("metric_value__i|timestamp/V2"); + assert_eq!(s.column[0].name, "metric_value"); + assert_eq!(s.column[0].column_type, 
ColumnTypeId::Int64 as u64); +} + +#[test] +fn test_display_omits_default_suffix() { + let s = must_parse("metric_name|host|timestamp/V2"); + let short = schema_to_string_short(&s); + // All columns use default types, so no suffixes in output. + assert_eq!(short, "metric_name|host|timestamp/V2"); +} + +#[test] +fn test_display_includes_non_default_suffix() { + // Force host to Int64 (non-default for bare "host" which defaults to String) + let s = must_parse("metric_name|host__i|timestamp/V2"); + let short = schema_to_string_short(&s); + assert_eq!(short, "metric_name|host__i|timestamp/V2"); +} + +// --------------------------------------------------------------------------- +// timeseries_id handling +// --------------------------------------------------------------------------- + +#[test] +fn test_timeseries_id_as_int64() { + let s = must_parse("metric_name__s|host__s|timeseries_id__i|timestamp/V2"); + assert_eq!(s.column.len(), 4); + + // timeseries_id__i should be TypeIDInt64, ascending. 
+ let ts_id_col = &s.column[2]; + assert_eq!(ts_id_col.name, "timeseries_id"); + assert_eq!(ts_id_col.column_type, ColumnTypeId::Int64 as u64); + assert_eq!( + ts_id_col.sort_direction, + SortColumnDirection::SortDirectionAscending as i32 + ); +} + +// --------------------------------------------------------------------------- +// Float and sketch column types +// --------------------------------------------------------------------------- + +#[test] +fn test_float_column() { + let s = must_parse("value__nf|timestamp/V2"); + assert_eq!(s.column[0].column_type, ColumnTypeId::Float64 as u64); + assert_eq!( + s.column[0].sort_direction, + SortColumnDirection::SortDirectionAscending as i32 + ); +} + +#[test] +fn test_sketch_column() { + let s = must_parse("latency__sk|timestamp/V2"); + assert_eq!(s.column[0].column_type, ColumnTypeId::Sketch as u64); +} + +// --------------------------------------------------------------------------- +// SchemasToString / SchemasToStringShort multi-schema (convenience) +// --------------------------------------------------------------------------- + +#[test] +fn test_schemas_to_string() { + let schema1 = must_parse("test=key1__s|timestamp/V2"); + let schema2 = must_parse("key2__i|timestamp/V2"); + + let schemas = [schema1, schema2]; + let strings: Vec<String> = schemas.iter().map(schema_to_string).collect(); + let actual = strings.join(","); + + // key1 has type String == default, so no suffix. key2 has type Int64 != default, keeps __i. + assert_eq!( + actual, + "test=key1:dense-string:+|timestamp:dense-int64:-/V2,key2__i:dense-int64:+|timestamp:\ + dense-int64:-/V2" + ); +} + +#[test] +fn test_schemas_to_string_short() { + let schema1 = must_parse("test=key1__s|timestamp/V2"); + let schema2 = must_parse("key2__i|timestamp/V2"); + + let schemas = [schema1, schema2]; + let strings: Vec<String> = schemas.iter().map(schema_to_string_short).collect(); + let actual = strings.join(","); + + // key1 has type String == default, so no suffix.
key2 has type Int64 != default, keeps __i. + assert_eq!(actual, "test=key1|timestamp/V2,key2__i|timestamp/V2"); +} + +// --------------------------------------------------------------------------- +// ColumnTypeId TryFrom (proto deserialization path) +// --------------------------------------------------------------------------- + +#[test] +fn test_column_type_try_from_u64_valid() { + assert_eq!(ColumnTypeId::try_from(2u64).unwrap(), ColumnTypeId::Int64); + assert_eq!( + ColumnTypeId::try_from(10u64).unwrap(), + ColumnTypeId::Float64 + ); + assert_eq!(ColumnTypeId::try_from(14u64).unwrap(), ColumnTypeId::String); + assert_eq!(ColumnTypeId::try_from(17u64).unwrap(), ColumnTypeId::Sketch); + assert_eq!( + ColumnTypeId::try_from(20u64).unwrap(), + ColumnTypeId::CpcSketch + ); + assert_eq!( + ColumnTypeId::try_from(22u64).unwrap(), + ColumnTypeId::ItemSketch + ); +} + +#[test] +fn test_column_type_try_from_u64_invalid() { + assert!(ColumnTypeId::try_from(0u64).is_err()); + assert!(ColumnTypeId::try_from(1u64).is_err()); + assert!(ColumnTypeId::try_from(99u64).is_err()); +} diff --git a/quickwit/quickwit-parquet-engine/src/sort_fields/validation.rs b/quickwit/quickwit-parquet-engine/src/sort_fields/validation.rs new file mode 100644 index 00000000000..3a33db82f6f --- /dev/null +++ b/quickwit/quickwit-parquet-engine/src/sort_fields/validation.rs @@ -0,0 +1,114 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
Sort schema validation -- direct port of Go `ValidateSchema`. + +use std::collections::HashSet; + +use quickwit_proto::sortschema::{SortColumnDirection, SortSchema}; + +use super::SortFieldsError; +use super::column_type::{ColumnTypeId, TIEBREAKER, default_is_descending}; + +/// Name used for the special skip-builder schema that does not require timestamp. +const DEFAULT_SKIP_BUILDER_SCHEMA_NAME: &str = "defaultSkipBuilderSchema"; + +/// Check if a bare column name is a timestamp column (defaults to descending). +fn is_timestamp_column(name: &str) -> bool { + default_is_descending(name) +} + +/// Validate a sort schema, enforcing all rules from Go `ValidateSchema`. +/// +/// Rules: +/// - Schema must have at least one column. +/// - No duplicate column names. +/// - Sort direction must not be Unknown. +/// - `timestamp` must be present (unless schema name is `defaultSkipBuilderSchema`). +/// - `timestamp` must be Int64 and descending (unless it's a msgid schema). +/// - `timestamp` must come before `tiebreaker`. +/// - No non-tiebreaker columns may appear after `timestamp`. 
+pub fn validate_schema(schema: &SortSchema) -> Result<(), SortFieldsError> { + if schema.column.is_empty() { + return Err(SortFieldsError::ValidationError("empty schema".to_string())); + } + + let mut seen: HashSet<&str> = HashSet::new(); + let is_msgid = schema.version == 2 || schema.name == "defaultMsgIDsSchema"; + + for col in &schema.column { + let name = col.name.as_str(); + + if seen.contains(name) { + return Err(SortFieldsError::ValidationError(format!( + "column {} is duplicated in schema", + name + ))); + } + seen.insert(name); + + if col.sort_direction == SortColumnDirection::SortDirectionUnknown as i32 { + return Err(SortFieldsError::ValidationError(format!( + "column {} does not specify a sort direction in schema", + name + ))); + } + + let has_seen_timestamp = seen.iter().any(|s| is_timestamp_column(s)); + + if is_timestamp_column(name) { + if seen.contains(TIEBREAKER) { + return Err(SortFieldsError::ValidationError(format!( + "{} column must come before {} in schema", + name, TIEBREAKER + ))); + } + if col.sort_direction != SortColumnDirection::SortDirectionDescending as i32 + && !is_msgid + { + return Err(SortFieldsError::ValidationError(format!( + "{} column must sorted in descending order in schema", + name + ))); + } + if col.column_type != ColumnTypeId::Int64 as u64 { + return Err(SortFieldsError::ValidationError(format!( + "{} column must be of type int64 in schema", + name + ))); + } + } else if name == TIEBREAKER { + if !has_seen_timestamp { + return Err(SortFieldsError::ValidationError(format!( + "timestamp column must come before {} in schema", + TIEBREAKER + ))); + } + } else if has_seen_timestamp && !is_msgid { + return Err(SortFieldsError::ValidationError(format!( + "column {} is after timestamp but timestamp must be the last schema column", + name + ))); + } + } + + let has_timestamp = schema.column.iter().any(|c| is_timestamp_column(&c.name)); + if !has_timestamp && schema.name != DEFAULT_SKIP_BUILDER_SCHEMA_NAME { + return 
Err(SortFieldsError::ValidationError( + "timestamp column is required, but is missing from schema".to_string(), + )); + } + + Ok(()) +} diff --git a/quickwit/quickwit-parquet-engine/src/sort_fields/window.rs b/quickwit/quickwit-parquet-engine/src/sort_fields/window.rs new file mode 100644 index 00000000000..b1ad896e31f --- /dev/null +++ b/quickwit/quickwit-parquet-engine/src/sort_fields/window.rs @@ -0,0 +1,240 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Canonical time-window functions for metrics compaction. +//! +//! `window_start` is the foundational time-partitioning function used by +//! ingestion (Phase 32), merge policy (Phase 33), and compaction planning +//! (Phase 35). Correctness at boundary conditions -- especially negative +//! timestamps and zero-crossing -- is critical because an off-by-one error +//! silently misroutes data to wrong windows. +//! +//! The implementation uses `rem_euclid` instead of the `%` operator to handle +//! negative timestamps correctly. Standard `%` truncates toward zero, which +//! gives wrong results for negative inputs: +//! - `-1 % 900 = -1` (wrong: would compute window_start as 0) +//! - `(-1i64).rem_euclid(900) = 899` (correct: window_start = -1 - 899 = -900) + +use chrono::{DateTime, Utc}; + +use super::SortFieldsError; + +/// Validate that a window duration evenly divides one hour (3600 seconds). 
+/// +/// This is ADR-003 invariant TW-2: all windows within an hour must have +/// identical boundaries regardless of when counting starts. 3600 has 45 +/// positive divisors (1, 2, 3, ..., 1800, 3600). Any of these are accepted. +/// In practice, metrics systems use durations >= 60s: 60, 120, 180, 240, +/// 300, 360, 600, 720, 900, 1200, 1800, 3600. +/// +/// A duration of 0 is rejected as nonsensical. Durations that do not evenly +/// divide 3600 are rejected because they would produce inconsistent window +/// boundaries across different starting points within an hour. +pub fn validate_window_duration(duration_secs: u32) -> Result<(), SortFieldsError> { + if duration_secs == 0 { + return Err(SortFieldsError::InvalidWindowDuration { + duration_secs, + reason: "must be positive", + }); + } + if 3600 % duration_secs != 0 { + return Err(SortFieldsError::InvalidWindowDuration { + duration_secs, + reason: "must evenly divide 3600 (one hour)", + }); + } + Ok(()) +} + +/// Compute the start of the time window containing the given timestamp. +/// +/// Uses `rem_euclid` for correct handling of negative timestamps (before Unix +/// epoch). Standard `%` truncates toward zero: `-1 % 900 = -1` (wrong). +/// `rem_euclid` always returns non-negative: `(-1i64).rem_euclid(900) = 899`. +/// So `window_start(-1, 900) = -1 - 899 = -900` (correct: timestamp -1 is in +/// window [-900, 0)). +/// +/// # Invariants (verified by proptest) +/// - Window start is aligned: `window_start % duration == 0` +/// - Timestamp is contained: `window_start <= timestamp < window_start + duration` +/// - Deterministic: same inputs always produce same output +/// +/// # Errors +/// Returns `SortFieldsError::WindowStartOutOfRange` if the computed start +/// timestamp cannot be represented as a `DateTime`. 
+pub fn window_start( + timestamp_secs: i64, + duration_secs: i64, +) -> Result<DateTime<Utc>, SortFieldsError> { + debug_assert!(duration_secs > 0, "window duration must be positive"); + // TW-2 (ADR-003): window duration must evenly divide one hour. + // This ensures window boundaries align across hours and days. + debug_assert!( + 3600 % duration_secs == 0, + "TW-2 violated: duration_secs={} does not divide 3600", + duration_secs + ); + let remainder = timestamp_secs.rem_euclid(duration_secs); + let start_secs = timestamp_secs - remainder; + DateTime::from_timestamp(start_secs, 0).ok_or(SortFieldsError::WindowStartOutOfRange { + timestamp_secs: start_secs, + }) +} + +#[cfg(test)] +mod tests { + use proptest::prelude::*; + + use super::*; + + // ----------------------------------------------------------------------- + // Proptest properties + // ----------------------------------------------------------------------- + + proptest! { + #[test] + fn window_start_is_aligned( + ts in -1_000_000_000i64..2_000_000_000i64, + dur in prop::sample::select(vec![60i64, 120, 180, 240, 300, 360, + 600, 720, 900, 1200, 1800, 3600]) + ) { + let ws = window_start(ts, dur).unwrap(); + let ws_secs = ws.timestamp(); + // window_start is aligned to duration + prop_assert_eq!(ws_secs.rem_euclid(dur), 0); + // timestamp is within [window_start, window_start + duration) + prop_assert!(ws_secs <= ts); + prop_assert!(ts < ws_secs + dur); + } + + #[test] + fn window_start_is_deterministic( + ts in -1_000_000_000i64..2_000_000_000i64, + dur in prop::sample::select(vec![60i64, 300, 900, 3600]) + ) { + let ws1 = window_start(ts, dur).unwrap(); + let ws2 = window_start(ts, dur).unwrap(); + prop_assert_eq!(ws1, ws2); + } + + #[test] + fn adjacent_windows_do_not_overlap( + ts in 0i64..1_000_000_000i64, + dur in prop::sample::select(vec![60i64, 300, 900, 3600]) + ) { + let ws = window_start(ts, dur).unwrap(); + let next_ws = window_start(ws.timestamp() + dur, dur).unwrap(); + // Next window starts exactly at
current window end + prop_assert_eq!(next_ws.timestamp(), ws.timestamp() + dur); + } + } + + // ----------------------------------------------------------------------- + // Unit tests: edge cases + // ----------------------------------------------------------------------- + + #[test] + fn test_negative_timestamp_crossing() { + let ws = window_start(-1, 900).unwrap(); + assert_eq!(ws.timestamp(), -900); + } + + #[test] + fn test_zero_timestamp() { + let ws = window_start(0, 900).unwrap(); + assert_eq!(ws.timestamp(), 0); + } + + #[test] + fn test_exactly_on_boundary() { + let ws = window_start(900, 900).unwrap(); + assert_eq!(ws.timestamp(), 900); + } + + #[test] + fn test_one_before_boundary() { + let ws = window_start(899, 900).unwrap(); + assert_eq!(ws.timestamp(), 0); + } + + #[test] + fn test_large_negative_timestamp() { + let ws = window_start(-3601, 3600).unwrap(); + assert_eq!(ws.timestamp(), -7200); + } + + #[test] + fn test_60s_window() { + let ws = window_start(1_700_000_042, 60).unwrap(); + assert_eq!(ws.timestamp(), 1_700_000_040); + } + + // ----------------------------------------------------------------------- + // Validation tests + // ----------------------------------------------------------------------- + + #[test] + fn test_valid_window_durations() { + let valid = [60, 120, 180, 240, 300, 360, 600, 720, 900, 1200, 1800, 3600]; + for dur in valid { + assert!( + validate_window_duration(dur).is_ok(), + "duration {} should be valid", + dur + ); + } + } + + #[test] + fn test_invalid_window_durations() { + // None of these evenly divide 3600. + let invalid = [0, 7, 11, 13, 17, 700, 1000, 1500, 2000, 2400, 7200]; + for dur in invalid { + assert!( + validate_window_duration(dur).is_err(), + "duration {} should be invalid", + dur + ); + } + } + + #[test] + fn test_small_valid_divisors_also_accepted() { + // The function accepts all positive divisors of 3600, not just >= 60. 
+ let small_valid = [ + 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 24, 25, 30, 36, 40, 45, 48, 50, + ]; + for dur in small_valid { + assert!( + validate_window_duration(dur).is_ok(), + "duration {} should be valid (divides 3600)", + dur + ); + } + } + + #[test] + fn test_zero_duration_error_message() { + let err = validate_window_duration(0).unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("must be positive"), "got: {msg}"); + } + + #[test] + fn test_non_divisor_error_message() { + let err = validate_window_duration(7).unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("must evenly divide 3600"), "got: {msg}"); + } +} diff --git a/quickwit/quickwit-parquet-engine/src/storage/config.rs b/quickwit/quickwit-parquet-engine/src/storage/config.rs index f87845d2ba9..2eb63d73510 100644 --- a/quickwit/quickwit-parquet-engine/src/storage/config.rs +++ b/quickwit/quickwit-parquet-engine/src/storage/config.rs @@ -14,12 +14,13 @@ //! Parquet writer configuration for metrics storage. +use arrow::datatypes::{DataType, Schema as ArrowSchema}; use parquet::basic::Compression as ParquetCompression; use parquet::file::metadata::SortingColumn; use parquet::file::properties::{EnabledStatistics, WriterProperties, WriterPropertiesBuilder}; use parquet::schema::types::ColumnPath; -use crate::schema::ParquetField; +use crate::schema::SORT_ORDER; /// Default row group size: 128K rows for efficient columnar scans. const DEFAULT_ROW_GROUP_SIZE: usize = 128 * 1024; @@ -117,8 +118,9 @@ impl ParquetWriterConfig { self } - /// Convert to Parquet WriterProperties. - pub fn to_writer_properties(&self) -> WriterProperties { + /// Convert to Parquet WriterProperties using the given Arrow schema to configure + /// per-column settings like dictionary encoding and bloom filters. 
+ pub fn to_writer_properties(&self, schema: &ArrowSchema) -> WriterProperties { let mut builder = WriterProperties::builder() .set_max_row_group_size(self.row_group_size) .set_data_page_size_limit(self.data_page_size) @@ -126,7 +128,7 @@ impl ParquetWriterConfig { // Enable column index for efficient pruning on sorted data (64 bytes default) .set_column_index_truncate_length(Some(64)) // Set sorting columns metadata for readers to use during pruning - .set_sorting_columns(Some(Self::sorting_columns())) + .set_sorting_columns(Some(Self::sorting_columns(schema))) // Enable row group level statistics (min/max/null_count) for query pruning // This allows DataFusion to skip row groups based on timestamp ranges .set_statistics_enabled(EnabledStatistics::Chunk); @@ -142,77 +144,57 @@ impl ParquetWriterConfig { Compression::Uncompressed => builder.set_compression(ParquetCompression::UNCOMPRESSED), }; - // Apply RLE_DICTIONARY encoding and bloom filters for dictionary columns - builder = Self::configure_dictionary_columns(builder); + // Apply dictionary encoding and bloom filters based on schema column types + builder = Self::configure_columns(builder, schema); builder.build() } - /// Configure dictionary encoding and bloom filters for high-cardinality columns. + /// Configure dictionary encoding and bloom filters based on the Arrow schema. /// - /// Dictionary-encoded columns benefit from: - /// - Dictionary encoding: Enabled by default, uses RLE for dictionary indices - /// - Bloom filters: Enable efficient equality filtering without scanning - /// - /// Note: Dictionary encoding is ON by default in Parquet. When enabled, the dictionary - /// indices are automatically encoded using RLE (run-length encoding), which efficiently - /// compresses runs of repeated values. This is ideal for sorted data where consecutive - /// rows often share the same dictionary index. 
- fn configure_dictionary_columns( + /// - Dictionary encoding is enabled on all Dictionary(Int32, Utf8) columns. + /// - Bloom filters are enabled on metric_name and sort order tag columns. + fn configure_columns( mut builder: WriterPropertiesBuilder, + schema: &ArrowSchema, ) -> WriterPropertiesBuilder { - // Dictionary-encoded columns - ensure dictionary encoding is explicitly enabled - // (default is true, but being explicit documents intent) - let dictionary_columns = [ - ParquetField::MetricName, - ParquetField::TagService, - ParquetField::TagEnv, - ParquetField::TagDatacenter, - ParquetField::TagRegion, - ParquetField::TagHost, - ParquetField::ServiceName, - ]; - - // Columns that benefit from bloom filters (used in WHERE clauses) - // Note: We enable bloom filters on filtering columns, not timestamp_secs or value - let bloom_filter_columns = [ - (ParquetField::MetricName, BLOOM_FILTER_NDV_METRIC_NAME), - (ParquetField::TagService, BLOOM_FILTER_NDV_TAGS), - (ParquetField::TagEnv, BLOOM_FILTER_NDV_TAGS), - (ParquetField::TagDatacenter, BLOOM_FILTER_NDV_TAGS), - (ParquetField::TagHost, BLOOM_FILTER_NDV_TAGS), - (ParquetField::ServiceName, BLOOM_FILTER_NDV_TAGS), - ]; - - // Ensure dictionary encoding is enabled on dictionary columns - // (dictionary encoding uses RLE for indices automatically) - for field in dictionary_columns { + for field in schema.fields() { let col_path = ColumnPath::new(vec![field.name().to_string()]); - builder = builder.set_column_dictionary_enabled(col_path, true); - } - // Enable bloom filters on filtering columns - for (field, ndv) in bloom_filter_columns { - let col_path = ColumnPath::new(vec![field.name().to_string()]); - builder = builder - .set_column_bloom_filter_enabled(col_path.clone(), true) - .set_column_bloom_filter_fpp(col_path.clone(), BLOOM_FILTER_FPP) - .set_column_bloom_filter_ndv(col_path, ndv); - } + // Enable dictionary encoding on all Dictionary(_, _) columns + if matches!(field.data_type(), DataType::Dictionary(_, 
_)) { + builder = builder.set_column_dictionary_enabled(col_path.clone(), true); + } + // Enable bloom filters on dictionary-typed metric_name and sort order tag columns. + // Exclude non-dictionary columns, like timestamp_secs. + let is_bloom_column = matches!(field.data_type(), DataType::Dictionary(_, _)) + && (field.name() == "metric_name" || SORT_ORDER.contains(&field.name().as_str())); + if is_bloom_column { + let ndv = if field.name() == "metric_name" { + BLOOM_FILTER_NDV_METRIC_NAME + } else { + BLOOM_FILTER_NDV_TAGS + }; + builder = builder + .set_column_bloom_filter_enabled(col_path.clone(), true) + .set_column_bloom_filter_fpp(col_path.clone(), BLOOM_FILTER_FPP) + .set_column_bloom_filter_ndv(col_path, ndv); + } + } builder } - /// Get the sorting columns for parquet metadata. - /// Order: metric_name, tag_service, tag_env, tag_datacenter, tag_region, tag_host, - /// timestamp_secs. - fn sorting_columns() -> Vec<SortingColumn> { - ParquetField::sort_order() + /// Get the sorting columns for parquet metadata, computed from the schema + /// and SORT_ORDER. Only columns present in the schema are included. + fn sorting_columns(schema: &ArrowSchema) -> Vec<SortingColumn> { + SORT_ORDER .iter() - .map(|field| SortingColumn { - column_idx: field.column_index() as i32, + .filter_map(|name| schema.index_of(name).ok()) + .map(|idx| SortingColumn { + column_idx: idx as i32, descending: false, - nulls_first: true, + nulls_first: false, }) .collect() } @@ -220,8 +202,39 @@ impl ParquetWriterConfig { #[cfg(test)] mod tests { + use arrow::datatypes::Field; + use super::*; + /// Create a test schema with required fields + some tag columns.
+ fn create_test_schema() -> ArrowSchema { + ArrowSchema::new(vec![ + Field::new( + "metric_name", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + false, + ), + Field::new("metric_type", DataType::UInt8, false), + Field::new("timestamp_secs", DataType::UInt64, false), + Field::new("value", DataType::Float64, false), + Field::new( + "service", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + true, + ), + Field::new( + "env", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + true, + ), + Field::new( + "host", + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), + true, + ), + ]) + } + #[test] fn test_default_config() { let config = ParquetWriterConfig::default(); @@ -245,93 +258,89 @@ mod tests { #[test] fn test_to_writer_properties_zstd() { let config = ParquetWriterConfig::default(); - let props = config.to_writer_properties(); - // WriterProperties doesn't expose compression directly, but we can verify it builds + let schema = create_test_schema(); + let props = config.to_writer_properties(&schema); assert!(props.max_row_group_size() == 128 * 1024); } #[test] fn test_to_writer_properties_snappy() { let config = ParquetWriterConfig::new().with_compression(Compression::Snappy); - let props = config.to_writer_properties(); + let schema = create_test_schema(); + let props = config.to_writer_properties(&schema); assert!(props.max_row_group_size() == 128 * 1024); } #[test] fn test_to_writer_properties_uncompressed() { let config = ParquetWriterConfig::new().with_compression(Compression::Uncompressed); - let props = config.to_writer_properties(); + let schema = create_test_schema(); + let props = config.to_writer_properties(&schema); assert!(props.max_row_group_size() == 128 * 1024); } #[test] fn test_bloom_filter_configuration() { let config = ParquetWriterConfig::default(); - let props = config.to_writer_properties(); + let schema = create_test_schema(); + 
let props = config.to_writer_properties(&schema); // Verify bloom filter is enabled on metric_name column let metric_name_path = ColumnPath::new(vec!["metric_name".to_string()]); let bloom_props = props.bloom_filter_properties(&metric_name_path); assert!( bloom_props.is_some(), - "Bloom filter should be enabled for metric_name" + "bloom filter should be enabled for metric_name" ); let bloom_props = bloom_props.unwrap(); assert!( (bloom_props.fpp - BLOOM_FILTER_FPP).abs() < 0.001, - "Bloom filter FPP should be {}", + "bloom filter FPP should be {}", BLOOM_FILTER_FPP ); assert_eq!( bloom_props.ndv, BLOOM_FILTER_NDV_METRIC_NAME, - "Bloom filter NDV for metric_name should be {}", + "bloom filter NDV for metric_name should be {}", BLOOM_FILTER_NDV_METRIC_NAME ); - // Verify bloom filter is enabled on tag columns - let tag_service_path = ColumnPath::new(vec!["tag_service".to_string()]); - let bloom_props = props.bloom_filter_properties(&tag_service_path); + // Verify bloom filter is enabled on service tag column (in SORT_ORDER) + let service_path = ColumnPath::new(vec!["service".to_string()]); + let bloom_props = props.bloom_filter_properties(&service_path); assert!( bloom_props.is_some(), - "Bloom filter should be enabled for tag_service" + "bloom filter should be enabled for service" ); let bloom_props = bloom_props.unwrap(); assert_eq!( bloom_props.ndv, BLOOM_FILTER_NDV_TAGS, - "Bloom filter NDV for tag columns should be {}", + "bloom filter NDV for tag columns should be {}", BLOOM_FILTER_NDV_TAGS ); - // Verify bloom filter is NOT enabled on timestamp_secs (not a filtering column) - let timestamp_path = ColumnPath::new(vec!["timestamp_secs".to_string()]); - let bloom_props = props.bloom_filter_properties(&timestamp_path); - assert!( - bloom_props.is_none(), - "Bloom filter should NOT be enabled for timestamp_secs" - ); - // Verify bloom filter is NOT enabled on value column let value_path = ColumnPath::new(vec!["value".to_string()]); let bloom_props = 
props.bloom_filter_properties(&value_path); assert!( bloom_props.is_none(), - "Bloom filter should NOT be enabled for value" + "bloom filter should NOT be enabled for value" ); } #[test] fn test_statistics_enabled() { let config = ParquetWriterConfig::default(); - let props = config.to_writer_properties(); + let schema = create_test_schema(); + let props = config.to_writer_properties(&schema); // Verify statistics are enabled at Chunk (row group) level let metric_name_path = ColumnPath::new(vec!["metric_name".to_string()]); assert_eq!( props.statistics_enabled(&metric_name_path), EnabledStatistics::Chunk, - "Statistics should be enabled at Chunk level" + "statistics should be enabled at Chunk level" ); // Verify for timestamp column as well (important for time range pruning) @@ -339,31 +348,24 @@ mod tests { assert_eq!( props.statistics_enabled(&timestamp_path), EnabledStatistics::Chunk, - "Statistics should be enabled at Chunk level for timestamp" + "statistics should be enabled at Chunk level for timestamp" ); } #[test] fn test_dictionary_encoding_enabled() { let config = ParquetWriterConfig::default(); - let props = config.to_writer_properties(); - - // Verify dictionary encoding is enabled for dictionary columns - let dictionary_columns = [ - "metric_name", - "tag_service", - "tag_env", - "tag_datacenter", - "tag_region", - "tag_host", - "service_name", - ]; + let schema = create_test_schema(); + let props = config.to_writer_properties(&schema); + + // Verify dictionary encoding is enabled for dictionary-typed columns + let dictionary_columns = ["metric_name", "service", "env", "host"]; for col_name in dictionary_columns { let col_path = ColumnPath::new(vec![col_name.to_string()]); assert!( props.dictionary_enabled(&col_path), - "Dictionary encoding should be enabled for {}", + "dictionary encoding should be enabled for {}", col_name ); } @@ -371,37 +373,31 @@ mod tests { #[test] fn test_sorting_columns_order() { - let sorting_cols = 
ParquetWriterConfig::sorting_columns(); + let schema = create_test_schema(); + let sorting_cols = ParquetWriterConfig::sorting_columns(&schema); - // Verify we have the expected number of sorting columns + // The test schema has metric_name (idx 0), timestamp_secs (idx 2), + // service (idx 4), env (idx 5), host (idx 6). + // SORT_ORDER is: metric_name, service, env, datacenter, region, host, timestamp_secs + // Only present columns are included, so: metric_name, service, env, host, timestamp_secs assert_eq!( sorting_cols.len(), - 7, - "Should have 7 sorting columns: metric_name, 5 tags, timestamp" + 5, + "should have 5 sorting columns from the test schema" ); - // Verify sort order matches expected: metric_name, tag_service, tag_env, - // tag_datacenter, tag_region, tag_host, timestamp_secs - let expected_order = [ - ParquetField::MetricName, - ParquetField::TagService, - ParquetField::TagEnv, - ParquetField::TagDatacenter, - ParquetField::TagRegion, - ParquetField::TagHost, - ParquetField::TimestampSecs, - ]; - - for (i, expected_field) in expected_order.iter().enumerate() { - assert_eq!( - sorting_cols[i].column_idx, - expected_field.column_index() as i32, - "Sorting column {} should be {}", - i, - expected_field.name() - ); - assert!(!sorting_cols[i].descending, "Sorting should be ascending"); - assert!(sorting_cols[i].nulls_first, "Nulls should be first"); + // Verify all are ascending with nulls first + for col in &sorting_cols { + assert!(!col.descending, "sorting should be ascending"); + assert!(!col.nulls_first, "nulls should be last"); } + + // Verify order matches SORT_ORDER filtered by schema presence: + // metric_name (idx 0), service (idx 4), env (idx 5), host (idx 6), timestamp_secs (idx 2) + assert_eq!(sorting_cols[0].column_idx, 0); // metric_name + assert_eq!(sorting_cols[1].column_idx, 4); // service + assert_eq!(sorting_cols[2].column_idx, 5); // env + assert_eq!(sorting_cols[3].column_idx, 6); // host + assert_eq!(sorting_cols[4].column_idx, 
2); // timestamp_secs } } diff --git a/quickwit/quickwit-parquet-engine/src/storage/split_writer.rs b/quickwit/quickwit-parquet-engine/src/storage/split_writer.rs index 8505eb0f75c..466e2c9cdcb 100644 --- a/quickwit/quickwit-parquet-engine/src/storage/split_writer.rs +++ b/quickwit/quickwit-parquet-engine/src/storage/split_writer.rs @@ -25,7 +25,6 @@ use tracing::{debug, info, instrument}; use super::config::ParquetWriterConfig; use super::writer::{ParquetWriteError, ParquetWriter}; -use crate::schema::{ParquetField, ParquetSchema}; use crate::split::{MetricsSplitMetadata, ParquetSplit, SplitId, TAG_SERVICE, TimeRange}; /// Writer that produces complete ParquetSplit with metadata from RecordBatch data. @@ -40,16 +39,11 @@ impl ParquetSplitWriter { /// Create a new ParquetSplitWriter. /// /// # Arguments - /// * `schema` - The metrics schema for validation /// * `config` - Parquet writer configuration /// * `base_path` - Directory where split files will be written - pub fn new( - schema: ParquetSchema, - config: ParquetWriterConfig, - base_path: impl Into, - ) -> Self { + pub fn new(config: ParquetWriterConfig, base_path: impl Into) -> Self { Self { - writer: ParquetWriter::new(schema, config), + writer: ParquetWriter::new(config), base_path: base_path.into(), } } @@ -89,7 +83,7 @@ impl ParquetSplitWriter { debug!( start_secs = time_range.start_secs, end_secs = time_range.end_secs, - "Extracted time range from batch" + "extracted time range from batch" ); // Extract distinct metric names from batch @@ -122,7 +116,7 @@ impl ParquetSplitWriter { split_id = %split_id, file_path = %file_path.display(), size_bytes, - "Split file written successfully" + "split file written successfully" ); Ok(ParquetSplit::new(metadata)) @@ -131,7 +125,11 @@ impl ParquetSplitWriter { /// Extracts the time range (min/max timestamp_secs) from a RecordBatch. 
fn extract_time_range(batch: &RecordBatch) -> Result<TimeRange, ParquetWriteError> { - let timestamp_col = batch.column(ParquetField::TimestampSecs.column_index()); + let timestamp_idx = batch + .schema() + .index_of("timestamp_secs") + .map_err(|_| ParquetWriteError::SchemaValidation("missing timestamp_secs column".into()))?; + let timestamp_col = batch.column(timestamp_idx); let timestamp_array = timestamp_col.as_primitive::<UInt64Type>(); let min_val = min(timestamp_array); @@ -151,7 +149,11 @@ fn extract_time_range(batch: &RecordBatch) -> Result<TimeRange, ParquetWriteError> /// Extracts distinct metric names from a RecordBatch. fn extract_metric_names(batch: &RecordBatch) -> Result<HashSet<String>, ParquetWriteError> { - let metric_col = batch.column(ParquetField::MetricName.column_index()); + let metric_idx = batch + .schema() + .index_of("metric_name") + .map_err(|_| ParquetWriteError::SchemaValidation("missing metric_name column".into()))?; + let metric_col = batch.column(metric_idx); let mut names = HashSet::new(); // The column is Dictionary(Int32, Utf8) @@ -180,7 +182,11 @@ fn extract_metric_names(batch: &RecordBatch) -> Result<HashSet<String>, ParquetW /// Extracts distinct service names from a RecordBatch. 
fn extract_service_names(batch: &RecordBatch) -> Result, ParquetWriteError> { - let service_col = batch.column(ParquetField::ServiceName.column_index()); + let service_idx = match batch.schema().index_of("service").ok() { + Some(idx) => idx, + None => return Ok(HashSet::new()), + }; + let service_col = batch.column(service_idx); let mut names = HashSet::new(); // The column is Dictionary(Int32, Utf8) @@ -211,84 +217,59 @@ fn extract_service_names(batch: &RecordBatch) -> Result, Parquet mod tests { use std::sync::Arc; - use arrow::array::{ - ArrayRef, DictionaryArray, Float64Array, Int32Array, StringArray, UInt8Array, UInt64Array, - }; - use arrow::datatypes::Int32Type; - use parquet::variant::{VariantArrayBuilder, VariantBuilderExt}; + use arrow::array::{ArrayRef, Float64Array, UInt8Array, UInt64Array}; + use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; use super::*; - - #[test] - fn test_column_indices_match_schema() { - let schema = ParquetSchema::new(); - for field in ParquetField::all() { - let expected: usize = - schema - .arrow_schema() - .index_of(field.name()) - .unwrap_or_else(|_| { - panic!("field {:?} should exist in arrow schema", field.name()) - }); - assert_eq!( - field.column_index(), - expected, - "column_index() for {:?} does not match arrow schema position", - field.name() - ); + use crate::test_helpers::{create_dict_array, create_nullable_dict_array}; + + /// Create a test batch with required fields, optional service column, and specified tag + /// columns. 
+ fn create_test_batch_with_options( + num_rows: usize, + metric_names: &[&str], + timestamps: &[u64], + service_names: Option<&[&str]>, + tags: &[&str], + ) -> RecordBatch { + let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)); + + let mut fields = vec![ + Field::new("metric_name", dict_type.clone(), false), + Field::new("metric_type", DataType::UInt8, false), + Field::new("timestamp_secs", DataType::UInt64, false), + Field::new("value", DataType::Float64, false), + ]; + if service_names.is_some() { + fields.push(Field::new("service", dict_type.clone(), true)); } - } + for tag in tags { + fields.push(Field::new(*tag, dict_type.clone(), true)); + } + let schema = Arc::new(ArrowSchema::new(fields)); - /// Create a VARIANT array for testing with specified number of rows. - fn create_variant_array(num_rows: usize, fields: Option<&[(&str, &str)]>) -> ArrayRef { - let mut builder = VariantArrayBuilder::new(num_rows); - for _ in 0..num_rows { - match fields { - Some(kv_pairs) => { - let mut obj = builder.new_object(); - for (key, value) in kv_pairs { - obj = obj.with_field(key, *value); - } - obj.finish(); - } - None => { - builder.append_null(); - } - } + let metric_name: ArrayRef = create_dict_array(metric_names); + let metric_type: ArrayRef = Arc::new(UInt8Array::from(vec![0u8; num_rows])); + let timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(timestamps.to_vec())); + let values: Vec = (0..num_rows).map(|i| 42.0 + i as f64).collect(); + let value: ArrayRef = Arc::new(Float64Array::from(values)); + + let mut columns: Vec = vec![metric_name, metric_type, timestamp_secs, value]; + + if let Some(svc_names) = service_names { + columns.push(create_dict_array(svc_names)); } - ArrayRef::from(builder.build()) - } - /// Create dictionary array for string fields with Int32 keys. 
- fn create_dict_array(values: &[&str]) -> ArrayRef { - let keys: Vec = (0..values.len()).map(|i| i as i32).collect(); - let string_array = StringArray::from(values.to_vec()); - Arc::new( - DictionaryArray::::try_new(Int32Array::from(keys), Arc::new(string_array)) - .unwrap(), - ) - } + for tag in tags { + let tag_values: Vec> = vec![Some(tag); num_rows]; + columns.push(create_nullable_dict_array(&tag_values)); + } - /// Create nullable dictionary array for optional string fields. - fn create_nullable_dict_array(values: &[Option<&str>]) -> ArrayRef { - let keys: Vec> = values - .iter() - .enumerate() - .map(|(i, v)| v.map(|_| i as i32)) - .collect(); - let string_values: Vec<&str> = values.iter().filter_map(|v| *v).collect(); - let string_array = StringArray::from(string_values); - Arc::new( - DictionaryArray::::try_new(Int32Array::from(keys), Arc::new(string_array)) - .unwrap(), - ) + RecordBatch::try_new(schema, columns).unwrap() } - /// Create a test batch with specified number of rows and test data. + /// Create a simple test batch with default values. 
fn create_test_batch(num_rows: usize) -> RecordBatch { - let schema = ParquetSchema::new(); - - // Generate test data let metric_names: Vec<&str> = (0..num_rows) .map(|i| { if i % 2 == 0 { @@ -299,82 +280,23 @@ mod tests { }) .collect(); let timestamps: Vec = (0..num_rows).map(|i| 100 + i as u64 * 10).collect(); - - // MetricName: Dictionary(Int32, Utf8) - let metric_name: ArrayRef = create_dict_array(&metric_names); - - // MetricType: UInt8 - let metric_type: ArrayRef = Arc::new(UInt8Array::from(vec![0u8; num_rows])); - - // MetricUnit: Utf8 (nullable) - let metric_unit: ArrayRef = Arc::new(StringArray::from(vec![Some("bytes"); num_rows])); - - // TimestampSecs: UInt64 - let timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(timestamps)); - - // StartTimestampSecs: UInt64 (nullable) - let start_timestamp_secs: ArrayRef = - Arc::new(UInt64Array::from(vec![None::; num_rows])); - - // Value: Float64 - let values: Vec = (0..num_rows).map(|i| 42.0 + i as f64).collect(); - let value: ArrayRef = Arc::new(Float64Array::from(values)); - - // TagService: Dictionary(Int32, Utf8) (nullable) - let tag_service: ArrayRef = create_nullable_dict_array(&vec![Some("web"); num_rows]); - - // TagEnv: Dictionary(Int32, Utf8) (nullable) - let tag_env: ArrayRef = create_nullable_dict_array(&vec![Some("prod"); num_rows]); - - // TagDatacenter: Dictionary(Int32, Utf8) (nullable) - let tag_datacenter: ArrayRef = - create_nullable_dict_array(&vec![Some("us-east-1"); num_rows]); - - // TagRegion: Dictionary(Int32, Utf8) (nullable) - let tag_region: ArrayRef = create_nullable_dict_array(&vec![None; num_rows]); - - // TagHost: Dictionary(Int32, Utf8) (nullable) - let tag_host: ArrayRef = create_nullable_dict_array(&vec![Some("host-001"); num_rows]); - - // Attributes: VARIANT (nullable) - let attributes: ArrayRef = create_variant_array(num_rows, Some(&[("key", "value")])); - - // ServiceName: Dictionary(Int32, Utf8) - let service_names: Vec<&str> = (0..num_rows).map(|_| 
"my-service").collect(); - let service_name: ArrayRef = create_dict_array(&service_names); - - // ResourceAttributes: VARIANT (nullable) - let resource_attributes: ArrayRef = create_variant_array(num_rows, None); - - RecordBatch::try_new( - schema.arrow_schema().clone(), - vec![ - metric_name, - metric_type, - metric_unit, - timestamp_secs, - start_timestamp_secs, - value, - tag_service, - tag_env, - tag_datacenter, - tag_region, - tag_host, - attributes, - service_name, - resource_attributes, - ], + let service_names: Vec<&str> = vec!["my-service"; num_rows]; + + create_test_batch_with_options( + num_rows, + &metric_names, + &timestamps, + Some(&service_names), + &["service", "host"], ) - .unwrap() } #[test] fn test_write_split_creates_file() { - let schema = ParquetSchema::new(); let config = ParquetWriterConfig::default(); let temp_dir = tempfile::tempdir().unwrap(); - let writer = ParquetSplitWriter::new(schema, config, temp_dir.path()); + let writer = ParquetSplitWriter::new(config, temp_dir.path()); let batch = create_test_batch(10); let split = writer.write_split(&batch, "test-index").unwrap(); @@ -393,14 +315,19 @@ mod tests { #[test] fn test_write_split_extracts_time_range() { - let schema = ParquetSchema::new(); let config = ParquetWriterConfig::default(); let temp_dir = tempfile::tempdir().unwrap(); - let writer = ParquetSplitWriter::new(schema, config, temp_dir.path()); + let writer = ParquetSplitWriter::new(config, temp_dir.path()); // Create batch with timestamps [100, 150, 200] - let batch = create_test_batch_with_timestamps(&[100, 150, 200]); + let batch = create_test_batch_with_options( + 3, + &["test.metric", "test.metric", "test.metric"], + &[100, 150, 200], + Some(&["my-service", "my-service", "my-service"]), + &[], + ); let split = writer.write_split(&batch, "test-index").unwrap(); // Verify time range @@ -410,14 +337,19 @@ #[test] fn test_write_split_extracts_metric_names() { - let schema = ParquetSchema::new(); let config = 
ParquetWriterConfig::default(); let temp_dir = tempfile::tempdir().unwrap(); - let writer = ParquetSplitWriter::new(schema, config, temp_dir.path()); + let writer = ParquetSplitWriter::new(config, temp_dir.path()); // Create batch with specific metric names - let batch = create_test_batch_with_metric_names(&["cpu.usage", "memory.used", "cpu.usage"]); + let batch = create_test_batch_with_options( + 3, + &["cpu.usage", "memory.used", "cpu.usage"], + &[100, 100, 100], + Some(&["my-service", "my-service", "my-service"]), + &[], + ); let split = writer.write_split(&batch, "test-index").unwrap(); // Verify metric names (distinct values) @@ -425,93 +357,4 @@ mod tests { assert!(split.metadata.metric_names.contains("memory.used")); assert_eq!(split.metadata.metric_names.len(), 2); } - - /// Create a test batch with specific timestamps. - fn create_test_batch_with_timestamps(timestamps: &[u64]) -> RecordBatch { - let schema = ParquetSchema::new(); - let num_rows = timestamps.len(); - - let metric_names: Vec<&str> = vec!["test.metric"; num_rows]; - let metric_name: ArrayRef = create_dict_array(&metric_names); - let metric_type: ArrayRef = Arc::new(UInt8Array::from(vec![0u8; num_rows])); - let metric_unit: ArrayRef = Arc::new(StringArray::from(vec![Some("bytes"); num_rows])); - let timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(timestamps.to_vec())); - let start_timestamp_secs: ArrayRef = - Arc::new(UInt64Array::from(vec![None::; num_rows])); - let value: ArrayRef = Arc::new(Float64Array::from(vec![42.0; num_rows])); - let tag_service: ArrayRef = create_nullable_dict_array(&vec![Some("web"); num_rows]); - let tag_env: ArrayRef = create_nullable_dict_array(&vec![Some("prod"); num_rows]); - let tag_datacenter: ArrayRef = - create_nullable_dict_array(&vec![Some("us-east-1"); num_rows]); - let tag_region: ArrayRef = create_nullable_dict_array(&vec![None; num_rows]); - let tag_host: ArrayRef = create_nullable_dict_array(&vec![Some("host-001"); num_rows]); - let attributes: 
ArrayRef = create_variant_array(num_rows, None); - let service_name: ArrayRef = create_dict_array(&vec!["my-service"; num_rows]); - let resource_attributes: ArrayRef = create_variant_array(num_rows, None); - - RecordBatch::try_new( - schema.arrow_schema().clone(), - vec![ - metric_name, - metric_type, - metric_unit, - timestamp_secs, - start_timestamp_secs, - value, - tag_service, - tag_env, - tag_datacenter, - tag_region, - tag_host, - attributes, - service_name, - resource_attributes, - ], - ) - .unwrap() - } - - /// Create a test batch with specific metric names. - fn create_test_batch_with_metric_names(metric_names: &[&str]) -> RecordBatch { - let schema = ParquetSchema::new(); - let num_rows = metric_names.len(); - - let metric_name: ArrayRef = create_dict_array(metric_names); - let metric_type: ArrayRef = Arc::new(UInt8Array::from(vec![0u8; num_rows])); - let metric_unit: ArrayRef = Arc::new(StringArray::from(vec![Some("bytes"); num_rows])); - let timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(vec![100u64; num_rows])); - let start_timestamp_secs: ArrayRef = - Arc::new(UInt64Array::from(vec![None::; num_rows])); - let value: ArrayRef = Arc::new(Float64Array::from(vec![42.0; num_rows])); - let tag_service: ArrayRef = create_nullable_dict_array(&vec![Some("web"); num_rows]); - let tag_env: ArrayRef = create_nullable_dict_array(&vec![Some("prod"); num_rows]); - let tag_datacenter: ArrayRef = - create_nullable_dict_array(&vec![Some("us-east-1"); num_rows]); - let tag_region: ArrayRef = create_nullable_dict_array(&vec![None; num_rows]); - let tag_host: ArrayRef = create_nullable_dict_array(&vec![Some("host-001"); num_rows]); - let attributes: ArrayRef = create_variant_array(num_rows, None); - let service_name: ArrayRef = create_dict_array(&vec!["my-service"; num_rows]); - let resource_attributes: ArrayRef = create_variant_array(num_rows, None); - - RecordBatch::try_new( - schema.arrow_schema().clone(), - vec![ - metric_name, - metric_type, - metric_unit, - 
timestamp_secs, - start_timestamp_secs, - value, - tag_service, - tag_env, - tag_datacenter, - tag_region, - tag_host, - attributes, - service_name, - resource_attributes, - ], - ) - .unwrap() - } } diff --git a/quickwit/quickwit-parquet-engine/src/storage/writer.rs b/quickwit/quickwit-parquet-engine/src/storage/writer.rs index 50a375f42b8..6f29c0be4cc 100644 --- a/quickwit/quickwit-parquet-engine/src/storage/writer.rs +++ b/quickwit/quickwit-parquet-engine/src/storage/writer.rs @@ -27,7 +27,7 @@ use thiserror::Error; use tracing::{debug, instrument}; use super::config::ParquetWriterConfig; -use crate::schema::{ParquetField, ParquetSchema}; +use crate::schema::{SORT_ORDER, validate_required_fields}; /// Errors that can occur during parquet writing. #[derive(Debug, Error)] @@ -44,21 +44,20 @@ pub enum ParquetWriteError { #[error("Arrow error: {0}")] ArrowError(#[from] arrow::error::ArrowError), - /// Schema mismatch between RecordBatch and ParquetSchema. - #[error("Schema mismatch: expected {expected} fields, got {got}")] - SchemaMismatch { expected: usize, got: usize }, + /// Schema validation failed. + #[error("Schema validation failed: {0}")] + SchemaValidation(String), } /// Writer for metrics data to Parquet format. pub struct ParquetWriter { config: ParquetWriterConfig, - schema: ParquetSchema, } impl ParquetWriter { /// Create a new ParquetWriter. - pub fn new(schema: ParquetSchema, config: ParquetWriterConfig) -> Self { - Self { config, schema } + pub fn new(config: ParquetWriterConfig) -> Self { + Self { config } } /// Get the writer configuration. @@ -66,58 +65,58 @@ impl ParquetWriter { &self.config } - /// Get the metrics schema. - pub fn schema(&self) -> &ParquetSchema { - &self.schema - } - - /// Validate that a RecordBatch matches the expected schema. 
- fn validate_batch(&self, batch: &RecordBatch) -> Result<(), ParquetWriteError> { - let expected = self.schema.num_fields(); - let got = batch.num_columns(); - if expected != got { - return Err(ParquetWriteError::SchemaMismatch { expected, got }); - } - Ok(()) - } - /// Sort a RecordBatch by the metrics sort order. - /// Order: metric_name, tag_service, tag_env, tag_datacenter, tag_region, tag_host, - /// timestamp_secs. This sorting enables efficient pruning during query execution. + /// Columns from SORT_ORDER that are present in the batch schema are used; + /// missing columns are skipped. fn sort_batch(&self, batch: &RecordBatch) -> Result { - // Build sort columns from the defined sort order - let sort_columns: Vec = ParquetField::sort_order() + let schema = batch.schema(); + let mut sort_columns: Vec = SORT_ORDER .iter() - .map(|field| { - let col_idx = field.column_index(); - SortColumn { - values: Arc::clone(batch.column(col_idx)), - options: Some(SortOptions { - descending: false, - nulls_first: true, - }), - } + .filter_map(|name| schema.index_of(name).ok()) + .map(|idx| SortColumn { + values: Arc::clone(batch.column(idx)), + options: Some(SortOptions { + descending: false, + nulls_first: false, + }), }) .collect(); - // Compute sorted indices - let indices = lexsort_to_indices(&sort_columns, None)?; + if sort_columns.is_empty() { + return Ok(batch.clone()); + } + + // Append the original row index as a tiebreaker so that rows with + // identical sort keys keep their arrival order (stable sort semantics). + // lexsort_to_indices uses an unstable sort internally; the tiebreaker + // makes it behave stably at a small cost (one u32 comparison per + // equal-key pair, 4 bytes × num_rows of extra allocation). 
+ let row_indices = Arc::new(arrow::array::UInt32Array::from_iter_values( + 0..batch.num_rows() as u32, + )); + sort_columns.push(SortColumn { + values: row_indices, + options: Some(SortOptions { + descending: false, + nulls_first: false, + }), + }); - // Reorder the batch using the sorted indices - let sorted_batch = take_record_batch(batch, &indices)?; - Ok(sorted_batch) + let indices = lexsort_to_indices(&sort_columns, None)?; + Ok(take_record_batch(batch, &indices)?) } /// Write a RecordBatch to Parquet bytes in memory. /// The batch is sorted before writing by: metric_name, common tags, timestamp. #[instrument(skip(self, batch), fields(batch_rows = batch.num_rows()))] pub fn write_to_bytes(&self, batch: &RecordBatch) -> Result, ParquetWriteError> { - self.validate_batch(batch)?; + validate_required_fields(&batch.schema()) + .map_err(|e| ParquetWriteError::SchemaValidation(e.to_string()))?; // Sort the batch before writing for efficient pruning let sorted_batch = self.sort_batch(batch)?; - let props = self.config.to_writer_properties(); + let props = self.config.to_writer_properties(&sorted_batch.schema()); let buffer = Cursor::new(Vec::new()); let mut writer = ArrowWriter::try_new(buffer, sorted_batch.schema(), Some(props))?; @@ -125,7 +124,7 @@ impl ParquetWriter { let buffer = writer.into_inner()?; let bytes = buffer.into_inner(); - debug!(bytes_written = bytes.len(), "Completed write to bytes"); + debug!(bytes_written = bytes.len(), "completed write to bytes"); Ok(bytes) } @@ -139,12 +138,13 @@ impl ParquetWriter { batch: &RecordBatch, path: &Path, ) -> Result { - self.validate_batch(batch)?; + validate_required_fields(&batch.schema()) + .map_err(|e| ParquetWriteError::SchemaValidation(e.to_string()))?; // Sort the batch before writing for efficient pruning let sorted_batch = self.sort_batch(batch)?; - let props = self.config.to_writer_properties(); + let props = self.config.to_writer_properties(&sorted_batch.schema()); let file = File::create(path)?; let 
mut writer = ArrowWriter::try_new(file, sorted_batch.schema(), Some(props))?; @@ -152,7 +152,7 @@ impl ParquetWriter { let file = writer.into_inner()?; let bytes_written = file.metadata()?.len(); - debug!(bytes_written, "Completed write to file"); + debug!(bytes_written, "completed write to file"); Ok(bytes_written) } } @@ -165,148 +165,24 @@ mod tests { ArrayRef, DictionaryArray, Float64Array, StringArray, UInt8Array, UInt64Array, }; use arrow::datatypes::{DataType, Field, Int32Type, Schema}; - use parquet::variant::{VariantArrayBuilder, VariantBuilderExt}; use super::*; - - /// Create dictionary array for string fields with Int32 keys. - fn create_dict_array(values: Vec<&str>) -> ArrayRef { - let string_array = StringArray::from(values); - Arc::new( - DictionaryArray::::try_new( - arrow::array::Int32Array::from(vec![0i32]), - Arc::new(string_array), - ) - .unwrap(), - ) - } - - /// Create nullable dictionary array for optional string fields. - fn create_nullable_dict_array(value: Option<&str>) -> ArrayRef { - match value { - Some(v) => { - let string_array = StringArray::from(vec![v]); - Arc::new( - DictionaryArray::::try_new( - arrow::array::Int32Array::from(vec![0i32]), - Arc::new(string_array), - ) - .unwrap(), - ) - } - None => { - let string_array = StringArray::from(vec![None::<&str>]); - Arc::new( - DictionaryArray::::try_new( - arrow::array::Int32Array::from(vec![None::]), - Arc::new(string_array), - ) - .unwrap(), - ) - } - } - } - - /// Create a VARIANT array for testing. 
- fn create_variant_array(fields: Option<&[(&str, &str)]>) -> ArrayRef { - let mut builder = VariantArrayBuilder::new(1); - match fields { - Some(kv_pairs) => { - let mut obj = builder.new_object(); - for (key, value) in kv_pairs { - obj = obj.with_field(key, *value); - } - obj.finish(); - } - None => { - builder.append_null(); - } - } - ArrayRef::from(builder.build()) - } + use crate::test_helpers::create_test_batch_with_tags; fn create_test_batch() -> RecordBatch { - let schema = ParquetSchema::new(); - - // Create arrays for all 14 fields in ParquetSchema matching fields.rs: - // MetricName: Dictionary(Int32, Utf8) - let metric_name: ArrayRef = create_dict_array(vec!["test.metric"]); - - // MetricType: UInt8 - let metric_type: ArrayRef = Arc::new(UInt8Array::from(vec![0u8])); // gauge - - // MetricUnit: Utf8 (nullable) - let metric_unit: ArrayRef = Arc::new(StringArray::from(vec![Some("bytes")])); - - // TimestampSecs: UInt64 - let timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(vec![1704067200u64])); - - // StartTimestampSecs: UInt64 (nullable) - let start_timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(vec![None::])); - - // Value: Float64 - let value: ArrayRef = Arc::new(Float64Array::from(vec![42.0])); - - // TagService: Dictionary(Int32, Utf8) (nullable) - let tag_service: ArrayRef = create_nullable_dict_array(Some("web")); - - // TagEnv: Dictionary(Int32, Utf8) (nullable) - let tag_env: ArrayRef = create_nullable_dict_array(Some("prod")); - - // TagDatacenter: Dictionary(Int32, Utf8) (nullable) - let tag_datacenter: ArrayRef = create_nullable_dict_array(Some("us-east-1")); - - // TagRegion: Dictionary(Int32, Utf8) (nullable) - let tag_region: ArrayRef = create_nullable_dict_array(None); - - // TagHost: Dictionary(Int32, Utf8) (nullable) - let tag_host: ArrayRef = create_nullable_dict_array(Some("host-001")); - - // Attributes: VARIANT (nullable) - let attributes: ArrayRef = create_variant_array(Some(&[("key", "value")])); - - // ServiceName: 
Dictionary(Int32, Utf8) - let service_name: ArrayRef = create_dict_array(vec!["my-service"]); - - // ResourceAttributes: VARIANT (nullable) - let resource_attributes: ArrayRef = create_variant_array(None); - - RecordBatch::try_new( - schema.arrow_schema().clone(), - vec![ - metric_name, - metric_type, - metric_unit, - timestamp_secs, - start_timestamp_secs, - value, - tag_service, - tag_env, - tag_datacenter, - tag_region, - tag_host, - attributes, - service_name, - resource_attributes, - ], - ) - .unwrap() + create_test_batch_with_tags(1, &["service", "env"]) } #[test] fn test_writer_creation() { - let schema = ParquetSchema::new(); let config = ParquetWriterConfig::default(); - let writer = ParquetWriter::new(schema, config); - - assert_eq!(writer.schema().num_fields(), 14); + let _writer = ParquetWriter::new(config); } #[test] fn test_write_to_bytes() { - let schema = ParquetSchema::new(); let config = ParquetWriterConfig::default(); - let writer = ParquetWriter::new(schema, config); + let writer = ParquetWriter::new(config); let batch = create_test_batch(); let bytes = writer.write_to_bytes(&batch).unwrap(); @@ -318,9 +194,8 @@ mod tests { #[test] fn test_write_to_file() { - let schema = ParquetSchema::new(); let config = ParquetWriterConfig::default(); - let writer = ParquetWriter::new(schema, config); + let writer = ParquetWriter::new(config); let batch = create_test_batch(); let temp_dir = std::env::temp_dir(); @@ -334,12 +209,11 @@ mod tests { } #[test] - fn test_schema_mismatch() { - let schema = ParquetSchema::new(); + fn test_schema_validation_missing_field() { let config = ParquetWriterConfig::default(); - let writer = ParquetWriter::new(schema, config); + let writer = ParquetWriter::new(config); - // Create a batch with wrong number of columns + // Create a batch missing required fields let wrong_schema = Arc::new(Schema::new(vec![Field::new( "single_field", DataType::Utf8, @@ -354,10 +228,37 @@ mod tests { let result = 
writer.write_to_bytes(&wrong_batch); assert!(matches!( result, - Err(ParquetWriteError::SchemaMismatch { - expected: 14, - got: 1 - }) + Err(ParquetWriteError::SchemaValidation(_)) + )); + } + + #[test] + fn test_schema_validation_wrong_type() { + let config = ParquetWriterConfig::default(); + let writer = ParquetWriter::new(config); + + // Create a batch where metric_name has wrong type (Utf8 instead of Dictionary) + let wrong_schema = Arc::new(Schema::new(vec![ + Field::new("metric_name", DataType::Utf8, false), + Field::new("metric_type", DataType::UInt8, false), + Field::new("timestamp_secs", DataType::UInt64, false), + Field::new("value", DataType::Float64, false), + ])); + let wrong_batch = RecordBatch::try_new( + wrong_schema, + vec![ + Arc::new(StringArray::from(vec!["test"])) as ArrayRef, + Arc::new(UInt8Array::from(vec![0u8])) as ArrayRef, + Arc::new(UInt64Array::from(vec![100u64])) as ArrayRef, + Arc::new(Float64Array::from(vec![1.0])) as ArrayRef, + ], + ) + .unwrap(); + + let result = writer.write_to_bytes(&wrong_batch); + assert!(matches!( + result, + Err(ParquetWriteError::SchemaValidation(_)) )); } @@ -365,9 +266,8 @@ mod tests { fn test_write_with_snappy_compression() { use super::super::config::Compression; - let schema = ParquetSchema::new(); let config = ParquetWriterConfig::new().with_compression(Compression::Snappy); - let writer = ParquetWriter::new(schema, config); + let writer = ParquetWriter::new(config); let batch = create_test_batch(); let bytes = writer.write_to_bytes(&batch).unwrap(); @@ -382,9 +282,25 @@ mod tests { use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; - let schema = ParquetSchema::new(); let config = ParquetWriterConfig::default(); - let writer = ParquetWriter::new(schema.clone(), config); + let writer = ParquetWriter::new(config); + + // Create a schema with required fields + service tag for sort verification + let schema = Arc::new(Schema::new(vec![ + Field::new( + "metric_name", + 
DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+                false,
+            ),
+            Field::new("metric_type", DataType::UInt8, false),
+            Field::new("timestamp_secs", DataType::UInt64, false),
+            Field::new("value", DataType::Float64, false),
+            Field::new(
+                "service",
+                DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+                true,
+            ),
+        ]));

         // Create unsorted batch with multiple rows:
         // Row 0: metric_b, service_a, timestamp=300
@@ -393,11 +309,6 @@ mod tests {
         // Expected sorted order: metric_a/service_a/200, metric_a/service_b/100,
         // metric_b/service_a/300

-        // Build arrays for 3 rows (original unsorted order in comments above)
-        let timestamps = [300u64, 100u64, 200u64];
-        let values = [1.0, 2.0, 3.0];
-
-        // metric_name: Dictionary(Int32, Utf8)
         let metric_name: ArrayRef = {
             let keys = arrow::array::Int32Array::from(vec![0i32, 1, 1]);
             let values = StringArray::from(vec!["metric_b", "metric_a"]);
@@ -405,88 +316,18 @@ mod tests {
         };

         let metric_type: ArrayRef = Arc::new(UInt8Array::from(vec![0u8, 0, 0]));
-        let metric_unit: ArrayRef = Arc::new(StringArray::from(vec![
-            Some("bytes"),
-            Some("bytes"),
-            Some("bytes"),
-        ]));
-        let timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(timestamps.to_vec()));
-        let start_timestamp_secs: ArrayRef =
-            Arc::new(UInt64Array::from(vec![None::<u64>, None, None]));
-        let value: ArrayRef = Arc::new(Float64Array::from(values.to_vec()));
+        let timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(vec![300u64, 100u64, 200u64]));
+        let value: ArrayRef = Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0]));

-        // tag_service: Dictionary(Int32, Utf8) (nullable)
-        let tag_service: ArrayRef = {
+        let service: ArrayRef = {
             let keys = arrow::array::Int32Array::from(vec![Some(0i32), Some(1), Some(0)]);
             let values = StringArray::from(vec!["service_a", "service_b"]);
             Arc::new(DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap())
         };

-        let tag_env: ArrayRef = {
-            let keys =
arrow::array::Int32Array::from(vec![Some(0i32), Some(0), Some(0)]); - let values = StringArray::from(vec!["prod"]); - Arc::new(DictionaryArray::::try_new(keys, Arc::new(values)).unwrap()) - }; - - let tag_datacenter: ArrayRef = { - let keys = arrow::array::Int32Array::from(vec![None::, None, None]); - let values = StringArray::from(vec![None::<&str>]); - Arc::new(DictionaryArray::::try_new(keys, Arc::new(values)).unwrap()) - }; - - let tag_region: ArrayRef = { - let keys = arrow::array::Int32Array::from(vec![None::, None, None]); - let values = StringArray::from(vec![None::<&str>]); - Arc::new(DictionaryArray::::try_new(keys, Arc::new(values)).unwrap()) - }; - - let tag_host: ArrayRef = { - let keys = arrow::array::Int32Array::from(vec![None::, None, None]); - let values = StringArray::from(vec![None::<&str>]); - Arc::new(DictionaryArray::::try_new(keys, Arc::new(values)).unwrap()) - }; - - // Build VARIANT arrays for 3 rows - let attributes: ArrayRef = { - let mut builder = VariantArrayBuilder::new(3); - for _ in 0..3 { - builder.append_null(); - } - ArrayRef::from(builder.build()) - }; - - let service_name: ArrayRef = { - let keys = arrow::array::Int32Array::from(vec![0i32, 1, 0]); - let values = StringArray::from(vec!["service_a", "service_b"]); - Arc::new(DictionaryArray::::try_new(keys, Arc::new(values)).unwrap()) - }; - - let resource_attributes: ArrayRef = { - let mut builder = VariantArrayBuilder::new(3); - for _ in 0..3 { - builder.append_null(); - } - ArrayRef::from(builder.build()) - }; - let batch = RecordBatch::try_new( - schema.arrow_schema().clone(), - vec![ - metric_name, - metric_type, - metric_unit, - timestamp_secs, - start_timestamp_secs, - value, - tag_service, - tag_env, - tag_datacenter, - tag_region, - tag_host, - attributes, - service_name, - resource_attributes, - ], + schema, + vec![metric_name, metric_type, timestamp_secs, value, service], ) .unwrap(); @@ -509,25 +350,29 @@ mod tests { assert_eq!(result.num_rows(), 3); // Extract metric 
names and timestamps to verify sort order
+        let metric_idx = result.schema().index_of("metric_name").unwrap();
+        let ts_idx = result.schema().index_of("timestamp_secs").unwrap();
+        let service_idx = result.schema().index_of("service").unwrap();
+
         let metric_col = result
-            .column(ParquetField::MetricName.column_index())
+            .column(metric_idx)
             .as_any()
             .downcast_ref::<DictionaryArray<Int32Type>>()
             .unwrap();
         let ts_col = result
-            .column(ParquetField::TimestampSecs.column_index())
+            .column(ts_idx)
             .as_any()
             .downcast_ref::<UInt64Array>()
             .unwrap();
         let service_col = result
-            .column(ParquetField::TagService.column_index())
+            .column(service_idx)
             .as_any()
             .downcast_ref::<DictionaryArray<Int32Type>>()
             .unwrap();

         // Get string values from dictionary
-        let get_metric = |i: usize| -> &str {
-            let key = metric_col.keys().value(i);
+        let get_metric = |row: usize| -> &str {
+            let key = metric_col.keys().value(row);
             metric_col
                 .values()
                 .as_any()
@@ -535,8 +380,8 @@ mod tests {
                 .unwrap()
                 .value(key as usize)
         };
-        let get_service = |i: usize| -> &str {
-            let key = service_col.keys().value(i);
+        let get_service = |row: usize| -> &str {
+            let key = service_col.keys().value(row);
             service_col
                 .values()
                 .as_any()
@@ -545,7 +390,7 @@ mod tests {
                 .value(key as usize)
         };

-        // Expected sort order: metric_name ASC, tag_service ASC, timestamp ASC
+        // Expected sort order: metric_name ASC, service ASC, timestamp_secs ASC
         // Row 0: metric_a, service_a, 200 (original row 2)
         // Row 1: metric_a, service_b, 100 (original row 1)
         // Row 2: metric_b, service_a, 300 (original row 0)
diff --git a/quickwit/quickwit-parquet-engine/src/table_config.rs b/quickwit/quickwit-parquet-engine/src/table_config.rs
new file mode 100644
index 00000000000..002d99246bc
--- /dev/null
+++ b/quickwit/quickwit-parquet-engine/src/table_config.rs
@@ -0,0 +1,191 @@
+// Copyright 2021-Present Datadog, Inc.
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Default table configuration for Parquet pipeline product types. +//! +//! When a table (index) does not specify an explicit sort fields configuration, +//! these defaults are applied based on the product type. The `ParquetWriter` +//! resolves these sort field names to physical `ParquetField` columns at +//! construction time; columns not yet in the schema (e.g., `timeseries_id`) +//! are recorded in metadata but skipped during physical sort. + +use serde::{Deserialize, Serialize}; + +/// Product types supported by the Parquet pipeline. +/// +/// Each product type has a default sort fields schema that matches the common +/// query predicates for that signal type. See ADR-002 for the rationale. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum ProductType { + Metrics, + Logs, + Traces, +} + +impl ProductType { + /// Default sort fields string for this product type. + /// + /// The metrics default includes `timeseries_id` as a tiebreaker before + /// `timestamp_secs`. Since `timeseries_id` is not yet a physical column, + /// the writer skips it during sort but records it in the metadata string. + /// When the column is added to the schema, sorting will include it + /// automatically. + /// + /// Logs and traces defaults are placeholders — they will be refined + /// when the Parquet pipeline is extended to those signal types. 
+ pub fn default_sort_fields(self) -> &'static str { + match self { + Self::Metrics => "metric_name|service|env|datacenter|region|host|timeseries_id|timestamp_secs/V2", + // Placeholder: column names TBD when logs Parquet schema is defined. + Self::Logs => "service_name|level|host|timestamp_secs/V2", + // Placeholder: column names TBD when traces Parquet schema is defined. + Self::Traces => "service_name|operation_name|trace_id|timestamp_secs/V2", + } + } +} + +/// Table-level configuration for the Parquet pipeline. +/// +/// Stored per-index. When `sort_fields` is `None`, the default for the +/// product type is used (see `ProductType::default_sort_fields()`). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TableConfig { + /// The product type determines schema defaults. + pub product_type: ProductType, + + /// Explicit sort fields override. When `None`, the product-type default + /// is used. When `Some`, this exact schema string is applied. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub sort_fields: Option, + + /// Window duration in seconds for time-windowed compaction. + /// Default: 900 (15 minutes). Must divide 3600. + #[serde(default = "default_window_duration_secs")] + pub window_duration_secs: u32, +} + +fn default_window_duration_secs() -> u32 { + 900 +} + +impl TableConfig { + /// Create a new TableConfig for the given product type with defaults. + pub fn new(product_type: ProductType) -> Self { + Self { + product_type, + sort_fields: None, + window_duration_secs: default_window_duration_secs(), + } + } + + /// Get the effective sort fields string for this table. + /// + /// Returns the explicit override if set, otherwise the product-type default. 
+ pub fn effective_sort_fields(&self) -> &str { + match &self.sort_fields { + Some(sf) => sf.as_str(), + None => self.product_type.default_sort_fields(), + } + } +} + +impl Default for TableConfig { + fn default() -> Self { + Self::new(ProductType::Metrics) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::sort_fields::parse_sort_fields; + + #[test] + fn test_metrics_default_sort_fields_parses() { + let schema = parse_sort_fields(ProductType::Metrics.default_sort_fields()) + .expect("metrics default sort fields must parse"); + assert_eq!(schema.column.len(), 8); + // Proto names are bare (suffixes stripped by parser). + assert_eq!(schema.column[0].name, "metric_name"); + assert_eq!(schema.column[1].name, "service"); + assert_eq!(schema.column[6].name, "timeseries_id"); + assert_eq!(schema.column[7].name, "timestamp_secs"); + } + + #[test] + fn test_logs_default_sort_fields_parses() { + let schema = parse_sort_fields(ProductType::Logs.default_sort_fields()) + .expect("logs default sort fields must parse"); + assert_eq!(schema.column.len(), 4); + assert_eq!(schema.column[0].name, "service_name"); + } + + #[test] + fn test_traces_default_sort_fields_parses() { + let schema = parse_sort_fields(ProductType::Traces.default_sort_fields()) + .expect("traces default sort fields must parse"); + assert_eq!(schema.column.len(), 4); + assert_eq!(schema.column[0].name, "service_name"); + } + + #[test] + fn test_effective_sort_fields_uses_default() { + let config = TableConfig::new(ProductType::Metrics); + assert_eq!( + config.effective_sort_fields(), + ProductType::Metrics.default_sort_fields() + ); + } + + #[test] + fn test_effective_sort_fields_uses_override() { + let mut config = TableConfig::new(ProductType::Metrics); + config.sort_fields = Some("custom__s|timestamp/V2".to_string()); + assert_eq!(config.effective_sort_fields(), "custom__s|timestamp/V2"); + } + + #[test] + fn test_default_window_duration() { + let config = TableConfig::default(); + 
assert_eq!(config.window_duration_secs, 900); + } + + #[test] + fn test_table_config_serde_roundtrip() { + let config = TableConfig::new(ProductType::Traces); + let json = serde_json::to_string(&config).unwrap(); + let recovered: TableConfig = serde_json::from_str(&json).unwrap(); + assert_eq!(recovered.product_type, ProductType::Traces); + assert!(recovered.sort_fields.is_none()); + assert_eq!(recovered.window_duration_secs, 900); + } + + #[test] + fn test_table_config_serde_with_override() { + let mut config = TableConfig::new(ProductType::Metrics); + config.sort_fields = Some("host__s|timestamp/V2".to_string()); + config.window_duration_secs = 3600; + + let json = serde_json::to_string(&config).unwrap(); + assert!(json.contains("host__s|timestamp/V2")); + + let recovered: TableConfig = serde_json::from_str(&json).unwrap(); + assert_eq!( + recovered.sort_fields.as_deref(), + Some("host__s|timestamp/V2") + ); + assert_eq!(recovered.window_duration_secs, 3600); + } +} diff --git a/quickwit/quickwit-parquet-engine/src/test_helpers.rs b/quickwit/quickwit-parquet-engine/src/test_helpers.rs new file mode 100644 index 00000000000..dd6892dbe23 --- /dev/null +++ b/quickwit/quickwit-parquet-engine/src/test_helpers.rs @@ -0,0 +1,96 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Shared test helpers for building Arrow RecordBatches in unit tests. 
+
+use std::sync::Arc;
+
+use arrow::array::{
+    ArrayRef, DictionaryArray, Float64Array, Int32Array, StringArray, UInt8Array, UInt64Array,
+};
+use arrow::datatypes::{DataType, Field, Int32Type, Schema as ArrowSchema};
+use arrow::record_batch::RecordBatch;
+
+/// Creates a dictionary-encoded string array with compact 0-based keys.
+pub fn create_dict_array(values: &[&str]) -> ArrayRef {
+    let keys: Vec<i32> = (0..values.len()).map(|i| i as i32).collect();
+    let string_array = StringArray::from(values.to_vec());
+    Arc::new(
+        DictionaryArray::<Int32Type>::try_new(Int32Array::from(keys), Arc::new(string_array))
+            .unwrap(),
+    )
+}
+
+/// Creates a nullable dictionary-encoded string array.
+///
+/// Each `Some(value)` gets the next key into the filtered dictionary
+/// values; `None` entries produce a null key.
+pub fn create_nullable_dict_array(values: &[Option<&str>]) -> ArrayRef {
+    let mut next_key = -1i32;
+    let keys: Vec<Option<i32>> = values
+        .iter()
+        .map(|v| v.map(|_| { next_key += 1; next_key }))
+        .collect();
+    let string_values: Vec<&str> = values.iter().filter_map(|v| *v).collect();
+    let string_array = StringArray::from(string_values);
+    Arc::new(
+        DictionaryArray::<Int32Type>::try_new(Int32Array::from(keys), Arc::new(string_array))
+            .unwrap(),
+    )
+}
+
+/// Creates a RecordBatch with the 4 required fields plus the specified
+/// nullable dictionary-encoded tag columns.
+/// +/// - `metric_name`: all rows set to `"cpu.usage"` +/// - `metric_type`: all rows `0` (Gauge) +/// - `timestamp_secs`: sequential, starting at `100` +/// - `value`: sequential `f64` starting at `42.0` +/// - each tag column: all rows set to the column name as the value +pub fn create_test_batch_with_tags(num_rows: usize, tags: &[&str]) -> RecordBatch { + let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)); + + let mut fields = vec![ + Field::new("metric_name", dict_type.clone(), false), + Field::new("metric_type", DataType::UInt8, false), + Field::new("timestamp_secs", DataType::UInt64, false), + Field::new("value", DataType::Float64, false), + ]; + for tag in tags { + fields.push(Field::new(*tag, dict_type.clone(), true)); + } + let schema = Arc::new(ArrowSchema::new(fields)); + + let metric_names: Vec<&str> = vec!["cpu.usage"; num_rows]; + let metric_name: ArrayRef = create_dict_array(&metric_names); + let metric_type: ArrayRef = Arc::new(UInt8Array::from(vec![0u8; num_rows])); + let timestamps: Vec = (0..num_rows).map(|i| 100 + i as u64).collect(); + let timestamp_secs: ArrayRef = Arc::new(UInt64Array::from(timestamps)); + let values: Vec = (0..num_rows).map(|i| 42.0 + i as f64).collect(); + let value: ArrayRef = Arc::new(Float64Array::from(values)); + + let mut columns: Vec = vec![metric_name, metric_type, timestamp_secs, value]; + for tag in tags { + let tag_values: Vec> = vec![Some(tag); num_rows]; + columns.push(create_nullable_dict_array(&tag_values)); + } + + RecordBatch::try_new(schema, columns).unwrap() +} + +/// Creates a RecordBatch with the 4 required fields and default tags +/// (`service`, `host`). 
+pub fn create_test_batch(num_rows: usize) -> RecordBatch { + create_test_batch_with_tags(num_rows, &["service", "host"]) +} diff --git a/quickwit/quickwit-proto/build.rs b/quickwit/quickwit-proto/build.rs index 569d9b5315b..37e7d7c8cb1 100644 --- a/quickwit/quickwit-proto/build.rs +++ b/quickwit/quickwit-proto/build.rs @@ -223,6 +223,19 @@ fn main() -> Result<(), Box> { ], )?; + // Event Store sort schema proto (vendored from dd-source). + let sortschema_prost_config = prost_build::Config::default(); + tonic_prost_build::configure() + .type_attribute(".", "#[derive(serde::Serialize, serde::Deserialize)]") + .out_dir("src/codegen/sortschema") + .compile_with_config( + sortschema_prost_config, + &[std::path::PathBuf::from( + "protos/event_store_sortschema/event_store_sortschema.proto", + )], + &[std::path::PathBuf::from("protos/event_store_sortschema")], + )?; + // OTEL proto let mut prost_config = prost_build::Config::default(); prost_config.protoc_arg("--experimental_allow_proto3_optional"); diff --git a/quickwit/quickwit-proto/protos/event_store_sortschema/event_store_sortschema.proto b/quickwit/quickwit-proto/protos/event_store_sortschema/event_store_sortschema.proto new file mode 100644 index 00000000000..a2e1d245b6d --- /dev/null +++ b/quickwit/quickwit-proto/protos/event_store_sortschema/event_store_sortschema.proto @@ -0,0 +1,231 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto3"; + +// Vendored from dd-source: domains/event-platform/shared/libs/event-store-proto/protos/event_store_sortschema/ +// Keep identical to upstream -- do NOT strip unused fields. + +package sortschema; + +option java_package = "com.dd.event.store.sortschema.proto"; +option java_multiple_files = true; +option go_package = "github.com/DataDog/dd-source/domains/event-platform/shared/libs/event-store-proto/protos/event_store_sortschema"; + +// NOTE: Be thoughtful making changes to this file. Everything included here is +// included in the metadata of every fragment, and in each fragment header. + +enum SortColumnDirection { + // SortDirectionUnknown will pop up if someone forgets to initialize a field. + SortDirectionUnknown = 0; + + // SortDirectionAscending sorts columns in ascending value order. + SortDirectionAscending = 1; + + // SortDirectionDescending sorts columns in descending value order. + SortDirectionDescending = 2; +} + +// SortColumn represents a single column that's participating in the sort order +// for rows in the fragment files. +// +// NOTE: Carefully consider whether you need to modify the implementation of +// schemautils.go/EquivalentSchemas function whenever this struct is modified. +message SortColumn { + string name = 1; + // This integer corresponds 1:1 with the application-level column type which is also a number. + // We do that instead of breaking out a separate ENUM here to avoid having to maintain a bunch + // of mapping code that converts from column types to protobuf ENUMs and back again for no reason. + uint64 column_type = 2; + SortColumnDirection sort_direction = 3; + + // NOTE: If we decide later to form lexicographic keys from the schema, + // it may be useful to supply an integer here which would indicate + // the sort precedence and could be used for tagging for e.g. + // orderedcode lexicographic keys. 
+} + +// SortSchema represents a set of column names and precedence values which define +// the partial lexicographical sort order of rows in a fragment file, built from the +// individual column orderings. Precedence of the columns for sorting is given by +// the order of the columns in the SortSchema. "timestamp" must appear, with +// SortColumnType INT64, SortDirection DESCENDING, last. +// +// NOTE: Carefully consider whether you need to modify the implementation of +// schemautils.go/EquivalentSchemas function whenever this struct is modified. +message SortSchema { + // Very common schemas used all over the place are assigned a unique version number (reference + // in dd-go/logs/apps/logs-event-store/storage/fragment/schema_versions.go). + // + // This version number can be used in place of the complete schema description in the fragment + // metadata entries, to reduce their size. When this value is not null, the other fields in the + // SortSchema are not used and don't need to be set. + uint64 version = 4; + + repeated SortColumn column = 1; + + // Used in metrics, etc. + string name = 2; + + // SortVersion specifies what type of sorting of the data has been done. + // Fragments with different SortVersion numbers are *never* merged by the + // compactor. + enum SortVersion { + // The initial version of per-file locality sorting had a bug where it sorted + // the rows by the full values of string fields in the sort columns, but reported + // trimmed values in the RowKeys. This could yield what appeared to be + // "out of order" keys because keys that only differ in the portion that was + // trimmed could sort differently than trimmed values, and this would yield + // what looked like overlapping fragments after m:n merges. Fragments produced + // this way are not compatible with the LSM algorithm and must be excluded. 
+ INCORRECT_TRIM = 0; + + // Files marked with this version use LessTrimmed both in their production + // in the writer, and in the merges produced by the compactor. Trimming has + // a "budget" that allows the leading string fields to take up "more" + // characters if they need to without overflowing into huge values in the + // metadata server. This budgeting is intended to cover when e.g. "service" + // is a leading tag with long service names. + TRIMMED_WITH_BUDGET = 1; + } + + SortVersion sort_version = 3; + + // Cutoff position for LSM compaction comparisons. Only columns 0 through + // lsm_comparison_cutoff-1 are considered for fragment locality decisions. + // Allows sorting by columns that improve compression without creating + // unnecessary extra compaction work. + // This cutoff is represented in the sort schema string format with an `&` + // at the beginning of the name of the first column that should be + // ignored, like "service__s|&env__s|timestamp/V1" which would only use + // "service__s" for locality comparisons. + // When unset, or explicitly set to 0, the schema utils will use the + // legacy logic of ignoring timestamp and tiebreaker, if they are present. + int32 lsm_comparison_cutoff = 5; +} + +// A ColumnValue is a string, int, uint or float which corresponds to a sort column. +message ColumnValue { + oneof value { + bytes type_string = 1; + int64 type_int = 2; + double type_float = 3; + } +} + +// ColumnValues represent a set of column values that correspond with a particular +// SortSchema. +message ColumnValues { + repeated ColumnValue column = 1; +} + +// RowKeys represent the "row keys" of the first and last rows of a fragment file. +// The values are the values of the sort columns at the first and last row, respectively. +message RowKeys { + // These are the values of the sort colums at the first row, thus defining the + // lower inclusive boundary of the fragment file, i.e. 
using the minimum values of any + // multi-values in the column values(1), which is what the sorting operates upon. + ColumnValues min_row_values = 1; + + // These are the values of the sort columns at the last row, thus defining the + // upper boundary of the sorted rows of the fragment file, i.e. using the minimum + // values of any multi-values in the column values(1), which is what the sorting + // operates upon. + // + // Note that the string fields in min_row_values and max_row_values are trimmed + // to avoid storing "too large" values in the metadata server, and the sort order + // of the rows in the fragment files are determined using these trimmed keys to + // match. + // + // During m:n merging of fragments, when boundaries of fragments are selected, + // they are selected at transitions of key values of the trimmed keys so that the + // [min_row_values, max_row_values] ranges of the output fragments do not overlap. + // This property is important for two reasons. First, we want to prune fragments + // from queries based on the values of the sort schema columns. By making the + // fragments non-overlapping, we enable this pruning. Secondly, we don't want + // compaction strategies like the classic LSM strategy to re-merge the same + // fragments over and over. + // + // An interesting consequence of the key trimming is that a fragment may have + // a row in it with values that come "after" the max_row_values keys outside + // of the "trimmed" range of the column values. For example, if the trim budget + // for a column is four characters, and the fragment ends at "abcd", "abcd" and + // "abcd123" would yield the same trimmed key value and would both be stored in + // the same fragment. However "abce" would be stored in a different fragment. + ColumnValues max_row_values = 2; + + // This set of values defines the all-inclusive range boundary of the file considering + // the max values of any multi-values in the column values(2). 
This boundary is the + // boundary w.r.t. queries. + ColumnValues all_inclusive_max_row_values = 3; + + + // For track table expirations (splitting M fragments into N1 expired fragments and N2 live fragments), + // there is a potential for output fragments to overlap in key range. This is because the sort schema of track + // table starts with retention at intake + scopeID, and retention can change between the moment the fragment + // is created and the moment it is expired. To give an example, say a fragment has scopeIDs 1, 2, 3, 4, 5, + // all with a retention at intake of 7 days. If the retention for 3 and 5 is changed to 15 days, + // then 7 days later at expiration the fragment will be split into two fragments, + // one "expired" with scopeIDs 1, 2 and 4 and one "live" with scopeIDs 3 and 5. + // The row keys will be (7:1..., 7:4...) for the first fragment, and (7:3...,7:5...) for the second, + // which overlap. To avoid this, we add an `expired` boolean to the row keys, so we allow a live and an expired + // fragment to overlap in key range. + bool expired = 4; + + // Footnotes: + // + // (1) For min_row_values and max_row_values with multi-values in the column + // values, the sort order is by the minimum value of a multi-value for an + // ascending-sort-order column and the maximum value for a descending-sort-order + // column. + // (2) For all_inclusive_max_row_values, the value of any multi-values encountered + // that is used is the maximum value for an ascending-sort-order column and + // the minimum value for a descending-sort-order column. + + + + // Example: + // + // Let's say we have the following columns. Values at each row enclosed with [brackets]. + // Column B is sorted in descending order. The others are sorted in ascending order. 
+ // + // Column A Column B Column C + // ======== ======== ======== + // 1: [1, 2, 99] [B] [x, y] + // 2: [1, 4] [A, C] [y] + // 3: [2] [B, C] [z] + // 4: [2] [A, B, C] [x] + // 5: [2] [A] [y, z] + // 6: [3] [B] [x, y, z] + // + // min_row_key is {A=1, B=B, C=x} + // max_row_key is {A=3, B=B, C=x} + // all_inclusive_max_row_key is {A=99, B=B, C=y} + // + // The idea with these keys is that [min_row_key, max_row_key] defines the range that + // covers a single fragment file for the purposes of computing overlaps for deciding + // what to perform an m:n merge to "flatten" files that overlap in key range so that + // queries don't have to visit all the files. + // + // If there are no multi-valued sort columns, this is the end of the story. However + // if there are multi-valued sort columns, the range that a given file covers in + // terms of queries is given by [min_row_key, all_inclusive_max_row_key]. A lot of + // overlaps with these ranges indicate "atomic rows" for a table that cause queries + // to hit more files, degrading performance. + // + // Initially the compactor will only consider [min_row_key, max_row_key], with the + // all_inclusive_max_row_key present for diagnostics. In the future we may use + // this key to determine whether we want to do anything "special" with files that + // have atomic rows, such as duplicating the rows at each value of the multi-value. +} diff --git a/quickwit/quickwit-proto/src/codegen/sortschema/sortschema.rs b/quickwit/quickwit-proto/src/codegen/sortschema/sortschema.rs new file mode 100644 index 00000000000..2550c847794 --- /dev/null +++ b/quickwit/quickwit-proto/src/codegen/sortschema/sortschema.rs @@ -0,0 +1,230 @@ +// This file is @generated by prost-build. +/// SortColumn represents a single column that's participating in the sort order +/// for rows in the fragment files. 
+/// +/// NOTE: Carefully consider whether you need to modify the implementation of +/// schemautils.go/EquivalentSchemas function whenever this struct is modified. +#[derive(serde::Serialize, serde::Deserialize)] +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct SortColumn { + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + /// This integer corresponds 1:1 with the application-level column type which is also a number. + /// We do that instead of breaking out a separate ENUM here to avoid having to maintain a bunch + /// of mapping code that converts from column types to protobuf ENUMs and back again for no reason. + #[prost(uint64, tag = "2")] + pub column_type: u64, + #[prost(enumeration = "SortColumnDirection", tag = "3")] + pub sort_direction: i32, +} +/// SortSchema represents a set of column names and precedence values which define +/// the partial lexicographical sort order of rows in a fragment file, built from the +/// individual column orderings. Precedence of the columns for sorting is given by +/// the order of the columns in the SortSchema. "timestamp" must appear, with +/// SortColumnType INT64, SortDirection DESCENDING, last. +/// +/// NOTE: Carefully consider whether you need to modify the implementation of +/// schemautils.go/EquivalentSchemas function whenever this struct is modified. +#[derive(serde::Serialize, serde::Deserialize)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SortSchema { + /// Very common schemas used all over the place are assigned a unique version number (reference + /// in dd-go/logs/apps/logs-event-store/storage/fragment/schema_versions.go). + /// + /// This version number can be used in place of the complete schema description in the fragment + /// metadata entries, to reduce their size. When this value is not null, the other fields in the + /// SortSchema are not used and don't need to be set. 
+ #[prost(uint64, tag = "4")] + pub version: u64, + #[prost(message, repeated, tag = "1")] + pub column: ::prost::alloc::vec::Vec, + /// Used in metrics, etc. + #[prost(string, tag = "2")] + pub name: ::prost::alloc::string::String, + #[prost(enumeration = "sort_schema::SortVersion", tag = "3")] + pub sort_version: i32, + /// Cutoff position for LSM compaction comparisons. Only columns 0 through + /// lsm_comparison_cutoff-1 are considered for fragment locality decisions. + /// Allows sorting by columns that improve compression without creating + /// unnecessary extra compaction work. + /// This cutoff is represented in the sort schema string format with an `&` + /// at the beginning of the name of the first column that should be + /// ignored, like "service__s|&env__s|timestamp/V1" which would only use + /// "service__s" for locality comparisons. + /// When unset, or explicitly set to 0, the schema utils will use the + /// legacy logic of ignoring timestamp and tiebreaker, if they are present. + #[prost(int32, tag = "5")] + pub lsm_comparison_cutoff: i32, +} +/// Nested message and enum types in `SortSchema`. +pub mod sort_schema { + /// SortVersion specifies what type of sorting of the data has been done. + /// Fragments with different SortVersion numbers are *never* merged by the + /// compactor. + #[derive(serde::Serialize, serde::Deserialize)] + #[derive( + Clone, + Copy, + Debug, + PartialEq, + Eq, + Hash, + PartialOrd, + Ord, + ::prost::Enumeration + )] + #[repr(i32)] + pub enum SortVersion { + /// The initial version of per-file locality sorting had a bug where it sorted + /// the rows by the full values of string fields in the sort columns, but reported + /// trimmed values in the RowKeys. This could yield what appeared to be + /// "out of order" keys because keys that only differ in the portion that was + /// trimmed could sort differently than trimmed values, and this would yield + /// what looked like overlapping fragments after m:n merges. 
Fragments produced + /// this way are not compatible with the LSM algorithm and must be excluded. + IncorrectTrim = 0, + /// Files marked with this version use LessTrimmed both in their production + /// in the writer, and in the merges produced by the compactor. Trimming has + /// a "budget" that allows the leading string fields to take up "more" + /// characters if they need to without overflowing into huge values in the + /// metadata server. This budgeting is intended to cover when e.g. "service" + /// is a leading tag with long service names. + TrimmedWithBudget = 1, + } + impl SortVersion { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::IncorrectTrim => "INCORRECT_TRIM", + Self::TrimmedWithBudget => "TRIMMED_WITH_BUDGET", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "INCORRECT_TRIM" => Some(Self::IncorrectTrim), + "TRIMMED_WITH_BUDGET" => Some(Self::TrimmedWithBudget), + _ => None, + } + } + } +} +/// A ColumnValue is a string, int, uint or float which corresponds to a sort column. +#[derive(serde::Serialize, serde::Deserialize)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ColumnValue { + #[prost(oneof = "column_value::Value", tags = "1, 2, 3")] + pub value: ::core::option::Option, +} +/// Nested message and enum types in `ColumnValue`. 
+pub mod column_value { + #[derive(serde::Serialize, serde::Deserialize)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Value { + #[prost(bytes, tag = "1")] + TypeString(::prost::alloc::vec::Vec), + #[prost(int64, tag = "2")] + TypeInt(i64), + #[prost(double, tag = "3")] + TypeFloat(f64), + } +} +/// ColumnValues represent a set of column values that correspond with a particular +/// SortSchema. +#[derive(serde::Serialize, serde::Deserialize)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ColumnValues { + #[prost(message, repeated, tag = "1")] + pub column: ::prost::alloc::vec::Vec, +} +/// RowKeys represent the "row keys" of the first and last rows of a fragment file. +/// The values are the values of the sort columns at the first and last row, respectively. +#[derive(serde::Serialize, serde::Deserialize)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RowKeys { + /// These are the values of the sort columns at the first row, thus defining the + /// lower inclusive boundary of the fragment file, i.e. using the minimum values of any + /// multi-values in the column values(1), which is what the sorting operates upon. + #[prost(message, optional, tag = "1")] + pub min_row_values: ::core::option::Option, + /// These are the values of the sort columns at the last row, thus defining the + /// upper boundary of the sorted rows of the fragment file, i.e. using the minimum + /// values of any multi-values in the column values(1), which is what the sorting + /// operates upon. + /// + /// Note that the string fields in min_row_values and max_row_values are trimmed + /// to avoid storing "too large" values in the metadata server, and the sort order + /// of the rows in the fragment files are determined using these trimmed keys to + /// match. 
+ /// + /// During m:n merging of fragments, when boundaries of fragments are selected, + /// they are selected at transitions of key values of the trimmed keys so that the + /// \[min_row_values, max_row_values\] ranges of the output fragments do not overlap. + /// This property is important for two reasons. First, we want to prune fragments + /// from queries based on the values of the sort schema columns. By making the + /// fragments non-overlapping, we enable this pruning. Secondly, we don't want + /// compaction strategies like the classic LSM strategy to re-merge the same + /// fragments over and over. + /// + /// An interesting consequence of the key trimming is that a fragment may have + /// a row in it with values that come "after" the max_row_values keys outside + /// of the "trimmed" range of the column values. For example, if the trim budget + /// for a column is four characters, and the fragment ends at "abcd", "abcd" and + /// "abcd123" would yield the same trimmed key value and would both be stored in + /// the same fragment. However "abce" would be stored in a different fragment. + #[prost(message, optional, tag = "2")] + pub max_row_values: ::core::option::Option, + /// This set of values defines the all-inclusive range boundary of the file considering + /// the max values of any multi-values in the column values(2). This boundary is the + /// boundary w.r.t. queries. + #[prost(message, optional, tag = "3")] + pub all_inclusive_max_row_values: ::core::option::Option, + /// For track table expirations (splitting M fragments into N1 expired fragments and N2 live fragments), + /// there is a potential for output fragments to overlap in key range. This is because the sort schema of track + /// table starts with retention at intake + scopeID, and retention can change between the moment the fragment + /// is created and the moment it is expired. 
To give an example, say a fragment has scopeIDs 1, 2, 3, 4, 5, + /// all with a retention at intake of 7 days. If the retention for 3 and 5 is changed to 15 days, + /// then 7 days later at expiration the fragment will be split into two fragments, + /// one "expired" with scopeIDs 1, 2 and 4 and one "live" with scopeIDs 3 and 5. + /// The row keys will be (7:1..., 7:4...) for the first fragment, and (7:3...,7:5...) for the second, + /// which overlap. To avoid this, we add an `expired` boolean to the row keys, so we allow a live and an expired + /// fragment to overlap in key range. + #[prost(bool, tag = "4")] + pub expired: bool, +} +#[derive(serde::Serialize, serde::Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum SortColumnDirection { + /// SortDirectionUnknown will pop up if someone forgets to initialize a field. + SortDirectionUnknown = 0, + /// SortDirectionAscending sorts columns in ascending value order. + SortDirectionAscending = 1, + /// SortDirectionDescending sorts columns in descending value order. + SortDirectionDescending = 2, +} +impl SortColumnDirection { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::SortDirectionUnknown => "SortDirectionUnknown", + Self::SortDirectionAscending => "SortDirectionAscending", + Self::SortDirectionDescending => "SortDirectionDescending", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. 
+ pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "SortDirectionUnknown" => Some(Self::SortDirectionUnknown), + "SortDirectionAscending" => Some(Self::SortDirectionAscending), + "SortDirectionDescending" => Some(Self::SortDirectionDescending), + _ => None, + } + } +} diff --git a/quickwit/quickwit-proto/src/lib.rs b/quickwit/quickwit-proto/src/lib.rs index dbe850b55b7..3a7f7ea8992 100644 --- a/quickwit/quickwit-proto/src/lib.rs +++ b/quickwit/quickwit-proto/src/lib.rs @@ -15,6 +15,7 @@ #![allow(clippy::derive_partial_eq_without_eq)] #![allow(clippy::disallowed_methods)] #![allow(clippy::doc_lazy_continuation)] +#![allow(deprecated)] // prost::DecodeError::new is deprecated but used in generated decode impls #![allow(rustdoc::invalid_html_tags)] use std::cmp::Ordering; @@ -37,10 +38,16 @@ pub mod indexing; pub mod ingest; pub mod metastore; pub mod search; +pub mod sort_fields_error; pub mod types; pub use error::{GrpcServiceError, ServiceError, ServiceErrorCode}; use search::ReportSplitsRequest; +pub use sort_fields_error::SortFieldsError; + +pub mod sortschema { + include!("codegen/sortschema/sortschema.rs"); +} pub mod jaeger { pub mod api_v2 { @@ -123,7 +130,8 @@ impl TryFrom for search::SearchRequest { pub struct MutMetadataMap<'a>(&'a mut tonic::metadata::MetadataMap); impl Injector for MutMetadataMap<'_> { - /// Sets a key-value pair in the [`MetadataMap`]. No-op if the key or value is invalid. + /// Sets a key-value pair in the [`tonic::metadata::MetadataMap`]. No-op if the key or value + /// is invalid. fn set(&mut self, key: &str, value: String) { if let Ok(metadata_key) = tonic::metadata::MetadataKey::from_bytes(key.as_bytes()) && let Ok(metadata_value) = tonic::metadata::MetadataValue::try_from(&value) @@ -134,13 +142,13 @@ impl Injector for MutMetadataMap<'_> { } impl Extractor for MutMetadataMap<'_> { - /// Gets a value for a key from the MetadataMap. 
If the value can't be converted to &str, + /// Gets a value for a key from the `MetadataMap`. If the value can't be converted to &str, /// returns None. fn get(&self, key: &str) -> Option<&str> { self.0.get(key).and_then(|metadata| metadata.to_str().ok()) } - /// Collect all the keys from the MetadataMap. + /// Collect all the keys from the `MetadataMap`. fn keys(&self) -> Vec<&str> { self.0 .keys() @@ -174,13 +182,13 @@ impl Interceptor for SpanContextInterceptor { struct MetadataMap<'a>(&'a tonic::metadata::MetadataMap); impl Extractor for MetadataMap<'_> { - /// Gets a value for a key from the MetadataMap. If the value can't be converted to &str, + /// Gets a value for a key from the `MetadataMap`. If the value can't be converted to &str, /// returns None. fn get(&self, key: &str) -> Option<&str> { self.0.get(key).and_then(|metadata| metadata.to_str().ok()) } - /// Collect all the keys from the MetadataMap. + /// Collect all the keys from the `MetadataMap`. fn keys(&self) -> Vec<&str> { self.0 .keys() diff --git a/quickwit/quickwit-proto/src/metastore/mod.rs b/quickwit/quickwit-proto/src/metastore/mod.rs index ba371c13d4a..4f53b9abb5c 100644 --- a/quickwit/quickwit-proto/src/metastore/mod.rs +++ b/quickwit/quickwit-proto/src/metastore/mod.rs @@ -193,6 +193,15 @@ impl From for MetastoreError { } } +impl From for MetastoreError { + fn from(err: crate::SortFieldsError) -> Self { + MetastoreError::Internal { + message: "sort fields error".to_string(), + cause: err.to_string(), + } + } +} + impl ServiceError for MetastoreError { fn error_code(&self) -> ServiceErrorCode { match self { diff --git a/quickwit/quickwit-proto/src/sort_fields_error.rs b/quickwit/quickwit-proto/src/sort_fields_error.rs new file mode 100644 index 00000000000..8fa6ebc15b3 --- /dev/null +++ b/quickwit/quickwit-proto/src/sort_fields_error.rs @@ -0,0 +1,78 @@ +// Copyright 2021-Present Datadog, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Error types for sort fields parsing and validation. + +/// Errors arising from sort fields parsing, validation, and time-window arithmetic. +#[derive(Debug, thiserror::Error)] +pub enum SortFieldsError { + /// Window duration does not meet the divisibility constraint. + #[error("invalid window duration {duration_secs}s: {reason}")] + InvalidWindowDuration { + duration_secs: u32, + reason: &'static str, + }, + + /// Schema string is syntactically malformed. + #[error("{0}")] + MalformedSchema(String), + + /// Version suffix could not be parsed. + #[error("{0}")] + BadSortVersion(String), + + /// Sort version is below the minimum accepted (V2-only enforcement). + #[error("unsupported sort version {version}, minimum is {minimum}")] + UnsupportedVersion { version: i32, minimum: i32 }, + + /// Invalid placement or usage of the `&` LSM cutoff marker. + #[error("{0}")] + InvalidCutoffPlacement(String), + + /// Column specification has wrong number of parts. + #[error("{0}")] + InvalidColumnFormat(String), + + /// Unknown column type (from suffix or explicit name). + #[error("{0}")] + UnknownColumnType(String), + + /// Explicit column type does not match the type inferred from the suffix. 
+ #[error( + "column type doesn't match type deduced from suffix for {column}, deduced={from_suffix}, \ + explicit={explicit}" + )] + TypeMismatch { + column: String, + from_suffix: String, + explicit: String, + }, + + /// Unrecognized sort direction string. + #[error("{0}")] + UnknownSortDirection(String), + + /// Sort direction specified in multiple places (e.g., both prefix and suffix, + /// or prefix/suffix combined with colon-separated direction). + #[error("sort direction specified multiple times for column '{0}'")] + DuplicateDirection(String), + + /// Schema is structurally invalid (missing timestamp, wrong order, etc.). + #[error("{0}")] + ValidationError(String), + + /// window_start timestamp cannot be represented as a DateTime. + #[error("window_start timestamp {timestamp_secs} is out of representable range")] + WindowStartOutOfRange { timestamp_secs: i64 }, +} diff --git a/quickwit/quickwit-search/src/cluster_client.rs b/quickwit/quickwit-search/src/cluster_client.rs index 79f6ba81702..0a4518174ce 100644 --- a/quickwit/quickwit-search/src/cluster_client.rs +++ b/quickwit/quickwit-search/src/cluster_client.rs @@ -138,7 +138,7 @@ impl ClusterClient { /// Attempts to store a given key value pair within the cluster. /// - /// Tries to replicate the pair to [`TARGET_NUM_REPLICATION`] nodes, but this function may fail + /// Tries to replicate the pair to `TARGET_NUM_REPLICATION` nodes, but this function may fail /// silently (e.g if no client was available). Even in case of success, this storage is not /// persistent. For instance during a rolling upgrade, all replicas will be lost as there is no /// mechanism to maintain the replication count. 
diff --git a/quickwit/quickwit-storage/src/storage_resolver.rs b/quickwit/quickwit-storage/src/storage_resolver.rs index 6203d6a8d02..85329c19a86 100644 --- a/quickwit/quickwit-storage/src/storage_resolver.rs +++ b/quickwit/quickwit-storage/src/storage_resolver.rs @@ -43,7 +43,7 @@ impl fmt::Debug for StorageResolver { } impl StorageResolver { - /// Creates an empty [`StorageResolverBuilder`]. + /// Creates an empty `StorageResolverBuilder`. pub fn builder() -> StorageResolverBuilder { StorageResolverBuilder::default() }