diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc
index f930d3d7bdf..721244fdbe2 100644
--- a/cpp/src/parquet/arrow/arrow_schema_test.cc
+++ b/cpp/src/parquet/arrow/arrow_schema_test.cc
@@ -40,6 +40,7 @@
 #include "arrow/testing/gtest_util.h"
 #include "arrow/type.h"
 #include "arrow/util/base64.h"
+#include "arrow/util/checked_cast.h"
 #include "arrow/util/key_value_metadata.h"
 
 using arrow::Field;
@@ -2018,6 +2019,58 @@ TEST_F(TestConvertRoundTrip, FieldIdPreserveAllColumnTypes) {
   ASSERT_EQ(thrift_field_ids, expected_field_ids);
 }
 
+// Round-trips a sorted map field (Arrow -> Parquet -> Arrow) and checks that the
+// field id, keys_sorted and the key/value/nested-struct field metadata are restored.
+TEST_F(TestConvertRoundTrip, MapNestedFieldMetadataPreserved) {
+  auto key_meta = ::arrow::key_value_metadata({"k"}, {"v"});
+  auto inner_meta = ::arrow::key_value_metadata({"inner_k"}, {"inner_v"});
+
+  auto map_key = ::arrow::field("key", UTF8, /*nullable=*/false, key_meta);
+  auto map_value = ::arrow::field(
+      "value",
+      ::arrow::struct_({::arrow::field("inner", INT64, /*nullable=*/true, inner_meta)}),
+      /*nullable=*/true, inner_meta);
+  auto sorted_map =
+      std::make_shared<::arrow::MapType>(map_key, map_value, /*keys_sorted=*/true);
+  auto arrow_schema = ::arrow::schema(
+      {::arrow::field("m", sorted_map, /*nullable=*/true, FieldIdMetadata(99))});
+
+  std::shared_ptr<SchemaDescriptor> parquet_schema;
+  ASSERT_OK(ToParquetSchema(arrow_schema.get(), *::parquet::default_writer_properties(),
+                            &parquet_schema));
+
+  std::shared_ptr<const KeyValueMetadata> kv_metadata;
+  ASSERT_OK(ArrowSchemaToParquetMetadata(arrow_schema, kv_metadata));
+
+  std::shared_ptr<::arrow::Schema> restored_schema;
+  ASSERT_OK(FromParquetSchema(parquet_schema.get(), ArrowReaderProperties(), kv_metadata,
+                              &restored_schema));
+  ASSERT_EQ(restored_schema->num_fields(), 1);
+  ASSERT_EQ(GetFieldId(*restored_schema->field(0)), 99);
+
+  // checked_pointer_cast only verifies the dynamic type in debug builds, so assert
+  // the type id first to fail cleanly instead of mis-casting.
+  ASSERT_EQ(restored_schema->field(0)->type()->id(), ::arrow::Type::MAP);
+  auto restored_map = ::arrow::internal::checked_pointer_cast<::arrow::MapType>(
+      restored_schema->field(0)->type());
+
+  // It's a pity that we cannot directly use AssertTypeEqual on restored_map and
+  // sorted_map because ::arrow::MapType uses "entries" as the inner field name
+  // but Parquet uses "key_value" (see MapToNode in parquet/arrow/schema.cc).
+  ASSERT_TRUE(restored_map->keys_sorted());
+  ASSERT_NE(restored_map->key_field()->metadata(), nullptr);
+  ASSERT_OK_AND_EQ("v", restored_map->key_field()->metadata()->Get("k"));
+
+  ASSERT_NE(restored_map->item_field()->metadata(), nullptr);
+  ASSERT_OK_AND_EQ("inner_v", restored_map->item_field()->metadata()->Get("inner_k"));
+
+  const auto& restored_struct = restored_map->item_type();
+  ASSERT_EQ(restored_struct->id(), ::arrow::Type::STRUCT);
+  ASSERT_EQ(restored_struct->num_fields(), 1);
+  ASSERT_NE(restored_struct->field(0)->metadata(), nullptr);
+  ASSERT_OK_AND_EQ("inner_v", restored_struct->field(0)->metadata()->Get("inner_k"));
+}
+
 TEST(InvalidSchema, ParquetNegativeDecimalScale) {
   const auto& type = ::arrow::decimal128(23, -2);
   const auto& field = ::arrow::field("f0", type);
diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index 9c0db1d5335..ed30661f9b4 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -991,6 +991,18 @@ std::function<std::shared_ptr<::arrow::DataType>(FieldVector)> GetNestedFactory(
         };
       }
       break;
+    case ::arrow::Type::MAP:
+      // Rebuild the map around the single child field handed to the factory and
+      // carry over keys_sorted from the original Arrow type.
+      if (origin_type.id() == ::arrow::Type::MAP) {
+        const bool keys_sorted =
+            checked_cast<const ::arrow::MapType&>(origin_type).keys_sorted();
+        return [keys_sorted](FieldVector fields) {
+          DCHECK_EQ(fields.size(), 1);
+          return std::make_shared<::arrow::MapType>(std::move(fields[0]), keys_sorted);
+        };
+      }
+      break;
     default:
       break;
   }