From bbf5682181e36eda12a2f32e7535b9924130ff49 Mon Sep 17 00:00:00 2001 From: Kanthi Subramanian Date: Fri, 27 Feb 2026 23:59:34 +0100 Subject: [PATCH 1/2] Use undecoded value as s3 key so requests don't fail --- src/Storages/ObjectStorage/Utils.cpp | 5 +- .../integration/test_database_iceberg/test.py | 46 +++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index c4629582ee28..d3da3d3a249b 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -342,8 +342,9 @@ std::pair resolveObjectStorageForPath( normalized_path = "s3://" + target_decomposed.authority + "/" + target_decomposed.key; } S3::URI s3_uri(normalized_path); - - std::string key_to_use = s3_uri.key; + + std::string key_to_use = target_decomposed.key; + bool use_base_storage = false; if (base_storage->getType() == ObjectStorageType::S3) diff --git a/tests/integration/test_database_iceberg/test.py b/tests/integration/test_database_iceberg/test.py index 2de6586e3787..a146cf4d5c07 100644 --- a/tests/integration/test_database_iceberg/test.py +++ b/tests/integration/test_database_iceberg/test.py @@ -660,6 +660,52 @@ def test_table_with_slash(started_cluster): assert node.query(f"SELECT * FROM {CATALOG_NAME}.`{root_namespace}.{table_encoded_name}`") == "\\N\tAAPL\t193.24\t193.31\t('bot')\n" +def test_partition_value_with_slash(started_cluster): + """Partition value containing '/' produces object keys with %2F; reading must preserve encoding.""" + node = started_cluster.instances["node1"] + + test_ref = f"test_partition_slash_{uuid.uuid4()}" + table_name = f"{test_ref}_table" + root_namespace = f"{test_ref}_namespace" + + # Partition by symbol (string) so partition value "us/west" becomes path segment symbol=us%2Fwest + partition_spec = PartitionSpec( + PartitionField( + source_id=2, field_id=1000, transform=IdentityTransform(), name="symbol" + ) + ) + schema = 
DEFAULT_SCHEMA + + catalog = load_catalog_impl(started_cluster) + catalog.create_namespace(root_namespace) + + table = create_table( + catalog, + root_namespace, + table_name, + schema, + partition_spec=partition_spec, + sort_order=DEFAULT_SORT_ORDER, + ) + + # Write a row with partition value containing slash (path will have %2F in S3 key) + data = [ + { + "datetime": datetime.now(), + "symbol": "us/west", + "bid": 100.0, + "ask": 101.0, + "details": {"created_by": "test"}, + } + ] + df = pa.Table.from_pylist(data) + table.append(df) + + create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME) + assert 1 == int(node.query(f"SELECT count() FROM {CATALOG_NAME}.`{root_namespace}.{table_name}`")) + assert "us/west" in node.query(f"SELECT symbol FROM {CATALOG_NAME}.`{root_namespace}.{table_name}`") + + def test_cluster_joins(started_cluster): node = started_cluster.instances["node1"] From 697b43dacbe9085c915220005de1fb8838dfbc60 Mon Sep 17 00:00:00 2001 From: Kanthi Subramanian Date: Tue, 3 Mar 2026 16:47:06 +0100 Subject: [PATCH 2/2] Added comments --- src/Storages/ObjectStorage/Utils.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index d3da3d3a249b..229a7d1a99d7 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -342,10 +342,11 @@ std::pair resolveObjectStorageForPath( normalized_path = "s3://" + target_decomposed.authority + "/" + target_decomposed.key; } S3::URI s3_uri(normalized_path); - + + // Use target_decomposed.key (parsed without URI decoding) so that percent-encoded + // characters in object keys (e.g. %2F in Iceberg partition paths) are preserved. std::string key_to_use = target_decomposed.key; - bool use_base_storage = false; if (base_storage->getType() == ObjectStorageType::S3) {