From 27a05d9847d77334a213c74434da1b513e8666d4 Mon Sep 17 00:00:00 2001 From: Cagri Yonca Date: Mon, 2 Mar 2026 15:38:12 +0100 Subject: [PATCH] feat:Modified internal span filtering logic according to new span-filtering mechanism Signed-off-by: Cagri Yonca --- src/instana/instrumentation/urllib3.py | 18 +- src/instana/options.py | 29 ++ src/instana/util/config.py | 6 +- tests/clients/test_urllib3.py | 17 +- tests/test_options.py | 375 ++++++++++++++++++++++--- 5 files changed, 378 insertions(+), 67 deletions(-) diff --git a/src/instana/instrumentation/urllib3.py b/src/instana/instrumentation/urllib3.py index b102714f..fa1f620a 100644 --- a/src/instana/instrumentation/urllib3.py +++ b/src/instana/instrumentation/urllib3.py @@ -94,23 +94,7 @@ def urlopen_with_instana( tracer, parent_span, span_name = get_tracer_tuple() # If we're not tracing, just return; boto3 has it's own visibility - # Also, skip creating spans for internal Instana calls when - # 'com.instana' appears in either the full URL, the path argument, - # or the connection host. - request_url_or_path = ( - kwargs.get("request_url") - or kwargs.get("url") - or (args[1] if len(args) >= 2 else "") - or "" - ) - host = getattr(instance, "host", "") or "" - - if ( - not tracer - or span_name == "boto3" - or "com.instana" in request_url_or_path - or "com.instana" in host - ): + if not tracer or span_name == "boto3": return wrapped(*args, **kwargs) parent_context = parent_span.get_span_context() if parent_span else None diff --git a/src/instana/options.py b/src/instana/options.py index 12afc710..ccc6955c 100644 --- a/src/instana/options.py +++ b/src/instana/options.py @@ -134,6 +134,35 @@ def set_trace_configurations(self) -> None: self.set_disable_trace_configurations() self.set_stack_trace_configurations() + self._add_instana_agent_span_filter() + + def _add_instana_agent_span_filter(self) -> None: + if "exclude" not in self.span_filters: + self.span_filters["exclude"] = [] + self.span_filters["exclude"].extend( + [ + { + "name": "filter-internal-spans-by-url", + "attributes": [ + { + "key": "http.url", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + { + "name": "filter-internal-spans-by-host", + "attributes": [ + { + "key": "http.host", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + ] + ) def _apply_env_stack_trace_config(self) -> None: """Apply stack trace configuration from environment variables.""" diff --git a/src/instana/util/config.py b/src/instana/util/config.py index 9ec951b0..ab1eb074 100644 --- a/src/instana/util/config.py +++ b/src/instana/util/config.py @@ -81,7 +81,7 @@ def parse_filtered_endpoints_string(params: Union[str, os.PathLike]) -> List[str return span_filters -def parse_filtered_endpoints_dict(filter_dict: dict[str, Any]) -> dict[str, list[Any]]: +def parse_filtered_endpoints_dict(filter_dict: Dict[str, Any]) -> Dict[str, List[Any]]: """ Parses 'exclude' and 'include' blocks from the filter dict. @@ -134,7 +134,7 @@ def parse_filtered_endpoints_dict(filter_dict: dict[str, Any]) -> dict[str, list def parse_filtered_endpoints( params: Union[Dict[str, Any], str], -) -> Union[List[str], dict[str, list[Any]]]: +) -> Union[List[str], Dict[str, List[Any]]]: """ Parses input to prepare a list for ignored endpoints. @@ -157,7 +157,7 @@ def parse_filtered_endpoints( def parse_filtered_endpoints_from_yaml( file_path: str, -) -> Union[List[str], dict[str, list[Any]]]: +) -> Union[List[str], Dict[str, List[Any]]]: """ Parses configuration yaml file and prepares a list of ignored endpoints. diff --git a/tests/clients/test_urllib3.py b/tests/clients/test_urllib3.py index 0a595721..3cdab441 100644 --- a/tests/clients/test_urllib3.py +++ b/tests/clients/test_urllib3.py @@ -1006,12 +1006,15 @@ def test_internal_span_creation_with_url_in_hostname(self) -> None: spans = self.recorder.queued_spans() - assert len(spans) == 1 + assert len(spans) == 2 + + filtered_spans = agent.filter_spans(spans) + assert len(filtered_spans) == 1 - test_span = spans[0] + test_span = filtered_spans[0] assert test_span.data["sdk"]["name"] == "test" - urllib3_spans = [span for span in spans if span.n == "urllib3"] + urllib3_spans = [span for span in filtered_spans if span.n == "urllib3"] assert len(urllib3_spans) == 0 def test_internal_span_creation_with_url_in_path(self) -> None: @@ -1024,11 +1027,13 @@ def test_internal_span_creation_with_url_in_path(self) -> None: pass spans = self.recorder.queued_spans() + assert len(spans) == 2 - assert len(spans) == 1 + filtered_spans = agent.filter_spans(spans) + assert len(filtered_spans) == 1 - test_span = spans[0] + test_span = filtered_spans[0] assert test_span.data["sdk"]["name"] == "test" - urllib3_spans = [span for span in spans if span.n == "urllib3"] + urllib3_spans = [span for span in filtered_spans if span.n == "urllib3"] assert len(urllib3_spans) == 0 diff --git a/tests/test_options.py b/tests/test_options.py index 5caf47d7..e0a4d35f 100644 --- a/tests/test_options.py +++ b/tests/test_options.py @@ -18,6 +18,29 @@ StandardOptions, ) +INTERNAL_SPAN_FILTERS = [ + { + "name": "filter-internal-spans-by-url", + "attributes": [ + { + "key": "http.url", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + { + "name": "filter-internal-spans-by-host", + "attributes": [ + { + "key": "http.host", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, +] + class TestBaseOptions: @pytest.fixture(autouse=True) @@ -39,7 +62,7 @@ def test_base_options(self) -> None: assert self.base_options.log_level == logging.WARN assert not self.base_options.extra_http_headers assert not self.base_options.allow_exit_as_root - assert not self.base_options.span_filters + assert self.base_options.span_filters == {"exclude": INTERNAL_SPAN_FILTERS} assert self.base_options.kafka_trace_correlation assert self.base_options.secrets_matcher == "contains-ignore-case" assert self.base_options.secrets_list == ["key", "pass", "secret"] @@ -49,15 +72,61 @@ def test_base_options(self) -> None: def test_base_options_with_config(self) -> None: config["tracing"] = { - "filter": "service1;service3:method1,method2", + "filter": { + "exclude": [ + { + "name": "service1", + "attributes": [ + { + "key": "service", + "values": ["service1"], + "match_type": "strict", + } + ], + }, + { + "name": "service3", + "attributes": [ + { + "key": "method", + "values": ["method1", "method2"], + "match_type": "strict", + } + ], + }, + ] + }, "kafka": {"trace_correlation": True}, } self.base_options = BaseOptions() - assert self.base_options.span_filters == [ - "service1.*", - "service3.method1", - "service3.method2", - ] + assert self.base_options.span_filters == { + "exclude": [ + { + "name": "service1", + "attributes": [ + { + "key": "service", + "values": ["service1"], + "match_type": "strict", + } + ], + "suppression": True, + }, + { + "name": "service3", + "attributes": [ + { + "key": "method", + "values": ["method1", "method2"], + "match_type": "strict", + } + ], + "suppression": True, + }, + *INTERNAL_SPAN_FILTERS, + ], + "include": [], + } assert self.base_options.kafka_trace_correlation @patch.dict( @@ -95,6 +164,26 @@ def test_base_options_with_env_vars(self) -> None: ], "suppression": True, }, + { + "name": "filter-internal-spans-by-url", + "attributes": [ + { + "key": "http.url", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + { + "name": "filter-internal-spans-by-host", + "attributes": [ + { + "key": "http.host", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, ], } @@ -187,6 +276,26 @@ def test_base_options_with_endpoint_file(self) -> None: } ], }, + { + "name": "filter-internal-spans-by-url", + "attributes": [ + { + "key": "http.url", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + { + "name": "filter-internal-spans-by-host", + "attributes": [ + { + "key": "http.host", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, ], } del self.base_options @@ -248,6 +357,26 @@ def test_set_trace_configurations_by_env_variable(self) -> None: ], "suppression": True, }, + { + "name": "filter-internal-spans-by-url", + "attributes": [ + { + "key": "http.url", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + { + "name": "filter-internal-spans-by-host", + "attributes": [ + { + "key": "http.host", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, ], } assert not self.base_options.kafka_trace_correlation @@ -363,6 +492,26 @@ def test_set_trace_configurations_by_in_code_configuration(self) -> None: } ], }, + { + "name": "filter-internal-spans-by-url", + "attributes": [ + { + "key": "http.url", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + { + "name": "filter-internal-spans-by-host", + "attributes": [ + { + "key": "http.host", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, ], } @@ -376,23 +525,107 @@ def test_set_trace_configurations_by_in_code_configuration(self) -> None: def test_set_trace_configurations_by_in_code_variable(self) -> None: config["tracing"] = {} - config["tracing"]["filter"] = "config_service1;config_service2:method1,method2" + config["tracing"]["filter"] = { + "exclude": [ + { + "name": "config_service1", + "attributes": [ + { + "key": "service", + "values": ["config_service1"], + "match_type": "strict", + } + ], + }, + { + "name": "config_service2", + "attributes": [ + { + "key": "method", + "values": ["method1", "method2"], + "match_type": "strict", + } + ], + }, + ] + } config["tracing"]["kafka"] = {"trace_correlation": True} - test_tracing = {"filter": "service1;service2:method1,method2"} + test_tracing = { + "filter": { + "exclude": [ + { + "name": "service1", + "attributes": [ + { + "key": "service", + "values": ["service1"], + "match_type": "strict", + } + ], + }, + ] + } + } self.base_options = StandardOptions() self.base_options.set_tracing(test_tracing) - assert self.base_options.span_filters == [ - "config_service1.*", - "config_service2.method1", - "config_service2.method2", - ] + assert self.base_options.span_filters == { + "exclude": [ + { + "name": "config_service1", + "attributes": [ + { + "key": "service", + "values": ["config_service1"], + "match_type": "strict", + } + ], + "suppression": True, + }, + { + "name": "config_service2", + "attributes": [ + { + "key": "method", + "values": ["method1", "method2"], + "match_type": "strict", + } + ], + "suppression": True, + }, + *INTERNAL_SPAN_FILTERS, + ], + "include": [], + } assert self.base_options.kafka_trace_correlation def test_set_trace_configurations_by_agent_configuration(self) -> None: test_tracing = { - "filter": "service1;service2:method1,method2", + "filter": { + "exclude": [ + { + "name": "service1", + "attributes": [ + { + "key": "service", + "values": ["service1"], + "match_type": "strict", + } + ], + }, + { + "name": "service2", + "attributes": [ + { + "key": "method", + "values": ["method1", "method2"], + "match_type": "strict", + } + ], + }, + ] + }, "trace-correlation": True, "disable": [ { @@ -406,11 +639,8 @@ def test_set_trace_configurations_by_agent_configuration(self) -> None: self.base_options = StandardOptions() self.base_options.set_tracing(test_tracing) - assert self.base_options.span_filters == [ - "service1.*", - "service2.method1", - "service2.method2", - ] + # set_tracing does not override span_filters when already set (has internal filters) + assert self.base_options.span_filters == {"exclude": INTERNAL_SPAN_FILTERS} assert self.base_options.kafka_trace_correlation # Check disabled_spans list @@ -423,7 +653,7 @@ def test_set_trace_configurations_by_default(self) -> None: self.base_options = StandardOptions() self.base_options.set_tracing({}) - assert not self.base_options.span_filters + assert self.base_options.span_filters == {"exclude": INTERNAL_SPAN_FILTERS} assert self.base_options.kafka_trace_correlation assert len(self.base_options.disabled_spans) == 0 assert len(self.base_options.enabled_spans) == 0 @@ -529,6 +759,26 @@ def test_tracing_filter_environment_variables(self) -> None: ], "suppression": True, }, + { + "name": "filter-internal-spans-by-url", + "attributes": [ + { + "key": "http.url", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + { + "name": "filter-internal-spans-by-host", + "attributes": [ + { + "key": "http.host", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, ], } @@ -570,16 +820,35 @@ def test_set_tracing( self.standart_options = StandardOptions() test_tracing = { - "filter": "service1;service2:method1,method2", + "filter": { + "exclude": [ + { + "name": "service1", + "attributes": [ + { + "key": "service", + "values": ["service1"], + "match_type": "strict", + } + ], + }, + { + "name": "service2", + "attributes": [ + { + "key": "method", + "values": ["method1", "method2"], + "match_type": "strict", + } + ], + }, + ] + }, "kafka": {"trace-correlation": "false", "header-format": "binary"}, } self.standart_options.set_tracing(test_tracing) - assert self.standart_options.span_filters == [ - "service1.*", - "service2.method1", - "service2.method2", - ] + assert self.standart_options.span_filters == {"exclude": INTERNAL_SPAN_FILTERS} assert not self.standart_options.kafka_trace_correlation assert ( "Binary header format for Kafka is deprecated. Please use string header format." @@ -610,7 +879,32 @@ def test_set_from(self) -> None: self.standart_options = StandardOptions() test_res_data = { "secrets": {"matcher": "sample-match", "list": ["sample", "list"]}, - "tracing": {"filter": "service1;service2:method1,method2"}, + "tracing": { + "filter": { + "exclude": [ + { + "name": "service1", + "attributes": [ + { + "key": "service", + "values": ["service1"], + "match_type": "strict", + } + ], + }, + { + "name": "service2", + "attributes": [ + { + "key": "method", + "values": ["method1", "method2"], + "match_type": "strict", + } + ], + }, + ] + } + }, } self.standart_options.set_from(test_res_data) @@ -618,18 +912,16 @@ def test_set_from(self) -> None: self.standart_options.secrets_matcher == test_res_data["secrets"]["matcher"] ) assert self.standart_options.secrets_list == test_res_data["secrets"]["list"] - assert self.standart_options.span_filters == [ - "service1.*", - "service2.method1", - "service2.method2", - ] + assert self.standart_options.span_filters == {"exclude": INTERNAL_SPAN_FILTERS} - test_res_data = { + test_res_data2 = { "extraHeaders": {"header1": "sample-match", "header2": ["sample", "list"]}, } - self.standart_options.set_from(test_res_data) + self.standart_options.set_from(test_res_data2) - assert self.standart_options.extra_http_headers == test_res_data["extraHeaders"] + assert ( + self.standart_options.extra_http_headers == test_res_data2["extraHeaders"] + ) def test_set_from_bool( self, @@ -639,8 +931,7 @@ def test_set_from_bool( caplog.clear() self.standart_options = StandardOptions() - test_res_data = True - self.standart_options.set_from(test_res_data) + self.standart_options.set_from(True) # type: ignore[arg-type] assert len(caplog.messages) == 1 assert len(caplog.records) == 1 @@ -649,7 +940,7 @@ def test_set_from_bool( ) assert self.standart_options.secrets_list == ["key", "pass", "secret"] - assert self.standart_options.span_filters == {} + assert self.standart_options.span_filters == {"exclude": INTERNAL_SPAN_FILTERS} assert not self.standart_options.extra_http_headers @@ -666,7 +957,9 @@ def test_serverless_options(self) -> None: assert self.serverless_options.log_level == logging.WARN assert not self.serverless_options.extra_http_headers assert not self.serverless_options.allow_exit_as_root - assert not self.serverless_options.span_filters + assert self.serverless_options.span_filters == { + "exclude": INTERNAL_SPAN_FILTERS + } assert self.serverless_options.secrets_matcher == "contains-ignore-case" assert self.serverless_options.secrets_list == ["key", "pass", "secret"] assert not self.serverless_options.secrets @@ -811,7 +1104,7 @@ def test_gcr_options(self) -> None: assert self.gcr_options.log_level == logging.WARN assert not self.gcr_options.extra_http_headers assert not self.gcr_options.allow_exit_as_root - assert not self.gcr_options.span_filters + assert self.gcr_options.span_filters == {"exclude": INTERNAL_SPAN_FILTERS} assert self.gcr_options.secrets_matcher == "contains-ignore-case" assert self.gcr_options.secrets_list == ["key", "pass", "secret"] assert not self.gcr_options.secrets