From ef0301a5fc51f1f413c145231d9ccbde83a52727 Mon Sep 17 00:00:00 2001 From: Jeevan Opel Date: Tue, 22 Oct 2024 12:16:50 -0700 Subject: [PATCH 01/15] Add code generator --- compile/plugin.py | 237 ++++++++++++ compile/templates/template.py.jinja2 | 44 +++ scripts/proto_codegen.sh | 63 ++++ .../proto/collector/logs/v1/logs_service.py | 42 +++ .../collector/metrics/v1/metrics_service.py | 42 +++ .../proto/collector/trace/v1/trace_service.py | 42 +++ .../opentelemetry/proto/common/v1/common.py | 87 +++++ .../opentelemetry/proto/logs/v1/logs.py | 120 ++++++ .../opentelemetry/proto/metrics/v1/metrics.py | 341 ++++++++++++++++++ .../proto/resource/v1/resource.py | 24 ++ .../opentelemetry/proto/trace/v1/trace.py | 181 ++++++++++ .../telemetry/_internal/serialize/__init__.py | 145 ++++++++ 12 files changed, 1368 insertions(+) create mode 100755 compile/plugin.py create mode 100644 compile/templates/template.py.jinja2 create mode 100755 scripts/proto_codegen.sh create mode 100644 src/snowflake/telemetry/_internal/opentelemetry/proto/collector/logs/v1/logs_service.py create mode 100644 src/snowflake/telemetry/_internal/opentelemetry/proto/collector/metrics/v1/metrics_service.py create mode 100644 src/snowflake/telemetry/_internal/opentelemetry/proto/collector/trace/v1/trace_service.py create mode 100644 src/snowflake/telemetry/_internal/opentelemetry/proto/common/v1/common.py create mode 100644 src/snowflake/telemetry/_internal/opentelemetry/proto/logs/v1/logs.py create mode 100644 src/snowflake/telemetry/_internal/opentelemetry/proto/metrics/v1/metrics.py create mode 100644 src/snowflake/telemetry/_internal/opentelemetry/proto/resource/v1/resource.py create mode 100644 src/snowflake/telemetry/_internal/opentelemetry/proto/trace/v1/trace.py create mode 100644 src/snowflake/telemetry/_internal/serialize/__init__.py diff --git a/compile/plugin.py b/compile/plugin.py new file mode 100755 index 0000000..588c2fc --- /dev/null +++ b/compile/plugin.py @@ -0,0 +1,237 @@ +#!/usr/bin/env python3 + +import os +import sys +from dataclasses import dataclass, field +from typing import List, Optional +from enum import IntEnum + +from google.protobuf.compiler import plugin_pb2 as plugin +from google.protobuf.descriptor_pb2 import ( + FileDescriptorProto, + FieldDescriptorProto, + EnumDescriptorProto, + EnumValueDescriptorProto, + MethodDescriptorProto, + ServiceDescriptorProto, + DescriptorProto, +) +from jinja2 import Environment, FileSystemLoader +import black +import isort.api + +class WireType(IntEnum): + VARINT = 0 + I64 = 1 + LEN = 2 + I32 = 5 + +@dataclass +class ProtoTypeDescriptor: + name: str + wire_type: WireType + python_type: str + +proto_type_to_descriptor = { + FieldDescriptorProto.TYPE_BOOL: ProtoTypeDescriptor("bool", WireType.VARINT, "bool"), + FieldDescriptorProto.TYPE_ENUM: ProtoTypeDescriptor("enum", WireType.VARINT, "int"), + FieldDescriptorProto.TYPE_INT32: ProtoTypeDescriptor("int32", WireType.VARINT, "int"), + FieldDescriptorProto.TYPE_INT64: ProtoTypeDescriptor("int64", WireType.VARINT, "int"), + FieldDescriptorProto.TYPE_UINT32: ProtoTypeDescriptor("uint32", WireType.VARINT, "int"), + FieldDescriptorProto.TYPE_UINT64: ProtoTypeDescriptor("uint64", WireType.VARINT, "int"), + FieldDescriptorProto.TYPE_SINT32: ProtoTypeDescriptor("sint32", WireType.VARINT, "int"), + FieldDescriptorProto.TYPE_SINT64: ProtoTypeDescriptor("sint64", WireType.VARINT, "int"), + FieldDescriptorProto.TYPE_FIXED32: ProtoTypeDescriptor("fixed32", WireType.I32, "int"), + FieldDescriptorProto.TYPE_FIXED64: ProtoTypeDescriptor("fixed64", WireType.I64, "int"), + FieldDescriptorProto.TYPE_SFIXED32: ProtoTypeDescriptor("sfixed32", WireType.I32, "int"), + FieldDescriptorProto.TYPE_SFIXED64: ProtoTypeDescriptor("sfixed64", WireType.I64, "int"), + FieldDescriptorProto.TYPE_FLOAT: ProtoTypeDescriptor("float", WireType.I32, "float"), + FieldDescriptorProto.TYPE_DOUBLE: ProtoTypeDescriptor("double", WireType.I64, "float"), + FieldDescriptorProto.TYPE_STRING: ProtoTypeDescriptor("string", WireType.LEN, "str"), + FieldDescriptorProto.TYPE_BYTES: ProtoTypeDescriptor("bytes", WireType.LEN, "bytes"), + FieldDescriptorProto.TYPE_MESSAGE: ProtoTypeDescriptor("message", WireType.LEN, "bytes"), +} + +@dataclass +class EnumValueTemplate: + name: str + number: int + + @staticmethod + def from_descriptor(descriptor: EnumValueDescriptorProto) -> "EnumValueTemplate": + return EnumValueTemplate( + name=descriptor.name, + number=descriptor.number, + ) + +@dataclass +class EnumTemplate: + name: str + values: List["EnumValueTemplate"] = field(default_factory=list) + + @staticmethod + def from_descriptor(descriptor: EnumDescriptorProto, parent: str = "") -> "EnumTemplate": + return EnumTemplate( + name=parent + "_" + descriptor.name if parent else descriptor.name, + values=[EnumValueTemplate.from_descriptor(value) for value in descriptor.value], + ) + +def tag_to_repr_varint(tag: int) -> str: + out = bytearray() + while tag >= 128: + out.append((tag & 0x7F) | 0x80) + tag >>= 7 + out.append(tag) + return repr(bytes(out)) + +@dataclass +class FieldTemplate: + name: str + number: int + tag: str + python_type: str + proto_type: str + repeated: bool + group: str + encode_presence: bool + + @staticmethod + def from_descriptor(descriptor: FieldDescriptorProto, group: Optional[str] = None) -> "FieldTemplate": + repeated = descriptor.label == FieldDescriptorProto.LABEL_REPEATED + type_descriptor = proto_type_to_descriptor[descriptor.type] + + python_type = type_descriptor.python_type + proto_type = type_descriptor.name + + if repeated: + python_type = f"List[{python_type}]" + proto_type = f"repeated_{proto_type}" + + tag = (descriptor.number << 3) | type_descriptor.wire_type.value + if repeated and type_descriptor.wire_type != WireType.LEN: + # Special case: repeated primitive fields are packed + # So we need to use the length-delimited wire type + tag = (descriptor.number << 3) | WireType.LEN.value + # Convert the tag to a varint representation + # Saves us from having to calculate the tag at runtime + tag = tag_to_repr_varint(tag) + + # For group / oneof fields, we need to encode the presence of the field + # For message fields, we need to encode the presence of the field if it is not None + encode_presence = group is not None or proto_type == "message" + + return FieldTemplate( + name=descriptor.name, + tag=tag, + number=descriptor.number, + python_type=python_type, + proto_type=proto_type, + repeated=repeated, + group=group, + encode_presence=encode_presence, + ) + +@dataclass +class MessageTemplate: + name: str + fields: List[FieldTemplate] = field(default_factory=list) + enums: List["EnumTemplate"] = field(default_factory=list) + messages: List["MessageTemplate"] = field(default_factory=list) + + @staticmethod + def from_descriptor(descriptor: DescriptorProto, parent: str = "") -> "MessageTemplate": + def get_group(field: FieldDescriptorProto) -> str: + return descriptor.oneof_decl[field.oneof_index].name if field.HasField("oneof_index") else None + fields = [FieldTemplate.from_descriptor(field, get_group(field)) for field in descriptor.field] + fields.sort(key=lambda field: field.number) + + name = parent + "_" + descriptor.name if parent else descriptor.name + return MessageTemplate( + name=name, + fields=fields, + enums=[EnumTemplate.from_descriptor(enum, name) for enum in descriptor.enum_type], + messages=[MessageTemplate.from_descriptor(message, name) for message in descriptor.nested_type], + ) + +@dataclass +class MethodTemplate: + name: str + input_message: MessageTemplate + output_message: MessageTemplate + + @staticmethod + def from_descriptor(descriptor: MethodDescriptorProto) -> "MethodTemplate": + return MethodTemplate( + name=descriptor.name, + input_message=MessageTemplate(name=descriptor.input_type), + output_message=MessageTemplate(name=descriptor.output_type), + ) + +@dataclass +class ServiceTemplate: + name: str + methods: List["MethodTemplate"] = field(default_factory=list) + + @staticmethod + def from_descriptor(descriptor: ServiceDescriptorProto) -> "ServiceTemplate": + return ServiceTemplate( + name=descriptor.name, + methods=[MethodTemplate.from_descriptor(method) for method in descriptor.method], + ) + +@dataclass +class FileTemplate: + messages: List["MessageTemplate"] = field(default_factory=list) + enums: List["EnumTemplate"] = field(default_factory=list) + services: List["ServiceTemplate"] = field(default_factory=list) + name: str = "" + + @staticmethod + def from_descriptor(descriptor: FileDescriptorProto) -> "FileTemplate": + return FileTemplate( + messages=[MessageTemplate.from_descriptor(message) for message in descriptor.message_type], + enums=[EnumTemplate.from_descriptor(enum) for enum in descriptor.enum_type], + services=[ServiceTemplate.from_descriptor(service) for service in descriptor.service], + name=descriptor.name, + ) + +def main(): + request = plugin.CodeGeneratorRequest() + request.ParseFromString(sys.stdin.buffer.read()) + + response = plugin.CodeGeneratorResponse() + # needed since metrics.proto uses proto3 optional fields + response.supported_features = plugin.CodeGeneratorResponse.FEATURE_PROTO3_OPTIONAL + + template_env = Environment(loader=FileSystemLoader(f"{os.path.dirname(os.path.realpath(__file__))}/templates")) + jinja_body_template = template_env.get_template("template.py.jinja2") + + for proto_file in request.proto_file: + file_name = proto_file.name.replace('.proto', '.py') + file_descriptor_proto = proto_file + + file_template = FileTemplate.from_descriptor(file_descriptor_proto) + + code = jinja_body_template.render(file_template=file_template) + code = isort.api.sort_code_string( + code = code, + show_diff=False, + profile="black", + combine_as_imports=True, + lines_after_imports=2, + quiet=True, + force_grid_wrap=2, + ) + code = black.format_str( + src_contents=code, + mode=black.Mode(), + ) + + response_file = response.file.add() + response_file.name = file_name + response_file.content = code + + sys.stdout.buffer.write(response.SerializeToString()) + +if __name__ == '__main__': + main() diff --git a/compile/templates/template.py.jinja2 b/compile/templates/template.py.jinja2 new file mode 100644 index 0000000..2246245 --- /dev/null +++ b/compile/templates/template.py.jinja2 @@ -0,0 +1,44 @@ +# Generated by the protoc compiler with a custom plugin. DO NOT EDIT! +# sources: {{ file_template.name }} + +from snowflake.telemetry._internal.serialize import ( + Enum, + ProtoSerializer, +) +from typing import List, Optional + +{% for enum in file_template.enums %} +class {{ enum.name }}(Enum): +{%- for value in enum.values %} + {{ value.name }} = {{ value.number }} +{%- endfor %} +{% endfor %} + +{% macro render_message(message) %} +def {{ message.name }}( +{%- for field in message.fields %} + {{ field.name }}: Optional[{{ field.python_type }}] = None, +{%- endfor %} +) -> bytes: + proto_serializer = ProtoSerializer() +{%- for field in message.fields %} + if {{ field.name }}{% if field.encode_presence %} is not None{% endif %}: {% if field.group %}# oneof group {{ field.group }}{% endif %} + proto_serializer.serialize_{{ field.proto_type }}({{ field.tag }}, {{ field.name }}) +{%- endfor %} + return proto_serializer.out + +{% for nested_enum in message.enums %} +class {{ nested_enum.name }}(Enum): +{%- for value in nested_enum.values %} + {{ value.name }} = {{ value.number }} +{%- endfor %} +{% endfor %} + +{% for nested_message in message.messages %} +{{ render_message(nested_message) }} +{% endfor %} +{% endmacro %} + +{% for message in file_template.messages %} +{{ render_message(message) }} +{% endfor %} \ No newline at end of file diff --git a/scripts/proto_codegen.sh b/scripts/proto_codegen.sh new file mode 100755 index 0000000..304b347 --- /dev/null +++ b/scripts/proto_codegen.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# +# Regenerate python code from OTLP protos in +# https://github.com/open-telemetry/opentelemetry-proto +# +# To use, update PROTO_REPO_BRANCH_OR_COMMIT variable below to a commit hash or +# tag in opentelemtry-proto repo that you want to build off of. Then, just run +# this script to update the proto files. Commit the changes as well as any +# fixes needed in the OTLP exporter. +# +# Optional envars: +# PROTO_REPO_DIR - the path to an existing checkout of the opentelemetry-proto repo + +# Pinned commit/branch/tag for the current version used in opentelemetry-proto python package. +PROTO_REPO_BRANCH_OR_COMMIT="v1.2.0" + +set -e + +PROTO_REPO_DIR=${PROTO_REPO_DIR:-"/tmp/opentelemetry-proto"} +# root of opentelemetry-python repo +repo_root="$(git rev-parse --show-toplevel)" +venv_dir="/tmp/proto_codegen_venv" + +# run on exit even if crash +cleanup() { + echo "Deleting $venv_dir" + rm -rf $venv_dir +} +trap cleanup EXIT + +echo "Creating temporary virtualenv at $venv_dir using $(python3 --version)" +python3 -m venv $venv_dir +source $venv_dir/bin/activate +python -m pip install protobuf Jinja2 grpcio-tools black isort +echo 'python -m grpc_tools.protoc --version' +python -m grpc_tools.protoc --version + +# Clone the proto repo if it doesn't exist +if [ ! -d "$PROTO_REPO_DIR" ]; then + git clone https://github.com/open-telemetry/opentelemetry-proto.git $PROTO_REPO_DIR +fi + +# Pull in changes and switch to requested branch +( + cd $PROTO_REPO_DIR + git fetch --all + git checkout $PROTO_REPO_BRANCH_OR_COMMIT + # pull if PROTO_REPO_BRANCH_OR_COMMIT is not a detached head + git symbolic-ref -q HEAD && git pull --ff-only || true +) + +cd $repo_root/src/snowflake/telemetry/_internal + +# clean up old generated code +find opentelemetry/ -regex ".*_pb2.*\.pyi?" -exec rm {} + + +# generate proto code for all protos +all_protos=$(find $PROTO_REPO_DIR/ -iname "*.proto") +python -m grpc_tools.protoc \ + -I $PROTO_REPO_DIR \ + --plugin=protoc-gen-custom-plugin=$repo_root/compile/plugin.py \ + --custom-plugin_out=. \ + $all_protos diff --git a/src/snowflake/telemetry/_internal/opentelemetry/proto/collector/logs/v1/logs_service.py b/src/snowflake/telemetry/_internal/opentelemetry/proto/collector/logs/v1/logs_service.py new file mode 100644 index 0000000..2cfad4a --- /dev/null +++ b/src/snowflake/telemetry/_internal/opentelemetry/proto/collector/logs/v1/logs_service.py @@ -0,0 +1,42 @@ +# Generated by the protoc compiler with a custom plugin. DO NOT EDIT! +# sources: opentelemetry/proto/collector/logs/v1/logs_service.proto + +from typing import ( + List, + Optional, +) + +from snowflake.telemetry._internal.serialize import ( + Enum, + ProtoSerializer, +) + + +def ExportLogsServiceRequest( + resource_logs: Optional[List[bytes]] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if resource_logs: + proto_serializer.serialize_repeated_message(b"\n", resource_logs) + return proto_serializer.out + + +def ExportLogsServiceResponse( + partial_success: Optional[bytes] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if partial_success is not None: + proto_serializer.serialize_message(b"\n", partial_success) + return proto_serializer.out + + +def ExportLogsPartialSuccess( + rejected_log_records: Optional[int] = None, + error_message: Optional[str] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if rejected_log_records: + proto_serializer.serialize_int64(b"\x08", rejected_log_records) + if error_message: + proto_serializer.serialize_string(b"\x12", error_message) + return proto_serializer.out diff --git a/src/snowflake/telemetry/_internal/opentelemetry/proto/collector/metrics/v1/metrics_service.py b/src/snowflake/telemetry/_internal/opentelemetry/proto/collector/metrics/v1/metrics_service.py new file mode 100644 index 0000000..0c31719 --- /dev/null +++ b/src/snowflake/telemetry/_internal/opentelemetry/proto/collector/metrics/v1/metrics_service.py @@ -0,0 +1,42 @@ +# Generated by the protoc compiler with a custom plugin. DO NOT EDIT! +# sources: opentelemetry/proto/collector/metrics/v1/metrics_service.proto + +from typing import ( + List, + Optional, +) + +from snowflake.telemetry._internal.serialize import ( + Enum, + ProtoSerializer, +) + + +def ExportMetricsServiceRequest( + resource_metrics: Optional[List[bytes]] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if resource_metrics: + proto_serializer.serialize_repeated_message(b"\n", resource_metrics) + return proto_serializer.out + + +def ExportMetricsServiceResponse( + partial_success: Optional[bytes] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if partial_success is not None: + proto_serializer.serialize_message(b"\n", partial_success) + return proto_serializer.out + + +def ExportMetricsPartialSuccess( + rejected_data_points: Optional[int] = None, + error_message: Optional[str] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if rejected_data_points: + proto_serializer.serialize_int64(b"\x08", rejected_data_points) + if error_message: + proto_serializer.serialize_string(b"\x12", error_message) + return proto_serializer.out diff --git a/src/snowflake/telemetry/_internal/opentelemetry/proto/collector/trace/v1/trace_service.py b/src/snowflake/telemetry/_internal/opentelemetry/proto/collector/trace/v1/trace_service.py new file mode 100644 index 0000000..c4e2496 --- /dev/null +++ b/src/snowflake/telemetry/_internal/opentelemetry/proto/collector/trace/v1/trace_service.py @@ -0,0 +1,42 @@ +# Generated by the protoc compiler with a custom plugin. DO NOT EDIT! +# sources: opentelemetry/proto/collector/trace/v1/trace_service.proto + +from typing import ( + List, + Optional, +) + +from snowflake.telemetry._internal.serialize import ( + Enum, + ProtoSerializer, +) + + +def ExportTraceServiceRequest( + resource_spans: Optional[List[bytes]] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if resource_spans: + proto_serializer.serialize_repeated_message(b"\n", resource_spans) + return proto_serializer.out + + +def ExportTraceServiceResponse( + partial_success: Optional[bytes] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if partial_success is not None: + proto_serializer.serialize_message(b"\n", partial_success) + return proto_serializer.out + + +def ExportTracePartialSuccess( + rejected_spans: Optional[int] = None, + error_message: Optional[str] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if rejected_spans: + proto_serializer.serialize_int64(b"\x08", rejected_spans) + if error_message: + proto_serializer.serialize_string(b"\x12", error_message) + return proto_serializer.out diff --git a/src/snowflake/telemetry/_internal/opentelemetry/proto/common/v1/common.py b/src/snowflake/telemetry/_internal/opentelemetry/proto/common/v1/common.py new file mode 100644 index 0000000..27f8c02 --- /dev/null +++ b/src/snowflake/telemetry/_internal/opentelemetry/proto/common/v1/common.py @@ -0,0 +1,87 @@ +# Generated by the protoc compiler with a custom plugin. DO NOT EDIT! +# sources: opentelemetry/proto/common/v1/common.proto + +from typing import ( + List, + Optional, +) + +from snowflake.telemetry._internal.serialize import ( + Enum, + ProtoSerializer, +) + + +def AnyValue( + string_value: Optional[str] = None, + bool_value: Optional[bool] = None, + int_value: Optional[int] = None, + double_value: Optional[float] = None, + array_value: Optional[bytes] = None, + kvlist_value: Optional[bytes] = None, + bytes_value: Optional[bytes] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if string_value is not None: # oneof group value + proto_serializer.serialize_string(b"\n", string_value) + if bool_value is not None: # oneof group value + proto_serializer.serialize_bool(b"\x10", bool_value) + if int_value is not None: # oneof group value + proto_serializer.serialize_int64(b"\x18", int_value) + if double_value is not None: # oneof group value + proto_serializer.serialize_double(b"!", double_value) + if array_value is not None: # oneof group value + proto_serializer.serialize_message(b"*", array_value) + if kvlist_value is not None: # oneof group value + proto_serializer.serialize_message(b"2", kvlist_value) + if bytes_value is not None: # oneof group value + proto_serializer.serialize_bytes(b":", bytes_value) + return proto_serializer.out + + +def ArrayValue( + values: Optional[List[bytes]] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if values: + proto_serializer.serialize_repeated_message(b"\n", values) + return proto_serializer.out + + +def KeyValueList( + values: Optional[List[bytes]] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if values: + proto_serializer.serialize_repeated_message(b"\n", values) + return proto_serializer.out + + +def KeyValue( + key: Optional[str] = None, + value: Optional[bytes] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if key: + proto_serializer.serialize_string(b"\n", key) + if value is not None: + proto_serializer.serialize_message(b"\x12", value) + return proto_serializer.out + + +def InstrumentationScope( + name: Optional[str] = None, + version: Optional[str] = None, + attributes: Optional[List[bytes]] = None, + dropped_attributes_count: Optional[int] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if name: + proto_serializer.serialize_string(b"\n", name) + if version: + proto_serializer.serialize_string(b"\x12", version) + if attributes: + proto_serializer.serialize_repeated_message(b"\x1a", attributes) + if dropped_attributes_count: + proto_serializer.serialize_uint32(b" ", dropped_attributes_count) + return proto_serializer.out diff --git a/src/snowflake/telemetry/_internal/opentelemetry/proto/logs/v1/logs.py b/src/snowflake/telemetry/_internal/opentelemetry/proto/logs/v1/logs.py new file mode 100644 index 0000000..66b0e47 --- /dev/null +++ b/src/snowflake/telemetry/_internal/opentelemetry/proto/logs/v1/logs.py @@ -0,0 +1,120 @@ +# Generated by the protoc compiler with a custom plugin. DO NOT EDIT! +# sources: opentelemetry/proto/logs/v1/logs.proto + +from typing import ( + List, + Optional, +) + +from snowflake.telemetry._internal.serialize import ( + Enum, + ProtoSerializer, +) + + +class SeverityNumber(Enum): + SEVERITY_NUMBER_UNSPECIFIED = 0 + SEVERITY_NUMBER_TRACE = 1 + SEVERITY_NUMBER_TRACE2 = 2 + SEVERITY_NUMBER_TRACE3 = 3 + SEVERITY_NUMBER_TRACE4 = 4 + SEVERITY_NUMBER_DEBUG = 5 + SEVERITY_NUMBER_DEBUG2 = 6 + SEVERITY_NUMBER_DEBUG3 = 7 + SEVERITY_NUMBER_DEBUG4 = 8 + SEVERITY_NUMBER_INFO = 9 + SEVERITY_NUMBER_INFO2 = 10 + SEVERITY_NUMBER_INFO3 = 11 + SEVERITY_NUMBER_INFO4 = 12 + SEVERITY_NUMBER_WARN = 13 + SEVERITY_NUMBER_WARN2 = 14 + SEVERITY_NUMBER_WARN3 = 15 + SEVERITY_NUMBER_WARN4 = 16 + SEVERITY_NUMBER_ERROR = 17 + SEVERITY_NUMBER_ERROR2 = 18 + SEVERITY_NUMBER_ERROR3 = 19 + SEVERITY_NUMBER_ERROR4 = 20 + SEVERITY_NUMBER_FATAL = 21 + SEVERITY_NUMBER_FATAL2 = 22 + SEVERITY_NUMBER_FATAL3 = 23 + SEVERITY_NUMBER_FATAL4 = 24 + + +class LogRecordFlags(Enum): + LOG_RECORD_FLAGS_DO_NOT_USE = 0 + LOG_RECORD_FLAGS_TRACE_FLAGS_MASK = 255 + + +def LogsData( + resource_logs: Optional[List[bytes]] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if resource_logs: + proto_serializer.serialize_repeated_message(b"\n", resource_logs) + return proto_serializer.out + + +def ResourceLogs( + resource: Optional[bytes] = None, + scope_logs: Optional[List[bytes]] = None, + schema_url: Optional[str] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if resource is not None: + proto_serializer.serialize_message(b"\n", resource) + if scope_logs: + proto_serializer.serialize_repeated_message(b"\x12", scope_logs) + if schema_url: + proto_serializer.serialize_string(b"\x1a", schema_url) + return proto_serializer.out + + +def ScopeLogs( + scope: Optional[bytes] = None, + log_records: Optional[List[bytes]] = None, + schema_url: Optional[str] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if scope is not None: + proto_serializer.serialize_message(b"\n", scope) + if log_records: + proto_serializer.serialize_repeated_message(b"\x12", log_records) + if schema_url: + proto_serializer.serialize_string(b"\x1a", schema_url) + return proto_serializer.out + + +def LogRecord( + time_unix_nano: Optional[int] = None, + severity_number: Optional[int] = None, + severity_text: Optional[str] = None, + body: Optional[bytes] = None, + attributes: Optional[List[bytes]] = None, + dropped_attributes_count: Optional[int] = None, + flags: Optional[int] = None, + trace_id: Optional[bytes] = None, + span_id: Optional[bytes] = None, + observed_time_unix_nano: Optional[int] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if time_unix_nano: + proto_serializer.serialize_fixed64(b"\t", time_unix_nano) + if severity_number: + proto_serializer.serialize_enum(b"\x10", severity_number) + if severity_text: + proto_serializer.serialize_string(b"\x1a", severity_text) + if body is not None: + proto_serializer.serialize_message(b"*", body) + if attributes: + proto_serializer.serialize_repeated_message(b"2", attributes) + if dropped_attributes_count: + proto_serializer.serialize_uint32(b"8", dropped_attributes_count) + if flags: + proto_serializer.serialize_fixed32(b"E", flags) + if trace_id: + proto_serializer.serialize_bytes(b"J", trace_id) + if span_id: + proto_serializer.serialize_bytes(b"R", span_id) + if observed_time_unix_nano: + proto_serializer.serialize_fixed64(b"Y", observed_time_unix_nano) + return proto_serializer.out diff --git a/src/snowflake/telemetry/_internal/opentelemetry/proto/metrics/v1/metrics.py b/src/snowflake/telemetry/_internal/opentelemetry/proto/metrics/v1/metrics.py new file mode 100644 index 0000000..d71f1e9 --- /dev/null +++ b/src/snowflake/telemetry/_internal/opentelemetry/proto/metrics/v1/metrics.py @@ -0,0 +1,341 @@ +# Generated by the protoc compiler with a custom plugin. DO NOT EDIT! +# sources: opentelemetry/proto/metrics/v1/metrics.proto + +from typing import ( + List, + Optional, +) + +from snowflake.telemetry._internal.serialize import ( + Enum, + ProtoSerializer, +) + + +class AggregationTemporality(Enum): + AGGREGATION_TEMPORALITY_UNSPECIFIED = 0 + AGGREGATION_TEMPORALITY_DELTA = 1 + AGGREGATION_TEMPORALITY_CUMULATIVE = 2 + + +class DataPointFlags(Enum): + DATA_POINT_FLAGS_DO_NOT_USE = 0 + DATA_POINT_FLAGS_NO_RECORDED_VALUE_MASK = 1 + + +def MetricsData( + resource_metrics: Optional[List[bytes]] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if resource_metrics: + proto_serializer.serialize_repeated_message(b"\n", resource_metrics) + return proto_serializer.out + + +def ResourceMetrics( + resource: Optional[bytes] = None, + scope_metrics: Optional[List[bytes]] = None, + schema_url: Optional[str] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if resource is not None: + proto_serializer.serialize_message(b"\n", resource) + if scope_metrics: + proto_serializer.serialize_repeated_message(b"\x12", scope_metrics) + if schema_url: + proto_serializer.serialize_string(b"\x1a", schema_url) + return proto_serializer.out + + +def ScopeMetrics( + scope: Optional[bytes] = None, + metrics: Optional[List[bytes]] = None, + schema_url: Optional[str] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if scope is not None: + proto_serializer.serialize_message(b"\n", scope) + if metrics: + proto_serializer.serialize_repeated_message(b"\x12", metrics) + if schema_url: + proto_serializer.serialize_string(b"\x1a", schema_url) + return proto_serializer.out + + +def Metric( + name: Optional[str] = None, + description: Optional[str] = None, + unit: Optional[str] = None, + gauge: Optional[bytes] = None, + sum: Optional[bytes] = None, + histogram: Optional[bytes] = None, + exponential_histogram: Optional[bytes] = None, + summary: Optional[bytes] = None, + metadata: Optional[List[bytes]] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if name: + proto_serializer.serialize_string(b"\n", name) + if description: + proto_serializer.serialize_string(b"\x12", description) + if unit: + proto_serializer.serialize_string(b"\x1a", unit) + if gauge is not None: # oneof group data + proto_serializer.serialize_message(b"*", gauge) + if sum is not None: # oneof group data + proto_serializer.serialize_message(b":", sum) + if histogram is not None: # oneof group data + proto_serializer.serialize_message(b"J", histogram) + if exponential_histogram is not None: # oneof group data + proto_serializer.serialize_message(b"R", exponential_histogram) + if summary is not None: # oneof group data + proto_serializer.serialize_message(b"Z", summary) + if metadata: + proto_serializer.serialize_repeated_message(b"b", metadata) + return proto_serializer.out + + +def Gauge( + data_points: Optional[List[bytes]] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if data_points: + proto_serializer.serialize_repeated_message(b"\n", data_points) + return proto_serializer.out + + +def Sum( + data_points: Optional[List[bytes]] = None, + aggregation_temporality: Optional[int] = None, + is_monotonic: Optional[bool] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if data_points: + proto_serializer.serialize_repeated_message(b"\n", data_points) + if aggregation_temporality: + proto_serializer.serialize_enum(b"\x10", aggregation_temporality) + if is_monotonic: + proto_serializer.serialize_bool(b"\x18", is_monotonic) + return proto_serializer.out + + +def Histogram( + data_points: Optional[List[bytes]] = None, + aggregation_temporality: Optional[int] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if data_points: + proto_serializer.serialize_repeated_message(b"\n", data_points) + if aggregation_temporality: + proto_serializer.serialize_enum(b"\x10", aggregation_temporality) + return proto_serializer.out + + +def ExponentialHistogram( + data_points: Optional[List[bytes]] = None, + aggregation_temporality: Optional[int] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if data_points: + proto_serializer.serialize_repeated_message(b"\n", data_points) + if aggregation_temporality: + proto_serializer.serialize_enum(b"\x10", aggregation_temporality) + return proto_serializer.out + + +def Summary( + data_points: Optional[List[bytes]] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if data_points: + proto_serializer.serialize_repeated_message(b"\n", data_points) + return proto_serializer.out + + +def NumberDataPoint( + start_time_unix_nano: Optional[int] = None, + time_unix_nano: Optional[int] = None, + as_double: Optional[float] = None, + exemplars: Optional[List[bytes]] = None, + as_int: Optional[int] = None, + attributes: Optional[List[bytes]] = None, + flags: Optional[int] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if start_time_unix_nano: + proto_serializer.serialize_fixed64(b"\x11", start_time_unix_nano) + if time_unix_nano: + proto_serializer.serialize_fixed64(b"\x19", time_unix_nano) + if as_double is not None: # oneof group value + proto_serializer.serialize_double(b"!", as_double) + if exemplars: + proto_serializer.serialize_repeated_message(b"*", exemplars) + if as_int is not None: # oneof group value + proto_serializer.serialize_sfixed64(b"1", as_int) + if attributes: + proto_serializer.serialize_repeated_message(b":", attributes) + if flags: + proto_serializer.serialize_uint32(b"@", flags) + return proto_serializer.out + + +def HistogramDataPoint( + start_time_unix_nano: Optional[int] = None, + time_unix_nano: Optional[int] = None, + count: Optional[int] = None, + sum: Optional[float] = None, + bucket_counts: Optional[List[int]] = None, + explicit_bounds: Optional[List[float]] = None, + exemplars: Optional[List[bytes]] = None, + attributes: Optional[List[bytes]] = None, + flags: Optional[int] = None, + min: Optional[float] = None, + max: Optional[float] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if start_time_unix_nano: + proto_serializer.serialize_fixed64(b"\x11", start_time_unix_nano) + if time_unix_nano: + proto_serializer.serialize_fixed64(b"\x19", time_unix_nano) + if count: + proto_serializer.serialize_fixed64(b"!", count) + if sum is not None: # oneof group _sum + proto_serializer.serialize_double(b")", sum) + if bucket_counts: + proto_serializer.serialize_repeated_fixed64(b"2", bucket_counts) + if explicit_bounds: + proto_serializer.serialize_repeated_double(b":", explicit_bounds) + if exemplars: + proto_serializer.serialize_repeated_message(b"B", exemplars) + if attributes: + proto_serializer.serialize_repeated_message(b"J", attributes) + if flags: + proto_serializer.serialize_uint32(b"P", flags) + if min is not None: # oneof group _min + proto_serializer.serialize_double(b"Y", min) + if max is not None: # oneof group _max + proto_serializer.serialize_double(b"a", max) + return proto_serializer.out + + +def ExponentialHistogramDataPoint( + attributes: Optional[List[bytes]] = None, + start_time_unix_nano: Optional[int] = None, + time_unix_nano: Optional[int] = None, + count: Optional[int] = None, + sum: Optional[float] = None, + scale: Optional[int] = None, + zero_count: Optional[int] = None, + positive: Optional[bytes] = None, + negative: Optional[bytes] = None, + flags: Optional[int] = None, + exemplars: Optional[List[bytes]] = None, + min: Optional[float] = None, + max: Optional[float] = None, + zero_threshold: Optional[float] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if attributes: + proto_serializer.serialize_repeated_message(b"\n", attributes) + if start_time_unix_nano: + proto_serializer.serialize_fixed64(b"\x11", start_time_unix_nano) + if time_unix_nano: + proto_serializer.serialize_fixed64(b"\x19", time_unix_nano) + if count: + proto_serializer.serialize_fixed64(b"!", count) + if sum is not None: # oneof group _sum + proto_serializer.serialize_double(b")", sum) + if scale: + proto_serializer.serialize_sint32(b"0", scale) + if zero_count: + proto_serializer.serialize_fixed64(b"9", zero_count) + if positive is not None: + proto_serializer.serialize_message(b"B", positive) + if negative is not None: + proto_serializer.serialize_message(b"J", negative) + if flags: + proto_serializer.serialize_uint32(b"P", flags) + if exemplars: + proto_serializer.serialize_repeated_message(b"Z", exemplars) + if min is not None: # oneof group _min + proto_serializer.serialize_double(b"a", min) + if max is not None: # oneof group _max + proto_serializer.serialize_double(b"i", max) + if zero_threshold: + proto_serializer.serialize_double(b"q", zero_threshold) + return proto_serializer.out + + +def ExponentialHistogramDataPoint_Buckets( + offset: Optional[int] = None, + bucket_counts: Optional[List[int]] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if offset: + proto_serializer.serialize_sint32(b"\x08", offset) + if bucket_counts: + proto_serializer.serialize_repeated_uint64(b"\x12", bucket_counts) + return proto_serializer.out + + +def SummaryDataPoint( + start_time_unix_nano: Optional[int] = None, + time_unix_nano: Optional[int] = None, + count: Optional[int] = None, + sum: Optional[float] = None, + quantile_values: Optional[List[bytes]] = None, + attributes: Optional[List[bytes]] = None, + flags: Optional[int] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if start_time_unix_nano: + proto_serializer.serialize_fixed64(b"\x11", start_time_unix_nano) + if time_unix_nano: + proto_serializer.serialize_fixed64(b"\x19", time_unix_nano) + if count: + proto_serializer.serialize_fixed64(b"!", count) + if sum: + proto_serializer.serialize_double(b")", sum) + if quantile_values: + proto_serializer.serialize_repeated_message(b"2", quantile_values) + if attributes: + proto_serializer.serialize_repeated_message(b":", attributes) + if flags: + proto_serializer.serialize_uint32(b"@", flags) + return proto_serializer.out + + +def SummaryDataPoint_ValueAtQuantile( + quantile: Optional[float] = None, + value: Optional[float] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if quantile: + proto_serializer.serialize_double(b"\t", quantile) + if value: + proto_serializer.serialize_double(b"\x11", value) + return proto_serializer.out + + +def Exemplar( + time_unix_nano: Optional[int] = None, + as_double: Optional[float] = None, + span_id: Optional[bytes] = None, + trace_id: Optional[bytes] = None, + as_int: Optional[int] = None, + filtered_attributes: Optional[List[bytes]] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if time_unix_nano: + proto_serializer.serialize_fixed64(b"\x11", time_unix_nano) + if as_double is not None: # oneof group value + proto_serializer.serialize_double(b"\x19", as_double) + if span_id: + proto_serializer.serialize_bytes(b'"', span_id) + if trace_id: + proto_serializer.serialize_bytes(b"*", trace_id) + if as_int is not None: # oneof group value + proto_serializer.serialize_sfixed64(b"1", as_int) + if filtered_attributes: + proto_serializer.serialize_repeated_message(b":", filtered_attributes) + return proto_serializer.out diff --git a/src/snowflake/telemetry/_internal/opentelemetry/proto/resource/v1/resource.py b/src/snowflake/telemetry/_internal/opentelemetry/proto/resource/v1/resource.py new file mode 100644 index 0000000..683727c --- /dev/null +++ b/src/snowflake/telemetry/_internal/opentelemetry/proto/resource/v1/resource.py @@ -0,0 +1,24 @@ +# Generated by the protoc compiler with a custom plugin. DO NOT EDIT! +# sources: opentelemetry/proto/resource/v1/resource.proto + +from typing import ( + List, + Optional, +) + +from snowflake.telemetry._internal.serialize import ( + Enum, + ProtoSerializer, +) + + +def Resource( + attributes: Optional[List[bytes]] = None, + dropped_attributes_count: Optional[int] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if attributes: + proto_serializer.serialize_repeated_message(b"\n", attributes) + if dropped_attributes_count: + proto_serializer.serialize_uint32(b"\x10", dropped_attributes_count) + return proto_serializer.out diff --git a/src/snowflake/telemetry/_internal/opentelemetry/proto/trace/v1/trace.py b/src/snowflake/telemetry/_internal/opentelemetry/proto/trace/v1/trace.py new file mode 100644 index 0000000..48f2908 --- /dev/null +++ b/src/snowflake/telemetry/_internal/opentelemetry/proto/trace/v1/trace.py @@ -0,0 +1,181 @@ +# Generated by the protoc compiler with a custom plugin. DO NOT EDIT! +# sources: opentelemetry/proto/trace/v1/trace.proto + +from typing import ( + List, + Optional, +) + +from snowflake.telemetry._internal.serialize import ( + Enum, + ProtoSerializer, +) + + +class SpanFlags(Enum): + SPAN_FLAGS_DO_NOT_USE = 0 + SPAN_FLAGS_TRACE_FLAGS_MASK = 255 + SPAN_FLAGS_CONTEXT_HAS_IS_REMOTE_MASK = 256 + SPAN_FLAGS_CONTEXT_IS_REMOTE_MASK = 512 + + +def TracesData( + resource_spans: Optional[List[bytes]] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if resource_spans: + proto_serializer.serialize_repeated_message(b"\n", resource_spans) + return proto_serializer.out + + +def ResourceSpans( + resource: Optional[bytes] = None, + scope_spans: Optional[List[bytes]] = None, + schema_url: Optional[str] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if resource is not None: + proto_serializer.serialize_message(b"\n", resource) + if scope_spans: + proto_serializer.serialize_repeated_message(b"\x12", scope_spans) + if schema_url: + proto_serializer.serialize_string(b"\x1a", schema_url) + return proto_serializer.out + + +def ScopeSpans( + scope: Optional[bytes] = None, + spans: Optional[List[bytes]] = None, + schema_url: Optional[str] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if scope is not None: + proto_serializer.serialize_message(b"\n", scope) + if spans: + proto_serializer.serialize_repeated_message(b"\x12", spans) + if schema_url: + proto_serializer.serialize_string(b"\x1a", schema_url) + return proto_serializer.out + + +def Span( + trace_id: Optional[bytes] = None, + span_id: Optional[bytes] = None, + trace_state: Optional[str] = None, + parent_span_id: Optional[bytes] = None, + name: Optional[str] = None, + kind: Optional[int] = None, + start_time_unix_nano: Optional[int] = None, + end_time_unix_nano: Optional[int] = None, + attributes: Optional[List[bytes]] = None, + dropped_attributes_count: Optional[int] = None, + events: Optional[List[bytes]] = None, + dropped_events_count: Optional[int] = None, + links: Optional[List[bytes]] = None, + dropped_links_count: Optional[int] = None, + status: Optional[bytes] = None, + flags: Optional[int] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if trace_id: + proto_serializer.serialize_bytes(b"\n", trace_id) + if span_id: + proto_serializer.serialize_bytes(b"\x12", span_id) + if trace_state: + proto_serializer.serialize_string(b"\x1a", trace_state) + if parent_span_id: + proto_serializer.serialize_bytes(b'"', parent_span_id) + if name: + proto_serializer.serialize_string(b"*", name) + if kind: + proto_serializer.serialize_enum(b"0", kind) + if start_time_unix_nano: + proto_serializer.serialize_fixed64(b"9", start_time_unix_nano) + if end_time_unix_nano: + proto_serializer.serialize_fixed64(b"A", end_time_unix_nano) + if attributes: + proto_serializer.serialize_repeated_message(b"J", attributes) + if dropped_attributes_count: + proto_serializer.serialize_uint32(b"P", dropped_attributes_count) + if events: + proto_serializer.serialize_repeated_message(b"Z", events) + if dropped_events_count: + proto_serializer.serialize_uint32(b"`", dropped_events_count) + if links: + proto_serializer.serialize_repeated_message(b"j", links) + if dropped_links_count: + proto_serializer.serialize_uint32(b"p", dropped_links_count) + if status is not None: + proto_serializer.serialize_message(b"z", status) + if flags: + proto_serializer.serialize_fixed32(b"\x85\x01", flags) + return proto_serializer.out + + +class Span_SpanKind(Enum): + SPAN_KIND_UNSPECIFIED = 0 + SPAN_KIND_INTERNAL = 1 + SPAN_KIND_SERVER = 2 + SPAN_KIND_CLIENT = 3 + SPAN_KIND_PRODUCER = 4 + SPAN_KIND_CONSUMER = 5 + + +def Span_Event( + time_unix_nano: Optional[int] = None, + name: Optional[str] = None, + attributes: Optional[List[bytes]] = None, + dropped_attributes_count: Optional[int] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if time_unix_nano: + proto_serializer.serialize_fixed64(b"\t", time_unix_nano) + if name: + proto_serializer.serialize_string(b"\x12", name) + if attributes: + proto_serializer.serialize_repeated_message(b"\x1a", attributes) + if dropped_attributes_count: + proto_serializer.serialize_uint32(b" ", dropped_attributes_count) + return proto_serializer.out + + +def Span_Link( + trace_id: Optional[bytes] = None, + span_id: Optional[bytes] = None, + trace_state: Optional[str] = None, + attributes: Optional[List[bytes]] = None, + dropped_attributes_count: Optional[int] = None, + flags: Optional[int] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if trace_id: + proto_serializer.serialize_bytes(b"\n", trace_id) + if span_id: + proto_serializer.serialize_bytes(b"\x12", span_id) + if trace_state: + proto_serializer.serialize_string(b"\x1a", trace_state) + if attributes: + proto_serializer.serialize_repeated_message(b'"', attributes) + if dropped_attributes_count: + proto_serializer.serialize_uint32(b"(", dropped_attributes_count) + if flags: + proto_serializer.serialize_fixed32(b"5", flags) + return proto_serializer.out + + +def Status( + message: Optional[str] = None, + code: Optional[int] = None, +) -> bytes: + proto_serializer = ProtoSerializer() + if message: + proto_serializer.serialize_string(b"\x12", message) + if code: + proto_serializer.serialize_enum(b"\x18", code) + return proto_serializer.out + + +class Status_StatusCode(Enum): + STATUS_CODE_UNSET = 0 + STATUS_CODE_OK = 1 + STATUS_CODE_ERROR = 2 diff --git a/src/snowflake/telemetry/_internal/serialize/__init__.py b/src/snowflake/telemetry/_internal/serialize/__init__.py new file mode 100644 index 0000000..9aa916c --- /dev/null +++ b/src/snowflake/telemetry/_internal/serialize/__init__.py @@ -0,0 +1,145 @@ +import struct +from enum import IntEnum +from typing import List, Union + +Enum = IntEnum + +class ProtoSerializer: + __slots__ = ("out") + + def __init__(self) -> None: + self.out = bytearray() + + def __bytes__(self) -> bytes: + return bytes(self.out) + + def serialize_bool(self, tag: bytes, value: bool) -> None: + self.out += tag + self._write_varint_unsigned(1 if value else 0) + + def serialize_enum(self, tag: bytes, value: Union[Enum, int]) -> None: + if not isinstance(value, int): + value = value.value + self.out += tag + self._write_varint_unsigned(value) + + def serialize_uint32(self, tag: bytes, value: int) -> None: + self.out += tag + self._write_varint_unsigned(value) + + def serialize_uint64(self, tag: bytes, value: int) -> None: + self.out += tag + self._write_varint_unsigned(value) + + def serialize_sint32(self, tag: bytes, value: int) -> None: + self.out += tag + self._write_varint_unsigned(value << 1 if value >= 0 else (value << 1) ^ (~0)) + + def serialize_sint64(self, tag: bytes, value: int) -> None: + self.out += tag + self._write_varint_unsigned(value << 1 if value >= 0 else (value << 1) ^ (~0)) + + def serialize_int32(self, tag: bytes, value: int) -> None: + self.out += tag + self._write_varint_unsigned(value + 1 << 32 if value < 0 else value) + + def serialize_int64(self, tag: bytes, value: int) -> None: + self.out += tag + self._write_varint_unsigned(value + 1 << 64 if value < 0 else value) + + def serialize_fixed32(self, tag: bytes, value: int) -> None: + self.out += tag + self.out += struct.pack(" None: + self.out += tag + self.out += struct.pack(" None: + self.out += tag + self.out += struct.pack(" None: + self.out += tag + self.out += struct.pack(" None: + self.out += tag + self.out += struct.pack(" None: + self.out += tag + self.out += struct.pack(" None: + self.out += tag + self._write_varint_unsigned(len(value)) + self.out += value + + def serialize_string(self, tag: bytes, value: str) -> None: + self.serialize_bytes(tag, value.encode("utf-8")) + + def serialize_message( + self, + tag: bytes, + value: bytes, + ) -> None: + # If value is None, omit message entirely + if value is None: + return + # Otherwise, write the message + # Even if all fields are default (ommnited) + # The presence of the message is still encoded + self.out += tag + self._write_varint_unsigned(len(value)) + self.out += value + + def serialize_repeated_message( + self, + tag: bytes, + values: List[bytes], + ) -> None: + if not values: + return + # local reference to avoid repeated lookups + _self_serialize = self.serialize_message + for value in values: + _self_serialize(tag, value) + + def serialize_repeated_double(self, tag: bytes, values: List[float]) -> None: + if not values: + return + self.out += tag + self._write_varint_unsigned(len(values) * 8) + for value in values: + self.write_double_no_tag(value) + + def serialize_repeated_fixed64(self, tag: bytes, values: List[int]) -> None: + if not values: + return + self.out += tag + self._write_varint_unsigned(len(values) * 8) + for value in values: + self.write_fixed64_no_tag(value) + + def serialize_repeated_uint64(self, tag: bytes, values: List[int]) -> None: + if not values: + return + self.out += tag + tmp = ProtoSerializer() + for value in values: + tmp._write_varint_unsigned(value) + self._write_varint_unsigned(len(tmp.out)) + self.out += tmp.out + + def _write_varint_unsigned(self, value: int) -> None: + while value >= 128: + self.out.append((value & 0x7F) | 0x80) + value >>= 7 + self.out.append(value) + + def write_double_no_tag(self, value: float) -> None: + self.out += struct.pack(" None: + self.out += struct.pack(" Date: Tue, 22 Oct 2024 13:44:24 -0700 Subject: [PATCH 02/15] Add simple generator test case --- scripts/proto_codegen.sh | 2 +- tests/snowflake-telemetry-test-utils/setup.py | 4 + tests/test_compile.py | 87 +++++++++++++++++++ 3 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 tests/test_compile.py diff --git a/scripts/proto_codegen.sh b/scripts/proto_codegen.sh index 304b347..4462470 100755 --- a/scripts/proto_codegen.sh +++ b/scripts/proto_codegen.sh @@ -52,7 +52,7 @@ fi cd $repo_root/src/snowflake/telemetry/_internal # clean up old generated code -find opentelemetry/ -regex ".*_pb2.*\.pyi?" -exec rm {} + +find opentelemetry/proto/ -regex ".*\.py?" -exec rm {} + # generate proto code for all protos all_protos=$(find $PROTO_REPO_DIR/ -iname "*.proto") diff --git a/tests/snowflake-telemetry-test-utils/setup.py b/tests/snowflake-telemetry-test-utils/setup.py index 4f78b56..0dbda1d 100644 --- a/tests/snowflake-telemetry-test-utils/setup.py +++ b/tests/snowflake-telemetry-test-utils/setup.py @@ -17,6 +17,10 @@ install_requires=[ "pytest >= 7.0.0", "snowflake-telemetry-python == 0.6.0.dev", + "Jinja2", + "grpcio-tools", + "black", + "isort", ], packages=find_namespace_packages( where='src' diff --git a/tests/test_compile.py b/tests/test_compile.py new file mode 100644 index 0000000..af66d23 --- /dev/null +++ b/tests/test_compile.py @@ -0,0 +1,87 @@ +""" +Test protoc code generator plugin for custom protoc message types +""" +import unittest +import tempfile +import subprocess +import os + +# Import into globals() so generated code string can be compiled +from snowflake.telemetry._internal.serialize import ProtoSerializer, Enum + +class TestCompile(unittest.TestCase): + def test_compile(self): + with tempfile.NamedTemporaryFile(suffix=".proto", mode="w", delete=False) as proto_file: + # Define a simple proto file + proto_file.write("""syntax = "proto3"; +package opentelemetry.proto.common.v1; + +message AnyValue { + oneof value { + string string_value = 1; + bool bool_value = 2; + int64 int_value = 3; + double double_value = 4; + ArrayValue array_value = 5; + KeyValueList kvlist_value = 6; + bytes bytes_value = 7; + } +} + +message ArrayValue { + repeated AnyValue values = 1; +} + +message KeyValueList { + repeated KeyValue values = 1; +} + +message KeyValue { + string key = 1; + AnyValue value = 2; +} + +message InstrumentationScope { + string name = 1; + string version = 2; + repeated KeyValue attributes = 3; + uint32 dropped_attributes_count = 4; +} +""" + ) + proto_file.flush() + proto_file.close() + + proto_file_dir = os.path.dirname(proto_file.name) + proto_file_name = os.path.basename(proto_file.name) + + # Run protoc with custom plugin to generate serialization code for messages + result = subprocess.run([ + "python", + "-m", + "grpc_tools.protoc", + "-I", + proto_file_dir, + "--plugin=protoc-gen-custom-plugin=compile/plugin.py", + f"--custom-plugin_out={tempfile.gettempdir()}", + proto_file_name, + ], capture_output=True) + + # Ensure protoc ran successfully + self.assertEqual(result.returncode, 0) + + generated_code_file_name = proto_file_name.replace(".proto", ".py") + generated_code_file_dir = tempfile.gettempdir() + generated_code_file = os.path.join(generated_code_file_dir, generated_code_file_name) + + # Ensure generated code file exists + self.assertTrue(os.path.exists(os.path.join(generated_code_file_dir, generated_code_file_name))) + + # Ensure code can be executed and serializes correctly + with open(generated_code_file, "r") as f: + generated_code = f.read() + local_namespace = {} + eval(compile(generated_code, generated_code_file, "exec"), globals(), local_namespace) + + self.assertTrue("AnyValue" in local_namespace) + self.assertEquals(local_namespace["AnyValue"](string_value="test"), b'\n\x04test') From 8ead5303232e51ee738b44a026388d52818593b8 Mon Sep 17 00:00:00 2001 From: Jeevan Opel Date: Thu, 24 Oct 2024 13:38:58 -0700 Subject: [PATCH 03/15] Update README with code generation --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index ef6e6ae..ca9d2b3 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ pip install --upgrade pip pip install . ``` +## Development + To develop this package, run ```bash @@ -33,3 +35,9 @@ source .venv/bin/activate pip install --upgrade pip pip install . ./tests/snowflake-telemetry-test-utils ``` + +### Code generation + +To regenerate the code under `src/snowflake/_internal/opentelemetry/proto/`, execute the script `./scripts/proto_codegen.sh`. The script expects the `src/snowflake/_internal/opentelemetry/proto/` directory to exist, and will delete all .py files in it before regerating the code. + +The commit/branch/tag of [opentelemetry-proto](https://github.com/open-telemetry/opentelemetry-proto) that the code is generated from is pinned to PROTO_REPO_BRANCH_OR_COMMIT, which can be configured in the script. It is currently pinned to the same tag as [opentelemetry-python](https://github.com/open-telemetry/opentelemetry-python/blob/main/scripts/proto_codegen.sh#L15). From 4d809c3d0e6c6c6336aa4de1e85111edf3078d0e Mon Sep 17 00:00:00 2001 From: Jeevan Opel Date: Thu, 24 Oct 2024 13:39:28 -0700 Subject: [PATCH 04/15] Move plugin --- {compile => scripts}/plugin.py | 0 scripts/proto_codegen.sh | 2 +- {compile => scripts}/templates/template.py.jinja2 | 0 tests/{test_compile.py => test_protoc_plugin.py} | 12 ++++++++---- 4 files changed, 9 insertions(+), 5 deletions(-) rename {compile => scripts}/plugin.py (100%) rename {compile => scripts}/templates/template.py.jinja2 (100%) rename tests/{test_compile.py => test_protoc_plugin.py} (82%) diff --git a/compile/plugin.py b/scripts/plugin.py similarity index 100% rename from compile/plugin.py rename to scripts/plugin.py diff --git a/scripts/proto_codegen.sh b/scripts/proto_codegen.sh index 4462470..4713c04 100755 --- a/scripts/proto_codegen.sh +++ b/scripts/proto_codegen.sh @@ -58,6 +58,6 @@ find opentelemetry/proto/ -regex ".*\.py?" -exec rm {} + all_protos=$(find $PROTO_REPO_DIR/ -iname "*.proto") python -m grpc_tools.protoc \ -I $PROTO_REPO_DIR \ - --plugin=protoc-gen-custom-plugin=$repo_root/compile/plugin.py \ + --plugin=protoc-gen-custom-plugin=$repo_root/scripts/plugin.py \ --custom-plugin_out=. \ $all_protos diff --git a/compile/templates/template.py.jinja2 b/scripts/templates/template.py.jinja2 similarity index 100% rename from compile/templates/template.py.jinja2 rename to scripts/templates/template.py.jinja2 diff --git a/tests/test_compile.py b/tests/test_protoc_plugin.py similarity index 82% rename from tests/test_compile.py rename to tests/test_protoc_plugin.py index af66d23..2c341c9 100644 --- a/tests/test_compile.py +++ b/tests/test_protoc_plugin.py @@ -10,10 +10,15 @@ from snowflake.telemetry._internal.serialize import ProtoSerializer, Enum class TestCompile(unittest.TestCase): + def namespace_serialize_message(self, message_type: str, local_namespace: dict, **kwargs) -> bytes: + assert message_type in local_namespace, f"Message type {message_type} not found in local namespace" + return local_namespace[message_type](**kwargs) + def test_compile(self): with tempfile.NamedTemporaryFile(suffix=".proto", mode="w", delete=False) as proto_file: # Define a simple proto file - proto_file.write("""syntax = "proto3"; + proto_file.write( + """syntax = "proto3"; package opentelemetry.proto.common.v1; message AnyValue { @@ -62,7 +67,7 @@ def test_compile(self): "grpc_tools.protoc", "-I", proto_file_dir, - "--plugin=protoc-gen-custom-plugin=compile/plugin.py", + "--plugin=protoc-gen-custom-plugin=scripts/plugin.py", f"--custom-plugin_out={tempfile.gettempdir()}", proto_file_name, ], capture_output=True) @@ -83,5 +88,4 @@ def test_compile(self): local_namespace = {} eval(compile(generated_code, generated_code_file, "exec"), globals(), local_namespace) - self.assertTrue("AnyValue" in local_namespace) - self.assertEquals(local_namespace["AnyValue"](string_value="test"), b'\n\x04test') + self.assertEquals(b'\n\x04test', self.namespace_serialize_message("AnyValue", local_namespace, string_value="test")) From 37ffa2f415c847ff2625b0e80896d1edd4a95968 Mon Sep 17 00:00:00 2001 From: Jeevan Opel Date: Mon, 28 Oct 2024 16:05:00 -0700 Subject: [PATCH 05/15] Fix signed int serialization --- src/snowflake/telemetry/_internal/serialize/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/snowflake/telemetry/_internal/serialize/__init__.py b/src/snowflake/telemetry/_internal/serialize/__init__.py index 9aa916c..9d9063c 100644 --- a/src/snowflake/telemetry/_internal/serialize/__init__.py +++ b/src/snowflake/telemetry/_internal/serialize/__init__.py @@ -41,11 +41,11 @@ def serialize_sint64(self, tag: bytes, value: int) -> None: def serialize_int32(self, tag: bytes, value: int) -> None: self.out += tag - self._write_varint_unsigned(value + 1 << 32 if value < 0 else value) + self._write_varint_unsigned(value + (1 << 32) if value < 0 else value) def serialize_int64(self, tag: bytes, value: int) -> None: self.out += tag - self._write_varint_unsigned(value + 1 << 64 if value < 0 else value) + self._write_varint_unsigned(value + (1 << 64) if value < 0 else value) def serialize_fixed32(self, tag: bytes, value: int) -> None: self.out += tag From 10b58651bee7946d2a54d9805eb1e9b8160d2500 Mon Sep 17 00:00:00 2001 From: Jeevan Opel Date: Mon, 28 Oct 2024 16:05:39 -0700 Subject: [PATCH 06/15] Rename plugin test --- tests/test_protoc_plugin.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_protoc_plugin.py b/tests/test_protoc_plugin.py index 2c341c9..a312995 100644 --- a/tests/test_protoc_plugin.py +++ b/tests/test_protoc_plugin.py @@ -9,12 +9,12 @@ # Import into globals() so generated code string can be compiled from snowflake.telemetry._internal.serialize import ProtoSerializer, Enum -class TestCompile(unittest.TestCase): +class TestProtocPlugin(unittest.TestCase): def namespace_serialize_message(self, message_type: str, local_namespace: dict, **kwargs) -> bytes: assert message_type in local_namespace, f"Message type {message_type} not found in local namespace" return local_namespace[message_type](**kwargs) - def test_compile(self): + def test_protoc_plugin(self): with tempfile.NamedTemporaryFile(suffix=".proto", mode="w", delete=False) as proto_file: # Define a simple proto file proto_file.write( @@ -88,4 +88,4 @@ def test_compile(self): local_namespace = {} eval(compile(generated_code, generated_code_file, "exec"), globals(), local_namespace) - self.assertEquals(b'\n\x04test', self.namespace_serialize_message("AnyValue", local_namespace, string_value="test")) + self.assertEqual(b'\n\x04test', self.namespace_serialize_message("AnyValue", local_namespace, string_value="test")) From 9bee4b12d8c4e8d59485722d79ad0d65524ac509 Mon Sep 17 00:00:00 2001 From: Jeevan Opel Date: Mon, 28 Oct 2024 16:15:51 -0700 Subject: [PATCH 07/15] Add property based testing --- tests/snowflake-telemetry-test-utils/setup.py | 1 + tests/test_proto_serialization.py | 182 ++++++++++++++++++ 2 files changed, 183 insertions(+) create mode 100644 tests/test_proto_serialization.py diff --git a/tests/snowflake-telemetry-test-utils/setup.py b/tests/snowflake-telemetry-test-utils/setup.py index 0dbda1d..fb290db 100644 --- a/tests/snowflake-telemetry-test-utils/setup.py +++ b/tests/snowflake-telemetry-test-utils/setup.py @@ -21,6 +21,7 @@ "grpcio-tools", "black", "isort", + "hypothesis >= 6.0.0", ], packages=find_namespace_packages( where='src' diff --git a/tests/test_proto_serialization.py b/tests/test_proto_serialization.py new file mode 100644 index 0000000..dd1b7bf --- /dev/null +++ b/tests/test_proto_serialization.py @@ -0,0 +1,182 @@ +import unittest + +from dataclasses import dataclass +from typing import Any, Dict, Callable +import hypothesis +from hypothesis.strategies import composite, text, booleans, integers, floats, lists, binary, sampled_from + +import hypothesis.strategies +import opentelemetry.proto.logs.v1.logs_pb2 as logs_pb2 +import opentelemetry.proto.trace.v1.trace_pb2 as trace_pb2 +import opentelemetry.proto.common.v1.common_pb2 as common_pb2 +import opentelemetry.proto.metrics.v1.metrics_pb2 as metrics_pb2 +import opentelemetry.proto.resource.v1.resource_pb2 as resource_pb2 + +import snowflake.telemetry._internal.opentelemetry.proto.logs.v1.logs as logs_sf +import snowflake.telemetry._internal.opentelemetry.proto.trace.v1.trace as trace_sf +import snowflake.telemetry._internal.opentelemetry.proto.common.v1.common as common_sf +import snowflake.telemetry._internal.opentelemetry.proto.metrics.v1.metrics as metrics_sf +import snowflake.telemetry._internal.opentelemetry.proto.resource.v1.resource as resource_sf + +# Strategy for generating protobuf types +def pb_uint32(): return integers(min_value=0, max_value=2**32-1) +def pb_uint64(): return integers(min_value=0, max_value=2**64-1) +def pb_int32(): return integers(min_value=-2**31, max_value=2**31-1) +def pb_int64(): return integers(min_value=-2**63, max_value=2**63-1) +def pb_sint32(): return integers(min_value=-2**31, max_value=2**31-1) +def pb_sint64(): return integers(min_value=-2**63, max_value=2**63-1) +def pb_float(): return floats(allow_nan=False, allow_infinity=False, width=32) +def pb_double(): return floats(allow_nan=False, allow_infinity=False, width=64) +def pb_fixed64(): return pb_uint64() +def pb_fixed32(): return pb_uint32() +def pb_sfixed64(): return pb_int64() +def pb_sfixed32(): return pb_int32() +def pb_bool(): return booleans() +def pb_string(): return text() +def pb_bytes(): return binary() +def pb_enum(enum): return sampled_from([member.value for member in enum]) + +# Maps protobuf types to their serialization functions, from the protobuf and snowflake serialization libraries +@dataclass +class EncodeStrategy: + pb2: Callable[[Any], Any] + sf: Callable[[Any], Any] + +Resource = EncodeStrategy(pb2=resource_pb2.Resource, sf=resource_sf.Resource) +InstrumentationScope = EncodeStrategy(pb2=common_pb2.InstrumentationScope, sf=common_sf.InstrumentationScope) +AnyValue = EncodeStrategy(pb2=common_pb2.AnyValue, sf=common_sf.AnyValue) +ArrayValue = EncodeStrategy(pb2=common_pb2.ArrayValue, sf=common_sf.ArrayValue) +KeyValue = EncodeStrategy(pb2=common_pb2.KeyValue, sf=common_sf.KeyValue) +KeyValueList = EncodeStrategy(pb2=common_pb2.KeyValueList, sf=common_sf.KeyValueList) +LogRecord = EncodeStrategy(pb2=logs_pb2.LogRecord, sf=logs_sf.LogRecord) +ScopeLogs = EncodeStrategy(pb2=logs_pb2.ScopeLogs, sf=logs_sf.ScopeLogs) +ResourceLogs = EncodeStrategy(pb2=logs_pb2.ResourceLogs, sf=logs_sf.ResourceLogs) +LogsData = EncodeStrategy(pb2=logs_pb2.LogsData, sf=logs_sf.LogsData) + +# Package the protobuf type with its arguments for serialization +@dataclass +class EncodeWithArgs: + kwargs: Dict[str, Any] + cls: EncodeStrategy + +# Strategies for generating opentelemetry-proto types +@composite +def instrumentation_scope(draw): + return EncodeWithArgs({ + "name": draw(pb_string()), + "version": draw(pb_string()), + "attributes": draw(lists(key_value())), + "dropped_attributes_count": draw(pb_uint32()), + }, InstrumentationScope) + +@composite +def resource(draw): + return EncodeWithArgs({ + "attributes": draw(lists(key_value())), + "dropped_attributes_count": draw(pb_uint32()), + }, Resource) + +@composite +def any_value(draw): + # oneof field so only set one + oneof = draw(integers(min_value=0, max_value=6)) + if oneof == 0: + kwargs = {"string_value": draw(pb_string())} + elif oneof == 1: + kwargs = {"bool_value": draw(pb_bool())} + elif oneof == 2: + kwargs = {"int_value": draw(pb_int64())} + elif oneof == 3: + kwargs = {"double_value": draw(pb_double())} + elif oneof == 4: + kwargs = {"array_value": draw(array_value())} + elif oneof == 5: + kwargs = {"kvlist_value": draw(key_value_list())} + elif oneof == 6: + kwargs = {"bytes_value": draw(pb_bytes())} + return EncodeWithArgs(kwargs, AnyValue) + +@composite +def array_value(draw): + return EncodeWithArgs({ + "values": draw(lists(any_value())), + }, ArrayValue) + +@composite +def key_value(draw): + return EncodeWithArgs({ + "key": draw(pb_string()), + "value": draw(any_value()), + }, KeyValue) + +@composite +def key_value_list(draw): + return EncodeWithArgs({ + "values": draw(lists(key_value())), + }, KeyValueList) + +@composite +def log_record(draw): + return EncodeWithArgs({ + "time_unix_nano": draw(pb_fixed64()), + "observed_time_unix_nano": draw(pb_fixed64()), + "severity_number": draw(pb_enum(logs_sf.SeverityNumber)), + "severity_text": draw(pb_string()), + "body": draw(any_value()), + "attributes": draw(lists(key_value())), + "dropped_attributes_count": draw(pb_uint32()), + "flags": draw(pb_fixed32()), + "span_id": draw(pb_bytes()), + "trace_id": draw(pb_bytes()), + }, LogRecord) + +@composite +def scope_logs(draw): + return EncodeWithArgs({ + "scope": draw(instrumentation_scope()), + "log_records": draw(lists(log_record())), + "schema_url": draw(pb_string()), + }, ScopeLogs) + +@composite +def resource_logs(draw): + return EncodeWithArgs({ + "resource": draw(resource()), + "scope_logs": draw(lists(scope_logs())), + "schema_url": draw(pb_string()), + }, ResourceLogs) + +@composite +def logs_data(draw): + return EncodeWithArgs({ + "resource_logs": draw(lists(resource_logs())), + }, LogsData) + +# Helper function to recursively encode protobuf types using the generated args +# and the given serialization strategy +def encode_recurse(obj: EncodeWithArgs, strategy: str) -> Any: + kwargs = {} + for key, value in obj.kwargs.items(): + if isinstance(value, EncodeWithArgs): + kwargs[key] = encode_recurse(value, strategy) + elif isinstance(value, list) and value and isinstance(value[0], EncodeWithArgs): + kwargs[key] = [encode_recurse(v, strategy) for v in value] + else: + kwargs[key] = value + if strategy == "pb2": + return obj.cls.pb2(**kwargs) + elif strategy == "sf": + return obj.cls.sf(**kwargs) + +class TestProtoSerialization(unittest.TestCase): + @hypothesis.settings( + suppress_health_check=[ + hypothesis.HealthCheck.too_slow, + ], + ) + @hypothesis.given(logs_data()) + def test_log_data(self, logs_data): + self.assertEqual( + encode_recurse(logs_data, "pb2").SerializeToString(), + bytes(encode_recurse(logs_data, "sf")) + ) From 6f519c4dc141d7eb2176124a697180a65b1cecd1 Mon Sep 17 00:00:00 2001 From: Jeevan Opel Date: Mon, 28 Oct 2024 16:22:30 -0700 Subject: [PATCH 08/15] Speed up execution time of test --- tests/test_proto_serialization.py | 32 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/tests/test_proto_serialization.py b/tests/test_proto_serialization.py index dd1b7bf..ea6dbd1 100644 --- a/tests/test_proto_serialization.py +++ b/tests/test_proto_serialization.py @@ -32,9 +32,12 @@ def pb_fixed32(): return pb_uint32() def pb_sfixed64(): return pb_int64() def pb_sfixed32(): return pb_int32() def pb_bool(): return booleans() -def pb_string(): return text() -def pb_bytes(): return binary() +def pb_string(): return text(max_size=100) +def pb_bytes(): return binary(max_size=100) def pb_enum(enum): return sampled_from([member.value for member in enum]) +def pb_repeated(type): return lists(type, max_size=3) # limit the size of the repeated field to speed up testing +def pb_span_id(): return binary(min_size=8, max_size=8) +def pb_trace_id(): return binary(min_size=16, max_size=16) # Maps protobuf types to their serialization functions, from the protobuf and snowflake serialization libraries @dataclass @@ -65,14 +68,14 @@ def instrumentation_scope(draw): return EncodeWithArgs({ "name": draw(pb_string()), "version": draw(pb_string()), - "attributes": draw(lists(key_value())), + "attributes": draw(pb_repeated(key_value())), "dropped_attributes_count": draw(pb_uint32()), }, InstrumentationScope) @composite def resource(draw): return EncodeWithArgs({ - "attributes": draw(lists(key_value())), + "attributes": draw(pb_repeated(key_value())), "dropped_attributes_count": draw(pb_uint32()), }, Resource) @@ -99,7 +102,7 @@ def any_value(draw): @composite def array_value(draw): return EncodeWithArgs({ - "values": draw(lists(any_value())), + "values": draw(pb_repeated(any_value())), }, ArrayValue) @composite @@ -112,7 +115,7 @@ def key_value(draw): @composite def key_value_list(draw): return EncodeWithArgs({ - "values": draw(lists(key_value())), + "values": draw(pb_repeated(key_value())), }, KeyValueList) @composite @@ -123,18 +126,18 @@ def log_record(draw): "severity_number": draw(pb_enum(logs_sf.SeverityNumber)), "severity_text": draw(pb_string()), "body": draw(any_value()), - "attributes": draw(lists(key_value())), + "attributes": draw(pb_repeated(key_value())), "dropped_attributes_count": draw(pb_uint32()), "flags": draw(pb_fixed32()), - "span_id": draw(pb_bytes()), - "trace_id": draw(pb_bytes()), + "span_id": draw(pb_span_id()), + "trace_id": draw(pb_trace_id()), }, LogRecord) @composite def scope_logs(draw): return EncodeWithArgs({ "scope": draw(instrumentation_scope()), - "log_records": draw(lists(log_record())), + "log_records": draw(pb_repeated(log_record())), "schema_url": draw(pb_string()), }, ScopeLogs) @@ -142,14 +145,14 @@ def scope_logs(draw): def resource_logs(draw): return EncodeWithArgs({ "resource": draw(resource()), - "scope_logs": draw(lists(scope_logs())), + "scope_logs": draw(pb_repeated(scope_logs())), "schema_url": draw(pb_string()), }, ResourceLogs) @composite def logs_data(draw): return EncodeWithArgs({ - "resource_logs": draw(lists(resource_logs())), + "resource_logs": draw(pb_repeated(resource_logs())), }, LogsData) # Helper function to recursively encode protobuf types using the generated args @@ -169,11 +172,6 @@ def encode_recurse(obj: EncodeWithArgs, strategy: str) -> Any: return obj.cls.sf(**kwargs) class TestProtoSerialization(unittest.TestCase): - @hypothesis.settings( - suppress_health_check=[ - hypothesis.HealthCheck.too_slow, - ], - ) @hypothesis.given(logs_data()) def test_log_data(self, logs_data): self.assertEqual( From 9004ef9db0af8fa762d02fc08f9447f0b3123833 Mon Sep 17 00:00:00 2001 From: Jeevan Opel Date: Mon, 28 Oct 2024 16:33:41 -0700 Subject: [PATCH 09/15] Disable too slow --- tests/test_proto_serialization.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_proto_serialization.py b/tests/test_proto_serialization.py index ea6dbd1..614d3c7 100644 --- a/tests/test_proto_serialization.py +++ b/tests/test_proto_serialization.py @@ -32,8 +32,8 @@ def pb_fixed32(): return pb_uint32() def pb_sfixed64(): return pb_int64() def pb_sfixed32(): return pb_int32() def pb_bool(): return booleans() -def pb_string(): return text(max_size=100) -def pb_bytes(): return binary(max_size=100) +def pb_string(): return text(max_size=20) +def pb_bytes(): return binary(max_size=20) def pb_enum(enum): return sampled_from([member.value for member in enum]) def pb_repeated(type): return lists(type, max_size=3) # limit the size of the repeated field to speed up testing def pb_span_id(): return binary(min_size=8, max_size=8) @@ -172,6 +172,9 @@ def encode_recurse(obj: EncodeWithArgs, strategy: str) -> Any: return obj.cls.sf(**kwargs) class TestProtoSerialization(unittest.TestCase): + @hypothesis.settings( + suppress_health_check=[hypothesis.HealthCheck.too_slow] + ) @hypothesis.given(logs_data()) def test_log_data(self, logs_data): self.assertEqual( From 657b6d2bd3e006db8c11262571ba52b48f5171c5 Mon Sep 17 00:00:00 2001 From: Jeevan Opel Date: Tue, 29 Oct 2024 11:19:20 -0700 Subject: [PATCH 10/15] Test all messages --- tests/test_proto_serialization.py | 351 +++++++++++++++++++++++++++--- 1 file changed, 320 insertions(+), 31 deletions(-) diff --git a/tests/test_proto_serialization.py b/tests/test_proto_serialization.py index 614d3c7..020a21a 100644 --- a/tests/test_proto_serialization.py +++ b/tests/test_proto_serialization.py @@ -4,6 +4,7 @@ from typing import Any, Dict, Callable import hypothesis from hypothesis.strategies import composite, text, booleans, integers, floats, lists, binary, sampled_from +from hypothesis.control import assume import hypothesis.strategies import opentelemetry.proto.logs.v1.logs_pb2 as logs_pb2 @@ -27,6 +28,13 @@ def pb_sint32(): return integers(min_value=-2**31, max_value=2**31-1) def pb_sint64(): return integers(min_value=-2**63, max_value=2**63-1) def pb_float(): return floats(allow_nan=False, allow_infinity=False, width=32) def pb_double(): return floats(allow_nan=False, allow_infinity=False, width=64) +def draw_pb_double(draw): + # -0.0 is an edge case that is not handled by the snowflake serialization library + # Protobuf serialization will serialize -0.0 as "-0.0", and omit 0.0 + # Snowflake will omit both -0.0 and 0.0 + double = draw(pb_double()) + assume(str(double) != "-0.0") + return double def pb_fixed64(): return pb_uint64() def pb_fixed32(): return pb_uint32() def pb_sfixed64(): return pb_int64() @@ -46,16 +54,44 @@ class EncodeStrategy: sf: Callable[[Any], Any] Resource = EncodeStrategy(pb2=resource_pb2.Resource, sf=resource_sf.Resource) + InstrumentationScope = EncodeStrategy(pb2=common_pb2.InstrumentationScope, sf=common_sf.InstrumentationScope) AnyValue = EncodeStrategy(pb2=common_pb2.AnyValue, sf=common_sf.AnyValue) ArrayValue = EncodeStrategy(pb2=common_pb2.ArrayValue, sf=common_sf.ArrayValue) KeyValue = EncodeStrategy(pb2=common_pb2.KeyValue, sf=common_sf.KeyValue) KeyValueList = EncodeStrategy(pb2=common_pb2.KeyValueList, sf=common_sf.KeyValueList) + LogRecord = EncodeStrategy(pb2=logs_pb2.LogRecord, sf=logs_sf.LogRecord) ScopeLogs = EncodeStrategy(pb2=logs_pb2.ScopeLogs, sf=logs_sf.ScopeLogs) ResourceLogs = EncodeStrategy(pb2=logs_pb2.ResourceLogs, sf=logs_sf.ResourceLogs) LogsData = EncodeStrategy(pb2=logs_pb2.LogsData, sf=logs_sf.LogsData) +TracesData = EncodeStrategy(pb2=trace_pb2.TracesData, sf=trace_sf.TracesData) +ScopeSpans = EncodeStrategy(pb2=trace_pb2.ScopeSpans, sf=trace_sf.ScopeSpans) +ResourceSpans = EncodeStrategy(pb2=trace_pb2.ResourceSpans, sf=trace_sf.ResourceSpans) +Span = EncodeStrategy(pb2=trace_pb2.Span, sf=trace_sf.Span) +Event = EncodeStrategy(pb2=trace_pb2.Span.Event, sf=trace_sf.Span_Event) +Link = EncodeStrategy(pb2=trace_pb2.Span.Link, sf=trace_sf.Span_Link) +Status = EncodeStrategy(pb2=trace_pb2.Status, sf=trace_sf.Status) + +Metric = EncodeStrategy(pb2=metrics_pb2.Metric, sf=metrics_sf.Metric) +ScopeMetrics = EncodeStrategy(pb2=metrics_pb2.ScopeMetrics, sf=metrics_sf.ScopeMetrics) +ResourceMetrics = EncodeStrategy(pb2=metrics_pb2.ResourceMetrics, sf=metrics_sf.ResourceMetrics) +MetricsData = EncodeStrategy(pb2=metrics_pb2.MetricsData, sf=metrics_sf.MetricsData) +Gauge = EncodeStrategy(pb2=metrics_pb2.Gauge, sf=metrics_sf.Gauge) +Sum = EncodeStrategy(pb2=metrics_pb2.Sum, sf=metrics_sf.Sum) +Histogram = EncodeStrategy(pb2=metrics_pb2.Histogram, sf=metrics_sf.Histogram) +ExponentialHistogram = EncodeStrategy(pb2=metrics_pb2.ExponentialHistogram, sf=metrics_sf.ExponentialHistogram) +Summary = EncodeStrategy(pb2=metrics_pb2.Summary, sf=metrics_sf.Summary) +NumberDataPoint = EncodeStrategy(pb2=metrics_pb2.NumberDataPoint, sf=metrics_sf.NumberDataPoint) +Exemplar = EncodeStrategy(pb2=metrics_pb2.Exemplar, sf=metrics_sf.Exemplar) +HistogramDataPoint = EncodeStrategy(pb2=metrics_pb2.HistogramDataPoint, sf=metrics_sf.HistogramDataPoint) +ExponentialHistogramDataPoint = EncodeStrategy(pb2=metrics_pb2.ExponentialHistogramDataPoint, sf=metrics_sf.ExponentialHistogramDataPoint) +SummaryDataPoint = EncodeStrategy(pb2=metrics_pb2.SummaryDataPoint, sf=metrics_sf.SummaryDataPoint) +ValueAtQuantile = EncodeStrategy(pb2=metrics_pb2.SummaryDataPoint.ValueAtQuantile, sf=metrics_sf.SummaryDataPoint_ValueAtQuantile) +Buckets = EncodeStrategy(pb2=metrics_pb2.ExponentialHistogramDataPoint.Buckets, sf=metrics_sf.ExponentialHistogramDataPoint_Buckets) + + # Package the protobuf type with its arguments for serialization @dataclass class EncodeWithArgs: @@ -90,7 +126,7 @@ def any_value(draw): elif oneof == 2: kwargs = {"int_value": draw(pb_int64())} elif oneof == 3: - kwargs = {"double_value": draw(pb_double())} + kwargs = {"double_value": draw_pb_double(draw)} elif oneof == 4: kwargs = {"array_value": draw(array_value())} elif oneof == 5: @@ -119,41 +155,280 @@ def key_value_list(draw): }, KeyValueList) @composite -def log_record(draw): - return EncodeWithArgs({ - "time_unix_nano": draw(pb_fixed64()), - "observed_time_unix_nano": draw(pb_fixed64()), - "severity_number": draw(pb_enum(logs_sf.SeverityNumber)), - "severity_text": draw(pb_string()), - "body": draw(any_value()), - "attributes": draw(pb_repeated(key_value())), - "dropped_attributes_count": draw(pb_uint32()), - "flags": draw(pb_fixed32()), - "span_id": draw(pb_span_id()), - "trace_id": draw(pb_trace_id()), - }, LogRecord) +def logs_data(draw): + @composite + def log_record(draw): + return EncodeWithArgs({ + "time_unix_nano": draw(pb_fixed64()), + "observed_time_unix_nano": draw(pb_fixed64()), + "severity_number": draw(pb_enum(logs_sf.SeverityNumber)), + "severity_text": draw(pb_string()), + "body": draw(any_value()), + "attributes": draw(pb_repeated(key_value())), + "dropped_attributes_count": draw(pb_uint32()), + "flags": draw(pb_fixed32()), + "span_id": draw(pb_span_id()), + "trace_id": draw(pb_trace_id()), + }, LogRecord) -@composite -def scope_logs(draw): + @composite + def scope_logs(draw): + return EncodeWithArgs({ + "scope": draw(instrumentation_scope()), + "log_records": draw(pb_repeated(log_record())), + "schema_url": draw(pb_string()), + }, ScopeLogs) + + @composite + def resource_logs(draw): + return EncodeWithArgs({ + "resource": draw(resource()), + "scope_logs": draw(pb_repeated(scope_logs())), + "schema_url": draw(pb_string()), + }, ResourceLogs) + return EncodeWithArgs({ - "scope": draw(instrumentation_scope()), - "log_records": draw(pb_repeated(log_record())), - "schema_url": draw(pb_string()), - }, ScopeLogs) + "resource_logs": draw(pb_repeated(resource_logs())), + }, LogsData) @composite -def resource_logs(draw): +def traces_data(draw): + @composite + def event(draw): + return EncodeWithArgs({ + "time_unix_nano": draw(pb_fixed64()), + "name": draw(pb_string()), + "attributes": draw(pb_repeated(key_value())), + "dropped_attributes_count": draw(pb_uint32()), + }, Event) + + @composite + def link(draw): + return EncodeWithArgs({ + "trace_id": draw(pb_trace_id()), + "span_id": draw(pb_span_id()), + "trace_state": draw(pb_string()), + "attributes": draw(pb_repeated(key_value())), + "dropped_attributes_count": draw(pb_uint32()), + "flags": draw(pb_fixed32()), + }, Link) + + @composite + def status(draw): + return EncodeWithArgs({ + "code": draw(pb_enum(trace_sf.Status_StatusCode)), + "message": draw(pb_string()), + }, Status) + + @composite + def span(draw): + return EncodeWithArgs({ + "trace_id": draw(pb_trace_id()), + "span_id": draw(pb_span_id()), + "trace_state": draw(pb_string()), + "parent_span_id": draw(pb_span_id()), + "name": draw(pb_string()), + "kind": draw(pb_enum(trace_sf.Span_SpanKind)), + "start_time_unix_nano": draw(pb_fixed64()), + "end_time_unix_nano": draw(pb_fixed64()), + "attributes": draw(pb_repeated(key_value())), + "events": draw(pb_repeated(event())), + "links": draw(pb_repeated(link())), + "status": draw(status()), + "dropped_attributes_count": draw(pb_uint32()), + "dropped_events_count": draw(pb_uint32()), + "dropped_links_count": draw(pb_uint32()), + "flags": draw(pb_fixed32()), + }, Span) + + @composite + def scope_spans(draw): + return EncodeWithArgs({ + "scope": draw(instrumentation_scope()), + "spans": draw(pb_repeated(span())), + "schema_url": draw(pb_string()), + }, ScopeSpans) + + @composite + def resource_spans(draw): + return EncodeWithArgs({ + "resource": draw(resource()), + "scope_spans": draw(pb_repeated(scope_spans())), + "schema_url": draw(pb_string()), + }, ResourceSpans) + return EncodeWithArgs({ - "resource": draw(resource()), - "scope_logs": draw(pb_repeated(scope_logs())), - "schema_url": draw(pb_string()), - }, ResourceLogs) + "resource_spans": draw(pb_repeated(resource_spans())), + }, TracesData) @composite -def logs_data(draw): +def metrics_data(draw): + @composite + def exemplar(draw): + kwargs = {} + oneof = draw(integers(min_value=0, max_value=1)) + if oneof == 0: + kwargs["as_double"] = draw(pb_double()) + elif oneof == 1: + kwargs["as_int"] = draw(pb_sfixed64()) + + return EncodeWithArgs({ + **kwargs, + "time_unix_nano": draw(pb_fixed64()), + "trace_id": draw(pb_trace_id()), + "span_id": draw(pb_span_id()), + "filtered_attributes": draw(pb_repeated(key_value())), + }, Exemplar) + + @composite + def value_at_quantile(draw): + return EncodeWithArgs({ + "quantile": draw_pb_double(draw), + "value": draw_pb_double(draw), + }, ValueAtQuantile) + + @composite + def summary_data_point(draw): + return EncodeWithArgs({ + "start_time_unix_nano": draw(pb_fixed64()), + "time_unix_nano": draw(pb_fixed64()), + "count": draw(pb_fixed64()), + "sum": draw_pb_double(draw), + "quantile_values": draw(pb_repeated(value_at_quantile())), + "attributes": draw(pb_repeated(key_value())), + "flags": draw(pb_uint32()), + }, SummaryDataPoint) + + @composite + def buckets(draw): + return EncodeWithArgs({ + "offset": draw(pb_sint32()), + "bucket_counts": draw(pb_repeated(pb_uint64())), + }, Buckets) + + @composite + def exponential_histogram_data_point(draw): + return EncodeWithArgs({ + "start_time_unix_nano": draw(pb_fixed64()), + "time_unix_nano": draw(pb_fixed64()), + "count": draw(pb_fixed64()), + "sum": draw_pb_double(draw), + "positive": draw(buckets()), + "attributes": draw(pb_repeated(key_value())), + "flags": draw(pb_uint32()), + "exemplars": draw(pb_repeated(exemplar())), + "max": draw_pb_double(draw), + "zero_threshold": draw_pb_double(draw), + }, ExponentialHistogramDataPoint) + + @composite + def histogram_data_point(draw): + return EncodeWithArgs({ + "start_time_unix_nano": draw(pb_fixed64()), + "time_unix_nano": draw(pb_fixed64()), + "count": draw(pb_fixed64()), + "sum": draw_pb_double(draw), + "bucket_counts": draw(pb_repeated(pb_uint64())), + "attributes": draw(pb_repeated(key_value())), + "flags": draw(pb_uint32()), + "exemplars": draw(pb_repeated(exemplar())), + "explicit_bounds": draw(pb_repeated(pb_double())), + "max": draw_pb_double(draw), + }, HistogramDataPoint) + + @composite + def number_data_point(draw): + oneof = draw(integers(min_value=0, max_value=3)) + kwargs = {} + if oneof == 0: + kwargs["as_int"] = draw(pb_sfixed32()) + elif oneof == 1: + kwargs["as_double"] = draw(pb_double()) + + return EncodeWithArgs({ + "start_time_unix_nano": draw(pb_fixed64()), + "time_unix_nano": draw(pb_fixed64()), + **kwargs, + "exemplars": draw(pb_repeated(exemplar())), + "attributes": draw(pb_repeated(key_value())), + "flags": draw(pb_uint32()), + }, NumberDataPoint) + + @composite + def summary(draw): + return EncodeWithArgs({ + "data_points": draw(pb_repeated(summary_data_point())), + }, Summary) + + @composite + def exponential_histogram(draw): + return EncodeWithArgs({ + "data_points": draw(pb_repeated(exponential_histogram_data_point())), + "aggregation_temporality": draw(pb_enum(metrics_sf.AggregationTemporality)), + }, ExponentialHistogram) + + @composite + def histogram(draw): + return EncodeWithArgs({ + "data_points": draw(pb_repeated(histogram_data_point())), + "aggregation_temporality": draw(pb_enum(metrics_sf.AggregationTemporality)), + }, Histogram) + + @composite + def sum(draw): + return EncodeWithArgs({ + "data_points": draw(pb_repeated(number_data_point())), + "aggregation_temporality": draw(pb_enum(metrics_sf.AggregationTemporality)), + "is_monotonic": draw(pb_bool()), + }, Sum) + + @composite + def gauge(draw): + return EncodeWithArgs({ + "data_points": draw(pb_repeated(number_data_point())), + }, Gauge) + + @composite + def metric(draw): + oneof = draw(integers(min_value=0, max_value=3)) + kwargs = {} + if oneof == 0: + kwargs["gauge"] = draw(gauge()) + elif oneof == 1: + kwargs["sum"] = draw(sum()) + elif oneof == 2: + kwargs["histogram"] = draw(histogram()) + elif oneof == 3: + kwargs["exponential_histogram"] = draw(exponential_histogram()) + + return EncodeWithArgs({ + "name": draw(pb_string()), + "description": draw(pb_string()), + "unit": draw(pb_string()), + **kwargs, + "metadata": draw(pb_repeated(key_value())), + }, Metric) + + @composite + def scope_metrics(draw): + return EncodeWithArgs({ + "scope": draw(instrumentation_scope()), + "metrics": draw(pb_repeated(metric())), + "schema_url": draw(pb_string()), + }, ScopeMetrics) + + @composite + def resource_metrics(draw): + return EncodeWithArgs({ + "resource": draw(resource()), + "scope_metrics": draw(pb_repeated(scope_metrics())), + "schema_url": draw(pb_string()), + }, ResourceMetrics) + return EncodeWithArgs({ - "resource_logs": draw(pb_repeated(resource_logs())), - }, LogsData) + "resource_metrics": draw(pb_repeated(resource_metrics())), + }, MetricsData) + # Helper function to recursively encode protobuf types using the generated args # and the given serialization strategy @@ -172,12 +447,26 @@ def encode_recurse(obj: EncodeWithArgs, strategy: str) -> Any: return obj.cls.sf(**kwargs) class TestProtoSerialization(unittest.TestCase): - @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.too_slow] - ) + @hypothesis.settings(suppress_health_check=[hypothesis.HealthCheck.too_slow]) @hypothesis.given(logs_data()) def test_log_data(self, logs_data): self.assertEqual( encode_recurse(logs_data, "pb2").SerializeToString(), bytes(encode_recurse(logs_data, "sf")) ) + + @hypothesis.settings(suppress_health_check=[hypothesis.HealthCheck.too_slow]) + @hypothesis.given(traces_data()) + def test_trace_data(self, traces_data): + self.assertEqual( + encode_recurse(traces_data, "pb2").SerializeToString(), + bytes(encode_recurse(traces_data, "sf")) + ) + + @hypothesis.settings(suppress_health_check=[hypothesis.HealthCheck.too_slow]) + @hypothesis.given(metrics_data()) + def test_metrics_data(self, metrics_data): + self.assertEqual( + encode_recurse(metrics_data, "pb2").SerializeToString(), + bytes(encode_recurse(metrics_data, "sf")) + ) From 3a0ee302ad7dc4c4a8828f04ee6ab3c8e25949c0 Mon Sep 17 00:00:00 2001 From: Jeevan Opel Date: Tue, 29 Oct 2024 15:01:11 -0700 Subject: [PATCH 11/15] Add code generation workflow --- .github/workflows/check-codegen.yml | 33 +++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .github/workflows/check-codegen.yml diff --git a/.github/workflows/check-codegen.yml b/.github/workflows/check-codegen.yml new file mode 100644 index 0000000..57a0cd1 --- /dev/null +++ b/.github/workflows/check-codegen.yml @@ -0,0 +1,33 @@ +# This test will check that the codegen script is up to date with the latest changes in the telemetry schema. + +name: Check Codegen + +on: + push: + branches: [ "main" ] + paths: + - "scripts/**" + - "src/snowflake/telemetry/_internal/opentelemetry/proto/**" + - ".github/workflows/check-codegen.yml" + pull_request: + branches: [ "main" ] + paths: + - "scripts/**" + - "src/snowflake/telemetry/_internal/opentelemetry/proto/**" + - ".github/workflows/check-codegen.yml" + +jobs: + check-codegen: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: "3.11" + - name: Run codegen script + run: | + ./scripts/proto_codegen.sh + - name: Check for changes + run: | + git diff --exit-code || { echo "Code generation produced changes! Regenerate the code using ./scripts/proto_codegen.sh"; exit 1; } From 74c003b43e2545f726406b222e942b5a95ad7256 Mon Sep 17 00:00:00 2001 From: Jeevan Opel Date: Tue, 29 Oct 2024 15:03:36 -0700 Subject: [PATCH 12/15] Test failure of workflow --- scripts/templates/template.py.jinja2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/templates/template.py.jinja2 b/scripts/templates/template.py.jinja2 index 2246245..7e84afd 100644 --- a/scripts/templates/template.py.jinja2 +++ b/scripts/templates/template.py.jinja2 @@ -1,5 +1,5 @@ # Generated by the protoc compiler with a custom plugin. DO NOT EDIT! -# sources: {{ file_template.name }} +# sources: {{ file_template.name }} test failure from snowflake.telemetry._internal.serialize import ( Enum, From 83249c4b50ca7e151dcb5be46a312fec6eb51fe6 Mon Sep 17 00:00:00 2001 From: Jeevan Opel Date: Tue, 29 Oct 2024 15:09:16 -0700 Subject: [PATCH 13/15] Revert "Test failure of workflow" This reverts commit 74c003b43e2545f726406b222e942b5a95ad7256. --- scripts/templates/template.py.jinja2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/templates/template.py.jinja2 b/scripts/templates/template.py.jinja2 index 7e84afd..2246245 100644 --- a/scripts/templates/template.py.jinja2 +++ b/scripts/templates/template.py.jinja2 @@ -1,5 +1,5 @@ # Generated by the protoc compiler with a custom plugin. DO NOT EDIT! -# sources: {{ file_template.name }} test failure +# sources: {{ file_template.name }} from snowflake.telemetry._internal.serialize import ( Enum, From faf9d250a412c793413a54ea4f48a5cb65f15655 Mon Sep 17 00:00:00 2001 From: Jeevan Opel Date: Tue, 29 Oct 2024 15:35:18 -0700 Subject: [PATCH 14/15] Update workflow --- .github/workflows/check-codegen.yml | 5 ++++- scripts/proto_codegen.sh | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/check-codegen.yml b/.github/workflows/check-codegen.yml index 57a0cd1..17b4c6f 100644 --- a/.github/workflows/check-codegen.yml +++ b/.github/workflows/check-codegen.yml @@ -1,4 +1,6 @@ -# This test will check that the codegen script is up to date with the latest changes in the telemetry schema. +# This workflow will delete and regenerate the opentelemetry marshaling code using scripts/proto_codegen.sh. +# If generating the code produces any changes from what is currently checked in, the workflow will fail and prompt the user to regenerate the code. +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: Check Codegen @@ -27,6 +29,7 @@ jobs: python-version: "3.11" - name: Run codegen script run: | + rm -rf src/snowflake/telemetry/_internal/opentelemetry/proto/ ./scripts/proto_codegen.sh - name: Check for changes run: | diff --git a/scripts/proto_codegen.sh b/scripts/proto_codegen.sh index 4713c04..4d7dffa 100755 --- a/scripts/proto_codegen.sh +++ b/scripts/proto_codegen.sh @@ -52,6 +52,7 @@ fi cd $repo_root/src/snowflake/telemetry/_internal # clean up old generated code +mkdir -p opentelemetry/proto find opentelemetry/proto/ -regex ".*\.py?" -exec rm {} + # generate proto code for all protos From 33fcd013b673fbb30f58e8b9d9e6122a3ae93234 Mon Sep 17 00:00:00 2001 From: Jeevan Opel Date: Tue, 29 Oct 2024 15:35:41 -0700 Subject: [PATCH 15/15] Add deterministic --- tests/test_proto_serialization.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_proto_serialization.py b/tests/test_proto_serialization.py index 020a21a..c9f4cc5 100644 --- a/tests/test_proto_serialization.py +++ b/tests/test_proto_serialization.py @@ -451,7 +451,7 @@ class TestProtoSerialization(unittest.TestCase): @hypothesis.given(logs_data()) def test_log_data(self, logs_data): self.assertEqual( - encode_recurse(logs_data, "pb2").SerializeToString(), + encode_recurse(logs_data, "pb2").SerializeToString(deterministic=True), bytes(encode_recurse(logs_data, "sf")) ) @@ -459,7 +459,7 @@ def test_log_data(self, logs_data): @hypothesis.given(traces_data()) def test_trace_data(self, traces_data): self.assertEqual( - encode_recurse(traces_data, "pb2").SerializeToString(), + encode_recurse(traces_data, "pb2").SerializeToString(deterministic=True), bytes(encode_recurse(traces_data, "sf")) ) @@ -467,6 +467,6 @@ def test_trace_data(self, traces_data): @hypothesis.given(metrics_data()) def test_metrics_data(self, metrics_data): self.assertEqual( - encode_recurse(metrics_data, "pb2").SerializeToString(), + encode_recurse(metrics_data, "pb2").SerializeToString(deterministic=True), bytes(encode_recurse(metrics_data, "sf")) )