Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Metal 3.1 and 3.2 #373

Open
maleadt opened this issue Jun 19, 2024 · 4 comments
Open

Metal 3.1 and 3.2 #373

maleadt opened this issue Jun 19, 2024 · 4 comments
Labels
libraries Things about libraries and how we use them.

Comments

@maleadt
Copy link
Member

maleadt commented Jun 19, 2024

There are useful features in both.

Metal 3.1 (macOS 14):

Metal 3.2 (macOS 15):

  • logging (@mtlprintf #418)
  • relaxed math
  • atomic changes: atomic_thread_fence, thread_scope
@tgymnich
Copy link
Member

tgymnich commented Jul 4, 2024

Logging in action:
; ModuleID = 'shader.air'
source_filename = "add_arrays"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
target triple = "air64-apple-macosx15.0.0"

%struct.os_log = type { i8 addrspace(2)*, i8 addrspace(2)* }

@_ZN5metal14os_log_defaultE = internal addrspace(2) global %struct.os_log { i8 addrspace(2)* null, i8 addrspace(2)* inttoptr (i64 -1 to i8 addrspace(2)*) }, align 8
@.str = private unnamed_addr addrspace(2) constant [18 x i8] c"custom message %f\00", align 1

; Function Attrs: convergent mustprogress nounwind
; Kernel `add_arrays`. Per the !air.kernel argument metadata in this module, %0 is
; buffer "inA", %1 is "inB", %2 is "result", and %3 is "index"
; (air.thread_position_in_grid). It logs inA[index] through metal::os_log_default
; and then stores inA[index] + inB[index] into result[index].
define void @add_arrays(float addrspace(1)* nocapture noundef readonly "air-buffer-no-alias" %0, float addrspace(1)* nocapture noundef readonly "air-buffer-no-alias" %1, float addrspace(1)* nocapture noundef writeonly "air-buffer-no-alias" %2, i32 noundef %3) local_unnamed_addr #0 !dbg !35 {
  ; Widen the 32-bit thread index so it can be used as a 64-bit element offset.
  %5 = zext i32 %3 to i64, !dbg !37
  ; %6 = &inA[index]; %7 is the value that gets logged.
  %6 = getelementptr inbounds float, float addrspace(1)* %0, i64 %5, !dbg !37
  %7 = load float, float addrspace(1)* %6, align 4, !dbg !37, !tbaa !38, !alias.scope !42, !noalias !45
  ; The float is extended to double before being passed as the variadic argument.
  %8 = fpext float %7 to double, !dbg !37
  ; Call os_log::log(logger = os_log_default, format = @.str "custom message %f", i64 8, value).
  ; NOTE(review): the i64 8 is the parameter named %arg_size in the annotated listing
  ; further down; presumably the byte size of the variadic arguments (one double) - confirm.
  tail call void (%struct.os_log addrspace(2)*, i8 addrspace(2)*, i64, ...) @_ZNU11MTLconstantK5metal6os_log3logEPU11MTLconstantKcz(%struct.os_log addrspace(2)* noundef @_ZN5metal14os_log_defaultE, i8 addrspace(2)* noundef getelementptr inbounds ([18 x i8], [18 x i8] addrspace(2)* @.str, i64 0, i64 0), i64 noundef 8, double noundef %8) #5, !dbg !48
  ; inA[index] is loaded again after the call instead of reusing %7.
  %9 = load float, float addrspace(1)* %6, align 4, !dbg !49, !tbaa !38, !alias.scope !42, !noalias !45
  ; %11 = inB[index]
  %10 = getelementptr inbounds float, float addrspace(1)* %1, i64 %5, !dbg !50
  %11 = load float, float addrspace(1)* %10, align 4, !dbg !50, !tbaa !38, !alias.scope !51, !noalias !52
  ; Sum with the `fast` fast-math flag set.
  %12 = fadd fast float %11, %9, !dbg !53
  ; result[index] = sum
  %13 = getelementptr inbounds float, float addrspace(1)* %2, i64 %5, !dbg !54
  store float %12, float addrspace(1)* %13, align 4, !dbg !55, !tbaa !38, !alias.scope !56, !noalias !57
  ret void, !dbg !58
}

; Function Attrs: alwaysinline mustprogress nounwind
; metal::os_log::log(format, ...) as emitted by the Metal front end.
; %0 is the os_log object, %1 the format string, %2 an extra i64 that is forwarded
; unchanged to @air.os_log, followed by the variadic arguments.
define internal void @_ZNU11MTLconstantK5metal6os_log3logEPU11MTLconstantKcz(%struct.os_log addrspace(2)* noundef %0, i8 addrspace(2)* noundef %1, i64 noundef %2, ...) unnamed_addr #1 align 2 !dbg !59 {
  ; %4 is the stack slot holding the va_list (a single i8*); %5 is the same slot viewed as i8*.
  %4 = alloca i8*, align 8
  %5 = bitcast i8** %4 to i8*, !dbg !61
  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %5) #6, !dbg !61
  call void @llvm.va_start(i8* nonnull %5), !dbg !62
  ; Field 0 of %struct.os_log (debug info attributes this load to the inlined `subsystem`).
  %6 = getelementptr inbounds %struct.os_log, %struct.os_log addrspace(2)* %0, i64 0, i32 0, !dbg !63
  %7 = load i8 addrspace(2)*, i8 addrspace(2)* addrspace(2)* %6, align 8, !dbg !63, !tbaa !66
  ; Field 1 of %struct.os_log (debug info attributes this load to the inlined `category`).
  %8 = getelementptr inbounds %struct.os_log, %struct.os_log addrspace(2)* %0, i64 0, i32 1, !dbg !69
  %9 = load i8 addrspace(2)*, i8 addrspace(2)* addrspace(2)* %8, align 8, !dbg !69, !tbaa !72
  ; Read the pointer that va_start stored in the va_list slot.
  %10 = load i8*, i8** %4, align 8, !dbg !73, !tbaa !74
  ; air.os_log(subsystem, category, i32 0, format, args pointer, %2).
  ; The i32 0 corresponds to int(log_type_default) in the header source shown in this thread.
  call void @air.os_log(i8 addrspace(2)* %7, i8 addrspace(2)* %9, i32 0, i8 addrspace(2)* %1, i8* %10, i64 %2) #7, !dbg !75
  call void @llvm.va_end(i8* nonnull %5), !dbg !76
  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %5) #6, !dbg !77
  ret void, !dbg !77
}

; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2

; Function Attrs: nocallback nofree nosync nounwind willreturn
declare void @llvm.va_end(i8*) #3

; Function Attrs: mustprogress nounwind willreturn
declare void @air.os_log(i8 addrspace(2)*, i8 addrspace(2)*, i32, i8 addrspace(2)*, i8*, i64) local_unnamed_addr #4

; Function Attrs: nocallback nofree nosync nounwind willreturn
declare void @llvm.va_start(i8*) #3

; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2

attributes #0 = { convergent mustprogress nounwind "approx-func-fp-math"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" }
attributes #1 = { alwaysinline mustprogress nounwind "approx-func-fp-math"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" }
attributes #2 = { argmemonly nocallback nofree nosync nounwind willreturn }
attributes #3 = { nocallback nofree nosync nounwind willreturn }
attributes #4 = { mustprogress nounwind willreturn }
attributes #5 = { nobuiltin "no-builtins" }
attributes #6 = { nounwind }
attributes #7 = { nounwind willreturn }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21}
!llvm.ident = !{!22}
!air.version = !{!23}
!air.language_version = !{!24}
!air.compile_options = !{!25, !26, !27}
!air.kernel = !{!28}

!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "Apple metal version 32023.329 (metalfe-32023.329.2)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, imports: !2, splitDebugInlining: false, nameTableKind: None, sysroot: "/Applications/Xcode-beta.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX15.0.sdk", sdk: "MacOSX15.0.sdk")
!1 = !DIFile(filename: "/Users/tim/Developer/PerformingCalculationsOnAGPU/MetalComputeBasic/add.metal", directory: "/Users/tim/Developer/PerformingCalculationsOnAGPU")
!2 = !{!3, !6, !9}
!3 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !0, entity: !4, file: !5, line: 1)
!4 = !DIModule(scope: null, name: "metal_types", includePath: "/Applications/Xcode-beta.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/metal/32023/lib/clang/32023.329/include/metal")
!5 = !DIFile(filename: "<built-in>", directory: "/Users/tim/Developer/PerformingCalculationsOnAGPU")
!6 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !0, entity: !7, file: !8, line: 8)
!7 = !DIModule(scope: null, name: "metal_stdlib", includePath: "/Applications/Xcode-beta.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/metal/32023/lib/clang/32023.329/include/metal")
!8 = !DIFile(filename: "MetalComputeBasic/add.metal", directory: "/Users/tim/Developer/PerformingCalculationsOnAGPU")
!9 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !0, entity: !10, file: !8, line: 9)
!10 = !DIModule(scope: null, name: "metal_logging", includePath: "/Applications/Xcode-beta.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/metal/32023/lib/clang/32023.329/include/metal")
!11 = !{i32 2, !"SDK Version", [2 x i32] [i32 15, i32 0]}
!12 = !{i32 7, !"Dwarf Version", i32 4}
!13 = !{i32 2, !"Debug Info Version", i32 3}
!14 = !{i32 1, !"wchar_size", i32 4}
!15 = !{i32 7, !"frame-pointer", i32 2}
!16 = !{i32 7, !"air.max_device_buffers", i32 31}
!17 = !{i32 7, !"air.max_constant_buffers", i32 31}
!18 = !{i32 7, !"air.max_threadgroup_buffers", i32 31}
!19 = !{i32 7, !"air.max_textures", i32 128}
!20 = !{i32 7, !"air.max_read_write_textures", i32 8}
!21 = !{i32 7, !"air.max_samplers", i32 16}
!22 = !{!"Apple metal version 32023.329 (metalfe-32023.329.2)"}
!23 = !{i32 2, i32 7, i32 0}
!24 = !{!"Metal", i32 3, i32 2, i32 0}
!25 = !{!"air.compile.denorms_disable"}
!26 = !{!"air.compile.fast_math_enable"}
!27 = !{!"air.compile.framebuffer_fetch_enable"}
!28 = !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*, i32)* @add_arrays, !29, !30}
!29 = !{}
!30 = !{!31, !32, !33, !34}
!31 = !{i32 0, !"air.buffer", !"air.location_index", i32 0, i32 1, !"air.read", !"air.address_space", i32 1, !"air.arg_type_size", i32 4, !"air.arg_type_align_size", i32 4, !"air.arg_type_name", !"float", !"air.arg_name", !"inA"}
!32 = !{i32 1, !"air.buffer", !"air.location_index", i32 1, i32 1, !"air.read", !"air.address_space", i32 1, !"air.arg_type_size", i32 4, !"air.arg_type_align_size", i32 4, !"air.arg_type_name", !"float", !"air.arg_name", !"inB"}
!33 = !{i32 2, !"air.buffer", !"air.location_index", i32 2, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 4, !"air.arg_type_align_size", i32 4, !"air.arg_type_name", !"float", !"air.arg_name", !"result"}
!34 = !{i32 3, !"air.thread_position_in_grid", !"air.arg_type_name", !"uint", !"air.arg_name", !"index"}
!35 = distinct !DISubprogram(name: "add_arrays", scope: !8, file: !8, line: 13, type: !36, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !29)
!36 = !DISubroutineType(types: !29)
!37 = !DILocation(line: 18, column: 45, scope: !35)
!38 = !{!39, !39, i64 0}
!39 = !{!"float", !40, i64 0}
!40 = !{!"omnipotent char", !41, i64 0}
!41 = !{!"Simple C++ TBAA"}
!42 = !{!43}
!43 = distinct !{!43, !44, !"air-alias-scope-arg(0)"}
!44 = distinct !{!44, !"air-alias-scopes(add_arrays)"}
!45 = !{!46, !47}
!46 = distinct !{!46, !44, !"air-alias-scope-arg(1)"}
!47 = distinct !{!47, !44, !"air-alias-scope-arg(2)"}
!48 = !DILocation(line: 18, column: 20, scope: !35)
!49 = !DILocation(line: 19, column: 21, scope: !35)
!50 = !DILocation(line: 19, column: 34, scope: !35)
!51 = !{!46}
!52 = !{!43, !47}
!53 = !DILocation(line: 19, column: 32, scope: !35)
!54 = !DILocation(line: 19, column: 5, scope: !35)
!55 = !DILocation(line: 19, column: 19, scope: !35)
!56 = !{!47}
!57 = !{!43, !46}
!58 = !DILocation(line: 20, column: 1, scope: !35)
!59 = distinct !DISubprogram(name: "log", scope: !60, file: !60, line: 57, type: !36, scopeLine: 58, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !29)
!60 = !DIFile(filename: "/Applications/Xcode-beta.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/metal/32023/lib/clang/32023.329/include/metal/metal_logging", directory: "")
!61 = !DILocation(line: 60, column: 5, scope: !59)
!62 = !DILocation(line: 61, column: 5, scope: !59)
!63 = !DILocation(line: 40, column: 12, scope: !64, inlinedAt: !65)
!64 = distinct !DISubprogram(name: "subsystem", scope: !60, file: !60, line: 38, type: !36, scopeLine: 39, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !29)
!65 = distinct !DILocation(line: 62, column: 20, scope: !59)
!66 = !{!67, !68, i64 0}
!67 = !{!"_ZTSN5metal6os_logE", !68, i64 0, !68, i64 8}
!68 = !{!"any pointer", !40, i64 0}
!69 = !DILocation(line: 44, column: 12, scope: !70, inlinedAt: !71)
!70 = distinct !DISubprogram(name: "category", scope: !60, file: !60, line: 42, type: !36, scopeLine: 43, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !29)
!71 = distinct !DILocation(line: 62, column: 33, scope: !59)
!72 = !{!67, !68, i64 8}
!73 = !DILocation(line: 62, column: 76, scope: !59)
!74 = !{!68, !68, i64 0}
!75 = !DILocation(line: 62, column: 5, scope: !59)
!76 = !DILocation(line: 63, column: 5, scope: !59)
!77 = !DILocation(line: 65, column: 3, scope: !59)
  // Variadic logging entry point: forwards `format` and its variadic arguments to
  // __metal_os_log together with subsystem(), category() and log_type_default.
  // The body is compiled only when __METAL_ENABLE_LOGGING__ is defined; otherwise
  // calling log() does nothing.
  // NOTE(review): METAL_VALIST_SIZE_CC / METAL_VALIST_SIZE presumably account for the
  // extra i64 parameter visible in the generated IR - confirm against the header.
  METAL_FUNC METAL_OS_LOG_FORMAT(2) void log(constant char *format, ...) constant METAL_VALIST_SIZE_CC
  {
#ifdef __METAL_ENABLE_LOGGING__
    __builtin_va_list args;
    // Start variadic traversal after the last named parameter.
    __builtin_va_start(args, format);
    __metal_os_log(subsystem(), category(), int(log_type_default), format, args, METAL_VALIST_SIZE);
    __builtin_va_end(args);
#endif
  }

@maleadt
Copy link
Member Author

maleadt commented Jul 5, 2024

That seems reasonable. Did the output end up on stdout, or only in the Console app?

@tgymnich
Copy link
Member

tgymnich commented Jul 5, 2024

@maleadt I didn't get this to run yet (maybe because I tried in a VM).

There is also this: https://developer.apple.com/documentation/metal/mtllogstate/4354218-addloghandler?language=objc

@christiangnrd christiangnrd added enhancement libraries Things about libraries and how we use them. labels Jul 27, 2024
@tgymnich
Copy link
Member

tgymnich commented Sep 13, 2024

I got it to work.

Annotated example of using @air.os_log

; Function Attrs: alwaysinline mustprogress nounwind
; metal::os_log::log(format, ...) with values renamed for readability.
; %logger is the os_log object, %format the format string, and %arg_size an i64
; that is forwarded unchanged as the last argument of @air.os_log.
define internal void @_ZNU11MTLconstantK5metal6os_log3logEPU11MTLconstantKcz(%struct.os_log addrspace(2)* noundef %logger, i8 addrspace(2)* noundef %format, i64 noundef %arg_size, ...) unnamed_addr #1 align 2 !dbg !58 {
  ; %4 is the stack slot holding the va_list (a single i8*); %5 is the same slot viewed as i8*.
  %4 = alloca i8*, align 8
  %5 = bitcast i8** %4 to i8*, !dbg !60
  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %5) #6, !dbg !60
  call void @llvm.va_start(i8* nonnull %5), !dbg !61
  ; Field 0 of %struct.os_log: the subsystem string.
  %subsystem_str_ptr = getelementptr inbounds %struct.os_log, %struct.os_log addrspace(2)* %logger, i64 0, i32 0, !dbg !62
  %subsystem_str = load i8 addrspace(2)*, i8 addrspace(2)* addrspace(2)* %subsystem_str_ptr, align 8, !dbg !62, !tbaa !65
  ; Field 1 of %struct.os_log: the category string.
  %category_str_ptr = getelementptr inbounds %struct.os_log, %struct.os_log addrspace(2)* %logger, i64 0, i32 1, !dbg !68
  %category_str = load i8 addrspace(2)*, i8 addrspace(2)* addrspace(2)* %category_str_ptr, align 8, !dbg !68, !tbaa !71
  %args = load i8*, i8** %4, align 8, !dbg !72, !tbaa !73                           ; pointer stored in the va_list slot by va_start
  ; air.os_log(subsystem, category, log type, format, args, arg_size); the literal i32 0 is log_type_default.
  call void @air.os_log(i8 addrspace(2)* %subsystem_str, i8 addrspace(2)* %category_str, i32 0, i8 addrspace(2)* %format, i8* %args, i64 %arg_size) #7, !dbg !74
  call void @llvm.va_end(i8* nonnull %5), !dbg !75
  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %5) #6, !dbg !76
  ret void, !dbg !76
}
  // Simplified source for the IR above: collects the variadic arguments and hands them,
  // with subsystem(), category() and log_type_default, to __metal_os_log.
  void log(constant char *format, ...) constant  {
    __builtin_va_list args;
    // Start variadic traversal after the last named parameter.
    __builtin_va_start(args, format);
    // The six arguments line up with the six operands of the @air.os_log call in the IR.
    __metal_os_log(subsystem(), category(), int(log_type_default), format, args, METAL_VALIST_SIZE);
    __builtin_va_end(args);
  }

Here is how to get the command queue to allocate the log buffer and redirect the output (this also works on a per-command-buffer basis):

// Describe the shader log state: the size of the log buffer and the minimum
// level of messages to record.
MTLLogStateDescriptor *mLogStateDescriptor = [[MTLLogStateDescriptor alloc] init];
mLogStateDescriptor.bufferSize = 8192;
mLogStateDescriptor.level = MTLLogLevelDebug;

// Create the log state on the device. `error` is expected to be an NSError *
// declared earlier in the enclosing method (not shown in this snippet).
// Failure is detected through the nil return value, not through `error`.
id<MTLLogState> mLogState = [_mDevice newLogStateWithDescriptor:mLogStateDescriptor error:&error];
if (mLogState == nil) {
   NSLog(@"Failed to create log state, error %@.", error);
   return nil;
}

// Redirect shader log output: every message delivered to the handler is
// forwarded to NSLog. subSystem, category and logLevel are ignored here.
[mLogState addLogHandler:^(NSString * _Nullable subSystem, NSString * _Nullable category, MTLLogLevel logLevel, NSString * _Nonnull message) {
   NSLog(@"Got message: %@", message);
}];

// Attach the log state to the command queue descriptor so the queue created
// from it uses this log state.
MTLCommandQueueDescriptor *mCommandQueueDescriptor = [[MTLCommandQueueDescriptor alloc] init];
mCommandQueueDescriptor.logState = mLogState;

_mCommandQueue = [_mDevice newCommandQueueWithDescriptor:mCommandQueueDescriptor];
if (_mCommandQueue == nil) {
   NSLog(@"Failed to find the command queue.");
   return nil;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
libraries Things about libraries and how we use them.
Projects
None yet
Development

No branches or pull requests

3 participants