VirtualFramework/kernelsCache/AIDenoiser-53cb6d4c.v.01070000BBCFD912.NVIDIA GeForce RTX 3070 Ti.560.94_64.bin
2025-03-07 17:23:20 +08:00

121 lines
2.9 KiB
Plaintext
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: UNKNOWN
// Unknown Toolkit Version
// Based on NVVM 7.0.1
//
// PTX ISA version this module is written against.
.version 8.5
// Code is generated for the sm_86 architecture, using the independent texture mode.
.target sm_86, texmode_independent
// All addresses in this module are 64 bits wide.
.address_size 64
// .globl ConcatVectorsKernel
// ConcatVectorsKernel
//
// One thread handles one (x, y) position. It reads a 4 x f32 vector from the
// buffer at param_0, replaces the whole vector with zeros if any component is
// Inf or NaN, clamps every component to be >= 0, and then writes the
// components selected by the bit mask in param_5 to the buffer at param_4 as
// consecutive planes of f32.
//
// Parameters actually read by this body:
//   param_0  global pointer (16-byte aligned): source, read as 4 x f32 per index.
//   param_4  global pointer (4-byte aligned):  destination, written as single f32.
//   param_5  u32 bit mask: bit 0..3 enable the store of component 0..3.
// param_1..3 and param_6..9 are declared but never loaded here.
//
// The extent 2048 and the plane stride 16777216 are hard-coded immediates in
// this body.
.entry ConcatVectorsKernel(
.param .u64 .ptr .global .align 16 ConcatVectorsKernel_param_0,
.param .u32 ConcatVectorsKernel_param_1,
.param .u32 ConcatVectorsKernel_param_2,
.param .u32 ConcatVectorsKernel_param_3,
.param .u64 .ptr .global .align 4 ConcatVectorsKernel_param_4,
.param .u32 ConcatVectorsKernel_param_5,
.param .u32 ConcatVectorsKernel_param_6,
.param .u32 ConcatVectorsKernel_param_7,
.param .u32 ConcatVectorsKernel_param_8,
.param .u32 ConcatVectorsKernel_param_9
)
{
// Virtual register pools.
.reg .pred %p<18>;
.reg .f32 %f<22>;
.reg .b32 %r<22>;
.reg .b64 %rd<16>;
// %rd8 = source base, %rd9 = destination base, %r3 = component mask.
ld.param.u64 %rd8, [ConcatVectorsKernel_param_0];
ld.param.u64 %rd9, [ConcatVectorsKernel_param_4];
ld.param.u32 %r3, [ConcatVectorsKernel_param_5];
// %r1 = ntid.x * ctaid.x + tid.x + %envreg3  (x index of this thread).
// NOTE(review): %envreg3 / %envreg4 are driver-defined environment registers;
// presumably they carry a per-dimension global offset - confirm.
mov.b32 %r4, %envreg3;
mov.u32 %r5, %ctaid.x;
mov.u32 %r6, %ntid.x;
mov.u32 %r7, %tid.x;
add.s32 %r8, %r7, %r4;
mad.lo.s32 %r1, %r6, %r5, %r8;
// %r2 = ntid.y * ctaid.y + tid.y + %envreg4  (y index of this thread).
mov.u32 %r9, %ctaid.y;
mov.u32 %r10, %ntid.y;
mov.u32 %r11, %tid.y;
mov.b32 %r12, %envreg4;
add.s32 %r13, %r11, %r12;
mad.lo.s32 %r2, %r10, %r9, %r13;
// Bounds guard: leave without touching memory when x > 2047 or y > 2047.
// NOTE(review): the compare is signed, so a negative index would pass this
// guard; whether that can occur depends on the launch configuration - confirm.
setp.gt.s32 %p1, %r1, 2047;
setp.gt.s32 %p2, %r2, 2047;
or.pred %p3, %p1, %p2;
@%p3 bra $L__BB0_9;
// %r15 = y * 2048 + x  (linear element index).
shl.b32 %r14, %r2, 11;
add.s32 %r15, %r14, %r1;
// %rd14 = destination + index * 4; it is used below as a running store cursor.
mul.wide.s32 %rd10, %r15, 4;
add.s64 %rd14, %rd9, %rd10;
// %rd12 = source + index * 16 (one 4 x f32 vector per index).
mul.wide.s32 %rd11, %r15, 16;
add.s64 %rd12, %rd8, %rd11;
// Vector load of the four components through the non-coherent (.nc) path.
ld.global.nc.v4.f32 {%f5, %f6, %f7, %f8}, [%rd12];
// Non-finite test: |c| compared against 0x7F800000 (+Inf) with "geu", which
// is true for >= and also for unordered operands, so each predicate is set
// when its component is Inf or NaN.
abs.ftz.f32 %f10, %f5;
abs.ftz.f32 %f12, %f6;
abs.ftz.f32 %f14, %f7;
abs.ftz.f32 %f16, %f8;
setp.geu.ftz.f32 %p4, %f14, 0f7F800000;
setp.geu.ftz.f32 %p5, %f16, 0f7F800000;
setp.geu.ftz.f32 %p6, %f12, 0f7F800000;
setp.geu.ftz.f32 %p7, %f10, 0f7F800000;
// %r17 = -1 if any of the four components is non-finite, otherwise 0.
or.pred %p8, %p7, %p6;
selp.b32 %r16, -1, 0, %p8;
or.pred %p9, %p5, %p4;
selp.b32 %r17, -1, %r16, %p9;
// %p10 is true only when all four components are finite.
setp.gt.s32 %p10, %r17, -1;
// For each component: take the loaded value when %p10 holds, else 0.0, then
// clamp from below with max(0.0, value). Results land in %f1..%f4.
mov.f32 %f17, 0f00000000;
selp.f32 %f18, %f5, 0f00000000, %p10;
max.f32 %f1, %f17, %f18;
selp.f32 %f19, %f6, 0f00000000, %p10;
max.f32 %f2, %f17, %f19;
selp.f32 %f20, %f7, 0f00000000, %p10;
max.f32 %f3, %f17, %f20;
selp.f32 %f21, %f8, 0f00000000, %p10;
max.f32 %f4, %f17, %f21;
// Mask bit 0: the xor with a constant-false predicate is an identity, so
// %p14 is simply "bit 0 is clear"; skip the store in that case.
and.b32 %r18, %r3, 1;
setp.eq.b32 %p11, %r18, 1;
mov.pred %p12, 0;
xor.pred %p13, %p11, %p12;
not.pred %p14, %p13;
@%p14 bra $L__BB0_3;
// Store component 0, then advance the cursor by 16777216 bytes
// (= 2048 * 2048 * 4, one full plane of f32 at the indexing used above).
// The cursor only moves when a component is written, so enabled components
// occupy consecutive planes.
st.global.f32 [%rd14], %f1;
add.s64 %rd14, %rd14, 16777216;
$L__BB0_3:
// Mask bit 1: store component 1 and advance one plane when set.
and.b32 %r19, %r3, 2;
setp.eq.s32 %p15, %r19, 0;
@%p15 bra $L__BB0_5;
st.global.f32 [%rd14], %f2;
add.s64 %rd14, %rd14, 16777216;
$L__BB0_5:
// Mask bit 2: store component 2 and advance one plane when set.
and.b32 %r20, %r3, 4;
setp.eq.s32 %p16, %r20, 0;
@%p16 bra $L__BB0_7;
st.global.f32 [%rd14], %f3;
add.s64 %rd14, %rd14, 16777216;
$L__BB0_7:
// Mask bit 3: store component 3 when set; no further advance is needed
// because this is the last component.
and.b32 %r21, %r3, 8;
setp.eq.s32 %p17, %r21, 0;
@%p17 bra $L__BB0_9;
st.global.f32 [%rd14], %f4;
$L__BB0_9:
// Common exit, also reached directly by out-of-range threads.
ret;
}