//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: UNKNOWN
// Unknown Toolkit Version
// Based on NVVM 7.0.1
//
// Hand-cleaned: restored one-instruction-per-line layout and replaced the
// redundant predicate chain on the bit-0 mask test (setp/mov/xor/not) with
// the direct setp.eq/@bra form used by the other three mask tests.
// Semantics are unchanged.

.version 8.5
.target sm_86, texmode_independent
.address_size 64

// .globl ConcatVectorsKernel

//---------------------------------------------------------------------------
// ConcatVectorsKernel
//
// Reads one float4 per thread from a 2048 x 2048 source image (param_0),
// zeroes all four lanes if ANY lane is non-finite (Inf or NaN), clamps each
// lane to >= 0, then scatters the selected lanes into consecutive
// 2048 x 2048 float planes of the destination (param_4). Which lanes are
// written is controlled by the low 4 bits of the mask in param_5: bit i set
// means "emit lane i as the next output plane".
//
// Params actually read by this kernel:
//   param_0 : .global float4* (align 16) — source, 2048*2048 elements
//   param_4 : .global float*  (align 4)  — destination plane stack
//   param_5 : u32 lane-selection bitmask (bits 0..3)
// params 1,2,3,6,7,8,9 are declared but never loaded here — presumably kept
// for a fixed host-side launch signature; confirm against the caller.
//
// %envreg3 / %envreg4 are added to the x / y thread indices — this looks
// like an OpenCL-style global work offset (NOTE(review): confirm; PTX env
// registers are driver-defined).
//
// Plane stride: 16777216 bytes = 2048 * 2048 * sizeof(float).
//---------------------------------------------------------------------------
.entry ConcatVectorsKernel(
    .param .u64 .ptr .global .align 16 ConcatVectorsKernel_param_0,
    .param .u32 ConcatVectorsKernel_param_1,
    .param .u32 ConcatVectorsKernel_param_2,
    .param .u32 ConcatVectorsKernel_param_3,
    .param .u64 .ptr .global .align 4 ConcatVectorsKernel_param_4,
    .param .u32 ConcatVectorsKernel_param_5,
    .param .u32 ConcatVectorsKernel_param_6,
    .param .u32 ConcatVectorsKernel_param_7,
    .param .u32 ConcatVectorsKernel_param_8,
    .param .u32 ConcatVectorsKernel_param_9
)
{
    .reg .pred %p<18>;
    .reg .f32 %f<22>;
    .reg .b32 %r<22>;
    .reg .b64 %rd<16>;

    // -- load the three parameters this kernel actually uses ---------------
    ld.param.u64    %rd8, [ConcatVectorsKernel_param_0];    // rd8 = src base
    ld.param.u64    %rd9, [ConcatVectorsKernel_param_4];    // rd9 = dst base
    ld.param.u32    %r3,  [ConcatVectorsKernel_param_5];    // r3  = lane mask

    // -- global x index: r1 = ntid.x*ctaid.x + tid.x + envreg3 -------------
    mov.b32         %r4, %envreg3;
    mov.u32         %r5, %ctaid.x;
    mov.u32         %r6, %ntid.x;
    mov.u32         %r7, %tid.x;
    add.s32         %r8, %r7, %r4;
    mad.lo.s32      %r1, %r6, %r5, %r8;

    // -- global y index: r2 = ntid.y*ctaid.y + tid.y + envreg4 -------------
    mov.u32         %r9,  %ctaid.y;
    mov.u32         %r10, %ntid.y;
    mov.u32         %r11, %tid.y;
    mov.b32         %r12, %envreg4;
    add.s32         %r13, %r11, %r12;
    mad.lo.s32      %r2,  %r10, %r9, %r13;

    // -- bounds check: exit unless 0 <= x,y <= 2047 (2048 x 2048 image) ----
    setp.gt.s32     %p1, %r1, 2047;
    setp.gt.s32     %p2, %r2, 2047;
    or.pred         %p3, %p1, %p2;
    @%p3 bra        $L__BB0_9;

    // -- linear element index r15 = y*2048 + x; derive both pointers -------
    shl.b32         %r14, %r2, 11;                  // y * 2048
    add.s32         %r15, %r14, %r1;
    mul.wide.s32    %rd10, %r15, 4;                 // * sizeof(float)
    add.s64         %rd14, %rd9, %rd10;             // rd14 = dst cursor
    mul.wide.s32    %rd11, %r15, 16;                // * sizeof(float4)
    add.s64         %rd12, %rd8, %rd11;

    // -- gather one float4 through the read-only (non-coherent) path -------
    ld.global.nc.v4.f32 {%f5, %f6, %f7, %f8}, [%rd12];

    // -- finiteness test: |lane| >= 0x7F800000 with .geu is true for both
    //    Inf (ordered >=) and NaN (unordered). p10 <=> all four lanes finite.
    abs.ftz.f32     %f10, %f5;
    abs.ftz.f32     %f12, %f6;
    abs.ftz.f32     %f14, %f7;
    abs.ftz.f32     %f16, %f8;
    setp.geu.ftz.f32 %p4, %f14, 0f7F800000;
    setp.geu.ftz.f32 %p5, %f16, 0f7F800000;
    setp.geu.ftz.f32 %p6, %f12, 0f7F800000;
    setp.geu.ftz.f32 %p7, %f10, 0f7F800000;
    or.pred         %p8, %p7, %p6;                  // lane 0 or 1 bad
    selp.b32        %r16, -1, 0, %p8;
    or.pred         %p9, %p5, %p4;                  // lane 2 or 3 bad
    selp.b32        %r17, -1, %r16, %p9;
    setp.gt.s32     %p10, %r17, -1;                 // p10 = all finite

    // -- f1..f4 = max(0, all-finite ? lane : 0): zero the whole vector on
    //    any non-finite lane, otherwise clamp each lane to be non-negative.
    mov.f32         %f17, 0f00000000;
    selp.f32        %f18, %f5, 0f00000000, %p10;
    max.f32         %f1, %f17, %f18;
    selp.f32        %f19, %f6, 0f00000000, %p10;
    max.f32         %f2, %f17, %f19;
    selp.f32        %f20, %f7, 0f00000000, %p10;
    max.f32         %f3, %f17, %f20;
    selp.f32        %f21, %f8, 0f00000000, %p10;
    max.f32         %f4, %f17, %f21;

    // -- mask-gated plane stores: each selected lane lands at the current
    //    dst cursor, which then advances one full plane (16777216 bytes).
    and.b32         %r18, %r3, 1;
    setp.eq.s32     %p11, %r18, 0;                  // simplified: was
    @%p11 bra       $L__BB0_3;                      // setp/mov/xor/not chain
    st.global.f32   [%rd14], %f1;
    add.s64         %rd14, %rd14, 16777216;
$L__BB0_3:
    and.b32         %r19, %r3, 2;
    setp.eq.s32     %p15, %r19, 0;
    @%p15 bra       $L__BB0_5;
    st.global.f32   [%rd14], %f2;
    add.s64         %rd14, %rd14, 16777216;
$L__BB0_5:
    and.b32         %r20, %r3, 4;
    setp.eq.s32     %p16, %r20, 0;
    @%p16 bra       $L__BB0_7;
    st.global.f32   [%rd14], %f3;
    add.s64         %rd14, %rd14, 16777216;
$L__BB0_7:
    and.b32         %r21, %r3, 8;
    setp.eq.s32     %p17, %r21, 0;
    @%p17 bra       $L__BB0_9;
    st.global.f32   [%rd14], %f4;
$L__BB0_9:
    ret;
}