//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-23083092
// Cuda compilation tools, release 9.1, V9.1.85
// Based on LLVM 3.4svn
//

.version 6.1
.target sm_30
.address_size 64

        // .globl       _Z6vecAddPfS_S_i

//
// Kernel _Z6vecAddPfS_S_i -- Itanium-mangled vecAdd(float*, float*, float*, int).
//
// Each thread processes at most one element:
//
//     i = %ntid.x * %ctaid.x + %tid.x          (low 32 bits of the product-sum)
//     if (i < param_3)                         (signed 32-bit compare)
//         param_2[i] = param_0[i] + param_1[i] (single-precision add)
//
// Parameters:
//     param_0, param_1 : 64-bit addresses, read as f32 after conversion to
//                        the global state space
//     param_2          : 64-bit address, written as f32 after conversion to
//                        the global state space
//     param_3          : 32-bit bound; threads whose index is >= this value
//                        return without touching memory
//
.visible .entry _Z6vecAddPfS_S_i(
        .param .u64 _Z6vecAddPfS_S_i_param_0,
        .param .u64 _Z6vecAddPfS_S_i_param_1,
        .param .u64 _Z6vecAddPfS_S_i_param_2,
        .param .u32 _Z6vecAddPfS_S_i_param_3
)
{
        // Named virtual registers, grouped by type.
        .reg .pred      %p_inside;
        .reg .f32       %f_lhs, %f_rhs, %f_sum;
        .reg .b32       %r_bound, %r_cta, %r_ctasize, %r_thread, %r_index;
        .reg .b64       %rd_arg0, %rd_arg1, %rd_arg2;
        .reg .b64       %rd_base0, %rd_base1, %rd_base2;
        .reg .b64       %rd_offset, %rd_src0, %rd_src1, %rd_dst;

        // Fetch the four kernel arguments from parameter space.
        ld.param.u64    %rd_arg0, [_Z6vecAddPfS_S_i_param_0];
        ld.param.u64    %rd_arg1, [_Z6vecAddPfS_S_i_param_1];
        ld.param.u64    %rd_arg2, [_Z6vecAddPfS_S_i_param_2];
        ld.param.u32    %r_bound, [_Z6vecAddPfS_S_i_param_3];

        // Per-thread element index: ntid.x * ctaid.x + tid.x, keeping only
        // the low 32 bits of the result.
        .loc 1 9 12
        mov.u32         %r_cta, %ctaid.x;
        mov.u32         %r_ctasize, %ntid.x;
        mov.u32         %r_thread, %tid.x;
        mad.lo.s32      %r_index, %r_ctasize, %r_cta, %r_thread;

        // Guard: only indices strictly below the bound do any work; every
        // other thread jumps straight to the exit.
        .loc 1 12 5
        setp.lt.s32     %p_inside, %r_index, %r_bound;
        @!%p_inside bra VECADD_EXIT;

        // Convert the three generic addresses into global-space addresses so
        // the ld.global / st.global forms below can be used.
        .loc 1 9 12
        cvta.to.global.u64      %rd_base0, %rd_arg0;
        cvta.to.global.u64      %rd_base1, %rd_arg1;
        cvta.to.global.u64      %rd_base2, %rd_arg2;

        // Byte offset = sign-extended index * 4 (one f32 per element); the
        // same offset is applied to all three base addresses.
        .loc 1 13 9
        mul.wide.s32    %rd_offset, %r_index, 4;
        add.s64         %rd_src0, %rd_base0, %rd_offset;
        add.s64         %rd_src1, %rd_base1, %rd_offset;
        add.s64         %rd_dst, %rd_base2, %rd_offset;

        // Load both operands, add them, and store the result.
        ld.global.f32   %f_lhs, [%rd_src0];
        ld.global.f32   %f_rhs, [%rd_src1];
        add.f32         %f_sum, %f_lhs, %f_rhs;
        st.global.f32   [%rd_dst], %f_sum;

VECADD_EXIT:
        .loc 1 14 1
        ret;
}

        .file   1 "/tmp/moo.cu", 1525722349, 2016
        .file   2 "/opt/compiler-explorer/gcc-6.4.0/include/c++/6.4.0/cmath", 1517261630, 47676
