NVGPUOps

nvgpu.cluster_arrive (triton::nvgpu::ClusterArriveOp)

Syntax:

operation ::= `nvgpu.cluster_arrive` attr-dict

Attributes:

| Attribute | MLIR Type | Description |
| --- | --- | --- |
| `relaxed` | ::mlir::IntegerAttr | 1-bit signless integer attribute |

nvgpu.cluster_id (triton::nvgpu::ClusterCTAIdOp)

Syntax:

operation ::= `nvgpu.cluster_id` attr-dict

Traits: AlwaysSpeculatableImplTrait

Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)

Effects: MemoryEffects::Effect{}

Results:

| Result | Description |
| --- | --- |
| `result` | 32-bit signless integer |

nvgpu.cluster_wait (triton::nvgpu::ClusterWaitOp)

Syntax:

operation ::= `nvgpu.cluster_wait` attr-dict

nvgpu.fence_async_shared (triton::nvgpu::FenceAsyncSharedOp)

Syntax:

operation ::= `nvgpu.fence_async_shared` attr-dict

Attributes:

| Attribute | MLIR Type | Description |
| --- | --- | --- |
| `bCluster` | ::mlir::BoolAttr | bool attribute |

nvgpu.stmatrix (triton::nvgpu::StoreMatrixOp)

Syntax:

operation ::= `nvgpu.stmatrix` operands attr-dict `:` type(operands)

Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)

Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}

Operands:

| Operand | Description |
| --- | --- |
| `addr` | LLVM pointer in address space 3 |
| `datas` | variadic of 32-bit signless integer |

nvgpu.wgmma_commit_group (triton::nvgpu::WGMMACommitGroupOp)

Syntax:

operation ::= `nvgpu.wgmma_commit_group` attr-dict

nvgpu.wgmma_fence (triton::nvgpu::WGMMAFenceOp)

Syntax:

operation ::= `nvgpu.wgmma_fence` attr-dict

nvgpu.wgmma (triton::nvgpu::WGMMAOp)

Syntax:

operation ::= `nvgpu.wgmma` $opA `,` $opB `,` $useC (`,` $opC^)? attr-dict `:` functional-type(operands, $res)

Attributes:

| Attribute | MLIR Type | Description |
| --- | --- | --- |
| `m` | ::mlir::IntegerAttr | 32-bit signless integer attribute |
| `n` | ::mlir::IntegerAttr | 32-bit signless integer attribute |
| `k` | ::mlir::IntegerAttr | 32-bit signless integer attribute |
| `eltTypeC` | ::mlir::triton::nvgpu::WGMMAEltTypeAttr | wgmma operand type, either 's8', 's32', 'e4m3', 'e5m2', 'f16', 'bf16', 'tf32', or 'f32'. Enum cases: s8 (`s8`), s32 (`s32`), e4m3 (`e4m3`), e5m2 (`e5m2`), f16 (`f16`), bf16 (`bf16`), tf32 (`tf32`), f32 (`f32`) |
| `eltTypeA` | ::mlir::triton::nvgpu::WGMMAEltTypeAttr | wgmma operand type, either 's8', 's32', 'e4m3', 'e5m2', 'f16', 'bf16', 'tf32', or 'f32'. Enum cases: s8 (`s8`), s32 (`s32`), e4m3 (`e4m3`), e5m2 (`e5m2`), f16 (`f16`), bf16 (`bf16`), tf32 (`tf32`), f32 (`f32`) |
| `eltTypeB` | ::mlir::triton::nvgpu::WGMMAEltTypeAttr | wgmma operand type, either 's8', 's32', 'e4m3', 'e5m2', 'f16', 'bf16', 'tf32', or 'f32'. Enum cases: s8 (`s8`), s32 (`s32`), e4m3 (`e4m3`), e5m2 (`e5m2`), f16 (`f16`), bf16 (`bf16`), tf32 (`tf32`), f32 (`f32`) |
| `layoutA` | ::mlir::triton::nvgpu::WGMMALayoutAttr | wgmma layout, either 'row' or 'col'. Enum cases: row (`row`), col (`col`) |
| `layoutB` | ::mlir::triton::nvgpu::WGMMALayoutAttr | wgmma layout, either 'row' or 'col'. Enum cases: row (`row`), col (`col`) |

Operands:

| Operand | Description |
| --- | --- |
| `opA` | wgmma operand A/B type |
| `opB` | wgmma operand A/B type |
| `useC` | 1-bit signless integer |
| `opC` | LLVM structure type |

Results:

| Result | Description |
| --- | --- |
| `res` | LLVM structure type |

nvgpu.wgmma_wait_group (triton::nvgpu::WGMMAWaitGroupOp)

Syntax:

operation ::= `nvgpu.wgmma_wait_group` $input attr-dict `:` type($input)

Interfaces: InferTypeOpInterface

Attributes:

| Attribute | MLIR Type | Description |
| --- | --- | --- |
| `pendings` | ::mlir::IntegerAttr | 32-bit signless integer attribute |

Operands:

| Operand | Description |
| --- | --- |
| `input` | LLVM structure type |

Results:

| Result | Description |
| --- | --- |
| `output` | LLVM structure type |