Go to the end to download the full example code.
Visualize Flow Graph¶
Visualization is a key component of a machine learning tool: it allows us to better understand the model.
We customized the popular Netron viewer to visualize the flow graph of a hidet model. The customized Netron viewer can be found here; you can also find a link to it at the bottom of the documentation sidebar.
In this tutorial, we will show you how to visualize the flow graph of a model.
Define model¶
We first define a model with a self-attention layer.
import math
import hidet
from hidet import Tensor
from hidet.graph import nn, ops
class SelfAttention(nn.Module):
    """Multi-head self-attention layer used as the example model.

    The input is projected into query, key and value tensors by three
    linear layers, the heads are folded into the leading dimension, and
    softmax attention is applied per head.

    Parameters
    ----------
    hidden_size: int
        Size of the last dimension of the input and of the output.
    num_attention_heads: int
        Number of attention heads. ``hidden_size`` is split across the
        heads with integer division, so it is expected to be a multiple
        of ``num_attention_heads``.
    """

    def __init__(self, hidden_size=768, num_attention_heads=12):
        # Initialize the nn.Module base class before assigning sub-layers.
        super().__init__()
        self.num_attention_heads = num_attention_heads
        self.attention_head_size = hidden_size // num_attention_heads
        self.query_layer = nn.Linear(hidden_size, hidden_size)
        self.key_layer = nn.Linear(hidden_size, hidden_size)
        self.value_layer = nn.Linear(hidden_size, hidden_size)

    def transpose_for_scores(self, x: Tensor) -> Tensor:
        """Split the hidden dimension into heads and fold heads into the batch.

        Parameters
        ----------
        x: Tensor
            Tensor of shape [batch_size, seq_length, hidden_size].

        Returns
        -------
        Tensor
            Tensor of shape
            [batch_size * num_attention_heads, seq_length, attention_head_size].
        """
        batch_size, seq_length, _ = x.shape
        x = x.reshape([batch_size, seq_length, self.num_attention_heads, self.attention_head_size])
        # Merge dims 0 and 2 (batch and head) into one leading dimension.
        x = x.rearrange([[0, 2], [1], [3]])
        return x  # [batch_size * num_attention_heads, seq_length, attention_head_size]

    def forward(self, hidden_states: Tensor, attention_mask: Tensor):
        """Apply self-attention to ``hidden_states``.

        Parameters
        ----------
        hidden_states: Tensor
            Tensor of shape [batch_size, seq_length, hidden_size].
        attention_mask: Tensor
            Tensor added to the scaled attention scores before the softmax.
            Presumably it must be broadcastable to the score shape
            [batch_size * num_attention_heads, seq_length, seq_length].

        Returns
        -------
        Tensor
            Tensor of shape [batch_size, seq_length, hidden_size].
        """
        batch_size, seq_length, _ = hidden_states.shape
        query = self.transpose_for_scores(self.query_layer(hidden_states))
        key = self.transpose_for_scores(self.key_layer(hidden_states))
        value = self.transpose_for_scores(self.value_layer(hidden_states))
        # Scale the raw scores by sqrt(head size) before masking and softmax.
        attention_scores = ops.matmul(query, ops.transpose(key, [-1, -2])) / math.sqrt(
            self.attention_head_size
        )
        attention_scores = attention_scores + attention_mask
        attention_probs = ops.softmax(attention_scores, axis=-1)
        context = ops.matmul(attention_probs, value)
        # Unfold the heads from the leading dimension again...
        context = context.reshape(
            [batch_size, self.num_attention_heads, seq_length, self.attention_head_size]
        )
        # ...and merge the head and head-size dims back into the hidden dimension.
        context = context.rearrange([[0], [2], [1, 3]])
        return context
# Instantiate the model and print its structure (the sub-layers listed below).
model = SelfAttention()
print(model)
(query_layer): Linear(in_features=768, out_features=768)
(key_layer): Linear(in_features=768, out_features=768)
(value_layer): Linear(in_features=768, out_features=768)
Generate flow graph¶
Then we generate the flow graph of the model.
# Trace the model with example inputs (hidden states and attention mask)
# to build its flow graph, then print the graph in text form.
graph = model.flow_graph_for(
    inputs=[hidet.randn([1, 128, 768]), hidet.ones([1, 128], dtype='int32')]
)
print(graph)
Graph(x: float32[1, 128, 768][cpu], x_1: int32[1, 128][cpu]){
c = Constant(float32[768, 768][cpu])
c_1 = Constant(float32[768][cpu])
c_2 = Constant(float32[768, 768][cpu])
c_3 = Constant(float32[768][cpu])
c_4 = Constant(float32[768, 768][cpu])
c_5 = Constant(float32[768][cpu])
x_2: float32[1, 128, 768][cpu] = Matmul(x, c, require_prologue=False, transpose_b=False)
x_3: float32[1, 128, 768][cpu] = Add(x_2, c_1)
x_4: float32[1, 128, 12, 64][cpu] = Reshape(x_3, shape=[1, 128, 12, 64])
x_5: float32[12, 128, 64][cpu] = Rearrange(x_4, plan=[[0, 2], [1], [3]])
x_6: float32[1, 128, 768][cpu] = Matmul(x, c_2, require_prologue=False, transpose_b=False)
x_7: float32[1, 128, 768][cpu] = Add(x_6, c_3)
x_8: float32[1, 128, 12, 64][cpu] = Reshape(x_7, shape=[1, 128, 12, 64])
x_9: float32[12, 128, 64][cpu] = Rearrange(x_8, plan=[[0, 2], [1], [3]])
x_10: float32[12, 64, 128][cpu] = PermuteDims(x_9, axes=[0, 2, 1])
x_11: float32[12, 128, 128][cpu] = Matmul(x_5, x_10, require_prologue=False, transpose_b=False)
x_12: float32[12, 128, 128][cpu] = DivideScalar(x_11, scalar=8.0f)
x_13: float32[12, 128, 128][cpu] = Add(x_12, x_1)
x_14: float32[12, 128, 1][cpu] = ReduceMax(x_13, dims=[2], keepdims=True)
x_15: float32[12, 128, 128][cpu] = Subtract(x_13, x_14)
x_16: float32[12, 128, 128][cpu] = Exp(x_15)
x_17: float32[12, 128, 1][cpu] = ReduceSum(x_16, dims=[2], keepdims=True)
x_18: float32[12, 128, 128][cpu] = Divide(x_16, x_17)
x_19: float32[1, 128, 768][cpu] = Matmul(x, c_4, require_prologue=False, transpose_b=False)
x_20: float32[1, 128, 768][cpu] = Add(x_19, c_5)
x_21: float32[1, 128, 12, 64][cpu] = Reshape(x_20, shape=[1, 128, 12, 64])
x_22: float32[12, 128, 64][cpu] = Rearrange(x_21, plan=[[0, 2], [1], [3]])
x_23: float32[12, 128, 64][cpu] = Matmul(x_18, x_22, require_prologue=False, transpose_b=False)
x_24: float32[1, 12, 128, 64][cpu] = Reshape(x_23, shape=[1, 12, 128, 64])
x_25: float32[1, 128, 768][cpu] = Rearrange(x_24, plan=[[0], [2], [1, 3]])
return x_25
Dump netron graph¶
To visualize the flow graph, we need to dump the graph structure to a json file using the netron.dump function from hidet.utils.
from hidet.utils import netron
# Write the flow graph to a json file that the customized Netron viewer can open.
with open('attention-graph.json', 'w') as f:
    netron.dump(graph, f)
The above code generates a json file named attention-graph.json.
You can download the generated json file
and open it with the customized Netron viewer.
Visualize optimization intermediate graphs¶
Hidet also provides a way to visualize the intermediate graphs of the optimization passes.
To get the json files for the intermediate graphs, we need to add an instrument to the
pass context, before optimizing the graph, that dumps the graph after each pass. We can use the
PassContext.save_graph_instrument() method to do that.
with hidet.graph.PassContext() as ctx:
    # print the time cost of each pass
    ctx.profile_pass_instrument(print_stdout=True)

    # save the intermediate graph of each pass to './outs' directory
    ctx.save_graph_instrument(out_dir='./outs')

    # run the optimization passes
    graph_opt = hidet.graph.optimize(graph)
ConvChannelLastPass started...
ConvChannelLastPass 0.002 seconds
SubgraphRewritePass started...
SubgraphRewritePass 0.014 seconds
AutoMixPrecisionPass started...
AutoMixPrecisionPass 0.002 seconds
SelectiveQuantizePass started...
SubgraphRewritePass started...
SubgraphRewritePass 0.014 seconds
SelectiveQuantizePass 0.016 seconds
ResolveVariantPass started...
ResolveVariantPass 0.003 seconds
FuseOperatorPass started...
FuseOperatorPass 0.021 seconds
EliminateBarrierPass started...
EliminateBarrierPass 0.002 seconds
The above code generates a directory named outs
that contains the json files for the intermediate graphs.
The optimized graph:
Graph(x: float32[1, 128, 768][cpu], x_1: int32[1, 128][cpu]){
c = Constant(float32[768, 768][cpu])
c_1 = Constant(float32[768][cpu])
c_2 = Constant(float32[768, 768][cpu])
c_3 = Constant(float32[768][cpu])
c_4 = Constant(float32[768, 768][cpu])
c_5 = Constant(float32[768][cpu])
x_2: float32[12, 128, 64][cpu] = FusedMatmul(x, c, c_1, fused_graph=FlowGraph(Matmul, Add, Reshape, Rearrange), anchor=0)
x_3: float32[12, 128, 64][cpu] = FusedMatmul(x, c_2, c_3, fused_graph=FlowGraph(Matmul, Add, Reshape, Rearrange), anchor=0)
x_4: float32[12, 64, 128][cpu] = FusedMatmul(x, c_4, c_5, fused_graph=FlowGraph(Matmul, Add, Reshape, Rearrange, PermuteDims), anchor=0)
x_5: float32[12, 128, 128][cpu] = FusedMatmul(x_3, x_4, x_1, fused_graph=FlowGraph(Matmul, DivideScalar, Add), anchor=0)
x_6: float32[12, 128, 1][cpu] = ReduceMax(x_5, dims=[2], keepdims=True)
x_7: float32[12, 128, 128][cpu] = FusedExp(x_5, x_6, fused_graph=FlowGraph(Subtract, Exp), anchor=1)
x_8: float32[12, 128, 1][cpu] = ReduceSum(x_7, dims=[2], keepdims=True)
x_9: float32[1, 128, 768][cpu] = FusedMatmul(x_2, x_7, x_8, fused_graph=FlowGraph(Divide, Matmul, Reshape, Rearrange), anchor=1)
return x_9
This tutorial shows how to visualize the flow graph of a model and the intermediate graphs of the optimization passes.
Total running time of the script: (0 minutes 0.301 seconds)