Creating new Ultralytics Settings v0.0.6 file ✅
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading rtdetr-l.pt...100%|██████████| 63.4M/63.4M [00:02<00:00, 31.5MB/s]
RT-DETR Architecture & Complexity:
layer name type gradient parameters shape mu sigma dtype
0 model.0.stem1.conv.weight Conv2d False 864 [32, 3, 3, 3] 0.000286 0.236 float32
1 model.0.stem1.bn.weight BatchNorm2d False 32 [32] 0.258 0.116 float32
1 model.0.stem1.bn.bias BatchNorm2d False 32 [32] 0.01 0.353 float32
2 model.0.stem1.act ReLU False 0 [] - - -
3 model.0.stem2a.conv.weight Conv2d False 2048 [16, 32, 2, 2] -0.00379 0.0969 float32
4 model.0.stem2a.bn.weight BatchNorm2d False 16 [16] 0.298 0.0581 float32
4 model.0.stem2a.bn.bias BatchNorm2d False 16 [16] 0.133 0.141 float32
5 model.0.stem2a.act ReLU False 0 [] - - -
6 model.0.stem2b.conv.weight Conv2d False 2048 [32, 16, 2, 2] -0.00226 0.0979 float32
7 model.0.stem2b.bn.weight BatchNorm2d False 32 [32] 0.384 0.062 float32
7 model.0.stem2b.bn.bias BatchNorm2d False 32 [32] 0.0132 0.0914 float32
8 model.0.stem2b.act ReLU False 0 [] - - -
9 model.0.stem3.conv.weight Conv2d False 18432 [32, 64, 3, 3] 0.0006 0.0582 float32
10 model.0.stem3.bn.weight BatchNorm2d False 32 [32] 0.262 0.0657 float32
10 model.0.stem3.bn.bias BatchNorm2d False 32 [32] 0.344 0.246 float32
11 model.0.stem3.act ReLU False 0 [] - - -
12 model.0.stem4.conv.weight Conv2d False 1536 [48, 32, 1, 1] -0.000254 0.138 float32
13 model.0.stem4.bn.weight BatchNorm2d False 48 [48] 0.465 0.0983 float32
13 model.0.stem4.bn.bias BatchNorm2d False 48 [48] 0.176 0.358 float32
14 model.0.stem4.act ReLU False 0 [] - - -
15 model.0.pool MaxPool2d False 0 [] - - -
16 model.1.m.0.conv.weight Conv2d False 20736 [48, 48, 3, 3] -0.00125 0.0567 float32
17 model.1.m.0.bn.weight BatchNorm2d False 48 [48] 0.664 0.277 float32
17 model.1.m.0.bn.bias BatchNorm2d False 48 [48] 0.406 0.759 float32
18 model.1.m.0.act ReLU False 0 [] - - -
19 model.1.m.1.conv.weight Conv2d False 20736 [48, 48, 3, 3] -0.00177 0.0476 float32
20 model.1.m.1.bn.weight BatchNorm2d False 48 [48] 0.921 0.515 float32
20 model.1.m.1.bn.bias BatchNorm2d False 48 [48] 0.314 1.11 float32
21 model.1.m.2.conv.weight Conv2d False 20736 [48, 48, 3, 3] -0.00167 0.0425 float32
22 model.1.m.2.bn.weight BatchNorm2d False 48 [48] 0.901 0.129 float32
22 model.1.m.2.bn.bias BatchNorm2d False 48 [48] 0.234 0.798 float32
23 model.1.m.3.conv.weight Conv2d False 20736 [48, 48, 3, 3] -0.00119 0.0383 float32
24 model.1.m.3.bn.weight BatchNorm2d False 48 [48] 0.901 0.121 float32
24 model.1.m.3.bn.bias BatchNorm2d False 48 [48] -0.1 0.482 float32
25 model.1.m.4.conv.weight Conv2d False 20736 [48, 48, 3, 3] -0.00298 0.0362 float32
26 model.1.m.4.bn.weight BatchNorm2d False 48 [48] 1 0.296 float32
26 model.1.m.4.bn.bias BatchNorm2d False 48 [48] -0.213 0.673 float32
27 model.1.m.5.conv.weight Conv2d False 20736 [48, 48, 3, 3] -0.00274 0.0308 float32
28 model.1.m.5.bn.weight BatchNorm2d False 48 [48] 1.51 0.526 float32
28 model.1.m.5.bn.bias BatchNorm2d False 48 [48] -1.48 1.42 float32
29 model.1.sc.conv.weight Conv2d False 21504 [64, 336, 1, 1] 0.000581 0.0604 float32
30 model.1.sc.bn.weight BatchNorm2d False 64 [64] 2.25 0.766 float32
30 model.1.sc.bn.bias BatchNorm2d False 64 [64] 1.86 2.35 float32
31 model.1.ec.conv.weight Conv2d False 8192 [128, 64, 1, 1] -0.00113 0.0824 float32
32 model.1.ec.bn.weight BatchNorm2d False 128 [128] 1.79 0.509 float32
32 model.1.ec.bn.bias BatchNorm2d False 128 [128] -0.275 1.02 float32
33 model.2.conv.weight Conv2d False 1152 [128, 1, 3, 3] -0.003 0.0848 float32
34 model.2.bn.weight BatchNorm2d False 128 [128] 1.54 0.257 float32
34 model.2.bn.bias BatchNorm2d False 128 [128] 0.00827 0.826 float32
35 model.2.act Identity False 0 [] - - -
36 model.3.m.0.conv.weight Conv2d False 110592 [96, 128, 3, 3] -0.000206 0.0265 float32
37 model.3.m.0.bn.weight BatchNorm2d False 96 [96] 1.91 0.561 float32
37 model.3.m.0.bn.bias BatchNorm2d False 96 [96] -0.221 1.55 float32
38 model.3.m.1.conv.weight Conv2d False 82944 [96, 96, 3, 3] -0.000993 0.0266 float32
39 model.3.m.1.bn.weight BatchNorm2d False 96 [96] 2.01 0.595 float32
39 model.3.m.1.bn.bias BatchNorm2d False 96 [96] -0.578 1.53 float32
40 model.3.m.2.conv.weight Conv2d False 82944 [96, 96, 3, 3] -0.00155 0.0239 float32
41 model.3.m.2.bn.weight BatchNorm2d False 96 [96] 1.88 0.277 float32
41 model.3.m.2.bn.bias BatchNorm2d False 96 [96] -1.04 1.04 float32
42 model.3.m.3.conv.weight Conv2d False 82944 [96, 96, 3, 3] -0.00167 0.0224 float32
43 model.3.m.3.bn.weight BatchNorm2d False 96 [96] 1.85 0.439 float32
43 model.3.m.3.bn.bias BatchNorm2d False 96 [96] -1.11 1.26 float32
44 model.3.m.4.conv.weight Conv2d False 82944 [96, 96, 3, 3] -0.00151 0.0208 float32
45 model.3.m.4.bn.weight BatchNorm2d False 96 [96] 1.91 0.391 float32
45 model.3.m.4.bn.bias BatchNorm2d False 96 [96] -1.3 1.14 float32
46 model.3.m.5.conv.weight Conv2d False 82944 [96, 96, 3, 3] -0.000755 0.0182 float32
47 model.3.m.5.bn.weight BatchNorm2d False 96 [96] 2.28 0.55 float32
47 model.3.m.5.bn.bias BatchNorm2d False 96 [96] -1.2 1.27 float32
48 model.3.sc.conv.weight Conv2d False 180224 [256, 704, 1, 1] -0.000106 0.0303 float32
49 model.3.sc.bn.weight BatchNorm2d False 256 [256] 1.86 0.383 float32
49 model.3.sc.bn.bias BatchNorm2d False 256 [256] 0.0356 1.38 float32
50 model.3.ec.conv.weight Conv2d False 131072 [512, 256, 1, 1] -0.00144 0.0336 float32
51 model.3.ec.bn.weight BatchNorm2d False 512 [512] 1.25 0.17 float32
51 model.3.ec.bn.bias BatchNorm2d False 512 [512] -0.925 0.636 float32
52 model.4.conv.weight Conv2d False 4608 [512, 1, 3, 3] 0.000926 0.0554 float32
53 model.4.bn.weight BatchNorm2d False 512 [512] 1.26 0.232 float32
53 model.4.bn.bias BatchNorm2d False 512 [512] -3.34e-06 0.000236 float32
54 model.4.act Identity False 0 [] - - -
55 model.5.m.0.conv1.conv.weight Conv2d False 98304 [192, 512, 1, 1] -0.000119 0.0255 float32
56 model.5.m.0.conv1.bn.weight BatchNorm2d False 192 [192] 1.09 0.155 float32
56 model.5.m.0.conv1.bn.bias BatchNorm2d False 192 [192] 0.0193 0.503 float32
57 model.5.m.0.conv1.act Identity False 0 [] - - -
58 model.5.m.0.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] -0.000725 0.0667 float32
59 model.5.m.0.conv2.bn.weight BatchNorm2d False 192 [192] 1.67 0.39 float32
59 model.5.m.0.conv2.bn.bias BatchNorm2d False 192 [192] 0.293 1.21 float32
60 model.5.m.1.conv1.conv.weight Conv2d False 36864 [192, 192, 1, 1] -0.000115 0.0309 float32
61 model.5.m.1.conv1.bn.weight BatchNorm2d False 192 [192] 0.967 0.132 float32
61 model.5.m.1.conv1.bn.bias BatchNorm2d False 192 [192] -0.0815 0.558 float32
62 model.5.m.1.conv1.act Identity False 0 [] - - -
63 model.5.m.1.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] -0.000311 0.0433 float32
64 model.5.m.1.conv2.bn.weight BatchNorm2d False 192 [192] 1.41 0.508 float32
64 model.5.m.1.conv2.bn.bias BatchNorm2d False 192 [192] 0.0643 0.974 float32
65 model.5.m.2.conv1.conv.weight Conv2d False 36864 [192, 192, 1, 1] 0.000143 0.0266 float32
66 model.5.m.2.conv1.bn.weight BatchNorm2d False 192 [192] 0.894 0.159 float32
66 model.5.m.2.conv1.bn.bias BatchNorm2d False 192 [192] 0.0288 0.626 float32
67 model.5.m.2.conv1.act Identity False 0 [] - - -
68 model.5.m.2.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] 0.000371 0.0389 float32
69 model.5.m.2.conv2.bn.weight BatchNorm2d False 192 [192] 1.29 0.488 float32
69 model.5.m.2.conv2.bn.bias BatchNorm2d False 192 [192] -0.168 0.988 float32
70 model.5.m.3.conv1.conv.weight Conv2d False 36864 [192, 192, 1, 1] 0.000281 0.0236 float32
71 model.5.m.3.conv1.bn.weight BatchNorm2d False 192 [192] 0.938 0.143 float32
71 model.5.m.3.conv1.bn.bias BatchNorm2d False 192 [192] -0.0198 0.506 float32
72 model.5.m.3.conv1.act Identity False 0 [] - - -
73 model.5.m.3.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] -0.000389 0.0347 float32
74 model.5.m.3.conv2.bn.weight BatchNorm2d False 192 [192] 1.12 0.387 float32
74 model.5.m.3.conv2.bn.bias BatchNorm2d False 192 [192] -0.243 0.753 float32
75 model.5.m.4.conv1.conv.weight Conv2d False 36864 [192, 192, 1, 1] 0.000242 0.0214 float32
76 model.5.m.4.conv1.bn.weight BatchNorm2d False 192 [192] 0.945 0.112 float32
76 model.5.m.4.conv1.bn.bias BatchNorm2d False 192 [192] 0.0214 0.419 float32
77 model.5.m.4.conv1.act Identity False 0 [] - - -
78 model.5.m.4.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] -0.000177 0.0297 float32
79 model.5.m.4.conv2.bn.weight BatchNorm2d False 192 [192] 1.1 0.335 float32
79 model.5.m.4.conv2.bn.bias BatchNorm2d False 192 [192] -0.429 0.778 float32
80 model.5.m.5.conv1.conv.weight Conv2d False 36864 [192, 192, 1, 1] -2.67e-05 0.0183 float32
81 model.5.m.5.conv1.bn.weight BatchNorm2d False 192 [192] 0.836 0.178 float32
81 model.5.m.5.conv1.bn.bias BatchNorm2d False 192 [192] -0.00317 0.649 float32
82 model.5.m.5.conv1.act Identity False 0 [] - - -
83 model.5.m.5.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] 0.000146 0.0247 float32
84 model.5.m.5.conv2.bn.weight BatchNorm2d False 192 [192] 1.63 0.478 float32
84 model.5.m.5.conv2.bn.bias BatchNorm2d False 192 [192] -0.707 0.856 float32
85 model.5.sc.conv.weight Conv2d False 851968 [512, 1664, 1, 1] -0.000388 0.0188 float32
86 model.5.sc.bn.weight BatchNorm2d False 512 [512] 1.61 0.444 float32
86 model.5.sc.bn.bias BatchNorm2d False 512 [512] -0.468 1.37 float32
87 model.5.ec.conv.weight Conv2d False 524288 [1024, 512, 1, 1] -0.00204 0.022 float32
88 model.5.ec.bn.weight BatchNorm2d False 1024 [1024] 1.02 0.33 float32
88 model.5.ec.bn.bias BatchNorm2d False 1024 [1024] -1.11 0.959 float32
89 model.6.m.0.conv1.conv.weight Conv2d False 196608 [192, 1024, 1, 1] 9.16e-05 0.0175 float32
90 model.6.m.0.conv1.bn.weight BatchNorm2d False 192 [192] 1.01 0.131 float32
90 model.6.m.0.conv1.bn.bias BatchNorm2d False 192 [192] -0.00657 0.482 float32
91 model.6.m.0.conv1.act Identity False 0 [] - - -
92 model.6.m.0.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] 0.000143 0.0583 float32
93 model.6.m.0.conv2.bn.weight BatchNorm2d False 192 [192] 0.86 0.222 float32
93 model.6.m.0.conv2.bn.bias BatchNorm2d False 192 [192] 0.207 1.07 float32
94 model.6.m.1.conv1.conv.weight Conv2d False 36864 [192, 192, 1, 1] 0.00012 0.0289 float32
95 model.6.m.1.conv1.bn.weight BatchNorm2d False 192 [192] 0.989 0.0794 float32
95 model.6.m.1.conv1.bn.bias BatchNorm2d False 192 [192] 0.0249 0.425 float32
96 model.6.m.1.conv1.act Identity False 0 [] - - -
97 model.6.m.1.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] -0.000294 0.0481 float32
98 model.6.m.1.conv2.bn.weight BatchNorm2d False 192 [192] 0.899 0.228 float32
98 model.6.m.1.conv2.bn.bias BatchNorm2d False 192 [192] -0.45 0.687 float32
99 model.6.m.2.conv1.conv.weight Conv2d False 36864 [192, 192, 1, 1] 0.000113 0.0275 float32
100 model.6.m.2.conv1.bn.weight BatchNorm2d False 192 [192] 0.942 0.124 float32
100 model.6.m.2.conv1.bn.bias BatchNorm2d False 192 [192] 0.00908 0.527 float32
101 model.6.m.2.conv1.act Identity False 0 [] - - -
102 model.6.m.2.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] -0.000696 0.0431 float32
103 model.6.m.2.conv2.bn.weight BatchNorm2d False 192 [192] 0.801 0.215 float32
103 model.6.m.2.conv2.bn.bias BatchNorm2d False 192 [192] -0.187 0.712 float32
104 model.6.m.3.conv1.conv.weight Conv2d False 36864 [192, 192, 1, 1] -0.000185 0.027 float32
105 model.6.m.3.conv1.bn.weight BatchNorm2d False 192 [192] 0.933 0.13 float32
105 model.6.m.3.conv1.bn.bias BatchNorm2d False 192 [192] 0.00563 0.514 float32
106 model.6.m.3.conv1.act Identity False 0 [] - - -
107 model.6.m.3.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] 0.000308 0.0414 float32
108 model.6.m.3.conv2.bn.weight BatchNorm2d False 192 [192] 0.809 0.239 float32
108 model.6.m.3.conv2.bn.bias BatchNorm2d False 192 [192] -0.191 0.726 float32
109 model.6.m.4.conv1.conv.weight Conv2d False 36864 [192, 192, 1, 1] -2.36e-05 0.0251 float32
110 model.6.m.4.conv1.bn.weight BatchNorm2d False 192 [192] 0.929 0.11 float32
110 model.6.m.4.conv1.bn.bias BatchNorm2d False 192 [192] -0.0466 0.471 float32
111 model.6.m.4.conv1.act Identity False 0 [] - - -
112 model.6.m.4.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] 0.000333 0.0363 float32
113 model.6.m.4.conv2.bn.weight BatchNorm2d False 192 [192] 0.934 0.282 float32
113 model.6.m.4.conv2.bn.bias BatchNorm2d False 192 [192] -0.44 0.715 float32
114 model.6.m.5.conv1.conv.weight Conv2d False 36864 [192, 192, 1, 1] -0.000116 0.0215 float32
115 model.6.m.5.conv1.bn.weight BatchNorm2d False 192 [192] 0.911 0.131 float32
115 model.6.m.5.conv1.bn.bias BatchNorm2d False 192 [192] -0.0148 0.487 float32
116 model.6.m.5.conv1.act Identity False 0 [] - - -
117 model.6.m.5.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] -0.000435 0.0298 float32
118 model.6.m.5.conv2.bn.weight BatchNorm2d False 192 [192] 0.958 0.331 float32
118 model.6.m.5.conv2.bn.bias BatchNorm2d False 192 [192] -0.252 0.98 float32
119 model.6.sc.conv.weight Conv2d False 1114112 [512, 2176, 1, 1] -0.000638 0.0142 float32
120 model.6.sc.bn.weight BatchNorm2d False 512 [512] 1.47 0.304 float32
120 model.6.sc.bn.bias BatchNorm2d False 512 [512] -1.25 0.928 float32
121 model.6.ec.conv.weight Conv2d False 524288 [1024, 512, 1, 1] -0.00196 0.0178 float32
122 model.6.ec.bn.weight BatchNorm2d False 1024 [1024] 0.936 0.323 float32
122 model.6.ec.bn.bias BatchNorm2d False 1024 [1024] -0.899 0.697 float32
123 model.7.m.0.conv1.conv.weight Conv2d False 196608 [192, 1024, 1, 1] 1.26e-05 0.0191 float32
124 model.7.m.0.conv1.bn.weight BatchNorm2d False 192 [192] 0.993 0.11 float32
124 model.7.m.0.conv1.bn.bias BatchNorm2d False 192 [192] 0.000583 0.45 float32
125 model.7.m.0.conv1.act Identity False 0 [] - - -
126 model.7.m.0.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] 0.000475 0.0465 float32
127 model.7.m.0.conv2.bn.weight BatchNorm2d False 192 [192] 0.941 0.424 float32
127 model.7.m.0.conv2.bn.bias BatchNorm2d False 192 [192] -0.129 1.57 float32
128 model.7.m.1.conv1.conv.weight Conv2d False 36864 [192, 192, 1, 1] -0.000374 0.0266 float32
129 model.7.m.1.conv1.bn.weight BatchNorm2d False 192 [192] 0.888 0.115 float32
129 model.7.m.1.conv1.bn.bias BatchNorm2d False 192 [192] -0.0736 0.572 float32
130 model.7.m.1.conv1.act Identity False 0 [] - - -
131 model.7.m.1.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] 0.00134 0.0379 float32
132 model.7.m.1.conv2.bn.weight BatchNorm2d False 192 [192] 0.791 0.248 float32
132 model.7.m.1.conv2.bn.bias BatchNorm2d False 192 [192] -0.638 0.88 float32
133 model.7.m.2.conv1.conv.weight Conv2d False 36864 [192, 192, 1, 1] 0.000466 0.0242 float32
134 model.7.m.2.conv1.bn.weight BatchNorm2d False 192 [192] 0.904 0.117 float32
134 model.7.m.2.conv1.bn.bias BatchNorm2d False 192 [192] 0.0649 0.501 float32
135 model.7.m.2.conv1.act Identity False 0 [] - - -
136 model.7.m.2.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] -0.00229 0.0373 float32
137 model.7.m.2.conv2.bn.weight BatchNorm2d False 192 [192] 0.659 0.178 float32
137 model.7.m.2.conv2.bn.bias BatchNorm2d False 192 [192] -0.327 0.648 float32
138 model.7.m.3.conv1.conv.weight Conv2d False 36864 [192, 192, 1, 1] 0.000348 0.0236 float32
139 model.7.m.3.conv1.bn.weight BatchNorm2d False 192 [192] 0.91 0.121 float32
139 model.7.m.3.conv1.bn.bias BatchNorm2d False 192 [192] 0.00219 0.489 float32
140 model.7.m.3.conv1.act Identity False 0 [] - - -
141 model.7.m.3.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] -0.000101 0.0365 float32
142 model.7.m.3.conv2.bn.weight BatchNorm2d False 192 [192] 0.701 0.22 float32
142 model.7.m.3.conv2.bn.bias BatchNorm2d False 192 [192] -0.403 0.604 float32
143 model.7.m.4.conv1.conv.weight Conv2d False 36864 [192, 192, 1, 1] 0.000513 0.0228 float32
144 model.7.m.4.conv1.bn.weight BatchNorm2d False 192 [192] 0.929 0.102 float32
144 model.7.m.4.conv1.bn.bias BatchNorm2d False 192 [192] 0.0657 0.429 float32
145 model.7.m.4.conv1.act Identity False 0 [] - - -
146 model.7.m.4.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] -0.00196 0.0324 float32
147 model.7.m.4.conv2.bn.weight BatchNorm2d False 192 [192] 0.749 0.212 float32
147 model.7.m.4.conv2.bn.bias BatchNorm2d False 192 [192] -0.497 0.704 float32
148 model.7.m.5.conv1.conv.weight Conv2d False 36864 [192, 192, 1, 1] -5.37e-05 0.0204 float32
149 model.7.m.5.conv1.bn.weight BatchNorm2d False 192 [192] 0.883 0.146 float32
149 model.7.m.5.conv1.bn.bias BatchNorm2d False 192 [192] 0.0296 0.493 float32
150 model.7.m.5.conv1.act Identity False 0 [] - - -
151 model.7.m.5.conv2.conv.weight Conv2d False 4800 [192, 1, 5, 5] 0.000219 0.0279 float32
152 model.7.m.5.conv2.bn.weight BatchNorm2d False 192 [192] 0.942 0.289 float32
152 model.7.m.5.conv2.bn.bias BatchNorm2d False 192 [192] -0.448 0.773 float32
153 model.7.sc.conv.weight Conv2d False 1114112 [512, 2176, 1, 1] -0.00086 0.0152 float32
154 model.7.sc.bn.weight BatchNorm2d False 512 [512] 1.5 0.342 float32
154 model.7.sc.bn.bias BatchNorm2d False 512 [512] -1.75 1.25 float32
155 model.7.ec.conv.weight Conv2d False 524288 [1024, 512, 1, 1] -0.00201 0.0192 float32
156 model.7.ec.bn.weight BatchNorm2d False 1024 [1024] 2.08 0.728 float32
156 model.7.ec.bn.bias BatchNorm2d False 1024 [1024] -2.21 0.934 float32
157 model.8.conv.weight Conv2d False 9216 [1024, 1, 3, 3] 0.000143 0.0397 float32
158 model.8.bn.weight BatchNorm2d False 1024 [1024] 0.799 0.0685 float32
158 model.8.bn.bias BatchNorm2d False 1024 [1024] 2.96e-06 0.000225 float32
159 model.8.act Identity False 0 [] - - -
160 model.9.m.0.conv1.conv.weight Conv2d False 393216 [384, 1024, 1, 1] 4.09e-06 0.0185 float32
161 model.9.m.0.conv1.bn.weight BatchNorm2d False 384 [384] 0.977 0.131 float32
161 model.9.m.0.conv1.bn.bias BatchNorm2d False 384 [384] -0.0094 0.515 float32
162 model.9.m.0.conv1.act Identity False 0 [] - - -
163 model.9.m.0.conv2.conv.weight Conv2d False 9600 [384, 1, 5, 5] 2.48e-06 0.0403 float32
164 model.9.m.0.conv2.bn.weight BatchNorm2d False 384 [384] 1.43 0.528 float32
164 model.9.m.0.conv2.bn.bias BatchNorm2d False 384 [384] -0.941 1.54 float32
165 model.9.m.1.conv1.conv.weight Conv2d False 147456 [384, 384, 1, 1] 0.000307 0.0216 float32
166 model.9.m.1.conv1.bn.weight BatchNorm2d False 384 [384] 0.888 0.131 float32
166 model.9.m.1.conv1.bn.bias BatchNorm2d False 384 [384] 0.0369 0.58 float32
167 model.9.m.1.conv1.act Identity False 0 [] - - -
168 model.9.m.1.conv2.conv.weight Conv2d False 9600 [384, 1, 5, 5] -0.000825 0.0331 float32
169 model.9.m.1.conv2.bn.weight BatchNorm2d False 384 [384] 1.19 0.386 float32
169 model.9.m.1.conv2.bn.bias BatchNorm2d False 384 [384] -0.81 1.1 float32
170 model.9.m.2.conv1.conv.weight Conv2d False 147456 [384, 384, 1, 1] 9.04e-05 0.0195 float32
171 model.9.m.2.conv1.bn.weight BatchNorm2d False 384 [384] 0.889 0.137 float32
171 model.9.m.2.conv1.bn.bias BatchNorm2d False 384 [384] 0.0253 0.565 float32
172 model.9.m.2.conv1.act Identity False 0 [] - - -
173 model.9.m.2.conv2.conv.weight Conv2d False 9600 [384, 1, 5, 5] -0.00045 0.0337 float32
174 model.9.m.2.conv2.bn.weight BatchNorm2d False 384 [384] 1.12 0.316 float32
174 model.9.m.2.conv2.bn.bias BatchNorm2d False 384 [384] -0.731 0.89 float32
175 model.9.m.3.conv1.conv.weight Conv2d False 147456 [384, 384, 1, 1] -9.82e-05 0.0182 float32
176 model.9.m.3.conv1.bn.weight BatchNorm2d False 384 [384] 0.88 0.127 float32
176 model.9.m.3.conv1.bn.bias BatchNorm2d False 384 [384] -0.0142 0.563 float32
177 model.9.m.3.conv1.act Identity False 0 [] - - -
178 model.9.m.3.conv2.conv.weight Conv2d False 9600 [384, 1, 5, 5] 0.000362 0.0287 float32
179 model.9.m.3.conv2.bn.weight BatchNorm2d False 384 [384] 1.12 0.265 float32
179 model.9.m.3.conv2.bn.bias BatchNorm2d False 384 [384] -0.765 0.719 float32
180 model.9.m.4.conv1.conv.weight Conv2d False 147456 [384, 384, 1, 1] 0.000138 0.0166 float32
181 model.9.m.4.conv1.bn.weight BatchNorm2d False 384 [384] 0.873 0.127 float32
181 model.9.m.4.conv1.bn.bias BatchNorm2d False 384 [384] 0.0154 0.529 float32
182 model.9.m.4.conv1.act Identity False 0 [] - - -
183 model.9.m.4.conv2.conv.weight Conv2d False 9600 [384, 1, 5, 5] -0.000647 0.0265 float32
184 model.9.m.4.conv2.bn.weight BatchNorm2d False 384 [384] 1.11 0.244 float32
184 model.9.m.4.conv2.bn.bias BatchNorm2d False 384 [384] -0.987 0.728 float32
185 model.9.m.5.conv1.conv.weight Conv2d False 147456 [384, 384, 1, 1] -0.000107 0.0147 float32
186 model.9.m.5.conv1.bn.weight BatchNorm2d False 384 [384] 0.896 0.127 float32
186 model.9.m.5.conv1.bn.bias BatchNorm2d False 384 [384] -0.00775 0.463 float32
187 model.9.m.5.conv1.act Identity False 0 [] - - -
188 model.9.m.5.conv2.conv.weight Conv2d False 9600 [384, 1, 5, 5] -3.79e-05 0.0229 float32
189 model.9.m.5.conv2.bn.weight BatchNorm2d False 384 [384] 1.37 0.38 float32
189 model.9.m.5.conv2.bn.bias BatchNorm2d False 384 [384] -0.764 0.94 float32
190 model.9.sc.conv.weight Conv2d False 3407872 [1024, 3328, 1, 1] -0.000582 0.013 float32
191 model.9.sc.bn.weight BatchNorm2d False 1024 [1024] 1.48 0.401 float32
191 model.9.sc.bn.bias BatchNorm2d False 1024 [1024] -1.58 1.28 float32
192 model.9.ec.conv.weight Conv2d False 2097152 [2048, 1024, 1, 1] -0.000945 0.0142 float32
193 model.9.ec.bn.weight BatchNorm2d False 2048 [2048] 4.42 0.487 float32
193 model.9.ec.bn.bias BatchNorm2d False 2048 [2048] -5.61 1.2 float32
194 model.10.conv.weight Conv2d False 524288 [256, 2048, 1, 1] 0.00158 0.0746 float32
195 model.10.bn.weight BatchNorm2d False 256 [256] 0.881 0.0365 float32
195 model.10.bn.bias BatchNorm2d False 256 [256] -0.0164 0.0795 float32
196 model.10.act Identity False 0 [] - - -
197 model.11.ma.out_proj.weight NonDynamicallyQuantizableLinear False 65536 [256, 256] -0.000238 0.0807 float32
197 model.11.ma.out_proj.bias NonDynamicallyQuantizableLinear False 256 [256] -0.000216 0.104 float32
198 model.11.fc1.weight Linear False 262144 [1024, 256] 8.32e-05 0.0722 float32
198 model.11.fc1.bias Linear False 1024 [1024] -0.101 0.0442 float32
199 model.11.fc2.weight Linear False 262144 [256, 1024] -5.94e-06 0.0646 float32
199 model.11.fc2.bias Linear False 256 [256] -0.00152 0.242 float32
200 model.11.norm1.weight LayerNorm False 256 [256] 0.948 0.0422 float32
200 model.11.norm1.bias LayerNorm False 256 [256] -0.00571 0.183 float32
201 model.11.norm2.weight LayerNorm False 256 [256] 1.01 0.041 float32
201 model.11.norm2.bias LayerNorm False 256 [256] -8.51e-07 2.19e-05 float32
202 model.11.dropout Dropout False 0 [] - - -
203 model.11.dropout1 Dropout False 0 [] - - -
204 model.11.dropout2 Dropout False 0 [] - - -
205 model.11.act GELU False 0 [] - - -
206 model.12.conv.weight Conv2d False 65536 [256, 256, 1, 1] 0.000279 0.0762 float32
207 model.12.bn.weight BatchNorm2d False 256 [256] 1.07 0.0765 float32
207 model.12.bn.bias BatchNorm2d False 256 [256] -0.0149 0.0864 float32
208 model.12.act SiLU False 0 [] - - -
209 model.13 Upsample False 0 [] - - -
210 model.14.conv.weight Conv2d False 262144 [256, 1024, 1, 1] -6.37e-05 0.0825 float32
211 model.14.bn.weight BatchNorm2d False 256 [256] 0.916 0.0305 float32
211 model.14.bn.bias BatchNorm2d False 256 [256] -5.09e-07 1.16e-05 float32
212 model.14.act Identity False 0 [] - - -
213 model.15 Concat False 0 [] - - -
214 model.16.cv1.conv.weight Conv2d False 131072 [256, 512, 1, 1] 0.000616 0.0724 float32
215 model.16.cv1.bn.weight BatchNorm2d False 256 [256] 1.06 0.0509 float32
215 model.16.cv1.bn.bias BatchNorm2d False 256 [256] -0.2 0.0762 float32
216 model.16.cv2.conv.weight Conv2d False 131072 [256, 512, 1, 1] 0.000428 0.0733 float32
217 model.16.cv2.bn.weight BatchNorm2d False 256 [256] 1.06 0.0674 float32
217 model.16.cv2.bn.bias BatchNorm2d False 256 [256] -0.0468 0.109 float32
218 model.16.m.0.act SiLU False 0 [] - - -
219 model.16.m.0.conv1.conv.weight Conv2d False 589824 [256, 256, 3, 3] -0.00267 0.073 float32
220 model.16.m.0.conv1.bn.weight BatchNorm2d False 256 [256] 1.06 0.0692 float32
220 model.16.m.0.conv1.bn.bias BatchNorm2d False 256 [256] -0.19 0.0599 float32
221 model.16.m.0.conv1.act Identity False 0 [] - - -
222 model.16.m.0.conv2.conv.weight Conv2d False 65536 [256, 256, 1, 1] -0.00736 0.111 float32
223 model.16.m.0.conv2.bn.weight BatchNorm2d False 256 [256] 0.943 0.055 float32
223 model.16.m.0.conv2.bn.bias BatchNorm2d False 256 [256] -0.19 0.0599 float32
224 model.16.m.0.conv2.act Identity False 0 [] - - -
225 model.16.m.1.conv1.conv.weight Conv2d False 589824 [256, 256, 3, 3] -0.00342 0.0732 float32
226 model.16.m.1.conv1.bn.weight BatchNorm2d False 256 [256] 1.07 0.0802 float32
226 model.16.m.1.conv1.bn.bias BatchNorm2d False 256 [256] -0.169 0.0594 float32
227 model.16.m.1.conv1.act Identity False 0 [] - - -
228 model.16.m.1.conv2.conv.weight Conv2d False 65536 [256, 256, 1, 1] -0.00736 0.112 float32
229 model.16.m.1.conv2.bn.weight BatchNorm2d False 256 [256] 0.93 0.0527 float32
229 model.16.m.1.conv2.bn.bias BatchNorm2d False 256 [256] -0.169 0.0594 float32
230 model.16.m.1.conv2.act Identity False 0 [] - - -
231 model.16.m.2.conv1.conv.weight Conv2d False 589824 [256, 256, 3, 3] -0.00284 0.0733 float32
232 model.16.m.2.conv1.bn.weight BatchNorm2d False 256 [256] 1.06 0.0608 float32
232 model.16.m.2.conv1.bn.bias BatchNorm2d False 256 [256] -0.0896 0.0653 float32
233 model.16.m.2.conv1.act Identity False 0 [] - - -
234 model.16.m.2.conv2.conv.weight Conv2d False 65536 [256, 256, 1, 1] -0.00815 0.112 float32
235 model.16.m.2.conv2.bn.weight BatchNorm2d False 256 [256] 0.887 0.0486 float32
235 model.16.m.2.conv2.bn.bias BatchNorm2d False 256 [256] -0.0896 0.0653 float32
236 model.16.m.2.conv2.act Identity False 0 [] - - -
237 model.16.cv3 Identity False 0 [] - - -
238 model.17.conv.weight Conv2d False 65536 [256, 256, 1, 1] -0.00499 0.0768 float32
239 model.17.bn.weight BatchNorm2d False 256 [256] 1.12 0.0826 float32
239 model.17.bn.bias BatchNorm2d False 256 [256] 0.0314 0.0922 float32
240 model.18 Upsample False 0 [] - - -
241 model.19.conv.weight Conv2d False 131072 [256, 512, 1, 1] 2.3e-05 0.0928 float32
242 model.19.bn.weight BatchNorm2d False 256 [256] 0.819 0.0459 float32
242 model.19.bn.bias BatchNorm2d False 256 [256] -3.47e-07 1.78e-05 float32
243 model.19.act Identity False 0 [] - - -
244 model.20 Concat False 0 [] - - -
245 model.21.cv1.conv.weight Conv2d False 131072 [256, 512, 1, 1] -0.00148 0.0734 float32
246 model.21.cv1.bn.weight BatchNorm2d False 256 [256] 1.02 0.0755 float32
246 model.21.cv1.bn.bias BatchNorm2d False 256 [256] -0.179 0.119 float32
247 model.21.cv2.conv.weight Conv2d False 131072 [256, 512, 1, 1] -0.00244 0.0736 float32
248 model.21.cv2.bn.weight BatchNorm2d False 256 [256] 0.895 0.0842 float32
248 model.21.cv2.bn.bias BatchNorm2d False 256 [256] 0.0516 0.116 float32
249 model.21.m.0.conv1.conv.weight Conv2d False 589824 [256, 256, 3, 3] -0.00203 0.0732 float32
250 model.21.m.0.conv1.bn.weight BatchNorm2d False 256 [256] 1.04 0.0755 float32
250 model.21.m.0.conv1.bn.bias BatchNorm2d False 256 [256] -0.251 0.0989 float32
251 model.21.m.0.conv1.act Identity False 0 [] - - -
252 model.21.m.0.conv2.conv.weight Conv2d False 65536 [256, 256, 1, 1] -0.0111 0.112 float32
253 model.21.m.0.conv2.bn.weight BatchNorm2d False 256 [256] 0.873 0.0523 float32
253 model.21.m.0.conv2.bn.bias BatchNorm2d False 256 [256] -0.251 0.0989 float32
254 model.21.m.0.conv2.act Identity False 0 [] - - -
255 model.21.m.1.conv1.conv.weight Conv2d False 589824 [256, 256, 3, 3] -0.00389 0.0734 float32
256 model.21.m.1.conv1.bn.weight BatchNorm2d False 256 [256] 1.07 0.0854 float32
256 model.21.m.1.conv1.bn.bias BatchNorm2d False 256 [256] -0.211 0.0907 float32
257 model.21.m.1.conv1.act Identity False 0 [] - - -
258 model.21.m.1.conv2.conv.weight Conv2d False 65536 [256, 256, 1, 1] -0.0136 0.112 float32
259 model.21.m.1.conv2.bn.weight BatchNorm2d False 256 [256] 0.876 0.0553 float32
259 model.21.m.1.conv2.bn.bias BatchNorm2d False 256 [256] -0.211 0.0907 float32
260 model.21.m.1.conv2.act Identity False 0 [] - - -
261 model.21.m.2.conv1.conv.weight Conv2d False 589824 [256, 256, 3, 3] -0.00314 0.0731 float32
262 model.21.m.2.conv1.bn.weight BatchNorm2d False 256 [256] 1.1 0.125 float32
262 model.21.m.2.conv1.bn.bias BatchNorm2d False 256 [256] 0.017 0.107 float32
263 model.21.m.2.conv1.act Identity False 0 [] - - -
264 model.21.m.2.conv2.conv.weight Conv2d False 65536 [256, 256, 1, 1] -0.0131 0.112 float32
265 model.21.m.2.conv2.bn.weight BatchNorm2d False 256 [256] 0.951 0.127 float32
265 model.21.m.2.conv2.bn.bias BatchNorm2d False 256 [256] 0.017 0.107 float32
266 model.21.m.2.conv2.act Identity False 0 [] - - -
267 model.21.cv3 Identity False 0 [] - - -
268 model.22.conv.weight Conv2d False 589824 [256, 256, 3, 3] -0.000837 0.0666 float32
269 model.22.bn.weight BatchNorm2d False 256 [256] 1.15 0.0852 float32
269 model.22.bn.bias BatchNorm2d False 256 [256] -0.234 0.106 float32
270 model.23 Concat False 0 [] - - -
271 model.24.cv1.conv.weight Conv2d False 131072 [256, 512, 1, 1] -0.00411 0.0721 float32
272 model.24.cv1.bn.weight BatchNorm2d False 256 [256] 1.07 0.0645 float32
272 model.24.cv1.bn.bias BatchNorm2d False 256 [256] -0.204 0.0868 float32
273 model.24.cv2.conv.weight Conv2d False 131072 [256, 512, 1, 1] -0.00651 0.0725 float32
274 model.24.cv2.bn.weight BatchNorm2d False 256 [256] 0.968 0.0901 float32
274 model.24.cv2.bn.bias BatchNorm2d False 256 [256] -0.0336 0.0746 float32
275 model.24.m.0.conv1.conv.weight Conv2d False 589824 [256, 256, 3, 3] -0.00253 0.0725 float32
276 model.24.m.0.conv1.bn.weight BatchNorm2d False 256 [256] 1.08 0.0784 float32
276 model.24.m.0.conv1.bn.bias BatchNorm2d False 256 [256] -0.243 0.0816 float32
277 model.24.m.0.conv1.act Identity False 0 [] - - -
278 model.24.m.0.conv2.conv.weight Conv2d False 65536 [256, 256, 1, 1] -0.0107 0.112 float32
279 model.24.m.0.conv2.bn.weight BatchNorm2d False 256 [256] 0.892 0.0653 float32
279 model.24.m.0.conv2.bn.bias BatchNorm2d False 256 [256] -0.243 0.0816 float32
280 model.24.m.0.conv2.act Identity False 0 [] - - -
281 model.24.m.1.conv1.conv.weight Conv2d False 589824 [256, 256, 3, 3] -0.00306 0.0731 float32
282 model.24.m.1.conv1.bn.weight BatchNorm2d False 256 [256] 1.08 0.0823 float32
282 model.24.m.1.conv1.bn.bias BatchNorm2d False 256 [256] -0.228 0.0742 float32
283 model.24.m.1.conv1.act Identity False 0 [] - - -
284 model.24.m.1.conv2.conv.weight Conv2d False 65536 [256, 256, 1, 1] -0.00876 0.112 float32
285 model.24.m.1.conv2.bn.weight BatchNorm2d False 256 [256] 0.892 0.0654 float32
285 model.24.m.1.conv2.bn.bias BatchNorm2d False 256 [256] -0.228 0.0742 float32
286 model.24.m.1.conv2.act Identity False 0 [] - - -
287 model.24.m.2.conv1.conv.weight Conv2d False 589824 [256, 256, 3, 3] -0.00316 0.073 float32
288 model.24.m.2.conv1.bn.weight BatchNorm2d False 256 [256] 1.1 0.0971 float32
288 model.24.m.2.conv1.bn.bias BatchNorm2d False 256 [256] -0.0732 0.0904 float32
289 model.24.m.2.conv1.act Identity False 0 [] - - -
290 model.24.m.2.conv2.conv.weight Conv2d False 65536 [256, 256, 1, 1] -0.0129 0.112 float32
291 model.24.m.2.conv2.bn.weight BatchNorm2d False 256 [256] 0.924 0.101 float32
291 model.24.m.2.conv2.bn.bias BatchNorm2d False 256 [256] -0.0732 0.0904 float32
292 model.24.m.2.conv2.act Identity False 0 [] - - -
293 model.24.cv3 Identity False 0 [] - - -
294 model.25.conv.weight Conv2d False 589824 [256, 256, 3, 3] -0.00249 0.0672 float32
295 model.25.bn.weight BatchNorm2d False 256 [256] 1.13 0.103 float32
295 model.25.bn.bias BatchNorm2d False 256 [256] -0.117 0.115 float32
296 model.26 Concat False 0 [] - - -
297 model.27.cv1.conv.weight Conv2d False 131072 [256, 512, 1, 1] -0.00234 0.073 float32
298 model.27.cv1.bn.weight BatchNorm2d False 256 [256] 1.06 0.0917 float32
298 model.27.cv1.bn.bias BatchNorm2d False 256 [256] -0.0958 0.137 float32
299 model.27.cv2.conv.weight Conv2d False 131072 [256, 512, 1, 1] -0.00179 0.0727 float32
300 model.27.cv2.bn.weight BatchNorm2d False 256 [256] 0.997 0.08 float32
300 model.27.cv2.bn.bias BatchNorm2d False 256 [256] 0.124 0.118 float32
301 model.27.m.0.conv1.conv.weight Conv2d False 589824 [256, 256, 3, 3] -0.00165 0.072 float32
302 model.27.m.0.conv1.bn.weight BatchNorm2d False 256 [256] 1.1 0.101 float32
302 model.27.m.0.conv1.bn.bias BatchNorm2d False 256 [256] -0.134 0.102 float32
303 model.27.m.0.conv1.act Identity False 0 [] - - -
304 model.27.m.0.conv2.conv.weight Conv2d False 65536 [256, 256, 1, 1] -0.00495 0.112 float32
305 model.27.m.0.conv2.bn.weight BatchNorm2d False 256 [256] 0.917 0.0769 float32
305 model.27.m.0.conv2.bn.bias BatchNorm2d False 256 [256] -0.134 0.102 float32
306 model.27.m.0.conv2.act Identity False 0 [] - - -
307 model.27.m.1.conv1.conv.weight Conv2d False 589824 [256, 256, 3, 3] -0.00287 0.0731 float32
308 model.27.m.1.conv1.bn.weight BatchNorm2d False 256 [256] 1.09 0.114 float32
308 model.27.m.1.conv1.bn.bias BatchNorm2d False 256 [256] -0.128 0.105 float32
309 model.27.m.1.conv1.act Identity False 0 [] - - -
310 model.27.m.1.conv2.conv.weight Conv2d False 65536 [256, 256, 1, 1] -0.00362 0.112 float32
311 model.27.m.1.conv2.bn.weight BatchNorm2d False 256 [256] 0.924 0.105 float32
311 model.27.m.1.conv2.bn.bias BatchNorm2d False 256 [256] -0.128 0.105 float32
312 model.27.m.1.conv2.act Identity False 0 [] - - -
313 model.27.m.2.conv1.conv.weight Conv2d False 589824 [256, 256, 3, 3] -0.00234 0.0731 float32
314 model.27.m.2.conv1.bn.weight BatchNorm2d False 256 [256] 1.02 0.111 float32
314 model.27.m.2.conv1.bn.bias BatchNorm2d False 256 [256] -0.0278 0.127 float32
315 model.27.m.2.conv1.act Identity False 0 [] - - -
316 model.27.m.2.conv2.conv.weight Conv2d False 65536 [256, 256, 1, 1] -0.00602 0.112 float32
317 model.27.m.2.conv2.bn.weight BatchNorm2d False 256 [256] 0.921 0.102 float32
317 model.27.m.2.conv2.bn.bias BatchNorm2d False 256 [256] -0.0278 0.127 float32
318 model.27.m.2.conv2.act Identity False 0 [] - - -
319 model.27.cv3 Identity False 0 [] - - -
320 model.28.input_proj.0.0.weight Conv2d False 65536 [256, 256, 1, 1] -0.000359 0.0917 float32
321 model.28.input_proj.0.1.weight BatchNorm2d False 256 [256] 0.876 0.0403 float32
321 model.28.input_proj.0.1.bias BatchNorm2d False 256 [256] 0.00426 0.0908 float32
322 model.28.input_proj.1.0.weight Conv2d False 65536 [256, 256, 1, 1] -5.44e-05 0.0918 float32
323 model.28.input_proj.1.1.weight BatchNorm2d False 256 [256] 0.873 0.0424 float32
323 model.28.input_proj.1.1.bias BatchNorm2d False 256 [256] 0.00558 0.0985 float32
324 model.28.input_proj.2.0.weight Conv2d False 65536 [256, 256, 1, 1] 0.000294 0.0914 float32
325 model.28.input_proj.2.1.weight BatchNorm2d False 256 [256] 0.928 0.0368 float32
325 model.28.input_proj.2.1.bias BatchNorm2d False 256 [256] -0.000983 0.13 float32
326model.28.decoder.layers.0.self_attn.out_proj.weightNonDynamicallyQuantizableLinear False 65536 [256, 256] 4.56e-05 0.0793 float32
326model.28.decoder.layers.0.self_attn.out_proj.biasNonDynamicallyQuantizableLinear False 256 [256] -0.00115 0.0778 float32
327 model.28.decoder.layers.0.dropout1 Dropout False 0 [] - - -
328 model.28.decoder.layers.0.norm1.weight LayerNorm False 256 [256] 0.817 0.0852 float32
328 model.28.decoder.layers.0.norm1.bias LayerNorm False 256 [256] 0.00124 0.0875 float32
329model.28.decoder.layers.0.cross_attn.sampling_offsets.weight Linear False 49152 [192, 256] 0.000213 0.0505 float32
329model.28.decoder.layers.0.cross_attn.sampling_offsets.bias Linear False 192 [192] 0.00267 2.38 float32
330model.28.decoder.layers.0.cross_attn.attention_weights.weight Linear False 24576 [96, 256] -0.000566 0.0505 float32
330model.28.decoder.layers.0.cross_attn.attention_weights.bias Linear False 96 [96] 0.0139 0.0547 float32
331model.28.decoder.layers.0.cross_attn.value_proj.weight Linear False 65536 [256, 256] -0.000176 0.0868 float32
331model.28.decoder.layers.0.cross_attn.value_proj.bias Linear False 256 [256] -0.00462 0.0858 float32
332model.28.decoder.layers.0.cross_attn.output_proj.weight Linear False 65536 [256, 256] 0.000337 0.0882 float32
332model.28.decoder.layers.0.cross_attn.output_proj.bias Linear False 256 [256] -0.000168 0.0444 float32
333 model.28.decoder.layers.0.dropout2 Dropout False 0 [] - - -
334 model.28.decoder.layers.0.norm2.weight LayerNorm False 256 [256] 1.01 0.0409 float32
334 model.28.decoder.layers.0.norm2.bias LayerNorm False 256 [256] 0.00502 0.0656 float32
335model.28.decoder.layers.0.linear1.weight Linear False 262144 [1024, 256] -0.000172 0.0728 float32
335 model.28.decoder.layers.0.linear1.bias Linear False 1024 [1024] -0.0488 0.0505 float32
336 model.28.decoder.layers.0.act ReLU False 0 [] - - -
337 model.28.decoder.layers.0.dropout3 Dropout False 0 [] - - -
338model.28.decoder.layers.0.linear2.weight Linear False 262144 [256, 1024] -0.000146 0.0705 float32
338 model.28.decoder.layers.0.linear2.bias Linear False 256 [256] 0.00136 0.0561 float32
339 model.28.decoder.layers.0.dropout4 Dropout False 0 [] - - -
340 model.28.decoder.layers.0.norm3.weight LayerNorm False 256 [256] 0.965 0.0755 float32
340 model.28.decoder.layers.0.norm3.bias LayerNorm False 256 [256] 0.0113 0.0754 float32
341model.28.decoder.layers.1.self_attn.out_proj.weightNonDynamicallyQuantizableLinear False 65536 [256, 256] 7.86e-05 0.0748 float32
341model.28.decoder.layers.1.self_attn.out_proj.biasNonDynamicallyQuantizableLinear False 256 [256] -0.00051 0.0782 float32
342 model.28.decoder.layers.1.dropout1 Dropout False 0 [] - - -
343 model.28.decoder.layers.1.norm1.weight LayerNorm False 256 [256] 0.888 0.105 float32
343 model.28.decoder.layers.1.norm1.bias LayerNorm False 256 [256] -0.00429 0.0747 float32
344model.28.decoder.layers.1.cross_attn.sampling_offsets.weight Linear False 49152 [192, 256] 0.000185 0.0605 float32
344model.28.decoder.layers.1.cross_attn.sampling_offsets.bias Linear False 192 [192] 0.00287 2.38 float32
345model.28.decoder.layers.1.cross_attn.attention_weights.weight Linear False 24576 [96, 256] -0.000605 0.0578 float32
345model.28.decoder.layers.1.cross_attn.attention_weights.bias Linear False 96 [96] 0.0109 0.0589 float32
346model.28.decoder.layers.1.cross_attn.value_proj.weight Linear False 65536 [256, 256] -0.000188 0.0826 float32
346model.28.decoder.layers.1.cross_attn.value_proj.bias Linear False 256 [256] 0.00208 0.059 float32
347model.28.decoder.layers.1.cross_attn.output_proj.weight Linear False 65536 [256, 256] 0.000337 0.0843 float32
347model.28.decoder.layers.1.cross_attn.output_proj.bias Linear False 256 [256] -0.000174 0.055 float32
348 model.28.decoder.layers.1.dropout2 Dropout False 0 [] - - -
349 model.28.decoder.layers.1.norm2.weight LayerNorm False 256 [256] 1 0.0509 float32
349 model.28.decoder.layers.1.norm2.bias LayerNorm False 256 [256] -0.00283 0.0679 float32
350model.28.decoder.layers.1.linear1.weight Linear False 262144 [1024, 256] -0.000191 0.0717 float32
350 model.28.decoder.layers.1.linear1.bias Linear False 1024 [1024] -0.0557 0.049 float32
351 model.28.decoder.layers.1.act ReLU False 0 [] - - -
352 model.28.decoder.layers.1.dropout3 Dropout False 0 [] - - -
353model.28.decoder.layers.1.linear2.weight Linear False 262144 [256, 1024] -3.94e-05 0.0703 float32
353 model.28.decoder.layers.1.linear2.bias Linear False 256 [256] 0.000974 0.0637 float32
354 model.28.decoder.layers.1.dropout4 Dropout False 0 [] - - -
355 model.28.decoder.layers.1.norm3.weight LayerNorm False 256 [256] 0.98 0.0779 float32
355 model.28.decoder.layers.1.norm3.bias LayerNorm False 256 [256] -0.00634 0.103 float32
356model.28.decoder.layers.2.self_attn.out_proj.weightNonDynamicallyQuantizableLinear False 65536 [256, 256] 3.51e-05 0.07 float32
356model.28.decoder.layers.2.self_attn.out_proj.biasNonDynamicallyQuantizableLinear False 256 [256] -0.000171 0.0767 float32
357 model.28.decoder.layers.2.dropout1 Dropout False 0 [] - - -
358 model.28.decoder.layers.2.norm1.weight LayerNorm False 256 [256] 0.874 0.102 float32
358 model.28.decoder.layers.2.norm1.bias LayerNorm False 256 [256] -0.013 0.0878 float32
359model.28.decoder.layers.2.cross_attn.sampling_offsets.weight Linear False 49152 [192, 256] 0.00038 0.0596 float32
359model.28.decoder.layers.2.cross_attn.sampling_offsets.bias Linear False 192 [192] 0.00298 2.38 float32
360model.28.decoder.layers.2.cross_attn.attention_weights.weight Linear False 24576 [96, 256] -0.000166 0.0606 float32
360model.28.decoder.layers.2.cross_attn.attention_weights.bias Linear False 96 [96] 0.00557 0.0535 float32
361model.28.decoder.layers.2.cross_attn.value_proj.weight Linear False 65536 [256, 256] -0.000237 0.083 float32
361model.28.decoder.layers.2.cross_attn.value_proj.bias Linear False 256 [256] 0.000856 0.0555 float32
362model.28.decoder.layers.2.cross_attn.output_proj.weight Linear False 65536 [256, 256] 0.00035 0.0841 float32
362model.28.decoder.layers.2.cross_attn.output_proj.bias Linear False 256 [256] -0.000166 0.06 float32
363 model.28.decoder.layers.2.dropout2 Dropout False 0 [] - - -
364 model.28.decoder.layers.2.norm2.weight LayerNorm False 256 [256] 1 0.0421 float32
364 model.28.decoder.layers.2.norm2.bias LayerNorm False 256 [256] -0.00169 0.0656 float32
365model.28.decoder.layers.2.linear1.weight Linear False 262144 [1024, 256] -0.000357 0.0712 float32
365 model.28.decoder.layers.2.linear1.bias Linear False 1024 [1024] -0.054 0.0492 float32
366 model.28.decoder.layers.2.act ReLU False 0 [] - - -
367 model.28.decoder.layers.2.dropout3 Dropout False 0 [] - - -
368model.28.decoder.layers.2.linear2.weight Linear False 262144 [256, 1024] -0.000115 0.0699 float32
368 model.28.decoder.layers.2.linear2.bias Linear False 256 [256] 0.000483 0.0591 float32
369 model.28.decoder.layers.2.dropout4 Dropout False 0 [] - - -
370 model.28.decoder.layers.2.norm3.weight LayerNorm False 256 [256] 0.964 0.0644 float32
370 model.28.decoder.layers.2.norm3.bias LayerNorm False 256 [256] 0.00185 0.0863 float32
371model.28.decoder.layers.3.self_attn.out_proj.weightNonDynamicallyQuantizableLinear False 65536 [256, 256] 6.83e-05 0.0686 float32
371model.28.decoder.layers.3.self_attn.out_proj.biasNonDynamicallyQuantizableLinear False 256 [256] -0.000278 0.0708 float32
372 model.28.decoder.layers.3.dropout1 Dropout False 0 [] - - -
373 model.28.decoder.layers.3.norm1.weight LayerNorm False 256 [256] 0.864 0.0887 float32
373 model.28.decoder.layers.3.norm1.bias LayerNorm False 256 [256] -0.00481 0.0808 float32
374model.28.decoder.layers.3.cross_attn.sampling_offsets.weight Linear False 49152 [192, 256] -0.000127 0.0606 float32
374model.28.decoder.layers.3.cross_attn.sampling_offsets.bias Linear False 192 [192] 0.00325 2.38 float32
375model.28.decoder.layers.3.cross_attn.attention_weights.weight Linear False 24576 [96, 256] 3.16e-06 0.0635 float32
375model.28.decoder.layers.3.cross_attn.attention_weights.bias Linear False 96 [96] 0.00355 0.0467 float32
376model.28.decoder.layers.3.cross_attn.value_proj.weight Linear False 65536 [256, 256] -0.00017 0.0837 float32
376model.28.decoder.layers.3.cross_attn.value_proj.bias Linear False 256 [256] -0.00267 0.0531 float32
377model.28.decoder.layers.3.cross_attn.output_proj.weight Linear False 65536 [256, 256] 0.000355 0.0837 float32
377model.28.decoder.layers.3.cross_attn.output_proj.bias Linear False 256 [256] 0.00047 0.0577 float32
378 model.28.decoder.layers.3.dropout2 Dropout False 0 [] - - -
379 model.28.decoder.layers.3.norm2.weight LayerNorm False 256 [256] 1 0.0471 float32
379 model.28.decoder.layers.3.norm2.bias LayerNorm False 256 [256] 0.00562 0.0569 float32
380model.28.decoder.layers.3.linear1.weight Linear False 262144 [1024, 256] -0.000128 0.0722 float32
380 model.28.decoder.layers.3.linear1.bias Linear False 1024 [1024] -0.0523 0.046 float32
381 model.28.decoder.layers.3.act ReLU False 0 [] - - -
382 model.28.decoder.layers.3.dropout3 Dropout False 0 [] - - -
383model.28.decoder.layers.3.linear2.weight Linear False 262144 [256, 1024] 1.54e-06 0.0689 float32
383 model.28.decoder.layers.3.linear2.bias Linear False 256 [256] 0.00104 0.0614 float32
384 model.28.decoder.layers.3.dropout4 Dropout False 0 [] - - -
385 model.28.decoder.layers.3.norm3.weight LayerNorm False 256 [256] 0.908 0.0626 float32
385 model.28.decoder.layers.3.norm3.bias LayerNorm False 256 [256] 0.00996 0.0845 float32
386model.28.decoder.layers.4.self_attn.out_proj.weightNonDynamicallyQuantizableLinear False 65536 [256, 256] 9.06e-05 0.0656 float32
386model.28.decoder.layers.4.self_attn.out_proj.biasNonDynamicallyQuantizableLinear False 256 [256] -0.000194 0.0673 float32
387 model.28.decoder.layers.4.dropout1 Dropout False 0 [] - - -
388 model.28.decoder.layers.4.norm1.weight LayerNorm False 256 [256] 0.871 0.0733 float32
388 model.28.decoder.layers.4.norm1.bias LayerNorm False 256 [256] -0.0112 0.0658 float32
389model.28.decoder.layers.4.cross_attn.sampling_offsets.weight Linear False 49152 [192, 256] 0.00139 0.0646 float32
389model.28.decoder.layers.4.cross_attn.sampling_offsets.bias Linear False 192 [192] -0.0153 2.38 float32
390model.28.decoder.layers.4.cross_attn.attention_weights.weight Linear False 24576 [96, 256] -0.000201 0.0627 float32
390model.28.decoder.layers.4.cross_attn.attention_weights.bias Linear False 96 [96] 0.00637 0.0451 float32
391model.28.decoder.layers.4.cross_attn.value_proj.weight Linear False 65536 [256, 256] -0.000148 0.0831 float32
391model.28.decoder.layers.4.cross_attn.value_proj.bias Linear False 256 [256] 0.000473 0.0499 float32
392model.28.decoder.layers.4.cross_attn.output_proj.weight Linear False 65536 [256, 256] 0.000314 0.083 float32
392model.28.decoder.layers.4.cross_attn.output_proj.bias Linear False 256 [256] 0.000257 0.056 float32
393 model.28.decoder.layers.4.dropout2 Dropout False 0 [] - - -
394 model.28.decoder.layers.4.norm2.weight LayerNorm False 256 [256] 0.995 0.0476 float32
394 model.28.decoder.layers.4.norm2.bias LayerNorm False 256 [256] 0.00793 0.0543 float32
395model.28.decoder.layers.4.linear1.weight Linear False 262144 [1024, 256] 7.44e-05 0.073 float32
395 model.28.decoder.layers.4.linear1.bias Linear False 1024 [1024] -0.0499 0.0424 float32
396 model.28.decoder.layers.4.act ReLU False 0 [] - - -
397 model.28.decoder.layers.4.dropout3 Dropout False 0 [] - - -
398model.28.decoder.layers.4.linear2.weight Linear False 262144 [256, 1024] -0.000194 0.069 float32
398 model.28.decoder.layers.4.linear2.bias Linear False 256 [256] 0.000405 0.05 float32
399 model.28.decoder.layers.4.dropout4 Dropout False 0 [] - - -
400 model.28.decoder.layers.4.norm3.weight LayerNorm False 256 [256] 0.858 0.0627 float32
400 model.28.decoder.layers.4.norm3.bias LayerNorm False 256 [256] 0.011 0.0775 float32
401model.28.decoder.layers.5.self_attn.out_proj.weightNonDynamicallyQuantizableLinear False 65536 [256, 256] 9.18e-05 0.063 float32
401model.28.decoder.layers.5.self_attn.out_proj.biasNonDynamicallyQuantizableLinear False 256 [256] -0.00136 0.0455 float32
402 model.28.decoder.layers.5.dropout1 Dropout False 0 [] - - -
403 model.28.decoder.layers.5.norm1.weight LayerNorm False 256 [256] 1.03 0.056 float32
403 model.28.decoder.layers.5.norm1.bias LayerNorm False 256 [256] 0.00519 0.0488 float32
404model.28.decoder.layers.5.cross_attn.sampling_offsets.weight Linear False 49152 [192, 256] 0.000118 0.0567 float32
404model.28.decoder.layers.5.cross_attn.sampling_offsets.bias Linear False 192 [192] 0.00523 2.37 float32
405model.28.decoder.layers.5.cross_attn.attention_weights.weight Linear False 24576 [96, 256] -0.00101 0.057 float32
405model.28.decoder.layers.5.cross_attn.attention_weights.bias Linear False 96 [96] 0.0322 0.0202 float32
406model.28.decoder.layers.5.cross_attn.value_proj.weight Linear False 65536 [256, 256] 2.55e-06 0.0798 float32
406model.28.decoder.layers.5.cross_attn.value_proj.bias Linear False 256 [256] -0.00323 0.034 float32
407model.28.decoder.layers.5.cross_attn.output_proj.weight Linear False 65536 [256, 256] 0.000389 0.0792 float32
407model.28.decoder.layers.5.cross_attn.output_proj.bias Linear False 256 [256] -0.000457 0.0513 float32
408 model.28.decoder.layers.5.dropout2 Dropout False 0 [] - - -
409 model.28.decoder.layers.5.norm2.weight LayerNorm False 256 [256] 0.981 0.0489 float32
409 model.28.decoder.layers.5.norm2.bias LayerNorm False 256 [256] 0.00673 0.0493 float32
410model.28.decoder.layers.5.linear1.weight Linear False 262144 [1024, 256] 0.000114 0.0731 float32
410 model.28.decoder.layers.5.linear1.bias Linear False 1024 [1024] -0.0531 0.044 float32
411 model.28.decoder.layers.5.act ReLU False 0 [] - - -
412 model.28.decoder.layers.5.dropout3 Dropout False 0 [] - - -
413model.28.decoder.layers.5.linear2.weight Linear False 262144 [256, 1024] -8.27e-05 0.0692 float32
413 model.28.decoder.layers.5.linear2.bias Linear False 256 [256] 0.00175 0.0398 float32
414 model.28.decoder.layers.5.dropout4 Dropout False 0 [] - - -
415 model.28.decoder.layers.5.norm3.weight LayerNorm False 256 [256] 0.817 0.0731 float32
415 model.28.decoder.layers.5.norm3.bias LayerNorm False 256 [256] -0.00276 0.0747 float32
416 model.28.denoising_class_embed.weight Embedding False 20480 [80, 256] 0.00374 0.97 float32
417 model.28.query_pos_head.layers.0.weight Linear False 2048 [512, 4] -0.0497 0.235 float32
417 model.28.query_pos_head.layers.0.bias Linear False 512 [512] -0.0388 0.228 float32
418 model.28.query_pos_head.layers.1.weight Linear False 131072 [256, 512] 0.000355 0.121 float32
418 model.28.query_pos_head.layers.1.bias Linear False 256 [256] -0.00352 0.0439 float32
419 model.28.enc_output.0.weight Linear False 65536 [256, 256] 0.000145 0.094 float32
419 model.28.enc_output.0.bias Linear False 256 [256] -0.000814 0.18 float32
420 model.28.enc_output.1.weight LayerNorm False 256 [256] 0.841 0.0972 float32
420 model.28.enc_output.1.bias LayerNorm False 256 [256] 0.00209 0.167 float32
421 model.28.enc_score_head.weight Linear False 20480 [80, 256] 0.000762 0.0777 float32
421 model.28.enc_score_head.bias Linear False 80 [80] -4.74 0.0774 float32
422 model.28.enc_bbox_head.layers.0.weight Linear False 65536 [256, 256] -8.33e-05 0.0655 float32
422 model.28.enc_bbox_head.layers.0.bias Linear False 256 [256] -0.0555 0.106 float32
423 model.28.enc_bbox_head.layers.1.weight Linear False 65536 [256, 256] -0.0087 0.0613 float32
423 model.28.enc_bbox_head.layers.1.bias Linear False 256 [256] -0.0146 0.077 float32
424 model.28.enc_bbox_head.layers.2.weight Linear False 1024 [4, 256] 0.0224 0.118 float32
424 model.28.enc_bbox_head.layers.2.bias Linear False 4 [4] -0.00709 0.0372 float32
425 model.28.dec_score_head.0.weight Linear False 20480 [80, 256] 0.00103 0.111 float32
425 model.28.dec_score_head.0.bias Linear False 80 [80] -4.65 0.057 float32
426 model.28.dec_score_head.1.weight Linear False 20480 [80, 256] -0.000737 0.0847 float32
426 model.28.dec_score_head.1.bias Linear False 80 [80] -4.62 0.0509 float32
427 model.28.dec_score_head.2.weight Linear False 20480 [80, 256] -0.000446 0.0692 float32
427 model.28.dec_score_head.2.bias Linear False 80 [80] -4.62 0.0469 float32
428 model.28.dec_score_head.3.weight Linear False 20480 [80, 256] 0.00149 0.0635 float32
428 model.28.dec_score_head.3.bias Linear False 80 [80] -4.63 0.0361 float32
429 model.28.dec_score_head.4.weight Linear False 20480 [80, 256] 0.000908 0.0598 float32
429 model.28.dec_score_head.4.bias Linear False 80 [80] -4.63 0.0308 float32
430 model.28.dec_score_head.5.weight Linear False 20480 [80, 256] -0.000775 0.0534 float32
430 model.28.dec_score_head.5.bias Linear False 80 [80] -4.62 0.0283 float32
431model.28.dec_bbox_head.0.layers.0.weight Linear False 65536 [256, 256] -9.2e-05 0.0662 float32
431 model.28.dec_bbox_head.0.layers.0.bias Linear False 256 [256] -0.0256 0.0741 float32
432model.28.dec_bbox_head.0.layers.1.weight Linear False 65536 [256, 256] -0.0143 0.0882 float32
432 model.28.dec_bbox_head.0.layers.1.bias Linear False 256 [256] -0.00181 0.0696 float32
433model.28.dec_bbox_head.0.layers.2.weight Linear False 1024 [4, 256] 0.0131 0.109 float32
433 model.28.dec_bbox_head.0.layers.2.bias Linear False 4 [4] -0.00705 0.00811 float32
434model.28.dec_bbox_head.1.layers.0.weight Linear False 65536 [256, 256] -0.000363 0.0681 float32
434 model.28.dec_bbox_head.1.layers.0.bias Linear False 256 [256] -0.0439 0.0637 float32
435model.28.dec_bbox_head.1.layers.1.weight Linear False 65536 [256, 256] -0.0127 0.0717 float32
435 model.28.dec_bbox_head.1.layers.1.bias Linear False 256 [256] -0.0275 0.0578 float32
436model.28.dec_bbox_head.1.layers.2.weight Linear False 1024 [4, 256] 0.0142 0.136 float32
436 model.28.dec_bbox_head.1.layers.2.bias Linear False 4 [4] -0.00429 0.00494 float32
437model.28.dec_bbox_head.2.layers.0.weight Linear False 65536 [256, 256] -0.000684 0.0659 float32
437 model.28.dec_bbox_head.2.layers.0.bias Linear False 256 [256] -0.0563 0.0666 float32
438model.28.dec_bbox_head.2.layers.1.weight Linear False 65536 [256, 256] -0.0121 0.0687 float32
438 model.28.dec_bbox_head.2.layers.1.bias Linear False 256 [256] -0.0346 0.0619 float32
439model.28.dec_bbox_head.2.layers.2.weight Linear False 1024 [4, 256] 0.00724 0.0876 float32
439 model.28.dec_bbox_head.2.layers.2.bias Linear False 4 [4] -0.00147 0.00203 float32
440model.28.dec_bbox_head.3.layers.0.weight Linear False 65536 [256, 256] 3.85e-05 0.0664 float32
440 model.28.dec_bbox_head.3.layers.0.bias Linear False 256 [256] -0.0546 0.0681 float32
441model.28.dec_bbox_head.3.layers.1.weight Linear False 65536 [256, 256] -0.0127 0.0592 float32
441 model.28.dec_bbox_head.3.layers.1.bias Linear False 256 [256] -0.046 0.0417 float32
442model.28.dec_bbox_head.3.layers.2.weight Linear False 1024 [4, 256] 0.00755 0.0761 float32
442 model.28.dec_bbox_head.3.layers.2.bias Linear False 4 [4] -0.00193 0.00232 float32
443model.28.dec_bbox_head.4.layers.0.weight Linear False 65536 [256, 256] 0.000246 0.0663 float32
443 model.28.dec_bbox_head.4.layers.0.bias Linear False 256 [256] -0.0706 0.0753 float32
444model.28.dec_bbox_head.4.layers.1.weight Linear False 65536 [256, 256] -0.0126 0.0517 float32
444 model.28.dec_bbox_head.4.layers.1.bias Linear False 256 [256] -0.0403 0.0419 float32
445model.28.dec_bbox_head.4.layers.2.weight Linear False 1024 [4, 256] 2.5e-05 0.0447 float32
445 model.28.dec_bbox_head.4.layers.2.bias Linear False 4 [4] 0.000239 0.000273 float32
446model.28.dec_bbox_head.5.layers.0.weight Linear False 65536 [256, 256] 8.33e-05 0.0393 float32
446 model.28.dec_bbox_head.5.layers.0.bias Linear False 256 [256] 0.000403 0.0617 float32
447model.28.dec_bbox_head.5.layers.1.weight Linear False 65536 [256, 256] -0.00979 0.0362 float32
447 model.28.dec_bbox_head.5.layers.1.bias Linear False 256 [256] -0.0184 0.0337 float32
448model.28.dec_bbox_head.5.layers.2.weight Linear False 1024 [4, 256] 0.000198 0.00791 float32
448 model.28.dec_bbox_head.5.layers.2.bias Linear False 4 [4] 6.71e-07 2.1e-06 float32
rt-detr-l summary: 449 layers, 32,970,476 parameters, 0 gradients, 108.3 GFLOPs (449, 32970476, 0, 108.3437056)