@@ -13,179 +13,6 @@ static void _ccv_nnc_remove_unused_from_marked(const uint32_t* const tensor_used
13
13
tensor_marked [i ] &= tensor_used [i ];
14
14
}
15
15
16
- typedef struct { // Ancestor-query index over exec symbols, produced by _ccv_nnc_exec_dep_new and released by _ccv_nnc_exec_dep_free.
17
- int * chain_ids ; // chain_ids[i] is the id of the chain exec symbol i was assigned to. This pointer owns the allocation that also backs chain_pos.
18
- int * chain_pos ; // chain_pos[i] is the 1-based position of exec symbol i within its chain. Points into the second half of the chain_ids allocation.
19
- ccv_sparse_matrix_t * deps ; // Row = exec symbol, column = chain id. Each cell has two int32 channels: [0] the latest position on that chain the row depends on (0 = none), [1] a hop count from that position to the row's node.
20
- } ccv_nnc_exec_dep_t ;
21
-
22
- // Build the exec_dep structure for the graph. We use chain decomposition such that each node only needs to log, per chain, the latest position on that chain it depends on. It runs three passes: reversed depth (over reversed_visit), chain assignment (over visit) and dependency propagation (over visit). The result is to be released with _ccv_nnc_exec_dep_free.
23
- static ccv_nnc_exec_dep_t _ccv_nnc_exec_dep_new (const ccv_nnc_symbolic_graph_t * const graph , const ccv_nnc_graph_visit_t * const visit , const ccv_nnc_graph_visit_t * const reversed_visit )
24
- {
25
- const int exec_symbol_info_size = graph -> exec_symbol_info -> rnum ;
26
- int * chain_ids = ccmalloc (sizeof (int ) * exec_symbol_info_size * 2 ); // One allocation backs both chain_ids and chain_pos.
27
- int * chain_pos = chain_ids + exec_symbol_info_size ; // Second half of the allocation above.
28
- int * buf = (int * )ccmalloc (sizeof (int ) * exec_symbol_info_size * 3 ); // Scratch space: first holds reversed_depth, later reused for (chain id, pos, hop) triplets.
29
- int * reversed_depth = buf ; // Aliases buf; only read in the first two passes, before buf gets overwritten with triplets.
30
- const ccv_nnc_graph_exec_symbol_info_t * const exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t * )ccv_array_get (graph -> exec_symbol_info , 0 );
31
- int i , j ;
32
- // Pass 1: go in reverse order to generate the distance from sink (0 without outgoings, otherwise 1 + the largest value among the outgoings).
33
- ccv_nnc_graph_visit_for (reversed_visit , exec_symbol_info , node , idx , term ) {
34
- chain_ids [idx ] = -1 ; // Not assigned to any chain yet. NOTE(review): only nodes reached by reversed_visit are initialized here; the reads of chain_ids[outgoing] / reversed_depth[outgoing] below assume every outgoing is reached as well - confirm.
35
- if (!node -> outgoings || node -> outgoings -> rnum == 0 )
36
- {
37
- reversed_depth [idx ] = 0 ;
38
- continue ;
39
- }
40
- const int outgoing = * (int * )ccv_array_get (node -> outgoings , 0 );
41
- int depth = reversed_depth [outgoing ];
42
- for (i = 1 ; i < node -> outgoings -> rnum ; i ++ )
43
- {
44
- const int outgoing = * (int * )ccv_array_get (node -> outgoings , i );
45
- depth = ccv_max (depth , reversed_depth [outgoing ]);
46
- }
47
- reversed_depth [idx ] = depth + 1 ; // One more than the deepest outgoing.
48
- } ccv_nnc_graph_visit_endfor
49
- // Pass 2: go in order to generate chain ids (if there are multiple exits, we use the reverse depth to break the tie).
50
- // Note that we cannot use the depth so far because then multiple exit nodes are equally good to "inherit" the chain selection.
51
- int chain_count = 0 ;
52
- ccv_nnc_graph_visit_for (visit , exec_symbol_info , node , idx , term ) {
53
- int chain_id = chain_ids [idx ];
54
- if (chain_ids [idx ] < 0 )
55
- {
56
- chain_id = chain_count ;
57
- chain_ids [idx ] = chain_id ;
58
- chain_pos [idx ] = 1 ; // The first one in this chain. 1-based index because in sparse matrix, 0 is the default value.
59
- chain_count += 1 ;
60
- }
61
- if (!node -> outgoings || node -> outgoings -> rnum == 0 )
62
- continue ;
63
- int depth = 0 ; // Starts at 0 and the comparison below is strict, so an outgoing with reversed_depth 0 is never picked to continue this chain.
64
- int next_idx = -1 ;
65
- for (i = 0 ; i < node -> outgoings -> rnum ; i ++ )
66
- {
67
- const int outgoing = * (int * )ccv_array_get (node -> outgoings , i );
68
- if (chain_ids [outgoing ] < 0 && reversed_depth [outgoing ] > depth )
69
- depth = reversed_depth [outgoing ], next_idx = outgoing ;
70
- }
71
- if (next_idx >= 0 )
72
- {
73
- chain_ids [next_idx ] = chain_id ; // Extend the current chain to the chosen outgoing.
74
- chain_pos [next_idx ] = chain_pos [idx ] + 1 ;
75
- }
76
- } ccv_nnc_graph_visit_endfor
77
- ccv_sparse_matrix_t * deps = ccv_sparse_matrix_new (graph -> exec_symbol_info -> rnum , chain_count , CCV_32S | CCV_C2 , CCV_SPARSE_ROW_MAJOR , 0 ); // One row per exec symbol, one column per chain, two int32 channels per cell.
78
- // Pass 3: it logs which pos on that chain we depend on (channel 0) plus a hop count (channel 1). We can simply compare that with the chain_pos for a node to know if they are ancestors. for_block copies every cell of the current row whose pos is > 0 into buf as (chain id, pos, hop + 1).
79
- #define for_block (x , val ) \
80
- do { \
81
- if (((int32_t*)val)[0] > 0) \
82
- { \
83
- buf[buf_size * 3] = x; \
84
- buf[buf_size * 3 + 1] = ((int32_t*)val)[0]; \
85
- buf[buf_size * 3 + 2] = ((int32_t*)val)[1] + 1; \
86
- ++buf_size; \
87
- } \
88
- } while (0)
89
- int buf_size ; // Number of triplets currently held in buf.
90
- ccv_nnc_graph_visit_for (visit , exec_symbol_info , node , idx , term ) {
91
- buf_size = 0 ; /* save all its parent deps to this buffer */
92
- ccv_sparse_matrix_vector_t * vector = ccv_get_sparse_matrix_vector (deps , idx );
93
- if (vector )
94
- CCV_SPARSE_VECTOR_FOREACH (deps , vector , for_block );
95
- if (!node -> outgoings ) // Nothing to propagate to.
96
- continue ;
97
- const int chain_id = chain_ids [idx ];
98
- const int pos = chain_pos [idx ];
99
- for (i = 0 ; i < node -> outgoings -> rnum ; i ++ )
100
- {
101
- const int outgoing = * (int * )ccv_array_get (node -> outgoings , i );
102
- const int outgoing_chain_id = chain_ids [outgoing ];
103
- if (outgoing_chain_id != chain_id ) // Different chain: outgoing needs an explicit entry for idx's chain.
104
- {
105
- ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell (deps , outgoing , chain_id );
106
- /* Record idx as a direct dependency of outgoing (hop count 1) when there is no entry yet
107
- * for idx's chain, or the existing entry points to an earlier position on that chain. NOTE(review): an earlier remark here said destination nodes need not be recorded as parents; this condition does not check for that. */
108
- if (!cell .i32 || cell .i32 [0 ] == 0 || cell .i32 [0 ] < pos )
109
- {
110
- int p [2 ] = { pos , 1 }; // { position of idx on its chain, hop count 1 for the direct edge }
111
- ccv_set_sparse_matrix_cell (deps , outgoing , chain_id , & p );
112
- }
113
- }
114
- if (buf_size > 0 ) // Pass the dependencies collected from idx's row on to outgoing.
115
- {
116
- ccv_sparse_matrix_vector_t * vector = ccv_get_sparse_matrix_vector (deps , outgoing );
117
- for (j = 0 ; j < buf_size ; j ++ ) /* set with all idx's dependencies as well */
118
- {
119
- if (outgoing_chain_id == buf [j * 3 ]) // We don't need to add a dependency on the node's own chain.
120
- continue ;
121
- if (!vector ) // The row does not exist yet: setting the cell creates it, then re-fetch the row vector.
122
- {
123
- ccv_set_sparse_matrix_cell (deps , outgoing , buf [j * 3 ], & buf [j * 3 + 1 ]); // & buf[j * 3 + 1] points at the (pos, hop) pair of the triplet.
124
- vector = ccv_get_sparse_matrix_vector (deps , outgoing );
125
- continue ;
126
- }
127
- ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell_from_vector (deps , vector , buf [j * 3 ]);
128
- /* If not found, set. Otherwise, overwrite only if the buffered position is later on that chain. */
129
- if (!cell .i32 )
130
- ccv_set_sparse_matrix_cell_from_vector (deps , vector , buf [j * 3 ], & buf [j * 3 + 1 ]);
131
- else if (cell .i32 [0 ] == 0 || cell .i32 [0 ] < buf [j * 3 + 1 ])
132
- ccv_set_sparse_matrix_cell_from_vector (deps , vector , buf [j * 3 ], & buf [j * 3 + 1 ]);
133
- else if (cell .i32 [0 ] == buf [j * 3 + 1 ]) { // If we point to the same position, keep the larger hop count.
134
- int p [2 ] = { cell .i32 [0 ], ccv_max (buf [j * 3 + 2 ], cell .i32 [1 ]) };
135
- ccv_set_sparse_matrix_cell_from_vector (deps , vector , buf [j * 3 ], & p );
136
- }
137
- }
138
- }
139
- }
140
- } ccv_nnc_graph_visit_endfor
141
- #undef for_block
142
- ccfree (buf ); // Scratch only; chain_ids / chain_pos / deps are handed to the caller below.
143
- ccv_nnc_exec_dep_t exec_dep = {
144
- .chain_ids = chain_ids ,
145
- .chain_pos = chain_pos ,
146
- .deps = deps
147
- };
148
- return exec_dep ;
149
- }
150
-
151
- static int _ccv_nnc_exec_dep_dist (const ccv_nnc_exec_dep_t exec_dep , const int d , ccv_sparse_matrix_vector_t * const vector , const int dd ) // vector is presumably d's row of exec_dep.deps, fetched by the caller - confirm.
152
- {
153
- // Check if dd is d's ancestor and report how far away it is. A negative return value means dd is not an ancestor of d.
154
- const int dd_chain_id = exec_dep .chain_ids [dd ];
155
- const int dd_chain_pos = exec_dep .chain_pos [dd ];
156
- if (exec_dep .chain_ids [d ] == dd_chain_id )
157
- return exec_dep .chain_pos [d ] - dd_chain_pos ; // Same chain: difference in positions; zero or negative when d is not after dd.
158
- const ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell_from_vector (exec_dep .deps , vector , dd_chain_id );
159
- if (cell .i32 && cell .i32 [0 ] > 0 && cell .i32 [0 ] >= dd_chain_pos )
160
- {
161
- // The recorded chain pos is greater than or equal to dd_chain_pos, so dd is an ancestor: steps along dd's chain up to the recorded position, plus the recorded hop count.
162
- return cell .i32 [0 ] - dd_chain_pos + cell .i32 [1 ];
163
- }
164
- return -1 ; // No recorded dependency on dd's chain at or after dd.
165
- }
166
-
167
- static int _ccv_nnc_exec_dep_check (const ccv_nnc_exec_dep_t exec_dep , const int d , const int dd )
168
- {
169
- // Check if dd is d's ancestor. Returns non-zero if it is, 0 otherwise.
170
- const int dd_chain_id = exec_dep .chain_ids [dd ];
171
- const int dd_chain_pos = exec_dep .chain_pos [dd ];
172
- if (exec_dep .chain_ids [d ] == dd_chain_id )
173
- return exec_dep .chain_pos [d ] > dd_chain_pos ; // Same chain: a strictly later position means dd comes before d.
174
- const ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell (exec_dep .deps , d , dd_chain_id ); // Looks up row d, column dd's chain.
175
- if (cell .i32 && cell .i32 [0 ] > 0 ) // A pos of 0 means no dependency on that chain is recorded (positions are 1-based).
176
- {
177
- // Check if the recorded chain pos is greater than or equal to dd_chain_pos. If it is, dd is an ancestor.
178
- return cell .i32 [0 ] >= dd_chain_pos ;
179
- }
180
- return 0 ;
181
- }
182
-
183
- static void _ccv_nnc_exec_dep_free (const ccv_nnc_exec_dep_t exec_dep ) // Release the memory held by an exec_dep returned from _ccv_nnc_exec_dep_new.
184
- {
185
- ccfree (exec_dep .chain_ids ); // chain_pos points into this same allocation, so it must not be freed separately.
186
- ccv_matrix_free (exec_dep .deps ); // Release the sparse dependency matrix.
187
- }
188
-
189
16
typedef struct {
190
17
int okay ;
191
18
int original ;
@@ -254,7 +81,7 @@ void ccv_nnc_symbolic_graph_memory_reduction(ccv_nnc_symbolic_graph_t* const gra
254
81
tensor_marked [d >> 5 ] &= ~(1u << (d & 0x1f ));
255
82
}
256
83
ccv_nnc_graph_visit_t * const reversed_visit = ccv_nnc_graph_visit_new (graph , reversed_nodes , exec_symbol_info_size , destinations , destination_size , sources , source_size , 0 );
257
- ccv_nnc_exec_dep_t exec_deps = _ccv_nnc_exec_dep_new (graph , visit , reversed_visit );
84
+ ccv_nnc_exec_dep_t exec_deps = ccv_nnc_exec_dep_new (graph , visit , reversed_visit );
258
85
ccv_nnc_graph_visit_free (reversed_visit );
259
86
// Now tensor_marked only contains the tensors that we think beneficial to reconvert. Find the best place to insert conversion.
260
87
ccv_nnc_conversion_info_t * const conversion_info = cccalloc (tensor_symbol_info_size , sizeof (ccv_nnc_conversion_info_t ));
@@ -304,8 +131,8 @@ void ccv_nnc_symbolic_graph_memory_reduction(ccv_nnc_symbolic_graph_t* const gra
304
131
for (k = 0 ; k < old_conversion_nodes -> rnum ; k ++ )
305
132
{
306
133
const int dd = * (int * )ccv_array_get (old_conversion_nodes , k );
307
- const int dist = _ccv_nnc_exec_dep_dist (exec_deps , d , vector , dd );
308
- if (dist >= 0 && dist <= 3 )
134
+ const int hop = ccv_nnc_exec_dep_hop (exec_deps , d , vector , dd );
135
+ if (hop >= 0 && hop <= 3 )
309
136
flag = 1 ;
310
137
}
311
138
if (flag )
@@ -338,7 +165,7 @@ void ccv_nnc_symbolic_graph_memory_reduction(ccv_nnc_symbolic_graph_t* const gra
338
165
continue ;
339
166
}
340
167
// Check dependencies, if there is a dependency from y node to dd, dd cannot be source.
341
- const int checked = _ccv_nnc_exec_dep_check (exec_deps , dd , ddd );
168
+ const int checked = ccv_nnc_exec_dep_check (exec_deps , dd , ddd );
342
169
if (checked )
343
170
flag = 1 ;
344
171
}
@@ -393,7 +220,7 @@ void ccv_nnc_symbolic_graph_memory_reduction(ccv_nnc_symbolic_graph_t* const gra
393
220
}
394
221
}
395
222
ccv_nnc_graph_visit_free (visit );
396
- _ccv_nnc_exec_dep_free (exec_deps );
223
+ ccv_nnc_exec_dep_free (exec_deps );
397
224
ccfree (tensor_marked );
398
225
for (i = 0 ; i < tensor_symbol_info_size ; i ++ )
399
226
{
0 commit comments