# NOTE: this module targets Python 2 and the networkx 1.x API
# (print statements, Graph.adjacency_iter, attr_dict arguments).
from __future__ import division
import math
import random
import networkx as nx
from fibonacci_heap_mod import Fibonacci_heap
from graph_util import *

# Graph compression/sparsification --
# Reduce a graph with n vertices and m edges to one with n vertices and
# O(n log n / epsilon**2) edges whose cut values approximate the original's
# within a (1 +/- epsilon) factor, in near-linear time, with high
# probability 1 - O(n ** -d).
#
# "Randomized Approximation Schemes for Cuts and Flows in Capacitated Graphs"
# Benczur, Karger 2008

# Sparsify a capacitated graph, using windowed strength estimation.
def weighted_sparsify(g, epsilon, d=0.5):
    compressed_g = nx.Graph()
    n = g.number_of_nodes()
    edge_strength = window_estimation(g)
    print 'max edge strength estimate:', max(edge_strength.values())
    compression_factor = 3 * (d + 4) * math.log(n) / (epsilon ** 2)
    for u, v, edge_data in g.edges(data=True):
        w = edge_data[EDGE_CAPACITY_ATTR]
        # Strength estimates are keyed by whichever orientation of the
        # undirected edge was recorded.
        if (u, v) in edge_strength:
            edge_str = edge_strength[(u, v)]
        else:
            edge_str = edge_strength[(v, u)]
        # Sample each edge with probability p_e; kept edges are reweighted
        # by 1 / p_e so cut values are preserved in expectation.
        p_e = min(1, compression_factor * w / edge_str)
        if p_e != 1 and random.random() > p_e:
            continue
        # keep the edge
        compressed_g.add_edge(u, v, {EDGE_CAPACITY_ATTR: w / p_e})
    return compressed_g

# Sparsify using direct recursive strength estimation starting at level 1.
def sparsify(g, epsilon, d=0.5):
    compressed_g = nx.Graph()
    n = g.number_of_nodes()
    edge_strength = estimation(g, 1)
    print 'max edge strength estimate:', max(edge_strength.values())
    compression_factor = 3 * (d + 4) * math.log(n) / (epsilon ** 2)
    for u, v, edge_data in g.edges(data=True):
        w = edge_data[EDGE_CAPACITY_ATTR]
        if (u, v) in edge_strength:
            edge_str = edge_strength[(u, v)]
        else:
            edge_str = edge_strength[(v, u)]
        p_e = min(1, compression_factor * w / edge_str)
        if p_e != 1 and random.random() > p_e:
            continue
        # keep the edge
        compressed_g.add_edge(u, v, {EDGE_CAPACITY_ATTR: w / p_e})
    return compressed_g

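# A minimal usage sketch (hypothetical, not part of the original module):
# sparsify a dense unit-capacity random graph and compare edge counts.
# Assumes graph_util provides EDGE_CAPACITY_ATTR and the helpers imported
# above, as the rest of this module does.
def _sparsify_demo(n=64, p=0.5, epsilon=0.5):
    g = nx.gnp_random_graph(n, p)
    for u, v in g.edges():
        g[u][v][EDGE_CAPACITY_ATTR] = 1
    h = sparsify(g, epsilon)
    print 'edges before:', g.number_of_edges(), 'after:', h.number_of_edges()
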
# Recursive edge-strength estimation from Benczur-Karger: edges that are
# weak at level k get strength estimate k; the rest are re-examined at
# level 2k inside each connected component that survives their removal.
def estimation(g, k):
    h = g.copy()
    approx_edge_strength = {}
    edges = weak_edges(h, 2 * k)
    for e in edges:
        approx_edge_strength[e] = k
        h.remove_edge(*e)
    for connected_comp in nx.connected_components(h):
        if len(connected_comp) <= 1:
            continue
        approx_edge_strength.update(estimation(h.subgraph(connected_comp), 2 * k))
    return approx_edge_strength

# Collect edges of strength at most ~k by peeling off up to log2(n)
# sparse certificates; their union contains the k-weak edges.
def weak_edges(g, k):
    edges = set()
    g_ = nx.MultiGraph(g)
    for _ in range(int(math.log(g_.number_of_nodes(), 2) + 0.5)):
        cert = partition(g_, 2 * k)
        edges.update(cert)
        g_.remove_edges_from(cert)
    return edges

# Sparse k-connectivity certificate: the union of the first k partitions
# E_1, ..., E_k from Nagamochi-Ibaraki FOREST preserves all cuts of value
# at most k.
def certificate(g, k):
    cert = set()
    forest = nagamochi_forest(g)
    for kk, es in forest.items():
        if kk <= k:
            cert.update(es)
    return cert

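# A hypothetical sanity check for `certificate`: each partition E_i is a
# forest, so the union of the first k has at most k * (n - 1) edges while
# still witnessing 2-edge-connectivity (here on K5).
def _certificate_demo():
    g = nx.MultiGraph(nx.complete_graph(5))
    cert = certificate(g, 2)
    assert len(cert) <= 2 * (g.number_of_nodes() - 1)
    print '2-certificate:', len(cert), 'of', g.number_of_edges(), 'edges'
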
# Capacitated analogue of `certificate`, using the CAPFOREST labels q(e):
# edges with q(e) <= k form a certificate for cuts of value at most k.
def weighted_certificate(g, k):
    edges = set()
    capforest = nagamochi_capforest(g)
    for key, val in capforest.items():
        if val <= k:
            edges.add(key)
    return edges

# PARTITION from Benczur-Karger: repeatedly contract everything outside a
# sparse certificate until at most 2k(n - 1) edges remain; the survivors
# are returned in terms of their original endpoints.
def partition(multi_g, k):
    for u, v, edict in multi_g.edges(data=True):
        edict['original_edge'] = (u, v)
    while multi_g.number_of_edges() > 2 * k * (multi_g.number_of_nodes() - 1):
        cert = weighted_certificate(multi_g, k)
        g_minus_cert = [(u, v, key) for (u, v, key) in multi_g.edges(keys=True)
                        if (u, v, key) not in cert and (v, u, key) not in cert]
        multi_g = multigraph_contract_edges(multi_g, g_minus_cert)
    return set(edict['original_edge'] for (_, _, edict) in multi_g.edges(data=True))

# Windowed strength estimation for capacitated graphs (Benczur-Karger):
# process edges in decreasing order of MST bottleneck distance, contracting
# edges far above the current window and estimating strengths one window
# at a time.
def window_estimation(g):
    # Binary search in a list sorted in decreasing order: returns the first
    # index whose value is < x.
    def reverse_sorted_bin_search(a, x, lo=0, hi=None):
        if hi is None:
            hi = len(a)
        while lo < hi:
            mid = (lo + hi) // 2
            midval = a[mid]
            if midval >= x:
                lo = mid + 1
            else:  # midval < x
                hi = mid
        return lo
    n = g.number_of_nodes()
    bottleneck_dict = compute_mst_bottleneck_dist(g)
    L = sorted(g.edges(), reverse=True, key=lambda e: bottleneck_dict[e])
    L_de_vals = [bottleneck_dict[e] for e in L]
    g_ = nx.MultiGraph()
    g_.add_nodes_from(g)
    edge_labels = {}
    while L:
        D = L_de_vals[0]
        # Edges far above the window cannot affect strength estimates at
        # this scale, so contract them away.
        contract_edges = [e for e in g_.edges() if bottleneck_dict[e] > n * n * D]
        g_ = multigraph_contract_edges(g_, contract_edges)
        # Pull in the window of edges with bottleneck distance in [D / n, D].
        split_pt = reverse_sorted_bin_search(L_de_vals, D / n)
        new_edges = L[:split_pt]
        L = L[split_pt:]
        L_de_vals = L_de_vals[split_pt:]
        for u, v in new_edges:
            g_.add_edge(u, v, attr_dict=g[u][v])
        strength_estimation = estimation(g_, max(D / n, 1))
        for u, v in new_edges:
            if (u, v) in strength_estimation:
                edge_labels[(u, v)] = strength_estimation[(u, v)]
            else:
                edge_labels[(v, u)] = strength_estimation[(v, u)]
    return edge_labels

# Finding forests of k-connected subtrees --
# "Computing Edge-Connectivity in Multigraphs and Capacitated Graphs"
# Hiroshi Nagamochi, Toshihide Ibaraki 1992
# Note that the Nagamochi-Ibaraki FOREST and CAPFOREST routines MUST work
# on multigraphs to be useful for Benczur's sparsification scheme.

# Return a list of all the edges in a multigraph (two for each undirected
# edge, forwards and backwards) as triples of (vertex, vertex, edge key).
# This makes keeping track of scanned edges easy, since the triples are
# hashable.
def multigraph_edges_as_triples(g):
    return [(u, v, i)
            for (u, vdict) in g.adjacency_iter()
            for (v, uvdict) in vdict.items()
            for i in uvdict.keys()]

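# A quick illustration (hypothetical helper): two parallel edges between
# 0 and 1 yield four triples, one per direction per edge key.
def _triples_demo():
    g = nx.MultiGraph()
    g.add_edge(0, 1)
    g.add_edge(0, 1)
    triples = multigraph_edges_as_triples(g)
    assert sorted(triples) == [(0, 1, 0), (0, 1, 1), (1, 0, 0), (1, 0, 1)]
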
# This is the Nagamochi-Ibaraki FOREST routine for unit-weight multigraphs.
# It returns a dictionary of partitions E_1, E_2, ..., E_|E| as described
# in Nagamochi's paper. The union G_k = (V, E_1 + E_2 + ... + E_k) is a
# k-edge-connected spanning subgraph whenever G itself is k-edge-connected.
def nagamochi_forest(g):
    unscanned_edges = set(multigraph_edges_as_triples(g))
    r = dict((node, 0) for node in g)
    graph_to_heap = {}
    # Min-heap keyed on -r(v), so dequeue_min pops the node with the
    # largest r-value first, as the algorithm requires.
    unscanned_r_heap = Fibonacci_heap()
    for node in g:
        graph_to_heap[node] = unscanned_r_heap.enqueue(node, 0)
    partitions = {}
    while unscanned_r_heap:
        x_heap_node = unscanned_r_heap.dequeue_min()
        x = x_heap_node.get_value()
        graph_to_heap[x] = None
        for y, xy_es in g[x].items():
            for i, edge_dict in xy_es.items():
                edge = (x, y, i)
                if edge not in unscanned_edges:
                    continue
                # Scan the edge in both directions at once.
                unscanned_edges.remove(edge)
                unscanned_edges.remove((y, x, i))
                k = r[y] + 1
                if k not in partitions:
                    partitions[k] = set()
                partitions[k].add(edge)
                if r[x] == r[y]:
                    r[x] += 1
                r[y] += 1
                y_heap_node = graph_to_heap[y]
                y_prio = y_heap_node.get_priority()
                unscanned_r_heap.decrease_key(y_heap_node, y_prio - 1)
    return partitions

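# A hypothetical check of FOREST on a triangle: E_1 always comes out as a
# spanning tree (two edges) and E_2 as the single remaining edge.
def _forest_demo():
    g = nx.MultiGraph(nx.cycle_graph(3))
    partitions = nagamochi_forest(g)
    assert len(partitions[1]) == 2
    assert len(partitions[2]) == 1
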
# This is the Nagamochi-Ibaraki CAPFOREST routine, which is similar to the
# above but for capacitated multigraphs.
# The edge capacity attribute is assumed to be graph_util.EDGE_CAPACITY_ATTR.
# Returns similar partitions, but as a dictionary from edge triples to the
# partition index value q(e) described in the paper.
def nagamochi_capforest(g):
    unscanned_edges = set(multigraph_edges_as_triples(g))
    r = dict((node, 0) for node in g)
    q = {}
    graph_to_heap = {}
    unscanned_r_heap = Fibonacci_heap()
    for node in g:
        graph_to_heap[node] = unscanned_r_heap.enqueue(node, 0)
    while unscanned_r_heap:
        x_heap_node = unscanned_r_heap.dequeue_min()
        x = x_heap_node.get_value()
        for y, xy_dict in g[x].items():
            for i, edge_data in xy_dict.items():
                edge = (x, y, i)
                if edge not in unscanned_edges:
                    continue
                unscanned_edges.remove(edge)
                unscanned_edges.remove((y, x, i))
                c = edge_data[EDGE_CAPACITY_ATTR]
                q[edge] = r[y] + c
                r[y] = r[y] + c
                y_heap_node = graph_to_heap[y]
                y_prio = y_heap_node.get_priority()
                unscanned_r_heap.decrease_key(y_heap_node, y_prio - c)
    return q

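# A hypothetical smoke test for CAPFOREST: on a unit-capacity triangle the
# q labels match the FOREST partition indices, i.e. sorted values (1, 1, 2).
# Assumes EDGE_CAPACITY_ATTR is a string attribute name, per graph_util.
def _capforest_demo():
    g = nx.MultiGraph()
    for u, v in [(0, 1), (1, 2), (2, 0)]:
        g.add_edge(u, v, **{EDGE_CAPACITY_ATTR: 1})
    q = nagamochi_capforest(g)
    assert sorted(q.values()) == [1, 1, 2]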