ioftools / networkxMiCe / networkxmaster / networkx / algorithms / similarity.py @ 5cef0f13
History  View  Annotate  Download (47.5 KB)
1 
# * coding: utf8 *


2 
# Copyright (C) 2010 by

3 
# Aric Hagberg <hagberg@lanl.gov>

4 
# Dan Schult <dschult@colgate.edu>

5 
# Pieter Swart <swart@lanl.gov>

6 
# All rights reserved.

7 
# BSD license.

8 
#

9 
# Author: Andrey Paramonov <paramon@acdlabs.ru>

10 
""" Functions measuring similarity using graph edit distance.

11 

12 
The graph edit distance is the number of edge/node changes needed

13 
to make two graphs isomorphic.

14 

15 
The default algorithm/implementation is suboptimal for some graphs.

16 
The problem of finding the exact Graph Edit Distance (GED) is NPhard

17 
so it is often slow. If the simple interface `graph_edit_distance`

18 
takes too long for your graph, try `optimize_graph_edit_distance`

19 
and/or `optimize_edit_paths`.

20 

21 
At the same time, I encourage capable people to investigate

22 
alternative GED algorithms, in order to improve the choices available.

23 
"""

24 
from __future__ import print_function, division 
25 
from itertools import product 
26 
import math 
27 
import networkx as nx 
28 
from operator import * 
29 
import sys 
30  
31 
__author__ = 'Andrey Paramonov <paramon@acdlabs.ru>'

32  
33 
__all__ = [ 
34 
'graph_edit_distance',

35 
'optimal_edit_paths',

36 
'optimize_graph_edit_distance',

37 
'optimize_edit_paths',

38 
'simrank_similarity',

39 
'simrank_similarity_numpy',

40 
] 
41  
42  
43 
def debug_print(*args, **kwargs): 
44 
print(*args, **kwargs) 
45  
46  
47 
def graph_edit_distance(G1, G2, node_match=None, edge_match=None, 
48 
node_subst_cost=None, node_del_cost=None, 
49 
node_ins_cost=None,

50 
edge_subst_cost=None, edge_del_cost=None, 
51 
edge_ins_cost=None,

52 
upper_bound=None):

53 
"""Returns GED (graph edit distance) between graphs G1 and G2.

54 

55 
Graph edit distance is a graph similarity measure analogous to

56 
Levenshtein distance for strings. It is defined as minimum cost

57 
of edit path (sequence of node and edge edit operations)

58 
transforming graph G1 to graph isomorphic to G2.

59 

60 
Parameters

61 


62 
G1, G2: graphs

63 
The two graphs G1 and G2 must be of the same type.

64 

65 
node_match : callable

66 
A function that returns True if node n1 in G1 and n2 in G2

67 
should be considered equal during matching.

68 

69 
The function will be called like

70 

71 
node_match(G1.nodes[n1], G2.nodes[n2]).

72 

73 
That is, the function will receive the node attribute

74 
dictionaries for n1 and n2 as inputs.

75 

76 
Ignored if node_subst_cost is specified. If neither

77 
node_match nor node_subst_cost are specified then node

78 
attributes are not considered.

79 

80 
edge_match : callable

81 
A function that returns True if the edge attribute dictionaries

82 
for the pair of nodes (u1, v1) in G1 and (u2, v2) in G2 should

83 
be considered equal during matching.

84 

85 
The function will be called like

86 

87 
edge_match(G1[u1][v1], G2[u2][v2]).

88 

89 
That is, the function will receive the edge attribute

90 
dictionaries of the edges under consideration.

91 

92 
Ignored if edge_subst_cost is specified. If neither

93 
edge_match nor edge_subst_cost are specified then edge

94 
attributes are not considered.

95 

96 
node_subst_cost, node_del_cost, node_ins_cost : callable

97 
Functions that return the costs of node substitution, node

98 
deletion, and node insertion, respectively.

99 

100 
The functions will be called like

101 

102 
node_subst_cost(G1.nodes[n1], G2.nodes[n2]),

103 
node_del_cost(G1.nodes[n1]),

104 
node_ins_cost(G2.nodes[n2]).

105 

106 
That is, the functions will receive the node attribute

107 
dictionaries as inputs. The functions are expected to return

108 
positive numeric values.

109 

110 
Function node_subst_cost overrides node_match if specified.

111 
If neither node_match nor node_subst_cost are specified then

112 
default node substitution cost of 0 is used (node attributes

113 
are not considered during matching).

114 

115 
If node_del_cost is not specified then default node deletion

116 
cost of 1 is used. If node_ins_cost is not specified then

117 
default node insertion cost of 1 is used.

118 

119 
edge_subst_cost, edge_del_cost, edge_ins_cost : callable

120 
Functions that return the costs of edge substitution, edge

121 
deletion, and edge insertion, respectively.

122 

123 
The functions will be called like

124 

125 
edge_subst_cost(G1[u1][v1], G2[u2][v2]),

126 
edge_del_cost(G1[u1][v1]),

127 
edge_ins_cost(G2[u2][v2]).

128 

129 
That is, the functions will receive the edge attribute

130 
dictionaries as inputs. The functions are expected to return

131 
positive numeric values.

132 

133 
Function edge_subst_cost overrides edge_match if specified.

134 
If neither edge_match nor edge_subst_cost are specified then

135 
default edge substitution cost of 0 is used (edge attributes

136 
are not considered during matching).

137 

138 
If edge_del_cost is not specified then default edge deletion

139 
cost of 1 is used. If edge_ins_cost is not specified then

140 
default edge insertion cost of 1 is used.

141 

142 
upper_bound : numeric

143 
Maximum edit distance to consider. Return None if no edit

144 
distance under or equal to upper_bound exists.

145 

146 
Examples

147 


148 
>>> G1 = nx.cycle_graph(6)

149 
>>> G2 = nx.wheel_graph(7)

150 
>>> nx.graph_edit_distance(G1, G2)

151 
7.0

152 

153 
See Also

154 


155 
optimal_edit_paths, optimize_graph_edit_distance,

156 

157 
is_isomorphic (test for graph edit distance of 0)

158 

159 
References

160 


161 
.. [1] Zeina AbuAisheh, Romain Raveaux, JeanYves Ramel, Patrick

162 
Martineau. An Exact Graph Edit Distance Algorithm for Solving

163 
Pattern Recognition Problems. 4th International Conference on

164 
Pattern Recognition Applications and Methods 2015, Jan 2015,

165 
Lisbon, Portugal. 2015,

166 
<10.5220/0005209202710278>. <hal01168816>

167 
https://hal.archivesouvertes.fr/hal01168816

168 

169 
"""

170 
bestcost = None

171 
for vertex_path, edge_path, cost in \ 
172 
optimize_edit_paths(G1, G2, node_match, edge_match, 
173 
node_subst_cost, node_del_cost, node_ins_cost, 
174 
edge_subst_cost, edge_del_cost, edge_ins_cost, 
175 
upper_bound, True):

176 
#assert bestcost is None or cost < bestcost

177 
bestcost = cost 
178 
return bestcost

179  
180  
181 
def optimal_edit_paths(G1, G2, node_match=None, edge_match=None, 
182 
node_subst_cost=None, node_del_cost=None, 
183 
node_ins_cost=None,

184 
edge_subst_cost=None, edge_del_cost=None, 
185 
edge_ins_cost=None,

186 
upper_bound=None):

187 
"""Returns all minimumcost edit paths transforming G1 to G2.

188 

189 
Graph edit path is a sequence of node and edge edit operations

190 
transforming graph G1 to graph isomorphic to G2. Edit operations

191 
include substitutions, deletions, and insertions.

192 

193 
Parameters

194 


195 
G1, G2: graphs

196 
The two graphs G1 and G2 must be of the same type.

197 

198 
node_match : callable

199 
A function that returns True if node n1 in G1 and n2 in G2

200 
should be considered equal during matching.

201 

202 
The function will be called like

203 

204 
node_match(G1.nodes[n1], G2.nodes[n2]).

205 

206 
That is, the function will receive the node attribute

207 
dictionaries for n1 and n2 as inputs.

208 

209 
Ignored if node_subst_cost is specified. If neither

210 
node_match nor node_subst_cost are specified then node

211 
attributes are not considered.

212 

213 
edge_match : callable

214 
A function that returns True if the edge attribute dictionaries

215 
for the pair of nodes (u1, v1) in G1 and (u2, v2) in G2 should

216 
be considered equal during matching.

217 

218 
The function will be called like

219 

220 
edge_match(G1[u1][v1], G2[u2][v2]).

221 

222 
That is, the function will receive the edge attribute

223 
dictionaries of the edges under consideration.

224 

225 
Ignored if edge_subst_cost is specified. If neither

226 
edge_match nor edge_subst_cost are specified then edge

227 
attributes are not considered.

228 

229 
node_subst_cost, node_del_cost, node_ins_cost : callable

230 
Functions that return the costs of node substitution, node

231 
deletion, and node insertion, respectively.

232 

233 
The functions will be called like

234 

235 
node_subst_cost(G1.nodes[n1], G2.nodes[n2]),

236 
node_del_cost(G1.nodes[n1]),

237 
node_ins_cost(G2.nodes[n2]).

238 

239 
That is, the functions will receive the node attribute

240 
dictionaries as inputs. The functions are expected to return

241 
positive numeric values.

242 

243 
Function node_subst_cost overrides node_match if specified.

244 
If neither node_match nor node_subst_cost are specified then

245 
default node substitution cost of 0 is used (node attributes

246 
are not considered during matching).

247 

248 
If node_del_cost is not specified then default node deletion

249 
cost of 1 is used. If node_ins_cost is not specified then

250 
default node insertion cost of 1 is used.

251 

252 
edge_subst_cost, edge_del_cost, edge_ins_cost : callable

253 
Functions that return the costs of edge substitution, edge

254 
deletion, and edge insertion, respectively.

255 

256 
The functions will be called like

257 

258 
edge_subst_cost(G1[u1][v1], G2[u2][v2]),

259 
edge_del_cost(G1[u1][v1]),

260 
edge_ins_cost(G2[u2][v2]).

261 

262 
That is, the functions will receive the edge attribute

263 
dictionaries as inputs. The functions are expected to return

264 
positive numeric values.

265 

266 
Function edge_subst_cost overrides edge_match if specified.

267 
If neither edge_match nor edge_subst_cost are specified then

268 
default edge substitution cost of 0 is used (edge attributes

269 
are not considered during matching).

270 

271 
If edge_del_cost is not specified then default edge deletion

272 
cost of 1 is used. If edge_ins_cost is not specified then

273 
default edge insertion cost of 1 is used.

274 

275 
upper_bound : numeric

276 
Maximum edit distance to consider.

277 

278 
Returns

279 


280 
edit_paths : list of tuples (node_edit_path, edge_edit_path)

281 
node_edit_path : list of tuples (u, v)

282 
edge_edit_path : list of tuples ((u1, v1), (u2, v2))

283 

284 
cost : numeric

285 
Optimal edit path cost (graph edit distance).

286 

287 
Examples

288 


289 
>>> G1 = nx.cycle_graph(6)

290 
>>> G2 = nx.wheel_graph(7)

291 
>>> paths, cost = nx.optimal_edit_paths(G1, G2)

292 
>>> len(paths)

293 
84

294 
>>> cost

295 
7.0

296 

297 
See Also

298 


299 
graph_edit_distance, optimize_edit_paths

300 

301 
References

302 


303 
.. [1] Zeina AbuAisheh, Romain Raveaux, JeanYves Ramel, Patrick

304 
Martineau. An Exact Graph Edit Distance Algorithm for Solving

305 
Pattern Recognition Problems. 4th International Conference on

306 
Pattern Recognition Applications and Methods 2015, Jan 2015,

307 
Lisbon, Portugal. 2015,

308 
<10.5220/0005209202710278>. <hal01168816>

309 
https://hal.archivesouvertes.fr/hal01168816

310 

311 
"""

312 
paths = list()

313 
bestcost = None

314 
for vertex_path, edge_path, cost in \ 
315 
optimize_edit_paths(G1, G2, node_match, edge_match, 
316 
node_subst_cost, node_del_cost, node_ins_cost, 
317 
edge_subst_cost, edge_del_cost, edge_ins_cost, 
318 
upper_bound, False):

319 
#assert bestcost is None or cost <= bestcost

320 
if bestcost is not None and cost < bestcost: 
321 
paths = list()

322 
paths.append((vertex_path, edge_path)) 
323 
bestcost = cost 
324 
return paths, bestcost

325  
326  
327 
def optimize_graph_edit_distance(G1, G2, node_match=None, edge_match=None, 
328 
node_subst_cost=None, node_del_cost=None, 
329 
node_ins_cost=None,

330 
edge_subst_cost=None, edge_del_cost=None, 
331 
edge_ins_cost=None,

332 
upper_bound=None):

333 
"""Returns consecutive approximations of GED (graph edit distance)

334 
between graphs G1 and G2.

335 

336 
Graph edit distance is a graph similarity measure analogous to

337 
Levenshtein distance for strings. It is defined as minimum cost

338 
of edit path (sequence of node and edge edit operations)

339 
transforming graph G1 to graph isomorphic to G2.

340 

341 
Parameters

342 


343 
G1, G2: graphs

344 
The two graphs G1 and G2 must be of the same type.

345 

346 
node_match : callable

347 
A function that returns True if node n1 in G1 and n2 in G2

348 
should be considered equal during matching.

349 

350 
The function will be called like

351 

352 
node_match(G1.nodes[n1], G2.nodes[n2]).

353 

354 
That is, the function will receive the node attribute

355 
dictionaries for n1 and n2 as inputs.

356 

357 
Ignored if node_subst_cost is specified. If neither

358 
node_match nor node_subst_cost are specified then node

359 
attributes are not considered.

360 

361 
edge_match : callable

362 
A function that returns True if the edge attribute dictionaries

363 
for the pair of nodes (u1, v1) in G1 and (u2, v2) in G2 should

364 
be considered equal during matching.

365 

366 
The function will be called like

367 

368 
edge_match(G1[u1][v1], G2[u2][v2]).

369 

370 
That is, the function will receive the edge attribute

371 
dictionaries of the edges under consideration.

372 

373 
Ignored if edge_subst_cost is specified. If neither

374 
edge_match nor edge_subst_cost are specified then edge

375 
attributes are not considered.

376 

377 
node_subst_cost, node_del_cost, node_ins_cost : callable

378 
Functions that return the costs of node substitution, node

379 
deletion, and node insertion, respectively.

380 

381 
The functions will be called like

382 

383 
node_subst_cost(G1.nodes[n1], G2.nodes[n2]),

384 
node_del_cost(G1.nodes[n1]),

385 
node_ins_cost(G2.nodes[n2]).

386 

387 
That is, the functions will receive the node attribute

388 
dictionaries as inputs. The functions are expected to return

389 
positive numeric values.

390 

391 
Function node_subst_cost overrides node_match if specified.

392 
If neither node_match nor node_subst_cost are specified then

393 
default node substitution cost of 0 is used (node attributes

394 
are not considered during matching).

395 

396 
If node_del_cost is not specified then default node deletion

397 
cost of 1 is used. If node_ins_cost is not specified then

398 
default node insertion cost of 1 is used.

399 

400 
edge_subst_cost, edge_del_cost, edge_ins_cost : callable

401 
Functions that return the costs of edge substitution, edge

402 
deletion, and edge insertion, respectively.

403 

404 
The functions will be called like

405 

406 
edge_subst_cost(G1[u1][v1], G2[u2][v2]),

407 
edge_del_cost(G1[u1][v1]),

408 
edge_ins_cost(G2[u2][v2]).

409 

410 
That is, the functions will receive the edge attribute

411 
dictionaries as inputs. The functions are expected to return

412 
positive numeric values.

413 

414 
Function edge_subst_cost overrides edge_match if specified.

415 
If neither edge_match nor edge_subst_cost are specified then

416 
default edge substitution cost of 0 is used (edge attributes

417 
are not considered during matching).

418 

419 
If edge_del_cost is not specified then default edge deletion

420 
cost of 1 is used. If edge_ins_cost is not specified then

421 
default edge insertion cost of 1 is used.

422 

423 
upper_bound : numeric

424 
Maximum edit distance to consider.

425 

426 
Returns

427 


428 
Generator of consecutive approximations of graph edit distance.

429 

430 
Examples

431 


432 
>>> G1 = nx.cycle_graph(6)

433 
>>> G2 = nx.wheel_graph(7)

434 
>>> for v in nx.optimize_graph_edit_distance(G1, G2):

435 
... minv = v

436 
>>> minv

437 
7.0

438 

439 
See Also

440 


441 
graph_edit_distance, optimize_edit_paths

442 

443 
References

444 


445 
.. [1] Zeina AbuAisheh, Romain Raveaux, JeanYves Ramel, Patrick

446 
Martineau. An Exact Graph Edit Distance Algorithm for Solving

447 
Pattern Recognition Problems. 4th International Conference on

448 
Pattern Recognition Applications and Methods 2015, Jan 2015,

449 
Lisbon, Portugal. 2015,

450 
<10.5220/0005209202710278>. <hal01168816>

451 
https://hal.archivesouvertes.fr/hal01168816

452 
"""

453 
for vertex_path, edge_path, cost in \ 
454 
optimize_edit_paths(G1, G2, node_match, edge_match, 
455 
node_subst_cost, node_del_cost, node_ins_cost, 
456 
edge_subst_cost, edge_del_cost, edge_ins_cost, 
457 
upper_bound, True):

458 
yield cost

459  
460  
461 
def optimize_edit_paths(G1, G2, node_match=None, edge_match=None, 
462 
node_subst_cost=None, node_del_cost=None, 
463 
node_ins_cost=None,

464 
edge_subst_cost=None, edge_del_cost=None, 
465 
edge_ins_cost=None,

466 
upper_bound=None, strictly_decreasing=True): 
467 
"""GED (graph edit distance) calculation: advanced interface.

468 

469 
Graph edit path is a sequence of node and edge edit operations

470 
transforming graph G1 to graph isomorphic to G2. Edit operations

471 
include substitutions, deletions, and insertions.

472 

473 
Graph edit distance is defined as minimum cost of edit path.

474 

475 
Parameters

476 


477 
G1, G2: graphs

478 
The two graphs G1 and G2 must be of the same type.

479 

480 
node_match : callable

481 
A function that returns True if node n1 in G1 and n2 in G2

482 
should be considered equal during matching.

483 

484 
The function will be called like

485 

486 
node_match(G1.nodes[n1], G2.nodes[n2]).

487 

488 
That is, the function will receive the node attribute

489 
dictionaries for n1 and n2 as inputs.

490 

491 
Ignored if node_subst_cost is specified. If neither

492 
node_match nor node_subst_cost are specified then node

493 
attributes are not considered.

494 

495 
edge_match : callable

496 
A function that returns True if the edge attribute dictionaries

497 
for the pair of nodes (u1, v1) in G1 and (u2, v2) in G2 should

498 
be considered equal during matching.

499 

500 
The function will be called like

501 

502 
edge_match(G1[u1][v1], G2[u2][v2]).

503 

504 
That is, the function will receive the edge attribute

505 
dictionaries of the edges under consideration.

506 

507 
Ignored if edge_subst_cost is specified. If neither

508 
edge_match nor edge_subst_cost are specified then edge

509 
attributes are not considered.

510 

511 
node_subst_cost, node_del_cost, node_ins_cost : callable

512 
Functions that return the costs of node substitution, node

513 
deletion, and node insertion, respectively.

514 

515 
The functions will be called like

516 

517 
node_subst_cost(G1.nodes[n1], G2.nodes[n2]),

518 
node_del_cost(G1.nodes[n1]),

519 
node_ins_cost(G2.nodes[n2]).

520 

521 
That is, the functions will receive the node attribute

522 
dictionaries as inputs. The functions are expected to return

523 
positive numeric values.

524 

525 
Function node_subst_cost overrides node_match if specified.

526 
If neither node_match nor node_subst_cost are specified then

527 
default node substitution cost of 0 is used (node attributes

528 
are not considered during matching).

529 

530 
If node_del_cost is not specified then default node deletion

531 
cost of 1 is used. If node_ins_cost is not specified then

532 
default node insertion cost of 1 is used.

533 

534 
edge_subst_cost, edge_del_cost, edge_ins_cost : callable

535 
Functions that return the costs of edge substitution, edge

536 
deletion, and edge insertion, respectively.

537 

538 
The functions will be called like

539 

540 
edge_subst_cost(G1[u1][v1], G2[u2][v2]),

541 
edge_del_cost(G1[u1][v1]),

542 
edge_ins_cost(G2[u2][v2]).

543 

544 
That is, the functions will receive the edge attribute

545 
dictionaries as inputs. The functions are expected to return

546 
positive numeric values.

547 

548 
Function edge_subst_cost overrides edge_match if specified.

549 
If neither edge_match nor edge_subst_cost are specified then

550 
default edge substitution cost of 0 is used (edge attributes

551 
are not considered during matching).

552 

553 
If edge_del_cost is not specified then default edge deletion

554 
cost of 1 is used. If edge_ins_cost is not specified then

555 
default edge insertion cost of 1 is used.

556 

557 
upper_bound : numeric

558 
Maximum edit distance to consider.

559 

560 
strictly_decreasing : bool

561 
If True, return consecutive approximations of strictly

562 
decreasing cost. Otherwise, return all edit paths of cost

563 
less than or equal to the previous minimum cost.

564 

565 
Returns

566 


567 
Generator of tuples (node_edit_path, edge_edit_path, cost)

568 
node_edit_path : list of tuples (u, v)

569 
edge_edit_path : list of tuples ((u1, v1), (u2, v2))

570 
cost : numeric

571 

572 
See Also

573 


574 
graph_edit_distance, optimize_graph_edit_distance, optimal_edit_paths

575 

576 
References

577 


578 
.. [1] Zeina AbuAisheh, Romain Raveaux, JeanYves Ramel, Patrick

579 
Martineau. An Exact Graph Edit Distance Algorithm for Solving

580 
Pattern Recognition Problems. 4th International Conference on

581 
Pattern Recognition Applications and Methods 2015, Jan 2015,

582 
Lisbon, Portugal. 2015,

583 
<10.5220/0005209202710278>. <hal01168816>

584 
https://hal.archivesouvertes.fr/hal01168816

585 

586 
"""

587 
# TODO: support DiGraph

588  
589 
import numpy as np 
590 
from scipy.optimize import linear_sum_assignment 
591  
592 
class CostMatrix: 
593 
def __init__(self, C, lsa_row_ind, lsa_col_ind, ls): 
594 
#assert C.shape[0] == len(lsa_row_ind)

595 
#assert C.shape[1] == len(lsa_col_ind)

596 
#assert len(lsa_row_ind) == len(lsa_col_ind)

597 
#assert set(lsa_row_ind) == set(range(len(lsa_row_ind)))

598 
#assert set(lsa_col_ind) == set(range(len(lsa_col_ind)))

599 
#assert ls == C[lsa_row_ind, lsa_col_ind].sum()

600 
self.C = C

601 
self.lsa_row_ind = lsa_row_ind

602 
self.lsa_col_ind = lsa_col_ind

603 
self.ls = ls

604  
605 
def make_CostMatrix(C, m, n): 
606 
#assert(C.shape == (m + n, m + n))

607 
lsa_row_ind, lsa_col_ind = linear_sum_assignment(C) 
608  
609 
# Fixup dummy assignments:

610 
# each substitution i<>j should have dummy assignment m+j<>n+i

611 
# NOTE: fast reduce of Cv relies on it

612 
#assert len(lsa_row_ind) == len(lsa_col_ind)

613 
indexes = zip(range(len(lsa_row_ind)), lsa_row_ind, lsa_col_ind) 
614 
subst_ind = list(k for k, i, j in indexes if i < m and j < n) 
615 
indexes = zip(range(len(lsa_row_ind)), lsa_row_ind, lsa_col_ind) 
616 
dummy_ind = list(k for k, i, j in indexes if i >= m and j >= n) 
617 
#assert len(subst_ind) == len(dummy_ind)

618 
lsa_row_ind[dummy_ind] = lsa_col_ind[subst_ind] + m 
619 
lsa_col_ind[dummy_ind] = lsa_row_ind[subst_ind] + n 
620  
621 
return CostMatrix(C, lsa_row_ind, lsa_col_ind,

622 
C[lsa_row_ind, lsa_col_ind].sum()) 
623  
624 
def extract_C(C, i, j, m, n): 
625 
#assert(C.shape == (m + n, m + n))

626 
row_ind = [k in i or k  m in j for k in range(m + n)] 
627 
col_ind = [k in j or k  n in i for k in range(m + n)] 
628 
return C[row_ind, :][:, col_ind]

629  
630 
def reduce_C(C, i, j, m, n): 
631 
#assert(C.shape == (m + n, m + n))

632 
row_ind = [k not in i and k  m not in j for k in range(m + n)] 
633 
col_ind = [k not in j and k  n not in i for k in range(m + n)] 
634 
return C[row_ind, :][:, col_ind]

635  
636 
def reduce_ind(ind, i): 
637 
#assert set(ind) == set(range(len(ind)))

638 
rind = ind[[k not in i for k in ind]] 
639 
for k in set(i): 
640 
rind[rind >= k] = 1

641 
return rind

642  
643 
def match_edges(u, v, pending_g, pending_h, Ce, matched_uv=[]): 
644 
"""

645 
Parameters:

646 
u, v: matched vertices, u=None or v=None for

647 
deletion/insertion

648 
pending_g, pending_h: lists of edges not yet mapped

649 
Ce: CostMatrix of pending edge mappings

650 
matched_uv: partial vertex edit path

651 
list of tuples (u, v) of previously matched vertex

652 
mappings u<>v, u=None or v=None for

653 
deletion/insertion

654 

655 
Returns:

656 
list of (i, j): indices of edge mappings g<>h

657 
localCe: local CostMatrix of edge mappings

658 
(basically submatrix of Ce at cross of rows i, cols j)

659 
"""

660 
M = len(pending_g)

661 
N = len(pending_h)

662 
#assert Ce.C.shape == (M + N, M + N)

663  
664 
g_ind = [i for i in range(M) if pending_g[i][:2] == (u, u) or 
665 
any(pending_g[i][:2] in ((p, u), (u, p)) 
666 
for p, q in matched_uv)] 
667 
h_ind = [j for j in range(N) if pending_h[j][:2] == (v, v) or 
668 
any(pending_h[j][:2] in ((q, v), (v, q)) 
669 
for p, q in matched_uv)] 
670 
m = len(g_ind)

671 
n = len(h_ind)

672  
673 
if m or n: 
674 
C = extract_C(Ce.C, g_ind, h_ind, M, N) 
675 
#assert C.shape == (m + n, m + n)

676  
677 
# Forbid structurally invalid matches

678 
# NOTE: inf remembered from Ce construction

679 
for k, i in zip(range(m), g_ind): 
680 
g = pending_g[i][:2]

681 
for l, j in zip(range(n), h_ind): 
682 
h = pending_h[j][:2]

683 
if nx.is_directed(G1) or nx.is_directed(G2): 
684 
if any(g == (p, u) and h == (q, v) or 
685 
g == (u, p) and h == (v, q)

686 
for p, q in matched_uv): 
687 
continue

688 
else:

689 
if any(g in ((p, u), (u, p)) and h in ((q, v), (v, q)) 
690 
for p, q in matched_uv): 
691 
continue

692 
if g == (u, u):

693 
continue

694 
if h == (v, v):

695 
continue

696 
C[k, l] = inf 
697  
698 
localCe = make_CostMatrix(C, m, n) 
699 
ij = list((g_ind[k] if k < m else M + h_ind[l], 
700 
h_ind[l] if l < n else N + g_ind[k]) 
701 
for k, l in zip(localCe.lsa_row_ind, localCe.lsa_col_ind) 
702 
if k < m or l < n) 
703  
704 
else:

705 
ij = [] 
706 
localCe = CostMatrix(np.empty((0, 0)), [], [], 0) 
707  
708 
return ij, localCe

709  
710 
def reduce_Ce(Ce, ij, m, n): 
711 
if len(ij): 
712 
i, j = zip(*ij)

713 
m_i = m  sum(1 for t in i if t < m) 
714 
n_j = n  sum(1 for t in j if t < n) 
715 
return make_CostMatrix(reduce_C(Ce.C, i, j, m, n), m_i, n_j)

716 
else:

717 
return Ce

718  
719 
def get_edit_ops(matched_uv, pending_u, pending_v, Cv, 
720 
pending_g, pending_h, Ce, matched_cost): 
721 
"""

722 
Parameters:

723 
matched_uv: partial vertex edit path

724 
list of tuples (u, v) of vertex mappings u<>v,

725 
u=None or v=None for deletion/insertion

726 
pending_u, pending_v: lists of vertices not yet mapped

727 
Cv: CostMatrix of pending vertex mappings

728 
pending_g, pending_h: lists of edges not yet mapped

729 
Ce: CostMatrix of pending edge mappings

730 
matched_cost: cost of partial edit path

731 

732 
Returns:

733 
sequence of

734 
(i, j): indices of vertex mapping u<>v

735 
Cv_ij: reduced CostMatrix of pending vertex mappings

736 
(basically Cv with row i, col j removed)

737 
list of (x, y): indices of edge mappings g<>h

738 
Ce_xy: reduced CostMatrix of pending edge mappings

739 
(basically Ce with rows x, cols y removed)

740 
cost: total cost of edit operation

741 
NOTE: most promising ops first

742 
"""

743 
m = len(pending_u)

744 
n = len(pending_v)

745 
#assert Cv.C.shape == (m + n, m + n)

746  
747 
# 1) a vertex mapping from optimal linear sum assignment

748 
i, j = min((k, l) for k, l in zip(Cv.lsa_row_ind, Cv.lsa_col_ind) 
749 
if k < m or l < n) 
750 
xy, localCe = match_edges(pending_u[i] if i < m else None, 
751 
pending_v[j] if j < n else None, 
752 
pending_g, pending_h, Ce, matched_uv) 
753 
Ce_xy = reduce_Ce(Ce, xy, len(pending_g), len(pending_h)) 
754 
#assert Ce.ls <= localCe.ls + Ce_xy.ls

755 
if prune(matched_cost + Cv.ls + localCe.ls + Ce_xy.ls):

756 
pass

757 
else:

758 
# get reduced Cv efficiently

759 
Cv_ij = CostMatrix(reduce_C(Cv.C, (i,), (j,), m, n), 
760 
reduce_ind(Cv.lsa_row_ind, (i, m + j)), 
761 
reduce_ind(Cv.lsa_col_ind, (j, n + i)), 
762 
Cv.ls  Cv.C[i, j]) 
763 
yield (i, j), Cv_ij, xy, Ce_xy, Cv.C[i, j] + localCe.ls

764  
765 
# 2) other candidates, sorted by lowerbound cost estimate

766 
other = list()

767 
fixed_i, fixed_j = i, j 
768 
if m <= n:

769 
candidates = ((t, fixed_j) for t in range(m + n) 
770 
if t != fixed_i and (t < m or t == m + fixed_j)) 
771 
else:

772 
candidates = ((fixed_i, t) for t in range(m + n) 
773 
if t != fixed_j and (t < n or t == n + fixed_i)) 
774 
for i, j in candidates: 
775 
if prune(matched_cost + Cv.C[i, j] + Ce.ls):

776 
continue

777 
Cv_ij = make_CostMatrix(reduce_C(Cv.C, (i,), (j,), m, n), 
778 
m  1 if i < m else m, 
779 
n  1 if j < n else n) 
780 
#assert Cv.ls <= Cv.C[i, j] + Cv_ij.ls

781 
if prune(matched_cost + Cv.C[i, j] + Cv_ij.ls + Ce.ls):

782 
continue

783 
xy, localCe = match_edges(pending_u[i] if i < m else None, 
784 
pending_v[j] if j < n else None, 
785 
pending_g, pending_h, Ce, matched_uv) 
786 
if prune(matched_cost + Cv.C[i, j] + Cv_ij.ls + localCe.ls):

787 
continue

788 
Ce_xy = reduce_Ce(Ce, xy, len(pending_g), len(pending_h)) 
789 
#assert Ce.ls <= localCe.ls + Ce_xy.ls

790 
if prune(matched_cost + Cv.C[i, j] + Cv_ij.ls + localCe.ls +

791 
Ce_xy.ls): 
792 
continue

793 
other.append(((i, j), Cv_ij, xy, Ce_xy, Cv.C[i, j] + localCe.ls)) 
794  
795 
# yield from

796 
for t in sorted(other, key=lambda t: t[4] + t[1].ls + t[3].ls): 
797 
yield t

798  
799 
def get_edit_paths(matched_uv, pending_u, pending_v, Cv, 
800 
matched_gh, pending_g, pending_h, Ce, matched_cost): 
801 
"""

802 
Parameters:

803 
matched_uv: partial vertex edit path

804 
list of tuples (u, v) of vertex mappings u<>v,

805 
u=None or v=None for deletion/insertion

806 
pending_u, pending_v: lists of vertices not yet mapped

807 
Cv: CostMatrix of pending vertex mappings

808 
matched_gh: partial edge edit path

809 
list of tuples (g, h) of edge mappings g<>h,

810 
g=None or h=None for deletion/insertion

811 
pending_g, pending_h: lists of edges not yet mapped

812 
Ce: CostMatrix of pending edge mappings

813 
matched_cost: cost of partial edit path

814 

815 
Returns:

816 
sequence of (vertex_path, edge_path, cost)

817 
vertex_path: complete vertex edit path

818 
list of tuples (u, v) of vertex mappings u<>v,

819 
u=None or v=None for deletion/insertion

820 
edge_path: complete edge edit path

821 
list of tuples (g, h) of edge mappings g<>h,

822 
g=None or h=None for deletion/insertion

823 
cost: total cost of edit path

824 
NOTE: path costs are nonincreasing

825 
"""

826 
#debug_print('matcheduv:', matched_uv)

827 
#debug_print('matchedgh:', matched_gh)

828 
#debug_print('matchedcost:', matched_cost)

829 
#debug_print('pendingu:', pending_u)

830 
#debug_print('pendingv:', pending_v)

831 
#debug_print(Cv.C)

832 
#assert list(sorted(G1.nodes)) == list(sorted(list(u for u, v in matched_uv if u is not None) + pending_u))

833 
#assert list(sorted(G2.nodes)) == list(sorted(list(v for u, v in matched_uv if v is not None) + pending_v))

834 
#debug_print('pendingg:', pending_g)

835 
#debug_print('pendingh:', pending_h)

836 
#debug_print(Ce.C)

837 
#assert list(sorted(G1.edges)) == list(sorted(list(g for g, h in matched_gh if g is not None) + pending_g))

838 
#assert list(sorted(G2.edges)) == list(sorted(list(h for g, h in matched_gh if h is not None) + pending_h))

839 
#debug_print()

840  
841 
if prune(matched_cost + Cv.ls + Ce.ls):

842 
return

843  
844 
if not max(len(pending_u), len(pending_v)): 
845 
#assert not len(pending_g)

846 
#assert not len(pending_h)

847 
# path completed!

848 
#assert matched_cost <= maxcost.value

849 
maxcost.value = min(maxcost.value, matched_cost)

850 
yield matched_uv, matched_gh, matched_cost

851  
852 
else:

853 
edit_ops = get_edit_ops(matched_uv, pending_u, pending_v, Cv, 
854 
pending_g, pending_h, Ce, matched_cost) 
855 
for ij, Cv_ij, xy, Ce_xy, edit_cost in edit_ops: 
856 
i, j = ij 
857 
#assert Cv.C[i, j] + sum(Ce.C[t] for t in xy) == edit_cost

858 
if prune(matched_cost + edit_cost + Cv_ij.ls + Ce_xy.ls):

859 
continue

860  
861 
# dive deeper

862 
u = pending_u.pop(i) if i < len(pending_u) else None 
863 
v = pending_v.pop(j) if j < len(pending_v) else None 
864 
matched_uv.append((u, v)) 
865 
for x, y in xy: 
866 
len_g = len(pending_g)

867 
len_h = len(pending_h)

868 
matched_gh.append((pending_g[x] if x < len_g else None, 
869 
pending_h[y] if y < len_h else None)) 
870 
sortedx = list(sorted(x for x, y in xy)) 
871 
sortedy = list(sorted(y for x, y in xy)) 
872 
G = list((pending_g.pop(x) if x < len(pending_g) else None) 
873 
for x in reversed(sortedx)) 
874 
H = list((pending_h.pop(y) if y < len(pending_h) else None) 
875 
for y in reversed(sortedy)) 
876  
877 
# yield from

878 
for t in get_edit_paths(matched_uv, pending_u, pending_v, 
879 
Cv_ij, 
880 
matched_gh, pending_g, pending_h, 
881 
Ce_xy, 
882 
matched_cost + edit_cost): 
883 
yield t

884  
885 
# backtrack

886 
if u is not None: 
887 
pending_u.insert(i, u) 
888 
if v is not None: 
889 
pending_v.insert(j, v) 
890 
matched_uv.pop() 
891 
for x, g in zip(sortedx, reversed(G)): 
892 
if g is not None: 
893 
pending_g.insert(x, g) 
894 
for y, h in zip(sortedy, reversed(H)): 
895 
if h is not None: 
896 
pending_h.insert(y, h) 
897 
for t in xy: 
898 
matched_gh.pop() 
899  
900 
# Initialization

901  
902 
pending_u = list(G1.nodes)

903 
pending_v = list(G2.nodes)

904  
905 
# cost matrix of vertex mappings

906 
m = len(pending_u)

907 
n = len(pending_v)

908 
C = np.zeros((m + n, m + n)) 
909 
if node_subst_cost:

910 
C[0:m, 0:n] = np.array([node_subst_cost(G1.nodes[u], G2.nodes[v]) 
911 
for u in pending_u for v in pending_v] 
912 
).reshape(m, n) 
913 
elif node_match:

914 
C[0:m, 0:n] = np.array([1  int(node_match(G1.nodes[u], G2.nodes[v])) 
915 
for u in pending_u for v in pending_v] 
916 
).reshape(m, n) 
917 
else:

918 
# all zeroes

919 
pass

920 
#assert not min(m, n) or C[0:m, 0:n].min() >= 0

921 
if node_del_cost:

922 
del_costs = [node_del_cost(G1.nodes[u]) for u in pending_u] 
923 
else:

924 
del_costs = [1] * len(pending_u) 
925 
#assert not m or min(del_costs) >= 0

926 
if node_ins_cost:

927 
ins_costs = [node_ins_cost(G2.nodes[v]) for v in pending_v] 
928 
else:

929 
ins_costs = [1] * len(pending_v) 
930 
#assert not n or min(ins_costs) >= 0

931 
inf = C[0:m, 0:n].sum() + sum(del_costs) + sum(ins_costs) + 1 
932 
C[0:m, n:n + m] = np.array([del_costs[i] if i == j else inf 
933 
for i in range(m) for j in range(m)] 
934 
).reshape(m, m) 
935 
C[m:m + n, 0:n] = np.array([ins_costs[i] if i == j else inf 
936 
for i in range(n) for j in range(n)] 
937 
).reshape(n, n) 
938 
Cv = make_CostMatrix(C, m, n) 
939 
#debug_print('Cv: {} x {}'.format(m, n))

940 
#debug_print(Cv.C)

941  
942 
pending_g = list(G1.edges)

943 
pending_h = list(G2.edges)

944  
945 
# cost matrix of edge mappings

946 
m = len(pending_g)

947 
n = len(pending_h)

948 
C = np.zeros((m + n, m + n)) 
949 
if edge_subst_cost:

950 
C[0:m, 0:n] = np.array([edge_subst_cost(G1.edges[g], G2.edges[h]) 
951 
for g in pending_g for h in pending_h] 
952 
).reshape(m, n) 
953 
elif edge_match:

954 
C[0:m, 0:n] = np.array([1  int(edge_match(G1.edges[g], G2.edges[h])) 
955 
for g in pending_g for h in pending_h] 
956 
).reshape(m, n) 
957 
else:

958 
# all zeroes

959 
pass

960 
#assert not min(m, n) or C[0:m, 0:n].min() >= 0

961 
if edge_del_cost:

962 
del_costs = [edge_del_cost(G1.edges[g]) for g in pending_g] 
963 
else:

964 
del_costs = [1] * len(pending_g) 
965 
#assert not m or min(del_costs) >= 0

966 
if edge_ins_cost:

967 
ins_costs = [edge_ins_cost(G2.edges[h]) for h in pending_h] 
968 
else:

969 
ins_costs = [1] * len(pending_h) 
970 
#assert not n or min(ins_costs) >= 0

971 
inf = C[0:m, 0:n].sum() + sum(del_costs) + sum(ins_costs) + 1 
972 
C[0:m, n:n + m] = np.array([del_costs[i] if i == j else inf 
973 
for i in range(m) for j in range(m)] 
974 
).reshape(m, m) 
975 
C[m:m + n, 0:n] = np.array([ins_costs[i] if i == j else inf 
976 
for i in range(n) for j in range(n)] 
977 
).reshape(n, n) 
978 
Ce = make_CostMatrix(C, m, n) 
979 
#debug_print('Ce: {} x {}'.format(m, n))

980 
#debug_print(Ce.C)

981 
#debug_print()

982  
983 
class MaxCost: 
984 
def __init__(self): 
985 
# initial upperbound estimate

986 
# NOTE: should work for empty graph

987 
self.value = Cv.C.sum() + Ce.C.sum() + 1 
988 
maxcost = MaxCost() 
989  
990 
def prune(cost): 
991 
if upper_bound is not None: 
992 
if cost > upper_bound:

993 
return True 
994 
if cost > maxcost.value:

995 
return True 
996 
elif strictly_decreasing and cost >= maxcost.value: 
997 
return True 
998  
999 
# Now go!

1000  
1001 
for vertex_path, edge_path, cost in \ 
1002 
get_edit_paths([], pending_u, pending_v, Cv, 
1003 
[], pending_g, pending_h, Ce, 0):

1004 
#assert sorted(G1.nodes) == sorted(u for u, v in vertex_path if u is not None)

1005 
#assert sorted(G2.nodes) == sorted(v for u, v in vertex_path if v is not None)

1006 
#assert sorted(G1.edges) == sorted(g for g, h in edge_path if g is not None)

1007 
#assert sorted(G2.edges) == sorted(h for g, h in edge_path if h is not None)

1008 
#print(vertex_path, edge_path, cost, file = sys.stderr)

1009 
#assert cost == maxcost.value

1010 
yield list(vertex_path), list(edge_path), cost 
1011  
1012  
1013 
def _is_close(d1, d2, atolerance=0, rtolerance=0): 
1014 
"""Determines whether two adjacency matrices are within

1015 
a provided tolerance.

1016 

1017 
Parameters

1018 


1019 
d1 : dict

1020 
Adjacency dictionary

1021 

1022 
d2 : dict

1023 
Adjacency dictionary

1024 

1025 
atolerance : float

1026 
Some scalar tolerance value to determine closeness

1027 

1028 
rtolerance : float

1029 
A scalar tolerance value that will be some proportion

1030 
of ``d2``'s value

1031 

1032 
Returns

1033 


1034 
closeness : bool

1035 
If all of the nodes within ``d1`` and ``d2`` are within

1036 
a predefined tolerance, they are considered "close" and

1037 
this method will return True. Otherwise, this method will

1038 
return False.

1039 

1040 
"""

1041 
# Precondition: d1 and d2 have the same keys at each level if they

1042 
# are dictionaries.

1043 
if not isinstance(d1, dict) and not isinstance(d2, dict): 
1044 
return abs(d1  d2) <= atolerance + rtolerance * abs(d2) 
1045 
return all(all(_is_close(d1[u][v], d2[u][v]) for v in d1[u]) for u in d1) 
1046  
1047  
1048 
def simrank_similarity(G, source=None, target=None, importance_factor=0.9, 
1049 
max_iterations=100, tolerance=1e4): 
1050 
"""Returns the SimRank similarity of nodes in the graph ``G``.

1051 

1052 
SimRank is a similarity metric that says "two objects are considered

1053 
to be similar if they are referenced by similar objects." [1]_.

1054 

1055 
The pseudocode definition from the paper is:

1056 

1057 
def simrank(G, u, v):

1058 
in_neighbors_u = G.predecessors(u)

1059 
in_neighbors_v = G.predecessors(v)

1060 
scale = C / (len(in_neighbors_u) * len(in_neighbors_v))

1061 
return scale * sum(simrank(G, w, x)

1062 
for w, x in product(in_neighbors_u,

1063 
in_neighbors_v))

1064 

1065 
where ``G`` is the graph, ``u`` is the source, ``v`` is the target,

1066 
and ``C`` is a float decay or importance factor between 0 and 1.

1067 

1068 
The SimRank algorithm for determining node similarity is defined in

1069 
[2]_.

1070 

1071 
Parameters

1072 


1073 
G : NetworkX graph

1074 
A NetworkX graph

1075 

1076 
source : node

1077 
If this is specified, the returned dictionary maps each node

1078 
``v`` in the graph to the similarity between ``source`` and

1079 
``v``.

1080 

1081 
target : node

1082 
If both ``source`` and ``target`` are specified, the similarity

1083 
value between ``source`` and ``target`` is returned. If

1084 
``target`` is specified but ``source`` is not, this argument is

1085 
ignored.

1086 

1087 
importance_factor : float

1088 
The relative importance of indirect neighbors with respect to

1089 
direct neighbors.

1090 

1091 
max_iterations : integer

1092 
Maximum number of iterations.

1093 

1094 
tolerance : float

1095 
Error tolerance used to check convergence. When an iteration of

1096 
the algorithm finds that no similarity value changes more than

1097 
this amount, the algorithm halts.

1098 

1099 
Returns

1100 


1101 
similarity : dictionary or float

1102 
If ``source`` and ``target`` are both ``None``, this returns a

1103 
dictionary of dictionaries, where keys are node pairs and value

1104 
are similarity of the pair of nodes.

1105 

1106 
If ``source`` is not ``None`` but ``target`` is, this returns a

1107 
dictionary mapping node to the similarity of ``source`` and that

1108 
node.

1109 

1110 
If neither ``source`` nor ``target`` is ``None``, this returns

1111 
the similarity value for the given pair of nodes.

1112 

1113 
Examples

1114 


1115 
If the nodes of the graph are numbered from zero to *n  1*, where *n*

1116 
is the number of nodes in the graph, you can create a SimRank matrix

1117 
from the return value of this function where the node numbers are

1118 
the row and column indices of the matrix::

1119 

1120 
>>> import networkx as nx

1121 
>>> from numpy import array

1122 
>>> G = nx.cycle_graph(4)

1123 
>>> sim = nx.simrank_similarity(G)

1124 
>>> lol = [[sim[u][v] for v in sorted(sim[u])] for u in sorted(sim)]

1125 
>>> sim_array = array(lol)

1126 

1127 
References

1128 


1129 
.. [1] https://en.wikipedia.org/wiki/SimRank

1130 
.. [2] G. Jeh and J. Widom.

1131 
"SimRank: a measure of structuralcontext similarity",

1132 
In KDD'02: Proceedings of the Eighth ACM SIGKDD

1133 
International Conference on Knowledge Discovery and Data Mining,

1134 
pp. 538543. ACM Press, 2002.

1135 
"""

1136 
prevsim = None

1137  
1138 
# build up our similarity adjacency dictionary output

1139 
newsim = {u: {v: 1 if u == v else 0 for v in G} for u in G} 
1140  
1141 
# These functions compute the update to the similarity value of the nodes

1142 
# `u` and `v` with respect to the previous similarity values.

1143 
avg_sim = lambda s: sum(newsim[w][x] for (w, x) in s) / len(s) 
1144 
sim = lambda u, v: importance_factor * avg_sim(list(product(G[u], G[v]))) 
1145  
1146 
for _ in range(max_iterations): 
1147 
if prevsim and _is_close(prevsim, newsim, tolerance): 
1148 
break

1149 
prevsim = newsim 
1150 
newsim = {u: {v: sim(u, v) if u is not v else 1 
1151 
for v in newsim[u]} for u in newsim} 
1152  
1153 
if source is not None and target is not None: 
1154 
return newsim[source][target]

1155 
if source is not None: 
1156 
return newsim[source]

1157 
return newsim

1158  
1159  
1160 
def simrank_similarity_numpy(G, source=None, target=None, importance_factor=0.9, 
1161 
max_iterations=100, tolerance=1e4): 
1162 
"""Calculate SimRank of nodes in ``G`` using matrices with ``numpy``.

1163 

1164 
The SimRank algorithm for determining node similarity is defined in

1165 
[1]_.

1166 

1167 
Parameters

1168 


1169 
G : NetworkX graph

1170 
A NetworkX graph

1171 

1172 
source : node

1173 
If this is specified, the returned dictionary maps each node

1174 
``v`` in the graph to the similarity between ``source`` and

1175 
``v``.

1176 

1177 
target : node

1178 
If both ``source`` and ``target`` are specified, the similarity

1179 
value between ``source`` and ``target`` is returned. If

1180 
``target`` is specified but ``source`` is not, this argument is

1181 
ignored.

1182 

1183 
importance_factor : float

1184 
The relative importance of indirect neighbors with respect to

1185 
direct neighbors.

1186 

1187 
max_iterations : integer

1188 
Maximum number of iterations.

1189 

1190 
tolerance : float

1191 
Error tolerance used to check convergence. When an iteration of

1192 
the algorithm finds that no similarity value changes more than

1193 
this amount, the algorithm halts.

1194 

1195 
Returns

1196 


1197 
similarity : dictionary or float

1198 
If ``source`` and ``target`` are both ``None``, this returns a

1199 
dictionary of dictionaries, where keys are node pairs and value

1200 
are similarity of the pair of nodes.

1201 

1202 
If ``source`` is not ``None`` but ``target`` is, this returns a

1203 
dictionary mapping node to the similarity of ``source`` and that

1204 
node.

1205 

1206 
If neither ``source`` nor ``target`` is ``None``, this returns

1207 
the similarity value for the given pair of nodes.

1208 

1209 
Examples

1210 


1211 
>>> import networkx as nx

1212 
>>> from numpy import array

1213 
>>> G = nx.cycle_graph(4)

1214 
>>> sim = nx.simrank_similarity_numpy(G)

1215 

1216 
References

1217 


1218 
.. [1] G. Jeh and J. Widom.

1219 
"SimRank: a measure of structuralcontext similarity",

1220 
In KDD'02: Proceedings of the Eighth ACM SIGKDD

1221 
International Conference on Knowledge Discovery and Data Mining,

1222 
pp. 538543. ACM Press, 2002.

1223 
"""

1224 
# This algorithm follows roughly

1225 
#

1226 
# S = max{C * (A.T * S * A), I}

1227 
#

1228 
# where C is the importance factor, A is the column normalized

1229 
# adjacency matrix, and I is the identity matrix.

1230 
import numpy as np 
1231 
adjacency_matrix = nx.to_numpy_array(G) 
1232  
1233 
# columnnormalize the ``adjacency_matrix``

1234 
adjacency_matrix /= adjacency_matrix.sum(axis=0)

1235  
1236 
newsim = np.eye(adjacency_matrix.shape[0], dtype=np.float64)

1237 
for _ in range(max_iterations): 
1238 
prevsim = np.copy(newsim) 
1239 
newsim = importance_factor * np.matmul( 
1240 
np.matmul(adjacency_matrix.T, prevsim), adjacency_matrix) 
1241 
np.fill_diagonal(newsim, 1.0)

1242  
1243 
if np.allclose(prevsim, newsim, atol=tolerance):

1244 
break

1245  
1246 
if source is not None and target is not None: 
1247 
return newsim[source, target]

1248 
if source is not None: 
1249 
return newsim[source]

1250 
return newsim

1251  
1252  
1253 
def setup_module(module): 
1254 
"""Fixture for nose tests."""

1255 
from nose import SkipTest 
1256 
try:

1257 
import numpy 
1258 
except:

1259 
raise SkipTest("NumPy not available") 
1260 
try:

1261 
import scipy 
1262 
except:

1263 
raise SkipTest("SciPy not available") 