degreeDiscount.py

''' Implementation of degree discount heuristic [1] for Independent Cascade model
of influence propagation in graph G

[1] -- Wei Chen et al. Efficient influence maximization in Social Networks (algorithm 4)
'''
__author__ = 'ivanovsergey'
from priorityQueue import PriorityQueue as PQ # priority queue
import networkx as nx

def degreeDiscountIC(G, k, p=.01):
    ''' Finds initial set of nodes to propagate in Independent Cascade model (with priority queue)
    Input: G -- networkx graph object
    k -- number of nodes needed
    p -- propagation probability
    Output:
    S -- chosen k nodes
    '''
    S = []
    dd = PQ() # degree discount
    t = dict() # number of adjacent vertices that are in S
    d = dict() # degree of each vertex

    # initialize degree discount
    for u in G.nodes():
        d[u] = sum([G[u][v]['weight'] for v in G[u]]) # each edge adds degree 1
        # d[u] = len(G[u]) # each neighbor adds degree 1
        dd.add_task(u, -d[u]) # add degree of each node
        t[u] = 0

    # add vertices to S greedily
    for i in range(k):
        u, priority = dd.pop_item() # extract node with maximal degree discount
        S.append(u)
        for v in G[u]:
            if v not in S:
                t[v] += G[u][v]['weight'] # increase number of selected neighbors
                priority = d[v] - 2*t[v] - (d[v] - t[v])*t[v]*p # discount of degree
                dd.add_task(v, -priority)
    return S

# def degreeDiscountIC2(G, k, p=.01):
#     ''' Finds initial set of nodes to propagate in Independent Cascade model (without priority queue)
#     Input: G -- networkx graph object
#     k -- number of nodes needed
#     p -- propagation probability
#     Output:
#     S -- chosen k nodes
#     Note: the routine runs twice slower than using PQ. Implemented to verify results
#     '''
#     d = dict()
#     dd = dict() # degree discount
#     t = dict() # number of selected neighbors
#     S = [] # selected set of nodes
#     for u in G:
#         d[u] = sum([G[u][v]['weight'] for v in G[u]]) # each edge adds degree 1
#         # d[u] = len(G[u]) # each neighbor adds degree 1
#         dd[u] = d[u]
#         t[u] = 0
#     for i in range(k):
#         u, ddv = max(dd.iteritems(), key=lambda (k,v): v)
#         dd.pop(u)
#         S.append(u)
#         for v in G[u]:
#             if v not in S:
#                 t[v] += G[u][v]['weight'] # increase number of selected neighbors
#                 dd[v] = d[v] - 2*t[v] - (d[v] - t[v])*t[v]*p
#     return S
    
# def degreeDiscountStar(G,k,p=.01):
    
#     S = []
#     scores = PQ()
#     d = dict()
#     t = dict()
#     for u in G:
#         d[u] = sum([G[u][v]['weight'] for v in G[u]])
#         t[u] = 0
#         score = -((1-p)**t[u])*(1+(d[u]-t[u])*p)
#         scores.add_task(u, )
#     for iteration in range(k):
#         u, priority = scores.pop_item()
#         print iteration, -priority
#         S.append(u)
#         for v in G[u]:
#             if v not in S:
#                 t[v] += G[u][v]['weight']
#                 score = -((1-p)**t[u])*(1+(d[u]-t[u])*p)
#                 scores.add_task(v, score)
#     return S
            

if __name__ == "__main__":
    import time
    start = time.time()

    G = nx.read_gpickle("Graph/MIT.gpickle")
    print 'Read graph G'
    print time.time() - start

    S = degreeDiscountIC(G, 10, 0.01)
    print S
    print time.time() - start