dhitaj
diff --git a/‎.gitignore
+4 b/‎.gitignore
+4
diff --git a/‎README.md
+39 b/‎README.md
+39
diff --git a/‎config.json
+49 b/‎config.json
+49
diff --git a/‎data/wiki/README
+3 b/‎data/wiki/README
+3
diff --git a/‎data/wiki/test.txt
+4,358 b/‎data/wiki/test.txt
+4,358
diff --git a/‎data/wiki/train.txt
+36,718 b/‎data/wiki/train.txt
+36,718
diff --git a/‎data/wiki/valid.txt
+3,760 b/‎data/wiki/valid.txt
+3,760
diff --git a/‎fedcomm.py
+170 b/‎fedcomm.py
+170
diff --git a/‎models/__init__.py b/‎models/__init__.py
@@ -0,0 +1,4 @@
+/__pycache__
+/.idea
+/models/__pycache__
+/utils/__pycache__
@@ -0,0 +1,39 @@
+# FedComm
+Experiments are run on the MNIST, CIFAR-10, and WikiText-2 datasets.
+
+## Setting up the environment
+
+* Install the requirements.
+```
+pip install -r requirements.txt
+```
+
+## Data
+* Download the respective datasets and put them under the `data/` directory.
+
+## Running the experiments
+
+* To run the FedComm experiment:
+```
+python fedcomm.py
+```
+
+## Setting the experiment parameters
+To run the experiments under different conditions, change the parameters in the *config.json* file.
+
+#### Federated Learning Parameters
+* ```num_users:``` Total number of users that have signed up to collaborate. Default is 100.
+* ```frac:```     Fraction of users to be used for federated updates. Default is 1.0 (i.e., 100% participation).
+* ```epochs:``` Number of global training epochs. Default is 1000.
+* ```dataset:```  Default: 'mnist'. Options: 'mnist', 'cifar10', 'wiki'.
+
+#### Message transmission parameters:
+
+* ```senders:``` Fraction of participants in the federated learning scheme that will act as senders. Default: 0.1 (i.e., 10% of the participants). 
+* ```payload:``` The extension of the payload file (under the *payloads/* directory). Default: 'txt'. Options: 'txt', 'png'.
+* ```injection:``` The FL global round when the senders should start transmitting the message. Default 10.
+* ```stealthy:``` The level of stealthiness of the *senders*. Default 'non', Options: 'non', 'inter', 'full'.
+* ```run_name:```  A name given to the particular run. It will create a directory structure where it will store model checkpoints, extracted payloads, train accuracy and loss values.
+
+----
+
@@ -0,0 +1,49 @@
+{
+    "comment": "federated arguments (Notation for the arguments followed from paper)",
+    "epochs": 10000,
+    "num_users": [100],
+    "frac": [1.0],
+    "local_ep": 10,
+    "local_bs": 30,
+    "lr": 0.01,
+    "momentum": 0.5,
+
+    "comment": "model arguments",
+    "model": "cnn",
+    "kernel_num": 9,
+    "kernel_sizes": "3,4,5",
+    "num_channels": 1, 
+    "norm": "batch_norm",
+    "num_filters": 32,
+    "max_pool": true,
+
+    "comment": "other arguments",
+    "dataset": "wiki",
+    "num_classes": 10, 
+    "gpu": true,
+    "optimizer": "sgd",
+    "iid": false,
+    "unequal": 0,
+    "stopping_rounds": 10,
+    "verbose": 1,
+    "seed": 42,
+
+    "comment": "evaluation arguments",
+    "federated": 0,
+    "runid": "attack_v1",
+    "eval_epoch": 0,
+
+    "comment": "attack arguments",
+    "senders": 0.1,
+    "payload": ["txt"],
+    "error_correction": true,
+    "store_global": 1,
+    "injection": 2,
+    "stealthy": "non",
+    "stealthiness_comment": "the stealthiness parameters can be: [non, inter, full]",
+    "run_name": "full_stealth",
+
+    "comment": "weight statistics arguments",
+    "byte_index": 0,
+    "statistics_path": "path"
+  }
@@ -0,0 +1,3 @@
+This is raw data from the wikitext-2 dataset.
+
+See https://www.salesforce.com/products/einstein/ai-research/the-wikitext-dependency-language-modeling-dataset/
@@ -0,0 +1,170 @@
+import os
+import math
+import copy
+import time
+import torch
+import random
+import numpy as np
+from tqdm import tqdm
+from csv import writer
+from update import LocalUpdate, test_inference
+from models.models import CNNMnistSmall, VGG, RNNModel
+from utils.utils_training import get_dataset, average_weights, add_delta_weights
+
+from user import UserType
+import json
+
+with open('config.json') as config_file:
+    config = json.load(config_file)
+
+
+def main(dataset, payload, num_users, frac, run_name):
+    result_folder_tree = os.path.join(os.getcwd(), run_name, dataset,
+                                      payload, str(num_users), str(int(frac * 100)))
+
+    if not os.path.exists(result_folder_tree):
+        os.makedirs(result_folder_tree)
+        os.makedirs(os.path.join(result_folder_tree, "models"))
+        os.makedirs(os.path.join(result_folder_tree, "payloads"))
+
+    device = 'cuda' if config["gpu"] else 'cpu'
+    # load ldpc matrixes
+    H, G, enc_length, preamble1, global_model = None, None, None, None, None
+
+    # load datasets
+    train_dataset, test_dataset, user_groups, ntokens = get_dataset(dataset, config["iid"], config["unequal"],
+                                                                    num_users)
+    # BUILD MODEL
+    if dataset == 'mnist':
+        global_model = CNNMnistSmall()
+    elif dataset == 'cifar10':
+        global_model = VGG('VGG11')
+    elif dataset == "wiki":
+        global_model = RNNModel("LSTM", ntokens, 200, 200, 2, 0.2, True)
+
+    error_correction = config["error_correction"]
+    stealthiness_level = config["stealthy"]
+
+    if not global_model:
+        print('Configuration Error!')
+    global_model.to(device)
+
+    # Training
+    epoch = 0
+    # Injections
+    injections = 0
+    # Check when we can start the decoding
+    payload_alive = False
+
+    # Define the number of sender users
+    m_comp = max(int(config["senders"] * num_users), 1)
+    sender_users = np.random.choice(range(num_users), m_comp, replace=False)
+
+    for user in user_groups:
+        if user.user_id in sender_users:
+            user.user_type = UserType.SENDER
+
+    with tqdm(range(config["epochs"])) as bar:
+        for _ in bar:
+            local_weights, local_losses = [], []
+            m = max(int(frac * num_users), 1)
+            np.random.seed(random.randint(100, 1000))
+            idxs_users = np.random.choice(range(num_users), m, replace=False)
+
+            global_model.train()
+
+            for idx in idxs_users:
+                user = user_groups[idx]
+                local_model = LocalUpdate(gpu=config["gpu"], dataset=train_dataset, idxs=user.data,
+                                          local_bs=config["local_bs"], dataset_name=dataset)
+
+                w, loss = local_model.update_weights(model=copy.deepcopy(global_model), global_round=epoch,
+                                                     optimizer=config["optimizer"], lr=config["lr"],
+                                                     local_ep=config["local_ep"])
+
+                bar.set_postfix({'Loss': {copy.deepcopy(loss)}})
+
+                if config["store_global"] <= epoch < config["injection"]:
+                    user.global_model = copy.deepcopy(global_model)
+                    user.previous_round = epoch
+
+                if user.user_type == UserType.SENDER and epoch >= config["injection"]:
+                    if enc_length is None:
+                        user.global_model = copy.deepcopy(global_model)
+
+                    payload_alive = user.extract_payload(copy.deepcopy(global_model),
+                                                         payload,
+                                                         result_folder_tree,
+                                                         enc_length,
+                                                         H, G,
+                                                         preamble1,
+                                                         error_correction)
+
+                    sender_weights, enc_length, H, G, preamble1 = user.inject_payload(copy.deepcopy(w),
+                                                                                      device,
+                                                                                      payload,
+                                                                                      stealthiness_level,
+                                                                                      error_correction)
+
+                    local_weights.append(copy.deepcopy(sender_weights))
+                    injections += 1
+                else:
+                    local_weights.append(copy.deepcopy(w.state_dict()))
+                    local_losses.append(copy.deepcopy(loss))
+
+            if epoch >= config["injection"] and payload_alive:
+                for idx in idxs_users:
+                    user = user_groups[idx]
+                    user.extract_payload(copy.deepcopy(global_model),
+                                         payload,
+                                         result_folder_tree,
+                                         enc_length,
+                                         H, G,
+                                         preamble1,
+                                         error_correction)
+
+            global_weights_delta = average_weights(local_weights)
+            global_weights = add_delta_weights(copy.deepcopy(global_model), global_weights_delta)
+            global_model.load_state_dict(global_weights)
+
+            if epoch % 5 == 0:
+                if dataset == "wiki":
+                    train_loss = test_inference(config["gpu"], copy.deepcopy(global_model), train_dataset, dataset)
+                    with open(os.path.join(result_folder_tree, "acc_loss.csv"), 'a+') as fp:
+                        writer_object = writer(fp)
+                        writer_object.writerow([epoch, train_loss, math.exp(train_loss)])
+                        fp.close()
+                else:
+                    train_acc, train_loss = test_inference(config["gpu"], copy.deepcopy(global_model), train_dataset,
+                                                           dataset)
+                    test_acc, test_loss = test_inference(config["gpu"], copy.deepcopy(global_model), test_dataset,
+                                                         dataset)
+
+                    with open(os.path.join(result_folder_tree, "acc_loss.csv"), 'a+') as fp:
+                        writer_object = writer(fp)
+                        writer_object.writerow([epoch, train_acc, train_loss, test_acc, test_loss])
+                        fp.close()
+
+            rnd_coverage = sum(
+                [1 if u.correctly_extracted and not u.user_type == UserType.SENDER else 0 for u in user_groups])
+            with open(os.path.join(result_folder_tree, "coverage.csv"), 'a+') as fp:
+                writer_object = writer(fp)
+                writer_object.writerow([epoch, rnd_coverage])
+                fp.close()
+            torch.save(global_model.state_dict(),
+                       os.path.join(result_folder_tree, "models", f"checkpoint.epoch{epoch}.pt"))
+            epoch += 1
+
+
+if __name__ == '__main__':
+    start_time = time.time()
+    for p in config["payload"]:
+        for n in config["num_users"]:
+            for f in config["frac"]:
+                main(config["dataset"],
+                     p,  # payload
+                     n,  # num_users
+                     f,  # frac
+                     config["run_name"]
+                     )
+    print('\n Total Run Time: {0:0.4f}'.format(time.time() - start_time))
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+This is raw data from the wikitext-2 dataset.`
	`2`	`+`
	`3`	`+See https://www.salesforce.com/products/einstein/ai-research/the-wikitext-dependency-language-modeling-dataset/`