Update for latest dsdk #7

Merged: 16 commits, Jun 12, 2022
34 changes: 19 additions & 15 deletions notebooks/tutorial/1 - Setup.ipynb
@@ -21,7 +21,7 @@
"source": [
"## Setup your AWS credentials\n",
"\n",
"Follow the [guide on setting up AWS credentials](https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/setup-credentials.html) to properly download things from the AWS Data Exchange."
"Follow the [guide on setting up AWS credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration) to properly download things from the AWS Data Exchange."
]
},
{
@@ -34,11 +34,15 @@
"Set up your environment to find all the yummy data science files on your computer box. We must set three things:\n",
"\n",
"1. Where you will save the data science files.\n",
"1. Where you will save your tomes (views of the data).\n",
"1. Where you will save your tomes (views of the data). Can be the same as step 1.\n",
"1. What to call your header tome.\n",
"\n",
"These will be saved in a `.env` file in the `notebooks` folder. \n",
"\n",
"By convention, the tome names are in this format: `tome_name.start-date,end-date.comment`\n",
"\n",
"The `start-date` (inclusive) and `end-date` (exclusive) are the beginning and end of the data included in that tome and are in yyyy-mm-dd format.\n",
"\n",
"_**Fill out with your specific paths in the cell below, then run the notebook.**_\n"
]
},
@@ -49,8 +53,10 @@
"metadata": {},
"outputs": [],
"source": [
"collection_path = 'PATH/TO/TOMES'\n",
"ds_collection_path = 'PATH/TO/CSDS'"
"import os\n",
"tome_collection_path = os.path.join('..','..','tmp')\n",
"ds_collection_path = os.path.join('..','..','tmp')\n",
"header_name = 'header_tome.2022-05-15,2022-05-15'"
]
},
{
@@ -60,35 +66,33 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import platform\n",
"import re\n",
"import warnings\n",
"\n",
"header_name = 'header_tome'\n",
"ds_type = 'csds'\n",
"if not re.match(r\"\\S+\\.\\d{4}-\\d{2}-\\d{2},\\d{4}-\\d{2}-\\d{2}\",header_name):\n",
"    warnings.warn(f'Header name {header_name} does not match the tome naming convention')\n",
"\n",
"if collection_path == 'PATH/TO/TOMES':\n",
" collection_path = os.path.join('..','..','tmp','tomes')\n",
"if ds_collection_path == 'PATH/TO/CSDS':\n",
" ds_collection_path = os.path.join('..','..','tmp','data')\n",
"ds_type = 'csds'\n",
" \n",
"collection_path = os.path.abspath(collection_path)\n",
"tome_collection_path = os.path.abspath(tome_collection_path)\n",
"ds_collection_path = os.path.abspath(ds_collection_path)\n",
"\n",
"system = platform.system()\n",
"with open(os.path.join('..','.env'), 'w') as f:\n",
" if system == 'Windows':\n",
" f.write(f'export PURESKILLGG_TOME_DEFAULT_HEADER_NAME={header_name}\\n')\n",
" f.write(f'export PURESKILLGG_TOME_COLLECTION_PATH={collection_path}\\n')\n",
" f.write(f'export PURESKILLGG_TOME_COLLECTION_PATH={tome_collection_path}\\n')\n",
" f.write(f'export PURESKILLGG_TOME_DS_COLLECTION_PATH={ds_collection_path}\\n')\n",
" f.write(f'export PURESKILLGG_TOME_DS_TYPE={ds_type}\\n')\n",
"    elif system == 'Darwin':  # platform.system() reports macOS as 'Darwin'\n",
" f.write(f'PURESKILLGG_TOME_DEFAULT_HEADER_NAME = {header_name}\\n')\n",
" f.write(f'PURESKILLGG_TOME_COLLECTION_PATH = {collection_path}\\n')\n",
" f.write(f'PURESKILLGG_TOME_COLLECTION_PATH = {tome_collection_path}\\n')\n",
" f.write(f'PURESKILLGG_TOME_DS_COLLECTION_PATH = {ds_collection_path}\\n')\n",
" f.write(f'PURESKILLGG_TOME_DS_TYPE = {ds_type}\\n')\n",
" else:\n",
" f.write(f'PURESKILLGG_TOME_DEFAULT_HEADER_NAME={header_name}\\n')\n",
" f.write(f'PURESKILLGG_TOME_COLLECTION_PATH={collection_path}\\n')\n",
" f.write(f'PURESKILLGG_TOME_COLLECTION_PATH={tome_collection_path}\\n')\n",
" f.write(f'PURESKILLGG_TOME_DS_COLLECTION_PATH={ds_collection_path}\\n')\n",
" f.write(f'PURESKILLGG_TOME_DS_TYPE={ds_type}\\n')\n",
" if system != 'Linux':\n",
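The naming convention above (`tome_name.start-date,end-date.comment`) can be checked with a small helper. This is a sketch, not part of the dsdk: `parse_tome_name` and its pattern are hypothetical names introduced here for illustration.

```python
import re

# Hypothetical helper (not a dsdk API): split a tome name of the form
# tome_name.start-date,end-date.comment into its parts.
TOME_NAME_PATTERN = re.compile(
    r"^(?P<name>[^.]+)"                                  # base tome name
    r"\.(?P<start>\d{4}-\d{2}-\d{2})"                    # start date (inclusive)
    r",(?P<end>\d{4}-\d{2}-\d{2})"                       # end date (exclusive)
    r"(?:\.(?P<comment>.+))?$"                           # optional comment
)

def parse_tome_name(tome_name):
    """Return a dict of name parts, or None if the name breaks convention."""
    match = TOME_NAME_PATTERN.match(tome_name)
    return match.groupdict() if match else None

parts = parse_tome_name('header_tome.2022-05-15,2022-05-15')
print(parts['name'], parts['start'], parts['end'])
# header_tome 2022-05-15 2022-05-15
```

A check like this could replace the warning-only regex in the setup cell, failing fast on a malformed name instead of continuing.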
3 changes: 3 additions & 0 deletions notebooks/tutorial/2 - Download data.ipynb
@@ -96,6 +96,9 @@
"\n",
"ds_collection_path = os.environ.get('PURESKILLGG_TOME_DS_COLLECTION_PATH')\n",
"\n",
"if not os.path.isdir(ds_collection_path):\n",
" os.makedirs(ds_collection_path)\n",
"\n",
"for index, url in enumerate(dataset_sample_urls):\n",
" print(\"working on \",url)\n",
" response = requests.get(url)\n",
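The `isdir`/`makedirs` guard added above can also be collapsed into a single call: `os.makedirs` with `exist_ok=True` is a no-op when the directory already exists, which also avoids the small race between the check and the creation. A minimal sketch, using a temp directory as a stand-in for the real collection path:

```python
import os
import tempfile

# Stand-in for PURESKILLGG_TOME_DS_COLLECTION_PATH; path is illustrative only.
ds_collection_path = os.path.join(tempfile.mkdtemp(), 'csds')

# Equivalent to: if not os.path.isdir(path): os.makedirs(path)
os.makedirs(ds_collection_path, exist_ok=True)
os.makedirs(ds_collection_path, exist_ok=True)  # safe to call again

print(os.path.isdir(ds_collection_path))
# True
```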
8 changes: 5 additions & 3 deletions notebooks/tutorial/3 - Make header tome.ipynb
@@ -86,7 +86,7 @@
"outputs": [],
"source": [
"if not header_loader.exists:\n",
" header_loader = curator.create_header_tome(path_depth=7)"
" header_loader = curator.create_header_tome()"
]
},
{
@@ -110,7 +110,7 @@
"source": [
"## Make subheaders too\n",
"\n",
"You might want to analyze players on a specific map, rank, or platform. You can create \"subheaders\" that are a filtered view of the main header. The `create_subheader_tome` will create the subheader with the specified filter applied to the header tome."
"You might want to analyze players on a specific map, rank, or platform. You can create \"subheaders\" that are a filtered view of the main header. The `create_subheader_tome` will create the subheader with the specified filter applied to the header tome.\n",
"\n",
"Remember that the convention for the tome names is: `tome_name.start-date,end-date.comment`"
]
},
{
@@ -123,7 +125,7 @@
"def map_name_selector(map_name):\n",
" return lambda df: df['map_name']==map_name\n",
"\n",
"subheader_loader = curator.create_subheader_tome('subheader_dust2', map_name_selector('de_dust2'))"
"subheader_loader = curator.create_subheader_tome('subheader_dust2.2022-05-15,2022-05-15', map_name_selector('de_dust2'))"
]
},
{
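The selector passed to `create_subheader_tome` is just a function that maps a dataframe to a boolean mask. A sketch of how that filter behaves on plain pandas, with a toy header dataframe (the columns here are assumptions for illustration):

```python
import pandas as pd

# Same selector shape as in the notebook: returns a mask-producing function.
def map_name_selector(map_name):
    return lambda df: df['map_name'] == map_name

# Toy stand-in for the header tome's dataframe.
header_df = pd.DataFrame({
    'match_id': [1, 2, 3],
    'map_name': ['de_dust2', 'de_mirage', 'de_dust2'],
})

selector = map_name_selector('de_dust2')
subheader_df = header_df[selector(header_df)]
print(len(subheader_df))
# 2
```

Any callable with this signature works, so you can filter on rank, platform, or a combination of columns the same way.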
5 changes: 3 additions & 2 deletions notebooks/tutorial/5 - Create tome.ipynb
@@ -96,8 +96,9 @@
"outputs": [],
"source": [
"# Initialize our \"footsteps_by_rank\" tome\n",
"footsteps_tome_name = 'footsteps_by_rank.2022-05-15,2022-05-15'\n",
"tomer = curator.make_tome(\n",
" 'footsteps_by_rank',\n",
" footsteps_tome_name,\n",
" ds_reading_instructions=[\n",
" {\n",
" \"channel\": 'player_footstep',\n",
@@ -137,7 +138,7 @@
"metadata": {},
"outputs": [],
"source": [
"df = curator.get_dataframe('footsteps_by_rank')"
"df = curator.get_dataframe(footsteps_tome_name)"
]
},
{
5 changes: 3 additions & 2 deletions notebooks/tutorial/6 - Train data science models.ipynb
@@ -74,8 +74,9 @@
"metadata": {},
"outputs": [],
"source": [
"df = curator.get_dataframe('footsteps_by_rank')\n",
"keyset = curator.get_keyset('footsteps_by_rank')"
"footsteps_tome_name = 'footsteps_by_rank.2022-05-15,2022-05-15'\n",
"df = curator.get_dataframe(footsteps_tome_name)\n",
"keyset = curator.get_keyset(footsteps_tome_name)"
]
},
{
4 changes: 2 additions & 2 deletions notebooks/tutorial/7 - Getting data from the ADX.ipynb
@@ -619,10 +619,10 @@
"There are many ways to do this, so we won't list them all here. We generally sync one month at a time with the AWS CLI like this:\n",
"\n",
"```\n",
"aws s3 sync s3://my-bucket/csds/2022/04/ /path/to/working/dir/csds/2022/04\n",
"aws s3 sync s3://my-bucket/csds/2022/04/ /PATH/TO/ROOT/csds/2022/04\n",
"```\n",
"\n",
"It is **CRITICALLY IMPORTANT** to maintain the path structure. In the example above, you could change the `/path/to/working/dir` but the `csds/2022/04` is not something that you may change. The reader will not work because the folder structure is part of the key used to read each file."
"It is **CRITICALLY IMPORTANT** to maintain the path structure. In the example above, you could change the `/PATH/TO/ROOT` but the `csds/2022/04` is not something that you may change. The reader will not work because the folder structure is part of the key used to read each file."
]
},
{
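The reason the path structure matters is that the reader derives each file's key from its path relative to the root. A sketch of that relationship — `relative_key` is a hypothetical helper, not a dsdk function:

```python
import os

# Hypothetical helper (not a dsdk API): recover the S3-style key from a
# local file path, given the sync root. The part after the root must stay
# identical to the S3 prefix, e.g. csds/2022/04/...
def relative_key(root, file_path):
    return os.path.relpath(file_path, root).replace(os.sep, '/')

root = '/PATH/TO/ROOT'
file_path = os.path.join(root, 'csds', '2022', '04', 'some_file.parquet')
key = relative_key(root, file_path)
print(key)
# csds/2022/04/some_file.parquet
```

If the `csds/2022/04` segment were renamed or flattened locally, the derived key would no longer match and the reader would fail to resolve the file.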