Skip to content

Commit

Permalink
Update for latest dsdk (#7)
Browse files Browse the repository at this point in the history
* Update setup link, header tome name, root path ends

* Repair default values for notebook 2

* Remove path_depth and fix subheader tome name

* Add note about default tome names

* update csgo dsdk and dsdk

* Mention inclusive/exclusive for dates

* Use proper tome name. specify header tome b/c bug

* Use correct tome name convention

* Use the same example path as notebook 1

* change path to csds to path to root

Co-authored-by: Evan Sosenko <[email protected]>

* User sets header and tome paths. Warning for bad name.

* Update dsdk and csgodsdk

* Double check if dir exists, if not make it.

* Update 2 - Download data.ipynb

* Remove hotfix for bug from dsdk

* Fix convention typo

Co-authored-by: Evan Sosenko <[email protected]>

Co-authored-by: Evan Sosenko <[email protected]>
  • Loading branch information
billfreeman44 and razor-x authored Jun 12, 2022
1 parent b5956df commit 9323671
Show file tree
Hide file tree
Showing 8 changed files with 260 additions and 235 deletions.
34 changes: 19 additions & 15 deletions notebooks/tutorial/1 - Setup.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"source": [
"## Setup your AWS credentials\n",
"\n",
"Follow the [guide on setting up AWS credentials](https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/setup-credentials.html) to properly download things from the AWS Data Exchange."
"Follow the [guide on setting up AWS credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration) to properly download things from the AWS Data Exchange."
]
},
{
Expand All @@ -34,11 +34,15 @@
"Set up your environment to find all the yummy data science files on your computer box. We must set three things, \n",
"\n",
"1. Where you will save the data science files.\n",
"1. Where you will save your tomes (views of the data).\n",
"1. Where you will save your tomes (views of the data). Can be the same as step 1.\n",
"1. What to call your header tome.\n",
"\n",
"These will be saved in a `.env` file in the `notebooks` folder. \n",
"\n",
"By convention, the tome names are in this format: `tome_name.start-date,end-date.comment`\n",
"\n",
"The `start-date` (inclusive) and `end-date` (exclusive) are the beginning and end of the data included in that tome and are in yyyy-mm-dd format.\n",
"\n",
"_**Fill out with your specific paths in the cell below, then run the notebook.**_\n"
]
},
Expand All @@ -49,8 +53,10 @@
"metadata": {},
"outputs": [],
"source": [
"collection_path = 'PATH/TO/TOMES'\n",
"ds_collection_path = 'PATH/TO/CSDS'"
"import os\n",
"tome_collection_path = os.path.join('..','..','tmp')\n",
"ds_collection_path = os.path.join('..','..','tmp')\n",
"header_name = 'header_tome.2022-05-15,2022-05-15'"
]
},
{
Expand All @@ -60,35 +66,33 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import platform\n",
"import re\n",
"import warnings\n",
"\n",
"header_name = 'header_tome'\n",
"ds_type = 'csds'\n",
"if not re.match(\"\\S+.\\d{4}-\\d{2}-\\d{2},\\d{4}-\\d{2}-\\d{2}.*\\S*\",header_name):\n",
" warnings.warn(f'Header name of {header_name} does not match convention of tome_name.start-date,end-data.comment')\n",
"\n",
"if collection_path == 'PATH/TO/TOMES':\n",
" collection_path = os.path.join('..','..','tmp','tomes')\n",
"if ds_collection_path == 'PATH/TO/CSDS':\n",
" ds_collection_path = os.path.join('..','..','tmp','data')\n",
"ds_type = 'csds'\n",
" \n",
"collection_path = os.path.abspath(collection_path)\n",
"tome_collection_path = os.path.abspath(tome_collection_path)\n",
"ds_collection_path = os.path.abspath(ds_collection_path)\n",
"\n",
"system = platform.system()\n",
"with open(os.path.join('..','.env'), 'w') as f:\n",
" if system == 'Windows':\n",
" f.write(f'export PURESKILLGG_TOME_DEFAULT_HEADER_NAME={header_name}\\n')\n",
" f.write(f'export PURESKILLGG_TOME_COLLECTION_PATH={collection_path}\\n')\n",
" f.write(f'export PURESKILLGG_TOME_COLLECTION_PATH={tome_collection_path}\\n')\n",
" f.write(f'export PURESKILLGG_TOME_DS_COLLECTION_PATH={ds_collection_path}\\n')\n",
" f.write(f'export PURESKILLGG_TOME_DS_TYPE={ds_type}\\n')\n",
" elif system == 'Mac':\n",
" f.write(f'PURESKILLGG_TOME_DEFAULT_HEADER_NAME = {header_name}\\n')\n",
" f.write(f'PURESKILLGG_TOME_COLLECTION_PATH = {collection_path}\\n')\n",
" f.write(f'PURESKILLGG_TOME_COLLECTION_PATH = {tome_collection_path}\\n')\n",
" f.write(f'PURESKILLGG_TOME_DS_COLLECTION_PATH = {ds_collection_path}\\n')\n",
" f.write(f'PURESKILLGG_TOME_DS_TYPE = {ds_type}\\n')\n",
" else:\n",
" f.write(f'PURESKILLGG_TOME_DEFAULT_HEADER_NAME={header_name}\\n')\n",
" f.write(f'PURESKILLGG_TOME_COLLECTION_PATH={collection_path}\\n')\n",
" f.write(f'PURESKILLGG_TOME_COLLECTION_PATH={tome_collection_path}\\n')\n",
" f.write(f'PURESKILLGG_TOME_DS_COLLECTION_PATH={ds_collection_path}\\n')\n",
" f.write(f'PURESKILLGG_TOME_DS_TYPE={ds_type}\\n')\n",
" if system != 'Linux':\n",
Expand Down
3 changes: 3 additions & 0 deletions notebooks/tutorial/2 - Download data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@
"\n",
"ds_collection_path = os.environ.get('PURESKILLGG_TOME_DS_COLLECTION_PATH')\n",
"\n",
"if not os.path.isdir(ds_collection_path):\n",
" os.makedirs(ds_collection_path)\n",
"\n",
"for index, url in enumerate(dataset_sample_urls):\n",
" print(\"working on \",url)\n",
" response = requests.get(url)\n",
Expand Down
8 changes: 5 additions & 3 deletions notebooks/tutorial/3 - Make header tome.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
"outputs": [],
"source": [
"if not header_loader.exists:\n",
" header_loader = curator.create_header_tome(path_depth=7)"
" header_loader = curator.create_header_tome()"
]
},
{
Expand All @@ -110,7 +110,9 @@
"source": [
"## Make subheaders too\n",
"\n",
"You might want to analyze players on a specific map, rank, or platform. You can create \"subheaders\" that are a filtered view of the main header. The `create_subheader_tome` will create the subheader with the specified filter applied to the header tome."
"You might want to analyze players on a specific map, rank, or platform. You can create \"subheaders\" that are a filtered view of the main header. The `create_subheader_tome` will create the subheader with the specified filter applied to the header tome.\n",
"\n",
"Remember that the convention for tome names is: `tome_name.start-date,end-date.comment`"
]
},
{
Expand All @@ -123,7 +125,7 @@
"def map_name_selector(map_name):\n",
" return lambda df: df['map_name']==map_name\n",
"\n",
"subheader_loader = curator.create_subheader_tome('subheader_dust2', map_name_selector('de_dust2'))"
"subheader_loader = curator.create_subheader_tome('subheader_dust2.2022-05-15,2022-05-15', map_name_selector('de_dust2'))"
]
},
{
Expand Down
5 changes: 3 additions & 2 deletions notebooks/tutorial/5 - Create tome.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,9 @@
"outputs": [],
"source": [
"# Initialize our \"footsteps_by_rank\" tome\n",
"footsteps_tome_name = 'footsteps_by_rank.2022-05-15,2022-05-15'\n",
"tomer = curator.make_tome(\n",
" 'footsteps_by_rank',\n",
" footsteps_tome_name,\n",
" ds_reading_instructions=[\n",
" {\n",
" \"channel\": 'player_footstep',\n",
Expand Down Expand Up @@ -137,7 +138,7 @@
"metadata": {},
"outputs": [],
"source": [
"df = curator.get_dataframe('footsteps_by_rank')"
"df = curator.get_dataframe(footsteps_tome_name)"
]
},
{
Expand Down
5 changes: 3 additions & 2 deletions notebooks/tutorial/6 - Train data science models.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,9 @@
"metadata": {},
"outputs": [],
"source": [
"df = curator.get_dataframe('footsteps_by_rank')\n",
"keyset = curator.get_keyset('footsteps_by_rank')"
"footsteps_tome_name = 'footsteps_by_rank.2022-05-15,2022-05-15'\n",
"df = curator.get_dataframe(footsteps_tome_name)\n",
"keyset = curator.get_keyset(footsteps_tome_name)"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions notebooks/tutorial/7 - Getting data from the ADX.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -619,10 +619,10 @@
"There are many many ways to do this so we won't list them all here. We generally sync one month at a time with the AWS CLI like this:\n",
"\n",
"```\n",
"aws s3 sync s3://my-bucket/csds/2022/04/ /path/to/working/dir/csds/2022/04\n",
"aws s3 sync s3://my-bucket/csds/2022/04/ /PATH/TO/ROOT/csds/2022/04\n",
"```\n",
"\n",
"It is **CRITICALLY IMPORTANT** to maintain the path structure. In the example above, you could change the `/path/to/working/dir` but the `csds/2022/04` is not something that you may change. The reader will not work because the folder structure is part of the key used to read each file."
"It is **CRITICALLY IMPORTANT** to maintain the path structure. In the example above, you could change the `/PATH/TO/ROOT` but the `csds/2022/04` is not something that you may change. The reader will not work because the folder structure is part of the key used to read each file."
]
},
{
Expand Down
Loading

0 comments on commit 9323671

Please sign in to comment.