Skip to content

Commit df42d93

Browse files
committed
Merge branch 'master' of https://github.com/jhuapl-bio/mytax
2 parents cefca4b + ed41f78 commit df42d93

File tree

2 files changed

+122
-3
lines changed

2 files changed

+122
-3
lines changed

README.md

+20-3
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,38 @@
11
# Mytax2 - Realtime reporting
22

3+
4+
## Create Conda Environment
5+
```
6+
conda env create -f environment.yml
7+
```
8+
9+
310
## Project setup
411
```
512
npm install
613
```
714

8-
### Compiles and hot-reloads for development
15+
## Starting Development
16+
17+
Run the two commands below:
18+
19+
### Compiles and hot-reloads for development on frontend
920
```
1021
npm run serve
1122
```
1223

13-
### Compiles and minifies for production
24+
### Compiles and hot-reloads for development on server
25+
```
26+
npm run server
27+
```
28+
29+
30+
## Building Production package
1431
```
1532
npm run build
1633
```
1734

18-
### Lints and fixes files
35+
### Lints and fixes files (development only)
1936
```
2037
npm run lint
2138
```

server/src/create_name_tab.sh

+102
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# ANSI colour escape sequences used to decorate terminal messages.
# Each hue comes as a pair: the lowercase name selects the regular weight
# (attribute 0), the uppercase name the bold weight (attribute 1).
# NC resets all attributes. The values are kept in single quotes, so they hold
# a literal backslash and only become real escapes when printed via `echo -e`.
red='\033[0;31m';    RED='\033[1;31m'
green='\033[0;32m';  GREEN='\033[1;32m'
yellow='\033[0;33m'; YELLOW='\033[1;33m'
blue='\033[0;34m';   BLUE='\033[1;34m'
purple='\033[0;35m'; PURPLE='\033[1;35m'
cyan='\033[0;36m';   CYAN='\033[1;36m'
NC='\033[0m'
15+
16+
# usage function
17+
usage() {
18+
echo -e "usage: ${YELLOW}$0${NC} [options]"
19+
echo -e "Creating a taxonomy.tab for a buggy krona script, primarily for the custom taxids from this repo"
20+
echo -e "OPTIONS:"
21+
echo -e " -h show this message"
22+
echo -e " -i names.dmp and nodes.dmp containing directory"
23+
echo -e " -o output taxonomy.tab file. Default is the same location as the -i parameter "
24+
echo -e ""
25+
}
26+
27+
#######################################
# Print a hint asking the user to install gawk.
# Globals:   RED, NC (read) - colour escapes wrapped around the message
# Arguments: none
# Outputs:   one blank line, the hint, then two blank lines - all on stderr
#######################################
gawk_install() {
  # A single redirection on the group sends every line to stderr.
  {
    echo -e ""
    echo -e " ${RED}Please make sure gawk is installed.${NC}"
    echo -e ""
    echo -e ""
  } >&2
}
33+
# Stop early when gawk is unavailable. The table-building step at the end of
# this script uses gawk-only arrays of arrays, so carrying on without gawk
# could only fail later with a less helpful "command not found" message.
if ! command -v gawk >/dev/null 2>&1; then
  gawk_install
  exit 1
fi
# First line of `gawk --version`, kept for diagnostics.
awk_version=$(gawk --version | head -n1)
34+
35+
36+
#---------------------------------------------------------------------------------------------------
37+
# set default values here
38+
39+
#---------------------------------------------------------------------------------------------------
40+
# parse input arguments
41+
# Parse command-line options.
#   -h  print the usage text and exit successfully
#   -i  directory that contains names.dmp and nodes.dmp (required)
#   -o  path of the taxonomy.tab file to write (optional)
while getopts "hi:o:" OPTION; do
  case "$OPTION" in
    h) usage; exit 0 ;;
    i) input=$OPTARG ;;
    o) output=$OPTARG ;;
    # Unknown option or missing argument: getopts has already printed a
    # diagnostic, so show the help on stderr and report the misuse with a
    # non-zero status instead of exiting 0.
    *) usage >&2; exit 2 ;;
  esac
done

# check input arguments
# The input directory is validated first because the default output location
# is derived from it.
if [[ -z "$input" ]]; then
  echo -e "${RED}ERROR: no input path that contains a names and nodes dmp file -i ${input} ${NC}" >&2
  usage >&2
  exit 2
fi

# Fall back to <input>/taxonomy.tab when no explicit output path was given.
if [[ -z "$output" ]]; then
  echo -e "${CYAN}Warning: no output path for tab file specified, putting in -i ${input} ${NC}" >&2
  output="$input/taxonomy.tab"
fi

# Both dump files must exist and be non-empty (-s).
if [[ ! -s "$input/names.dmp" || ! -s "$input/nodes.dmp" ]]; then
  echo -e "${RED}ERROR: names or nodes.dmp file dont exist in $input -i ${input}, exiting.... ${NC}" >&2
  usage >&2
  exit 2
fi

# Report the resolved paths. printf with quoted arguments prints them verbatim,
# without the word splitting and globbing an unquoted echo would apply.
printf '%s\n' "$input"
printf '%s\n' "$output"
68+
69+
70+
# names.dmp file
71+
#1 is taxid (current)
72+
#2 is the text label of taxid
73+
#3 is category of (current) e.g. synonym, common name, scientific name, etc
74+
75+
# nodes.dmp file
76+
# 1 is child taxid (current)
77+
# 2 is parent taxid
78+
# 3 is tax rank of child (current)
79+
80+
# Join nodes.dmp and names.dmp into a three-column, tab-separated table
# (name, taxid, rank) and write it to $output.
# With a plain tab as field separator the script reads field 5 of nodes.dmp as
# the rank and fields 3 and 7 of names.dmp as the name and its category.
# NOTE(review): this presumes the NCBI dump layout in which the "|" delimiters
# occupy the even-numbered tab fields - confirm against the dump files in use.
# Paths are quoted so directories containing spaces or glob characters work.
gawk -F "\t" '
  # First file (nodes.dmp): remember the rank of every taxid.
  NR == FNR {
    mapping[$1] = $5
    next
  }

  # Second file (names.dmp): keep every name whose seventh field is non-empty.
  # An explicit per-taxid counter gives each name the next 1-based slot
  # without calling length() on a sub-array that may not exist yet.
  $7 != "" {
    names[$1][++count[$1]] = $3
  }

  END {
    print "name\ttaxid\trank"
    # Only taxids that also appeared in the first file are emitted.
    for (taxid in names) {
      if (taxid in mapping) {
        for (i = 1; i <= count[taxid]; i++) {
          print names[taxid][i] "\t" taxid "\t" mapping[taxid]
        }
      }
    }
  }
' "$input/nodes.dmp" "$input/names.dmp" > "$output"

0 commit comments

Comments
 (0)