Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/jhuapl-bio/mytax
Browse files Browse the repository at this point in the history
  • Loading branch information
Merritt-Brian committed Mar 20, 2024
2 parents cefca4b + ed41f78 commit df42d93
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 3 deletions.
23 changes: 20 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,21 +1,38 @@
# Mytax2 - Realtime reporting


## Create Conda Environment
```
conda env create -f environment.yml
```


## Project setup
```
npm install
```

### Compiles and hot-reloads for development
## Starting Development

Run the below 2 commands:

### Compiles and hot-reloads for development on frontend
```
npm run serve
```

### Compiles and minifies for production
### Compiles and hot-reloads for development on server
```
npm run server
```


## Building Production package
```
npm run build
```

### Lints and fixes files
### Lints and fixes files (development only)
```
npm run lint
```
Expand Down
102 changes: 102 additions & 0 deletions server/src/create_name_tab.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# ANSI escape sequences used to colour terminal messages.
# Lowercase names are the regular weight, uppercase names the bold variant;
# NC resets the terminal back to its default colour.
# The values are literal backslash sequences, so they only take effect when
# printed through something that interprets escapes (e.g. `echo -e`).
red="\033[0;31m";    RED="\033[1;31m"
green="\033[0;32m";  GREEN="\033[1;32m"
yellow="\033[0;33m"; YELLOW="\033[1;33m"
blue="\033[0;34m";   BLUE="\033[1;34m"
purple="\033[0;35m"; PURPLE="\033[1;35m"
cyan="\033[0;36m";   CYAN="\033[1;36m"
NC="\033[0m"

# usage function
#######################################
# Print the command-line help text.
# Globals:   YELLOW, NC (read) - colour escapes wrapped around the script name
# Arguments: none
# Outputs:   seven lines of help on stdout (the last one is blank)
#######################################
usage() {
  local help_line
  local -a help_lines=(
    "usage: ${YELLOW}$0${NC} [options]"
    "Creating a taxonomy.tab for a buggy krona script, primarily for the custom taxids from this repo"
    "OPTIONS:"
    " -h show this message"
    " -i names.dmp and nodes.dmp containing directory"
    " -o output taxonomy.tab file. Default is the same location as the -i parameter "
    ""
  )
  # echo -e is kept so the colour escape sequences are interpreted.
  for help_line in "${help_lines[@]}"; do
    echo -e "$help_line"
  done
}

#######################################
# Print a hint asking the user to install gawk.
# Globals:   RED, NC (read) - colour escapes wrapped around the message
# Arguments: none
# Outputs:   four lines on stderr (blank, message, blank, blank); nothing on stdout
#######################################
gawk_install() {
  local hint_line
  # One redirection for the whole group instead of one per echo.
  {
    for hint_line in \
      "" \
      " ${RED}Please make sure gawk is installed.${NC}" \
      "" \
      ""; do
      echo -e "$hint_line"
    done
  } >&2
}
# gawk is required by the final step of this script. Fail early with the
# install hint instead of discovering the missing tool only after the
# arguments were parsed and the output file was already created.
if ! command -v gawk >/dev/null 2>&1; then
  gawk_install
  exit 2
fi
# First line of `gawk --version`. It is not read anywhere else in the visible
# script; the assignment is kept in case other tooling relies on it.
awk_version=$(gawk --version | head -n1)


#---------------------------------------------------------------------------------------------------
# set default values here

#---------------------------------------------------------------------------------------------------
# parse input arguments
# Parse command-line options:
#   -h         print the help text and exit (status 1, as before)
#   -i <dir>   directory containing names.dmp and nodes.dmp -> $input
#   -o <file>  path of the taxonomy.tab to write            -> $output
while getopts "hi:o:" OPTION; do
  case "$OPTION" in
    h) usage; exit 1 ;;
    i) input=$OPTARG ;;
    o) output=$OPTARG ;;
    # getopts reports an unknown flag or a missing argument as a literal "?".
    # The pattern is escaped so it is not treated as a one-character glob, and
    # the exit status is explicit: a bare `exit` returned usage's status (0),
    # making an invalid invocation look successful.
    \?) usage; exit 2 ;;
  esac
done
# check input arguments
# Validate the parsed arguments and fill in defaults.
# The input directory is checked first: the default output path is derived
# from it, so computing that default before knowing -i was given produced a
# misleading warning and a transient "/taxonomy.tab" value.
if [[ -z "$input" ]]; then
  echo -e "${RED}ERROR: no input path that contains a names and nodes dmp file -i ${input} ${NC}" >&2
  usage
  exit 2
fi

# No -o given: write taxonomy.tab next to the dump files.
if [[ -z "$output" ]]; then
  echo -e "${CYAN}Warning: no output path for tab file specified, putting in -i ${input} ${NC}" >&2
  output="$input/taxonomy.tab"
fi

# Both dump files must exist and be non-empty (-s).
if [[ ! -s "$input/names.dmp" || ! -s "$input/nodes.dmp" ]]; then
  echo -e "${RED}ERROR: names or nodes.dmp file dont exist in $input -i ${input}, exiting.... ${NC}" >&2
  usage
  exit 2
fi

# Echo the resolved paths, one per line. printf with quoted arguments keeps
# paths containing spaces or glob characters intact.
printf '%s\n' "$input" "$output"


# Field layout as consumed below. The files are split on single tabs; the
# field numbers are consistent with the NCBI taxdump "\t|\t" separator, which
# would put logical column N in awk field 2N-1 -- TODO confirm against the
# dump files actually shipped with this repo.
#
# names.dmp
#   $1 taxid
#   $3 text label of the taxid
#   $7 category of the label, e.g. synonym, common name, scientific name
#
# nodes.dmp
#   $1 child taxid (current)
#   $3 parent taxid (not used here)
#   $5 tax rank of the child

#######################################
# Join nodes.dmp and names.dmp into "name<TAB>taxid<TAB>rank" rows.
# Arguments: $1 - directory containing nodes.dmp and names.dmp
# Outputs:   header line plus one row per label whose taxid also appears in
#            nodes.dmp, written to stdout. Taxids come out in awk's
#            unspecified `for (k in array)` order; labels of one taxid keep
#            their order of appearance in names.dmp.
# Returns:   exit status of gawk
#######################################
_build_taxonomy_tab() {
  local dmp_dir=$1
  gawk -F '\t' '
    # First file (nodes.dmp): remember the rank of every taxid.
    NR == FNR {
      rank_of[$1] = $5
      next
    }
    # Second file (names.dmp): collect every label that has a category.
    # The per-taxid count lives in its own array so that names[$1] is never
    # referenced before the subarray exists.
    $7 != "" {
      names[$1][++name_count[$1]] = $3
    }
    END {
      print "name\ttaxid\trank"
      for (taxid in names) {
        if (taxid in rank_of) {
          for (i = 1; i <= name_count[taxid]; i++) {
            print names[taxid][i] "\t" taxid "\t" rank_of[taxid]
          }
        }
      }
    }
  ' "$dmp_dir/nodes.dmp" "$dmp_dir/names.dmp"
}

# Quoted so that directories and output paths containing spaces work.
_build_taxonomy_tab "$input" > "$output"

0 comments on commit df42d93

Please sign in to comment.