
Commit 7fa914d

latest changes for compatibility with reinvent.v3.2
1 parent 1bb2f74 commit 7fa914d

252 files changed: +3091 -1558 lines

README.md

+13 -7

@@ -18,9 +18,9 @@ $ conda activate reinvent_scoring
 ## Run tests
 The tests use the `unittest` package testing framework. Before you can run the tests make sure that you have created a
 `config.json` file in the `reinvent_scoring/configs` directory. There is an example config in the same directory, which
-you can base your own config off of. The easiest way is to make a copy of the example config and name it `config.json`.
-Make sure that you set `MAIN_TEST_PATH` in the `config.json` to a non-existent directory; it is where temporary files will be
-written during the tests; if it is set to an existing directory, that directory will be removed once the tests have finished.
+you can base your own config off of. Make sure that you set `MAIN_TEST_PATH` to a non-existent directory; it is where
+temporary files will be written during the tests; if it is set to an existing directory, that directory will be removed
+once the tests have finished.
 
 Some tests require a proprietary OpenEye license; you have to set up a few things to make the tests read your
 license. The simple way is to just set the `OE_LICENSE` environment variable to the path of the file containing the

@@ -50,15 +50,21 @@ unset OE_LICENSE
 Once you have created the files, deactivate and re-activate the environment, and `echo $OE_LICENSE` should output the
 path to the license file.
 
-Once you have created a config file and configured your environment, you can run the tests, located in the
-`unittest_reinvent` directory, by running
+Once you have created a config file and configured your environment, you can run the unit tests by running
 
+```bash
+python main_test.py --unittests
 ```
-$ python main_test.py
+
+If you have a valid OpenEye license and the other dependencies (such as Icolos and AZDOCK) configured,
+you can also run the integration tests with the command below (remember to pass your own configuration, since the default one is the test config):
+
+```bash
+python main_test.py --integration --base_config <path to your configuration>
 ```
 
 # Building
 - Building: `python setup.py sdist bdist_wheel`
-- Upload build to test: `$python -m twine upload --repository testpypi dist/*`
+- Upload build to test: `python -m twine upload --repository testpypi dist/*`
 - Upload build: `python -m twine upload dist/*`

environment.yml

+2 -3

@@ -211,10 +211,9 @@ dependencies:
   - markdown==3.2.1
   - opt-einsum==3.2.0
   - protobuf==3.11.3
-  - reinvent-chemistry==0.0.40
-  - reinvent-models==0.0.12
+  - reinvent-chemistry==0.0.50
   - tensorboard==1.15.0
   - tensorflow==1.15.2
   - tensorflow-estimator==1.15.1
   - termcolor==1.1.0
-  - werkzeug==1.0.0
+  - werkzeug==1.0.0

main_test.py

+25 -4

@@ -1,11 +1,32 @@
 #!/usr/bin/env python
 # coding=utf-8
+import pytest
+import argparse
 
-import unittest
 
-from unittest_reinvent.scoring_tests import *
-from unittest_reinvent.diversity_filter_tests import *
+TESTS_FOLDER = 'unittest_reinvent'
+
+
+parser = argparse.ArgumentParser(description='Run reinvent_scoring tests')
+parser.add_argument(
+    '--unittests', action='store_true',
+    help='Only run unittests (Please indicate either integration or unittests flag)'
+)
+parser.add_argument(
+    '--integration', action='store_true',
+    help='Only run integration tests (Please indicate either integration or unittests flag)'
+)
+
+args, _ = parser.parse_known_args()
+
+
+if args.unittests:
+    pytest_args = ['-m', 'not integration', TESTS_FOLDER]
+elif args.integration:
+    pytest_args = ['-m', 'integration', TESTS_FOLDER]
+else:
+    raise Exception('Please provide either --unittests or --integration flag.')
 
 
 if __name__ == '__main__':
-    unittest.main()
+    pytest.main(pytest_args)
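The new runner dispatches to pytest using marker expressions: `--unittests` selects everything under `unittest_reinvent` that is *not* marked `integration`, while `--integration` selects only the marked tests. A rough sketch of how a test opts in (assuming the `integration` marker is registered in the project's pytest configuration, which is not shown in this commit):

```python
# Hypothetical test module, e.g. somewhere under unittest_reinvent/ (illustrative only).
import pytest


@pytest.mark.integration  # picked up by `python main_test.py --integration`,
                          # skipped by the `-m "not integration"` expression in --unittests mode
def test_docking_backend_runs():
    assert True  # placeholder for a test that needs Icolos/AZDOCK/OpenEye
```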

reinvent_scoring/configs/__init__.py

Whitespace-only changes.

reinvent_scoring/configs/config.py

+30

@@ -0,0 +1,30 @@
import argparse
import json
import os
from pathlib import Path


DEFAULT_BASE_CONFIG_PATH = (Path(__file__).parent / 'test_config.json').resolve()

parser = argparse.ArgumentParser(description='Reinvent Scoring configuration parser')
parser.add_argument(
    '--base_config', type=str, default=DEFAULT_BASE_CONFIG_PATH,
    help='Path to basic configuration for Reinvent Scoring environment.'
)


def read_json_file(path):
    with open(path) as f:
        json_input = f.read().replace('\r', '').replace('\n', '')
    try:
        return json.loads(json_input)
    except (ValueError, KeyError, TypeError) as e:
        print(f"JSON format error in file ${path}: \n ${e}")


args, _ = parser.parse_known_args()

reinvent_scoring_config = read_json_file(args.base_config)

for key, value in reinvent_scoring_config['ENVIRONMENTAL_VARIABLES'].items():
    os.environ[key] = value
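Importing this module parses `--base_config` (defaulting to `test_config.json` next to it), reads the JSON file, and exports every entry under `ENVIRONMENTAL_VARIABLES` into the process environment. A minimal sketch of the intended effect; the import path mirrors the file location, and how the test suite actually imports it is not shown in this excerpt:

```python
import os

# Importing the module has the side effect of populating os.environ
# from the JSON file given via --base_config (or test_config.json by default).
import reinvent_scoring.configs.config  # noqa: F401

# Keys come from the config's ENVIRONMENTAL_VARIABLES section,
# e.g. PIP_URL / PIP_KEY / PIP_GET_RESULTS in the bundled test config.
print(os.environ.get("PIP_URL"))
```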

reinvent_scoring/configs/example.config.json

+5 -6

@@ -14,15 +14,14 @@
       "DOCKSTREAM_ENV_PATH": "/<your_path>/miniconda3/envs/DockStream/bin/python",
       "DOCKSTREAM_DEBUG": true
     },
-    "AZGARD": {
-      "AZGARD_EXECUTOR_SCRIPT_PATH": "/<your_path>/executor.py",
-      "AZGARD_ENV_PATH": "/<your_path>/miniconda3/envs/AZgard/bin/python",
-      "AZGARD_DEBUG": true
+    "ICOLOS": {
+      "ICOLOS_EXECUTOR_PATH": "/<your_path>/miniconda3/envs/icolosprod/bin/icolos",
+      "ICOLOS_DEBUG": true
     }
   },
   "ENVIRONMENTAL_VARIABLES": {
     "PIP_URL": "<confidential info>",
-    "PIP_KEY": "<confidential info>",
-    "PIP_GET_RESULTS": "<relevant only for batching implemented in BasePiPModelBatchingComponent>"
+    "PIP_KEY": "<contact Atanas>",
+    "PIP_GET_RESULTS": "<relevant only for batching implemented in BasePiPModelBatchingComponent>",
   }
 }
+25

@@ -0,0 +1,25 @@
{
  "DEVELOPMENT_ENVIRONMENT": true,
  "MAIN_TEST_PATH": "tmp_test_folder",
  "COMPONENT_SPECIFIC": {
    "AZDOCK": {
      "AZDOCK_DOCKER_SCRIPT_PATH": "/opt/scp/services/reinvent/docking/azdock/docker.py",
      "AZDOCK_ENV_PATH": "/opt/scp/services/reinvent/miniconda3/envs/AZdock/bin/python",
      "AZDOCK_DEBUG": true
    },
    "DOCKSTREAM": {
      "DOCKSTREAM_DOCKER_SCRIPT_PATH": "/opt/scp/services/reinvent/docking/azdock/docker.py",
      "DOCKSTREAM_ENV_PATH": "/opt/scp/services/reinvent/miniconda3/envs/AZdock/bin/python",
      "DOCKSTREAM_DEBUG": true
    },
    "ICOLOS": {
      "ICOLOS_EXECUTOR_PATH": "/<your_path>/miniconda3/envs/icolosprod/bin/icolos",
      "ICOLOS_DEBUG": true
    }
  },
  "ENVIRONMENTAL_VARIABLES": {
    "PIP_URL": "https://pip.dummy.net/bapi/{}/predict",
    "PIP_KEY": "something-secret",
    "PIP_GET_RESULTS": "https://pip.dummy.net"
  }
}

reinvent_scoring/scoring/__init__.py

+1 -1

@@ -6,4 +6,4 @@
 from reinvent_scoring.scoring.score_summary import FinalSummary, ComponentSummary, LoggableComponent
 from reinvent_scoring.scoring.score_transformations import TransformationFactory
 from reinvent_scoring.scoring.scoring_function_factory import ScoringFunctionFactory
-from reinvent_scoring.scoring.scoring_function_parameters import ScoringFunctionParameters, ScoringFuncionParameters
+from reinvent_scoring.scoring.scoring_function_parameters import ScoringFunctionParameters
@@ -1,4 +1,3 @@
-from typing import List
 from dataclasses import dataclass
 
 
@@ -7,6 +6,4 @@ class ComponentParameters:
     component_type: str
     name: str
     weight: float
-    smiles: List[str]
-    model_path: str
     specific_parameters: dict = None
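With `smiles` and `model_path` gone from the dataclass, anything component-specific now travels in `specific_parameters`. A minimal construction sketch; the import path and the parameter values are assumptions for illustration, not taken from this commit:

```python
# Assumed module path for the dataclass shown in the diff above.
from reinvent_scoring.scoring.component_parameters import ComponentParameters

params = ComponentParameters(
    component_type="tanimoto_similarity",                # illustrative component type
    name="similarity to reference",
    weight=1.0,
    specific_parameters={"smiles": ["c1ccccc1C(=O)O"]},  # settings that used to be dedicated fields
)
```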
@@ -0,0 +1,3 @@
from reinvent_scoring.scoring.diversity_filters.curriculum_learning.diversity_filter_memory import DiversityFilterMemory
from reinvent_scoring.scoring.diversity_filters.curriculum_learning.diversity_filter_parameters import \
    DiversityFilterParameters
@@ -0,0 +1,63 @@
import abc

import numpy as np
import pandas as pd
from reinvent_chemistry.conversions import Conversions

from reinvent_scoring.scoring.diversity_filters.curriculum_learning import DiversityFilterParameters, \
    DiversityFilterMemory
from reinvent_scoring.scoring.diversity_filters.curriculum_learning.loggable_data_dto import UpdateLoggableDataDTO
from reinvent_scoring.scoring.diversity_filters.curriculum_learning.memory_record_dto import MemoryRecordDTO
from reinvent_scoring.scoring.diversity_filters.curriculum_learning.update_diversity_filter_dto import \
    UpdateDiversityFilterDTO


class BaseDiversityFilter(abc.ABC):

    @abc.abstractmethod
    def __init__(self, parameters: DiversityFilterParameters):
        self.parameters = parameters
        self._diversity_filter_memory = DiversityFilterMemory()
        self._chemistry = Conversions()

    @abc.abstractmethod
    def update_score(self, update_dto: UpdateDiversityFilterDTO) -> np.array:
        raise NotImplementedError("The method 'update_score' is not implemented!")

    def get_memory_as_dataframe(self) -> pd.DataFrame:
        return self._diversity_filter_memory.get_memory()

    def set_memory_from_dataframe(self, memory: pd.DataFrame):
        self._diversity_filter_memory.set_memory(memory)

    def number_of_smiles_in_memory(self) -> int:
        return self._diversity_filter_memory.number_of_smiles()

    def number_of_scaffold_in_memory(self) -> int:
        return self._diversity_filter_memory.number_of_scaffolds()

    def update_bucket_size(self, bucket_size: int):
        self.parameters.bucket_size = bucket_size

    def _calculate_scaffold(self, smile):
        raise NotImplementedError

    def _smiles_exists(self, smile):
        return self._diversity_filter_memory.smiles_exists(smile)

    def _add_to_memory(self, memory_dto: MemoryRecordDTO):
        self._diversity_filter_memory.update(memory_dto)

    def _penalize_score(self, scaffold, score):
        """Penalizes the score if the scaffold bucket is full"""
        if self._diversity_filter_memory.scaffold_instances_count(scaffold) > self.parameters.bucket_size:
            score = 0.
        return score

    def _compose_loggable_data(self, dto: UpdateLoggableDataDTO):
        prior_likelihood = f'{dto.prior_likelihood}|' if dto.prior_likelihood else ''
        likelihood = f'{dto.likelihood}|' if dto.likelihood else ''
        input = f'{dto.input}|' if dto.input else ''
        output = f'{dto.output}' if dto.output else ''
        loggable_data = f'{prior_likelihood}{likelihood}{input}{output}'
        return loggable_data
@@ -0,0 +1,9 @@
from dataclasses import dataclass


@dataclass(frozen=True)
class ColumnNamesEnum:
    STEP: str = "Step"
    SCAFFOLD: str = "Scaffold"
    SMILES: str = "SMILES"
    METADATA: str = "Metadata"
@@ -0,0 +1,22 @@
from reinvent_scoring.scoring.diversity_filters.curriculum_learning import DiversityFilterParameters
from reinvent_scoring.scoring.diversity_filters.curriculum_learning.base_diversity_filter import BaseDiversityFilter
from reinvent_scoring.scoring.diversity_filters.curriculum_learning.identical_murcko_scaffold import \
    IdenticalMurckoScaffold
from reinvent_scoring.scoring.diversity_filters.curriculum_learning.identical_topological_scaffold import \
    IdenticalTopologicalScaffold
from reinvent_scoring.scoring.diversity_filters.curriculum_learning.no_filter import NoFilter
from reinvent_scoring.scoring.diversity_filters.curriculum_learning.no_filter_with_penalty import NoFilterWithPenalty
from reinvent_scoring.scoring.diversity_filters.curriculum_learning.scaffold_similarity import ScaffoldSimilarity


class DiversityFilter:

    def __new__(cls, parameters: DiversityFilterParameters) -> BaseDiversityFilter:
        all_filters = dict(IdenticalMurckoScaffold=IdenticalMurckoScaffold,
                           NoFilterWithPenalty=NoFilterWithPenalty,
                           IdenticalTopologicalScaffold=IdenticalTopologicalScaffold,
                           ScaffoldSimilarity=ScaffoldSimilarity,
                           NoFilter=NoFilter
                           )
        div_filter = all_filters.get(parameters.name, KeyError(f"Invalid filter name: `{parameters.name}'"))
        return div_filter(parameters)
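`DiversityFilter` is a small factory: `__new__` looks up the concrete filter class by `parameters.name` and returns an instance of it. A usage sketch; the module path of the factory file is assumed, since the file name is not shown in this excerpt:

```python
from reinvent_scoring.scoring.diversity_filters.curriculum_learning import DiversityFilterParameters
# Assumed path of the factory module shown above.
from reinvent_scoring.scoring.diversity_filters.curriculum_learning.diversity_filter import DiversityFilter

# Pick a filter by name; bucket_size and minsimilarity fall back to the
# dataclass defaults (25 and 0.4) unless overridden here.
params = DiversityFilterParameters(name="IdenticalMurckoScaffold")
scaffold_filter = DiversityFilter(params)  # returns an IdenticalMurckoScaffold instance
```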
@@ -0,0 +1,67 @@
from typing import List, Dict

import pandas as pd

from reinvent_scoring.scoring.diversity_filters.curriculum_learning.column_names_enum import ColumnNamesEnum
from reinvent_scoring.scoring.diversity_filters.curriculum_learning.memory_record_dto import MemoryRecordDTO
from reinvent_scoring.scoring.score_summary import ComponentSummary
from reinvent_scoring.scoring.enums.scoring_function_component_enum import ScoringFunctionComponentNameEnum


class DiversityFilterMemory:

    def __init__(self):
        self._sf_component_name = ScoringFunctionComponentNameEnum()
        self._column_name = ColumnNamesEnum()
        df_dict = {self._column_name.STEP: [], self._column_name.SCAFFOLD: [], self._column_name.SMILES: [],
                   self._column_name.METADATA: []}
        self._memory_dataframe = pd.DataFrame(df_dict)

    def update(self, dto: MemoryRecordDTO):
        component_scores = {c.parameters.name: float(c.total_score[dto.id]) for c in dto.components}
        component_scores = self._include_raw_score(dto.id, component_scores, dto.components)
        component_scores[self._sf_component_name.TOTAL_SCORE] = float(dto.score)
        if not self.smiles_exists(dto.smile): self._add_to_memory_dataframe(dto, component_scores)

    def _add_to_memory_dataframe(self, dto: MemoryRecordDTO, component_scores: Dict):
        data = []
        headers = []
        for name, score in component_scores.items():
            headers.append(name)
            data.append(score)
        headers.append(self._column_name.STEP)
        data.append(dto.step)
        headers.append(self._column_name.SCAFFOLD)
        data.append(dto.scaffold)
        headers.append(self._column_name.SMILES)
        data.append(dto.smile)
        headers.append(self._column_name.METADATA)
        data.append(dto.loggable_data)
        new_data = pd.DataFrame([data], columns=headers)
        self._memory_dataframe = pd.concat([self._memory_dataframe, new_data], ignore_index=True, sort=False)

    def get_memory(self) -> pd.DataFrame:
        return self._memory_dataframe

    def set_memory(self, memory: pd.DataFrame):
        self._memory_dataframe = memory

    def smiles_exists(self, smiles: str):
        if len(self._memory_dataframe) == 0:
            return False
        return smiles in self._memory_dataframe[self._column_name.SMILES].values

    def scaffold_instances_count(self, scaffold: str):
        return (self._memory_dataframe[self._column_name.SCAFFOLD].values == scaffold).sum()

    def number_of_scaffolds(self):
        return len(set(self._memory_dataframe[self._column_name.SCAFFOLD].values))

    def number_of_smiles(self):
        return len(set(self._memory_dataframe[self._column_name.SMILES].values))

    def _include_raw_score(self, indx: int, component_scores: dict, components: List[ComponentSummary]):
        raw_scores = {f'raw_{c.parameters.name}': float(c.raw_score[indx]) for c in components if
                      c.raw_score is not None}
        all_scores = {**component_scores, **raw_scores}
        return all_scores
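The memory is a plain pandas DataFrame keyed by the `ColumnNamesEnum` columns, so the query helpers can be exercised directly. A small sketch using only the classes shown in this commit (the scaffold and SMILES strings are illustrative):

```python
import pandas as pd

from reinvent_scoring.scoring.diversity_filters.curriculum_learning.column_names_enum import ColumnNamesEnum
from reinvent_scoring.scoring.diversity_filters.curriculum_learning.diversity_filter_memory import DiversityFilterMemory

columns = ColumnNamesEnum()
memory = DiversityFilterMemory()

# Seed the memory with one record; in normal use update() appends rows built from a MemoryRecordDTO.
memory.set_memory(pd.DataFrame({
    columns.STEP: [0],
    columns.SCAFFOLD: ["c1ccccc1"],        # illustrative scaffold
    columns.SMILES: ["c1ccccc1CC(=O)O"],   # illustrative SMILES
    columns.METADATA: [""],
}))

print(memory.smiles_exists("c1ccccc1CC(=O)O"))      # True
print(memory.scaffold_instances_count("c1ccccc1"))  # 1
print(memory.number_of_scaffolds())                 # 1
```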
@@ -0,0 +1,10 @@
from dataclasses import dataclass


@dataclass
class DiversityFilterParameters:
    name: str
    minscore: float = 0.4
    bucket_size: int = 25
    minsimilarity: float = 0.4
    penalty_multiplier: float = 0.5
