|
1 | 1 | import logging
|
2 |
| -import urllib |
3 | 2 | import urllib.request as urllib_request
|
4 | 3 | from itertools import filterfalse
|
5 | 4 | from typing import Iterable, List, Tuple, Union
|
|
13 | 12 |
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Base URLs of the ZINC web interface: the free-text substance search (the
# query is appended after `q=`) and the per-substance page (the ZINC ID is
# appended to the path).
ZINC_DRUG_SEARCH_ROOT = 'http://zinc.docking.org/substances/search/?q='
ZINC_ID_SEARCH_ROOT = 'http://zinc.docking.org/substances/'

# Fixed segments of the PubChem request path. get_smiles_from_pubchem joins
# them with '/' as: START / query_type / drug / MID / option / END.
PUBCHEM_START = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound'
PUBCHEM_MID = 'property'
PUBCHEM_END = 'TXT'
|
22 | 18 |
|
23 | 19 |
|
24 |
def get_smiles_from_zinc(drug: Union[str, int]) -> str:
    """
    Uses the ZINC databases to retrieve the SMILES of a ZINC ID (int) or a drug
    name (str).

    A drug name is first resolved to a ZINC ID through the ZINC substance
    search page (the first hit is used). The substance page of that ID is then
    scanned for the SMILES input field.

    Args:
        drug (Union[str, int]): a string with a drug name or an int of a ZINC
            ID.

    Returns:
        smiles (str): The SMILES string of the drug name or ZINC ID. An empty
            string is returned (and a warning is logged) if the name search
            fails with an HTTP error or yields no hit, or if the substance
            page carries no SMILES field.

    Raises:
        TypeError: if drug is neither a str nor an int (bools are rejected).
    """
    # Function-scope imports keep this block self-contained.
    from urllib.error import HTTPError
    from urllib.parse import quote

    # bool is a subclass of int, but it was never accepted as a ZINC ID.
    if isinstance(drug, bool) or not isinstance(drug, (str, int)):
        raise TypeError(
            f'Please insert drug of type {{str, int}}, given was {type(drug)}'
            f'({drug}).'
        )

    if isinstance(drug, str):
        # Percent-encode exactly once. Replacing spaces with '%20' and then
        # quoting again would turn every '%' into '%25' and corrupt the query.
        query = quote(unidecode(drug).strip(), safe='')
        try:
            with urllib_request.urlopen(ZINC_DRUG_SEARCH_ROOT + query) as response:
                zinc_id = _first_zinc_id(response)
        except HTTPError:
            logger.warning(f'Did not find any result for drug: {drug}')
            return ''
        # A search can succeed at HTTP level and still list no substance.
        if zinc_id is None:
            logger.warning(f'Did not find any result for drug: {drug}')
            return ''
    else:
        zinc_id = str(drug)

    with urllib_request.urlopen(ZINC_ID_SEARCH_ROOT + zinc_id) as id_response:
        smiles = _smiles_from_substance_page(id_response)

    if smiles is None:
        logger.warning(f'Did not find any SMILES for ZINC ID: {zinc_id}')
        return ''
    return smiles


def _first_zinc_id(lines: Iterable[bytes]) -> Union[str, None]:
    """Return the first ZINC ID linked in a search-result page, or None."""
    for raw_line in lines:
        line = raw_line.decode(encoding='UTF-8').strip()
        if 'href="/substances/ZINC' in line:
            # The ID is the path segment just before the trailing slash.
            return line.split('/')[-2]
    return None


def _smiles_from_substance_page(lines: Iterable[bytes]) -> Union[str, None]:
    """Return the value of the SMILES input field of a substance page, or None."""
    smiles = None
    for raw_line in lines:
        line = raw_line.decode(encoding='UTF-8').strip()
        if 'id="substance-smiles-field" readonly value=' in line:
            # The SMILES is the last double-quoted value on the line; if
            # several lines match, the last one wins, as before.
            smiles = line.split('"')[-2]
    return smiles
74 |
| - |
75 |
| - |
76 | 20 | def get_smiles_from_pubchem(
|
77 | 21 | drug: Union[str, int],
|
78 | 22 | query_type: str = 'name',
|
@@ -122,15 +66,13 @@ def get_smiles_from_pubchem(
|
122 | 66 | if isinstance(drug, str):
|
123 | 67 | drug = unidecode(drug).strip().replace(' ', '%20')
|
124 | 68 |
|
125 |
| - # Search ZINC for compound name |
| 69 | + # Search in PubChem for compound name |
126 | 70 | for option in options:
|
127 | 71 | try:
|
128 | 72 | path = '{}/{}/{}/{}/{}/{}'.format(
|
129 | 73 | PUBCHEM_START, query_type, drug, PUBCHEM_MID, option, PUBCHEM_END
|
130 | 74 | )
|
131 |
| - smiles = ( |
132 |
| - urllib_request.urlopen(path).read().decode('UTF-8').replace('\n', '') |
133 |
| - ) |
| 75 | + smiles = urllib_request.urlopen(path).read().decode('UTF-8').split()[0] |
134 | 76 | if not kekulize:
|
135 | 77 | smiles = Chem.MolToSmiles(Chem.MolFromSmiles(smiles, sanitize=sanitize))
|
136 | 78 | return smiles
|
|
0 commit comments