4
4
5
5
from os import makedirs
6
6
from os import path
7
- from urllib .request import urlopen , Request
7
+ from urllib .request import Request
8
8
from urllib .error import HTTPError
9
- from zipfile import ZipFile , is_zipfile
10
9
11
- from .base import ContentProvider
10
+ from .doi import DoiProvider
12
11
from ..utils import copytree , deep_get
13
- from ..utils import normalize_doi , is_doi
14
- from .. import __version__
15
12
16
13
17
- class Zenodo (ContentProvider ):
14
+ class Zenodo (DoiProvider ):
18
15
"""Provide contents of a Zenodo deposit."""
19
16
20
- def _urlopen (self , req , headers = None ):
21
- """A urlopen() helper"""
22
- # someone passed a string, not a request
23
- if not isinstance (req , Request ):
24
- req = Request (req )
25
-
26
- req .add_header ("User-Agent" , "repo2docker {}" .format (__version__ ))
27
- if headers is not None :
28
- for key , value in headers .items ():
29
- req .add_header (key , value )
30
-
31
- return urlopen (req )
32
-
33
- def _doi2url (self , doi ):
34
- # Transform a DOI to a URL
35
- # If not a doi, assume we have a URL and return
36
- if is_doi (doi ):
37
- doi = normalize_doi (doi )
38
-
39
- try :
40
- resp = self ._urlopen ("https://doi.org/{}" .format (doi ))
41
- # If the DOI doesn't resolve, just return URL
42
- except HTTPError :
43
- return doi
44
- return resp .url
45
- else :
46
- # Just return what is actually just a URL
47
- return doi
48
-
49
- def detect (self , doi , ref = None , extra_args = None ):
50
- """Trigger this provider for things that resolve to a Zenodo/Invenio record"""
17
+ def __init__ (self ):
51
18
# We need the hostname (url where records are), api url (for metadata),
52
19
# filepath (path to files in metadata), filename (path to filename in
53
20
# metadata), download (path to file download URL), and type (path to item type in metadata)
54
- hosts = [
21
+ self . hosts = [
55
22
{
56
23
"hostname" : ["https://zenodo.org/record/" , "http://zenodo.org/record/" ],
57
24
"api" : "https://zenodo.org/api/records/" ,
@@ -73,9 +40,11 @@ def detect(self, doi, ref=None, extra_args=None):
73
40
},
74
41
]
75
42
76
- url = self ._doi2url (doi )
43
+ def detect (self , doi , ref = None , extra_args = None ):
44
+ """Trigger this provider for things that resolve to a Zenodo/Invenio record"""
45
+ url = self .doi2url (doi )
77
46
78
- for host in hosts :
47
+ for host in self . hosts :
79
48
if any ([url .startswith (s ) for s in host ["hostname" ]]):
80
49
self .record_id = url .rsplit ("/" , maxsplit = 1 )[1 ]
81
50
return {"record" : self .record_id , "host" : host }
@@ -90,53 +59,17 @@ def fetch(self, spec, output_dir, yield_output=False):
90
59
"{}{}" .format (host ["api" ], record_id ),
91
60
headers = {"accept" : "application/json" },
92
61
)
93
- resp = self ._urlopen (req )
62
+ resp = self .urlopen (req )
94
63
95
64
record = json .loads (resp .read ().decode ("utf-8" ))
96
65
97
- def _fetch (file_ref , unzip = False ):
98
- # the assumption is that `unzip=True` means that this is the only
99
- # file related to the zenodo record
100
- with self ._urlopen (deep_get (file_ref , host ["download" ])) as src :
101
- fname = deep_get (file_ref , host ["filename" ])
102
- if path .dirname (fname ):
103
- sub_dir = path .join (output_dir , path .dirname (fname ))
104
- if not path .exists (sub_dir ):
105
- yield "Creating {}\n " .format (sub_dir )
106
- makedirs (sub_dir , exist_ok = True )
107
-
108
- dst_fname = path .join (output_dir , fname )
109
- with open (dst_fname , "wb" ) as dst :
110
- yield "Fetching {}\n " .format (fname )
111
- shutil .copyfileobj (src , dst )
112
- # first close the newly written file, then continue
113
- # processing it
114
- if unzip and is_zipfile (dst_fname ):
115
- yield "Extracting {}\n " .format (fname )
116
- zfile = ZipFile (dst_fname )
117
- zfile .extractall (path = output_dir )
118
- zfile .close ()
119
-
120
- # delete downloaded file ...
121
- os .remove (dst_fname )
122
- # ... and any directories we might have created,
123
- # in which case sub_dir will be defined
124
- if path .dirname (fname ):
125
- shutil .rmtree (sub_dir )
126
-
127
- new_subdirs = os .listdir (output_dir )
128
- # if there is only one new subdirectory move its contents
129
- # to the top level directory
130
- if len (new_subdirs ) == 1 :
131
- d = new_subdirs [0 ]
132
- copytree (path .join (output_dir , d ), output_dir )
133
- shutil .rmtree (path .join (output_dir , d ))
134
-
135
66
is_software = deep_get (record , host ["type" ]).lower () == "software"
136
67
files = deep_get (record , host ["filepath" ])
137
68
only_one_file = len (files ) == 1
138
69
for file_ref in files :
139
- for line in _fetch (file_ref , unzip = is_software and only_one_file ):
70
+ for line in self .fetch_file (
71
+ file_ref , host , output_dir , is_software and only_one_file
72
+ ):
140
73
yield line
141
74
142
75
@property
0 commit comments