Skip to content

Commit d9f9f59

Browse files
authored
Refactor: issue parser @sneakers-the-rat
Refactor issue parser - lifecycle methods, use pydantic models, support reviewer lists
2 parents 2a09fba + e59f603 commit d9f9f59

20 files changed

+809
-491
lines changed

Diff for: pyproject.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ dev = [
4040
"flake8",
4141
"pre-commit",
4242
"pytest",
43-
"pytest-cov"
43+
"pytest-cov",
44+
"pytest-mock"
4445
]
4546

4647
[project.urls]

Diff for: src/pyosmeta/cli/process_reviews.py

+8-15
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@
1818

1919
import pickle
2020

21-
from pydantic import ValidationError
22-
from pyosmeta import ProcessIssues, ReviewModel
21+
from pyosmeta import ProcessIssues
2322
from pyosmeta.github_api import GitHubAPI
2423

2524

@@ -35,27 +34,21 @@ def main():
3534
# Get all issues for approved packages - load as dict
3635
# TODO: this doesn't have to be in process issues at all. it could fully
3736
# Call the github module
38-
issues = process_review.return_response()
39-
accepted_reviews = process_review.parse_issue_header(issues, 45)
37+
issues = process_review.get_issues()
38+
accepted_reviews, errors = process_review.parse_issues(issues)
39+
for url, error in errors.items():
40+
print(f"Error in review at url: {url}")
41+
print(error)
42+
print("-" * 20)
4043

4144
# Update gh metrics via api for all packages
4245
repo_endpoints = process_review.get_repo_endpoints(accepted_reviews)
4346
all_reviews = process_review.get_gh_metrics(
4447
repo_endpoints, accepted_reviews
4548
)
4649

47-
# Populate model objects with review data + metrics
48-
final_reviews = {}
49-
for key, review in all_reviews.items():
50-
# First add gh meta to each dict
51-
print("Parsing & validating", key)
52-
try:
53-
final_reviews[key] = ReviewModel(**review)
54-
except ValidationError as ve:
55-
print(key, ":", ve)
56-
5750
with open("all_reviews.pickle", "wb") as f:
58-
pickle.dump(final_reviews, f)
51+
pickle.dump(all_reviews, f)
5952

6053

6154
if __name__ == "__main__":

Diff for: src/pyosmeta/cli/update_review_teams.py

+61-77
Original file line numberDiff line numberDiff line change
@@ -28,100 +28,84 @@
2828
from pyosmeta.contributors import ProcessContributors
2929
from pyosmeta.file_io import clean_export_yml, load_pickle
3030
from pyosmeta.github_api import GitHubAPI
31-
from pyosmeta.models import PersonModel
31+
from pyosmeta.models import PersonModel, ReviewModel, ReviewUser
3232
from pyosmeta.utils_clean import get_clean_user
3333

3434

35+
def process_user(
    user: ReviewUser,
    role: str,
    pkg_name: str,
    contribs: dict[str, PersonModel],
    processor: ProcessContributors,
) -> tuple[ReviewUser, dict[str, PersonModel]]:
    """Record one review participant in ``contribs`` and fill in their name.

    Steps, in order:

    1. Normalise the GitHub handle with ``get_clean_user``.
    2. If the handle is not yet a key of ``contribs``, fetch the user's info
       through ``processor.return_user_info``, stamp it with today's date as
       ``date_added`` and store it as a new ``PersonModel``.
    3. Add the lower-cased package name and the role labels for ``role`` to
       that contributor via ``add_unique_value``.
    4. If ``user.name`` is empty, copy the name from the contributor record.

    Parameters
    ----------
    user : ReviewUser
        The person as listed on the review issue. Mutated in place when its
        ``name`` is empty.
    role : str
        Key into ``processor.contrib_types`` (e.g. ``"editor"``).
    pkg_name : str
        Name of the package under review.
    contribs : dict[str, PersonModel]
        Known contributors keyed by cleaned GitHub username. Mutated in place.
    processor : ProcessContributors
        Supplies ``contrib_types`` and ``return_user_info``.

    Returns
    -------
    tuple[ReviewUser, dict[str, PersonModel]]
        The same ``user`` and ``contribs`` objects that were passed in.
    """
    gh_user = get_clean_user(user.github_username)

    if gh_user not in contribs:
        # If they aren't already in contribs, add them
        print("Found a new contributor!", gh_user)
        new_contrib = processor.return_user_info(gh_user)
        new_contrib["date_added"] = datetime.now().strftime("%Y-%m-%d")
        try:
            contribs[gh_user] = PersonModel(**new_contrib)
        except ValidationError as ve:
            print(ve)
            # No contributor record could be built, so there is nothing to
            # update below. Return now instead of failing on
            # ``contribs[gh_user]`` with a KeyError that would hide the
            # validation error printed above.
            return user, contribs

    person = contribs[gh_user]
    role_info = processor.contrib_types[role]

    # Update user package contributions (if it's unique)
    review_key = role_info[0]
    person.add_unique_value(review_key, pkg_name.lower())

    # Update user contrib list (if it's unique)
    review_roles = role_info[1]
    person.add_unique_value("contributor_type", review_roles)

    # If the user's name is missing in the issue, populate it from contribs
    if not user.name:
        user.name = person.name

    return user, contribs
73+
74+
3575
def main():
3676
github_api = GitHubAPI()
3777
process_contribs = ProcessContributors(github_api, [])
3878

3979
# Two pickle files are outputs of the two other scripts
4080
# use that data to limit web calls
41-
contribs = load_pickle("all_contribs.pickle")
42-
packages = load_pickle("all_reviews.pickle")
81+
contribs: dict[str, PersonModel] = load_pickle("all_contribs.pickle")
82+
packages: dict[str, ReviewModel] = load_pickle("all_reviews.pickle")
4383

4484
contrib_types = process_contribs.contrib_types
4585

46-
for pkg_name, issue_meta in packages.items():
86+
for pkg_name, review in packages.items():
4787
print("Processing review team for:", pkg_name)
48-
for issue_role in contrib_types.keys():
49-
if issue_role == "all_current_maintainers":
50-
# Loop through each maintainer in the list
51-
for i, a_maintainer in enumerate(
52-
issue_meta.all_current_maintainers
53-
):
54-
gh_user = get_clean_user(a_maintainer["github_username"])
55-
56-
if gh_user not in contribs.keys():
57-
print("Found a new contributor!", gh_user)
58-
new_contrib = process_contribs.return_user_info(
59-
gh_user
60-
)
61-
new_contrib["date_added"] = datetime.now().strftime(
62-
"%Y-%m-%d"
63-
)
64-
try:
65-
contribs[gh_user] = PersonModel(**new_contrib)
66-
except ValidationError as ve:
67-
print(ve)
68-
69-
# Update user package contributions (if it's unique)
70-
review_key = contrib_types[issue_role][0]
71-
contribs[gh_user].add_unique_value(
72-
review_key, pkg_name.lower()
73-
)
74-
75-
# Update user contrib list (if it's unique)
76-
review_roles = contrib_types[issue_role][1]
77-
contribs[gh_user].add_unique_value(
78-
"contributor_type", review_roles
79-
)
80-
81-
# If name is missing in issue, populate from contribs
82-
if a_maintainer["name"] == "":
83-
name = getattr(contribs[gh_user], "name")
84-
packages[pkg_name].all_current_maintainers[i][
85-
"name"
86-
] = name
87-
88-
else:
89-
# Else we are processing editors, reviewers...
90-
gh_user = get_clean_user(
91-
getattr(packages[pkg_name], issue_role)["github_username"]
92-
)
93-
94-
if gh_user not in contribs.keys():
95-
# If they aren't already in contribs, add them
96-
print("Found a new contributor!", gh_user)
97-
new_contrib = process_contribs.return_user_info(gh_user)
98-
new_contrib["date_added"] = datetime.now().strftime(
99-
"%Y-%m-%d"
88+
for role in contrib_types.keys():
89+
user: list[ReviewUser] | ReviewUser = getattr(review, role)
90+
91+
# handle lists or singleton users separately
92+
if isinstance(user, list):
93+
for i, a_user in enumerate(user):
94+
a_user, contribs = process_user(
95+
a_user, role, pkg_name, contribs, process_contribs
10096
)
101-
try:
102-
contribs[gh_user] = PersonModel(**new_contrib)
103-
except ValidationError as ve:
104-
print(ve)
105-
106-
# Update user package contributions (if it's unique)
107-
review_key = contrib_types[issue_role][0]
108-
contribs[gh_user].add_unique_value(
109-
review_key, pkg_name.lower()
97+
# update individual user in reference to issue list
98+
user[i] = a_user
99+
elif isinstance(user, ReviewUser):
100+
user, contribs = process_user(
101+
user, role, pkg_name, contribs, process_contribs
110102
)
111-
112-
# Update user contrib list (if it's unique)
113-
review_roles = contrib_types[issue_role][1]
114-
contribs[gh_user].add_unique_value(
115-
"contributor_type", review_roles
103+
setattr(review, role, user)
104+
else:
105+
raise TypeError(
106+
"Keys in the `contrib_types` map must be a `ReviewUser` or `list[ReviewUser]` in the `ReviewModel`"
116107
)
117108

118-
# If users's name is missing in issue, populate from contribs
119-
if getattr(issue_meta, issue_role)["name"] == "":
120-
attribute_value = getattr(packages[pkg_name], issue_role)
121-
attribute_value["name"] = getattr(
122-
contribs[gh_user], "name"
123-
)
124-
125109
# Export to yaml
126110
contribs_ls = [model.model_dump() for model in contribs.values()]
127111
pkgs_ls = [model.model_dump() for model in packages.values()]

Diff for: src/pyosmeta/contributors.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@ def __init__(self, github_api: GitHubAPI, json_files: List) -> None:
3939
]
4040

4141
self.contrib_types = {
42-
"reviewer_1": ["packages_reviewed", ["reviewer", "peer-review"]],
43-
"reviewer_2": ["packages_reviewed", ["reviewer", "peer-review"]],
42+
"reviewers": ["packages_reviewed", ["reviewer", "peer-review"]],
4443
"editor": ["packages_editor", ["editor", "peer-review"]],
4544
"submitting_author": [
4645
"packages_submitted",

Diff for: src/pyosmeta/file_io.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def clean_yaml_file(filename):
168168

169169

170170
def clean_export_yml(
171-
a_dict: Dict[str, Union[str, List[str]]], filename: str
171+
a_dict: Dict[str, Union[str, List[str]]] | List[dict], filename: str
172172
) -> None:
173173
"""Inputs a dictionary with keys - contribs or packages.
174174
It then converts to a list for export, and creates a cleaned

Diff for: src/pyosmeta/models/__init__.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from pyosmeta.models.base import (
2+
GhMeta,
3+
PersonModel,
4+
ReviewModel,
5+
ReviewUser,
6+
UrlValidatorMixin,
7+
)
8+
9+
__all__ = [
10+
"UrlValidatorMixin",
11+
"PersonModel",
12+
"GhMeta",
13+
"ReviewModel",
14+
"ReviewUser",
15+
]

Diff for: src/pyosmeta/models.py renamed to src/pyosmeta/models/base.py

+38-48
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
"""
55

66
import re
7-
from typing import Optional, Set, Union
7+
from datetime import datetime
8+
from typing import Any, Optional, Set, Union
89

910
import requests
1011
from pydantic import (
@@ -202,6 +203,21 @@ def clean_date(cls, a_date: Optional[str]) -> str:
202203
return clean_date(a_date)
203204

204205

206+
class ReviewUser(BaseModel):
    """Minimal model of a GitHub user, used in several places in review parsing.

    Both fields are required strings. Each is cleaned by an ``after``
    validator, i.e. once pydantic has already validated it as a ``str``.
    """

    # Person's display name; square brackets are stripped (see demarkdown_name).
    name: str
    # GitHub handle; the github.com URL part is removed
    # (see deurl_github_username).
    github_username: str

    @field_validator("github_username", mode="after")
    @classmethod
    def deurl_github_username(cls, github_username: str) -> str:
        """Remove ``https://github.com/`` from the username, if present."""
        return github_username.replace("https://github.com/", "")

    @field_validator("name", mode="after")
    @classmethod
    def demarkdown_name(cls, name: str) -> str:
        """Remove ``[`` and ``]`` characters (markdown link remnants) from the name."""
        return re.sub(r"\[|\]", "", name)
219+
220+
205221
class ReviewModel(BaseModel):
206222
# Make sure model populates both aliases and original attr name
207223
model_config = ConfigDict(
@@ -214,23 +230,23 @@ class ReviewModel(BaseModel):
214230
package_description: str = Field(
215231
"", validation_alias=AliasChoices("one-line_description_of_package")
216232
)
217-
submitting_author: dict[str, str | None] = {}
218-
all_current_maintainers: list[dict[str, str | None]] = {}
219-
repository_link: str | None = None
233+
submitting_author: ReviewUser | None = None
234+
all_current_maintainers: list[ReviewUser] = Field(default_factory=list)
235+
repository_link: str
220236
version_submitted: Optional[str] = None
221237
categories: Optional[list[str]] = None
222-
editor: dict[str, str | None] = {}
223-
reviewer_1: dict[str, str | None] = {}
224-
reviewer_2: dict[str, str | None] = {}
238+
editor: ReviewUser | None = None
239+
eic: ReviewUser | None = None
240+
reviewers: list[ReviewUser] = Field(default_factory=list)
225241
archive: str | None = None
226242
version_accepted: str | None = None
227243
date_accepted: str | None = Field(
228244
default=None,
229245
validation_alias=AliasChoices("Date accepted", "date_accepted"),
230246
)
231-
created_at: str = None
232-
updated_at: str = None
233-
closed_at: Optional[str] = None
247+
created_at: datetime = None
248+
updated_at: datetime = None
249+
closed_at: Optional[datetime] = None
234250
issue_link: str = None
235251
joss: Optional[str] = None
236252
partners: Optional[list[str]] = None
@@ -255,22 +271,6 @@ def clean_date_review(cls, a_date: Optional[str]) -> str:
255271
else:
256272
return f"{new_date[2]}-{new_date[0]}-{new_date[1]}"
257273

258-
@field_validator(
259-
"created_at",
260-
"updated_at",
261-
"closed_at",
262-
mode="before",
263-
)
264-
@classmethod
265-
def clean_date(cls, a_date: Optional[str]) -> str:
266-
"""Cleans up a datetime from github and returns a date string
267-
268-
Runs the general clean_date function in this module as a validator.
269-
270-
"""
271-
272-
return clean_date(a_date)
273-
274274
@field_validator(
275275
"package_name",
276276
mode="before",
@@ -310,33 +310,12 @@ def clean_markdown_url(cls, repo: str) -> str:
310310
else:
311311
return repo
312312

313-
@field_validator(
314-
"editor",
315-
"reviewer_1",
316-
"reviewer_2",
317-
mode="before",
318-
)
319-
@classmethod
320-
def clean_gh_url(cls, user: dict[str, str]) -> dict[str, str]:
321-
"""Remove markdown link remnants from gh usernames and name.
322-
323-
Sometimes editors and reviewers add names using github links.
324-
Remove the link data.
325-
"""
326-
327-
user["github_username"] = user["github_username"].replace(
328-
"https://github.com/", ""
329-
)
330-
user["name"] = re.sub(r"\[|\]", "", user["name"])
331-
332-
return user
333-
334313
@field_validator(
335314
"categories",
336315
mode="before",
337316
)
338317
@classmethod
339-
def clean_categories(cls, categories: list[str]) -> list[str]:
318+
def clean_categories(cls, categories: list[str]) -> list[str] | None:
340319
"""Make sure each category in the list is a valid value.
341320
342321
Valid pyos software categories are:
@@ -358,6 +337,8 @@ def clean_categories(cls, categories: list[str]) -> list[str]:
358337
list[str]
359338
List of cleaned categories.
360339
"""
340+
if categories is None:
341+
return None
361342

362343
valid_categories = {
363344
"data-processing": "data-processing-munging",
@@ -375,3 +356,12 @@ def clean_categories(cls, categories: list[str]) -> list[str]:
375356
# No match found, keep the original category
376357
cleaned_cats.append(category)
377358
return cleaned_cats
359+
360+
@field_validator("all_current_maintainers", mode="before")
@classmethod
def listify(cls, item: Any):
    """Wrap a single value in a list before any other validation runs.

    The field is expected to be plural, so a value that is not already a
    ``list`` is returned as a one-element list; a list is returned unchanged.
    """
    return item if isinstance(item, list) else [item]

0 commit comments

Comments
 (0)