Skip to content

Commit 5a447ec

Browse files
authored
Merge pull request #19 from simple-repository/feature/factor-fetch-description
Factor the fetch_description implementation
2 parents 27046e7 + deda370 commit 5a447ec

File tree

1 file changed

+105
-71
lines changed

1 file changed

+105
-71
lines changed

simple_repository_browser/fetch_description.py

Lines changed: 105 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -125,11 +125,8 @@ def read(self):
125125
return content.encode()
126126

127127

128-
async def package_info(
129-
release_files: tuple[model.File, ...],
130-
repository: SimpleRepository,
131-
project_name: str,
132-
) -> tuple[model.File, PackageInfo]:
128+
def _select_best_file(release_files: tuple[model.File, ...]) -> model.File:
129+
"""Select the best file from release files based on priority criteria."""
133130
files = sorted(
134131
release_files,
135132
key=lambda file: (
@@ -140,100 +137,137 @@ async def package_info(
140137
file.upload_time, # Distinguish conflicts by picking the earliest one.
141138
),
142139
)
140+
return files[0]
143141

144-
files_info: typing.Dict[str, FileInfo] = {}
145142

146-
# Get the size from the repository files
147-
for file in files:
143+
def _create_files_info_mapping(
144+
release_files: tuple[model.File, ...],
145+
) -> typing.Dict[str, FileInfo]:
146+
"""Create mapping of filename to FileInfo for files with size information."""
147+
files_info: typing.Dict[str, FileInfo] = {}
148+
for file in release_files:
148149
if file.size:
149150
files_info[file.filename] = FileInfo(
150151
size=file.size or 0,
151152
)
153+
return files_info
152154

153-
file = files[0]
154155

156+
async def _fetch_metadata_resource(
157+
repository: SimpleRepository,
158+
project_name: str,
159+
file: model.File,
160+
tmp_file_path: str,
161+
) -> tuple[model.File, pkginfo.Distribution]:
162+
"""Fetch metadata resource and return updated file and package info."""
155163
if file.dist_info_metadata:
156164
resource_name = file.filename + ".metadata"
157165
else:
158166
raise ValueError(f"Metadata not available for {file}")
159167

160168
logging.debug(f"Downloading metadata for {file.filename} from {resource_name}")
161169

162-
with tempfile.NamedTemporaryFile(
163-
suffix=os.path.splitext(file.filename)[1],
164-
) as tmp:
165-
resource = await repository.get_resource(project_name, resource_name)
170+
resource = await repository.get_resource(project_name, resource_name)
166171

167-
if isinstance(resource, model.TextResource):
172+
if isinstance(resource, model.TextResource):
173+
with open(tmp_file_path, "wb") as tmp:
168174
tmp.write(resource.text.encode())
169-
if not file.upload_time:
170-
# If the repository doesn't provide information about the upload time, estimate
171-
# it from the headers of the resource, if they exist.
172-
if ct := resource.context.get("creation-date"):
173-
if isinstance(ct, str):
174-
file = dataclasses.replace(
175-
file, upload_time=datetime.datetime.fromisoformat(ct)
176-
)
177-
elif isinstance(resource, model.HttpResource):
178-
await fetch_file(resource.url, tmp.name)
179-
else:
180-
raise ValueError(f"Unhandled resource type ({type(resource)})")
175+
if not file.upload_time:
176+
# If the repository doesn't provide information about the upload time, estimate
177+
# it from the headers of the resource, if they exist.
178+
if ct := resource.context.get("creation-date"):
179+
if isinstance(ct, str):
180+
file = dataclasses.replace(
181+
file, upload_time=datetime.datetime.fromisoformat(ct)
182+
)
183+
elif isinstance(resource, model.HttpResource):
184+
await fetch_file(resource.url, tmp_file_path)
185+
else:
186+
raise ValueError(f"Unhandled resource type ({type(resource)})")
181187

182-
tmp.flush()
183-
tmp.seek(0)
184-
info = PkgInfoFromFile(tmp.name)
185-
description = generate_safe_description_html(info)
188+
info = PkgInfoFromFile(tmp_file_path)
189+
return file, info
186190

187-
# If there is email information, but not a name in the "author" or "maintainer"
188-
# attribute, extract this information from the first person's email address.
189-
# Will take something like ``"Ivan" <ivan@example.com>`` and extract the "Ivan" part.
190-
def extract_usernames(emails):
191-
names = []
192-
parsed = email.parser.Parser(policy=email.policy.default).parsestr(
193-
f"To: {info.author_email}",
194-
)
195-
for address in parsed["to"].addresses:
196-
names.append(address.display_name)
197-
return ", ".join(names)
198-
199-
if not info.author and info.author_email:
200-
info.author = extract_usernames(info.author_email)
201-
202-
if not info.maintainer and info.maintainer_email:
203-
info.maintainer = extract_usernames(info.maintainer_email)
204-
205-
project_urls = {
206-
url.split(",")[0].strip().title(): url.split(",")[1].strip()
207-
for url in info.project_urls or []
208-
}
209-
# Ensure that a Homepage exists in the project urls
210-
if info.home_page and "Homepage" not in project_urls:
211-
project_urls["Homepage"] = info.home_page
212-
213-
sorted_urls = {
214-
name: url
215-
for name, url in sorted(
216-
project_urls.items(),
217-
key=lambda item: (item[0] != "Homepage", item[0]),
218-
)
219-
}
220191

221-
reqs: list[Requirement | InvalidRequirementSpecification] = []
222-
for req in info.requires_dist:
223-
try:
224-
reqs.append(Requirement(req))
225-
except InvalidRequirement:
226-
reqs.append(InvalidRequirementSpecification(req))
192+
def _enhance_author_maintainer_info(info: pkginfo.Distribution) -> None:
193+
"""Extract author/maintainer names from email addresses if names are missing."""
194+
195+
def extract_usernames(emails: str) -> str:
196+
names = []
197+
parsed = email.parser.Parser(policy=email.policy.default).parsestr(
198+
f"To: {emails}",
199+
)
200+
for address in parsed["to"].addresses:
201+
names.append(address.display_name)
202+
return ", ".join(names)
203+
204+
if not info.author and info.author_email:
205+
info.author = extract_usernames(info.author_email)
206+
207+
if not info.maintainer and info.maintainer_email:
208+
info.maintainer = extract_usernames(info.maintainer_email)
209+
210+
211+
def _process_project_urls(info: pkginfo.Distribution) -> typing.Dict[str, str]:
212+
"""Process and sort project URLs, ensuring Homepage is first."""
213+
project_urls = {
214+
url.split(",")[0].strip().title(): url.split(",")[1].strip()
215+
for url in info.project_urls or []
216+
}
217+
# Ensure that a Homepage exists in the project urls
218+
if info.home_page and "Homepage" not in project_urls:
219+
project_urls["Homepage"] = info.home_page
220+
221+
sorted_urls = {
222+
name: url
223+
for name, url in sorted(
224+
project_urls.items(),
225+
key=lambda item: (item[0] != "Homepage", item[0]),
226+
)
227+
}
228+
return sorted_urls
229+
230+
231+
def _parse_requirements(info: pkginfo.Distribution) -> RequirementsSequence:
232+
"""Parse requirements from distribution info, handling invalid requirements."""
233+
reqs: list[Requirement | InvalidRequirementSpecification] = []
234+
for req in info.requires_dist:
235+
try:
236+
reqs.append(Requirement(req))
237+
except InvalidRequirement:
238+
reqs.append(InvalidRequirementSpecification(req))
239+
return RequirementsSequence(reqs)
240+
241+
242+
async def package_info(
243+
release_files: tuple[model.File, ...],
244+
repository: SimpleRepository,
245+
project_name: str,
246+
) -> tuple[model.File, PackageInfo]:
247+
files_info = _create_files_info_mapping(release_files)
248+
file = _select_best_file(release_files)
249+
250+
with tempfile.NamedTemporaryFile(
251+
suffix=os.path.splitext(file.filename)[1],
252+
) as tmp:
253+
file, info = await _fetch_metadata_resource(
254+
repository, project_name, file, tmp.name
255+
)
256+
257+
description = generate_safe_description_html(info)
258+
_enhance_author_maintainer_info(info)
259+
project_urls = _process_project_urls(info)
260+
requires_dist = _parse_requirements(info)
227261

228262
pkg = PackageInfo(
229263
summary=info.summary or "",
230264
description=description,
231265
author=info.author,
232266
maintainer=info.maintainer,
233267
classifiers=info.classifiers,
234-
project_urls=sorted_urls,
268+
project_urls=project_urls,
235269
requires_python=info.requires_python,
236-
requires_dist=RequirementsSequence(reqs),
270+
requires_dist=requires_dist,
237271
# We include files info as it is the only way to influence the file.size of
238272
# all files (for the files list page). In the future, this can be a standalone
239273
# component.

0 commit comments

Comments
 (0)