Skip to content

Commit 1d49f03

Browse files
authored
Merge pull request #68 from deepghs/dev/download
dev(narugo): add soft check for download single file
2 parents 000c5be + 23f9cd9 commit 1d49f03

File tree

4 files changed

+59
-12
lines changed

4 files changed

+59
-12
lines changed

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ unittest:
3838
$(shell for type in ${COV_TYPES}; do echo "--cov-report=$$type"; done) \
3939
--cov="${RANGE_SRC_DIR}" \
4040
$(if ${MIN_COVERAGE},--cov-fail-under=${MIN_COVERAGE},) \
41-
$(if ${WORKERS},-n ${WORKERS},)
41+
$(if ${WORKERS},-n ${WORKERS},) \
42+
--reruns 8 --reruns-delay 2 --only-rerun '(OSError|Timeout|HTTPError.*502|HTTPError.*504|check your connection)'
4243

4344
docs:
4445
$(MAKE) -C "${DOC_DIR}" build

hfutils/entry/download.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ def download(
142142
file_in_repo=file_in_repo,
143143
repo_type=repo_type,
144144
revision=revision,
145+
soft_mode_when_check=soft_mode_when_check,
145146
)
146147

147148
elif archive_in_repo:

hfutils/operate/download.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def _raw_download_file(td: str, local_file: str, repo_id: str, file_in_repo: str
7979

8080
def download_file_to_file(local_file: str, repo_id: str, file_in_repo: str,
8181
repo_type: RepoTypeTyping = 'dataset', revision: str = 'main',
82-
hf_token: Optional[str] = None):
82+
soft_mode_when_check: bool = False, hf_token: Optional[str] = None):
8383
"""
8484
Download a file from a Hugging Face repository and save it to a local file.
8585
@@ -93,19 +93,32 @@ def download_file_to_file(local_file: str, repo_id: str, file_in_repo: str,
9393
:type repo_type: RepoTypeTyping
9494
:param revision: The revision of the repository (e.g., branch, tag, commit hash).
9595
:type revision: str
96+
:param soft_mode_when_check: When enabled, only the file size is checked when deciding whether an existing local file is ready and the download can be skipped. Default is False.
97+
:type soft_mode_when_check: bool
9698
:param hf_token: Hugging Face token for the API client; the ``HF_TOKEN`` variable is used if not assigned.
9799
:type hf_token: str, optional
98100
"""
99101
with TemporaryDirectory() as td:
100-
_raw_download_file(
101-
td=td,
102-
local_file=local_file,
103-
repo_id=repo_id,
104-
file_in_repo=file_in_repo,
105-
repo_type=repo_type,
106-
revision=revision,
107-
hf_token=hf_token,
108-
)
102+
if os.path.exists(local_file) and is_local_file_ready(
103+
repo_id=repo_id,
104+
repo_type=repo_type,
105+
local_file=local_file,
106+
file_in_repo=file_in_repo,
107+
revision=revision,
108+
hf_token=hf_token,
109+
soft_mode=soft_mode_when_check,
110+
):
111+
logging.info(f'Local file {local_file!r} is ready, download skipped.')
112+
else:
113+
_raw_download_file(
114+
td=td,
115+
local_file=local_file,
116+
repo_id=repo_id,
117+
file_in_repo=file_in_repo,
118+
repo_type=repo_type,
119+
revision=revision,
120+
hf_token=hf_token,
121+
)
109122

110123

111124
def download_archive_as_directory(local_directory: str, repo_id: str, file_in_repo: str,

test/operate/test_download.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,46 @@
1212
class TestOperateDownload:
1313
def test_download_file_to_file(self):
1414
target_file = get_testfile('mashu.png')
15-
with isolated_directory():
15+
16+
call_times = 0
17+
18+
def _my_download(*args, **kwargs):
19+
nonlocal call_times
20+
call_times += 1
21+
return _raw_download_file(*args, **kwargs)
22+
23+
with patch('hfutils.operate.download._raw_download_file', _my_download), \
24+
isolated_directory():
25+
download_file_to_file(
26+
'mashu_download.png',
27+
repo_id='deepghs/game_character_skins',
28+
file_in_repo='fgo/1/常夏的泳装Ver_02.png',
29+
)
30+
file_compare(target_file, 'mashu_download.png')
31+
32+
assert call_times == 1
33+
34+
def test_download_file_to_file_skip(self):
35+
target_file = get_testfile('mashu.png')
36+
37+
call_times = 0
38+
39+
def _my_download(*args, **kwargs):
40+
nonlocal call_times
41+
call_times += 1
42+
return _raw_download_file(*args, **kwargs)
43+
44+
with patch('hfutils.operate.download._raw_download_file', _my_download), \
45+
isolated_directory({'mashu_download.png': target_file}):
1646
download_file_to_file(
1747
'mashu_download.png',
1848
repo_id='deepghs/game_character_skins',
1949
file_in_repo='fgo/1/常夏的泳装Ver_02.png',
2050
)
2151
file_compare(target_file, 'mashu_download.png')
2252

53+
assert call_times == 0
54+
2355
def test_download_archive_as_directory(self):
2456
target_dir = get_testfile('surtr_ds')
2557
with isolated_directory():

0 commit comments

Comments
 (0)