Skip to content

Commit cdb6378

Browse files
authored
Merge pull request #359 from arXiv/ARXIVCE-3313-fix-purging-old-papers
returns categories even if no updates
2 parents e33f6c6 + 83d237d commit cdb6378

File tree

1 file changed

+16
-23
lines changed

1 file changed

+16
-23
lines changed

arxiv/integration/fastly/purge.py

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -33,38 +33,30 @@ def purge_cache_for_paper(paper_id:str, old_cats:Optional[str]=None):
3333
purge_fastly_keys(keys)
3434
return
3535

36-
def _get_category_and_date(arxiv_id:Identifier)-> Tuple[str, date]:
36+
def _get_category_and_date(arxiv_id:Identifier)-> Tuple[str, Optional[date]]:
3737
"""fetches the current categories for a paper, as well as the last date it had announced changes, to determine whether it belongs on the recent or new page
3838
extra days were added to accommodate weekends and holidays,
3939
these will occasionally purge new and recent papers more often than needed, but it is better to over-clear than to under-clear
4040
"""
4141
meta=aliased(Metadata)
4242
up=aliased(Updates)
43-
sub= (
44-
Session.query(
45-
meta.abs_categories,
46-
meta.document_id
47-
)
48-
.filter(meta.paper_id==arxiv_id.id)
49-
.filter(meta.is_current==1)
50-
.subquery()
51-
)
5243

5344
result=(
5445
Session.query(
55-
sub.c.abs_categories,
56-
func.max(up.date)
46+
meta.abs_categories,
47+
func.max(up.date)
5748
)
58-
.join(up, up.document_id==sub.c.document_id)
59-
.group_by(sub.c.document_id)
60-
.filter(up.action != "absonly")
49+
.outerjoin(up, (up.document_id == meta.document_id) & (up.action != "absonly")) #left join
50+
.filter(meta.paper_id==arxiv_id.id)
51+
.filter(meta.is_current==1)
6152
.first()
6253
)
63-
if not result:
64-
raise IdentifierException(f'paper id does not exist: {arxiv_id.id}')
65-
6654
new_cats: str=result[0]
67-
recent_date: date=result[1]
55+
recent_date: Optional[date] = result[1] #Papers that haven't been changed since 2007 may not be in the updates table
56+
57+
if not new_cats:
58+
raise IdentifierException(f'paper id not found: {arxiv_id.id}')
59+
6860
return new_cats, recent_date
6961

7062
def _purge_category_change(arxiv_id:Identifier, old_cats:Optional[str]=None )-> List[str]:
@@ -80,10 +72,11 @@ def _purge_category_change(arxiv_id:Identifier, old_cats:Optional[str]=None )->
8072
today=date.today()
8173
new=False
8274
recent=False
83-
if today - timedelta(days=3) <= recent_date: #farthest away a date on the new page would likely be
84-
new=True
85-
if today - timedelta(days=7) <= recent_date:
86-
recent=True
75+
if recent_date:
76+
if today - timedelta(days=3) <= recent_date: #farthest away a date on the new page would likely be
77+
new=True
78+
if today - timedelta(days=7) <= recent_date:
79+
recent=True
8780

8881
groups, archives, cats = get_all_cats_from_string(new_cats, True)
8982
new_archive_ids={arch.id for arch in archives}

0 commit comments

Comments
 (0)