6
6
7
7
import dataclasses
8
8
import datetime
9
+ import json
9
10
import re
10
11
import warnings
11
12
from collections .abc import Callable , Iterable , Iterator
15
16
from urllib .parse import quote_plus
16
17
17
18
import httpx
19
+ import pydantic
18
20
19
21
from . import model
20
22
from ._base_model import convert_download_to_user_model
@@ -708,6 +710,112 @@ def get_dataset_model(
708
710
** dset_json ,
709
711
)
710
712
713
def query_datasets(
    self,
    fields: dict[str, Any],
    *,
    limit: int | None = None,
    order: str | None = None,
    strict_validation: bool = False,
) -> list[model.DownloadDataset]:
    """Search SciCat for datasets whose fields match the given values.

    Attention
    ---------
    This function is experimental and may change or be removed in the future.
    SciCat offers several, very different query APIs and there are plans to
    support queries in Mongo query language directly, so the best way to
    implement querying is not settled yet.

    See `issue #177 <https://github.com/SciCatProject/scitacean/issues/177>`_
    for a discussion.

    Parameters
    ----------
    fields:
        Mapping of field name to the value to search for.
        A dataset is only returned if it matches every field exactly.
        See the examples below.
    limit:
        Upper bound on the number of returned datasets.
        Can only be used together with ``order``.
        If omitted, all matching datasets are returned.
    order:
        Ordering of the results.
        For example, ``"creationTime:asc"`` sorts by ascending creation time
        and ``"creationTime:desc"`` by descending creation time.
    strict_validation:
        If ``True``, the datasets must pass validation.
        If ``False``, datasets are still returned if validation fails.
        Note that some dataset fields may have a bad value or type.
        A warning will be logged if validation fails.

    Returns
    -------
    :
        The dataset models that match the query.
        The list is empty if the server reports no matches.

    Raises
    ------
    ValueError
        If ``limit`` is given without ``order``.

    Examples
    --------
    Get all datasets belonging to proposal ``abc.123``:

    .. code-block:: python

        scicat_client.query_datasets({'proposalId': 'abc.123'})

    Get all datasets that belong to proposal ``abc.123``
    **and** have name ``"ds name"``: (The name and proposal must match exactly.)

    .. code-block:: python

        scicat_client.query_datasets({
            'proposalId': 'abc.123',
            'datasetName': 'ds name'
        })

    Return only the newest 5 datasets for proposal ``bc.123``:

    .. code-block:: python

        scicat_client.query_datasets(
            {'proposalId': 'bc.123'},
            limit=5,
            order="creationTime:desc",
        )
    """
    # Build a throwaway pydantic model with one required field per query key,
    # typed after the given value, so that custom types serialize to JSON.
    field_definitions = {name: (type(value), ...) for name, value in fields.items()}
    query_model = pydantic.create_model(  # type: ignore[call-overload]
        "QueryParams", **field_definitions
    )
    serialized_fields = query_model(**fields).model_dump_json()
    params = {"fields": serialized_fields}

    if limit is not None and order is None:
        raise ValueError("`order` is required when `limit` is specified.")

    # Only options that were actually given are sent; "order" precedes "limit".
    limits: dict[str, str | int] = {
        key: value
        for key, value in (("order", order), ("limit", limit))
        if value is not None
    }
    if limits:
        params["limits"] = json.dumps(limits)

    found = self._call_endpoint(
        cmd="get",
        url="datasets/fullquery",
        params=params,
        operation="query_datasets",
    )
    if not found:
        return []

    return [
        model.construct(
            model.DownloadDataset,
            _strict_validation=strict_validation,
            **entry,
        )
        for entry in found
    ]
818
+
711
819
def get_orig_datablocks (
712
820
self , pid : PID , strict_validation : bool = False
713
821
) -> list [model .DownloadOrigDatablock ]:
@@ -1010,7 +1118,12 @@ def validate_dataset_model(
1010
1118
raise ValueError (f"Dataset { dset } did not pass validation in SciCat." )
1011
1119
1012
1120
def _send_to_scicat (
1013
- self , * , cmd : str , url : str , data : model .BaseModel | None = None
1121
+ self ,
1122
+ * ,
1123
+ cmd : str ,
1124
+ url : str ,
1125
+ data : model .BaseModel | None = None ,
1126
+ params : dict [str , str ] | None = None ,
1014
1127
) -> httpx .Response :
1015
1128
if self ._token is not None :
1016
1129
token = self ._token .get_str ()
@@ -1029,6 +1142,7 @@ def _send_to_scicat(
1029
1142
content = data .model_dump_json (exclude_none = True )
1030
1143
if data is not None
1031
1144
else None ,
1145
+ params = params ,
1032
1146
headers = headers ,
1033
1147
timeout = self ._timeout .seconds ,
1034
1148
)
@@ -1047,14 +1161,15 @@ def _call_endpoint(
1047
1161
* ,
1048
1162
cmd : str ,
1049
1163
url : str ,
1050
- data : model .BaseModel | None = None ,
1051
1164
operation : str ,
1165
+ data : model .BaseModel | None = None ,
1166
+ params : dict [str , str ] | None = None ,
1052
1167
) -> Any :
1053
1168
full_url = _url_concat (self ._base_url , url )
1054
1169
logger = get_logger ()
1055
1170
logger .info ("Calling SciCat API at %s for operation '%s'" , full_url , operation )
1056
1171
1057
- response = self ._send_to_scicat (cmd = cmd , url = full_url , data = data )
1172
+ response = self ._send_to_scicat (cmd = cmd , url = full_url , data = data , params = params )
1058
1173
if not response .is_success :
1059
1174
logger .error (
1060
1175
"SciCat API call to %s failed: %s %s: %s" ,
0 commit comments