From 19a046d2fb2732aafed26bc374d96d6c7be5a670 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Mon, 9 Dec 2024 20:58:34 -0600 Subject: [PATCH 001/103] generated files --- R/sysdata.rda | Bin 1543 -> 3467 bytes data-raw/fieldsdf.csv | 358 +++++++++++++++++++++--------------------- data/fieldsdf.rda | Bin 3328 -> 3467 bytes 3 files changed, 179 insertions(+), 179 deletions(-) diff --git a/R/sysdata.rda b/R/sysdata.rda index 45cde7c0ce2990c4d74ab48f5262e3217bd52228..b8fcef39f122175849ccbac1b14129e256997b2a 100644 GIT binary patch literal 3467 zcmV;64RrECT4*^jL0KkKSxl0p*Z>~zf589$|NsB<{DcGm5J120|L{Nn00H0?hkXk` z018k506_^B%~pU0f;4E$Qa}`li69CZGzcsTl8Sm004AoH7>taA&`mWklWKSZn^Vdg zDULK`$iguI!e}rFpwKd8(UT#B1~}1^BM8I;3826xgFwlXMofkf7)hl_)i$8?OrA|a zpa2Gd000JtfH={UBM8I;3826xgFwlXMofkf7%C(pAc9RjB+R2_Pt`pqsL{0sL5ZNq zz?rF{!#*sy#rla2S6GOF_NgHh_G9;bR}hf*e!p3x3{&Gm+%{gMNJ%tOFs-k@$~23D zrvaabAi)@G)sid>;Y>>kdFy$mI*z}$pc>SxZAz`FWtg)qZCP1F8d{Q!QX>+fk-=69 z7$IU6icuB@L@FsJ4Yn;t&?-ov+9?Z#mj;kgRw%@-T*;Su88IALM3p5$6MES&WrG!# zEti>+>Q+^~2c_KguzR0ZzZ0X6x{LzhX;IONQiDG|xq~P_! zW97y=JT%2=$oOQ*DN{2&DC1gf_aH#wM~SwT|9g#+|j(Ok&p-`t9R|fILB)9)b8ouOlehJF_l%+b*7i*rYxr(?we`0Cf)OrZCh!iqP;V(4b^m+XU5u&A8iCj+=wKWNfJXUFt)P9Bq8?sSs){VqDd0=Ni%LVthN&poF^R`C{?TB zj3nu&3AYck_(mfTAf@#0cGUjxz$1Jq@N3z9w;blYBd3j^YN@EpORaHvu z9X7;b9u*|VO)@Q9j*FKc6E%%1O;(y!V_I#}Y1@sqoTe*nx5Rd-;P&yQSlOAZ(&?th zlfmK2#bh2yJ5a1_=?$B7c1$Cr@Oe&&D-wD(Cm8U0UM5#P7;mz_cZl+dN}RB3&iQ!?d0`$>7Odx z+uJ6qo95d&$A&8%BDv{pw)!faG}u2u>ghk%qx)$#Hhd0}X$k6t$L%JvZWr>%lk$d3 z=7f^uXi+GXN=cAx&FQaATVAU5SE9Spd_Er!kH_7Ix}N`!z3{C;nwhEltY}iowY)eE z1&JTx?ov)S8B$13Y97VRN@i(Trs-p}Q%Sh^n%Etu1BAf%sl29XO0kEE^I8q$Wn9{Q zpJx+8I{S4Jc|@PCNe|Shswq`cs#PMikv|m2+Y&*hq9p$&?0&UkYK)jxCYnYiNmP`o zQYm3pQmRyxtyqkR$tE_R;SWdKKAz96S;;GXzSk|^s~xnJ-R|!0>NiuwN7D_hdDyAi2jAIOA7{)P-VyhU&E(((dB>ZSRNPF?T%2LG~PWy~FO{MW+OV0^;p|-?{ zrxjdN?|v_LvOP~!r>p8uQ%1^E6jLhXw5Ho(iLITkM?rw$B5se7KGQ;~58+9OQ|>}+ zhsV4&?x5xb>(H`AB83uG5K2(i5SUgoBvQi_6_~=X#Df|?Nub3$53;2E--HYh_MeGV zUqeMyA6fVqht*%X@#vaKG?i0V5No49EU{{QK1BRqs(Zebi6*%Epyj*2u*VyJ18upf zFZwjf@bANW~^oNERbiYEo2Gl}(=%N#Q?TlB!8;En5;nV&f_O+LGlP3e+mo1)Y%DsfL_kP^+FoXG9G z@A&*Z3;Vzh*c*LS?5m;(E|?&=gwatnsG90COr_2AlMvG1kd36@NF|B*`T58FYj%|G zFrxu4C-nCFzpNMIj*-r#DoDk>E5&PVbpl<~C{X=!9Ios<1ZCIVn1Xii@u zHYD<-u(YA+Wf;(wERqw#B-%v&f451ZdLAk*Pv$e#93O+X=2k1!2kekal%Z86TB=1f zuZu~Tn<8zS&jX2!#O^$;jfLkEkxel%rc*qP#8IUzDMYYXEJFn<)rqtx`wU4R&UA@C zGe}O2Dxrtj*Bp_qP$%ydDp@L7KJt{qB5^8JRJAIVju?i@QmJu7(G@hRsUBe`q4ZJQN%ah$qFomMldRsP^y(l zMMz?U5rm8}D6Cc)DlwBJ#w!yPRH-VK$g>6tM83?WI^t+!#&(qKbg!@}(-7Ep-S#-` z$8vQ~JoC+OD~%A=Naxx*kctM|J6e{ff-kC(-H=YkR)%F7#f`Svb01nH{u-ZbC6J4O zSf3S%YEsKlf@sAlRZB}MNkv2W62qg)gHaj`2SJW*yQ?PYF`_xn!*QX3Pg(HoESpri%5vhfSKl=y+i~8elDAXEoE@8U8*-y`@lCsI>8EMUHq_mlw;N2`JX5{1 zP3bv^cNrb3N}JnDkt$qC!0D$H;%Gx;g(y->5M(HKG^EU;BZ;1`$tPsn2W3ei=zNWy zh>`M=5;{)NCkdy#EK3U&vgNoKj8#TDNWng{_5f)TJrN}y<`WN62mWna3KpeWEk2Bx z+RI{6k%+Js6(x+90f8a{QDTNFOhl;5ou_c`mDx@(@iwVYlV z%n3d3)9AeUEVZMc(ADY6jZ0RSnHX14D+IV1`3Y zGBRNR(?A$a8f3r%28Rta!3>6)WMsktrhqV-G|7Mj4GtPASg4u3(gtUgbhW~KJ;Rht6W>f3nuKFnHX&2WtRLfs%WOL(9sOAtg>%ZyUh%uYS7h&g;o^PXlW?W z${K31td^T5gvzYS=NUHJX3T4=X1dE}wRK``v5Zy>;c9TkS*fNNGAYf>5*_r2B_I_! zaT%l{N$mRYl1R;jPW>QpC$^*C#K5ikpOSds{A7C|#5|{t84uIjMelMJEp zFg-8l3`3Ms%MVP5#C|U|hvt#T4TI@Uz~Y{W`F)+2c$L({+=@f3S$U6$sapz)(HJnL zexX=NJt^*GmKbn#jwY?|LsrQS1U2H{Tk{mldo0c7S(S!!rn%mY8`_N-Zf;=SrpzG5 zs@1DjjT$s%mRV?ws&Tg%Zw-tl^%G&i22x3ut_(JCWZcUwUQL)~SU5N>w?^jn_Rra} zl9+zOfebUcLzux1;WsB4Cpskf=eO*Cue0ltP3uSYN8$c<`@a9e{y&D?;y8$SVs}<5 zq^ea(;Yji-!in<)@T7y`Hj#8p6qKr}iCf0=dV9$yg%1fM5FZ;?5fiCUPo0$tl}d`I z?k42_iR?WoO_EOHyf5&<)c?*o#FDBil_+@qjgw|mtMc~W59hgFvz?*ne^&Rc4@)mc zHFrldx}9#b(Cs}ABsyNGiAmu*q2l=}9t9(1l){eRtE4w1Jb_OoH*o$utL5{i7{NIp z>zH}>Gg)fT!7$L!*wI*4V-0j`W>%@A3=CG`xTEGUdee7Wz*guiU5Xj<$j7XDW94W$`iNfGgq*1xf?FphI+T=*fiP@4PmAOeC^Js%a@O-fk zmUDH!Kbxf!u&O-c)vurA^L5|n+!L~*C&Wk5BdH0LJe6XWt_BIm#;E>UATn`??=v z#>3qG**5NJs6*0m>1F+t1A z5-L>hu^57uNh*=Rg$4wu!;#R?cVv;ElBCuli4uGjB$2)Rm5IU>MM+dtsT52hMhUo$ z$Zm-ZS&|F_WKZv_~J}kJ!`iTu! zScrl4sUZ~hWA}Ym5RmtNzgeRUQ{zJ1HeRGiNid|#0iTB;!5C}Rk}M42 zOiK!R>v^U+j=#5{8q}+8O0B77n6oWySy@CH6k3vuQX>+fk-=697$IU6icuB@M3YVd z7k^yImwFj799cw_B|#H<*)U~;6_zcRnUd;ORlNtL-1V?~pI5&Vu-nK?mi=y|U6jL$ z=)q8nYD)akjc?$vtA~kVe|n_g^}=K2#yUJS#c9a+WXUO0Gd(EdT8(o_>7k*e95C0M z)?u9*K)pICPMeN z8f2?j;bdfH^`&itdXsGmV``dZ-D75&+OJK~+b;HW>v_@r#~3@6^?J7^-cB`|nIpG} zT5aW~ISsheO}8{}W;d<+S-PhjbgH?x&YWs&&1A-FT6wKcEM}WyQ|fILB)9)b8ouOlehJF_l%+b*7i*rYxr( z?we`0Cf)OrZCh!iqP;V(4b^m+XUknzFWq)l?XtGQQ{XS)=N05A=T}(f8!$`{No6tbEUrKS-%G*^j*(M1Pxpme~E# z`_9uO)gj7t4-F?b_Wjt4kl-oHf zaa}3bTvbnF-S)_}V#TW-{w8A;|56&yIvc1wh6_0{xzt1w}bg#!O zwE0J}#d_goL)GOPSIo_{VSnuM)6>UJ-MHOhj$zejw{DK9T*o{*cz7$0o#A)QI&X@s zC+#EHnna$b7uAimG{;1Fd1K2;V-;0ZRZ8t0HpF5c6(q+^GA&$=i88h%!QslqWFAR7P^@g}4S$<-c1$Cr@Oe&& zD-wD(Cm8U0UM5#P7;mz_cZl+dN}RB3&iQ!?d0`$>7Odx+uJ6qo95d&$A&8%BDv{pw)!faG}u2u z>ghk%qx)$#Hhd0}X@3dogvaeBvThgh$dmGhOXh@<beE%C)>W4h4xH;_gyTHyKh$Pih{;%t~fySf=S? zv{OmA_?p-qrvrq*_^G_6X-ct&it}0x;$#xaays~E;E z3X=vT{AfH#d-1%=QpFrj`;0hErSV})&k1>p8uQ%1^E z6jLhXw5Ho(iLITkM?rw$B5se7KGQ;~58+9OQ|>}+hsV4&?x5xb>(H`AB83uG5K2(i z5SUgoBvQi_6_~=X#Df|?Nub3$53;2E--HYh_MeGVUqeMyA6fVqht*%X@#vaKG?i0V z5No49EPt_Td_F|{U#fe)m5C;}`k>{zzp%#}e*Qd*hFG`D?ww(+w*d z(+>M{pIQ%73uu@sD*Q>O)TnRKNOKvaMnglt1Kq zq4~-o(3K>dX&i)#kv>W(Dn$8`*Qtz5=!>NVNtn=6iz;9sT?gbYeoDB8NRq0mB~)S< z!1hQMBUNfrR8*BspA<>qKV6clNo*}!5?LnbTN`8K(w`)+2hAQx@=)A$Z98w%<0P4% zIe#)uKD&}l>5^ufqSP)baZhBC(qm|gQe3r_Csy}0Z&y8WMcrFG8!m1lI7%aMNuP{q z9p*-)Y8?1Rrw2{7TD90~!(o0|Dy&Hy%N}IvP1UQM$nCuE`20N!`@jy^8+}#mtD*=l zm>{@>(NQ$0n(8x5rOot{5Ypd}jild5C4Y(d`T58FYj%|GFrxu4C-nCFzpNMIj*-r# zDoD|8Ukcfc=2tslKO+mVB8RE6l`vlA;_$JVrfjwd%h?*7jgqn;;jovgFjb=vjWPAx8 z6L3-wQOHgNN$V0f&(25kDwOfH6=`W?FrAbANhShdBWO-vA~q!Qq_DK1>17zumMoGJ z!X(;6{(rYgqIw=GEl=h%)f^v#w}0kVE7b?=kV=%HRV7-gMKrIANtl}=ZJW;niHyYV zJgtp|=M#}lF)^l7JdMOrr7S5#uvjcZ1uE5vv?u!vNgvL1i9a()PK_#|huPO0k*-iD z?-eRpDp@}Al*1x%DpgdqDwU2HhRRZ@aYWG-G^(i{VJGDOcMixqNfe}1q<_5{5*kC; zNm4-4Z9Jg=XqqINHlQr28(S>4LY5&_3QDR%l#-~bRZ2{f6A}iV$e2dtuq>2}JrVTv z#F(**54$jV5`=A}4hWfwl1@aEjFM(c5TAN>Ir04`Oy`*pKDv=RJf;#&yM6Kz_#!$< zKkLx6jmaW?6WI19o)7amCVwQ7=#k1`BF<4aVi-j-5^g{#;fGpsPWz%yaJhIrZWhzz z@W{DI#YK<&^{zi=(UI2dVt!P)>q4ZJQN%ah$qFomMldRsP^y(lMMz?U5rm8}D6Cc) zDlwBJ#w!yPRH-VK$g>6tM83?WI^t+!#&(qKbg!@}(-7Ep-S#-`$A5BlPdxL@Z!3)u z)=1~tI*^J6+dEp8r-Co4k=>9^##V-98pVya*>fLSB>ozoY$cG3fmojviE2{IQG#g2 zDOF2LDoI5{_!7gT%7alF38BLpXhUR$C{jxhWGHtuq|BltiJq^?CuG|PWl16Ee2t!nk@AufI!@6i z38%a)OA8gU<+vD(RYp2U!9KJ00BI6E5hWhx6Aw}c{%u!%VJWIh_DtF zC5)E=fg%D?VtZ*}H z$r5&WNg5R*d!aD+$swIBBw#!8cx1@j9h*LOZ0r6oa`!pwn!0PAytSNO8O#Yi@6+hK z`7E`gpwQLn%8g4_mzf;I&b6SiO>75KOKp&2U{X@WDXHT*oFryF7x{_eNyv~dmXiI1 zauSbhOj0fvdY$$rbuu!Ya^2dxm5@$DLPS?8sum1lqLphbFj`V6SU=8LRamfCs^AH_ nU~p!}Ww3_GV+a{xHbF)kM3eXT{=zI*5( z01PDn01L6FcXD1u|928Sg&8ZaPwHo6|-vb;J-DMf2Ra~)^Rm^jym)xc-rw=zRwA&MIxYLH(X(+D@=d*QOrdjPSEpo;-=4s1qyENjo zTx)}!+}mq-nrlZLx^i!8ra5w(<1U(N&GotJtv7khb+$Hm<85zDI94v$>r(H_8tXRG z;y#K3K9oULQjpUMriNxT%5)iutjxqr^V=|RSVP+_^6_|o%RhX2_fU^M<{QnIF zOQ#eWf=h2Db9WuKnxfp;#wx0+s;a80w#%gBk80C2lh99-2koa^4oxv+bnP)aqqn%# zm9%Wkz~X);euv~~&+avk@P0UTGc*$D(P!o@LDCUuIcp?2jdi@xsba7j$V} z(>BqEle|uN~~*5xlKB7+iAryTW!7( z(x+p~wv}UMX0uDCn;p)_XBCRbJCbywv9pvmZOPFvj#IJTa!FW|!Lc}FvEg`3t~?lT zrkuAq=*_E!`qFfLCoWwTSgu_6QbW+DnjN)gE?PC|;BBe7Yn;Sh2Mx=*|O&_Uu z3!vM{O829WY0j%+bFi=`1j11Sw6L}ob7E{wfvqJp(`3ot4UuEXjG3N;*1c=`*!HU~ zpD_AQrHLQe$J$Bod$%k%^2LV;HfE7)5C!eTk2u0Mk(tf9rNXeOQ{KCKZXM zk%?rgN>wQoutp$>5=0XK1jL`18V*#|tZ}92$84}_Cs=95vKq~r)t(iFz9p6y6~!t@ z3t~uQs<8f_-tGIgorXL&zcRrW)O{@k(6J*7#i>gqh6R|is+uI)WK4ugSSnF%m2Ig> zN|jzJE@muX+Say7-(F|VzBU_jAIzaF^pAX7{(;~C_6}b{2!fFihWP5@DIRk(89TXBbT+^;)a|mxKrzW zk2jJ$PZXz%z{5o>6t6bZMRtBO!S8y%A<_5(d&i+ZgF>ng%99YM(uCO$PO$gq#(hNo z3^GY-T4JVHipCw3nyh9>rG_diF@<4?1~h&XL5g%AMM?5}AYg}-`wF7?8Y-lGN5H~+ ztL63Nnn^U3Q&$jclRpfxeLaNxA8hk{b%`dpd!XXo-^^o%-;K81YD@dtWp;Jhz3}1Y z-xj*u9I(={y)f&i9E#$jhbqQ)b+uNuxj187IAezF$mw~_H>@{PO5RR1%Q#k?H>ovD z;kKH_lyc?6is0e4_R|}><=w}398D_Yyh+&PaOjL8kwpGdx>Tzw!;j}4ibuH(J1k+I z=3^{oG)0C7u;WL7!5hNGO^sEuDS>k^!o@Cf)b=+EnI_8v4pW@RhmmiaFa|;K8Sz!p z86hgFqE$vAj1AQ*Em+K0Ss1KgtmR7IUTmdQlGs|dB(hD=wl>GHr9G0n9@#sQ+@ZMU z+H~;sg8O95R#9pf6*Q-c54@P!8XPqAN%_8S-L=~DMu<42xhw<3BV-3bWqJKT1gGUoziM4iPRy-JLllvJVtGx01nU^w--zhTtaMAO=T9R zeOA%P@6k1`_)|sxzaGK=9PZN{=L&C@^8H_bxAouKK0HpQF_7k5%jI2a$kch1k2!+F z4dpA)T8m(Jjup`Mkfeiz`2|AB4pge6^Ctm%QN97dx7I(;R$dw=(Hsa1rL$;bpz9$7b9a^vB>3ex>fLHQo;llKq0mgZ3mRsIB{C) zeS&8b@(HD1DPMA_RbKmV=o95~zkB%J_jhNT=1({EiSr!;5cVE2=}P+OJO_ZEVyRBs zQC60gLkZD8g(U1w#MUn-QuB_rXlgKj7a-kHqXM0gCekO}^l3Ct1GPn|`V8?$1K{cM zs}w^taM@v6Eg*3(-@GFN}mqR~vEU}h(p#I-49sKGR1l&Yp- zmMY1J>RKLuG92pio9CR}df&O-EyQ`w)0w=-v5wOkvs)Ymbe#3*Day1bq};1Rna}~s zS8!}qt*|tzYg{db(NWdHQ+>6+FtlQ2RH-mNbQL7?o|y9CRaIgz=kZ@xhflZ->C$mvp4 z-r8J=QsPb_%T5&GG$FDIqNK!+B9EKelhH_zrR#YnoeqkU4+GTM;F2DqNOGMbP6?-+ zEK3U&vgNoKj8#TCNWnfM@&IZ?JP{>4%qAW&1N7Rq6fKIhT6@-05thWIBN1RMDoYtI z0|G==D->d+#7dM?!AU!!O4(3QqKQ>WfbbqEv^Pazu8Pqy7Q(|Qm^aKoXit}$;oQ@+ zG)3QBS81r^3~hPB4eBP@ktcSQ8;E>l>O?qcR{?1CF^p3ElD3=bnv7E1i$jtlm1-%zcx!XC zJ39XzI{uHd&GUFV^L4MZjfts&Of&?spQcg+z?|Zx;HWvcI>eJX_z3#{5 zd!61cl`G9v?|rV4avt69A$e8esas7&w?%l154}2{6XVCo;&U;^ Date: Mon, 9 Dec 2024 21:01:18 -0600 Subject: [PATCH 002/103] setting group on top level attributes --- data-raw/fieldsdf.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-raw/fieldsdf.R b/data-raw/fieldsdf.R index cd863fce..b1a14552 100644 --- a/data-raw/fieldsdf.R +++ b/data-raw/fieldsdf.R @@ -92,7 +92,7 @@ extract_relevant_schema_info <- function(schema_elements) { endpoint = lookup[[schema_element]], field = x, data_type = data_type, - group = "", + group = names(api$components$schemas[[schema_element]]$properties), common_name = x ) } From 004664ae8cbd585b1ff84ccd5f8a958e1688d369 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Thu, 19 Dec 2024 19:16:10 -0600 Subject: [PATCH 003/103] feat: httr to httr2 --- DESCRIPTION | 2 +- R/process-error.R | 57 ----------------------------------------------- R/process-resp.R | 18 ++------------- R/search-pv.R | 25 ++++++++------------- 4 files changed, 12 insertions(+), 90 deletions(-) delete mode 100644 R/process-error.R diff --git a/DESCRIPTION b/DESCRIPTION index cacc3d76..37ee6a1e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,7 @@ LazyData: TRUE Depends: R (>= 3.1) Imports: - httr, + httr2, lifecycle, jsonlite, utils diff --git a/R/process-error.R b/R/process-error.R deleted file mode 100644 index 4136f5d7..00000000 --- a/R/process-error.R +++ /dev/null @@ -1,57 +0,0 @@ -#' @noRd -throw_er <- function(resp) { - throw_if_loc_error(resp) - xheader_er_or_status(resp) -} - -#' @noRd -throw_if_loc_error <- function(resp) { - if (hit_locations_ep(resp$url) && httr::status_code(resp) == 500) { - num_grps <- get_num_groups(resp$url) - if (num_grps > 2) { - stop2( - "Your request resulted in a 500 error, likely because you have ", - "requested too many fields in your request (the location endpoint ", - "currently has restrictions on the number of fields/groups you can ", - "request). Try slimming down your field list and trying again." - ) - } - } -} - -# Not sure this is still applicable -#' @noRd -hit_locations_ep <- function(url) { - grepl( - "^https://search.patentsview.org/api/v1/location/", - url, - ignore.case = TRUE - ) -} - -#' @noRd -get_num_groups <- function(url) { - prsd_json_filds <- gsub(".*&f=([^&]*).*", "\\1", utils::URLdecode(url)) - fields <- jsonlite::fromJSON(prsd_json_filds) - grps <- fieldsdf[fieldsdf$endpoint == "location" & - fieldsdf$field %in% fields, "group"] - length(unique(grps)) -} - -#' @noRd -xheader_er_or_status <- function(resp) { - - # look for the api's ultra-helpful X-Status-Reason header - xhdr <- get_x_status(resp) - - if (length(xhdr) != 1) - httr::stop_for_status(resp) - else - stop(xhdr[[1]], call. = FALSE) -} - -#' @noRd -get_x_status <- function(resp) { - headers <- httr::headers(resp) - headers[grepl("x-status-reason$", names(headers), ignore.case = TRUE)] -} diff --git a/R/process-resp.R b/R/process-resp.R index 4fb9ed3d..426f3f4b 100644 --- a/R/process-resp.R +++ b/R/process-resp.R @@ -1,23 +1,10 @@ -#' @noRd -parse_resp <- function(resp) { - j <- httr::content(resp, as = "text", encoding = "UTF-8") - jsonlite::fromJSON( - j, - simplifyVector = TRUE, simplifyDataFrame = TRUE, simplifyMatrix = TRUE - ) -} - #' @noRd get_request <- function(resp) { gp <- structure( - list(method = resp$req$method, url = resp$req$url), + list(method = resp$request$method, url = resp$request$url), class = c("list", "pv_request") ) - if (gp$method == "POST") { - gp$body <- rawToChar(resp$req$options$postfields) - } - gp } @@ -42,11 +29,10 @@ get_query_results <- function(prsd_resp) { #' @noRd process_resp <- function(resp) { - if (httr::http_error(resp)) throw_er(resp) - prsd_resp <- parse_resp(resp) request <- get_request(resp) data <- get_data(prsd_resp) + query_results <- get_query_results(prsd_resp) structure( diff --git a/R/search-pv.R b/R/search-pv.R index 7e711c0b..7e86f9d8 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -48,27 +48,20 @@ get_post_body <- function(query, arg_list) { } #' @noRd -one_request <- function(method, query, base_url, arg_list, api_key, ...) { - ua <- httr::user_agent("https://github.com/ropensci/patentsview") +patentsview_error_body <- function(resp) { + if (httr2::resp_status(resp) == 400) c(httr2::resp_header(resp, "X-Status-Reason")) else NULL +} if (method == "GET") { get_url <- get_get_url(query, base_url, arg_list) - resp <- httr::GET( - get_url, - httr::add_headers("X-Api-Key" = api_key), - ua, ... - ) + req <- httr2::request(get_url) |> + httr2::req_method("GET") } else { body <- get_post_body(query, arg_list) - resp <- httr::POST( - base_url, - httr::add_headers( - "X-Api-Key" = api_key, - "Content-Type" = "application/json" - ), - body = body, - ua, ... - ) + req <- httr2::request(base_url) |> + httr2::req_body_raw(body) |> + httr2::req_headers("Content-Type" = "application/json") |> + httr2::req_method("POST") } # Sleep and retry on a 429 (too many requests). The Retry-After header is the From 8c4457718556c37435d75be36af59198d327b8f1 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Thu, 19 Dec 2024 19:27:54 -0600 Subject: [PATCH 004/103] feat: httr to httr2 --- R/utils.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/R/utils.R b/R/utils.R index cc487623..a6f50a6b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -4,6 +4,16 @@ stop2 <- function(...) stop(..., call. = FALSE) #' @noRd asrt <- function(expr, ...) if (!expr) stop2(...) +#' @noRd +parse_resp <- function(resp) { + j <- resp |> httr2::resp_body_string(encoding = "UTF-8") + + jsonlite::fromJSON( + j, + simplifyVector = TRUE, simplifyDataFrame = TRUE, simplifyMatrix = TRUE + ) +} + #' @noRd format_num <- function(x) { format( From 0a2ad587ae43ea3326541ca43d9b6efa20aa29a9 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Thu, 19 Dec 2024 21:54:14 -0600 Subject: [PATCH 005/103] docs: API link update --- R/data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.R b/R/data.R index 53cc8ecb..49ae72f3 100644 --- a/R/data.R +++ b/R/data.R @@ -3,7 +3,7 @@ #' A data frame containing the names of retrievable fields for each of the #' endpoints. You can find this data on the API's online documentation for each #' endpoint as well (e.g., the -#' \href{https://patentsview.org/apis/api-endpoints/patents}{patents endpoint +#' \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference/#patent}{patent endpoint #' field list table}). #' #' @format A data frame with the following columns: From c9e8c5eec34d0f8d26866b262626616f68684116 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Thu, 19 Dec 2024 21:55:26 -0600 Subject: [PATCH 006/103] feat: dont print document number in scientific notation --- R/print.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/print.R b/R/print.R index 65bb53fe..025e7b4f 100644 --- a/R/print.R +++ b/R/print.R @@ -24,7 +24,10 @@ print.pv_data_result <- function(x, ...) { ) utils::str( - x, vec.len = 1, max.level = 2, give.attr = FALSE, strict.width = "cut" + x, vec.len = 1, max.level = 2, give.attr = FALSE, strict.width = "cut", + formatNum = function(x, ...) { + format(x, trim = TRUE, drop0trailing = TRUE, scientific = FALSE, ...) + } ) } From 1d68033e127df30b56db71ae4930977a0c159698 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Thu, 19 Dec 2024 22:07:56 -0600 Subject: [PATCH 007/103] build: version bumps to ko deprecated msgs --- .github/workflows/R-CMD-check.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 53eddc6f..0e579899 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -29,13 +29,13 @@ jobs: PATENTSVIEW_API_KEY: ${{ secrets.PATENTSVIEW_API_KEY }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} - - uses: r-lib/actions/setup-pandoc@v1 + - uses: r-lib/actions/setup-pandoc@v2 - name: Query dependencies run: | @@ -46,7 +46,7 @@ jobs: - name: Restore R package cache if: runner.os != 'Windows' - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ${{ env.R_LIBS_USER }} key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} From 9902e2d1a51e2c6bcf0bcae426fe80bcad3d9528 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Thu, 19 Dec 2024 22:08:45 -0600 Subject: [PATCH 008/103] added timeout - prevent run away jobs --- .github/workflows/R-CMD-check.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 0e579899..9568a92f 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -7,6 +7,7 @@ name: R-CMD-check jobs: R-CMD-check: runs-on: ${{ matrix.config.os }} + timeout-minutes: 60 name: ${{ matrix.config.os }} (${{ matrix.config.r }}) From 7cbae0dc4e4d515623394c4cf53d49aaab9facd0 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 08:21:30 -0600 Subject: [PATCH 009/103] feat: casting type changes --- R/cast-pv-data.R | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/R/cast-pv-data.R b/R/cast-pv-data.R index 57716461..6d68a925 100644 --- a/R/cast-pv-data.R +++ b/R/cast-pv-data.R @@ -5,21 +5,23 @@ as_is <- function(x) x get_cast_fun <- function(data_type) { # Some fields aren't documented, so we don't know what their data type is. Use # string type for these. + # new version of the API: state of string vs fulltext is in flux. Latter currently unused if (length(data_type) != 1) data_type <- "string" - switch( - data_type, + switch(data_type, "string" = as_is, "date" = as.Date, - "float" = as.numeric, - "integer" = as.integer, + "number" = as_is, + "integer" = as_is, "int" = as.integer, - "fulltext" = as_is + "fulltext" = as_is, + "boolean" = as_is, + "bool" = as.logical ) } #' @noRd lookup_cast_fun <- function(name, typesdf) { - data_type <- typesdf[typesdf$field == name, "data_type"] + data_type <- typesdf[typesdf$common_name == name, "data_type"] get_cast_fun(data_type = data_type) } @@ -29,6 +31,18 @@ cast_one.character <- function(one, name, typesdf) { cast_fun(one) } +#' @noRd +cast_one.double <- function(one, name, typesdf) { + cast_fun <- lookup_cast_fun(name, typesdf) + cast_fun(one) +} + +#' @noRd +cast_one.integer <- function(one, name, typesdf) { + cast_fun <- lookup_cast_fun(name, typesdf) + cast_fun(one) +} + #' @noRd cast_one.default <- function(one, name, typesdf) NA From 1d733b7c48eeccc92a2fe748801c7322d062e988 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 08:22:40 -0600 Subject: [PATCH 010/103] docs: field name change --- R/cast-pv-data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/cast-pv-data.R b/R/cast-pv-data.R index 6d68a925..93354c80 100644 --- a/R/cast-pv-data.R +++ b/R/cast-pv-data.R @@ -83,7 +83,7 @@ cast_one <- function(one, name, typesdf) UseMethod("cast_one") #' \dontrun{ #' #' fields <- c("patent_date", "patent_title", "patent_year") -#' res <- search_pv(query = "{\"patent_number\":\"5116621\"}", fields = fields) +#' res <- search_pv(query = "{\"patent_id\":\"5116621\"}", fields = fields) #' cast_pv_data(data = res$data) #' } #' From 3fa176531c01ddb6875cbd7f2af125f183944c46 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 08:23:43 -0600 Subject: [PATCH 011/103] feat: new casting methodology --- R/cast-pv-data.R | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/R/cast-pv-data.R b/R/cast-pv-data.R index 93354c80..e3cd59da 100644 --- a/R/cast-pv-data.R +++ b/R/cast-pv-data.R @@ -91,9 +91,25 @@ cast_one <- function(one, name, typesdf) UseMethod("cast_one") cast_pv_data <- function(data) { validate_pv_data(data) - endpoint <- names(data) + entity_name <- names(data) - typesdf <- fieldsdf[fieldsdf$endpoint == endpoint, c("field", "data_type")] + if (entity_name == "rel_app_texts") { + # blend the fields from both rel_app_texts entities + typesdf <- unique(fieldsdf[fieldsdf$group == entity_name, c("common_name", "data_type")]) + } else { + # need to get the endpoint from entity_name + endpoint_df <- fieldsdf[fieldsdf$group == entity_name, ] + endpoint <- unique(endpoint_df$endpoint) + + # watch out here- several endpoints return entities that are groups returned + # by the patent and publication endpoints (attorneys, inventors, assignees) + if(length(endpoint) > 1) { + endpoint <- endpoint[!endpoint %in% c("patent", "publication")] + } + + typesdf <- fieldsdf[fieldsdf$endpoint == endpoint, c("common_name", "data_type")] + + } df <- data[[1]] @@ -103,7 +119,7 @@ cast_pv_data <- function(data) { df[] <- list_out out_data <- list(x = df) - names(out_data) <- endpoint + names(out_data) <- entity_name structure( out_data, From 4b1f8e9070b5a4d90b70bd7336e49254b9b1f346 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 08:31:20 -0600 Subject: [PATCH 012/103] docs: field name change --- R/query-dsl.R | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/R/query-dsl.R b/R/query-dsl.R index 51bf0640..bb9ff79f 100644 --- a/R/query-dsl.R +++ b/R/query-dsl.R @@ -144,26 +144,25 @@ qry_funs <- c( #' @return The result of \code{code} - i.e., your query. #' #' @examples -#' # Without with_qfuns, we have to do: #' qry_funs$and( #' qry_funs$gte(patent_date = "2007-01-01"), #' qry_funs$text_phrase(patent_abstract = c("computer program")), #' qry_funs$or( -#' qry_funs$eq(inventor_last_name = "ihaka"), -#' qry_funs$eq(inventor_first_name = "chris") +#' qry_funs$eq(inventors.inventor_name_last = "Ihaka"), +#' qry_funs$eq(inventors.inventor_name_last = "Chris") #' ) #' ) #' -#' #...With it, this becomes: +#' # ...With it, this becomes: #' with_qfuns( -#' and( -#' gte(patent_date = "2007-01-01"), -#' text_phrase(patent_abstract = c("computer program")), -#' or( -#' eq(inventor_last_name = "ihaka"), -#' eq(inventor_first_name = "chris") -#' ) -#' ) +#' and( +#' gte(patent_date = "2007-01-01"), +#' text_phrase(patent_abstract = c("computer program")), +#' or( +#' eq(inventors.inventor_name_last = "Ihaka"), +#' eq(inventors.inventor_name_last = "Chris") +#' ) +#' ) #' ) #' #' @export From 1b25760fa18c7189ff2e3afd1938badea2e60597 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 08:34:56 -0600 Subject: [PATCH 013/103] docs: field name change --- R/search-pv.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index 7e86f9d8..998f6e0e 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -200,8 +200,8 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, api_key, .. #' search_pv( #' query = qry_funs$gt(patent_year = 2010), #' method = "POST", -#' fields = "patent_number", -#' sort = c("patent_number" = "asc") +#' fields = "patent_id", +#' sort = c("patent_id" = "asc") #' ) #' #' search_pv( @@ -216,9 +216,14 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, api_key, .. #' ) #' #' search_pv( -#' query = qry_funs$contains(inventors_at_grant.name_last = "Smith"), +#' query = qry_funs$contains(inventors.inventor_name_last = "Smith"), #' endpoint = "patent", -#' config = httr::timeout(40) +#' timeout = 40 +#' ) +#' +#' search_pv( +#' query = qry_funs$eq(patent_id = "11530080"), +#' fields = "application" #' ) #' } #' From d078c352723d5ef5437e8e360bf731cdb65290fc Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 09:17:27 -0600 Subject: [PATCH 014/103] fix: parameters on posts --- R/search-pv.R | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index 998f6e0e..a0e3ae92 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -11,26 +11,41 @@ tojson_2 <- function(x, ...) { } #' @noRd -to_arglist <- function(fields, page, per_page, sort) { +to_arglist <- function(fields, size, sort, after) { + opts <- list(size = size) + if (!is.null(after)) { + opts$after <- after + } + list( fields = fields, sort = list(as.list(sort)), - opts = list( - offset = (page - 1) * per_page, - size = per_page - ) + opts = opts ) } +#' @noRd +set_sort_param <- function(before) { + # Fixes former bug + # for sort = c("patent_id" = "asc", "citation_patent_id" = "asc") + # we sent [{"patent_id":"asc","citation_patent_id":"asc"}] + # API wants [{"patent_id": "asc" },{"citation_patent_id": "asc" }] + # TODO(any): brute meet force- there must be a better way... + after <- tojson_2(before, auto_unbox = TRUE) + after <- gsub('","', '"},{"', after) + after +} + #' @noRd get_get_url <- function(query, base_url, arg_list) { j <- paste0( base_url, "?q=", utils::URLencode(query, reserved = TRUE), "&f=", tojson_2(arg_list$fields), - "&o=", tojson_2(arg_list$opts, auto_unbox = TRUE), - "&s=", tojson_2(arg_list$sort, auto_unbox = TRUE) + "&s=", set_sort_param(arg_list$sort), + "&o=", tojson_2(arg_list$opts, auto_unbox = TRUE) ) + utils::URLencode(j) } @@ -40,11 +55,14 @@ get_post_body <- function(query, arg_list) { "{", '"q":', query, ",", '"f":', tojson_2(arg_list$fields), ",", - '"o":', tojson_2(arg_list$opts, auto_unbox = TRUE), ",", - '"s":', tojson_2(arg_list$sort, auto_unbox = TRUE), + '"s":', set_sort_param(arg_list$sort), ",", + '"o":', tojson_2(arg_list$opts, auto_unbox = TRUE), "}" ) - gsub('(,"[fs]":)([,}])', paste0("\\1", "{}", "\\2"), body) + # The API can now act weirdly if we pass f:{},s:{} as we did in the past. + # (Weirdly in that the post results may not equal the get results or posts error out) + # Now we'd remove "f":, and "s":, We're guaranteed to have q: and at least "size":1000 as o: + gsub('("[fs]":,)', "", body) } #' @noRd From e6bf61b2ae118c4620b83a3f232fc81da0cb5afe Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 09:33:01 -0600 Subject: [PATCH 015/103] feat: httr to httr2 --- R/search-pv.R | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index a0e3ae92..21b27727 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -82,21 +82,16 @@ patentsview_error_body <- function(resp) { httr2::req_method("POST") } - # Sleep and retry on a 429 (too many requests). The Retry-After header is the - # seconds to sleep - if (httr::status_code(resp) == 429) { - num_seconds <- httr::headers(resp)[["Retry-After"]] - maybe_an_s <- if (num_seconds == "1") "" else "s" - message(paste0( - "The API's requests per minute limit has been reached. ", - "Pausing for ", num_seconds, " second", maybe_an_s, - " before continuing." - )) - Sys.sleep(num_seconds) + resp <- req |> + httr2::req_user_agent("https://github.com/ropensci/patentsview") |> + httr2::req_options(...) |> + httr2::req_retry(max_tries = 20) |> # automatic 429 Retry-After + httr2::req_headers("X-Api-Key" = api_key, .redact = "X-Api-Key") |> + httr2::req_error(body = patentsview_error_body) |> + httr2::req_perform() - one_request(method, query, base_url, arg_list, api_key, ...) - } else { - resp + resp +} } } From 2386c45011296186b8cddb2f4b90ef9164fcaada Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 09:38:00 -0600 Subject: [PATCH 016/103] feat: new paging methodology --- R/search-pv.R | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/R/search-pv.R b/R/search-pv.R index 21b27727..200f353d 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -92,7 +92,31 @@ patentsview_error_body <- function(resp) { resp } + +#' Pad patent_id +#' +#' This function strategically pads a patent_id with zeroes to 8 characters, +#' needed only for custom paging that uses sorts by patent_id. +#' +#' @param patent_id The patent_id that needs to be padded. It can +#' be the patent_id for a utility, design, plant or reissue patent. +#' +#' @examples +#' \dontrun{ +#' padded <- pad_patent_id("RE36479") +#' +#' padded2 <- pad_patent_id("3930306") +#' } +#' +#' @export +# zero pad patent_id to 8 characters. +pad_patent_id <- function(patent_id) { + pad <- 8 - nchar(patent_id) + if (pad > 0) { + patent_id <- paste0(sprintf("%0*d", pad, 0), patent_id) + patent_id <- sub("(0+)([[:alpha:]]+)([[:digit:]]+)", "\\2\\1\\3", patent_id) } + patent_id } #' @noRd @@ -121,12 +145,32 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, api_key, .. arg_list$opts$offset <- (i - 1) * arg_list$opts$size x <- one_request(method, query, base_url, arg_list, api_key, ...) x <- process_resp(x) + + # now to page we need to set the "after" attribute to where we left off + # we want the last value of the primary sort field + s <- names(arg_list$sort[[1]])[[1]] + index <- nrow(x$data[[1]]) + last_value <- x$data[[1]][[s]][[index]] + + if (s == "patent_id") { + last_value <- pad_patent_id(last_value) + } + + arg_list$opts$after <<- last_value + x$data[[1]] }) do.call("rbind", c(tmp, make.row.names = FALSE)) } +#' @noRd +get_default_sort <- function(endpoint) { + default <- c("asc") + names(default) <- get_ok_pk(endpoint) + default +} + #' Search PatentsView #' #' This function makes an HTTP request to the PatentsView API for data matching From 3f74716955be9f272cf6b39baf3a9bbf2be477f8 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 09:39:26 -0600 Subject: [PATCH 017/103] feat: removed paging limits --- R/search-pv.R | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index 200f353d..c810ff87 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -126,20 +126,6 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, api_key, .. if (req_pages < 1) { stop2("No records matched your query...Can't download multiple pages") } - if (matched_records > 10000) { - stop2( - "The API only allows you to download 10,000 records in a single query. ", - "Your query returned ", matched_records, " records. See for ", - "how to get around this limitation." - ) - } - if (req_pages > 10) { - stop2( - "The API only allows you to download 10 pages in a single query. ", - "Your query returned ", req_pages, " pages. See for ", - "how to get around this limitation." - ) - } tmp <- lapply(seq_len(req_pages), function(i) { arg_list$opts$offset <- (i - 1) * arg_list$opts$size From f22c08942ec62792b9760671852742dd48d64a56 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 16:59:08 -0600 Subject: [PATCH 018/103] feat: search_pv parameter updates --- R/search-pv.R | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index c810ff87..eb3cb43a 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -178,25 +178,36 @@ get_default_sort <- function(endpoint) { #' E.g., \code{qry_funs$gte(patent_date = "2007-01-04")} #' } #' @param fields A character vector of the fields that you want returned to you. -#' A value of \code{NULL} indicates that the default fields should be -#' returned. Acceptable fields for a given endpoint can be found at the API's +#' A value of \code{NULL} indicates to the API that it should return the default fields +#' for that endpoint. Acceptable fields for a given endpoint can be found at the API's #' online documentation (e.g., check out the field list for the #' \href{https://patentsview.org/apis/api-endpoints/patents}{patents #' endpoint}) or by viewing the \code{fieldsdf} data frame #' (\code{View(fieldsdf)}). You can also use \code{\link{get_fields}} to list #' out the fields available for a given endpoint. +#' +#' Nested fields can be fully qualified, e.g., "application.filing_date" or the +#' group name can be used to retrieve all of its nested fields, E.g. "application". +#' The latter would be similar to passing \code{get_fields("patent", group = "application")} +#' except it's the API that decides what fields to return. #' @param endpoint The web service resource you wish to search. Use #' \code{get_endpoints()} to list the available endpoints. -#' @param subent_cnts `r lifecycle::badge("deprecated")` Non-matched subentities -#' will always be returned under the new version of the API +#' @param subent_cnts `r lifecycle::badge("deprecated")` This is always FALSE in the +#' new version of the API as the total counts of unique subentities is no longer available. #' @param mtchd_subent_only `r lifecycle::badge("deprecated")` This is always -#' FALSE in the new version of the API. -#' @param page The page number of the results that should be returned. -#' @param per_page The number of records that should be returned per page. This -#' value can be as high as 1,000 (e.g., \code{per_page = 1000}). +#' FALSE in the new version of the API as non-matched subentities +#' will always be returned. +#' @param page `r lifecycle::badge("deprecated")` The new version of the API does not use +#' \code{page} as a parameter for paging, it uses \code{after}. +#' @param per_page `r lifecycle::badge("deprecated")` The API now uses \code{size} +#' @param size The number of records that should be returned per page. This +#' value can be as high as 1,000 (e.g., \code{size = 1000}). +#' @param after A list of sort key values that defaults to NULL. This +#' exposes the API's paging parameter for users who want to implement their own +#' paging. It cannot be set when \code{all_pages = TRUE} as the R package manipulates it +#' for users automatically. See \href{../articles/result-set-paging.html}{result set paging} #' @param all_pages Do you want to download all possible pages of output? If -#' \code{all_pages = TRUE}, the values of \code{page} and \code{per_page} are -#' ignored. +#' \code{all_pages = TRUE}, the value of \code{size} is ignored. #' @param sort A named character vector where the name indicates the field to #' sort by and the value indicates the direction of sorting (direction should #' be either "asc" or "desc"). For example, \code{sort = c("patent_number" = From 4d2f0646bd64c95b23471273fc276c840b080ec6 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 09:43:53 -0600 Subject: [PATCH 019/103] feat: use new api group/field shorthand --- R/get-fields.R | 25 +++++++++++++++++++++++-- R/validate-args.R | 13 +++++++++++-- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/R/get-fields.R b/R/get-fields.R index 850eff89..4088c8e5 100644 --- a/R/get-fields.R +++ b/R/get-fields.R @@ -45,11 +45,32 @@ #' @export get_fields <- function(endpoint, groups = NULL) { validate_endpoint(endpoint) + + # using API's shorthand notation, group names can be requested as fields instead of + # fully qualifying each nested field. Fully qualified, all patent endpoint's attributes + # is over 4K, too big to be sent on a GET with a modest query + + pk <- get_ok_pk(endpoint) + plural_entity <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$field == pk, "group"] + top_level_attributes <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group == plural_entity, "field"] + if (is.null(groups)) { - fieldsdf[fieldsdf$endpoint == endpoint, "field"] + c( + top_level_attributes, + unique(fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group != plural_entity, "group"]) + ) } else { validate_groups(endpoint, groups = groups) - fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group %in% groups, "field"] + + # don't include pk if plural_entity group is requested (pk would be a member) + extra_field <- if (include_pk && !plural_entity %in% groups) pk else NULL + extra_fields <- if (plural_entity %in% groups) top_level_attributes else NULL + + c( + extra_field, + extra_fields, + groups[!groups == plural_entity] + ) } } diff --git a/R/validate-args.R b/R/validate-args.R index a669e182..275614c8 100644 --- a/R/validate-args.R +++ b/R/validate-args.R @@ -8,14 +8,23 @@ validate_endpoint <- function(endpoint) { } #' @noRd -validate_args <- function(api_key, fields, endpoint, method, page, per_page, - sort) { +validate_args <- function(api_key, fields, endpoint, method, + sort, after, size, all_pages) { asrt( !identical(api_key, ""), "The new version of the API requires an API key" ) flds_flt <- fieldsdf[fieldsdf$endpoint == endpoint, "field"] + + # Now the API allows the group name to be requested as in fields to get all of + # the group's nested fields. ex.: "assignees" on the patent endpoint gets you all + # of the assignee fields. Note that "patents" can't be requested + groups <- unique(fieldsdf[fieldsdf$endpoint == endpoint, c("group")]) + pk <- get_ok_pk(endpoint) + plural_entity <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$field == pk, "group"] + flds_flt <- append(flds_flt, groups[!groups == plural_entity]) + asrt( all(fields %in% flds_flt), "Bad field(s): ", paste(fields[!(fields %in% flds_flt)], collapse = ", ") From edae0413a7ee8c3c895bad64d7d4a4aa4adef532 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 10:59:34 -0600 Subject: [PATCH 020/103] feat: added in_range query function --- R/query-dsl.R | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/R/query-dsl.R b/R/query-dsl.R index bb9ff79f..f3ea00ef 100644 --- a/R/query-dsl.R +++ b/R/query-dsl.R @@ -54,12 +54,31 @@ create_not_fun <- function(fun) { } } +#' @noRd +create_in_range_fun <- function(fun) { + force(fun) + function(...) { + value_p <- list(...) + field <- names(value_p) + value <- unlist(value_p) + names(value) <- NULL + + # throw an error if the length isn't two + asrt(length(value) == 2, fun, " expects a range of exactly two arguments") + + low <- create_one_fun(field = field, value = value[1], fun = "gte") + high <- create_one_fun(field = field, value = value[2], fun = "lte") + z <- list(`_and` = list(low, high)) + + structure(z, class = c(class(z), "pv_query")) + } +} + #' List of query functions #' #' A list of functions that make it easy to write PatentsView queries. See the -#' details section below for a list of the 14 functions, as well as the -#' \href{https://docs.ropensci.org/patentsview/articles/writing-queries.html}{writing -#' queries vignette} for further details. +#' details section below for a list of the 15 functions, as well as the +#' \href{../articles/writing-queries.html}{writing queries vignette} for further details. #' #' @details #' @@ -109,6 +128,13 @@ create_not_fun <- function(fun) { #' \item \code{not} - The comparison is not true #' } #' +#' \strong{4. Convenience function} \cr +#' +#' There is 1 convenience function: +#' \itemize{ +#' \item \code{in_range} - Builds a <= x <= b query +#' } +#' #' @return An object of class \code{pv_query}. This is basically just a simple #' list with a print method attached to it. #' @@ -117,6 +143,10 @@ create_not_fun <- function(fun) { #' #' qry_funs$not(qry_funs$eq(patent_date = "2001-01-01")) #' +#' qry_funs$in_range(patent_year = c(2010, 2021)) +#' +#' qry_funs$in_range(patent_date = c("1976-01-01", "1983-02-28")) + #' @export qry_funs <- c( lapply2( @@ -126,7 +156,8 @@ qry_funs <- c( ), create_key_fun ), lapply2(c("and", "or"), create_array_fun), - lapply2("not", create_not_fun) + lapply2("not", create_not_fun), + lapply2("in_range", create_in_range_fun) ) #' With qry_funs From ed8effedb8ecd0897e47c2424babce70c5d4dc17 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 11:09:15 -0600 Subject: [PATCH 021/103] feat: unnesting plural entities from singular endpoints --- R/unnest-pv-data.R | 58 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/R/unnest-pv-data.R b/R/unnest-pv-data.R index a86ca441..f6af8470 100644 --- a/R/unnest-pv-data.R +++ b/R/unnest-pv-data.R @@ -4,24 +4,50 @@ #' in \code{\link{unnest_pv_data}}, based on the endpoint you searched. #' It will return a potential unique identifier for a given entity (i.e., a #' given endpoint). For example, it will return "patent_id" when -#' \code{endpoint = "patent"}. +#' \code{endpoint_or_entity = "patent"}. It would return the same value if +#' the entity name "patents" was passed via \code{get_ok_pk(names(pv_return$data))} +#' where pv_return was returned from \code{\link{search_pv}}. #' -#' @param endpoint The endpoint which you would like to know a potential primary -#' key for. +#' @param endpoint_or_entity The endpoint or entity name for which you +#' would like to know a potential primary key for. #' #' @return The name of a primary key (\code{pk}) that you could pass to #' \code{\link{unnest_pv_data}}. #' #' @examples -#' get_ok_pk(endpoint = "inventor") -#' get_ok_pk(endpoint = "cpc_subclass") -#' get_ok_pk("publication/rel_app_text") +#' get_ok_pk(endpoint_or_entity = "inventor") # Returns "inventor_id" +#' get_ok_pk(endpoint_or_entity = "cpc_group") # Returns "cpc_group_id" #' #' @export -get_ok_pk <- function(endpoint) { +get_ok_pk <- function(endpoint_or_entity) { + endpoint_df <- fieldsdf[fieldsdf$endpoint == endpoint_or_entity, ] + if (nrow(endpoint_df) > 0) { + endpoint <- endpoint_or_entity + } else { + endpoint_df <- fieldsdf[fieldsdf$group == endpoint_or_entity, ] + endpoint <- unique(endpoint_df$endpoint) + + # watch out here- several endpoints return entities that are groups returned + # by the patent and publication endpoints (attourneys, inventors, assignees) + if(length(endpoint) > 1) { + endpoint <- endpoint[!endpoint %in% c("patent", "publication")] + endpoint_df <- fieldsdf[fieldsdf$endpoint == endpoint, ] + } + } + unnested_endpoint <- sub("^(patent|publication)/", "", endpoint) possible_pks <- c("patent_id", "document_number", paste0(unnested_endpoint, "_id")) - fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$field %in% possible_pks, "field"] + pk <- endpoint_df[endpoint_df$field %in% possible_pks, "field"] + + # we're unable to determine the pk if an entity name of rel_app_texts was passed + asrt( + length(pk) == 1, + "The primary key cannot be determined for ", endpoint_or_entity, + ". Try using the endpoint's name instead ", + paste(unique(fieldsdf[fieldsdf$group == endpoint_or_entity, "endpoint"]), collapse = ", ") + ) + + pk } #' Unnest PatentsView data @@ -58,12 +84,20 @@ get_ok_pk <- function(endpoint) { #' } #' #' @export -unnest_pv_data <- function(data, pk = get_ok_pk(names(data))) { - +unnest_pv_data <- function(data, pk = NULL) { validate_pv_data(data) df <- data[[1]] + if (is.null(pk)) { + # now there are two endpoints that return rel_app_texts entities with different pks + if (names(data) == "rel_app_texts") { + pk <- if ("document_number" %in% names(df)) "document_number" else "patent_id" + } else { + pk = get_ok_pk(names(data)) + } + } + asrt( pk %in% colnames(df), pk, " not in primary entity data frame...Did you include it in your ", @@ -75,14 +109,12 @@ unnest_pv_data <- function(data, pk = get_ok_pk(names(data))) { sub_ent_df <- df[, !prim_ent_var, drop = FALSE] sub_ents <- colnames(sub_ent_df) - ok_pk <- get_ok_pk(names(data)) - out_sub_ent <- lapply2(sub_ents, function(x) { temp <- sub_ent_df[[x]] asrt( length(unique(df[, pk])) == length(temp), pk, " cannot act as a primary key because it is not a unique identifier.\n\n", - "Try using ", ok_pk, " instead." + "Try using ", pk, " instead." ) names(temp) <- df[, pk] xn <- do.call("rbind", temp) From 8a1c8761eebbc180364f65ce9aab6cd9709db470 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 11:12:55 -0600 Subject: [PATCH 022/103] feat: added lifecycle deprecations --- R/validate-args.R | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/R/validate-args.R b/R/validate-args.R index 275614c8..8ad9f930 100644 --- a/R/validate-args.R +++ b/R/validate-args.R @@ -97,4 +97,21 @@ deprecate_warn_all <- function(error_browser, subent_cnts, mtchd_subent_only) { version of the API" ) } -} \ No newline at end of file + + if (lifecycle::is_present(per_page)) { + lifecycle::deprecate_warn( + when = "0.3.0", + what = "search_pv(per_page)", + details = "The new version of the API uses 'size' instead of 'per_page'", + with = "search_pv(size)" + ) + } + + if (lifecycle::is_present(page)) { + lifecycle::deprecate_warn( + when = "0.3.0", + what = "search_pv(page)", + details = "The new version of the API does not support the page parameter" + ) + } +} From 01ab9789c7caadda6e99dd554fdff8a9154c8fd7 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 12:26:09 -0600 Subject: [PATCH 023/103] feat: added in_range query function --- R/check-query.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/check-query.R b/R/check-query.R index c572e02e..122ffca9 100644 --- a/R/check-query.R +++ b/R/check-query.R @@ -40,7 +40,7 @@ check_query <- function(query, endpoint) { num_opr <- c("_gt", "_gte", "_lt", "_lte") str_opr <- c("_begins", "_contains") fltxt_opr <- c("_text_all", "_text_any", "_text_phrase") - all_opr <- c(simp_opr, num_opr, str_opr, fltxt_opr) + all_opr <- c(simp_opr, num_opr, str_opr, fltxt_opr, "_in_range") flds_flt <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$can_query == "y", ] From 1e817bca200c9e99a4d7b86064828f9ea077d9a2 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 12:30:45 -0600 Subject: [PATCH 024/103] fix: length check to avoid coercion warning --- R/check-query.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/check-query.R b/R/check-query.R index 122ffca9..dd8c6f2a 100644 --- a/R/check-query.R +++ b/R/check-query.R @@ -46,7 +46,10 @@ check_query <- function(query, endpoint) { apply_checks <- function(x, endpoint) { x <- swap_null_nms(x) - if (names(x) %in% c("_not", "_and", "_or") || is.na(names(x))) { + + # troublesome next line: 'length(x) = 2 > 1' in coercion to 'logical(1)' + # if (names(x) %in% c("_not", "_and", "_or") || is.na(names(x))) { + if (length(names(x)) > 1 || names(x) %in% c("_not", "_and", "_or") || is.na(names(x))) { lapply(x, FUN = apply_checks) } else if (names(x) %in% all_opr) { f1 <- flds_flt[flds_flt$field == names(x[[1]]), ] From 5f9e38f2bb9b14d2bd36466459f3f5870330856e Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 12:31:44 -0600 Subject: [PATCH 025/103] feat: query checking in the new api version --- R/check-query.R | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/R/check-query.R b/R/check-query.R index dd8c6f2a..238c8824 100644 --- a/R/check-query.R +++ b/R/check-query.R @@ -10,28 +10,32 @@ is_int <- function(x) #' @noRd is_date <- function(x) - grepl("[12][[:digit:]]{3}-[01][[:digit:]]-[0-3][[:digit:]]", x) + grepl("^[12][[:digit:]]{3}-[01][[:digit:]]-[0-3][[:digit:]]$", x) #' @noRd one_check <- function(operator, field, value, f1) { - if (nrow(f1) == 0) stop2(field, " is not a valid field to query for your endpoint") if (f1$data_type == "date" && !is_date(value)) stop2("Bad date: ", value, ". Date must be in the format of yyyy-mm-dd") - if (f1$data_type %in% c("string", "fulltext") && !is.character(value)) + if (f1$data_type %in% c("bool", "int", "string", "fulltext") && !is.character(value)) stop2(value, " must be of type character") if (f1$data_type == "integer" && !is_int(value)) stop2(value, " must be an integer") + if (f1$data_type == "boolean" && !is.logical(value)) + stop2(value, " must be a boolean") + if (f1$data_type == "number" && !is.numeric(value)) + stop2(value, " must be a number") if ( - (operator %in% c("_begins", "_contains") && !(f1$data_type == "string")) || - (operator %in% c("_text_all", "_text_any", "_text_phrase") && - !(f1$data_type == "fulltext")) || - (f1$data_type %in% c("string", "fulltext") && - operator %in% c("_gt", "_gte", "_lt", "_lte")) - ) + # The new version of the API blurrs the distinction between string/fulltext fields. + # It looks like the string/fulltext functions can be used interchangeably + (operator %in% c("_begins", "_contains", "_text_all", "_text_any", "_text_phrase") && + !(f1$data_type == "fulltext" || f1$data_type == "string")) || + (f1$data_type %in% c("string", "fulltext") && + operator %in% c("_gt", "_gte", "_lt", "_lte"))) { stop2("You cannot use the operator ", operator, " with the field ", field) + } } #' @noRd @@ -42,7 +46,7 @@ check_query <- function(query, endpoint) { fltxt_opr <- c("_text_all", "_text_any", "_text_phrase") all_opr <- c(simp_opr, num_opr, str_opr, fltxt_opr, "_in_range") - flds_flt <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$can_query == "y", ] + flds_flt <- fieldsdf[fieldsdf$endpoint == endpoint, ] apply_checks <- function(x, endpoint) { x <- swap_null_nms(x) @@ -64,8 +68,8 @@ check_query <- function(query, endpoint) { ) } else { stop2( - names(x), " is either not a valid operator or not a ", - "queryable field for this endpoint" + names(x), " is not a valid operator or not a ", + "valid field for this endpoint" ) } } From 67637728b4934a6296e483271249c01da3c77d9f Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 12:37:18 -0600 Subject: [PATCH 026/103] feat: don't require sort fields to be fields param --- R/validate-args.R | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/R/validate-args.R b/R/validate-args.R index 8ad9f930..56aad289 100644 --- a/R/validate-args.R +++ b/R/validate-args.R @@ -44,13 +44,17 @@ validate_args <- function(api_key, fields, endpoint, method, all(is.numeric(per_page), length(per_page) == 1, per_page <= 1000), "per_page must be a numeric value less than or equal to 1,000" ) - if (!is.null(sort)) + + # Removed all(names(sort) %in% fields) Was it our requirement or the API's? + # It does seem to work when we don't request fields and rely on the API to sort + # using them. + if (!is.null(sort)) { asrt( all( - all(names(sort) %in% fields), all(sort %in% c("asc", "desc")), - !is.list(sort)), - "sort has to be a named character vector and each name has to be ", - "specified in the field argument. See examples" + all(sort %in% c("asc", "desc")), + !is.list(sort) + ), + "sort has to be a named character vector. See examples" ) } From 8ca6ee8031a68b59e0c0dde63bb61fae79186852 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 12:51:14 -0600 Subject: [PATCH 027/103] feat validation in the new api version --- R/validate-args.R | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/R/validate-args.R b/R/validate-args.R index 56aad289..4430892e 100644 --- a/R/validate-args.R +++ b/R/validate-args.R @@ -36,13 +36,10 @@ validate_args <- function(api_key, fields, endpoint, method, all(method %in% c("GET", "POST"), length(method) == 1), "method must be either 'GET' or 'POST'" ) + asrt( - all(is.numeric(page), length(page) == 1, page >= 1), - "page must be a numeric value greater than 1" - ) - asrt( - all(is.numeric(per_page), length(per_page) == 1, per_page <= 1000), - "per_page must be a numeric value less than or equal to 1,000" + all(is.numeric(size), length(size) == 1, size <= 1000), + "size must be a numeric value less than or equal to 1,000" ) # Removed all(names(sort) %in% fields) Was it our requirement or the API's? @@ -56,15 +53,23 @@ validate_args <- function(api_key, fields, endpoint, method, ), "sort has to be a named character vector. See examples" ) + } + + asrt( + any(is.null(after), !all_pages), + "'after' cannot be set when all_pages = TRUE" + ) } #' @noRd validate_groups <- function(endpoint, groups) { ok_grps <- unique(fieldsdf[fieldsdf$endpoint == endpoint, "group"]) + asrt( all(groups %in% ok_grps), "for the ", endpoint, " endpoint, group must be one of the following: ", paste(ok_grps, collapse = ", ") + ) ) } @@ -77,14 +82,14 @@ validate_pv_data <- function(data) { } #' @noRd -deprecate_warn_all <- function(error_browser, subent_cnts, mtchd_subent_only) { +deprecate_warn_all <- function(error_browser, subent_cnts, mtchd_subent_only, page, per_page) { if (!is.null(error_browser)) { lifecycle::deprecate_warn(when = "0.2.0", what = "search_pv(error_browser)") } # Was previously defaulting to FALSE and we're still defaulting to FALSE to # mirror the fact that the API doesn't support subent_cnts. Warn only if user - # tries to set subent_cnts to TRUE. - if (isTRUE(subent_cnts)) { + # tries to set subent_cnts to anything other than FALSE (test cases try NULL and 7) + if (!(is.logical(subent_cnts) && isFALSE(subent_cnts))) { lifecycle::deprecate_warn( when = "1.0.0", what = "search_pv(subent_cnts)", From 378e2214e803300eb5e8c005dbe73a0852ea8af6 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 13:00:49 -0600 Subject: [PATCH 028/103] feat: search_pv parameter changes --- R/search-pv.R | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index eb3cb43a..41407631 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -123,12 +123,8 @@ pad_patent_id <- function(patent_id) { request_apply <- function(ex_res, method, query, base_url, arg_list, api_key, ...) { matched_records <- ex_res$query_results[[1]] req_pages <- ceiling(matched_records / arg_list$opts$size) - if (req_pages < 1) { - stop2("No records matched your query...Can't download multiple pages") - } tmp <- lapply(seq_len(req_pages), function(i) { - arg_list$opts$offset <- (i - 1) * arg_list$opts$size x <- one_request(method, query, base_url, arg_list, api_key, ...) x <- process_resp(x) @@ -287,23 +283,26 @@ search_pv <- function(query, endpoint = "patent", subent_cnts = FALSE, mtchd_subent_only = lifecycle::deprecated(), - page = 1, - per_page = 1000, + page = lifecycle::deprecated(), + per_page = lifecycle::deprecated(), + size = 1000, + after = NULL, all_pages = FALSE, sort = NULL, method = "GET", error_browser = NULL, api_key = Sys.getenv("PATENTSVIEW_API_KEY"), ...) { - - validate_args(api_key, fields, endpoint, method, page, per_page, sort) - deprecate_warn_all(error_browser, subent_cnts, mtchd_subent_only) + validate_args(api_key, fields, endpoint, method, sort, after, size, all_pages) + deprecate_warn_all(error_browser, subent_cnts, mtchd_subent_only, page, per_page) + if (lifecycle::is_present(per_page)) size <- per_page if (is.list(query)) { - # check_query(query, endpoint) + check_query(query, endpoint) query <- jsonlite::toJSON(query, auto_unbox = TRUE) } - arg_list <- to_arglist(fields, page, per_page, sort) + + arg_list <- to_arglist(fields, size, sort, after) base_url <- get_base(endpoint) result <- one_request(method, query, base_url, arg_list, api_key, ...) From f14126b1063de413687675faf74dbae8de5e573d Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 13:06:41 -0600 Subject: [PATCH 029/103] feat: new paging methodology --- R/search-pv.R | 57 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index 41407631..88ba516e 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -307,11 +307,62 @@ search_pv <- function(query, result <- one_request(method, query, base_url, arg_list, api_key, ...) result <- process_resp(result) - if (!all_pages) return(result) - full_data <- request_apply(result, method, query, base_url, arg_list, api_key, ...) - result$data[[1]] <- full_data + if (all_pages && result$query_result$total_hits == 0) { + stop2("No records matched your query...Can't download multiple pages") + } + + # return if we don't need to make additional API requests + if (!all_pages || + result$query_result$total_hits == 0 || + result$query_result$total_hits == nrow(result$data[[1]])) { + return(result) + } + + # Here we ignore the user's sort and instead have the API sort by the primary + # key for the requested endpoint. + primary_sort_key <- get_default_sort(endpoint) + + # We check what fields we got back from the first call. If the user didn't + # specify fields, we'd get back the API's defaults. We may need to request + # additional fields from the API so we can apply the users sort and then remove + # the additional fields. + returned_fields <- names(result$data[[1]]) + + if (!is.null(sort)) { + sort_fields <- names(sort) + additional_fields <- sort_fields[!(sort_fields %in% returned_fields)] + if (is.null(fields)) { + fields <- returned_fields # the default fields + } else { + fields <- fields # user passed + } + fields <- append(fields, additional_fields) + } else { + additional_fields <- c() + } + + arg_list <- to_arglist(fields, size, primary_sort_key, after) + paged_data <- request_apply(result, method, query, base_url, arg_list, api_key, ...) + + # apply the user's sort using order() + # was data.table::setorderv(paged_data, names(sort), ifelse(as.vector(sort) == "asc", 1, -1)) + + sort_order <- mapply(function(col, direction) { + if (direction == "asc") { + return(paged_data[[col]]) + } else { + return(-rank(paged_data[[col]], ties.method = "min")) # Invert for descending order + } + }, col = names(sort), direction = as.vector(sort), SIMPLIFY = FALSE) + + # Final sorting + paged_data <- paged_data[do.call(order, sort_order), , drop = FALSE] + + # remove the fields we added in order to do the user's sort ourselves + paged_data <- paged_data[, !names(paged_data) %in% additional_fields] + result$data[[1]] <- paged_data result } From e3d0f184f74982b2acad76e7ed6246ae71fc6d76 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 14:43:15 -0600 Subject: [PATCH 030/103] refactor: getting top level attributes --- R/get-fields.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/get-fields.R b/R/get-fields.R index 4088c8e5..a7438f0c 100644 --- a/R/get-fields.R +++ b/R/get-fields.R @@ -1,3 +1,9 @@ +#' @noRd +get_top_level_attributes <- function(endpoint) { + fieldsdf[fieldsdf$endpoint == endpoint & !grepl("\\.", fieldsdf$field), "field"] +} + + #' Get list of retrievable fields #' #' This function returns a vector of fields that you can retrieve from a given @@ -52,7 +58,7 @@ get_fields <- function(endpoint, groups = NULL) { pk <- get_ok_pk(endpoint) plural_entity <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$field == pk, "group"] - top_level_attributes <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group == plural_entity, "field"] + top_level_attributes <- get_top_level_attributes(endpoint) if (is.null(groups)) { c( From d5509b0424f4357f32085787915e18e5351321fa Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 14:57:25 -0600 Subject: [PATCH 031/103] feat: retrieve_linked can retrieve documentation links --- R/get-fields.R | 2 +- R/search-pv.R | 87 +++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 77 insertions(+), 12 deletions(-) diff --git a/R/get-fields.R b/R/get-fields.R index a7438f0c..8eb53892 100644 --- a/R/get-fields.R +++ b/R/get-fields.R @@ -49,7 +49,7 @@ get_top_level_attributes <- function(endpoint) { #' } #' #' @export -get_fields <- function(endpoint, groups = NULL) { +get_fields <- function(endpoint, groups = NULL, include_pk = FALSE) { validate_endpoint(endpoint) # using API's shorthand notation, group names can be requested as fields instead of diff --git a/R/search-pv.R b/R/search-pv.R index 88ba516e..f9fb9fab 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -366,38 +366,103 @@ search_pv <- function(query, result } -#' Get Linked Data +#' Retrieve Linked Data #' #' Some of the endpoints now return HATEOAS style links to get more data. E.g., -#' the inventors endpoint may return a link such as: -#' "https://search.patentsview.org/api/v1/inventor/252373/" +#' the patent endpoint may return a link such as: +#' "https://search.patentsview.org/api/v1/inventor/fl:th_ln:jefferson-1/" #' -#' @param url The link that was returned by the API on a previous call. +#' @param url A link that was returned by the API on a previous call, an example +#' in the documentation or a Request URL from the \href{https://search.patentsview.org/swagger-ui/}{API's Swagger UI page}. +#' +#' @param encoded_url boolean to indicate whether the url has been URL encoded, defaults to FALSE. +#' Set to TRUE for Request URLs from Swagger UI. +#' +#' @param ... Curl options passed along to httr2's \code{\link[httr2]{req_options}} function. +#' +#' @return A list with the following three elements: +#' \describe{ +#' \item{data}{A list with one element - a named data frame containing the +#' data returned by the server. Each row in the data frame corresponds to a +#' single value for the primary entity. For example, if you search the +#' assignee endpoint, then the data frame will be on the assignee-level, +#' where each row corresponds to a single assignee. Fields that are not on +#' the assignee-level would be returned in list columns.} +#' +#' \item{query_results}{Entity counts across all pages of output (not just +#' the page returned to you).} +#' +#' \item{request}{Details of the GET HTTP request that was sent to the server.} +#' } #' -#' @inherit search_pv return #' @inheritParams search_pv #' #' @examples #' \dontrun{ #' #' retrieve_linked_data( -#' "https://search.patentsview.org/api/v1/cpc_subgroup/G01S7:4811/" -#' ) +#' "https://search.patentsview.org/api/v1/cpc_group/G01S7:4811/" +#' ) +#' +#' endpoint_url <- "https://search.patentsview.org/api/v1/patent/" +#' q_param <- '?q={"_text_any":{"patent_title":"COBOL cotton gin"}}' +#' s_and_o_params <- '&s=[{"patent_id": "asc" }]&o={"size":50}' +#' f_param <- '&f=["inventors.inventor_name_last","patent_id","patent_date","patent_title"]' +#' # (URL broken up to avoid a long line warning in this Rd) +#' +#' retrieve_linked_data( +#' paste0(endpoint_url, q_param, s_and_o_params, f_param) +#' ) +#' +#' retrieve_linked_data( +#' "https://search.patentsview.org/api/v1/patent/?q=%7B%22patent_date%22%3A%221976-01-06%22%7D", +#' encoded_url = TRUE +#' ) #' } #' #' @export retrieve_linked_data <- function(url, + encoded_url = FALSE, api_key = Sys.getenv("PATENTSVIEW_API_KEY"), ... ) { + if (encoded_url) { + url <- utils::URLdecode(url) + } - # Don't sent the API key to any domain other than patentsview.org - if (!grepl("^https://[^/]*\\.patentsview.org/", url)) { + # There wouldn't be url parameters on a HATEOAS link but we'll also accept + # example urls from the documentation, where there could be parameters + url_peices <- httr2::url_parse(url) + + # Only send the API key to subdomains of patentsview.org + if (!grepl("^.*\\.patentsview.org$", url_peices$hostname)) { stop2("retrieve_linked_data is only for patentsview.org urls") } + params <- list() + query <- "" + + if (!is.null(url_peices$query)) { + # Need to change f to fields vector, s to sort vector and o to opts + # There is probably a whizbangy better way to do this in R + if (!is.null(url_peices$query$f)) { + params$fields <- unlist(strsplit(gsub("[\\[\\]]", "", url_peices$query$f, perl = TRUE), ",\\s*")) + } + + if (!is.null(url_peices$query$s)) { + params$sort <- jsonlite::fromJSON(sub(".*s=([^&]*).*", "\\1", url)) + } + + if (!is.null(url_peices$query$o)) { + params$opts <- jsonlite::fromJSON(sub(".*o=([^&]*).*", "\\1", url)) + } + + query <- if (!is.null(url_peices$query$q)) sub(".*q=([^&]*).*", "\\1", url) else "" + url <- paste0(url_peices$scheme, "://", url_peices$hostname, url_peices$path) + } + # Go through one_request, which handles resend on throttle errors - # The API doesn't seem to mind ?q=&f=&o=&s= appended to the URL - res <- one_request("GET", "", url, list(), api_key, ...) + # The API doesn't seem to mind ?q=&f=&o=&s= appended to HATEOAS URLs + res <- one_request("GET", query, url, params, api_key, ...) process_resp(res) } From 66ad2f50edb08e1c2d2efc6fcb8e9ab3fea6f3c4 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 15:16:19 -0600 Subject: [PATCH 032/103] feat: search_pv parameter updates --- R/search-pv.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/search-pv.R b/R/search-pv.R index f9fb9fab..8c983882 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -70,6 +70,8 @@ patentsview_error_body <- function(resp) { if (httr2::resp_status(resp) == 400) c(httr2::resp_header(resp, "X-Status-Reason")) else NULL } +#' @noRd +one_request <- function(method, query, base_url, arg_list, api_key, ...) { if (method == "GET") { get_url <- get_get_url(query, base_url, arg_list) req <- httr2::request(get_url) |> From 065a5397a1c725035f065a7f264f380abf509c1c Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 15:19:44 -0600 Subject: [PATCH 033/103] docs: API link update --- R/get-fields.R | 2 +- R/search-pv.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/get-fields.R b/R/get-fields.R index 8eb53892..b270de31 100644 --- a/R/get-fields.R +++ b/R/get-fields.R @@ -19,7 +19,7 @@ get_top_level_attributes <- function(endpoint) { #' endpoint's fields (i.e., do not filter the field list based on group #' membership). See the field tables located online to see which groups you #' can specify for a given endpoint (e.g., the -#' \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference/#patent}{patent +#' \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference/#patent}{patents #' endpoint table}), or use the \code{fieldsdf} table #' (e.g., \code{unique(fieldsdf[fieldsdf$endpoint == "patent", "group"])}). #' diff --git a/R/search-pv.R b/R/search-pv.R index 8c983882..cb27cdf7 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -171,7 +171,7 @@ get_default_sort <- function(endpoint) { #' #' \item An object of class \code{pv_query}, which you create by calling one #' of the functions found in the \code{\link{qry_funs}} list...See the -#' \href{https://docs.ropensci.org/patentsview/articles/writing-queries.html}{writing +#' \href{../articles/writing-queries.html}{writing #' queries vignette} for details.\cr #' E.g., \code{qry_funs$gte(patent_date = "2007-01-04")} #' } @@ -179,7 +179,7 @@ get_default_sort <- function(endpoint) { #' A value of \code{NULL} indicates to the API that it should return the default fields #' for that endpoint. Acceptable fields for a given endpoint can be found at the API's #' online documentation (e.g., check out the field list for the -#' \href{https://patentsview.org/apis/api-endpoints/patents}{patents +#' \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference#patent}{patents #' endpoint}) or by viewing the \code{fieldsdf} data frame #' (\code{View(fieldsdf)}). You can also use \code{\link{get_fields}} to list #' out the fields available for a given endpoint. From 7ce51b4632b9fd65a8150bb0ea85ed632dc1304e Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 15:21:48 -0600 Subject: [PATCH 034/103] docs: parameter and example changes --- R/get-fields.R | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/R/get-fields.R b/R/get-fields.R index b270de31..a66365af 100644 --- a/R/get-fields.R +++ b/R/get-fields.R @@ -22,6 +22,9 @@ get_top_level_attributes <- function(endpoint) { #' \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference/#patent}{patents #' endpoint table}), or use the \code{fieldsdf} table #' (e.g., \code{unique(fieldsdf[fieldsdf$endpoint == "patent", "group"])}). +#' @param include_pk Boolean on whether to include the endpoint's primary key, +#' defaults to FALSE. The primary key is needed if you plan on calling +#' \code{\link{unnest_pv_data}} on the results of \code{\link{search_pv}} #' #' @return A character vector with field names. #' @@ -47,6 +50,19 @@ get_top_level_attributes <- function(endpoint) { #' fields = fields #' ) #' } +#' # Get the nested inventors fields and the primary key in order to call unnest_pv_data +#' # on the returned data. unnest_pv_data would throw an error if the primary key was +#' # not present in the results. +#' fields <- get_fields(endpoint = "patent", groups = c("inventors"), include_pk = TRUE) +#' +#' \dontrun{ +#' # ...Then pass to search_pv and unnest the results +#' results <- search_pv( +#' query = '{"_gte":{"patent_date":"2007-01-04"}}', +#' fields = fields +#' ) +#' unnest_pv_data(results$data) +#' } #' #' @export get_fields <- function(endpoint, groups = NULL, include_pk = FALSE) { From 86dae002906b564bf2899a353e6f17701c0ccd61 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 15:23:57 -0600 Subject: [PATCH 035/103] docs: plural to singular endpoints --- R/get-fields.R | 6 +++--- R/search-pv.R | 14 +++++++------- R/unnest-pv-data.R | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/R/get-fields.R b/R/get-fields.R index a66365af..062c804e 100644 --- a/R/get-fields.R +++ b/R/get-fields.R @@ -29,8 +29,8 @@ get_top_level_attributes <- function(endpoint) { #' @return A character vector with field names. #' #' @examples -#' # Get all assignee-level fields for the patent endpoint: -#' fields <- get_fields(endpoint = "patent", groups = "assignees") +#' # Get all top level (non-nested) fields for the patent endpoint: +#' fields <- get_fields(endpoint = "patent", groups = c("patents")) #' #' # ...Then pass to search_pv: #' \dontrun{ @@ -40,7 +40,7 @@ get_top_level_attributes <- function(endpoint) { #' fields = fields #' ) #' } -#' # Get all patent and assignee-level fields for the patent endpoint: +#' # Get unnested patent and assignee-level fields for the patent endpoint: #' fields <- get_fields(endpoint = "patent", groups = c("assignees", "patents")) #' #' \dontrun{ diff --git a/R/search-pv.R b/R/search-pv.R index cb27cdf7..7eaff4fb 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -208,25 +208,25 @@ get_default_sort <- function(endpoint) { #' \code{all_pages = TRUE}, the value of \code{size} is ignored. #' @param sort A named character vector where the name indicates the field to #' sort by and the value indicates the direction of sorting (direction should -#' be either "asc" or "desc"). For example, \code{sort = c("patent_number" = -#' "asc")} or \cr\code{sort = c("patent_number" = "asc", "patent_date" = +#' be either "asc" or "desc"). For example, \code{sort = c("patent_id" = +#' "asc")} or \cr\code{sort = c("patent_id" = "asc", "patent_date" = #' "desc")}. \code{sort = NULL} (the default) means do not sort the results. #' You must include any fields that you wish to sort by in \code{fields}. #' @param method The HTTP method that you want to use to send the request. #' Possible values include "GET" or "POST". Use the POST method when #' your query is very long (say, over 2,000 characters in length). #' @param error_browser `r lifecycle::badge("deprecated")` -#' @param api_key API key. See \href{https://patentsview.org/apis/keyrequest}{ -#' Here} for info on creating a key. -#' @param ... Arguments passed along to httr's \code{\link[httr]{GET}} or -#' \code{\link[httr]{POST}} function. +#' @param api_key API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key +#' \href{https://patentsview.org/apis/keyrequest}{here}. +#' @param ... Curl options passed along to httr2's \code{\link[httr2]{req_options}} +#' when we do GETs or POSTs. #' #' @return A list with the following three elements: #' \describe{ #' \item{data}{A list with one element - a named data frame containing the #' data returned by the server. Each row in the data frame corresponds to a #' single value for the primary entity. For example, if you search the -#' assignees endpoint, then the data frame will be on the assignee-level, +#' assignee endpoint, then the data frame will be on the assignee-level, #' where each row corresponds to a single assignee. Fields that are not on #' the assignee-level would be returned in list columns.} #' diff --git a/R/unnest-pv-data.R b/R/unnest-pv-data.R index f6af8470..19b29370 100644 --- a/R/unnest-pv-data.R +++ b/R/unnest-pv-data.R @@ -65,8 +65,8 @@ get_ok_pk <- function(endpoint_or_entity) { #' inside it. See examples. #' @param pk The column/field name that will link the data frames together. This #' should be the unique identifier for the primary entity. For example, if you -#' used the patents endpoint in your call to \code{search_pv}, you could -#' specify \code{pk = "patent_number"}. \strong{This identifier has to have +#' used the patent endpoint in your call to \code{search_pv}, you could +#' specify \code{pk = "patent_id"}. \strong{This identifier has to have #' been included in your \code{fields} vector when you called #' \code{search_pv}}. You can use \code{\link{get_ok_pk}} to suggest a #' potential primary key for your data. From 5956b7bb1f29f849fdb38bd6ff5b2d4a54481d98 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 15:41:01 -0600 Subject: [PATCH 036/103] feat validation in the new api version --- R/validate-args.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/validate-args.R b/R/validate-args.R index 4430892e..31dd6467 100644 --- a/R/validate-args.R +++ b/R/validate-args.R @@ -69,7 +69,6 @@ validate_groups <- function(endpoint, groups) { all(groups %in% ok_grps), "for the ", endpoint, " endpoint, group must be one of the following: ", paste(ok_grps, collapse = ", ") - ) ) } From 07f50e373eb35f2154e3b69b6d37eadc4c921f72 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 15:57:33 -0600 Subject: [PATCH 037/103] generated files --- man/cast_pv_data.Rd | 2 +- man/fieldsdf.Rd | 2 +- man/get_fields.Rd | 27 +++++++++++--- man/get_ok_pk.Rd | 15 ++++---- man/pad_patent_id.Rd | 24 +++++++++++++ man/patentsview-package.Rd | 2 +- man/qry_funs.Rd | 17 ++++++--- man/retrieve_linked_data.Rd | 52 ++++++++++++++++++--------- man/search_pv.Rd | 72 +++++++++++++++++++++++-------------- man/unnest_pv_data.Rd | 6 ++-- man/with_qfuns.Rd | 23 ++++++------ 11 files changed, 166 insertions(+), 76 deletions(-) create mode 100644 man/pad_patent_id.Rd diff --git a/man/cast_pv_data.Rd b/man/cast_pv_data.Rd index 829b3c13..3ed42d36 100644 --- a/man/cast_pv_data.Rd +++ b/man/cast_pv_data.Rd @@ -23,7 +23,7 @@ they have their most appropriate data types (e.g., date, numeric, etc.). \dontrun{ fields <- c("patent_date", "patent_title", "patent_year") -res <- search_pv(query = "{\"patent_number\":\"5116621\"}", fields = fields) +res <- search_pv(query = "{\"patent_id\":\"5116621\"}", fields = fields) cast_pv_data(data = res$data) } diff --git a/man/fieldsdf.Rd b/man/fieldsdf.Rd index b55ddee3..aa107283 100644 --- a/man/fieldsdf.Rd +++ b/man/fieldsdf.Rd @@ -22,7 +22,7 @@ fieldsdf A data frame containing the names of retrievable fields for each of the endpoints. You can find this data on the API's online documentation for each endpoint as well (e.g., the -\href{https://patentsview.org/apis/api-endpoints/patents}{patents endpoint +\href{https://search.patentsview.org/docs/docs/Search\%20API/SearchAPIReference/#patent}{patent endpoint field list table}). } \keyword{datasets} diff --git a/man/get_fields.Rd b/man/get_fields.Rd index 4dc23060..b6ce3639 100644 --- a/man/get_fields.Rd +++ b/man/get_fields.Rd @@ -4,7 +4,7 @@ \alias{get_fields} \title{Get list of retrievable fields} \usage{ -get_fields(endpoint, groups = NULL) +get_fields(endpoint, groups = NULL, include_pk = FALSE) } \arguments{ \item{endpoint}{The API endpoint whose field list you want to get. See @@ -15,9 +15,13 @@ returned. A value of \code{NULL} indicates that you want all of the endpoint's fields (i.e., do not filter the field list based on group membership). See the field tables located online to see which groups you can specify for a given endpoint (e.g., the -\href{https://search.patentsview.org/docs/docs/Search\%20API/SearchAPIReference/#patent}{patent +\href{https://search.patentsview.org/docs/docs/Search\%20API/SearchAPIReference/#patent}{patents endpoint table}), or use the \code{fieldsdf} table (e.g., \code{unique(fieldsdf[fieldsdf$endpoint == "patent", "group"])}).} + +\item{include_pk}{Boolean on whether to include the endpoint's primary key, +defaults to FALSE. The primary key is needed if you plan on calling +\code{\link{unnest_pv_data}} on the results of \code{\link{search_pv}}} } \value{ A character vector with field names. @@ -30,8 +34,8 @@ entity group(s) as well (which is recommended, given the large number of possible fields for each endpoint). } \examples{ -# Get all assignee-level fields for the patent endpoint: -fields <- get_fields(endpoint = "patent", groups = "assignees") +# Get all top level (non-nested) fields for the patent endpoint: +fields <- get_fields(endpoint = "patent", groups = c("patents")) # ...Then pass to search_pv: \dontrun{ @@ -41,7 +45,7 @@ search_pv( fields = fields ) } -# Get all patent and assignee-level fields for the patent endpoint: +# Get unnested patent and assignee-level fields for the patent endpoint: fields <- get_fields(endpoint = "patent", groups = c("assignees", "patents")) \dontrun{ @@ -51,5 +55,18 @@ search_pv( fields = fields ) } +# Get the nested inventors fields and the primary key in order to call unnest_pv_data +# on the returned data. unnest_pv_data would throw an error if the primary key was +# not present in the results. +fields <- get_fields(endpoint = "patent", groups = c("inventors"), include_pk = TRUE) + +\dontrun{ +# ...Then pass to search_pv and unnest the results +results <- search_pv( + query = '{"_gte":{"patent_date":"2007-01-04"}}', + fields = fields +) +unnest_pv_data(results$data) +} } diff --git a/man/get_ok_pk.Rd b/man/get_ok_pk.Rd index 6bd223c9..36d3dbf3 100644 --- a/man/get_ok_pk.Rd +++ b/man/get_ok_pk.Rd @@ -4,11 +4,11 @@ \alias{get_ok_pk} \title{Get OK primary key} \usage{ -get_ok_pk(endpoint) +get_ok_pk(endpoint_or_entity) } \arguments{ -\item{endpoint}{The endpoint which you would like to know a potential primary -key for.} +\item{endpoint_or_entity}{The endpoint or entity name for which you +would like to know a potential primary key for.} } \value{ The name of a primary key (\code{pk}) that you could pass to @@ -19,11 +19,12 @@ This function suggests a value that you could use for the \code{pk} argument in \code{\link{unnest_pv_data}}, based on the endpoint you searched. It will return a potential unique identifier for a given entity (i.e., a given endpoint). For example, it will return "patent_id" when -\code{endpoint = "patent"}. +\code{endpoint_or_entity = "patent"}. It would return the same value if +the entity name "patents" was passed via \code{get_ok_pk(names(pv_return$data))} +where pv_return was returned from \code{\link{search_pv}}. } \examples{ -get_ok_pk(endpoint = "inventor") -get_ok_pk(endpoint = "cpc_subclass") -get_ok_pk("publication/rel_app_text") +get_ok_pk(endpoint_or_entity = "inventor") # Returns "inventor_id" +get_ok_pk(endpoint_or_entity = "cpc_group") # Returns "cpc_group_id" } diff --git a/man/pad_patent_id.Rd b/man/pad_patent_id.Rd new file mode 100644 index 00000000..544ee243 --- /dev/null +++ b/man/pad_patent_id.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/search-pv.R +\name{pad_patent_id} +\alias{pad_patent_id} +\title{Pad patent_id} +\usage{ +pad_patent_id(patent_id) +} +\arguments{ +\item{patent_id}{The patent_id that needs to be padded. It can +be the patent_id for a utility, design, plant or reissue patent.} +} +\description{ +This function strategically pads a patent_id with zeroes to 8 characters, +needed only for custom paging that uses sorts by patent_id. +} +\examples{ +\dontrun{ +padded <- pad_patent_id("RE36479") + +padded2 <- pad_patent_id("3930306") +} + +} diff --git a/man/patentsview-package.Rd b/man/patentsview-package.Rd index 621a26f7..6b688378 100644 --- a/man/patentsview-package.Rd +++ b/man/patentsview-package.Rd @@ -6,7 +6,7 @@ \alias{patentsview-package} \title{patentsview: An R Client to the 'PatentsView' API} \description{ -Provides functions to simplify the 'PatentsView' API (\url{https://patentsview.org/apis/purpose}) query language, send GET and POST requests to the API's seven endpoints, and parse the data that comes back. +Provides functions to simplify the 'PatentsView' API (\url{https://patentsview.org/apis/purpose}) query language, send GET and POST requests to the API's twenty seven endpoints, and parse the data that comes back. } \seealso{ Useful links: diff --git a/man/qry_funs.Rd b/man/qry_funs.Rd index cff6667f..6a7c90e1 100644 --- a/man/qry_funs.Rd +++ b/man/qry_funs.Rd @@ -5,7 +5,7 @@ \alias{qry_funs} \title{List of query functions} \format{ -An object of class \code{list} of length 14. +An object of class \code{list} of length 15. } \usage{ qry_funs @@ -16,9 +16,8 @@ list with a print method attached to it. } \description{ A list of functions that make it easy to write PatentsView queries. See the -details section below for a list of the 14 functions, as well as the -\href{https://docs.ropensci.org/patentsview/articles/writing-queries.html}{writing -queries vignette} for further details. +details section below for a list of the 15 functions, as well as the +\href{../articles/writing-queries.html}{writing queries vignette} for further details. } \details{ \strong{1. Comparison operator functions} \cr @@ -66,11 +65,21 @@ There is 1 negation function: \itemize{ \item \code{not} - The comparison is not true } + +\strong{4. Convenience function} \cr + +There is 1 convenience function: +\itemize{ +\item \code{in_range} - Builds a <= x <= b query +} } \examples{ qry_funs$eq(patent_date = "2001-01-01") qry_funs$not(qry_funs$eq(patent_date = "2001-01-01")) +qry_funs$in_range(patent_year = c(2010, 2021)) + +qry_funs$in_range(patent_date = c("1976-01-01", "1983-02-28")) } \keyword{datasets} diff --git a/man/retrieve_linked_data.Rd b/man/retrieve_linked_data.Rd index 6a0e347f..90da02bd 100644 --- a/man/retrieve_linked_data.Rd +++ b/man/retrieve_linked_data.Rd @@ -2,18 +2,26 @@ % Please edit documentation in R/search-pv.R \name{retrieve_linked_data} \alias{retrieve_linked_data} -\title{Get Linked Data} +\title{Retrieve Linked Data} \usage{ -retrieve_linked_data(url, api_key = Sys.getenv("PATENTSVIEW_API_KEY"), ...) +retrieve_linked_data( + url, + encoded_url = FALSE, + api_key = Sys.getenv("PATENTSVIEW_API_KEY"), + ... +) } \arguments{ -\item{url}{The link that was returned by the API on a previous call.} +\item{url}{A link that was returned by the API on a previous call, an example +in the documentation or a Request URL from the \href{https://search.patentsview.org/swagger-ui/}{API's Swagger UI page}.} + +\item{encoded_url}{boolean to indicate whether the url has been URL encoded, defaults to FALSE. +Set to TRUE for Request URLs from Swagger UI.} -\item{api_key}{API key. See \href{https://patentsview.org/apis/keyrequest}{ -Here} for info on creating a key.} +\item{api_key}{API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key +\href{https://patentsview.org/apis/keyrequest}{here}.} -\item{...}{Arguments passed along to httr's \code{\link[httr]{GET}} or -\code{\link[httr]{POST}} function.} +\item{...}{Curl options passed along to httr2's \code{\link[httr2]{req_options}} function.} } \value{ A list with the following three elements: @@ -21,30 +29,42 @@ A list with the following three elements: \item{data}{A list with one element - a named data frame containing the data returned by the server. Each row in the data frame corresponds to a single value for the primary entity. For example, if you search the -assignees endpoint, then the data frame will be on the assignee-level, +assignee endpoint, then the data frame will be on the assignee-level, where each row corresponds to a single assignee. Fields that are not on the assignee-level would be returned in list columns.} \item{query_results}{Entity counts across all pages of output (not just the page returned to you).} -\item{request}{Details of the HTTP request that was sent to the server. -When you set \code{all_pages = TRUE}, you will only get a sample request. -In other words, you will not be given multiple requests for the multiple -calls that were made to the server (one for each page of results).} +\item{request}{Details of the GET HTTP request that was sent to the server.} } } \description{ Some of the endpoints now return HATEOAS style links to get more data. E.g., -the inventors endpoint may return a link such as: -"https://search.patentsview.org/api/v1/inventor/252373/" +the patent endpoint may return a link such as: +"https://search.patentsview.org/api/v1/inventor/fl:th_ln:jefferson-1/" } \examples{ \dontrun{ retrieve_linked_data( - "https://search.patentsview.org/api/v1/cpc_subgroup/G01S7:4811/" - ) + "https://search.patentsview.org/api/v1/cpc_group/G01S7:4811/" +) + +endpoint_url <- "https://search.patentsview.org/api/v1/patent/" +q_param <- '?q={"_text_any":{"patent_title":"COBOL cotton gin"}}' +s_and_o_params <- '&s=[{"patent_id": "asc" }]&o={"size":50}' +f_param <- '&f=["inventors.inventor_name_last","patent_id","patent_date","patent_title"]' +# (URL broken up to avoid a long line warning in this Rd) + +retrieve_linked_data( + paste0(endpoint_url, q_param, s_and_o_params, f_param) +) + +retrieve_linked_data( + "https://search.patentsview.org/api/v1/patent/?q=\%7B\%22patent_date\%22\%3A\%221976-01-06\%22\%7D", + encoded_url = TRUE +) } } diff --git a/man/search_pv.Rd b/man/search_pv.Rd index d767abc8..bd00c370 100644 --- a/man/search_pv.Rd +++ b/man/search_pv.Rd @@ -10,8 +10,10 @@ search_pv( endpoint = "patent", subent_cnts = FALSE, mtchd_subent_only = lifecycle::deprecated(), - page = 1, - per_page = 1000, + page = lifecycle::deprecated(), + per_page = lifecycle::deprecated(), + size = 1000, + after = NULL, all_pages = FALSE, sort = NULL, method = "GET", @@ -32,42 +34,55 @@ E.g., \code{list("_gte" = list("patent_date" = "2007-01-04"))} \item An object of class \code{pv_query}, which you create by calling one of the functions found in the \code{\link{qry_funs}} list...See the -\href{https://docs.ropensci.org/patentsview/articles/writing-queries.html}{writing +\href{../articles/writing-queries.html}{writing queries vignette} for details.\cr E.g., \code{qry_funs$gte(patent_date = "2007-01-04")} }} \item{fields}{A character vector of the fields that you want returned to you. -A value of \code{NULL} indicates that the default fields should be -returned. Acceptable fields for a given endpoint can be found at the API's +A value of \code{NULL} indicates to the API that it should return the default fields +for that endpoint. Acceptable fields for a given endpoint can be found at the API's online documentation (e.g., check out the field list for the -\href{https://patentsview.org/apis/api-endpoints/patents}{patents +\href{https://search.patentsview.org/docs/docs/Search\%20API/SearchAPIReference#patent}{patents endpoint}) or by viewing the \code{fieldsdf} data frame (\code{View(fieldsdf)}). You can also use \code{\link{get_fields}} to list -out the fields available for a given endpoint.} +out the fields available for a given endpoint. + +Nested fields can be fully qualified, e.g., "application.filing_date" or the +group name can be used to retrieve all of its nested fields, E.g. "application". +The latter would be similar to passing \code{get_fields("patent", group = "application")} +except it's the API that decides what fields to return.} \item{endpoint}{The web service resource you wish to search. Use \code{get_endpoints()} to list the available endpoints.} -\item{subent_cnts}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Non-matched subentities -will always be returned under the new version of the API} +\item{subent_cnts}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} This is always FALSE in the +new version of the API as the total counts of unique subentities is no longer available.} \item{mtchd_subent_only}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} This is always -FALSE in the new version of the API.} +FALSE in the new version of the API as non-matched subentities +will always be returned.} + +\item{page}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} The new version of the API does not use +\code{page} as a parameter for paging, it uses \code{after}.} + +\item{per_page}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} The API now uses \code{size}} -\item{page}{The page number of the results that should be returned.} +\item{size}{The number of records that should be returned per page. This +value can be as high as 1,000 (e.g., \code{size = 1000}).} -\item{per_page}{The number of records that should be returned per page. This -value can be as high as 1,000 (e.g., \code{per_page = 1000}).} +\item{after}{A list of sort key values that defaults to NULL. This +exposes the API's paging parameter for users who want to implement their own +paging. It cannot be set when \code{all_pages = TRUE} as the R package manipulates it +for users automatically. See \href{../articles/result-set-paging.html}{result set paging}} \item{all_pages}{Do you want to download all possible pages of output? If -\code{all_pages = TRUE}, the values of \code{page} and \code{per_page} are -ignored.} +\code{all_pages = TRUE}, the value of \code{size} is ignored.} \item{sort}{A named character vector where the name indicates the field to sort by and the value indicates the direction of sorting (direction should -be either "asc" or "desc"). For example, \code{sort = c("patent_number" = - "asc")} or \cr\code{sort = c("patent_number" = "asc", "patent_date" = +be either "asc" or "desc"). For example, \code{sort = c("patent_id" = + "asc")} or \cr\code{sort = c("patent_id" = "asc", "patent_date" = "desc")}. \code{sort = NULL} (the default) means do not sort the results. You must include any fields that you wish to sort by in \code{fields}.} @@ -77,11 +92,11 @@ your query is very long (say, over 2,000 characters in length).} \item{error_browser}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}} -\item{api_key}{API key. See \href{https://patentsview.org/apis/keyrequest}{ -Here} for info on creating a key.} +\item{api_key}{API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key +\href{https://patentsview.org/apis/keyrequest}{here}.} -\item{...}{Arguments passed along to httr's \code{\link[httr]{GET}} or -\code{\link[httr]{POST}} function.} +\item{...}{Curl options passed along to httr2's \code{\link[httr2]{req_options}} +when we do GETs or POSTs.} } \value{ A list with the following three elements: @@ -89,7 +104,7 @@ A list with the following three elements: \item{data}{A list with one element - a named data frame containing the data returned by the server. Each row in the data frame corresponds to a single value for the primary entity. For example, if you search the -assignees endpoint, then the data frame will be on the assignee-level, +assignee endpoint, then the data frame will be on the assignee-level, where each row corresponds to a single assignee. Fields that are not on the assignee-level would be returned in list columns.} @@ -119,8 +134,8 @@ search_pv( search_pv( query = qry_funs$gt(patent_year = 2010), method = "POST", - fields = "patent_number", - sort = c("patent_number" = "asc") + fields = "patent_id", + sort = c("patent_id" = "asc") ) search_pv( @@ -135,9 +150,14 @@ search_pv( ) search_pv( - query = qry_funs$contains(inventors_at_grant.name_last = "Smith"), + query = qry_funs$contains(inventors.inventor_name_last = "Smith"), endpoint = "patent", - config = httr::timeout(40) + timeout = 40 +) + +search_pv( + query = qry_funs$eq(patent_id = "11530080"), + fields = "application" ) } diff --git a/man/unnest_pv_data.Rd b/man/unnest_pv_data.Rd index a2c528ef..46de5723 100644 --- a/man/unnest_pv_data.Rd +++ b/man/unnest_pv_data.Rd @@ -4,7 +4,7 @@ \alias{unnest_pv_data} \title{Unnest PatentsView data} \usage{ -unnest_pv_data(data, pk = get_ok_pk(names(data))) +unnest_pv_data(data, pk = NULL) } \arguments{ \item{data}{The data returned by \code{\link{search_pv}}. This is the first @@ -14,8 +14,8 @@ inside it. See examples.} \item{pk}{The column/field name that will link the data frames together. This should be the unique identifier for the primary entity. For example, if you -used the patents endpoint in your call to \code{search_pv}, you could -specify \code{pk = "patent_number"}. \strong{This identifier has to have +used the patent endpoint in your call to \code{search_pv}, you could +specify \code{pk = "patent_id"}. \strong{This identifier has to have been included in your \code{fields} vector when you called \code{search_pv}}. You can use \code{\link{get_ok_pk}} to suggest a potential primary key for your data.} diff --git a/man/with_qfuns.Rd b/man/with_qfuns.Rd index 40f755bb..ba847615 100644 --- a/man/with_qfuns.Rd +++ b/man/with_qfuns.Rd @@ -23,26 +23,25 @@ try assigning the \code{\link{qry_funs}} list into your global environment with: \code{list2env(qry_funs, envir = globalenv())}. } \examples{ -# Without with_qfuns, we have to do: qry_funs$and( qry_funs$gte(patent_date = "2007-01-01"), qry_funs$text_phrase(patent_abstract = c("computer program")), qry_funs$or( - qry_funs$eq(inventor_last_name = "ihaka"), - qry_funs$eq(inventor_first_name = "chris") + qry_funs$eq(inventors.inventor_name_last = "Ihaka"), + qry_funs$eq(inventors.inventor_name_last = "Chris") ) ) -#...With it, this becomes: +# ...With it, this becomes: with_qfuns( - and( - gte(patent_date = "2007-01-01"), - text_phrase(patent_abstract = c("computer program")), - or( - eq(inventor_last_name = "ihaka"), - eq(inventor_first_name = "chris") - ) - ) + and( + gte(patent_date = "2007-01-01"), + text_phrase(patent_abstract = c("computer program")), + or( + eq(inventors.inventor_name_last = "Ihaka"), + eq(inventors.inventor_name_last = "Chris") + ) + ) ) } From 47c014f4309abe1444f58ff6d11bf92f4de93f37 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 17:17:31 -0600 Subject: [PATCH 038/103] generated files --- docs/reference/cast_pv_data.html | 190 ++++--------- docs/reference/fieldsdf.html | 192 +++++--------- docs/reference/figures/lifecycle-archived.svg | 1 + docs/reference/figures/lifecycle-defunct.svg | 1 + .../figures/lifecycle-deprecated.svg | 1 + .../figures/lifecycle-experimental.svg | 1 + docs/reference/figures/lifecycle-maturing.svg | 1 + .../figures/lifecycle-questioning.svg | 1 + docs/reference/figures/lifecycle-stable.svg | 1 + .../figures/lifecycle-superseded.svg | 1 + docs/reference/get_endpoints.html | 171 +++--------- docs/reference/get_fields.html | 30 ++- docs/reference/get_ok_pk.html | 24 +- docs/reference/index.html | 249 +++++------------- docs/reference/pad_patent_id.html | 139 ++++++++++ docs/reference/patentsview-package.html | 4 +- docs/reference/qry_funs.html | 242 ++++++----------- docs/reference/retrieve_linked_data.html | 199 ++++++++++++++ docs/reference/search_pv.html | 73 +++-- docs/reference/unnest_pv_data.html | 6 +- docs/reference/with_qfuns.html | 241 ++++++----------- 21 files changed, 820 insertions(+), 948 deletions(-) create mode 100644 docs/reference/figures/lifecycle-archived.svg create mode 100644 docs/reference/figures/lifecycle-defunct.svg create mode 100644 docs/reference/figures/lifecycle-deprecated.svg create mode 100644 docs/reference/figures/lifecycle-experimental.svg create mode 100644 docs/reference/figures/lifecycle-maturing.svg create mode 100644 docs/reference/figures/lifecycle-questioning.svg create mode 100644 docs/reference/figures/lifecycle-stable.svg create mode 100644 docs/reference/figures/lifecycle-superseded.svg create mode 100644 docs/reference/pad_patent_id.html create mode 100644 docs/reference/retrieve_linked_data.html diff --git a/docs/reference/cast_pv_data.html b/docs/reference/cast_pv_data.html index 08ef48c0..ecc3f786 100644 --- a/docs/reference/cast_pv_data.html +++ b/docs/reference/cast_pv_data.html @@ -1,70 +1,13 @@ - - - - - - - -Cast PatentsView data — cast_pv_data • patentsview - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Cast PatentsView data — cast_pv_data • patentsview + + - - - - -
-
- -
- -
+
-

This will cast the data fields returned by search_pv so that +

This will cast the data fields returned by search_pv so that they have their most appropriate data types (e.g., date, numeric, etc.).

-
cast_pv_data(data)
+
+
cast_pv_data(data)
+
-

Arguments

- - - - - - -
data

The data returned by search_pv. This is the first +

+

Arguments

+
data
+

The data returned by search_pv. This is the first element of the three-element result object you got back from search_pv. It should be a list of length 1, with one data frame -inside it. See examples.

- -

Value

+inside it. See examples.

-

The same type of object that you passed into cast_pv_data.

+
+
+

Value

+ -

Examples

-
if (FALSE) { - -fields <- c("patent_date", "patent_title", "patent_year") -res <- search_pv(query = "{\"patent_number\":\"5116621\"}", fields = fields) -cast_pv_data(data = res$data) -} +

The same type of object that you passed into cast_pv_data.

+
-
+
+

Examples

+
if (FALSE) {
+
+fields <- c("patent_date", "patent_title", "patent_year")
+res <- search_pv(query = "{\"patent_id\":\"5116621\"}", fields = fields)
+cast_pv_data(data = res$data)
+}
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/fieldsdf.html b/docs/reference/fieldsdf.html index 11fc300e..19611599 100644 --- a/docs/reference/fieldsdf.html +++ b/docs/reference/fieldsdf.html @@ -1,74 +1,16 @@ - - - - - - - -Fields data frame — fieldsdf • patentsview - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Fields data frame — fieldsdf • patentsview - - + + - - -
-
- -
- -
+
-

A data frame containing the names of retrievable and queryable fields for -each of the 7 API endpoints. A yes/no flag (can_query) indicates -which fields can be included in the user's query. You can also find this -data on the API's online documentation for each endpoint as well (e.g., -the patents -endpoint field list table)

+

A data frame containing the names of retrievable fields for each of the +endpoints. You can find this data on the API's online documentation for each +endpoint as well (e.g., the +patent endpoint +field list table).

-
fieldsdf
+
+
fieldsdf
+
+
+

Format

+

A data frame with the following columns:

endpoint
+

The endpoint that this field record is for

-

Format

+
field
+

The complete name of the field, including the parent group if +applicable

-

A data frame with 992 rows and 7 variables:

-
endpoint

The endpoint that this field record is for

-
field

The name of the field

-
data_type

The field's data type (string, date, float, integer, - fulltext)

-
can_query

An indicator for whether the field can be included in - the user query for the given endpoint

-
group

The group the field belongs to

-
common_name

The field's common name

-
description

A description of the field

+
data_type
+

The field's input data type

-
+
group
+

The group the field belongs to

+
common_name
+

The field name without the parent group structure

+ + +
+
-
- +
- - + + diff --git a/docs/reference/figures/lifecycle-archived.svg b/docs/reference/figures/lifecycle-archived.svg new file mode 100644 index 00000000..48f72a6f --- /dev/null +++ b/docs/reference/figures/lifecycle-archived.svg @@ -0,0 +1 @@ + lifecyclelifecyclearchivedarchived \ No newline at end of file diff --git a/docs/reference/figures/lifecycle-defunct.svg b/docs/reference/figures/lifecycle-defunct.svg new file mode 100644 index 00000000..01452e5f --- /dev/null +++ b/docs/reference/figures/lifecycle-defunct.svg @@ -0,0 +1 @@ +lifecyclelifecycledefunctdefunct \ No newline at end of file diff --git a/docs/reference/figures/lifecycle-deprecated.svg b/docs/reference/figures/lifecycle-deprecated.svg new file mode 100644 index 00000000..4baaee01 --- /dev/null +++ b/docs/reference/figures/lifecycle-deprecated.svg @@ -0,0 +1 @@ +lifecyclelifecycledeprecateddeprecated \ No newline at end of file diff --git a/docs/reference/figures/lifecycle-experimental.svg b/docs/reference/figures/lifecycle-experimental.svg new file mode 100644 index 00000000..d1d060e9 --- /dev/null +++ b/docs/reference/figures/lifecycle-experimental.svg @@ -0,0 +1 @@ +lifecyclelifecycleexperimentalexperimental \ No newline at end of file diff --git a/docs/reference/figures/lifecycle-maturing.svg b/docs/reference/figures/lifecycle-maturing.svg new file mode 100644 index 00000000..df713101 --- /dev/null +++ b/docs/reference/figures/lifecycle-maturing.svg @@ -0,0 +1 @@ +lifecyclelifecyclematuringmaturing \ No newline at end of file diff --git a/docs/reference/figures/lifecycle-questioning.svg b/docs/reference/figures/lifecycle-questioning.svg new file mode 100644 index 00000000..08ee0c90 --- /dev/null +++ b/docs/reference/figures/lifecycle-questioning.svg @@ -0,0 +1 @@ +lifecyclelifecyclequestioningquestioning \ No newline at end of file diff --git a/docs/reference/figures/lifecycle-stable.svg b/docs/reference/figures/lifecycle-stable.svg new file mode 100644 index 00000000..e015dc81 --- /dev/null +++ b/docs/reference/figures/lifecycle-stable.svg @@ -0,0 +1 @@ +lifecyclelifecyclestablestable \ No newline at end of file diff --git a/docs/reference/figures/lifecycle-superseded.svg b/docs/reference/figures/lifecycle-superseded.svg new file mode 100644 index 00000000..75f24f55 --- /dev/null +++ b/docs/reference/figures/lifecycle-superseded.svg @@ -0,0 +1 @@ + lifecyclelifecyclesupersededsuperseded \ No newline at end of file diff --git a/docs/reference/get_endpoints.html b/docs/reference/get_endpoints.html index e510ce14..e9eea4ab 100644 --- a/docs/reference/get_endpoints.html +++ b/docs/reference/get_endpoints.html @@ -1,70 +1,13 @@ - - - - - - - -Get endpoints — get_endpoints • patentsview - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Get endpoints — get_endpoints • patentsview + + - - - - -
-
- -
- -
+
-

This function reminds the user what the 7 possible PatentsView API endpoints +

This function reminds the user what the possible PatentsView API endpoints are.

-
get_endpoints()
- - -

Value

+
+
get_endpoints()
+
-

A character vector with the names of the 7 endpoints. Those endpoints are:

-
    -
  • assignees

  • -
  • cpc_subsections

  • -
  • inventors

  • -
  • locations

  • -
  • nber_subcategories

  • -
  • patents

  • -
  • uspc_mainclasses

  • -
+
+

Value

+ +

A character vector with the names of each endpoint.

+
-

Examples

-
get_endpoints() -
#> [1] "assignees" "cpc_subsections" "inventors" -#> [4] "locations" "nber_subcategories" "patents" -#> [7] "uspc_mainclasses"
+
-
- +
- - + + diff --git a/docs/reference/get_fields.html b/docs/reference/get_fields.html index 3951913c..4e1bb43a 100644 --- a/docs/reference/get_fields.html +++ b/docs/reference/get_fields.html @@ -98,7 +98,7 @@

Get list of retrievable fields

-
get_fields(endpoint, groups = NULL)
+
get_fields(endpoint, groups = NULL, include_pk = FALSE)
@@ -114,10 +114,16 @@

Arguments

endpoint's fields (i.e., do not filter the field list based on group membership). See the field tables located online to see which groups you can specify for a given endpoint (e.g., the -patent +patents endpoint table), or use the fieldsdf table (e.g., unique(fieldsdf[fieldsdf$endpoint == "patent", "group"])).

+ +
include_pk
+

Boolean on whether to include the endpoint's primary key, +defaults to FALSE. The primary key is needed if you plan on calling +unnest_pv_data on the results of search_pv

+

Value

@@ -128,8 +134,8 @@

Value

Examples

-
# Get all assignee-level fields for the patent endpoint:
-fields <- get_fields(endpoint = "patent", groups = "assignees")
+    
# Get all top level (non-nested) fields for the patent endpoint:
+fields <- get_fields(endpoint = "patent", groups = c("patents"))
 
 # ...Then pass to search_pv:
 if (FALSE) {
@@ -139,9 +145,8 @@ 

Examples

fields = fields ) } -# Get all patent and assignee-level fields for the patent endpoint: +# Get unnested patent and assignee-level fields for the patent endpoint: fields <- get_fields(endpoint = "patent", groups = c("assignees", "patents")) -#> Error: group must be one of the following: , assignee_years, inventor_years, applicants, application, assignees, attorneys, botanic, cpc_at_issue, cpc_current, examiners, figures, foreign_priority, gov_interest_contract_award_numbers, gov_interest_organizations, granted_pregrant_crosswalk, inventors, ipcr, pct_data, us_related_documents, us_term_of_grant, uspc_at_issue, wipo, us_parties if (FALSE) { # ...Then pass to search_pv: @@ -150,6 +155,19 @@

Examples

fields = fields ) } +# Get the nested inventors fields and the primary key in order to call unnest_pv_data +# on the returned data. unnest_pv_data would throw an error if the primary key was +# not present in the results. +fields <- get_fields(endpoint = "patent", groups = c("inventors"), include_pk = TRUE) + +if (FALSE) { +# ...Then pass to search_pv and unnest the results +results <- search_pv( + query = '{"_gte":{"patent_date":"2007-01-04"}}', + fields = fields +) +unnest_pv_data(results$data) +}
diff --git a/docs/reference/get_ok_pk.html b/docs/reference/get_ok_pk.html index 149a8283..d91c7dad 100644 --- a/docs/reference/get_ok_pk.html +++ b/docs/reference/get_ok_pk.html @@ -3,7 +3,9 @@ in unnest_pv_data, based on the endpoint you searched. It will return a potential unique identifier for a given entity (i.e., a given endpoint). For example, it will return "patent_id" when -endpoint = "patent".'> @@ -94,18 +96,20 @@

Get OK primary key

in unnest_pv_data, based on the endpoint you searched. It will return a potential unique identifier for a given entity (i.e., a given endpoint). For example, it will return "patent_id" when -endpoint = "patent".

+endpoint_or_entity = "patent". It would return the same value if +the entity name "patents" was passed via get_ok_pk(names(pv_return$data)) +where pv_return was returned from search_pv.

-
get_ok_pk(endpoint)
+
get_ok_pk(endpoint_or_entity)

Arguments

-
endpoint
-

The endpoint which you would like to know a potential primary -key for.

+
endpoint_or_entity
+

The endpoint or entity name for which you +would like to know a potential primary key for.

@@ -118,12 +122,10 @@

Value

Examples

-
get_ok_pk(endpoint = "inventor")
+    
get_ok_pk(endpoint_or_entity = "inventor") # Returns "inventor_id"
 #> [1] "inventor_id"
-get_ok_pk(endpoint = "cpc_subclass")
-#> [1] "cpc_subclass_id"
-get_ok_pk("publication/rel_app_text")
-#> [1] "document_number"
+get_ok_pk(endpoint_or_entity = "cpc_group") # Returns "cpc_group_id"
+#> [1] "cpc_group_id"
 
 
diff --git a/docs/reference/index.html b/docs/reference/index.html index ee40061b..ab30c2ca 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,68 +1,12 @@ - - - - - - - -Function reference • patentsview - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Function reference • patentsview - - + + - - -
-
- -
- -
+
- - - - - - - - - - - + + + + +
-

The API client

+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+

The API client

+

search_pv()

Search PatentsView

-

Convenience objects for search_pv

+
+

Convenience objects for search_pv

+

get_endpoints()

Get endpoints

+

get_fields()

Get list of retrievable fields

+

fieldsdf

Fields data frame

-

Writing queries with the DSL

+
+

Writing queries with the DSL

+

qry_funs

List of query functions

+

with_qfuns()

With qry_funs

-

Manipulating patentsview data

+
+

Manipulating patentsview data

+

unnest_pv_data()

Unnest PatentsView data

+

get_ok_pk()

Get OK primary key

+

cast_pv_data()

Cast PatentsView data

- +
+

retrieve_linked_data()

+

Retrieve Linked Data

+

Utility

+

+
+

pad_patent_id()

+

Pad patent_id

+
-
- +
- - + + diff --git a/docs/reference/pad_patent_id.html b/docs/reference/pad_patent_id.html new file mode 100644 index 00000000..d92031de --- /dev/null +++ b/docs/reference/pad_patent_id.html @@ -0,0 +1,139 @@ + +Pad patent_id — pad_patent_id • patentsview + + +
+
+ + + +
+
+ + +
+

This function strategically pads a patent_id with zeroes to 8 characters, +needed only for custom paging that uses sorts by patent_id.

+
+ +
+
pad_patent_id(patent_id)
+
+ +
+

Arguments

+
patent_id
+

The patent_id that needs to be padded. It can +be the patent_id for a utility, design, plant or reissue patent.

+ +
+ +
+

Examples

+
if (FALSE) {
+padded <- pad_patent_id("RE36479")
+
+padded2 <- pad_patent_id("3930306")
+}
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/patentsview-package.html b/docs/reference/patentsview-package.html index bea2797e..1ed343ea 100644 --- a/docs/reference/patentsview-package.html +++ b/docs/reference/patentsview-package.html @@ -1,5 +1,5 @@ -patentsview: An R Client to the 'PatentsView' API — patentsview-package • patentsviewpatentsview: An R Client to the 'PatentsView' API — patentsview-package • patentsview @@ -86,7 +86,7 @@

patentsview: An R Client to the 'PatentsView' API

-

Provides functions to simplify the 'PatentsView' API (https://patentsview.org/apis/purpose) query language, send GET and POST requests to the API's seven endpoints, and parse the data that comes back.

+

Provides functions to simplify the 'PatentsView' API (https://patentsview.org/apis/purpose) query language, send GET and POST requests to the API's twenty seven endpoints, and parse the data that comes back.

diff --git a/docs/reference/qry_funs.html b/docs/reference/qry_funs.html index f332a17e..c73b1b52 100644 --- a/docs/reference/qry_funs.html +++ b/docs/reference/qry_funs.html @@ -1,72 +1,14 @@ - - - - - - - -List of query functions — qry_funs • patentsview - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -List of query functions — qry_funs • patentsview - + + - - - -
-
- -
- -
+

A list of functions that make it easy to write PatentsView queries. See the -details section below for a list of the 14 functions, as well as the -writing -queries vignette for further details.

+details section below for a list of the 15 functions, as well as the +writing queries vignette for further details.

-
qry_funs
- - -

Format

- -

An object of class list of length 14.

-

Value

+
+
qry_funs
+
-

An object of class pv_query. This is basically just a simple - list with a print method attached to it.

-

Details

+
+

Format

+

An object of class list of length 15.

+
+
+

Value

+ -

1. Comparison operator functions

+

An object of class pv_query. This is basically just a simple +list with a print method attached to it.

+
+
+

Details

+

1. Comparison operator functions

There are 6 comparison operator functions that work with fields of type -integer, float, date, or string:

    -
  • eq - Equal to

  • +integer, float, date, or string:

    • eq - Equal to

    • neq - Not equal to

    • gt - Greater than

    • gte - Greater than or equal to

    • lt - Less than

    • lte - Less than or equal to

    • -
    - -

    There are 2 comparison operator functions that only work with fields of type -string:

      -
    • begins - The string begins with the value string

    • +

    There are 2 comparison operator functions that only work with fields of type +string:

    • begins - The string begins with the value string

    • contains - The string contains the value string

    • -
    - -

    There are 3 comparison operator functions that only work with fields of type -fulltext:

      -
    • text_all - The text contains all the words in the value - string

    • +

    There are 3 comparison operator functions that only work with fields of type +fulltext:

    • text_all - The text contains all the words in the value +string

    • text_any - The text contains any of the words in the value - string

    • +string

    • text_phrase - The text contains the exact phrase of the value - string

    • -
    - -

    2. Array functions

    -

    There are 2 array functions:

      -
    • and - Both members of the array must be true

    • +string

      +

    2. Array functions

    +

    There are 2 array functions:

    • and - Both members of the array must be true

    • or - Only one member of the array must be true

    • -
    - -

    3. Negation function

    -

    There is 1 negation function:

      -
    • not - The comparison is not true

    • -
    - - -

    Examples

    -
    qry_funs$eq(patent_date = "2001-01-01") -
    #> {"_eq":{"patent_date":"2001-01-01"}}
    -qry_funs$not(qry_funs$eq(patent_date = "2001-01-01")) -
    #> {"_not":{"_eq":{"patent_date":"2001-01-01"}}}
    -
    +

3. Negation function

+

There is 1 negation function:

  • not - The comparison is not true

  • +

4. Convenience function

+

There is 1 convenience function:

  • in_range - Builds a <= x <= b query

  • +
+ +
+

Examples

+
qry_funs$eq(patent_date = "2001-01-01")
+#> {"_eq":{"patent_date":"2001-01-01"}}
+
+qry_funs$not(qry_funs$eq(patent_date = "2001-01-01"))
+#> {"_not":{"_eq":{"patent_date":"2001-01-01"}}}
+
+qry_funs$in_range(patent_year = c(2010, 2021))
+#> {"_and":[{"_gte":{"patent_year":2010}},{"_lte":{"patent_year":2021}}]}
+
+qry_funs$in_range(patent_date = c("1976-01-01", "1983-02-28"))
+#> {"_and":[{"_gte":{"patent_date":"1976-01-01"}},{"_lte":{"patent_date":"1983-02-28"}}]}
+
+
+
-
- +
- - + + diff --git a/docs/reference/retrieve_linked_data.html b/docs/reference/retrieve_linked_data.html new file mode 100644 index 00000000..148968c5 --- /dev/null +++ b/docs/reference/retrieve_linked_data.html @@ -0,0 +1,199 @@ + +Retrieve Linked Data — retrieve_linked_data • patentsview + + +
+
+ + + +
+
+ + +
+

Some of the endpoints now return HATEOAS style links to get more data. E.g., +the patent endpoint may return a link such as: +"https://search.patentsview.org/api/v1/inventor/fl:th_ln:jefferson-1/"

+
+ +
+
retrieve_linked_data(
+  url,
+  encoded_url = FALSE,
+  api_key = Sys.getenv("PATENTSVIEW_API_KEY"),
+  ...
+)
+
+ +
+

Arguments

+
url
+

A link that was returned by the API on a previous call, an example +in the documentation or a Request URL from the API's Swagger UI page.

+ + +
encoded_url
+

boolean to indicate whether the url has been URL encoded, defaults to FALSE. +Set to TRUE for Request URLs from Swagger UI.

+ + +
api_key
+

API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key +here.

+ + +
...
+

Curl options passed along to httr2's req_options function.

+ +
+
+

Value

+ + +

A list with the following three elements:

data
+

A list with one element - a named data frame containing the +data returned by the server. Each row in the data frame corresponds to a +single value for the primary entity. For example, if you search the +assignee endpoint, then the data frame will be on the assignee-level, +where each row corresponds to a single assignee. Fields that are not on +the assignee-level would be returned in list columns.

+ + +
query_results
+

Entity counts across all pages of output (not just +the page returned to you).

+ + +
request
+

Details of the GET HTTP request that was sent to the server.

+ + +
+ +
+

Examples

+
if (FALSE) {
+
+retrieve_linked_data(
+  "https://search.patentsview.org/api/v1/cpc_group/G01S7:4811/"
+)
+
+endpoint_url <- "https://search.patentsview.org/api/v1/patent/"
+q_param <- '?q={"_text_any":{"patent_title":"COBOL cotton gin"}}'
+s_and_o_params <- '&s=[{"patent_id": "asc" }]&o={"size":50}'
+f_param <- '&f=["inventors.inventor_name_last","patent_id","patent_date","patent_title"]'
+# (URL broken up to avoid a long line warning in this Rd)
+
+retrieve_linked_data(
+  paste0(endpoint_url, q_param, s_and_o_params, f_param)
+)
+
+retrieve_linked_data(
+  "https://search.patentsview.org/api/v1/patent/?q=%7B%22patent_date%22%3A%221976-01-06%22%7D",
+  encoded_url = TRUE
+)
+}
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/search_pv.html b/docs/reference/search_pv.html index 8710e3a7..640aba6f 100644 --- a/docs/reference/search_pv.html +++ b/docs/reference/search_pv.html @@ -98,8 +98,10 @@

Search PatentsView

endpoint = "patent", subent_cnts = FALSE, mtchd_subent_only = lifecycle::deprecated(), - page = 1, - per_page = 1000, + page = lifecycle::deprecated(), + per_page = lifecycle::deprecated(), + size = 1000, + after = NULL, all_pages = FALSE, sort = NULL, method = "GET", @@ -119,7 +121,7 @@

Arguments

E.g., list("_gte" = list("patent_date" = "2007-01-04"))

  • An object of class pv_query, which you create by calling one of the functions found in the qry_funs list...See the -writing +writing queries vignette for details.
    E.g., qry_funs$gte(patent_date = "2007-01-04")

  • @@ -127,13 +129,17 @@

    Arguments

    fields

    A character vector of the fields that you want returned to you. -A value of NULL indicates that the default fields should be -returned. Acceptable fields for a given endpoint can be found at the API's +A value of NULL indicates to the API that it should return the default fields +for that endpoint. Acceptable fields for a given endpoint can be found at the API's online documentation (e.g., check out the field list for the -patents +patents endpoint) or by viewing the fieldsdf data frame (View(fieldsdf)). You can also use get_fields to list -out the fields available for a given endpoint.

    +out the fields available for a given endpoint.

    +

    Nested fields can be fully qualified, e.g., "application.filing_date" or the +group name can be used to retrieve all of its nested fields, E.g. "application". +The latter would be similar to passing get_fields("patent", group = "application") +except it's the API that decides what fields to return.

    endpoint
    @@ -142,35 +148,47 @@

    Arguments

    subent_cnts
    -

    [Deprecated] Non-matched subentities -will always be returned under the new version of the API

    +

    [Deprecated] This is always FALSE in the +new version of the API as the total counts of unique subentities is no longer available.

    mtchd_subent_only

    [Deprecated] This is always -FALSE in the new version of the API.

    +FALSE in the new version of the API as non-matched subentities +will always be returned.

    page
    -

    The page number of the results that should be returned.

    +

    [Deprecated] The new version of the API does not use +page as a parameter for paging, it uses after.

    per_page
    +

    [Deprecated] The API now uses size

    + + +
    size

    The number of records that should be returned per page. This -value can be as high as 1,000 (e.g., per_page = 1000).

    +value can be as high as 1,000 (e.g., size = 1000).

    + + +
    after
    +

    A list of sort key values that defaults to NULL. This +exposes the API's paging parameter for users who want to implement their own +paging. It cannot be set when all_pages = TRUE as the R package manipulates it +for users automatically. See result set paging

    all_pages

    Do you want to download all possible pages of output? If -all_pages = TRUE, the values of page and per_page are -ignored.

    +all_pages = TRUE, the value of size is ignored.

    sort

    A named character vector where the name indicates the field to sort by and the value indicates the direction of sorting (direction should -be either "asc" or "desc"). For example, sort = c("patent_number" = - "asc") or
    sort = c("patent_number" = "asc", "patent_date" = +be either "asc" or "desc"). For example, sort = c("patent_id" = + "asc") or
    sort = c("patent_id" = "asc", "patent_date" = "desc"). sort = NULL (the default) means do not sort the results. You must include any fields that you wish to sort by in fields.

    @@ -186,13 +204,13 @@

    Arguments

    api_key
    -

    API key. See -Here for info on creating a key.

    +

    API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key +here.

    ...
    -

    Arguments passed along to httr's GET or -POST function.

    +

    Curl options passed along to httr2's req_options +when we do GETs or POSTs.

    @@ -203,7 +221,7 @@

    Value

    A list with one element - a named data frame containing the data returned by the server. Each row in the data frame corresponds to a single value for the primary entity. For example, if you search the -assignees endpoint, then the data frame will be on the assignee-level, +assignee endpoint, then the data frame will be on the assignee-level, where each row corresponds to a single assignee. Fields that are not on the assignee-level would be returned in list columns.

    @@ -236,8 +254,8 @@

    Examples

    search_pv( query = qry_funs$gt(patent_year = 2010), method = "POST", - fields = "patent_number", - sort = c("patent_number" = "asc") + fields = "patent_id", + sort = c("patent_id" = "asc") ) search_pv( @@ -252,9 +270,14 @@

    Examples

    ) search_pv( - query = qry_funs$contains(inventors_at_grant.name_last = "Smith"), + query = qry_funs$contains(inventors.inventor_name_last = "Smith"), endpoint = "patent", - config = httr::timeout(40) + timeout = 40 +) + +search_pv( + query = qry_funs$eq(patent_id = "11530080"), + fields = "application" ) } diff --git a/docs/reference/unnest_pv_data.html b/docs/reference/unnest_pv_data.html index 5c41902b..e055c5bd 100644 --- a/docs/reference/unnest_pv_data.html +++ b/docs/reference/unnest_pv_data.html @@ -100,7 +100,7 @@

    Unnest PatentsView data

    -
    unnest_pv_data(data, pk = get_ok_pk(names(data)))
    +
    unnest_pv_data(data, pk = NULL)
    @@ -115,8 +115,8 @@

    Arguments

    pk

    The column/field name that will link the data frames together. This should be the unique identifier for the primary entity. For example, if you -used the patents endpoint in your call to search_pv, you could -specify pk = "patent_number". This identifier has to have +used the patent endpoint in your call to search_pv, you could +specify pk = "patent_id". This identifier has to have been included in your fields vector when you called search_pv. You can use get_ok_pk to suggest a potential primary key for your data.

    diff --git a/docs/reference/with_qfuns.html b/docs/reference/with_qfuns.html index d1d1564c..c1e80e77 100644 --- a/docs/reference/with_qfuns.html +++ b/docs/reference/with_qfuns.html @@ -1,73 +1,16 @@ - - - - - - - -With qry_funs — with_qfuns • patentsview - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -With qry_funs — with_qfuns • patentsview - - - - - - - - - - - + + - - -
    -
    - -
    - -
    +

    This function evaluates whatever code you pass to it in the environment of -the qry_funs list. This allows you to cut down on typing when +the qry_funs list. This allows you to cut down on typing when writing your queries. If you want to cut down on typing even more, you can -try assigning the qry_funs list into your global environment -with: list2env(qry_funs, envir = globalenv()).

    +try assigning the qry_funs list into your global environment +with: list2env(qry_funs, envir = globalenv()).

    -
    with_qfuns(code, envir = parent.frame())
    - -

    Arguments

    - - - - - - - - - - -
    code

    Code to evaluate. See example.

    envir

    Where should R look for objects present in code that -aren't present in qry_funs.

    - -

    Value

    - -

    The result of code - i.e., your query.

    - -

    Examples

    -
    # Without with_qfuns, we have to do: -qry_funs$and( - qry_funs$gte(patent_date = "2007-01-01"), - qry_funs$text_phrase(patent_abstract = c("computer program")), - qry_funs$or( - qry_funs$eq(inventor_last_name = "ihaka"), - qry_funs$eq(inventor_first_name = "chris") - ) -) -
    #> {"_and":[{"_gte":{"patent_date":"2007-01-01"}},{"_text_phrase":{"patent_abstract":"computer program"}},{"_or":[{"_eq":{"inventor_last_name":"ihaka"}},{"_eq":{"inventor_first_name":"chris"}}]}]}
    -#...With it, this becomes: -with_qfuns( - and( - gte(patent_date = "2007-01-01"), - text_phrase(patent_abstract = c("computer program")), - or( - eq(inventor_last_name = "ihaka"), - eq(inventor_first_name = "chris") - ) - ) -) -
    #> {"_and":[{"_gte":{"patent_date":"2007-01-01"}},{"_text_phrase":{"patent_abstract":"computer program"}},{"_or":[{"_eq":{"inventor_last_name":"ihaka"}},{"_eq":{"inventor_first_name":"chris"}}]}]}
    -
    +
    +
    with_qfuns(code, envir = parent.frame())
    +
    + +
    +

    Arguments

    +
    code
    +

    Code to evaluate. See example.

    + + +
    envir
    +

    Where should R look for objects present in code that +aren't present in qry_funs.

    + +
    +
    +

    Value

    + + +

    The result of code - i.e., your query.

    +
    + +
    +

    Examples

    +
    qry_funs$and(
    +  qry_funs$gte(patent_date = "2007-01-01"),
    +  qry_funs$text_phrase(patent_abstract = c("computer program")),
    +  qry_funs$or(
    +    qry_funs$eq(inventors.inventor_name_last = "Ihaka"),
    +    qry_funs$eq(inventors.inventor_name_last = "Chris")
    +  )
    +)
    +#> {"_and":[{"_gte":{"patent_date":"2007-01-01"}},{"_text_phrase":{"patent_abstract":"computer program"}},{"_or":[{"_eq":{"inventors.inventor_name_last":"Ihaka"}},{"_eq":{"inventors.inventor_name_last":"Chris"}}]}]}
    +
    +# ...With it, this becomes:
    +with_qfuns(
    +  and(
    +    gte(patent_date = "2007-01-01"),
    +    text_phrase(patent_abstract = c("computer program")),
    +    or(
    +      eq(inventors.inventor_name_last = "Ihaka"),
    +      eq(inventors.inventor_name_last = "Chris")
    +    )
    +  )
    +)
    +#> {"_and":[{"_gte":{"patent_date":"2007-01-01"}},{"_text_phrase":{"patent_abstract":"computer program"}},{"_or":[{"_eq":{"inventors.inventor_name_last":"Ihaka"}},{"_eq":{"inventors.inventor_name_last":"Chris"}}]}]}
    +
    +
    +
    +
    -
    - +
    - - + + From ce1875f096256d1749ec138770db38f4a6bca3b9 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 19:50:49 -0600 Subject: [PATCH 039/103] feat: new paging methodology --- NAMESPACE | 1 + _pkgdown.yml | 3 +++ 2 files changed, 4 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 46981d70..2cff0eaa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ export(cast_pv_data) export(get_endpoints) export(get_fields) export(get_ok_pk) +export(pad_patent_id) export(qry_funs) export(retrieve_linked_data) export(search_pv) diff --git a/_pkgdown.yml b/_pkgdown.yml index 75d58fbb..fe54d8ba 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -23,6 +23,9 @@ reference: - get_ok_pk - cast_pv_data - retrieve_linked_data + - title: Utility + contents: + - pad_patent_id navbar: components: From 6e997fc0f877910b41084838721622a0febae1cb Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sun, 22 Dec 2024 12:15:57 -0600 Subject: [PATCH 040/103] test: updatng tests for new api version --- tests/testthat/helpers.R | 21 +- tests/testthat/test-api-bugs.R | 359 +++++++++++++++++++++ tests/testthat/test-arg-validation.R | 40 --- tests/testthat/test-cast-pv-data.R | 72 ++++- tests/testthat/test-check-query.R | 98 ++++++ tests/testthat/test-get-fields.R | 32 ++ tests/testthat/test-print.R | 36 +++ tests/testthat/test-query-dsl.R | 36 +++ tests/testthat/test-search-pv.R | 447 +++++++++++++++++++++++---- tests/testthat/test-unnest-pv-data.R | 75 ++++- tests/testthat/test-utils.R | 12 + tests/testthat/test-validate-args.R | 87 ++++++ 12 files changed, 1193 insertions(+), 122 deletions(-) create mode 100644 tests/testthat/test-api-bugs.R delete mode 100644 tests/testthat/test-arg-validation.R create mode 100644 tests/testthat/test-check-query.R create mode 100644 tests/testthat/test-get-fields.R create mode 100644 tests/testthat/test-print.R create mode 100644 tests/testthat/test-query-dsl.R create mode 100644 tests/testthat/test-utils.R create mode 100644 tests/testthat/test-validate-args.R diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R index 3fe9dc36..d39b4e00 100644 --- a/tests/testthat/helpers.R +++ b/tests/testthat/helpers.R @@ -1,5 +1,5 @@ # Vector of queries (one for each endpoint) that are used during testing. We -# need this b/c in the new version of the api, only three of the endpoints are +# need this b/c in the new version of the api, only ten of the endpoints are # searchable by patent number (i.e., we can't use a generic patent number # search query). further, now patent_number has been patent_id @@ -32,3 +32,22 @@ TEST_QUERIES <- c( "uspc_subclass" = '{"uspc_subclass_id": "100/1"}', "wipo" = '{"wipo_id": "1"}' ) + +to_plural <- function(x) { + pk <- get_ok_pk(x) + fieldsdf[fieldsdf$endpoint == x & fieldsdf$field == pk, "group"] +} + +to_singular <- function(entity) { + endpoint_df <- fieldsdf[fieldsdf$group == entity, ] + endpoint <- unique(endpoint_df$endpoint) + + # watch out here- several endpoints return entities that are groups returned + # by the patent and publication endpoints (attorneys, inventors, assignees) + if(length(endpoint) > 1) { + endpoint <- endpoint[!endpoint %in% c("patent", "publication")] + } + + # can't distinguish rel_app_texts between patent/rel_app_text and publication/rel_app_text + endpoint +} diff --git a/tests/testthat/test-api-bugs.R b/tests/testthat/test-api-bugs.R new file mode 100644 index 00000000..2e5f2227 --- /dev/null +++ b/tests/testthat/test-api-bugs.R @@ -0,0 +1,359 @@ + +# Tests from the other files in this directory that are masking API errors +# This file was submitted to the API team as PVS-1125 + +eps <- (get_endpoints()) + +add_base_url <- function(x) { + paste0("https://search.patentsview.org/api/v1/", x) +} + +test_that("there is trouble paging", { + skip_on_cran() + skip_on_ci() + + # reprex inspired by https://patentsview.org/forum/7/topic/812 + # Not all requested groups are coming back as we page, causing + # Error in rbind(deparse.level, ...) : + # numbers of columns of arguments do not match + # This query fails if any of these groups are specified + # "applicants", "cpc_at_issue", "gov_interest_contract_award_numbers", + # "uspc_at_issue") + + query <- with_qfuns( + and( + gte(application.filing_date = "2000-01-01"), + eq(cpc_current.cpc_subclass_id = "A01D") + ) + ) + + sort <- c("patent_id" = "asc") + fields <- c( + "patent_id", "applicants", "cpc_at_issue", + "gov_interest_contract_award_numbers", "uspc_at_issue" + ) + + result1 <- search_pv(query, + method = "GET", all_pages = FALSE, + fields = fields, sort = sort, size = 1000 + ) + + result2 <- search_pv(query, + method = "GET", all_pages = FALSE, + fields = fields, sort = sort, size = 1000, after = "06901731" + ) + + # result1$data$patents$applicants is sparse, mostly NULL + # there isn't a result2$data$patents$applicants + names1 <- names(result1$data$patents) + names2 <- names(result2$data$patents) + + expect_failure( + expect_setequal(names1, names2) + ) +}) + +test_that("there is case sensitivity on string equals", { + skip_on_cran() + skip_on_ci() + + # reported to the API team PVS-1147 + # not sure if this is a bug or feature - original API was case insensitive + # using both forms of equals, impied and explicit + + assignee <- "Johnson & Johnson International" + query1 <- sprintf('{"assignee_organization": \"%s\"}', assignee) + a <- search_pv(query1, endpoint = "assignee") + query2 <- qry_funs$eq(assignee_organization = assignee) + b <- search_pv(query2, endpoint = "assignee") + expect_equal(a$query_results$total_hits, 1) + expect_equal(b$query_results$total_hits, 1) + + assignee <- tolower(assignee) + query1 <- sprintf('{"assignee_organization": \"%s\"}', assignee) + c <- search_pv(query1, endpoint = "assignee") + query2 <- qry_funs$eq(assignee_organization = assignee) + d <- search_pv(query2, endpoint = "assignee") + expect_equal(c$query_results$total_hits, 0) + expect_equal(d$query_results$total_hits, 0) +}) + +test_that("string vs text operators behave differently", { + skip_on_cran() + + # # reported to the API team PVS-1147 + query <- qry_funs$begins(assignee_organization = "johnson") + a <- search_pv(query, endpoint = "assignee") + + query <- qry_funs$text_any(assignee_organization = "johnson") + b <- search_pv(query, endpoint = "assignee") + + expect_failure( + expect_equal(a$query_results$total_hits, b$query_results$total_hits) + ) +}) + +test_that("API returns all requested groups", { + skip_on_cran() + skip_on_ci() + + # can we traverse the return building a list of fields? + # sort both requested fields and returned ones to see if they are equal + + # TODO: remove the trickery to get this test to pass, once the API is fixed + bad_eps <- c( + "cpc_subclasses", + "location" # Error: Invalid field: location_latitude + , "uspc_subclasse" # Error: Internal Server Error + , "uspc_mainclass" # Error: Internal Server Error + , "wipo" # Error: Internal Server Error + , "claim" # Error: Invalid field: claim_dependent + , "draw_desc_text" # Error: Invalid field: description_sequence + , "cpc_subclass" # 404? check the test query + , "uspc_subclass" # 404 + , "pg_claim" # Invalid field: claim_dependent + ) + + mismatched_returns <- c( + "patent", + "publication" + ) + + # this will fail when the api is fixed + z <- lapply(bad_eps, function(x) { + print(x) + expect_error( + j <- search_pv(query = TEST_QUERIES[[x]], endpoint = x, fields = get_fields(x)) + ) + }) + + # this will fail when the API is fixed + z <- lapply(mismatched_returns, function(x) { + print(x) + res <- search_pv( + query = TEST_QUERIES[[x]], + endpoint = x, + fields = get_fields(x) + ) + + dl <- unnest_pv_data(res$data) + + actual_groups <- names(dl) + expected_groups <- unique(fieldsdf[fieldsdf$endpoint == x, "group"]) + + # we now need to unnest the endpoints for the comparison to work + expected_groups <- gsub("^(patent|publication)/", "", expected_groups) + + # the expected group for unnested attributes would be "" in actuality the come back + # in an entity matching the plural form of the unnested endpoint + expected_groups <- replace(expected_groups, expected_groups == "", to_plural(x)) + + expect_failure( + expect_setequal(actual_groups, expected_groups) + ) + }) + + # make it noticeable that all is not right with the API + skip("Skip for API bugs") # TODO: remove when the API is fixed +}) + +eps <- (get_endpoints()) + +test_that("We can call all the legitimate HATEOAS endpoints", { + skip_on_cran() + + # these currently throw Error: Internal Server Error + broken_single_item_queries <- c( + "cpc_subclass/A01B/", + "uspc_mainclass/30/", + "uspc_subclass/30:100/", + "wipo/1/" + ) + + + # TODO: remove when this is fixed + # we'll know the api is fixed when this test fails + dev_null <- lapply(broken_single_item_queries, function(q) { + expect_error( + j <- retrieve_linked_data(add_base_url(q)) + ) + }) +}) + +test_that("individual fields are still broken", { + skip_on_cran() + + # Sample fields that cause 500 errors when requested by themselves. + # Some don't throw errors when included in get_fields() but they do if + # they are the only field requested. Other individual fields at these + # same endpoints throw errors. Check fields again when these fail. + sample_bad_fields <- c( + "assignee_organization" = "assignees", + "inventor_lastknown_longitude" = "inventors", + "inventor_gender_code" = "inventors", + "location_name" = "locations", + "attorney_name_last" = "patent/attorneys", + "citation_country" = "patent/foreign_citations", + "ipc_id" = "ipcs" + ) + + dev_null <- lapply(names(sample_bad_fields), function(x) { + endpoint <- sample_bad_fields[[x]] + expect_error( + out <- search_pv(query = TEST_QUERIES[[endpoint]], endpoint = endpoint, fields = c(x)) + ) + }) +}) + +test_that("we can't sort by all fields", { + skip_on_cran() + + # PVS-1377 + sorts_to_try <- c( + assignee = "assignee_lastknown_city", + cpc_class = "cpc_class_title", + cpc_group = "cpc_group_title", + cpc_subclass = "cpc_subclass", + g_brf_sum_text = "summary_text", + g_claim = "claim_text", + g_detail_desc_text = "description_text", + g_draw_desc_text = "draw_desc_text", + inventor = "inventor_lastknown_city", + patent = "patent_id" # good pair to show that the code works + ) + + results <- lapply(names(sorts_to_try), function(endpoint) { + field <- sorts_to_try[[endpoint]] + print(paste(endpoint, field)) + + tryCatch( + { + sort <- c("asc") + names(sort) <- field + j <- search_pv( + query = TEST_QUERIES[[endpoint]], + endpoint = endpoint, sort = sort, method = "GET" + ) + NA + }, + error = function(e) { + paste(endpoint, field) + } + ) + }) + + results <- results[!is.na(results)] + expect_gt(length(results), 0) + expect_lt(length(results), length(sorts_to_try)) # assert that at least one sort worked +}) + + +test_that("withdrawn patents are still present in the database", { + skip_on_cran() + + # PVS-1342 Underlying data issues + # There are 8,000 patents that were in the bulk xml files patentsiew is based on. + # The patents were subsequently withdrawn but not removed from the database + withdrawn <- c( + "9978309", "9978406", "9978509", "9978615", "9978659", + "9978697", "9978830", "9978838", "9978886", "9978906", "9978916", + "9979255", "9979355", "9979482", "9979700", "9979841", "9979847", + "9980139", "9980711", "9980782", "9981222", "9981277", "9981423", + "9981472", "9981603", "9981760", "9981914", "9982126", "9982172", + "9982670", "9982860", "9982871", "9983588", "9983756", "9984058", + "9984899", "9984952", "9985340", "9985480", "9985987", "9986046" + ) + + query <- qry_funs$eq("patent_id" = c(withdrawn)) + results <- search_pv(query, method = "POST") + expect_equal(results$query_results$total_hits, length(withdrawn)) +}) + +test_that("missing patents are still missing", { + skip_on_cran() + + # PVS-1342 Underlying data issues + # There are around 300 patents that aren't in the bulk xml files patentsiew is based on. + missing <- c( + "4097517", "4424514", "4480077", "4487876", "4704648", "4704721", + "4705017", "4705031", "4705032", "4705036", "4705037", "4705097", "4705107", + "4705125", "4705142", "4705169", "4705170", "4705230", "4705274", "4705328", + "4705412", "4705416", "4705437", "4705455", "4705462", "5493812", "5509710", + "5697964", "5922850", "6087542", "6347059", "6680878", "6988922", "7151114", + "7200832", "7464613", "7488564", "7606803", "8309694", "8455078" + ) + query <- qry_funs$eq("patent_id" = missing) + results <- search_pv(query, method = "POST") + + # This would fail if these patents are added to the patentsview database + expect_equal(results$query_results$total_hits, 0) +}) + +test_that("we can't explicitly request assignee_ or inventor_years.num_patents", { + skip_on_cran() + + bad_eps <- c( + "assignee", # Invalid field: assignee_years.num_patents. assignee_years is not a nested field + "inventor" # Invalid field: inventor_years.num_patents. + ) + + # PVS-1437 Errors are thrown when requesting assignee_years or inventor_years + # (it works if the group name is used but fails on fully qualified nested fields) + tmp <- lapply(bad_eps, function(endpoint) { + expect_error( + pv_out <- search_pv( + query = TEST_QUERIES[[endpoint]], + endpoint = endpoint, + fields = fieldsdf[fieldsdf$endpoint == endpoint, "field"] + ), + "Invalid field: (assignee|inventor)_years.num_patents" + ) + }) +}) + +test_that("uspcs aren't right", { + skip_on_cran() + + # PVS-1615 + + endpoint <- "patent" + res <- search_pv( + query = TEST_QUERIES[[endpoint]], + endpoint = endpoint, + fields = get_fields(endpoint, groups = "uspc_at_issue") + ) + + # id fields are correct, non id fields should be HATEOAS links + uspcs <- res$data$patents$uspc_at_issue + + # these should fail when the API is fixed + expect_equal(uspcs$uspc_mainclass, uspcs$uspc_mainclass_id) + expect_equal(uspcs$uspc_subclass, uspcs$uspc_subclass_id) +}) + +test_that("endpoints are still broken", { + skip_on_cran() + # this will fail when the api is fixed + + broken_endpoints <- c( + "claim" # Error: Invalid field: claim_dependent + , "draw_desc_text" # Error: Invalid field: description_sequence + , "cpc_subclass" # 404? check the test query + , "location" # Error: Invalid field: location_latitude + , "pg_claim" # Invalid field: claim_dependent + , "uspc_subclass" # Error: Internal Server Error + , "uspc_mainclass" # Error: Internal Server Error + , "wipo" # Error: Internal Server Error + ) + + dev_null <- lapply(broken_endpoints, function(x) { + print(x) + expect_error( + search_pv( + query = TEST_QUERIES[[x]], + endpoint = x, + fields = get_fields(x) + ) + ) + }) +}) diff --git a/tests/testthat/test-arg-validation.R b/tests/testthat/test-arg-validation.R deleted file mode 100644 index 8115baf2..00000000 --- a/tests/testthat/test-arg-validation.R +++ /dev/null @@ -1,40 +0,0 @@ -context("validate_args") - -test_that("validate_args throws errors for all bad args", { - skip_on_cran() - # TODO(any): Remove: - skip("Temp skip for API redesign PR") - - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', endpoint = "patent"), - "endpoint" - ) - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', method = "Post"), - "method" - ) - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = NULL), - "subent_cnts" - ) - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', mtchd_subent_only = NULL), - "mtchd_subent_only" - ) - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', per_page = "50"), - "per_page" - ) - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', page = NA), - "page" - ) - expect_error( - search_pv( - '{"patent_date":["1976-01-06"]}', - fields = "patent_date", - sort = c("patent_number" = "asc") - ), - "sort" - ) -}) diff --git a/tests/testthat/test-cast-pv-data.R b/tests/testthat/test-cast-pv-data.R index 91eaa7ad..efcb1c5b 100644 --- a/tests/testthat/test-cast-pv-data.R +++ b/tests/testthat/test-cast-pv-data.R @@ -1,19 +1,73 @@ -context("cast_pv_data") +test_that("cast_pv_data casts patent fields as expected", { + skip_on_cran() + + pv_out <- search_pv( + query = '{"patent_id":"5116621"}', fields = get_fields("patent") + ) + + dat <- cast_pv_data(data = pv_out$data) + + # patent_date was received as a string and should be cast to a date + date <- class(dat$patents$patent_date) == "Date" + + # patent_detail_desc_length was recieved as an int and should still be one + num <- is.numeric(dat$patents$patent_detail_desc_length) + + # assignee type is a string like "3" from the api and gets cast to an integer + assignee_type <- is.numeric(dat$patents$assignees[[1]]$assignee_type[[1]]) + + expect_true(num && date && assignee_type) + + # application.rule_47_flag is received as a boolean and casting should leave it alone + expect_true(is.logical(dat$patents$application[[1]]$rule_47_flag)) +}) -test_that("cast_pv_data casts data types as expected", { +test_that("cast_pv_data casts assignee fields as expected", { skip_on_cran() - # TODO(any): Remove: - skip("Temp skip for API redesign PR") + skip_on_ci() + # ** Invalid field: assignee_years.num_patents. assignee_years is not a nested field pv_out <- search_pv( - query = "{\"patent_number\":\"5116621\"}", fields = get_fields("patents") + query = '{"_text_phrase":{"assignee_individual_name_last": "Clinton"}}', + endpoint = "assignee", + fields = get_fields("assignee", groups = "assignees") # ** ) dat <- cast_pv_data(data = pv_out$data) - date <- !is.character(dat$patents$patent_date) - num <- is.numeric(dat$patents$patent_num_claims) - date2 <- !is.character(dat$patents$assignees[[1]]$assignee_last_seen_date[1]) + # latitude comes from the api as numeric and is left as is by casting + lat <- is.numeric(dat$assignees$assignee_lastknown_latitude[[1]]) + + # here we have the same funky conversion mentioned above + # on the field "assigneee_type" + assignee_type <- is.numeric(dat$assignees$assignee_type[[1]]) + + # was first seen date cast properly? + cast_date <- class(dat$assignees$assignee_first_seen_date[[1]]) == "Date" + + # integer from the API should remain an integer + years_active <- is.numeric(dat$assignees$assignee_years_active[[1]]) + + expect_true(lat) + expect_true(assignee_type) + expect_true(cast_date) + expect_true(years_active) + + skip("Skip for API bugs") +}) + +test_that("we can cast a bool", { + skip_on_cran() + + # TODO(any): remove when the API returns this as a boolean + fields <- c("rule_47_flag") + endpoint <- "publication" + results <- search_pv(query = TEST_QUERIES[[endpoint]], endpoint = endpoint, fields = fields) + + # this would fail when the API is fixed + expect_true(is.character(results$data$publications$rule_47_flag)) + + cast_results <- cast_pv_data(results$data) - expect_true(date && num && date2) + expect_true(is.logical(cast_results$publications$rule_47_flag)) }) diff --git a/tests/testthat/test-check-query.R b/tests/testthat/test-check-query.R new file mode 100644 index 00000000..8f846032 --- /dev/null +++ b/tests/testthat/test-check-query.R @@ -0,0 +1,98 @@ + +test_that("errors are thrown on invalid queries", { + skip_on_cran() + + expect_error( + search_pv(qry_funs$eq("shoe_size" = 11.5)), + "^.* is not a valid field to query for your endpoint$" + ) + + expect_error( + search_pv(qry_funs$gt("patent_id" = "10000000")), + "^You cannot use the operator .* with the field .*$" + ) + + expect_error( + search_pv(qry_funs$eq("patent_date" = "10000000")), + "^Bad date: .*\\. Date must be in the format of yyyy-mm-dd$" + ) + + expect_error( + search_pv(qry_funs$gt("patent_id" = 10000000)), + "^.* must be of type character$" + ) + + expect_error( + search_pv(qry_funs$gt("patent_year" = 1980.5)), + "^.* must be an integer$" + ) + + expect_error( + search_pv(qry_funs$gt("patent_year" = "1980")), + "^.* must be an integer$" + ) + + expect_error( + search_pv(qry_funs$eq("application.rule_47_flag" = "TRUE")), + "^.* must be a boolean$" + ) + + expect_error( + search_pv(qry_funs$eq("rule_47_flag" = TRUE), endpoint = "publication"), + "^.* must be of type character$" + ) + + expect_error( + search_pv(qry_funs$gt("location_latitude" = "TRUE"), endpoint = "location"), + "^.* must be a number$" + ) + + expect_error( + search_pv(list(patent_number = "10000000")), + "is not a valid operator or not a valid field" + ) + + bogus_operator_query <- + list( + "_ends_with" = + list(patent_title = "dog") + ) + + expect_error( + search_pv(bogus_operator_query), + "is not a valid operator or not a valid field" + ) +}) + +test_that("a valid nested field can be queried", { + skip_on_cran() + + results <- search_pv(qry_funs$eq("application.rule_47_flag" = FALSE)) + + expect_gt(results$query_results$total_hits, 8000000) +}) + +test_that("the _eq message is thrown when appropriate", { + skip_on_cran() + + expect_message( + search_pv(list(patent_date = "2007-03-06")), + "^The _eq operator is a safer alternative to using field:value pairs" + ) +}) + +test_that("a query with an and operator returns results", { + skip_on_cran() + + patents_query <- + with_qfuns( + and( + text_phrase(inventors.inventor_name_first = "George"), + text_phrase(inventors.inventor_name_last = "Washington") + ) + ) + + result <- search_pv(patents_query) + + expect_gte(result$query_results$total_hits, 1) +}) diff --git a/tests/testthat/test-get-fields.R b/tests/testthat/test-get-fields.R new file mode 100644 index 00000000..7eb316f0 --- /dev/null +++ b/tests/testthat/test-get-fields.R @@ -0,0 +1,32 @@ +test_that("get_fields works as expected", { + skip_on_cran() + + expect_error( + get_fields("bogus endpoint"), + "endpoint must be", + fixed = TRUE + ) + + expect_error( + get_fields("patent", groups = "bogus"), + "for the patent endpoint", + fixed = TRUE + ) + + patent_pk <- get_ok_pk("patent") + fields <- get_fields(endpoint = "patent", groups = c("inventors")) + expect_false(patent_pk %in% fields) + + fields <- get_fields(endpoint = "patent", groups = c("inventors"), include_pk = TRUE) + expect_true(patent_pk %in% fields) +}) + +test_that("the endpoints are stable", { + skip_on_cran() + + # quick check of the endpoints - useful after an api update. We run fieldsdf.R + # and do a build. This test would fail if an endpoint was added, moved or deleted + found <- unique(fieldsdf$endpoint) + expecting <- get_endpoints() + expect_equal(expecting, found) +}) diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R new file mode 100644 index 00000000..cc0d5f6a --- /dev/null +++ b/tests/testthat/test-print.R @@ -0,0 +1,36 @@ +test_that("We can print the returns from all endpoints ", { + skip_on_cran() + + eps <- get_endpoints() + bad_eps <- c("cpc_subclass", "uspc_subclass", "uspc_mainclass", "wipo") + good_eps <- eps[!eps %in% bad_eps] + + lapply(good_eps, function(x) { + print(x) + j <- search_pv(query = TEST_QUERIES[[x]], endpoint = x) + print(j) + j + }) + + expect_true(TRUE) + + # make it noticeable that all is not right with the API + skip("Skip for API bugs") # TODO: remove when the API is fixed +}) + +test_that("we can print a query, its request, and unnested data", { + skip_on_cran() + + x <- "patent" + q <- qry_funs$eq(patent_id = "11530080") + print(q) + + fields <- c("patent_id", get_fields(x, groups = "ipcr")) + j <- search_pv(query = TEST_QUERIES[[x]], endpoint = x, fields = fields) + print(j$request) + + k <- unnest_pv_data(j$data) + print(k) + + expect_true(TRUE) +}) diff --git a/tests/testthat/test-query-dsl.R b/tests/testthat/test-query-dsl.R new file mode 100644 index 00000000..840ed780 --- /dev/null +++ b/tests/testthat/test-query-dsl.R @@ -0,0 +1,36 @@ +test_that("between works as expected", { + skip_on_cran() + + query <- qry_funs$in_range(patent_date = c("1976-01-06", "1976-01-13")) + + results <- search_pv(query, all_pages = TRUE) + + expect_gt(results$query_results$total_hits, 2600) +}) + +test_that("with_qfuns() works as advertised", { + skip_on_cran() # wouldn't necessarily have to skip! + + a <- with_qfuns( + and( + text_phrase(inventors.inventor_name_first = "George"), + text_phrase(inventors.inventor_name_last = "Washington") + ) + ) + + b <- qry_funs$and( + qry_funs$text_phrase(inventors.inventor_name_first = "George"), + qry_funs$text_phrase(inventors.inventor_name_last = "Washington") + ) + + expect_equal(a, b) +}) + +test_that("argument check works on in_range", { + skip_on_cran() # wouldn't necessarily have to skip! + + expect_error( + qq <- qry_funs$in_range("patent_id", c("10000000", "10000002")), + "expects a range of exactly two arguments" + ) +}) diff --git a/tests/testthat/test-search-pv.R b/tests/testthat/test-search-pv.R index 29857741..494c3308 100644 --- a/tests/testthat/test-search-pv.R +++ b/tests/testthat/test-search-pv.R @@ -1,18 +1,37 @@ -context("search_pv") -# TODO: add a test to see if all the requested fields come back +add_base_url <- function(x) { + paste0("https://search.patentsview.org/api/v1/", x) +} endpoints <- get_endpoints() test_that("API returns expected df names for all endpoints", { skip_on_cran() - df_names <- vapply(endpoints, function(x) { + broken_endpoints <- c( + "cpc_subclass", + "uspc_subclass", + "uspc_mainclass", + "wipo" + ) + + # these both return rel_app_texts + overloaded_entities <- c("patent/rel_app_text", "publication/rel_app_text") + + goodendpoints <- endpoints[!endpoints %in% c(broken_endpoints, overloaded_entities)] + + df_names <- vapply(goodendpoints, function(x) { + print(x) out <- search_pv(query = TEST_QUERIES[[x]], endpoint = x) - names(out[[1]]) + + # now the endpoints are singular and most entites are plural + to_singular(names(out[[1]])) }, FUN.VALUE = character(1), USE.NAMES = FALSE) - expect_equal(endpoints, df_names) + # publication/rel_app_text's entity is rel_app_text_publications + df_names <- gsub("rel_app_text_publication", "rel_app_text", df_names) + + expect_equal(goodendpoints, df_names) }) test_that("DSL-based query returns expected results", { @@ -37,18 +56,26 @@ test_that("You can download up to 9,000+ records", { # Should return 9,000+ rows query <- with_qfuns( and( - gte(patent_date = "2021-12-13"), - lte(patent_date = "2021-12-24") + gte(patent_date = "2021-12-13"), + lte(patent_date = "2021-12-24") ) ) - out <- search_pv(query, per_page = 1000, all_pages = TRUE) + out <- search_pv(query, size = 1000, all_pages = TRUE) expect_gt(out$query_results$total_hits, 9000) }) test_that("search_pv can pull all fields for all endpoints", { skip_on_cran() - dev_null <- lapply(endpoints, function(x) { + troubled_endpoints <- c( + "cpc_subclass", "location", + "uspc_subclass", "uspc_mainclass", "wipo", "claim", "draw_desc_text", + "pg_claim" # Invalid field: claim_dependent + ) + + # We should be able to get all fields from the non troubled endpoints + dev_null <- lapply(endpoints[!(endpoints %in% troubled_endpoints)], function(x) { + print(x) search_pv( query = TEST_QUERIES[[x]], endpoint = x, @@ -62,13 +89,13 @@ test_that("Sort option works as expected", { skip_on_cran() out <- search_pv( - qry_funs$neq(assignee_id = 1), - fields = get_fields("assignees"), - endpoint = "assignees", - sort = c("lastknown_latitude" = "desc"), - per_page = 100 + qry_funs$neq(assignee_id = ""), + fields = get_fields("assignee", groups = c("assignees")), + endpoint = "assignee", + sort = c("assignee_lastknown_latitude" = "desc"), + size = 100 ) - lat <- as.numeric(out$data$assignees$lastknown_latitude) + lat <- as.numeric(out$data$assignees$assignee_lastknown_latitude) expect_true(lat[1] >= lat[100]) }) @@ -76,45 +103,30 @@ test_that("search_pv properly URL encodes queries", { skip_on_cran() # Covers https://github.com/ropensci/patentsview/issues/24 - # need to use the assignee endpoint now and the field is full_text - ampersand_query <- with_qfuns(text_phrase(organization = "Johnson & Johnson")) - dev_null <- search_pv(ampersand_query, endpoint = "assignees") - expect_true(TRUE) -}) + # need to use the assignee endpoint now + organization <- "Johnson & Johnson International" + text_query <- with_qfuns(text_phrase(assignee_organization = organization)) + phrase_search <- search_pv(text_query, endpoint = "assignee") + expect_true(phrase_search$query_results$total_hits == 1) -# Below we request the same data in built_singly and result_all, with the only -# difference being that we intentionally get throttled in built_singly by -# sending one request per patent number (instead of all requests at once). If -# the two responses match, then we've correctly handled throttling errors. -test_that("Throttled requests are automatically retried", { - skip_on_cran() + # also test that the string operator does not matter now + eq_query <- with_qfuns(eq(assignee_organization = organization)) + eq_search <- search_pv(eq_query, endpoint = "assignee") + expect_identical(eq_search$data, phrase_search$data) - res <- search_pv('{"_gte":{"patent_date":"2007-01-04"}}', per_page = 50) - patent_numbers <- res$data$patents$patent_number + # text_phrase seems to be case insensitive but equal is not + organization <- tolower(organization) - built_singly <- lapply(patent_numbers, function(patent_number) { - search_pv( - query = qry_funs$eq(patent_number = patent_number), - endpoint = "patent_citations", - fields = c("patent_number", "cited_patent_number"), - sort = c("cited_patent_number" = "asc") - )[["data"]][["patent_citations"]] - }) - built_singly <- do.call(rbind, built_singly) + text_query <- with_qfuns(text_phrase(assignee_organization = organization)) + phrase_search <- search_pv(text_query, endpoint = "assignee") + expect_true(phrase_search$query_results$total_hits == 1) - result_all <- search_pv( - query = qry_funs$eq(patent_number = patent_numbers), - endpoint = "patent_citations", - fields = c("patent_number", "cited_patent_number"), - sort = c("patent_number" = "asc", "cited_patent_number" = "asc"), - per_page = 1000, - all_pages = TRUE - ) - result_all <- result_all$data$patent_citations - - expect_identical(built_singly, result_all) + eq_query <- with_qfuns(eq(assignee_organization = organization)) + eq_search <- search_pv(eq_query, endpoint = "assignee") + expect_true(eq_search$query_results$total_hits == 0) }) + test_that("We won't expose the user's patentsview API key to random websites", { skip_on_cran() @@ -128,28 +140,339 @@ test_that("We can call all the legitimate HATEOAS endpoints", { skip_on_cran() single_item_queries <- c( - "https://search.patentsview.org/api/v1/assignee/10/", - "https://search.patentsview.org/api/v1/cpc_group/A01B/", - "https://search.patentsview.org/api/v1/cpc_subgroup/G01S7:4811/", - "https://search.patentsview.org/api/v1/cpc_subsection/A01/", - "https://search.patentsview.org/api/v1/inventor/10/", - "https://search.patentsview.org/api/v1/nber_category/1/", - "https://search.patentsview.org/api/v1/nber_subcategory/11/", - "https://search.patentsview.org/api/v1/patent/10757852/", - "https://search.patentsview.org/api/v1/uspc_mainclass/30/", - "https://search.patentsview.org/api/v1/uspc_subclass/30:100/" + "cpc_subclass/A01B/", + "cpc_class/A01/", + "cpc_group/G01S7:4811/", + "patent/10757852/", + "uspc_mainclass/30/", + "uspc_subclass/30:100/", + "wipo/1/", + "publication/20010000001/" ) + + # these currently throw Error: Internal Server Error + broken_single_item_queries <- c( + "cpc_subclass/A01B/", + "uspc_mainclass/30/", + "uspc_subclass/30:100/", + "wipo/1/" + ) + + single_item_queries <- single_item_queries[!single_item_queries %in% broken_single_item_queries] + dev_null <- lapply(single_item_queries, function(q) { - j <- retrieve_linked_data(q) + print(q) + j <- retrieve_linked_data(add_base_url(q)) expect_equal(j$query_results$total_hits, 1) }) multi_item_queries <- c( - "https://search.patentsview.org/api/v1/application_citation/10966293/", - "https://search.patentsview.org/api/v1/patent_citation/10966293/" + "patent/us_application_citation/10966293/", + "patent/us_patent_citation/10966293/" ) dev_null <- lapply(multi_item_queries, function(q) { - j <- retrieve_linked_data(q) + j <- retrieve_linked_data(add_base_url(q)) expect_true(j$query_results$total_hits > 1) }) + + + # We'll make a call to get an inventor and assignee HATEOAS link + # in case their ids are not persistent + # new weirdness: we request inventor_id and assignee_id but the + # fields come back without the _id + res <- search_pv('{"patent_id":"10000000"}', + fields = c("inventors.inventor_id", "assignees.assignee_id") + ) + + assignee <- retrieve_linked_data(res$data$patents$assignees[[1]]$assignee) + expect_true(assignee$query_results$total_hits == 1) + + inventor <- retrieve_linked_data(res$data$patents$inventors[[1]]$inventor) + expect_true(inventor$query_results$total_hits == 1) + + # Query to get a location HATEOAS link in case location_ids are not persistent + res <- search_pv('{"location_name":"Chicago"}', + fields = c("location_id"), + endpoint = "location" + ) + + location <- retrieve_linked_data(add_base_url(paste0("location/", res$data$locations$location_id, "/"))) + expect_true(location$query_results$total_hits == 1) +}) + +# Make sure gets and posts return the same data. +# Posts had issues that went undetected for a while using the new API +# (odd results with posts when either no fields or sort was passed +# see get_post_body in search-pv.R) + +test_that("posts and gets return the same data", { + skip_on_cran() + + bad_eps <- c( + "cpc_subclass" + # ,"location" # Error: Invalid field: location_latitude + , "uspc_subclass" # Error: Internal Server Error + , "uspc_mainclass" # Error: Internal Server Error + , "wipo" # Error: Internal Server Error + , "claim" # Error: Invalid field: claim_dependent + , "draw_desc_text" # Error: Invalid field: description_sequence + , "cpc_subclass" # 404? check the test query + , "uspc_subclass" # 404 + # , "pg_claim" # check this one + ) + + good_eps <- endpoints[!endpoints %in% bad_eps] + + z <- lapply(good_eps, function(endpoint) { + print(endpoint) + get_res <- search_pv( + query = TEST_QUERIES[[endpoint]], + endpoint = endpoint, + method = "GET" + ) + + g <- unnest_pv_data(get_res$data, pk = get_ok_pk(endpoint)) + + post_res <- search_pv( + query = TEST_QUERIES[[endpoint]], + endpoint = endpoint, + method = "POST" + ) + + p <- unnest_pv_data(post_res$data) + + expect_equal(g, p) + }) +}) + +test_that("nested shorthand produces the same results as fully qualified ones", { + skip_on_cran() + + # the API now allows a shorthand in the fields/f: parameter + # just the group name will retrieve all that group's attributes + # This is indirectly testing our parse of the OpenAPI object and actual API responses + fields <- fieldsdf[fieldsdf$endpoint == "patent" & fieldsdf$group == "application", "field"] + + shorthand_res <- search_pv(TEST_QUERIES[["patent"]], fields = c("application")) + qualified_res <- search_pv(TEST_QUERIES[["patent"]], fields = fields) + + # the request$urls will be different but the data should match + expect_failure(expect_equal(shorthand_res$request$url, qualified_res$request$url)) + expect_equal(shorthand_res$data, qualified_res$data) +}) + + +test_that("the 'after' parameter works properly", { + skip_on_cran() + + sort <- c("patent_id" = "asc") + big_query <- qry_funs$eq(patent_date = "2000-01-04") # 3003 total_hits + results <- search_pv(big_query, all_pages = FALSE, sort = sort) + expect_gt(results$query_results$total_hits, 1000) + + after <- results$data$patents$patent_id[[nrow(results$data$patents)]] + subsequent <- search_pv(big_query, all_pages = FALSE, after = after, sort = sort) + + # ** New API bug? should be expect_equal `actual`: 399 + expect_lt(nrow(subsequent$data$patents), 1000) + + # the first row's patent_id should be bigger than after + # now "D418273" + # expect_gt(as.integer(subsequent$data$patents$patent_id[[1]]), as.integer(after)) + + # now we'll add a descending sort to make sure that also works + sort <- c("patent_id" = "desc") + fields <- NULL # c("patent_id") + + results <- search_pv(big_query, all_pages = FALSE, fields = fields, sort = sort) + after <- results$data$patents$patent_id[[nrow(results$data$patents)]] + + subsequent <- search_pv(big_query, + all_pages = FALSE, after = after, sort = sort, + fields = fields + ) + + # now the first row's patent_id should be smaller than after + # should be expect_lt + expect_gt(as.integer(subsequent$data$patents$patent_id[[1]]), as.integer(after)) + skip("New API bug?") +}) + +test_that("the documentation and Swagger UI URLs work properly", { + skip_on_cran() + + documentation_url <- + 'https://search.patentsview.org/api/v1/patent/?q={"_text_any":{"patent_title":"COBOL cotton gin"}}&s=[{"patent_id": "asc" }]&o={"size":50}&f=["inventors.inventor_name_last","patent_id","patent_date","patent_title"]' + + results <- retrieve_linked_data(documentation_url) + + expect_gt(results$query_results$total_hits, 0) + + swagger_url <- "https://search.patentsview.org/api/v1/patent/?q=%7B%22patent_date%22%3A%221976-01-06%22%7D" + + results <- retrieve_linked_data(swagger_url, encoded = TRUE) + expect_gt(results$query_results$total_hits, 0) +}) + +test_that("an error occurs if all_pages is TRUE and there aren't any results", { + skip_on_cran() + + too_early <- qry_funs$lt(patent_date = "1976-01-01") + + results <- search_pv(too_early, all_pages = FALSE) + + # would like this test to fail! (meaning API added earlier data) + expect_equal(results$query_results$total_hits, 0) + + expect_error( + search_pv(too_early, all_pages = TRUE), + "No records matched your query" + ) +}) + +test_that("we can retrieve all_pages = TRUE without specifiying fields", { + skip_on_cran() + + query <- qry_funs$eq(patent_date = "1976-01-06") + sort <- c("patent_type" = "asc", "patent_id" = "asc") + + # here we aren't requesting fields but are requesting a sort + results <- search_pv(query, sort = sort, all_pages = TRUE) + + expect_gt(results$query_results$total_hits, 1300) +}) + +# Below we request the same data in built_singly and result_all, with the only +# difference being that we intentionally get throttled in built_singly by +# sending one request per patent number (instead of all requests at once). If +# the two responses match, then we've correctly handled throttling errors. +test_that("Throttled requests are automatically retried", { + skip_on_cran() + + res <- search_pv('{"_gte":{"patent_date":"2007-01-04"}}', size = 50) + patent_ids <- res$data$patents$patent_id + + # now we don't get message "The API's requests per minute limit has been reached. " + # so we'll testthat it takes over 60 seconds to run (since we got throttled) + # TODO(any): can we use evaluate_promise to find "Waiting 45s for retry backoff"? + + duration <- system.time( + built_singly <- lapply(patent_ids, function(patent_id) { + search_pv( + query = qry_funs$eq(patent_id = patent_id), + endpoint = "patent/us_patent_citation", + fields = c("patent_id", "citation_patent_id"), + sort = c("citation_patent_id" = "asc") + )[["data"]][["us_patent_citations"]] + }) + ) + + expect_gt(duration[["elapsed"]], 60) + + built_singly <- do.call(rbind, built_singly) + + # we'll also test that the results are the same for a post and get + # when there is a secondary sort on the bulk requests + sort <- c("patent_id" = "asc", "citation_patent_id" = "asc") + methods <- c("POST", "GET") + output <- lapply(methods, function(method) { + result_all <- search_pv( + query = qry_funs$eq(patent_id = patent_ids), + endpoint = "patent/us_patent_citation", + fields = c("patent_id", "citation_patent_id"), + sort = sort, + size = 1000, + all_pages = TRUE, + method = method + ) + result_all <- result_all$data$us_patent_citations + }) + + expect_equal(output[[1]], output[[2]]) + + # We'll do our own sort and check that it matches the API output + # We want to make sure we sent in the sort parameter correctly, where + # the API is doing the sort (since the we didn't need to page) + + second_output <- output[[2]] + + # Sorting logic using order() + sort_order <- mapply(function(col, direction) { + if (direction == "asc") { + return(second_output[[col]]) + } else { + return(-rank(second_output[[col]], ties.method = "min")) # Invert for descending order + } + }, col = names(sort), direction = as.vector(sort), SIMPLIFY = FALSE) + + # Final sorting + second_output <- second_output[do.call(order, sort_order), , drop = FALSE] + + expect_equal(output[[1]], second_output) + + # TODO(any): fix this: + # expect_equal says actual row.names are an integer vector and expected + # row.names is a character vector. Not sure why + row.names(output[[1]]) <- NULL + row.names(built_singly) <- NULL + + expect_equal(built_singly, output[[1]]) +}) + +test_that("we can sort on an unrequested field across page boundaries", { + skip_on_cran() + + # total_hits = 5,352 + query <- qry_funs$in_range(patent_date = c("1976-01-01", "1976-01-31")) + fields <- c("patent_title", "patent_date") + sort <- c("patent_date" = "desc", "patent_id" = "desc") + + r_ordered <- search_pv( + query = query, + fields = fields, + sort = sort, + all_pages = TRUE + ) + + fields <- c(fields, "patent_id") + api_ordered <- search_pv( + query = query, + fields = fields, + sort = sort, + all_pages = TRUE + ) + + # Remove patent_id before comparison. We're also indirectly testing that the + # patent_id field added by the first search_pv was removed, otherwise this + # expect equal would fail + api_ordered$data$patents[["patent_id"]] <- NULL + expect_equal(r_ordered$data, api_ordered$data) +}) + +test_that("sort works across page boundaries", { + skip_on_cran() + + sort <- c("patent_type" = "desc", "patent_id" = "desc") + results <- search_pv( + qry_funs$eq(patent_date = "1976-01-06"), + fields = c("patent_type", "patent_id"), + sort = sort, + all_pages = TRUE + ) + + double_check <- results$data$patents + + # Sorting logic using order() + sort_order <- mapply(function(col, direction) { + if (direction == "asc") { + return(double_check[[col]]) + } else { + return(-rank(double_check[[col]], ties.method = "min")) # Invert for descending order + } + }, col = names(sort), direction = as.vector(sort), SIMPLIFY = FALSE) + + # Final sorting + double_check <- double_check[do.call(order, sort_order), , drop = FALSE] + + expect_equal(results$data$patents, double_check) }) diff --git a/tests/testthat/test-unnest-pv-data.R b/tests/testthat/test-unnest-pv-data.R index eb2807cd..afe0fcbc 100644 --- a/tests/testthat/test-unnest-pv-data.R +++ b/tests/testthat/test-unnest-pv-data.R @@ -1,23 +1,78 @@ -context("unnest_pv_data") - eps <- get_endpoints() -test_that("", { +test_that("we can unnest all entities", { skip_on_cran() - # TODO(any): Remove: - skip("Temp skip for API redesign PR") - eps_no_loc <- eps[eps != "locations"] + # TODO(any): add back fields = get_fields(x) + # API throws 500s if some nested fields are included + + # locations endpoint is back but it fails this test + bad_endpoints <- c( + "location", "cpc_subclass", + "uspc_subclass", "uspc_mainclass", "wipo", + "claim", "draw_desc_text", "pg_claim" + ) + overloaded_entities <- c("patent/rel_app_text", "publication/rel_app_text") + good_eps <- eps[!eps %in% c(bad_endpoints, overloaded_entities)] + + z <- lapply(good_eps, function(x) { + print(x) - z <- lapply(eps_no_loc, function(x) { - Sys.sleep(1) pv_out <- search_pv( - "{\"patent_number\":\"5116621\"}", + query = TEST_QUERIES[[x]], endpoint = x, - fields = get_fields(x) + fields = get_fields(x, group = to_plural(x)) # requesting non-nested attributes ) + + expect_gt(pv_out$query_results$total_hits, 0) # check that the query worked unnest_pv_data(pv_out[["data"]]) }) expect_true(TRUE) + + # this will fail when the api is fixed + z <- lapply(bad_endpoints, function(x) { + print(x) + expect_error( + pv_out <- search_pv( + query = TEST_QUERIES[[x]], + endpoint = x, + fields = get_fields(x, group = to_plural(x)) # requesting non-nested attributes + ) + ) + }) + + # make it noticeable that all is not right with the API + skip("Skip for API bugs") # TODO: remove when the API is fixed/bad_endpoints removed +}) + +test_that("endpoint's pks match their entity's pks", { + skip_on_cran() + + # the overloaded_entities endpoints return the same entity, rel_app_texts, + # so we can't determine the endpoint from the entity like we can + # for the rest of the entities + overloaded_entities <- c("patent/rel_app_text", "publication/rel_app_text") + bad_endpoints <- c("uspc_subclass", "cpc_subclass", "uspc_mainclass", "wipo") + good_eps <- eps[!eps %in% c(bad_endpoints, overloaded_entities)] + + endpoint_pks <- lapply(good_eps, function(endpoint) { + print(endpoint) + get_ok_pk(endpoint) + }) + + entity_pks <- lapply(good_eps, function(endpoint) { + result <- search_pv(TEST_QUERIES[[endpoint]], endpoint = endpoint) + get_ok_pk(names(result$data)) + }) + + expect_equal(endpoint_pks, entity_pks) + + # this will fail when the api is fixed + z <- lapply(bad_endpoints, function(endpoint) { + print(endpoint) + expect_error( + result <- search_pv(TEST_QUERIES[[endpoint]], endpoint = endpoint) + ) + }) }) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R new file mode 100644 index 00000000..f38200b9 --- /dev/null +++ b/tests/testthat/test-utils.R @@ -0,0 +1,12 @@ +test_that("we can cast the endpoints that return the same entity", { + skip_on_cran() + + endpoints <- c("patent/rel_app_text", "publication/rel_app_text") + + nul <- lapply(endpoints, function(endpoint) { + results <- search_pv(query = TEST_QUERIES[[endpoint]], endpoint = endpoint) + cast <- cast_pv_data(results$data) + }) + + expect_true(TRUE) +}) diff --git a/tests/testthat/test-validate-args.R b/tests/testthat/test-validate-args.R new file mode 100644 index 00000000..acadf8cc --- /dev/null +++ b/tests/testthat/test-validate-args.R @@ -0,0 +1,87 @@ +# make sure deprecated warnings are always thrown- bypass 8 hour suppression +rlang::local_options(lifecycle_verbosity = "warning") + +test_that("validate_args throws errors for all bad args", { + skip_on_cran() + + # requesting the old plural endpoint should now throw an error + expect_error( + search_pv('{"patent_date":["1976-01-06"]}', endpoint = "patents"), + "endpoint" + ) + expect_error( + search_pv('{"patent_date":["1976-01-06"]}', method = "Post"), + "method" + ) + expect_warning( + search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = TRUE), + class = "lifecycle_warning_deprecated" + ) + expect_warning( + search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = 7), + class = "lifecycle_warning_deprecated" + ) + expect_warning( + search_pv('{"patent_date":["1976-01-06"]}', mtchd_subent_only = NULL), + class = "lifecycle_warning_deprecated" + ) + expect_warning( + search_pv('{"patent_date":["1976-01-06"]}', error_browser = "chrome"), + class = "lifecycle_warning_deprecated" + ) + + per_page <- 17 + expect_warning( + results <- search_pv('{"patent_date":["1976-01-06"]}', per_page = per_page), + class = "lifecycle_warning_deprecated" + ) + + # make sure the size attribute was set from the per_page parameter + expect_equal(per_page, nrow(results$data$patents)) + + expect_warning( + search_pv('{"patent_date":["1976-01-06"]}', page = 2), + class = "lifecycle_warning_deprecated" # unsupported page parameter + ) + expect_error( + search_pv( + '{"patent_date":["1976-01-06"]}', + fields = "patent_date", + all_pages = TRUE, + after = "3930272" + ), + "after" + ) + expect_error( + get_fields("assignee", groups = "cpc_current"), # valid group for a different endpoint + "for the assignee endpoint" + ) +}) + +test_that("per_page parameter warns but still works", { + skip_on_cran() + + expect_warning( + results <- search_pv('{"patent_date":["1976-01-06"]}', per_page = 23), + class = "lifecycle_warning_deprecated" + ) + + expect_equal(23, nrow(results$data$patents)) +}) + +test_that("group names can be requested as fields via new API shorthand", { + skip_on_cran() + + endpoint <- "patent" + shorthand <- get_fields("patent", groups=c("application")) + expect_equal(shorthand , "application") + shorthand_res <- search_pv(TEST_QUERIES[[endpoint]], fields=shorthand) + + explicit <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group == "application", "field"] + explicit_res <- search_pv(TEST_QUERIES[[endpoint]], fields=explicit) + + # the requests are different but the results should be the same + expect_failure(expect_equal(shorthand_res$request, explicit_res$request)) + expect_equal(shorthand_res$data, explicit_res$data) + +}) From aae887d30d35aca3acd9ab79cc3de05376232082 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sun, 22 Dec 2024 13:37:03 -0600 Subject: [PATCH 041/103] test: updatng tests for new api version --- tests/testthat/test-validate-args.R | 72 +++++++++++++---------------- 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/tests/testthat/test-validate-args.R b/tests/testthat/test-validate-args.R index acadf8cc..88e0e0e5 100644 --- a/tests/testthat/test-validate-args.R +++ b/tests/testthat/test-validate-args.R @@ -1,5 +1,6 @@ -# make sure deprecated warnings are always thrown- bypass 8 hour suppression -rlang::local_options(lifecycle_verbosity = "warning") +# We can't use expect_warning() without adding a dependency to rlang +# to bypass 8 hour warning suppression +# rlang::local_options(lifecycle_verbosity = "warning") test_that("validate_args throws errors for all bad args", { skip_on_cran() @@ -13,36 +14,40 @@ test_that("validate_args throws errors for all bad args", { search_pv('{"patent_date":["1976-01-06"]}', method = "Post"), "method" ) - expect_warning( - search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = TRUE), - class = "lifecycle_warning_deprecated" - ) - expect_warning( - search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = 7), - class = "lifecycle_warning_deprecated" - ) - expect_warning( - search_pv('{"patent_date":["1976-01-06"]}', mtchd_subent_only = NULL), - class = "lifecycle_warning_deprecated" - ) - expect_warning( - search_pv('{"patent_date":["1976-01-06"]}', error_browser = "chrome"), - class = "lifecycle_warning_deprecated" - ) + suppressWarnings({ + result <- search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = TRUE) + # class = "lifecycle_warning_deprecated" + expect_gt(result$query_results$total_hits, 0) + }) + suppressWarnings({ + result <- search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = 7) + # class = "lifecycle_warning_deprecated" + expect_gt(result$query_results$total_hits, 0) + }) + suppressWarnings({ + result <- search_pv('{"patent_date":["1976-01-06"]}', mtchd_subent_only = NULL) + # class = "lifecycle_warning_deprecated" + expect_gt(result$query_results$total_hits, 0) + }) + suppressWarnings({ + result <- search_pv('{"patent_date":["1976-01-06"]}', error_browser = "chrome") + #class = "lifecycle_warning_deprecated" + expect_gt(result$query_results$total_hits, 0) + }) per_page <- 17 - expect_warning( - results <- search_pv('{"patent_date":["1976-01-06"]}', per_page = per_page), - class = "lifecycle_warning_deprecated" - ) + suppressWarnings({ + results <- search_pv('{"patent_date":["1976-01-06"]}', per_page = per_page) - # make sure the size attribute was set from the per_page parameter - expect_equal(per_page, nrow(results$data$patents)) + # make sure the size attribute was set from the per_page parameter + expect_equal(per_page, nrow(results$data$patents)) + }) - expect_warning( - search_pv('{"patent_date":["1976-01-06"]}', page = 2), - class = "lifecycle_warning_deprecated" # unsupported page parameter - ) + suppressWarnings({ + result <- search_pv('{"patent_date":["1976-01-06"]}', page = 2) + # class = "lifecycle_warning_deprecated" # unsupported page parameter + expect_gt(result$query_results$total_hits, 0) + }) expect_error( search_pv( '{"patent_date":["1976-01-06"]}', @@ -58,17 +63,6 @@ test_that("validate_args throws errors for all bad args", { ) }) -test_that("per_page parameter warns but still works", { - skip_on_cran() - - expect_warning( - results <- search_pv('{"patent_date":["1976-01-06"]}', per_page = 23), - class = "lifecycle_warning_deprecated" - ) - - expect_equal(23, nrow(results$data$patents)) -}) - test_that("group names can be requested as fields via new API shorthand", { skip_on_cran() From 1c1eba51d900d2846a31e2739935fd6ccf6c182f Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sun, 22 Dec 2024 17:11:11 -0600 Subject: [PATCH 042/103] added skip_on_ci()s --- tests/testthat/test-api-bugs.R | 9 +++++++++ tests/testthat/test-cast-pv-data.R | 2 ++ tests/testthat/test-check-query.R | 4 ++++ tests/testthat/test-get-fields.R | 2 ++ tests/testthat/test-print.R | 2 ++ tests/testthat/test-query-dsl.R | 3 +++ tests/testthat/test-search-pv.R | 17 +++++++++++++++++ tests/testthat/test-unnest-pv-data.R | 2 ++ tests/testthat/test-utils.R | 1 + tests/testthat/test-validate-args.R | 2 ++ 10 files changed, 44 insertions(+) diff --git a/tests/testthat/test-api-bugs.R b/tests/testthat/test-api-bugs.R index 2e5f2227..a7e31df6 100644 --- a/tests/testthat/test-api-bugs.R +++ b/tests/testthat/test-api-bugs.R @@ -80,6 +80,7 @@ test_that("there is case sensitivity on string equals", { test_that("string vs text operators behave differently", { skip_on_cran() + skip_on_ci() # # reported to the API team PVS-1147 query <- qry_funs$begins(assignee_organization = "johnson") @@ -161,6 +162,7 @@ eps <- (get_endpoints()) test_that("We can call all the legitimate HATEOAS endpoints", { skip_on_cran() + skip_on_ci() # these currently throw Error: Internal Server Error broken_single_item_queries <- c( @@ -182,6 +184,7 @@ test_that("We can call all the legitimate HATEOAS endpoints", { test_that("individual fields are still broken", { skip_on_cran() + skip_on_ci() # Sample fields that cause 500 errors when requested by themselves. # Some don't throw errors when included in get_fields() but they do if @@ -207,6 +210,7 @@ test_that("individual fields are still broken", { test_that("we can't sort by all fields", { skip_on_cran() + skip_on_ci() # PVS-1377 sorts_to_try <- c( @@ -250,6 +254,7 @@ test_that("we can't sort by all fields", { test_that("withdrawn patents are still present in the database", { skip_on_cran() + skip_on_ci() # PVS-1342 Underlying data issues # There are 8,000 patents that were in the bulk xml files patentsiew is based on. @@ -271,6 +276,7 @@ test_that("withdrawn patents are still present in the database", { test_that("missing patents are still missing", { skip_on_cran() + skip_on_ci() # PVS-1342 Underlying data issues # There are around 300 patents that aren't in the bulk xml files patentsiew is based on. @@ -291,6 +297,7 @@ test_that("missing patents are still missing", { test_that("we can't explicitly request assignee_ or inventor_years.num_patents", { skip_on_cran() + skip_on_ci() bad_eps <- c( "assignee", # Invalid field: assignee_years.num_patents. assignee_years is not a nested field @@ -313,6 +320,7 @@ test_that("we can't explicitly request assignee_ or inventor_years.num_patents", test_that("uspcs aren't right", { skip_on_cran() + skip_on_ci() # PVS-1615 @@ -333,6 +341,7 @@ test_that("uspcs aren't right", { test_that("endpoints are still broken", { skip_on_cran() + skip_on_ci() # this will fail when the api is fixed broken_endpoints <- c( diff --git a/tests/testthat/test-cast-pv-data.R b/tests/testthat/test-cast-pv-data.R index efcb1c5b..63efe59a 100644 --- a/tests/testthat/test-cast-pv-data.R +++ b/tests/testthat/test-cast-pv-data.R @@ -1,5 +1,6 @@ test_that("cast_pv_data casts patent fields as expected", { skip_on_cran() + skip_on_ci() pv_out <- search_pv( query = '{"patent_id":"5116621"}', fields = get_fields("patent") @@ -58,6 +59,7 @@ test_that("cast_pv_data casts assignee fields as expected", { test_that("we can cast a bool", { skip_on_cran() + skip_on_ci() # TODO(any): remove when the API returns this as a boolean fields <- c("rule_47_flag") diff --git a/tests/testthat/test-check-query.R b/tests/testthat/test-check-query.R index 8f846032..5b1bff86 100644 --- a/tests/testthat/test-check-query.R +++ b/tests/testthat/test-check-query.R @@ -1,6 +1,7 @@ test_that("errors are thrown on invalid queries", { skip_on_cran() + skip_on_ci() expect_error( search_pv(qry_funs$eq("shoe_size" = 11.5)), @@ -66,6 +67,7 @@ test_that("errors are thrown on invalid queries", { test_that("a valid nested field can be queried", { skip_on_cran() + skip_on_ci() results <- search_pv(qry_funs$eq("application.rule_47_flag" = FALSE)) @@ -74,6 +76,7 @@ test_that("a valid nested field can be queried", { test_that("the _eq message is thrown when appropriate", { skip_on_cran() + skip_on_ci() expect_message( search_pv(list(patent_date = "2007-03-06")), @@ -83,6 +86,7 @@ test_that("the _eq message is thrown when appropriate", { test_that("a query with an and operator returns results", { skip_on_cran() + skip_on_ci() patents_query <- with_qfuns( diff --git a/tests/testthat/test-get-fields.R b/tests/testthat/test-get-fields.R index 7eb316f0..777569a2 100644 --- a/tests/testthat/test-get-fields.R +++ b/tests/testthat/test-get-fields.R @@ -1,5 +1,6 @@ test_that("get_fields works as expected", { skip_on_cran() + skip_on_ci() expect_error( get_fields("bogus endpoint"), @@ -23,6 +24,7 @@ test_that("get_fields works as expected", { test_that("the endpoints are stable", { skip_on_cran() + skip_on_ci() # quick check of the endpoints - useful after an api update. We run fieldsdf.R # and do a build. This test would fail if an endpoint was added, moved or deleted diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R index cc0d5f6a..d8d7d01b 100644 --- a/tests/testthat/test-print.R +++ b/tests/testthat/test-print.R @@ -1,5 +1,6 @@ test_that("We can print the returns from all endpoints ", { skip_on_cran() + skip_on_ci() eps <- get_endpoints() bad_eps <- c("cpc_subclass", "uspc_subclass", "uspc_mainclass", "wipo") @@ -20,6 +21,7 @@ test_that("We can print the returns from all endpoints ", { test_that("we can print a query, its request, and unnested data", { skip_on_cran() + skip_on_ci() x <- "patent" q <- qry_funs$eq(patent_id = "11530080") diff --git a/tests/testthat/test-query-dsl.R b/tests/testthat/test-query-dsl.R index 840ed780..2f44b3a7 100644 --- a/tests/testthat/test-query-dsl.R +++ b/tests/testthat/test-query-dsl.R @@ -1,5 +1,6 @@ test_that("between works as expected", { skip_on_cran() + skip_on_ci() query <- qry_funs$in_range(patent_date = c("1976-01-06", "1976-01-13")) @@ -10,6 +11,7 @@ test_that("between works as expected", { test_that("with_qfuns() works as advertised", { skip_on_cran() # wouldn't necessarily have to skip! + skip_on_ci() # wouldn't necessarily have to skip! a <- with_qfuns( and( @@ -28,6 +30,7 @@ test_that("with_qfuns() works as advertised", { test_that("argument check works on in_range", { skip_on_cran() # wouldn't necessarily have to skip! + skip_on_ci() # wouldn't necessarily have to skip! expect_error( qq <- qry_funs$in_range("patent_id", c("10000000", "10000002")), diff --git a/tests/testthat/test-search-pv.R b/tests/testthat/test-search-pv.R index 494c3308..0946f2a5 100644 --- a/tests/testthat/test-search-pv.R +++ b/tests/testthat/test-search-pv.R @@ -7,6 +7,7 @@ endpoints <- get_endpoints() test_that("API returns expected df names for all endpoints", { skip_on_cran() + skip_on_ci() broken_endpoints <- c( "cpc_subclass", @@ -36,6 +37,7 @@ test_that("API returns expected df names for all endpoints", { test_that("DSL-based query returns expected results", { skip_on_cran() + skip_on_ci() query <- with_qfuns( and( @@ -52,6 +54,7 @@ test_that("DSL-based query returns expected results", { test_that("You can download up to 9,000+ records", { skip_on_cran() + skip_on_ci() # Should return 9,000+ rows query <- with_qfuns( @@ -66,6 +69,7 @@ test_that("You can download up to 9,000+ records", { test_that("search_pv can pull all fields for all endpoints", { skip_on_cran() + skip_on_ci() troubled_endpoints <- c( "cpc_subclass", "location", @@ -87,6 +91,7 @@ test_that("search_pv can pull all fields for all endpoints", { test_that("Sort option works as expected", { skip_on_cran() + skip_on_ci() out <- search_pv( qry_funs$neq(assignee_id = ""), @@ -101,6 +106,7 @@ test_that("Sort option works as expected", { test_that("search_pv properly URL encodes queries", { skip_on_cran() + skip_on_ci() # Covers https://github.com/ropensci/patentsview/issues/24 # need to use the assignee endpoint now @@ -129,6 +135,7 @@ test_that("search_pv properly URL encodes queries", { test_that("We won't expose the user's patentsview API key to random websites", { skip_on_cran() + skip_on_ci() # We will try to call the api that tells us who is currently in space in_space_now_url <- "http://api.open-notify.org/astros.json" @@ -138,6 +145,7 @@ test_that("We won't expose the user's patentsview API key to random websites", { test_that("We can call all the legitimate HATEOAS endpoints", { skip_on_cran() + skip_on_ci() single_item_queries <- c( "cpc_subclass/A01B/", @@ -207,6 +215,7 @@ test_that("We can call all the legitimate HATEOAS endpoints", { test_that("posts and gets return the same data", { skip_on_cran() + skip_on_ci() bad_eps <- c( "cpc_subclass" @@ -247,6 +256,7 @@ test_that("posts and gets return the same data", { test_that("nested shorthand produces the same results as fully qualified ones", { skip_on_cran() + skip_on_ci() # the API now allows a shorthand in the fields/f: parameter # just the group name will retrieve all that group's attributes @@ -264,6 +274,7 @@ test_that("nested shorthand produces the same results as fully qualified ones", test_that("the 'after' parameter works properly", { skip_on_cran() + skip_on_ci() sort <- c("patent_id" = "asc") big_query <- qry_funs$eq(patent_date = "2000-01-04") # 3003 total_hits @@ -300,6 +311,7 @@ test_that("the 'after' parameter works properly", { test_that("the documentation and Swagger UI URLs work properly", { skip_on_cran() + skip_on_ci() documentation_url <- 'https://search.patentsview.org/api/v1/patent/?q={"_text_any":{"patent_title":"COBOL cotton gin"}}&s=[{"patent_id": "asc" }]&o={"size":50}&f=["inventors.inventor_name_last","patent_id","patent_date","patent_title"]' @@ -316,6 +328,7 @@ test_that("the documentation and Swagger UI URLs work properly", { test_that("an error occurs if all_pages is TRUE and there aren't any results", { skip_on_cran() + skip_on_ci() too_early <- qry_funs$lt(patent_date = "1976-01-01") @@ -332,6 +345,7 @@ test_that("an error occurs if all_pages is TRUE and there aren't any results", { test_that("we can retrieve all_pages = TRUE without specifiying fields", { skip_on_cran() + skip_on_ci() query <- qry_funs$eq(patent_date = "1976-01-06") sort <- c("patent_type" = "asc", "patent_id" = "asc") @@ -348,6 +362,7 @@ test_that("we can retrieve all_pages = TRUE without specifiying fields", { # the two responses match, then we've correctly handled throttling errors. test_that("Throttled requests are automatically retried", { skip_on_cran() + skip_on_ci() res <- search_pv('{"_gte":{"patent_date":"2007-01-04"}}', size = 50) patent_ids <- res$data$patents$patent_id @@ -421,6 +436,7 @@ test_that("Throttled requests are automatically retried", { test_that("we can sort on an unrequested field across page boundaries", { skip_on_cran() + skip_on_ci() # total_hits = 5,352 query <- qry_funs$in_range(patent_date = c("1976-01-01", "1976-01-31")) @@ -451,6 +467,7 @@ test_that("we can sort on an unrequested field across page boundaries", { test_that("sort works across page boundaries", { skip_on_cran() + skip_on_ci() sort <- c("patent_type" = "desc", "patent_id" = "desc") results <- search_pv( diff --git a/tests/testthat/test-unnest-pv-data.R b/tests/testthat/test-unnest-pv-data.R index afe0fcbc..1d9e1ad5 100644 --- a/tests/testthat/test-unnest-pv-data.R +++ b/tests/testthat/test-unnest-pv-data.R @@ -2,6 +2,7 @@ eps <- get_endpoints() test_that("we can unnest all entities", { skip_on_cran() + skip_on_ci() # TODO(any): add back fields = get_fields(x) # API throws 500s if some nested fields are included @@ -48,6 +49,7 @@ test_that("we can unnest all entities", { test_that("endpoint's pks match their entity's pks", { skip_on_cran() + skip_on_ci() # the overloaded_entities endpoints return the same entity, rel_app_texts, # so we can't determine the endpoint from the entity like we can diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index f38200b9..fcfaca18 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,5 +1,6 @@ test_that("we can cast the endpoints that return the same entity", { skip_on_cran() + skip_on_ci() endpoints <- c("patent/rel_app_text", "publication/rel_app_text") diff --git a/tests/testthat/test-validate-args.R b/tests/testthat/test-validate-args.R index 88e0e0e5..5f0208b9 100644 --- a/tests/testthat/test-validate-args.R +++ b/tests/testthat/test-validate-args.R @@ -4,6 +4,7 @@ test_that("validate_args throws errors for all bad args", { skip_on_cran() + skip_on_ci() # requesting the old plural endpoint should now throw an error expect_error( @@ -65,6 +66,7 @@ test_that("validate_args throws errors for all bad args", { test_that("group names can be requested as fields via new API shorthand", { skip_on_cran() + skip_on_ci() endpoint <- "patent" shorthand <- get_fields("patent", groups=c("application")) From dda8bacf23db48bd0e50d18e2ddcaf8206dd3c98 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sun, 22 Dec 2024 18:20:13 -0600 Subject: [PATCH 043/103] removed skip_on_ci()s --- tests/testthat/test-api-bugs.R | 12 ------------ tests/testthat/test-cast-pv-data.R | 3 --- tests/testthat/test-check-query.R | 4 ---- tests/testthat/test-get-fields.R | 2 -- tests/testthat/test-print.R | 2 -- tests/testthat/test-query-dsl.R | 3 --- tests/testthat/test-search-pv.R | 17 ----------------- tests/testthat/test-unnest-pv-data.R | 2 -- tests/testthat/test-utils.R | 1 - tests/testthat/test-validate-args.R | 2 -- 10 files changed, 48 deletions(-) diff --git a/tests/testthat/test-api-bugs.R b/tests/testthat/test-api-bugs.R index a7e31df6..55e7cb3d 100644 --- a/tests/testthat/test-api-bugs.R +++ b/tests/testthat/test-api-bugs.R @@ -10,7 +10,6 @@ add_base_url <- function(x) { test_that("there is trouble paging", { skip_on_cran() - skip_on_ci() # reprex inspired by https://patentsview.org/forum/7/topic/812 # Not all requested groups are coming back as we page, causing @@ -55,7 +54,6 @@ test_that("there is trouble paging", { test_that("there is case sensitivity on string equals", { skip_on_cran() - skip_on_ci() # reported to the API team PVS-1147 # not sure if this is a bug or feature - original API was case insensitive @@ -80,7 +78,6 @@ test_that("there is case sensitivity on string equals", { test_that("string vs text operators behave differently", { skip_on_cran() - skip_on_ci() # # reported to the API team PVS-1147 query <- qry_funs$begins(assignee_organization = "johnson") @@ -96,7 +93,6 @@ test_that("string vs text operators behave differently", { test_that("API returns all requested groups", { skip_on_cran() - skip_on_ci() # can we traverse the return building a list of fields? # sort both requested fields and returned ones to see if they are equal @@ -162,7 +158,6 @@ eps <- (get_endpoints()) test_that("We can call all the legitimate HATEOAS endpoints", { skip_on_cran() - skip_on_ci() # these currently throw Error: Internal Server Error broken_single_item_queries <- c( @@ -184,7 +179,6 @@ test_that("We can call all the legitimate HATEOAS endpoints", { test_that("individual fields are still broken", { skip_on_cran() - skip_on_ci() # Sample fields that cause 500 errors when requested by themselves. # Some don't throw errors when included in get_fields() but they do if @@ -210,7 +204,6 @@ test_that("individual fields are still broken", { test_that("we can't sort by all fields", { skip_on_cran() - skip_on_ci() # PVS-1377 sorts_to_try <- c( @@ -254,7 +247,6 @@ test_that("we can't sort by all fields", { test_that("withdrawn patents are still present in the database", { skip_on_cran() - skip_on_ci() # PVS-1342 Underlying data issues # There are 8,000 patents that were in the bulk xml files patentsiew is based on. @@ -276,7 +268,6 @@ test_that("withdrawn patents are still present in the database", { test_that("missing patents are still missing", { skip_on_cran() - skip_on_ci() # PVS-1342 Underlying data issues # There are around 300 patents that aren't in the bulk xml files patentsiew is based on. @@ -297,7 +288,6 @@ test_that("missing patents are still missing", { test_that("we can't explicitly request assignee_ or inventor_years.num_patents", { skip_on_cran() - skip_on_ci() bad_eps <- c( "assignee", # Invalid field: assignee_years.num_patents. assignee_years is not a nested field @@ -320,7 +310,6 @@ test_that("we can't explicitly request assignee_ or inventor_years.num_patents", test_that("uspcs aren't right", { skip_on_cran() - skip_on_ci() # PVS-1615 @@ -341,7 +330,6 @@ test_that("uspcs aren't right", { test_that("endpoints are still broken", { skip_on_cran() - skip_on_ci() # this will fail when the api is fixed broken_endpoints <- c( diff --git a/tests/testthat/test-cast-pv-data.R b/tests/testthat/test-cast-pv-data.R index 63efe59a..27dcae04 100644 --- a/tests/testthat/test-cast-pv-data.R +++ b/tests/testthat/test-cast-pv-data.R @@ -1,6 +1,5 @@ test_that("cast_pv_data casts patent fields as expected", { skip_on_cran() - skip_on_ci() pv_out <- search_pv( query = '{"patent_id":"5116621"}', fields = get_fields("patent") @@ -25,7 +24,6 @@ test_that("cast_pv_data casts patent fields as expected", { test_that("cast_pv_data casts assignee fields as expected", { skip_on_cran() - skip_on_ci() # ** Invalid field: assignee_years.num_patents. assignee_years is not a nested field pv_out <- search_pv( @@ -59,7 +57,6 @@ test_that("cast_pv_data casts assignee fields as expected", { test_that("we can cast a bool", { skip_on_cran() - skip_on_ci() # TODO(any): remove when the API returns this as a boolean fields <- c("rule_47_flag") diff --git a/tests/testthat/test-check-query.R b/tests/testthat/test-check-query.R index 5b1bff86..8f846032 100644 --- a/tests/testthat/test-check-query.R +++ b/tests/testthat/test-check-query.R @@ -1,7 +1,6 @@ test_that("errors are thrown on invalid queries", { skip_on_cran() - skip_on_ci() expect_error( search_pv(qry_funs$eq("shoe_size" = 11.5)), @@ -67,7 +66,6 @@ test_that("errors are thrown on invalid queries", { test_that("a valid nested field can be queried", { skip_on_cran() - skip_on_ci() results <- search_pv(qry_funs$eq("application.rule_47_flag" = FALSE)) @@ -76,7 +74,6 @@ test_that("a valid nested field can be queried", { test_that("the _eq message is thrown when appropriate", { skip_on_cran() - skip_on_ci() expect_message( search_pv(list(patent_date = "2007-03-06")), @@ -86,7 +83,6 @@ test_that("the _eq message is thrown when appropriate", { test_that("a query with an and operator returns results", { skip_on_cran() - skip_on_ci() patents_query <- with_qfuns( diff --git a/tests/testthat/test-get-fields.R b/tests/testthat/test-get-fields.R index 777569a2..7eb316f0 100644 --- a/tests/testthat/test-get-fields.R +++ b/tests/testthat/test-get-fields.R @@ -1,6 +1,5 @@ test_that("get_fields works as expected", { skip_on_cran() - skip_on_ci() expect_error( get_fields("bogus endpoint"), @@ -24,7 +23,6 @@ test_that("get_fields works as expected", { test_that("the endpoints are stable", { skip_on_cran() - skip_on_ci() # quick check of the endpoints - useful after an api update. We run fieldsdf.R # and do a build. This test would fail if an endpoint was added, moved or deleted diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R index d8d7d01b..cc0d5f6a 100644 --- a/tests/testthat/test-print.R +++ b/tests/testthat/test-print.R @@ -1,6 +1,5 @@ test_that("We can print the returns from all endpoints ", { skip_on_cran() - skip_on_ci() eps <- get_endpoints() bad_eps <- c("cpc_subclass", "uspc_subclass", "uspc_mainclass", "wipo") @@ -21,7 +20,6 @@ test_that("We can print the returns from all endpoints ", { test_that("we can print a query, its request, and unnested data", { skip_on_cran() - skip_on_ci() x <- "patent" q <- qry_funs$eq(patent_id = "11530080") diff --git a/tests/testthat/test-query-dsl.R b/tests/testthat/test-query-dsl.R index 2f44b3a7..840ed780 100644 --- a/tests/testthat/test-query-dsl.R +++ b/tests/testthat/test-query-dsl.R @@ -1,6 +1,5 @@ test_that("between works as expected", { skip_on_cran() - skip_on_ci() query <- qry_funs$in_range(patent_date = c("1976-01-06", "1976-01-13")) @@ -11,7 +10,6 @@ test_that("between works as expected", { test_that("with_qfuns() works as advertised", { skip_on_cran() # wouldn't necessarily have to skip! - skip_on_ci() # wouldn't necessarily have to skip! a <- with_qfuns( and( @@ -30,7 +28,6 @@ test_that("with_qfuns() works as advertised", { test_that("argument check works on in_range", { skip_on_cran() # wouldn't necessarily have to skip! - skip_on_ci() # wouldn't necessarily have to skip! expect_error( qq <- qry_funs$in_range("patent_id", c("10000000", "10000002")), diff --git a/tests/testthat/test-search-pv.R b/tests/testthat/test-search-pv.R index 0946f2a5..494c3308 100644 --- a/tests/testthat/test-search-pv.R +++ b/tests/testthat/test-search-pv.R @@ -7,7 +7,6 @@ endpoints <- get_endpoints() test_that("API returns expected df names for all endpoints", { skip_on_cran() - skip_on_ci() broken_endpoints <- c( "cpc_subclass", @@ -37,7 +36,6 @@ test_that("API returns expected df names for all endpoints", { test_that("DSL-based query returns expected results", { skip_on_cran() - skip_on_ci() query <- with_qfuns( and( @@ -54,7 +52,6 @@ test_that("DSL-based query returns expected results", { test_that("You can download up to 9,000+ records", { skip_on_cran() - skip_on_ci() # Should return 9,000+ rows query <- with_qfuns( @@ -69,7 +66,6 @@ test_that("You can download up to 9,000+ records", { test_that("search_pv can pull all fields for all endpoints", { skip_on_cran() - skip_on_ci() troubled_endpoints <- c( "cpc_subclass", "location", @@ -91,7 +87,6 @@ test_that("search_pv can pull all fields for all endpoints", { test_that("Sort option works as expected", { skip_on_cran() - skip_on_ci() out <- search_pv( qry_funs$neq(assignee_id = ""), @@ -106,7 +101,6 @@ test_that("Sort option works as expected", { test_that("search_pv properly URL encodes queries", { skip_on_cran() - skip_on_ci() # Covers https://github.com/ropensci/patentsview/issues/24 # need to use the assignee endpoint now @@ -135,7 +129,6 @@ test_that("search_pv properly URL encodes queries", { test_that("We won't expose the user's patentsview API key to random websites", { skip_on_cran() - skip_on_ci() # We will try to call the api that tells us who is currently in space in_space_now_url <- "http://api.open-notify.org/astros.json" @@ -145,7 +138,6 @@ test_that("We won't expose the user's patentsview API key to random websites", { test_that("We can call all the legitimate HATEOAS endpoints", { skip_on_cran() - skip_on_ci() single_item_queries <- c( "cpc_subclass/A01B/", @@ -215,7 +207,6 @@ test_that("We can call all the legitimate HATEOAS endpoints", { test_that("posts and gets return the same data", { skip_on_cran() - skip_on_ci() bad_eps <- c( "cpc_subclass" @@ -256,7 +247,6 @@ test_that("posts and gets return the same data", { test_that("nested shorthand produces the same results as fully qualified ones", { skip_on_cran() - skip_on_ci() # the API now allows a shorthand in the fields/f: parameter # just the group name will retrieve all that group's attributes @@ -274,7 +264,6 @@ test_that("nested shorthand produces the same results as fully qualified ones", test_that("the 'after' parameter works properly", { skip_on_cran() - skip_on_ci() sort <- c("patent_id" = "asc") big_query <- qry_funs$eq(patent_date = "2000-01-04") # 3003 total_hits @@ -311,7 +300,6 @@ test_that("the 'after' parameter works properly", { test_that("the documentation and Swagger UI URLs work properly", { skip_on_cran() - skip_on_ci() documentation_url <- 'https://search.patentsview.org/api/v1/patent/?q={"_text_any":{"patent_title":"COBOL cotton gin"}}&s=[{"patent_id": "asc" }]&o={"size":50}&f=["inventors.inventor_name_last","patent_id","patent_date","patent_title"]' @@ -328,7 +316,6 @@ test_that("the documentation and Swagger UI URLs work properly", { test_that("an error occurs if all_pages is TRUE and there aren't any results", { skip_on_cran() - skip_on_ci() too_early <- qry_funs$lt(patent_date = "1976-01-01") @@ -345,7 +332,6 @@ test_that("an error occurs if all_pages is TRUE and there aren't any results", { test_that("we can retrieve all_pages = TRUE without specifiying fields", { skip_on_cran() - skip_on_ci() query <- qry_funs$eq(patent_date = "1976-01-06") sort <- c("patent_type" = "asc", "patent_id" = "asc") @@ -362,7 +348,6 @@ test_that("we can retrieve all_pages = TRUE without specifiying fields", { # the two responses match, then we've correctly handled throttling errors. test_that("Throttled requests are automatically retried", { skip_on_cran() - skip_on_ci() res <- search_pv('{"_gte":{"patent_date":"2007-01-04"}}', size = 50) patent_ids <- res$data$patents$patent_id @@ -436,7 +421,6 @@ test_that("Throttled requests are automatically retried", { test_that("we can sort on an unrequested field across page boundaries", { skip_on_cran() - skip_on_ci() # total_hits = 5,352 query <- qry_funs$in_range(patent_date = c("1976-01-01", "1976-01-31")) @@ -467,7 +451,6 @@ test_that("we can sort on an unrequested field across page boundaries", { test_that("sort works across page boundaries", { skip_on_cran() - skip_on_ci() sort <- c("patent_type" = "desc", "patent_id" = "desc") results <- search_pv( diff --git a/tests/testthat/test-unnest-pv-data.R b/tests/testthat/test-unnest-pv-data.R index 1d9e1ad5..afe0fcbc 100644 --- a/tests/testthat/test-unnest-pv-data.R +++ b/tests/testthat/test-unnest-pv-data.R @@ -2,7 +2,6 @@ eps <- get_endpoints() test_that("we can unnest all entities", { skip_on_cran() - skip_on_ci() # TODO(any): add back fields = get_fields(x) # API throws 500s if some nested fields are included @@ -49,7 +48,6 @@ test_that("we can unnest all entities", { test_that("endpoint's pks match their entity's pks", { skip_on_cran() - skip_on_ci() # the overloaded_entities endpoints return the same entity, rel_app_texts, # so we can't determine the endpoint from the entity like we can diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index fcfaca18..f38200b9 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,6 +1,5 @@ test_that("we can cast the endpoints that return the same entity", { skip_on_cran() - skip_on_ci() endpoints <- c("patent/rel_app_text", "publication/rel_app_text") diff --git a/tests/testthat/test-validate-args.R b/tests/testthat/test-validate-args.R index 5f0208b9..88e0e0e5 100644 --- a/tests/testthat/test-validate-args.R +++ b/tests/testthat/test-validate-args.R @@ -4,7 +4,6 @@ test_that("validate_args throws errors for all bad args", { skip_on_cran() - skip_on_ci() # requesting the old plural endpoint should now throw an error expect_error( @@ -66,7 +65,6 @@ test_that("validate_args throws errors for all bad args", { test_that("group names can be requested as fields via new API shorthand", { skip_on_cran() - skip_on_ci() endpoint <- "patent" shorthand <- get_fields("patent", groups=c("application")) From c50f208740b7a87e68fbba5b2999460ad2fbde58 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sun, 22 Dec 2024 18:21:30 -0600 Subject: [PATCH 044/103] removed run_dontrun = TRUE from run_examples --- .github/workflows/R-CMD-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 9568a92f..491fe4d3 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -82,7 +82,7 @@ jobs: run: | options(crayon.enabled = TRUE) remotes::install_cran("devtools") - devtools::run_examples(run_dontrun = TRUE) + devtools::run_examples() shell: Rscript {0} - name: Upload check results From 6dc6c7c9cad3c235425a10e3a44da81dd53253a2 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Mon, 23 Dec 2024 10:09:39 -0600 Subject: [PATCH 045/103] generated files --- docs/reference/retrieve_linked_data.html | 2 +- docs/reference/search_pv.html | 4 ++-- man/retrieve_linked_data.Rd | 2 +- man/search_pv.Rd | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/reference/retrieve_linked_data.html b/docs/reference/retrieve_linked_data.html index 148968c5..d54faca9 100644 --- a/docs/reference/retrieve_linked_data.html +++ b/docs/reference/retrieve_linked_data.html @@ -116,7 +116,7 @@

    Arguments

    api_key

    API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key -here.

    +here.

    ...
    diff --git a/docs/reference/search_pv.html b/docs/reference/search_pv.html index 640aba6f..e52cbf6f 100644 --- a/docs/reference/search_pv.html +++ b/docs/reference/search_pv.html @@ -132,7 +132,7 @@

    Arguments

    A value of NULL indicates to the API that it should return the default fields for that endpoint. Acceptable fields for a given endpoint can be found at the API's online documentation (e.g., check out the field list for the -patents +patents endpoint) or by viewing the fieldsdf data frame (View(fieldsdf)). You can also use get_fields to list out the fields available for a given endpoint.

    @@ -205,7 +205,7 @@

    Arguments

    api_key

    API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key -here.

    +here.

    ...
    diff --git a/man/retrieve_linked_data.Rd b/man/retrieve_linked_data.Rd index 90da02bd..838c2d7a 100644 --- a/man/retrieve_linked_data.Rd +++ b/man/retrieve_linked_data.Rd @@ -19,7 +19,7 @@ in the documentation or a Request URL from the \href{https://search.patentsview. Set to TRUE for Request URLs from Swagger UI.} \item{api_key}{API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key -\href{https://patentsview.org/apis/keyrequest}{here}.} +\href{https://patentsview-support.atlassian.net/servicedesk/customer/portals}{here}.} \item{...}{Curl options passed along to httr2's \code{\link[httr2]{req_options}} function.} } diff --git a/man/search_pv.Rd b/man/search_pv.Rd index bd00c370..094f362c 100644 --- a/man/search_pv.Rd +++ b/man/search_pv.Rd @@ -43,7 +43,7 @@ E.g., \code{qry_funs$gte(patent_date = "2007-01-04")} A value of \code{NULL} indicates to the API that it should return the default fields for that endpoint. Acceptable fields for a given endpoint can be found at the API's online documentation (e.g., check out the field list for the -\href{https://search.patentsview.org/docs/docs/Search\%20API/SearchAPIReference#patent}{patents +\href{https://search.patentsview.org/docs/docs/Search\%20API/SearchAPIReference/#patent}{patents endpoint}) or by viewing the \code{fieldsdf} data frame (\code{View(fieldsdf)}). You can also use \code{\link{get_fields}} to list out the fields available for a given endpoint. @@ -93,7 +93,7 @@ your query is very long (say, over 2,000 characters in length).} \item{error_browser}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}} \item{api_key}{API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key -\href{https://patentsview.org/apis/keyrequest}{here}.} +\href{https://patentsview-support.atlassian.net/servicedesk/customer/portals}{here}.} \item{...}{Curl options passed along to httr2's \code{\link[httr2]{req_options}} when we do GETs or POSTs.} From 47218b635b5fd075f7ba39d6411c1cb34c9c0956 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Mon, 23 Dec 2024 10:10:26 -0600 Subject: [PATCH 046/103] docs: API link update --- R/search-pv.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index 7eaff4fb..6f31fe25 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -179,7 +179,7 @@ get_default_sort <- function(endpoint) { #' A value of \code{NULL} indicates to the API that it should return the default fields #' for that endpoint. Acceptable fields for a given endpoint can be found at the API's #' online documentation (e.g., check out the field list for the -#' \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference#patent}{patents +#' \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference/#patent}{patents #' endpoint}) or by viewing the \code{fieldsdf} data frame #' (\code{View(fieldsdf)}). You can also use \code{\link{get_fields}} to list #' out the fields available for a given endpoint. @@ -217,7 +217,7 @@ get_default_sort <- function(endpoint) { #' your query is very long (say, over 2,000 characters in length). #' @param error_browser `r lifecycle::badge("deprecated")` #' @param api_key API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key -#' \href{https://patentsview.org/apis/keyrequest}{here}. +#' \href{https://patentsview-support.atlassian.net/servicedesk/customer/portals}{here}. #' @param ... Curl options passed along to httr2's \code{\link[httr2]{req_options}} #' when we do GETs or POSTs. #' From e634e314841cf47016683689be8d986d0f09a7a1 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Mon, 23 Dec 2024 10:40:53 -0600 Subject: [PATCH 047/103] ropensci build changes --- .github/workflows/R-CMD-check.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 491fe4d3..46359706 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -28,6 +28,8 @@ jobs: RSPM: ${{ matrix.config.rspm }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} PATENTSVIEW_API_KEY: ${{ secrets.PATENTSVIEW_API_KEY }} + EXAMPLES_PARAM: ${{ github.event_name == 'push' && 'run_dontrun = TRUE' || 'run_dontrun = FALSE' }} + NOT_CRAN_VALUE: ${{ github.event_name == 'push' && 'TRUE' || 'FALSE' }} steps: - uses: actions/checkout@v4 @@ -71,6 +73,8 @@ jobs: - name: Check env: _R_CHECK_CRAN_INCOMING_REMOTE_: false + NOT_CRAN: ${{ env.NOT_CRAN_VALUE }} + run: | options(crayon.enabled = TRUE) rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") @@ -79,10 +83,11 @@ jobs: - name: Run examples env: _R_CHECK_CRAN_INCOMING_REMOTE_: false + run: | options(crayon.enabled = TRUE) remotes::install_cran("devtools") - devtools::run_examples() + devtools::run_examples( ${{ env.EXAMPLES_PARAM }} ) shell: Rscript {0} - name: Upload check results From 7f0309e46c27b67b9357ca10bbd1dd0b4d13b76a Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Mon, 23 Dec 2024 14:14:20 -0600 Subject: [PATCH 048/103] checking access to the patentsview API key --- .github/workflows/R-CMD-check.yaml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 46359706..5e8852ec 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -28,8 +28,6 @@ jobs: RSPM: ${{ matrix.config.rspm }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} PATENTSVIEW_API_KEY: ${{ secrets.PATENTSVIEW_API_KEY }} - EXAMPLES_PARAM: ${{ github.event_name == 'push' && 'run_dontrun = TRUE' || 'run_dontrun = FALSE' }} - NOT_CRAN_VALUE: ${{ github.event_name == 'push' && 'TRUE' || 'FALSE' }} steps: - uses: actions/checkout@v4 @@ -40,6 +38,15 @@ jobs: - uses: r-lib/actions/setup-pandoc@v2 + - name: Check Secrets Access + run: | + if [[ "x${{ secrets.PATENTSVIEW_API_KEY }}" != "x" ]]; then + echo "Access to secrets" + else + echo "No access to secrets" + exit 1 + fi + - name: Query dependencies run: | install.packages('remotes') @@ -73,7 +80,6 @@ jobs: - name: Check env: _R_CHECK_CRAN_INCOMING_REMOTE_: false - NOT_CRAN: ${{ env.NOT_CRAN_VALUE }} run: | options(crayon.enabled = TRUE) @@ -87,7 +93,7 @@ jobs: run: | options(crayon.enabled = TRUE) remotes::install_cran("devtools") - devtools::run_examples( ${{ env.EXAMPLES_PARAM }} ) + devtools::run_examples(run_dontrun = TRUE) shell: Rscript {0} - name: Upload check results From 4434cd2b5b860266c407b70e303068526240cf3e Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Tue, 24 Dec 2024 15:36:15 -0600 Subject: [PATCH 049/103] test removal --- tests/testthat/test-get-fields.R | 32 ------------ tests/testthat/test-print.R | 36 ------------- tests/testthat/test-query-dsl.R | 36 ------------- tests/testthat/test-validate-args.R | 81 ----------------------------- 4 files changed, 185 deletions(-) delete mode 100644 tests/testthat/test-get-fields.R delete mode 100644 tests/testthat/test-print.R delete mode 100644 tests/testthat/test-query-dsl.R delete mode 100644 tests/testthat/test-validate-args.R diff --git a/tests/testthat/test-get-fields.R b/tests/testthat/test-get-fields.R deleted file mode 100644 index 7eb316f0..00000000 --- a/tests/testthat/test-get-fields.R +++ /dev/null @@ -1,32 +0,0 @@ -test_that("get_fields works as expected", { - skip_on_cran() - - expect_error( - get_fields("bogus endpoint"), - "endpoint must be", - fixed = TRUE - ) - - expect_error( - get_fields("patent", groups = "bogus"), - "for the patent endpoint", - fixed = TRUE - ) - - patent_pk <- get_ok_pk("patent") - fields <- get_fields(endpoint = "patent", groups = c("inventors")) - expect_false(patent_pk %in% fields) - - fields <- get_fields(endpoint = "patent", groups = c("inventors"), include_pk = TRUE) - expect_true(patent_pk %in% fields) -}) - -test_that("the endpoints are stable", { - skip_on_cran() - - # quick check of the endpoints - useful after an api update. We run fieldsdf.R - # and do a build. This test would fail if an endpoint was added, moved or deleted - found <- unique(fieldsdf$endpoint) - expecting <- get_endpoints() - expect_equal(expecting, found) -}) diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R deleted file mode 100644 index cc0d5f6a..00000000 --- a/tests/testthat/test-print.R +++ /dev/null @@ -1,36 +0,0 @@ -test_that("We can print the returns from all endpoints ", { - skip_on_cran() - - eps <- get_endpoints() - bad_eps <- c("cpc_subclass", "uspc_subclass", "uspc_mainclass", "wipo") - good_eps <- eps[!eps %in% bad_eps] - - lapply(good_eps, function(x) { - print(x) - j <- search_pv(query = TEST_QUERIES[[x]], endpoint = x) - print(j) - j - }) - - expect_true(TRUE) - - # make it noticeable that all is not right with the API - skip("Skip for API bugs") # TODO: remove when the API is fixed -}) - -test_that("we can print a query, its request, and unnested data", { - skip_on_cran() - - x <- "patent" - q <- qry_funs$eq(patent_id = "11530080") - print(q) - - fields <- c("patent_id", get_fields(x, groups = "ipcr")) - j <- search_pv(query = TEST_QUERIES[[x]], endpoint = x, fields = fields) - print(j$request) - - k <- unnest_pv_data(j$data) - print(k) - - expect_true(TRUE) -}) diff --git a/tests/testthat/test-query-dsl.R b/tests/testthat/test-query-dsl.R deleted file mode 100644 index 840ed780..00000000 --- a/tests/testthat/test-query-dsl.R +++ /dev/null @@ -1,36 +0,0 @@ -test_that("between works as expected", { - skip_on_cran() - - query <- qry_funs$in_range(patent_date = c("1976-01-06", "1976-01-13")) - - results <- search_pv(query, all_pages = TRUE) - - expect_gt(results$query_results$total_hits, 2600) -}) - -test_that("with_qfuns() works as advertised", { - skip_on_cran() # wouldn't necessarily have to skip! - - a <- with_qfuns( - and( - text_phrase(inventors.inventor_name_first = "George"), - text_phrase(inventors.inventor_name_last = "Washington") - ) - ) - - b <- qry_funs$and( - qry_funs$text_phrase(inventors.inventor_name_first = "George"), - qry_funs$text_phrase(inventors.inventor_name_last = "Washington") - ) - - expect_equal(a, b) -}) - -test_that("argument check works on in_range", { - skip_on_cran() # wouldn't necessarily have to skip! - - expect_error( - qq <- qry_funs$in_range("patent_id", c("10000000", "10000002")), - "expects a range of exactly two arguments" - ) -}) diff --git a/tests/testthat/test-validate-args.R b/tests/testthat/test-validate-args.R deleted file mode 100644 index 88e0e0e5..00000000 --- a/tests/testthat/test-validate-args.R +++ /dev/null @@ -1,81 +0,0 @@ -# We can't use expect_warning() without adding a dependency to rlang -# to bypass 8 hour warning suppression -# rlang::local_options(lifecycle_verbosity = "warning") - -test_that("validate_args throws errors for all bad args", { - skip_on_cran() - - # requesting the old plural endpoint should now throw an error - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', endpoint = "patents"), - "endpoint" - ) - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', method = "Post"), - "method" - ) - suppressWarnings({ - result <- search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = TRUE) - # class = "lifecycle_warning_deprecated" - expect_gt(result$query_results$total_hits, 0) - }) - suppressWarnings({ - result <- search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = 7) - # class = "lifecycle_warning_deprecated" - expect_gt(result$query_results$total_hits, 0) - }) - suppressWarnings({ - result <- search_pv('{"patent_date":["1976-01-06"]}', mtchd_subent_only = NULL) - # class = "lifecycle_warning_deprecated" - expect_gt(result$query_results$total_hits, 0) - }) - suppressWarnings({ - result <- search_pv('{"patent_date":["1976-01-06"]}', error_browser = "chrome") - #class = "lifecycle_warning_deprecated" - expect_gt(result$query_results$total_hits, 0) - }) - - per_page <- 17 - suppressWarnings({ - results <- search_pv('{"patent_date":["1976-01-06"]}', per_page = per_page) - - # make sure the size attribute was set from the per_page parameter - expect_equal(per_page, nrow(results$data$patents)) - }) - - suppressWarnings({ - result <- search_pv('{"patent_date":["1976-01-06"]}', page = 2) - # class = "lifecycle_warning_deprecated" # unsupported page parameter - expect_gt(result$query_results$total_hits, 0) - }) - expect_error( - search_pv( - '{"patent_date":["1976-01-06"]}', - fields = "patent_date", - all_pages = TRUE, - after = "3930272" - ), - "after" - ) - expect_error( - get_fields("assignee", groups = "cpc_current"), # valid group for a different endpoint - "for the assignee endpoint" - ) -}) - -test_that("group names can be requested as fields via new API shorthand", { - skip_on_cran() - - endpoint <- "patent" - shorthand <- get_fields("patent", groups=c("application")) - expect_equal(shorthand , "application") - shorthand_res <- search_pv(TEST_QUERIES[[endpoint]], fields=shorthand) - - explicit <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group == "application", "field"] - explicit_res <- search_pv(TEST_QUERIES[[endpoint]], fields=explicit) - - # the requests are different but the results should be the same - expect_failure(expect_equal(shorthand_res$request, explicit_res$request)) - expect_equal(shorthand_res$data, explicit_res$data) - -}) From 3a1efb3f59f80896bf3e4c2fb833db613e28175b Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Tue, 24 Dec 2024 15:36:52 -0600 Subject: [PATCH 050/103] checking secrets access --- .github/workflows/R-CMD-check.yaml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 5e8852ec..4609cc0f 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -39,13 +39,10 @@ jobs: - uses: r-lib/actions/setup-pandoc@v2 - name: Check Secrets Access + if: ${{ env.PATENTSVIEW_API_KEY == '' }} run: | - if [[ "x${{ secrets.PATENTSVIEW_API_KEY }}" != "x" ]]; then - echo "Access to secrets" - else - echo "No access to secrets" - exit 1 - fi + echo "No access to secrets" + exit 1 - name: Query dependencies run: | From 8e88622d8cec128d68aa494f26aba54ecc13421e Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Wed, 25 Dec 2024 10:59:31 -0600 Subject: [PATCH 051/103] only apply sort if user set one --- R/search-pv.R | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index 6f31fe25..0f6b32c0 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -347,19 +347,20 @@ search_pv <- function(query, arg_list <- to_arglist(fields, size, primary_sort_key, after) paged_data <- request_apply(result, method, query, base_url, arg_list, api_key, ...) - # apply the user's sort using order() + # we apply the user's sort, if they supplied one, using order() # was data.table::setorderv(paged_data, names(sort), ifelse(as.vector(sort) == "asc", 1, -1)) - - sort_order <- mapply(function(col, direction) { - if (direction == "asc") { - return(paged_data[[col]]) - } else { - return(-rank(paged_data[[col]], ties.method = "min")) # Invert for descending order - } - }, col = names(sort), direction = as.vector(sort), SIMPLIFY = FALSE) - - # Final sorting - paged_data <- paged_data[do.call(order, sort_order), , drop = FALSE] + if (!is.null(sort)) { + sort_order <- mapply(function(col, direction) { + if (direction == "asc") { + return(paged_data[[col]]) + } else { + return(-rank(paged_data[[col]], ties.method = "min")) # Invert for descending order + } + }, col = names(sort), direction = as.vector(sort), SIMPLIFY = FALSE) + + # Final sorting + paged_data <- paged_data[do.call(order, sort_order), , drop = FALSE] + } # remove the fields we added in order to do the user's sort ourselves paged_data <- paged_data[, !names(paged_data) %in% additional_fields] From fdf728be980b7df2732554d40d0d5202b68455cb Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Thu, 19 Dec 2024 19:16:10 -0600 Subject: [PATCH 052/103] feat: httr to httr2 --- DESCRIPTION | 2 +- R/process-error.R | 57 ----------------------------------------------- R/process-resp.R | 18 ++------------- R/search-pv.R | 25 ++++++++------------- 4 files changed, 12 insertions(+), 90 deletions(-) delete mode 100644 R/process-error.R diff --git a/DESCRIPTION b/DESCRIPTION index cacc3d76..37ee6a1e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,7 @@ LazyData: TRUE Depends: R (>= 3.1) Imports: - httr, + httr2, lifecycle, jsonlite, utils diff --git a/R/process-error.R b/R/process-error.R deleted file mode 100644 index 4136f5d7..00000000 --- a/R/process-error.R +++ /dev/null @@ -1,57 +0,0 @@ -#' @noRd -throw_er <- function(resp) { - throw_if_loc_error(resp) - xheader_er_or_status(resp) -} - -#' @noRd -throw_if_loc_error <- function(resp) { - if (hit_locations_ep(resp$url) && httr::status_code(resp) == 500) { - num_grps <- get_num_groups(resp$url) - if (num_grps > 2) { - stop2( - "Your request resulted in a 500 error, likely because you have ", - "requested too many fields in your request (the location endpoint ", - "currently has restrictions on the number of fields/groups you can ", - "request). Try slimming down your field list and trying again." - ) - } - } -} - -# Not sure this is still applicable -#' @noRd -hit_locations_ep <- function(url) { - grepl( - "^https://search.patentsview.org/api/v1/location/", - url, - ignore.case = TRUE - ) -} - -#' @noRd -get_num_groups <- function(url) { - prsd_json_filds <- gsub(".*&f=([^&]*).*", "\\1", utils::URLdecode(url)) - fields <- jsonlite::fromJSON(prsd_json_filds) - grps <- fieldsdf[fieldsdf$endpoint == "location" & - fieldsdf$field %in% fields, "group"] - length(unique(grps)) -} - -#' @noRd -xheader_er_or_status <- function(resp) { - - # look for the api's ultra-helpful X-Status-Reason header - xhdr <- get_x_status(resp) - - if (length(xhdr) != 1) - httr::stop_for_status(resp) - else - stop(xhdr[[1]], call. = FALSE) -} - -#' @noRd -get_x_status <- function(resp) { - headers <- httr::headers(resp) - headers[grepl("x-status-reason$", names(headers), ignore.case = TRUE)] -} diff --git a/R/process-resp.R b/R/process-resp.R index 4fb9ed3d..426f3f4b 100644 --- a/R/process-resp.R +++ b/R/process-resp.R @@ -1,23 +1,10 @@ -#' @noRd -parse_resp <- function(resp) { - j <- httr::content(resp, as = "text", encoding = "UTF-8") - jsonlite::fromJSON( - j, - simplifyVector = TRUE, simplifyDataFrame = TRUE, simplifyMatrix = TRUE - ) -} - #' @noRd get_request <- function(resp) { gp <- structure( - list(method = resp$req$method, url = resp$req$url), + list(method = resp$request$method, url = resp$request$url), class = c("list", "pv_request") ) - if (gp$method == "POST") { - gp$body <- rawToChar(resp$req$options$postfields) - } - gp } @@ -42,11 +29,10 @@ get_query_results <- function(prsd_resp) { #' @noRd process_resp <- function(resp) { - if (httr::http_error(resp)) throw_er(resp) - prsd_resp <- parse_resp(resp) request <- get_request(resp) data <- get_data(prsd_resp) + query_results <- get_query_results(prsd_resp) structure( diff --git a/R/search-pv.R b/R/search-pv.R index 7e711c0b..7e86f9d8 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -48,27 +48,20 @@ get_post_body <- function(query, arg_list) { } #' @noRd -one_request <- function(method, query, base_url, arg_list, api_key, ...) { - ua <- httr::user_agent("https://github.com/ropensci/patentsview") +patentsview_error_body <- function(resp) { + if (httr2::resp_status(resp) == 400) c(httr2::resp_header(resp, "X-Status-Reason")) else NULL +} if (method == "GET") { get_url <- get_get_url(query, base_url, arg_list) - resp <- httr::GET( - get_url, - httr::add_headers("X-Api-Key" = api_key), - ua, ... - ) + req <- httr2::request(get_url) |> + httr2::req_method("GET") } else { body <- get_post_body(query, arg_list) - resp <- httr::POST( - base_url, - httr::add_headers( - "X-Api-Key" = api_key, - "Content-Type" = "application/json" - ), - body = body, - ua, ... - ) + req <- httr2::request(base_url) |> + httr2::req_body_raw(body) |> + httr2::req_headers("Content-Type" = "application/json") |> + httr2::req_method("POST") } # Sleep and retry on a 429 (too many requests). The Retry-After header is the From 9158a56c354d83d45219c6c1ba40437ff67e0870 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Thu, 19 Dec 2024 19:27:54 -0600 Subject: [PATCH 053/103] feat: httr to httr2 --- R/utils.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/R/utils.R b/R/utils.R index cc487623..a6f50a6b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -4,6 +4,16 @@ stop2 <- function(...) stop(..., call. = FALSE) #' @noRd asrt <- function(expr, ...) if (!expr) stop2(...) +#' @noRd +parse_resp <- function(resp) { + j <- resp |> httr2::resp_body_string(encoding = "UTF-8") + + jsonlite::fromJSON( + j, + simplifyVector = TRUE, simplifyDataFrame = TRUE, simplifyMatrix = TRUE + ) +} + #' @noRd format_num <- function(x) { format( From ad0defb98f85217f9c4e7a6a45d785923c343b8f Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Thu, 19 Dec 2024 21:54:14 -0600 Subject: [PATCH 054/103] docs: API link update --- R/data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.R b/R/data.R index 53cc8ecb..49ae72f3 100644 --- a/R/data.R +++ b/R/data.R @@ -3,7 +3,7 @@ #' A data frame containing the names of retrievable fields for each of the #' endpoints. You can find this data on the API's online documentation for each #' endpoint as well (e.g., the -#' \href{https://patentsview.org/apis/api-endpoints/patents}{patents endpoint +#' \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference/#patent}{patent endpoint #' field list table}). #' #' @format A data frame with the following columns: From 6c2b3dc525bfd9899c0e4b216e05615cc5ea93d8 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Thu, 19 Dec 2024 21:55:26 -0600 Subject: [PATCH 055/103] feat: dont print document number in scientific notation --- R/print.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/print.R b/R/print.R index 65bb53fe..025e7b4f 100644 --- a/R/print.R +++ b/R/print.R @@ -24,7 +24,10 @@ print.pv_data_result <- function(x, ...) { ) utils::str( - x, vec.len = 1, max.level = 2, give.attr = FALSE, strict.width = "cut" + x, vec.len = 1, max.level = 2, give.attr = FALSE, strict.width = "cut", + formatNum = function(x, ...) { + format(x, trim = TRUE, drop0trailing = TRUE, scientific = FALSE, ...) + } ) } From 62eccf9da9cee89a22dfe440525d13846f564ed0 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Thu, 19 Dec 2024 22:07:56 -0600 Subject: [PATCH 056/103] build: version bumps to ko deprecated msgs --- .github/workflows/R-CMD-check.yaml | 122 ----------------------------- 1 file changed, 122 deletions(-) delete mode 100644 .github/workflows/R-CMD-check.yaml diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml deleted file mode 100644 index b2ad92fa..00000000 --- a/.github/workflows/R-CMD-check.yaml +++ /dev/null @@ -1,122 +0,0 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. -# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions - -# Details on pull_request_target and why it's insecure: -# https://securitylab.github.com/resources/github-actions-preventing-pwn-requests/ -# Post describing a workaround, from which we take inspiration: -# https://michaelheap.com/access-secrets-from-forks/ - -name: R-CMD-check - -on: - push: - branches: - - master - - 'feature/**' - - 'bugfix/**' - pull_request_target: - types: [opened, synchronize] - -jobs: - pre-check: - runs-on: ubuntu-latest - steps: - - name: Confirm crew102 triggered the build - run: | - if [ "${{ github.actor }}" == "crew102" ]; then - echo "Actor is crew102" - else - echo "Actor is ${{ github.actor }}, failing build." - exit 1 - fi - - R-CMD-check: - needs: [pre-check] - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - # Run sequentially so that we don't run into rate limit errors that our - # code would normally work around via retry logic - max-parallel: 1 - fail-fast: false - matrix: - config: - - {os: windows-latest, r: 'release'} - - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - # - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - PATENTSVIEW_API_KEY: ${{ secrets.PATENTSVIEW_API_KEY }} - - steps: - - name: Checkout code - uses: actions/checkout@v3 - with: - # Use the head SHA for pull requests - ref: ${{ github.event.pull_request.head.sha || github.sha }} - - - uses: r-lib/actions/setup-r@v1 - with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@v1 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Restore R package cache - if: runner.os != 'Windows' - uses: actions/cache@v2 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - remotes::install_cran("covr") - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: | - options(crayon.enabled = TRUE) - rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} - - - name: Run examples - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: | - options(crayon.enabled = TRUE) - remotes::install_cran("devtools") - devtools::run_examples(run_dontrun = TRUE) - shell: Rscript {0} - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main - with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check \ No newline at end of file From 19f2f43bc955ad3666ec22aec9eeb0cea6b05d69 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Thu, 19 Dec 2024 22:08:45 -0600 Subject: [PATCH 057/103] added timeout - prevent run away jobs --- .github/workflows/R-CMD-check.yaml | 122 +++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 .github/workflows/R-CMD-check.yaml diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 00000000..5a260a18 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,122 @@ +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. +# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions + +# Details on pull_request_target and why it's insecure: +# https://securitylab.github.com/resources/github-actions-preventing-pwn-requests/ +# Post describing a workaround, from which we take inspiration: +# https://michaelheap.com/access-secrets-from-forks/ + +name: R-CMD-check + +on: + push: + branches: + - master + - 'feature/**' + - 'bugfix/**' + pull_request_target: + types: [opened, synchronize] + +jobs: + pre-check: + runs-on: ubuntu-latest + steps: + - name: Confirm crew102 triggered the build + run: | + if [ "${{ github.actor }}" == "crew102" ]; then + echo "Actor is crew102" + else + echo "Actor is ${{ github.actor }}, failing build." + exit 1 + fi + + R-CMD-check: + needs: [pre-check] + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + # Run sequentially so that we don't run into rate limit errors that our + # code would normally work around via retry logic + max-parallel: 1 + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + - {os: macOS-latest, r: 'release'} + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + # - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + PATENTSVIEW_API_KEY: ${{ secrets.PATENTSVIEW_API_KEY }} + + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + # Use the head SHA for pull requests + ref: ${{ github.event.pull_request.head.sha || github.sha }} + + - uses: r-lib/actions/setup-r@v1 + with: + r-version: ${{ matrix.config.r }} + + - uses: r-lib/actions/setup-pandoc@v1 + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") + shell: Rscript {0} + + - name: Restore R package cache + if: runner.os != 'Windows' + uses: actions/cache@v2 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + + - name: Install system dependencies + if: runner.os == 'Linux' + run: | + while read -r cmd + do + eval sudo $cmd + done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') + + - name: Install dependencies + run: | + remotes::install_deps(dependencies = TRUE) + remotes::install_cran("rcmdcheck") + remotes::install_cran("covr") + shell: Rscript {0} + + - name: Check + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + run: | + options(crayon.enabled = TRUE) + rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") + shell: Rscript {0} + + - name: Run examples + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + run: | + options(crayon.enabled = TRUE) + remotes::install_cran("devtools") + devtools::run_examples(run_dontrun = TRUE) + shell: Rscript {0} + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@main + with: + name: ${{ runner.os }}-r${{ matrix.config.r }}-results + path: check From 41b246c4e312bfc003bbcb92325433eed97ba1ac Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 08:21:30 -0600 Subject: [PATCH 058/103] feat: casting type changes --- R/cast-pv-data.R | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/R/cast-pv-data.R b/R/cast-pv-data.R index 57716461..6d68a925 100644 --- a/R/cast-pv-data.R +++ b/R/cast-pv-data.R @@ -5,21 +5,23 @@ as_is <- function(x) x get_cast_fun <- function(data_type) { # Some fields aren't documented, so we don't know what their data type is. Use # string type for these. + # new version of the API: state of string vs fulltext is in flux. Latter currently unused if (length(data_type) != 1) data_type <- "string" - switch( - data_type, + switch(data_type, "string" = as_is, "date" = as.Date, - "float" = as.numeric, - "integer" = as.integer, + "number" = as_is, + "integer" = as_is, "int" = as.integer, - "fulltext" = as_is + "fulltext" = as_is, + "boolean" = as_is, + "bool" = as.logical ) } #' @noRd lookup_cast_fun <- function(name, typesdf) { - data_type <- typesdf[typesdf$field == name, "data_type"] + data_type <- typesdf[typesdf$common_name == name, "data_type"] get_cast_fun(data_type = data_type) } @@ -29,6 +31,18 @@ cast_one.character <- function(one, name, typesdf) { cast_fun(one) } +#' @noRd +cast_one.double <- function(one, name, typesdf) { + cast_fun <- lookup_cast_fun(name, typesdf) + cast_fun(one) +} + +#' @noRd +cast_one.integer <- function(one, name, typesdf) { + cast_fun <- lookup_cast_fun(name, typesdf) + cast_fun(one) +} + #' @noRd cast_one.default <- function(one, name, typesdf) NA From 8bedcb2d6ce7a2bb123d88bec86532d7106508c2 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 08:22:40 -0600 Subject: [PATCH 059/103] docs: field name change --- R/cast-pv-data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/cast-pv-data.R b/R/cast-pv-data.R index 6d68a925..93354c80 100644 --- a/R/cast-pv-data.R +++ b/R/cast-pv-data.R @@ -83,7 +83,7 @@ cast_one <- function(one, name, typesdf) UseMethod("cast_one") #' \dontrun{ #' #' fields <- c("patent_date", "patent_title", "patent_year") -#' res <- search_pv(query = "{\"patent_number\":\"5116621\"}", fields = fields) +#' res <- search_pv(query = "{\"patent_id\":\"5116621\"}", fields = fields) #' cast_pv_data(data = res$data) #' } #' From 24c7c510e793b44b9d4b6447f7b3344989ddd46f Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 08:23:43 -0600 Subject: [PATCH 060/103] feat: new casting methodology --- R/cast-pv-data.R | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/R/cast-pv-data.R b/R/cast-pv-data.R index 93354c80..e3cd59da 100644 --- a/R/cast-pv-data.R +++ b/R/cast-pv-data.R @@ -91,9 +91,25 @@ cast_one <- function(one, name, typesdf) UseMethod("cast_one") cast_pv_data <- function(data) { validate_pv_data(data) - endpoint <- names(data) + entity_name <- names(data) - typesdf <- fieldsdf[fieldsdf$endpoint == endpoint, c("field", "data_type")] + if (entity_name == "rel_app_texts") { + # blend the fields from both rel_app_texts entities + typesdf <- unique(fieldsdf[fieldsdf$group == entity_name, c("common_name", "data_type")]) + } else { + # need to get the endpoint from entity_name + endpoint_df <- fieldsdf[fieldsdf$group == entity_name, ] + endpoint <- unique(endpoint_df$endpoint) + + # watch out here- several endpoints return entities that are groups returned + # by the patent and publication endpoints (attorneys, inventors, assignees) + if(length(endpoint) > 1) { + endpoint <- endpoint[!endpoint %in% c("patent", "publication")] + } + + typesdf <- fieldsdf[fieldsdf$endpoint == endpoint, c("common_name", "data_type")] + + } df <- data[[1]] @@ -103,7 +119,7 @@ cast_pv_data <- function(data) { df[] <- list_out out_data <- list(x = df) - names(out_data) <- endpoint + names(out_data) <- entity_name structure( out_data, From 1dcb89d6f238b3e10ed2f3609a212a18ddabcb10 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 08:31:20 -0600 Subject: [PATCH 061/103] docs: field name change --- R/query-dsl.R | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/R/query-dsl.R b/R/query-dsl.R index 51bf0640..bb9ff79f 100644 --- a/R/query-dsl.R +++ b/R/query-dsl.R @@ -144,26 +144,25 @@ qry_funs <- c( #' @return The result of \code{code} - i.e., your query. #' #' @examples -#' # Without with_qfuns, we have to do: #' qry_funs$and( #' qry_funs$gte(patent_date = "2007-01-01"), #' qry_funs$text_phrase(patent_abstract = c("computer program")), #' qry_funs$or( -#' qry_funs$eq(inventor_last_name = "ihaka"), -#' qry_funs$eq(inventor_first_name = "chris") +#' qry_funs$eq(inventors.inventor_name_last = "Ihaka"), +#' qry_funs$eq(inventors.inventor_name_last = "Chris") #' ) #' ) #' -#' #...With it, this becomes: +#' # ...With it, this becomes: #' with_qfuns( -#' and( -#' gte(patent_date = "2007-01-01"), -#' text_phrase(patent_abstract = c("computer program")), -#' or( -#' eq(inventor_last_name = "ihaka"), -#' eq(inventor_first_name = "chris") -#' ) -#' ) +#' and( +#' gte(patent_date = "2007-01-01"), +#' text_phrase(patent_abstract = c("computer program")), +#' or( +#' eq(inventors.inventor_name_last = "Ihaka"), +#' eq(inventors.inventor_name_last = "Chris") +#' ) +#' ) #' ) #' #' @export From 3454c525c11d96d63b4652c750dce208fd46faed Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 08:34:56 -0600 Subject: [PATCH 062/103] docs: field name change --- R/search-pv.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index 7e86f9d8..998f6e0e 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -200,8 +200,8 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, api_key, .. #' search_pv( #' query = qry_funs$gt(patent_year = 2010), #' method = "POST", -#' fields = "patent_number", -#' sort = c("patent_number" = "asc") +#' fields = "patent_id", +#' sort = c("patent_id" = "asc") #' ) #' #' search_pv( @@ -216,9 +216,14 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, api_key, .. #' ) #' #' search_pv( -#' query = qry_funs$contains(inventors_at_grant.name_last = "Smith"), +#' query = qry_funs$contains(inventors.inventor_name_last = "Smith"), #' endpoint = "patent", -#' config = httr::timeout(40) +#' timeout = 40 +#' ) +#' +#' search_pv( +#' query = qry_funs$eq(patent_id = "11530080"), +#' fields = "application" #' ) #' } #' From ad0c9f7082ca0f182deafbe563b74a846ae06f85 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 09:17:27 -0600 Subject: [PATCH 063/103] fix: parameters on posts --- R/search-pv.R | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index 998f6e0e..a0e3ae92 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -11,26 +11,41 @@ tojson_2 <- function(x, ...) { } #' @noRd -to_arglist <- function(fields, page, per_page, sort) { +to_arglist <- function(fields, size, sort, after) { + opts <- list(size = size) + if (!is.null(after)) { + opts$after <- after + } + list( fields = fields, sort = list(as.list(sort)), - opts = list( - offset = (page - 1) * per_page, - size = per_page - ) + opts = opts ) } +#' @noRd +set_sort_param <- function(before) { + # Fixes former bug + # for sort = c("patent_id" = "asc", "citation_patent_id" = "asc") + # we sent [{"patent_id":"asc","citation_patent_id":"asc"}] + # API wants [{"patent_id": "asc" },{"citation_patent_id": "asc" }] + # TODO(any): brute meet force- there must be a better way... + after <- tojson_2(before, auto_unbox = TRUE) + after <- gsub('","', '"},{"', after) + after +} + #' @noRd get_get_url <- function(query, base_url, arg_list) { j <- paste0( base_url, "?q=", utils::URLencode(query, reserved = TRUE), "&f=", tojson_2(arg_list$fields), - "&o=", tojson_2(arg_list$opts, auto_unbox = TRUE), - "&s=", tojson_2(arg_list$sort, auto_unbox = TRUE) + "&s=", set_sort_param(arg_list$sort), + "&o=", tojson_2(arg_list$opts, auto_unbox = TRUE) ) + utils::URLencode(j) } @@ -40,11 +55,14 @@ get_post_body <- function(query, arg_list) { "{", '"q":', query, ",", '"f":', tojson_2(arg_list$fields), ",", - '"o":', tojson_2(arg_list$opts, auto_unbox = TRUE), ",", - '"s":', tojson_2(arg_list$sort, auto_unbox = TRUE), + '"s":', set_sort_param(arg_list$sort), ",", + '"o":', tojson_2(arg_list$opts, auto_unbox = TRUE), "}" ) - gsub('(,"[fs]":)([,}])', paste0("\\1", "{}", "\\2"), body) + # The API can now act weirdly if we pass f:{},s:{} as we did in the past. + # (Weirdly in that the post results may not equal the get results or posts error out) + # Now we'd remove "f":, and "s":, We're guaranteed to have q: and at least "size":1000 as o: + gsub('("[fs]":,)', "", body) } #' @noRd From a5889f8706159120ce7fc2d74cc54f0cb7494adf Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 09:33:01 -0600 Subject: [PATCH 064/103] feat: httr to httr2 --- R/search-pv.R | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index a0e3ae92..21b27727 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -82,21 +82,16 @@ patentsview_error_body <- function(resp) { httr2::req_method("POST") } - # Sleep and retry on a 429 (too many requests). The Retry-After header is the - # seconds to sleep - if (httr::status_code(resp) == 429) { - num_seconds <- httr::headers(resp)[["Retry-After"]] - maybe_an_s <- if (num_seconds == "1") "" else "s" - message(paste0( - "The API's requests per minute limit has been reached. ", - "Pausing for ", num_seconds, " second", maybe_an_s, - " before continuing." - )) - Sys.sleep(num_seconds) + resp <- req |> + httr2::req_user_agent("https://github.com/ropensci/patentsview") |> + httr2::req_options(...) |> + httr2::req_retry(max_tries = 20) |> # automatic 429 Retry-After + httr2::req_headers("X-Api-Key" = api_key, .redact = "X-Api-Key") |> + httr2::req_error(body = patentsview_error_body) |> + httr2::req_perform() - one_request(method, query, base_url, arg_list, api_key, ...) - } else { - resp + resp +} } } From 6a9aa9c0ae5bdd1f57678a67c1828829343e76d0 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 09:38:00 -0600 Subject: [PATCH 065/103] feat: new paging methodology --- R/search-pv.R | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/R/search-pv.R b/R/search-pv.R index 21b27727..200f353d 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -92,7 +92,31 @@ patentsview_error_body <- function(resp) { resp } + +#' Pad patent_id +#' +#' This function strategically pads a patent_id with zeroes to 8 characters, +#' needed only for custom paging that uses sorts by patent_id. +#' +#' @param patent_id The patent_id that needs to be padded. It can +#' be the patent_id for a utility, design, plant or reissue patent. +#' +#' @examples +#' \dontrun{ +#' padded <- pad_patent_id("RE36479") +#' +#' padded2 <- pad_patent_id("3930306") +#' } +#' +#' @export +# zero pad patent_id to 8 characters. +pad_patent_id <- function(patent_id) { + pad <- 8 - nchar(patent_id) + if (pad > 0) { + patent_id <- paste0(sprintf("%0*d", pad, 0), patent_id) + patent_id <- sub("(0+)([[:alpha:]]+)([[:digit:]]+)", "\\2\\1\\3", patent_id) } + patent_id } #' @noRd @@ -121,12 +145,32 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, api_key, .. arg_list$opts$offset <- (i - 1) * arg_list$opts$size x <- one_request(method, query, base_url, arg_list, api_key, ...) x <- process_resp(x) + + # now to page we need to set the "after" attribute to where we left off + # we want the last value of the primary sort field + s <- names(arg_list$sort[[1]])[[1]] + index <- nrow(x$data[[1]]) + last_value <- x$data[[1]][[s]][[index]] + + if (s == "patent_id") { + last_value <- pad_patent_id(last_value) + } + + arg_list$opts$after <<- last_value + x$data[[1]] }) do.call("rbind", c(tmp, make.row.names = FALSE)) } +#' @noRd +get_default_sort <- function(endpoint) { + default <- c("asc") + names(default) <- get_ok_pk(endpoint) + default +} + #' Search PatentsView #' #' This function makes an HTTP request to the PatentsView API for data matching From 7a539b13f14985e7a2f44846a24f57fdfeb2509c Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 09:39:26 -0600 Subject: [PATCH 066/103] feat: removed paging limits --- R/search-pv.R | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index 200f353d..c810ff87 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -126,20 +126,6 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, api_key, .. if (req_pages < 1) { stop2("No records matched your query...Can't download multiple pages") } - if (matched_records > 10000) { - stop2( - "The API only allows you to download 10,000 records in a single query. ", - "Your query returned ", matched_records, " records. See for ", - "how to get around this limitation." - ) - } - if (req_pages > 10) { - stop2( - "The API only allows you to download 10 pages in a single query. ", - "Your query returned ", req_pages, " pages. See for ", - "how to get around this limitation." - ) - } tmp <- lapply(seq_len(req_pages), function(i) { arg_list$opts$offset <- (i - 1) * arg_list$opts$size From ca71aa17cd3a36d3b2634233badd334aca42bcc8 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Fri, 20 Dec 2024 16:59:08 -0600 Subject: [PATCH 067/103] feat: search_pv parameter updates --- R/search-pv.R | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index c810ff87..eb3cb43a 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -178,25 +178,36 @@ get_default_sort <- function(endpoint) { #' E.g., \code{qry_funs$gte(patent_date = "2007-01-04")} #' } #' @param fields A character vector of the fields that you want returned to you. -#' A value of \code{NULL} indicates that the default fields should be -#' returned. Acceptable fields for a given endpoint can be found at the API's +#' A value of \code{NULL} indicates to the API that it should return the default fields +#' for that endpoint. Acceptable fields for a given endpoint can be found at the API's #' online documentation (e.g., check out the field list for the #' \href{https://patentsview.org/apis/api-endpoints/patents}{patents #' endpoint}) or by viewing the \code{fieldsdf} data frame #' (\code{View(fieldsdf)}). You can also use \code{\link{get_fields}} to list #' out the fields available for a given endpoint. +#' +#' Nested fields can be fully qualified, e.g., "application.filing_date" or the +#' group name can be used to retrieve all of its nested fields, E.g. "application". +#' The latter would be similar to passing \code{get_fields("patent", group = "application")} +#' except it's the API that decides what fields to return. #' @param endpoint The web service resource you wish to search. Use #' \code{get_endpoints()} to list the available endpoints. -#' @param subent_cnts `r lifecycle::badge("deprecated")` Non-matched subentities -#' will always be returned under the new version of the API +#' @param subent_cnts `r lifecycle::badge("deprecated")` This is always FALSE in the +#' new version of the API as the total counts of unique subentities is no longer available. #' @param mtchd_subent_only `r lifecycle::badge("deprecated")` This is always -#' FALSE in the new version of the API. -#' @param page The page number of the results that should be returned. -#' @param per_page The number of records that should be returned per page. This -#' value can be as high as 1,000 (e.g., \code{per_page = 1000}). +#' FALSE in the new version of the API as non-matched subentities +#' will always be returned. +#' @param page `r lifecycle::badge("deprecated")` The new version of the API does not use +#' \code{page} as a parameter for paging, it uses \code{after}. +#' @param per_page `r lifecycle::badge("deprecated")` The API now uses \code{size} +#' @param size The number of records that should be returned per page. This +#' value can be as high as 1,000 (e.g., \code{size = 1000}). +#' @param after A list of sort key values that defaults to NULL. This +#' exposes the API's paging parameter for users who want to implement their own +#' paging. It cannot be set when \code{all_pages = TRUE} as the R package manipulates it +#' for users automatically. See \href{../articles/result-set-paging.html}{result set paging} #' @param all_pages Do you want to download all possible pages of output? If -#' \code{all_pages = TRUE}, the values of \code{page} and \code{per_page} are -#' ignored. +#' \code{all_pages = TRUE}, the value of \code{size} is ignored. #' @param sort A named character vector where the name indicates the field to #' sort by and the value indicates the direction of sorting (direction should #' be either "asc" or "desc"). For example, \code{sort = c("patent_number" = From f5a1fcbbb3f03d4ff6aa6f3ddc1270660d40773d Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 09:43:53 -0600 Subject: [PATCH 068/103] feat: use new api group/field shorthand --- R/get-fields.R | 25 +++++++++++++++++++++++-- R/validate-args.R | 13 +++++++++++-- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/R/get-fields.R b/R/get-fields.R index 850eff89..4088c8e5 100644 --- a/R/get-fields.R +++ b/R/get-fields.R @@ -45,11 +45,32 @@ #' @export get_fields <- function(endpoint, groups = NULL) { validate_endpoint(endpoint) + + # using API's shorthand notation, group names can be requested as fields instead of + # fully qualifying each nested field. Fully qualified, all patent endpoint's attributes + # is over 4K, too big to be sent on a GET with a modest query + + pk <- get_ok_pk(endpoint) + plural_entity <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$field == pk, "group"] + top_level_attributes <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group == plural_entity, "field"] + if (is.null(groups)) { - fieldsdf[fieldsdf$endpoint == endpoint, "field"] + c( + top_level_attributes, + unique(fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group != plural_entity, "group"]) + ) } else { validate_groups(endpoint, groups = groups) - fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group %in% groups, "field"] + + # don't include pk if plural_entity group is requested (pk would be a member) + extra_field <- if (include_pk && !plural_entity %in% groups) pk else NULL + extra_fields <- if (plural_entity %in% groups) top_level_attributes else NULL + + c( + extra_field, + extra_fields, + groups[!groups == plural_entity] + ) } } diff --git a/R/validate-args.R b/R/validate-args.R index a669e182..275614c8 100644 --- a/R/validate-args.R +++ b/R/validate-args.R @@ -8,14 +8,23 @@ validate_endpoint <- function(endpoint) { } #' @noRd -validate_args <- function(api_key, fields, endpoint, method, page, per_page, - sort) { +validate_args <- function(api_key, fields, endpoint, method, + sort, after, size, all_pages) { asrt( !identical(api_key, ""), "The new version of the API requires an API key" ) flds_flt <- fieldsdf[fieldsdf$endpoint == endpoint, "field"] + + # Now the API allows the group name to be requested as in fields to get all of + # the group's nested fields. ex.: "assignees" on the patent endpoint gets you all + # of the assignee fields. Note that "patents" can't be requested + groups <- unique(fieldsdf[fieldsdf$endpoint == endpoint, c("group")]) + pk <- get_ok_pk(endpoint) + plural_entity <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$field == pk, "group"] + flds_flt <- append(flds_flt, groups[!groups == plural_entity]) + asrt( all(fields %in% flds_flt), "Bad field(s): ", paste(fields[!(fields %in% flds_flt)], collapse = ", ") From 4b2703ccf1ae373d780b2783e346b8537d371a7b Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 10:59:34 -0600 Subject: [PATCH 069/103] feat: added in_range query function --- R/query-dsl.R | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/R/query-dsl.R b/R/query-dsl.R index bb9ff79f..f3ea00ef 100644 --- a/R/query-dsl.R +++ b/R/query-dsl.R @@ -54,12 +54,31 @@ create_not_fun <- function(fun) { } } +#' @noRd +create_in_range_fun <- function(fun) { + force(fun) + function(...) { + value_p <- list(...) + field <- names(value_p) + value <- unlist(value_p) + names(value) <- NULL + + # throw an error if the length isn't two + asrt(length(value) == 2, fun, " expects a range of exactly two arguments") + + low <- create_one_fun(field = field, value = value[1], fun = "gte") + high <- create_one_fun(field = field, value = value[2], fun = "lte") + z <- list(`_and` = list(low, high)) + + structure(z, class = c(class(z), "pv_query")) + } +} + #' List of query functions #' #' A list of functions that make it easy to write PatentsView queries. See the -#' details section below for a list of the 14 functions, as well as the -#' \href{https://docs.ropensci.org/patentsview/articles/writing-queries.html}{writing -#' queries vignette} for further details. +#' details section below for a list of the 15 functions, as well as the +#' \href{../articles/writing-queries.html}{writing queries vignette} for further details. #' #' @details #' @@ -109,6 +128,13 @@ create_not_fun <- function(fun) { #' \item \code{not} - The comparison is not true #' } #' +#' \strong{4. Convenience function} \cr +#' +#' There is 1 convenience function: +#' \itemize{ +#' \item \code{in_range} - Builds a <= x <= b query +#' } +#' #' @return An object of class \code{pv_query}. This is basically just a simple #' list with a print method attached to it. #' @@ -117,6 +143,10 @@ create_not_fun <- function(fun) { #' #' qry_funs$not(qry_funs$eq(patent_date = "2001-01-01")) #' +#' qry_funs$in_range(patent_year = c(2010, 2021)) +#' +#' qry_funs$in_range(patent_date = c("1976-01-01", "1983-02-28")) + #' @export qry_funs <- c( lapply2( @@ -126,7 +156,8 @@ qry_funs <- c( ), create_key_fun ), lapply2(c("and", "or"), create_array_fun), - lapply2("not", create_not_fun) + lapply2("not", create_not_fun), + lapply2("in_range", create_in_range_fun) ) #' With qry_funs From cdb768c8dba0266d131a052da980f68cd83576bd Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 11:09:15 -0600 Subject: [PATCH 070/103] feat: unnesting plural entities from singular endpoints --- R/unnest-pv-data.R | 58 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/R/unnest-pv-data.R b/R/unnest-pv-data.R index a86ca441..f6af8470 100644 --- a/R/unnest-pv-data.R +++ b/R/unnest-pv-data.R @@ -4,24 +4,50 @@ #' in \code{\link{unnest_pv_data}}, based on the endpoint you searched. #' It will return a potential unique identifier for a given entity (i.e., a #' given endpoint). For example, it will return "patent_id" when -#' \code{endpoint = "patent"}. +#' \code{endpoint_or_entity = "patent"}. It would return the same value if +#' the entity name "patents" was passed via \code{get_ok_pk(names(pv_return$data))} +#' where pv_return was returned from \code{\link{search_pv}}. #' -#' @param endpoint The endpoint which you would like to know a potential primary -#' key for. +#' @param endpoint_or_entity The endpoint or entity name for which you +#' would like to know a potential primary key for. #' #' @return The name of a primary key (\code{pk}) that you could pass to #' \code{\link{unnest_pv_data}}. #' #' @examples -#' get_ok_pk(endpoint = "inventor") -#' get_ok_pk(endpoint = "cpc_subclass") -#' get_ok_pk("publication/rel_app_text") +#' get_ok_pk(endpoint_or_entity = "inventor") # Returns "inventor_id" +#' get_ok_pk(endpoint_or_entity = "cpc_group") # Returns "cpc_group_id" #' #' @export -get_ok_pk <- function(endpoint) { +get_ok_pk <- function(endpoint_or_entity) { + endpoint_df <- fieldsdf[fieldsdf$endpoint == endpoint_or_entity, ] + if (nrow(endpoint_df) > 0) { + endpoint <- endpoint_or_entity + } else { + endpoint_df <- fieldsdf[fieldsdf$group == endpoint_or_entity, ] + endpoint <- unique(endpoint_df$endpoint) + + # watch out here- several endpoints return entities that are groups returned + # by the patent and publication endpoints (attourneys, inventors, assignees) + if(length(endpoint) > 1) { + endpoint <- endpoint[!endpoint %in% c("patent", "publication")] + endpoint_df <- fieldsdf[fieldsdf$endpoint == endpoint, ] + } + } + unnested_endpoint <- sub("^(patent|publication)/", "", endpoint) possible_pks <- c("patent_id", "document_number", paste0(unnested_endpoint, "_id")) - fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$field %in% possible_pks, "field"] + pk <- endpoint_df[endpoint_df$field %in% possible_pks, "field"] + + # we're unable to determine the pk if an entity name of rel_app_texts was passed + asrt( + length(pk) == 1, + "The primary key cannot be determined for ", endpoint_or_entity, + ". Try using the endpoint's name instead ", + paste(unique(fieldsdf[fieldsdf$group == endpoint_or_entity, "endpoint"]), collapse = ", ") + ) + + pk } #' Unnest PatentsView data @@ -58,12 +84,20 @@ get_ok_pk <- function(endpoint) { #' } #' #' @export -unnest_pv_data <- function(data, pk = get_ok_pk(names(data))) { - +unnest_pv_data <- function(data, pk = NULL) { validate_pv_data(data) df <- data[[1]] + if (is.null(pk)) { + # now there are two endpoints that return rel_app_texts entities with different pks + if (names(data) == "rel_app_texts") { + pk <- if ("document_number" %in% names(df)) "document_number" else "patent_id" + } else { + pk = get_ok_pk(names(data)) + } + } + asrt( pk %in% colnames(df), pk, " not in primary entity data frame...Did you include it in your ", @@ -75,14 +109,12 @@ unnest_pv_data <- function(data, pk = get_ok_pk(names(data))) { sub_ent_df <- df[, !prim_ent_var, drop = FALSE] sub_ents <- colnames(sub_ent_df) - ok_pk <- get_ok_pk(names(data)) - out_sub_ent <- lapply2(sub_ents, function(x) { temp <- sub_ent_df[[x]] asrt( length(unique(df[, pk])) == length(temp), pk, " cannot act as a primary key because it is not a unique identifier.\n\n", - "Try using ", ok_pk, " instead." + "Try using ", pk, " instead." ) names(temp) <- df[, pk] xn <- do.call("rbind", temp) From 467a9501722059243ed9d1db29ac8c2e44f59bec Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 11:12:55 -0600 Subject: [PATCH 071/103] feat: added lifecycle deprecations --- R/validate-args.R | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/R/validate-args.R b/R/validate-args.R index 275614c8..8ad9f930 100644 --- a/R/validate-args.R +++ b/R/validate-args.R @@ -97,4 +97,21 @@ deprecate_warn_all <- function(error_browser, subent_cnts, mtchd_subent_only) { version of the API" ) } -} \ No newline at end of file + + if (lifecycle::is_present(per_page)) { + lifecycle::deprecate_warn( + when = "0.3.0", + what = "search_pv(per_page)", + details = "The new version of the API uses 'size' instead of 'per_page'", + with = "search_pv(size)" + ) + } + + if (lifecycle::is_present(page)) { + lifecycle::deprecate_warn( + when = "0.3.0", + what = "search_pv(page)", + details = "The new version of the API does not support the page parameter" + ) + } +} From d77af76881aea0f47488eccc9beb3cfb9056a282 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 12:26:09 -0600 Subject: [PATCH 072/103] feat: added in_range query function --- R/check-query.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/check-query.R b/R/check-query.R index c572e02e..122ffca9 100644 --- a/R/check-query.R +++ b/R/check-query.R @@ -40,7 +40,7 @@ check_query <- function(query, endpoint) { num_opr <- c("_gt", "_gte", "_lt", "_lte") str_opr <- c("_begins", "_contains") fltxt_opr <- c("_text_all", "_text_any", "_text_phrase") - all_opr <- c(simp_opr, num_opr, str_opr, fltxt_opr) + all_opr <- c(simp_opr, num_opr, str_opr, fltxt_opr, "_in_range") flds_flt <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$can_query == "y", ] From 722c84cb1e4df82497df16415938f77a996c94e0 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 12:30:45 -0600 Subject: [PATCH 073/103] fix: length check to avoid coercion warning --- R/check-query.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/check-query.R b/R/check-query.R index 122ffca9..dd8c6f2a 100644 --- a/R/check-query.R +++ b/R/check-query.R @@ -46,7 +46,10 @@ check_query <- function(query, endpoint) { apply_checks <- function(x, endpoint) { x <- swap_null_nms(x) - if (names(x) %in% c("_not", "_and", "_or") || is.na(names(x))) { + + # troublesome next line: 'length(x) = 2 > 1' in coercion to 'logical(1)' + # if (names(x) %in% c("_not", "_and", "_or") || is.na(names(x))) { + if (length(names(x)) > 1 || names(x) %in% c("_not", "_and", "_or") || is.na(names(x))) { lapply(x, FUN = apply_checks) } else if (names(x) %in% all_opr) { f1 <- flds_flt[flds_flt$field == names(x[[1]]), ] From eb93718d7747437f2878743fefa00d121deef2e0 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 12:31:44 -0600 Subject: [PATCH 074/103] feat: query checking in the new api version --- R/check-query.R | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/R/check-query.R b/R/check-query.R index dd8c6f2a..238c8824 100644 --- a/R/check-query.R +++ b/R/check-query.R @@ -10,28 +10,32 @@ is_int <- function(x) #' @noRd is_date <- function(x) - grepl("[12][[:digit:]]{3}-[01][[:digit:]]-[0-3][[:digit:]]", x) + grepl("^[12][[:digit:]]{3}-[01][[:digit:]]-[0-3][[:digit:]]$", x) #' @noRd one_check <- function(operator, field, value, f1) { - if (nrow(f1) == 0) stop2(field, " is not a valid field to query for your endpoint") if (f1$data_type == "date" && !is_date(value)) stop2("Bad date: ", value, ". Date must be in the format of yyyy-mm-dd") - if (f1$data_type %in% c("string", "fulltext") && !is.character(value)) + if (f1$data_type %in% c("bool", "int", "string", "fulltext") && !is.character(value)) stop2(value, " must be of type character") if (f1$data_type == "integer" && !is_int(value)) stop2(value, " must be an integer") + if (f1$data_type == "boolean" && !is.logical(value)) + stop2(value, " must be a boolean") + if (f1$data_type == "number" && !is.numeric(value)) + stop2(value, " must be a number") if ( - (operator %in% c("_begins", "_contains") && !(f1$data_type == "string")) || - (operator %in% c("_text_all", "_text_any", "_text_phrase") && - !(f1$data_type == "fulltext")) || - (f1$data_type %in% c("string", "fulltext") && - operator %in% c("_gt", "_gte", "_lt", "_lte")) - ) + # The new version of the API blurrs the distinction between string/fulltext fields. + # It looks like the string/fulltext functions can be used interchangeably + (operator %in% c("_begins", "_contains", "_text_all", "_text_any", "_text_phrase") && + !(f1$data_type == "fulltext" || f1$data_type == "string")) || + (f1$data_type %in% c("string", "fulltext") && + operator %in% c("_gt", "_gte", "_lt", "_lte"))) { stop2("You cannot use the operator ", operator, " with the field ", field) + } } #' @noRd @@ -42,7 +46,7 @@ check_query <- function(query, endpoint) { fltxt_opr <- c("_text_all", "_text_any", "_text_phrase") all_opr <- c(simp_opr, num_opr, str_opr, fltxt_opr, "_in_range") - flds_flt <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$can_query == "y", ] + flds_flt <- fieldsdf[fieldsdf$endpoint == endpoint, ] apply_checks <- function(x, endpoint) { x <- swap_null_nms(x) @@ -64,8 +68,8 @@ check_query <- function(query, endpoint) { ) } else { stop2( - names(x), " is either not a valid operator or not a ", - "queryable field for this endpoint" + names(x), " is not a valid operator or not a ", + "valid field for this endpoint" ) } } From 973409bd5f941fa025b101ff2c7149e2f725850c Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 12:37:18 -0600 Subject: [PATCH 075/103] feat: don't require sort fields to be fields param --- R/validate-args.R | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/R/validate-args.R b/R/validate-args.R index 8ad9f930..56aad289 100644 --- a/R/validate-args.R +++ b/R/validate-args.R @@ -44,13 +44,17 @@ validate_args <- function(api_key, fields, endpoint, method, all(is.numeric(per_page), length(per_page) == 1, per_page <= 1000), "per_page must be a numeric value less than or equal to 1,000" ) - if (!is.null(sort)) + + # Removed all(names(sort) %in% fields) Was it our requirement or the API's? + # It does seem to work when we don't request fields and rely on the API to sort + # using them. + if (!is.null(sort)) { asrt( all( - all(names(sort) %in% fields), all(sort %in% c("asc", "desc")), - !is.list(sort)), - "sort has to be a named character vector and each name has to be ", - "specified in the field argument. See examples" + all(sort %in% c("asc", "desc")), + !is.list(sort) + ), + "sort has to be a named character vector. See examples" ) } From 146ac99f83854572a39bab16d884a9521bc257a4 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 12:51:14 -0600 Subject: [PATCH 076/103] feat validation in the new api version --- R/validate-args.R | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/R/validate-args.R b/R/validate-args.R index 56aad289..4430892e 100644 --- a/R/validate-args.R +++ b/R/validate-args.R @@ -36,13 +36,10 @@ validate_args <- function(api_key, fields, endpoint, method, all(method %in% c("GET", "POST"), length(method) == 1), "method must be either 'GET' or 'POST'" ) + asrt( - all(is.numeric(page), length(page) == 1, page >= 1), - "page must be a numeric value greater than 1" - ) - asrt( - all(is.numeric(per_page), length(per_page) == 1, per_page <= 1000), - "per_page must be a numeric value less than or equal to 1,000" + all(is.numeric(size), length(size) == 1, size <= 1000), + "size must be a numeric value less than or equal to 1,000" ) # Removed all(names(sort) %in% fields) Was it our requirement or the API's? @@ -56,15 +53,23 @@ validate_args <- function(api_key, fields, endpoint, method, ), "sort has to be a named character vector. See examples" ) + } + + asrt( + any(is.null(after), !all_pages), + "'after' cannot be set when all_pages = TRUE" + ) } #' @noRd validate_groups <- function(endpoint, groups) { ok_grps <- unique(fieldsdf[fieldsdf$endpoint == endpoint, "group"]) + asrt( all(groups %in% ok_grps), "for the ", endpoint, " endpoint, group must be one of the following: ", paste(ok_grps, collapse = ", ") + ) ) } @@ -77,14 +82,14 @@ validate_pv_data <- function(data) { } #' @noRd -deprecate_warn_all <- function(error_browser, subent_cnts, mtchd_subent_only) { +deprecate_warn_all <- function(error_browser, subent_cnts, mtchd_subent_only, page, per_page) { if (!is.null(error_browser)) { lifecycle::deprecate_warn(when = "0.2.0", what = "search_pv(error_browser)") } # Was previously defaulting to FALSE and we're still defaulting to FALSE to # mirror the fact that the API doesn't support subent_cnts. Warn only if user - # tries to set subent_cnts to TRUE. - if (isTRUE(subent_cnts)) { + # tries to set subent_cnts to anything other than FALSE (test cases try NULL and 7) + if (!(is.logical(subent_cnts) && isFALSE(subent_cnts))) { lifecycle::deprecate_warn( when = "1.0.0", what = "search_pv(subent_cnts)", From 8b77cad7e5182a4b241f398b2b71799858825a62 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 13:00:49 -0600 Subject: [PATCH 077/103] feat: search_pv parameter changes --- R/search-pv.R | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index eb3cb43a..41407631 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -123,12 +123,8 @@ pad_patent_id <- function(patent_id) { request_apply <- function(ex_res, method, query, base_url, arg_list, api_key, ...) { matched_records <- ex_res$query_results[[1]] req_pages <- ceiling(matched_records / arg_list$opts$size) - if (req_pages < 1) { - stop2("No records matched your query...Can't download multiple pages") - } tmp <- lapply(seq_len(req_pages), function(i) { - arg_list$opts$offset <- (i - 1) * arg_list$opts$size x <- one_request(method, query, base_url, arg_list, api_key, ...) x <- process_resp(x) @@ -287,23 +283,26 @@ search_pv <- function(query, endpoint = "patent", subent_cnts = FALSE, mtchd_subent_only = lifecycle::deprecated(), - page = 1, - per_page = 1000, + page = lifecycle::deprecated(), + per_page = lifecycle::deprecated(), + size = 1000, + after = NULL, all_pages = FALSE, sort = NULL, method = "GET", error_browser = NULL, api_key = Sys.getenv("PATENTSVIEW_API_KEY"), ...) { - - validate_args(api_key, fields, endpoint, method, page, per_page, sort) - deprecate_warn_all(error_browser, subent_cnts, mtchd_subent_only) + validate_args(api_key, fields, endpoint, method, sort, after, size, all_pages) + deprecate_warn_all(error_browser, subent_cnts, mtchd_subent_only, page, per_page) + if (lifecycle::is_present(per_page)) size <- per_page if (is.list(query)) { - # check_query(query, endpoint) + check_query(query, endpoint) query <- jsonlite::toJSON(query, auto_unbox = TRUE) } - arg_list <- to_arglist(fields, page, per_page, sort) + + arg_list <- to_arglist(fields, size, sort, after) base_url <- get_base(endpoint) result <- one_request(method, query, base_url, arg_list, api_key, ...) From fc5655511d88bee041a8e5335ae8de10e2eb7d54 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 13:06:41 -0600 Subject: [PATCH 078/103] feat: new paging methodology --- R/search-pv.R | 57 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index 41407631..88ba516e 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -307,11 +307,62 @@ search_pv <- function(query, result <- one_request(method, query, base_url, arg_list, api_key, ...) result <- process_resp(result) - if (!all_pages) return(result) - full_data <- request_apply(result, method, query, base_url, arg_list, api_key, ...) - result$data[[1]] <- full_data + if (all_pages && result$query_result$total_hits == 0) { + stop2("No records matched your query...Can't download multiple pages") + } + + # return if we don't need to make additional API requests + if (!all_pages || + result$query_result$total_hits == 0 || + result$query_result$total_hits == nrow(result$data[[1]])) { + return(result) + } + + # Here we ignore the user's sort and instead have the API sort by the primary + # key for the requested endpoint. + primary_sort_key <- get_default_sort(endpoint) + + # We check what fields we got back from the first call. If the user didn't + # specify fields, we'd get back the API's defaults. We may need to request + # additional fields from the API so we can apply the users sort and then remove + # the additional fields. + returned_fields <- names(result$data[[1]]) + + if (!is.null(sort)) { + sort_fields <- names(sort) + additional_fields <- sort_fields[!(sort_fields %in% returned_fields)] + if (is.null(fields)) { + fields <- returned_fields # the default fields + } else { + fields <- fields # user passed + } + fields <- append(fields, additional_fields) + } else { + additional_fields <- c() + } + + arg_list <- to_arglist(fields, size, primary_sort_key, after) + paged_data <- request_apply(result, method, query, base_url, arg_list, api_key, ...) + + # apply the user's sort using order() + # was data.table::setorderv(paged_data, names(sort), ifelse(as.vector(sort) == "asc", 1, -1)) + + sort_order <- mapply(function(col, direction) { + if (direction == "asc") { + return(paged_data[[col]]) + } else { + return(-rank(paged_data[[col]], ties.method = "min")) # Invert for descending order + } + }, col = names(sort), direction = as.vector(sort), SIMPLIFY = FALSE) + + # Final sorting + paged_data <- paged_data[do.call(order, sort_order), , drop = FALSE] + + # remove the fields we added in order to do the user's sort ourselves + paged_data <- paged_data[, !names(paged_data) %in% additional_fields] + result$data[[1]] <- paged_data result } From 9a8b1df958f37dc3a70b14bb1e59de4a8dcd8a42 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 14:43:15 -0600 Subject: [PATCH 079/103] refactor: getting top level attributes --- R/get-fields.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/get-fields.R b/R/get-fields.R index 4088c8e5..a7438f0c 100644 --- a/R/get-fields.R +++ b/R/get-fields.R @@ -1,3 +1,9 @@ +#' @noRd +get_top_level_attributes <- function(endpoint) { + fieldsdf[fieldsdf$endpoint == endpoint & !grepl("\\.", fieldsdf$field), "field"] +} + + #' Get list of retrievable fields #' #' This function returns a vector of fields that you can retrieve from a given @@ -52,7 +58,7 @@ get_fields <- function(endpoint, groups = NULL) { pk <- get_ok_pk(endpoint) plural_entity <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$field == pk, "group"] - top_level_attributes <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group == plural_entity, "field"] + top_level_attributes <- get_top_level_attributes(endpoint) if (is.null(groups)) { c( From 2d48c505b2d21a054e4ad749e935d402ab6514e5 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 14:57:25 -0600 Subject: [PATCH 080/103] feat: retrieve_linked can retrieve documentation links --- R/get-fields.R | 2 +- R/search-pv.R | 87 +++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 77 insertions(+), 12 deletions(-) diff --git a/R/get-fields.R b/R/get-fields.R index a7438f0c..8eb53892 100644 --- a/R/get-fields.R +++ b/R/get-fields.R @@ -49,7 +49,7 @@ get_top_level_attributes <- function(endpoint) { #' } #' #' @export -get_fields <- function(endpoint, groups = NULL) { +get_fields <- function(endpoint, groups = NULL, include_pk = FALSE) { validate_endpoint(endpoint) # using API's shorthand notation, group names can be requested as fields instead of diff --git a/R/search-pv.R b/R/search-pv.R index 88ba516e..f9fb9fab 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -366,38 +366,103 @@ search_pv <- function(query, result } -#' Get Linked Data +#' Retrieve Linked Data #' #' Some of the endpoints now return HATEOAS style links to get more data. E.g., -#' the inventors endpoint may return a link such as: -#' "https://search.patentsview.org/api/v1/inventor/252373/" +#' the patent endpoint may return a link such as: +#' "https://search.patentsview.org/api/v1/inventor/fl:th_ln:jefferson-1/" #' -#' @param url The link that was returned by the API on a previous call. +#' @param url A link that was returned by the API on a previous call, an example +#' in the documentation or a Request URL from the \href{https://search.patentsview.org/swagger-ui/}{API's Swagger UI page}. +#' +#' @param encoded_url boolean to indicate whether the url has been URL encoded, defaults to FALSE. +#' Set to TRUE for Request URLs from Swagger UI. +#' +#' @param ... Curl options passed along to httr2's \code{\link[httr2]{req_options}} function. +#' +#' @return A list with the following three elements: +#' \describe{ +#' \item{data}{A list with one element - a named data frame containing the +#' data returned by the server. Each row in the data frame corresponds to a +#' single value for the primary entity. For example, if you search the +#' assignee endpoint, then the data frame will be on the assignee-level, +#' where each row corresponds to a single assignee. Fields that are not on +#' the assignee-level would be returned in list columns.} +#' +#' \item{query_results}{Entity counts across all pages of output (not just +#' the page returned to you).} +#' +#' \item{request}{Details of the GET HTTP request that was sent to the server.} +#' } #' -#' @inherit search_pv return #' @inheritParams search_pv #' #' @examples #' \dontrun{ #' #' retrieve_linked_data( -#' "https://search.patentsview.org/api/v1/cpc_subgroup/G01S7:4811/" -#' ) +#' "https://search.patentsview.org/api/v1/cpc_group/G01S7:4811/" +#' ) +#' +#' endpoint_url <- "https://search.patentsview.org/api/v1/patent/" +#' q_param <- '?q={"_text_any":{"patent_title":"COBOL cotton gin"}}' +#' s_and_o_params <- '&s=[{"patent_id": "asc" }]&o={"size":50}' +#' f_param <- '&f=["inventors.inventor_name_last","patent_id","patent_date","patent_title"]' +#' # (URL broken up to avoid a long line warning in this Rd) +#' +#' retrieve_linked_data( +#' paste0(endpoint_url, q_param, s_and_o_params, f_param) +#' ) +#' +#' retrieve_linked_data( +#' "https://search.patentsview.org/api/v1/patent/?q=%7B%22patent_date%22%3A%221976-01-06%22%7D", +#' encoded_url = TRUE +#' ) #' } #' #' @export retrieve_linked_data <- function(url, + encoded_url = FALSE, api_key = Sys.getenv("PATENTSVIEW_API_KEY"), ... ) { + if (encoded_url) { + url <- utils::URLdecode(url) + } - # Don't sent the API key to any domain other than patentsview.org - if (!grepl("^https://[^/]*\\.patentsview.org/", url)) { + # There wouldn't be url parameters on a HATEOAS link but we'll also accept + # example urls from the documentation, where there could be parameters + url_peices <- httr2::url_parse(url) + + # Only send the API key to subdomains of patentsview.org + if (!grepl("^.*\\.patentsview.org$", url_peices$hostname)) { stop2("retrieve_linked_data is only for patentsview.org urls") } + params <- list() + query <- "" + + if (!is.null(url_peices$query)) { + # Need to change f to fields vector, s to sort vector and o to opts + # There is probably a whizbangy better way to do this in R + if (!is.null(url_peices$query$f)) { + params$fields <- unlist(strsplit(gsub("[\\[\\]]", "", url_peices$query$f, perl = TRUE), ",\\s*")) + } + + if (!is.null(url_peices$query$s)) { + params$sort <- jsonlite::fromJSON(sub(".*s=([^&]*).*", "\\1", url)) + } + + if (!is.null(url_peices$query$o)) { + params$opts <- jsonlite::fromJSON(sub(".*o=([^&]*).*", "\\1", url)) + } + + query <- if (!is.null(url_peices$query$q)) sub(".*q=([^&]*).*", "\\1", url) else "" + url <- paste0(url_peices$scheme, "://", url_peices$hostname, url_peices$path) + } + # Go through one_request, which handles resend on throttle errors - # The API doesn't seem to mind ?q=&f=&o=&s= appended to the URL - res <- one_request("GET", "", url, list(), api_key, ...) + # The API doesn't seem to mind ?q=&f=&o=&s= appended to HATEOAS URLs + res <- one_request("GET", query, url, params, api_key, ...) process_resp(res) } From f8da8693c97e862f80c0041419a48f38b1d1234e Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 15:16:19 -0600 Subject: [PATCH 081/103] feat: search_pv parameter updates --- R/search-pv.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/search-pv.R b/R/search-pv.R index f9fb9fab..8c983882 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -70,6 +70,8 @@ patentsview_error_body <- function(resp) { if (httr2::resp_status(resp) == 400) c(httr2::resp_header(resp, "X-Status-Reason")) else NULL } +#' @noRd +one_request <- function(method, query, base_url, arg_list, api_key, ...) { if (method == "GET") { get_url <- get_get_url(query, base_url, arg_list) req <- httr2::request(get_url) |> From 37fd5244a1dcadb3d96f9434dc066786f8a2ca10 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 15:19:44 -0600 Subject: [PATCH 082/103] docs: API link update --- R/get-fields.R | 2 +- R/search-pv.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/get-fields.R b/R/get-fields.R index 8eb53892..b270de31 100644 --- a/R/get-fields.R +++ b/R/get-fields.R @@ -19,7 +19,7 @@ get_top_level_attributes <- function(endpoint) { #' endpoint's fields (i.e., do not filter the field list based on group #' membership). See the field tables located online to see which groups you #' can specify for a given endpoint (e.g., the -#' \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference/#patent}{patent +#' \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference/#patent}{patents #' endpoint table}), or use the \code{fieldsdf} table #' (e.g., \code{unique(fieldsdf[fieldsdf$endpoint == "patent", "group"])}). #' diff --git a/R/search-pv.R b/R/search-pv.R index 8c983882..cb27cdf7 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -171,7 +171,7 @@ get_default_sort <- function(endpoint) { #' #' \item An object of class \code{pv_query}, which you create by calling one #' of the functions found in the \code{\link{qry_funs}} list...See the -#' \href{https://docs.ropensci.org/patentsview/articles/writing-queries.html}{writing +#' \href{../articles/writing-queries.html}{writing #' queries vignette} for details.\cr #' E.g., \code{qry_funs$gte(patent_date = "2007-01-04")} #' } @@ -179,7 +179,7 @@ get_default_sort <- function(endpoint) { #' A value of \code{NULL} indicates to the API that it should return the default fields #' for that endpoint. Acceptable fields for a given endpoint can be found at the API's #' online documentation (e.g., check out the field list for the -#' \href{https://patentsview.org/apis/api-endpoints/patents}{patents +#' \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference#patent}{patents #' endpoint}) or by viewing the \code{fieldsdf} data frame #' (\code{View(fieldsdf)}). You can also use \code{\link{get_fields}} to list #' out the fields available for a given endpoint. From 79e232a5ac74d66e267af8e03d2506cd407feda3 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 15:21:48 -0600 Subject: [PATCH 083/103] docs: parameter and example changes --- R/get-fields.R | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/R/get-fields.R b/R/get-fields.R index b270de31..a66365af 100644 --- a/R/get-fields.R +++ b/R/get-fields.R @@ -22,6 +22,9 @@ get_top_level_attributes <- function(endpoint) { #' \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference/#patent}{patents #' endpoint table}), or use the \code{fieldsdf} table #' (e.g., \code{unique(fieldsdf[fieldsdf$endpoint == "patent", "group"])}). +#' @param include_pk Boolean on whether to include the endpoint's primary key, +#' defaults to FALSE. The primary key is needed if you plan on calling +#' \code{\link{unnest_pv_data}} on the results of \code{\link{search_pv}} #' #' @return A character vector with field names. #' @@ -47,6 +50,19 @@ get_top_level_attributes <- function(endpoint) { #' fields = fields #' ) #' } +#' # Get the nested inventors fields and the primary key in order to call unnest_pv_data +#' # on the returned data. unnest_pv_data would throw an error if the primary key was +#' # not present in the results. +#' fields <- get_fields(endpoint = "patent", groups = c("inventors"), include_pk = TRUE) +#' +#' \dontrun{ +#' # ...Then pass to search_pv and unnest the results +#' results <- search_pv( +#' query = '{"_gte":{"patent_date":"2007-01-04"}}', +#' fields = fields +#' ) +#' unnest_pv_data(results$data) +#' } #' #' @export get_fields <- function(endpoint, groups = NULL, include_pk = FALSE) { From 745570d3a03274889a8d0bf99b43117cc448073a Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 15:23:57 -0600 Subject: [PATCH 084/103] docs: plural to singular endpoints --- R/get-fields.R | 6 +++--- R/search-pv.R | 14 +++++++------- R/unnest-pv-data.R | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/R/get-fields.R b/R/get-fields.R index a66365af..062c804e 100644 --- a/R/get-fields.R +++ b/R/get-fields.R @@ -29,8 +29,8 @@ get_top_level_attributes <- function(endpoint) { #' @return A character vector with field names. #' #' @examples -#' # Get all assignee-level fields for the patent endpoint: -#' fields <- get_fields(endpoint = "patent", groups = "assignees") +#' # Get all top level (non-nested) fields for the patent endpoint: +#' fields <- get_fields(endpoint = "patent", groups = c("patents")) #' #' # ...Then pass to search_pv: #' \dontrun{ @@ -40,7 +40,7 @@ get_top_level_attributes <- function(endpoint) { #' fields = fields #' ) #' } -#' # Get all patent and assignee-level fields for the patent endpoint: +#' # Get unnested patent and assignee-level fields for the patent endpoint: #' fields <- get_fields(endpoint = "patent", groups = c("assignees", "patents")) #' #' \dontrun{ diff --git a/R/search-pv.R b/R/search-pv.R index cb27cdf7..7eaff4fb 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -208,25 +208,25 @@ get_default_sort <- function(endpoint) { #' \code{all_pages = TRUE}, the value of \code{size} is ignored. #' @param sort A named character vector where the name indicates the field to #' sort by and the value indicates the direction of sorting (direction should -#' be either "asc" or "desc"). For example, \code{sort = c("patent_number" = -#' "asc")} or \cr\code{sort = c("patent_number" = "asc", "patent_date" = +#' be either "asc" or "desc"). For example, \code{sort = c("patent_id" = +#' "asc")} or \cr\code{sort = c("patent_id" = "asc", "patent_date" = #' "desc")}. \code{sort = NULL} (the default) means do not sort the results. #' You must include any fields that you wish to sort by in \code{fields}. #' @param method The HTTP method that you want to use to send the request. #' Possible values include "GET" or "POST". Use the POST method when #' your query is very long (say, over 2,000 characters in length). #' @param error_browser `r lifecycle::badge("deprecated")` -#' @param api_key API key. See \href{https://patentsview.org/apis/keyrequest}{ -#' Here} for info on creating a key. -#' @param ... Arguments passed along to httr's \code{\link[httr]{GET}} or -#' \code{\link[httr]{POST}} function. +#' @param api_key API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key +#' \href{https://patentsview.org/apis/keyrequest}{here}. +#' @param ... Curl options passed along to httr2's \code{\link[httr2]{req_options}} +#' when we do GETs or POSTs. #' #' @return A list with the following three elements: #' \describe{ #' \item{data}{A list with one element - a named data frame containing the #' data returned by the server. Each row in the data frame corresponds to a #' single value for the primary entity. For example, if you search the -#' assignees endpoint, then the data frame will be on the assignee-level, +#' assignee endpoint, then the data frame will be on the assignee-level, #' where each row corresponds to a single assignee. Fields that are not on #' the assignee-level would be returned in list columns.} #' diff --git a/R/unnest-pv-data.R b/R/unnest-pv-data.R index f6af8470..19b29370 100644 --- a/R/unnest-pv-data.R +++ b/R/unnest-pv-data.R @@ -65,8 +65,8 @@ get_ok_pk <- function(endpoint_or_entity) { #' inside it. See examples. #' @param pk The column/field name that will link the data frames together. This #' should be the unique identifier for the primary entity. For example, if you -#' used the patents endpoint in your call to \code{search_pv}, you could -#' specify \code{pk = "patent_number"}. \strong{This identifier has to have +#' used the patent endpoint in your call to \code{search_pv}, you could +#' specify \code{pk = "patent_id"}. \strong{This identifier has to have #' been included in your \code{fields} vector when you called #' \code{search_pv}}. You can use \code{\link{get_ok_pk}} to suggest a #' potential primary key for your data. From 472eb896c196a7fef33e0f2e9dc0a1c3e312ea24 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 15:41:01 -0600 Subject: [PATCH 085/103] feat validation in the new api version --- R/validate-args.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/validate-args.R b/R/validate-args.R index 4430892e..31dd6467 100644 --- a/R/validate-args.R +++ b/R/validate-args.R @@ -69,7 +69,6 @@ validate_groups <- function(endpoint, groups) { all(groups %in% ok_grps), "for the ", endpoint, " endpoint, group must be one of the following: ", paste(ok_grps, collapse = ", ") - ) ) } From 1511e6176564dbf0afd7da269e66df923751a508 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 15:57:33 -0600 Subject: [PATCH 086/103] generated files --- man/cast_pv_data.Rd | 2 +- man/fieldsdf.Rd | 2 +- man/get_fields.Rd | 27 +++++++++++--- man/get_ok_pk.Rd | 15 ++++---- man/pad_patent_id.Rd | 24 +++++++++++++ man/patentsview-package.Rd | 2 +- man/qry_funs.Rd | 17 ++++++--- man/retrieve_linked_data.Rd | 52 ++++++++++++++++++--------- man/search_pv.Rd | 72 +++++++++++++++++++++++-------------- man/unnest_pv_data.Rd | 6 ++-- man/with_qfuns.Rd | 23 ++++++------ 11 files changed, 166 insertions(+), 76 deletions(-) create mode 100644 man/pad_patent_id.Rd diff --git a/man/cast_pv_data.Rd b/man/cast_pv_data.Rd index 829b3c13..3ed42d36 100644 --- a/man/cast_pv_data.Rd +++ b/man/cast_pv_data.Rd @@ -23,7 +23,7 @@ they have their most appropriate data types (e.g., date, numeric, etc.). \dontrun{ fields <- c("patent_date", "patent_title", "patent_year") -res <- search_pv(query = "{\"patent_number\":\"5116621\"}", fields = fields) +res <- search_pv(query = "{\"patent_id\":\"5116621\"}", fields = fields) cast_pv_data(data = res$data) } diff --git a/man/fieldsdf.Rd b/man/fieldsdf.Rd index b55ddee3..aa107283 100644 --- a/man/fieldsdf.Rd +++ b/man/fieldsdf.Rd @@ -22,7 +22,7 @@ fieldsdf A data frame containing the names of retrievable fields for each of the endpoints. You can find this data on the API's online documentation for each endpoint as well (e.g., the -\href{https://patentsview.org/apis/api-endpoints/patents}{patents endpoint +\href{https://search.patentsview.org/docs/docs/Search\%20API/SearchAPIReference/#patent}{patent endpoint field list table}). } \keyword{datasets} diff --git a/man/get_fields.Rd b/man/get_fields.Rd index 4dc23060..b6ce3639 100644 --- a/man/get_fields.Rd +++ b/man/get_fields.Rd @@ -4,7 +4,7 @@ \alias{get_fields} \title{Get list of retrievable fields} \usage{ -get_fields(endpoint, groups = NULL) +get_fields(endpoint, groups = NULL, include_pk = FALSE) } \arguments{ \item{endpoint}{The API endpoint whose field list you want to get. See @@ -15,9 +15,13 @@ returned. A value of \code{NULL} indicates that you want all of the endpoint's fields (i.e., do not filter the field list based on group membership). See the field tables located online to see which groups you can specify for a given endpoint (e.g., the -\href{https://search.patentsview.org/docs/docs/Search\%20API/SearchAPIReference/#patent}{patent +\href{https://search.patentsview.org/docs/docs/Search\%20API/SearchAPIReference/#patent}{patents endpoint table}), or use the \code{fieldsdf} table (e.g., \code{unique(fieldsdf[fieldsdf$endpoint == "patent", "group"])}).} + +\item{include_pk}{Boolean on whether to include the endpoint's primary key, +defaults to FALSE. The primary key is needed if you plan on calling +\code{\link{unnest_pv_data}} on the results of \code{\link{search_pv}}} } \value{ A character vector with field names. @@ -30,8 +34,8 @@ entity group(s) as well (which is recommended, given the large number of possible fields for each endpoint). } \examples{ -# Get all assignee-level fields for the patent endpoint: -fields <- get_fields(endpoint = "patent", groups = "assignees") +# Get all top level (non-nested) fields for the patent endpoint: +fields <- get_fields(endpoint = "patent", groups = c("patents")) # ...Then pass to search_pv: \dontrun{ @@ -41,7 +45,7 @@ search_pv( fields = fields ) } -# Get all patent and assignee-level fields for the patent endpoint: +# Get unnested patent and assignee-level fields for the patent endpoint: fields <- get_fields(endpoint = "patent", groups = c("assignees", "patents")) \dontrun{ @@ -51,5 +55,18 @@ search_pv( fields = fields ) } +# Get the nested inventors fields and the primary key in order to call unnest_pv_data +# on the returned data. unnest_pv_data would throw an error if the primary key was +# not present in the results. +fields <- get_fields(endpoint = "patent", groups = c("inventors"), include_pk = TRUE) + +\dontrun{ +# ...Then pass to search_pv and unnest the results +results <- search_pv( + query = '{"_gte":{"patent_date":"2007-01-04"}}', + fields = fields +) +unnest_pv_data(results$data) +} } diff --git a/man/get_ok_pk.Rd b/man/get_ok_pk.Rd index 6bd223c9..36d3dbf3 100644 --- a/man/get_ok_pk.Rd +++ b/man/get_ok_pk.Rd @@ -4,11 +4,11 @@ \alias{get_ok_pk} \title{Get OK primary key} \usage{ -get_ok_pk(endpoint) +get_ok_pk(endpoint_or_entity) } \arguments{ -\item{endpoint}{The endpoint which you would like to know a potential primary -key for.} +\item{endpoint_or_entity}{The endpoint or entity name for which you +would like to know a potential primary key for.} } \value{ The name of a primary key (\code{pk}) that you could pass to @@ -19,11 +19,12 @@ This function suggests a value that you could use for the \code{pk} argument in \code{\link{unnest_pv_data}}, based on the endpoint you searched. It will return a potential unique identifier for a given entity (i.e., a given endpoint). For example, it will return "patent_id" when -\code{endpoint = "patent"}. +\code{endpoint_or_entity = "patent"}. It would return the same value if +the entity name "patents" was passed via \code{get_ok_pk(names(pv_return$data))} +where pv_return was returned from \code{\link{search_pv}}. } \examples{ -get_ok_pk(endpoint = "inventor") -get_ok_pk(endpoint = "cpc_subclass") -get_ok_pk("publication/rel_app_text") +get_ok_pk(endpoint_or_entity = "inventor") # Returns "inventor_id" +get_ok_pk(endpoint_or_entity = "cpc_group") # Returns "cpc_group_id" } diff --git a/man/pad_patent_id.Rd b/man/pad_patent_id.Rd new file mode 100644 index 00000000..544ee243 --- /dev/null +++ b/man/pad_patent_id.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/search-pv.R +\name{pad_patent_id} +\alias{pad_patent_id} +\title{Pad patent_id} +\usage{ +pad_patent_id(patent_id) +} +\arguments{ +\item{patent_id}{The patent_id that needs to be padded. It can +be the patent_id for a utility, design, plant or reissue patent.} +} +\description{ +This function strategically pads a patent_id with zeroes to 8 characters, +needed only for custom paging that uses sorts by patent_id. +} +\examples{ +\dontrun{ +padded <- pad_patent_id("RE36479") + +padded2 <- pad_patent_id("3930306") +} + +} diff --git a/man/patentsview-package.Rd b/man/patentsview-package.Rd index 621a26f7..6b688378 100644 --- a/man/patentsview-package.Rd +++ b/man/patentsview-package.Rd @@ -6,7 +6,7 @@ \alias{patentsview-package} \title{patentsview: An R Client to the 'PatentsView' API} \description{ -Provides functions to simplify the 'PatentsView' API (\url{https://patentsview.org/apis/purpose}) query language, send GET and POST requests to the API's seven endpoints, and parse the data that comes back. +Provides functions to simplify the 'PatentsView' API (\url{https://patentsview.org/apis/purpose}) query language, send GET and POST requests to the API's twenty seven endpoints, and parse the data that comes back. } \seealso{ Useful links: diff --git a/man/qry_funs.Rd b/man/qry_funs.Rd index cff6667f..6a7c90e1 100644 --- a/man/qry_funs.Rd +++ b/man/qry_funs.Rd @@ -5,7 +5,7 @@ \alias{qry_funs} \title{List of query functions} \format{ -An object of class \code{list} of length 14. +An object of class \code{list} of length 15. } \usage{ qry_funs @@ -16,9 +16,8 @@ list with a print method attached to it. } \description{ A list of functions that make it easy to write PatentsView queries. See the -details section below for a list of the 14 functions, as well as the -\href{https://docs.ropensci.org/patentsview/articles/writing-queries.html}{writing -queries vignette} for further details. +details section below for a list of the 15 functions, as well as the +\href{../articles/writing-queries.html}{writing queries vignette} for further details. } \details{ \strong{1. Comparison operator functions} \cr @@ -66,11 +65,21 @@ There is 1 negation function: \itemize{ \item \code{not} - The comparison is not true } + +\strong{4. Convenience function} \cr + +There is 1 convenience function: +\itemize{ +\item \code{in_range} - Builds a <= x <= b query +} } \examples{ qry_funs$eq(patent_date = "2001-01-01") qry_funs$not(qry_funs$eq(patent_date = "2001-01-01")) +qry_funs$in_range(patent_year = c(2010, 2021)) + +qry_funs$in_range(patent_date = c("1976-01-01", "1983-02-28")) } \keyword{datasets} diff --git a/man/retrieve_linked_data.Rd b/man/retrieve_linked_data.Rd index 6a0e347f..90da02bd 100644 --- a/man/retrieve_linked_data.Rd +++ b/man/retrieve_linked_data.Rd @@ -2,18 +2,26 @@ % Please edit documentation in R/search-pv.R \name{retrieve_linked_data} \alias{retrieve_linked_data} -\title{Get Linked Data} +\title{Retrieve Linked Data} \usage{ -retrieve_linked_data(url, api_key = Sys.getenv("PATENTSVIEW_API_KEY"), ...) +retrieve_linked_data( + url, + encoded_url = FALSE, + api_key = Sys.getenv("PATENTSVIEW_API_KEY"), + ... +) } \arguments{ -\item{url}{The link that was returned by the API on a previous call.} +\item{url}{A link that was returned by the API on a previous call, an example +in the documentation or a Request URL from the \href{https://search.patentsview.org/swagger-ui/}{API's Swagger UI page}.} + +\item{encoded_url}{boolean to indicate whether the url has been URL encoded, defaults to FALSE. +Set to TRUE for Request URLs from Swagger UI.} -\item{api_key}{API key. See \href{https://patentsview.org/apis/keyrequest}{ -Here} for info on creating a key.} +\item{api_key}{API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key +\href{https://patentsview.org/apis/keyrequest}{here}.} -\item{...}{Arguments passed along to httr's \code{\link[httr]{GET}} or -\code{\link[httr]{POST}} function.} +\item{...}{Curl options passed along to httr2's \code{\link[httr2]{req_options}} function.} } \value{ A list with the following three elements: @@ -21,30 +29,42 @@ A list with the following three elements: \item{data}{A list with one element - a named data frame containing the data returned by the server. Each row in the data frame corresponds to a single value for the primary entity. For example, if you search the -assignees endpoint, then the data frame will be on the assignee-level, +assignee endpoint, then the data frame will be on the assignee-level, where each row corresponds to a single assignee. Fields that are not on the assignee-level would be returned in list columns.} \item{query_results}{Entity counts across all pages of output (not just the page returned to you).} -\item{request}{Details of the HTTP request that was sent to the server. -When you set \code{all_pages = TRUE}, you will only get a sample request. -In other words, you will not be given multiple requests for the multiple -calls that were made to the server (one for each page of results).} +\item{request}{Details of the GET HTTP request that was sent to the server.} } } \description{ Some of the endpoints now return HATEOAS style links to get more data. E.g., -the inventors endpoint may return a link such as: -"https://search.patentsview.org/api/v1/inventor/252373/" +the patent endpoint may return a link such as: +"https://search.patentsview.org/api/v1/inventor/fl:th_ln:jefferson-1/" } \examples{ \dontrun{ retrieve_linked_data( - "https://search.patentsview.org/api/v1/cpc_subgroup/G01S7:4811/" - ) + "https://search.patentsview.org/api/v1/cpc_group/G01S7:4811/" +) + +endpoint_url <- "https://search.patentsview.org/api/v1/patent/" +q_param <- '?q={"_text_any":{"patent_title":"COBOL cotton gin"}}' +s_and_o_params <- '&s=[{"patent_id": "asc" }]&o={"size":50}' +f_param <- '&f=["inventors.inventor_name_last","patent_id","patent_date","patent_title"]' +# (URL broken up to avoid a long line warning in this Rd) + +retrieve_linked_data( + paste0(endpoint_url, q_param, s_and_o_params, f_param) +) + +retrieve_linked_data( + "https://search.patentsview.org/api/v1/patent/?q=\%7B\%22patent_date\%22\%3A\%221976-01-06\%22\%7D", + encoded_url = TRUE +) } } diff --git a/man/search_pv.Rd b/man/search_pv.Rd index d767abc8..bd00c370 100644 --- a/man/search_pv.Rd +++ b/man/search_pv.Rd @@ -10,8 +10,10 @@ search_pv( endpoint = "patent", subent_cnts = FALSE, mtchd_subent_only = lifecycle::deprecated(), - page = 1, - per_page = 1000, + page = lifecycle::deprecated(), + per_page = lifecycle::deprecated(), + size = 1000, + after = NULL, all_pages = FALSE, sort = NULL, method = "GET", @@ -32,42 +34,55 @@ E.g., \code{list("_gte" = list("patent_date" = "2007-01-04"))} \item An object of class \code{pv_query}, which you create by calling one of the functions found in the \code{\link{qry_funs}} list...See the -\href{https://docs.ropensci.org/patentsview/articles/writing-queries.html}{writing +\href{../articles/writing-queries.html}{writing queries vignette} for details.\cr E.g., \code{qry_funs$gte(patent_date = "2007-01-04")} }} \item{fields}{A character vector of the fields that you want returned to you. -A value of \code{NULL} indicates that the default fields should be -returned. Acceptable fields for a given endpoint can be found at the API's +A value of \code{NULL} indicates to the API that it should return the default fields +for that endpoint. Acceptable fields for a given endpoint can be found at the API's online documentation (e.g., check out the field list for the -\href{https://patentsview.org/apis/api-endpoints/patents}{patents +\href{https://search.patentsview.org/docs/docs/Search\%20API/SearchAPIReference#patent}{patents endpoint}) or by viewing the \code{fieldsdf} data frame (\code{View(fieldsdf)}). You can also use \code{\link{get_fields}} to list -out the fields available for a given endpoint.} +out the fields available for a given endpoint. + +Nested fields can be fully qualified, e.g., "application.filing_date" or the +group name can be used to retrieve all of its nested fields, E.g. "application". +The latter would be similar to passing \code{get_fields("patent", group = "application")} +except it's the API that decides what fields to return.} \item{endpoint}{The web service resource you wish to search. Use \code{get_endpoints()} to list the available endpoints.} -\item{subent_cnts}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Non-matched subentities -will always be returned under the new version of the API} +\item{subent_cnts}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} This is always FALSE in the +new version of the API as the total counts of unique subentities is no longer available.} \item{mtchd_subent_only}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} This is always -FALSE in the new version of the API.} +FALSE in the new version of the API as non-matched subentities +will always be returned.} + +\item{page}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} The new version of the API does not use +\code{page} as a parameter for paging, it uses \code{after}.} + +\item{per_page}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} The API now uses \code{size}} -\item{page}{The page number of the results that should be returned.} +\item{size}{The number of records that should be returned per page. This +value can be as high as 1,000 (e.g., \code{size = 1000}).} -\item{per_page}{The number of records that should be returned per page. This -value can be as high as 1,000 (e.g., \code{per_page = 1000}).} +\item{after}{A list of sort key values that defaults to NULL. This +exposes the API's paging parameter for users who want to implement their own +paging. It cannot be set when \code{all_pages = TRUE} as the R package manipulates it +for users automatically. See \href{../articles/result-set-paging.html}{result set paging}} \item{all_pages}{Do you want to download all possible pages of output? If -\code{all_pages = TRUE}, the values of \code{page} and \code{per_page} are -ignored.} +\code{all_pages = TRUE}, the value of \code{size} is ignored.} \item{sort}{A named character vector where the name indicates the field to sort by and the value indicates the direction of sorting (direction should -be either "asc" or "desc"). For example, \code{sort = c("patent_number" = - "asc")} or \cr\code{sort = c("patent_number" = "asc", "patent_date" = +be either "asc" or "desc"). For example, \code{sort = c("patent_id" = + "asc")} or \cr\code{sort = c("patent_id" = "asc", "patent_date" = "desc")}. \code{sort = NULL} (the default) means do not sort the results. You must include any fields that you wish to sort by in \code{fields}.} @@ -77,11 +92,11 @@ your query is very long (say, over 2,000 characters in length).} \item{error_browser}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}} -\item{api_key}{API key. See \href{https://patentsview.org/apis/keyrequest}{ -Here} for info on creating a key.} +\item{api_key}{API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key +\href{https://patentsview.org/apis/keyrequest}{here}.} -\item{...}{Arguments passed along to httr's \code{\link[httr]{GET}} or -\code{\link[httr]{POST}} function.} +\item{...}{Curl options passed along to httr2's \code{\link[httr2]{req_options}} +when we do GETs or POSTs.} } \value{ A list with the following three elements: @@ -89,7 +104,7 @@ A list with the following three elements: \item{data}{A list with one element - a named data frame containing the data returned by the server. Each row in the data frame corresponds to a single value for the primary entity. For example, if you search the -assignees endpoint, then the data frame will be on the assignee-level, +assignee endpoint, then the data frame will be on the assignee-level, where each row corresponds to a single assignee. Fields that are not on the assignee-level would be returned in list columns.} @@ -119,8 +134,8 @@ search_pv( search_pv( query = qry_funs$gt(patent_year = 2010), method = "POST", - fields = "patent_number", - sort = c("patent_number" = "asc") + fields = "patent_id", + sort = c("patent_id" = "asc") ) search_pv( @@ -135,9 +150,14 @@ search_pv( ) search_pv( - query = qry_funs$contains(inventors_at_grant.name_last = "Smith"), + query = qry_funs$contains(inventors.inventor_name_last = "Smith"), endpoint = "patent", - config = httr::timeout(40) + timeout = 40 +) + +search_pv( + query = qry_funs$eq(patent_id = "11530080"), + fields = "application" ) } diff --git a/man/unnest_pv_data.Rd b/man/unnest_pv_data.Rd index a2c528ef..46de5723 100644 --- a/man/unnest_pv_data.Rd +++ b/man/unnest_pv_data.Rd @@ -4,7 +4,7 @@ \alias{unnest_pv_data} \title{Unnest PatentsView data} \usage{ -unnest_pv_data(data, pk = get_ok_pk(names(data))) +unnest_pv_data(data, pk = NULL) } \arguments{ \item{data}{The data returned by \code{\link{search_pv}}. This is the first @@ -14,8 +14,8 @@ inside it. See examples.} \item{pk}{The column/field name that will link the data frames together. This should be the unique identifier for the primary entity. For example, if you -used the patents endpoint in your call to \code{search_pv}, you could -specify \code{pk = "patent_number"}. \strong{This identifier has to have +used the patent endpoint in your call to \code{search_pv}, you could +specify \code{pk = "patent_id"}. \strong{This identifier has to have been included in your \code{fields} vector when you called \code{search_pv}}. You can use \code{\link{get_ok_pk}} to suggest a potential primary key for your data.} diff --git a/man/with_qfuns.Rd b/man/with_qfuns.Rd index 40f755bb..ba847615 100644 --- a/man/with_qfuns.Rd +++ b/man/with_qfuns.Rd @@ -23,26 +23,25 @@ try assigning the \code{\link{qry_funs}} list into your global environment with: \code{list2env(qry_funs, envir = globalenv())}. } \examples{ -# Without with_qfuns, we have to do: qry_funs$and( qry_funs$gte(patent_date = "2007-01-01"), qry_funs$text_phrase(patent_abstract = c("computer program")), qry_funs$or( - qry_funs$eq(inventor_last_name = "ihaka"), - qry_funs$eq(inventor_first_name = "chris") + qry_funs$eq(inventors.inventor_name_last = "Ihaka"), + qry_funs$eq(inventors.inventor_name_last = "Chris") ) ) -#...With it, this becomes: +# ...With it, this becomes: with_qfuns( - and( - gte(patent_date = "2007-01-01"), - text_phrase(patent_abstract = c("computer program")), - or( - eq(inventor_last_name = "ihaka"), - eq(inventor_first_name = "chris") - ) - ) + and( + gte(patent_date = "2007-01-01"), + text_phrase(patent_abstract = c("computer program")), + or( + eq(inventors.inventor_name_last = "Ihaka"), + eq(inventors.inventor_name_last = "Chris") + ) + ) ) } From 61677210072ae004f62f12deac7bead28c828f23 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 17:17:31 -0600 Subject: [PATCH 087/103] generated files --- docs/reference/cast_pv_data.html | 190 ++++--------- docs/reference/fieldsdf.html | 192 +++++--------- docs/reference/figures/lifecycle-archived.svg | 1 + docs/reference/figures/lifecycle-defunct.svg | 1 + .../figures/lifecycle-deprecated.svg | 1 + .../figures/lifecycle-experimental.svg | 1 + docs/reference/figures/lifecycle-maturing.svg | 1 + .../figures/lifecycle-questioning.svg | 1 + docs/reference/figures/lifecycle-stable.svg | 1 + .../figures/lifecycle-superseded.svg | 1 + docs/reference/get_endpoints.html | 171 +++--------- docs/reference/get_fields.html | 30 ++- docs/reference/get_ok_pk.html | 24 +- docs/reference/index.html | 249 +++++------------- docs/reference/pad_patent_id.html | 139 ++++++++++ docs/reference/patentsview-package.html | 4 +- docs/reference/qry_funs.html | 242 ++++++----------- docs/reference/retrieve_linked_data.html | 199 ++++++++++++++ docs/reference/search_pv.html | 73 +++-- docs/reference/unnest_pv_data.html | 6 +- docs/reference/with_qfuns.html | 241 ++++++----------- 21 files changed, 820 insertions(+), 948 deletions(-) create mode 100644 docs/reference/figures/lifecycle-archived.svg create mode 100644 docs/reference/figures/lifecycle-defunct.svg create mode 100644 docs/reference/figures/lifecycle-deprecated.svg create mode 100644 docs/reference/figures/lifecycle-experimental.svg create mode 100644 docs/reference/figures/lifecycle-maturing.svg create mode 100644 docs/reference/figures/lifecycle-questioning.svg create mode 100644 docs/reference/figures/lifecycle-stable.svg create mode 100644 docs/reference/figures/lifecycle-superseded.svg create mode 100644 docs/reference/pad_patent_id.html create mode 100644 docs/reference/retrieve_linked_data.html diff --git a/docs/reference/cast_pv_data.html b/docs/reference/cast_pv_data.html index 08ef48c0..ecc3f786 100644 --- a/docs/reference/cast_pv_data.html +++ b/docs/reference/cast_pv_data.html @@ -1,70 +1,13 @@ - - - - - - - -Cast PatentsView data — cast_pv_data • patentsview - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Cast PatentsView data — cast_pv_data • patentsview + + - - - - -
    -
    - -
    - -
    +
    -

    This will cast the data fields returned by search_pv so that +

    This will cast the data fields returned by search_pv so that they have their most appropriate data types (e.g., date, numeric, etc.).

    -
    cast_pv_data(data)
    +
    +
    cast_pv_data(data)
    +
    -

    Arguments

    - - - - - - -
    data

    The data returned by search_pv. This is the first +

    +

    Arguments

    +
    data
    +

    The data returned by search_pv. This is the first element of the three-element result object you got back from search_pv. It should be a list of length 1, with one data frame -inside it. See examples.

    - -

    Value

    +inside it. See examples.

    -

    The same type of object that you passed into cast_pv_data.

    +
    +
    +

    Value

    + -

    Examples

    -
    if (FALSE) { - -fields <- c("patent_date", "patent_title", "patent_year") -res <- search_pv(query = "{\"patent_number\":\"5116621\"}", fields = fields) -cast_pv_data(data = res$data) -} +

    The same type of object that you passed into cast_pv_data.

    +
    -
    +
    +

    Examples

    +
    if (FALSE) {
    +
    +fields <- c("patent_date", "patent_title", "patent_year")
    +res <- search_pv(query = "{\"patent_id\":\"5116621\"}", fields = fields)
    +cast_pv_data(data = res$data)
    +}
    +
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/fieldsdf.html b/docs/reference/fieldsdf.html index 11fc300e..19611599 100644 --- a/docs/reference/fieldsdf.html +++ b/docs/reference/fieldsdf.html @@ -1,74 +1,16 @@ - - - - - - - -Fields data frame — fieldsdf • patentsview - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Fields data frame — fieldsdf • patentsview - - + + - - -
    -
    - -
    - -
    +
    -

    A data frame containing the names of retrievable and queryable fields for -each of the 7 API endpoints. A yes/no flag (can_query) indicates -which fields can be included in the user's query. You can also find this -data on the API's online documentation for each endpoint as well (e.g., -the patents -endpoint field list table)

    +

    A data frame containing the names of retrievable fields for each of the +endpoints. You can find this data on the API's online documentation for each +endpoint as well (e.g., the +patent endpoint +field list table).

    -
    fieldsdf
    +
    +
    fieldsdf
    +
    +
    +

    Format

    +

    A data frame with the following columns:

    endpoint
    +

    The endpoint that this field record is for

    -

    Format

    +
    field
    +

    The complete name of the field, including the parent group if +applicable

    -

    A data frame with 992 rows and 7 variables:

    -
    endpoint

    The endpoint that this field record is for

    -
    field

    The name of the field

    -
    data_type

    The field's data type (string, date, float, integer, - fulltext)

    -
    can_query

    An indicator for whether the field can be included in - the user query for the given endpoint

    -
    group

    The group the field belongs to

    -
    common_name

    The field's common name

    -
    description

    A description of the field

    +
    data_type
    +

    The field's input data type

    -
    +
    group
    +

    The group the field belongs to

    +
    common_name
    +

    The field name without the parent group structure

    + + +
    +
    -
    - +
    - - + + diff --git a/docs/reference/figures/lifecycle-archived.svg b/docs/reference/figures/lifecycle-archived.svg new file mode 100644 index 00000000..48f72a6f --- /dev/null +++ b/docs/reference/figures/lifecycle-archived.svg @@ -0,0 +1 @@ + lifecyclelifecyclearchivedarchived \ No newline at end of file diff --git a/docs/reference/figures/lifecycle-defunct.svg b/docs/reference/figures/lifecycle-defunct.svg new file mode 100644 index 00000000..01452e5f --- /dev/null +++ b/docs/reference/figures/lifecycle-defunct.svg @@ -0,0 +1 @@ +lifecyclelifecycledefunctdefunct \ No newline at end of file diff --git a/docs/reference/figures/lifecycle-deprecated.svg b/docs/reference/figures/lifecycle-deprecated.svg new file mode 100644 index 00000000..4baaee01 --- /dev/null +++ b/docs/reference/figures/lifecycle-deprecated.svg @@ -0,0 +1 @@ +lifecyclelifecycledeprecateddeprecated \ No newline at end of file diff --git a/docs/reference/figures/lifecycle-experimental.svg b/docs/reference/figures/lifecycle-experimental.svg new file mode 100644 index 00000000..d1d060e9 --- /dev/null +++ b/docs/reference/figures/lifecycle-experimental.svg @@ -0,0 +1 @@ +lifecyclelifecycleexperimentalexperimental \ No newline at end of file diff --git a/docs/reference/figures/lifecycle-maturing.svg b/docs/reference/figures/lifecycle-maturing.svg new file mode 100644 index 00000000..df713101 --- /dev/null +++ b/docs/reference/figures/lifecycle-maturing.svg @@ -0,0 +1 @@ +lifecyclelifecyclematuringmaturing \ No newline at end of file diff --git a/docs/reference/figures/lifecycle-questioning.svg b/docs/reference/figures/lifecycle-questioning.svg new file mode 100644 index 00000000..08ee0c90 --- /dev/null +++ b/docs/reference/figures/lifecycle-questioning.svg @@ -0,0 +1 @@ +lifecyclelifecyclequestioningquestioning \ No newline at end of file diff --git a/docs/reference/figures/lifecycle-stable.svg b/docs/reference/figures/lifecycle-stable.svg new file mode 100644 index 00000000..e015dc81 --- /dev/null +++ b/docs/reference/figures/lifecycle-stable.svg @@ -0,0 +1 @@ +lifecyclelifecyclestablestable \ No newline at end of file diff --git a/docs/reference/figures/lifecycle-superseded.svg b/docs/reference/figures/lifecycle-superseded.svg new file mode 100644 index 00000000..75f24f55 --- /dev/null +++ b/docs/reference/figures/lifecycle-superseded.svg @@ -0,0 +1 @@ + lifecyclelifecyclesupersededsuperseded \ No newline at end of file diff --git a/docs/reference/get_endpoints.html b/docs/reference/get_endpoints.html index e510ce14..e9eea4ab 100644 --- a/docs/reference/get_endpoints.html +++ b/docs/reference/get_endpoints.html @@ -1,70 +1,13 @@ - - - - - - - -Get endpoints — get_endpoints • patentsview - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Get endpoints — get_endpoints • patentsview + + - - - - -
    -
    - -
    - -
    +
    -

    This function reminds the user what the 7 possible PatentsView API endpoints +

    This function reminds the user what the possible PatentsView API endpoints are.

    -
    get_endpoints()
    - - -

    Value

    +
    +
    get_endpoints()
    +
    -

    A character vector with the names of the 7 endpoints. Those endpoints are:

    -
      -
    • assignees

    • -
    • cpc_subsections

    • -
    • inventors

    • -
    • locations

    • -
    • nber_subcategories

    • -
    • patents

    • -
    • uspc_mainclasses

    • -
    +
    +

    Value

    + +

    A character vector with the names of each endpoint.

    +
    -

    Examples

    -
    get_endpoints() -
    #> [1] "assignees" "cpc_subsections" "inventors" -#> [4] "locations" "nber_subcategories" "patents" -#> [7] "uspc_mainclasses"
    +
    -
    - +
    - - + + diff --git a/docs/reference/get_fields.html b/docs/reference/get_fields.html index 3951913c..4e1bb43a 100644 --- a/docs/reference/get_fields.html +++ b/docs/reference/get_fields.html @@ -98,7 +98,7 @@

    Get list of retrievable fields

    -
    get_fields(endpoint, groups = NULL)
    +
    get_fields(endpoint, groups = NULL, include_pk = FALSE)
    @@ -114,10 +114,16 @@

    Arguments

    endpoint's fields (i.e., do not filter the field list based on group membership). See the field tables located online to see which groups you can specify for a given endpoint (e.g., the -patent +patents endpoint table), or use the fieldsdf table (e.g., unique(fieldsdf[fieldsdf$endpoint == "patent", "group"])).

    + +
    include_pk
    +

    Boolean on whether to include the endpoint's primary key, +defaults to FALSE. The primary key is needed if you plan on calling +unnest_pv_data on the results of search_pv

    +

    Value

    @@ -128,8 +134,8 @@

    Value

    Examples

    -
    # Get all assignee-level fields for the patent endpoint:
    -fields <- get_fields(endpoint = "patent", groups = "assignees")
    +    
    # Get all top level (non-nested) fields for the patent endpoint:
    +fields <- get_fields(endpoint = "patent", groups = c("patents"))
     
     # ...Then pass to search_pv:
     if (FALSE) {
    @@ -139,9 +145,8 @@ 

    Examples

    fields = fields ) } -# Get all patent and assignee-level fields for the patent endpoint: +# Get unnested patent and assignee-level fields for the patent endpoint: fields <- get_fields(endpoint = "patent", groups = c("assignees", "patents")) -#> Error: group must be one of the following: , assignee_years, inventor_years, applicants, application, assignees, attorneys, botanic, cpc_at_issue, cpc_current, examiners, figures, foreign_priority, gov_interest_contract_award_numbers, gov_interest_organizations, granted_pregrant_crosswalk, inventors, ipcr, pct_data, us_related_documents, us_term_of_grant, uspc_at_issue, wipo, us_parties if (FALSE) { # ...Then pass to search_pv: @@ -150,6 +155,19 @@

    Examples

    fields = fields ) } +# Get the nested inventors fields and the primary key in order to call unnest_pv_data +# on the returned data. unnest_pv_data would throw an error if the primary key was +# not present in the results. +fields <- get_fields(endpoint = "patent", groups = c("inventors"), include_pk = TRUE) + +if (FALSE) { +# ...Then pass to search_pv and unnest the results +results <- search_pv( + query = '{"_gte":{"patent_date":"2007-01-04"}}', + fields = fields +) +unnest_pv_data(results$data) +}
    diff --git a/docs/reference/get_ok_pk.html b/docs/reference/get_ok_pk.html index 149a8283..d91c7dad 100644 --- a/docs/reference/get_ok_pk.html +++ b/docs/reference/get_ok_pk.html @@ -3,7 +3,9 @@ in unnest_pv_data, based on the endpoint you searched. It will return a potential unique identifier for a given entity (i.e., a given endpoint). For example, it will return "patent_id" when -endpoint = "patent".'> @@ -94,18 +96,20 @@

    Get OK primary key

    in unnest_pv_data, based on the endpoint you searched. It will return a potential unique identifier for a given entity (i.e., a given endpoint). For example, it will return "patent_id" when -endpoint = "patent".

    +endpoint_or_entity = "patent". It would return the same value if +the entity name "patents" was passed via get_ok_pk(names(pv_return$data)) +where pv_return was returned from search_pv.

    -
    get_ok_pk(endpoint)
    +
    get_ok_pk(endpoint_or_entity)

    Arguments

    -
    endpoint
    -

    The endpoint which you would like to know a potential primary -key for.

    +
    endpoint_or_entity
    +

    The endpoint or entity name for which you +would like to know a potential primary key for.

    @@ -118,12 +122,10 @@

    Value

    Examples

    -
    get_ok_pk(endpoint = "inventor")
    +    
    get_ok_pk(endpoint_or_entity = "inventor") # Returns "inventor_id"
     #> [1] "inventor_id"
    -get_ok_pk(endpoint = "cpc_subclass")
    -#> [1] "cpc_subclass_id"
    -get_ok_pk("publication/rel_app_text")
    -#> [1] "document_number"
    +get_ok_pk(endpoint_or_entity = "cpc_group") # Returns "cpc_group_id"
    +#> [1] "cpc_group_id"
     
     
    diff --git a/docs/reference/index.html b/docs/reference/index.html index ee40061b..ab30c2ca 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,68 +1,12 @@ - - - - - - - -Function reference • patentsview - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Function reference • patentsview - - + + - - -
    -
    - -
    - -
    +
    - - - - - - - - - - - + + + + +
    -

    The API client

    + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    +

    The API client

    +

    search_pv()

    Search PatentsView

    -

    Convenience objects for search_pv

    +
    +

    Convenience objects for search_pv

    +

    get_endpoints()

    Get endpoints

    +

    get_fields()

    Get list of retrievable fields

    +

    fieldsdf

    Fields data frame

    -

    Writing queries with the DSL

    +
    +

    Writing queries with the DSL

    +

    qry_funs

    List of query functions

    +

    with_qfuns()

    With qry_funs

    -

    Manipulating patentsview data

    +
    +

    Manipulating patentsview data

    +

    unnest_pv_data()

    Unnest PatentsView data

    +

    get_ok_pk()

    Get OK primary key

    +

    cast_pv_data()

    Cast PatentsView data

    - +
    +

    retrieve_linked_data()

    +

    Retrieve Linked Data

    +

    Utility

    +

    +
    +

    pad_patent_id()

    +

    Pad patent_id

    +
    -
    - +
    - - + + diff --git a/docs/reference/pad_patent_id.html b/docs/reference/pad_patent_id.html new file mode 100644 index 00000000..d92031de --- /dev/null +++ b/docs/reference/pad_patent_id.html @@ -0,0 +1,139 @@ + +Pad patent_id — pad_patent_id • patentsview + + +
    +
    + + + +
    +
    + + +
    +

    This function strategically pads a patent_id with zeroes to 8 characters, +needed only for custom paging that uses sorts by patent_id.

    +
    + +
    +
    pad_patent_id(patent_id)
    +
    + +
    +

    Arguments

    +
    patent_id
    +

    The patent_id that needs to be padded. It can +be the patent_id for a utility, design, plant or reissue patent.

    + +
    + +
    +

    Examples

    +
    if (FALSE) {
    +padded <- pad_patent_id("RE36479")
    +
    +padded2 <- pad_patent_id("3930306")
    +}
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.9.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/patentsview-package.html b/docs/reference/patentsview-package.html index bea2797e..1ed343ea 100644 --- a/docs/reference/patentsview-package.html +++ b/docs/reference/patentsview-package.html @@ -1,5 +1,5 @@ -patentsview: An R Client to the 'PatentsView' API — patentsview-package • patentsviewpatentsview: An R Client to the 'PatentsView' API — patentsview-package • patentsview @@ -86,7 +86,7 @@

    patentsview: An R Client to the 'PatentsView' API

    -

    Provides functions to simplify the 'PatentsView' API (https://patentsview.org/apis/purpose) query language, send GET and POST requests to the API's seven endpoints, and parse the data that comes back.

    +

    Provides functions to simplify the 'PatentsView' API (https://patentsview.org/apis/purpose) query language, send GET and POST requests to the API's twenty seven endpoints, and parse the data that comes back.

    diff --git a/docs/reference/qry_funs.html b/docs/reference/qry_funs.html index f332a17e..c73b1b52 100644 --- a/docs/reference/qry_funs.html +++ b/docs/reference/qry_funs.html @@ -1,72 +1,14 @@ - - - - - - - -List of query functions — qry_funs • patentsview - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -List of query functions — qry_funs • patentsview - + + - - - -
    -
    - -
    - -
    +

    A list of functions that make it easy to write PatentsView queries. See the -details section below for a list of the 14 functions, as well as the -writing -queries vignette for further details.

    +details section below for a list of the 15 functions, as well as the +writing queries vignette for further details.

    -
    qry_funs
    - - -

    Format

    - -

    An object of class list of length 14.

    -

    Value

    +
    +
    qry_funs
    +
    -

    An object of class pv_query. This is basically just a simple - list with a print method attached to it.

    -

    Details

    +
    +

    Format

    +

    An object of class list of length 15.

    +
    +
    +

    Value

    + -

    1. Comparison operator functions

    +

    An object of class pv_query. This is basically just a simple +list with a print method attached to it.

    +
    +
    +

    Details

    +

    1. Comparison operator functions

    There are 6 comparison operator functions that work with fields of type -integer, float, date, or string:

      -
    • eq - Equal to

    • +integer, float, date, or string:

      • eq - Equal to

      • neq - Not equal to

      • gt - Greater than

      • gte - Greater than or equal to

      • lt - Less than

      • lte - Less than or equal to

      • -
      - -

      There are 2 comparison operator functions that only work with fields of type -string:

        -
      • begins - The string begins with the value string

      • +

      There are 2 comparison operator functions that only work with fields of type +string:

      • begins - The string begins with the value string

      • contains - The string contains the value string

      • -
      - -

      There are 3 comparison operator functions that only work with fields of type -fulltext:

        -
      • text_all - The text contains all the words in the value - string

      • +

      There are 3 comparison operator functions that only work with fields of type +fulltext:

      • text_all - The text contains all the words in the value +string

      • text_any - The text contains any of the words in the value - string

      • +string

      • text_phrase - The text contains the exact phrase of the value - string

      • -
      - -

      2. Array functions

      -

      There are 2 array functions:

        -
      • and - Both members of the array must be true

      • +string

        +

      2. Array functions

      +

      There are 2 array functions:

      • and - Both members of the array must be true

      • or - Only one member of the array must be true

      • -
      - -

      3. Negation function

      -

      There is 1 negation function:

        -
      • not - The comparison is not true

      • -
      - - -

      Examples

      -
      qry_funs$eq(patent_date = "2001-01-01") -
      #> {"_eq":{"patent_date":"2001-01-01"}}
      -qry_funs$not(qry_funs$eq(patent_date = "2001-01-01")) -
      #> {"_not":{"_eq":{"patent_date":"2001-01-01"}}}
      -
      +

    3. Negation function

    +

    There is 1 negation function:

    • not - The comparison is not true

    • +

    4. Convenience function

    +

    There is 1 convenience function:

    • in_range - Builds a <= x <= b query

    • +
    + +
    +

    Examples

    +
    qry_funs$eq(patent_date = "2001-01-01")
    +#> {"_eq":{"patent_date":"2001-01-01"}}
    +
    +qry_funs$not(qry_funs$eq(patent_date = "2001-01-01"))
    +#> {"_not":{"_eq":{"patent_date":"2001-01-01"}}}
    +
    +qry_funs$in_range(patent_year = c(2010, 2021))
    +#> {"_and":[{"_gte":{"patent_year":2010}},{"_lte":{"patent_year":2021}}]}
    +
    +qry_funs$in_range(patent_date = c("1976-01-01", "1983-02-28"))
    +#> {"_and":[{"_gte":{"patent_date":"1976-01-01"}},{"_lte":{"patent_date":"1983-02-28"}}]}
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/retrieve_linked_data.html b/docs/reference/retrieve_linked_data.html new file mode 100644 index 00000000..148968c5 --- /dev/null +++ b/docs/reference/retrieve_linked_data.html @@ -0,0 +1,199 @@ + +Retrieve Linked Data — retrieve_linked_data • patentsview + + +
    +
    + + + +
    +
    + + +
    +

    Some of the endpoints now return HATEOAS style links to get more data. E.g., +the patent endpoint may return a link such as: +"https://search.patentsview.org/api/v1/inventor/fl:th_ln:jefferson-1/"

    +
    + +
    +
    retrieve_linked_data(
    +  url,
    +  encoded_url = FALSE,
    +  api_key = Sys.getenv("PATENTSVIEW_API_KEY"),
    +  ...
    +)
    +
    + +
    +

    Arguments

    +
    url
    +

    A link that was returned by the API on a previous call, an example +in the documentation or a Request URL from the API's Swagger UI page.

    + + +
    encoded_url
    +

    boolean to indicate whether the url has been URL encoded, defaults to FALSE. +Set to TRUE for Request URLs from Swagger UI.

    + + +
    api_key
    +

    API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key +here.

    + + +
    ...
    +

    Curl options passed along to httr2's req_options function.

    + +
    +
    +

    Value

    + + +

    A list with the following three elements:

    data
    +

    A list with one element - a named data frame containing the +data returned by the server. Each row in the data frame corresponds to a +single value for the primary entity. For example, if you search the +assignee endpoint, then the data frame will be on the assignee-level, +where each row corresponds to a single assignee. Fields that are not on +the assignee-level would be returned in list columns.

    + + +
    query_results
    +

    Entity counts across all pages of output (not just +the page returned to you).

    + + +
    request
    +

    Details of the GET HTTP request that was sent to the server.

    + + +
    + +
    +

    Examples

    +
    if (FALSE) {
    +
    +retrieve_linked_data(
    +  "https://search.patentsview.org/api/v1/cpc_group/G01S7:4811/"
    +)
    +
    +endpoint_url <- "https://search.patentsview.org/api/v1/patent/"
    +q_param <- '?q={"_text_any":{"patent_title":"COBOL cotton gin"}}'
    +s_and_o_params <- '&s=[{"patent_id": "asc" }]&o={"size":50}'
    +f_param <- '&f=["inventors.inventor_name_last","patent_id","patent_date","patent_title"]'
    +# (URL broken up to avoid a long line warning in this Rd)
    +
    +retrieve_linked_data(
    +  paste0(endpoint_url, q_param, s_and_o_params, f_param)
    +)
    +
    +retrieve_linked_data(
    +  "https://search.patentsview.org/api/v1/patent/?q=%7B%22patent_date%22%3A%221976-01-06%22%7D",
    +  encoded_url = TRUE
    +)
    +}
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.9.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/search_pv.html b/docs/reference/search_pv.html index 8710e3a7..640aba6f 100644 --- a/docs/reference/search_pv.html +++ b/docs/reference/search_pv.html @@ -98,8 +98,10 @@

    Search PatentsView

    endpoint = "patent", subent_cnts = FALSE, mtchd_subent_only = lifecycle::deprecated(), - page = 1, - per_page = 1000, + page = lifecycle::deprecated(), + per_page = lifecycle::deprecated(), + size = 1000, + after = NULL, all_pages = FALSE, sort = NULL, method = "GET", @@ -119,7 +121,7 @@

    Arguments

    E.g., list("_gte" = list("patent_date" = "2007-01-04"))

  • An object of class pv_query, which you create by calling one of the functions found in the qry_funs list...See the -writing +writing queries vignette for details.
    E.g., qry_funs$gte(patent_date = "2007-01-04")

  • @@ -127,13 +129,17 @@

    Arguments

    fields

    A character vector of the fields that you want returned to you. -A value of NULL indicates that the default fields should be -returned. Acceptable fields for a given endpoint can be found at the API's +A value of NULL indicates to the API that it should return the default fields +for that endpoint. Acceptable fields for a given endpoint can be found at the API's online documentation (e.g., check out the field list for the -patents +patents endpoint) or by viewing the fieldsdf data frame (View(fieldsdf)). You can also use get_fields to list -out the fields available for a given endpoint.

    +out the fields available for a given endpoint.

    +

    Nested fields can be fully qualified, e.g., "application.filing_date" or the +group name can be used to retrieve all of its nested fields, E.g. "application". +The latter would be similar to passing get_fields("patent", group = "application") +except it's the API that decides what fields to return.

    endpoint
    @@ -142,35 +148,47 @@

    Arguments

    subent_cnts
    -

    [Deprecated] Non-matched subentities -will always be returned under the new version of the API

    +

    [Deprecated] This is always FALSE in the +new version of the API as the total counts of unique subentities is no longer available.

    mtchd_subent_only

    [Deprecated] This is always -FALSE in the new version of the API.

    +FALSE in the new version of the API as non-matched subentities +will always be returned.

    page
    -

    The page number of the results that should be returned.

    +

    [Deprecated] The new version of the API does not use +page as a parameter for paging, it uses after.

    per_page
    +

    [Deprecated] The API now uses size

    + + +
    size

    The number of records that should be returned per page. This -value can be as high as 1,000 (e.g., per_page = 1000).

    +value can be as high as 1,000 (e.g., size = 1000).

    + + +
    after
    +

    A list of sort key values that defaults to NULL. This +exposes the API's paging parameter for users who want to implement their own +paging. It cannot be set when all_pages = TRUE as the R package manipulates it +for users automatically. See result set paging

    all_pages

    Do you want to download all possible pages of output? If -all_pages = TRUE, the values of page and per_page are -ignored.

    +all_pages = TRUE, the value of size is ignored.

    sort

    A named character vector where the name indicates the field to sort by and the value indicates the direction of sorting (direction should -be either "asc" or "desc"). For example, sort = c("patent_number" = - "asc") or
    sort = c("patent_number" = "asc", "patent_date" = +be either "asc" or "desc"). For example, sort = c("patent_id" = + "asc") or
    sort = c("patent_id" = "asc", "patent_date" = "desc"). sort = NULL (the default) means do not sort the results. You must include any fields that you wish to sort by in fields.

    @@ -186,13 +204,13 @@

    Arguments

    api_key
    -

    API key. See -Here for info on creating a key.

    +

    API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key +here.

    ...
    -

    Arguments passed along to httr's GET or -POST function.

    +

    Curl options passed along to httr2's req_options +when we do GETs or POSTs.

    @@ -203,7 +221,7 @@

    Value

    A list with one element - a named data frame containing the data returned by the server. Each row in the data frame corresponds to a single value for the primary entity. For example, if you search the -assignees endpoint, then the data frame will be on the assignee-level, +assignee endpoint, then the data frame will be on the assignee-level, where each row corresponds to a single assignee. Fields that are not on the assignee-level would be returned in list columns.

    @@ -236,8 +254,8 @@

    Examples

    search_pv( query = qry_funs$gt(patent_year = 2010), method = "POST", - fields = "patent_number", - sort = c("patent_number" = "asc") + fields = "patent_id", + sort = c("patent_id" = "asc") ) search_pv( @@ -252,9 +270,14 @@

    Examples

    ) search_pv( - query = qry_funs$contains(inventors_at_grant.name_last = "Smith"), + query = qry_funs$contains(inventors.inventor_name_last = "Smith"), endpoint = "patent", - config = httr::timeout(40) + timeout = 40 +) + +search_pv( + query = qry_funs$eq(patent_id = "11530080"), + fields = "application" ) } diff --git a/docs/reference/unnest_pv_data.html b/docs/reference/unnest_pv_data.html index 5c41902b..e055c5bd 100644 --- a/docs/reference/unnest_pv_data.html +++ b/docs/reference/unnest_pv_data.html @@ -100,7 +100,7 @@

    Unnest PatentsView data

    -
    unnest_pv_data(data, pk = get_ok_pk(names(data)))
    +
    unnest_pv_data(data, pk = NULL)
    @@ -115,8 +115,8 @@

    Arguments

    pk

    The column/field name that will link the data frames together. This should be the unique identifier for the primary entity. For example, if you -used the patents endpoint in your call to search_pv, you could -specify pk = "patent_number". This identifier has to have +used the patent endpoint in your call to search_pv, you could +specify pk = "patent_id". This identifier has to have been included in your fields vector when you called search_pv. You can use get_ok_pk to suggest a potential primary key for your data.

    diff --git a/docs/reference/with_qfuns.html b/docs/reference/with_qfuns.html index d1d1564c..c1e80e77 100644 --- a/docs/reference/with_qfuns.html +++ b/docs/reference/with_qfuns.html @@ -1,73 +1,16 @@ - - - - - - - -With qry_funs — with_qfuns • patentsview - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -With qry_funs — with_qfuns • patentsview - - - - - - - - - - - + + - - -
    -
    - -
    - -
    +

    This function evaluates whatever code you pass to it in the environment of -the qry_funs list. This allows you to cut down on typing when +the qry_funs list. This allows you to cut down on typing when writing your queries. If you want to cut down on typing even more, you can -try assigning the qry_funs list into your global environment -with: list2env(qry_funs, envir = globalenv()).

    +try assigning the qry_funs list into your global environment +with: list2env(qry_funs, envir = globalenv()).

    -
    with_qfuns(code, envir = parent.frame())
    - -

    Arguments

    - - - - - - - - - - -
    code

    Code to evaluate. See example.

    envir

    Where should R look for objects present in code that -aren't present in qry_funs.

    - -

    Value

    - -

    The result of code - i.e., your query.

    - -

    Examples

    -
    # Without with_qfuns, we have to do: -qry_funs$and( - qry_funs$gte(patent_date = "2007-01-01"), - qry_funs$text_phrase(patent_abstract = c("computer program")), - qry_funs$or( - qry_funs$eq(inventor_last_name = "ihaka"), - qry_funs$eq(inventor_first_name = "chris") - ) -) -
    #> {"_and":[{"_gte":{"patent_date":"2007-01-01"}},{"_text_phrase":{"patent_abstract":"computer program"}},{"_or":[{"_eq":{"inventor_last_name":"ihaka"}},{"_eq":{"inventor_first_name":"chris"}}]}]}
    -#...With it, this becomes: -with_qfuns( - and( - gte(patent_date = "2007-01-01"), - text_phrase(patent_abstract = c("computer program")), - or( - eq(inventor_last_name = "ihaka"), - eq(inventor_first_name = "chris") - ) - ) -) -
    #> {"_and":[{"_gte":{"patent_date":"2007-01-01"}},{"_text_phrase":{"patent_abstract":"computer program"}},{"_or":[{"_eq":{"inventor_last_name":"ihaka"}},{"_eq":{"inventor_first_name":"chris"}}]}]}
    -
    +
    +
    with_qfuns(code, envir = parent.frame())
    +
    + +
    +

    Arguments

    +
    code
    +

    Code to evaluate. See example.

    + + +
    envir
    +

    Where should R look for objects present in code that +aren't present in qry_funs.

    + +
    +
    +

    Value

    + + +

    The result of code - i.e., your query.

    +
    + +
    +

    Examples

    +
    qry_funs$and(
    +  qry_funs$gte(patent_date = "2007-01-01"),
    +  qry_funs$text_phrase(patent_abstract = c("computer program")),
    +  qry_funs$or(
    +    qry_funs$eq(inventors.inventor_name_last = "Ihaka"),
    +    qry_funs$eq(inventors.inventor_name_last = "Chris")
    +  )
    +)
    +#> {"_and":[{"_gte":{"patent_date":"2007-01-01"}},{"_text_phrase":{"patent_abstract":"computer program"}},{"_or":[{"_eq":{"inventors.inventor_name_last":"Ihaka"}},{"_eq":{"inventors.inventor_name_last":"Chris"}}]}]}
    +
    +# ...With it, this becomes:
    +with_qfuns(
    +  and(
    +    gte(patent_date = "2007-01-01"),
    +    text_phrase(patent_abstract = c("computer program")),
    +    or(
    +      eq(inventors.inventor_name_last = "Ihaka"),
    +      eq(inventors.inventor_name_last = "Chris")
    +    )
    +  )
    +)
    +#> {"_and":[{"_gte":{"patent_date":"2007-01-01"}},{"_text_phrase":{"patent_abstract":"computer program"}},{"_or":[{"_eq":{"inventors.inventor_name_last":"Ihaka"}},{"_eq":{"inventors.inventor_name_last":"Chris"}}]}]}
    +
    +
    +
    +
    -
    - +
    - - + + From 20a4f0d20f217991ca640474d0c1250034836bb8 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 21 Dec 2024 19:50:49 -0600 Subject: [PATCH 088/103] feat: new paging methodology --- NAMESPACE | 1 + _pkgdown.yml | 3 +++ 2 files changed, 4 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 46981d70..2cff0eaa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ export(cast_pv_data) export(get_endpoints) export(get_fields) export(get_ok_pk) +export(pad_patent_id) export(qry_funs) export(retrieve_linked_data) export(search_pv) diff --git a/_pkgdown.yml b/_pkgdown.yml index 75d58fbb..fe54d8ba 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -23,6 +23,9 @@ reference: - get_ok_pk - cast_pv_data - retrieve_linked_data + - title: Utility + contents: + - pad_patent_id navbar: components: From 3040e78ca8387046357e950a29e5569ad331570d Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sun, 22 Dec 2024 12:15:57 -0600 Subject: [PATCH 089/103] test: updatng tests for new api version --- tests/testthat/helpers.R | 21 +- tests/testthat/test-api-bugs.R | 359 +++++++++++++++++++++ tests/testthat/test-arg-validation.R | 40 --- tests/testthat/test-cast-pv-data.R | 72 ++++- tests/testthat/test-check-query.R | 98 ++++++ tests/testthat/test-get-fields.R | 32 ++ tests/testthat/test-print.R | 36 +++ tests/testthat/test-query-dsl.R | 36 +++ tests/testthat/test-search-pv.R | 447 +++++++++++++++++++++++---- tests/testthat/test-unnest-pv-data.R | 75 ++++- tests/testthat/test-utils.R | 12 + tests/testthat/test-validate-args.R | 87 ++++++ 12 files changed, 1193 insertions(+), 122 deletions(-) create mode 100644 tests/testthat/test-api-bugs.R delete mode 100644 tests/testthat/test-arg-validation.R create mode 100644 tests/testthat/test-check-query.R create mode 100644 tests/testthat/test-get-fields.R create mode 100644 tests/testthat/test-print.R create mode 100644 tests/testthat/test-query-dsl.R create mode 100644 tests/testthat/test-utils.R create mode 100644 tests/testthat/test-validate-args.R diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R index 3fe9dc36..d39b4e00 100644 --- a/tests/testthat/helpers.R +++ b/tests/testthat/helpers.R @@ -1,5 +1,5 @@ # Vector of queries (one for each endpoint) that are used during testing. We -# need this b/c in the new version of the api, only three of the endpoints are +# need this b/c in the new version of the api, only ten of the endpoints are # searchable by patent number (i.e., we can't use a generic patent number # search query). further, now patent_number has been patent_id @@ -32,3 +32,22 @@ TEST_QUERIES <- c( "uspc_subclass" = '{"uspc_subclass_id": "100/1"}', "wipo" = '{"wipo_id": "1"}' ) + +to_plural <- function(x) { + pk <- get_ok_pk(x) + fieldsdf[fieldsdf$endpoint == x & fieldsdf$field == pk, "group"] +} + +to_singular <- function(entity) { + endpoint_df <- fieldsdf[fieldsdf$group == entity, ] + endpoint <- unique(endpoint_df$endpoint) + + # watch out here- several endpoints return entities that are groups returned + # by the patent and publication endpoints (attorneys, inventors, assignees) + if(length(endpoint) > 1) { + endpoint <- endpoint[!endpoint %in% c("patent", "publication")] + } + + # can't distinguish rel_app_texts between patent/rel_app_text and publication/rel_app_text + endpoint +} diff --git a/tests/testthat/test-api-bugs.R b/tests/testthat/test-api-bugs.R new file mode 100644 index 00000000..2e5f2227 --- /dev/null +++ b/tests/testthat/test-api-bugs.R @@ -0,0 +1,359 @@ + +# Tests from the other files in this directory that are masking API errors +# This file was submitted to the API team as PVS-1125 + +eps <- (get_endpoints()) + +add_base_url <- function(x) { + paste0("https://search.patentsview.org/api/v1/", x) +} + +test_that("there is trouble paging", { + skip_on_cran() + skip_on_ci() + + # reprex inspired by https://patentsview.org/forum/7/topic/812 + # Not all requested groups are coming back as we page, causing + # Error in rbind(deparse.level, ...) : + # numbers of columns of arguments do not match + # This query fails if any of these groups are specified + # "applicants", "cpc_at_issue", "gov_interest_contract_award_numbers", + # "uspc_at_issue") + + query <- with_qfuns( + and( + gte(application.filing_date = "2000-01-01"), + eq(cpc_current.cpc_subclass_id = "A01D") + ) + ) + + sort <- c("patent_id" = "asc") + fields <- c( + "patent_id", "applicants", "cpc_at_issue", + "gov_interest_contract_award_numbers", "uspc_at_issue" + ) + + result1 <- search_pv(query, + method = "GET", all_pages = FALSE, + fields = fields, sort = sort, size = 1000 + ) + + result2 <- search_pv(query, + method = "GET", all_pages = FALSE, + fields = fields, sort = sort, size = 1000, after = "06901731" + ) + + # result1$data$patents$applicants is sparse, mostly NULL + # there isn't a result2$data$patents$applicants + names1 <- names(result1$data$patents) + names2 <- names(result2$data$patents) + + expect_failure( + expect_setequal(names1, names2) + ) +}) + +test_that("there is case sensitivity on string equals", { + skip_on_cran() + skip_on_ci() + + # reported to the API team PVS-1147 + # not sure if this is a bug or feature - original API was case insensitive + # using both forms of equals, impied and explicit + + assignee <- "Johnson & Johnson International" + query1 <- sprintf('{"assignee_organization": \"%s\"}', assignee) + a <- search_pv(query1, endpoint = "assignee") + query2 <- qry_funs$eq(assignee_organization = assignee) + b <- search_pv(query2, endpoint = "assignee") + expect_equal(a$query_results$total_hits, 1) + expect_equal(b$query_results$total_hits, 1) + + assignee <- tolower(assignee) + query1 <- sprintf('{"assignee_organization": \"%s\"}', assignee) + c <- search_pv(query1, endpoint = "assignee") + query2 <- qry_funs$eq(assignee_organization = assignee) + d <- search_pv(query2, endpoint = "assignee") + expect_equal(c$query_results$total_hits, 0) + expect_equal(d$query_results$total_hits, 0) +}) + +test_that("string vs text operators behave differently", { + skip_on_cran() + + # # reported to the API team PVS-1147 + query <- qry_funs$begins(assignee_organization = "johnson") + a <- search_pv(query, endpoint = "assignee") + + query <- qry_funs$text_any(assignee_organization = "johnson") + b <- search_pv(query, endpoint = "assignee") + + expect_failure( + expect_equal(a$query_results$total_hits, b$query_results$total_hits) + ) +}) + +test_that("API returns all requested groups", { + skip_on_cran() + skip_on_ci() + + # can we traverse the return building a list of fields? + # sort both requested fields and returned ones to see if they are equal + + # TODO: remove the trickery to get this test to pass, once the API is fixed + bad_eps <- c( + "cpc_subclasses", + "location" # Error: Invalid field: location_latitude + , "uspc_subclasse" # Error: Internal Server Error + , "uspc_mainclass" # Error: Internal Server Error + , "wipo" # Error: Internal Server Error + , "claim" # Error: Invalid field: claim_dependent + , "draw_desc_text" # Error: Invalid field: description_sequence + , "cpc_subclass" # 404? check the test query + , "uspc_subclass" # 404 + , "pg_claim" # Invalid field: claim_dependent + ) + + mismatched_returns <- c( + "patent", + "publication" + ) + + # this will fail when the api is fixed + z <- lapply(bad_eps, function(x) { + print(x) + expect_error( + j <- search_pv(query = TEST_QUERIES[[x]], endpoint = x, fields = get_fields(x)) + ) + }) + + # this will fail when the API is fixed + z <- lapply(mismatched_returns, function(x) { + print(x) + res <- search_pv( + query = TEST_QUERIES[[x]], + endpoint = x, + fields = get_fields(x) + ) + + dl <- unnest_pv_data(res$data) + + actual_groups <- names(dl) + expected_groups <- unique(fieldsdf[fieldsdf$endpoint == x, "group"]) + + # we now need to unnest the endpoints for the comparison to work + expected_groups <- gsub("^(patent|publication)/", "", expected_groups) + + # the expected group for unnested attributes would be "" in actuality the come back + # in an entity matching the plural form of the unnested endpoint + expected_groups <- replace(expected_groups, expected_groups == "", to_plural(x)) + + expect_failure( + expect_setequal(actual_groups, expected_groups) + ) + }) + + # make it noticeable that all is not right with the API + skip("Skip for API bugs") # TODO: remove when the API is fixed +}) + +eps <- (get_endpoints()) + +test_that("We can call all the legitimate HATEOAS endpoints", { + skip_on_cran() + + # these currently throw Error: Internal Server Error + broken_single_item_queries <- c( + "cpc_subclass/A01B/", + "uspc_mainclass/30/", + "uspc_subclass/30:100/", + "wipo/1/" + ) + + + # TODO: remove when this is fixed + # we'll know the api is fixed when this test fails + dev_null <- lapply(broken_single_item_queries, function(q) { + expect_error( + j <- retrieve_linked_data(add_base_url(q)) + ) + }) +}) + +test_that("individual fields are still broken", { + skip_on_cran() + + # Sample fields that cause 500 errors when requested by themselves. + # Some don't throw errors when included in get_fields() but they do if + # they are the only field requested. Other individual fields at these + # same endpoints throw errors. Check fields again when these fail. + sample_bad_fields <- c( + "assignee_organization" = "assignees", + "inventor_lastknown_longitude" = "inventors", + "inventor_gender_code" = "inventors", + "location_name" = "locations", + "attorney_name_last" = "patent/attorneys", + "citation_country" = "patent/foreign_citations", + "ipc_id" = "ipcs" + ) + + dev_null <- lapply(names(sample_bad_fields), function(x) { + endpoint <- sample_bad_fields[[x]] + expect_error( + out <- search_pv(query = TEST_QUERIES[[endpoint]], endpoint = endpoint, fields = c(x)) + ) + }) +}) + +test_that("we can't sort by all fields", { + skip_on_cran() + + # PVS-1377 + sorts_to_try <- c( + assignee = "assignee_lastknown_city", + cpc_class = "cpc_class_title", + cpc_group = "cpc_group_title", + cpc_subclass = "cpc_subclass", + g_brf_sum_text = "summary_text", + g_claim = "claim_text", + g_detail_desc_text = "description_text", + g_draw_desc_text = "draw_desc_text", + inventor = "inventor_lastknown_city", + patent = "patent_id" # good pair to show that the code works + ) + + results <- lapply(names(sorts_to_try), function(endpoint) { + field <- sorts_to_try[[endpoint]] + print(paste(endpoint, field)) + + tryCatch( + { + sort <- c("asc") + names(sort) <- field + j <- search_pv( + query = TEST_QUERIES[[endpoint]], + endpoint = endpoint, sort = sort, method = "GET" + ) + NA + }, + error = function(e) { + paste(endpoint, field) + } + ) + }) + + results <- results[!is.na(results)] + expect_gt(length(results), 0) + expect_lt(length(results), length(sorts_to_try)) # assert that at least one sort worked +}) + + +test_that("withdrawn patents are still present in the database", { + skip_on_cran() + + # PVS-1342 Underlying data issues + # There are 8,000 patents that were in the bulk xml files patentsiew is based on. + # The patents were subsequently withdrawn but not removed from the database + withdrawn <- c( + "9978309", "9978406", "9978509", "9978615", "9978659", + "9978697", "9978830", "9978838", "9978886", "9978906", "9978916", + "9979255", "9979355", "9979482", "9979700", "9979841", "9979847", + "9980139", "9980711", "9980782", "9981222", "9981277", "9981423", + "9981472", "9981603", "9981760", "9981914", "9982126", "9982172", + "9982670", "9982860", "9982871", "9983588", "9983756", "9984058", + "9984899", "9984952", "9985340", "9985480", "9985987", "9986046" + ) + + query <- qry_funs$eq("patent_id" = c(withdrawn)) + results <- search_pv(query, method = "POST") + expect_equal(results$query_results$total_hits, length(withdrawn)) +}) + +test_that("missing patents are still missing", { + skip_on_cran() + + # PVS-1342 Underlying data issues + # There are around 300 patents that aren't in the bulk xml files patentsiew is based on. + missing <- c( + "4097517", "4424514", "4480077", "4487876", "4704648", "4704721", + "4705017", "4705031", "4705032", "4705036", "4705037", "4705097", "4705107", + "4705125", "4705142", "4705169", "4705170", "4705230", "4705274", "4705328", + "4705412", "4705416", "4705437", "4705455", "4705462", "5493812", "5509710", + "5697964", "5922850", "6087542", "6347059", "6680878", "6988922", "7151114", + "7200832", "7464613", "7488564", "7606803", "8309694", "8455078" + ) + query <- qry_funs$eq("patent_id" = missing) + results <- search_pv(query, method = "POST") + + # This would fail if these patents are added to the patentsview database + expect_equal(results$query_results$total_hits, 0) +}) + +test_that("we can't explicitly request assignee_ or inventor_years.num_patents", { + skip_on_cran() + + bad_eps <- c( + "assignee", # Invalid field: assignee_years.num_patents. assignee_years is not a nested field + "inventor" # Invalid field: inventor_years.num_patents. + ) + + # PVS-1437 Errors are thrown when requesting assignee_years or inventor_years + # (it works if the group name is used but fails on fully qualified nested fields) + tmp <- lapply(bad_eps, function(endpoint) { + expect_error( + pv_out <- search_pv( + query = TEST_QUERIES[[endpoint]], + endpoint = endpoint, + fields = fieldsdf[fieldsdf$endpoint == endpoint, "field"] + ), + "Invalid field: (assignee|inventor)_years.num_patents" + ) + }) +}) + +test_that("uspcs aren't right", { + skip_on_cran() + + # PVS-1615 + + endpoint <- "patent" + res <- search_pv( + query = TEST_QUERIES[[endpoint]], + endpoint = endpoint, + fields = get_fields(endpoint, groups = "uspc_at_issue") + ) + + # id fields are correct, non id fields should be HATEOAS links + uspcs <- res$data$patents$uspc_at_issue + + # these should fail when the API is fixed + expect_equal(uspcs$uspc_mainclass, uspcs$uspc_mainclass_id) + expect_equal(uspcs$uspc_subclass, uspcs$uspc_subclass_id) +}) + +test_that("endpoints are still broken", { + skip_on_cran() + # this will fail when the api is fixed + + broken_endpoints <- c( + "claim" # Error: Invalid field: claim_dependent + , "draw_desc_text" # Error: Invalid field: description_sequence + , "cpc_subclass" # 404? check the test query + , "location" # Error: Invalid field: location_latitude + , "pg_claim" # Invalid field: claim_dependent + , "uspc_subclass" # Error: Internal Server Error + , "uspc_mainclass" # Error: Internal Server Error + , "wipo" # Error: Internal Server Error + ) + + dev_null <- lapply(broken_endpoints, function(x) { + print(x) + expect_error( + search_pv( + query = TEST_QUERIES[[x]], + endpoint = x, + fields = get_fields(x) + ) + ) + }) +}) diff --git a/tests/testthat/test-arg-validation.R b/tests/testthat/test-arg-validation.R deleted file mode 100644 index 8115baf2..00000000 --- a/tests/testthat/test-arg-validation.R +++ /dev/null @@ -1,40 +0,0 @@ -context("validate_args") - -test_that("validate_args throws errors for all bad args", { - skip_on_cran() - # TODO(any): Remove: - skip("Temp skip for API redesign PR") - - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', endpoint = "patent"), - "endpoint" - ) - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', method = "Post"), - "method" - ) - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = NULL), - "subent_cnts" - ) - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', mtchd_subent_only = NULL), - "mtchd_subent_only" - ) - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', per_page = "50"), - "per_page" - ) - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', page = NA), - "page" - ) - expect_error( - search_pv( - '{"patent_date":["1976-01-06"]}', - fields = "patent_date", - sort = c("patent_number" = "asc") - ), - "sort" - ) -}) diff --git a/tests/testthat/test-cast-pv-data.R b/tests/testthat/test-cast-pv-data.R index 91eaa7ad..efcb1c5b 100644 --- a/tests/testthat/test-cast-pv-data.R +++ b/tests/testthat/test-cast-pv-data.R @@ -1,19 +1,73 @@ -context("cast_pv_data") +test_that("cast_pv_data casts patent fields as expected", { + skip_on_cran() + + pv_out <- search_pv( + query = '{"patent_id":"5116621"}', fields = get_fields("patent") + ) + + dat <- cast_pv_data(data = pv_out$data) + + # patent_date was received as a string and should be cast to a date + date <- class(dat$patents$patent_date) == "Date" + + # patent_detail_desc_length was recieved as an int and should still be one + num <- is.numeric(dat$patents$patent_detail_desc_length) + + # assignee type is a string like "3" from the api and gets cast to an integer + assignee_type <- is.numeric(dat$patents$assignees[[1]]$assignee_type[[1]]) + + expect_true(num && date && assignee_type) + + # application.rule_47_flag is received as a boolean and casting should leave it alone + expect_true(is.logical(dat$patents$application[[1]]$rule_47_flag)) +}) -test_that("cast_pv_data casts data types as expected", { +test_that("cast_pv_data casts assignee fields as expected", { skip_on_cran() - # TODO(any): Remove: - skip("Temp skip for API redesign PR") + skip_on_ci() + # ** Invalid field: assignee_years.num_patents. assignee_years is not a nested field pv_out <- search_pv( - query = "{\"patent_number\":\"5116621\"}", fields = get_fields("patents") + query = '{"_text_phrase":{"assignee_individual_name_last": "Clinton"}}', + endpoint = "assignee", + fields = get_fields("assignee", groups = "assignees") # ** ) dat <- cast_pv_data(data = pv_out$data) - date <- !is.character(dat$patents$patent_date) - num <- is.numeric(dat$patents$patent_num_claims) - date2 <- !is.character(dat$patents$assignees[[1]]$assignee_last_seen_date[1]) + # latitude comes from the api as numeric and is left as is by casting + lat <- is.numeric(dat$assignees$assignee_lastknown_latitude[[1]]) + + # here we have the same funky conversion mentioned above + # on the field "assigneee_type" + assignee_type <- is.numeric(dat$assignees$assignee_type[[1]]) + + # was first seen date cast properly? + cast_date <- class(dat$assignees$assignee_first_seen_date[[1]]) == "Date" + + # integer from the API should remain an integer + years_active <- is.numeric(dat$assignees$assignee_years_active[[1]]) + + expect_true(lat) + expect_true(assignee_type) + expect_true(cast_date) + expect_true(years_active) + + skip("Skip for API bugs") +}) + +test_that("we can cast a bool", { + skip_on_cran() + + # TODO(any): remove when the API returns this as a boolean + fields <- c("rule_47_flag") + endpoint <- "publication" + results <- search_pv(query = TEST_QUERIES[[endpoint]], endpoint = endpoint, fields = fields) + + # this would fail when the API is fixed + expect_true(is.character(results$data$publications$rule_47_flag)) + + cast_results <- cast_pv_data(results$data) - expect_true(date && num && date2) + expect_true(is.logical(cast_results$publications$rule_47_flag)) }) diff --git a/tests/testthat/test-check-query.R b/tests/testthat/test-check-query.R new file mode 100644 index 00000000..8f846032 --- /dev/null +++ b/tests/testthat/test-check-query.R @@ -0,0 +1,98 @@ + +test_that("errors are thrown on invalid queries", { + skip_on_cran() + + expect_error( + search_pv(qry_funs$eq("shoe_size" = 11.5)), + "^.* is not a valid field to query for your endpoint$" + ) + + expect_error( + search_pv(qry_funs$gt("patent_id" = "10000000")), + "^You cannot use the operator .* with the field .*$" + ) + + expect_error( + search_pv(qry_funs$eq("patent_date" = "10000000")), + "^Bad date: .*\\. Date must be in the format of yyyy-mm-dd$" + ) + + expect_error( + search_pv(qry_funs$gt("patent_id" = 10000000)), + "^.* must be of type character$" + ) + + expect_error( + search_pv(qry_funs$gt("patent_year" = 1980.5)), + "^.* must be an integer$" + ) + + expect_error( + search_pv(qry_funs$gt("patent_year" = "1980")), + "^.* must be an integer$" + ) + + expect_error( + search_pv(qry_funs$eq("application.rule_47_flag" = "TRUE")), + "^.* must be a boolean$" + ) + + expect_error( + search_pv(qry_funs$eq("rule_47_flag" = TRUE), endpoint = "publication"), + "^.* must be of type character$" + ) + + expect_error( + search_pv(qry_funs$gt("location_latitude" = "TRUE"), endpoint = "location"), + "^.* must be a number$" + ) + + expect_error( + search_pv(list(patent_number = "10000000")), + "is not a valid operator or not a valid field" + ) + + bogus_operator_query <- + list( + "_ends_with" = + list(patent_title = "dog") + ) + + expect_error( + search_pv(bogus_operator_query), + "is not a valid operator or not a valid field" + ) +}) + +test_that("a valid nested field can be queried", { + skip_on_cran() + + results <- search_pv(qry_funs$eq("application.rule_47_flag" = FALSE)) + + expect_gt(results$query_results$total_hits, 8000000) +}) + +test_that("the _eq message is thrown when appropriate", { + skip_on_cran() + + expect_message( + search_pv(list(patent_date = "2007-03-06")), + "^The _eq operator is a safer alternative to using field:value pairs" + ) +}) + +test_that("a query with an and operator returns results", { + skip_on_cran() + + patents_query <- + with_qfuns( + and( + text_phrase(inventors.inventor_name_first = "George"), + text_phrase(inventors.inventor_name_last = "Washington") + ) + ) + + result <- search_pv(patents_query) + + expect_gte(result$query_results$total_hits, 1) +}) diff --git a/tests/testthat/test-get-fields.R b/tests/testthat/test-get-fields.R new file mode 100644 index 00000000..7eb316f0 --- /dev/null +++ b/tests/testthat/test-get-fields.R @@ -0,0 +1,32 @@ +test_that("get_fields works as expected", { + skip_on_cran() + + expect_error( + get_fields("bogus endpoint"), + "endpoint must be", + fixed = TRUE + ) + + expect_error( + get_fields("patent", groups = "bogus"), + "for the patent endpoint", + fixed = TRUE + ) + + patent_pk <- get_ok_pk("patent") + fields <- get_fields(endpoint = "patent", groups = c("inventors")) + expect_false(patent_pk %in% fields) + + fields <- get_fields(endpoint = "patent", groups = c("inventors"), include_pk = TRUE) + expect_true(patent_pk %in% fields) +}) + +test_that("the endpoints are stable", { + skip_on_cran() + + # quick check of the endpoints - useful after an api update. We run fieldsdf.R + # and do a build. This test would fail if an endpoint was added, moved or deleted + found <- unique(fieldsdf$endpoint) + expecting <- get_endpoints() + expect_equal(expecting, found) +}) diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R new file mode 100644 index 00000000..cc0d5f6a --- /dev/null +++ b/tests/testthat/test-print.R @@ -0,0 +1,36 @@ +test_that("We can print the returns from all endpoints ", { + skip_on_cran() + + eps <- get_endpoints() + bad_eps <- c("cpc_subclass", "uspc_subclass", "uspc_mainclass", "wipo") + good_eps <- eps[!eps %in% bad_eps] + + lapply(good_eps, function(x) { + print(x) + j <- search_pv(query = TEST_QUERIES[[x]], endpoint = x) + print(j) + j + }) + + expect_true(TRUE) + + # make it noticeable that all is not right with the API + skip("Skip for API bugs") # TODO: remove when the API is fixed +}) + +test_that("we can print a query, its request, and unnested data", { + skip_on_cran() + + x <- "patent" + q <- qry_funs$eq(patent_id = "11530080") + print(q) + + fields <- c("patent_id", get_fields(x, groups = "ipcr")) + j <- search_pv(query = TEST_QUERIES[[x]], endpoint = x, fields = fields) + print(j$request) + + k <- unnest_pv_data(j$data) + print(k) + + expect_true(TRUE) +}) diff --git a/tests/testthat/test-query-dsl.R b/tests/testthat/test-query-dsl.R new file mode 100644 index 00000000..840ed780 --- /dev/null +++ b/tests/testthat/test-query-dsl.R @@ -0,0 +1,36 @@ +test_that("between works as expected", { + skip_on_cran() + + query <- qry_funs$in_range(patent_date = c("1976-01-06", "1976-01-13")) + + results <- search_pv(query, all_pages = TRUE) + + expect_gt(results$query_results$total_hits, 2600) +}) + +test_that("with_qfuns() works as advertised", { + skip_on_cran() # wouldn't necessarily have to skip! + + a <- with_qfuns( + and( + text_phrase(inventors.inventor_name_first = "George"), + text_phrase(inventors.inventor_name_last = "Washington") + ) + ) + + b <- qry_funs$and( + qry_funs$text_phrase(inventors.inventor_name_first = "George"), + qry_funs$text_phrase(inventors.inventor_name_last = "Washington") + ) + + expect_equal(a, b) +}) + +test_that("argument check works on in_range", { + skip_on_cran() # wouldn't necessarily have to skip! + + expect_error( + qq <- qry_funs$in_range("patent_id", c("10000000", "10000002")), + "expects a range of exactly two arguments" + ) +}) diff --git a/tests/testthat/test-search-pv.R b/tests/testthat/test-search-pv.R index 29857741..494c3308 100644 --- a/tests/testthat/test-search-pv.R +++ b/tests/testthat/test-search-pv.R @@ -1,18 +1,37 @@ -context("search_pv") -# TODO: add a test to see if all the requested fields come back +add_base_url <- function(x) { + paste0("https://search.patentsview.org/api/v1/", x) +} endpoints <- get_endpoints() test_that("API returns expected df names for all endpoints", { skip_on_cran() - df_names <- vapply(endpoints, function(x) { + broken_endpoints <- c( + "cpc_subclass", + "uspc_subclass", + "uspc_mainclass", + "wipo" + ) + + # these both return rel_app_texts + overloaded_entities <- c("patent/rel_app_text", "publication/rel_app_text") + + goodendpoints <- endpoints[!endpoints %in% c(broken_endpoints, overloaded_entities)] + + df_names <- vapply(goodendpoints, function(x) { + print(x) out <- search_pv(query = TEST_QUERIES[[x]], endpoint = x) - names(out[[1]]) + + # now the endpoints are singular and most entites are plural + to_singular(names(out[[1]])) }, FUN.VALUE = character(1), USE.NAMES = FALSE) - expect_equal(endpoints, df_names) + # publication/rel_app_text's entity is rel_app_text_publications + df_names <- gsub("rel_app_text_publication", "rel_app_text", df_names) + + expect_equal(goodendpoints, df_names) }) test_that("DSL-based query returns expected results", { @@ -37,18 +56,26 @@ test_that("You can download up to 9,000+ records", { # Should return 9,000+ rows query <- with_qfuns( and( - gte(patent_date = "2021-12-13"), - lte(patent_date = "2021-12-24") + gte(patent_date = "2021-12-13"), + lte(patent_date = "2021-12-24") ) ) - out <- search_pv(query, per_page = 1000, all_pages = TRUE) + out <- search_pv(query, size = 1000, all_pages = TRUE) expect_gt(out$query_results$total_hits, 9000) }) test_that("search_pv can pull all fields for all endpoints", { skip_on_cran() - dev_null <- lapply(endpoints, function(x) { + troubled_endpoints <- c( + "cpc_subclass", "location", + "uspc_subclass", "uspc_mainclass", "wipo", "claim", "draw_desc_text", + "pg_claim" # Invalid field: claim_dependent + ) + + # We should be able to get all fields from the non troubled endpoints + dev_null <- lapply(endpoints[!(endpoints %in% troubled_endpoints)], function(x) { + print(x) search_pv( query = TEST_QUERIES[[x]], endpoint = x, @@ -62,13 +89,13 @@ test_that("Sort option works as expected", { skip_on_cran() out <- search_pv( - qry_funs$neq(assignee_id = 1), - fields = get_fields("assignees"), - endpoint = "assignees", - sort = c("lastknown_latitude" = "desc"), - per_page = 100 + qry_funs$neq(assignee_id = ""), + fields = get_fields("assignee", groups = c("assignees")), + endpoint = "assignee", + sort = c("assignee_lastknown_latitude" = "desc"), + size = 100 ) - lat <- as.numeric(out$data$assignees$lastknown_latitude) + lat <- as.numeric(out$data$assignees$assignee_lastknown_latitude) expect_true(lat[1] >= lat[100]) }) @@ -76,45 +103,30 @@ test_that("search_pv properly URL encodes queries", { skip_on_cran() # Covers https://github.com/ropensci/patentsview/issues/24 - # need to use the assignee endpoint now and the field is full_text - ampersand_query <- with_qfuns(text_phrase(organization = "Johnson & Johnson")) - dev_null <- search_pv(ampersand_query, endpoint = "assignees") - expect_true(TRUE) -}) + # need to use the assignee endpoint now + organization <- "Johnson & Johnson International" + text_query <- with_qfuns(text_phrase(assignee_organization = organization)) + phrase_search <- search_pv(text_query, endpoint = "assignee") + expect_true(phrase_search$query_results$total_hits == 1) -# Below we request the same data in built_singly and result_all, with the only -# difference being that we intentionally get throttled in built_singly by -# sending one request per patent number (instead of all requests at once). If -# the two responses match, then we've correctly handled throttling errors. -test_that("Throttled requests are automatically retried", { - skip_on_cran() + # also test that the string operator does not matter now + eq_query <- with_qfuns(eq(assignee_organization = organization)) + eq_search <- search_pv(eq_query, endpoint = "assignee") + expect_identical(eq_search$data, phrase_search$data) - res <- search_pv('{"_gte":{"patent_date":"2007-01-04"}}', per_page = 50) - patent_numbers <- res$data$patents$patent_number + # text_phrase seems to be case insensitive but equal is not + organization <- tolower(organization) - built_singly <- lapply(patent_numbers, function(patent_number) { - search_pv( - query = qry_funs$eq(patent_number = patent_number), - endpoint = "patent_citations", - fields = c("patent_number", "cited_patent_number"), - sort = c("cited_patent_number" = "asc") - )[["data"]][["patent_citations"]] - }) - built_singly <- do.call(rbind, built_singly) + text_query <- with_qfuns(text_phrase(assignee_organization = organization)) + phrase_search <- search_pv(text_query, endpoint = "assignee") + expect_true(phrase_search$query_results$total_hits == 1) - result_all <- search_pv( - query = qry_funs$eq(patent_number = patent_numbers), - endpoint = "patent_citations", - fields = c("patent_number", "cited_patent_number"), - sort = c("patent_number" = "asc", "cited_patent_number" = "asc"), - per_page = 1000, - all_pages = TRUE - ) - result_all <- result_all$data$patent_citations - - expect_identical(built_singly, result_all) + eq_query <- with_qfuns(eq(assignee_organization = organization)) + eq_search <- search_pv(eq_query, endpoint = "assignee") + expect_true(eq_search$query_results$total_hits == 0) }) + test_that("We won't expose the user's patentsview API key to random websites", { skip_on_cran() @@ -128,28 +140,339 @@ test_that("We can call all the legitimate HATEOAS endpoints", { skip_on_cran() single_item_queries <- c( - "https://search.patentsview.org/api/v1/assignee/10/", - "https://search.patentsview.org/api/v1/cpc_group/A01B/", - "https://search.patentsview.org/api/v1/cpc_subgroup/G01S7:4811/", - "https://search.patentsview.org/api/v1/cpc_subsection/A01/", - "https://search.patentsview.org/api/v1/inventor/10/", - "https://search.patentsview.org/api/v1/nber_category/1/", - "https://search.patentsview.org/api/v1/nber_subcategory/11/", - "https://search.patentsview.org/api/v1/patent/10757852/", - "https://search.patentsview.org/api/v1/uspc_mainclass/30/", - "https://search.patentsview.org/api/v1/uspc_subclass/30:100/" + "cpc_subclass/A01B/", + "cpc_class/A01/", + "cpc_group/G01S7:4811/", + "patent/10757852/", + "uspc_mainclass/30/", + "uspc_subclass/30:100/", + "wipo/1/", + "publication/20010000001/" ) + + # these currently throw Error: Internal Server Error + broken_single_item_queries <- c( + "cpc_subclass/A01B/", + "uspc_mainclass/30/", + "uspc_subclass/30:100/", + "wipo/1/" + ) + + single_item_queries <- single_item_queries[!single_item_queries %in% broken_single_item_queries] + dev_null <- lapply(single_item_queries, function(q) { - j <- retrieve_linked_data(q) + print(q) + j <- retrieve_linked_data(add_base_url(q)) expect_equal(j$query_results$total_hits, 1) }) multi_item_queries <- c( - "https://search.patentsview.org/api/v1/application_citation/10966293/", - "https://search.patentsview.org/api/v1/patent_citation/10966293/" + "patent/us_application_citation/10966293/", + "patent/us_patent_citation/10966293/" ) dev_null <- lapply(multi_item_queries, function(q) { - j <- retrieve_linked_data(q) + j <- retrieve_linked_data(add_base_url(q)) expect_true(j$query_results$total_hits > 1) }) + + + # We'll make a call to get an inventor and assignee HATEOAS link + # in case their ids are not persistent + # new weirdness: we request inventor_id and assignee_id but the + # fields come back without the _id + res <- search_pv('{"patent_id":"10000000"}', + fields = c("inventors.inventor_id", "assignees.assignee_id") + ) + + assignee <- retrieve_linked_data(res$data$patents$assignees[[1]]$assignee) + expect_true(assignee$query_results$total_hits == 1) + + inventor <- retrieve_linked_data(res$data$patents$inventors[[1]]$inventor) + expect_true(inventor$query_results$total_hits == 1) + + # Query to get a location HATEOAS link in case location_ids are not persistent + res <- search_pv('{"location_name":"Chicago"}', + fields = c("location_id"), + endpoint = "location" + ) + + location <- retrieve_linked_data(add_base_url(paste0("location/", res$data$locations$location_id, "/"))) + expect_true(location$query_results$total_hits == 1) +}) + +# Make sure gets and posts return the same data. +# Posts had issues that went undetected for a while using the new API +# (odd results with posts when either no fields or sort was passed +# see get_post_body in search-pv.R) + +test_that("posts and gets return the same data", { + skip_on_cran() + + bad_eps <- c( + "cpc_subclass" + # ,"location" # Error: Invalid field: location_latitude + , "uspc_subclass" # Error: Internal Server Error + , "uspc_mainclass" # Error: Internal Server Error + , "wipo" # Error: Internal Server Error + , "claim" # Error: Invalid field: claim_dependent + , "draw_desc_text" # Error: Invalid field: description_sequence + , "cpc_subclass" # 404? check the test query + , "uspc_subclass" # 404 + # , "pg_claim" # check this one + ) + + good_eps <- endpoints[!endpoints %in% bad_eps] + + z <- lapply(good_eps, function(endpoint) { + print(endpoint) + get_res <- search_pv( + query = TEST_QUERIES[[endpoint]], + endpoint = endpoint, + method = "GET" + ) + + g <- unnest_pv_data(get_res$data, pk = get_ok_pk(endpoint)) + + post_res <- search_pv( + query = TEST_QUERIES[[endpoint]], + endpoint = endpoint, + method = "POST" + ) + + p <- unnest_pv_data(post_res$data) + + expect_equal(g, p) + }) +}) + +test_that("nested shorthand produces the same results as fully qualified ones", { + skip_on_cran() + + # the API now allows a shorthand in the fields/f: parameter + # just the group name will retrieve all that group's attributes + # This is indirectly testing our parse of the OpenAPI object and actual API responses + fields <- fieldsdf[fieldsdf$endpoint == "patent" & fieldsdf$group == "application", "field"] + + shorthand_res <- search_pv(TEST_QUERIES[["patent"]], fields = c("application")) + qualified_res <- search_pv(TEST_QUERIES[["patent"]], fields = fields) + + # the request$urls will be different but the data should match + expect_failure(expect_equal(shorthand_res$request$url, qualified_res$request$url)) + expect_equal(shorthand_res$data, qualified_res$data) +}) + + +test_that("the 'after' parameter works properly", { + skip_on_cran() + + sort <- c("patent_id" = "asc") + big_query <- qry_funs$eq(patent_date = "2000-01-04") # 3003 total_hits + results <- search_pv(big_query, all_pages = FALSE, sort = sort) + expect_gt(results$query_results$total_hits, 1000) + + after <- results$data$patents$patent_id[[nrow(results$data$patents)]] + subsequent <- search_pv(big_query, all_pages = FALSE, after = after, sort = sort) + + # ** New API bug? should be expect_equal `actual`: 399 + expect_lt(nrow(subsequent$data$patents), 1000) + + # the first row's patent_id should be bigger than after + # now "D418273" + # expect_gt(as.integer(subsequent$data$patents$patent_id[[1]]), as.integer(after)) + + # now we'll add a descending sort to make sure that also works + sort <- c("patent_id" = "desc") + fields <- NULL # c("patent_id") + + results <- search_pv(big_query, all_pages = FALSE, fields = fields, sort = sort) + after <- results$data$patents$patent_id[[nrow(results$data$patents)]] + + subsequent <- search_pv(big_query, + all_pages = FALSE, after = after, sort = sort, + fields = fields + ) + + # now the first row's patent_id should be smaller than after + # should be expect_lt + expect_gt(as.integer(subsequent$data$patents$patent_id[[1]]), as.integer(after)) + skip("New API bug?") +}) + +test_that("the documentation and Swagger UI URLs work properly", { + skip_on_cran() + + documentation_url <- + 'https://search.patentsview.org/api/v1/patent/?q={"_text_any":{"patent_title":"COBOL cotton gin"}}&s=[{"patent_id": "asc" }]&o={"size":50}&f=["inventors.inventor_name_last","patent_id","patent_date","patent_title"]' + + results <- retrieve_linked_data(documentation_url) + + expect_gt(results$query_results$total_hits, 0) + + swagger_url <- "https://search.patentsview.org/api/v1/patent/?q=%7B%22patent_date%22%3A%221976-01-06%22%7D" + + results <- retrieve_linked_data(swagger_url, encoded = TRUE) + expect_gt(results$query_results$total_hits, 0) +}) + +test_that("an error occurs if all_pages is TRUE and there aren't any results", { + skip_on_cran() + + too_early <- qry_funs$lt(patent_date = "1976-01-01") + + results <- search_pv(too_early, all_pages = FALSE) + + # would like this test to fail! (meaning API added earlier data) + expect_equal(results$query_results$total_hits, 0) + + expect_error( + search_pv(too_early, all_pages = TRUE), + "No records matched your query" + ) +}) + +test_that("we can retrieve all_pages = TRUE without specifiying fields", { + skip_on_cran() + + query <- qry_funs$eq(patent_date = "1976-01-06") + sort <- c("patent_type" = "asc", "patent_id" = "asc") + + # here we aren't requesting fields but are requesting a sort + results <- search_pv(query, sort = sort, all_pages = TRUE) + + expect_gt(results$query_results$total_hits, 1300) +}) + +# Below we request the same data in built_singly and result_all, with the only +# difference being that we intentionally get throttled in built_singly by +# sending one request per patent number (instead of all requests at once). If +# the two responses match, then we've correctly handled throttling errors. +test_that("Throttled requests are automatically retried", { + skip_on_cran() + + res <- search_pv('{"_gte":{"patent_date":"2007-01-04"}}', size = 50) + patent_ids <- res$data$patents$patent_id + + # now we don't get message "The API's requests per minute limit has been reached. " + # so we'll testthat it takes over 60 seconds to run (since we got throttled) + # TODO(any): can we use evaluate_promise to find "Waiting 45s for retry backoff"? + + duration <- system.time( + built_singly <- lapply(patent_ids, function(patent_id) { + search_pv( + query = qry_funs$eq(patent_id = patent_id), + endpoint = "patent/us_patent_citation", + fields = c("patent_id", "citation_patent_id"), + sort = c("citation_patent_id" = "asc") + )[["data"]][["us_patent_citations"]] + }) + ) + + expect_gt(duration[["elapsed"]], 60) + + built_singly <- do.call(rbind, built_singly) + + # we'll also test that the results are the same for a post and get + # when there is a secondary sort on the bulk requests + sort <- c("patent_id" = "asc", "citation_patent_id" = "asc") + methods <- c("POST", "GET") + output <- lapply(methods, function(method) { + result_all <- search_pv( + query = qry_funs$eq(patent_id = patent_ids), + endpoint = "patent/us_patent_citation", + fields = c("patent_id", "citation_patent_id"), + sort = sort, + size = 1000, + all_pages = TRUE, + method = method + ) + result_all <- result_all$data$us_patent_citations + }) + + expect_equal(output[[1]], output[[2]]) + + # We'll do our own sort and check that it matches the API output + # We want to make sure we sent in the sort parameter correctly, where + # the API is doing the sort (since the we didn't need to page) + + second_output <- output[[2]] + + # Sorting logic using order() + sort_order <- mapply(function(col, direction) { + if (direction == "asc") { + return(second_output[[col]]) + } else { + return(-rank(second_output[[col]], ties.method = "min")) # Invert for descending order + } + }, col = names(sort), direction = as.vector(sort), SIMPLIFY = FALSE) + + # Final sorting + second_output <- second_output[do.call(order, sort_order), , drop = FALSE] + + expect_equal(output[[1]], second_output) + + # TODO(any): fix this: + # expect_equal says actual row.names are an integer vector and expected + # row.names is a character vector. Not sure why + row.names(output[[1]]) <- NULL + row.names(built_singly) <- NULL + + expect_equal(built_singly, output[[1]]) +}) + +test_that("we can sort on an unrequested field across page boundaries", { + skip_on_cran() + + # total_hits = 5,352 + query <- qry_funs$in_range(patent_date = c("1976-01-01", "1976-01-31")) + fields <- c("patent_title", "patent_date") + sort <- c("patent_date" = "desc", "patent_id" = "desc") + + r_ordered <- search_pv( + query = query, + fields = fields, + sort = sort, + all_pages = TRUE + ) + + fields <- c(fields, "patent_id") + api_ordered <- search_pv( + query = query, + fields = fields, + sort = sort, + all_pages = TRUE + ) + + # Remove patent_id before comparison. We're also indirectly testing that the + # patent_id field added by the first search_pv was removed, otherwise this + # expect equal would fail + api_ordered$data$patents[["patent_id"]] <- NULL + expect_equal(r_ordered$data, api_ordered$data) +}) + +test_that("sort works across page boundaries", { + skip_on_cran() + + sort <- c("patent_type" = "desc", "patent_id" = "desc") + results <- search_pv( + qry_funs$eq(patent_date = "1976-01-06"), + fields = c("patent_type", "patent_id"), + sort = sort, + all_pages = TRUE + ) + + double_check <- results$data$patents + + # Sorting logic using order() + sort_order <- mapply(function(col, direction) { + if (direction == "asc") { + return(double_check[[col]]) + } else { + return(-rank(double_check[[col]], ties.method = "min")) # Invert for descending order + } + }, col = names(sort), direction = as.vector(sort), SIMPLIFY = FALSE) + + # Final sorting + double_check <- double_check[do.call(order, sort_order), , drop = FALSE] + + expect_equal(results$data$patents, double_check) }) diff --git a/tests/testthat/test-unnest-pv-data.R b/tests/testthat/test-unnest-pv-data.R index eb2807cd..afe0fcbc 100644 --- a/tests/testthat/test-unnest-pv-data.R +++ b/tests/testthat/test-unnest-pv-data.R @@ -1,23 +1,78 @@ -context("unnest_pv_data") - eps <- get_endpoints() -test_that("", { +test_that("we can unnest all entities", { skip_on_cran() - # TODO(any): Remove: - skip("Temp skip for API redesign PR") - eps_no_loc <- eps[eps != "locations"] + # TODO(any): add back fields = get_fields(x) + # API throws 500s if some nested fields are included + + # locations endpoint is back but it fails this test + bad_endpoints <- c( + "location", "cpc_subclass", + "uspc_subclass", "uspc_mainclass", "wipo", + "claim", "draw_desc_text", "pg_claim" + ) + overloaded_entities <- c("patent/rel_app_text", "publication/rel_app_text") + good_eps <- eps[!eps %in% c(bad_endpoints, overloaded_entities)] + + z <- lapply(good_eps, function(x) { + print(x) - z <- lapply(eps_no_loc, function(x) { - Sys.sleep(1) pv_out <- search_pv( - "{\"patent_number\":\"5116621\"}", + query = TEST_QUERIES[[x]], endpoint = x, - fields = get_fields(x) + fields = get_fields(x, group = to_plural(x)) # requesting non-nested attributes ) + + expect_gt(pv_out$query_results$total_hits, 0) # check that the query worked unnest_pv_data(pv_out[["data"]]) }) expect_true(TRUE) + + # this will fail when the api is fixed + z <- lapply(bad_endpoints, function(x) { + print(x) + expect_error( + pv_out <- search_pv( + query = TEST_QUERIES[[x]], + endpoint = x, + fields = get_fields(x, group = to_plural(x)) # requesting non-nested attributes + ) + ) + }) + + # make it noticeable that all is not right with the API + skip("Skip for API bugs") # TODO: remove when the API is fixed/bad_endpoints removed +}) + +test_that("endpoint's pks match their entity's pks", { + skip_on_cran() + + # the overloaded_entities endpoints return the same entity, rel_app_texts, + # so we can't determine the endpoint from the entity like we can + # for the rest of the entities + overloaded_entities <- c("patent/rel_app_text", "publication/rel_app_text") + bad_endpoints <- c("uspc_subclass", "cpc_subclass", "uspc_mainclass", "wipo") + good_eps <- eps[!eps %in% c(bad_endpoints, overloaded_entities)] + + endpoint_pks <- lapply(good_eps, function(endpoint) { + print(endpoint) + get_ok_pk(endpoint) + }) + + entity_pks <- lapply(good_eps, function(endpoint) { + result <- search_pv(TEST_QUERIES[[endpoint]], endpoint = endpoint) + get_ok_pk(names(result$data)) + }) + + expect_equal(endpoint_pks, entity_pks) + + # this will fail when the api is fixed + z <- lapply(bad_endpoints, function(endpoint) { + print(endpoint) + expect_error( + result <- search_pv(TEST_QUERIES[[endpoint]], endpoint = endpoint) + ) + }) }) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R new file mode 100644 index 00000000..f38200b9 --- /dev/null +++ b/tests/testthat/test-utils.R @@ -0,0 +1,12 @@ +test_that("we can cast the endpoints that return the same entity", { + skip_on_cran() + + endpoints <- c("patent/rel_app_text", "publication/rel_app_text") + + nul <- lapply(endpoints, function(endpoint) { + results <- search_pv(query = TEST_QUERIES[[endpoint]], endpoint = endpoint) + cast <- cast_pv_data(results$data) + }) + + expect_true(TRUE) +}) diff --git a/tests/testthat/test-validate-args.R b/tests/testthat/test-validate-args.R new file mode 100644 index 00000000..acadf8cc --- /dev/null +++ b/tests/testthat/test-validate-args.R @@ -0,0 +1,87 @@ +# make sure deprecated warnings are always thrown- bypass 8 hour suppression +rlang::local_options(lifecycle_verbosity = "warning") + +test_that("validate_args throws errors for all bad args", { + skip_on_cran() + + # requesting the old plural endpoint should now throw an error + expect_error( + search_pv('{"patent_date":["1976-01-06"]}', endpoint = "patents"), + "endpoint" + ) + expect_error( + search_pv('{"patent_date":["1976-01-06"]}', method = "Post"), + "method" + ) + expect_warning( + search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = TRUE), + class = "lifecycle_warning_deprecated" + ) + expect_warning( + search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = 7), + class = "lifecycle_warning_deprecated" + ) + expect_warning( + search_pv('{"patent_date":["1976-01-06"]}', mtchd_subent_only = NULL), + class = "lifecycle_warning_deprecated" + ) + expect_warning( + search_pv('{"patent_date":["1976-01-06"]}', error_browser = "chrome"), + class = "lifecycle_warning_deprecated" + ) + + per_page <- 17 + expect_warning( + results <- search_pv('{"patent_date":["1976-01-06"]}', per_page = per_page), + class = "lifecycle_warning_deprecated" + ) + + # make sure the size attribute was set from the per_page parameter + expect_equal(per_page, nrow(results$data$patents)) + + expect_warning( + search_pv('{"patent_date":["1976-01-06"]}', page = 2), + class = "lifecycle_warning_deprecated" # unsupported page parameter + ) + expect_error( + search_pv( + '{"patent_date":["1976-01-06"]}', + fields = "patent_date", + all_pages = TRUE, + after = "3930272" + ), + "after" + ) + expect_error( + get_fields("assignee", groups = "cpc_current"), # valid group for a different endpoint + "for the assignee endpoint" + ) +}) + +test_that("per_page parameter warns but still works", { + skip_on_cran() + + expect_warning( + results <- search_pv('{"patent_date":["1976-01-06"]}', per_page = 23), + class = "lifecycle_warning_deprecated" + ) + + expect_equal(23, nrow(results$data$patents)) +}) + +test_that("group names can be requested as fields via new API shorthand", { + skip_on_cran() + + endpoint <- "patent" + shorthand <- get_fields("patent", groups=c("application")) + expect_equal(shorthand , "application") + shorthand_res <- search_pv(TEST_QUERIES[[endpoint]], fields=shorthand) + + explicit <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group == "application", "field"] + explicit_res <- search_pv(TEST_QUERIES[[endpoint]], fields=explicit) + + # the requests are different but the results should be the same + expect_failure(expect_equal(shorthand_res$request, explicit_res$request)) + expect_equal(shorthand_res$data, explicit_res$data) + +}) From 41ec5374c4a87d1fc96d612553e4a726e39f704a Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sun, 22 Dec 2024 13:37:03 -0600 Subject: [PATCH 090/103] test: updatng tests for new api version --- tests/testthat/test-validate-args.R | 72 +++++++++++++---------------- 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/tests/testthat/test-validate-args.R b/tests/testthat/test-validate-args.R index acadf8cc..88e0e0e5 100644 --- a/tests/testthat/test-validate-args.R +++ b/tests/testthat/test-validate-args.R @@ -1,5 +1,6 @@ -# make sure deprecated warnings are always thrown- bypass 8 hour suppression -rlang::local_options(lifecycle_verbosity = "warning") +# We can't use expect_warning() without adding a dependency to rlang +# to bypass 8 hour warning suppression +# rlang::local_options(lifecycle_verbosity = "warning") test_that("validate_args throws errors for all bad args", { skip_on_cran() @@ -13,36 +14,40 @@ test_that("validate_args throws errors for all bad args", { search_pv('{"patent_date":["1976-01-06"]}', method = "Post"), "method" ) - expect_warning( - search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = TRUE), - class = "lifecycle_warning_deprecated" - ) - expect_warning( - search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = 7), - class = "lifecycle_warning_deprecated" - ) - expect_warning( - search_pv('{"patent_date":["1976-01-06"]}', mtchd_subent_only = NULL), - class = "lifecycle_warning_deprecated" - ) - expect_warning( - search_pv('{"patent_date":["1976-01-06"]}', error_browser = "chrome"), - class = "lifecycle_warning_deprecated" - ) + suppressWarnings({ + result <- search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = TRUE) + # class = "lifecycle_warning_deprecated" + expect_gt(result$query_results$total_hits, 0) + }) + suppressWarnings({ + result <- search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = 7) + # class = "lifecycle_warning_deprecated" + expect_gt(result$query_results$total_hits, 0) + }) + suppressWarnings({ + result <- search_pv('{"patent_date":["1976-01-06"]}', mtchd_subent_only = NULL) + # class = "lifecycle_warning_deprecated" + expect_gt(result$query_results$total_hits, 0) + }) + suppressWarnings({ + result <- search_pv('{"patent_date":["1976-01-06"]}', error_browser = "chrome") + #class = "lifecycle_warning_deprecated" + expect_gt(result$query_results$total_hits, 0) + }) per_page <- 17 - expect_warning( - results <- search_pv('{"patent_date":["1976-01-06"]}', per_page = per_page), - class = "lifecycle_warning_deprecated" - ) + suppressWarnings({ + results <- search_pv('{"patent_date":["1976-01-06"]}', per_page = per_page) - # make sure the size attribute was set from the per_page parameter - expect_equal(per_page, nrow(results$data$patents)) + # make sure the size attribute was set from the per_page parameter + expect_equal(per_page, nrow(results$data$patents)) + }) - expect_warning( - search_pv('{"patent_date":["1976-01-06"]}', page = 2), - class = "lifecycle_warning_deprecated" # unsupported page parameter - ) + suppressWarnings({ + result <- search_pv('{"patent_date":["1976-01-06"]}', page = 2) + # class = "lifecycle_warning_deprecated" # unsupported page parameter + expect_gt(result$query_results$total_hits, 0) + }) expect_error( search_pv( '{"patent_date":["1976-01-06"]}', @@ -58,17 +63,6 @@ test_that("validate_args throws errors for all bad args", { ) }) -test_that("per_page parameter warns but still works", { - skip_on_cran() - - expect_warning( - results <- search_pv('{"patent_date":["1976-01-06"]}', per_page = 23), - class = "lifecycle_warning_deprecated" - ) - - expect_equal(23, nrow(results$data$patents)) -}) - test_that("group names can be requested as fields via new API shorthand", { skip_on_cran() From f023a317e503b4d663f5abd46e7c94c9bd56e841 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sun, 22 Dec 2024 17:11:11 -0600 Subject: [PATCH 091/103] added skip_on_ci()s --- tests/testthat/test-api-bugs.R | 9 +++++++++ tests/testthat/test-cast-pv-data.R | 2 ++ tests/testthat/test-check-query.R | 4 ++++ tests/testthat/test-get-fields.R | 2 ++ tests/testthat/test-print.R | 2 ++ tests/testthat/test-query-dsl.R | 3 +++ tests/testthat/test-search-pv.R | 17 +++++++++++++++++ tests/testthat/test-unnest-pv-data.R | 2 ++ tests/testthat/test-utils.R | 1 + tests/testthat/test-validate-args.R | 2 ++ 10 files changed, 44 insertions(+) diff --git a/tests/testthat/test-api-bugs.R b/tests/testthat/test-api-bugs.R index 2e5f2227..a7e31df6 100644 --- a/tests/testthat/test-api-bugs.R +++ b/tests/testthat/test-api-bugs.R @@ -80,6 +80,7 @@ test_that("there is case sensitivity on string equals", { test_that("string vs text operators behave differently", { skip_on_cran() + skip_on_ci() # # reported to the API team PVS-1147 query <- qry_funs$begins(assignee_organization = "johnson") @@ -161,6 +162,7 @@ eps <- (get_endpoints()) test_that("We can call all the legitimate HATEOAS endpoints", { skip_on_cran() + skip_on_ci() # these currently throw Error: Internal Server Error broken_single_item_queries <- c( @@ -182,6 +184,7 @@ test_that("We can call all the legitimate HATEOAS endpoints", { test_that("individual fields are still broken", { skip_on_cran() + skip_on_ci() # Sample fields that cause 500 errors when requested by themselves. # Some don't throw errors when included in get_fields() but they do if @@ -207,6 +210,7 @@ test_that("individual fields are still broken", { test_that("we can't sort by all fields", { skip_on_cran() + skip_on_ci() # PVS-1377 sorts_to_try <- c( @@ -250,6 +254,7 @@ test_that("we can't sort by all fields", { test_that("withdrawn patents are still present in the database", { skip_on_cran() + skip_on_ci() # PVS-1342 Underlying data issues # There are 8,000 patents that were in the bulk xml files patentsiew is based on. @@ -271,6 +276,7 @@ test_that("withdrawn patents are still present in the database", { test_that("missing patents are still missing", { skip_on_cran() + skip_on_ci() # PVS-1342 Underlying data issues # There are around 300 patents that aren't in the bulk xml files patentsiew is based on. @@ -291,6 +297,7 @@ test_that("missing patents are still missing", { test_that("we can't explicitly request assignee_ or inventor_years.num_patents", { skip_on_cran() + skip_on_ci() bad_eps <- c( "assignee", # Invalid field: assignee_years.num_patents. assignee_years is not a nested field @@ -313,6 +320,7 @@ test_that("we can't explicitly request assignee_ or inventor_years.num_patents", test_that("uspcs aren't right", { skip_on_cran() + skip_on_ci() # PVS-1615 @@ -333,6 +341,7 @@ test_that("uspcs aren't right", { test_that("endpoints are still broken", { skip_on_cran() + skip_on_ci() # this will fail when the api is fixed broken_endpoints <- c( diff --git a/tests/testthat/test-cast-pv-data.R b/tests/testthat/test-cast-pv-data.R index efcb1c5b..63efe59a 100644 --- a/tests/testthat/test-cast-pv-data.R +++ b/tests/testthat/test-cast-pv-data.R @@ -1,5 +1,6 @@ test_that("cast_pv_data casts patent fields as expected", { skip_on_cran() + skip_on_ci() pv_out <- search_pv( query = '{"patent_id":"5116621"}', fields = get_fields("patent") @@ -58,6 +59,7 @@ test_that("cast_pv_data casts assignee fields as expected", { test_that("we can cast a bool", { skip_on_cran() + skip_on_ci() # TODO(any): remove when the API returns this as a boolean fields <- c("rule_47_flag") diff --git a/tests/testthat/test-check-query.R b/tests/testthat/test-check-query.R index 8f846032..5b1bff86 100644 --- a/tests/testthat/test-check-query.R +++ b/tests/testthat/test-check-query.R @@ -1,6 +1,7 @@ test_that("errors are thrown on invalid queries", { skip_on_cran() + skip_on_ci() expect_error( search_pv(qry_funs$eq("shoe_size" = 11.5)), @@ -66,6 +67,7 @@ test_that("errors are thrown on invalid queries", { test_that("a valid nested field can be queried", { skip_on_cran() + skip_on_ci() results <- search_pv(qry_funs$eq("application.rule_47_flag" = FALSE)) @@ -74,6 +76,7 @@ test_that("a valid nested field can be queried", { test_that("the _eq message is thrown when appropriate", { skip_on_cran() + skip_on_ci() expect_message( search_pv(list(patent_date = "2007-03-06")), @@ -83,6 +86,7 @@ test_that("the _eq message is thrown when appropriate", { test_that("a query with an and operator returns results", { skip_on_cran() + skip_on_ci() patents_query <- with_qfuns( diff --git a/tests/testthat/test-get-fields.R b/tests/testthat/test-get-fields.R index 7eb316f0..777569a2 100644 --- a/tests/testthat/test-get-fields.R +++ b/tests/testthat/test-get-fields.R @@ -1,5 +1,6 @@ test_that("get_fields works as expected", { skip_on_cran() + skip_on_ci() expect_error( get_fields("bogus endpoint"), @@ -23,6 +24,7 @@ test_that("get_fields works as expected", { test_that("the endpoints are stable", { skip_on_cran() + skip_on_ci() # quick check of the endpoints - useful after an api update. We run fieldsdf.R # and do a build. This test would fail if an endpoint was added, moved or deleted diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R index cc0d5f6a..d8d7d01b 100644 --- a/tests/testthat/test-print.R +++ b/tests/testthat/test-print.R @@ -1,5 +1,6 @@ test_that("We can print the returns from all endpoints ", { skip_on_cran() + skip_on_ci() eps <- get_endpoints() bad_eps <- c("cpc_subclass", "uspc_subclass", "uspc_mainclass", "wipo") @@ -20,6 +21,7 @@ test_that("We can print the returns from all endpoints ", { test_that("we can print a query, its request, and unnested data", { skip_on_cran() + skip_on_ci() x <- "patent" q <- qry_funs$eq(patent_id = "11530080") diff --git a/tests/testthat/test-query-dsl.R b/tests/testthat/test-query-dsl.R index 840ed780..2f44b3a7 100644 --- a/tests/testthat/test-query-dsl.R +++ b/tests/testthat/test-query-dsl.R @@ -1,5 +1,6 @@ test_that("between works as expected", { skip_on_cran() + skip_on_ci() query <- qry_funs$in_range(patent_date = c("1976-01-06", "1976-01-13")) @@ -10,6 +11,7 @@ test_that("between works as expected", { test_that("with_qfuns() works as advertised", { skip_on_cran() # wouldn't necessarily have to skip! + skip_on_ci() # wouldn't necessarily have to skip! a <- with_qfuns( and( @@ -28,6 +30,7 @@ test_that("with_qfuns() works as advertised", { test_that("argument check works on in_range", { skip_on_cran() # wouldn't necessarily have to skip! + skip_on_ci() # wouldn't necessarily have to skip! expect_error( qq <- qry_funs$in_range("patent_id", c("10000000", "10000002")), diff --git a/tests/testthat/test-search-pv.R b/tests/testthat/test-search-pv.R index 494c3308..0946f2a5 100644 --- a/tests/testthat/test-search-pv.R +++ b/tests/testthat/test-search-pv.R @@ -7,6 +7,7 @@ endpoints <- get_endpoints() test_that("API returns expected df names for all endpoints", { skip_on_cran() + skip_on_ci() broken_endpoints <- c( "cpc_subclass", @@ -36,6 +37,7 @@ test_that("API returns expected df names for all endpoints", { test_that("DSL-based query returns expected results", { skip_on_cran() + skip_on_ci() query <- with_qfuns( and( @@ -52,6 +54,7 @@ test_that("DSL-based query returns expected results", { test_that("You can download up to 9,000+ records", { skip_on_cran() + skip_on_ci() # Should return 9,000+ rows query <- with_qfuns( @@ -66,6 +69,7 @@ test_that("You can download up to 9,000+ records", { test_that("search_pv can pull all fields for all endpoints", { skip_on_cran() + skip_on_ci() troubled_endpoints <- c( "cpc_subclass", "location", @@ -87,6 +91,7 @@ test_that("search_pv can pull all fields for all endpoints", { test_that("Sort option works as expected", { skip_on_cran() + skip_on_ci() out <- search_pv( qry_funs$neq(assignee_id = ""), @@ -101,6 +106,7 @@ test_that("Sort option works as expected", { test_that("search_pv properly URL encodes queries", { skip_on_cran() + skip_on_ci() # Covers https://github.com/ropensci/patentsview/issues/24 # need to use the assignee endpoint now @@ -129,6 +135,7 @@ test_that("search_pv properly URL encodes queries", { test_that("We won't expose the user's patentsview API key to random websites", { skip_on_cran() + skip_on_ci() # We will try to call the api that tells us who is currently in space in_space_now_url <- "http://api.open-notify.org/astros.json" @@ -138,6 +145,7 @@ test_that("We won't expose the user's patentsview API key to random websites", { test_that("We can call all the legitimate HATEOAS endpoints", { skip_on_cran() + skip_on_ci() single_item_queries <- c( "cpc_subclass/A01B/", @@ -207,6 +215,7 @@ test_that("We can call all the legitimate HATEOAS endpoints", { test_that("posts and gets return the same data", { skip_on_cran() + skip_on_ci() bad_eps <- c( "cpc_subclass" @@ -247,6 +256,7 @@ test_that("posts and gets return the same data", { test_that("nested shorthand produces the same results as fully qualified ones", { skip_on_cran() + skip_on_ci() # the API now allows a shorthand in the fields/f: parameter # just the group name will retrieve all that group's attributes @@ -264,6 +274,7 @@ test_that("nested shorthand produces the same results as fully qualified ones", test_that("the 'after' parameter works properly", { skip_on_cran() + skip_on_ci() sort <- c("patent_id" = "asc") big_query <- qry_funs$eq(patent_date = "2000-01-04") # 3003 total_hits @@ -300,6 +311,7 @@ test_that("the 'after' parameter works properly", { test_that("the documentation and Swagger UI URLs work properly", { skip_on_cran() + skip_on_ci() documentation_url <- 'https://search.patentsview.org/api/v1/patent/?q={"_text_any":{"patent_title":"COBOL cotton gin"}}&s=[{"patent_id": "asc" }]&o={"size":50}&f=["inventors.inventor_name_last","patent_id","patent_date","patent_title"]' @@ -316,6 +328,7 @@ test_that("the documentation and Swagger UI URLs work properly", { test_that("an error occurs if all_pages is TRUE and there aren't any results", { skip_on_cran() + skip_on_ci() too_early <- qry_funs$lt(patent_date = "1976-01-01") @@ -332,6 +345,7 @@ test_that("an error occurs if all_pages is TRUE and there aren't any results", { test_that("we can retrieve all_pages = TRUE without specifiying fields", { skip_on_cran() + skip_on_ci() query <- qry_funs$eq(patent_date = "1976-01-06") sort <- c("patent_type" = "asc", "patent_id" = "asc") @@ -348,6 +362,7 @@ test_that("we can retrieve all_pages = TRUE without specifiying fields", { # the two responses match, then we've correctly handled throttling errors. test_that("Throttled requests are automatically retried", { skip_on_cran() + skip_on_ci() res <- search_pv('{"_gte":{"patent_date":"2007-01-04"}}', size = 50) patent_ids <- res$data$patents$patent_id @@ -421,6 +436,7 @@ test_that("Throttled requests are automatically retried", { test_that("we can sort on an unrequested field across page boundaries", { skip_on_cran() + skip_on_ci() # total_hits = 5,352 query <- qry_funs$in_range(patent_date = c("1976-01-01", "1976-01-31")) @@ -451,6 +467,7 @@ test_that("we can sort on an unrequested field across page boundaries", { test_that("sort works across page boundaries", { skip_on_cran() + skip_on_ci() sort <- c("patent_type" = "desc", "patent_id" = "desc") results <- search_pv( diff --git a/tests/testthat/test-unnest-pv-data.R b/tests/testthat/test-unnest-pv-data.R index afe0fcbc..1d9e1ad5 100644 --- a/tests/testthat/test-unnest-pv-data.R +++ b/tests/testthat/test-unnest-pv-data.R @@ -2,6 +2,7 @@ eps <- get_endpoints() test_that("we can unnest all entities", { skip_on_cran() + skip_on_ci() # TODO(any): add back fields = get_fields(x) # API throws 500s if some nested fields are included @@ -48,6 +49,7 @@ test_that("we can unnest all entities", { test_that("endpoint's pks match their entity's pks", { skip_on_cran() + skip_on_ci() # the overloaded_entities endpoints return the same entity, rel_app_texts, # so we can't determine the endpoint from the entity like we can diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index f38200b9..fcfaca18 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,5 +1,6 @@ test_that("we can cast the endpoints that return the same entity", { skip_on_cran() + skip_on_ci() endpoints <- c("patent/rel_app_text", "publication/rel_app_text") diff --git a/tests/testthat/test-validate-args.R b/tests/testthat/test-validate-args.R index 88e0e0e5..5f0208b9 100644 --- a/tests/testthat/test-validate-args.R +++ b/tests/testthat/test-validate-args.R @@ -4,6 +4,7 @@ test_that("validate_args throws errors for all bad args", { skip_on_cran() + skip_on_ci() # requesting the old plural endpoint should now throw an error expect_error( @@ -65,6 +66,7 @@ test_that("validate_args throws errors for all bad args", { test_that("group names can be requested as fields via new API shorthand", { skip_on_cran() + skip_on_ci() endpoint <- "patent" shorthand <- get_fields("patent", groups=c("application")) From 591b82ed623a2421efdea71887a2b386f8b5c1f5 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sun, 22 Dec 2024 18:20:13 -0600 Subject: [PATCH 092/103] removed skip_on_ci()s --- tests/testthat/test-api-bugs.R | 12 ------------ tests/testthat/test-cast-pv-data.R | 3 --- tests/testthat/test-check-query.R | 4 ---- tests/testthat/test-get-fields.R | 2 -- tests/testthat/test-print.R | 2 -- tests/testthat/test-query-dsl.R | 3 --- tests/testthat/test-search-pv.R | 17 ----------------- tests/testthat/test-unnest-pv-data.R | 2 -- tests/testthat/test-utils.R | 1 - tests/testthat/test-validate-args.R | 2 -- 10 files changed, 48 deletions(-) diff --git a/tests/testthat/test-api-bugs.R b/tests/testthat/test-api-bugs.R index a7e31df6..55e7cb3d 100644 --- a/tests/testthat/test-api-bugs.R +++ b/tests/testthat/test-api-bugs.R @@ -10,7 +10,6 @@ add_base_url <- function(x) { test_that("there is trouble paging", { skip_on_cran() - skip_on_ci() # reprex inspired by https://patentsview.org/forum/7/topic/812 # Not all requested groups are coming back as we page, causing @@ -55,7 +54,6 @@ test_that("there is trouble paging", { test_that("there is case sensitivity on string equals", { skip_on_cran() - skip_on_ci() # reported to the API team PVS-1147 # not sure if this is a bug or feature - original API was case insensitive @@ -80,7 +78,6 @@ test_that("there is case sensitivity on string equals", { test_that("string vs text operators behave differently", { skip_on_cran() - skip_on_ci() # # reported to the API team PVS-1147 query <- qry_funs$begins(assignee_organization = "johnson") @@ -96,7 +93,6 @@ test_that("string vs text operators behave differently", { test_that("API returns all requested groups", { skip_on_cran() - skip_on_ci() # can we traverse the return building a list of fields? # sort both requested fields and returned ones to see if they are equal @@ -162,7 +158,6 @@ eps <- (get_endpoints()) test_that("We can call all the legitimate HATEOAS endpoints", { skip_on_cran() - skip_on_ci() # these currently throw Error: Internal Server Error broken_single_item_queries <- c( @@ -184,7 +179,6 @@ test_that("We can call all the legitimate HATEOAS endpoints", { test_that("individual fields are still broken", { skip_on_cran() - skip_on_ci() # Sample fields that cause 500 errors when requested by themselves. # Some don't throw errors when included in get_fields() but they do if @@ -210,7 +204,6 @@ test_that("individual fields are still broken", { test_that("we can't sort by all fields", { skip_on_cran() - skip_on_ci() # PVS-1377 sorts_to_try <- c( @@ -254,7 +247,6 @@ test_that("we can't sort by all fields", { test_that("withdrawn patents are still present in the database", { skip_on_cran() - skip_on_ci() # PVS-1342 Underlying data issues # There are 8,000 patents that were in the bulk xml files patentsiew is based on. @@ -276,7 +268,6 @@ test_that("withdrawn patents are still present in the database", { test_that("missing patents are still missing", { skip_on_cran() - skip_on_ci() # PVS-1342 Underlying data issues # There are around 300 patents that aren't in the bulk xml files patentsiew is based on. @@ -297,7 +288,6 @@ test_that("missing patents are still missing", { test_that("we can't explicitly request assignee_ or inventor_years.num_patents", { skip_on_cran() - skip_on_ci() bad_eps <- c( "assignee", # Invalid field: assignee_years.num_patents. assignee_years is not a nested field @@ -320,7 +310,6 @@ test_that("we can't explicitly request assignee_ or inventor_years.num_patents", test_that("uspcs aren't right", { skip_on_cran() - skip_on_ci() # PVS-1615 @@ -341,7 +330,6 @@ test_that("uspcs aren't right", { test_that("endpoints are still broken", { skip_on_cran() - skip_on_ci() # this will fail when the api is fixed broken_endpoints <- c( diff --git a/tests/testthat/test-cast-pv-data.R b/tests/testthat/test-cast-pv-data.R index 63efe59a..27dcae04 100644 --- a/tests/testthat/test-cast-pv-data.R +++ b/tests/testthat/test-cast-pv-data.R @@ -1,6 +1,5 @@ test_that("cast_pv_data casts patent fields as expected", { skip_on_cran() - skip_on_ci() pv_out <- search_pv( query = '{"patent_id":"5116621"}', fields = get_fields("patent") @@ -25,7 +24,6 @@ test_that("cast_pv_data casts patent fields as expected", { test_that("cast_pv_data casts assignee fields as expected", { skip_on_cran() - skip_on_ci() # ** Invalid field: assignee_years.num_patents. assignee_years is not a nested field pv_out <- search_pv( @@ -59,7 +57,6 @@ test_that("cast_pv_data casts assignee fields as expected", { test_that("we can cast a bool", { skip_on_cran() - skip_on_ci() # TODO(any): remove when the API returns this as a boolean fields <- c("rule_47_flag") diff --git a/tests/testthat/test-check-query.R b/tests/testthat/test-check-query.R index 5b1bff86..8f846032 100644 --- a/tests/testthat/test-check-query.R +++ b/tests/testthat/test-check-query.R @@ -1,7 +1,6 @@ test_that("errors are thrown on invalid queries", { skip_on_cran() - skip_on_ci() expect_error( search_pv(qry_funs$eq("shoe_size" = 11.5)), @@ -67,7 +66,6 @@ test_that("errors are thrown on invalid queries", { test_that("a valid nested field can be queried", { skip_on_cran() - skip_on_ci() results <- search_pv(qry_funs$eq("application.rule_47_flag" = FALSE)) @@ -76,7 +74,6 @@ test_that("a valid nested field can be queried", { test_that("the _eq message is thrown when appropriate", { skip_on_cran() - skip_on_ci() expect_message( search_pv(list(patent_date = "2007-03-06")), @@ -86,7 +83,6 @@ test_that("the _eq message is thrown when appropriate", { test_that("a query with an and operator returns results", { skip_on_cran() - skip_on_ci() patents_query <- with_qfuns( diff --git a/tests/testthat/test-get-fields.R b/tests/testthat/test-get-fields.R index 777569a2..7eb316f0 100644 --- a/tests/testthat/test-get-fields.R +++ b/tests/testthat/test-get-fields.R @@ -1,6 +1,5 @@ test_that("get_fields works as expected", { skip_on_cran() - skip_on_ci() expect_error( get_fields("bogus endpoint"), @@ -24,7 +23,6 @@ test_that("get_fields works as expected", { test_that("the endpoints are stable", { skip_on_cran() - skip_on_ci() # quick check of the endpoints - useful after an api update. We run fieldsdf.R # and do a build. This test would fail if an endpoint was added, moved or deleted diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R index d8d7d01b..cc0d5f6a 100644 --- a/tests/testthat/test-print.R +++ b/tests/testthat/test-print.R @@ -1,6 +1,5 @@ test_that("We can print the returns from all endpoints ", { skip_on_cran() - skip_on_ci() eps <- get_endpoints() bad_eps <- c("cpc_subclass", "uspc_subclass", "uspc_mainclass", "wipo") @@ -21,7 +20,6 @@ test_that("We can print the returns from all endpoints ", { test_that("we can print a query, its request, and unnested data", { skip_on_cran() - skip_on_ci() x <- "patent" q <- qry_funs$eq(patent_id = "11530080") diff --git a/tests/testthat/test-query-dsl.R b/tests/testthat/test-query-dsl.R index 2f44b3a7..840ed780 100644 --- a/tests/testthat/test-query-dsl.R +++ b/tests/testthat/test-query-dsl.R @@ -1,6 +1,5 @@ test_that("between works as expected", { skip_on_cran() - skip_on_ci() query <- qry_funs$in_range(patent_date = c("1976-01-06", "1976-01-13")) @@ -11,7 +10,6 @@ test_that("between works as expected", { test_that("with_qfuns() works as advertised", { skip_on_cran() # wouldn't necessarily have to skip! - skip_on_ci() # wouldn't necessarily have to skip! a <- with_qfuns( and( @@ -30,7 +28,6 @@ test_that("with_qfuns() works as advertised", { test_that("argument check works on in_range", { skip_on_cran() # wouldn't necessarily have to skip! - skip_on_ci() # wouldn't necessarily have to skip! expect_error( qq <- qry_funs$in_range("patent_id", c("10000000", "10000002")), diff --git a/tests/testthat/test-search-pv.R b/tests/testthat/test-search-pv.R index 0946f2a5..494c3308 100644 --- a/tests/testthat/test-search-pv.R +++ b/tests/testthat/test-search-pv.R @@ -7,7 +7,6 @@ endpoints <- get_endpoints() test_that("API returns expected df names for all endpoints", { skip_on_cran() - skip_on_ci() broken_endpoints <- c( "cpc_subclass", @@ -37,7 +36,6 @@ test_that("API returns expected df names for all endpoints", { test_that("DSL-based query returns expected results", { skip_on_cran() - skip_on_ci() query <- with_qfuns( and( @@ -54,7 +52,6 @@ test_that("DSL-based query returns expected results", { test_that("You can download up to 9,000+ records", { skip_on_cran() - skip_on_ci() # Should return 9,000+ rows query <- with_qfuns( @@ -69,7 +66,6 @@ test_that("You can download up to 9,000+ records", { test_that("search_pv can pull all fields for all endpoints", { skip_on_cran() - skip_on_ci() troubled_endpoints <- c( "cpc_subclass", "location", @@ -91,7 +87,6 @@ test_that("search_pv can pull all fields for all endpoints", { test_that("Sort option works as expected", { skip_on_cran() - skip_on_ci() out <- search_pv( qry_funs$neq(assignee_id = ""), @@ -106,7 +101,6 @@ test_that("Sort option works as expected", { test_that("search_pv properly URL encodes queries", { skip_on_cran() - skip_on_ci() # Covers https://github.com/ropensci/patentsview/issues/24 # need to use the assignee endpoint now @@ -135,7 +129,6 @@ test_that("search_pv properly URL encodes queries", { test_that("We won't expose the user's patentsview API key to random websites", { skip_on_cran() - skip_on_ci() # We will try to call the api that tells us who is currently in space in_space_now_url <- "http://api.open-notify.org/astros.json" @@ -145,7 +138,6 @@ test_that("We won't expose the user's patentsview API key to random websites", { test_that("We can call all the legitimate HATEOAS endpoints", { skip_on_cran() - skip_on_ci() single_item_queries <- c( "cpc_subclass/A01B/", @@ -215,7 +207,6 @@ test_that("We can call all the legitimate HATEOAS endpoints", { test_that("posts and gets return the same data", { skip_on_cran() - skip_on_ci() bad_eps <- c( "cpc_subclass" @@ -256,7 +247,6 @@ test_that("posts and gets return the same data", { test_that("nested shorthand produces the same results as fully qualified ones", { skip_on_cran() - skip_on_ci() # the API now allows a shorthand in the fields/f: parameter # just the group name will retrieve all that group's attributes @@ -274,7 +264,6 @@ test_that("nested shorthand produces the same results as fully qualified ones", test_that("the 'after' parameter works properly", { skip_on_cran() - skip_on_ci() sort <- c("patent_id" = "asc") big_query <- qry_funs$eq(patent_date = "2000-01-04") # 3003 total_hits @@ -311,7 +300,6 @@ test_that("the 'after' parameter works properly", { test_that("the documentation and Swagger UI URLs work properly", { skip_on_cran() - skip_on_ci() documentation_url <- 'https://search.patentsview.org/api/v1/patent/?q={"_text_any":{"patent_title":"COBOL cotton gin"}}&s=[{"patent_id": "asc" }]&o={"size":50}&f=["inventors.inventor_name_last","patent_id","patent_date","patent_title"]' @@ -328,7 +316,6 @@ test_that("the documentation and Swagger UI URLs work properly", { test_that("an error occurs if all_pages is TRUE and there aren't any results", { skip_on_cran() - skip_on_ci() too_early <- qry_funs$lt(patent_date = "1976-01-01") @@ -345,7 +332,6 @@ test_that("an error occurs if all_pages is TRUE and there aren't any results", { test_that("we can retrieve all_pages = TRUE without specifiying fields", { skip_on_cran() - skip_on_ci() query <- qry_funs$eq(patent_date = "1976-01-06") sort <- c("patent_type" = "asc", "patent_id" = "asc") @@ -362,7 +348,6 @@ test_that("we can retrieve all_pages = TRUE without specifiying fields", { # the two responses match, then we've correctly handled throttling errors. test_that("Throttled requests are automatically retried", { skip_on_cran() - skip_on_ci() res <- search_pv('{"_gte":{"patent_date":"2007-01-04"}}', size = 50) patent_ids <- res$data$patents$patent_id @@ -436,7 +421,6 @@ test_that("Throttled requests are automatically retried", { test_that("we can sort on an unrequested field across page boundaries", { skip_on_cran() - skip_on_ci() # total_hits = 5,352 query <- qry_funs$in_range(patent_date = c("1976-01-01", "1976-01-31")) @@ -467,7 +451,6 @@ test_that("we can sort on an unrequested field across page boundaries", { test_that("sort works across page boundaries", { skip_on_cran() - skip_on_ci() sort <- c("patent_type" = "desc", "patent_id" = "desc") results <- search_pv( diff --git a/tests/testthat/test-unnest-pv-data.R b/tests/testthat/test-unnest-pv-data.R index 1d9e1ad5..afe0fcbc 100644 --- a/tests/testthat/test-unnest-pv-data.R +++ b/tests/testthat/test-unnest-pv-data.R @@ -2,7 +2,6 @@ eps <- get_endpoints() test_that("we can unnest all entities", { skip_on_cran() - skip_on_ci() # TODO(any): add back fields = get_fields(x) # API throws 500s if some nested fields are included @@ -49,7 +48,6 @@ test_that("we can unnest all entities", { test_that("endpoint's pks match their entity's pks", { skip_on_cran() - skip_on_ci() # the overloaded_entities endpoints return the same entity, rel_app_texts, # so we can't determine the endpoint from the entity like we can diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index fcfaca18..f38200b9 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,6 +1,5 @@ test_that("we can cast the endpoints that return the same entity", { skip_on_cran() - skip_on_ci() endpoints <- c("patent/rel_app_text", "publication/rel_app_text") diff --git a/tests/testthat/test-validate-args.R b/tests/testthat/test-validate-args.R index 5f0208b9..88e0e0e5 100644 --- a/tests/testthat/test-validate-args.R +++ b/tests/testthat/test-validate-args.R @@ -4,7 +4,6 @@ test_that("validate_args throws errors for all bad args", { skip_on_cran() - skip_on_ci() # requesting the old plural endpoint should now throw an error expect_error( @@ -66,7 +65,6 @@ test_that("validate_args throws errors for all bad args", { test_that("group names can be requested as fields via new API shorthand", { skip_on_cran() - skip_on_ci() endpoint <- "patent" shorthand <- get_fields("patent", groups=c("application")) From a589f38590fee771378220863e8d1fea3a62d1c8 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sun, 22 Dec 2024 18:21:30 -0600 Subject: [PATCH 093/103] removed run_dontrun = TRUE from run_examples --- .github/workflows/R-CMD-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 5a260a18..a7ebdbc2 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -111,7 +111,7 @@ jobs: run: | options(crayon.enabled = TRUE) remotes::install_cran("devtools") - devtools::run_examples(run_dontrun = TRUE) + devtools::run_examples() shell: Rscript {0} - name: Upload check results From 399a0975181f1b14437de07b5a5cc3ba936f835e Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Mon, 23 Dec 2024 10:09:39 -0600 Subject: [PATCH 094/103] generated files --- docs/reference/retrieve_linked_data.html | 2 +- docs/reference/search_pv.html | 4 ++-- man/retrieve_linked_data.Rd | 2 +- man/search_pv.Rd | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/reference/retrieve_linked_data.html b/docs/reference/retrieve_linked_data.html index 148968c5..d54faca9 100644 --- a/docs/reference/retrieve_linked_data.html +++ b/docs/reference/retrieve_linked_data.html @@ -116,7 +116,7 @@

    Arguments

    api_key

    API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key -here.

    +here.

    ...
    diff --git a/docs/reference/search_pv.html b/docs/reference/search_pv.html index 640aba6f..e52cbf6f 100644 --- a/docs/reference/search_pv.html +++ b/docs/reference/search_pv.html @@ -132,7 +132,7 @@

    Arguments

    A value of NULL indicates to the API that it should return the default fields for that endpoint. Acceptable fields for a given endpoint can be found at the API's online documentation (e.g., check out the field list for the -patents +patents endpoint) or by viewing the fieldsdf data frame (View(fieldsdf)). You can also use get_fields to list out the fields available for a given endpoint.

    @@ -205,7 +205,7 @@

    Arguments

    api_key

    API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key -here.

    +here.

    ...
    diff --git a/man/retrieve_linked_data.Rd b/man/retrieve_linked_data.Rd index 90da02bd..838c2d7a 100644 --- a/man/retrieve_linked_data.Rd +++ b/man/retrieve_linked_data.Rd @@ -19,7 +19,7 @@ in the documentation or a Request URL from the \href{https://search.patentsview. Set to TRUE for Request URLs from Swagger UI.} \item{api_key}{API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key -\href{https://patentsview.org/apis/keyrequest}{here}.} +\href{https://patentsview-support.atlassian.net/servicedesk/customer/portals}{here}.} \item{...}{Curl options passed along to httr2's \code{\link[httr2]{req_options}} function.} } diff --git a/man/search_pv.Rd b/man/search_pv.Rd index bd00c370..094f362c 100644 --- a/man/search_pv.Rd +++ b/man/search_pv.Rd @@ -43,7 +43,7 @@ E.g., \code{qry_funs$gte(patent_date = "2007-01-04")} A value of \code{NULL} indicates to the API that it should return the default fields for that endpoint. Acceptable fields for a given endpoint can be found at the API's online documentation (e.g., check out the field list for the -\href{https://search.patentsview.org/docs/docs/Search\%20API/SearchAPIReference#patent}{patents +\href{https://search.patentsview.org/docs/docs/Search\%20API/SearchAPIReference/#patent}{patents endpoint}) or by viewing the \code{fieldsdf} data frame (\code{View(fieldsdf)}). You can also use \code{\link{get_fields}} to list out the fields available for a given endpoint. @@ -93,7 +93,7 @@ your query is very long (say, over 2,000 characters in length).} \item{error_browser}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}} \item{api_key}{API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key -\href{https://patentsview.org/apis/keyrequest}{here}.} +\href{https://patentsview-support.atlassian.net/servicedesk/customer/portals}{here}.} \item{...}{Curl options passed along to httr2's \code{\link[httr2]{req_options}} when we do GETs or POSTs.} From 1ab2581f8b7583b2010379c245d3d2a5cf48f1a5 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Mon, 23 Dec 2024 10:10:26 -0600 Subject: [PATCH 095/103] docs: API link update --- R/search-pv.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index 7eaff4fb..6f31fe25 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -179,7 +179,7 @@ get_default_sort <- function(endpoint) { #' A value of \code{NULL} indicates to the API that it should return the default fields #' for that endpoint. Acceptable fields for a given endpoint can be found at the API's #' online documentation (e.g., check out the field list for the -#' \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference#patent}{patents +#' \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference/#patent}{patents #' endpoint}) or by viewing the \code{fieldsdf} data frame #' (\code{View(fieldsdf)}). You can also use \code{\link{get_fields}} to list #' out the fields available for a given endpoint. @@ -217,7 +217,7 @@ get_default_sort <- function(endpoint) { #' your query is very long (say, over 2,000 characters in length). #' @param error_browser `r lifecycle::badge("deprecated")` #' @param api_key API key, it defaults to Sys.getenv("PATENTSVIEW_API_KEY"). Request a key -#' \href{https://patentsview.org/apis/keyrequest}{here}. +#' \href{https://patentsview-support.atlassian.net/servicedesk/customer/portals}{here}. #' @param ... Curl options passed along to httr2's \code{\link[httr2]{req_options}} #' when we do GETs or POSTs. #' From bc5467a59d983c029fae818137220ee2e40331af Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Mon, 23 Dec 2024 10:40:53 -0600 Subject: [PATCH 096/103] ropensci build changes --- .github/workflows/R-CMD-check.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index a7ebdbc2..ffc88f7e 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -53,6 +53,8 @@ jobs: RSPM: ${{ matrix.config.rspm }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} PATENTSVIEW_API_KEY: ${{ secrets.PATENTSVIEW_API_KEY }} + EXAMPLES_PARAM: ${{ github.event_name == 'push' && 'run_dontrun = TRUE' || 'run_dontrun = FALSE' }} + NOT_CRAN_VALUE: ${{ github.event_name == 'push' && 'TRUE' || 'FALSE' }} steps: - name: Checkout code @@ -100,6 +102,8 @@ jobs: - name: Check env: _R_CHECK_CRAN_INCOMING_REMOTE_: false + NOT_CRAN: ${{ env.NOT_CRAN_VALUE }} + run: | options(crayon.enabled = TRUE) rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") @@ -108,10 +112,11 @@ jobs: - name: Run examples env: _R_CHECK_CRAN_INCOMING_REMOTE_: false + run: | options(crayon.enabled = TRUE) remotes::install_cran("devtools") - devtools::run_examples() + devtools::run_examples( ${{ env.EXAMPLES_PARAM }} ) shell: Rscript {0} - name: Upload check results From 86b26aa50818428ae47ed845b16d8ff700f05d02 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Mon, 23 Dec 2024 14:14:20 -0600 Subject: [PATCH 097/103] checking access to the patentsview API key --- .github/workflows/R-CMD-check.yaml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index ffc88f7e..7808b4bc 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -53,8 +53,6 @@ jobs: RSPM: ${{ matrix.config.rspm }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} PATENTSVIEW_API_KEY: ${{ secrets.PATENTSVIEW_API_KEY }} - EXAMPLES_PARAM: ${{ github.event_name == 'push' && 'run_dontrun = TRUE' || 'run_dontrun = FALSE' }} - NOT_CRAN_VALUE: ${{ github.event_name == 'push' && 'TRUE' || 'FALSE' }} steps: - name: Checkout code @@ -69,6 +67,15 @@ jobs: - uses: r-lib/actions/setup-pandoc@v1 + - name: Check Secrets Access + run: | + if [[ "x${{ secrets.PATENTSVIEW_API_KEY }}" != "x" ]]; then + echo "Access to secrets" + else + echo "No access to secrets" + exit 1 + fi + - name: Query dependencies run: | install.packages('remotes') @@ -102,7 +109,6 @@ jobs: - name: Check env: _R_CHECK_CRAN_INCOMING_REMOTE_: false - NOT_CRAN: ${{ env.NOT_CRAN_VALUE }} run: | options(crayon.enabled = TRUE) @@ -116,7 +122,7 @@ jobs: run: | options(crayon.enabled = TRUE) remotes::install_cran("devtools") - devtools::run_examples( ${{ env.EXAMPLES_PARAM }} ) + devtools::run_examples(run_dontrun = TRUE) shell: Rscript {0} - name: Upload check results From 763d1f287adb37c214b56da4aa429eac8410b4b3 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Tue, 24 Dec 2024 15:36:15 -0600 Subject: [PATCH 098/103] test removal --- tests/testthat/test-get-fields.R | 32 ------------ tests/testthat/test-print.R | 36 ------------- tests/testthat/test-query-dsl.R | 36 ------------- tests/testthat/test-validate-args.R | 81 ----------------------------- 4 files changed, 185 deletions(-) delete mode 100644 tests/testthat/test-get-fields.R delete mode 100644 tests/testthat/test-print.R delete mode 100644 tests/testthat/test-query-dsl.R delete mode 100644 tests/testthat/test-validate-args.R diff --git a/tests/testthat/test-get-fields.R b/tests/testthat/test-get-fields.R deleted file mode 100644 index 7eb316f0..00000000 --- a/tests/testthat/test-get-fields.R +++ /dev/null @@ -1,32 +0,0 @@ -test_that("get_fields works as expected", { - skip_on_cran() - - expect_error( - get_fields("bogus endpoint"), - "endpoint must be", - fixed = TRUE - ) - - expect_error( - get_fields("patent", groups = "bogus"), - "for the patent endpoint", - fixed = TRUE - ) - - patent_pk <- get_ok_pk("patent") - fields <- get_fields(endpoint = "patent", groups = c("inventors")) - expect_false(patent_pk %in% fields) - - fields <- get_fields(endpoint = "patent", groups = c("inventors"), include_pk = TRUE) - expect_true(patent_pk %in% fields) -}) - -test_that("the endpoints are stable", { - skip_on_cran() - - # quick check of the endpoints - useful after an api update. We run fieldsdf.R - # and do a build. This test would fail if an endpoint was added, moved or deleted - found <- unique(fieldsdf$endpoint) - expecting <- get_endpoints() - expect_equal(expecting, found) -}) diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R deleted file mode 100644 index cc0d5f6a..00000000 --- a/tests/testthat/test-print.R +++ /dev/null @@ -1,36 +0,0 @@ -test_that("We can print the returns from all endpoints ", { - skip_on_cran() - - eps <- get_endpoints() - bad_eps <- c("cpc_subclass", "uspc_subclass", "uspc_mainclass", "wipo") - good_eps <- eps[!eps %in% bad_eps] - - lapply(good_eps, function(x) { - print(x) - j <- search_pv(query = TEST_QUERIES[[x]], endpoint = x) - print(j) - j - }) - - expect_true(TRUE) - - # make it noticeable that all is not right with the API - skip("Skip for API bugs") # TODO: remove when the API is fixed -}) - -test_that("we can print a query, its request, and unnested data", { - skip_on_cran() - - x <- "patent" - q <- qry_funs$eq(patent_id = "11530080") - print(q) - - fields <- c("patent_id", get_fields(x, groups = "ipcr")) - j <- search_pv(query = TEST_QUERIES[[x]], endpoint = x, fields = fields) - print(j$request) - - k <- unnest_pv_data(j$data) - print(k) - - expect_true(TRUE) -}) diff --git a/tests/testthat/test-query-dsl.R b/tests/testthat/test-query-dsl.R deleted file mode 100644 index 840ed780..00000000 --- a/tests/testthat/test-query-dsl.R +++ /dev/null @@ -1,36 +0,0 @@ -test_that("between works as expected", { - skip_on_cran() - - query <- qry_funs$in_range(patent_date = c("1976-01-06", "1976-01-13")) - - results <- search_pv(query, all_pages = TRUE) - - expect_gt(results$query_results$total_hits, 2600) -}) - -test_that("with_qfuns() works as advertised", { - skip_on_cran() # wouldn't necessarily have to skip! - - a <- with_qfuns( - and( - text_phrase(inventors.inventor_name_first = "George"), - text_phrase(inventors.inventor_name_last = "Washington") - ) - ) - - b <- qry_funs$and( - qry_funs$text_phrase(inventors.inventor_name_first = "George"), - qry_funs$text_phrase(inventors.inventor_name_last = "Washington") - ) - - expect_equal(a, b) -}) - -test_that("argument check works on in_range", { - skip_on_cran() # wouldn't necessarily have to skip! - - expect_error( - qq <- qry_funs$in_range("patent_id", c("10000000", "10000002")), - "expects a range of exactly two arguments" - ) -}) diff --git a/tests/testthat/test-validate-args.R b/tests/testthat/test-validate-args.R deleted file mode 100644 index 88e0e0e5..00000000 --- a/tests/testthat/test-validate-args.R +++ /dev/null @@ -1,81 +0,0 @@ -# We can't use expect_warning() without adding a dependency to rlang -# to bypass 8 hour warning suppression -# rlang::local_options(lifecycle_verbosity = "warning") - -test_that("validate_args throws errors for all bad args", { - skip_on_cran() - - # requesting the old plural endpoint should now throw an error - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', endpoint = "patents"), - "endpoint" - ) - expect_error( - search_pv('{"patent_date":["1976-01-06"]}', method = "Post"), - "method" - ) - suppressWarnings({ - result <- search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = TRUE) - # class = "lifecycle_warning_deprecated" - expect_gt(result$query_results$total_hits, 0) - }) - suppressWarnings({ - result <- search_pv('{"patent_date":["1976-01-06"]}', subent_cnts = 7) - # class = "lifecycle_warning_deprecated" - expect_gt(result$query_results$total_hits, 0) - }) - suppressWarnings({ - result <- search_pv('{"patent_date":["1976-01-06"]}', mtchd_subent_only = NULL) - # class = "lifecycle_warning_deprecated" - expect_gt(result$query_results$total_hits, 0) - }) - suppressWarnings({ - result <- search_pv('{"patent_date":["1976-01-06"]}', error_browser = "chrome") - #class = "lifecycle_warning_deprecated" - expect_gt(result$query_results$total_hits, 0) - }) - - per_page <- 17 - suppressWarnings({ - results <- search_pv('{"patent_date":["1976-01-06"]}', per_page = per_page) - - # make sure the size attribute was set from the per_page parameter - expect_equal(per_page, nrow(results$data$patents)) - }) - - suppressWarnings({ - result <- search_pv('{"patent_date":["1976-01-06"]}', page = 2) - # class = "lifecycle_warning_deprecated" # unsupported page parameter - expect_gt(result$query_results$total_hits, 0) - }) - expect_error( - search_pv( - '{"patent_date":["1976-01-06"]}', - fields = "patent_date", - all_pages = TRUE, - after = "3930272" - ), - "after" - ) - expect_error( - get_fields("assignee", groups = "cpc_current"), # valid group for a different endpoint - "for the assignee endpoint" - ) -}) - -test_that("group names can be requested as fields via new API shorthand", { - skip_on_cran() - - endpoint <- "patent" - shorthand <- get_fields("patent", groups=c("application")) - expect_equal(shorthand , "application") - shorthand_res <- search_pv(TEST_QUERIES[[endpoint]], fields=shorthand) - - explicit <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group == "application", "field"] - explicit_res <- search_pv(TEST_QUERIES[[endpoint]], fields=explicit) - - # the requests are different but the results should be the same - expect_failure(expect_equal(shorthand_res$request, explicit_res$request)) - expect_equal(shorthand_res$data, explicit_res$data) - -}) From 7a9c75560b27b7d41c27f3be618800403e5b10cf Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Tue, 24 Dec 2024 15:36:52 -0600 Subject: [PATCH 099/103] checking secrets access --- .github/workflows/R-CMD-check.yaml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 7808b4bc..24837882 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -68,13 +68,10 @@ jobs: - uses: r-lib/actions/setup-pandoc@v1 - name: Check Secrets Access + if: ${{ env.PATENTSVIEW_API_KEY == '' }} run: | - if [[ "x${{ secrets.PATENTSVIEW_API_KEY }}" != "x" ]]; then - echo "Access to secrets" - else - echo "No access to secrets" - exit 1 - fi + echo "No access to secrets" + exit 1 - name: Query dependencies run: | From 1e400a57bb6abc965a5b2ecd307b5ed0a31f48f1 Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Wed, 25 Dec 2024 10:59:31 -0600 Subject: [PATCH 100/103] only apply sort if user set one --- R/search-pv.R | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index 6f31fe25..0f6b32c0 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -347,19 +347,20 @@ search_pv <- function(query, arg_list <- to_arglist(fields, size, primary_sort_key, after) paged_data <- request_apply(result, method, query, base_url, arg_list, api_key, ...) - # apply the user's sort using order() + # we apply the user's sort, if they supplied one, using order() # was data.table::setorderv(paged_data, names(sort), ifelse(as.vector(sort) == "asc", 1, -1)) - - sort_order <- mapply(function(col, direction) { - if (direction == "asc") { - return(paged_data[[col]]) - } else { - return(-rank(paged_data[[col]], ties.method = "min")) # Invert for descending order - } - }, col = names(sort), direction = as.vector(sort), SIMPLIFY = FALSE) - - # Final sorting - paged_data <- paged_data[do.call(order, sort_order), , drop = FALSE] + if (!is.null(sort)) { + sort_order <- mapply(function(col, direction) { + if (direction == "asc") { + return(paged_data[[col]]) + } else { + return(-rank(paged_data[[col]], ties.method = "min")) # Invert for descending order + } + }, col = names(sort), direction = as.vector(sort), SIMPLIFY = FALSE) + + # Final sorting + paged_data <- paged_data[do.call(order, sort_order), , drop = FALSE] + } # remove the fields we added in order to do the user's sort ourselves paged_data <- paged_data[, !names(paged_data) %in% additional_fields] From 69c366bfb3d49d9928bcf166e7a3f386253c104a Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 28 Dec 2024 12:08:32 -0600 Subject: [PATCH 101/103] adding secondary sort --- R/search-pv.R | 61 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/R/search-pv.R b/R/search-pv.R index 0f6b32c0..fedbf9af 100644 --- a/R/search-pv.R +++ b/R/search-pv.R @@ -131,7 +131,8 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, api_key, .. x <- process_resp(x) # now to page we need to set the "after" attribute to where we left off - # we want the last value of the primary sort field + # we want the last value of the primary sort field and possibly a secondary + # sort field's value s <- names(arg_list$sort[[1]])[[1]] index <- nrow(x$data[[1]]) last_value <- x$data[[1]][[s]][[index]] @@ -140,8 +141,13 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, api_key, .. last_value <- pad_patent_id(last_value) } - arg_list$opts$after <<- last_value + if (length(arg_list$sort[[1]]) == 2) { + sfield <- names(arg_list$sort[[1]])[[2]] + secondary_value <- x$data[[1]][[sfield]][[index]] + last_value <- c(last_value, secondary_value) + } + arg_list$opts$after <<- last_value x$data[[1]] }) @@ -149,9 +155,20 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, api_key, .. } #' @noRd -get_default_sort <- function(endpoint) { - default <- c("asc") - names(default) <- get_ok_pk(endpoint) +get_unique_sort <- function(endpoint) { + + pk <- get_ok_pk(endpoint) + # we add a secondary sort if there is a sequence field + sequence <- fieldsdf[fieldsdf$endpoint == endpoint & grepl("^[^.]*sequence",fieldsdf$field), "field"] + + if (length(sequence) == 0) { + default <- c("asc") + names(default) <- pk + } else { + default <- c("asc", "asc") + names(default) <- c(pk, sequence) + } + default } @@ -321,30 +338,30 @@ search_pv <- function(query, return(result) } - # Here we ignore the user's sort and instead have the API sort by the primary - # key for the requested endpoint. - primary_sort_key <- get_default_sort(endpoint) + # Here we ignore the user's sort and instead have the API sort by the key(s) + # needed for row uniqueness at the requested endpoint. Otherwise paging breaks. + unique_sort_keys <- get_unique_sort(endpoint) # We check what fields we got back from the first call. If the user didn't # specify fields, we'd get back the API's defaults. We may need to request - # additional fields from the API so we can apply the users sort and then remove - # the additional fields. + # additional fields from the API so we can apply the users sort and the keys + # that quarantee uniqueness, later we'll remove the additional fields. returned_fields <- names(result$data[[1]]) - if (!is.null(sort)) { - sort_fields <- names(sort) - additional_fields <- sort_fields[!(sort_fields %in% returned_fields)] - if (is.null(fields)) { - fields <- returned_fields # the default fields - } else { - fields <- fields # user passed - } - fields <- append(fields, additional_fields) + if (is.null(sort)) { + sort_fields <- names(unique_sort_keys) + } else { + sort_fields <- c(names(sort), names(unique_sort_keys)) + } + additional_fields <- sort_fields[!(sort_fields %in% returned_fields)] + if (is.null(fields)) { + fields <- returned_fields # the default fields } else { - additional_fields <- c() + fields <- fields # user passed } + fields <- append(fields, additional_fields) - arg_list <- to_arglist(fields, size, primary_sort_key, after) + arg_list <- to_arglist(fields, size, unique_sort_keys, after) paged_data <- request_apply(result, method, query, base_url, arg_list, api_key, ...) # we apply the user's sort, if they supplied one, using order() @@ -362,7 +379,7 @@ search_pv <- function(query, paged_data <- paged_data[do.call(order, sort_order), , drop = FALSE] } - # remove the fields we added in order to do the user's sort ourselves + # remove the fields we added in order to do the user's and unique sorts paged_data <- paged_data[, !names(paged_data) %in% additional_fields] result$data[[1]] <- paged_data From 7d6ba0154c019107fa5d4e7d04824b21ea3e622f Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 28 Dec 2024 19:24:23 -0600 Subject: [PATCH 102/103] checking write access to the repo --- .github/workflows/R-CMD-check.yaml | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 5a13eb32..4e5fa4bd 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -21,14 +21,21 @@ jobs: pre-check: runs-on: ubuntu-latest steps: - - name: Confirm crew102 triggered the build + - name: Get User Permission + id: checkAccess + uses: actions-cool/check-user-permission@v2 + with: + require: write + username: ${{ github.triggering_actor }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Check User Permission + if: steps.checkAccess.outputs.require-result == 'false' run: | - if [ "${{ github.event.sender.login }}" == "crew102" ]; then - echo "Actor is crew102" - else - echo "Actor is ${{ github.actor }}, failing build." - exit 1 - fi + echo "${{ github.triggering_actor }} does not have permissions on this repo." + echo "Current permission level is ${{ steps.checkAccess.outputs.user-permission }}" + echo "Job originally triggered by ${{ github.actor }}" + exit 1 R-CMD-check: needs: [pre-check] From 570edb724d9484dd205a3875cac5b003d567e24d Mon Sep 17 00:00:00 2001 From: Russ Allen Date: Sat, 28 Dec 2024 21:51:46 -0600 Subject: [PATCH 103/103] workflow revert --- .github/workflows/R-CMD-check.yaml | 44 ++++++++---------------------- 1 file changed, 11 insertions(+), 33 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 4e5fa4bd..906c0fb1 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -21,26 +21,18 @@ jobs: pre-check: runs-on: ubuntu-latest steps: - - name: Get User Permission - id: checkAccess - uses: actions-cool/check-user-permission@v2 - with: - require: write - username: ${{ github.triggering_actor }} - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Check User Permission - if: steps.checkAccess.outputs.require-result == 'false' + - name: Confirm crew102 triggered the build run: | - echo "${{ github.triggering_actor }} does not have permissions on this repo." - echo "Current permission level is ${{ steps.checkAccess.outputs.user-permission }}" - echo "Job originally triggered by ${{ github.actor }}" - exit 1 + if [ "${{ github.event.sender.login }}" == "crew102" ]; then + echo "Actor is crew102" + else + echo "Actor is ${{ github.actor }}, failing build." + exit 1 + fi R-CMD-check: needs: [pre-check] runs-on: ${{ matrix.config.os }} - timeout-minutes: 60 name: ${{ matrix.config.os }} (${{ matrix.config.r }}) @@ -69,23 +61,11 @@ jobs: # Use the head SHA for pull requests ref: ${{ github.event.pull_request.head.sha || github.sha }} - - uses: r-lib/actions/setup-r@v2 + - uses: r-lib/actions/setup-r@v1 with: r-version: ${{ matrix.config.r }} - - uses: r-lib/actions/setup-pandoc@v2 - - - name: Check Secrets Access - if: ${{ env.PATENTSVIEW_API_KEY == '' }} - run: | - echo "No access to secrets" - exit 1 - - - name: Check Secrets Access - if: ${{ env.PATENTSVIEW_API_KEY == '' }} - run: | - echo "No access to secrets" - exit 1 + - uses: r-lib/actions/setup-pandoc@v1 - name: Query dependencies run: | @@ -96,7 +76,7 @@ jobs: - name: Restore R package cache if: runner.os != 'Windows' - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: ${{ env.R_LIBS_USER }} key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} @@ -120,7 +100,6 @@ jobs: - name: Check env: _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: | options(crayon.enabled = TRUE) rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") @@ -129,7 +108,6 @@ jobs: - name: Run examples env: _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: | options(crayon.enabled = TRUE) remotes::install_cran("devtools") @@ -141,4 +119,4 @@ jobs: uses: actions/upload-artifact@main with: name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check + path: check \ No newline at end of file