From ad1412e1988063dcc5258b5aef96e5627e0f3fda Mon Sep 17 00:00:00 2001 From: Michele Dolfi Date: Mon, 10 Feb 2025 15:13:01 +0100 Subject: [PATCH 1/3] update to the latest docling version Signed-off-by: Michele Dolfi --- .../language/pdf2parquet/requirements.txt | 9 +++--- .../test-data/expected/archive1.parquet | Bin 23390 -> 23075 bytes .../test-data/expected/metadata.json | 28 +++++++----------- .../test-data/expected/redp5110-ch1.parquet | Bin 9688 -> 9690 bytes .../test-data/expected_batch/metadata.json | 18 +++++------ .../expected_batch/redp5110-ch1.parquet | Bin 27226 -> 26903 bytes .../test-data/expected_json/archive1.parquet | Bin 10915 -> 10976 bytes .../test-data/expected_json/metadata.json | 18 +++++------ .../expected_json/redp5110-ch1.parquet | Bin 12078 -> 12508 bytes .../archive1.parquet | Bin 20006 -> 20091 bytes .../expected_md_no_table_no_ocr/metadata.json | 18 +++++------ .../redp5110-ch1.parquet | Bin 9688 -> 9690 bytes 12 files changed, 43 insertions(+), 48 deletions(-) diff --git a/transforms/language/pdf2parquet/requirements.txt b/transforms/language/pdf2parquet/requirements.txt index b4c6d06f2..e3cb4727f 100644 --- a/transforms/language/pdf2parquet/requirements.txt +++ b/transforms/language/pdf2parquet/requirements.txt @@ -1,5 +1,6 @@ -docling-core==2.3.0 -docling-ibm-models==2.0.3 -deepsearch-glm==0.26.1 -docling==2.3.1 +docling-core==2.18.0 +docling-ibm-models==3.3.1 +docling-parse==3.3.0 +deepsearch-glm==1.0.0 +docling==2.21.0 filetype >=1.2.0, <2.0.0 diff --git a/transforms/language/pdf2parquet/test-data/expected/archive1.parquet b/transforms/language/pdf2parquet/test-data/expected/archive1.parquet index 27b97529d293078f50b73cb1d5ca62a1529aee8e..83703cab3033b695db7c69327dedab68e6e6166f 100644 GIT binary patch delta 6277 zcmeHK2~<;8w@v~?lp%mgNFcyPLPQ{vc>t7vfJ`C=uoV={2!TK#0mTZ&fk6S8ZdC-t z2|;W@0ku@A2$Vs8Q6h*Hkb*@83n~Tc)Z$BMU9Z}{^~V2S>#fzhl6!K_-rwHe{m!s& zhA%=-o`;^(g({l@Py_e?lr{=t0)g0FaQbz#rHO?vPKdqrpiU){pX$35~_-^jX|HFpc}dy(u*f7FTCPW^tuzTnDjRb2%DPlgsj zl~6s)HFV;T%hb>!i_R8veUocCah;^NLFGtb&AG+g?aEG4;wS?BQz($5Vr0FzIGETx zix+ZI7JuDV7cJ?u3$N;KqNQ2;rRw6FXZXd%kwHBL&%N{_%+i^r7&D{cHv_Dfv|CuD z%Jg!bTt9Z)w<8xF;By~ai%zThxl>2SU}0KNGwPGD=Vh`SW+Ai7zACZXN$S;cAaDEP zVb-~F<+oA#p>5Wji?ewF*+pdiw?E^@6f!U&{}5A*lwmeJd=t1T;{2F%ru$ltW@_!e zW!+mX4-np2TWhtRjJtVawqG_D`Tb{t^y`w4N`0^R0_QM8=dc9Uc~N9$ZoTcoNNH}o z(+@c&@Kzgxhk9rhZVg;lw+yE8(PwF44Oa4ul*ZF=10gF#2i|`C=06fko$FVF%DY?o z()+wR$5}ByrXGKke9FSq>GAqp`MTYZTi>OXD&iUBr&ir|Z%-plEiR<&8ALX>?&>gp zB3R4q>kC&V#swd{N_PLMP&&P>{wIHe?^dtzpB{(Zs(<>~<2|I@BNrz(=CqDRKffnv zTACQFG@Rs4NStMi>Z%`H?_SuX+MQpS-Z#8%CnsPt>=ktue5C``#dcPYI}fB5CB-J0suT=agjfv zq$}OTdRsvdo{;tG+lu3{~2Kfj(aA8s~kJcjKLY_Fe~E^oXMO-hB|`muRe*4BQ@z zaIREbIDN{mv!x;M$DWZMj1sCaJrhbR1Id%K!@+I1w~Wr+5A>*G+Y!5iR+Yh9t~?Uh zM>eKNioD1rA>qoP!dGjM6ckrT-c+Hc^v5+V)E2z;U;Cu|_cqKK;ddjWW)-5oUx&8g>rT zGnfmT>YlfE-(`y1 zO>I}|Tw!g>Aa>{cD0*n=dU+kW=y~S-ti+q+1XtVm%PCi1V$93dPJg}Vapj@J_>!Qb zwoW&vJ{|1RU7K|w+h&!I(zfGeu~Vz5w^2JYk8GMxYD&aJmf0El*04=uMiN|Ug|Yq- zUydZqG8Qf^JK~ptjQK?+JQIq9?7|(*`N_x!qGcBZfm9r}*vp~UypQq_=hjM9z0R8@|wTa5Ly-@=p+qE+ucge^v)$J&*K%|IVNJ6!?jw2 znA{|SS8C+xwUk-~14EcA_M9v5T<){~xX#__+g*2Gj4TTXkg3&D(o+Aa2xnC&E<+%MLb#6L$+6mc3au$}ir?ax2Nn z>%CIh(P!1TRZ=y*MER6GrE08u>FC`xP`|VveJD^W$GlUe7PXi5Hvq9Ja!j^Wh;ZYK z-rZKebXj|UrD65ZV>GdAE`hU?@6mSR!QvIY?%!=7eU*4?tIwL${`RL}pJizUfwys^ zU0A>S5%loJ?=N|8=_Wd#UD5uvrG2TQ?#`6^6Fu`Wm9+an|FH6Q>|Dp_LqvbmPy_Dh z!Z!qu?#94Tu5c1~5od_wKTPc@#fFA2l*?*f8zdi?Ie9M!9cSQZ$xhng{!MTB0bzS4 z#A$z>%NYeae5SF)*~3g`c9b5Ppd7lay=&EJYl{&0VJz0zxDfAq8*^Mc9Z1JSl^{%Z zZXN(KxcBx};FUVdLI-YDsLWr_T=rQlyuEwxs?*zyvKKct;qF8p-rLxK#Qk{Mohf}Z zvF5Q~=(Q|Yxr>WVUDz0C}y< z&-j0lfw6o+X;?LT>K%FN)6c5t&!@&s8}7SoP3S5wc0T*`$u4WgsSD&C4bRArN5*Az5=+@W;<;*nKLowfYf8W>g%zQyPJID;v_v!>*@Cn?94Gn z6mxf6h1dQ`_~C`Pr5oN&ZHU% zgtm*19_ss2*)n?Z@s;oq=bLSrNo3;Lp63w-nd$X4u&F}}@&m9rqz0vLO5R;aRDLYo zGMM>DTzIY4(Xm&HpI^h($=lJCn5SF;9IdvoYo)Zm4P97cX>_)I3bsnOYv5A0ufF^E zf%AJK)|#1Lf-c}zZ`v%_a6Zs#2Vy*-Y06`Y6oJLbd2%Bc;_KQ}+;QgXR}aPp77a6s z%Pfvq3fW7UOWjSgEE=Nb%m$|QoB}rGAOFPcdbM}{xMyB#GWtVH z@~clGX?-XN$^?aXRKH9aq2MSujsdZoKs8h^^412gdKeZV3_&wQFc_(B-+$NIIA9f$ zxCE&Pn7>j1flSj&(e%J6gawE;3N}jxNXyxD`tNs5kHym5=u9xt)7?O0qyY0yBpR5D zw*gIzO+bHJ18@LuVML++7W#JwHpnx!(MbiU%bDO+ycxI=ZvZwKF9q{$aiF0|Fls&w zjiSj#CC_6tX_gb#!^Dz?VUuR4o>(ja1j*7QKr8{PkvG)ip!KqY&>9edwuJ)V{bly* zdj#52V^E^a5Wsp~lqjC309J_Y3Dkt>(rxN+ zYFQ-2IGIq5-Xc+`(-Wn<_^4#H0P0jikyNo9a93y&CaHbY0I=HJSEEz8la&FGAeSY` zC88vrMiDEVn?`XSE(~g{CM6W9*$JX0Ng_c~vK;KeIH{l94CJyH;BVTXgr+vof$IiZx#)mZlo;5zU0@v7dO>9; zlrY`_1$!ut;HO*@wb>Lbpl*ikz5(`gm%_)cLkS13gH|*zWdZb$|387CU!f3KyP?rJ zTSR(|NgLsUZAB)6eAPFVUx?(i|BNF=(5E(?X%2|Vy?=ON$=x&vI?CS?F`P-= zk9xT?@r_y15|b|(`|7ef_z&#shJr`WB~4Lt6nl0=)SJ^`1HGu8`dXVM?jPu^{Jr9F z=oArEz@v-EbUuS8q_Np-K7+&*3FrcrkVO_zm|`}K#;36a0y=|D7ShB*0Yf05ve`@? zolU0-F&q+4-K8;@R53}+BQwMlnt(>(3k3p-h)!bg)zd@c^Qa6ulS&c``7A0`ND)%? z7-FV?Mq*LKJicavl(YAtIFvq?4uKnCG1}2QMKnqut&N`5MmK2_$_Jr@#(VQ~_D>@0 z_@6TVBjITmbCAeJcup*W%wLYWzqKE)WQ^{64~*`6A2h`7gHdoN8wVSH0G6}O9j}f; z2|tZMasMUUIxbeTic(PVA8bh1WGF-kGgA|ykSKH_i9#fkLdY~{GE4ng&Lpv@3^sc? ziA*BV7*Gh@iw*slwR&fm{TcfngZYEa=;Ofidu{R&Sm|u-)deHeK7!JIfYT=UqZ)d;yss3l@wHfUH_-Ir!`ClAlNG-w#CmDFe gm8yrXt%stu3A*U9+=e&nH)`v$EFq8}cP{zg01=<SIZ+QD zZUCQ_01Nxw5g+y45Ksh25d<IVAP?!|TH5(g{`>mi4>Be10v3MkcQ!qB_TK%N`o5wXNVn3OnL!T=$hY7!a3tM6StDO%@IuXgVNRyw@Nll{rZcJZM?a**&x9$?;OE=3$KZN({-5sTsNm1n z&~E{^X0t;W>*v3BF6$J!bnKlA8e(sXeMWit6aNxEX@Y37IaQb9qKOyRG^L^j&v@87 z4dkVmlby~TdEw|PHmc}6CZ$Y8?T1N7`s-d57NF)}1ofgbd%?++aCFPht8s?#XwLqj z-dwA~_ZJ&ZU`x#`ovv*fx~>wu!*4JGAIWPoR4J3$(>^m~HpNjI|9Ct5%z^b0FODO3 zhaVmLhI)%?m+;&*voQ9Vc}Y6DQ{z7rrcUc@R`QNj+l|A_R&JdcRaKPGJK=(nE0p`VcPGm$>b-1+)C3*G ziOIj1TFE->YB~1W{W)&2tejrmsI2c__>C~-om_7_oN*kqBTc@>2B}=EtX=bzMOjxY z7+ueI$|~IA$1M*OOM?#{e6>%*SihAO5ctepCjLx;n=V#qBTI2pgzixnhgJu|sHa&a zRk15YDaMguXP4_;l%BFqUzc(9N=dwZMt+5Z@xcs-whXHA{>K`bttmEFRCz5&WTz$8YNGi2< zAKfvp-fo6ipDn}@b}0x0)mtS<*7vhRB32c&_iNtUj`W*vEVQ&!->rU(v^8A#;Chvn z%iZ(alu2c#_h4{MvkuGYJ8`yagRZ9rWfz~9zGtk})2Y5IcD9ncKVhgXX-t~*I8H9f z)80%rDWUtmZ*w^QE+(5ntZevLtSfeG$kRac-p22AGQF1Uws?0Eg(kR5hh#s)!&-j5w+c$;(i*;F<%~U5f>&Cxio#`}CLeQ7Hb}-N zN=QqkF=q>}YOb2`?O058TJj2BV>%ahDHqYaXsg@)=D4$I&3W1J_}~kR$5)vKAqRXG zYh}AL9W(fe<{Ja)8|H;$vE1|GH+kOKm?rugdXdEiNso*nGu_kI`bXk3Z~Ba!JQBex z^wh?h_e}c^bx1tAQX&?Da8AuxpFN8YNHNbwXYx@YX1cO`RJ)2nroC54&pvf3O2JtW z)|rJs?H|UDLU$P>yeeKe1C?9oAgCbTtjit^kNp_DS`O|Ix z8zU3d2kveQZH8O;n|2j{OKJ;?m~toWl5OeU^8Q4n<+Qf58zHdXko&qX@A+VHpG%-u z>zjS=Fyofx$t5qv9baB^5UdN9!8h(NaB6NAf61V2^$kgQ+DppH*XE$$&UN?mSD!t! zE#Sh`?N-zon_gkT*}fyY$lDKVy^0IsooyzbY-~Xf<|(!C&PwEUpL|jhe$=o270%PF zNV~i3dRcWPe&Na`w*d9J4QEvy-A9MZNavp1Zmd<&=^aE`XDR4TqWcb8)wUm)9J`a@ zuF@_f9%Dt6FW@@D4-6e=_A1*{bofe}Dl{Yfj#`!qj`_6;(Mw{w;7m!mtW1wrZsPQ` zpC7baJ<=h)*s5~T!z(T{uP`Mdq9lE*c=Wc$$b@I=C+b3Yw_~P_dKNqfin6MCa$W)b z=mVUPn)=s=cg$1Z7Q)k$6IzptZnr23!R^sfM|7FumHfC7dDE%n;Tsi$>vQ5ly7?7# zqT%dl4GP&QjOZNIPmbk!Y&gRRyprfz0oL5?lB=esrcjWJaTTXSQ_%a0cjwC~?%dl| zd}x2=ol^86w>X%EymJ5HrJVQAGv%SLfn!CJZK&s5}~BAt{W`9OAbqh z)m{v(u758);$tqKI;_fy%s(_>`q1tDWN=WMuKH>OLoBxgoh>ePPx5rp+E1n1)YK9; zR$!2-73p1{rf$~or!KyQb9E=V^LrAjA8nire6&=0-$*r3`d~~{REoai@zD^B1BKyXdbwHboY&;YgK(K$E$D| zcHFqzPOi30=nBO`D(UwvLog3tj2+nxVH*E z?QNS4%HdH9DzZsN={8GJuO5Njw`u4;-vdl3F+2O;U9fSnmYyhCq)lGRu6y`wd;V&% zZ(^5Ty4Lgxi#H}hQLD$N4a7sXTAQFgU3UyVXWcPDED46wtVW8poW>BH${v{bBN~Z% z(}VVeC;T;dUvC9I67H{s9yG4hs%>IkK}=*@$GqJMWud5;VzI4_T?uK=O2(x^f4pox z-rg6~X2@8jmnzAD+DaYoc!E2Qj|i@inBO&9V0l5fWTRng8^e;Rf3x;f=yZj->)tgd z>yoZbcYXJMp(xzoLPe9h7-;q0T50!ma>yJ~1D5gRtiMB=!CTox@E*)F6}5&UzJZqw zcFibnbH^&!&E!J{X-(YDXN*WpU-rO#z}#qOU2cRRN)sLU{bWH0!WqXg|1emw*ty}L z+0dLBQ9bBXZt>NVZ)F*}wCp5O+T62CBbB#O*3X}^TyNqlivS@M5f~FueI}29Az&yn z$nY_;?x>X$6j;#F7DJ=az-hQYumslzUcxniI6GsMNcP(mkB;Pqg@>}yO#bp?572C< z3Hz)Ca^4(iFG{v%V3$=s$?`XIy4F`k^nl+H; zUf_E}eW1$47??5CT9Lwy)MdBO1KB)!B-)-GNf!WUSBk6ymlqOBM{l8rvIQ|ff@sPf z8L5FO0ydK$$m2$F`8-2!IwO?r#219I1x6-x9!n6-i)3>|9Wl9VUQ}odpC<}&7@x(B ziUG7Yu%&tI-5fSODq6sftlOw61`$T{SZqOLlnBb@1xE5iqnBwTgXp{1Y!PQ5o5klv z1+l~Ef)I9;NfTA_OsY7 zB3?#`tXKRo2@?f`Ybx@H9=eMg$<^fZOhf6sz-ZCnMM8FHq#1hMGM&gE#st`D=DHFe zPYrG9Z~;GDz-33#MX|WD(PkyiV2^EJBvXW=3z$LNC^mC>Wc1Kb{_aQsujvEn+5<#b zsVb%yfst3DuocGGOz1yr^`Eu+H(aZ>zztj?-AEu%dbz#zyzxcq5-x$fQ^ z*Gx=jVM021Bg9vcFOLh)df~l&Vd=+TFfFMyJ#O;ly4lcOe%3zf$pQl@hTyvP=#3Q6 zUmWom8lA=>vDj2o8jVggB@*#?QHxA5<0}ap0Ctiv;;-(5vCsR!0E%P^%y}sST_jIP_5%P#R)d`y0%K$xT-=sNB}kAV?i_5zFzMzRoyt~hxshEM7b%ALsaK>6Hb~xzt$0h z@)QpsJ#QsuwOj7!ZD6zwyPNsmTqV(@u~{y_{oK-b(djM{e-&k+Er&w4U@;gpiUoy5 zB2vgC27^f@QE6l*P4t5!5@~GF$AW<;FmWPN5)Fr^-S=}eA@hfXK3$aEGC&!%wjL^gp$Vlvnq3o?yKCbO7C1{6mkuvVt$ z#wd_LQ59Z*VqXt~v3U1O&>lqiA2#6H1TZKHqAmgva0JoP$D86Uym3@Z3$i7iCaM^SBr=JBGb0eG zL@F5!O7)ga!})W30+x}5nH z#jt-){kC7FeH$_CpYy&*^nad1Uu;(X>-?$oRqoUg!~T79hG-nF|H)%t9r7PNV3))E mzdsF^(SJS(iBgmGU?dbPfjoYo?y`?RR3cRi1oB+xh5s)YIBIqP diff --git a/transforms/language/pdf2parquet/test-data/expected/metadata.json b/transforms/language/pdf2parquet/test-data/expected/metadata.json index f5961f843..2d9adf085 100644 --- a/transforms/language/pdf2parquet/test-data/expected/metadata.json +++ b/transforms/language/pdf2parquet/test-data/expected/metadata.json @@ -5,15 +5,11 @@ "job name": "pdf2parquet", "job type": "pure python", "job id": "job_id", - "start_time": "2024-11-13 08:35:51", - "end_time": "2024-11-13 08:36:23", + "start_time": "2025-02-10 14:18:13", + "end_time": "2025-02-10 14:18:21", "status": "success" }, - "code": { - "github": "github", - "commit_hash": "12345", - "path": "path" - }, + "code": null, "job_input_params": { "batch_size": -1, "artifacts_path": null, @@ -23,42 +19,40 @@ "ocr_engine": "easyocr", "bitmap_area_threshold": 0.05, "pdf_backend": "dlparse_v2", - "double_precision": 0, + "double_precision": 8, "checkpointing": false, "max_files": -1, "random_samples": -1, "files_to_use": [ ".pdf", - ".docx", - ".pptx", ".zip" ], "num_processors": 0 }, "execution_stats": { - "cpus": 147.5, + "cpus": 23.6, "gpus": 0, - "memory": 33.72, + "memory": 29.99, "object_store": 0, - "execution time, min": 0.522 + "execution time, min": 0.127 }, "job_output_stats": { "source_files": 2, "source_size": 605137, "result_files": 2, - "result_size": 33078, - "processing_time": 4.221, + "result_size": 32765, + "processing_time": 3.93, "nrows": 3, "nsuccess": 3, "nfail": 0, "nskip": 0 }, "source": { - "name": "/Users/dol/codes/data-prep-kit/transforms/language/pdf2parquet/python/test-data/input", + "name": "/Users/dol/codes/data-prep-kit/transforms/language/pdf2parquet/test-data/input", "type": "path" }, "target": { - "name": "/Users/dol/codes/data-prep-kit/transforms/language/pdf2parquet/python/output", + "name": "/Users/dol/codes/data-prep-kit/transforms/language/pdf2parquet/output", "type": "path" } } \ No newline at end of file diff --git a/transforms/language/pdf2parquet/test-data/expected/redp5110-ch1.parquet b/transforms/language/pdf2parquet/test-data/expected/redp5110-ch1.parquet index 3e08723a07d4168bc0c00f9cab92b9010e6f77f5..d6777ae8724c7914143418132b77566eee8dc754 100644 GIT binary patch delta 5299 zcmd5=XH=8hwj~4zAYceB^w2v75&{vVBfa-(C_#{NXmTh@=)DIKkzS?urie%vqy=e$ zR28Wrpa=>+Ja3$L-*|7_^Bm{iA8-GfW9_x(UVHAj_ZZ(dr~O(xbpiMG9)O`%PKSY* zfe1oGB-nE04CBM=5kU01=_{+qicgs<(Bpgg?dSfi-U~A*LxDn#BONIl1|>ZWlVibx zGatEc8N4sSvWxYNfC@kppi0Y*<&>~NjUBtc!rc`)Hl)D@A_OL_a%%MZAGt1D8%;}% ze=V=k4d+?R$}Ic_v&35i*a3R@KmZpG=*>2i=SvE4fv&t3CLedKkj#Mmq$-}qDa5YKTp<>bri zK0IWwWiDz~?SE}#rPijeR$7k+_7X2Uez>n{w$Wo-NCb@Nz07wowJRQZGzgCl3sVY{ zb}QcT&{4Ru7UM5;BdX0Ou`hpMOq(twv5&S3oLZN`e#m#ARV!-Pk~OswNNm|A_9Y$k zNn*$oFA}t8y75KFaeXsG`CS{%zf1MrPL}WbL2gWKkGF;OV_+mnIg!MwwnMR-?S{~< z_L{gKqj)IM;G`FuM(^=!3H~)1ZVSf`&d6M#*rsG7)rnbpo zW__+w75d@R-p%=#r2255(i$i&R*VG z$Ab}cfE3ozDU{d|^0>d3Qywp*Kf>X~)f_??GF5pkniB=_8Wt?FErp^$|Sr8>nR^>diX7Vf)i@_q?Asc3{qIYA@fd9Gti^EYcULkpvS>2;0?caYmE_*AB}YN!4=y-O_eVyRE5Lw$ z)OysqtfI#LqitioVtg8j2+p@B%~TC1G;57uy&7h6rdgeWbJJ>}rJ!h#XRkxp^^ig6 zZM=tFca#CrEnusNvzsa2(IGEdmgmGDj~zbtX1x@wvK+7qw$lwvRCxMcl)ycuE%c0T z+7fBkF6V6ddiGJ&x}urXwck=0Iw#yDA0G0?$5Pk6nb6I%Q3=!0tim-+-wI(~Z74ZY zZC%KiLB@M_Ft(kT6&7vFrFfgO0aGCoCg&F4fH2hUOgm;D2iOY9asrtawr%2T zb4AVU4*gPu%h8d;-aB6fu`kTlWWZ!xQx?NeHm=XE)TGDRmi4q;)(3q06$7L_x)x1t zO7Aor1hKs5Ggqk#!*Lz3a(BP41cup67u|*Qtesl}8G~6XGN}~8Lo3h2$$^=Y(Ijy+ z9K6D+UIw9(Pj|>o8*^G0VmYT?7*Dm{WREI z7$LJ4irKQ{!=7eaXKZ|6&E5#Eos3vlOm3LG&7zh>4jke#9>dX{3?vNSm**0cy}nj! z*bZ@e@^mnxwwOK{wm7D8X=78QG+~GB6WDN-1`L5i2E#OB17aT;bkFBekH#GWX~=<9 z$xQ^I&!Uw`X1{2S2hC<4FAF2zXD3*#YH>ulZ*fe2V8k2E-2p_BK{5Ln zA_Rwu{d(JgO?=1|SF>|>%h#RL%8vxIUV?9=8O@6Bx^{ZS>|<5Nb@&rxWoVpQJd~ob zUY$|o$%XKkeV}ZFp=D-(Ww9Hv=JYlrhT=W5%3ZJq$D&y>#Q3)mJjB`}EXa(0DhW)Sw$) z8|+Bb7BW^ZS>8?D`tn7w2m565!t-zUhWx~61D|dm>u&L|50+6c)oGC zJn!bE0svxgo43-5Mapk+LOD7(|I&*G&h$Dk=@8R z)Qc`T(Dirj>Dzl@LUAXbiybVG`P5t9owTzkU>0!97=qEq9M=|b6l%`(Mh?=CtxTTIk~Kj69%;~=}R zx&G9W#EY0|7u7e&T{i<`%2l~ev23dLhM>)7N83DQc_>iO2t{~=hDB!OcWCT$CEM#( zyLVd!BifzP599RBsXKHB#w*c}@9h%fvTnOJE9?(tIEP<6YncZ(HF-jKE^#D+-dSu0 zZ8p_yAN1>*m1IJ5d+2Vs2n$;l=!+j^=fox@f7Gv3xj`S{$(X}RLW&uYU-ssppoxD; z>brL-gun5#A^R{uL0iXCD0LY6iiiei3x7MFwGgzmRT=OsiBIcE+tY2#IWF`LZb#9F zPDnqb)osFYBX6D+)1>W~Csc6VATxWf(TSU9h!Xe^N|c;niSlGSqG%}$3mjz#60=64q_^n;Y2TnI;jIyqCEWHFy~i`n|5ZVOY@m0H1&DWT6EBObRpFT{SHKA zZ6yTH75n1@C^8zCP74?exWtK%RJ1>KDSkG1#bXU~A@c z%4XAPAaRmq8&a0&HgrX0^d@n>LgOZHxNrUd!I_Q%M{Yp|5;ap6 zShv9t;te88)?kWfue8~Ac8=Csqh4GJ&upoaD$~MzoSpQ&x)f2)1qX=-mGbS^o}_sy zo0hXywX0+~Y!A7^n_~2m;{;%EN-q3vFHtKCHrB8ddZwBtQ}q2e1Ns-!n9$oTJo>g# zZ&O;MUyV$6+~46_OX$(YEeWBzy4&LSmh%S|Q!B#OOwOv6-Nn0{4n-J6apC<(p`!Zz zRQrl{mc*`E{o$&hYt$<-3$S@?BS$@3Sks9TBMSqNMqk*PyrAduM*2f5jK67FDB|2e z_A@6{MdLC?tD$$ax+mE8r7+mfE~>kpEO&1FlS_Qh(CO0$madDqoV;UoGx?$j+RpX# z!(BK>4w~My=3r~PEQd=c9x~b2+rmMeRhiEv$%&iLIab!=4*@OTxU*QBgZI%ncG%M> zZ|)nNdXhLK6M4Ti-??}5mh4I2y-|^b^`nk?QHR!u4?6+8`PSMLQo_pW**6%G2KAh^oKar#-k{Qaiv?z{kX@G&5obRfu!$-xrIVD=5n0aV{C-VvKZS z4pGj*d_1-%sQJ3yo!9}=FDictZKa)PqdI&RJ`zT%tpALeI<|4D9m%G}e<|a^$0b@~ zzSvI{R0uk~rVY7bcH&zvOMRa6)utq=&%PHC_P5GhQN{7VanRq3my_2O1~Z7eGD5C8 zW`4}|dl^V6^6lbh?Qz>JKaU7n(gI!=PvG=i z0#;hw-gjoe6E!xYM0h?4U^(|{0G)cxrDSw_amtd z#=#I1#qnGBaiNZPzHqAw7|7gL;(l{RvZwDM6AUvCj7)^NPyVQ;`vOY|o_XblX;vER zrYJ;nb^5zMcWW>uk2{WZFR{pvGRsq|I@wIPw@a0{y1hLaPg>q&I&UpGccW&@O@zC^ zd*L-te$(jQ0f8?{%Hq9o(Kv~f{PRKD9+V#xRf86)z@^p6z32ceac+-Dqdn>aN&w2n z%aJ1;Hmo)sE20~##i1I0wYs9kkHzHfswC#*D5OkwykrM8hcyGES`L_VK<;9qTGA<`wld?*bPM6w&b)8#{_0SZo4s11(Whi1+g>)_ z;|8hOxcOfHS3RVzI3|B&4H%FmwAl7ixzy}k3 zMofL-*}lqj_Q2*K6LzHW7~A%hiXP|98-O~bVbCx(ue_th6chXUB~s+lR>W0 zrnu{qy2(z{YLG&qpl>myo+cupYlw%Fk;x-Ll+xrHPe7aIa8^ArO={W))LT5P9RaXC z<~Ao|1K(K^{#t3}+1~?{i#`tH9u~&Hm#RxRh`y1u6YmR<{tzKS;@^bV{?HZG4fTtm zaC7CUpz2PDG(L#p_!Qr)?9x_W=(9E8yXU3t9@m3wYYN0w0Rg1YH* zDvMX3zwPfJR&FlrZKXCy_Qb_bE->lLj`X(Rd^7g8$epU}yUkF-+F)GKNLT?yL-6N7 z61zi%nsl%-pa&no#)ZEGVQLBnmjj5=M0k59C>NUOKL?cjPsB(h_+R)UinoR6{zBF# zT6X*g2$WYE?jV7c!P<+#o#6IjNEs&?F|<8GTFhPwg>rC~LLj9br0}Z@blSg^`HOso zOpN#}Ix($ZS^O6%4Z(~}eDpp5?MT3X|6Yfwjsl33RjI-CE!2@Em(>UM_V##XHQ6C* zB3;n}BlXOh??P;A`-bGx7#6vdJmcL53&!HA)2`m5;35LxR~Q&@PADly6jB=QEFptM zIiMY-5mG28N2G)lR>lE!5kw%Qq>*qLdj#AO;b<@AEa`-hw0Fj$rQwcfxHK9LRs5G2 zfEN__3%vk2_+&;X?i9#BV9sA;#)cvi{MU&8R%_t@xjVceSQy}m$1pK-d-DD1%sryE$2f3AX$hwroFx|7Q^gEgPO0%E}}q2A33*fSX7luS&>Vm6Q;d zl$Mr)BYv7wnne;W`A_Dde=+xmuyc4Aj8`$+p1&URQ`Nsn|4XV1AwU;<=|(dUwfGbNBl<@JW%vg$0eCEyf7t0lk^i0aPf9NwpU&b3*uvjt;U{_J z!hhbB&oT!9^fU#qu>*eT1z^EXu#5j&FCjZU-iD+6|E*Wq>Hm)Xn2GuST^_k;;P0OH cMd?3|8h?dLj$*9}z(~wb&N$cP$K^!wA6)i|UjP6A delta 5317 zcmd5L^_Dz5u{1+JUwrm_wKmuoOkd2@xDKMjIqT8j~6^Gla!>Ahb+Y z6G{OEFaiMlwW6n%?mR9&fd1-3HlcZsd`FPWNx{9>-fZqO75LgW?Q8z~%jjWxs9Y~z#ZV4LbwUnmM{fg+g8WX;?PHw5s zEp|%Z);5_5x-grRkadhC8Us0jdPE-}iooQ~-ka`8O)Cv?+hUn-BPV@-Az3szUMd(S z50>=cB&ccDXsNgw-UClPx~V3+bdY)ma+b>SCIP`hbr4tA_hM{u<{A|FdO_Rc3_F4| z{m{#~o`2i>*%oHA$Lq3f&&$(}awBU`J)b+CMQkTYk8?^=&yzQ3uO)Q0Q0E?Nej%_% zM}NF@WT#>j!Y`~_Sr=zGyrr)z;5;|f)^-3t;(b2W=Vo->!NKP+OZjo*`{0=G@NO&? zd)=p6vVX4IE4}mlmfjUw*A+M&0U;x*dx(KYeF^q|n75fE`rOFFHmtZS6+Fn)+vrl6 zC>t8B=$&FZ>{))f-X6iGUBE2ZT1)uKTWr&l%9i+xtQRb*V|s*(o3B}kGFUV$vg}@3 zL5*lw(~#LA0L1>Z!yQTg!KnuxwiIJoc_=(S&+-hw4FLF4Bx{9`ch7#f)Sz@++09C- z4=jz7xU2iETC>%)VO6e36`#}C0+gL`e@&otRo!e$ z;tsyLcoR4AxRJU}%c)ixN)Xt&J#1-JTWLBY`OC(d73rHkQ$`<9(aV=iIN}7?)cUNH} zzf>Z1XDFv+#6mT=%gY7Vu1@xje=F*o#szKGN1FFzechJNE9DJj5bQwd*aBUzrtG9b zQ>*4|vhSFq&T#W}#YW1(0SODXM}C?Xuj>`sY=-tKdB+pzS?gPGu5d@mAn8lJGE^T1 zQ_&dhPK=)n%$PBjmVQNOG{nij9~v5(_Yb0BVLV)v=3JKx;SP%0PwWg4%%iyy?$S-y zLkLa!Qus+;gn|@nXk9XPGUDG8yU#RrSN}Gdzj5XE?5YVNIK1qu zY`2*^owHZCM_B$RY451VY6i_!M$wLQOZW0NAJUmRmwO5{dRN_g@Qy53oVp4T?5SMJ ziQ_e)>j^+pk-GJ|EkSx2<2r8Tq%&*`RlF8Ead%9Svq2I~FoP(b5IxmJ*B4}nU=poI zzv@#_Gk2G;@fzWLHj31)7Z-Z?1LI>ETCb4SS|lY9-;NG+?d72HwC z>bG>K-FfL4YiF-CRox3p&|Z}O0MobbXZ?v+Q2a9B4znCL-d=CNXaR!RwM*=ZFVmjF z2FOP0EK|l5O6~1GZX+5V1qwp~QTPS_YB{!KCVMK*7_cFSeV+b?v4Nx5I^AR z3fFHPf+F9{6BL-|<#TJ><7H{4M&*i!h57O-z6Da9L$?7afOfJW?cJ`C&{Ok~(vzvG zng=DpI=W~Qs)akR?3(d3Icb4@;Y~<7NfUP8jlK8IOd#Ex`9oSp1}3a!rgD1D>sZYX z$=Q}xP2%5QmxZ^V;+dHY zWr1pcX?kUndBlwL`4_pLD$*KPR+4|Y{Sn-p+&+p zxQQYYCyxtvOk%xgP!i(Y+-kA8AZ{uiDhQl8rnga&i^xF5M9?y+dJHKc`BH+G z&XY+)nF@!Q++lt@vb_sDnY#y8lL|#QSm7nVsGK*`WJrH+LWg%o0nS~qV-7(c`R6V( zn0LZVOVFEw*fC`UA+7U~H-zQj4uf&8YN%iwl&a<;YrsXeB7-p=<`7YEby?+*+RH;5 zKc9t**{|l~)b}9Vgk4wW=2rmao7(SN?$w^{NbVrX#L=UTv(>6Z`xSL@8XoWE&{wVO zPK)8NLHht-gzMW{mwufU;*G_%0O-QJ$~fAd-+Rh6GS^#Iq7r!mi!gV&?PwK=ZlzJ} zDVwp}%zv2%zv)kkPbhS;c7!LBRNW{YzXEUV1Yx{3>aB|j579I%#&?;K5gN~=mJ8cN zhiv!u;7ma`sO!c>G_auAxw30EpHGva@3@7o^ZBTbuq1M*8~x(wis|dQb60s7`^iaN zKir*j4kk?Z@)pO(*0_xe2HH(v-zCb*CsaZjCCzl59f~HZrd#fHsfwmvvEMpx7@eEt zn;DG7H70H$2yYv)(+Ho3>h}X50@V_BY}FWA8K^ju&U%(^`8rPJed=FSHO!5Nn=SO2 zVWIKDLy95>$*G;H69=f9IiieD24Jb@tj@IAa?QC%QUbe{18;n$m1XMME219&rtv;D zHjc5Q3j}C^IM6(UNo>9wb~k0;gBf|7>Zk76eAo8pA$TM4OhE1LbxvgKiS#K@#de-r zI*U$@E=+QmT&VAzI76y`H)dD#DK=(2IyFYhIB|%wXuQ-Jv!7 zN@^>J*prWm{>B9%2%s0KS>`?^0j4Nu|}kjS5E^$Wl4TKMFf5 zlOZx&XScIB6uvJJRusE=8t$vwWAcTWa&=y%yn^z*ggp6N;T>;lw)^z@mK?*~c6IAK zm~9B97Sg8b5xmVXg$`59oRh)NcrEVNbK%?#2)c?F-pfTLkZ(CfDBlLeLHjG^hq^v} z1lq0v*r8$8@c|hwWn+_>UMyb1oYI--{J1CkO@bb}B!X-HrN|3}g0++0_~?(86u%TMo63ZrWud=$Q~$x+4OZIzxk*O8*S@g7#c6eaeprXuwxgK9C!ktz2SFT=eQ zI=IX?QCwR47kJU^`E$Cr@OEk3$4{>i1ux(2L7C{b_ z{i}M{gWhX@IW0P4Pw(K z4gHF%5fi@sJmbnO)`2$umRr#VLLQgd`sak6oQ@T@)NxZJyUfVN;!HNnM9oGj_}&Pq z?3^-vajVQr5R%8)wv+k0qwW9 z>3TtRFj(G<^)P3u?et)srI>m7!`OGv&yLmL^aCYAzz;p5^8~%BDd$I~wwgghrA4e(E`ftRl`OsCYJ+D6lesXP z_lfUt8B(^cp5hb60*eF@U#RN{xcUh!i%|#l?unXMo~|orD|V0%5qbA=tOU!0ppEq{ zu7pP27l80VXH}-lA{zpuNaRP(AdBEBbZxGLGG1~$+Ie-c=w-o&*exrB=~7bRoUEC} z0`;qwOQ%f{TctHZMU_`4Hj#{ zqo}=y8+~>V$FFEMbgCTRZ0H>tiXS@&=kB=|t$QFHls3&DaRyABXkUtjC;{7uzU(L> zEh9^{AEW>XL=&|cSW#%ee}8bQ-w_PJ|3COglxW6y<4;s=GjI^s8Q}yB4sDAQx3v?- zQBa}zz$GaB-1w+B?k^p|5KeG@1$5o&o z^u!nj7%_=SSp7f5`IAx?0;}d__5c$6fq(g4M=HMp0+c%GAQikW%6fKZf@W*#>Pflm zjM*c;C9ls@M9lJgcWHZtG7h~jO!j1%WRD&vJ?eV<9HN8-{2dh(#IbgE)@Um!DV&uw z+Exl{hqkuCTm4w1C2@EwNilJ}wY8O`Bpz>rmXZ>;mcol;B_wU})>vsA9*wh;wt@dK z#UC*IR1cU$jDf)k`yjq{Bff0Ip9A{0dH?U7LH_!1Kc*l^;lfMQVSsbH@c!=G-_cLs z{*K{45Ye2G9k@wsWMLB+Fyo6cWB(cagH6QmEYd7kVGKqXBW{3}zAP?zSxj66BZd)^ zk|N%KvxxtL92_Br7W)Ue-vk{cmce-x67YPbmOs_}C*}W?>W2VIKaQZFvJF=FYyZHh ze$V_b`l-p^Mt|!lo0x*UC>mtT*KPAdnZGUiyIYCdNM_=%toMPd#QO+7${`27qv|Zg zBoNqE?aR&q{8Rr41aX8z<2o=9~rpUrqa?y??=m2nxR6JR|>adWKlR3Hui( b4gHHL6EP@R>iH5NjDn8}Hd^hCvZ4Ghu1|pP diff --git a/transforms/language/pdf2parquet/test-data/expected_batch/metadata.json b/transforms/language/pdf2parquet/test-data/expected_batch/metadata.json index 8756a013e..63289ef92 100644 --- a/transforms/language/pdf2parquet/test-data/expected_batch/metadata.json +++ b/transforms/language/pdf2parquet/test-data/expected_batch/metadata.json @@ -5,8 +5,8 @@ "job name": "pdf2parquet", "job type": "pure python", "job id": "job_id", - "start_time": "2024-11-13 08:37:05", - "end_time": "2024-11-13 08:37:11", + "start_time": "2025-02-10 14:45:21", + "end_time": "2025-02-10 14:45:28", "status": "success" }, "code": { @@ -36,29 +36,29 @@ "num_processors": 0 }, "execution_stats": { - "cpus": 143.9, + "cpus": 28.6, "gpus": 0, - "memory": 34.21, + "memory": 24.32, "object_store": 0, - "execution time, min": 0.1 + "execution time, min": 0.113 }, "job_output_stats": { "source_files": 2, "source_size": 605137, "result_files": 1, - "processing_time": 3.364, + "processing_time": 3.426, "nrows": 3, "nsuccess": 3, "nfail": 0, "nskip": 0, - "result_size": 27226 + "result_size": 26903 }, "source": { - "name": "/Users/dol/codes/data-prep-kit/transforms/language/pdf2parquet/python/test-data/input", + "name": "/Users/dol/codes/data-prep-kit/transforms/language/pdf2parquet/test-data/input", "type": "path" }, "target": { - "name": "/Users/dol/codes/data-prep-kit/transforms/language/pdf2parquet/python/output", + "name": "/Users/dol/codes/data-prep-kit/transforms/language/pdf2parquet/output", "type": "path" } } \ No newline at end of file diff --git a/transforms/language/pdf2parquet/test-data/expected_batch/redp5110-ch1.parquet b/transforms/language/pdf2parquet/test-data/expected_batch/redp5110-ch1.parquet index 9e3302c8cd3fb8dd8382798d51897fa0b8935746..bc4b054f18c96eb9f4a9279dec521c75a1a50698 100644 GIT binary patch delta 10027 zcmeHtcT`jFwqO#FA|0f+03jg10HLLTV(3+hpaRlD5~N89p(tqRf^-fby(6H4^d=xx znt(J5(m@dwL{LEyo#=aa)_d#TH#2MIy_xyrertU@`F1~h@3Z$wnVSLMo&wkMgQ-QU zVBQ=@Q(gvM5H|=U+oX8poPrkZ0DTvll-EJJql_54ySt$ZQrzuGB(j@dx=u|}&qEUZ z@-a+8cY938s=O~|C;7k`WzBjx7;?C6?^mQF(b$O|N>X5Lfx!|+&gYujGUsFyp-3{b-LWuboWfNKl`g(@M z#5iIyITziWiCmN)OuGGak#tXXVf%4a#Do)~euuC??jIiLI`38?{Z_lKm1!Smt3Wn3 z&hGa+Jzwyg|diALWN>&L<&AJ6ciIX!g-Z- z=;$K{!dDVRYc6A~sePAa1(R3}Vx;;~SB89v7-bPb#=uzmv(sww~tFbMhggP#L9d}Y9@}Wt=ZY% z6;d!8Tzt##YbWQv(lzP-msbv>6FdwDbNe2rws3>)qDORoGIj2do{L+gS6lOO9aA-G z-uqJW;cQZ`UX)_;Un3>D$rgbCYA^M3w7@4t+>MO^WQ9p>nAS=>cb;36A~ymlX@7VY zyKh&Z2rE(yl-i5?vIZmQGruX_X1%TLWVDpx435-Zn*JE`mvr!W(7^3Me&Q2+$7}vT zoyUnuA-r9xzSM`Y69RHEPPpbdQCoUxbJoMJlI#8t^jimqGqU^n^&i_mvRv0QsxklI zxBD&M&&aeXbi=Y~9W86Gi)oxg=s&+9_3QgHVHxDj9z%XMTk(MOdH)2QWsa-UF=mp% ziLMt+3yTB%r6+sES^D{HaF&_JB~SUr9%Io32XznhiwjYmyO()5T7!L$v&5ar2iESY zD_2PWTKxQC*OF1Rm|)oQtvB;|f*8NdhBwn|7Ycad#q}mE8W7kwGm$;46jDG{ud>w0 zX%5gOtW6JQN*c~KJ!-wcYk|wg-936cQ=vD^A#N?cVzs3| zQRdndawCztnQ5Gd&4E{}f5_BM@`7vm$j&p1!7v)~{JHv%V5E0!^SPALj@Xbyl#o=& zxMbA4>Lr)<)|~#bm`}oE{O3bA1G$OEMJ3L&hOe&5Kk}6)LPOCX?z~GL3GlXz_>dUG z2$Mr1P!y^j8`SomBtU5!quI#HQ!SEhK1*M$>wBA*+%Yx>Y94=Jqf~oadH)$=H}g4{+nR<=Z*J3vu-#7f?&I?w$~iu%{tJKC!(e+}&y&=m%%zU%IK zWB!C+!AvzxH40Y#o>NxgX9|dd*=$q%_hlTc#5st(VsS~H>mr}L^Ukx6>qwk!0WwIw3=u*b$yxD#zLZ4 zHsm>97h>6j-pQ*YnCNj`aMPIez{7w$LQxw!xf+r(Ew@OXT@5u(+tFi?I%phXj}VV< zEIV-N9}H789;}@`*ZWG^pR-`$1!sxxz;**hhQeFG^)8j}bB0DC+3r3Zs)P0)a&0b-}y*i`ZY4q3{i++>ed+dCvY@ zrHy>K4~&?c_>3u1vgRaU%*b&|a+pt4hMMhEFWZg|g$LEVGJ4jz@U0-O&NC<9(@1tC zVqA=+wGugH-k9&Fy&gr8GCW^l%;$h}DBP7-@^LdqV>kA^(*z$fd!q`!Zlcwn$2i>` zUxR%fh;85CcCtBae_1F{uRL%TDBsHDPjPrvk&(KrpBN{v-_BLfG=k=J(hSh-q~^FF z*s8bgu@0X0Hq*1^tC^V$kSd+twj8Xelh)CUN&c94XqFE3ys|Y@5j~VA%#XA||)i;z>X08&a7{b1NY0A3G`?=}rox78@!pBo*<07xW>PRzl3Kc43ZhX;N5a5$8 znB8!4-6AtUh`n}tcT-AX+s>jns*9Rc#^JO&sw+OPzoc?ir*K$CKlhOkIxiDZJaVDM zGRk5X0`gOkH#zsoBaaRKIxcgj1`(Q3WR?p&uZevx9(XjYJ9jE`SEKV`qZq(pw2D?Jomf{34#;5$X{yTt)5ac?&D9=ht8abzh!@om7Z zhg$-4;cJyuiF3BNFsOfxXODXGCf&Nc-}zB-F^%+PF|t=xL)-a|lTl|C1^kV!ti(^( zE0s_xmKesj<=a|etO)u#x|rAVJK+qD=L#1dY#P?Asus!xuK~w71IwMpfv}RzNmiPV=;T3XB+j#8E~3Iz0fp1WrJ+ZeB4;`#JaGbsound5mK!T_?NdUegbf5u z+`^89b|G)?^+x`xvkAo$Kje8b9m~N+jhg~^fTX-{j;kn^_+q(@nT++Cn&ou^tV7mHKu%9~wyFjZZ_S=TJ zOLLOJ>iif?-`YTdojHr9#|UU(ai=q#pR!WlL9-s#D#5Z2@0wDst`WxgkA zl8{C*^I*EZQih1|U2u^d>BUXAmE8Dn4~Eu&PB$(+c%6}KRS*9`F47$lY<(xdZ4;@=j!z>PIZ~WYYpT; zl}#c0Ve3*+nP*!k)GgR;>o;#}CR#*mRZ7KH6~VRMq$sOOsZ7D#SsOTUNGMDsH>FZ~ zO^m0zd&W!G$W)!=s;jFDlQv4r$Fk{5Ro>PVqTIEGFuM+(zVqs7iq?VkE&9(y71^Gu zV&WFi%|P`aj$FL|^?~m}xFp&CBbwe-qDOLPrF4gNk}pq;ZX}?X{Su+t_+s&MW0c40 zS|Re;2adz-6niM|mv=5eQ{cY$7M`J~5R>b7n3tc+~rj9*vO?MpL=NRpz zcp6oTdTzM#Tw6!;E!_zY{wa3J(cJ|mh-uHkyPFwcPxg<` zJt$cVw`xa|FZETjoTyQK>>+xyWMMp5r#k zFv%VW)YTh-Q*R$PzvpLkpNT+jt~#goLSaQ#7NKnY ze!uWWI~+j|x4)mD>YEAQ!3ssnnG&CCgw9Fkon70rQ*(={Y0en1NVE37!1|*^RH|?% zw?x3;&4Hmv`>k#}lPTAH8H)-$ahctu#3qDlptL$6f2LtLEsy{syT4bDNj)%1#` zaAq$t)z|(rF?3ggu})YyC2fFgkl~W*=RCLC_K zQvcpt)I-I(FiuNYFn9q?UCi+2IHmC!{&8cL<#65m`HcosI`q0F(caQR9{OO4W z?KzjQeWQsU(B^gD_dfDbtt*}p;YU`{=LUOx4}M_x$EXnoUjVHy*+*vASSOw+vrx4_ zog+*-&x@_66*TpfhD@8YDgBsR3b^`|S=)E7Rz$P&hD&?cnzfl{*G-PpUXOM6TSE*f z7cXI{#-oWw_$?Zjff=lLMh=}-y(C#M6|3xeIhlTyTQa?OZMuLlb; zRa(98zPMP;s=+>*$jAe**ME3%y@tv+FeCfy>d9h8U1wqOm~ZiV!&`N6xDC3DfVyJ_ zTIOrcO|90qB@lMl#l&8zVG$L+t{M8PhYA&_82r++)2>L0$|xf*pE=^)hzAW z$XZR&uK4xzry^=i_liwsKSVq=NWSSo;BRu>jfpayBsfotYk&973T*soVCC`Y*Jrj1 zZH?Y(*Afdq_wkQM6MmEoi^t7Qr)7K#Nojvt%IBcj&GD`R|Kr!zy_RFj7bnYJi zoI!f4a}G%acsRw=$g-%l#_u|6jhZOvm3P6Af7!1}X6AItEYsZS{=RRLdKyV{zUH06 z=n~BPqL+%-TuS z<1a94OXGDZTDuNh(x2rD-lcPGK-T9w&#c})u-(M5{YnCHxIIuOIx-Sks1+=@+_-lg zq|z4k#tL`kEDXUJ$JsIXVx)rJi;+mb>AtW>=!ZZaEWFNJ>+v~q@%(E<_Zlzx;;*qV z)bW^)EA4Ov4GQB1m%(me1eG+U{h673SpmUZ_9LXU<%__$X^f?6e)1dVhq;L9-uQJL zsqIIfTOfx&c%hwE`0OxFrmYCZFk!1&yvn4e|x3|db%DR`|nHWp?V+}zt@#|9Fg6sOvTF@ki{4j!3t zoZPl8|0y$K>=MY^asmmybxQ8}C^qFinEH&}klT2OpX-=nOt7=YMLCw6+CdFPQm0ps zjOoLE&U}&W!dV>g2-WkQ)cL~93K(XGvtKi1Q4cnM<}zf+YM^vltJ_fMaZ4}r)XVoN z&+aAY8J|{)-J>LH$(lJ+)o33@nsdyZ~UdG z)@U5l0};qR!=qs{oT7HWC2jh2TXnRC z3Qxs!(_K@aQBek8-AfZy2NTzBI1HDzla;@J{cwcew6y5EI+Od2Jwi;LPwicN^?c1VUjqE~ETdDqt^G5OHmr4)LGR?IOrJ&>E%GIaYKj>K95Yg9O%Pm+*c@)(& ztQO^DVcg#&`Knz!uSG`g828~Aq^PsTRV}doM5*yRS-<;nq1{Ce2AvE=8^(+8)1IJQ z@vS|=pQ29L`{T+s-w#mlOLeT~)C6hSXt&wR2;1KPprP z5VntK=ZofcFEgbUJ6frDGbXWmwX_1l zl-U3KzH#m^L4WrvzFs8gpA`xv`20Qv|2_fi%Ax>T*-9uBid4X=sr@VbeZpXV12r_@ zVWq|Po2U%^!-P}?9$HBOH^sSuYu0Q4v$ZI%vdX`x{#ORtY4y`UlkbXki4imFz;f3&G8|2CQASzKM+p@aZ03e?pd z2*3D*h6gNcGQrgV5+eu4$sbRK0yk{Y98g_% zZ%;2gjpRilUxflr($0|H%7#bMf?%NAiA_S*8t>vo()S@>A(2lapm=X0lpNsg4<*rz z6WmGO6yVVr0f1k|0hn+WY`7(J0H#6%yon^TKZVBY?tK~R@8cCfardFgxZwjyP!dhi zWfBqU;|-;_k)T)bWKR+W0lnhmPl5WlLWA7g32sn4ne;miG**H;-ixNfl@dh5A)o{= zp8%Syt1F32)9nwvq~h?$r$6Fk5`pHBn%7mDI$BuV<*)bokYR^jtEZk%@cM6F>2_OTX)D>x0E(F9N_#hu100js^p4JBtzW#e)___hYolm@{ryfk=LTNV-#15Ds$SXQGT@PsIchyc^yh>x^!mRu?2SDiAmZKL6t z7E8R}#iQv-)23BP{BY~>ua@knIJ$ZD@0a16%O~aw=_YAkMB8WTaW@(^&Magz8+%_y zQ;KR!3JlVA3z!b9DbhZ!+8|n!Hh~ccE_f6cuPl#6(|{@-<0_9K5S8SyST($o5{86S zQb7v9l$4MdRRWeIkH!;7@~Ub|D0vJKiI!JZ!YZjET~H`Ife1Ki^FvDF0ZkXl3+-_b z`0`E2?|bk+(*hXG2b|ZjD@gxYmD-i-z>C$qb=OVG51`*+96;uN;a0&|0 zk%B3_tb7c7*Z2y+@Q5@B{BbHUs3${Bg8bWGn4wR;(1Cgr#5ig7o>b%>uXI!!> z*#OM99z=D7P2=XKj{2RavHEXcSt4e>CG50a)PLL4{z?fHhP2Ibfgc5Juclo7Rarfv zZ`r%-7t+%<_Qa*L$->lJPXH8lzgfU)P}8MHm(X_eX%gkVN(g~l??*?8V~L`hIgg{X z10x6r`JD(Vj)+ntpj6RFS6a|fE?5FuMGZwHs4A(EFfJ$>Qbk1#t%}6pRgeS~0$$Bk znW&w!JRAsVIV+_IR?+lguvecf0For-8=4o@7^Ep zerNx^0Wag`aS(zDjq5t$Q$_M#BXm`AS-|B!-lEm@vqYyp@I1}H{ UP{UV}Jv|dv3!I;ZsC0BPz!3;+NC delta 10432 zcmeHs2T+sS)@Ta78CnRv1VThY3y=Ux?*h`L3LyzSfHb8UI!K3r1*F5FNk>FPlnx3g zh$5h95Ge{CKvat551w=H-1ndVy}9qr{b$}kb9b_{_qX<1Yp=HVT9fbUG;n?f*un=S zcfSSlj=GugGVlVp0087I`S0$pFwc4cGUA-_2HHM_Rp0>yHrh8RtpW);{NWq#1h?)) zSm%+PxOA5n-o{1*uOV55*jvA`EG{pPM8`pGJ|J9L!i|t z#IOZMV>Zt+pT0OeW5J-OKF(F<6nR2Bfql`;x-^zS@Z@Y*Gg8M0t((!)I*%Gym^pu1 zqcwb#hpu38h%bG_*=Yq9upOk(mNJr{W799lo*^5bB^sA{(cL@Nv1D0Ou^=P20^X^4 zxtQ+$F5-FGX~q8Ph@qO-+OFa35a@#sTYaAyz^s;OY^CaRCVaInFDfwbN^;dLly4p- zjzMs(evK6_1J5V8d3`W}U?stz^MJS(thSK8EC)Xahmf`i+woO}xe^)I~vQ2YAeBG6Q=lMZ4K2$YfO zsHf5hfrEW|baY@cXZiqo@&P(O1Jk1GdC?F&m@pJa<>%+`Dxq4YJ+@R5;?H6;N`?>Q zQb&UbSf%_7EqKClp$FQll1733d>4}|WM9eozL06`uK9^%K5+fy^r?*DbmM*&f}w6w z#((zP;EU@1)NOXdVUDFS&5SdZAroB!O?GMQHTb|Utk^=bsnd(ajXS)mnP4}WSZ(B! z?}MSv_v6fclZvx_zp_XmyAqS4Gb+VrI&t@&NBO%&^2mwp?Q~Tr=WlR6w67y-=Ji70 zKj=q)?}4rCGD8)RJ>_)LtbJFRp)g4FVfT$q<;Q$nPXxo^pDurXD~UH`Ua9=ZHme&r z-K*FKVo2&G*StHr@ewG3Q7lsLmMo zKyNtb&YcCr7g-c}^kw;sj60XwN7<$BXts`wtzHnF*CbhgIHKEYZX)Q&S8Ws#LuRvndliEqczz4^XT#wmG+TjO2`S_ zIfd?6uZ_h{H4Fu|AZ(62tmP3@b=sz1kkL#6!$xYdQ3g-y1MgFP8)88~2jJ|i_BmOp zSXL@dbM)7Q z@u{yMqfS%hjO8nsicFPpHx#T3fd?@f~64SwIF^l9-KG2dGdH8j`Ga1h7csfMBURq;bY^ZIsShQS%)KtJyA9Vi z3i;HV!_z`kY*oDm>}Nk(xpi?EwW~OH%%kqD7;#7kwyY2}#-?&hZV%S*&mMP4%Xmeu zTznp{N0|KDbKQ!RF7%Z&VKk%YWz;S$^8@RTsGZQfP8jNqr4To=-WMh}Mz}O&9UH!C z=w-szDY{^8f7k213#(2;s132KkAc0-l(eoQ01Isw6o6j~h<9R14%r#B_#L$wxHnOk#{4+IHW3-!dMG=|?3&~?xFBHYJ%T;2X|O8! z=Gk3r^U71{G3M1%ow*rr_`O?z&S~%gqw1jrMK?M(rc+$mLT9+kF2lXHlk)~ky^GpU zai=Y{oNOw3X$dl@dtgzm$)fm??5%W*hb;)3HK`Mkq6c8H)CK~OEyjFeWNPuJ?1x^# zSuOjt=weoVt!dhT`=oN5`bc-Ilj)%iZ9koQO3$vun1H&4(tUze=-Ft*g{IGvADZ|w zA*!~pP3Z=&8xXZ}5@$!fja0gvd-J8)n&{Kxf(uX0C--}b2*UyKs3P*2&2@~r+fcLX z>Uplnfz<9*F1L$^3v!3tdgp#~z&QFgMgZr0q9Xa3J8KnNlczeF9O>Q_rOB77m!9i} zoz`paMGdqHV`?g7S&L?-!r+yaUu)k!tsTqBroN48?&X#mjkG%xxoZ?FS)z<1x#B6?kQ_>*W%trKELPa zNw9DoEtj3}AyhUj0yGNGh_>RKSxwE?(oGJxq?oi0X!${Yl@y{qTlLXFu ztt*V}BpWZ)LwkSoLig}SrdQT=`!gK0#tDH!Awek>%1>k}i)HZ(%L}UYHFBG~W13!s zw&TOqSL_qH%8G$cfOjdO$uroySmHYT}S=0o9Bt23i ze^AA!R(N+$h&(YYW@SA}Dq?s%?Wy-e7|wXlFqeT9XD#9p*6 z=_g%CI5u1}Y45_D0h26k#T%Mmxz>)qqQ<8akyZOa(@4+LI$Pa;Tz9xl$GnrH^b_5! z_zGS$4{v|)(V(&nZGQo z2}`xyp*lrd%{f`ARn$F(-PJf_Y-Wp<1PvvWBTo+`4$^MRPp;ApxgnsOV{Io>tCGeY zJI~ZOt)!PBxw*J`7a!2U_rxE8MJr}yr93^xMuNy5jVORS0Pyp zA0P)5-?(Wgw2+lldp|KRrsg@NB&ovqn_E~%n8NUW(8(tC;}+^-G2w}N=>JrM!i&tc+$DMkzJ^-mzi8F;UM#f4;@kXqm&UJr1cUoD(v ze-^BqZ4x;#7p~l~bb7(Fc{<8YS`gQin@#d@NUIL%rGoVO1^Hl?%&a3URZG;BUTcy= zC+488tr`o1=Sw#<1x6uA9?^@eK0SKzOhkbs<~1;O%GE; z7RjIPx1Wxd5u8Xpe=jbkkWtaxcd{CJUfT-FD3YoTioXmt-jHO6appXzRR98$W1PCb z;)<}E6E2wMGsnt9cIq(ltKsN%0hcjby=-p3Q%RipdRJz?&r2t1)`C-WD-6jx3`*7*ZCCmEcStKE)jyYv*rkF2j#jmXMGzG0E$1V_;7<43fK}X*pw-OsDsj z?&)#w2jW6q66=xHa^3@VWr@6bp$xjXhba4Yx#Qz$5)6@F-I*qIv|G+s=$v{J`G4EGsAD{Z+dGw%!6Kzf8o2 z+itgN&)3UOs=I2X1)H`g7F~b}r8&Km_;H~=R+)((b;-Lt2g$cqV^#XE*XK96OO&;4 z^g0+d_5}QSImO#J8$qn080cz=?p4ADnIjN zno@On8^`d^Zm~L+ss()qUE@8~IRw`*8RFEnTA>3lG4a_+YBHK?wU>S$X*<8^u%`j|npSa$Tvy*H!8KXfTfub4jX+CY;e`Oz=*Usg|!>g{t2}wRd18Xf|^`Zbz1w zLz2w?bR$*DfLl=dQ45{3wFd>rx7P|v6N{0=@aIU1gbNfq0ygF46XpSK&gG_|v(4L{ z`Cs4OsS2t;yFz|q*$m-IRM%<}v{qrpn}mD+7A;h~{@A~k0@yaD*Sx6d*v)pP!EXAC z5+;djt`Y97C219^gp@Xm)@ksWba|WiK=)?0fDAJKk-3oDE6{8avU53!Da0zu5kB%- zJoAK-*sVluZu{2_2ZDFy55GM~iaE2{#4ggNIn>n6FVaJ9qBBxTKg)G+>fTbyOR;X& z`PcLsvR-36_mlErPrgYql4FfE<*!yjQ@ch!WW}x5U46_w7UuuWXJeA=VIG#MW9D#| zYfR)^vu`K3{EHnAZ0VRvR`@KBjl>D^gc;L_fotq!{~0g)z&gcc;Dzx%F)0-K4t7%VV+G42u~bdMz(JIH$y~Z4}a2p@F#;ZX=$cShF!xfc146 z>k?($YWhC?GOU{qn+x@9a&h4g5y}zVfQfQQyqJnE$!(qSej0Eb*QQ^a7?iM2JXM$F z#C>m4#7=|SpsQmzpl=HI(DEltZ zZogV`utiW%{4}yz+zUJi zL*+lmkjKt`f?cxJ8MvX7R1v{4-vhl0z^EgIcoQv8|f zpWc+ZmXZd+Sa3mZ52ck2F=leNGQV`#9NtVC2mbczZp>wn3;g;;2R;kF<8lE!>@{nz zCO@{fTU_~Wkz)Le7*Hb8F!ipSN{#`&nAriQij!049!alUC^%p%5LQfOs|Yo)9mJGk zZy!I?X!oVin3>_Q5r+7gu0yZXr~UE+>$~GSi=?TNel|ejHzvjTT&8m+6QZ}iGLZ)T zG)-&+EdA!!gdMsb3V;)C#hrfR9-dy@n94F;)N)J91y$*it66!@C5jsUgmJ4VQP!t1 zV(cVYHd2456=I!rn{;AqFf~8caJH%Tv&5W>4WRPv+a1^9eg~%M)4Ho4*WXn&HE2wp z(#npniQivnu?UOUJK-#JF%O+Bd}&BNcxy?)1kWKTmjQ2Rfx|pCJfw}6cM_LaudP+; zfwMqER%VI7Q@iK8RiJ!C+bhBCg|e_o*5+h#4XNaW5uJIx)hWK`VT#nuB4?u*`fWCP z{0*w#s-rQyplLu#Qzq;EZr6&(^5EjY@Pz}mUG8JIvjCFH$wKwtAz){CYl5?l$8vP+ zTwi^=T68|E^Qe@WcyXbQ{F%fBwoVab-B~F`c&TJ$u#gZBss{oIUHM?W(!PpTXV3Q0m?Z&{_GdpYbRCy*>>cH)-T0w$ z3Pq^#0IXQ($KM7lO)l5BeHWv!51*AH7sWezoZ=L8I)|!#+_}`IaBA&%VE4e6#D^>^ zbm7%cTU2042ke4}*HVyD(n8Jk1$GwR{AN6pjmEaX?==DY9`p}>GUizRXx;f1bxeBq z6hAMZ!NPcgGS(g6!Jh}rVFrqnX6j24F7$M>KtWT1;n}soc$o{7cVFx!VOJsA|$DAW~zaZ3%o1p;0d_2x!_y!$&)WW?NI7s*Y`EcF?=NoU*quNigZu z;=8P#x*EEu%Zb1r%H5a0W1R%E#K{51VTR_CHQ+#cvNYk=vncyjT(CS1E{c8N*#ZctK>&JBr{Wv6DRo{%hWmHK%4%p zpK&aVi7JDI=WSVmoQ~E@A0sPB5B9`)1HSm@J%!DwSKS(aSj@|$y~jlW<{`;=U=AtK zTl?IbS9hYz?B&_HKZe*XuExBU?a9ypUMzv|UOdnFn`wi#l@i8%36B_Gkr*1=Yjk7H zJQ2Q^fUZXj3XBZr{7B|`itRW*X?y1W^gip&YTZE@h<6iNFCl$T#?BUecA4(&gsv{*O}88Tb>rrS2KvYb=8WSK z^*{CRuEq4$e0cI8JHr00$_s(;yf+`Cym!OuhTp}3*wmvN#vHpi&*dcDZj#~sIPh{U zDN2r2@FF$v1rnUQpKyvbRb}ysMr2s5NgsAg4Uq%082NTV&+ijEFU)x61DTh>lIht! zTjP|JWCJ<3%j~hSiJOD-pQn-+(Sjf3J3rn#nLM>*KlJkV-aBR$%X0|Mg!{WjOfaLn z!8KW~!?p>xJv|}kb@JF0TMLeH z{)EW~l6D`N<+IKpL`#tCnuaCvg!^}Kv=mj7h3l2;_~{U)1B`AH_UD8? zn@~F8OMXM9ZuDN#gKBnjiP+@Z4wa=M35M&*oERJEK2bZ7Nq!F$q3P4e{a!ZAwHxaf1(&Z z@MyS5X|@t(_+ym<6mdLGn?{Q%$f>EQ{7G{3gvI^|qD@sWe`2dy>p=dP{vuFNrae%^ zl;HHgfRsfMe#!|OcFLHwFt>`*5xo-n57r85N0^GDf5S8v<)`#mvs0XCJyuj2L|gQ; z5oY~^_aDr}(6*AaS@b`^{*+Vsi1m?Bv<=XTlr0;3@b5R{+NtRKK(^m+CWzs~{6ocP z5Q@3P_|XdGjr|p1ECqs*q7(GVhJDeA5!zwGnixF~B-a9~id4>1%+vqh_`)-OAwWSoFdJdF+)jQ92l zCE|}X!}YF!0uVgY((;6mT8wic!TSa*Bg_Azd-E$2E=W|xCI z&f$Yir^x9ym?v9}6_t@*493<0`$*aMl=F|NTmCoBDorSPd&UB2spPaI(G#OYR3czd ziVAovN*U{cLt%+%HIxDYgCnXcqCGtEc$lOD!4rd3Qoy1xI3*lP8Lf&#sVURjp?3u|L!lh#Niufife=-> zAWEm6EaeT3i-Om;6n}UHB*e?cuL59z@Wa5wi=n)1PzGouv;>G)D*z#G7EosNrO9ca ze|*O-HvSF_=w}mQe-K7=U{y59%L%rl{nOCgUieI7G7VY;8M*L#S5g1=`*+dXk&pSZ zOzibbe#moiQu~q{zBhedW4!GMqZeU>z5dkVH}~3Ebn-XAX4VuivwB!ULZM)s3|Do6!AC(B_#qDkHIR#w9(3*o_GZh3`WfZ zt3bpkdn(`wI1k!@RaL`zs4AiHcn?)o9FCxX!JzRp9?B}JL>yijtAl8D4|-@kJ>uXx2=V0VFVFrr2`u~1O8iUUqmVc-16{gv*Y<9TFounCMi4g6l~$hKD*=8i^p?~_1^oC#FwXrc{c`rFcxmHu4* z2k0-!*6JwF@zP48wIIa%nj;(k74LsTeI&!SNi67LBgKG#VCcC?yKNdM=Lwkh=ccK^ ze-|MCuABeo)y+o*2p1jVuN8zA%`p6&u?-%S5^?$xxu`F$qASeTZ33}yGdpp5}jwwY10I<}tR{Rg4 CDuLbr diff --git a/transforms/language/pdf2parquet/test-data/expected_json/archive1.parquet b/transforms/language/pdf2parquet/test-data/expected_json/archive1.parquet index 584cbea22668a8e91e39cc8ee8725713a27a653a..87b1d67dcf7fcb31b5ab4ea5a9ed38e61573ca63 100644 GIT binary patch delta 6974 zcmc&&2T+sUnx=#jiXbI~-XSP~gd~)J(#0R7Mn#IW1f&<~pnNoGQjJKFV(3b*f=X42 z^xiBqkt$tO>hk}0@9f?E@7=v~XLt6@nQz|jZS$VDJZI+Q-k7~nK}!zAHdC>m=w4$7 zv6Hcok-=)EkF7e&N}rMiP&|-S6!^_9ora8oEH+#q486Ao2-^1ZhVRNQl7^558dpKoR3DGH$?P8pOFo+c2HDM^ih0^6Fuf!p{9+CY&!Leav6L` z9eE|JuA#Vn_Z8dJq_=DYU=?)`)W5k*+c$`ka5ZV%Rt{1ddfHTDIP>0Tn2fZY$lIN{ z*7NIUM|E|WMTy1e)Q;!X?%rUze17h3)||ofS=vc)@ZujhiS+t%Z>*Z~ zq4$c%!oAYHm;A#JT!h7J;kkitll=*Y+sB=cc}asP(td}uIM>=bpmd1OtS8b$QS;tj z`ZGJP@zB13MZ*W8YrK0+FgFSxz#?7UKBdo?uiV$M} zoX^R%bbQijte=Pf<%4i)s^uj}ELkR56zJ1zl~*X0iljZ&IR?f>@Jh4n{z#$T$IoN? z6JX7R1L4kNgN-f=pm1?wWy<<+Q=Mz+VCaps=FfuBL(@W!0@jlc`k6z$$CW4d3$e@X zmfRi}Zw)R--UYJqx;p9_HhY}7&0Uc${&20Iz3FyvYAIJy_7~G>ckVkM(H#4Bd%WUo ztJO1Btky~WF5#S;jeRq;Bid_kx1x%Ctnnb6f>kt>^g`!n01(z1N*x(_Wx<*pBUoN* zD5_1#*-c{UP>6VS!J4bfz(lmngh6u*6cJ;^doEcrqRM-oKV>wWHX>m1;&Y8pDJO&P zl%;MIMb(^(=>A@=b0OLamLYtvW00RTW!4a?cJ+Mh@OOCnPB0|s?C&^xFhN5St!8Wj^&2V-8 zVlcyZ-|p(q?+t%@a5t~U56hbj>$Mq#GX`!*lsF7xB0?+FS(DbhvDb5WBt*AVYEgk1 z*B_Z9M|F~phGIuS5ifSrCSGV@5^5^=N*#1fl=6{4GR(FTqJz9%%Q^!TX)GQLv6Y4v z>T#7)v>WjIXKpQ%qQ*dEWG&?6v~iexc>EyBPYhJhfphU3Q3 zZ3gGRWOOC)cIv6DXLe1X3S?gpbXC@E`&;vs3RSd~^{!dL2BfM>r%ad1wM8zBI+ZlG zUn;CGJO`|9HRIxOM$sDW`iy=@6Wjw4ze0PHg;3}v%}LvaermJFP8wCPb|!$ z3z)p9SuKn zLVK>mdsV+esdv3sg35?-pJoT`XANAJ(+t!-Vy0i~4R{BPRt1Pdf8YwkKY06u8)a{c z9%arJjtVnWG6%t4;<-;3IGdeK8aH*g*L*YMHnlS9@Z2FmuphWP8NinrALj+084t*w zT4-gIqoP`IV*~N5O4;IU%oRa5erk(rB$ ziJ38=GPU5+GNk&m)1yw`Q|rUrv^B3KX9ve$(sS`G6o`Yq$<`Kh1m>D&FL7+;KGtcl)Mf4jUhhThJ~R` z($>l;%hCJa{Wze0IA09LW;p)|AvY}j$oI7^az+7hi z4IRjb_;JTiEvDtxE{z?C2a8Vmz~JaYiNVp8<_WY*>jcE}n)UHDhEFA~(Gehe9^qG< z3D1Mq;xXd@m+;(^Q*Vc=AJ$H#5hhj?OtU<5yJjv5b&!`_Z()5^=#DpIDg}m+*m=-a za7;`L-MAz}GU?9cJeE2EbyX9+sPq;CX=@uy2@8PuA=wN!< zPT|v?81Ge77Syy?E=?LeG3ztBiSIVGs)pXv^Ki!WB6R|#l7jp237Q&+#~D8i!EX&x zr|S`H5tEANb#I8W^i5LmhQ%Z4^HZp>)V!YbUcC~1Y7Sp{0^=0MpZwr4qc`t^w@dWE z-2qHH^5J+fJkA96sb-?aj@_C{-MxWUDj7x@UH=VaoT|Oj?+0;#J2^bWuPN;F>?o;ahOuL1@O|gHFZ(pD_^xix;3ObtS@l0^#_!O`sO{-Cu3QxBn@GL1JhUC0<7H%d(@ z^@jAH3sAjeI(`1Y_v^v&sL7g_;tFsr_KugOBTWCQR+(jI-fj3}VWXFF`@j>+>}X5b zEc)mIVjb4{Vh+mZx#uy=vTBcC`cN?8(ragvORbOk->a7PeRF@0*V<5?=BHR(A$u!N zUDpoN{St8EFB+I%*B}ZIzV8RE-upSCcVdmlJG%HdkGxMBBkjizIx0rj*EH~cNF}YP z)v^v~;ZkHxQ`7Ve#m}d&bpp+ql^PH2k4F7xJe+&p4^O_gj3ji<7hGOAhE-LbzQPla z*d}s=V9GyoJIDH!3?Cjo$K4$G*;K-snQ_Q`5%c{pp)-@P{wU1i%W6U)a9AF9aI$bH z%#Ea^Vm4|LK5&KgAO1Y<9MNND3!V#Un*eg|>`<*HWXqkptTVQQ<$$suo<7eHv6&(l z70~D!yyC4@g~7{vPWxddniguUn))cIlYs7|UkXblY#W*W2Q`t+)T} zn*;R{M}nEN*!vd1o-*sG@8!qaItkKixK)F7C(c>JzzIMLABhe0Djm`1jo;P^%%^gD zy5=0S9N(dz`RgQi8UU4;WSDj1W837HYbPBcX7mNp-)1BZ6&Zg`$GfYGiZe}ax`BRiM)|4N(6Mpp^N=hWcL~CJho+t6>OCn4K+)3D4<;?|Ggp1*<9V%~ zW~R#F-08_GPFV1=Q|3vhM2^n!wjM-!G{w26^ZP;`Bh9(YE=%|21wB4}7f-DUz<4vy zqpaG^Rf*K$sOP}oRiVHUHeY0OAa%t~<7^Q`etUFIh=C=-IU}#oY?=n-Q~y zDs89$ZD<7$g$!pF6pVnXMF~=yTpuOR=T3!f(I->OX3s`z)nqc<%y2G{c<3L>k{%^s zG{OGh#;B(q*jIRsE~3tQ3+J9J?efKCI6-}peO?mclGel$Cv%rds7!5=fzE%0B`Ag! zT8k*3*lc@uah*}(*!H7*4QF*29zSB98|DvclM4Z)PMBG}Wp4q_xVku6BawF&c^$Jc zQnqYUQ+9A|r*Zlls`qT}x9mi@27cXrBXs_Q3wM6AoN7Ec?W<&3$<^DRQ<-w89*U9- zUBqLVC`f$6?K99BCu@suim%mcH&w65ta-pdkqvWHrEl7pyqHQ_9_v`;JTnZ0r*eu) z>6@DY7d!ci!sDCOs8Tc?rXv&V!0`yiw~M56a~^DX>6?qUrjvX?PY`HUEA{Pg&)@2e zgkw=LejQ!OU>D13=kUbbHW+Snv2gGxr+6y;c4@2Dubq@4?(>N)yuKyOOuidELZ5s? z!Z$XIU52`KmYfA^k|WReiJXSnDfS+PFImt7BcfP#H?JglMeIwt>C}(*2;+cdx@fmc)S>U1%=@EMztWhkgPi9-EN1t6^`*C*Wb#TFPubtzG2)7Y z$^O_FDQw7KAe!%{>fE2g*qNjhH8;?0T9mEeHU|uv-o$S}>r4jrHMt!^{T@1KGIqMQ%x)dC0lsc@LB?hm`=RE3Te=hrOX z6-ad5yz{6Y(wjO(F;3E%joo6wnBgI5; z%{?1R2t7&<4;y_IT9zX&oX**B`R#`nPI-PZS1A@4TT~1k?^Tv_o==$l>OULZ#&-kK-j%n6X2{mJ;0Ty~ozKdHm<1}e2`Q8k^*eda zRw_wPJLAm!3M%lRYrriPRNQJ_pYf?0w{BgmjPjOvv?0wy*8__2S&=mU*Uqx^#dk#e zh9ovy?kD|HSNbxk9%8*{C)#6)k+g7=Ely1~(Paii;~~B0q8zQ1tC`vbitPZI!*{xZKnT7F#N0fwk^~ zlKW2}Q9{8SFE0!_HVP{>ysI06?Oa(WQL}VH5(BZsIwo8<{3>=M| zI(;noXHcAr3NH}8ODU|)f@9#G+>KG0?Gi3#@rYFJIe$%tXGaY>XslNf zyYlE)qXqBobB}#rFiWTd#g-0JTcO+cBljh0wj;!l`nE6lG`j2u+`;tA^M3*`n^ z>b9S&zgpy=rTpw8P9LPrNWC`}l6XCldR-)sKfFJt2H|(-sYezQfZCOj@bU3jA#7)! zx(a9L+9{}b&bC<4l*>fhHtVdPYTorw@4OWw%ea$@lOLevt&m682#ati2UWxl-8a4r zSWgjkA41zV-hb3L84j{reygdkoX76Epr3aR-COk{--$pF#aXHx#Sv3D^Ju>?+~{tZA?*?* z5Q%;w4^AI%D6Lec<-SeS?U!7*8-%|SALy?9TsbX4WqvcO-Er2dY4PAgGvm71U7bg& zGF?InaF?QWmLpHUtC~;Efdbc{2iu_dJhSMO8>!`O@O2&FP2(2jD8)suKBo5!s1#59 zP8t;C674~Z*JP_Qri~!T9?7AQHH<^2)=ciqbWwFbILw~ikPQCJfMcOE=l(XqOCY^# z;=8AHO=gV9E)2x<=^wdriUrptM64~|@3k;@vJtFln_E;5+Ct~oTvoW+`uHkR?_F>!5#jRcrOUqOiHm ziw>pKzSz^nNT6>`ymBS^$Y&!NZmM-o8~Hs?)d-O-`P=bg=eA3K;Js^e53^jCWzY9k ztwdzmF|=v~_*VSL4drk2T)pf*QGz6ElzyzjW~2|2iA|@)h)LM%}A||mrk>4T?2?~OY#Zrxluh=DtdXN}$bK(?49AqZe zVoH1hkt8BGqyXj$QKoQ4ajK`ux{jSm&{>ht1l;-8KR<^qk3<~rhjOHRVU><#lkKA761R8dqE*qVk-0rxh+weQv^g1tH2Z2pm1V1C!Wuq zhn;~FLq^Ta38l34^I~U!fFJ>oEON2sJ7Ohwh*Vrk08_;hc^o*+=zV&UkTOF&!>=lH zS2S%NS?Kek1?W+sOKw?is)WPn>-dyrl4JsXTlt+U%!1EO>}%nDue}aEp7Fmp-^K8b zxwL#HIzKwb1#HX$mqH0s{Jo^A*x4ZEZ81n2TLl>#BmzOOLu1?scmj$bgGS5P%3(0F z2pM@a3XhPpLE&Za2n7Wjf}MhlJi!`IK**vI2&f7HbygMyG#(>^lvBXlDA=Ho1U%9j zfkD{VAg$%kNEw1W3XhkyL0QYAo< zV)j6gCXiT_lUTRI?o3RjXZppUH|qmT|mj3oQy!hb*3+akioza zGH@i~IueaVpKUoQ1$h}cjDozBjJ(2`n4FASlgPu%iTW>kf0aYI{;`ighGfr&7|bi6 z-bEo+=Pg#^^?Mcmj>$je?%xxqeHM62dfB$}?*|6Qk0}4wS%=d8RpSrp51rq0`DgAQ z5e^3s-T1^1HU45>{mxSQXRUu=`yox45zoHJ+6+&g#8_hWtQE%UDDUC*;-*1YvS>skgNX}f}s z`_ROM8^jF&0{~Ku@&_(Y%d4{itH2^zZ7qY`CSa!mbEm?LYAg=wD;UA^#*3OS_D1HJ z?dv29qmwL)NadS|hQnceo&K9wNb}+Ga}xDWr?bpi+I00#$38{Bq^Y5KO=E$w_75tR zGjN%?wiv{_5PKr&MAM<4p<7!5i9U{=_Tg}YlV@%YoB-d|Rxt57m=hGpme&h#N^aQQ z>ns7~mcCG}tA@+4u<^0Yz5ZsIvaxVYl0@qy>txpZs&FsZxNiDN8xtr~IXLuuob0n` zDL+u*n3br()H)?+$<0AD+2p9{-0p|{CJWz5QOB=@`R=VxA1{?{?7Ov&lY~(L1S4|V zqusD7*!@)JdL6jD3>!OhNIiG`dH?G-T4(0SbNqfmo#tn1jfclBr;F_0liWU$T0#tpl#eYKp9by>5D2G(@rtE=A#;rXgJisqW`v_$}Z%tWz7Z~({={G&>FT0nN@ zTACB|8BW?0#F%x(;*9`BQuVr(QSGVia`~s5YSv8tLGXRS^mnS!Uo8vo2g9Sj==rmI{IONn+-rbqampc0&)# zw^#5$S{7Eeq@GTiUfqVq4&6|JSTY9O?Vn7Z(6uw}G}JJkp+b92U<#=WZx#&Gx((s^ zzm=5}v_aybR>qP>uIf$3oI+?xbF!{$utI97Z>~`qgMNT!)KN)=*S+{IPiSHCjfZt5 zRuaZE>|YFicSq};eNnrw1)(z zw`H|RtKPVN+>l*DxG$E4nsd`7i_ab1j~uo>?yqOH`ncLEy!hP+6mJrk(}PGxxFav& zjJoq#Lfs0H*3=H&v+TCi4nuWwh(1`f@Ah}5Gr=?#pSz|HPs^C3p@HRBurI2%XF3Kx z`MJ`FfBL$Uexv%%v@W^K&#RW#R=Ohl3233L za9dn?ba@)AqUNDyIBY~1dSLxLk=*VN?ac7bSR{WAC0aIXgBaBuw5Ng&szjQ*fh22o zTm1*=<(fa>E_d54BqWo|?$1cu-fpT~v|-x6VJ$w|WcpIzb?kgh=lhz9d)yN^gkvFD zxw(<6%(0CzYK@Y!hw2W`p;_Du+Dtp}NH&owy;nT(&Sn)xMA0i`pZ^iM}BtAt4biZ3!RL29dtl;jkpHohnWEzC@vove}S6?FpM= zd96`-nHw?>hso1J4R#CrRwNB)X>^7|#xmc7$K7pBB*A`9R(!#D9*?K#8rVr&s8(Ah zCJ)@gecI*Z)b<+d1TK=k%FMbEEqH~O?M%D*#Skgl!V zMuoc+%ANVt*7Wr=J!Ve=sh~G!*va*n>vP+<41Gq~-UWNykq7Vw$BBvUX;@ zRg3eSuuFiR^dN?@4{`nPr|Q)P@O=1cTmgQ~-y2a`cgZuW8@kBnkc8}M zh(!+}n*^5gy46b!6^8L9-YI&z3aL`WG$~f`$JCBkvZG-mr==dJUKB5W$aUdOL3hW1 zI?Jf-A|L4+pL-~TNpWSuJFAE9#j*G-J9bv}o7w5w&g&9toK<6jUsY$y9; zmTEiU4`Fhc#m_nn`EqiO<2tRnM6fSOF&h|c#;PVQZN0o8Cn01r5)_Nd_eQo2LS06B7Us!stJXWCYaQ@1z&5g+AM<1|6glX6I(*~YxCHN7tR0<0P8V=n zOUK31$HFzY41BJ9vJ2Y<$nTxrK0SQsaO(Lh@a;?xQTc&VDkyP`L30Hy!dC2 z)2|QvxjwVo$fYBQniKboTxBlqt&!@A3|0f1K$GKin_Yur@7kLs4-0%^CQ9#p4Yoxz zjyM(?J&5=`&u_jvF5CbMF&Uk)7xIRb0_B@$@oRQI=mf+4C)LjdqcAaW6@w{(P;7~g z)2eVgHegybc*s4D#f}7@#K){#+~oOcUM1igdMv>zmc73GV4^WOv+lelDW*Yx*)o%T z!&rvjQ-*=d=l&qWEdQ+_+A2n_oj(5TbGfY*=+@i^|$mC5aVcTUD& zo&+0~2sLOYw`c)D(~NZn=YYAcaZMFhBUP=jdnH0QUzAxeR_eR!9uI=V*;G~(GdLE< z-Ypy4cmk~DXQ*)7f<2mah9OFXA*z-E70nRe#;(hN)aTXH6O>;Ur;BOW_fyNYVBEJY zZ{4k0ACOK79LwmvZoq~J6qZvTX3)IiF?T+^GJfu0b>w`}l0z@BICMTVzM=K;r2)D} z`G@m$32|Xeca2Rly?rgdoZ1Qu^pB|BK`*6>S>Lwb zih^;%Av!TrO*^!aBuxd5ClHepbW{*nQKYq+E>0aD09T6%`Fs@93cA7d9oLl1w!jiY z*^9o-Rz#a0gmSeJ;h?_&As%MuXL_me+G7Oh@}Xy*6md9~84OfOI44-?GdNldU5dB&N`0`@c!F(EukCs<%0cT@>IEyMRjX_y7qj#wETOFlY?7#2QGZm zsy;ip%l0LXk8~=rV#=y5-|6OOt$h6lE%OCw_Z3X!~7N;+% zjMR3R21nU;xpZ0XdO1dCU%S%7Cs-aLyC+}pw5z*iyOqQ+efGWCgHqX?)JGckJc~V< zNRmEG{i*o2cSjoTVu3SnPVT_+A2mnrDmxQaKc0Hgc=~&)nYs&8CfNfq;zVT8A1BC19)-RRP{ zI-~I48YdwTBi6{LM}k3&qhC+oU>cwClAGygeqREfQ%)zY#3vFulEU9!d>VZ7^>aGb zXIP;P7d?G1AD{IEuD)1W6^9HMHz+=_-|OXh*vw<<80qP>vy?$TWP|o)ukYh{ME3H2 z=!qd&0RSzrOf{f&R4e>svDsxy2g}eU7ON_0`}we3`gAtej%}eR$O%S(j-L|m%TS7j zN6k^s{%NDrg*hvUH0}wd*Hej*bGqO*!6Tvk_g*rb8cE_yUZ3cP(`dgz^Gxx^*~OHQ zy8_#~w@5dg)FH!PGXPB!sL4#q+Y*M1wILr7m1wz@mPU5X@^zgF?&VrWDL#U%XAA3* ze!26Z=X%nL@eR)OdLe3w@+*uY*Y7G;C_#Nv4D<~@Mz~r>!FRkrfo`rwOmK~z zn(j8+$-eMuS%e^vijq0U_8fOoP0jMhg{o&QmSxC2Ju4uVs22WOM3%hdg3tuy`j%DV; zi&+Y4kEt$9g*ra|U1+PhxkF3dxrcimF2bapB+HpQN!PraDqf!mgTy@FTTYzWNH!1+ z$hIXtve2E2r+Ybc-cZ}wYwQ7}vEV40wWh-&;`7iHn&|!;nln#S1#jF&Uud;vZ>UVKH9Sf9kwQo<}Q;5hHCdJ z?2^a8Tr&aEM7G{fSB(|QKA7}{zo8BOIMwLf1>ktvdDUD=DC~RkE%$&+Ct`8A!D3}J zuny_`QNyZ?_Sdu3o#wfGwE`&G^jE8-E=EOB8+Ivm5K}31m!)}Tn_mC6{A*{w_X;7kg1V|jUl0*qfHktJDVB_qGS4rw8 zZW%|-m3$arU@9qd?96{gjX)^44#0(W8B>l{8+344xxlWV-c?I3V4J&3ZtTg zQc_fMQ$|sMJP?{37m68F%+d)KeevJP|IgBk2tg>*PzsPhxy&m8!ozA^DKfkWN}D*C zGR|u*bV-1lnHLS9XXk~|x`hOBGeba-Fh~Iq_R$?y)E_})e2UrGzUZs|2q>Sq+fv0Xg-$9{QH;nh#W}~{-n{Qn9KFCu zN%1F)7~by^zuFx7DkG7B&X_vH%>MU))pAug25s& zSTquiQ$eE%IAujQjFKWs5#{EJB)B4x%I@w2Hw9M}tg>1Mf@LJ zKpwb2kDtJ7f*{X9uzD|8pT{3<`g_&>?-VQjuU9J~1ZUhy0Q?+Jg(&S1sMvr1LK&V4 z{igu}kYf1rDn%F~K(p>oc_qOw?FWnS6M+6={s%z^0z#yqKfj8Ls3idbX*7Jt{wM{M z5(0@tAQi0?Fsh0ws%Wen28l$YlrX9a3UVj~MU09H5D=@2{e|lPMJT?%3I9_J^cNfJ zuepDcXD^t7loZh!qJgyp!KwoP=*%xM$07eP-`}JY5keS5prqC`|3ILbWm5BR$q!{X zHvIJXIs57J2e^M-{aM;XD5XjWCjTS^wtn>(W87!T)uw_+z*K291uJ_P=*%emdQ&ihbTE|AL;*p{rXxjBK!H%) zfPf$f2uSZzMG+A%`)d2gS4-o5wS_r`eNSbxm1X8Gov>o}oxqN*6-KiQT2J24y>r4HSYNTy6F&rq!kW~?2?}&fo4p>(M5=(*Nc$vGV zXWv>?eS6_=j8f_XcH%pNb3sM&WGaJAeGheTuWpG8l&4sw85A2)t5x4|oycv(DUTII zC```y2}Dj0tWp zT61hj@kRO^ALs}bsU&x+Jedd+#}|-F5{sB>ICLWB(`w`WYEB0ozQRayf#BD%3##rD z`!y|!Wz-{8i;20M8@GK=l71a_*hkqcGxE;D+Yvv55*v8ATCcmkE9-q-BW(NuqhMHc zQqSkK>DZzs(T*#=Py;z$!Q5o&+#Lzp&Q;onl1Wl;2Rpx9SR_is4x^XJaVM&^9lmyo?B&=CWd- zA%C!dBXc+4pas8KI)*499s|f(AYNYn&cFwPN^3L*Y=a~X9{ocEJ$Zos5{jAJ8*0nJ zP%6X;rlGuvPItozwzK0iyXh+hf#d-VZRr%=bkin6yaq7m9Cri$^gA=B-d9xwAM`L% zjXh+NhF<2NVuLyhGShrfNjpL_AED^=7NL+g5s-dwNPhsx!#qDu7^r%MgrIA3W*clo zTr8L5 z`x(!+HDjY!vO};w7xlf7C?jN0*j9N(HzGzv%e2_EFC!8uae{-V_R8rLn+Y9b!?niy{SAMkMH5t&cBN z9<7ObY)yw-XJNxOS#ZZxdMdjWni1_XCfl%Ai%`Vnmc6EWTHCXITeSQt*W=u*P3C`v~4k+l-mk!1PD_0Ae>m}d*HPCI5y)Z z%t%ZCzpi1zVD4_cq~V;MuYYV4e26vIHD1dme7_DJGTqCduCWNu)4o8h0sk9!IaBt` zZeJW;a52lZr!jjlmjvanh!E_z&AUoqmAWmrC0?DwaS*No)^`?+4hXS!%A>MSNzzvE zlQX3c?SQDhL*!9o(aANI%(%rau{#Q1Z~G}H$Y4^&;W=VY0XRV^J_uGnvwpEhqx5>P zH|}9Q&JPPmD(JM9zz3G1S)!w3%Ob$5I6P0{)5Z`@^&2ct9F#H1UzXHbP8TM_$1W8P zBQPgxF2Siy>aC}P@h>;ZI4IvP)xMj~*;_aX*cE#=GsxOE%}(3&Ne#2KbjQz;x<9R_ zOhh0R(Eqw&xB@wMfCH;KEN|Ivlc-Gl)qlp6dGox9&RRA1p9oV4`0`=uqsliYy*zx+ z?yWrJoWKjb6yJ=6a%>zmK0G19;l&7OkEM7!sL1JK&1urJ;1MioZP*8>r=r@D-{LOL z^`nGzC#C3%fpX!P`-DG*f+3;e+f};n7df|*FoW38XM;<>J?$g5W3A{Q2dA5(eIe6G zqk_PugTCPRS1qX9wo}YWP|%uz2A)1kV-sxc)EmleU~AX+je_^t#%~p_k{SCy7HIm} z)ysP4+kZH3zOT~i`E0vYGiZD7|jXwRi#II|l^mRnqwvT9wg;PU7|4IF3Vt z7zKebrJ2uB&*;YkBR$7~Nz^Tc;V62(-rZA==P&!oa+xokchxRB)gK;P(czQA31uez zO`+k)^hZ`5HTUK3>{-47HCw!~u4NWil%!j}qugMaA_C_yi-~@$O}_c@AOy!cy#Gx$ znI{riBSWsWk229L4$sr+f`U?)^U8WJhA%g5cHL+`hL8zlRNEV0{IZaka082HuCK{J zsrc`z=k9gY9q6iMPP|U_$LF7j<7($n+i-Wi7lZFU0l5ulogD7u?HZ)y^u8M}Zp&-y zz4h%5%vc2$BO$8mTOLJ<vz;su;x>ATUW;t^(j3bP%zEK-T-rbB}{heX3~NPjR>X{MA{*WnT8k+8tlgX zk6T8g+m~*d0oqP`aV5QN!<1=d{vnM-@b}{KDGgxZ8ini8h3mi5d_11$J>mPIRB-;q z;j$mr=D3Hk$>&XTz6)bio|GK^`i@ixDpo`|q=OrIL15DLHJ^AKv2I+&J(d+AXRcnI zfUXCW)RrW2)zp$`{)A#6IR(wx;Voatp zh^bVAe#5&^ZeE1=3qs!jr?Quiz18(HnVAc4$e6>QR zw19Fz6PjfOKHn2I-OvzaWx~fRnpzu15-xyc^H&m(^d6tH~x? zT2(u#KW46;pwg|yzoOx&Fn~VNUWP&xEw(zT>R7=b2!CI+YCJ;#`ljHCnTzqbj<%XhudHC^#qTR4790iLZ&sHfg9@gFW8;==(S&8pm1PWp zgfSudgHn$W^jh5!AmefP@Vz;)JL|WDRl?%aJeiMX0bdY#vEzuLCWQSfnv2Dj(t8Mh z+*5+VYmhUuj4o)9hec9>z4qni2jLbPNxF-uBq}aX9GE)ru?=XT8^hGB;-`-oE`g^k z+f4K9@Y{r2G13w5f-7uMX-#Oxb20wjBXo*~?T_xnF4Lj5&V!iA{f7wpoC*@$VPWEj zZ@NtmRbYOZcMp#;{|=UdAVH|rL-)~xzOK!8E*>#^~w{xl0+A;O)OkyB2bg3eV>aW*RoUwAaKgKYY! zVhvrXlFa-p6A~N-U+RKdTkg!Vm3Gf5zr93AU1%vnOnF~wQOz+~A#gSrmEdhK-It0U z*4T`i$XS7m*!qDAnZEmY8Dllp$0^cl767q&3)tQA%qh=p%+JOWRW=hCN9Hl~W8ss1 zZiPNzh|zD?d5Wy*f+5*U5&Ly~$)RM?hDz^#mlj&|ZOyNJf6ZVeB}mnizQ0DgrGoGF zUA?E>s&udN{7UXkI-i=9yw$?1OO8^G9)wG74px%Ko6V+3_q75|H;n{Z%Vd7sQNTQV zbW>;Laz^^(O7Y(8BM$R%zezU*%M%c7_Onzh!%5u={o6M2^_C^94;Q|(T6E%Aro-U- zc3@7^^URw@deV>FxjB3b*+=fW7dt#mQt$^pzrqqQ5=Ud9+@WgtGbivG-ssHv>x*g5 zVT0U#2CJ&xddE<@^+PWD(*;_gyuxe#q}qTi0+ERODxI1entxvc^(gG14@oIq( zGh&~M_XrW7Uzn9{2R)b*{^lC4Eiowa#?1XmPF86p6Lc$hGhr_(zP!Lg)=C1Ky=FQ0 zjrVBHCKmMEC2`>O>%1=`oS@#V`-NtZH$wo(@NaFWECQrFuuhVz(>N6^x zaxo@XcQ=o~2ASeP_;XFq5`~&338PDCg$cBjo+P#dN0OJ8g*SIaRnB`&4Ej5cuS`PW zJK$0CVvqX^7Q&8Pv3{uetdSWgVtMH?h|Sz`I%DCAAzt9pO!LcW*Pondg0QbIDD0Qg zrIkbVT9Wpg=N#~X;ws_2w?GcDTUwxlc~6gZ%@L^i%Pgr!Nc88p;Frjo%iaSf z$f0$7dBD-`jDK1-@tvKj=tJ)g>*1H4=-Rgr9)Vfu)$h%|5=M{0(Wm^ieo&@gQ2}4l z_xc(;j;?+y5#4ig{Q3eLx+F^SV;_oz4{u%L!@-S*g7A%i0sfjk`_2P7L&WHfqMrFa z{(DqUug&KUOIPFtmQytx9Q6fY*+-yObJ=%`%6EV zZ5mwE_W{BGUIJFb8(bu#{`x(cq-;dbI|!{Lg|M2Js0%@Dqhwz5v4Bh)H3fI_FRo{Y zrQQ(8B)(K=cT>cIwp-`Bb>6VcHd1j;Pp4~Xg(8W%@4NARW}-4gEXGximzLvqOHZf} z6lv`)oAIk)Qio59?kW=-Bjks2n@r;2qDzd9nJGmz0da2m$r7n~U1pcJYie z=<}qSGsCU&YO!tcq0Fq0D^CQf!%*WZT?u1cbA_ZKtx zr`K!ysmv87xky?@24n|wiyZcIB(!OqZx=7^tBu}OYRs6HcAA;-%?mz4p~m^|96A^HHz11WM7AYMei2f3b99RPI|w$GtAd zExIn=B>j&R!e?FVZQMT#-QG=8LD<$<3Mh4)jZ(#hW)RF3iuZ` zzo^`C>^WFc1!JSam-ar>_%}<5GEn0?oXrbkq&xE3W`vu!PjTXMk4&F&uTi6EV;`4U zJkgnou-9^_Wa&9+m#z;YtkAyqU_i32o0R2YYAZf5z97qa8?b0ya$v>vOZi;fdd8r{ z*fDl54&07nnh@c2T?TitIJiGQBeX?3ts8eQX==7m*@}Ck;;ZG(x|FF(f5YP~zIK32 z$_kS(`pGT5y8^YW4?EW`$-MleQc4oav`0lineLj-!CO&VqGiUv6yKue4a$?oHJ&cL z94ba8n-MjxLei88eKOfs1+{uQ`D9UjoGDcc*11tDU>T zuqn)!u+v+Sv^ksSev^W?%TLnPFpA7%sp?Mnf(+Z4>Dt_g(MsE>_xPhXdQ2%S6W}A^ z-wKR67m`=_E9I#1B|CG658)rXy`LDU>y=BQ?>8{@NDd;*at9BUk~Gil#zYNhEp9|} zE#tsipgNt!dO(;Wom)V~P;L5X!UFvh_f!Dq)wK}&!!zU{zlITj-UlhZUo@E5ed1XJ zr{h?1NXw>*-9Hc!$G1imT2N;x3MgL5w3NF%$dx|3B$Jib9@*yER?Y-#@Zsepv~vz0}Mv0Elz zzn5+ss)Px(2)cFdU8CAIslR&0SE5kHAJfeQu3#erwmDV`lklS^)EhfqOhDejCprcx zG>H#WSu>nEm{{wUSmSB**SqpOPG+HZq{f91mjy0UB2qkoOe|$~m)3<#+;qfB3bvB% z(e=*QOL7y6_V~vc%i>Sdlfb_!3xN>G)Ku}SRDD!p1GRZjyZzIH#YZLxQ0U0FYD1T= z5dp5wsUXf|CH%L>Vi~QLt_P6v4;EJw#A+5e#?5jKihSqT`Ls-o?Caahm zH0Q6weF)s_x1W@Az?$?XXNWtdoxNd2rmE+d%+P0o0I_YITb}PdB9ya_SsW5RoNRAy8jkZa8 z_OvrdbKwoxM>G28tVpzII0*Z;YVTTRkoSJbs)CQX_c1lCv#jmbSaF1vi9ZI{95ic7 zu;{;TcnSZxpcNnYbbNKpmGm;M#LlJSI9o?9j4Ac=cbl&s#Y!QRXfMQbr||RrYCT7_ zZ@YFy%Ur~o_LDq|knfj))USF*++XFCN_=cy)QGZ?O1pgfwdr_Hs6?3{jZ52$iaD!h zd^&5k{SimmN5eda4GYy^a!vpz8z&TT@&AH_lZumQ#LWXIT!(NnaLWUzSh&G7u6{Rz z-ERg5aWa6&K*_&TfabkmoxZRJa)<)anMab?%56ioa+A0wDnQwH1D1G$IKqt}(ulDU z^?A97DLgi0$$>;8m>}hjKdjE5=nqr-LjhDGI`eW1DFOcafNB0FfdKITDPQao^LhEm z8bgRrVKDKmVAy03z?XIAJ!k}Eyt|%EL4+V^Zj3P!>PTs}U6^HY1Q}XbTMY$_sTxI1@D0w+Iln2^F zQP#!P-3=x02L5kFp!g#Mv6vS`J|71pn!n!>@S+XOMg`PkGp@IHrZ(gaZ6lG zW;PuND@^~RNB?N|?|KtaA}M4&*~AVJ7{wW)!M4!JyH={|c9Jki-5RB4htE zfvNusgv9&*m>1Z|VSge8)qf-eK?>Oazw>la!M`>lJpWgLe38aqQ7HRgMxn;V99+2n i8HdUYpsINa5E+aHloJhX8kDf3prN4$0L(61$ov~9HNY zLFrAJ3Q|O*id5++Am!*j<2(D_-Oe5Ro;$`}WBr)pedm1EoO3<%dBs&su4;3zAnYs+O!v-k1a(3gWzNyQJUx_Ny0G}2R~}He19QPVHajcIU*M2WVL;Pp;?|c)lyp` zcdWEc`N{+-dQ{Fx+ry40^6ss~f)iVpOJ7VX?pFTM!y0NEmHK&`DnngQ|FB`Ei*dO6vL4ncOAoH>b>c(!@(`vz6D znYa?>o-s>p4|{Y{tTMTPAPAZvyX9iwMxx1inrv})tCjMlaBdJ_nm(=)bB2N4Z6ll$ zA>iB;3*u5%)~tvzAR_}4Fv)WSGzTUu>v}966L#l!5;CkL&;Xf*-2k8_U{s0Yo?tef zzHXWn6N?|cU=;T};WD$VFv1v(dsiv*1~_2hsNxK_bU&kcfo;u0E{)OHc zpFWFEo5hp#iMSp56m$!=WzjjVl43 zySsx+T8q9MLW{(3TT6KTlS$(TYVr!!Z#(MqK9K56ZZMZf<7WbNP<8<)UOlw&)$!e{ z6`rWn?%jEk@ap+);&SErODbwto@HOWsxffEc-ZOU%2Sh%*|ya^0~YM*{*QL|Dm;e< zuIU59AGTX&&J!SP%4;a;b>gmrysWD2Eu9OR-L06c(_wcKTK8KP7Re6Z`hVPZD9!U6 zN-vtHTxC3kmc;8_GonQt&>YgmsUt7tX|1gnR}x>(75}1=c(#@nWr$IUxe9lyB}|af zioy+&r9V&62C1?l{M)LG5SQNWi&3IMNRsNhW*``YZ^ylDng28 zl{q!Qlfv$U93(oKsjvBWGHpy!LNx_h1TmpawRI)(tPGNZh9cQDcZ(S7FQz~Ag7pF= z(?xuT)a;zslONqYoXGAcu=md>vwk&}K(n{e;`P~#H&yP#lQM70$zI-4SutaywPTsi zLk#UTIgpsp3T3W*%`~6_`43VV&%Sp&{M$gYWdYxh$3sjt9h8X;q6eW_jW6Hz{$cab zCDjjx^0Wnu5>TRjCc~^U+t)?2k{w37;fCGR5M$YzaT1CEu3sip6G>l=UJ|`b7CB(> zgK5;)pgC>FSNaK0-$q6IRV|5+4QTmtWS=MjAvd-^_zY*!oK{jff0AUQ2gDac&roez zc}6AFA5K_^eK}E8F~vD6MOqwW!Xma-<7w6W8VR-6exf0h>*t_+PD~=`pKmfRn*(DX&PF+EZiR9doWQFDf#?N z*ZDFoNi}KK*|g((m;#BEC6y`w_1AGX^DHIq7&c=d zHv*7V&v%K=bDJ4IBi~prNTA+LZE2&-+0#Qu3wM63cMGt@1Nsl=H?>ApCb~{n?o)Zp zd3D4}Um!eCv=xaE^?ehxe)7+f9i8%g z^btynVm$ro!>^gk(7t?s>@r8m!p9V?sWQ@HOjavN;z3salWFER??<2Q6Gw05|8Oaw z$k7I>FUr%zEzZC9DX>LwE*A|9KHD*40;T{Z(`I7f#RND`7XU%V#L|)W+XOUVUd}Ob zAZ9o$r>V-e)5dkz$PrjCFsQJ;-9d#S_y+d}Z@mAh@MCs0aXJ&MAfz5cT_Ys%lh(Z-FR_tYW0QO}b#f{;#9O?IadQra~^- zR-ZB|%@x(l;w+23iQz`N+atROg4~71>r*chR@z+b@$cfi!J47mBF^Z(dOS@Z%*=x$ z&uiYA!Lbj@ELNm%>w%fMy0$b~=3^CGf}?^IqC2C_i*%y&bJWTb?N!_y%$_A`n^O5z zgQ@Wd5|Zcfa-R`Bf_BNiQRUV1E;+sA67%KV&YT$fI3+f=Cm_!-F!#15J1ZQ0c8aT! z>*9?@04}L5;>*P|ku_;E9gc<{_e$$5MWea+G8wA;(PGbH1UuDdwG`=f@-(T<@U!%u1I3vgLRy?kA{v#MAC%evX6Cea%Xhd+ zhCtub=L>CxfRG|%4DxgUQq5|qiNOhllaUG4jrA!XQCX_t+j9$n+^AHEbRC~+Fk$5@a|NE(ym2GSTfOJhm;P@R zK2ge}^bgix9*a+2ES_EZ=v7)LY;7T=dNdVucFLBl_+!+^yT{41dek{L6xKJ35n%E` zc81+641gsjY~joXC3YW$Lf#I_+;jiNCJNFaDU`>#ULRi1mSOD~BxI_tFC9u6$(R7u z`v#Ob;XObyX-AJigWkAhn-eFvz2HGpSLIB zQai%#wL9il$xb$J%#K@JenAyYs}cu$G`jn@wI5Er;!GDxHaV@mOP5X}Jg{)An{CB~ z*L4h44d38+m`)16PV*M%ip`p{^dAb*4A>qS{S=u%E4KdH;+(3`k!ICRPD?9ZoaHsr z)y1HBS(!debMbQPvoC;CR+T4Cng{$k&=u_bR_%i~Jv4yUC=wt)xkTdUW}n>-(+!O}vd84r%)NA!7b zGOwopj6UzNh=*rggcY2~XDo7eUBACIJ2!*B$LEy`IayY$bDuyVHUdJaNIDWf$4BeZ z)_nU!bBFCyi5Aoe`x?@$DxKgRxsm+s_nY!3*8&Y}zd1p926y_6z7Ep`PJd&&w^6J) z^KFd%pwOwq!HqZOUDQ<0e7321zb}jVQ`~xmqheACcs!}W`bA7XQ|oJH;VY#|A5FhM zabP}!eDLPSf)wCvDfiV9b^6^w(y61~X{%eeMNko*<5y_?-O2%N;#~aI)=ZPR=R6F1 z-4bv5-?Q~rt#8$Zem4Aa#Pzt{pTx)CIF+Btrv6N!Mh@>BwyMMjl&ZDC=MRL;S5~N< zzy4|;b*b)p(}TEJK2O<~;+*5<4ZLQ?@b~4Ig8k>Z=Zhh@`1aStqkzXRrb+n@8Z?fs zGW+14zIqP&M>B6VF10NA@SjZVIH|Fv(5~0+cKd}nxvM~-%rZ44v()O%URMUJ!~R)~ z;p%(JNRM-p9F?xVS3WqU4{B%g!85VlcEYXTmFQKiO*7yvs*RzKRF*VX6)yCn6@i%B z(GUzKe@N6EUR@m7y9RT<#zA@*rn;(|&YhD?b(<_?A#*MxhnipJPNNC3o$Uxwqn(1K z&k){v5raGE6Q_h*xX#-#%mY|t-$lY@eWcoVvo4V66;xqT>EPoXhl;yyC@-!_{**7v zUWzwnI+E9AF*2soR?}k>Ww;QHckIpN=a_e-U~Q~hEUqg=0~yXKjJ93Dm4$3HNZxUfnoFP#j*jJ7v3 zxsBTUf!z~d5Qn|KO^$hU-{Kis=7W=?Df;6wJq&E|6CZU+X78aAT-zn8Goyi;tR3zj z6i^S`AAYG;l49kfxOY#pPZzhhvqfPzbuq`OJGAELsRD`Lb@Fb zP)Gce?uY&rzUV1O$@BIeorkSVO>$lSD-06d+F3A+^>2P1pkrKFO@SK3!xyrq(Ms~l|El_TNxywqLx!73GW3p_N)4ZT9 ze2}NX>Sxx}FwL96-)DEnw~x5;`BUl=Yq|fk>PWR8BNW{+$=Y$bF>Bas7O{{1V*Bf( zKlTz6u0nm~J+&BGQ_P-92-ZG1M13)Uk`-Q@=f0P3%OkQP#In(B-t1!&k+*JKVqtH$ zU46lK$JBOt_@D#Av$ShChk<6wQ>!)*Cd-c3m{l>((#Jz+zld}SI_PPK^-}=Er z+fQKkZO^^WSD%8;Hw)+{w8v(*1gR(A!>|iGW%kP-ivf7bFVug&b?v}M;34f!v5PU> zv#|CqlqFjB!7NM@oOLVG1FW&|yxd>*iCB5BV=W`MDL4Nkd(tya&adjA$73VgDA>ky z5HB|WY`dkG^s#3_nM3uxgGfzU@p+JycAJNl?>)31Fys4`1DL2T<4J9QYd>rP-*a|I z)hXbOuVQHST$;n-+N6z03ij5=6n6;7_xCaf>uP@zR<%lwxyi^c8WVni8A|&a@fb*k!m%fkEnml+IoimP-M3ByRM)iDn7lUEp{Kp&vE;Oe56>@ zrQ+t*^p3YS^1cc@IV9I8KL~A#6)*D<#Z=3DEcBl*d+P6beO+PH6%|C;Q&i5BF<@ZK^`)3#>Wp69$AcDkp_Y;4F2c<26^@4>#y zMV7=VmSyGjos-ROh$iW#(}@+Z=4B?S^2=9-foGHCLeov-{y3f`2RL5RqwCwMK~`^4 zjBCYW)s8MGjih|9*YrHf3fR%GaO4m+w*7IszOg)enPay7>B%!M zx$2a6q@`l+IOA2++2SwdeV-Uup0Zk+oOghGmN_3Pb6&r(_srz>Bj3lwmq`Ii%h*Cj zo@|SSE&q&|Q|hi^%!+b75AuPb!U<^6qo~>%;Zg%xkJCZR9c!oCLSpsjH-j^tT~fD* z?8?mII-bH&y-nH6&hO(1Xuauc4|uMbMFs2xa#-BkXKw7)KxcrIToYCv&o#`3cig@~Z>dVR!Sy2{vpq z7sQ3X!rTIC2fA^qt$PEuzocSJGLS*W#VOb2(pXACv@l0gwEE>OHO|eXi(dzKdq}4y zDN|v$yMR6KokI3CV@?AmtC?;mO^otyI&Ozwot3! zM)_TK`BDz-_B1FVd*jNSLi>6!1Igbsx-+lp^Zhe@!us!SZEZ=I?vwj%h;Qejx_siL zL6LwIYP#}8o812R-cdh;hIlZ(cX5$)YCrazGSoFzwfDI{Hr#kaj9 zhS0ax2CX*HZWK!lum?X){28baK((_vdT7YOsQzOaxRFTEzPgn0X^}O+P)bsFy+iu! zEEd21mySK+T~SZzo`h7n@q@R&KzTF?^+)4D+u<*Q=BY0-ZFH02_i)i>J5 z@Y;3a0YzuDx>ij1(=;15pZQ=@rXJJoOOUx-hS1Y$^R1r{W@RmZFD>0%o)PKXYp`J# zIa3-&{;TVnWa;)pI@6n9*1WSLr`x+5RYov8Ip(c77?#nCKk2w8E^Ih1$5^vXl;us= z@b{{x=vb{GM$Z+jJUt9rcubo_>?*aDJ}$ej@Wj>x#~AzUd+Yhh8^wiUqSx-caPMS$ zx526l!Cwv0KFF!j;0F8D5WmgNzw|J;DrU@A$7%eTG9@4Z=yIo6io-b*Qy5@(T(J9Y z*a{$0mGTV5M_ENeD2@m_z(X%ey0{Qij3>6m0|}>WAb2UOh&zBbUrG`Z%~l}-E`L;w2{LBXPOm)@KD5P_gWgL#iz>MQhvA`DNj|GNfBNKlqlP>@wnI)}q+DB(45igHSLMHMxbfAH1>6mg3G#2fxE_Wsi7;Tsf$G)C`E zB0cu$b^70}{udGd675%a^nRCQ=1H&nw@Jd8|J?bj^}F!DU;Rr`Pf{r{7;(k46nbob z^6$3&z285H{yj`<7$RUhodS}Z8IgA>9ift!iaj%Xn8GkGGU_yiA`01F>g1qO zDHQ2Q@-CrJx<#cry^xesPNl=Cg&u^`}e!si!`rW(k{$sDTpYQX0p3nF5 z?C2w*RIA2%c0}rlB30eKerxD*1v8 zEds8vVCQYp6c~4i-WuKe9nZww`dZ5K&u`2!2kf+M!Nf#bs%=MKSXQ!8<=YXbjjx3}ie|2U zGniuftLo*++sbna4f4uzuhxGdaMxhqfxR~-o|KK~tfxOit=sA<6T-B7M_}D#t!-n~iRMqG8gHmJ%Og=fQIFv;wI5`?^{cX< zGuuP=jaq4roF(jAoK2@X_1@|aNgd02oW844D!Xm;ZB0bXB>8*@@8-%&4Xowic#^kM zBkT1kTOTFb+0NTLGQ78+M;*Mb|KP*?qC$np3q$@SnJTT6@2aIeh0 z;=#?MWewN*k8kyDPq`tnPR2e1cx%lZGVWNJRFw5Q#BV9{yi^!{=5SS_`iB?X#}#~0 z=G#h#q#!3}QC7;MJpt?TUG~JiSTbhfY;osK>kO-0X}zN@&p!_Id{kXgo@YH=oB1Io zK2Sz*7)<}h>qPaK1jhi8Uu%9x%*b+zuT2n0O)6idNl4bF)U8qshE8y-%}HfrgOx=Oq)Wdo!mnk+B=CAJarr<7>^8s z6O~GgyXL5=lSW~x&IJQ^ts3{PRkSxnkIp>#_~=U2ajD^ftlA#uCyq|&*VW9_)(7{% zphfM%wMm8p@#UV`NpZmk+rq@yEu6KJRzZ(MBxCMD%D9KoWv$U;8un{LJEDjzqj`=? zdW^Q3xSy!LUlcl_S&!RQT5DWsX>*0J?|`dF*-=nJc9^!j?t`(Z8HoCj;FMW(mynzY zb#(4SS2(6sEzvvnRP;K>T6tk`Xx(~h3bFi$NolwLmS>+z`x87AJ9Lz zqVD>Tp_2;N4Bp^W?#uN@C)|%D?`i3U&#t*c+j0Rb9U#zGcCAn^erXTGHc$`%9aZ^xGmCqe^ z2H|s^ll+ssytn#nLupxVO8Ma*Wr}YuBuZ&Y?E-KGd2My5e0W9maqR`~%~#B0`|hks zl#iuz&eG-HeIrDTo`KiM44V5&x^np2t7Q! zZkrzjh;D9a{%@wRE+x-S<@t4c2dwWh`9vty3mx`5b=EGW=t+z(|X0huJM2rU2OD6Y@%NeRKk!vb4?Mr^pn~z=4vyqT=6Fms%%^ zDrkX^w=k$5_~s2q{2!N$-h89EEIa?<+f-##M9s!k(yq?K!)JFq^jAH)+JxrOx803{ z_C0mH^v$n-Vwh*H^EmOzBF2Aosu*9bDt~^`my@6VeM3L%g!wzXzC%!x8hBIv@pBlv zkB}X6_`CrvxW8%qCcV{Ix^Q@5#*EY3@Eat{|2mp^iEYvm z(4<@&6|(-khNbh7n+`Q@rMlsiVcU^K`C(nTS_f{W=e%3_JPDsTZn9yI`kG#1HO6QT?{U>onlp>DQnt|;%JNh7WcYxzJw=4ryjc>noc)8^)uQ?*2v-$kz^&7+?-G)YYj zuzN0Rfd!3ek;km_UZ~4+4K~e(*nfEs zVuy*2`9LMyplbmT;itM}g*TdfCqQIMBd5)Da~QHWrSfGCm2NQn@VoG*f6WKjyD zIEKgRJqR_5rBi^CD31|95~&P{6U9UjrQ#^LOe99)Bf)rx2qB;m zkYE--76}ofLCH2qMj=Lvr81&eNZcMN;ztre2{Z=}N%$f#3V{&Hwj(wQksl?NBe6mu zBtfvH#8BquIev5D5{QqGV@D|vI7A{56`_Yj$BU#$dTJCH6Cp>$KvD^yD5(w6RXUqc zhWH=>u)o`?SG_bywacIy;C$~gsZ_H>4u85r1+EJjt7!^NJK zYMEyNQp*{*$J03wxyzRTxJ=-$FZD#fNk9d)S7$+$#e#f4iRIAdfX|2EVe?9Zgo%m7 zKKIV(@y&-iF0cC_*^i-FS>8w+crjFNtMyA;c+)Yir}`2U8jiKbtrkEWJ3eF&QfUG< zgUYgJGN@b-LOz)woz7&~(d_6vyg7ppB7P8`%HgxQR2Il%Qh8h+pGt#RY!;iwx2JO< z_-BeCs;COq@iGn0okyxW2awV$f!V**{2!RajBE(YDLKG1_^6kKT3Zc3s=okV^KyZu zR6nx^mw>+Dv2Ec4hM9t~C#y_)EftAZYeW$YFv)CYJ`$K@eonAOTlEgM?grkk4d60tTPMp)mv;4j-c9UDynW z$p_gGoz1fY1uQO?%d?{)SZqE=z@bA7dm)#_;;}ed5!8JVmZtFAKlP%vVC=sPGkXRv;p_18F&yAk54g9F?JCP@veekZq-`V!Y}hrt(WE-i(LY0a{>Vg5Cqi6 zJjD+y4|@AY@56AjcH6TPgx5B1=7t5Rqtmtn`ORuGtc*EM`lvW#;NR+a{r3Hqc2x67 zakoHuK*z|i(W)-dZ?dJ|BEPvR`*Ll1R9i-PV^2&9|I~a~hgbGmC07qHc0Ol$>}Ni2a}*zqK_G?)boKOCFW061(+(9r5lw zHu&hlp7Sx3Ps8%j8Dk@vZd#v?yctg~o_ykGkR9mKRywrR^-%uMorh~r=^CS&fP;W3 z)@S*Ho7aQi4J!!gmL1^cw#?SF!+F5IC+!(~BDt6iInH}f#!GW%Czp11PJ#jk&#`AG z7$K9ZuT-g1?u1m&u=$+3D(@3Fct|5bKSQV7>%gNsLPGq#aEC(bq>@T!-`1fU7$_ zCn9>Q49q+=d5pvcE6=pm5i`$1^1A19cW3nw?z#1pZ!L#P4b)yYOD|34A6lb7Fn+Eo z(LW^5y)Zf726t0S-#qnjNBWbJ1>9xDXD6DBv*RnC(oV#&be|L_ zdjEu>bbh~LVen%!r}zzCV*x#+UnY&!6&q{qi!!5Hx#i&&$-NgQauPX**iYD4V_?pf zqFb4pwn+LFRNxh-9U*auBcL{jc@p&(yH!$L06PpNKEaa(fnWA3E-MQbp|pCgi-SKg5qSmcW`ecR(2hyt4neSFrPy{zX^E*M07i zqz2TSTR%7$9Ejx5%cQj~a!b((&P?@@GKWhgi{=nBn)J(#v~zVt$xv#`$lZztPyK8~ z>D}X%)j9m^*|QHUO)NhV(PGwpqWIc}aBono7+#=ju*~1(p>b0sxw*O%#M?TDPJJt{^Uc~3>U{^ zXW(=AbMMUdK!3?H(E12H?g-zi7CP5>nU5M4xH%`^T@yFqxYA|)zBICF_`Qgnjlcbi z`Gq*}RN<`-?PHMzmqV}cQ%!CJdPKF&&^y_Cd^bsR@w|Ay5UY8OC*S96zu<|-uEqM@ zG|jBXb<7Zme+cV3EJgq%iPL@ zNsjw6J@>PX8L!@w+kX03oa|V`ip0LHD=q42FsT8&##BGESR3#2ecnIb$#@%atuo=6 zhEBZv(#IUDBuE+J5stAzJ=o?{%|~s7#uD7>x(8@)44$id>3X(NU1!TBuQ)Tid$sJe zAEP!VN**ZA+iB|bMu+U!ttCoUACw)cD@C2n&e5{}-fqYGiSbpm`+eyx)wX(ji<)+z zF{71h$FeL(zqgn}*((mp|Fm&GqvYiqx=Fa;9368$^}YT9?!4|E$lIf+d;Yvd^Rb;F zzNCpu+g%w>am>CMofL=2%M$@*N8(;-vRUMC0)h1_NGFi37g7Dz>Ml~e(Vkq-$akW?AELnal=4WL_$Fj z2|~9^6cQ7eloBeC2C0zX5g`<+up>H8xkD6okYN_+In7TjXf``sE(@1SAf*7|q8N0X z=6MA42k=6~P9PTsOO%jsDl$Q6s4P+e3#^^sCIZg3K)3*>W5hyf>KhTXg-AFE5Cu*L zkHNR@#2?)X4v@h8m<4Q<^*{6X50=04=;3^rWJA>I$OFLSJOF-dO@ z$wC0EFNDo)tI@aq74EaO(yaa&0H2?P&D|Y0Ah-A&0G9&%?RB2G`d0vjzMwbvjAhv2 zRyb>*`k-iqTA5#~)9gJ<98uO|c&27^-U5@-`d_Xl5uqiuNBWj{)S;hm($R}PX!h={;iipdli+BtslSyY0NL-PSN2iOZ6snL$ zqaZ>NMZgv@5iN)b2?Y>~DPqItZSm@hrNDpG=xmWps0u@hbgM)(U8aXM@aY<0e+dAV z7Xwp;`gi<1%7~1^z=hUx;T&=*kOaSWx72!B4uAt?0Q{Ovfs-hKKs@}4Vyu~04S>(8 z;5l|yp{;*rHQ_26wc7)y+oU25M2<#?Y6P(hu$$Qn!ylHJMwx~BnLh85{7@RwlbX%h zdCZ%5=0(11l5!fIEa+iQn0uYT=hwCS2PeHq3CbGI;3QbRlM68H`dOAfiYVW#<3 zI|Z_MOpcI7!{SXP3f=k(oXaPu>oMSH9RTjHoleL9Iy&s1;Z38l|q0u=2N^?06In!hV zj`7#pe`pcDvG<3k>6H9(0ZyWsFTaQee`^82cg@o&`G(Pd$>HxT8uC$0;X}u%xF=3^ zJK|6{ERKYn6P!Ly6Q_x=Gqp3N@%yhSohf#{mQDc#a^Pj2*7V&M0kE=tirTN${uARV zFeCI}^oy9^*Vej@zI6I)TuNCN&j-2UHQPJA`MZ%#KX8vM(6bJPI; zI^UfBuSbSHawyO!r{Jkmp v|6Q8Ww}4Ie6p#OYPx+RwZ?+eMnR#gd9s_FQKUWxfZNg~7NK;?!>`VJMqsAhK diff --git a/transforms/language/pdf2parquet/test-data/expected_md_no_table_no_ocr/metadata.json b/transforms/language/pdf2parquet/test-data/expected_md_no_table_no_ocr/metadata.json index ad1709b3d..5d94f7ea9 100644 --- a/transforms/language/pdf2parquet/test-data/expected_md_no_table_no_ocr/metadata.json +++ b/transforms/language/pdf2parquet/test-data/expected_md_no_table_no_ocr/metadata.json @@ -5,8 +5,8 @@ "job name": "pdf2parquet", "job type": "pure python", "job id": "job_id", - "start_time": "2024-11-13 08:37:31", - "end_time": "2024-11-13 08:37:34", + "start_time": "2025-02-10 14:44:09", + "end_time": "2025-02-10 14:44:11", "status": "success" }, "code": { @@ -36,29 +36,29 @@ "num_processors": 0 }, "execution_stats": { - "cpus": 143.4, + "cpus": 28.8, "gpus": 0, - "memory": 31.51, + "memory": 22.7, "object_store": 0, - "execution time, min": 0.042 + "execution time, min": 0.038 }, "job_output_stats": { "source_files": 2, "source_size": 605137, "result_files": 2, - "result_size": 29694, - "processing_time": 2.077, + "result_size": 29781, + "processing_time": 1.506, "nrows": 3, "nsuccess": 3, "nfail": 0, "nskip": 0 }, "source": { - "name": "/Users/dol/codes/data-prep-kit/transforms/language/pdf2parquet/python/test-data/input", + "name": "/Users/dol/codes/data-prep-kit/transforms/language/pdf2parquet/test-data/input", "type": "path" }, "target": { - "name": "/Users/dol/codes/data-prep-kit/transforms/language/pdf2parquet/python/output", + "name": "/Users/dol/codes/data-prep-kit/transforms/language/pdf2parquet/output", "type": "path" } } \ No newline at end of file diff --git a/transforms/language/pdf2parquet/test-data/expected_md_no_table_no_ocr/redp5110-ch1.parquet b/transforms/language/pdf2parquet/test-data/expected_md_no_table_no_ocr/redp5110-ch1.parquet index 004f70d2de4bd6fa28915270643f1bbe82d8e902..96260b9977746d60514941578c98a0d9d64c9457 100644 GIT binary patch delta 5299 zcmd5=XH=8hwoM3Kzz|yKy%!S*fe6x(-g`9=K#+21awtmZy$2DIUZsWJ6cH%_LAo?S zs)|$*Py_`Zo;S|BZ@f3|d5&}MkGFr#vG!VXuRZtNdyMa!(|N6vwt#(m56FO**JU7L z06+i$p_VIWXkR|hK)~y!udJdgzU6Kp&+iqtp9iq|EX*K{1dB9}bfs+>l=U@DkA(`) zeC5Ao^1TGhE!H;zD}hbGYHfR#)5#5L=h6EsJY7+vgPLq)lR?R=oSOXsM{dhD#?#Vc zUn^?$B6t_Gvx>fPS>bGe>_B~75Re-S@?jgy_alM0LRVf37KT^A&ul!68)~i0AXqAo zxANy1*{*pBOEp_mjo+#=0tp{c6kOC)-BsrZ3GkkN)nNyh3^1 zhldPy%*D-W{jZI!)!Piz%j!{}UZQ2phx=;g8$EVK08nJ_W&VSyU5Tip0a#3UxN^9R zd&!QcuHu!o*Z|=h(QUp-eFcP39lFq@KH4sDT3sglA^(9kUd*T^duk0bQtjY3NiiIXhf9>Z1kOKf=VN^pM-Z|$hA^aAuiZimjhvsSb z8VTYe*sRTC@!3wn>1eC^lO(p!@D8E zoXF~-e81g_hv@BQ^F*vkI!_$~wSYP6aEs@m#NDoeBY3h)-4(YVhD#hcu{s9@7hO+A zvIB&Ou5J;0N7(cJcAseeb>~N=#d6&`JX6nX%E#_JG>(iP!C4RObEY6NJ4P@p7R~Tv z(M-4RTFocIRt81==-vs)L~mAIfw~$TZHmOt-M#-BR{TQOU&%4E<{F zAnOcyGu8T$3GdY@C;0;d@F%Z7EMQJ4yGV!xBJru>4Xyrz?Ps+jWImlhH+0!I#G0^@ zZhP^w*7v_J?Ioa{^JXoF%D%gil9I-d(ifsv!>*+ChA&}+R6e+3wLBgfSFL~o`;qI> z>vBq(`;WFw^hJtL=V;|N_A*#!Pt6+P*pd`ho@5Ls0rgVg#(M?++ z?AzsCtX|JPie6VTm%jE}DnsYEyX3<|zJxex{F`yTd|TCUU9D% zw?Fhx6{$c)4f*VR6>@%Iz9tJM<({$}g0gXccB3Xa&atYe<+eHCH>f0#^ypbOxhucZ zbQE&tJD<5qT@-=s;HvQO|2oMqo8_vvkdeJ}i;y{xy&{`NAu_n~Jc1mQB^g5;Ps70{ zqULQFCi!%S+^jLTbs>&(>V?U~>q$I{mo>Oa@>ZL5x$UIl79)f4&aL6)lB=Hvnu{W3 z_rlOyR{YMVIX0OaA6RoXLhuuj>q;pN6SrB^lgUAY+$N(~x)Va;(0v7NA-U^oc%ycR z)03wInfMa=6t2Zl)k_9vd&iis)hI(;pad#&dUoQKV4xJ{lm! zq3W>SMzBo?z2atm?qT)1b6Vw*Q1(mkjdbH#v0b-L@7R53)iGUxL^)X+rxs7;7-#R! zXmWB21RAZDU-CRdcR8mR?f67$={Y{FyTZ`Q6i&)!Iq06{Y!Evf=n(M?=un1ZAd{j& zP!4?Bzv0*L*f62NVFG7PhQLzBE<28zaB}xB&}jD3ja}AQ$*SqoSARj{(|l5kYItq9 z1IHIJ)+|}wP1^eMMbKzSj3WhPRL-=H990$Fyy{Wm6_gsj1zDIK^^y2ddY5Rvai}8y z^0cS>4>x&2ByW<=TE<}M@aJY1{>acr2R0f0l-$x%JUm(%r63-1J}?=SK5cNa{xXi1 zoDS<^(-;b_wmKpgAh4H975RjFMHH10Gvmx>VGhsWL_ritaPd=A8S|SRlw|qKjXOSj^dj_8I1+f-UkxW*Y(eXLwA1qoM#gu&! z^)_k2(ysk|IlbS5p>OQkdgsE|7~xfbYog9Z3xr7+LaKI!VsZ}zdq?5yI{lkN74{TO z!}MTLNC|qM@z$V2_>JP>p=~&l{0T(2)-%hMvi+J>s_WieR7j4vn597Abz{Z>b`cAM zsU`S}*lAa_H;7$#LleqX`A+d1s`iH9&1Xm3yyf{wvfyEgh)7M#tg7$OxaZ1t*R6N& zwhBeIJ7pZk8(2_x=n=-MP>=8J5@EA%yEQBB4`#YVTs&)82RF5NLwPT8B$2(d+zj4q zs@p#3*E288g68$m-Eb8Vu_`o>ILgV5OHTP{P^Ef<)HE$(K&p zAhgwe9J7%>&x&r+!Q=}UUN_9j*=uy-;T@y|J%j>M60MM4Y)2F=Md3js3}NDHU9D8$ zYc=dC?Ss60?7Ku%L-n5n-G@e}ujRF4yQ90Z3 zTknT_WqfNiO`FxY+>?n_q_M5-m@J{YA7!pjbxb>bv7aPQIx6!IW#1TR)G)L&_dR8^ z?KFg+WZQ+7C%F$^Q60HSR3JHXuH^OpH>KP*pN*_KpD)u~lswIo+cZffA7z5ma5P_I zAZ#XgDNIzK!rOJq-{+f+KFWkZ$ed7uNUl|DZ+$7vn|u*|1%yc#ItnbgB`KMhxr*Sr zEqXG+FsgKoOX=*D4%^Po(OPTti%St%Ep^i6+USq76Mk2hA}hFIWD>z;{QLNmbT1XN z3fAg&)ojP@K@V6{tbR(oAQz018@JmFXk~GZGb)3gsin&nfB(&p{>3yp>~;&UfnD_5 z)Yh0+!_yu2clg&5dvvf%!pN@fwuHUq0>WZiW%!!uS&fQ^M3>W{D5DrQqW>sN%%GoY zU&-Ez$Su1+LXGSi^-Aml*SvEhM?G73(}^-83j>J8K*WZ;u;=nd#zSj#fLVE%)VZPD zXHKfh#$~j2L+?mUPl(@35wO2~bay>z-rU$H*My$I)29!t+!nF9`NtaO3dNDMo$DEg zyD*Mi6unvP!Pa(pF1Kz1WTLOPg@Zb~s(@RP6FaVZtfDUvO16CC<b+=c{|{xTjgs z+&41yBxz7K>V9j1OYi0_xs$$oBch4xM;-HGj;)a&b^`edY;-83MN~A%kvA3i`vjK9 zo#vxq`$)gYQ8(9Ke%3sC-4^XGqEbq44Xq0<6Zjq#iQm5i*tEhN=LaFquiN8 zRdUfEj~yn}{oL-3?||qRRlbC^(vG)L9X^W~4kuADc*aZ}*ErRVVAB@3l=O#=sd$T@A-sDqiQYe15!P4kh88?Z)w3WhMPNp37cJZ^$nBA7YXCy62A)l)kXnHQu zSw_OaZ-(H79Gy{~d_DnWIrnZLJN2GR&FuEh57uyqMr>2fvpvo>o$z4|X%0)%XDyeE z=aL#PN!YrN4a3~|!lNc=D0^R-=gk@Mo`I)q2$w}rR1%lR#E%-fFI=f1Gq2py&B~+Q z6h$cR&H#_+?hR(-@yAgfrIrQJ=K1Q?C!2})cBzt9x3?z}NGh7l=4~YBZq#nMi}Dou zEWG9|Xd2l&nB}1E9=@ufKJM+JJ1ZY4_Z}w`7#hj83Z+qMN zj2WioVCQ=SUiFZ;VU?r+Q zaKn;mu)B0NV&i2hIkNNR`>?TxjcwCOA8HtVuuSy{GL@0@%EN{fnwnC+8M-K@u0>7zfO?CUwIh&g zkGajs#L#b+SO71hGW&a=O7X`b?8Bm1*iua?2jClVJJG%%$q!Lt1nx}){)e8JUYLI@ zg}WPXB~^D~l*vIf$ESp571y@{p|op@d^tOA8Yji(kHI=@0rubm*|>3+AWTgm;0hoS3V?H9f^wq(|2d%KeMqtJ!etKV^P89IJf3I_?jQ{~EYSiHR7V4n`SAxet;b?qK`qg_B+yEeMg@FOjH}k#v%hbZ~J-$-poum<$RARr;40 zKo=Ca3%!83xD-Yx_7o&Qun;IVXF~#n{x#yi)f)JJ?ha=J76E$U&`iucUi^PL^H22C znSWyF1;W`w*ns=EZcbK_Njrf|JGP&}|FejLmJP=YWo42Uhe?XVVWx1zRRrQH94>)C zAmB*YPjkw!NWvul$vpHg=Kc_N4#&mCr=qp>R+V)CDnxxWIy+!4%omcGc)r4 z1d7m$KM#L|6n|>*C;mtDj|R$d54Z$jan1rmP8W*&@1%cHdg1sC7JuLt?ly}6@hew> z^QHopIUum7DUgjF_)9MU3vQfU;@^4++39h%92Ng>y~>D-%^!b*h@JBIZI`6i-2IE+Ka8}c{wsabsdU)JE!G-h8@9~ zw(sdw%fIRMd;_!I<$2kr>-9-nnW2@3uJ>J!Lbl_?C)vd*=gDid*W=rpsB?}qz7bfX zqCQ_bw7q5+%rC4{UK49DxS^*b;50MP(y|9XDcnUGH3; zAR7{;=#^|T=uvjL)(*j@mCr2LTuu1STV&mp!j|xhtS2n8ZE}cR%?(Na$*~I_x)5zqzArpF%kmt+4FLF2Bxwedx6gjMRHt-Q-pNX; z4Je6~xTkYasnP6Gw=7pE@;>0h9$z%(nRM&qm=)qE>asx)MDv>2R4A*akqCjDIOJ8z#$n%a8WsvEY-$CguB1 z)KjcgNtF{vyAfe#91Hr6d71K`MQ~EFxK63DW1-M+IZb`xCGV4%e3Y$GZ&iSFMa^_m z!WO==XdO59q@KD))3I6_N)Xt(GiYI1U2ZZZ`ODCXuLc*{e)lkD_x#cZre_PWY}Gkz z>xR_IZf$2qw51Js)oC`)JLm_i2_AwPdBjc{Y!2Vo3P>c5%NFL+an0YjN}7_*bGyby zeyv36Ojk}$kAbRlmz4>wTpjNoJt%CS#09R`Mwsg!R`5knGkcZ&J~5t>A4nuNQ$h(ZsHn@YaFmH(X$8aiU(d&qZwG_!-9=S_kNPX0mKuFa;^bz^ZuUc*w6SW|wK=p57fYfBn*(>1AU=P*~}A z*-le8Iw#LC_t3mA(q55IRP`Gx45J)o7VhV+KcX{nD)SJi_o}%4@B>+{C}kNU*j2ue z9m{J>*X56DtOJeV(*$DrvoJ#VEU0f!MZB*E-%Rt!9-g3 zUX^E}rf$xmqgBG8J*Bo955uVmkswk=03A{&TERlVO1WhKNC~E8WM|LN5S3Ai6UyH( znD6r3(yywia_s94o+$~Rd>Jp0upL!@Er_{ca%-O!P5OB{eO>mT-r` z%Li$W+q2TqR!*L&DmoXGpxr3FKBj}t=e-G6Q2a9BHq&fZ-fl17C;@`$^-JuEuT!7F z`pAZAEE7f)O0BJ@rx6X01cf4jDEypXr5sxllN}XjG}wT{E>~~ONZ&zhmF|U@al6|M zg&Vi`K@sm}2@1@!@;TM5ak4ZM!*WG~!hE@}4g#pop<4hHKwH`1)=rlQ=!w}-$?-%* z)x+W-Z5=cT)x@1!dfjM}oH$26_bxb%qyc;2%HDl^?0c0~6LFLpd$`ZH&gJ zq$~@|2Js(nOG8@nwT<*KV0M#k$9Nm9XYUg60iS^QmqgfEar!EnF5srQM|r(=*dTt6 zXm}P&2z&O)vQJOc4`3*Zq}P(YYT|EP!6*aem1!Sbu3c`wd$qjlb?}{6K6X2pqN%BL zWr0dRX?kUnS@@Ln`Ik9gUZvJA$$^X1oy=vr>W+!K)Yk6=(|xY!*St_gw*!OUJ#bOy9`F%Dj|ZgP^zknto|3-3iU^Ln1e;Zm8Inas;~F0 zeZA){X1$q>Roj7Z6SiHL8{YtwZ)tsOx?g>^Evbzp6HAXa%2KTm?N!vlsk?uaLtnMD zJt>02`tAIE5H9bloqM&Hh&Sg~{GoHR*GAEH{9Y3-5jkEu66MHaSh$(<9S6$@bTf@g zSLu|+dfw|)_$@zDTzrAEl>m2ASD&zfAiS@~P9nS?sXYjI1XPXRvQcGdW}xCsJnK=i;o~ro`=xhY#ULjRZaUXv ziiO4r4=9T0C#AHjjP0RrWs5RC?SrM9vpmyc!!_d;K?&?w47mA~R+gz_r;xr6n9BRa z$SB&DE&!kj;z0BCC$jmh+1`@<2xjDMsGYcP{X@%-hv0?8GXb@J)Hsr@#?mH0uQqd4 z(^#~#bzquyMins(i-ACEIzU&45EBdgcF$yVi|iuJ0Ia^*7(C7yf8MTc>ggAq#we3} z+7;Pt+IX=B*Upt9b!b`U;)jrtH6aWSrEclfWYG_UnAGE5dX}oBy87t|tbr#~m^yk# zg(3?<(4Q{nootd~TlhSESIk$`fl_5i7tvgL@OI)thzLK9KrbSytZ_v$A)TSKdW+WJ zJEVON9FZs1%$vNdjB7~9Z<#s)2Vf842b1?_hlu3F076E^ERSNvDr?}1v zDn^rA^%iK<7kaMSlvZ9tr`3^~XyMETSq_FCU4Khp8; zA<%a8!S)R@kM_uLDQoMDv?B3h=H&JSrzc%m@6s(XP$XF|8A#v#CBEfcWd?FHBcF2L zUy|R;XtYhJYxcc-Y*2=!UmKAEMp5vst!_FGmS`F$V9}XA9QorrYGj*DLBPvHyx`j~ zdnGMpiXjR;ouRkuVR91#ClOroE=61*L*xV zK#(&^Mj@GU+cfx~tYyj1f0%5jav@6yu}8NO)8ps<-P9|NozSfV; zqtM*d|BK_oT=m;9H5p!Yr>rq5Izo8EQtiTKIwsdoRE_HeMqT};QYrr3;h;DyLy>Fq z6X={#xqenv@fnFO`p=3G6wdu*+#}(#>6C>ske`}ltSGT-IT4{Z9$1Y@ib%e%cp2`g z(8gu9j^fhXy}*lR&zsS?jkitZK6-YAD0ul^J0~x4ShX$xJ;WO3t3P-QBe=e|GY_<< z>|NHq5%^K-+ezUWJA!*$Wb|f`@Q-vaZcb%4trQ=saIbFG6i678lH*xeBV$9*fOk}n zO2~Iym6-6I7wK1Sv-Y*{H{FiX7jnPM);lBg^kk%{sfL>($$3gH24}opDr!3Pitn9} z+IErx?ZsJXzgI6gmh*NrdmRajU$oq%{EilU4sK|<c~t4UFu$;*y_(4F#m%K0M&J z5ACx*Ppe-``|Bj+!o`(-t7@08&!;P1^RbdZMA{?k1r?O(psikadxB%F9*wSDqp&fF0HcTm;OMQ#y5<$f_s_^ z;uMOU3=>ejHW#jk%(SJ8J??Y;20{8ifj!gglWwmGG5QQ}w+IyNz}*Q)%k9uQ&iJTa z(6-I!d20Uph3+-!2wzYF1pLT7Dp%06l5%!vVxti>P*TWh=^W7CRnF2KrrLj|KZy&o z{($%amo8=F;vqg}Brs19@qxMwfh(WFG8wf|AD*g;tw5E1t-$4Ia|3|w2? z;EJ!;c?k&XcT!=xEV3prj6{Cs3^Wg#Kv(BTDB~qpqnwuK3t#7dirKJ4m@Fg~%*dLW z&r!c=x^&VI&eiH$>Av&p4t2{RmlonghEkue0en+F>2?y}Gd+{zkcuv=9}9KPi;^+;{ns$Z5yEWZ?1Mwq!;hY_}!c}*qz0$hbV^06EW35Xu5G7y>(T5#H zq-A8O^o8UDfoP%@11ky*_^%I6^&5f#`2QQ9Mv113H~&D@CIbg?l@U%5JH5#ytZ|aU zRyc8SVXT;#q%h9f)>0TRiN{OhCB!gTYb+E1WrXqwoxXb^P%sqCE(zf0`6KJ#zh4D9 zr6)!+z=(-V!fJmK=TAx<2&|Hq*&Rsm1ODxM9jW{V2vBOLgS_H(R@SvW6F6N}Q%lNW zXUrP%DSmsNB7B2b&V7Z4>R;Gd|VAda=QwL)7;N#QJ| z(Kb?8TeOum-tzP(Es4WhN{Wf&t*k61Po=a*OG$}aN#Vt@5|TD}E37mQkH*Sor&AE5aONdyGr+l>d4KcmZ|J9Q zf5Y%oL^NY$2d)$AS=a>nO!=Zs*?$IqXA$uSi!=*X7=sbUi0h-JFN;fE7RQQ+qb0>8 zB#1ZREaLwn2Sm(P*J*vK%s&?W)1|~sBs1|>)(604;sXR9<$yikVPz&_ z90+Ww^kL@!{-JvUK^)=``7_UEIVUsGj5GiLt7%W$`wwi0py2zHJX6jI{SQYC{hKip WF(_H;* Date: Mon, 10 Feb 2025 15:51:37 +0100 Subject: [PATCH 2/3] update docling and test results Signed-off-by: Michele Dolfi --- .../language/doc_chunk/requirements.txt | 4 ++-- .../test-data/expected/metadata.json | 21 +++++++++--------- .../test-data/expected/test1.parquet | Bin 14363 -> 16705 bytes 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/transforms/language/doc_chunk/requirements.txt b/transforms/language/doc_chunk/requirements.txt index c24f0113b..b458ca98c 100644 --- a/transforms/language/doc_chunk/requirements.txt +++ b/transforms/language/doc_chunk/requirements.txt @@ -1,3 +1,3 @@ -docling-core==2.3.0 -pydantic>=2.0.0,<2.10.0 +docling-core==2.18.0 +pydantic>=2.0.0 llama-index-core>=0.11.22,<0.12.0 diff --git a/transforms/language/doc_chunk/test-data/expected/metadata.json b/transforms/language/doc_chunk/test-data/expected/metadata.json index e83a0375b..69a62dd7b 100644 --- a/transforms/language/doc_chunk/test-data/expected/metadata.json +++ b/transforms/language/doc_chunk/test-data/expected/metadata.json @@ -5,8 +5,8 @@ "job name": "doc_chunk", "job type": "pure python", "job id": "job_id", - "start_time": "2024-10-30 18:38:40", - "end_time": "2024-10-30 18:38:40", + "start_time": "2025-02-10 15:20:06", + "end_time": "2025-02-10 15:20:07", "status": "success" }, "code": { @@ -25,6 +25,7 @@ "output_bbox_column_name": "bbox", "chunk_size_tokens": 128, "chunk_overlap_tokens": 30, + "dl_min_chunk_len": null, "checkpointing": false, "max_files": -1, "random_samples": -1, @@ -34,9 +35,9 @@ "num_processors": 0 }, "execution_stats": { - "cpus": 19.5, + "cpus": 25.8, "gpus": 0, - "memory": 27.48, + "memory": 24.41, "object_store": 0, "execution time, min": 0.001 }, @@ -44,19 +45,19 @@ "source_files": 1, "source_size": 12073, "result_files": 1, - "result_size": 14363, - "processing_time": 0.043, + "result_size": 16705, + "processing_time": 0.044, "nfiles": 1, - "nrows": 39, + "nrows": 29, "source_doc_count": 1, - "result_doc_count": 39 + "result_doc_count": 29 }, "source": { - "name": "/Users/dol/codes/data-prep-kit/transforms/language/doc_chunk/python/test-data/input", + "name": "/Users/dol/codes/data-prep-kit/transforms/language/doc_chunk/test-data/input", "type": "path" }, "target": { - "name": "/Users/dol/codes/data-prep-kit/transforms/language/doc_chunk/python/output", + "name": "/Users/dol/codes/data-prep-kit/transforms/language/doc_chunk/output", "type": "path" } } \ No newline at end of file diff --git a/transforms/language/doc_chunk/test-data/expected/test1.parquet b/transforms/language/doc_chunk/test-data/expected/test1.parquet index 46714dde7b3d7623a2840b84998608fc57a669d7..72d6fd06d9b99df9836cf9c3bfaecce7762680a1 100644 GIT binary patch literal 16705 zcmeHv2|SeT_xC;4A-hIHWJD8%88e2V&DfQM>`TnX3}c%ady6e8g^;vZ((;rLl2j^f zsA$omR1y_WWhv3}w7l0nNQ>w9KJVxE{+{3eeLv6rsoS-lbH3-C>)e;?I^{@lHb9A> zv{9z^C?OQ2072TvaGx>L_dt*w1VLOTBb;PlfY+n54D`bp0VoKifYQ=MnWE%SVkj|H zBS>5B=P~GCS^yydRS7!?5&{)d1i}=;zD1~_q?AdJn4B^i8NlW+L#ZK5l$4UGQZqv7 z6#}~oi~Otfm309I2qgr8mmK&3L7*;&n*9SNBn5&*K^Oe|j026F=$R}jLE2Nag2_{0 z*(|Dx5*f!L5gHld7fuai@&sV&5K0db*a2Z|rLgKG@yXJku=&f71bC)|G|_`bPLyC? z6XnTP&@u(2YUCs&Nf;8JO5^;1DOVhOR}5hKmExc9`OBE_<0OAI(?ky%IZ=XnO_V2F zr^eJVZ4#3ljENCO_hWLHAR=WKftN)|I#k1PKGF zrVbCnpcxInrk4AQ{A3UiX2$RZ(#9zjBCxyVv0ZX-0GOmOr8sE_R1)F`fh;c}AafCE zx#ImHUx{C=>vk=l{lM8Z`Lb2`Ztch@O?|(c_Cb7MvCo#h0huPR5VvNFRIJA*$CvJq z5Co8)1Du&5k&Y)D1ki|7yb05QKn|c8&`CxCG&(*2Pc|VE>2#tIgG?uq@Wx~k4R1m= zBpZ|HBojj_!NiDWY>581DNL=qQKd=UiF`BlD`BPg!r0>J*e?pO_Wo_&{}-g6(r#0Q zXU8Of6g_mC86-=HYmR*@SxjIC6?6|Sd%vfYDQ^5`>gGKQxtv}%~ zC7OjOhaY@!)@uk-0m2aQ1VcUW{Un&U7#Nxo@TPcUeG?KMPayn^5!g!Q&qt&GMc>qz ztJNpHLz+S5GySOah)6d0S_{@#ijdMlA?#Z<>?SqXP5uS>f601EWlUvSAY?<`DIv3; zVGBMVf}Y6|toVuXldV(3Owd$CNsfPc!nk6-efrpA zZr&iI^i&9&rGw4ZhIR9oxcx<$rgV_*Yy@g(p*`3jPwb`rt285Hq0(AIwZ)Fhv-yq} zTck=#ON9=U&MwuSKf9<#Z;t%d>=uF@aw}t*@A~^is!ODbl$)bu$lQys^fXVdooAom zd(V7yi9|i(1mXfhK|w)|GX&8Rd&q`Kox!Z7?^#85DK0v`Lx-=wPZ2v9X&hqmXrtCxxTT8{NC&k6AgT6|s#rAt8#YPpp}qrt;aBvJTl?-sbJ2 z`LM8xEv6ni?5yDAboj;*cKodUCf8+b@-1D0TW12#9=diRi3*XhT1WNji^w~SoxvJIES7Dtr=jp=5Ir*%Dot8<_jmx|rj?qM_xUN76Y`dO6w(vQI&ni?8aPg-AS$9vz3MV+l# z_F-}T_F&yvq^QESC+!}pPc#u9mfkn;n|z~o zwd-L=E-r51PVeY)P5!go^Usb)ow3}1j}+MVreOQPg$RFb%7qBk8FJE(UpDRB)I%0R z9xRmm?TY5=BTuFJn~RA?gQA`~$)ORkHOtRto_Ox-#8eS081&T&c8EwhcBI{KR5P+{ zp2qG?^|gu?yQ?1_AU(Mj{BZGR+x|uEvDXeYox`bREDU;x`*>iR%QPujjI!78VTn(7 z>y$HlkO-$x_a%*5QR?bBclY_-+I}!Yj~bto$1{q5d?w=N;o#jttu=3LO|Y*OY4A2GS;6?-SBpchRhS!IY(X}Y%fGItA5{~`*`C=OT*N<`c%)3UhTA=hAg4# z(ksKG`RC;0mIuU_q&VF?cTD5a)6102jPz<1Z@u2_NXrAPC0qO5?0BlnkNc!QSbaGB z=9 zJbTPxS)D(jd8yIj)mN0;^H>hmhn2V0i-|TxJP}C|nr2j7S8SfRzijKdZehuES=218 z;^nw_^KE6n_1AW{^WV7C)YkB>HRpAfZSE+~)BCMYGctXiLDaElIOl@*!5wW8AOI;M z4-{@ISzg53^=8(heX(cyR4C*O3(==&wMR`iZ|4g&&slk0@k3+!)$P_=+NT=3uQ}Bz zFixFr&K=l0D?{tywPo@vZo4lvzJDUAr27I0q&GZN#l@s-QzbWTl9i~38}AopJ-_6c zB=I!yPOs+`9h5<-;Vyov-Ht66B7Ec$3`9h~&YhiA<5rh#tFh~(ZRANSD;-W_g0Q#> z%WMxsTxFs0)_Z}6Sr1gH0g!V)?WP83e|!2 zNa{#<%8El5GpY=GMqAh?;}6#LSzjt= z8eR+BpVRGmolv2mPb&WSQdP_oX?wE#-QWsjHyZDqRR0wjUxIr2DsNcG;ru?Sc_S9p z?@e_-?if-Gm~(1hs+5HUR=P)tX!SKy&8Q>5j2Q5Ua(|U=+gf$qaGwaj?W ze^ym};2isl=dry9GESr#w=`D2xheKM1!w!#>z>ST8FfF0&O5fwD9>xAZ=U&md!w#v z?Q;BF3y&6sHM{?+o3J4EA3> zPb*AKexd)c=(Hn`SdNqjnLG6cmSqkU>eRHyuaHeNEo7N>?t3B|bH8_yzH#u27a_^l z9~jyO_AJ>@%ar#nW~LK4964)MkyltliHli<*BAp=g>=ozm!smFIBgl zFE2P=7WT+}pgy$zT;$jJH$KD4ggicq1{t(Mw zH1o}=bjxKqt3~$zVZ7SU;G@*~6`v1fzG|v{ftyL(616BiuvKE;30l>8?oOgoerR65 z{?Ys`PZ!k9)k;&kTvsA_CsY^rEIjC_6s0Dx$&9h!`d+ENR*!_#tn0eh_!8T`ZmPX= zS^u!h2)oeEt$%jso=UmqjSSRK`Z6EgJ167h z29CQzy4qDEK4hPLt`!A?J6!BGOt)D0pc>P@ZLXI`q5lJ;R{vOO&w6fEKCfiof{qWz zOEz6g!7mAHEy~HW=-d}*vD<(R1>ocra;kSeoVBYVfF*inXqpXf$Hv66efr*ujBKg- zANQv#l*bngT|L_%V%x!)`<@wp4x>(p)aN_8B)sS?NZDSZp%8J3TCSsE z<0W;=Txgd^?|~p}1+RAf1Cpd|nH_%E!H9hB;n6R(Yqp-B-ZFZ_XzrSQG5Nn4O6NB? zzbBpdkPT4Ye%!+SP4PnIqIe6lOxv~DR!7gwRaX`7rXys1P{zQt9ZxO@( z#_f&5>1Lg|ReOt05t!G%I(<12OA|fO-tA~}lC_}i-t=|kb-CG5cjBrau1QxxXx)d* ztd{S5JbRF@vv-3xr9mCq=2Ecbb^i12$~jFwQjOh7zV~F0Wr>>$Ny%}})U<2MmBfyS zU6drgtI2P(7RcKE{7BFh>Z)m8Yvcyx66u8 zug^8LUco`iB=0hl;*Jd4%*vMWy^kOy$B9y|CBCeVirBo@<9hbZ)35g=2Un!AjlLAS ztn{nIr*8Np9AnnF)8|#dbjPE2pJ~odSkU`yWaX+5pPkXoerHzMCTqN%^YA0u{4w+3 zD+_d9R0=esNqeBc(Z++Z-_P8DZ-OGszOVa8NCY_#^k)q~IC0n&S?G=qZ5NkY= zr0Q~R_+eU6LtU-SVNyVx=8>0Ku^;kRnzeY1JRWWIJ(O*?8&i4EQM|?2UCK7)6mhZq zntA7T$oXtZ-Iu3*&1;Y1`pYK}_I^vyBm9TGl}67zNvbC-KP5zUx^&nWOm91clVPZK zF5dRa?Mf#HMNWxPl>Nvgxhi-bs~pjxM`qZIK6+X3heWKI6hXSC_F1BDI#PY*h+42S zI+3@fN5b&9XOsJ ziMrQKZ3?G$aq8wSllt%#&C{BE7re(E)pe76p>hK8Ha+}N@gb& z4EDcraXWXZ|ISY`Z}|gN+bp;$*iUAS){rw6`7~P8QaX4``y-;IN#fN&_ZFwJl(n^s zMY_Cc+51fOUK6|1Oxx(}nPii51)eg_i>_8i-VWRnQ#u<;T34fgW7Ul3SO1V%R8jkC zd-lbhh?T*w_lmEqdP~9F)+`6731rmLAFi2>BHY=07A6*)e_)ugOdcjQEE*FQ&J4xy z!!Udn6T=P(599Kwp>!rDEC9n}(j&R-&_E13G$4!{Lgll=LNQbxkICoh3Wjr;JU%>- z&tmg1bQU$7&*Wmb%qTW9nkN_#!sN5U7;u@&$56RU3^j_%=D>F#7(j870XSNt(x^Np zm^^j{!NUMncIL}5Y<=_*aooz~BojP%FxG4?kFSfNhB7cAVSxFBz@QC?07GCf1K6Q# zfv7+-Dux~w5>5?`oy!~N#*f8>hjG~SSd2af)<#$)knHC^0X!fspcxwW6UpdrSm<r~pAUiy10-bD3ih zSz*!o7|PGWVR%4j4p0EV8A9c7Vlh+(11Jv0&t)>f8?fh@0_33qYzDZ(U{g74ek{;X zD4!X~6%=6wj@yC_cj`;J0NsUz0nLp`DHs4u&EvtBAh40C3>K5igk59YS;pLBQlAVa zFh39qXaOJrw&0F=+P85$J{KIJ2FAi_jSk}m12Oe6D+2g1YS`A_1_H@>ksRQMeBPMX z@Pe5Pm~A+d3wz?^NZ1?4L>yE7n6=rk2;;WpG6Shx!DQGwzOfkd0iazD>;i!-*mr>c zeNT^04aLxyz?#f}NRGg;VKg3-8wIZjU>Sk8@nU&=W(W*O;CDbuSaEPK?9y;MjR}t* z^93eD;8?L3Ksq#x59W+(OhAjnjAC*o*Bwyo1cqPy^Iy6yn-7+ez%K;u3*38b)sCBz z%EN?2(pdm128;wls2F(Zae(0jAir}0SYu#Whk|QZ;5=i11u+502BH8T_A}5rzP9OL z{YQeMas7#tM%5=1SJk2tm^8{J6YWEncE;nc*d0Die%XqEutTdS4~0B=Y#CYulp2t*@eG7)b;Fg7s+Z$pwQ)Fl_LN=B1a zxBqq?uyDeLi6G1IOu|cg1-6hiXe8v=8If)6rCg^WP zIMYIKar?gwyx@jqGV)5n2^>MZu+VTSe=O6XMO^8iF!uab1omVm0uI05Sp2|kN@Y2s z%cRy5HH zIs`x!woZIugykSPglM)_$OBslTbu{rO{5Y{gVTuoK%xqb$raq%ZW2=3EQB4(MPPGt z5#ToSSMq8Kj+H1~vGYK`gU%2U9!} zv{NB!aTGf7Mm)reNf?Uk3U}ldhna;XgeN#+Rzmh>Ax;e`$i#&8=;ZvA23pp-p@c5~ z*yy?h|5&pKOd_7&z|IPXeEeDY$#RLQwh6Y$L#v0@-QYXMhVw9M@KFtJ@00VBZmb!K z!$a;jcnO}iDP0N8>&*o6)Fql(VY+&SWfu-L2oq<9)ZDcG?$ye&V89)MUifvcx;dsR zQGC`txGlW$q4iAjf$zG7WjSzfj6Wef+tY}4S`(Obvk)pVyW8eW71{?B@FFZi}ZI)Lo1{s=`3-;4FQR33tDaP)Oood;!KU$X$;g6l`AV# zxnEdo9ofWnx)mXrXY?K#f!;v+=JzyH(_o-Jw7Gh$rsV*LQvAl-y;vi%RV=C)xB^t z?#1?q!K+xp6VIqSW!~>w2jg(XLXcek>Za5MJ@0E>i}Vif=A2Sk`uo?tp~fLPDtF&~ z5OvO%5QfbB>s`N|oLR=3Bft9jJH7tEGk3`x8S$cOy$<)*@+{dW<`9Iey^D`H)#Q_8 zaqQFP((SdhJ&O)~j<*_!l-^$;Y|hg#d|Y{Hwja0&70)d(BBqW7?Ee};n?s%QaYoqV>> zg_p&iuW-YC+xNXyxl(*-aL?9^YMW=4&rA08eYGXcPd+5n%RYVJ0zFt?PdWS4&9BWzl1mAC zGV=bdXJhs63&KQYV&~|-S@L2>ZjWF0T{V5|tfvS?D50S*;7w-j3PbThHM`bBch}WL zTfB|De&FIgxeIINY`F7f7L?#0$&uRL;_^r2$k1`cMGt#|3b*f-X(F}RKB5l>KJmFT z9e`Fi6^2$nxw!eURk_h+^P#e?sE^zgID^NpKW!LYpQoar<>`Cl_*|o&X(Lz7$u~Z1 zQucB@&m^Q*H@<25l3DWA<}9fr_v-%LbqiFE56e;aTsMDq%iSAoecUll>}ty={%6_+ zc6~MeVn>0@5sgQOO0S3{AFN5*FxN|Y;km|B3a*>*61NK$I~~CHEJc4DIhjY-Dtu*{ zwSE4BWhpt1L_?ceME8~ z@j_LMWy9<9ZM&LkbE+aX_PYlyeK*{)>=_cL7o$|UR-Zr8&L)fH-0XfRS%9%V`W4-l z#tvoe{xEAROG(zINVR&g&8?)-@>Q<&b1&QU(?00?hR)w@x(5|(zF|oY=R{SzVs!fJ z6@dgh9n1Z>m3JlqjZ^tSROkkx!L)tNwE5CgYzoW*sCWjMYGBMTFd@^(bTW--OgCVVK(-x7 zyd&co#tfQ?0h3N3k*G{2&49_Ek{Bd{5!H}EBLX*vl)i}C0zW(XBE8$wCV5t_`zo&(u~fCUn% zLE~gf^H}}6eBiN$Uy^CZnt#kh9b*7zc}~!V845BNCmLX8f&{&>u|MXmO$?F*gMLZ% z8k_KQe$>Pin9&p|Lf?0P`z8MS#vk^V6CJRS-|x&I5)c~x%7P4fG5du;X(SP};EM}q zq{Grqe0j%&{N7{6X)2j2t15qz|Ig;cWB-mUb?o1gH;(-qGr6&US8g@--%N(a{y&@$ zjQtO%=wkm{>9E+pF+mmk_oRwq|Hma>{&N}&UpV}8kMz$y61c+q=N<_xg#Y?I(ytqC z?0@Ry^6xt{{M%2Ku>W8Ds-<+`4MNEji&Q$3h5+Xw;9na_QJkGuxSNjs>V>{1i4#15 zmJ|xb3H*hhWfV#@#XOK=O`!x*ECK~>=H?X2a=2tcq1eGcL4D#Us93>0o?sefB|OZW z0_a&ff$0)>Jz+8)HP`4z*bYtPyQusx|!!OW5q1eZf zaX7f!${OZGF$au51%8b^Jn+Ljm_~3gpt-w6GOSI)y(ktuoFfiriZhL{)%st!RgKUZ7;4hQPylo&y)C;S#n5>&_dQ$3tHG=b(U-EmQZ z@fH*-OOtVVE&heP(Vp%Mjt7ARWM0al2bz<|`B5kWNzL&S{LP&O3j=#VjUN$B{ z9)$2X8i5q;>ERqE7-9tU`!myI>v%gu!4NwhYgdMxF5!HqU9^MO7CC+1UFa3dfNp6}!l=!Z_= z7{UC3KYNakAGfEa)5QF7{!`AU+E~X?;pr3$GCY31qw%=^x(cXH`m10Nyi|dFZiWns zr9eJg;PYetZYLNoF!HyhEm)OcymOQ%(T(rrPQrT-SS&gLR+2d#4g~_LWB%tlUN;}d z8w~e-hgU$K7V5_LAOLy>CLCZt0ZqDKd0Ke>NYBFQCwf2oBZxZ`ihv%`ndKQ0!-46< zP2hpU5qM%I>P}<$#zKWa6hXhhRc*kUax)REuQ6Ey;eZ5GL5Jhl_%lWi7!9Beg#84B z27%yClOV_A&Df+#|Azw^#XN-S9>bx9SPT3OG=Tnzsf!jtXiPYj8xhIm>(Rr*^{8Af lNN)lEJdg*cgBcj<8|dS~u?slo`ak^xEm4FZNARB~{4WDGRxAJj delta 6944 zcmbt(cT`i|w`~$y=%FN3AwUcXy%#CcI|zt?fYO_QbfhQrj)8!H^xlh7MWhQTND%~# z2+~yqQIsaijeg%7jyJ~d-Er@Df1G*N+H38TwP*IoI+=SQ8kp~clLZ>#?I@MVjDSER z06txmjp)~iYTz*-mWLsQ}d;&kwss^e4Gwlpd@ z*rWmE5H?m4=imS+4a^8%DlUlcBfU;cWFsr&_$ToY_BWB#&EDVbPv#8S3u0C=x$3`J z&p9}ur2jsS)ZabS`A=4ngdSd#LW$;|MUZpK08s3BI|_Du4TaD@6)#f!F7B8jiin}$ ze=D|=U@m22!^{BV;R%|ux;ju* z9d8{HfThEvf_2aP6uvTaWw@mMQzTJK~ z$CLJJdMli=Y3=G98%JtZCRk@%KK?Eo`eUX#;;!zE1;zF94jEeQE|VyO3t-A&dh5HH zf}P+vpRkcCj_|G*xXi@oRuO0SJGM>kA1wLA>Y6=0%}vwwWDsM!mO@dx?$KwFvpI=d z<64Pi+rsp)`3adu<5UidTwl>K= z->cVOW9vH-!z$VVc0%WfA8PSZ=F#`)V;L(7r`+JI@F+^kWd57-9oE^w3L}S{haOgs zoVUA_o;H_`Y%T4ZZJWv-OGjwmk>?gL4wD;19AcbNZ&5K*!QqCO8QJ<%2{ZSkly>y< zy6y;prnt$A2=?P5^(n+dW$wK+8b(Iu%h_WVw^>Z}FmxUW1VWJsoH6U)t<-FtlGyKD!;6PCUVA%zd9$0Y#BHRhH)yoqINnGpCC*vUQRBc9vj~F zdX8aHI{N7Uqw0(A=KSDhXHf5O<)ZZV=%&Jz8wpuLsbU$X(JIK^g}c%$=t=Cedm2l@q{olzb)ePi337o-l$0~_>t3X*BO_#?ZV z9~pWK7H)S%u+I>}nhpxA@5b25aE8ov*?z+q3f8j1I8{X2wk(*A8+|3KvXoV_tZTR( z86V%htLrh>{6XJl^j^yo6P&+H*h;VOOtb&etrt??b>xv>G81G*gSYSVA!iCt42_6Hao)U!6#^0^ug>gTt`bUC+}yZ@%Su zL!S<4Y(I}~TeaW}ar4h#P+WHN;PQ4m2+ndl$lf5wk3otD%{!*NFV0V)o%M|;#^Btv zZsm?)LYDgIu6Lk(N){t!qF=gFuM(y)nUBUm$Cxw3T(sNXr(9b!T}4k9G?aI3qf`e? z+uy)jm*Zijy7490t{*Tk`!<}nQUf6RwuWJ9z+s5xXS;OYK@dqe6*cWlckg#OOx@n~ z1s$K*FwKQ7o10%dthcI5)MYPHqv09ogtm~f??tBO0{F3xk~ZfyKK5iKSzEvZ> z%}*q_pl;k{+*w~Cn0jqQmsvJ*s=J=W(TX91*XP$46G(4Uo4U~*mMe|sonBn%bsT@M zJhd|tp|Qw)IzpeYUzfQZJm+yG7V~gXsx50J=~P21va2UPs%^@I9(`RdD&}eYtL-So zv$C}hD*B>r>Uy=n6s(ZCJEaUg~L5GIA4lpijzswXJw8 zsJ=ozz0O)KquNB3qVlTu1Qz4Ew!#CmEKq)dU4X!LAm zG7a2CfS&s|*;MTK8m)$MDQJ1*IizG{#xdZu><_T_(?t$$<;^q?eJTFyHEh|638ZM%EUb=-DmfHtg8~Tq z+w{LL0hJGtQBbm`{NuQaC;JlVbwQ#U1#b&Ap8ADr7 zF`bO0jPs;vSO(FK6t+V2aJz@R*4=mgJ=u&|!$by3=+QP6Hsn z)v~oRb&zYAvXtZ`gE5>A)oDKb{-x&}n}D-SN*0{d?hHPFNd_YtS=4mfa9%gstlpMA zgvnUjl>*6G%~1WR+m^@2jTfPio8Q8cNH(WfbqbH7nMJ4tT#{FeO&5fOZ=Lxh>~E@1 z86qKfaEzqn4+$eBm>~(d<)OGZa$3&dj3c*xr`gU9p_-PrY**~PZQfaTiFLD*&?#=n z(>PhsvGA%{%5$z-3qEiZdhsYCny+AB-omu}YS^xZs$4J0rabQC;&|3Tf{RvcP^@2$ zY=F;=@?BJ#x*kkU3}+f`39lrz_o9kVK1WwlNb#BT%{vq%Vsej#hu@KU%Q<7)lmfUD z(mht#A9o0s-!*>&8(zBkgOFxe|C0Q}p!ZVQn;yb*SYn@n{d%3XM$Fpi;=vq?aX$z1 z*f7%+6s_8TU1CH%xSH*dph!5IpRkOOI^;Z>rZPWQiAdc!JVae5XqZ)y`U1-gB}cpp zs^OYP_KyR{F=2Ez)h!XbMcZFZ2$EUC&EX$UeBO2(mXCNS$_R=$LtR@HGrxQXwPt8gj>{Kr z@4mhu@@v51Y5m61`7W-jTH_5_c(o(vB_@fbEw>FP_-b3($&@K@j7Oq~aUb5|c{&1p z!@k!Xidm9d_Pu*`W7u_4+-VwVIPlR#`svuUs$c?iN7G5Qz!<9$I9@Bq$bHz*&9tNE za)2R}yNe)(n9n_T$UvI%8`zc0onQNvS(CZqMrdzekbhK)Xw4NHedm_-k)qvZOZb|* z%9rR?p46nyXCp4oo1I%p4;DWY)V%V^MXyotc4G#=gw`G5Q2Zh1A8|U`jaAcW)K7AY z2EW_*i#DUU74m;6ksWY+mi5`VfY4hjS<5YqYYF20H8j`Nne%NhyYjl&mpH-~^i)bC zFYP|NH9_>~y(}*mW+E`{Nu=WE$MQP2TzQ$|4*EkHVi19M>e|)%GO^VN3zAoC%o)Iv!elKZ>kG{dt)#W?;csF;9fsrzj^lY@FmkU%l7gTA|~4W zNi|Z9AfZ4`s2MtHb-HjeBlYUp4ZXn&6Q#>gHAa;b(ppoHzSNqim)r$=PBsFQy=|t# z)K&5#@7P}Tfe#$6@v(_#nsc|BIs#=fQT%pev)uM7(Gl0@nN9-O#4pN9wSWyfhtMUM zld5AHO^b1>Lk6kuH|S0JsEHkJOk#0yyh9W6SH!XP@oKzXWz?lpdrUf+NKW;HQ0(EJD;xblfj%}-E! zIA4LqbqgqfyEHBEKovwdoVwxjbM9-k_@>|U3w9^H$;_N=?0(3^q2vbbf{HC$3~7;u z!YB89idh^fS8mFzO*C*b(yfMrIl0tAqRcMjeeVjymq)M(jIQ}fU)G&O?Q=_VWYx{k z%;14pO5HC@zg{dy zO*cgAzc=TxGEtF<#a@4*ysL(AsMT?>+<-&CNh$u!_@*G=H>75-PeXCN1-Q^UuTlDE z@GVMZ`s~9P)x!rt4W`U0_0s$)59ipT8Wy$Hkghx%wAe=$(tTHVomBn3F!YFs(D2Qh zT%1xYbIrG9b?q};{5@LV0(hTB7Ewu;P_z!wQ%cfp&5Q0Eb>js!-Fx`=;Hbevrli@E zqPM<-iL!tc!|XO}>HblL>NX>pfRkS)w)TKx9ycFlwwbE%vML_atax`d3933xx4pg> z9kApzbY}n41WveCpQpx(Y40FFM1hOoTVs?wft^Nq$^|x{4&ZTx@N2}~?Z^eIuaWND zn9(58J36`H1+f|`8wX@gEg-0HdFSWe8nfw#yY2c;)B-Fd3EP3rYa2qdt`DDrq#aUD z#1xjKG1@IuWV4xayrWFLk#w@rL7FAsNJDG1eeD$)C3zSlT_y!F5pLfu>;uqKuoqg2 zIhx7yT_XwRk{J_>b3Wo&OUyb|Ww;~L)34zWzY-xQlY4^hS`F;an`A3UmM)8u=6#(; z&Il1mRVzQakEx`sle020gsE0$QH{?|QbNK~Q~Q+7#Pu!ec(Q){d^y?f0lHUj-w~&)x4#$B!x=BdzymQHLYE^IwL@z{V{PzI#R3y(rE^ z{ZJloV7O{ge3_;3gW+R_Vve_z1FLWyCfaxQX$9YT#3-%XRx#nd6>OSYa)edC=X@SU zt6y^{CUw7^GJ2!tlEyuSd|BT;Gu)}$cS-H1UsgXG7cFsb9CsLWw0@nYA9iYK96oqu zEarWqyJ8(JJ^8|zs?-RIq<;${Z0Q+Ux{jz?&~tR8 zoWAFszZP(H-{uuruElOxb+39}r7(-WxWfw{4G-~Y$!g2*s__2qMkoMk0KF*$C8`YU z{TMNRxVpDbh@ZQwTL4^F&5-E%`G~+J#KfgU;HsXU@IMiMxSzAXvtN+2lL+9SKl^Z` zI1m6LIz~WpY85z*hCQba4n;jHN6)OwbnQCJJ@@?Kz-5Yf2N@8u!RN2i0e1Ah9pHRB z!2h?70c!)_2f{Mdk#in+8}29K|LbD#JW@;;C@Hi93_S%JK#l%t5gfom5EE-57~BTpqRCN3c*Eh{H4AujQIL{?6cIP%Amm--WvkwwDyHxoGTw5X?t z!AI%j_w4A2SArbQ@!!Fga$motf!_gNmPENlmGsqEg>)!CAgA{)y|)nj!vBG#G9iO? zMPr1anez?HDlePu2s>1|lRF4G9E|stXD8}2$CR*15+v>p@-P7JgJ8!WBh<-J&`ZC+ zG1ZIms?4Q;e~Bky1~3C5AhI_dX$T+yi4FfN6;b=xyE@z64)k(x_WQ$hItYdXAtyqS z`yu#!dF2a4(n_eHZ6pTJ3gBPXaKJcnKpPdF1*t#@Fjj>4sK3W*;7yT8J{41dIW8bK z1`-vogVl-I58ua32N;L=_`@T_g8I#|0f|_A29itKxCVk!@+5z^4S_`M0`=)sl=@lX zyZSoQ)&=rufhr)%n^Z)GmO#*HkPx&B^hbmv$J<|G0xIIg`Jos^mH)HBntqZ3%qXEm z7rjSELcDc>NGQ-)tepyql`b*XA<9mQGcf@aov4EZ>phL`vXi(^TfP1or3^OKiDgCW zT%1K?_0VAOtK>MhM>oN_g7G-C%YEA2t8W%BC31))OrwLqTull4=t!{LejEUtFl_i> zUHtcaM^u7kVLG_N1FO@Wm3Xl=$w#>~@%=NMC1!=9w6))*fB+39*T1PSAiu@p#V^6> zyFjoaI#?A2vLXtIoQ(Lpp5LoNv0zG&&?@HtODn^H4N(BAOb8QQOC<>cBPV~2SyLj$ zUq%8b2owO04_etf1b|kYdT87O^W+w^UoZjzkcRZM)y%wGeN9}yg31!YY+fAr&8>E6 zvDb!u`YBXUe;)*Z`!Ec0iQ7#03m@J~Hn&fqR(>XVkJ1fuO?dSFtL@Ozct<(_7Jggo z^OEl|$%%GV<$P@L0$pr?X6|yB=&g}8?uHSBPX-A9GH2K<`8G^!q1J°0}+YM8A| z2IbVldY89@-jNGo#4UsE-%yZQVjJyjJoUW3>GUSsSnr%W@Ugr8OiS;SC~nYzl}OWi zrR-GrhNN$AyDgKme7N_V}bXN?VMab(^kcPuif= zySlq8f_}|0a+sORtDiQx0Du7|k=b}vuZBBL$<8c0Zbe*d7ldEFT5c*^`~HMx-2U z62rvsyrSvja=hx(ZMxUCv4|s@<`|a;eG}!jnJ+z1F|fV_txM0jvYGNyGRw~SUNz8E zQ595UzTc<2+xV!>NoP;%ynM;Ltz8=?0bWC9raa^}O)OOM%2~F|?-=i{f8C2B5W+tyhPo9Tc45qRXF@q6qWDe(EWzcb)R?blp7k@$`aLsCgg57hkUP}; zBhoAkE$35Gosm`H{07D2-PtD@!Zs z&}%;Qoas;bK6qK&~5wcb37n=i9q_(`w(tWxk#ToO=&G&Yk5v2Sz zhQ@tZN>*wz{H~5vN<#S+y6eT|?y5iL1(xv}1gu3|1T)w2u`iqkSLhU7m?!NA8qNMJIJ$F2-CrSs(twnn@%ELq!CKBt2QxPF?5 zzUUqj7F>bKdD|IF%a>|9HLog-X}^9FHzDIFpKx01T;aiQ_3>e^#JOdArVW0{r zU=08V0ek@vpi03`Q&Tqi--d({%D(m=8q)u(ZPyh6|MqQaCvV3)k(ZlnJ|Bi1FtyFPl zqE#xcLc|%eDtw{@lxP)6xDoM~!U}IM$@3piDkO;UJxO;Wu29`Z{C95_|KBeb%%K0K zBQv7^H|_Q^=zr8Fm<5slbI$R$!uiy&k^>;j2nmLzV_SU`o{@pO=m7v@Ra5c*0 Date: Mon, 10 Feb 2025 16:59:44 +0100 Subject: [PATCH 3/3] use new models download CLI Signed-off-by: Michele Dolfi --- transforms/language/pdf2parquet/Dockerfile.python | 5 ++--- transforms/language/pdf2parquet/Dockerfile.ray | 11 ++++------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/transforms/language/pdf2parquet/Dockerfile.python b/transforms/language/pdf2parquet/Dockerfile.python index 4ecaaa89c..a10833bc7 100644 --- a/transforms/language/pdf2parquet/Dockerfile.python +++ b/transforms/language/pdf2parquet/Dockerfile.python @@ -32,11 +32,10 @@ RUN pip install ${PIP_INSTALL_EXTRA_ARGS} -r requirements.txt # Set environment ENV PYTHONPATH /home/dpk +ENV PATH="/home/dpk/.local/bin:${PATH}" # Download models -RUN python -c 'from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models; load_pretrained_nlp_models(verbose=True);' -RUN python -c 'from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; s=StandardPdfPipeline.download_models_hf(); print(f"Models cached in {s}")' - +RUN docling-tools models download layout tableformer picture_classifier easyocr # Parallelism ENV OMP_NUM_THREADS=2 diff --git a/transforms/language/pdf2parquet/Dockerfile.ray b/transforms/language/pdf2parquet/Dockerfile.ray index 4dc62538e..6cbd20ea4 100644 --- a/transforms/language/pdf2parquet/Dockerfile.ray +++ b/transforms/language/pdf2parquet/Dockerfile.ray @@ -32,15 +32,12 @@ COPY --chmod=775 --chown=ray:root dpk_pdf2parquet/ dpk_pdf2parquet/ COPY --chmod=775 --chown=ray:root requirements.txt requirements.txt RUN pip install ${PIP_INSTALL_EXTRA_ARGS} -r requirements.txt - - -# Download models -RUN python -c 'from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models; load_pretrained_nlp_models(verbose=True);' -# RUN python -c 'from docling.document_converter import DocumentConverter; from pathlib import Path; DocumentConverter.download_models_hf(local_dir=Path("./artifacts/"));' -RUN python -c 'from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; s=StandardPdfPipeline.download_models_hf(); print(f"Models cached in {s}")' - # Set environment ENV PYTHONPATH /home/ray +ENV PATH="/home/ray/.local/bin:${PATH}" + +# Download models +RUN docling-tools models download layout tableformer picture_classifier easyocr # Parallelism ENV OMP_NUM_THREADS=2