From ade575f825ed04be61fbee391f7bcb8a6576402a Mon Sep 17 00:00:00 2001 From: Ladebeze66 Date: Fri, 21 Mar 2025 16:27:36 +0100 Subject: [PATCH] J8 --- __pycache__/data_filter.cpython-312.pyc | Bin 12781 -> 23307 bytes data_filter.py | 299 ++++++++++-------------- 2 files changed, 125 insertions(+), 174 deletions(-) diff --git a/__pycache__/data_filter.cpython-312.pyc b/__pycache__/data_filter.cpython-312.pyc index 52321549cb6c30fe2582e5837ae23891837c5c99..b43c857641a597aca1a9e80d47b697e115fb4b6f 100644 GIT binary patch literal 23307 zcmd6Pd2ke2dS_jlAhq^>AN&V z-J}?bp#xN({7d)I(!H)vhhJSlKWgYR(DE~5pHX^d>NDY2A25$v`YfZ?KI^Eh&o*lB zvyVFZ9HY)Y=cudCg)#=#!n#@8kgf!cF-F!iWME9cW$4Rc%&c3vx8Ocky0@~{%=}6j zE1O4RLng+CocWAhD(OZ|4%`u6 z$XU$fBVPgjjcCWj6oR4>rkp8KO3SIm$XUv)V@mL}j4fwMFKwkL&Z48(vX7M+p{a3t zoUUH~b8>&YniidhSzjdVABqHeLXoj*vuJ4X4~9iUr=Jgt`sU!cXz3mc`$IurKs5G7 z#saKpd^3o9QQyOcMe``j^S)tLG;wS=!UYF0&)Q``OppJk59QgXqts=JYY%IAqF4`P z%)>gh%*RTM+Z{HlX=<*Im2!;kQVaT3p{6s^tdE%0{OMn{*ON4(kJ6VZ-ohB9Ujt(# zzZ^Yoic%xibO|-an6=g{(kLP)W0mMXvVwl=3+Ugmf_^ENR-;V+2GB31e+~L|j3ufA z{RYNHehJ--U7}|13To^UHR@YOX`@+*wxnk-9VPuR#W;RIPa2|z7OLMuQIp1~F{($q z^fe1sj>$lcJF8uh6h_JB#dYfbZO%XBXT7}N8>qrMVk4ZFiFlPo;br--x0?xt4u`_t zP{bPvdXa}i_HjOZKR(LVOSMQrGdIjPN-ZHQKE{fUU7172=E|6qYB$`MOc2qQ62r=W;wul|I_;}tw z9K3%UeTukofB$yaH^zDgFZzPR_iwY2LZl6H_irNyC<=|D)^A4GQIJ1jm&>w@HyoO9 z@?pGtaygcnC{QyG`T~J*?;ywe!Ym`2BV!DSNKKFWc`}kCp$Hct>%`|B4Kbkn{%z4L zwa+qb)h5yA$FQLG;2SE=Pf%91xToG`EZVNLw>+_^+u^QXI0 zn~wft-({3Txs!6NPmPg$;?qhGgNMDf^;5f7YI` zSHvQ7_C(cdcPAd6OB_72m{+uHqAc$3e(N{BHPe(dZ;AyUnRhR`^GIs`%$0dys5dC;UHh~>sv99Rue#K&ADh>H-4U`8NPBt< z_Ewe~^#>(}5W$asljcRzemSKtJZU{ z!ALj|3SAOSL!l5xQ#4&-E$p+Pc|ECOse@nkk z(U2;lRDTtg6Pi6iIG)-Nr#)6~%qy7qgn?7Ex z=Lo9e){~$dLA74g)$b7LL2eW7c(VC;y;r#k{64epA7+E>+hbhQ#Afg*Z4MB_s;l20 z2!S{AP4!wfY+@7-@~L0^FX|UpsYmPk@6vx=BfNPkb>`gsvGcQ6;u}Ayx?S~gb!tOH zVtxNV@&twaX2A~nR?!Z+O*CHdGvSM5Lj1!Q!^eNV3s0-vVCB9svS@)sfCAVce-R+Y zDA9m=?nPt#lHxlv(!x6%z@82Hed_mE~avTs| zgy3c1sS5#u;ed@I!x1M*^52gSvVayShL*waeS;SXXnE$j%tp{0JK+s;p~z*H>#eqM z>rg}*Pprb=xHPlMdc1So7E&Gp4#m}w zU>gF_I>_?_(mEFNBRm^`aDtH^*1!kXOtKy(Sv}Hd4ETc#L2V(97p+=co>;Qm;H6Wq zL(&Y3xzA54*N2?D@z0;dwu4z*E~f0R>s8ZLGn;2~leY4!M;2|)mG=(!_4euZnfBR} zNr(4p>!K&`rul|>)*Q=CdMdAWJTV!{O@g6#*+u1VeL@+HCBO%cycz!H_>J-3{Z_)U z>G`|J&2QcK*6&Ux9F?N8@co7)meSeO`u_Q{{>OR6%XZYU?4g{VmF?pG^z`YOGqYUM zx#?=#vd(05K3-pOM;~wf*tW2>F|oBVwRQh{`kB`EZ1*asjE}ur?lig_uM!YGsbg&V4pMH-Smf!#k}GtHbXHwk`HK7xCPL}SUhDV z*vhd`T$3oSNfy^Wve!NatTN)!vW;?he(~i7%31WjRj_Y*LeZec;#k(>_ZgpuA-->I zUf*h=9$4J1rTPaYG(ypnUTT=`;817bQ8VF2T72sli9=J?ez`_AuAaR-` z$?p`v6M4N?T~QNbA^W0AD|XFG=%2oasu9WG{l3yW+RF5^JAf) z9}JY&s*)nlv0b7e0lNp~9W9_fOZUcDE)3v--B7LzxTF{(Hs(KfKhE|Kvgi2zgUVQt z=!7dxvvhBVKzY&6a?*_j5|C7a>qZ?Db|3~H;KmR(tqCXDy}jy|J>gbwK@=Z~@Z9U9 zgj3p>&$0anNqwS4ZVY0hE6pRwlL*c+5wIoVmTI)EuV!Szeg%5K%nsB z@~*j}+0${u`vZycu7Bjoi~gIg99q=*87RwiV6i5i^>Kq2MO#Pv@$QqYN1A(D)4Yv4 zfdZn*H#WuwnQEKNw9b*Vf((i@#>0ya4qWRpFe!8gyy#Tkkb$94B*@6!JWfh`RPLq< z(?nt)P!i>lAOf+ujh=2aC5P*q&QAjz86C1WKx^t$iPfW&j*v3l; zKPtT1330ce^lH~)9(a@9m^)=&vOBNuo8C8bdNy$9@F!ijyXNhUi{;*_uE&m?WizsT zVWTVspOw};p)A%{rj9M{JMw4i|7iU~Q)*xLOykX_8%?n-@lDCRx`n)kL|#KGZ;$Zi z>8X|{Cd%7S3y$)olJaDEm$1JxRo;~>>6$wBKl0YaD&sYeHtk;YlrMNH5}u0Kv3NL9 zxqIHTXE_IrEa!uM!BE6)Mt?-d#qelgKtj~P@QT$(0F}fao*^XMfro}Pz57FRmKJtt z?4^3bFC;zz@G%L!Vm1*bs#o_#?De_G2c6o>kSBJ}d;@i@20JMIa-N{n2hH5t^`Wk8 zx=kZvx?@)P2e?r77B%4xE#lRh%-|nUbJV1k=MF{9+4uuvWoR|e+cXa`?gja5EAr`} z&9{RGnMa6XY}Lg$qL%MLjQqYfx)`@^aiQ2$9txT@;Yh?1IU;5e()SRtX`AE+)Pil?N^|)8+PnHs}4w&u~ zsQsC=hY4bmu918-mi~>})isKoMQZv8VNt6trV#payNYPm%w5D3XXeH`Q-XKK5p`gM z8!^Jpm$w#mGIZ1)CfY;k8i6Wd^_B6Ih-9m-2`EYKlzyzBvPt{3{TMO($c9zfaI20n zP)k{)UPUd(A*Y6*)&?4pcZ`VlXm3}imkk0pjE!+%f6yd{Ar^z{6BHrK{s1&Ss8^9| z49Z&w{$w8~l}J*&3}|{epC7P+AA?KKeUAh!!EJ^Z;53B7}#=Gqe)f;f)Neq)sfttL@-k$Xma zdttRQLU8quK}sAQPD_6ix#v10-Hs-8v;7lRubkNEozV4rMcRkbhKqmz9FOVeBxY)1 zLRW%;kdF}!1Y||X+wDDfB2%1g^j7P*eWb(~f(iY3e%plkeDFM9b3Qnsv)QU06Yd@$ zJ?{_~8ugBjd+Tc=1Vyw4FNel4d<;7l!O|EF1@UB}?1%<)af>B3 z`o7|!k34j=bo3nQZ0_i4J&8Nlk?!Md9WAZLdpnvtSKhah``XU*0%i~;cM*>BvjIu} zwW{GwQo{=ThVqKR|F|~|Q+V{L0)5IL9!e|zdEkqD8b3k!Wh^?rrYulxDQr z#RW*30eE>#%+=Q10H`1sj2A63K~P1Ql{Ai^CD97gX+S3Co8;aCm709Z;Z|B@qJeAx zqFY|qX=Q|@K5`+K2GgX7tY@$d0^kP+NEf&%yBy1@3@HkT?7k4PQgv*4d?7&Jb%WS^weMN01ouz zU!Rvy`B-KZFN&Xs$; zf4YCRA=W$9pL7CC>W$6x6us#7T#rmgW==tw;`T1MwrB9E}I>9u~?D&N~hfsM<_JSK;tnKQMhD z)(~f=4kYajS6j$fOKG$DN&9;F-gW(z=~rg9%vQ{XllIEYw3^wW7?ZSbBeUWxGF|On zUQgv?d}!k_8erL-bN%x4<(c7GF1Gs)_sQh#$v@%#Z2XVL6Wfob+#S;fGTz5%3;>#J zoNoN>{ThtTQB}xfqarH_atGMZSR_2a__;yy>Zlhn6p;I;{~P*?o0OGWfoWmFxuaJ( z`X1oeuvRvkHY1H^Ck>4Br=kp+N}D9Uoz`5`hBbV`7{f#sk**OC4har`^tBw>EVKH_wZm$9Si_t!=9EFwNYmzwQoz5i zs2S~OtBcgl7$(i{n6VzD-z#*6@n?sM;}oz3TaKbh0UuWNO9ZCL=tco)E07*h=P0il z|3aMg+`9;{b&-XwiCA*7vSa1a<$ ztxu}q{%r|<5q6QmpC#Sruc+q|Y3`XZ5+161^)?NQPH+%j0m0$pkut2RF4@P&3jNkK)}sif{eDHtn60}~k?6RoXp53-W06O0qnB+I)bkd!s_P}*uv+!4h5 zEIsB$k20O<&6#E;gN@4IW!9b#;|a7+{6OXc;yyxa%lPLnNlbn@x7>JwUUKFLxfQXV zxxFdpYs=u^Tkn2zscgds`)=)vz45*MQ=MOdg>U^5#{G4(=Tr8YB}a~szx^k=pPGMc zUf6LcvEz`?d@{MCN7&vgE>2|G0f&cS~Y-i_m&1x%+go?2KS3mO8U3c4V$9Vav{k47lx{%$(>=zPgXT{g zEr>tR8C&h<2WB&gyR5Cd^bd-1TI=)=>WqksIRKzAkn+T7MP}038FLWt#y=-xhEc=# zZ9_J7k}CE>Apd_9bshj=(llvijFRF-+k7!D%Eg#emdf>|%?PmbODtG>W?{^e#KjKj zgn0&AU_?;zfMp*DYi0yBb)^X*ovJYH%5;jv>dV6#^IZ8PgMmYR!&#zM?YWiIg^{p9 zO_Z%?wqeap4x{k~Mk3Q=LQ|2zB;rPI?C3{*MnCMUdVqU{_Oht*41gm|wUV}qUL)wW zGZLjyTQ=oVT2%veRu7aJ=nZ3&TC%{v^mkDk==*!91T`q6&K$sxHg_od3zUVoNAUx1CWG-y+~<-Ztk#9hM_aKA%>e~sXH8_#)exC#}5`2#Y1oCkoB7jmPPU0F4;(mZz z(V@<*goHF^7Y@O)HX6FjidH#;xvth=#RJ1i-FkiHNv1nba;KsL? zw(gpPPUOc2l3SZ+jk80yoTSRyIpf^WA3B#Bx*oDZZ{N>Gh5pfGLr^H+@pwbS;*rxa zqp;;*!h7hm{K|#=ZHfGC@z(kLUCTviVY!^zxO1v)W*-I$mOD?yqiwG(dNwY2wkAAV z<0bQ+hUG3AWwptoI-#&$s6Qp-o}PD{S=Iwvf5p>js5ydo8f&`# zNm!Fe6W?(i^#E(O@74e4@ODJ+7dAWa^J#^Sgj<@o;^!}1rAH0=zu4OB#LZvsH6EqS zf4Sd+_(Oy7sMGw=Y9?`y_2_Q>!;+k%+w~8(8xa?C6_H|1>}i6!=bs;YDu~LIRuQGq zh=iz&#K{DRN<(IFD=`x2pmI@iuOTYs84#5nS)@zNpMj{%@Iy5c6Q!d3F=jL8rRk6n zRobI;#G}@o{vFW_Y3Vc|AhIz@SJpDwltWs-Gz4VHKtRCHFF`=r6g@be0p*bOj_oA~ zNJC1{M3P!bC%AdYG_N>5?#! zHG>+Lgo)fyYZgpoea9+cBK&4xB2Q%ROBF_gAz0o9iQCYx4~w3EJzs!}vN!rK9~z~h zP#OWX$q;BGChSP|u~c9X3x(1hLU7*Y8AFD7F9l>9M)Y^?F5vZp3kfM zERP&3$*Wx2vSY3Y?0!RXOEdgV+irBrXrq0u?GL(vGagn6C(rzByKwGOa(e*y;&Ivb z#pX9>+l9&liSnk;G(KK5@7cLrf+oIP{}Qdw>E!M+$+ENAv_8kuS|6dXDxNd9^B(Ly z9i$t_RJG5}pKSVR)sL$db{$UaIxHM{Gr8-Ou=BLwI+L=Tg-&Ni_g=%yWHN8_9lcNs z{L+-jYZ7t~3J3cI*IVhMYkg25Y?n) zzyVyd_%4cg|pV7W4 zttlGOutK`?Ff(axR4~YBx*KTF3@f?qwJ8J9VVQpgdT<d zAe`(mG3Gm#3=^U?tRbF}Y*@vrVmfFVt>jOo3w_rHC7ZK#HVr1$OuAodjWb-Cf33FQ zlBSl2cJSr2W*ft_7V(ocN@rbB8Ld0kD4(r$hdSj>_ITJ5f1 zsk}=o?_Q&Pw%Kv1qr*AF1R+UR)D_id_U%`w*PT&sg;uZopHy$Q*`OHBVRF**E70Z9 z=!&dSI$P`djMj73D4%V#bF|hAGWKSoBU^PvwVc?UG`qzJ@laoNMLo!?aO4%cxYpAZ zsX0*{WT4y?nqlj6dSTtQcQsij(>Pu-Y0KafI6=9RPrxj0owU7Uo6%kS138u<=j28H z4y4-%BJGvE5MdZ+LO6@g;?zELk`go_c5wo4;pb0$7t>~N$p!Mo`rb@jlPS|^4Zw^m zH}GOJxC(=wvxSofiY45q8perA1Uc?+*Kc_Ajz*SWfr{q9;&Qx`QE{ZMh96 z!wk26`7DxD*=hxHqy=SH8M$%#o*R~&%`h3fmr%CWX_ZE=Xi+7Q38PH5CaTD{`O0%u zH7cdzyuNlgkB^Wwz0&vK`I9g!9zr5~7YnLlvca>NGaE+vZ(3x)#X3Fo@LcY5#jrs*c3bX&?^y97r; zoWvLMH_sX7EqkDZF1$jMQ;WN%tV^hO?~T1T_utqbbH>M$dHbd;w z3GoxW8NCq|D)-EdCySb=jxITJuXj#&&KAdBO*v{M$A-$-+1Tqs?v8oKPI8`c7tS*l zm*Q~Y5|qa6x7t7Gyww?BC%oF7EI)y>DobYTcdz`$mG8du8}A5(+fwG*Wu3wL7QK{H zfZKS_-My)tBajQ7JMMjRsiNwmeIM?Nzwz()3mz{dQRfbbiur{q<$`yo&~Pl7_c~~| zxQJ7vP*4*;nzHP{XcS9Mj)eGF*IZ+=sEJUN+4h!{x$1N8rjN`Ynm=-U=n(3f?%9&w z*9FTu;t6BLVmoT)iProh{*WESDI}V`s(sH867bKW2|c4U9(iV<)iiw+du03uv6ICd~aW}vI~9_OP)$$^DDxuT|)Q3yyqM6 zY(!CXs-Wm*`;GRSoi{pT>*n0a{6kl}mde&?&Kpi)Z&$MHIF6ovp6chRUg#es*O#~jV4Q5;KjIN zT&h#%nx*yL4~B0Ie-OA85Vq~VJDObIDOk!f7Kl($fX53*u=&BsE^hQp|8@3?dLN{A+3r*owJTKEHN)SqSa)+mv!M|yl%O2LX6ZO#m- z8pp}h=a6WAe~lJ2c{|;sbf2NW_*3*ci|+=b$@Iu0q;==n4%$T58ux6y%+MUAdy}zx ztpo#Sy7j^j0>|Gna^TA#D|3wNtL>4`&`%p-Z*2tbk@Gpc0xIF9@HY9}1o!)rk44M${~6ah5r z5J-R~jZ$W7QufUfm)H`UfYW2W;A&1;4li0=lb}Z0w%yp6y6Ewh*?C&pnWH2k-CF zc&$}vXib*2r5x>_gKJgC`{o_5q8^v@NrvlhO}{nUG4I?G-~Gvf+XwEJ&2MW}AyLT> z533x1uJmqivhr}saYR8#y|FeSciVq;)Gn8SSm@}DHo*aJ1D|l}f?yk*H!#wH*g*`J zb`eP%{E2$H8+#MDl(x6y$fFX^-0^WBHe0H;+pfr~Sp-+4YFgG6sTrPX@Fb+tM-t$9 zNBEsJ&F`}5yH&47a%h~#cd7767?aw^kt{rIM6)8Rxrkqz+Bfjq^f*ZS5o1rckD~|< z#tE)Q#!b55q)sIJuS=sost1ia@U?U+ZnEn!o~RyS&i6CNDBVh~O5e*yAe+To8y)EB zm^|sDhQ%44i8MGzT8ojUDPL`m^f-gog3R>l!pIp6f{nbV9XM{t4dEn~mq>Sh9-p3r zD>pXf0QmqAQ8eH{7YM?Sl$w55t*IIIb$r6Aea zwQmDX9Nty0Fd3OJ^k`&TN`CmacPd;TLDX`W{jJ*&6M$VRl0Dex=o zWh+i_dYSvThwxQBXfpJ~z5j2Km0Ln^Ji}2-*>WS5tLnjwDu)(URwX&V*)o|7eEKu_ zn?w;#5^{v)XbcF43(1K-!qTO0gFK__C-N|$_@+h#1A5TAHMBhlB1JkRolCTJ<4~>q zJtIvS!Fqj{*+E#42M6!R`Hhi$kerbXAEJr`@L7v`xcq+)kKj2M#91ooNZ@KdM_djh zE>mq3T?~s+!a+ZI=p^=)PLv^l%+Ab%YXvjct9(utADwDaTWWZYkwp%5X@E^rZ~vi1fYn>hYzV zLZPJbZj;b>3{LMw$CEkT3psBla^6hloW6SOvAtlipkm6FW?K;EU(6;m>pXBbC>-c| z*dr9Zk<2-{kaH%Hb0(S7x1vtMbE@Mt@p560%6cvg3N|KXV3%BZLVk79RkPsQk#Oxu zx^^wNUQM`OO}ZMdwtr@J%~X8noh8eLm@R%RW!X!Zwre+Z3ZIim<-BFH{5c29z<}gY zI3Rrp1ZQ*qCz+kUM}RmvV|KiHojifJ$b$y~~X4#LAa7F@b6ZaIiWbft4?h1*NrvH|*{w-xk_}{2?QpEY+ zse=pD!31@1*;Ga6%!Xq};(4*7x87N%a64D|l>99B(tBx7ykeQc4@tw%a+uym7sq>+ zDg4}Pd6@rjQ0O`J@Z!(bCtA)(i9)|mxWFd*hn|uw%jfAgXu5b71;}r#=c)Aj#YNp_ z`UD+^?u7i_#bfd-^g^QU7vAy-gG0hwLy5D)iQbFKqww2Lr95AppxbHWozpKlXd;iuC57bkUInx*3)j{H6xlz)Xw lp{G*9(>A)2-m-k0rVNg$iAVa9$A*G=LlGPV4W-g#{(t+{&-efU delta 4297 zcma)9eQX@X6`#G`y}fVW+4tek*cUst*T!}d<6t}f!0|_LKAadQ0f)fp;$0`_*mq`j zFUHn64it$*SYVh|SfU_PsWd1l38kX$hfqiyic~0ZoCvy23sIr86>Tp#ZL0K-_RXH} zEh`@H!QD{-gzlZM3B zlLnPvIRp(E$-1U2yWu30nlqErt@)XfQDS3;lkA;hwDG&EtQ_EnDtu_EJJOwlgUpqUjJ#z8fLHC|D1Xmo#T|4#;Xil^;_zj_#wGqG4GX2#(5{0m~YzME7GS=!q<*-qtt;UW~@k z1q!TZ5NR{6t+fmc%fWDfZh&(@jjAf6K#rp8Ze+{;IKy?rh-^8t`E!#?*1Dq|aU;1> zw8inMHRIXF?b6a&aghEr_P zQFREs%aYS|`IZ>?kGkuWnY>(_XU~G|^d?zSTdjJ&q{uM+3({8Q+uQaS+mFVWVx+p0 znk5M5P4a4br8S0LM3ODWzldJqlD0CaA0lXyKf;g0^U{8d8AFNPM0@meM7~qjr2YNJae?%H5`K1GWR*j2{%3+NgQX)WyBSA^`mu8448aEnL z55iO=6xK}pgNhsuj>sA(MMp+8eiVlQ{N~Qb2INsS6bXw)oCnW<^U1YRkKY-Is-sag zFc=z^1Nt_MtQcXwNN+fs)Rg6B)`LKVzv6^94l|^?t)cMINBS~jJMs-zS>p=?76Jm_A2&VRW0!6{d{|caK2Qa zd==lmK{&r&=Nrr&1?+;s)sf9EWb+*b#3g#Z8DhC?8e#?i17cGwc||O=D+uias7``2 zVmw(CJ$-!AS$LIXjPVHo0vLU&6|C1=QYslKIFtHmsSZc`iZV*b25j7X77HkMfYxk`}!A~_Aae^XvR2mWYJkVcX-iR4cBQw>D=HChF%#u zlQX|*skC(||K6{V!M>e&&NRc!)P0Vnr&EgubB%j zy30>^FS*xV=jnpDo!WE53-O1YS4@33%OLcwC*rS&#^2O-c%&yBouv?eopy-7^}wFX zX)A^9d~0rJF1wIZ3;YE;-{})BWSS_S%XbzF7YYT+mzujQ?1dUv7tg-S^T2DFD>zWn zcgDasUi}hCNvr5jT>ex4BQn!h{SO!xQN(8GXqWaHVq;*@@&Z zUBn@kdRlKoHu9I%S>)I@H*pmgk$+@okx%L@q^Hj60;cu(I9(a1F-5Ft} z^%Ya;0JD89z;G)Zs7KcS6HX>fZ|B>>$)0j8jVIg=vzm|$`&9g7C?=Rl1`vc#WrII+ z3y2QDUnxsvh({X4mK!FNS2DMKDR(^}+`O4NzU5fkv9XUcJhQ$_8AVqs>&_gQAABRS zRM|CSpA9ZLi{`wSoaJ$z>fW#btn-MkF)y!+nGasIIiKx5-aRui*L&j8`REl}=S`L| zcafe(f58<~|4m3eB27Iaq#lqnl<#8x4ZECftlF^+Y_P?KQA6(9SWVs+%=`@G z$`{C)mLjrkYr&h}-?+e#H(Khf>2gva*IR0>LK39-b(pUukG0syspe+#%dK{@w#m(y z$Oo-C424^kC#4I?;$E7lV%(62Toba$+uJNGGwq=}lnb#`^)zW#eeigCiy(bAk}Nf~8Jr-T@kMkfvA;kJ$B=Q(C4wyCsQNbGHmv1DUQ zvmHt1)P^?te2k5(+tg}JOYOSUPEK#~kjHbgvSaq-SqxHBd2(nKePGrWvy;|l4-58? zw&pCKHD;iN)Jv(m0IGev=pZNC|H&B1_c}I{j7~54Q%5N|(n;JsqVqPe>uIBglyt3O z){{rNd{$bzV2V!MNse}TO%A-34z`h(yEYNy<_ac{tlQk@c7QMm=|m5}{_f^HD+Tqp z;IH%;$hpnC4Lc1a=e_~M4JTo`Bg_u+X!lyv6=zReV9h(osqW6A%cjoH1oKm4kB>cd z*%B!ZLi~hQc_uO~c)J=R_p5if0$#3#o$GO5D;50!E4IF}U9`A(FggJnN zW7V|M!@cCC9lHzDxp5cFCo0S=*?$#u3_@2F$3^VHClSJ@LztHAs_bUdL_doQmH_1@a;n@U5>c= z%E-6xzrT`?F)%jX;V`JJX|bJJW7eHY*KAy4(e(jMx|>tw7eFJeZzqrTbutd}lRhyc zBMqK$L1l=m-OY^)qKgQ-o+fjYttrnop2t!e;fBxKt4f^ghemF8F zV~ok)_jm*Jt!NHO9t=i@)j*;Y*6%}2fUm~E5FSzJ$Dj@foK2NoI2xvj4+@sRSZE@M zB|Hi8Yw%aT1acI?mB(8yS{g2MjpVrpKJB0{S2G0ydJPp24Oj&MjsgLm7MMMvFJYu! zC%LIQyI;9m|H#9cRCy2i*@NOP_@q;D;&RtPO{x8Ns16K-jwTtlS4KX0n>Bh(l-<}=x|eQ^ZcT;DS7;w?GMPUb{=9MOIr z=4gL**RtNn>kDCYhK(cIpUq12@41)talC`cVS1S}`r=;ZtiHIHc~4*5%f##JnZ^^a v2D*8tqW5RjWjefmm?0nREm~E`l)@&6vmKkb#OC~l^IYM4aTam8`Xl`poT~ll diff --git a/data_filter.py b/data_filter.py index 556efd9..9d7cb51 100644 --- a/data_filter.py +++ b/data_filter.py @@ -7,184 +7,148 @@ import shutil from typing import Dict, List, Any, Optional, Tuple, Union, Set -def is_odoobot_message(message: Dict[str, Any]) -> bool: +def is_odoobot_author(message: Dict[str, Any]) -> bool: """ - Détecte si un message provient d'OdooBot ou d'un bot système. + Vérifie si l'auteur du message est OdooBot ou un autre système. Args: - message: Dictionnaire du message à analyser + message: Le message à vérifier Returns: - True si le message est d'OdooBot, False sinon + True si le message provient d'OdooBot, False sinon """ - if not message: - return False + # Vérifier le nom de l'auteur + if 'author_id' in message and isinstance(message['author_id'], list) and len(message['author_id']) > 1: + author_name = message['author_id'][1].lower() + if 'odoobot' in author_name or 'bot' in author_name or 'système' in author_name: + return True - # Vérifier par le nom de l'auteur - author_name = "" - if message.get('author_id') and isinstance(message.get('author_id'), list) and len(message.get('author_id')) > 1: - author_name = message.get('author_id')[1].lower() - elif message.get('author_details', {}).get('name'): - author_name = message.get('author_details', {}).get('name', '').lower() - - if 'odoobot' in author_name or 'bot' in author_name or 'système' in author_name or 'system' in author_name: - return True - - # Vérifier par le contenu du message (messages système typiques) - body = message.get('body', '').lower() - if body and isinstance(body, str): - system_patterns = [ - r'assigné à', - r'assigned to', - r'étape changée', - r'stage changed', - r'créé automatiquement', - r'automatically created', - r'a modifié la date limite', - r'changed the deadline', - r'a ajouté une pièce jointe', - r'added an attachment' - ] - - for pattern in system_patterns: - if re.search(pattern, body, re.IGNORECASE): - return True - - # Vérifier par le type de message/sous-type + # Vérifier le type de message (souvent les notifications système) if message.get('message_type') == 'notification': return True - subtype_name = "" - if message.get('subtype_id') and isinstance(message.get('subtype_id'), list) and len(message.get('subtype_id')) > 1: - subtype_name = message.get('subtype_id')[1].lower() - elif message.get('subtype_details') and isinstance(message.get('subtype_details'), list) and len(message.get('subtype_details')) > 0: - subtype_name = message.get('subtype_details')[0].get('name', '').lower() + # Vérifier le sous-type du message + if 'subtype_id' in message and isinstance(message['subtype_id'], list) and len(message['subtype_id']) > 1: + subtype = message['subtype_id'][1].lower() + if 'notification' in subtype or 'system' in subtype: + return True - if subtype_name and ('notification' in subtype_name or 'system' in subtype_name): - return True + # Vérifier le contenu du message + if 'body' in message and isinstance(message['body'], str): + body = message['body'].lower() + system_patterns = [ + 'assigné à', 'étape changée', 'créé automatiquement', + 'assigned to', 'stage changed', 'automatically created', + 'updated', 'mis à jour', 'a modifié', 'changed' + ] + + for pattern in system_patterns: + if pattern in body: + return True return False -def is_important_image(img_tag: Any, message_text: str) -> bool: +def is_important_image(tag, message_text: str) -> bool: """ - Détermine si une image est importante ou s'il s'agit d'une image inutile (logo, signature, etc.). + Détermine si une image est importante ou s'il s'agit d'un logo/signature. Args: - img_tag: Balise d'image BeautifulSoup - message_text: Texte du message complet pour contexte + tag: La balise d'image à analyser + message_text: Le texte complet du message pour contexte Returns: True si l'image semble importante, False sinon """ # Vérifier les attributs de l'image - img_src = img_tag.get('src', '') - img_alt = img_tag.get('alt', '') - img_class = img_tag.get('class', '') - img_style = img_tag.get('style', '') + src = tag.get('src', '') + alt = tag.get('alt', '') + title = tag.get('title', '') + css_class = tag.get('class', '') - # Mots-clés indiquant des images inutiles - useless_patterns = [ - 'logo', 'signature', 'footer', 'header', 'separator', 'separateur', - 'outlook', 'mail_signature', 'icon', 'emoticon', 'emoji', 'cid:', - 'pixel', 'spacer', 'vignette', 'footer', 'banner', 'banniere' + # Patterns pour les images inutiles + useless_img_patterns = [ + 'logo', 'signature', 'outlook', 'footer', 'header', 'icon', + 'emoticon', 'emoji', 'cid:', 'pixel', 'spacer', 'vignette', + 'banner', 'separator', 'decoration', 'mail_signature' ] - # Vérifier le src/alt/class pour les motifs inutiles - for pattern in useless_patterns: - if (pattern in img_src.lower() or - pattern in img_alt.lower() or - (isinstance(img_class, list) and any(pattern in c.lower() for c in img_class)) or - (isinstance(img_class, str) and pattern in img_class.lower()) or - pattern in img_style.lower()): + # Vérifier si c'est une image inutile + for pattern in useless_img_patterns: + if (pattern in src.lower() or + pattern in alt.lower() or + pattern in title.lower() or + (css_class and any(pattern in c.lower() for c in css_class if isinstance(c, str)))): return False - # Vérifier les dimensions (logos et icônes sont souvent petits) - width = img_tag.get('width', '') - height = img_tag.get('height', '') - - # Convertir en entiers si possible + # Vérifier la taille (les petites images sont souvent des icônes/logos) + width = tag.get('width', '') + height = tag.get('height', '') try: - width = int(width) if width and width.isdigit() else None - height = int(height) if height and height.isdigit() else None - except (ValueError, TypeError): - # Extraire les dimensions des attributs style si disponibles - if img_style: - width_match = re.search(r'width:[ ]*(\d+)', img_style) - height_match = re.search(r'height:[ ]*(\d+)', img_style) - - width = int(width_match.group(1)) if width_match else None - height = int(height_match.group(1)) if height_match else None - - # Images très petites sont souvent des éléments décoratifs - if width is not None and height is not None: - if width <= 50 and height <= 50: # Taille arbitraire pour les petites images + width = int(width) if width and str(width).isdigit() else None + height = int(height) if height and str(height).isdigit() else None + if width and height and width <= 50 and height <= 50: return False + except (ValueError, TypeError): + pass - # Rechercher des termes qui indiquent l'importance de l'image dans le texte du message - importance_indicators = [ + # Vérifier si l'image est mentionnée dans le texte + image_indicators = [ 'capture', 'screenshot', 'image', 'photo', 'illustration', - 'pièce jointe', 'attachment', 'voir', 'regarder', 'ci-joint', - 'écran', 'erreur', 'problème', 'bug', 'issue' + 'voir', 'regarder', 'ci-joint', 'écran', 'erreur', 'problème', + 'bug', 'pièce jointe', 'attachment', 'veuillez trouver' ] - for indicator in importance_indicators: + for indicator in image_indicators: if indicator in message_text.lower(): return True - # Par défaut, considérer l'image comme importante si aucun des filtres ci-dessus ne s'applique + # Par défaut, considérer les images qui ne sont pas clairement inutiles comme potentiellement importantes return True -def find_relevant_attachments(message_text: str, attachments_info: List[Dict[str, Any]]) -> List[int]: +def find_attachment_references(message_text: str, attachments_info: List[Dict[str, Any]]) -> List[int]: """ - Trouve les pièces jointes pertinentes mentionnées dans le message. + Identifie les pièces jointes mentionnées dans le message. Args: message_text: Texte du message - attachments_info: Liste des informations sur les pièces jointes + attachments_info: Informations sur les pièces jointes disponibles Returns: Liste des IDs des pièces jointes pertinentes """ - relevant_ids = [] - if not message_text or not attachments_info: - return relevant_ids + return [] - # Rechercher les mentions de pièces jointes dans le texte + # Patterns indiquant des pièces jointes attachment_indicators = [ - r'pi(è|e)ce(s)? jointe(s)?', r'attachment(s)?', r'fichier(s)?', r'file(s)?', - r'voir (le|la|les) document(s)?', r'voir (le|la|les) fichier(s)?', - r'voir (le|la|les) image(s)?', r'voir (le|la|les) screenshot(s)?', - r'voir (le|la|les) capture(s)?', r'voir (le|la|les) photo(s)?', - r'voir ci-joint', r'voir ci-dessous', r'voir ci-après', - r'veuillez trouver', r'please find', r'in attachment', - r'joint(e)?(s)?', r'attached', r'screenshot(s)?', r'capture(s)? d(\'|e) (é|e)cran', - r'image(s)?', r'photo(s)?' + r'pi[èe]ce[s]? jointe[s]?', r'attachment[s]?', r'fichier[s]?', r'file[s]?', + r'veuillez trouver', r'please find', r'voir ci-joint', r'voir ci-dessous', + r'ci-joint', r'joint[e]?[s]?', r'attached', r'screenshot[s]?', + r'capture[s]? d[\'e] ?[ée]cran', r'image[s]?', r'photo[s]?' ] - has_attachment_mention = False - for indicator in attachment_indicators: - if re.search(indicator, message_text, re.IGNORECASE): - has_attachment_mention = True + relevant_ids = [] + + # Vérifier si le message mentionne des pièces jointes + mention_found = False + for pattern in attachment_indicators: + if re.search(pattern, message_text, re.IGNORECASE): + mention_found = True break - # Si le message mentionne des pièces jointes - if has_attachment_mention: + if mention_found: + # Identifier les pièces jointes pertinentes (non logos/images d'interface) for attachment in attachments_info: - # Exclure les pièces jointes qui semblent être des signatures ou des logos - name = attachment.get('name', '').lower() - useless_patterns = ['logo', 'signature', 'outlook', 'footer', 'header', 'icon', 'emoticon', 'emoji'] + name = attachment.get('name', '').lower() if attachment.get('name') else '' - is_useless = False - for pattern in useless_patterns: - if pattern in name: - is_useless = True - break + # Exclure les pièces jointes qui semblent être des logos ou images d'interface + useless_patterns = ['logo', 'signature', 'outlook', 'icon', 'emoticon', 'emoji'] + is_useless = any(pattern in name for pattern in useless_patterns) - if not is_useless: - relevant_ids.append(attachment.get('id')) + if not is_useless and 'id' in attachment: + relevant_ids.append(attachment['id']) return relevant_ids @@ -192,7 +156,7 @@ def find_relevant_attachments(message_text: str, attachments_info: List[Dict[str def clean_html(html_content: str) -> str: """ Nettoie le contenu HTML en supprimant toutes les balises mais en préservant le texte. - Améliore le traitement des images, supprime les signatures et les éléments inutiles. + Traite spécifiquement les images pour garder uniquement celles pertinentes. Args: html_content: Contenu HTML à nettoyer @@ -206,41 +170,38 @@ def clean_html(html_content: str) -> str: # Utiliser BeautifulSoup pour manipuler le HTML soup = BeautifulSoup(html_content, 'html.parser') - # Supprimer les signatures et pieds de courriels typiques - signature_selectors = [ + # Supprimer les éléments de signature + signature_elements = [ 'div.signature', '.gmail_signature', '.signature', - 'hr + div', 'hr + p', '.footer', '.mail-signature', - '.ms-signature', '[data-smartmail="gmail_signature"]' + 'hr + div', 'hr + p', '.footer', '.mail-signature' ] - for selector in signature_selectors: + for selector in signature_elements: for element in soup.select(selector): element.decompose() - # Supprimer les lignes horizontales qui séparent souvent les signatures + # Supprimer les lignes horizontales (souvent utilisées pour séparer les signatures) for hr in soup.find_all('hr'): hr.decompose() + # Récupérer le texte complet pour analyse + full_text = soup.get_text(' ', strip=True) + # Traiter les images - message_text = soup.get_text() for img in soup.find_all('img'): - if is_important_image(img, message_text): + if is_important_image(img, full_text): # Remplacer les images importantes par une description alt_text = img.get('alt', '') or img.get('title', '') or '[Image importante]' img.replace_with(f" [Image: {alt_text}] ") else: - # Supprimer les images inutiles + # Supprimer les images non pertinentes img.decompose() - # Traiter les références aux pièces jointes - attachment_refs = soup.find_all('a', href=re.compile(r'attachment|piece|fichier|file', re.IGNORECASE)) - for ref in attachment_refs: - ref.replace_with(f" [Pièce jointe: {ref.get_text()}] ") - - # Filtrer les éléments vides ou non significatifs - for tag in soup.find_all(['span', 'div', 'p']): - if not tag.get_text(strip=True): - tag.decompose() + # Traiter les liens vers des pièces jointes + for a in soup.find_all('a', href=True): + href = a.get('href', '').lower() + if 'attachment' in href or 'download' in href or 'file' in href: + a.replace_with(f" [Pièce jointe: {a.get_text()}] ") # Récupérer le texte sans balises HTML text = soup.get_text(separator=' ', strip=True) @@ -254,7 +215,7 @@ def clean_html(html_content: str) -> str: # Nettoyer les lignes vides multiples text = re.sub(r'\n\s*\n', '\n\n', text) - # Supprimer les footers typiques des emails + # Supprimer les disclaimers et signatures standards footer_patterns = [ r'Sent from my .*', r'Envoyé depuis mon .*', @@ -267,12 +228,7 @@ def clean_html(html_content: str) -> str: r'This message and any attachments.*', r'Ce message et ses pièces jointes.*', r'AVIS DE CONFIDENTIALITÉ.*', - r'PRIVACY NOTICE.*', - r'Droit à la déconnexion.*', - r'L\'objectif du Support Technique.*', - r'\\*\\*\\*\\*\\*\\* ATTENTION \\*\\*\\*\\*\\*\\*.*', - r'Please consider the environment.*', - r'Pensez à l\'environnement.*' + r'PRIVACY NOTICE.*' ] for pattern in footer_patterns: @@ -288,14 +244,14 @@ def process_message_file(message_file_path: str, output_dir: str, attachments_in Args: message_file_path: Chemin du fichier de message à traiter output_dir: Répertoire de sortie pour le fichier traité - attachments_info: Informations sur les pièces jointes (optionnel) + attachments_info: Informations sur les pièces jointes disponibles """ try: with open(message_file_path, 'r', encoding='utf-8') as f: message_data = json.load(f) # Ignorer les messages d'OdooBot - if is_odoobot_message(message_data): + if is_odoobot_author(message_data): print(f"Message ignoré (OdooBot): {os.path.basename(message_file_path)}") return @@ -304,9 +260,9 @@ def process_message_file(message_file_path: str, output_dir: str, attachments_in # Remplacer le contenu HTML par le texte filtré message_data['body'] = clean_html(message_data['body']) - # Identifier les pièces jointes pertinentes si disponibles + # Identifier les pièces jointes pertinentes mentionnées dans le message if attachments_info and message_data['body']: - relevant_attachments = find_relevant_attachments(message_data['body'], attachments_info) + relevant_attachments = find_attachment_references(message_data['body'], attachments_info) if relevant_attachments: message_data['relevant_attachment_ids'] = relevant_attachments @@ -328,48 +284,44 @@ def process_messages_threads(threads_file_path: str, output_dir: str, attachment Args: threads_file_path: Chemin du fichier de threads de messages output_dir: Répertoire de sortie pour le fichier traité - attachments_info: Informations sur les pièces jointes (optionnel) + attachments_info: Informations sur les pièces jointes disponibles """ try: with open(threads_file_path, 'r', encoding='utf-8') as f: threads_data = json.load(f) - # Stocker les IDs des threads à supprimer (qui ne contiennent que des messages d'OdooBot) + # Liste des threads à supprimer (ceux qui ne contiennent que des messages d'OdooBot) threads_to_remove = [] # Parcourir tous les threads for thread_id, thread in threads_data.items(): - - # Vérifier si le message principal existe et n'est pas d'OdooBot + # Traiter le message principal main_message_is_bot = False if thread.get('main_message'): - if is_odoobot_message(thread['main_message']): + if is_odoobot_author(thread['main_message']): main_message_is_bot = True - # Si c'est un message d'OdooBot, on le supprime thread['main_message'] = None elif 'body' in thread['main_message']: - # Sinon, on nettoie le corps du message thread['main_message']['body'] = clean_html(thread['main_message']['body']) # Identifier les pièces jointes pertinentes if attachments_info and thread['main_message']['body']: - relevant_attachments = find_relevant_attachments( + relevant_attachments = find_attachment_references( thread['main_message']['body'], attachments_info ) if relevant_attachments: thread['main_message']['relevant_attachment_ids'] = relevant_attachments - # Filtrer les réponses pour supprimer celles d'OdooBot + # Traiter les réponses (filtrer les messages d'OdooBot) filtered_replies = [] for reply in thread.get('replies', []): - if not is_odoobot_message(reply): - # Nettoyer le corps du message + if not is_odoobot_author(reply): if 'body' in reply: reply['body'] = clean_html(reply['body']) # Identifier les pièces jointes pertinentes if attachments_info and reply['body']: - relevant_attachments = find_relevant_attachments(reply['body'], attachments_info) + relevant_attachments = find_attachment_references(reply['body'], attachments_info) if relevant_attachments: reply['relevant_attachment_ids'] = relevant_attachments @@ -378,8 +330,7 @@ def process_messages_threads(threads_file_path: str, output_dir: str, attachment # Mettre à jour les réponses thread['replies'] = filtered_replies - # Si le thread ne contient plus de messages (tous étaient des messages d'OdooBot), - # marquer pour suppression + # Si le thread ne contient que des messages de bot, le marquer pour suppression if main_message_is_bot and not filtered_replies: threads_to_remove.append(thread_id) @@ -394,7 +345,7 @@ def process_messages_threads(threads_file_path: str, output_dir: str, attachment print(f"Fichier de threads traité: {os.path.basename(threads_file_path)}") if threads_to_remove: - print(f" {len(threads_to_remove)} threads supprimés (messages d'OdooBot uniquement)") + print(f" {len(threads_to_remove)} threads supprimés (OdooBot uniquement)") except Exception as e: print(f"Erreur lors du traitement du fichier {threads_file_path}: {e}") @@ -407,7 +358,7 @@ def process_messages_collection(messages_file_path: str, output_dir: str, attach Args: messages_file_path: Chemin du fichier de collection de messages output_dir: Répertoire de sortie pour le fichier traité - attachments_info: Informations sur les pièces jointes (optionnel) + attachments_info: Informations sur les pièces jointes disponibles """ try: with open(messages_file_path, 'r', encoding='utf-8') as f: @@ -416,14 +367,14 @@ def process_messages_collection(messages_file_path: str, output_dir: str, attach # Filtrer les messages pour supprimer ceux d'OdooBot filtered_messages = [] for message in messages_data: - if not is_odoobot_message(message): - # Nettoyer le corps du message + if not is_odoobot_author(message): + # Nettoyer le contenu HTML if 'body' in message: message['body'] = clean_html(message['body']) # Identifier les pièces jointes pertinentes if attachments_info and message['body']: - relevant_attachments = find_relevant_attachments(message['body'], attachments_info) + relevant_attachments = find_attachment_references(message['body'], attachments_info) if relevant_attachments: message['relevant_attachment_ids'] = relevant_attachments @@ -464,7 +415,7 @@ def process_ticket_folder(ticket_folder: str, output_base_dir: str) -> None: shutil.copy2(src_file, dst_file) print(f"Fichier copié: {file_name}") - # Charger les informations sur les pièces jointes si disponibles + # Charger les informations sur les pièces jointes attachments_info = [] attachments_info_file = os.path.join(ticket_folder, 'attachments_info.json') if os.path.exists(attachments_info_file): @@ -502,7 +453,7 @@ def process_ticket_folder(ticket_folder: str, output_base_dir: str) -> None: if os.path.exists(message_threads_file): process_messages_threads(message_threads_file, output_ticket_dir, attachments_info) - # Copier le répertoire des pièces jointes (on conserve toutes les pièces jointes) + # Copier le répertoire des pièces jointes (on garde toutes les pièces jointes) src_attachments_dir = os.path.join(ticket_folder, 'attachments') if os.path.exists(src_attachments_dir): dst_attachments_dir = os.path.join(output_ticket_dir, 'attachments') @@ -556,9 +507,9 @@ def run_filter_wizard() -> None: print("\n==== FILTRAGE DES MESSAGES DES TICKETS ====") print("Cette fonction va:") print("1. Supprimer les messages provenant d'OdooBot") - print("2. Supprimer les logos, signatures et images non pertinentes") - print("3. Conserver uniquement le texte utile des messages") - print("4. Identifier les pièces jointes mentionnées dans les messages\n") + print("2. Filtrer les images inutiles (logos, signatures, images Outlook)") + print("3. Conserver les images pertinentes pour la demande") + print("4. Identifier les pièces jointes importantes mentionnées dans les messages\n") # Demander le répertoire source default_source = 'exported_tickets'