From e9e93ac3f01aaef8887f1473a5b8c457e3c5da5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Moreo=20Fern=C3=A1ndez?= Date: Wed, 8 May 2019 10:04:02 +0200 Subject: [PATCH] more cleaning --- src/author_attribution.py | 114 ++++++++++++++++++ .../__pycache__/dante_loader.cpython-36.pyc | Bin 1850 -> 0 bytes src/data/__pycache__/features.cpython-36.pyc | Bin 19050 -> 0 bytes src/data/dante_loader.py | 12 +- src/data/features.py | 4 +- src/model.py | 6 +- ~$perimenti.docx | Bin 162 -> 0 bytes ~WRL3794.tmp | Bin 19798 -> 0 bytes 8 files changed, 129 insertions(+), 7 deletions(-) create mode 100644 src/author_attribution.py delete mode 100644 src/data/__pycache__/dante_loader.cpython-36.pyc delete mode 100644 src/data/__pycache__/features.cpython-36.pyc delete mode 100644 ~$perimenti.docx delete mode 100644 ~WRL3794.tmp diff --git a/src/author_attribution.py b/src/author_attribution.py new file mode 100644 index 0000000..6de0ba9 --- /dev/null +++ b/src/author_attribution.py @@ -0,0 +1,114 @@ +from sklearn.linear_model import LogisticRegression +from data.dante_loader import load_texts +from data.features import * +from model import AuthorshipVerificator, f1_from_counters +import numpy as np +import matplotlib +import matplotlib.pyplot as plt + +def plot_attribution(path, authors, attributions, paragraph_offset=1): + + paragraphs = ["Full"] + [f'{paragraph_offset+i}' for i in range(attributions.shape[1]-1)] + + fig, ax = plt.subplots() + im = ax.imshow(attributions) + + # We want to show all ticks... + ax.set_xticks(np.arange(len(paragraphs))) + ax.set_yticks(np.arange(len(authors))) + # ... and label them with the respective list entries + ax.set_xticklabels(paragraphs) + ax.set_yticklabels(authors) + + # Rotate the tick labels and set their alignment. + plt.setp(ax.get_xticklabels(), rotation=45, ha="right", + rotation_mode="anchor") + + # Loop over data dimensions and create text annotations. + for i in range(len(authors)): + for j in range(len(paragraphs)): + text = ax.text(j, i, f'{attributions[i, j]:.2f}', ha="center", va="center", color="w") + + ax.set_title("Attribution matrix") + fig.tight_layout() + # plt.show() + plt.savefig(path) + +import sys +authors = ['Dante', 'ClaraAssisiensis', 'GiovanniBoccaccio', 'GuidoFaba', 'PierDellaVigna'] +attributions = np.load('attribution_ep1.npy') +plot_attribution('plot1.pdf', authors, attributions) +sys.exit(0) + +author_attribution = [] +for epistola in [1]: + + print(f'Epistola {epistola}') + print('='*80) + path = f'../testi_{epistola}' + + if epistola == 1: + authors = ['Dante', 'ClaraAssisiensis', 'GiovanniBoccaccio', 'GuidoFaba', 'PierDellaVigna'] + paragraphs = range(1,3) + + else: + authors = ['Dante', 'BeneFlorentinus', 'BenvenutoDaImola', 'BoncompagnoDaSigna', 'ClaraAssisiensis', + 'FilippoVillani', 'GiovanniBoccaccio', 'GiovanniDelVirgilio', + 'GrazioloBambaglioli', 'GuidoDaPisa', + 'GuidoDeColumnis', 'GuidoFaba', 'IacobusDeVaragine', 'IohannesDeAppia', + 'IohannesDePlanoCarpini', 'IulianusDeSpira', 'NicolaTrevet', 'PierDellaVigna', + 'PietroAlighieri', 'RaimundusLullus', + 'RyccardusDeSanctoGermano', 'ZonoDeMagnalis'] + paragraphs = range(13, 90) + + discarded = 0 + f1_scores = [] + counters = [] + for i, author in enumerate(authors): + print('=' * 80) + print('Authorship Identification for {} (complete {}/{})'.format(author, i, len(authors))) + print('Corpus of Epistola {}'.format(epistola)) + print('=' * 80) + + target = [f'EpistolaXIII_{epistola}.txt'] + [f'EpistolaXIII_{epistola}_{paragraph}.txt' for paragraph in paragraphs] + positive, negative, ep_texts = load_texts(path, positive_author=author, unknown_target=target) + if len(positive) < 2: + discarded += 1 + continue + + n_full_docs = len(positive) + len(negative) + + feature_extractor = FeatureExtractor(function_words_freq='latin', + conjugations_freq='latin', + features_Mendenhall=True, + features_sentenceLengths=True, + tfidf_feat_selection_ratio=0.1, + wordngrams=True, n_wordngrams=(1, 2), + charngrams=True, n_charngrams=(3, 4, 5), + preserve_punctuation=False, + split_documents=True, split_policy=split_by_sentences, window_size=3, + normalize_features=True) + + Xtr, ytr, groups = feature_extractor.fit_transform(positive, negative) + + print('Fitting the Verificator') + av = AuthorshipVerificator(nfolds=10, estimator=LogisticRegression, author_name=author) + av.fit(Xtr, ytr, groups) + + attributions=[] + for i,target_text in enumerate(ep_texts): + ep = feature_extractor.transform(target_text, avoid_splitting=True) + prob,_ = av.predict_proba(ep, epistola_name=target[i]) + attributions.append(prob) + author_attribution.append(attributions) + + author_attribution = np.asarray(author_attribution) + attribution_path = f'attribution_ep{epistola}.npy' + print(f'saving attribution matrix of shape {author_attribution.shape} in {attribution_path}') + np.save(attribution_path, author_attribution) + + + + + + diff --git a/src/data/__pycache__/dante_loader.cpython-36.pyc b/src/data/__pycache__/dante_loader.cpython-36.pyc deleted file mode 100644 index b67149f690af1254c5e8f326c5bac125a67b7352..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1850 zcmZXU&u<$=6vt;~XMcF@q@fa&6cG#(5vCXpBM3pf564s!!e{TF(H?_0-JfVJlN&CHu$?|t6;cw-}8{C4f|j}~Kp zu?wFE{d2VPD+t9D&sd-SMaE~MFZgQA>wC?d>`Ub-`IPm2<*NYyf$FG8wVv{RsAAQ| zNTm1y>uo&8er&(TotzYDK9tnO7ow4V8?Af_QF#^LCMmYrRFqwdJ4|@RE3p(y{ykTs zmWzHR&6A2vM8%i9_A8t|m1a>fXx*OfSRWEd&Du%98+QisF`FQrA$Xu**wbsKS_Ou%7N}ZVDk@Fi<=fUPz zW-j5V7dLL-Jp2ZSl!pgt_ApHj@91)Bi}|5Su=KDrLufWxbq86IC~fxUkKLxxv*Mv1 z%oB?f@|c`_8G`XHmptGxkN9hRS47;rg3%$pi#~xCpfyq1gJKzCz~D`81^mesBBI1I zS+k}O2G;$?fBwGX@Ao_dx|pF;O&j6`KYPJzc&t4vvP=)jWu*&muioF>peIbUHgBH3 z5?dH^Tj!4%rZ#DoZez40bB2d(;b*Rpoqva%G9;4e+d%Wtnz#H3WsIL-wO7kAt9=DR z7C*1wcZYrVV;xrEL@2Oh_KtU;d&Kxs)KL{pr1F<|Jy=@n!8YyIs$=V0uvUdjp+Xfs z6O(Ys>b8xlHvHDES{0uP^Hb$bS~S909#?X$S3a~BjaGpQK4EomyaN`6`TcxX(0^gq z4tDKe*ADVTR-KV|Kp7)P;sC6J-`2jNP`Zt~^R%=@mVA5f-o3%8+_MYonBxOO-f{B8 zjy^?7MmirBD$U2e*ogum$*@YL#>EYa<)hRdxzOl&mJBucoM$QUF6KHnM2eF}Cu-&V zAem!W^){RYbVu&eyeLzfg2Cj(9w9q6Px5JAJjw^ihq1OUS{t}1*JA|sp?2ZQw(R>k za;KzB%Cw6~4u+Xdau=NM7FoTpc{0;ZlIFtm-T%u@OgS+ilkzRxxO@+Si5N*B!A;Br z@9-U@LWET4U_9m>Pg0-9p7{v2FH?gG7PSVaU!bI{=8MqnngH${3Pp8k+u?q1?$^jYJtkc_T26x9&3l#tADbYm40~0IoZ= zw2=ydk5thlrj^^6r(%gUPwu}2g0f{rfVm3Qyg`B}X;?T7&gz@3v-(tA@AleehsMcM zrc2^&67P_B7Xp>4Fjgxkr}}Z}{JBYUYp7l{IP^lZOZqhu*GYUpg35^b5Tf5%1#eIo zl&Swm3hb>SbqmJj>k#Z;QHl_i%c6rAnH!ix$sn8kUaPsa3yX4(&|9G?j5e=gx~VP= oqD?rPl^2etDr=f56Wy8>>Lk;jk!#9L2oQl2qaeBz1@W8z0Hm+FcmMzZ diff --git a/src/data/__pycache__/features.cpython-36.pyc b/src/data/__pycache__/features.cpython-36.pyc deleted file mode 100644 index 0adc9f39ad589f58bb797c93ac846943786979dc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 19050 zcmdUX33y!Bb>6%;D+U7)gdlFBAdlN}|V&gOJU<0ehIx^3#VubbO6ZP+d8lDJiyCjWo# zn>Paxl;y_Bmkz#j-o5wT_1tsMIrrTA@TRe`;;t{h0 zB~CL{wNIz&BJMWR%b9A%VOqMGUCvc=8qOe`ujVzJZ5EbCsv{cCHH*um)zRfrwX{4| z9n-XYb9{MQbsNG38IhulN=e3KT(-%CY?n#7M5g3Y*&#b+m+Y23a+&Ov%jF7rgOue; z*(X=Y)pCtoE7!@iROEVjqwJR(WJYe3o8)FWAa9aeL-Y)Nud*z+-F1b(cm!tA-c|hK?@XqRlyjPCNad}W4l85C4dig%7 z%KPP{JR%>Enmj6=%t>8De3_R8S(Jt>$th{dvb3Zv9XTyV0tuxnXXLD$lNDK&$D}7~ z@uQTdoWFCUjr$P4mG`A+#R`ILOOd|Lj3{6+a5 z`Cj=x`HXx&M)(8rgYsEG zQNAQUD?cYcFTWtaC|{OelD{N>S^kRrRrzK4YZ%o@`4#z!{B`+N`8D|)^6T;&@;Bvg z$={a0BY#)^o_tmQzWf9Ehw_{9kK}9ekL91pKb1)Snf!D47xFLVU&+_yU(3Id-;#eT zzb*ex{=NK;oR|L~|55&v{Ac+un9JYA4E&z_SNU)9-{pVE@5>*^H{^fH|B_ebf6M=o zKa~F~fAoSQDS!iT0ck)6kOkxbc|ZX$0w@AT0VTi~U>vXwFag*Om;_t`m;zi1*a6rH z*ag@P*dq@CUk2C;7Y(gz*T^&0oTYP@U?*J0Mmd9;CjFt0s8?r0A>I; z0&W7_3^)LI6W|uWcK~h$+y;0v;4OfIfZG9w0CxcH1iW?O8`bULxCeD4vq*Ux;4bhU zOWqAQ47dky1n_piI{^0r-idksDxU8G+y}THa1`)vzypBy0Nx8Y1~?9Q5b%(!0zV8m z0eByv3V1)@B;XOi2LLs|qW}*u2dD!C-~;9tZmg2l3xLHZQ&rNu0a!wqv_A!CBFs5h z2DA|7T(kilgm;SLGk$uWvuyyulTMYh6au=)!Pz?wqwo+p1j19%oWIQwx9eMcUA4)77YBUgMB z@G-PZ?sy*XafJ81J%!LG053er&Whs7c#gE%X4z{ZQzaec7K6~MpNfk0z|@w#&@|3Pg+>r` zJQMiL9E9!8**3&WJ@e&l*mO}E;Jbi-=8-d&E;9p6NbS4{^#UY+jx)y(N` z!$YZaNNdz1H)vF|e%SDqtJ#L2OQ-9Q7|$)UIGw@`g6^i4H`R@&7(@^=``)!VJmcu_j& z&QxIvsoZ=$r{!VGkQG7Vs~3WE7<6QvsnWjv#W z6gL!m93HA6)}gW~rYJ^J7`(UKXpt-IqhTLtjqlJNA7~4Y9NWY*=yQ_-$w67fi_W1xb=e5e*cypPGbsN&~*rfWFgE+M5F_ z`*WOQkjI-tH_%7mbi-GAaH^5Om4%gr4(ME?1Ny{BlsHI^X`xRf$CTi_gA#mX8sr4D z!D&a625suv(Q)}PCG?5%F&&fv*c$1;YAqF%3v4GeIi`VS1dP)LKzmCRj~yVHv?r{B zed$8(V9_;#5UfOux$_OX>QYAdpa>92LF9uO5aMu@5wch{U<1UoAl@kLbgmo3E+vD{ z=>}rm4f{X=9gBUiG7cf0S37w*L^*Xi-%c3f+TFDAAutV=A`p6lD7%0#+cvt=44OrX zwul$3oY^pDJhW2gLmZNcvH&uh@`Dfd5swfI$3*3J#!-2WDHv4RgM3N*L=7U(FiO-Q z@8&_cc>>18HCwPqSjN;B6h=0(U9i4Z)%Xgsp#RK5kmDs2^F#x4Y%qrx%9^;$#6FyxCy9-z3VC0l)}_8hO?(~=8Di+a(k(D z*lJ-~GD{hT*PRbz4{|J3$sLWdO>j>u%KNQ_W+PaP@-Vw#K`gFLJhoPDwZn3wRSxRy zj(=svHE%&9<{(fd8%=4!+C1!xu+eVS)Ic*Mco}7_u~vM@Yj*v6jDdX4&C}ph6R_#E z7G6%7S>$~6z}*K=J{b5WIC-qmJk#(_9`=J%u=Y+0c-cQ0m^wnCcXD1Oogd6}R-(xT zKit$T2OkuHQcl6i0E(`;3$YEPB3CG>u0~)Jsa%k4S4bkcl1|8zIcnaFygK65`(e8+ zbr(Rbb}J}jIAso`>n->}xg9S0rW`JMt@3>&XBqOKe9RKE47sL+T=By)Osh`YgucM+ zowNiTt~Q#Asb3pTHFuyB700}leVI+r+YpK-NQ~jk?C=IsWL$F((l(GH;hHJD5$r>s z1VNVI-B@=HE3Z^CN0S1i8w(r*1bcxNkol3RHK*q+rPfm>W1pXNPiKzV{=sI?1~B70 z6*qEcZi<|6Q@39&9s;w4_4ab-&LgarZRAp`R|lPDBdpD>)clq-Va?7QYPRcMGq@9h zeu2m-Y6HxQKqmChZLU9>*jy1wpsl1Fb3ek#=(w)9^prDx1_4WsFtrpf91>*BIrr%> z-NV8;oO%(QJaqWaeI=cLMs;bZtUiihsXV$HLrVq7oW&c1dI9pnV7^2klvNLUpr&`IPO} z9rBXpql&wsG;0V5JAlA_<9+UXCxZ>E%+_} zT*ut`Zq~`lGpW^^Uf-x@lEP>XDu1QlR5om%@>`qQQrac022pVk-qhD<@H|uNl(Qo}>6Dytz=X5UEj#0`@sM68`S{~XqQ=sdG>5=K zYRwHXTkDV%ds196HoSvsDr2U5ST(mH{oE~~!*ztUHNBSUfd{5iYuTQ=1X;Y4?xlKI zPd#U6>M5)oDlg3V0dT&HIfsNMEzNPF2Z1UX^L{*{ER3NAKgxNX4n#?mS!y?0m0UDV zVNshm-U2r1VGxa;#csNNwibZ))v=a;cJQuRXkp7#U-Sb@&s=+c9%j&Hn&oW`!TZo^ zFaeZ;0wReD?l=@t*)fYq(+<{Y+3-hAmB^frBLYfg09iop+`Od|s8NNa$*&c9Zm$3- zGXg4&fM(9~?%GH%1GPRvTJ&-VN|u2- za1qp3w1(=)fY{sPYy%ocxdqeib}WUJjg8OtW)npD16HQ2hXX|r3-$n!$o!iGiAzq= znZk3gt9rkUw-?9`<;h)mL#a(JT!RBvWwuHlj5)ngh9=_bS`<=ob;Z`=YROqo{fR`D zIf@qg(wCBA)9B3uNIM7U%c*Po(qSu^J0zW4^B$Cff!96@S6nRK-@8QvT;oXt(87CP zs{vH>5bLM{AxzO@YDBXM;+&a>fzAK|WqM4JrX<3Gf`V^oSOh15Vqt;h4S{gDmje4A zTFZdtGf2sXE(8IT>T{_O?24!JOzIhD-qlGg^wO$$p%$0G$|@ixA5Mk2r92fSC)b;R zm?)Op|yz${4Yt>iE)8}r!>86`1<@UUN5gWK=HEPRU_!ems zvrMuAwC02GS9;5X#Dit8OrxcI7RI4p_PzRI8O~5HnR3H6N@-M{rm56g&<4GvxUEum z-7VJC>dZiqc`NKzu;_LC^7P^TM=EjUhgl;u!gSK5irSz@`so|Z^orKLVU>{KFX3bi zTX>Rs7II>DYAW@tb6KCwodsmWiP1voCtOivRSCi`>+54O-fM-(kR-hGnS*`_L1JhUoY3%s0 zBA#=sQTm+wjPs0(bwh)-zizNfU{dCjb(3F$%F%VRX%~?2$o1Mq*kSvu9ri4mG+eA` zICL0;e$PA`b4*OTr~vH|oNa`QR)q{nBv?|TLz@^YOA=oB9<vcCWdxfl z4!MAzK<5S@M%fM_f2=Ajo2wsPwgjjKd^e=eJ&i||QSJ`zPMe9Pi7;G*5_p3CW zZ#SW7A;?uV;3UN>&-sfmFU$9X@BP-I*KA%Fl<&rV1o{WAIXKJe<-5D)j8B<VM~1hJleOV4h$BGcaw2_o{+=HndTB03FLA$}>a)C&-GISQD2J-1A5Q_z=Zb%YE zzM)AjC$C{Vh2} zp^8cxm{wGBFR2>c+`!O4XpX4Rj56I0y!MouL+0qCj8aioi)zjWHSlV+6o>0=IGpVe z;*P6qoZ707FQF2pMJi8WSi}~%0LWmEH;t#EmOoFToj(FpXNbQNAd!n-Mg+1geeUxr z7hi(QNTp%yW`d0yyN=Y#UCSY_m4&xZpFgr6n?E|FGTh>=6=RuM)H0*J{8AR1ztLFh zAAC3k37VrDX-I-*B@^aVf@USRr35_#37UreJdmJ;C2Tj<4!{QQn@LbB3HnBJ-}eyg zFE{*|GMpH5aF9Y^SR*>NpZmFH{?sI1-y}ba}46K`gb(1EDqCKA8zwy>mFf zSiV@+Z<}5Itw~bymP&(RBmFhpu&>Lr_sgiXZ(iLnSOERWxzDP;t%TwRv61YU_bIPK zaQtaF_+Hm;`Pcd*f>-ahZ)rqBV&2j1^EZ?5NmDtjZ~m|?+CY|XU7DI6s@Yne{s!Lk z<>~&{Gq6Oc7Oha_uH2j;qL)lJ@m8{`jNK$&ODbNgIXK98EV{-L11b9mqSb7RY=ZkS zz$##i8nWW`11zAvfkzpd1?mq=rS0yEj^O8zRX3uk0!8i)2wZx5it2xy>v2@SSH`>VHN_lnH$gJ+ zf>IefceUdLo`d%CpSe3A>~@-d`TqS6?TQ;ED@4(n};j8R5jqk}ydA<>9;iJbZ>1qkLop_kznOZm> z8y8a+rBN;#0byGK&kHMAtKcU>QoeR3xmadC%-lJxA+S6- zipU6fJiW5V>kRUj10?*k2N8N@p;R;L zX}F^t_)N)D&hzk_(qHPDr;tbc1=q)f?O82mlD5&d;Hl>(4}OUt+8*cFJf^7*u4MSw z6Vy8NJ9YKAF)G5XQ;SDC%XSp$BAOTs)MofP-p5Y2l4D*#EHOzqo;-juDfnI>8j>M4 zTERrmS%6P8bk$)1<8mpx&b0ww4~&-B0fDWUs|U$c8cPr0u9+`O# z6k008&N*!3o-BkTVUanXb#~!MdM84OgW>@-IYwC<-F&)r+s`!6GXTEB=M!9aQ0Z(I;3*jY) zze>==kefgSu?pupyJuH``4}_tq@X&=LVaHN1e0fhFr@Ed_)|pW3quFnZv*1(d@+@C zCS8h9Yr7Vm8F$LnTnT%Sx?Bz_LT^Pt&xGOr!!91yWG~Ib1$d#rB)yEdIEKK%3~v;0 zufE3RkySC8K59wfM9wK(6v(Q(GD_oOi6Q41wh@hCsn-T8Jr_AAB4;JqW0yKzbhSXQ zj`D5+Z@9FpZtF;O1j0!YiQgXE+;6pA7FSkLE3x{?it)@{X?WGAK~Qr zC4@Px$!{A%vw(oR9Y0mYnG8NTP*}L2W z0WRof@%aEgC_qY%9~B@a&kqamEby9c<lJ|WIIv393cVumNN*Il z*aIbNqrEZUQg0l1thWt#T)XLdhrooYIq0juukDF-lqYU&8ttJ6Wui{_jt8BFBfjPe zl^UGn$vz|IjlP-&6P5K;3Re+l%E#%u$0ak~Q(jTM8Q{tlstnsn+i^S4WKCS~z%60C z_Ud(Y`WjuR`=-^`*Q|0thqH_Az%R!qYKx6d*$ZKp%ysENQtjM^sxJ$! z*BK~WU+f->RE^ImNV|h-wmmFp)2<( zPh%WHygC+alP&gE`Uf}aix4>e;Lz%&jU9T z9q4ZWPQ*5O(r~;{;VO&Qj^0}18id}3D+CSVLW3IgD>{vQg=z7dL3z60+sf@pg=kST zqtPV$Yc%4=0@E3GTHYy!4HdK<`IH`Ix0D6Zn3^Z_b+^W_3S8^rJ*SM)We# zXMv)Uy=ZCW6~36kX+j*vn?X!%Me}(UWAo<6iROvEKs1{mwNwhV8rUJM)zUD3f-CSc zbyd!}_}}X8Bb$tgnSKq4F-gmY#*rFQUxyJvhvZI}l7gLvbzo0nXRcu!HcsId9862t z{xC7IzY6tu2w|8MOKI~F9Q(uQL>Pv~5)O@?&zwy2ZVYb1q;brzhnLuScVb|A za6f_E1zdZD`;>X?9wRI+jq0_6qK0rhx|Tx-*Jjx|tSj_q4aUmSnE5*JcyBb^Mq_7b zY#lCDeN&JuoTH;l69V(t){6J|B+pF7B__4R_MsBFElS`+GBX>Om}2Wm{rN3&q@SfNTE6lH*UX3ewB@T%>S`H#-qvJW zzlL>d>#mM|FY-@+lh&`@vi0#TT4!xvLv7pe4s!;$1h5mpIm|9yw~m8HjPi1sKn{fC z*mhf25}!o8xHP@aEkF@tE-qEpQ*sG5L6|3&U&q};7_~)vd#W;Jw>^3x4ch}K=M8brfz1>fTK&4V z&fb@N$?@Kn*|F`R=u0*ctJ~vj&?@}q4_Yzls6h z`%d()=v%P^yjgIGY96^t~pPiK;?Stqy03( zW{yb3d7Ghxw@d@kC|B#?22p74r+W75Y8U%O~dhm|GRiDnqJm zKFE-&oF^Fi9MLmGs)3$j=(9xBB2m`1`Xa;Qteg96xcK0!)19u>j4v?%$A~z==6UAc zmgJ6=7c>xe8MT@o@kNe&79)Yn+aXL>1qns+i6`xG%Tivxg}NMW|(9$GjP87dw0i zN;n!+WM|;x;khH+a}ar1y8U?eENNAeRL%vn3Ov$-3e~a&Re5|f3dLK3DjwUQB*zWT zKsEn1%|AYrKf6W#3G=l$|8}+v#bfKvZIKt>XyXdGt^JY>x$|4(#+Tb>DZEsBv16U; zF^|zgRY*Y<<6(sHjW(zP^~v!gzGvw&8WL;palok9%ez&G_xl7R;)3zw3vClYrIM~x zp=H38G71A`b9-OH8@Bhkq4xH@W_zVA+S>*7w*xhd;T>iNFah8!>&z})6|2A9GS0rP zyO`JAs>)oH+z%fP-A{Vin%=x#?if6(a3M{^UcDjBrmF7$OWpNtQd+KxGZk#Bl~~ut zn(H-m-RhsI)*8j9hxqV_S4Iz>|0Db=SKFPyCUrthVwK027*ZFs?nQo<(X&KfBvK{w zE$NPpYQykw&-^^mlSD5O(U8!y3acroB&@pl1%`eR2-iZW4fweaJn9p6Q#~ZfX=!1L zhL?fiHMVNz_&vvtS4)0paJ_W12xt3GQ-q&HAO*o)fLNud9*Ar!xN{J>6yH{4Ps9Sd z1M(isTsB&LfM-$fip4M%Xik-<%d0N=s1J1aO%(2XElvF_^}LsVcmfg^cU z7e>yJ&6FY!{5mOBL?8uqHV$f(+||o37}v0!k=zl4&KdZTc_CJZHjbgG@!kJ?$F;jt zG6)Q4o$mBoK%Qr1tE?45^4wQ&g?cCzz(T z=L-z+JWG#itiT|#hbCZ_QO-hCIAkaEPRv9q;03S(?Xvi9$|8yVu!+ z*bXfENm^$bvhQr3>z$jN8RvRuzq8-H72)fha=KI~jg>N`iPGqfNrW#cZJ(H$*fTM) MFS~bpDPPL`G2jJjnE(I) diff --git a/src/data/dante_loader.py b/src/data/dante_loader.py index 7ecc8f3..022d559 100644 --- a/src/data/dante_loader.py +++ b/src/data/dante_loader.py @@ -49,9 +49,15 @@ def load_texts(path, positive_author='Dante', unknown_target=None): # load the test data (Epistolas 1 and 2) if unknown_target: - unknown = open(join(path, unknown_target), encoding="utf8").read() - unknown = remove_citations(unknown) - return positive, negative, unknown + if isinstance(unknown_target, str): + unknown_target = [unknown_target] + unknowns = [] + for unknown_text in unknown_target: + unknown = open(join(path, unknown_text), encoding="utf8").read() + unknown = remove_citations(unknown) + unknowns.append(unknown) + if len(unknowns) == 1: unknowns = unknowns[0] + return positive, negative, unknowns else: return positive, negative diff --git a/src/data/features.py b/src/data/features.py index d30009b..230e8ae 100644 --- a/src/data/features.py +++ b/src/data/features.py @@ -387,12 +387,12 @@ class FeatureExtractor: return X, y, groups - def transform(self, test, return_fragments=False, window_size=-1): + def transform(self, test, return_fragments=False, window_size=-1, avoid_splitting=False): test = [test] if window_size==-1: window_size = self.window_size - if self.split_documents: + if self.split_documents and not avoid_splitting: tests, _ = splitter(test, split_policy=self.split_policy, window_size=window_size) test.extend(tests) diff --git a/src/model.py b/src/model.py index 38ae9a1..e2d1267 100644 --- a/src/model.py +++ b/src/model.py @@ -39,9 +39,11 @@ class AuthorshipVerificator: def __init__(self, nfolds=10, params = {'C': np.logspace(-4,+4,9), 'class_weight':['balanced',None]}, - estimator=SVC): + estimator=SVC, + author_name=None): self.nfolds = nfolds self.params = params + self.author_name = author_name if author_name else 'this author' if estimator is SVC: self.params['kernel'] = ['linear', 'rbf'] self.probability = True @@ -117,7 +119,7 @@ class AuthorshipVerificator: assert self.probability, 'svm is not calibrated' pred = self.estimator.predict_proba(test) full_doc_prediction = pred[0,1] - print('{} is from the same author: {}'.format(epistola_name, full_doc_prediction)) + print(f'{epistola_name} is from {self.author_name} with Probability {full_doc_prediction:.3f}') if len(pred) > 1: fragment_predictions = pred[1:,1] print('fragments average {:.3f}, array={}'.format(fragment_predictions.mean(), fragment_predictions)) diff --git a/~$perimenti.docx b/~$perimenti.docx deleted file mode 100644 index 7c3f6dede3e1372cf3e76446961338a8c826f733..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 162 zcmd-J&de#xOjK~rFG@-*N@O4o@G%54WHRJ1lmW>^1_cIZhJ1!1h9n@n2#6UNn1DFU z7h={r&94%2->xur-EOq4{d|S-;`f*0AU;S9GRUn@&<64UBuO#^<+d^zWUb|AU~FIj E07Y^hIRF3v diff --git a/~WRL3794.tmp b/~WRL3794.tmp deleted file mode 100644 index ad605a07aa3aa219fe3b8d8ecea2f6e64838898c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 19798 zcmeFZ1D7sAvM$`%?cKJq+qS*iwr$(CZQHhO+qP}@*E4gzbLOr)|KOfl>y5QCGqN%= z--yhLc%n*19QYR!04M-B0000!0DY{rvJoHvKp_YK03rZ5kosS1D+fa>2W2EM>xlE&(uZf30Gvr_0AKutjm>RVKbf*FL-FD;+c1_@q3XDlZeqJ+9vKb;co; z0Izzq8wB(GXx8^PFo4YeVV*d#7|qu|EmA+$2lZo~+IEH(_B7Q0p#N)@{}0>ZzfHX& zuJ^|*fAe1ZfBMgM%C7aIr#gLOqx42{`qV5h>#wFMlgxxlU?geSY zQ=7~{6%*aJRi`=*F2#4;k}pPMY4*vn!g@C2r-dalrXglT zUdo*-12eA=G5#5!R!Co(NrfUhev#|@v(8h$hh}Z>m82U3moA9RbEwlw7ksoR80Dh+ z)~=W9>hbZ>z#Xt2RX`~Ap~pe#{_dB@qwViY>*;GHneEjeMDP;uVejXQvt`tEJezGz z&DXc38sG0FW=GgJ8|;q7Tu+c?b9t7ktlZ|dI6+auZVlM8pzHiMlpa^0FAyrJZ`taG zM6j&c@fO}kv27q4TCa`-d?soG2d2)n3sgb|qyV_FIWJkU{k$RNl(J4gg(5#VMR%ge z6|pfu79$GLEgDgM3ei;>(>|rSEY{U`AQ;VlKG@}%dOjbk&dEgRj%$u*1mp|99uGz; zhL_MWq+waQ9)~UlnSiY@@tQ=%eS`)I%&Onws)B_*=i!;h9r@SVL)ZFq+kw~f;txT} zVBRldR7^0xUK5Q~LQrRJn~N~bTPD>o+*%?bUCDuO4b)}3jFrnusRfTtnGFubeH}-N zKbk9J?(PtXYK6jg2=-XcVq&To@&<(DUgYQy%@HON%`jq009>nmYOR)#PD>(U(!pWf z7-XZY87YjAkoUhEv=u&AHyh%Uo^du>X$n&>#ok}?-ltcfSV+UgH02OKoCY{8e8VOD zD!boXY;(YHs085S@Bu@8dDQ^E1uF@33fo)wa{K;hd=4z{%;qJ#M}N71d#RlSI!pJo zb-7%pVvhd;bYY}~($JXYyy^_LQyF4cahv%v`v8|Ip^v4OA#l@e&&J?=CX&o5VZDS0 z1#EbZYo2-6KRP+F^YoQ?1}zsrCkTuYOHwfH`=dH8u5@5)MZae$;V59?qmCh%Urqy_S2BTg z%4}hr@UYl#I$v9tLM~7N#PuPLf8HPpKJn3DgE8X&$~zh;Rinjf{st2z||&vwU*XV7E`d=b3$N<9OQ zs96vc^lYFJ6C~Ow`cN!sJ{_)(uHC1Rh;poiMN@=OKe%h5o3|%6g8#%OJ~gWrIKn}g zPA@SYJEC-`quK%zlB12@;H_guC(i?n#G_@j$b#y5-j*~s19I$&(6TE+mA~D$uA&stuR~NuX#we! zoZQlj-JMUKFeR~nYNHHNtN$ZSHK~S~x8^TCL%7b7EAN5=!}!L4W-@AQ>1eTC5-F4{ z%3n#!*r|vR5SBf4MNa2{{n6Gx3qls)&%j|(@N)%6;KdS_DaV_x1R3OWA5KGHyCO#X zDf>XK1RCUZ6VST$)&jx6CUPWOq?&?-EMmsScS0a+wB}x|AK^a6{2<46`+)1HUb0&y z)eF z#lY%!6|nk9J^jMW{390HgLF{*+}w3&xr~^TgG9IiWkUfm;n5dhyDbIKw?q>ucd@3Z zo1xJX%ChjdHcr7V{2znZV%7)p_Cb*;&u$sIJQlL47JIApg_KA%`cSaOE8zI%1!uZJBMEK!C5XYY2M7y2Jml#3 zU2~;~3&NhjSNo{Axl2g`h_q0(`y$A)9aykO^G%Dn$utEY6V%AvN&Hi0UOk&3cm#Ch z&8h2YOq&wyeKuchF7&ZT_%CDZ13vK#FjV&#{gpyuig(!y?#YBmr3POU;0J|;sk6vU-N69$T8JM6?5wGCPyc(>?O#wO`_oFdo!}QTL3ziIr$>?K@ z?(Tbp!BaDS>`t6=Wa|ZtsdS`Mz2F&O${y}}xbdsd_xsT34k=%Icuf2iX3+`w@mQUxQDMllcas?o^BpJ>@vt`9bVtZNU(*g9_% zuJR)l?}9dOCs^)8cri8ta2Rzn~ThHdPeu${e63 ze)w=$!l%q_IL#lH1PVA_;)p+#&BCzV-e&bNk^;9k<&AAFL?@%IEnF2*Ku)cqjlSDv zb8&TR0mM*@GCRDosIE`K8e7i$wH>EU(6C}mAMuZ`o*6{*NMxSNG6L@bVDDdnWFVaDe z#idg=s1#+&>x0iLzFLd!3b;Vxul;us&=&4YW{8JY~a$_@$ zqx#tcYA&=M973D)BTgY3A_`Kl`59fBJb&xJXqk?vDcYOgA#J!?tb{E``ceM6J344> zFH!NyE}{7xTKUSt#%L(wit1StE&KSrj;s|n9I*q2YqY<%z<{%RQD=*0kpcXBb3prt zZ#Z6_uD}JaPa=lol!ORqg2ylOpY(qz8;3r8<>8R|AyTV}=eEu-#3DRv#|O-l3`it& zBGBi&a3(l7^#Pmx6u@YR6?Tg7Z#u553Qi zCEUG$gn5Vefl;9ITX6Q<^Z`-sSmYTnw0yiYCS}RZaGsGOiPdG9O6yZV(1|8u$LQCX zi#j)Kd915q&PX)Djtt}*>}G;|TL*hIL$KP%@_dK!^j^mBWak2NdZz12R|QU3^ui-3 zVigzEiP;> z8?*E#Q&fTM+U$a+4e!mYW;ppRHPL?}J!7A7M7(m|NMpe`v+ESTMX1>^0_SE6 z4z&*fSfO5FrLq9Xf6A}f?cHE~*9G>E*Z>}%dRkwLZ`qzUlTh{Q)z$%ct8w0r*>W+u z<^ljcPF&czHJ~&v?aVUk`%Ts<^B;cIu0q zb9PFj@tj?$=?H<7ITOJIx;&X~K}1xUR&6yocf{P9GKIG_`#8whx6)H4%1k}Cab3>yfI?nUbI+$!6b{5V8re$fGzr+ zbo${w$fr*aVFDDWSvgbY!&_(6UtW1;eEZ7wTR*0C_I-|wTA5m=KpnbLxuCu2CAJoq z{l8EqmQYVLAYYf1R>~Y)FF!JQmztAcoNK61Dt~s0h1?wsEgkk1|6&tXQAE_sE>;QD z+jQ!&>fQKP6Bo-!*UjB!VL~oq2Q?v-iHVF{rwN5GjK=&;AHt|6*O5JcE1S7K?r@^!=76`kuPNtQ3vwQOY;}Eu z?7U?uL9@s!3+!RCkirz^qa zDayN5b7R)mV>O*#=*l4$uJFAc>ns)G(9CyOJTm^g$x3>96 zK)4t2mi_2$?NWkT$J4$hs!fBH?e)@uli0!roE0*HA{GuM&_O%S_KtoT@(npf_rq9=NheF zo#}Oe7JiF2s40Hy4*Q|YU%p?heca>q3lmCl;hlgD2eW_11u@9iScw{}S0ACnCKna% zO0xU2$a~^mzE;qZTd^n>6q{{p}@=PGglSyI0Q>QV3FgVan$QBWaL=2}$g$&HN3Ya+Tnsy|v z2t|RYiw)YAg)0q{&y{!@Cq7s$XCI!=lhz%IoA(43wq_NL#g3&!Fp>}X=kVj;p-6oG z`u0pEJ8>Ex!WVVGv0ljj4caCg6BnBQ!OO4RXO;1V)DZhh$!XXw-VcJVMu1LtSG1~O zNzP~l5fm`56kucIZBM9cgkj}@48aMF-E2$;LBoK9u`vESqc|eb{6CVX28QfY;!BKo znkQOTfmwmEQc8;(2BP5;5`HTWw%qcnOA2-uB62p0w@%FA2>tsWCPH%jy{?>NCf}Sm z3#{4D)`;V{a*|F>f(&D*q%9&U8B+boyEep8mnPy5= z{YCc0_v(%gb#nh02O^DY(={i`W))(zXbxRs#3P`m&ztD@Ndk>9id34(*J>B~?8NLP zLWsat4j$UuMfG8N>rWZ{P0;XOkP?_0VI6iK8p@Waw^3Cr22MV=VQXcefyyt0IPHgy zu+rmIX`V9QyqIKJ5f26I3Lx5@&=fdg0tpBgm$0EPKC~|iDkDI1HC)j zb`fM5^5a8@Su-jR6L5vRGIq-H!AxLyl(cq%WbJ5Jaq7JH(Trg}PL>$>R7x=3WyEwx z9xphI1K>>g0ikiO`hY>-UamDzx;an0#qEvSyGiQE30bkSx76=ySsq2QvQIA$ciyIm zKG|X4^R)`g+V0}0b0%GnPY024ZhoiI9tscyY5DsbpHyo30iQ;427p5U1$L#wF`dN+ z`D;=$&DF^KJjZ5kcRlgfU)J)a6NDiry zikPuB24#2AHU>l6pm6QSW+RvP=!uQD`&~eQFSLq8p$pBZleM28(ZkRPYMakAl8WAB4HjG?g^@aq`Gvzp5BsLBoo z=p0{TC!21XV3rx^p$QPFG3QvS<+#r71C{gT7^W8&S%Qzs6TLdsm5{~82eM}etY;{K z={#O+sVIpAq{7!+1li5rO}_PDl&*2tctA+^NM0S!38$?G*@W70UHxH}1L zl2BBJ<7UBf-hZQf}HKY$&3v2mE(@{-5dKbHy$t#~1?iM6`)Ku*j!2uw+q`UVa_*2B*WKBXN7+tM zy>VrcIy@$~zy1cdIMLFU5TX(8-ec)&tlxdDTAppK%6$#Rv4XCl=fqNrc`P*t_aC@4 zLXA;3D|0Z}9km+6kt|@W$ca);+pUuvZeaoT#9VJK4?#UR{s|Nl^2|_hRG_hZ!~Q2A zqP{B}8{`L~XaWWRzybgV{3j6cFSPF8;fVhqxdZ$G)PK1D-@Yp1C;tH`2V4hs`LDQB zaajI^h_xs4)lh#1G;sg?`>019H#Lh6niEh zd5E2gOl0|vPbs@juuEi~$a((vNHzUx70Qm&Ka7OYBAgo6XPn7zt3&&7u3lDr&b$ht+6gVWKTOgZ+ZX@R5 z8?5-;+8{{PGKk^TG=G;ch}*!!_w4wL6cRYE$Mrkhpa1^<#VS&+PF9(Y1rojuZTi3Z4?R;;MoMMv1lJQvpPBJ?Il zMDDC4AQB!*X#)Ax-e!2W-(5%||JB%G|<<4twy=m+Wt%LyJ?viU!U@7;zF|TZQN_n8X)jNWP?1BCcPFqTKa*Z#S1D(jfG8mkTgVF7 z_$P)BG0aM;*g>LW-24$3^jnVTu_q<;Pi01h0p!YPMOpWOT#!T#;siTR1>?3Znx7hYt9M&mLHvn4!B<}O+SO_=eqV1eU6?1)fh;){r`J2d{j|-xfC)_g z*d8j$z+-8V*dFzBm+z8L*1UB2rxswE5Ezd(o$JsBW2?v{9oR4ZMwTGcvWNT* zhIgfFsOAt|!-H@DL#-;aNTr7v61QkJ_(l&ZG7h}?Xh7Cc^3=L5a^=4%9ABz=x=hid z_rian7S!j~ZlKS3#9%?OD`Q_{m^?zp7)xhydcK^`2X}LQUep)EdaG4-QaFPY%gSC2 zK5Kt{zQ{i;(`^aY=LxVx5=zx3{orEcO*1S>DY{@)!WJA`$T8F!J+P?H zlzqarDx_xsTG5D5E+cH`w!0hij-5)gkl~DWG}I`98az(@nz;}!hNlGJUriSF zE~t;J_NCYHh?o$@zzoS^eUuDdA*hffQbVk|f(L*~wc!V|auHPi;viX3;QJ=D_;PIq zR$+yL&+G;0&FiqM^tG}4v#C(7K5R4u?;a>atf?Gq^NiGb)nn)Zq(aRtc5ypT5VM{) zz*5g`@6<#|?b=>V*Sr{ZU?Ewm;LEXi8{E<2%!31k9TW%@gH9lhLc-SYs=1!L;Qhre z{NQc&i-E`X<40_#=5z*$6a$9o<||Q(o6@0#BS_wyrhbFm zlPpF~Bfodb#4~Qkh8QRk<#J&w(+4C|Ro>UjAC)31Yvw#pqm~P6KAyFw&_*`gq0#Tg z=wnvC>n(*+RG|(48Tw=lTjr=jRIa84A8hIMpo%vZxEoqA$J7O-`R9|Bc-0%FZ=TF> zOK7t^-&cIO(%pukJ>tcTwSP7*>!;nO^0b(62+!y6G;XbeX7G(aFek2(_FD5F*XC)S z$FCi6Y}E!+6zI((>LmtudxM-nYCI=&YH^*?o<#|@e;N%Ac+={H`&P4m>&`et%pBp* za|Rumu|c6iSgeWIJi;s9%0?X5NGw*sjV7LnnVma( zuA|kasO~uosPQ7jc|rdJ&H{cK`Xo=>{dAS;?v_n=X}OHGZf;&>yl)yVfWKjZ5J(%b zd$)qat*_l|pRD%KK;~r1sBYcA%pPiS2%5CU>>L=WaEXG}hk z*RUVCI5txquX@B^#+_sf${@e`g;*BC8$k?rF33{Dc&t>(^oX-9Yj-);Zp?PC=s=$; zHL7xnKH5&3OlCXPg|FiL0wjVv7%%TIfwIR8`WIxfjCm(Otg-eeAdKJUSZPB{xjf&b z(|R@bt1{ikeZi3zyUDG)E9g^zTrAxMgUls0>UyXt( zh<}{+Gex1&Ol5K#pG-B3pIPB{ITt{F32Q0n>e}Cz+UcZnmS-c6<4eyv>PDIfSGl7i z7-~%2`=>6o`mHTvM|zA?S6JDa36jXQ8cpkGj6$bQ4$9Y0t?~sJ%ucy(c;@$hFSLfW zelQ$%I-PEH*w3LHMKgu|ngbT{G_P_^v$saIWosvO;O=)FL-Uo_H+M^|{`0&4ocvV8 znSGr0a~K{Y0sK#LhrOYLgQ=CV{XY)E1{GENb!IrPYl&}vp=m_{pI@+zQo+U-#7^@f zmc^c^jafg8IsX?sb1fA=ha;tlo+{8HB5*yvP|LhT8N!m$Uc~*VpY4cn$zM{PVa6>V|7RD+Q))b(e`F&PYyDFri7^4b z4uqNspjuZCO3}U|5zg40++#(G-sKe9>RFZZ0B{W!E$-XU8xy}3F@4r$M<;nK_lhWm zx`64jYB&8ARyzmmH_SoIh~#G(V{+=yuqVpnbo&* z`~h)FGtMsV$8nh8yYtNw^2owqE;EfPY{5om8nowM|7(5MxK~EAfnR0V*3wc{uy+o$ z;rk+mClEi?{$zcAD{hj;K{h_BD+`s#f5qzv)!FU}azkG4)FpezUC^Mc` ztV5NMs8omcO7BYK2p(CM69pPtWhq^5=>5w_U{HMJDZi;JA=QLyyZqzz)f9yNJb;Vw zbs;9;kN9D#O5?89nLa*(jaTY6ME9823JW_}z=h$FvT1EiYC=0PDjuy+wMM%r_dLkzz~6L3L~A_bM{#C}nsS zhX%&#ylog|?}6cQ4lifq&#x)cz2)+am2D(BkGo898<&q=oSPDt3wta{ydSZ3(! z_Y1FEDKSr6^8Q9q6WGb+Jd%N&qFoszx=bC56_OA`0e!eEya)saA^3FAiNEkRQ|MC7 zBq7GXaI8pbHsK!AUMOi1kRT*z1TJNa+?4_dm>VbI1T|1zn8RM8GIMZdVnA`Ks2Vg- zmqA_av5-Pjb@Sj0L$QNPu;bHEy5#4X^>CAKB@v)CzkWeLoBh7d91amQImlb1T!-*v zgi1R81ydHu7l@#MnV*E6*F`7v8?_SdX!ZAV8VE`x5E6vsx=`b44Q>a&MKD`#I03c2 z#zNHP+5+GEFiZ)2UaI|$ah?-aHfwfFC^QXk`B>=WfN=gk{{d+f+#Vcbah0J8AV#Ki zwn)`D0VliF#fysoml!sbumpm@3HlzNvHOV^=W@hNhMNsRYY8@P)jajuV^{AUz-H-n zpKhqp`m+T~oyzTq9>)gMRVONX~lwi>&Cmax4Ji2;p5ND1C zE4jgL*ziVl3;rVnP0>qy25dNd=Tvs0yt5%g0}u!ed-}tA#8EC0d)Jq&#JRpo*1tW) zX0yT(F?LhMOGbY)@`Q?#ZS_KABq2@m?;-B(t|3qmXD;94_+iSonjC*Ln`t!a4;Nez z@oQBnPLU*ECi}RK*DAH+RSS7g>BJ$y#vXcc`PHk!G7`~*{i5#qGlQLUNHCtxj*xDp zsbS~nImcl`ngGeL2m9Q%mMUMOEOvE{!$qp~ygx9)PiY=ALGejUBkWeMO(R?iX|xSC z(>3PRA*-1owaZ4ys4?AMdjWl2^@?p>WsF+>RM$Sm&@yJvqz>~c2GwX`{je9e?8t*w zQ9ZiaFjC#z){blLGS;%@2zzeMuG7$O672}fX~_b)(Z45<|K}1gn{93aaT3FHDRf8( zgJbq0)~J~gttHT})f0?iti|FKQx)kBukIf43%(SVzy7c>He4iN=?46|ap znrZ!6Bl>TZTU`TKIICyFH*n7i&%iZ%y%h*}{kbHOE)n9DFYlG?PquD>(ctLmU(d`j zPy;6;cGcMBV5WUXA=JW4%fs*GBWIz%XV`io`eGaX``%pM@1=2@{pSBc#%`D_spDZInGJzOXZA^G` z6QiM|UXZ_A69eWV>aIjOC2NdAG{v<^;wR@ojX;bUP4+NF?8#m_O+=F73o-)Fw;*B4 ztce2*;WnKRQCfr?$aYpjTM3xNe>zTt#8fqx3O4*y)T}PzraT)LR_v8l^_HdA9Zlm+ zJEUl48<>L<8iXu#>?)q&{lTH zM064mT|h3D*uCK@Pla=Ze73GIliOsAgapz5}1l6p2f;cN`{d}=;hw*jlXTVRUEn@XQZMix0t)was zEX5ZKge+v+0N||SobJYbnHs25gjL=D{RzVY$8#m85~+G=gf;msttb~li?>e@u>&^o!LSCF`hXQre4JAXhB zHLZnqrlW6KI>VE<`HV|TP#d{t>ICy`3NPf-wAM@W0oso=1=Ln1UER8zPusr>)D!v* z1E5M-5{;`3h}R&rVUSUs?)44)*GpCzw%S z`qx%i$o8y`<5g`|Dbxo;0DoGg6rp@ahJbvBkqPg1;04gX6)$xd0N!_BcelG4MJ!@g z@NQ-OeY6uQc-}8j>85_$3UtDs7D+W&I5R5atK~@DGlE#;@!hTrCX$`M}@H;jpkV({pkG|=6nFb&Z;gFb=dF5PO zFZ2uyuWcJNExY^kD`lXUrivbaet!1gv6pnml=M&RR%VpfhL_u;5t0F$_#c$#bAPDariq}dP;<>qb08jmx@sONhQ6^#fD(+2C&k+ z#yI?G`p$V#jXMP5IcOt#kQ{*kF(bRw?j;WS$C12-*Mpv~!~yjlIv5*pfO;7L)}ac$ zh(<&Icw4NamLUUa2yZM9(U+?;>clWAJ;}Mhh$o#;lq(-d%vZ7Mln0Oe1_RWUJ zzA@X|y}m}7JB)H`E^7>@95F>t?{{noob=eNLYKKicIjUPj_ ze4?0=aDybChHrW&9muU6!Ej%nqRhH%|3og!%}+Knd6R?2?pF9)pKNw=P>A7nF9)qU z@m-G}EA&x|L|_3b^K6S{CSZ@H_tzdvjo%(ij?W%TjMpB^H|mIW4{g#mfXpv`dYvU7QJGz~x``z}r*q-O=>*8{FGrQt;!>$xr!!)(< zRVR_Ea6&%y9`881-;{EZQoVlmW};vic~lBPXKX@Aa=_mdzdrj}I0U)$+aoKzj5=mq zhwKWnQ2a9&`}mXw=RA)?N4CHU|03C^*XCVaLIEEIc z%EZlFk6w^6C<;Rdt;tJ-SqM4CtcW6G9$>kNz`oKD$zsb-!SMOY-RDk~C^s^LGn|`Erd5ccS3E5L83;XQa%Z%|8vX;V>d1k^g z$4=;2sLo{qsvy`Cm`E`VOq|#bW=;ZMOP78kr^@6}BVQ*``RR#p!7Xw%@iU?|w(xK} zJ`N2_N?aUWbwCdU@;~u>97%Y7-D!svwj_c_>rZwr!%Y>tb#7USBJIlf5N_z>H(A8c z+T4|`bB3#*I&&oG!*{;I4g;=L2I%IJqqu6x4{;yE2Az7THH~vHdEW}!+1apFt8OaEO%Md)Z4JFOG;sWEire|*2+Mp3_uPIpqN{pLyX zI=TL^a~kNP`su`xiP94goEjF1fQ(|9xq9Ce2O5y)GoHsd;|yg%Wo>9$6%qqDIGk$*s{oG* z)quu9=SGARPLe*a+<`l<+?6~h(;84l@5E#rdMXhR*arp5y7u*zZ0P7JaM#evf&K30 zF9yW)N|gd;{!q(A5Nh~k(o?b*;qXSnK0fawJD3{yKfl%4U>}-fH+Tt_LB<#bx@F!u zNk-GD_(lZ@E_i&Np^qqdEIcYx7BT9{pR`LVnmuCPH2np-UA^`JS+-Xa?HI?&8q#+` z8-awu+5~~&r6$Cv#*I!V%k0$7czqjGD?OCE#E3stSnmf-i;s659aQ_S&z3B1*iRn> zS4yt5+LlpIEOhn$ok@3*^C>2vDJRszXAUH9636Bj83 zrt9k(T!437ngY*b8Qv&$oL1fTeWl-$n#yjw2}YbIhggq1wvF+I8~ZC9n=SC2_feR$ z9DuFxo%FHuSc!2P30tkBktdl28{iI2ehfg=>U{1RO@2?@tB`UYb&EcVKA5xaY&$|Z zfspVfxiBhZE%2ndK=w4b{E%uTaK0$y0zWONI;^?;?in(B0Z& zpKN8vYg?vQL<%B&P{;%`{DI&NGM8?)J~FvnQYAB6KRx#T{CgWd5S2m*?+>dCKgf|5 ze;`tW%zsrib}Y_lm(1XhD41OTbh(8mFZh>e|25D*<@vuwG?PYwfg^7ihCp6>2S=9k z2!JH}Z`uCUDwKCs;~oA&6<<5?oo&*L}h?~?e9X={F!7`Cn1pI-u=4ZGd8#6TTd{x;xqq>eGvfn zr8z`YsYE+}mnlS8hpBP_3srBK{aT`DdXf%Ch7*xy`S@EN5*Ie$eih?9Z5Hc9d!1uH z7L2WOloeVZ{MFb5^k3E<&DR!P7p#iLa;t}T1Sx3*bH-q-oiUXG zW+`a3KXn!5pYiqSJBMaIx6VuguI(6k9~#ib^cCg!fahi+J#B6~0T%-=Kh+(Y(6U}t zLE#+#-GqVAp#e?%RTV^Pt^7A3yPPjE!ikAcM-v)CMftB^ayj78_Rm0X`CoGxQWTd`-Y+ZsZ$i{uZo4R#{x9-hGty{WIQ@xP{F59w zF;%IV+5ZoD?15Mf{h4x=+L^--v1`XrrfO{cKM0#pHVIUPx9a6|4R~c zO9F5lKql#wG2pi2LCrdg#QI%Wl8;#OtkDy-KK>Bp$F;UtzT zy0Ed~CMDkY{_+wp0>oyTYx??CYN@)lZp9*s-7^+2u=&y5U=~zGV@kv) z0*zCvfW`U}tj-o4vq~2Q?mKa%Pc5+-is5==%jqUM#qTzZmrIkR?sfF>sj6e*Ada?5 z{fuEcagN~{1;6rWYYNlr^Mb260&4AmqVE>5NJN~nWS_*$NF3GBxpf+c<|@@!^H!&C z#-s6$4R*E~SOS+OPg!pHY13+pJ4c|Q_DnJ!B^rJE->$C-pJ!zN$`f~#6mmG}&(ux7 z+d5?oPh6hC!z8`A&vipU#>Y(nxX2lD*#Z=x$>$|K7}#>QHhvQbcb~8KIzK)eF}Idf z7uj_ZSHH$}oF4ss7r%4LztAWPp-5L5Bdx1U8~hV`oO)KjBOdCo zV7Wau)D$jGt`&I{xgXa(@_tQC=3o8ojk|eq@Fr=XoNg_q8yE6GH~eBecoO7{PRBae{eE7 z7b3e!ezGT`e_o3Ifd>BNWH=k@$^RQPkT5rC21Ex3u;ZS44QIDpAy-IQ#ZQ)`C6;>$ zJaADFWJHgT*uGY5yEZZO!d73Cq%t{WjP|kwcCoLGM=Xf%R;4GoPMX6-} zo%#0tY|sSG?f6RWH%{QXwbDaU*(qR7wuB!Cc%%W_ zW)0i;$TM+r^qb$DfR4#DC7i9RB;W3>iK;7kAvcc|a+eghvVPVgNHh# zlVFa98D$G;{jwr4eIEK z<<`8SSgHVXpHl>dBO$nrZl0sLkt?E9_(fAsuMNb?*mgn5@JBN zQttAWQ|BrU6%{gR0PJKb&fXx!zkv~pvEvc7GU?E>^h`ET+>v781a=jL8TXXn;#~m@ zX%u~;?I*$SjG$t{ebVLwgFcCI%I3XXdVkf{UMBwEP z3-h?6;4vf|qK64KA?VePXgp|u=b698bnRgphAm&Lk0lZ!o}cv`9(k;ec$qHQ)vz)! zZBl|%=U~&<_XDl!Ok5M18xNu{ttoaexDzc%ZWjrv-rb|v;(6?ZGL{lMKDCuwSxWUj z$)QxmUslQz4Na5|Y$Hs;vaL?=5d(VQ!E@#z5zLU#I`rAtt&`Huv~`I_6`V`EVgBj} z?sk@CMCaokk-VBZ63m4DHq~lCR`CG)PpZ~n+qCHXW5W+W>IVOlFZGioC1Yo8V^5=R zZTC-2`}w@V|0YTOY;F;8vetcga6a3~7d(s|%Dly?l9i(x9%-WpnoSpxy`>W<;5m!l zJZol}`#4Wd!=BHNZZT5Pf;iYc?4^spW^wr7Nvi@C(jnZ`C-;Ss*rpS7T9s(|NNh~f zHn$5m2sPAQ8rOUt5j2n_#)vi~W|rLJbuL~|AdlR9WpZ1Vnsm4eocy8pIhp`BNDKm? zZX{@@ZHOWy%>|4LcPi&DEVZD14afWi9OIC1N77b>ZL;xZUP#n7!2cZW$v!DunG+^a zub~{*Vk?=Y2pYNU?(%&#j8^yF}uE6(5`eqim2N69;#vD!Fpn4Hzj_d>Vy?NqHQQVM^)}Q)961V(a#b>H~T-@y8m+?!Tr0o>e|@+V_W=RW&P3ApDRPY z*J_>icPH!$4_X^b-6r)2km_KJ-ifX{@POrXngmIofoRM^&gyb>9j5fEQ<_wCG}5bp z+{KgQ%pkmn%o1aPT0#N9AIsv2F8t`r<8ov&exxGAH4Kvx2%y%(>+RagW*83zI7kUy z{=YDEZA^uHI682;zE=KOTW@>IOEdq2@aW9;tS~elOgJZQq)EJ+) z_gHf)MM%Y1!t_C**M@dzB`!x8{dzyhP?8h4p!qHVi(_9o*UPruYt=a zbRM4LwHILi|1GFSoH6LRRFOy=r|?M;GfCxZdM@XqMb~cYId_EP^A4HS(s8|v9D2;| z$s&zEg5;)M5Rj{?iTTrhODo(uYsQweqN3F2QuQf!Zye-omR)9ii0?l~|GKT^nU42n zRd}W~*N1)FQ0(+5VBPDhTJeo%j+UKpYB!#{+(`J1-24ZnS`FoA<#rXHx!<=X#Q*aA zr&o&tA4zV1aps}cnfaanfdw^Wy&@x%F0k2!{QxOQGC+`6j*|-TMl}HSh#Q1f28Q{- z+=g`E4Z243vp*18_X8J=V4MVkt{rv7EeF-PJ z5zEk)x*&|0qX9JndEpDXcJv8Gg!WW@sCJa;Ms)q?!}bXM{w6T%;e+_-n$er%2+dm@ zpqkMd=jbM&w>uFg@H#ql)&AapY@RJbrOU^FezwWF5q$lBH1k+eh0`T%cM SVDe&M-~mELU<$SO0`UNB=zaYF