evals/__init__.py,sha256=7oUo37WQzvHsLhB4VEnibIkm6jbkz-4ec8txTN7yQE4,743
evals/api.py,sha256=WAmyNL414UtwBD2mGBdjVMiK672k0cwigMYptZUYh2s,2965
evals/base.py,sha256=L8L-vg4inepq14KnpwnIYzNgxnPUWsfMVS2u5_XiRDw,1951
evals/data.py,sha256=hb9Mn49jbxcv65J2Fqr5aJth8jjoUhOpet0GRkmxKn4,7283
evals/data_test.py,sha256=WVFouipCxB1ir_soBT4TzBgorkFl07KbbAZA9uuDD2U,1353
evals/eval.py,sha256=GGcXxgIk1f7NxbbaMjf893iF3mBlDvqVEfOu7O1Q97E,9420
evals/formatting.py,sha256=XbSASxVJRum_yCW-V9UBS0qWzEksdVP7agpp2iDbArw,1137
evals/metrics.py,sha256=-vdRkII76u-h3e4tsNlfu9oW7URYo0CJW3bxXmZNbtY,2561
evals/record.py,sha256=HP0ExeMtpSlwaDm5qGqSsX8PNPA_RtrKIP_PH2ykBx4,23568
evals/record_test.py,sha256=zuSXcA2O6cR_RKuYKw-8JKmW0yJzLSvpjaJWSxZP3kw,864
evals/registry.py,sha256=gdHHgdwBsPMNLTfI4wuFU_s6ec6eC9ntfq0TKmnXXNc,11667
evals/registry_test.py,sha256=LgWA9X9ZQvpBu0b_W9IgcyrCIt_p4QC5BQV_k5Oaw-Q,1463
evals/task_state.py,sha256=cLwDTKT1oWnL49YNIV333ofL3SGzz0l92kcOl_RBD44,1609
evals/cli/oaieval.py,sha256=Fw0pWCXJvvn2LSpMBhDVlcuLOLV6dCr_RenMWXXslsE,11049
evals/cli/oaievalset.py,sha256=L3vg9UkzGnsNjTkCD5Vu1F9Zl6yOXE4zp3pjWnG97t8,4322
evals/completion_fns/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
evals/completion_fns/cot.py,sha256=WpOy86QVfqSUcTfY0kAoGrM66Tu8sL5fpK3-jGYAPps,2678
evals/completion_fns/langchain_llm.py,sha256=4CvfielD4lJ66M3Njg3gAviQgJGGcR-CfMrmK_cHd2o,3395
evals/completion_fns/langchain_math.py,sha256=n6QOU9W8h-mTdrhn-TWM2hNZjItIJtXZzdWAaON7VfI,1093
evals/completion_fns/openai.py,sha256=caAWwOugnrQKnY8Ics_PGZmdjoc6Si5BNm6v14ztejE,6130
evals/completion_fns/retrieval.py,sha256=t0MGRebrwsDoJe8nl_zzWYRMWKSlFXPgWuZVvEw465Q,4556
evals/completion_fns/solver_completion_fn.py,sha256=kYbrqmZdpSIvs5rHtth29ATUd85M_1TpqnDcrTz5Xqc,2886
evals/elsuite/lambada.py,sha256=5NV6Ju5DvCQ9aXajkkbw91I3-1SHrs1r50HSn0agSf8,1335
evals/elsuite/multiple_choice.py,sha256=R7bGMPPu9b9W2OMZN8aenwG6hup7BM69kwhR6GDPg2A,2729
evals/elsuite/solver_tools_convo.py,sha256=jpdtdjJNTGho9AcNHsVMa9STifq0j6Sza2esUqe_eF4,8396
evals/elsuite/translate.py,sha256=XPgzLbeWMXYxA2qZChKlFFe-KvKyjiZSzWsgAaA2g8s,2710
evals/elsuite/utils.py,sha256=DRxiMz_NqzdQEBRO8NdcBUikBTxstUNJTIQYrkJnLjw,6415
evals/elsuite/utils_test.py,sha256=-TONgVrgJ6kcwiRLJ93r1sKg4oh6Che4usZNnfu1Omc,865
evals/elsuite/already_said_that/distractors.py,sha256=9R-oYtpr8T37FUbnDUbVkv9VVvu3KkeP0YE3x5tCVII,4112
evals/elsuite/already_said_that/eval.py,sha256=MtZU0jmnnW49Ck1hGdarOiS5CjL62tUcRaFCxLp_sHA,6622
evals/elsuite/already_said_that/prompts.py,sha256=9G38lQ3En35WQLsladndYjSBCP1i77pPAJoMVOddaKw,1045
evals/elsuite/already_said_that/solvers.py,sha256=w9Ezmpf0gD9kGX2fYjC6nNvERTyHQBttpHwwukdCLqU,1407
evals/elsuite/already_said_that/test_distractors.py,sha256=EdiJxr8tnUT4cCDA92KSa171QiEvikUdbzHJsEVfCzw,7146
evals/elsuite/already_said_that/utils.py,sha256=oVIm0xAmHcaxHKlFpEkkI1K80U3GFOV8mT4NpBRJj_o,6480
evals/elsuite/already_said_that/scripts/gen_data.py,sha256=v8NTCNMXB-NZkltIevAYVRhgZ5pp2gu3JfXZHTR7EzQ,1998
evals/elsuite/already_said_that/scripts/make_plots.py,sha256=inuyNxDaBvmEYbGpBvGIXpFqfSly5QZgiaw1eNSFXWs,10176
evals/elsuite/ballots/eval.py,sha256=JwMQ8bsNGkMSlw6BuMDdyeseHMPljvD2ujPb8_0enaw,7598
evals/elsuite/ballots/prompts.py,sha256=Dq6LOvqUUZffh7GdI_VCcsbCKcA1-aiIhMOKdtJPBQs,15843
evals/elsuite/ballots/utils.py,sha256=HZQDL6A2mlgI6GTzACsTBzAZ4Ggj8uLhU1OVxma2y7w,3804
evals/elsuite/ballots/scripts/make_plots.py,sha256=-SzD-eEYL0PspmHOGhHCurXZ6NQBuB_c-iu74AwgSPQ,10658
evals/elsuite/basic/fuzzy_match.py,sha256=wD5K3lOT52Vxw2sBZbHaOWe7utOQyw_Q2DqPYztqGiE,1986
evals/elsuite/basic/fuzzy_match_test.py,sha256=5In4zr5sH0KksZYQGWNEy0t3FcZV5EEIq-pEawSyvXE,1641
evals/elsuite/basic/includes.py,sha256=PFOE-jWwMDXQXm03ZxdhvtnbKi-aYrpmCLi1S5B_MUo,1823
evals/elsuite/basic/includes_test.py,sha256=zfPQHcyA__9XzsAsp_Sz-mXJDvCOjpsDVKmqgV6msKw,1801
evals/elsuite/basic/json_match.py,sha256=bMlx-aNgqJtihZXx3lsDC9OJafIWDMp1vPEe56oHleg,3782
evals/elsuite/basic/json_match_test.py,sha256=pAbf8VAkkoFEh2KNaBECYcCvS_kAHsFbyRR-lrc_I8E,3740
evals/elsuite/basic/json_validator.py,sha256=mHD0eKAjpUX53ilB9qyqqX3rUidl-KdFL0b2WNNAwes,1397
evals/elsuite/basic/json_validator_test.py,sha256=vaMdDypstgDdlyd-x1o0rOX_NraCDcFZ40niOxzq4M0,1415
evals/elsuite/basic/match.py,sha256=Kn2nA-VB-u3f5TZaOSc_-8TGKZsRxS7-Z-wjbpxG3UY,2301
evals/elsuite/basic/match_test.py,sha256=7gaKzQUwiyHH3T59HeQjRxE5AJlfQCI9p9h1VMOZkLc,2266
evals/elsuite/basic/match_with_solvers.py,sha256=r1ad29ol3b0TAghspiK-EMmEZ4JiBvHfRZiWh9gz3mA,2599
evals/elsuite/bluff/eval.py,sha256=wRYhPafyin06i2gDWUOKyP84824ZdtR65bt6FOFHJg4,7835
evals/elsuite/bluff/prompts.py,sha256=n1_-rmet34AszOY0VdNJbGE8bcffqZKJ9IdVgmRGJ90,448
evals/elsuite/bluff/solver_player.py,sha256=5fe4BsQO51rQIsKLJ_EHebiiAjBikSVoJSAsHu38VXc,3788
evals/elsuite/bluff/strategy_solver.py,sha256=Arckv4-APnv8IPcEbx898KL3cpT2IvW-4ACzfSE4nP0,8455
evals/elsuite/bluff/bluff/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
evals/elsuite/bluff/bluff/cards.py,sha256=p_cAbi5xn0S4ft3u9_D14B-Yj02fCvNO3-4oRhFJp4g,8248
evals/elsuite/bluff/bluff/game.py,sha256=vhCPGbx48k1RR_A7bk7hCdp1vWO86o1R3GnpmBaEpnY,2100
evals/elsuite/bluff/bluff/players.py,sha256=050EpiGswGcsij60Zh2_37RLZk-e0dhsXO0mZDaYPb0,6493
evals/elsuite/bluff/bluff/round.py,sha256=Z8mDXbB0l3eHal03xQjHlFluf6-TXOqJ6sdfUF-ROSc,1830
evals/elsuite/bluff/bluff/task_description.py,sha256=kLZkGb80dbtqnCfODDzLWa8WXNngn87JPpmnX72a8D0,2504
evals/elsuite/bluff/bluff/test_bluff_game.py,sha256=sZEk9DpBeNhiX97lYt6fRE1VJW9lY2E2JlmLym9pvOc,1121
evals/elsuite/bluff/scripts/make_plots.py,sha256=P4jkzu63Q7-7jFepOJfmxmXGx1AAHenSSB0ogaA8IyM,4903
evals/elsuite/bugged_tools/bugged_tools.py,sha256=8csdytDEgkHnEwrxMvXyjZH6lpM2uONJece34dCGqnI,6752
evals/elsuite/bugged_tools/eval.py,sha256=xobx45xrF0fwUL4NcHyg3of1qTrfgQqORaBWcs74d-4,11172
evals/elsuite/bugged_tools/task_description.py,sha256=T13YJJyQIfFCAC67cfzFJK_UJP1V1djoAcOkT_sW-08,4926
evals/elsuite/bugged_tools/tools.py,sha256=HNofgTdIao4HWgyHrKyusNlYCRxhWcGoanzpW6Q47ZE,22202
evals/elsuite/bugged_tools/utils.py,sha256=5A7xbdL4TJ7wtR84-r8fySmlIFa_y6ivLU6ZLDXcr3o,2268
evals/elsuite/bugged_tools/scripts/plot_experiments.py,sha256=iiQvhhMnfjyuIZMVf3e4AGLjhcSyuL60y3ezQCCKk5M,4023
evals/elsuite/cant_do_that_anymore/defaults.py,sha256=T1cLvbqbPLSPZDYG6XW64A4uhJgQPY-IRQaWbKjhekg,1057
evals/elsuite/cant_do_that_anymore/eval.py,sha256=jBhzueXXdz6rZTMArfNV6x_RKarGNFUFjY2BY-EMI3c,7809
evals/elsuite/cant_do_that_anymore/utils.py,sha256=HnQBNqirCw7_Pj19XdhGWK4MxwOE-sMkG73z2oxFvMo,8208
evals/elsuite/cant_do_that_anymore/chess/board.py,sha256=9mcw-ZdQUhzlNlBybgIyQmUuw8rkexzeXfjgcebjiQA,9486
evals/elsuite/cant_do_that_anymore/chess/board_test.py,sha256=28Pxy-H_XvJf4o6EsD32BFDRpe_Kj-jPt8fgk8faBXk,3164
evals/elsuite/cant_do_that_anymore/chess/move_variants.py,sha256=KNv2QDPW1dvpnAQROnB1evFv-QDaKtAnz5G1M5uTQo8,2620
evals/elsuite/cant_do_that_anymore/chess/notation.py,sha256=3A0D-DjhQOu-xWQJFjhLvQOfFn9ejkvRr-1qHW75dXY,3626
evals/elsuite/cant_do_that_anymore/chess/pieces.py,sha256=9EZUDj9CSGd5qd-CWYeSWlCTwo4Gyut2ttYHcSDCU5g,9964
evals/elsuite/cant_do_that_anymore/chess/utils.py,sha256=BKGIbOwcrNXFYiirVKh8JOKZJzZ8MIf49mMwbwNEH_Q,2969
evals/elsuite/cant_do_that_anymore/scripts/dataset_creation.py,sha256=ymbsV47EZmQsyFQqz5iWasw_uTFzViOoAwcHNaHO62M,10156
evals/elsuite/cant_do_that_anymore/scripts/diagonal_dataset_creation.py,sha256=0xtFbONREiVl0R94S6vcvkzoqZrwc65QaFSA42iNkZw,11220
evals/elsuite/cant_do_that_anymore/scripts/make_plots.py,sha256=JC-_HZLlVQfmN-CsLd_9HWfVv0O0hOCvHEh9o2lq6xY,4226
evals/elsuite/error_recovery/defaults.py,sha256=jO4Bnbr50GZ_P1Ps2p-PIGyVNWmb2v2IaWLMiZBxCvo,1019
evals/elsuite/error_recovery/eval.py,sha256=LtlQts5HDsoGpCxRYHeVNj23LGpBGCXZurWRy4QwFhg,11310
evals/elsuite/error_recovery/scripts/dataset_creation.py,sha256=qYH9r6e8M7ES8qhuqdBfPB1GEIsqDvaqOCAgeXNTc3o,5626
evals/elsuite/error_recovery/scripts/make_plots.py,sha256=Y_ptr_-qVS5-UeiP7GQx2pzZZ4UrBtWArElP3P2Vd6g,22063
evals/elsuite/function_deduction/baselines.py,sha256=rMOKOb2HAgF-TDyBOmFOJ-BjfZsjoYDxtmCcqxnMcP0,4953
evals/elsuite/function_deduction/eval.py,sha256=r5j0GhaIKBh1ypLZ0A1vI9wsFnPoxvAPcMu34VmS1Bg,12285
evals/elsuite/function_deduction/prompts.py,sha256=rHPwDbyV7_L-LubU40wGAt6MCRcE0jfbaJxHQxWOMzU,2669
evals/elsuite/function_deduction/solvers.py,sha256=xTKGUXagoSGNS64sYYXnZVQtfd-vsinRxpwhELkeQGI,6635
evals/elsuite/function_deduction/solvers_test.py,sha256=mJfyGLizz-1Lt9sPMZuhKl4HfRTW2ezyGQL8fjpXiOk,6613
evals/elsuite/function_deduction/scripts/make_plots.py,sha256=VjeCJ2OXpBMPEkYe7Cytn1Hfv2QT6R5Boncjx-r93jY,7595
evals/elsuite/function_deduction/scripts/dataset/create_dataset.py,sha256=DUoCOG6X20E9nzP6rwG8CR4kMAW1F1D7WOG-4hRGF-Q,2022
evals/elsuite/hr_ml_agent_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
evals/elsuite/hr_ml_agent_bench/actions.py,sha256=MVmN51Uv9Vl9UGU6MlqEnNjnos0p0OnPeCj12s0glIs,1737
evals/elsuite/hr_ml_agent_bench/auto_marking.py,sha256=fmXBPe8gYlvP6f8p624n1lOWSpudC2oQl56Y5FLUDA0,2962
evals/elsuite/hr_ml_agent_bench/autoeval.py,sha256=khjFHmxyBRxIFauES0PLRbhOKt7JmANBJgSDa9YQBi0,6361
evals/elsuite/hr_ml_agent_bench/environment.py,sha256=dyFoh60Fy4Do9u9yaDJ2pU7m1LV-gctSHCwviIkgkhg,14248
evals/elsuite/hr_ml_agent_bench/eval.py,sha256=W3w4PgFkH28_0E_oNEJWoX2g8j_NyFyi4H5NrGaj4U8,4346
evals/elsuite/hr_ml_agent_bench/high_level_actions.py,sha256=FW61LT-QUVCCAT9KOenXZlCBu5OWqKTe5vBVkwiTYII,11094
evals/elsuite/hr_ml_agent_bench/low_level_actions.py,sha256=pSK5LwCt3rNnE7q39Y8il0EticBYfsHKPs40ZCoNkkY,13517
evals/elsuite/hr_ml_agent_bench/prepare_task.py,sha256=lmcc52FwHiZx2szzAh1_W64J5lnK19MN_mA8BacnUwg,1834
evals/elsuite/hr_ml_agent_bench/prompts.py,sha256=1TKR5aejStC4KrPShDvi895V46dwmHHspH81PnT-vCE,1732
evals/elsuite/hr_ml_agent_bench/schema.py,sha256=PigQ1IobZpWatUGyJGpLOSdDxyZKuwQEx0u_Sbbqjvw,1299
evals/elsuite/hr_ml_agent_bench/utils.py,sha256=i-k6oRMA7P116B6V4gcVkjCoxMzyyrhuqMxCoXJ_oEg,5183
evals/elsuite/hr_ml_agent_bench/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
evals/elsuite/hr_ml_agent_bench/benchmarks/ant/baselines/human.py,sha256=G0llz9jFGzYOkfm3nln2c7r6FhOQ0TwehmIGV904JoE,1323
evals/elsuite/hr_ml_agent_bench/benchmarks/ant/baselines/naive.py,sha256=VaIyjCEMWJJS-2W1VJLq90Do4IoXkTm6Ndn582Xa1OM,1200
evals/elsuite/hr_ml_agent_bench/benchmarks/ant/env/train.py,sha256=jo7V9tGSk94cCOQLRQXObWAAXgfuhz5lrqft0W3YQeM,1142
evals/elsuite/hr_ml_agent_bench/benchmarks/ant/scripts/grade.py,sha256=ubLVjpz5StBRkv5u2A9ry9gUg6baz2DauHDA0pW6XcY,2890
evals/elsuite/hr_ml_agent_bench/benchmarks/bipedal_walker/baselines/human.py,sha256=WVz8yMqaMOO99vGFgYjWU8HUutIXmiFv5eh9gQSIYfA,2389
evals/elsuite/hr_ml_agent_bench/benchmarks/bipedal_walker/baselines/naive.py,sha256=_uz7ZoWl_8I1JzgehV0jl6qjT8xeAyZ3VusTAWt_OBE,1182
evals/elsuite/hr_ml_agent_bench/benchmarks/bipedal_walker/env/train.py,sha256=aX4yGYea6AOv8zdCmOQuqzMCuqbNgMbNosUJUHQd6h0,1122
evals/elsuite/hr_ml_agent_bench/benchmarks/bipedal_walker/scripts/grade.py,sha256=xKFKHE9igGZOcMQNZUeD3V77LTg55e5PyddJ8qyUtIA,2368
evals/elsuite/hr_ml_agent_bench/benchmarks/cartpole/baselines/human.py,sha256=Pa1A69h7hChdVd9BvULLpTVC2Rs27wg_j6-NF5N3ZVQ,1041
evals/elsuite/hr_ml_agent_bench/benchmarks/cartpole/baselines/naive.py,sha256=DNpE1or316KffXk6mabJmrVnOV8bJNVdDItNVRpdxEA,1173
evals/elsuite/hr_ml_agent_bench/benchmarks/cartpole/env/train.py,sha256=k6JY0I0OSj1-At1zJCo3WhN9cWr1Nc6r5aYSJ7E1ddA,1118
evals/elsuite/hr_ml_agent_bench/benchmarks/cartpole/scripts/grade.py,sha256=CG12IyFCv5vqGqmsC_freSDWQ5AU_YdqicQaNdg5uG8,2269
evals/elsuite/hr_ml_agent_bench/benchmarks/cifar10/env/train.py,sha256=oYfgaikAtD0YPIinhQVuHyBfrnI1tQ5yAdon8HR_VXs,3794
evals/elsuite/hr_ml_agent_bench/benchmarks/cifar10/scripts/grade.py,sha256=GixxhGmDhtODvcoccXj7KFkuK0URySQZwzPjggYuUPo,2013
evals/elsuite/hr_ml_agent_bench/benchmarks/cifar10/scripts/prepare.py,sha256=7qjYGIcQ9GlXAU1UcoS0bGcCKJxkEF0vA7Xiv8a5QrE,274
evals/elsuite/hr_ml_agent_bench/benchmarks/feedback/env/train.py,sha256=Um5pU-YkNEJAbSw79wsp7499hRFZXcjSNHjgirZFtRQ,2767
evals/elsuite/hr_ml_agent_bench/benchmarks/feedback/scripts/grade.py,sha256=gwcbsNF_zk3cujPPh4zh5yEPDENN0SCSjcE4vIFLtjw,2231
evals/elsuite/hr_ml_agent_bench/benchmarks/feedback/scripts/prepare.py,sha256=yMfmswmIXUbkB-i2dVa2eBpq8ShubW6iQ2VIEm9wH7Q,1147
evals/elsuite/hr_ml_agent_bench/benchmarks/house_price/env/train.py,sha256=DKGZBogQx0D5C-MHRCSnp1o1a717RBKlc6XZQe_C7yc,1792
evals/elsuite/hr_ml_agent_bench/benchmarks/house_price/scripts/grade.py,sha256=Rjat-EL-3UycOsiNNsdsFb_2AdIzTbKQgl0R4Y4lIPo,2277
evals/elsuite/hr_ml_agent_bench/benchmarks/house_price/scripts/prepare.py,sha256=uuHpf-SHn6XgnFqJAREDTTqI26tZ0rlH_sy7a5eLgHE,995
evals/elsuite/hr_ml_agent_bench/benchmarks/humanoid/baselines/human.py,sha256=JJ_B7KqU-9CdJdSyz53yFlocho6O_ZSUCw6IuZq8tpg,1287
evals/elsuite/hr_ml_agent_bench/benchmarks/humanoid/baselines/naive.py,sha256=zqlP4rcR2GtzFFsaqI60tKvrddMtZyqTPGBX9QEJp8M,1216
evals/elsuite/hr_ml_agent_bench/benchmarks/humanoid/env/train.py,sha256=ujv6n4oMsAHojTzqOX2hznQBdUmJj1eHJpQNfvnx_8g,1153
evals/elsuite/hr_ml_agent_bench/benchmarks/humanoid/scripts/grade.py,sha256=0BQw2KChVZCc1Hz3OscTCDLECGcvl41OKsuABelkvjw,2806
evals/elsuite/hr_ml_agent_bench/benchmarks/imdb/env/train.py,sha256=fFTETGC9xTXIQRzZTjAYhDzQ_o165-qIyzmPe-tMYDI,1233
evals/elsuite/hr_ml_agent_bench/benchmarks/imdb/scripts/grade.py,sha256=zJWCPCYaKjTQUUl6lupGu1z_L08LOH92t3ra-wvO1-A,1596
evals/elsuite/hr_ml_agent_bench/benchmarks/inverted_pendulum/baselines/human.py,sha256=dQZgLV50VqImEeadEjLKvEOTZy5ZW-H-I9QgGr9qEmQ,1155
evals/elsuite/hr_ml_agent_bench/benchmarks/inverted_pendulum/baselines/naive.py,sha256=EfWbn3DIz50TmXpkkPk29-7td02_OwYFpwjpNg3jnys,1241
evals/elsuite/hr_ml_agent_bench/benchmarks/inverted_pendulum/env/train.py,sha256=Qr7lY75WqBsBW5ETGDmQEq6H1hX4CwttvFlQEI28jQ0,1170
evals/elsuite/hr_ml_agent_bench/benchmarks/inverted_pendulum/scripts/grade.py,sha256=S2iuF9YYGyF_O5r8a8PkxTy_jPsgiNewA4nnYyXFKHA,1876
evals/elsuite/hr_ml_agent_bench/benchmarks/ogbn_arxiv/env/train.py,sha256=9EMBj7GHGLcJD9fo4-eiDWWNopy2i_Qh0SXMWDVE1sg,4519
evals/elsuite/hr_ml_agent_bench/benchmarks/ogbn_arxiv/scripts/grade.py,sha256=oTCMWi_MmxC57gxjEuhcU5soKImtOvIMSgYg3fpFdeM,2481
evals/elsuite/hr_ml_agent_bench/benchmarks/ogbn_arxiv/scripts/prepare.py,sha256=4tTQQvhtz_w56mTU1ewq230aNsyhvzIXTiEnPkJ9yag,205
evals/elsuite/hr_ml_agent_bench/benchmarks/parkinsons_disease/env/train.py,sha256=BbHb0suzUPyG4YHi5DebkzJRZDP4nQqX3QnBYrQZYq8,6030
evals/elsuite/hr_ml_agent_bench/benchmarks/parkinsons_disease/scripts/grade.py,sha256=WsoGDqzZlsywKuRpKXI618WVssl9DP_H2ZmRqQPqoG8,1635
evals/elsuite/hr_ml_agent_bench/benchmarks/parkinsons_disease/scripts/prepare.py,sha256=-NCvo-I9SkyOrGa2yUdjbVTXwUUWx_SC-ab7ogVgRBs,4371
evals/elsuite/hr_ml_agent_bench/benchmarks/pong/baselines/human.py,sha256=vSYnntcwkw9nWaaI4LhXeAJlfmG6FC9aRsrQt-FoexM,1561
evals/elsuite/hr_ml_agent_bench/benchmarks/pong/baselines/naive.py,sha256=YDmJ4aJkuTrmtj5y8rbiltpq-a84ECi8SrdTcR1RtyQ,1251
evals/elsuite/hr_ml_agent_bench/benchmarks/pong/env/train.py,sha256=3N5E7iJg2roRoUT1HZGfFSYiiHD-EQpj1JxnDQcKs8E,1189
evals/elsuite/hr_ml_agent_bench/benchmarks/pong/scripts/grade.py,sha256=sXPxjqPw433cpfhM0N2kIsztGUBvKCn_5E_WSXp9eZU,2344
evals/elsuite/hr_ml_agent_bench/benchmarks/pusher/baselines/human.py,sha256=IflM0-CIecypMKNyfd5gF0nIA39IpJhqotA0_gHh0w0,1086
evals/elsuite/hr_ml_agent_bench/benchmarks/pusher/baselines/naive.py,sha256=03ZlMsE6mylB_i-Jkwcs1j7tNMaC5ru0hBh8TwHw7kM,1206
evals/elsuite/hr_ml_agent_bench/benchmarks/pusher/env/train.py,sha256=AhUBqlpTrGNXSftzGD4qvu_uyQzbI34fALX0WeZXfgM,1145
evals/elsuite/hr_ml_agent_bench/benchmarks/pusher/scripts/grade.py,sha256=JZ0NgWoL5Fioc954MGh3JlOmhUl__3aPsPGWyshc4Ws,2694
evals/elsuite/hr_ml_agent_bench/benchmarks/spaceship_titanic/env/train.py,sha256=-4A6OX2ErHTr3fJaBJOnDIBFI_EGeYv9oOiW9cAKDLA,2312
evals/elsuite/hr_ml_agent_bench/benchmarks/spaceship_titanic/scripts/grade.py,sha256=OXjnuKj1EOQSpskmVx2T9aasrQYng69ve9cxAuH5sSY,1788
evals/elsuite/hr_ml_agent_bench/benchmarks/spaceship_titanic/scripts/prepare.py,sha256=YJDH09xS0-F3SwK_p92Px_nGErdtSkCf8MkbODjGcWs,1004
evals/elsuite/hr_ml_agent_bench/benchmarks/vectorization/env/train.py,sha256=MBkVlsOlzu6ugtkbzVqbsZnndSSAv6HQx7oNbhzm5kg,9533
evals/elsuite/hr_ml_agent_bench/benchmarks/vectorization/scripts/grade.py,sha256=nd-97I9Td3rBtF-wNLSsnA6SgRv5egLhV1KDclUzsv0,3443
evals/elsuite/hr_ml_agent_bench/benchmarks/vectorization/scripts/human_baseline.py,sha256=lrrd_unfJlLXnCUy7H5ozxlKo9SkaIg0PUDXi2PkLPk,7161
evals/elsuite/hr_ml_agent_bench/scripts/plot_experiments.py,sha256=rJgXwYt5JB31MxrTm-_OJxG-g50kg9pJhLx0qkYhEbA,11331
evals/elsuite/hr_ml_agent_bench/scripts/run_experiments.py,sha256=sXHzlXsfRPKHLEDsOZeaauFN9arHZ5OuWVIxS3zt570,2540
evals/elsuite/hr_ml_agent_bench/solvers/baseline.py,sha256=GRhs1P2jRP57zzxrbnN3_P3xJ1ASwsLTTuyPaRFxn3k,4659
evals/elsuite/hr_ml_agent_bench/tests/test_actions.py,sha256=JSboMUWZKvb_6h0BOp0QVSBkTMdSnHz9Af0JbGHvA2Y,4268
evals/elsuite/identifying_variables/constants.py,sha256=7_q02bMwUOsaokKj-nfjYYkOcxvaWzBHbnKNM34I3hw,517
evals/elsuite/identifying_variables/eval.py,sha256=50firrg2xOIkLCsAMLwP33IlovoxPE1JKogLdqMcE5c,12179
evals/elsuite/identifying_variables/graph_utils.py,sha256=dOIf7yZ2b-Ajw9EUlciw_14PC9TNRL-sndW6bUNtxAY,8632
evals/elsuite/identifying_variables/latent_funcs.py,sha256=qUILzKZDvMwmjYT-aJm72R2Xyxqkr3Z5oozMlrOJL9s,1090
evals/elsuite/identifying_variables/metrics.py,sha256=4LAtawFTwHU_h2Ms1PeUudZhVv2IBL5DU9PAR1eXGYE,3714
evals/elsuite/identifying_variables/prompts.py,sha256=3gbiy0bDBhyAQma6Q07dQvs7xwuO8J2G6fvRwv4-jcE,3479
evals/elsuite/identifying_variables/solvers.py,sha256=9zpA_BkKijgErl6r6K2PlQKvKevtN1fCMM8EUNQ-4zs,1717
evals/elsuite/identifying_variables/structs.py,sha256=IbEEfGaaBqv93JEZc36VtJAKKDU3nfwzhS2e_i2KO5g,2008
evals/elsuite/identifying_variables/utils.py,sha256=OhOsx9UFHeY9sLCEnOoH-clEVx9bB0w4X8Z-Y0LiMKI,3184
evals/elsuite/identifying_variables/renderers/__init__.py,sha256=O5bI6fQfad2-_U-euKIEzi6T3f93Ng4t9b31xxcPKgU,343
evals/elsuite/identifying_variables/renderers/base.py,sha256=eWeO5mUFWf7JeicBaCSX-6aF0zasUX2rTwqYYtK8qCg,381
evals/elsuite/identifying_variables/renderers/corrset.py,sha256=7ltFj1nBHmTRRN_v2_Y5zP2jXONiL5_OnqIaqXtUiAU,14269
evals/elsuite/identifying_variables/renderers/tabular.py,sha256=-Un2ABYOWRVd-YSOCatutDwKqD8iwaPflWqhqhkRPKo,6719
evals/elsuite/identifying_variables/renderers/templates.py,sha256=mx1hoLhXMG5HEPTzRJx4nB1EPC3-iExtBqg44N6BEyM,1731
evals/elsuite/identifying_variables/scripts/gen_data.py,sha256=urm1vb64SOJP9dowUwdQWANSu9cdkvnulYDBsMCBt7c,16344
evals/elsuite/identifying_variables/scripts/make_plots.py,sha256=d5AiSIAj14-HVWGhjiC1lvhp-tDO7LeS4U2NbHcCkwA,13863
evals/elsuite/identifying_variables/scripts/plotting_utils.py,sha256=R5yHOpwXaMSFpoKM0FVpiPlMFfdG3eog2Qurx1qNKNw,5260
evals/elsuite/identifying_variables/scripts/table_utils.py,sha256=pAweMwM3rDwvX2n-9J1XibDjRCsxnwJ8u9IaJc7a6sg,2474
evals/elsuite/incontext_rl/anti-cot_solver.py,sha256=zo3XKgwIBg5ZcDXjKt6a0TNP99EiZrO87e6D_cp2hWc,1084
evals/elsuite/incontext_rl/baselines.py,sha256=D2BitGLlFxwoMRauYQikgVSSopCRnMMaPWOuR42NCT0,3872
evals/elsuite/incontext_rl/defaults.py,sha256=qCSl0goeEORCFpQKh_C4C6Nh_6Wm_pifraxCth1gnPY,1753
evals/elsuite/incontext_rl/env_setup.py,sha256=Gs2mpVGrzx90Ol4VB2mHW9z6aZSc537-h-Y8Z1geaUU,272
evals/elsuite/incontext_rl/eval.py,sha256=fWPPe7t-iDE2DbwfFQWAIRpBZqKwhr3SyPo-1X8AK7o,12887
evals/elsuite/incontext_rl/scripts/plot_experiments.py,sha256=spZgNTYuSZxboVIYgKMZchOAvCp-1hAHladj-pDC4mA,19660
evals/elsuite/make_me_pay/eval.py,sha256=u-8lRO0i-VxFPEEXTIj6DhPSvptWsURhNHahNG0ArEE,7086
evals/elsuite/make_me_pay/makemepay.py,sha256=ZQ-Yv4mv2Nl4BVt1W0UtsfV5_tTSPmV74bbWHtlJt2U,10435
evals/elsuite/make_me_pay/makemepay_test.py,sha256=1rA8F3CwyhYkTutQFJyYBjGoohOZ5RDUndyhAwlhwC8,3901
evals/elsuite/make_me_pay/task_description.py,sha256=2qOw8l46fSu4mMvWZOafhmTLDd-_XhVYb7nH2ULdL4s,4785
evals/elsuite/make_me_pay/utils.py,sha256=Fh0ej0WdJ_VPQ-eAHnxu5VdwVtEVs4MMb7oIH_nq908,1964
evals/elsuite/make_me_pay/scripts/make_plots.py,sha256=tzkc3dJAO82I5Ih5tbwCIU7gvawpo6ru1ZAaDWO_p7A,4490
evals/elsuite/make_me_pay/solvers/lm_con_artist_solver.py,sha256=hchvJ1IcQvuHHxnbAwy6ahZ7ZQJkrLxb1zQRSw4GO_U,3106
evals/elsuite/make_me_pay/solvers/prompts.py,sha256=cU7pkmgBglaKdcqikZ6lk-BNrVUq-hagSrpb4Mby6og,2064
evals/elsuite/make_me_say/autoeval.py,sha256=DYCnrPWUUNvNbzILOihMeT-y4GY6lHDihF9Z-ko2KHk,4372
evals/elsuite/make_me_say/core.py,sha256=6Po17UukUOmQHS33iEXiQEzp-rxo-cbBYOLvYN5QIQU,10809
evals/elsuite/make_me_say/defaults.py,sha256=z7h5tBAuEFxFesfLKWWXS_7B0wDSEW74FUKuj5Cvm4M,12917
evals/elsuite/make_me_say/eval.py,sha256=8QOX_QMOly35skTTZqG78OlRNP87BFoGKrN83g7Yc4s,2202
evals/elsuite/make_me_say/makemesay_test.py,sha256=yDIgS4O2dPAAOc_fELqGrcTgS2NMcqrSZbzw0M_NQ_s,27328
evals/elsuite/make_me_say/utils.py,sha256=TVi0WXOBu2A-9ZTzUfVkNU1YPZ655rQ9XjUQap-mcr4,1224
evals/elsuite/mmmu/eval.py,sha256=7nGw4X5JQJeuH8USiw7d6NJISlF8d2VW8E51LXNs31k,6316
evals/elsuite/modelgraded/base.py,sha256=NYhV7T2kdbFA56_uBjLw3XRbSe8nyLvcHn1UIRqYddQ,624
evals/elsuite/modelgraded/classify.py,sha256=9272s66FYeBNtmt3m6CBxOWTgO5w1mUb_hGmOgOLQSE,4680
evals/elsuite/modelgraded/classify_utils.py,sha256=hrNvLWm2LWMUF7MtLOKC94ON6w-ruzfkUfegkWaYrBU,7632
evals/elsuite/multistep_web_tasks/constants.py,sha256=m2tzqIF0K38EJ47_CMUDWbtO91T19kxwWfgsUZuI8jw,3039
evals/elsuite/multistep_web_tasks/eval.py,sha256=_UmCqN6q5_G5gGMkhBrLzvyg519N4XTp3Ch-UuXoBFI,2363
evals/elsuite/multistep_web_tasks/session.py,sha256=oyLaZcSdrfkwQRHXP9sYTcieSNXuGolmtQDYXz9AzTU,22614
evals/elsuite/multistep_web_tasks/utils.py,sha256=ezF7IubOlNIstL99xwAmEjjaZYwkIlPNrYiaF-0WND4,1675
evals/elsuite/multistep_web_tasks/docker/flask-playwright/app.py,sha256=uMEimSBAxfhT6lwClZpcDk5-Xaznwyn1GSR_e9KlVFs,7333
evals/elsuite/multistep_web_tasks/docker/homepage/app.py,sha256=PMd0_9g-rHmhrTRnSa3MY97FoaHljzl4gA07hEmQnLQ,517
evals/elsuite/multistep_web_tasks/reproducibility/make_plots.py,sha256=1N9X6Fp8xTo54-ICC89X10tPD5al3O4C_L4QmiHiE3g,4366
evals/elsuite/multistep_web_tasks/reproducibility/make_task_jsonl.py,sha256=BovchkQL0qZOBOQnlmvguLxERQCeDmMo8fTLgMsalyI,1806
evals/elsuite/multistep_web_tasks/reproducibility/run_environments.py,sha256=DL0faVkbdzFllxlSK5e43c_yL3E92goD3UR25jdyw2s,1305
evals/elsuite/multistep_web_tasks/solvers/strong_solver/strong_prompts.py,sha256=fRCX5N2vFcZBeL_jovHa-mDC47OaWCS4J03PPcgx4F8,4843
evals/elsuite/multistep_web_tasks/solvers/strong_solver/strong_solver.py,sha256=A-s199mWji2lG6xSei3lPSwvG0AsF3xlLr0aHXO3blc,9466
evals/elsuite/multistep_web_tasks/solvers/webarena_solvers/webarena_prompts.py,sha256=H_G98Ep3JOZ0MtPc9Be7NivRfyfnQvLP5gSZLSnxyjk,7831
evals/elsuite/multistep_web_tasks/solvers/webarena_solvers/webarena_solvers.py,sha256=MVfagQtBq4naDxPDRjiApzBHgRokP4eR7ES6vIW0Xhw,3983
evals/elsuite/multistep_web_tasks/webarena/eval_run.py,sha256=HBvXcUAllaVnsYghpTQcYay4V9oW56oVwgxsH_fL0lM,13486
evals/elsuite/multistep_web_tasks/webarena/task_description.py,sha256=NN0SMHSncwpdfxcDqAkL93pFXkjKzGj6ayhUTTAshg8,2310
evals/elsuite/multistep_web_tasks/webarena/bash_browser_env/bash_browser_env.py,sha256=3XbuKi8Odo8YEUK2WvCAnTOUVPY4kiUmnJw1sLJIaVE,4701
evals/elsuite/multistep_web_tasks/webarena/bash_browser_env/bash_browser_utils.py,sha256=S2cJ_z3QHvORr6Fc1SCex1vovhK7fvoWEOWr-gxg2pY,408
evals/elsuite/multistep_web_tasks/webarena/bash_env/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
evals/elsuite/multistep_web_tasks/webarena/bash_env/actions.py,sha256=PT5gY4hqBZZDjWyrQn4OvHSyNr6_lvi51W4zTotUmJ8,660
evals/elsuite/multistep_web_tasks/webarena/bash_env/bash_utils.py,sha256=vecccjNr36nbzReNaSICCi-OsIEu2MUf3m4cciL_1gA,417
evals/elsuite/multistep_web_tasks/webarena/bash_env/basic_bash_env.py,sha256=PEWno-TSOE5CywRMggFeYI8v6SCb3AfYTyI5VOeXAtA,8094
evals/elsuite/multistep_web_tasks/webarena/browser_env/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
evals/elsuite/multistep_web_tasks/webarena/browser_env/actions.py,sha256=gH23aX4Wi2zF3BruzMzQpr8IeiXJtaE7gYSR9CbS9oo,43999
evals/elsuite/multistep_web_tasks/webarena/browser_env/auto_login.py,sha256=fjCvkTQOoXJnezY-KTamMnUivnQq_FWLHh7B7fX8hQ0,4111
evals/elsuite/multistep_web_tasks/webarena/browser_env/basic_browser_env.py,sha256=SU0lTs_bF4H3d1lnnSRPOoc9zP6u-tJ3HC59TqshhS8,9465
evals/elsuite/multistep_web_tasks/webarena/browser_env/browser_utils.py,sha256=5LGGQrm7Qhm4kfOoK49qjhE6Ivw6ixh8S_RTMEPdWbU,2412
evals/elsuite/multistep_web_tasks/webarena/browser_env/constants.py,sha256=qICpLX8cAkLer8tmvfazpHOTLCmue--rdbpImUWGaUY,4711
evals/elsuite/multistep_web_tasks/webarena/browser_env/env_config.py,sha256=q_MolcEY8D672_aXn0cEpw3_hCORJXbuHM_1qw4Jh5M,1099
evals/elsuite/multistep_web_tasks/webarena/browser_env/helper_functions.py,sha256=C3x6RKhTgzSsJhlj3jPOILEhv-7WLFA_xhImmPPs0JA,6766
evals/elsuite/multistep_web_tasks/webarena/browser_env/processors.py,sha256=c9h77E5pbGXG_GsWiGRFD2PdLK13VutR0EXiFTguKQ0,23481
evals/elsuite/multistep_web_tasks/webarena/core/env.py,sha256=-vIlXGD_t5Rx8luDbKFQppKwK8JuaocZjV1Vr8uS4sM,3935
evals/elsuite/multistep_web_tasks/webarena/core/playwright_api.py,sha256=KCV-jjiO4kEBRDCOnk3FtktlOqCxtI4ld6ZNCb4_j9M,15463
evals/elsuite/multistep_web_tasks/webarena/core/utils.py,sha256=cg6nEEw9sWU5-Bi54z_Hw5PRfWiXwuIysO1UYWfWBcQ,7150
evals/elsuite/multistep_web_tasks/webarena/evaluation_harness/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
evals/elsuite/multistep_web_tasks/webarena/evaluation_harness/evaluators.py,sha256=-Hpp5ckdpMy6elPWzZcdHpsKShvjk60kuhRruVDDCCk,12828
evals/elsuite/multistep_web_tasks/webarena/evaluation_harness/helper_functions.py,sha256=NJXOtC2Tp2zir7R-cMsCeNzswwSPAmdJZ-nE87V1KkQ,5119
evals/elsuite/sandbagging/defaults.py,sha256=58VOnOunN7cfBDAmW3jdjQ2so8GVl-kGA26L0AqJbfA,2940
evals/elsuite/sandbagging/mmlu_eval.py,sha256=4uqN7rafwk2ZBsp79zfr3DuwKkKkoLB1Ijf_rKrOF1o,2308
evals/elsuite/sandbagging/sandbagging_eval.py,sha256=OQdNP82iDY2q36OqtbRREr7oWM742CR3BbmTGVCTZ7w,3436
evals/elsuite/sandbagging/solvers.py,sha256=9Yi5Ka8ZF5OsnCJzjoni6GvH-1cRDzg8H32_hq6Qggw,7122
evals/elsuite/sandbagging/utils.py,sha256=wC_Xf0F6vp_Wp9JIMxn2AS1R7aO7jdw8iy49GrSWcV8,2145
evals/elsuite/sandbagging/scripts/consistency_plots.py,sha256=hwmikY21cA4A_-zyLpu_ORQpSZlK4K0_zfMiOeIZuwI,4350
evals/elsuite/sandbagging/scripts/sandbagging_all_plots.py,sha256=xLvSbwvkCHh6d6GIt_EblErB6VZo8oXKDDpa-m-8LiA,1040
evals/elsuite/sandbagging/scripts/sandbagging_subset_plots.py,sha256=yUuv9K3y4rK7z3CUZ68ulILDrhp9u_L9luY77W03h-w,1922
evals/elsuite/sandbagging/scripts/utils.py,sha256=IlqqwpRVYmzycmdtMWwwaPCl4LwEKNSTK8rlCuJrhZU,4156
evals/elsuite/schelling_point/eval.py,sha256=XkK_PqevA3qc_wPzVUFovhqfY_BjzNJJZI5U-gbQgJ4,3156
evals/elsuite/schelling_point/prompts.py,sha256=WYpcgwuDVpOjZQ6iBio2Vr-nHhXsgcFXKF4nbFb00So,19724
evals/elsuite/schelling_point/utils.py,sha256=KjP-w6cvu6zahpkumD1YASH0ZOi0QPgQESuvA9XBTeU,3353
evals/elsuite/self_prompting/eval.py,sha256=17RBRFL7ZH4WvSjshMGWCRlWHE-_iawvBEt294J6E_c,10701
evals/elsuite/self_prompting/task_description.py,sha256=MsSUkIXud5pwilNbF4Bgfnv01MrnJrsm2BRUh-3QPmY,1783
evals/elsuite/self_prompting/scripts/make_plots.py,sha256=4mMZd2fkEW4D58ZSEPVGja9KNwL1_ABBQ1lYsSBue1Y,5075
evals/elsuite/self_prompting/scripts/dataset/compile_data.py,sha256=d8g-fSMGpl7MQVfbROUOJ59Z2jeULW47wpTZwe36NuY,2868
evals/elsuite/self_prompting/scripts/dataset/eval_list.py,sha256=CzLoDphqqIchHC7HVXP9n5cyqpCSc_tFmdcq0TJgBn4,1589
evals/elsuite/self_prompting/solvers/baselines.py,sha256=_GBZ1t0vC7nNoV7bWncjDP35G1F3qJosGkik4Oo8uGQ,1617
evals/elsuite/self_prompting/solvers/custom_cot_solver.py,sha256=10eIGKRn9Hg9vFh56p4b7zmpMnrzyZEJhw8lgjKkRSk,2573
evals/elsuite/skill_acquisition/eval.py,sha256=1SY4RqyeX7ab0_uHqwCP0X1vIBWRWa2lwIeil9Px-0U,18395
evals/elsuite/skill_acquisition/solvers.py,sha256=_4jF8RID4321FV2ThKtthI3kyIXPn51Uj7S4PgJgaz4,994
evals/elsuite/skill_acquisition/task_description.py,sha256=_DwDvwIszzuT8kbCciqG9VmNASL7iM6_BncVnuAhu9k,396
evals/elsuite/skill_acquisition/test_skill_acquisition.py,sha256=Li5rv0qCRmn_h5OU82y0tkng0Cf_k7lvwrPfUEUqDAI,4303
evals/elsuite/skill_acquisition/utils.py,sha256=99slU_n7JXENFWMDIDuoeZmcDoTEJLhb0XbPIeieJPE,6902
evals/elsuite/skill_acquisition/scraping/scrape_distractor_articles.py,sha256=_C2dq2bibrzkrufKqMMFijlCECqFlOv-Zrw8IzwIBbk,3234
evals/elsuite/skill_acquisition/scraping/scrape_miskito.py,sha256=cfaiOoQa-c6xFdKcNTNXLjxEMF7rlwtsgpcABodFfOo,5093
evals/elsuite/skill_acquisition/scripts/make_plots.py,sha256=lbcfijFbJfM1DBprwxHGvURoAHm29FNshRr-G1kv-wI,7565
evals/elsuite/steganography/eval.py,sha256=PE3JWKNDDeM6zbqXBXlBxQxYVQc4rv6NuZLGObORm0E,3415
evals/elsuite/steganography/monitor.py,sha256=IJvvqH3UjQQdZFDCd0Ys0vMuc3Yl8byMqpbIYMDNiAE,4402
evals/elsuite/steganography/prompts.py,sha256=B1PMAuK8DgOGnYsvjoF65devhPILVbIpt8NO-Nd2h5M,13791
evals/elsuite/steganography/reconstruction_metrics.py,sha256=F3kSmo3trSQgNoJCRfCHTvnFT3I87E3H9k2C3Re6MM8,1493
evals/elsuite/steganography/steganography.py,sha256=r2KU5tFf-iye8gXnrpFNfVmR1GdpWeJl7EG9RJiqumU,3638
evals/elsuite/steganography/scripts/make_plots.py,sha256=rEOmFQi2Wzx61pSQUcF1Zt5-iB637vdTYCTkfFkEv4I,3230
evals/elsuite/steganography/scripts/dataset/complexity_metrics.py,sha256=p-KfX3xcATgI_xxZj9sI9hvyLurbSdIBawctlxfH2pM,1108
evals/elsuite/steganography/scripts/dataset/csv2jsonl.py,sha256=fYFnvjMc6h3fJXH3BbgPwyYA8hoATbKy1EX2OXKgtwk,784
evals/elsuite/steganography/scripts/dataset/custom_datasets.py,sha256=am2IyPFzl4HT29U2lc5Wm2yDuoSDFK4rk_13ZoLYf3M,8809
evals/elsuite/steganography/scripts/dataset/dataset.py,sha256=GGZOOHMBxZVpLsYi92wGzIlQeC5KlqL7IftePUBXq8Q,3806
evals/elsuite/steganography/scripts/dataset/utils.py,sha256=AwZ2usO6oWQngnIP7OdidtRe6yj9LBTC6NDATBcHeRc,1943
evals/elsuite/test/match.py,sha256=5ewMqjSzRjc1AwL6pzbE6uw_z7PyJvIRx1yBMynUR7Y,1083
evals/elsuite/text_compression/compression.py,sha256=dH4mPYR_ovJfkzcIeSo5L2477cRByXImDypoWT-4nSo,3686
evals/elsuite/text_compression/eval.py,sha256=8KDXBOdmE8om1WOBNV9iY4g2W1ZGsT1oYn6ylbjcMoU,2375
evals/elsuite/text_compression/prompts.py,sha256=E5Hw14x6acmzzbJ8C99Kifjyrqy36HyB3R3GKZykkTg,6517
evals/elsuite/text_compression/reconstruction_metrics.py,sha256=F3kSmo3trSQgNoJCRfCHTvnFT3I87E3H9k2C3Re6MM8,1493
evals/elsuite/text_compression/scripts/make_plots.py,sha256=ue5AicmUSbNAN8hUI32FBB2JQ5WvZUBc7YiJMX71cUc,2691
evals/elsuite/text_compression/scripts/dataset/complexity_metrics.py,sha256=p-KfX3xcATgI_xxZj9sI9hvyLurbSdIBawctlxfH2pM,1108
evals/elsuite/text_compression/scripts/dataset/csv2jsonl.py,sha256=fYFnvjMc6h3fJXH3BbgPwyYA8hoATbKy1EX2OXKgtwk,784
evals/elsuite/text_compression/scripts/dataset/custom_datasets.py,sha256=KJR2w3GjP_sqQndPfJXzCCAT63XfxcLxHsZR0JcIO84,5469
evals/elsuite/text_compression/scripts/dataset/dataset.py,sha256=1xBLh6EemmPP6srQeYrBjDx8XD8uMOIf5ZoYRUS8y90,2202
evals/elsuite/text_compression/scripts/dataset/utils.py,sha256=AwZ2usO6oWQngnIP7OdidtRe6yj9LBTC6NDATBcHeRc,1943
evals/elsuite/theory_of_mind/scripts/data_generation.py,sha256=jdwCrjlxpN1_gQaNxHjhi_cl_arxMMIuTVXQRCwJh7o,3048
evals/elsuite/theory_of_mind/scripts/make_plots.py,sha256=Jik3kNrOg-gTs5iN-ZO9nuX1XS6ayi-3pWzk7iahecI,4406
evals/elsuite/track_the_stat/eval.py,sha256=eFUpUxFk4NPebc8gBzbmz9rJ_M_-C9YjuKtFlxs1FSw,3956
evals/elsuite/track_the_stat/solvers.py,sha256=zApGf3doY_cZgsLks3-IpTScF_QudHCMxvO39JwlFwI,3754
evals/elsuite/track_the_stat/utils.py,sha256=BzJ1W8KkTG2VQeeuTbQ9piAxcgwHAh_RM0iglk9NLsQ,1969
evals/elsuite/track_the_stat/prompts/__init__.py,sha256=qf1_3WKtd6Kmqh-KWJmc7E7gVCYs8WvB_cD_9vfGkbo,705
evals/elsuite/track_the_stat/prompts/median.py,sha256=mxiC9Hpe_cXpVgaqo1oW827xaCmgk3Gs6Y2NUvYGadI,881
evals/elsuite/track_the_stat/prompts/mode.py,sha256=4qMgdFNmUiu-uabB7ruYFOvhrT89U4JG48VhhWSZwxE,605
evals/elsuite/track_the_stat/scripts/make_plots.py,sha256=A-EqsuzlVcEp7v0MZriwq4SgOGA4q4iqC2Mk5hWBSkg,8923
evals/elsuite/twenty_questions/eval.py,sha256=zn7QmRJbWFpzFxFgcqAnZ8O4iORKl-iK__UKaeJf-Kk,9099
evals/elsuite/twenty_questions/test_utils.py,sha256=t-OBN7WFu-so5OdmHROkhahHvy1En_gAIaiUpLK0_0A,1120
evals/elsuite/twenty_questions/utils.py,sha256=5KzSiVvnb-W7Y0kqossdEJ2icMiDmj1K58FW2RbrA7U,3973
evals/elsuite/twenty_questions/scripts/make_plots.py,sha256=2tOyyFO9wnm3-dE7RMjEc-TVL3cX4P_XAwYxF3_HMnk,5407
evals/prompt/base.py,sha256=sSbPGzVn52zuyvjG1MmRHYndjKX_ovP0BNFCyRbR5Cc,4326
evals/registry/completion_fns/cot.yaml,sha256=2NeEoxcaPG8s1aaRD-V30M9Edtko5dRefxaYMa5Ydqc,618
evals/registry/completion_fns/langchain_chains.yaml,sha256=Cm6VrHDIvQVqNg3Eh7ebOyMXC5N2nMIBSO-kmbmhJQk,103
evals/registry/completion_fns/langchain_llms.yaml,sha256=ArdoS24zyBMWeNMVkGlcy605qZLnBMJSqwcTf4MpIZk,712
evals/registry/eval_sets/chinese-numbers.yaml,sha256=gDiyYCN7X_rZmtH-TXXBmal5aqCCFs-V2tazIlCWyLg,201
evals/registry/eval_sets/coqa-ex.yaml,sha256=kOyFu83Fh2VBrrD83AMT4AMgTTve5FG1wlNYS38csqg,141
evals/registry/eval_sets/css-selectors.yaml,sha256=IwktpVcqPotOwZKs-uPnlwSZM-m7V4rQ-Bh_lkZC31o,78
evals/registry/eval_sets/exams-all.yaml,sha256=9q4JiisPXrTn0jcPb-XVvgsfY3wD0WfcLro1MYE0n_s,398
evals/registry/eval_sets/hr-ml-agent-bench.yaml,sha256=LqvXzdq_B6oS5cTEMID36U0l_U5KXo0RFjy1f1_glNM,1128
evals/registry/eval_sets/logiqa-logical-reasoning-plus.yaml,sha256=iXY6Sa8yf1UD4kESj4eziq5Q1NqTtQ9yYcBlnqDfSKk,150
evals/registry/eval_sets/manga-translation.yaml,sha256=HrC10EPvVZv_2kSUVcZIpf3BuA8S_DiOnm_GwJTr4R8,119
evals/registry/eval_sets/mazes.yaml,sha256=Vd6WrN4DCfN8KNFIZq_o9v9Eqd0zs9ggPBkJYKA2W8o,151
evals/registry/eval_sets/mmmu.yaml,sha256=2CzY9aRCGHxt0COeTGzZ1DlJqZT3--MDUXC1RBvAl-s,41
evals/registry/eval_sets/pointer-value-retrieval.yaml,sha256=HU81uB0pFbdG7pNiaAyztN6C3MepVJ27GX-rz-AHLEU,329
evals/registry/eval_sets/raven-matrices.yaml,sha256=UqT0EgTFkkgNFranko2ZKt2C3VnvO0h7qiQYXoPMj-Q,1652
evals/registry/eval_sets/schelling_point.yaml,sha256=klYVaSAatChFz5x4tAQeXksKoIg7rKo4zitISh4oNtA,135
evals/registry/eval_sets/stock-options.yaml,sha256=yBJqN0dGruSdHHLJZnrcOhfDlz5O4O2wKZVz_dn560g,560
evals/registry/eval_sets/test-all.yaml,sha256=jb65tfvToqqHVu9fECZ_XT334OKEnwYVOft5lKKVo0A,454
evals/registry/eval_sets/test-basic.yaml,sha256=HMxZrJDF8Fdmi88QdZQCj-ZD_tbWSDUKDzAaCTCXC8U,113
evals/registry/eval_sets/test-modelgraded.yaml,sha256=SVV6kEQ7LWZmECjugz5NM_VDmVwFEZKO0uFviEOHTUk,312
evals/registry/eval_sets/ukraine-gec.yaml,sha256=vErhW0PQdRrNyWtk8_2vwrWlxnGhTGbZ7NH0E9ob2zQ,613
evals/registry/eval_sets/word-associations.yaml,sha256=-kzg3KpCJ76ow4nIpeNyZUfYZs5HvLSIWzOiWtGApIg,189
evals/registry/evals/2d_movement.yaml,sha256=5xZc5b6HGLIv4DS6K_wslJkYKRK7Sd4jK8HJMvy0IrU,284
evals/registry/evals/3d_globe_movement.yaml,sha256=k4XnJBvQvh2KkXbnyeJJOBd7cBF7NFX-inDhC7iHllw,365
evals/registry/evals/3d_object_manipulation.yaml,sha256=kTJPnY3pnxjTQP5svuMGzjojB5ynrWAWTIXc83_UsJw,386
evals/registry/evals/Chinese_character_riddles.yaml,sha256=5hPKiSJnWjRUGisXVBbB6xeWzfZFuSjQyMOf32I-rNM,358
evals/registry/evals/GPT-model-text-detection.yaml,sha256=tKg4jmc1hLZBr6CqinRfeo3uHn1bWchcYV01E6Yygd4,336
evals/registry/evals/Unfamiliar-Chinese-Character.yaml,sha256=HTG-D0gBq3CrnrxDjmuUzm9eDWjdET3HprifANgO1KY,319
evals/registry/evals/ab.yaml,sha256=8MpX3rSI22y_o10928YSjF1q7FUu38KIzCsWKaaEbG4,963
evals/registry/evals/aba-mrpc-true-false.yaml,sha256=_ZCUqH1W9JQ0NlgVXwokEJgj8ZpfhdmltAxSdChk4Zw,215
evals/registry/evals/abstract-causal-reasoning.yaml,sha256=bkertrXb8Qf8fwmC_riVhUw6s7qNURYcyxqYOJZKECI,863
evals/registry/evals/abstract2title.yaml,sha256=3aBU6pyfnsASZ1EPSgzANlyC1PtzwH7en5kZyx9F6Do,469
evals/registry/evals/accounting_audit.yaml,sha256=TTRJpPC45TGI0e_u4kELJeQwR4ChjY4bEPCgN4HHmO8,195
evals/registry/evals/actors-sequence.yaml,sha256=4o29cUKQ1ntK1ynVab205f6dJ4nbtCPWgxMJdotGSkE,297
evals/registry/evals/adultery_state_laws.yaml,sha256=HL5YJB7PuXh0ImtrSq5PhMFyuaKWFhD3qhKKo2DkMdc,351
evals/registry/evals/afrikaans-lexicon.yaml,sha256=WjgDUyJumDZNL86rvvF4I0YqZ-ntH_NaJ5_yPPjtp-4,288
evals/registry/evals/aime_evaluation.yaml,sha256=Yd9aFJHfedeMdJPms7AhNIfrNyVUfyPNp2pGhmHNGjY,286
evals/registry/evals/algebra-word-problems.yaml,sha256=VdIXJEgkgR32E4miONd3dkwBXuNNIHHzCe8K58wTKLA,457
evals/registry/evals/allergen-information.yaml,sha256=Cnb9Jpdj-qQ4Zg_FICsqUyvvdtN29A-UmAGqGUuavbg,418
evals/registry/evals/already_said_that.yaml,sha256=7yq98q1iQjCec5qD8Efd0V-d8lFKQ3YJw-0_3p75_Tw,1547
evals/registry/evals/alternate-numeral-systems.yaml,sha256=E6X3ham4vLeJ5i05nAGy6RL5U4Us1Kijj6QRfviTNXY,345
evals/registry/evals/ambiguous-sentences.yaml,sha256=ozJ_VpMlF6IDq4I-RIzeqcGO7lKAyUvcejSW6WJPGo4,451
evals/registry/evals/anagrams.yaml,sha256=lCnZNEHDZ-EJDEFPzFIbyD6UoWFjj5CTnkGWyDJ2kCU,229
evals/registry/evals/arabic-literature-qa.yaml,sha256=zywti66MwB35R6NIuesdGNr3Wypfd5YlZMdsLvt6PWA,218
evals/registry/evals/arc.yaml,sha256=lS4iTw1LcYmCZPxYaRMUFoxgCB04c2nYhyNYUT-hneQ,143
evals/registry/evals/arithmetic-expression.yaml,sha256=lXYJSPdSqqrtiRtTOjKPIszQilBKoby2aJgfXAJL73M,864
evals/registry/evals/arithmetical_puzzles.yaml,sha256=l23_PDIloFOL4TS9Es5X4URw8p29kgI7n6g8NkD3WKU,330
evals/registry/evals/ascii-digit-recognition.yaml,sha256=zQgi67H21q5-nyhatvPxPyijVSDLBIjyfVxmHCqm7oE,284
evals/registry/evals/ascii-wordart.yaml,sha256=AZLx11-sq0Nub2RIyJCawUuq4hCZwSFMQluLjGS38v8,280
evals/registry/evals/asl-classifiers.yaml,sha256=u3TCo54qfVsRfRoKooIzpjmFEHI5cab5YnG0eGcRh9g,281
evals/registry/evals/astro_eval.yaml,sha256=3Fm4nylWPZVFjkRVzgSogahfUte9CVMGtOg7Glky63I,316
evals/registry/evals/atpl_exams.yaml,sha256=PjetzkE4PdrHuvunXnJLe0Aw7kft98WENILx7-4jJ-s,171
evals/registry/evals/automata-and-complexity.yaml,sha256=Q6UtKYsOomv6lxUwR4Fo_nVBGX4p1bLzD18Eo5-8ubc,350
evals/registry/evals/backgammon.yaml,sha256=RtCMyTIiEG3wWiDI0Ku4MUak5k1w08U-KcyYu0s1D0Y,646
evals/registry/evals/balance-chemical-equation.yaml,sha256=JUYOEqKVRE7s0TrT6v1HTC88T64FCM-nSKh1iPKrxWM,230
evals/registry/evals/ballots.yaml,sha256=fNWj7nFBKKsVFpC1jSdmVMvXsQZj8zk7i3ddYU4fkcU,1141
evals/registry/evals/banking77.yaml,sha256=9UTQ8G1nrm3rb0t_sHb7NiTnlsU2wKr4TgDUcbbwkSk,196
evals/registry/evals/base64-decode.yaml,sha256=edEyYzLygwDa1FoG5taxZEvoEaSgKG2dQnYjee-Qdbo,328
evals/registry/evals/beam-analysis.yaml,sha256=qI7h5fSH_KqQvm9uQaSV_JH3m-3r81C1VqnhaqwAsOk,272
evals/registry/evals/belarusian-antonyms.yaml,sha256=0hSJQs7vaebZHClsK0YFxMAIAdrkVUYsocQou9Y78Vg,305
evals/registry/evals/belarusian-grammar.yaml,sha256=Z-avBl1ogsDezJ7gnvoCKU5nyg7JDKSTw7rUxAPr_qI,332
evals/registry/evals/belarusian-lexicon.yaml,sha256=4uSi5Ww0WEh2UbOEMSH_1Ee1XlT-GImixeznovUilYE,310
evals/registry/evals/belarusian-numerals.yaml,sha256=6ajmHxK84PgFsJLDhg2Rim7xwD5jkcOxtINnH2qTsBw,290
evals/registry/evals/belarusian-orthography.yaml,sha256=YaAJwZxTuhP6IbrxoEx-PslMZiYmaJmoIOjBgU8DrI0,327
evals/registry/evals/belarusian-proverbs.yaml,sha256=A-z5rBngQ3qv54JD94KwBL1nR8Tunl9aYZJYz8-bFZg,292
evals/registry/evals/belarusian-rhyme.yaml,sha256=pjw2TTPXRgR0xDaV0evQiXpreY-sn2ot98JisOf2B_0,283
evals/registry/evals/belarusian-russian-translation.yaml,sha256=48fRNOugbGM_JTZlFr_F80G8EQ_ZHR_pzpameaC2ysc,361
evals/registry/evals/belarusian-syllable-count.yaml,sha256=MixFjPxo8l0hzDBQgbIKbd33wS-5lqYaL2X96FmxNXs,311
evals/registry/evals/belarusian-synonyms.yaml,sha256=mg3gz8-ozo8886_BxWxNctJjiHCk0Wukorzsy63VbYM,305
evals/registry/evals/belarusian-word-analogy-inflection.yaml,sha256=Je9s7lwktzmRT0k2MV-6_6e0t-LxasXbLXiWREwyxaw,397
evals/registry/evals/benjaminmoore_to_hex.yaml,sha256=k2hknzSH1vkZ6J7ZHeFYK08ikXDyBwZW5_L8ypvc0Ig,218
evals/registry/evals/bias_detection.yaml,sha256=SpsE8M17AA6ih5TqzTRN0-wptb3OCe7M4rvAAE1VlL0,394
evals/registry/evals/bigrams.yaml,sha256=HUYlZmKS9TjC93GyTIpUyhTSiVf_tXXutkzr0EMmkZk,159
evals/registry/evals/bitwise.yaml,sha256=W9UpPIttZ0dV9R7TbPjwnytpTU2larrQQLnZD3EwT94,245
evals/registry/evals/blackfoot-numerals-modern.yaml,sha256=0QNFT4GJamP5Kvreavv42w8mnogmAZ9_zN504ZlpwYs,366
evals/registry/evals/bluff.yaml,sha256=EJa1oAHqFNcpxcTpM-_kC49dhmSV1VK6YmMy7jG3bpM,1209
evals/registry/evals/body-movement.yaml,sha256=nlEjre06H21u93S_oHFAm1uYRTvlojYL1puWv25N3Iw,282
evals/registry/evals/born-first.yaml,sha256=zL7RlHv7jflXma9RrHDQwvZPRDjXJAZc5RiHaH8RpxQ,246
evals/registry/evals/brazilian-lexicon.yaml,sha256=hiSgYM7MlWOXX7BKzX7H-rhXb9wiD0j0y62RcJKSShY,288
evals/registry/evals/brazilian_laws.yaml,sha256=ZwiorA3kDrgVEIm1RAT3QF2d9cPB9vQAD67MJqcAotA,269
evals/registry/evals/bugged_tools.yaml,sha256=DFibrQMspdGQX_1GzGMbUobM4Ge_4VjqxXsFuwjX1Jk,849
evals/registry/evals/building_floorplan.yaml,sha256=Jzt2ys0DPfVy2XyiCmkP3Ka7bF72Jf8MJqZRE1u-zsM,205
evals/registry/evals/bulgarian-lexicon.yaml,sha256=UMpkdIdBPK0mBwfpRpk9D4dMvH1c26dCQ9TzduBy-FM,305
evals/registry/evals/cant_do_that_anymore.yaml,sha256=4kxxL1on-hWBD-HcVpMPbk31DOAs4FS1kScB8J2daUc,851
evals/registry/evals/canto_wu_pronunciation.yaml,sha256=tcnhfq1hvs1abMpIJd5s9gs-RhHFLm2QGdODzUWCx7Q,333
evals/registry/evals/canto_wu_pronunciation_fewshot.yaml,sha256=zGd6J2Qi5zf6uvdDTDvhi6ldZJw53jj7Hi6hi7ZTvTU,355
evals/registry/evals/cardinal-directions.yaml,sha256=d1V5mwh88tYyeVtiAvuqepBb8UqLTvWGPmT2eYjyOvY,207
evals/registry/evals/categorize_with_distractors.yaml,sha256=QrtPUEoKJBkR5v5FysenEo5uTCdKwd0g82SzjcsCj8k,573
evals/registry/evals/chess-piece-count.yaml,sha256=6Mahpe1u23NIw9W3SH4ulhCWslD9WXcXc_Dn7nAkdt8,309
evals/registry/evals/chess.yaml,sha256=cZlCJBQPAZAFdbRlAhyVjV0w8U9uXr8cualpsi0RS_k,230
evals/registry/evals/chinese-lantern-riddles.yaml,sha256=xuPtMDW2Vya57TlCtnyAPJTogHw-DaLGde8aqnkQBg4,302
evals/registry/evals/chinese-remainder-theorem.yaml,sha256=ddeZPhfbhj_yKNz0debthE8B2nqQRJN7Vgp-Yvia4aQ,346
evals/registry/evals/chinese_ancient_masterpieces_dynasty.yaml,sha256=O2NzRX39qDxV7QkgaAOo1BpiM9q1OLdeF8mX5twP9lE,348
evals/registry/evals/chinese_ancient_poetry.yaml,sha256=1HFecHae2BNVwwibh9sqJSrEuoLTpzdQ6Ew_lWOE5dE,324
evals/registry/evals/chinese_chu_ci.yaml,sha256=0s1Hkz6BTlS2awh1d0FX4_efGUhenb2qjhckiray44c,256
evals/registry/evals/chinese_famous_novel.yaml,sha256=5iEk0PqQ0tZ12TjkmDPkc77rq1tD7ot1jauEAZwc0G0,280
evals/registry/evals/chinese_hard_translations.yaml,sha256=A68cXlFNiKOoKCX9_HQ7iAj8fu5Vwem6xXBw9BZYT8Q,338
evals/registry/evals/chinese_homonym.yaml,sha256=TC5x7kLrqO2Xlb44z6qw9a0snRHj4SO_YvXbOZ26DLw,354
evals/registry/evals/chinese_homophonic.yaml,sha256=qcBXsqwEevewNe41pr3J0gcc1uYW2Wmzhn0NwHVJwxQ,334
evals/registry/evals/chinese_idioms.yaml,sha256=p_ycOf8dYHmmfvUw4TxiZJHMtRshLFH10uTlGm0YUi8,401
evals/registry/evals/chinese_modern_poem_identification.yaml,sha256=D8f5nuvabD2uztzRz7M1N7PeTEmSTwoGUiswtOo3qN8,267
evals/registry/evals/chinese_poem.yaml,sha256=CeWt-1_5YYQr0bW45UVI66j896qD6p-JDr-5zqoPv3Q,178
evals/registry/evals/chinese_shi_jing.yaml,sha256=aRsYvMQny-7fDe-PnpJlOgAI7E7jQYaeI_6frrXIHVo,195
evals/registry/evals/chinese_song_ci.yaml,sha256=zieF6aBnldTmJF-wAh0yHlpMIECgxUBme11hJKeLFSA,190
evals/registry/evals/chinese_tang_poetries.yaml,sha256=IHGxrGTIWOcfkYP4siaNCahmd67fSi5-YeZMJOgaHmU,330
evals/registry/evals/chinese_zodiac.yaml,sha256=cWXV7nELckY_FhiaRwmcUv8mmI8755rD7s_09uBM4L8,187
evals/registry/evals/cissp-study-questions.yaml,sha256=_uV8i6ljRhlMI3ZOWv3PecVVnSlm6njMQDxtaLtVuv0,295
evals/registry/evals/co-sql.yaml,sha256=aF7HYskA2mh3U5nFKeWxzzn2_lUg0wHLAotNgghbYB4,643
evals/registry/evals/code_combination.yaml,sha256=BqjMIY749fyiVBJbzGPYBrpWTEuXw61xbBdY7oOHvhU,372
evals/registry/evals/code_progress.yaml,sha256=U4fTtTgzaXv9vNVFitffuAGEGycPgOvK3eHKGABvGCc,382
evals/registry/evals/color_theory_complementary.yaml,sha256=bDqrkULMUKetd54J4YtdH-O7G4ul-AaZ_HX9x6hC8LI,343
evals/registry/evals/compare-countries-area.yaml,sha256=8yAFASQknGebNjJpsZcvcF7IzjsB3OXdjM9NTIZb4hU,308
evals/registry/evals/complex-analogies-en-ru.yaml,sha256=uojDdV63HfRkq47nizBHGXDbZiClXxtRNf2IN-ZzWdc,223
evals/registry/evals/complex-replace-characters.yaml,sha256=3IK6V6e_Mk0CVc7MeU8jca2MyrHbwOKT1QNJ3FK0FCs,250
evals/registry/evals/comprehensive-graph-reasoning.yaml,sha256=rJzwC7zZr4bPwMh1raHA75uJjXzIERLFrxLedlFkQvI,423
evals/registry/evals/confusing_korean.yaml,sha256=JuFk_LgJjx-447HPei1OY23UroPaXHqZu2gydpDrKFE,275
evals/registry/evals/connect-4.yaml,sha256=ah0jeU9zuhTh0jowaUCAVTXmbqkiqbKUZc2byS51jQI,160
evals/registry/evals/consensus_summary.yaml,sha256=_9-4vGTQO3eIfof2lscP7aKkIUueuF1-X0CWAeBJ7xA,379
evals/registry/evals/context-free-grammar.yaml,sha256=oz5sCC4TUy29Ce522peWW3tAFhzMr09F0CSv8TFrTi4,288
evals/registry/evals/convert-hex-hsl-lightness.yaml,sha256=QTDxs9RgnV38-3Fa-COYQmcNh8ALnJR-zRYASE3DH3U,335
evals/registry/evals/convert_bwt_num_and_chinese_num.yaml,sha256=-MevBZFOMvX5JHu-b48HCt_bNgLz4G1yM3i-OKysHrs,1594
evals/registry/evals/coq-editing.yaml,sha256=EF_4Tx_htEqi90OT3-4woniR7Akd4bkQYov_tMyLDGc,787
evals/registry/evals/coq-proof-step.yaml,sha256=JAbyhQIwrX-46osN1DO4Saw_iTAjINKB8CJluKfZgLU,453
evals/registry/evals/coqa-ex.yaml,sha256=eo-VpJTBrR5ivsIn8sWTy4601mzUgNtkCZoL2OrKb6o,1719
evals/registry/evals/corr2cause.yaml,sha256=BORU7UdxO-L4YblfviVBC7yOwW_cyValiMIkIf8tcro,328
evals/registry/evals/count_intersections_polynomial.yaml,sha256=Z6o8K4XuH2AF-QXBHWROw-OyHtBQ2SwdgzE5wbPT9Uw,435
evals/registry/evals/count_token_freq_dna.yaml,sha256=vyXgaN0y_BEI8bUv-6fi1Y4_WQE-nPmwpGstMxWigw4,349
evals/registry/evals/counterfactual-reasoning.yaml,sha256=AlfD2ZxBx6_wYmCOpPfQtEFJssFss22y2CCDaMHqkhc,387
evals/registry/evals/countries.yaml,sha256=V24O30XqnoDtfs9Mc6xBW79CvNUKrW1_N9zmPxhqjgo,166
evals/registry/evals/crepe.yaml,sha256=EYmAW3HRbdgWbuUC5CvIHXdzEefOGX3bm6gqbwCEcao,151
evals/registry/evals/cricket_situations.yaml,sha256=Adg2KXQmWOjdQOTr02FfhkpjeGLdv69ewH9O5IIJRtg,301
evals/registry/evals/crontab.yaml,sha256=UY8lKH0wamS7AQ6rBT1i250E1uDDZugkNIcFrwX1qlQ,158
evals/registry/evals/csharp-linq.yaml,sha256=ORAeFzQvW7fH4za_q8l8anNvg4s5ne3IUcda3AuCWbQ,270
evals/registry/evals/css-selectors.yaml,sha256=zbZjaYyiHUg70imPZFGsUBvMM-mmy_gozSsKnaiPEUE,579
evals/registry/evals/cube-pack.yaml,sha256=s0q3fah1Me30IB5gnrhxacBMlgyQqhHJJXFYavTUd4o,167
evals/registry/evals/cybersecurity-filepaths.yaml,sha256=36BU1XkMUmN2glhycDamob1UrQ3hEHpGV48X2vTqAIM,553
evals/registry/evals/date-booking.yaml,sha256=5s8kGT9SGHe2iC1DLsIHjH1tiJCjiOTE4iFd-WzVP7w,179
evals/registry/evals/date-calculator.yaml,sha256=y9OL67eOlLttm8LRYviY-eYLXls3q25CscqH76C8aAY,192
evals/registry/evals/day-of-week-from-date.yaml,sha256=z_L6BcWY6In6dctnhFeg--Tw9d1GB12ZbZBc5woUIpM,214
evals/registry/evals/decrypt-caesar-cipher.yaml,sha256=BUlG1ZpALPTD94hvQTGtPDXoNjWCwHWpstLJ_7y6O5Q,214
evals/registry/evals/detect-hshd.yaml,sha256=wOYcODdXtcaY443aNYllBcumxsJ2SNDvQKDUde2-2bI,189
evals/registry/evals/determinant.yaml,sha256=pqk4MlDRmrIdBTmrx4ANpjtI-LyWD73j4neV_fgotDo,175
evals/registry/evals/dhammapada-reference.yaml,sha256=J0vet6JXTs1L8yjeSd3KzGgeHmrk-P90k2xi5vU3cLE,345
evals/registry/evals/diabetes.yaml,sha256=yUakVSb2l41ZM2n04NFFjQdVnNcqYz4ZK82uM1xOo4A,163
evals/registry/evals/diagrammatic_logic.yaml,sha256=vMex2id5tmcOsTkza3o2Lf3xHiMo47xbgQG__QYEQEo,202
evals/registry/evals/dice-rotation-sequence.yaml,sha256=gl-DEn9uUFRQUIfoVRKsXL1LsRIO0Oib5GSqP44rRok,377
evals/registry/evals/direct-speech-tag.yaml,sha256=QP0RKh-U68fjc0HdiqpeI58LJyCNoEbnYbswIJc03pA,199
evals/registry/evals/directions.yaml,sha256=DBbJvr65KEQacSbnjMmWEhlxi2emEHUoazUG1GgvcVo,276
evals/registry/evals/dna-melting-calculation.yaml,sha256=iTepd6wnxcVKrM2UOIWB89d_W1inwQjvowFM2Zkyipk,316
evals/registry/evals/dutch-lexicon.yaml,sha256=8j3LZLwB837v1FRCqZycXSLkajLN1pIJjVmQE-dvTVs,306
evals/registry/evals/dutch-rhymes.yaml,sha256=TpqjG1mBnn6B142FeNFXpiINlpDD1RNw4dIzuykttPY,244
evals/registry/evals/emoji-riddle.yaml,sha256=KVeqtWzWHNMthqXderhK4Mzl4bbiyG43bejybpc3IOQ,269
evals/registry/evals/emotional-intelligence.yaml,sha256=vvenROXWERbItVcAG8enrGS2LcI5LjWFih7Z6LydWpA,387
evals/registry/evals/error_recovery.yaml,sha256=IxMXQZ0r1Od0QHZFX-rilUfRroDk3l64Ss6yGmf7xRA,1083
evals/registry/evals/escher-sentences.yaml,sha256=wYNZaOlV8wj9s0aFsMCI6XMVmbnjVso9Xtsl9BJIjT0,195
evals/registry/evals/euler_problems.yaml,sha256=EvyvYAkYiMUj-Uo_OzqcZKZnEgAot8_9sSMxhLAxygo,669
evals/registry/evals/european-date-format-challenge.yaml,sha256=QoPRoLwuPbxkJX-H-HGY856cK5nI6Q590rWMocgOFaI,497
evals/registry/evals/event-categories.yaml,sha256=kYTPrjyOY-PXq2FvDprMJBdt5dlX4_iI9Ty1N0-dWwo,350
evals/registry/evals/exams.yaml,sha256=523PwpZTnnYF0Nby4vnj3OTagqM59c5G0eByzYtdlsA,3303
evals/registry/evals/fcc_amateur_extra.yaml,sha256=-t3Yi22-CuZ7GmVIwRXQe4PDodx9yq8zL38h3LeXMdo,314
evals/registry/evals/finance.yaml,sha256=awAtEEAqTEkCQZOGlf1lfGumUwi3Buy3zMuZrM1nYko,254
evals/registry/evals/finance_calc.yaml,sha256=U0SfZLydWOcv7VNE_XqYJe3AwIi17bAq9eZeKKLGkUc,277
evals/registry/evals/financial-derivatives.yaml,sha256=NfUf5VIMMy4IVWTQhJV8wuNda4aK6jpBWBRgdZ9ctk8,311
evals/registry/evals/find-letter.yaml,sha256=p92BKLOf2nQOMbRHfYqowvJfskeXILUoB2cQG0XSPuM,181
evals/registry/evals/find-thirukkural.yaml,sha256=GcIhyLyWNJFUv8c5GkxAR6pn76ea5nyzgCNFHWwp6cs,303
evals/registry/evals/find_country_from_svg.yaml,sha256=6u3_T1ncV2gDPTsnO58SR8Nc5AGIXH1zkqo1hBPL8ZE,336
evals/registry/evals/finger-tracking.yaml,sha256=jlkoOI-ryrkpPUTkhJSUbUCCwL_32hoZXL1P-k-zg6s,276
evals/registry/evals/finnish-rhyme.yaml,sha256=zeFD59r3VcBGa8p3YBvfB8a9wqpqSGILrRgGmkrTwPY,262
evals/registry/evals/first-letters.yaml,sha256=zzmk6jkpLNt348PaAV5jAT4-BQgcgHj8BRpZqa4EB3U,183
evals/registry/evals/food.yaml,sha256=eBSDFlSEe4NnByaMRYhVAylxr3e81YIP9igpUqsPDsY,147
evals/registry/evals/formal-grammar-to-regex.yaml,sha256=yrPTBhhR_2TxRT36CvcL1d-4ai5HSXn5z5RSvLQS44M,245
evals/registry/evals/formal_logic.yaml,sha256=qj40MTn1oe0ddxSkxHZh2yxAuT_0s8Mzok-hIrUNmI8,276
evals/registry/evals/forth-stack-sim.yaml,sha256=sB_xETrsKGOqONCYC6gAmlkTCHyN_7_uEXFWOmLmD6I,928
evals/registry/evals/french-lexicon.yaml,sha256=y9JoP1Bm0LpREsgP3qy1lk99-SNSDNTfIgb6G9YMrHI,273
evals/registry/evals/french-part-of-speech.yaml,sha256=VMDaTc5Wq94Sxb0ACYkvxYthg-G05JSkgTwtLVUX-gQ,361
evals/registry/evals/french_homonym_and_homograph.yaml,sha256=lwEu4QW6bP8cAS7S8G4EnLbsCGffaAiE3gSnakxkU6c,537
evals/registry/evals/function-deduction.yaml,sha256=HZgJvAZf6rAvwtjYi50xp52H06V-cy_Xl3Kya-Nf_dw,1289
evals/registry/evals/game-theory.yaml,sha256=e5HM7TUbSr6iVKrSZrSvXGoDcJ9JBEjq6yPIwneTUso,289
evals/registry/evals/gears_rotation.yaml,sha256=NiyTaP8SmhI38wkGEB_r07SwDa1NFVxoCLXBAz73VMA,338
evals/registry/evals/geometry_puzzle.yaml,sha256=-Kw3HIkY1s2fq6dF8kGay_4WfDSXq2HNxps1hnYcAEU,345
evals/registry/evals/german-part-of-speech.yaml,sha256=pwg2Vsx80-ieMjpuHZt7dQAe4EZoqxEVZF95cim7RPA,361
evals/registry/evals/gol.yaml,sha256=dutW4ObZMmbIcHlLFTcLI6vJ5HYAXx3e_T-YQUe7p3Q,254
evals/registry/evals/gpt-protocol-buffers.yaml,sha256=f_DXS7fSFQdCciSKFtm7Lp2dRDtCPED6rprQgyhzOsE,217
evals/registry/evals/greek-nt-manuscripts.yaml,sha256=9CkHQ8GYvtbIRiBEExOXDd92K2Ud08CaEcX849Yu8QQ,404
evals/registry/evals/greek-vocabulary.yaml,sha256=Hkg3lYh3TONOY1evPqz6NP8CoAHgNKKOT9YDaPeY6YI,194
evals/registry/evals/gregorian-to-hebrew-date.yaml,sha256=Xig6P97l9dTe0-U7_fM_umfOGmbMbuXT7SmKKStaMRg,334
evals/registry/evals/guess-the-singer.yaml,sha256=W-D-_eFQ73_ArHM6uHRihBXxcxpouDWligKV8UwWjlg,292
evals/registry/evals/gujarati_numerals.yaml,sha256=7DiHStQn99S5LV-9L8HpwAlXNK6Z_ng77YV31MNXUrI,279
evals/registry/evals/hard_russian_computer_science_tasks.yaml,sha256=cfhRq8aaSHUzjfYCiN-psLdfYIGS_AIjuL_4JD9TLoU,613
evals/registry/evals/heart-disease.yaml,sha256=vtg4-PDKRyKAjqKZMwz3ELwidw76bLdC1TPBOIWH35I,258
evals/registry/evals/hebrew-bible.yaml,sha256=TKN0i6etbTSO1mccdNQlWvexqCWpRlMAa6q2qqv08U4,315
evals/registry/evals/hebrew-homophones.yaml,sha256=65U6Jhc--hC5X5w3Onih6ujZcAVcAji_bDZ3xN3UtEc,283
evals/registry/evals/hebrew-rhyme.yaml,sha256=MIMif2NKAV0NLoUsWiwLxqbn7aRTxntu_9xdfwKlzmA,250
evals/registry/evals/hebrew-same-noun-gender.yaml,sha256=_4_33PUeiI1AAT8ky5IEWkVXLd_c5GBKR6Y0k7JlRPc,285
evals/registry/evals/hebrew_grammar.yaml,sha256=VhOr8IpXb1FUaaKlpDNP2aU9eM74cJxNE9ke4Uw_nA0,264
evals/registry/evals/hebrew_plurals.yaml,sha256=MQSNbU88ymfjhXj0G48eqv_kldVCkpT9hT3vEOijmBQ,269
evals/registry/evals/hebrew_talmud_suka.yaml,sha256=KD0yoq80cD-8xjorYri8fJaryQYY5xqGrIavJITMapg,339
evals/registry/evals/hindi_shuddha.yaml,sha256=iV8b9HC1LCim-ldDeuwltXWq6yEWnQFRojKUDYSOUFM,183
evals/registry/evals/hindi_upsc.yaml,sha256=YCcpnzrcGvb-PC_4K2FyXhiTMgrTj2iwMpbiwvVH_Gg,170
evals/registry/evals/hindi_words.yaml,sha256=6ZdOWJziVxFJfCbew3-lgRs1v8VDMZa9rpekhIpq4jQ,175
evals/registry/evals/historical-kana-orthography-reading.yaml,sha256=aVbRgHpXKcDDJp4PhsveTWSqF3ljKdZFGCKNVEiK270,351
evals/registry/evals/hr-ml-agent-bench.yaml,sha256=XSFWzlm8EEBbhO2tkC2bqAs_2PC9ONgLDEDqrb5W5yg,7082
evals/registry/evals/human-safety.yaml,sha256=sq7LX3AChGyzVgA-H3aZhJB4nEd3iQA88I7CJ7_IHeU,313
evals/registry/evals/iambic-pentameter.yaml,sha256=Sq2WjWQpXAk_6r388WWxi4xyArQWQF0Bs-co2MAchE4,307
evals/registry/evals/icelandic-inflection-easy.yaml,sha256=JCOnM6oVZxvOe4_pCKOYTjpz1SuKyZYsP6U5JPO84SY,343
evals/registry/evals/icelandic-inflection-hard.yaml,sha256=0MAhTYJUw0HUoIJ66AuLViq2DRDoOItuBpgcEfJ61nM,340
evals/registry/evals/icelandic-inflection-medium.yaml,sha256=7QYdKV8MkOnsPZgbLEmeGTKjQ5_0UVERW5x0BG2tlyg,350
evals/registry/evals/icelandic-sentences-gec.yaml,sha256=UOKXfFS6PDXqAeRhvushLnIhJJmwQTpgNWNjbvZhsak,345
evals/registry/evals/identifying_variables.yaml,sha256=Y_lDCYFSrujb_WQkkziXTcRr5AKC7VX_khTPb3dEzzA,4861
evals/registry/evals/illinois-law.yaml,sha256=VjSHfp4uxMQhbH8vM0ZdnAss1zItckBWQMyhyyJ1l2k,265
evals/registry/evals/imperial_date_to_string.yaml,sha256=stjZipZ-jUTM24g6uMH8dPPHGqGemAi92C4dl_j_YUI,222
evals/registry/evals/incontext_rl.yaml,sha256=hsi0i8T69XHIHBeyiEVE9xwH_p9NrFMgBDFsEa8jUgA,1678
evals/registry/evals/indonesian_numbers.yaml,sha256=-BEdKexO6Hm6DRq9xrxhYskL_3o_S2GIWEBKbrP4znY,213
evals/registry/evals/infiniteloop-match.yaml,sha256=Ue8fjK8svQRLldmqZtdhc_85tVqHPl7Wx54d8eGt_lw,347
evals/registry/evals/integer-sequence-predictions.yaml,sha256=dVgPXb4uZhvfp8QWvDHFW-9MqzVVzbTi9WU-szqz3Kw,1113
evals/registry/evals/interlingual-homograph.yaml,sha256=BiUY_z_n4NX5cJ3TFLImFwkUMcfh3LccgBXNwyr0p0w,339
evals/registry/evals/internal_representations.yaml,sha256=FfOjuHOa2Ky0RzKdTlpOOz6CpZ06BQYca9VuINihpMw,235
evals/registry/evals/invert_word_wise.yaml,sha256=WvPCNy5Sx8NATvn3MFLTJSsjFZ21c3RhnF_IqgB8tE4,390
evals/registry/evals/invoice_due_date_leap_day_adjustment.yaml,sha256=JlOsHBdgXW6mnEl0C0GEBwmm6nB7UpfMeHzt8mULKVc,275
evals/registry/evals/invoices.yaml,sha256=htfOfjzin0CNT1puNMh9Z-KinTvejmLG7PPeR8ADixE,262
evals/registry/evals/iqbal-poetry-translation.yaml,sha256=q_jW9gQ3Pon99dweHGkYZnFQV4vTV2VNJywSUDMwkj0,1021
evals/registry/evals/irish-lexicon.yaml,sha256=px99Gw_jMDgIm_DmpBss6KtwSbfBEU1Ooqx25_SvFcE,285
evals/registry/evals/irish-plural-nouns.yaml,sha256=1Pjk5IHI9RA-BxbvGcZoD6Au647H80IMFYL1CokRDos,308
evals/registry/evals/irony.yaml,sha256=cTB-grtsYqN2o75aTIurBPKrGK9sk5pjNVukQiCONyw,258
evals/registry/evals/irrelevant-negative-diversion.yaml,sha256=8d8rNXj07xje0rbkOxvRktT7y2ua0RER1xei3oavWNg,402
evals/registry/evals/islands.yaml,sha256=3q4WFbSf6FONVBrcak2ovwCB9U4d0cxFfIKzTBC-LqY,370
evals/registry/evals/isosceles-right-triangle.yaml,sha256=Q79wjBCOTlIyY-Wh108vn5bzPFeuCV1N5SjbuxcsKfc,233
evals/registry/evals/italian-new-words.yaml,sha256=o_wyyXpegUUtQ0pfv9G0M-IyXpYSrtxujXOFPWjuhY0,308
evals/registry/evals/italian-rhyme.yaml,sha256=a-86a65RvL9ZIhvVclM8cm-WfdJKHXgKpNF5kLnuT2s,254
evals/registry/evals/italian_big_math_expression.yaml,sha256=3mGzDm6mx1wo-KkDs6MLMpT3qlS_hLmWcgReuljoEtw,422
evals/registry/evals/japanese-decimal-units.yaml,sha256=zHzrBfZUI5dDc0DpM9LgIxqQMx7o9uF-vY0ktf-mwu4,279
evals/registry/evals/japanese-itpassport-exam01.yaml,sha256=6WTl1g43tP2DbSc3fN7Y9UeyUvxIqcYbYpZ4SvNexlA,428
evals/registry/evals/japanese-national-medical-exam01.yaml,sha256=CTVxj-krMG6izCaB-Dw93tO4k4RmgzjhbzKqQDm9q5s,292
evals/registry/evals/japanese-national-medical-exam02.yaml,sha256=25Nln1_WCaWY2qS2I9NIAG5rFHIxiHUOPtmmsXx2Mfk,292
evals/registry/evals/japanese-station.yaml,sha256=9zj1dt-6SdjpRARYNPcKDXqRkcquZ9JY6dYemAwvZ2Q,203
evals/registry/evals/japanese_approval.yaml,sha256=5Y96SM_-zOiJPMts9jDCiKnTAxJa5R3X9lrG5uhtsso,319
evals/registry/evals/japanese_city_name_pronuciation.yaml,sha256=TuqD6fOvUTjOywNZnGaZ-gJfQhIfSJfNxc22yk03Nms,399
evals/registry/evals/japanese_driving_license.yaml,sha256=s91j3Xr1CmyQeMIK9I2E-3sE3C5BfUtQGtaP1cmdezw,329
evals/registry/evals/japanese_mahjong_discard_tile.yaml,sha256=fkpUZWr_aTRSMVX4ge0t3q95UhGypiouQeEG1-rU4po,381
evals/registry/evals/japanese_number_reading.yaml,sha256=P-_LsB7d8sEmTx1TqDo9Gnecs_6CpkZPwN3RccUbmd8,338
evals/registry/evals/japanese_onomatopoeia.yaml,sha256=Q-jzXEssrxfJw_ggJy0tA9qr4iCwD23wZJ6EXHkTe1U,300
evals/registry/evals/japanese_populer_video_game_title_and_the_publisher.yaml,sha256=8ZI_UvbHsvkq52XbI7Ndr-14LmkFA0Dvh-zMc3NlmZY,448
evals/registry/evals/japanese_prime_minister.yaml,sha256=66oyXC4pYSkVfuT_VQ6ghvAsmXtHFwoO3jCEr78_Ljg,229
evals/registry/evals/japanese_romantic_context.yaml,sha256=_b7ASDtgtALnYQnR82m3_ngi91WoV0MDubg__MizdL8,237
evals/registry/evals/jee-math.yaml,sha256=NU9SV56MqGAcFCLbWkNG4ycujbvFhEqI0S4YY9Cs9t0,169
evals/registry/evals/job_listing_title_for_a_caregiver_in_japan.yaml,sha256=QnUOYjn01vXeVu-aeSMpmjZU9TQ3KbwRSxjPNX4aHv0,382
evals/registry/evals/json_patch_object.yaml,sha256=d6K9R6X1tgILrSNGXx_l9QgJjefj046X5DBKzBLySVU,305
evals/registry/evals/kanji-idioms.yaml,sha256=stmQWbP5EZcL3LWbfN2euy40zDrtG_ZZ8FfJ9nxvsjI,248
evals/registry/evals/knot-theory.yaml,sha256=rT09edvnlCf7dY99rS7A2bGhI5diAX6ie7rE2F4phmY,754
evals/registry/evals/korean-consonant-vowel-combination.yaml,sha256=7G9MVKmq4CV6qEkAR3p1MKIWhsJGaO_7b1FPkw5KeR8,390
evals/registry/evals/korean-honorific.yaml,sha256=z42vLi7yT-ydzD-q97qT3Co25UpWfmkEMFSUTtCVHto,281
evals/registry/evals/korean-phonetics.yaml,sha256=E2_pyItq5sOZxT0KimYTa-Bf_pQB1Eg0FzH6HSPAaAs,284
evals/registry/evals/korean-postposition.yaml,sha256=aYBonSiKAu2sU0JcVGMeEONhEmlqfS7KqwaSa6IOi0k,291
evals/registry/evals/korean_date_counting.yaml,sha256=Pt8JRXa3KMpYXsQL1dKuDY95Vx9wtCWcVgm2zvlwj3I,284
evals/registry/evals/korean_dialects.yaml,sha256=_QKoG0iItBwYLukBQRKTS_GPDW6jIpnfQxAe3K4Qhs0,293
evals/registry/evals/korean_foreign_words.yaml,sha256=5fUa5h_O13HSH0PRqT_1_ciU0697xHngiMhpSi6iwbw,276
evals/registry/evals/korean_romanization.yaml,sha256=OjHSKDoPwNrjJPQfty_t10PJdOscuL_en2nxvYH0Vnw,471
evals/registry/evals/korean_spaces.yaml,sha256=bsTsMjPiESrc3H07s-eUD8YKyyV5_g_LFDehLHMT2_I,277
evals/registry/evals/korean_spelling.yaml,sha256=iQOBN0-WQomMxK6KmINJYSL3IgJmiXRhX_Bailxulds,191
evals/registry/evals/korean_yaminjeongeum.yaml,sha256=7Zf1_dR-YQrDODDLtGOF_RR7ZpKcAvc6f-IwNxQH3t4,324
evals/registry/evals/language.yaml,sha256=sPKh7Tmhxx7n3TtLrNqXCMKiIK1qIhgbCVhTAmQWBGY,400
evals/registry/evals/largest_country.yaml,sha256=dICu1nvnjBAbMFiPhQmPZxjo5rXsU7L8ND_5D5rhAwI,275
evals/registry/evals/last-word-nth.yaml,sha256=eBAn9nGT8J7NcL9WNVyy9K1CLo0TcVXunsQ2-KIBPzM,327
evals/registry/evals/lat_long_identify.yaml,sha256=q5U0534OSm0s_G8-IkDJBK9p2pph_8LENPktMqQusbY,198
evals/registry/evals/latin_grammar.yaml,sha256=N8eXEkYuI3jYq0nuQ582Pqtas41hbwgoaLKt-cHPs4w,307
evals/registry/evals/linear-equations.yaml,sha256=bPYC1TwMxUJ78E1DfxC2C0-MQOn6VvVlWLB_-yCzQ0k,194
evals/registry/evals/linear-regression.yaml,sha256=UHALeB0PTX5JNsCRi8QHzfbhn90hkAxtE1lmy4kmRUQ,802
evals/registry/evals/list_comparison_missing_name.yaml,sha256=M8VmOkt3X2oTswHBOWrzAFngVtesTOiBJ9XrpGlZu5I,458
evals/registry/evals/logic-container.yaml,sha256=gxtuiHWA8jp4fR1fiBB-jybwR6v3Ff0VRA_VZT7D8lk,374
evals/registry/evals/logic-grid-eval.yaml,sha256=kmr9dNXvNFFWsNblOeJ49haXUTATrcE70A13gIzwfmw,814
evals/registry/evals/logic-liar-paradox.yaml,sha256=tt3DxsOTTbohvf_x46MFCRol-DpGghJWmMCo9H9M08g,456
evals/registry/evals/logic-riddles.yaml,sha256=4KGCd4iHmH8QX45geEEfNIJnIvyEH4spf7IKD7i-X1I,259
evals/registry/evals/logic-statements.yaml,sha256=dr_K9Ii6A3mwScPa0t23IuWhxGqa8rFmsUoWs5iNJOY,214
evals/registry/evals/logic.yaml,sha256=nA67Ix-JbTYzKoX2yIwNNOCvI_Sby5o-IHUCaADCW_Y,242
evals/registry/evals/logic_and_probability.yaml,sha256=uSHDKb0jNGpkQ-aBaZIbdt1KqmB9Gz-lJ1FjxkkuzlM,322
evals/registry/evals/logical-black-scholes.yaml,sha256=uj89NDRaSWJ8XtFP7EpCOZ_HNa6Mz4kjwjJYdSDXWdE,350
evals/registry/evals/logical_counting.yaml,sha256=e7HKH91RfIUPSBTQ_JBCx2wKHnK44QE0G-H-X7qyeG0,194
evals/registry/evals/logical_reasoning_letter_series_test.yaml,sha256=cJ025tr46I7ax79YOrDSNVk9fCOAVKRwQ1hvUHkWMto,435
evals/registry/evals/logiqa-logical-reasoning-plus.yaml,sha256=eOj6RqFCiTbsqbSLS3MGksxy9Y7nfwtTUgEd_yhYciw,1041
evals/registry/evals/logiqa.yaml,sha256=elwmRJ9P0nNcqR03NUX-xclazK9HPMbVDdtbdCessXc,153
evals/registry/evals/loss-logic.yaml,sha256=lpAFT3DKqyxTe_07EjcTayPwZlpZnkseJmPVXajZ2Dc,262
evals/registry/evals/lunar-calendar.yaml,sha256=vhxT6KNYINsxvidVJK6KQWiQpM80arF7WEGTN0Nv32I,919
evals/registry/evals/make-me-pay.yaml,sha256=0TdpHofVERF8tCv_uRkRKLo8P1OvH15xF0hqRXkVRJs,1872
evals/registry/evals/make-me-say.yaml,sha256=x6v8QT2Bp6ySANkpXvt9or65PFDiVLZbFPkuj63oyws,884
evals/registry/evals/mandaliof-table.yaml,sha256=LnEjCXtOjqWUfY2dxbpC-_qnUCr5fjxDLVu2BWKBmis,286
evals/registry/evals/manga-translation.yaml,sha256=Mpvb6qUm8Fexyd4j7857yxWNkvnrnTx9L1ULRZ8vaKg,674
evals/registry/evals/map-electronic-component-part-to-fact.yaml,sha256=bET5NPeVm69wsK31_dmFJVamTbFIHl7f3w6sKsag7BI,278
evals/registry/evals/mapping_to_matricies.yaml,sha256=qI5T9dO-6I9OYpq_m9B-DIAWtdY2Ofli2QI7d6Ch3CI,217
evals/registry/evals/marxist_philosophy_exam.yaml,sha256=Dp_QtPsVMc0qy7S3J0I75-0tza_NXUier8hNPwtnoek,333
evals/registry/evals/mate-in-one.yaml,sha256=N_g7bsQOyVs1GWhcQjPPtPdF1F-WU7gxFqI6yIPdba8,243
evals/registry/evals/math-derivatives.yaml,sha256=B8SkbxHzVFOR9vmC2SD62_s0nOE9dtm5jmFvY4N57jU,284
evals/registry/evals/math_equations.yaml,sha256=P5_5RCumAbozJWAvay6HciikPFJ6Si5HX41alrXOeA0,292
evals/registry/evals/math_for_5th-grader.yaml,sha256=MBeRekIn0eM8rSy_feqld76iDxNDtzJ_xRBuqWeri_c,334
evals/registry/evals/math_logic_operations.yaml,sha256=hljXVGrnuCslk0qLctOEIxQrWbK_g1kGvF0MabIHCNw,354
evals/registry/evals/math_polish.yaml,sha256=MGA20l2Z_NWMHUR8qQ9kdge8KIxHmB3Bz_kD8kHIO6M,312
evals/registry/evals/matrix-mult-rows.yaml,sha256=OH3SDFjP4Xe9QVwq3Pk_e1f4SMPsGUpBrVNcBgE_LyQ,297
evals/registry/evals/mazes.yaml,sha256=pf2kz1KvbfxN-gLVziI34wPmuuc2KXVu4vfFfv61mNw,1768
evals/registry/evals/medication_dose.yaml,sha256=ujPR5PPV89Wy89SC_4vOmynoiaDSW0WxG5yLr7UabBU,318
evals/registry/evals/medmcqa.yaml,sha256=sOotmRHe761Tf1RPYewTYdPWny-X2NYLiMggbmJJK-g,159
evals/registry/evals/mendelian_inheritance.yaml,sha256=Lp4CH4ObwJjbSgFBjXMZKVxFwK4STGn7A-CjhsPEVLE,215
evals/registry/evals/missing-operators.yaml,sha256=TmscS-PBZsaCetqwOoi3pIrW3srLuffqMjgJREOymJE,337
evals/registry/evals/mmlu.yaml,sha256=b85wp6rktF5MNrrakGm8s_LOtUC9FW-FrG0kMguThl4,14586
evals/registry/evals/mmmu.yaml,sha256=yAXAzxV1ZQAGFxc5zGu3EMcYJl4w1uHcSieAZp_01bI,11931
evals/registry/evals/monthly_metric_comparison.yaml,sha256=SjS1YvXkPpy9dvlkmJnOtHJBs81lUrYr6T-wkUFxAh0,313
evals/registry/evals/moral_exceptQA.yaml,sha256=t8dURhF5kcrjn_24Z59vkoNl7PbqPu-D6dfHVuXMSeo,329
evals/registry/evals/multi-step-equations.yaml,sha256=Mf22N3XHaQuY2T4Mb0geGO4kzgaFB-RxSKzh49dm-zo,211
evals/registry/evals/multistep-word-problems.yaml,sha256=ad7F14h21XiAiBuaIs2IfbVM3JzJD89ReU-or4rLVls,310
evals/registry/evals/multistep_web_tasks.yaml,sha256=QpcUnnd2weAs_9EhKXikBIQK3pcoLyHVQTfGkRJN4K8,2530
evals/registry/evals/music-theory-chord-names.yaml,sha256=UooSOy0SKnkx7GJTca1AK5pP_2vaDvdVdEfgjAkNYRk,315
evals/registry/evals/music-theory-chord-notes.yaml,sha256=3DHEagLI4GYGl2oejIs3NHSTooQFKr56Z0NWRvuyRdc,315
evals/registry/evals/music-theory.yaml,sha256=rPK6ysJg5FZpLsCYZUbxX79TrfPM-AYXcAZcKWZbk9I,874
evals/registry/evals/music_theory_scale_modes.yaml,sha256=hHR2aBjHtEooEpJhTmrcaXYbx4MwTN37CIE7BZ52C60,346
evals/registry/evals/naughty_strings.yaml,sha256=w0KiqaiyetF-CHIyXTMnhjFUpRx-k7w6d6c05PheCKk,1193
evals/registry/evals/nepali-numerals.yaml,sha256=0fq5AuOO8FA6FGDz3_IS6Irmk3kGpXyOAkX1bN822tE,270
evals/registry/evals/nepali-song-singer.yaml,sha256=2PpcIk6zgVl3-nsnSICFb7FJirmLC8wJJ7frXgds4rE,361
evals/registry/evals/ner_finance.yaml,sha256=_RP5f14PXCq8ETuPyO5BFJ86eQpUgiCCXHM7WUl2OCE,247
evals/registry/evals/newsology.yaml,sha256=YsQtIkYvVHjRMHXaF9FZ3Tlt-VAtf1ntFFAZwH8BmLE,340
evals/registry/evals/next-val-series.yaml,sha256=pUb2oPKf46OrJezNow1s41dWk3u3dRmiilHH8UoZ9NE,290
evals/registry/evals/nfl-point-combinations.yaml,sha256=y-SHkU4VWZjx29prcEH4BqqdycAopID9MKzru2PGQkQ,345
evals/registry/evals/non-compound-names.yaml,sha256=kLShbj1uh43KxNB26J94y8jqwX0p9BvRoYhLPn_58ZI,3875
evals/registry/evals/norwegian-lexicon.yaml,sha256=ozFqfAhEmYImLUb5FNmZRPL9zYjcgl9P4ODtmeQ6190,274
evals/registry/evals/norwegian-rhymes.yaml,sha256=vFHGTpNaknpiSSFlaX3LGz0qefg-d4FDOs63I_oZp_U,358
evals/registry/evals/number-pattern.yaml,sha256=7iZ9sKdmlT51DC-pwKnWz3Kbv4e2WXLRleIMtMGcypk,186
evals/registry/evals/number-reading.yaml,sha256=X3PLjQTqjvRn1Qf2x5c4uv4dvwsUmJ_EtPDiKR7Vfto,292
evals/registry/evals/number_series_test.yaml,sha256=VYovCYJGTU3i8iuRo9ANgfNm1bLEUQzCyNjn6KhLpbY,209
evals/registry/evals/numbers_game.yaml,sha256=tUfPXeN8wYaDXNJ-keh1pNPvM2sbLcaRkTMLEBekKmQ,256
evals/registry/evals/numeral-type-comparisons.yaml,sha256=vZPwpDEyyTjSCGO4eJoChUkvi6rL3sohRkCnInWP5mE,368
evals/registry/evals/numerical-cabbala-casanova.yaml,sha256=qxS3cSWF5ZTc0yz3yYykxl6xBx5NtT2Q50Bc28CMnxw,928
evals/registry/evals/nutrition.yaml,sha256=6s7B2EjIjkWVI0g7uUP0S8P77_RAZ1Z93rUlv8nXMCU,317
evals/registry/evals/ordered-history-events.yaml,sha256=02ULUiPubqC4Jpu6CVo7axZD23BAtVDnEvQZ8YkGtyE,224
evals/registry/evals/ordering_randomised_versionlist.yaml,sha256=0Ffe-vuPPU_HP6lxvm2mixKRMYuoxcRSLmpb0Ul-_dg,675
evals/registry/evals/osm_mapping_one_way.yaml,sha256=-4QSm2SHxtFeeekuzb3YValn8HL5CldtNSqUomCjI6U,392
evals/registry/evals/override-system-instruction.yaml,sha256=0OxOr9CFi5aWx7mfcupGPDT7IXU9xTE6zolQumnm9gs,239
evals/registry/evals/pantone_to_hex.yaml,sha256=94dVacWWGdej87Pe_uWXBYSVUONESEGXCkH5VWsFGRQ,194
evals/registry/evals/parable-to-moral-match.yaml,sha256=YRSDiSVNFqDrkFcxxuatV7NwuMppVdndNXJGZQuwLh8,502
evals/registry/evals/pararule-plus-multi-step-deductive-reasoning.yaml,sha256=1YIsxc8YQDFNYLvH6m36W9pzGe5M9Nz8gR8YPpK6-rQ,441
evals/registry/evals/partially_solved_crossword_clues.yaml,sha256=th3E26uTd5wud7ombx5VDLrpRkE8O64Zn8mXgEHFrEE,258
evals/registry/evals/passing-balls.yaml,sha256=UxAnb0ZRZI0l1xnr3t1oiy0DptPCUonHEpDL4n-Z9SM,322
evals/registry/evals/path_enclosed_area.yaml,sha256=gS3xtdZ67pZZDF2vdDlsWRHE4MSkYsj2z7et54Lepeo,304
evals/registry/evals/pattern_identification.yaml,sha256=B7y4I9N0FKPW3yY4U_e0vjNk2pdFFVtRSBJ8xP51qIQ,221
evals/registry/evals/persian-kinship-riddles.yaml,sha256=E6w8VBhAFFRO49krySpoUIzQkS3fRDTDNBcY1_cb5_E,353
evals/registry/evals/ph_calculation.yaml,sha256=-N-SS9J9zuKpdl2Wu3FWl_Ck1SsAc2Qn-D4ppzVnzWc,287
evals/registry/evals/phonetics-identify-words-needing-missing-gpcs.yaml,sha256=IfUrc1SD27sGaCGxBAjqXzT5NYXSy018sntREOBP3P4,429
evals/registry/evals/physics-interaction.yaml,sha256=HovqQUjR2cZuod679bd_-qz74Zw_j0Bwad6zPBaRAvI,317
evals/registry/evals/pointer-value-retrieval.yaml,sha256=Tp-s_m3kMFsY-pFgkKCaT_ydnaO51_6TEtQ70q-qPNg,2427
evals/registry/evals/points-on-line.yaml,sha256=p2t0ENxWNDa05ZVCT332q8vA5_CTz-I4yBNonl2qxOA,297
evals/registry/evals/poker_analysis.yaml,sha256=TVhzxQqRHhe-71o55S5zpZlx6s8m5l6cc-Hn65c5EWg,309
evals/registry/evals/poker_hand_ranks.yaml,sha256=SiZBsIvCMt6jlM5MMOytTMDMzm6gDwhjkF1m8oT4who,188
evals/registry/evals/polish-lexicon.yaml,sha256=rZFvz04fEkDicNrLd5N6WsuTI_fr5q90nSnao-2VdDw,290
evals/registry/evals/polish-proverbs.yaml,sha256=sWHr8lU3X8mT3eLeAC0ggBnDXr4kRXpv5R97UP_nxxk,272
evals/registry/evals/polish-syllable-count.yaml,sha256=_uRZ21amHGxkDyRnEBWavUsJ-kYcdXRIA_VqdqACcxQ,214
evals/registry/evals/polish_rhymes_generation.yaml,sha256=VK6fl4zEokq9qiJY68uO75t2XPCMt8z3KffGDuMw2U4,270
evals/registry/evals/population_span_extraction.yaml,sha256=8hNXgkFkzOaT6QhL3LdC7x57aiuhGvLHjcero7nGMwo,579
evals/registry/evals/portuguese-kinship-riddles.yaml,sha256=xrwlmeG0mKhDjb5KnrPFj5dL38hJ0w-aIOG2z_u4yec,363
evals/registry/evals/portuguese-sarcasm.yaml,sha256=aLRwge2tFexwwtuXmTzgHM581X-lMxChT3VTxx5yjyc,277
evals/registry/evals/portuguese-syllable-count.yaml,sha256=DvVXOPMMDQCLkT4O7zq2zBVfdxcJDjdmnWMi8hCGYrE,297
evals/registry/evals/positive-binary-operations.yaml,sha256=moiZ-N2fs9KXLcPG_sxT3cGkQqTG-n2gsDYop9RUXT0,320
evals/registry/evals/premature-conclusions.yaml,sha256=CShHYKkG7_oBrLn01TxlBorLfWL_0Ujbo-uxc3tNN74,354
evals/registry/evals/probabilities-word-problems.yaml,sha256=S9fj1LTYKmNtFWdUwfWq2jvwTaKPgWpdwQlecl0k0C4,347
evals/registry/evals/probability_questions.yaml,sha256=qS9pBdUVo-bH7-ewLEjYqIicgIi7d2urjdM2McipSWc,335
evals/registry/evals/product-ie.yaml,sha256=K_H56q_K1n_wplmlbdx-MJ-lSpgCrheQV3YmeUaOUDw,1434
evals/registry/evals/product-matching.yaml,sha256=flbFWqo90lEFLtZf3GSpRkWd0kvXI1LZ-iJf2UhWoE8,1847
evals/registry/evals/prompt-injection.yaml,sha256=q01QVmMDBOgWbl26_3jNPGI5f9iKu9rVNEDk22uLy4Q,281
evals/registry/evals/proofreader.yaml,sha256=fk2hrHfYnfkdGWpJom8Zyors3yV1oFu61pBfZS3ytyM,262
evals/registry/evals/pure_korean.yaml,sha256=H41iSpq-A0f9w0CzGBLfZWeLc2P0wHbkGR_M12l2nl8,235
evals/registry/evals/python_list_comprehension.yaml,sha256=csKNmg0jlOYqO1TFO50sQg6zKNF_VQsW0Ayde0lf-KI,334
evals/registry/evals/qa.yaml,sha256=VzRFQP-dhOezS88MM3AZxwop0XWAMiW2vWoIfg5dYZo,282
evals/registry/evals/quartz.yaml,sha256=0EHWFx6lpnprBZgMgu6kc8MwGL_iFBPqwV-1K7UN_mM,219
evals/registry/evals/ral_to_hex.yaml,sha256=moSh3ifpn7Wv8iWEqg6yN8t1-voTjWM-OQ6BOQIyAcA,176
evals/registry/evals/rare-and-loanwords-dutch-lexicon.yaml,sha256=k_kLjx_5IMYV52jPUzIDVTQk63sVb5YEZBoEhE8Cr50,380
evals/registry/evals/raven-matrices.yaml,sha256=YEAP4309dnnCQ-k3NGCrWLjhCa2_839-Ze6oPNirdB8,14285
evals/registry/evals/reasoning_with_contradictory_statements.yaml,sha256=whbop_aJotmnMPEnI9tTo5-fowbCo4ZXMTmFyDamUN4,366
evals/registry/evals/rectangles.yaml,sha256=ULq-dSXtuxSktCM73PWWs4Gs5uLu3K3EH1BIiTdmfm0,171
evals/registry/evals/recurrence-relation.yaml,sha256=N94HfmShehFvvtosC_m15wvQAu612H5RwSydU7RzN9I,216
evals/registry/evals/regex-match.yaml,sha256=qOU03xzvbRFT1C1IFPru3TbPLrlrAyWxsb6u3NeTaRc,175
evals/registry/evals/relative-orientations.yaml,sha256=dFtW-UInzP67Gi80ZvR4TREbSW_cPdFKoQRNumIvgso,221
evals/registry/evals/research-question-extraction.yaml,sha256=ubJ7eA-3QzBGJmdvidTMh7C34brQ6S8FMwtkgRAYIQQ,544
evals/registry/evals/resistor-ohm-calculator.yaml,sha256=dGPG5l8O6jLSUty2tnHR5zjT_97FdHT4fPGNX7C0pMY,350
evals/registry/evals/resource_id_extraction.yaml,sha256=D3QnLnN4IE8HeMyIGukHJnM1c6MzC-iyV0rQUxAc2Nc,362
evals/registry/evals/reverse-polish-notation.yaml,sha256=SjCbi9WQJ5rGAaIv0wANB_h7sBvB1vApsf0fgWs4uTg,321
evals/registry/evals/reverse-shell.yaml,sha256=1a-LZubKZJwhBc2F1PvgrClLO1jceyu90RUPXcf2t0s,283
evals/registry/evals/reverse-sort-words-eng.yaml,sha256=MzccyfZ_ZpCXytqavOiZnEm6NWZQOMx3AhTSSxne0U0,370
evals/registry/evals/reverse-string.yaml,sha256=4YD9q2QugHODFDUYDZ1jh-ALuNyiQ_23jogZudLgV1s,378
evals/registry/evals/rhetorical-devices.yaml,sha256=iFWFEzUPMNDX35IElSPecTUFwg2Uv3VIKXuY6UzkkYg,294
evals/registry/evals/rock-climbing.yaml,sha256=iGFpgRJyx2rPxORI-I87rXNYE9yf1WF6iDf5CPQNYMo,182
evals/registry/evals/romanian-logic.yaml,sha256=AFO0vm_KtsB3XBEmxlhMtPJyr4RQgOHYWC3lhgDZGSM,338
evals/registry/evals/romanian_homonyms.yaml,sha256=ZWlP018xtsHPG8qPIu-KtkteeUhe85cN387m1g8ywoQ,266
evals/registry/evals/rot13.yaml,sha256=bJqvQ-bO_7rCoFTj_PqNZrDRBW0SJOYpSpJIPdBIup8,255
evals/registry/evals/ru_rhymes.yaml,sha256=JXBbw53hV2gi3LLVNBkoBd-IsBfhqJJBlZZIVgGPpoU,468
evals/registry/evals/rubiks-colors.yaml,sha256=VwyOsuMSHkQHac0Hlr4vb8wqK5lb9AwlhK7_OiWziAc,294
evals/registry/evals/rucola.yaml,sha256=OXpRYqknO92SzBVkotHK2LmPK-zQPl4JPOQvzU60AMg,236
evals/registry/evals/russe.yaml,sha256=KoXTQAtgybFlUqfjwytL6RfuU22UwV2VAMkO1ifeZCU,364
evals/registry/evals/russian-english-homonym-context-resolution.yaml,sha256=AviQ_iYCTyePMAZ86GKb5bR5e_wMqziTgr6w-gGvpzM,298
evals/registry/evals/russian-lexicon.yaml,sha256=RBcLoUIb37M5KdVI8NyNyBl7zoLZMRnnrJiM8ou22es,278
evals/registry/evals/russian-nlp-tasks.yaml,sha256=fy1TANYx-H3SPmLRAdYDbYXAqHEPB426OEuAN_urgCQ,199
evals/registry/evals/russian-rhyme.yaml,sha256=1mARLGCY-28V8HMtRToMiS-s-szsdbIBsV9ohi4p23o,254
evals/registry/evals/russian-verse.yaml,sha256=cwHuZmeDmzfkTS5XOMf5cqB-D9hmhN1Emg5ypfaeFMA,285
evals/registry/evals/russian_medical.yaml,sha256=XU8ViRSVGluUYXweOBlzialcuCk3I_3vCCDTXjG9i0M,190
evals/registry/evals/russian_sarcasm.yaml,sha256=jOYzSdnNJPXGBY54Yb5ef5DEGQ040L55Du_mHWh8CRc,248
evals/registry/evals/sandbagging.yaml,sha256=sNy2u2IyuOazHVksreBbad30O5GSGVsPFtkQ5JwIYrw,1873
evals/registry/evals/sarcasm.yaml,sha256=OnfU-Hac-TuB3382Bs63-tH6Lo61s0ya0ygA9DWvS1w,274
evals/registry/evals/schelling_point.yaml,sha256=RP1P4V4GND7SRNOOM9FclyJ-OroC4fOBD6Kg_qY7PnA,2169
evals/registry/evals/seating_arrangements.yaml,sha256=rX0evVx4LH9icxNKUlcetQyvOz9M3uJ4Dzpu7C82mf8,338
evals/registry/evals/security_guide.yaml,sha256=i49LrtzNZwy7C9C7ZMT5UWZdUqDfmehmnKmLBR4D01U,334
evals/registry/evals/self_prompting.yaml,sha256=WjfUQ4phEP8dji335k1oz9YzbcT78RmE1AHxtwqDoOI,700
evals/registry/evals/seo_keywords.yaml,sha256=RV9kMJcfGGwMutKc4bZ-unlQb2zZ07oc30po-JjzhjU,328
evals/registry/evals/sexagenary-cycle-calculation.yaml,sha256=mhgIVGYu-QfDqioZvPVuCNn2Gg-QJ6N4l0Vo1f2D1mY,247
evals/registry/evals/shape-in-shape.yaml,sha256=qCljT5OfPaz1sRq6hJJMXm0GrT8yJt6ySZV2_21mLs0,295
evals/registry/evals/shared-borders.yaml,sha256=Pnd7Ld6ZQxbd1XEN2VV4N8G_IUzxj7DeHgjy8flGeUM,379
evals/registry/evals/shopping_discount_comparison.yaml,sha256=LATGYAG9o3HoIi2WUDv-4ufXVF2126JrG7AgYB2c77o,328
evals/registry/evals/simple-block-puzzles.yaml,sha256=kbeknpZExqZhzLT7sV3vr-fBSleNLTXP8E9_ouwc2hk,341
evals/registry/evals/simple-charting.yaml,sha256=7GSg1TnokXG2jLCRulOrpr8OnAHcmpOZAF5EQ0GzTgc,295
evals/registry/evals/simple-knowledge-mongolian.yaml,sha256=TcMjFqx8Ng85w7JZ6e-eEWByk2G1Sa6UeAYauRlQEMQ,364
evals/registry/evals/simple-visual-understanding.yaml,sha256=FZG8GrAEFJaXlahe2NGr1f2ZKRVvva7mSQov8OTash0,377
evals/registry/evals/simple_math.yaml,sha256=3ZzzKXLt_UjK2uBNPZDrwpKrFnE6fqxn7c4zzfJ1f5I,253
evals/registry/evals/simple_physics_engine.yaml,sha256=Xuvkz8lT4tMJqkoMhhHdpFfsfI3ogeeK2SY1W39SE8s,338
evals/registry/evals/sindarin-fluency.yaml,sha256=7SVAAprfqctlJakggzSM5CPjP8BDhZLttJ847MA24To,292
evals/registry/evals/singapore_data_protection_decisions.yaml,sha256=NHH-epIL-yQSSVdnaq5bcZF9tydK8Y5oGSSzbttPKNI,541
evals/registry/evals/singlestore-vectorsearch.yaml,sha256=GQwio-jLDeidiDqEh8EitgA0HfOCqDNo4rIF0fCXyYY,430
evals/registry/evals/skill_acquisition.yaml,sha256=oP-z5olGt6jNFhJ1uz1p_7VlNrDfgNSONjs8FsWuf60,4459
evals/registry/evals/smiles_to_formula.yaml,sha256=1EfI8Y6x_NTjgp6CKguYGFqc90NGJBUpGEAny8XNCBY,206
evals/registry/evals/soc_codes.yaml,sha256=cd0O1z5HWkIR6WhgquChQarRFx-_-jwIT5DuuKtJGRM,382
evals/registry/evals/solve-for-variable.yaml,sha256=nu8izNq9PQf6CzfDj5Q8DKMuBS7IDJmEmZ7xUWNIlXY,304
evals/registry/evals/sort-numeric.yaml,sha256=QuXSOrj0W8xqn20Z3Xz1fVZtoOnUYco2nyxZktxrUxY,374
evals/registry/evals/south-african-bands.yaml,sha256=Ak8myxi2XoFQv23NT72sfIHkQCNLrfrFZVDX1LH0rWM,441
evals/registry/evals/spanish-lexicon.yaml,sha256=slXr6TW9-hZ8MW14qBzMuXjd-agZSmoWIw3P2DK8hGo,310
evals/registry/evals/spanish_feminine_noun_masculine_article.yaml,sha256=BS1lafn6OboARgfrARq6g-iYBETi1NM4jXEqgxafrgw,453
evals/registry/evals/split_chinese_characters.yaml,sha256=bhVwTzPS-0MYbSiKt0uX9Sb6eLyIfs9VFcyCFBnqnrg,227
evals/registry/evals/sql.yaml,sha256=F-ZH0r1EyPXhOtitUwVyuA17aGSCOcSsZly2VL98HVo,1048
evals/registry/evals/squares-gpt.yaml,sha256=DRn4BvDRmcJ7oukYftYLzIGY6O7rL1uOpqyGairjOik,273
evals/registry/evals/stats-tests.yaml,sha256=azj7949bHgxbLDSs_4pvyx4rAJXrZdqO_W9UdNfSHwc,174
evals/registry/evals/steganography.yaml,sha256=bSAe5n1oM0p4LOTaNsOru0xTeLdpsbM_ilW7s9mKivg,1403
evals/registry/evals/stock-options.yaml,sha256=OX7A_FTLT59BN_0ynPs_3WZw_EoO9Fj78ak92ce_7OU,4472
evals/registry/evals/superficial-patterns.yaml,sha256=hO1DDrucJfWf8i_hYKKB_esNjlohIIZVLxy58wbyzsY,286
evals/registry/evals/svg_alphabet.yaml,sha256=cfhMMfD8t_WkKke7y22AqJ1mlQBPrVm5Yy-1XyiN2yQ,272
evals/registry/evals/svg_to_text.yaml,sha256=tbFaupas4OskRnPITdwXXWNm_0alGxROqmJR4st28c0,250
evals/registry/evals/svg_understanding.yaml,sha256=xtPDHMd1XWTuwYy6q9kLSzeSXxKudPEcXuVssYQzdbg,245
evals/registry/evals/swap-words.yaml,sha256=j4DbLz97h70zj-_y1CYTfv7TfG3n3dR_5ahIrU3UxuY,171
evals/registry/evals/swedish-spelling.yaml,sha256=KUhH-CFMXRnvsYPXVkkjEZSwTnPQOgNsOLU_VegJPYE,272
evals/registry/evals/swedish_sat.yaml,sha256=3I_e1woOyG3TogavIv5dUHuc4G9pz2VkUR6gJDKAth4,376
evals/registry/evals/syllables_long_words.yaml,sha256=-YSs3YubeV3ikMQt67TzW24TZMp2F8gg7R947orY9zY,197
evals/registry/evals/syntax-check.yaml,sha256=Xpwl1a8DjnM270EfoOPgC2ybyt57D4czPuk9vzf1TLM,269
evals/registry/evals/taxes.yaml,sha256=_sUoQmiuP0KinVWmfinr3WZld-uWKYLLsAMf-xZz6lY,150
evals/registry/evals/tempo_to_measure_count.yaml,sha256=z5kBFRsnqSIH9uFwVHMZbaTdScgnE6bkE6aulVdzsNQ,399
evals/registry/evals/test-basic.yaml,sha256=KbRk8TxXbrGtHjFKxle2sekfNvho-GQzqoTw6g9l_Vw,1169
evals/registry/evals/test-comp-sci.yaml,sha256=yqT5_aFRZxltThsyxxpc_nX9SxJmUO0hvOtbowkejAo,400
evals/registry/evals/test-modelgraded-battle.yaml,sha256=ukmOqejQo_UgXA1HVZVtPdoOJYAmD3olntrWWjdlhxk,1271
evals/registry/evals/test-modelgraded-generated.yaml,sha256=S7fIzT6SB6gkJUUtiMeZwsS72IsE2xwAzQfNClC66ek,347
evals/registry/evals/test-modelgraded.yaml,sha256=NiEosgFLjYvDn4PbDC7D3fNhEYJsMc6V3KD1eeVvJ9s,3083
evals/registry/evals/test_english_pronunciations.yaml,sha256=7q-G6slHETUl_rSpcD_aWXuALc0x7WYP9TmXsUI9-gE,376
evals/registry/evals/test_japanese_english_numerals.yaml,sha256=ShN9YLuKA8NEPVw4oB9Om_W5nQyBLSkKdkyy-fit-eA,381
evals/registry/evals/test_japanese_radical.yaml,sha256=TuNQB230HUnFcEYBGnOzU67iyKShSsoZ54SGuJySMF8,340
evals/registry/evals/test_japanese_units.yaml,sha256=QtSc0sFKL8l7dyozZB3gHEeg7fCLJrkiT5kVpmJwwx8,335
evals/registry/evals/tetris.yaml,sha256=znxB2W_ueFcHGCZEdAQHePtgnD7k7zZQA_x9BPW08NI,360
evals/registry/evals/text_compression.yaml,sha256=BQgyJxWNvILbEDlUgbZ-HDo8sT8D83jJjQdrqSnowuI,1256
evals/registry/evals/theory_of_mind.yaml,sha256=yku4gVPEvebTl9-asmpsJZvov7fuSZ1KQDYdwRC5ZoI,4269
evals/registry/evals/thirty_six_stratagems.yaml,sha256=UZQmVkoAuGQFGzI7DpYiJxrQVXhohAzsw0UPYzHz2KM,325
evals/registry/evals/three-pt-mapping.yaml,sha256=bsxAEXvPC-weUekXjw1ouWgqGnT1E7F7yUhRX-w2cAA,323
evals/registry/evals/time-zone-conversion.yaml,sha256=nie50PQAEz_XCl-90BqntWOrsD9RbcryI4lSFrgVPps,397
evals/registry/evals/tokyo-station-number.yaml,sha256=Tye6oEIFL7UJNOOthruNgyn-z_ROhCbcf4B9im9okns,280
evals/registry/evals/track_objects.yaml,sha256=HMiDD5C0l7pQ7k87xP7nwjte-zEPVL3A82paaHFvukA,263
evals/registry/evals/track_the_stat.yaml,sha256=tmmDj2fixPyBF5xNMLOomPNAC_bT3KE9pb22ke5H4wc,501
evals/registry/evals/tracking-shuffled-objects.yaml,sha256=oAr2z9noPLSNPG1483-f2IIIEl6eVASfgV23nfkQPY8,230
evals/registry/evals/tricky-word-problems.yaml,sha256=1behCvhbtHtigxDLQo20s18nKfImW7aWXl-lAEJ_mnQ,373
evals/registry/evals/turkish_characters.yaml,sha256=q7xPZehczQorXckXRkoQX79mUS_f5AVgDLKMsTPnxsA,302
evals/registry/evals/twenty_questions.yaml,sha256=c2tul1zoSpiGRg2PW9gYe5IY7AwcgMMl9GsHINQ908c,2027
evals/registry/evals/ukraine-eit.yaml,sha256=tXjtkAH60MOiMxhg79GXfd5eLyWf1P1f9Kw9oiDyQ_4,174
evals/registry/evals/ukraine-gec.yaml,sha256=yM4Uu_8FSy8xNPLYmqsPMVGRJDazdirwjM4arum-N38,6692
evals/registry/evals/ukraine_electronic_petitions.yaml,sha256=ZptbdYLuIcY75gNN8kjy_klUI7pYP_HctMNkA3TiP8k,319
evals/registry/evals/unified-patch.yaml,sha256=B4aiU1HjFW10W4EaOUh2aB58tlqJKY9pzinrrQcX1IU,189
evals/registry/evals/unique_combinations.yaml,sha256=Ij0uWchgtoccGv500S8zbTEFZX7wSyHeEDCbwrv6dw4,307
evals/registry/evals/unsolvable_questions.yaml,sha256=5_Bc9DcofRNJ4PST8c4nnt7LJeiyhS--FVcU5PlRK-o,211
evals/registry/evals/unwanted-rhyming.yaml,sha256=nffZ7RZEUdOCpe2kXS2NeGfk0dgryNaymDmc-6RCL7M,274
evals/registry/evals/urdu-lexicon.yaml,sha256=zRGd6yteF8gAG9cZbK-KkcpsFU3iFjQtEEuNDUdssO8,289
evals/registry/evals/urdu-transliteration.yaml,sha256=WXeQtVA6UZOrMy56sW9eZrE5BIByVblDuk_ykVbkl5U,297
evals/registry/evals/us-tort-law.yaml,sha256=gqq4T-Xof57eWncpshQUMePSGpW_pAdWKbk7kL4y2vE,329
evals/registry/evals/utah_real_estate.yaml,sha256=AzGWr03krh4WF8m5n5wYCLZxJThnUJfOkThX86h6fP8,284
evals/registry/evals/utility_price_parsing.yaml,sha256=ph84pD5IEJLDeTFC36MOZPpFek6a7iRElg_dWxVg3fM,214
evals/registry/evals/viewport_to_grid_size.yaml,sha256=eUXzWH93UTRVGxugY54I0-ZWRJPaRvf-6httJh0-nhE,379
evals/registry/evals/vigenere.yaml,sha256=3MVVmzxE8t3rIFHCQ_fAaEcUd7gqKiFu9tQyXvaAzhw,266
evals/registry/evals/vintage_phone_keyboard_decode.yaml,sha256=FXvwiATImlWHWYVPMqVrBEGa2bF6TCSmy70bwjNmoQY,503
evals/registry/evals/which-is-heavier.yaml,sha256=DVA-pI99UH2Ad0-lb3B9HgtG9sFSiLl3gjH64Mdbst0,365
evals/registry/evals/wkt_understanding.yaml,sha256=z42MKP9Ejex69QFJX8bdV6jfXHRwiFy7ltDi9_SvJZE,392
evals/registry/evals/word-association.yaml,sha256=ONEfyzAWN9_vC5tj-d9dcSLy8o5Cw87RIhR6CwVz-Qk,1451
evals/registry/evals/word_vector_over_reliance.yaml,sha256=78mALlnOr-_Gm-o84RuExdvtmN62i0O2vVBmdl0sD_w,397
evals/registry/modelgraded/arithmetic-expression.yaml,sha256=wV9ylHviFiSgjWrw1iWKdWresgLKyYZ-kgspyIz1ApY,976
evals/registry/modelgraded/battle.yaml,sha256=GyhK2Zpiz_8ZjMClad7ZnNgwKiBMlNTSXlqRoX0lMF8,492
evals/registry/modelgraded/best.yaml,sha256=E-sMaNOYFkB0gvd5dpCjDxqDm4xoApdZwlBl_0b-0Hk,267
evals/registry/modelgraded/closedqa.yaml,sha256=S5zCk_p7Jh7efQPn5xvLd0nF01CqRk48sza-FcOmsMs,831
evals/registry/modelgraded/diversity.yaml,sha256=72YBUR5bI7MIjgl77U8jUlvOU-fVhrIB9QRIMV1nigs,246
evals/registry/modelgraded/fact.yaml,sha256=KVo_PDrQJRG13VN4vkvbsYeCIRHfKS9Jol0PJ7B20PY,1157
evals/registry/modelgraded/humor.yaml,sha256=DRYksqdu0EihCb9n9jlPFt3IUvfwfz_oxUC-42srlpQ,2564
evals/registry/modelgraded/iambic_pentameter.yaml,sha256=cOZRPR5s56D-2ONXyYqDPxkzY0MIjdcqZeDnAeXz6iI,275
evals/registry/modelgraded/keywords.yaml,sha256=B3m1OdhUjLGC1j1BC6r6Q6QkvAvUPxb-AJkUuGUd3kg,765
evals/registry/modelgraded/onomatopoeia.yaml,sha256=7hsdCl0pmoPc6kaSLXUbPrpWn4cKA92UyiJYBti6lz8,1355
evals/registry/modelgraded/possible.yaml,sha256=aRscHlzOdERDalY7usmXnuO3LGEMVPQaCDcT-VTMQ-k,700
evals/registry/modelgraded/regression-equation.yaml,sha256=RBT_Jz8LtFKd5-qpof0AnyIKjlEfzocBtQ_qUo5cFHY,887
evals/registry/modelgraded/research-question-extraction.yaml,sha256=OMjvu6oRoZOYxdIJ8NxRX0ISlp7qM83k7y292e-FkAs,1723
evals/registry/modelgraded/rhyming.yaml,sha256=vhO7rNijTaCc5y-jx5GOJl3i16bwYP8e0urkQasLMXs,309
evals/registry/modelgraded/security.yaml,sha256=a01Fgo4--V27C5Is6rTvIATSEJj-bSlseyh5l82U6tA,230
evals/registry/modelgraded/singlestore.yaml,sha256=xQ-L3td9Y0yAGT-v4G6_a8rZLiPquXR6eRws0iJP4tc,1416
evals/registry/modelgraded/sql.yaml,sha256=aN6bn2GTTRwNy5OUUDKap6DKXxZBUOE8IhJHrWgIKoI,1203
evals/registry/modelgraded/translation.yaml,sha256=D2ucKsI1_05eQpoAoVP5uqdbbv6upH5Jk7LZuTPkbhA,1204
evals/registry/solvers/already_said_that.yaml,sha256=x7BCekS9FTeY-faVqE2HCOYMDm3kRt91UvN_NNI0fL0,2400
evals/registry/solvers/anthropic.yaml,sha256=AwLss7HEhQtm3o9oOCR-A8zUtERcKon4S9N3OzKCukU,3683
evals/registry/solvers/bluff.yaml,sha256=SybmhZxuIE9HNZB8WV5nlcfuPX50xZ-0_zPkiaZx_nw,2630
evals/registry/solvers/cant_do_that_anymore.yaml,sha256=R1FUmQg5vp32pjVa_nxP6YGv15GGwg61DSLB5aLCYe8,481
evals/registry/solvers/defaults.yaml,sha256=oSHYPwKyzLrx3pdh3tKkwX3m96C0E192IlRgJgMuZDo,9562
evals/registry/solvers/error_recovery.yaml,sha256=dqO22m_px-yjszpwvlKvZQXEksrjQbMC4HTLGo9X8c0,1125
evals/registry/solvers/function_deduction.yaml,sha256=Y4X3hrM8OZvw7Qv33P-SrfD6k3GzHX0hHh1OdHfWFCI,5850
evals/registry/solvers/gemini.yaml,sha256=RsKmswdxr5O4muN_0j1-BibkD9zZRojSPYEbtQ-UZpM,560
evals/registry/solvers/hr-ml-agent-bench.yaml,sha256=avM9v0XvKU0-pZJdjtjYsWatVSvvxdlNJObb9Bm2QlM,1223
evals/registry/solvers/identifying_variables.yaml,sha256=rUm235HXM1Nzse7-Fyf3GoL-3QKZ9xdi6r3BzyS6Ilw,187
evals/registry/solvers/incontext_rl.yaml,sha256=rFL8VREUV5NHfcQ03s32bevaOcJicDy3xsgMM4cPRGs,823
evals/registry/solvers/make-me-pay.yaml,sha256=zDDekjyaCv7D18fKeX7W8mpeNHjqgqml8fJxktS8oBM,3455
evals/registry/solvers/multistep_web_tasks.yaml,sha256=b-OWsoKQFT9C_Z5nlyP-ga7kGVSFiA5YhTxkyKLvFQU,1891
evals/registry/solvers/sandbagging.yaml,sha256=c6Ho6vNGXCGmfMlCL4e9bUbuCp-0zcil7tr-SOnAE6A,2829
evals/registry/solvers/self_prompting.yaml,sha256=DBeypK86PLr_zJYJNvVbL05RclUWLn1uroJweYM0A3Y,4368
evals/registry/solvers/skill_acquisition.yaml,sha256=RAWj5OLIMERrI6mZtGUNfmQTgTPkD59Kpw4CIAk6dzw,9319
evals/registry/solvers/theory_of_mind.yaml,sha256=nr1W_S8jMSlogQ3u2OH0vPoWTIQVeqvjcN31EyJF5Lg,12911
evals/registry/solvers/together.yaml,sha256=IN90xmhSxBriXqXMxEBb0vPOFNFpmJ4FxOwuAxhUK04,2886
evals/registry/solvers/track_the_stat.yaml,sha256=Jw6okeUp-FARmwmBrPvjRkIXMqB6tNUYWjJ3j6su8y0,2615
evals/registry/solvers/twenty_questions.yaml,sha256=4GC_SvexGk37zuuoBOM3qStKHn53jkXTgI5dk_WSmNc,2609
evals/solvers/human_cli_solver.py,sha256=UHQvXCbABueG3hW_HMmiD4G5DqN2y-kbVD64-9cwBsA,1391
evals/solvers/memory.py,sha256=z47RAynZmwLGegGpFVT-YZzzfCxvDj3LhDKRLa1tMHc,2375
evals/solvers/solver.py,sha256=wWJgNYLl8oMwm9fmfJBz3uoWE7SsKajQ_myebkVHeeE,7009
evals/solvers/solver_test.py,sha256=_yXFTeaV3S_3oWPhaavcgALbMNkpd3yqlMEW61Qfkl4,2200
evals/solvers/utils.py,sha256=pEKyEACPCjA_gnsXIutk3vrN1iKzKR1n9pvwBjt-hzE,2183
evals/solvers/nested/cot_solver.py,sha256=TZtZ7eBKgk6DoGN8CnspxaHuyDQ6eMDK_zveae48frU,3169
evals/solvers/nested/fewshot_solver.py,sha256=Vjc3xzVMveanaffR-3tGutzu31nESD_sPWp6kRiudlM,4268
evals/solvers/nested/hhh_solver.py,sha256=IXL_RxL8qvoZXIrVxVwvqymNHzOVVcHmVbeFNtBpRtY,1497
evals/solvers/nested/self_consistency_solver.py,sha256=rDViX4UlneBaF9xthun-xckB7FEzAGweUmPhn7K2ozM,6290
evals/solvers/postprocessors/base.py,sha256=oUZTqBDCWZuzMP56kGYepxrS6Z8zFUIgQTk0pDzobEg,479
evals/solvers/postprocessors/postprocessors.py,sha256=VyjZ1FLK4Q3Oiu2q2p9q0RG06rib_m9DN4SbXRpcKvA,1491
evals/solvers/postprocessors/postprocessors_test.py,sha256=vwIOkkT_HA8eirFWlKj3UdquzVzYTzxUXWV0z0l-ZAM,2196
evals/solvers/prompts/cot.py,sha256=u8VBZ6dvkE0RAeBt-qFHBds4AqtGG7a642s4jPG2Nns,316
evals/solvers/prompts/hhh.py,sha256=EsUYgQjUiczyUlAj-raxHFdmQCzqqAnwfANYK3RGMBc,8272
evals/solvers/prompts/hhh_test.py,sha256=SIkxNurjn7TSeoaTYrcvlmQFL1l_Oa26W7llOU9yEMQ,7253
evals/solvers/providers/anthropic/anthropic_solver.py,sha256=9MD0dgkOQGhHnX6Bp_pJ1S9T55fisBB1igvVXf3WlZE,5044
evals/solvers/providers/anthropic/anthropic_solver_test.py,sha256=Mo8NbahNHvhfJj76Hau9gL5Ii8453KaJtGxQ0VoKg_4,3903
evals/solvers/providers/google/gemini_solver.py,sha256=A_x8Hk_gXmnpoJvSbZhUVDO04gBql83wPhKDpy4P6xg,6853
evals/solvers/providers/google/gemini_solver_test.py,sha256=7k9rAxg0OAs3nUo6nRK4kfuUw9AHtjBHgVQv4mOXIHI,2313
evals/solvers/providers/openai/openai_assistants_solver.py,sha256=N8Nc-JHNlwzL7ViffUAeliy1591-zNAnWcgR_egpF3Y,10714
evals/solvers/providers/openai/openai_assistants_solver_test.py,sha256=GZlDHLTN4edJM4KnX-TjApVZPqiy7P9Y_G-5VUm4M7o,7355
evals/solvers/providers/openai/openai_solver.py,sha256=Q-YQMkUJnaxiW8xvGv2_-_BVJmLqSnSdfDpTknMUjiA,11635
evals/solvers/providers/together/together_solver.py,sha256=6DackeN3OJY28MEob2VgZI3E3QC9Nqq25O9PQCCeUiw,5856
evals/solvers/providers/together/together_solver_test.py,sha256=dZjHybb3WBphggQOyZkNA55sTpsBXhdmuLAsl02lzmA,3520
evals/utils/api_utils.py,sha256=zJMQV5Uze3sBJ2VipwdyDA0h2Sfs-OCKHUN6mS3CZMI,568
evals/utils/log_utils.py,sha256=mw-X0dr5wuTTzW5hw4fMkmkypYxBH4I0TawfrzpEkJs,2598
evals/utils/misc.py,sha256=l-SEafhRiI8cpMhXhit036DbMmrqbkyhHB2jHMX1Ajk,713
evals/utils/snowflake.py,sha256=1PhhD7sQzw-RAl7QADm7gSRFUl9_ZnXBE0AVRXUhXNs,4076
evals/utils/test.py,sha256=-Q721us8cJmIdoGTd6ZQGq24cMweAJPEFjCRPCsO6Mw,833
evals-3.0.1.dist-info/LICENSE.md,sha256=BV8hOleE7L5NJN9fU5vRNEkOMn1eg8UcK6Tp1c8wh1c,7022
evals-3.0.1.dist-info/METADATA,sha256=8h2O7uA4YCKE9nHpiy7e9ZBczTNNMYxNKPEsZpP-tBk,7873
evals-3.0.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
evals-3.0.1.dist-info/entry_points.txt,sha256=VCG8Tg79A6FR1bHPBG6Vk2ZSTKBsqb7HZhN9qa1kiS8,90
evals-3.0.1.dist-info/top_level.txt,sha256=-RPWnvpqWuvduYuK6BeQVBTji6GNx1s_SNB011XaRzY,6
evals-3.0.1.dist-info/RECORD,,
