-
Notifications
You must be signed in to change notification settings - Fork 1
Data collector #149
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Data collector #149
Changes from all commits
5677db6
db76f59
3005555
efeca00
ef1042d
ac74f5a
4335b4c
ef79f71
cc7ec9b
19e8d40
33728b6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| { | ||
| "sessions": { | ||
| "default": { | ||
| "generation_name": "retrosynthesis", | ||
| "project": "DeepRetro", | ||
| "version": "0.0.1", | ||
| "trace_name": "retrosynthesis_pipeline", | ||
| "trace_user_id": "system", | ||
| "session_id": "default" | ||
| }, | ||
| "metadata": { | ||
| "generation_name": "metadata_prediction", | ||
| "project": "DeepRetro", | ||
| "version": "0.0.1", | ||
| "trace_name": "metadata_pipeline", | ||
| "trace_user_id": "system", | ||
| "session_id": "metadata_agent" | ||
| }, | ||
| "retrosynthesis": { | ||
| "generation_name": "retrosynthesis", | ||
| "project": "DeepRetro", | ||
| "version": "0.0.1", | ||
| "trace_name": "retrosynthesis_pipeline", | ||
| "trace_user_id": "system", | ||
| "session_id": "retrosynthesis_pipeline" | ||
| } | ||
| } | ||
| } |
Large diffs are not rendered by default.
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,190 @@ | ||
| Cn1nccc1[C@]1(O)CCCC[C@H]1O | ||
| CC1=NC2(N=C1N)c1cc(-c3cc(Cl)cc(C#N)c3)ccc1CCC21CC1 | ||
| CCC/C=C/[C@H]1CC[C@H](C(CO)CO)CC1 | ||
| COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCC(C)(C)O)c1 | ||
| C=CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(C)=CC3CCC(O)C(OC)C3)C(C)C=CC1O)C(C)CC2OC | ||
| CC(=O)NC[C@H]1CC[C@@H](C(=O)Nc2cccc(OC(F)(F)F)c2)N1 | ||
| COC(=O)c1ccc2c(c1)C=CC(=C(Cl)Cl)CO2 | ||
| Cc1ccc(C(C)(C)C)cc1S[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O | ||
| COC(=O)[C@@H]1CCCC2(CCCCC2)[C@H]1O | ||
| CC(C)S(=O)(=O)N[C@H]1CCOC[C@H]1c1ccc(I)cc1 | ||
| CN1CC[C@@H](c2c(O)cc(O)c3c(=O)cc(-c4ccc(C(F)(F)F)cc4)oc23)[C@@H]1CO | ||
| CC#CCn1c(Br)nc(C=O)c1C(=O)OC | ||
| Cc1ccn2cc(CO)n(-c3ccccc3)c(=O)c12 | ||
| CC(C)(CO)n1c(CO)nc2cnc(Br)cc21 | ||
| C[C@H](c1ccccc1)N1C[C@]2(C(=O)OC(C)(C)C)C=CC[C@@H]2C1=S | ||
| CCOC(=O)c1nc(N2CC[C@H](NC(=O)c3nc(C(F)(F)F)c(CC)[nH]3)[C@H](OC)C2)sc1C | ||
| COC[C@H](C)COCc1ccc([C@@]2(O)CCNC[C@@H]2c2noc(-c3ccccc3CCNC(C)=O)c2Br)cc1 | ||
| COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCCCC#N)c1 | ||
| C[C@@H](O)c1nc2cnc3ccsc3c2n1[C@H]1CC[C@H](CO)CC1 | ||
| C#CC1(O)C(C)=CC2(CC1(C)C(F)(F)F)OC(C)C(C)O2 | ||
| CC(C)(C)OC(=O)N[C@@H]1c2cccnc2C(=O)CC[C@H]1c1cccc(F)c1F | ||
| CS(=O)(=O)CCN1CCC(c2ccc3c(c2)-n2nc(-c4ncnn4CC(F)(F)F)cc2CCO3)CC1 | ||
| C[C@@H]1CCCN1CCc1nnc2cc(Br)ccc2c1O | ||
| O=C(Cc1cccs1)NC1C(=O)N2C(C(=O)O)C(CBr)=CS[C@H]12 | ||
| CN1CCN(c2ccc3c(c2)[nH]c2c(C(N)=O)cc(-c4cnn(C)c4)nc23)CC1 | ||
| CC1(C)OCC(C)(C=O)N(Cc2ccccc2)C1=O | ||
| COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OS(=O)(=O)C(F)(F)F)c1 | ||
| OC1CCCc2sc(-c3ccncc3)cc2C1c1ccc(Cl)cc1 | ||
| N[C@H]1CC[C@H]1c1ccc(Cl)cc1 | ||
| CN(C)CC(OCC1(c2ccc(F)cc2)CCN(C(=O)OC(C)(C)C)CC1)c1cc(Cl)cc2cn(COCC[Si](C)(C)C)nc12 | ||
| C=C(C[C@@H](Cc1ccc(-c2ccccc2)cc1)NC(=O)OC(C)(C)C)C(=O)O | ||
| Nc1ccc(OC2(S(=O)(=O)c3ccccc3)CC2)nc1Cl | ||
| C[C@]1(F)CC(F)(F)[C@@](C)(c2cc(N)ccc2F)N=C1N | ||
| N[C@H]1CCOC[C@H]1c1ccc(I)cc1 | ||
| COc1ccc2c(Nc3c(Cl)cncc3Cl)cc(=O)[nH]c2c1OCCCCCCN1CCCNCC1 | ||
| COc1cc2ncc3c(N)nc(-c4cncc(OCCNCc5ccc(F)cc5)c4)cc3c2cc1OC | ||
| CC(C)c1ccc2c(c1)OC1(O)c3ccccc3C(=O)C21NC(=O)c1cc(-c2ccccc2)n[nH]1 | ||
| N#Cc1cc(F)cc([C@H]2C[C@H](F)CN2c2ccn3ncc(C(N)=O)c3c2)c1 | ||
| NC(=O)c1cc(-c2ccc3c(cnn3CCN3CCOCC3)c2)nc2c1[nH]c1cc(N3CCOCC3)ccc12 | ||
| Cn1oc(=O)nc1/C(=N\OCc1cccc(N)n1)c1ccccc1 | ||
| CN1CCN(c2ccc3c(c2)[nH]c2c(C(N)=O)cc(-c4ccc(O)c(Cl)c4)nc23)CC1 | ||
| COC(=O)CCc1cc2cc(-c3noc(-c4ccc(OC(C)C)c(Cl)c4)n3)ccc2n1C | ||
| CCOP(=O)(Cc1ccc(Nc2ncc(C(F)(F)F)c(Nc3ccc([C@H]4CC[C@H](N5CCN(C)CC5)CC4)c4c3C(=O)N(C)C4)n2)c(OC)c1)OCC | ||
| COc1ccc(C[C@H](C[C@H](O[Si](C)(C)C(C)(C)C)[C@H](CC2CCCCC2)NC(=O)OC(C)(C)C)C(=O)O)c(OC)c1OC | ||
| CC1(C)COCc2nc3cnc(Br)cc3n21 | ||
| CC(C)S(=O)(=O)N[C@H]1CCOC[C@H]1c1ccc(-c2ccc(C#N)s2)cc1 | ||
| C[C@H](c1ccccc1)N1C[C@]2(C(=O)OC(C)(C)C)C=CC[C@@H]2C1=O | ||
| CCc1nc(C)c(C(=O)Nc2ccc(-c3ccc(C45CCC(CC(=O)OC)(CC4)OC5)cc3)cc2)o1 | ||
| COc1cc2c(Oc3cc(C)c(C)nc3-c3cccc(C)n3)ccnc2cc1OCCNCCO | ||
| O=C(CO)N1CCC(c2ccc3c(c2)-n2nc(-c4ncnn4CC(F)(F)F)cc2CCO3)CC1 | ||
| CC(C)(C)OC(=O)NC1(C2CCc3cc(Sc4cccc(OCc5ccccc5)c4)ccc3C2)COC(C)(C)OC1 | ||
| CCC[C@@H](CCO)Nc1nc(N)nc(C)c1Cc1ccc(CN2CCC[C@@H]2C(=O)O)cc1OC | ||
| COc1cc(-c2cccc(C(F)(F)F)c2)c(F)cc1-c1nncc2cc(S(=O)(=O)Nc3ccon3)ccc12 | ||
| CCC/C=C/[C@H]1CC[C@H](C(C(=O)O)C(=O)O)CC1 | ||
| CCOC(=O)[C@@H]1C[C@H]1COc1cc(CCCOC)cc(CN(C(=O)[C@H]2CNCC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)c1 | ||
| CCCCOC(=O)N1CCN(C(=O)[C@H](CCCO[Si](c2ccccc2)(c2ccccc2)C(C)(C)C)NC(=O)OC(C)(C)C)CC1 | ||
| CC1(C)OCC(C)(CO)N(Cc2ccccc2)C1=O | ||
| CCc1[nH]c(C(=O)N[C@H]2CCN(c3cccc(C(=O)O)c3)C[C@H]2OC)nc1C(F)(F)F | ||
| CC(C)(C)OC(=O)N[C@@H]1c2cccnc2[C@H](N2C(=O)c3ccccc3C2=O)CC[C@H]1c1cccc(F)c1F | ||
| CC(C)c1ccc(OC2CCC3(CC2)N[C@@H](C(=O)O)C(C)(C)S3)cc1 | ||
| CCOC(=O)CCc1cc2cc(-c3noc(-c4ccc(OC(C)C)c(Cl)c4)n3)ccc2[nH]1 | ||
| CN(C1CC1)[C@H]1CC[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@]12C | ||
| CC(=O)c1ccc2c(c1)C=CC(O)(CO)CO2 | ||
| C[C@H]1OC[C@@H]2CC[C@@H](c3nc(-c4ccc(C(=O)Nc5cc(C(F)(F)F)ccn5)cc4)c4c(N)nccn34)CN2C1=O | ||
| C[C@@H](n1ncn(-c2ccc(Cl)cc2)c1=O)[C@@]1(c2ccc(F)cc2F)CO1 | ||
| CC(=O)NC[C@H]1CN(c2ccc3c(c2)CCCc2c(C(C)C)n[nH]c2-3)C(=O)O1 | ||
| COc1ccccc1C[C@H](C[C@H](O[Si](C)(C)C(C)(C)C)[C@H](Cc1ccccc1)NC(=O)OC(C)(C)C)C(=O)O | ||
| COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCCOC)c1 | ||
| CC(C)N1CCC(Oc2ccc3c(c2)cc2n3[C@H](C)CN(CCO)C2=O)CC1 | ||
| COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCC2(CC#N)CC2)c1 | ||
| CC(C)(C)OC(=O)N[C@@H]1c2cccnc2[C@H](N)CC[C@H]1c1cccc(F)c1F | ||
| COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCC2(CC(=O)OC)CC2)c1 | ||
| CCCCCN1C(=O)C2(CNC(=O)c3cc4c(cc32)OCO4)c2ccccc21 | ||
| O=[N+]([O-])c1ccc(OC2(S(=O)(=O)c3ccccc3)CC2)nc1Cl | ||
| C[Si](C)(C)CCOCn1cc(C2CCc3c(C(=O)O)nn(COCC[Si](C)(C)C)c3C2)cn1 | ||
| CC1=NC2(N=C1N)c1cc(-c3cc(F)cc(C#N)c3)ccc1CCC21CC1 | ||
| O=C(O)[C@H]1CCOC[C@H]1c1ccc(I)cc1 | ||
| COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OC[C@@H]2C[C@H]2C(=O)OCC(=O)N(C)C)c1 | ||
| C[C@H](C(=O)NCCF)N(C)C(=O)c1ccc2c(c1)c1c(n2C)CCC(C2CCOCC2)C1 | ||
| C[C@H](O[Si](C)(C)C(C)(C)C)[C@@H]1CC(O)CC(C)(C)N1 | ||
| CCc1[nH]c(C(=O)N[C@H]2CCN(c3cccc(C(=O)OC(C)(C)C)c3)C[C@H]2OC)nc1C(F)(F)F | ||
| CC[C@@H](OC(=O)c1ccccc1)[C@H]1CCCN(C(=O)OC(C)(C)C)C1 | ||
| CC(=O)OCc1nc2cnc(Br)cc2n1C(C)(C)COC(C)=O | ||
| CC(=O)NC[C@H]1CC[C@@H](C(=O)Nc2cccc(OC(F)(F)F)c2)N1C(=O)OCc1ccccc1 | ||
| O=C1C=C(N2CCNCC2)C(C2CCCCC2)CC1 | ||
| FC(F)(F)Cn1ncnc1-c1cc2n(n1)-c1cc(C3CCNCC3)ccc1OCC2 | ||
| COC(=O)[C@H]1CN(C(=O)OC(C)(C)C)CC[C@@H]1c1ccc(OCCOc2c(Cl)cc(C)cc2Cl)cc1 | ||
| CCOC(=O)/C(N)=N/Nc1cc(Cl)ccc1[N+](=O)[O-] | ||
| COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OC[C@@H]2C[C@H]2C(=O)Oc2ccc3c(c2)CCC3)c1 | ||
| ClCc1ccc2c(c1)Nc1nccnc1S2 | ||
| COC[C@H](C)COCc1ccc([C@@]2(O)CCN(C(=O)OC(C)(C)C)C[C@@H]2C=O)cc1 | ||
| CCOC(=O)c1cc2c(F)cccc2nc1[C@H](C)NC(=O)OC(C)(C)C | ||
| COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCC2(CC(=O)O)CC2)c1 | ||
| CC(=O)N1c2ccc(N3CCNCC3)nc2[C@H](Nc2ccccc2)[C@@H](C)[C@@H]1C1CC1 | ||
| COCCCc1cc(CN(C(=O)[C@H]2CNCC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCCOC)c1 | ||
| O=S(=O)(C#Cc1ccc(Cl)cc1)N1CCNCC1 | ||
| COc1ccc2c(Nc3c(Cl)cncc3Cl)cc(=O)[nH]c2c1OCCCCCCN1CCCN(C)CC1 | ||
| Oc1ccc2c(c1)[C@H](c1ccc(OCCN3CCCC3)cc1)[C@H](c1ccccc1)CO2 | ||
| CCOC(=O)/C=C1\CC[C@@H](c2cccc(F)c2F)[C@H](NC(=O)OC(C)(C)C)c2cccnc21 | ||
| OC[C@H]1C[C@@H](c2cnn3c(N[C@H]4CCc5ccccc54)ncnc23)C[C@@H]1O | ||
| CC(=O)OCC1=CS[C@@H]2C(NC(=O)Cc3cccs3)C(=O)N2C1C(=O)O | ||
| O=C(OCc1ccccc1)N1CC[C@H]2CCCNC[C@H]21 | ||
| C/N=C(\C)C1=C(O)C=C2Oc3c(C(=O)NCc4c(C)ccc5ccccc45)c(OC)cc(O)c3[C@]2(C)C1=O | ||
| C[C@@]1(O)[C@H](O)[C@@H](CO)O[C@H]1n1cc(-c2ccccc2)c2c(N)ncnc21 | ||
| CC(=O)Nc1nc2c(s1)-c1c(c(C3CC3)nn1C1CCNCC1)CC2 | ||
| CC(C)c1ccc2c(c1)OC1(O)c3ccccc3C(=O)C21NC(=O)C(=O)c1cccs1 | ||
| Cc1cc(C#N)cnc1C(=O)Nc1ccc(F)c([C@@]2(C)N=C(N)[C@@](C)(F)CC2(F)F)c1 | ||
| C[C@@]1(O)[C@H](O)[C@@H](CO)O[C@H]1n1ccc2c(N)nc(N)nc21 | ||
| CC(C)(C)OC(=O)[C@@]12C=CC[C@@H]1CN(C(=O)OCc1ccccc1)C2 | ||
| NC1=N[C@@]2(c3ccc(F)cc3F)CO[C@@H](c3nnco3)C[C@H]2CS1 | ||
| CNC(=O)c1ccc2ncc(C(=O)OCn3ccnc3)c(Nc3ccc(OC)cc3)c2c1 | ||
| COc1ccc2nccc(C(N)CC[C@@H]3CCN(CCSc4cccs4)C[C@@H]3C(=O)O)c2c1 | ||
| O=C(Nc1ccc(-c2cnc(C34CC5CC(CC(C(=O)O)(C5)C3)C4)s2)cc1)Nc1ccccc1F | ||
| OC[C@H]1O[C@](O)(c2ccc(Cl)c(Cc3ccc(C#Cc4cnccn4)cc3)c2)[C@H](O)[C@@H](O)[C@@H]1O | ||
| Nc1ncnc2c1c(-c1ccc3ccc(-c4ccccc4)nc3c1)cn2C1CCC1 | ||
| CCCC[C@@H](C(=O)N1CCC[C@H]1C(=O)O)[C@@H](F)C(=O)OC | ||
| NC1=Nc2ccc(F)cc2C2CCCC12 | ||
| CC1(C)OCC(C)(CO[Si](C)(C)C(C)(C)C)N(Cc2ccccc2)C1=O | ||
| CCc1[nH]c(C(=O)N[C@H]2CCN(c3nc(C(=O)O)c(C)s3)C[C@H]2OC)nc1C(F)(F)F | ||
| Cn1oc(=O)nc1/C(=N\OCc1cccc(NC(=O)OCCc2ccccc2)n1)c1ccccc1 | ||
| CCCC[Sn](/C=C/C1(O)C(C)=CC2(CC1(C)C(F)(F)F)OC(C)C(C)O2)(CCCC)CCCC | ||
| COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCc2ccc(-c3nnn[nH]3)cc2)c1 | ||
| C[C@H](O[Si](C)(C)C(C)(C)C)[C@@H]1CC(=O)CC(C)(C)N1 | ||
| COC(=O)C(C)(C)CC(CCCCO)CCCc1cccnc1 | ||
| Oc1ccc2c3c(ccc2c1)Cc1ccccc1OC3c1ccc(OCCN2CCCCC2)cc1 | ||
| CC(C)(C)OC(=O)N1CC=C(c2ccc3c(c2)-n2nc(-c4ncnn4CC(F)(F)F)cc2CCO3)CC1 | ||
| COCCCc1cc(O)cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)c1 | ||
| COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCCC(C)(C)C(=O)O)c1 | ||
| CC(=O)Nc1cccc(N2CCN(CCc3nn(C)c(=O)n3CC3CCCCC3)CC2)c1 | ||
| COC(=O)c1ccc(-c2ccc(O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]3O)c(Cl)c2)cc1 | ||
| CCC(C)(C)OC(=O)Nc1nc(C(O)C(=O)NC2C(=O)N3C(C(=O)O)=C(CSc4nnc(C)s4)CS[C@H]23)cs1 | ||
| C[C@@H](n1ccn(-c2ccc(Cl)cc2)c1=O)[C@@]1(c2ccc(F)cc2F)CO1 | ||
| COC(=O)CC12CCC(c3ccc(Br)cc3)(CC1)CO2 | ||
| CC(C)(C)OC(=O)NC1(c2nc(NCc3ccccn3)c3c(Cl)ccn3n2)CC1 | ||
| Cc1cc(Nc2nccc(C(F)(F)F)n2)cc(-c2cnc([C@@](C)(O)[C@H]3CC[C@H](C(=O)O)CC3)s2)c1 | ||
| CC(C)c1ccc(-n2nc(O)c3c(=O)c4ccc(Cl)cc4[nH]c3c2=O)cc1 | ||
| O=C(Nc1cccc(Cl)c1)N1CCc2[nH]nc(C(=O)N3CC(F)CO3)c2C1 | ||
| COc1ccc2nc(-c3ccc(OC)c(F)c3)nc(C(=O)NC(CO)Cc3c[nH]c4cccnc34)c2c1 | ||
| C[C@@H]1CNC(=O)c2cc3cc(OCCCN4CCCCC4)ccc3n21 | ||
| CC1(C)Oc2ccc(CC(=O)NC3CCCc4ccccc43)cc2C(N)C1O | ||
| O=C(NC[C@@H]1CC[C@@H](COc2ccccc2)OC1)c1ccc(O)cc1 | ||
| COC(=O)c1ccc2c(c1)C=CC(=CCl)CO2 | ||
| COCCCCC1(CNC(=O)C2CCCNC2)c2ccccc2Oc2ccccc21 | ||
| Cn1nccc1[C@]12CCCC[C@H]1OC(=O)O2 | ||
| CC(C)(C)OC(=O)N[C@@H]1c2cccnc2[C@@H](O)CC[C@H]1c1cccc(F)c1F | ||
| C=C(C[C@@H](Cc1ccc(-c2ccccc2)cc1)NC(=O)OC(C)(C)C)C(=O)OC | ||
| CC(C)(C)OC(=O)NC1(c2nc(O)c3c(Cl)ccn3n2)CC1 | ||
| C=CC[C@@H]1C(=O)N([C@H](C)c2ccccc2)C[C@@]1(C=C)C(=O)OC(C)(C)C | ||
| NC(=O)CN1CCC(c2ccc3c(c2)-n2nc(-c4ncnn4CC(F)(F)F)cc2CCO3)CC1 | ||
| CC(=O)Nc1nc2c(s1)-c1c(c(C3CC3)nn1C1CCN(C(=O)N3CCN(C(C)C)CC3)CC1)CC2 | ||
| CC(=O)N1c2ccc(N3CCNCC3)cc2[C@H](Nc2ccccc2)[C@@H](C)[C@@H]1C | ||
| CC1=NC2(N=C1N)c1cc(-c3cc(F)cc(F)c3)ccc1CCC21CC1 | ||
| CCc1ccc(S(=O)(=O)NC2c3cc(CC(=O)NC4CCCc5ccccc54)ccc3OC(C)(C)C2O)cc1 | ||
| CCCCCCC(C)(C)c1ccc([C@H]2C[C@@H](O)CCN2C=O)c(OCc2ccccc2)c1 | ||
| COC[C@H](C)COCc1ccc([C@@]2(O)CCN(C(=O)OC(C)(C)C)C[C@@H]2C=NO)cc1 | ||
| Cc1cc(Nc2nccc(C(F)(F)F)n2)cc(-c2cnc([C@](C)(O)[C@H]3CC[C@H](C(=O)O)CC3)s2)c1 | ||
| C[C@@H](O)C[C@H]1OC[C@@H](C2CCCCC2)N(c2cc(C#CC(C)(C)C)sc2C(=O)O)C1=O | ||
| C/C=C/c1c(N)nc(-c2ccc(Cl)c(OC)c2F)nc1C(=O)OC | ||
| CC(C)(C)OC(=O)N[C@@H]1c2cccnc2[C@H](O)CC[C@H]1c1cccc(F)c1F | ||
| COc1cc2c(=O)[nH]c(=O)n([C@@H]3O[C@H](CO)[C@H]4OC(C)(C)O[C@H]43)c2cc1OC | ||
| CC1=NC2(N=C1N)c1cc(Br)ccc1CCC21CC1 | ||
| CC1=NC2(N=C1N)c1cc(-c3cncc(Cl)c3)ccc1CCC21CC1 | ||
| CC(=O)OC1CCC2C(CO)CCC12 | ||
| O[C@H]1C[C@H](c2cnn3c(N[C@H]4CCc5ccccc54)ncnc23)C=C1COCc1ccccc1 | ||
| Cc1nnc(SCC2=C(C(=O)O)N3C(=O)C(NC(=O)C(O)c4csc(N)n4)[C@H]3SC2)s1 | ||
| COC[C@H](C)COCc1ccc([C@@]2(O)CCN(C(=O)OC(C)(C)C)C[C@@H]2c2noc(-c3ccccc3CCNC(C)=O)c2Br)cc1 | ||
| C[Si](C)(C)CCOCn1c(O[C@@H]2CO[C@H]3[C@@H]2OC[C@H]3O)nc2cc(Cl)c(-c3ccc(-c4ccc(N=S(C)(=O)N5CCC5)cc4)cc3)nc21 | ||
| CCOC(=O)[C@H]1[C@H](c2ccc3c(c2)OC(F)(F)O3)C1(C)C | ||
| CCCN(CCC)Cc1cc(CO)c(=O)n2c(-c3c(C)cc(C)cc3C)cccc12 | ||
| C[C@H](c1ccccc1)N1C[C@H]2CC=C[C@@]2(C(=O)OC(C)(C)C)C1 | ||
| COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OC[C@@H]2C[C@H]2C(=O)O)c1 | ||
| COc1cc2ncc3c(N)nc(-c4cncc(OCCN(Cc5ccc(F)cc5)C(=O)OC(C)(C)C)c4)cc3c2cc1OC | ||
| O=C(Nc1ccc(-c2cnc(C34CC5CC(CC(C(=O)O)(C5)C3)C4)s2)cc1)Nc1cccc(C(F)(F)F)c1 | ||
| O=C(OCc1ccccc1)N1CC[C@H]2CCCN(CCc3ccccc3)C[C@H]21 | ||
| NS(=O)(=O)OC[C@H]1C[C@@H](c2cnn3c(N[C@H]4CCc5ccccc54)ncnc23)C[C@@H]1O | ||
| C[Si](C)(C)CCOCn1c(O[C@@H]2CO[C@H]3[C@@H]2OC[C@H]3O)nc2cc(Cl)c(-c3ccc(-c4ccc(N=S(C)(=O)N5CCCC5)cc4)cc3)nc21 | ||
| OC[C@H]1O[C@@H](SC(c2ccccc2)(c2ccccc2)c2ccccc2)[C@H](O)[C@@H](O)[C@@H]1O | ||
| COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCc2ccc(C#N)cc2)c1 | ||
| CCCc1c(Cc2ccc(-c3ccccc3C#N)cc2F)c(=O)n([C@H]2CC[C@H](OCC3(C(C)=O)CCC3)CC2)c2ncnn12 | ||
| CCOC(=O)[C@@H]1C[C@H]1COc1cc(CCCOC)cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)c1 | ||
| COC(=O)CC12CCC(c3ccc(-c4ccc(NC(=O)c5nc(C)oc5C(F)(F)F)cc4)cc3)(CC1)CO2 | ||
| O=S(=O)(c1cc(C(F)(F)F)ccc1Br)C1CCOC(c2ccc(Cl)cc2)C1 | ||
| COCC1=CS[C@@H]2C(NC(=O)Cc3cccs3)C(=O)N2C1C(=O)O | ||
| COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OC[C@@H]2C[C@H]2CO)c1 | ||
| COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCCC(C)(C)C(=O)OC)c1 | ||
| COC[C@H](C)COCc1ccc([C@@]2(O)CCN(C(=O)OC(C)(C)C)C[C@@H]2CO)cc1 | ||
| COCCCc1cc(CN(C(=O)[C@H]2CNCC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCC(C)(C)O)c1 | ||
| COC(=O)[C@@]12C[C@H]1C=CCCCCC[C@H](NC(=O)CC1CC1)C(=O)N1C[C@H](Oc3nc(-c4ccccn4)nc4ccsc34)C[C@H]1C(=O)N2 | ||
| CCN1CCN(CCCCCCOc2c(OC)ccc3c(Nc4c(Cl)cncc4Cl)cc(=O)[nH]c23)CC1 | ||
| OCCN1CCC(c2ccc3c(c2)-n2nc(-c4ncnn4CC(F)(F)F)cc2CCO3)CC1 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,107 @@ | ||
| import json | ||
| import csv | ||
| from pathlib import Path | ||
|
|
||
# JSONL input: main() reads this file line by line and parses each non-blank
# line as one JSON record.
INPUT_PATH = Path("data/checker_dataset_pistachio.jsonl")
# CSV output: main() creates the parent directory if needed and overwrites
# this file.
OUTPUT_PATH = Path("data/checker_dataset_clean.csv")
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. CSV file missing in PR.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated. |
||
|
|
||
# Column order of the output CSV; every row dict uses exactly these keys.
_CSV_FIELDS = (
    "target_smiles_canonical",
    "product_smiles_canonical",
    "reactant_smiles_canonical",
    "stability_score",
    "stability_assessment",
    "stability_issues",
    "hallucination_score",
    "hallucination_severity",
)

# Assessment labels checked from worst to best; the first one found among a
# record's reactants becomes the overall assessment. Any other non-empty
# label falls through to "Likely stable".
_ASSESSMENT_PRIORITY = ("Potentially unstable", "Moderately stable")


def _summarize_record(rec):
    """Flatten one parsed JSONL record into a CSV row dict.

    Args:
        rec: Mapping parsed from one input line. The keys read are
            ``target_smiles_canonical``, ``product_smiles_canonical``,
            ``reactant_smiles_canonical`` (list of SMILES strings),
            ``stability_raw`` (reactant SMILES -> stability dict) and
            ``hallucination_raw`` (dict with ``score`` / ``severity``).
            Any of them may be missing or ``None``.

    Returns:
        A dict keyed by ``_CSV_FIELDS``. ``stability_score`` is the mean of
        the per-reactant scores, or ``None`` when no reactant has a score.
        ``stability_assessment`` is the worst label seen, or ``None`` when
        no reactant has one.
    """
    # `or` (rather than a .get default) also covers keys that are present
    # but hold an explicit JSON null.
    hall = rec.get("hallucination_raw") or {}
    stability_raw = rec.get("stability_raw") or {}
    reactants = rec.get("reactant_smiles_canonical") or []

    scores = []
    assessments = []
    issues = []
    for smiles in reactants:
        # A reactant with no (or a null) stability entry contributes nothing.
        stab = stability_raw.get(smiles) or {}

        score = stab.get("stability_score")
        if score is not None:
            scores.append(score)

        assessment = stab.get("assessment")
        if assessment:
            assessments.append(assessment)

        # Prefix each issue with its reactant so the joined string stays
        # attributable.
        issues.extend(f"{smiles}: {issue}" for issue in stab.get("issues") or [])

    overall_assessment = None
    if assessments:
        overall_assessment = next(
            (label for label in _ASSESSMENT_PRIORITY if label in assessments),
            "Likely stable",
        )

    return {
        "target_smiles_canonical": rec.get("target_smiles_canonical"),
        "product_smiles_canonical": rec.get("product_smiles_canonical"),
        # "." is the separator between the reactant SMILES strings.
        "reactant_smiles_canonical": ".".join(reactants),
        "stability_score": sum(scores) / len(scores) if scores else None,
        "stability_assessment": overall_assessment,
        "stability_issues": "; ".join(issues),
        "hallucination_score": hall.get("score"),
        "hallucination_severity": hall.get("severity"),
    }


def main():
    """Convert the JSONL dataset at INPUT_PATH into a flat CSV at OUTPUT_PATH.

    Each non-blank input line is parsed as a JSON object and reduced to one
    CSV row (see ``_summarize_record``). All rows are collected before the
    output file is opened, so a malformed input line aborts the run without
    touching an existing output file.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
    """
    rows = []
    with INPUT_PATH.open("r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            rows.append(_summarize_record(json.loads(line)))

    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    # newline="" lets the csv module control line endings itself.
    with OUTPUT_PATH.open("w", encoding="utf-8", newline="") as out_f:
        writer = csv.DictWriter(out_f, fieldnames=_CSV_FIELDS)
        writer.writeheader()
        writer.writerows(rows)
|
|
||
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The JSONL input file is missing from this PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
updated