Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions config/langfuse_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"sessions": {
"default": {
"generation_name": "retrosynthesis",
"project": "DeepRetro",
"version": "0.0.1",
"trace_name": "retrosynthesis_pipeline",
"trace_user_id": "system",
"session_id": "default"
},
"metadata": {
"generation_name": "metadata_prediction",
"project": "DeepRetro",
"version": "0.0.1",
"trace_name": "metadata_pipeline",
"trace_user_id": "system",
"session_id": "metadata_agent"
},
"retrosynthesis": {
"generation_name": "retrosynthesis",
"project": "DeepRetro",
"version": "0.0.1",
"trace_name": "retrosynthesis_pipeline",
"trace_user_id": "system",
"session_id": "retrosynthesis_pipeline"
}
}
}
2,627 changes: 2,627 additions & 0 deletions data/checker_dataset_clean.csv

Large diffs are not rendered by default.

1,637 changes: 1,637 additions & 0 deletions data/checker_dataset_pistachio.jsonl

Large diffs are not rendered by default.

190 changes: 190 additions & 0 deletions data/targets.smi
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
Cn1nccc1[C@]1(O)CCCC[C@H]1O
CC1=NC2(N=C1N)c1cc(-c3cc(Cl)cc(C#N)c3)ccc1CCC21CC1
CCC/C=C/[C@H]1CC[C@H](C(CO)CO)CC1
COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCC(C)(C)O)c1
C=CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(C)=CC3CCC(O)C(OC)C3)C(C)C=CC1O)C(C)CC2OC
CC(=O)NC[C@H]1CC[C@@H](C(=O)Nc2cccc(OC(F)(F)F)c2)N1
COC(=O)c1ccc2c(c1)C=CC(=C(Cl)Cl)CO2
Cc1ccc(C(C)(C)C)cc1S[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O
COC(=O)[C@@H]1CCCC2(CCCCC2)[C@H]1O
CC(C)S(=O)(=O)N[C@H]1CCOC[C@H]1c1ccc(I)cc1
CN1CC[C@@H](c2c(O)cc(O)c3c(=O)cc(-c4ccc(C(F)(F)F)cc4)oc23)[C@@H]1CO
CC#CCn1c(Br)nc(C=O)c1C(=O)OC
Cc1ccn2cc(CO)n(-c3ccccc3)c(=O)c12
CC(C)(CO)n1c(CO)nc2cnc(Br)cc21
C[C@H](c1ccccc1)N1C[C@]2(C(=O)OC(C)(C)C)C=CC[C@@H]2C1=S
CCOC(=O)c1nc(N2CC[C@H](NC(=O)c3nc(C(F)(F)F)c(CC)[nH]3)[C@H](OC)C2)sc1C
COC[C@H](C)COCc1ccc([C@@]2(O)CCNC[C@@H]2c2noc(-c3ccccc3CCNC(C)=O)c2Br)cc1
COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCCCC#N)c1
C[C@@H](O)c1nc2cnc3ccsc3c2n1[C@H]1CC[C@H](CO)CC1
C#CC1(O)C(C)=CC2(CC1(C)C(F)(F)F)OC(C)C(C)O2
CC(C)(C)OC(=O)N[C@@H]1c2cccnc2C(=O)CC[C@H]1c1cccc(F)c1F
CS(=O)(=O)CCN1CCC(c2ccc3c(c2)-n2nc(-c4ncnn4CC(F)(F)F)cc2CCO3)CC1
C[C@@H]1CCCN1CCc1nnc2cc(Br)ccc2c1O
O=C(Cc1cccs1)NC1C(=O)N2C(C(=O)O)C(CBr)=CS[C@H]12
CN1CCN(c2ccc3c(c2)[nH]c2c(C(N)=O)cc(-c4cnn(C)c4)nc23)CC1
CC1(C)OCC(C)(C=O)N(Cc2ccccc2)C1=O
COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OS(=O)(=O)C(F)(F)F)c1
OC1CCCc2sc(-c3ccncc3)cc2C1c1ccc(Cl)cc1
N[C@H]1CC[C@H]1c1ccc(Cl)cc1
CN(C)CC(OCC1(c2ccc(F)cc2)CCN(C(=O)OC(C)(C)C)CC1)c1cc(Cl)cc2cn(COCC[Si](C)(C)C)nc12
C=C(C[C@@H](Cc1ccc(-c2ccccc2)cc1)NC(=O)OC(C)(C)C)C(=O)O
Nc1ccc(OC2(S(=O)(=O)c3ccccc3)CC2)nc1Cl
C[C@]1(F)CC(F)(F)[C@@](C)(c2cc(N)ccc2F)N=C1N
N[C@H]1CCOC[C@H]1c1ccc(I)cc1
COc1ccc2c(Nc3c(Cl)cncc3Cl)cc(=O)[nH]c2c1OCCCCCCN1CCCNCC1
COc1cc2ncc3c(N)nc(-c4cncc(OCCNCc5ccc(F)cc5)c4)cc3c2cc1OC
CC(C)c1ccc2c(c1)OC1(O)c3ccccc3C(=O)C21NC(=O)c1cc(-c2ccccc2)n[nH]1
N#Cc1cc(F)cc([C@H]2C[C@H](F)CN2c2ccn3ncc(C(N)=O)c3c2)c1
NC(=O)c1cc(-c2ccc3c(cnn3CCN3CCOCC3)c2)nc2c1[nH]c1cc(N3CCOCC3)ccc12
Cn1oc(=O)nc1/C(=N\OCc1cccc(N)n1)c1ccccc1
CN1CCN(c2ccc3c(c2)[nH]c2c(C(N)=O)cc(-c4ccc(O)c(Cl)c4)nc23)CC1
COC(=O)CCc1cc2cc(-c3noc(-c4ccc(OC(C)C)c(Cl)c4)n3)ccc2n1C
CCOP(=O)(Cc1ccc(Nc2ncc(C(F)(F)F)c(Nc3ccc([C@H]4CC[C@H](N5CCN(C)CC5)CC4)c4c3C(=O)N(C)C4)n2)c(OC)c1)OCC
COc1ccc(C[C@H](C[C@H](O[Si](C)(C)C(C)(C)C)[C@H](CC2CCCCC2)NC(=O)OC(C)(C)C)C(=O)O)c(OC)c1OC
CC1(C)COCc2nc3cnc(Br)cc3n21
CC(C)S(=O)(=O)N[C@H]1CCOC[C@H]1c1ccc(-c2ccc(C#N)s2)cc1
C[C@H](c1ccccc1)N1C[C@]2(C(=O)OC(C)(C)C)C=CC[C@@H]2C1=O
CCc1nc(C)c(C(=O)Nc2ccc(-c3ccc(C45CCC(CC(=O)OC)(CC4)OC5)cc3)cc2)o1
COc1cc2c(Oc3cc(C)c(C)nc3-c3cccc(C)n3)ccnc2cc1OCCNCCO
O=C(CO)N1CCC(c2ccc3c(c2)-n2nc(-c4ncnn4CC(F)(F)F)cc2CCO3)CC1
CC(C)(C)OC(=O)NC1(C2CCc3cc(Sc4cccc(OCc5ccccc5)c4)ccc3C2)COC(C)(C)OC1
CCC[C@@H](CCO)Nc1nc(N)nc(C)c1Cc1ccc(CN2CCC[C@@H]2C(=O)O)cc1OC
COc1cc(-c2cccc(C(F)(F)F)c2)c(F)cc1-c1nncc2cc(S(=O)(=O)Nc3ccon3)ccc12
CCC/C=C/[C@H]1CC[C@H](C(C(=O)O)C(=O)O)CC1
CCOC(=O)[C@@H]1C[C@H]1COc1cc(CCCOC)cc(CN(C(=O)[C@H]2CNCC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)c1
CCCCOC(=O)N1CCN(C(=O)[C@H](CCCO[Si](c2ccccc2)(c2ccccc2)C(C)(C)C)NC(=O)OC(C)(C)C)CC1
CC1(C)OCC(C)(CO)N(Cc2ccccc2)C1=O
CCc1[nH]c(C(=O)N[C@H]2CCN(c3cccc(C(=O)O)c3)C[C@H]2OC)nc1C(F)(F)F
CC(C)(C)OC(=O)N[C@@H]1c2cccnc2[C@H](N2C(=O)c3ccccc3C2=O)CC[C@H]1c1cccc(F)c1F
CC(C)c1ccc(OC2CCC3(CC2)N[C@@H](C(=O)O)C(C)(C)S3)cc1
CCOC(=O)CCc1cc2cc(-c3noc(-c4ccc(OC(C)C)c(Cl)c4)n3)ccc2[nH]1
CN(C1CC1)[C@H]1CC[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@]12C
CC(=O)c1ccc2c(c1)C=CC(O)(CO)CO2
C[C@H]1OC[C@@H]2CC[C@@H](c3nc(-c4ccc(C(=O)Nc5cc(C(F)(F)F)ccn5)cc4)c4c(N)nccn34)CN2C1=O
C[C@@H](n1ncn(-c2ccc(Cl)cc2)c1=O)[C@@]1(c2ccc(F)cc2F)CO1
CC(=O)NC[C@H]1CN(c2ccc3c(c2)CCCc2c(C(C)C)n[nH]c2-3)C(=O)O1
COc1ccccc1C[C@H](C[C@H](O[Si](C)(C)C(C)(C)C)[C@H](Cc1ccccc1)NC(=O)OC(C)(C)C)C(=O)O
COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCCOC)c1
CC(C)N1CCC(Oc2ccc3c(c2)cc2n3[C@H](C)CN(CCO)C2=O)CC1
COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCC2(CC#N)CC2)c1
CC(C)(C)OC(=O)N[C@@H]1c2cccnc2[C@H](N)CC[C@H]1c1cccc(F)c1F
COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCC2(CC(=O)OC)CC2)c1
CCCCCN1C(=O)C2(CNC(=O)c3cc4c(cc32)OCO4)c2ccccc21
O=[N+]([O-])c1ccc(OC2(S(=O)(=O)c3ccccc3)CC2)nc1Cl
C[Si](C)(C)CCOCn1cc(C2CCc3c(C(=O)O)nn(COCC[Si](C)(C)C)c3C2)cn1
CC1=NC2(N=C1N)c1cc(-c3cc(F)cc(C#N)c3)ccc1CCC21CC1
O=C(O)[C@H]1CCOC[C@H]1c1ccc(I)cc1
COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OC[C@@H]2C[C@H]2C(=O)OCC(=O)N(C)C)c1
C[C@H](C(=O)NCCF)N(C)C(=O)c1ccc2c(c1)c1c(n2C)CCC(C2CCOCC2)C1
C[C@H](O[Si](C)(C)C(C)(C)C)[C@@H]1CC(O)CC(C)(C)N1
CCc1[nH]c(C(=O)N[C@H]2CCN(c3cccc(C(=O)OC(C)(C)C)c3)C[C@H]2OC)nc1C(F)(F)F
CC[C@@H](OC(=O)c1ccccc1)[C@H]1CCCN(C(=O)OC(C)(C)C)C1
CC(=O)OCc1nc2cnc(Br)cc2n1C(C)(C)COC(C)=O
CC(=O)NC[C@H]1CC[C@@H](C(=O)Nc2cccc(OC(F)(F)F)c2)N1C(=O)OCc1ccccc1
O=C1C=C(N2CCNCC2)C(C2CCCCC2)CC1
FC(F)(F)Cn1ncnc1-c1cc2n(n1)-c1cc(C3CCNCC3)ccc1OCC2
COC(=O)[C@H]1CN(C(=O)OC(C)(C)C)CC[C@@H]1c1ccc(OCCOc2c(Cl)cc(C)cc2Cl)cc1
CCOC(=O)/C(N)=N/Nc1cc(Cl)ccc1[N+](=O)[O-]
COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OC[C@@H]2C[C@H]2C(=O)Oc2ccc3c(c2)CCC3)c1
ClCc1ccc2c(c1)Nc1nccnc1S2
COC[C@H](C)COCc1ccc([C@@]2(O)CCN(C(=O)OC(C)(C)C)C[C@@H]2C=O)cc1
CCOC(=O)c1cc2c(F)cccc2nc1[C@H](C)NC(=O)OC(C)(C)C
COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCC2(CC(=O)O)CC2)c1
CC(=O)N1c2ccc(N3CCNCC3)nc2[C@H](Nc2ccccc2)[C@@H](C)[C@@H]1C1CC1
COCCCc1cc(CN(C(=O)[C@H]2CNCC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCCOC)c1
O=S(=O)(C#Cc1ccc(Cl)cc1)N1CCNCC1
COc1ccc2c(Nc3c(Cl)cncc3Cl)cc(=O)[nH]c2c1OCCCCCCN1CCCN(C)CC1
Oc1ccc2c(c1)[C@H](c1ccc(OCCN3CCCC3)cc1)[C@H](c1ccccc1)CO2
CCOC(=O)/C=C1\CC[C@@H](c2cccc(F)c2F)[C@H](NC(=O)OC(C)(C)C)c2cccnc21
OC[C@H]1C[C@@H](c2cnn3c(N[C@H]4CCc5ccccc54)ncnc23)C[C@@H]1O
CC(=O)OCC1=CS[C@@H]2C(NC(=O)Cc3cccs3)C(=O)N2C1C(=O)O
O=C(OCc1ccccc1)N1CC[C@H]2CCCNC[C@H]21
C/N=C(\C)C1=C(O)C=C2Oc3c(C(=O)NCc4c(C)ccc5ccccc45)c(OC)cc(O)c3[C@]2(C)C1=O
C[C@@]1(O)[C@H](O)[C@@H](CO)O[C@H]1n1cc(-c2ccccc2)c2c(N)ncnc21
CC(=O)Nc1nc2c(s1)-c1c(c(C3CC3)nn1C1CCNCC1)CC2
CC(C)c1ccc2c(c1)OC1(O)c3ccccc3C(=O)C21NC(=O)C(=O)c1cccs1
Cc1cc(C#N)cnc1C(=O)Nc1ccc(F)c([C@@]2(C)N=C(N)[C@@](C)(F)CC2(F)F)c1
C[C@@]1(O)[C@H](O)[C@@H](CO)O[C@H]1n1ccc2c(N)nc(N)nc21
CC(C)(C)OC(=O)[C@@]12C=CC[C@@H]1CN(C(=O)OCc1ccccc1)C2
NC1=N[C@@]2(c3ccc(F)cc3F)CO[C@@H](c3nnco3)C[C@H]2CS1
CNC(=O)c1ccc2ncc(C(=O)OCn3ccnc3)c(Nc3ccc(OC)cc3)c2c1
COc1ccc2nccc(C(N)CC[C@@H]3CCN(CCSc4cccs4)C[C@@H]3C(=O)O)c2c1
O=C(Nc1ccc(-c2cnc(C34CC5CC(CC(C(=O)O)(C5)C3)C4)s2)cc1)Nc1ccccc1F
OC[C@H]1O[C@](O)(c2ccc(Cl)c(Cc3ccc(C#Cc4cnccn4)cc3)c2)[C@H](O)[C@@H](O)[C@@H]1O
Nc1ncnc2c1c(-c1ccc3ccc(-c4ccccc4)nc3c1)cn2C1CCC1
CCCC[C@@H](C(=O)N1CCC[C@H]1C(=O)O)[C@@H](F)C(=O)OC
NC1=Nc2ccc(F)cc2C2CCCC12
CC1(C)OCC(C)(CO[Si](C)(C)C(C)(C)C)N(Cc2ccccc2)C1=O
CCc1[nH]c(C(=O)N[C@H]2CCN(c3nc(C(=O)O)c(C)s3)C[C@H]2OC)nc1C(F)(F)F
Cn1oc(=O)nc1/C(=N\OCc1cccc(NC(=O)OCCc2ccccc2)n1)c1ccccc1
CCCC[Sn](/C=C/C1(O)C(C)=CC2(CC1(C)C(F)(F)F)OC(C)C(C)O2)(CCCC)CCCC
COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCc2ccc(-c3nnn[nH]3)cc2)c1
C[C@H](O[Si](C)(C)C(C)(C)C)[C@@H]1CC(=O)CC(C)(C)N1
COC(=O)C(C)(C)CC(CCCCO)CCCc1cccnc1
Oc1ccc2c3c(ccc2c1)Cc1ccccc1OC3c1ccc(OCCN2CCCCC2)cc1
CC(C)(C)OC(=O)N1CC=C(c2ccc3c(c2)-n2nc(-c4ncnn4CC(F)(F)F)cc2CCO3)CC1
COCCCc1cc(O)cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)c1
COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCCC(C)(C)C(=O)O)c1
CC(=O)Nc1cccc(N2CCN(CCc3nn(C)c(=O)n3CC3CCCCC3)CC2)c1
COC(=O)c1ccc(-c2ccc(O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]3O)c(Cl)c2)cc1
CCC(C)(C)OC(=O)Nc1nc(C(O)C(=O)NC2C(=O)N3C(C(=O)O)=C(CSc4nnc(C)s4)CS[C@H]23)cs1
C[C@@H](n1ccn(-c2ccc(Cl)cc2)c1=O)[C@@]1(c2ccc(F)cc2F)CO1
COC(=O)CC12CCC(c3ccc(Br)cc3)(CC1)CO2
CC(C)(C)OC(=O)NC1(c2nc(NCc3ccccn3)c3c(Cl)ccn3n2)CC1
Cc1cc(Nc2nccc(C(F)(F)F)n2)cc(-c2cnc([C@@](C)(O)[C@H]3CC[C@H](C(=O)O)CC3)s2)c1
CC(C)c1ccc(-n2nc(O)c3c(=O)c4ccc(Cl)cc4[nH]c3c2=O)cc1
O=C(Nc1cccc(Cl)c1)N1CCc2[nH]nc(C(=O)N3CC(F)CO3)c2C1
COc1ccc2nc(-c3ccc(OC)c(F)c3)nc(C(=O)NC(CO)Cc3c[nH]c4cccnc34)c2c1
C[C@@H]1CNC(=O)c2cc3cc(OCCCN4CCCCC4)ccc3n21
CC1(C)Oc2ccc(CC(=O)NC3CCCc4ccccc43)cc2C(N)C1O
O=C(NC[C@@H]1CC[C@@H](COc2ccccc2)OC1)c1ccc(O)cc1
COC(=O)c1ccc2c(c1)C=CC(=CCl)CO2
COCCCCC1(CNC(=O)C2CCCNC2)c2ccccc2Oc2ccccc21
Cn1nccc1[C@]12CCCC[C@H]1OC(=O)O2
CC(C)(C)OC(=O)N[C@@H]1c2cccnc2[C@@H](O)CC[C@H]1c1cccc(F)c1F
C=C(C[C@@H](Cc1ccc(-c2ccccc2)cc1)NC(=O)OC(C)(C)C)C(=O)OC
CC(C)(C)OC(=O)NC1(c2nc(O)c3c(Cl)ccn3n2)CC1
C=CC[C@@H]1C(=O)N([C@H](C)c2ccccc2)C[C@@]1(C=C)C(=O)OC(C)(C)C
NC(=O)CN1CCC(c2ccc3c(c2)-n2nc(-c4ncnn4CC(F)(F)F)cc2CCO3)CC1
CC(=O)Nc1nc2c(s1)-c1c(c(C3CC3)nn1C1CCN(C(=O)N3CCN(C(C)C)CC3)CC1)CC2
CC(=O)N1c2ccc(N3CCNCC3)cc2[C@H](Nc2ccccc2)[C@@H](C)[C@@H]1C
CC1=NC2(N=C1N)c1cc(-c3cc(F)cc(F)c3)ccc1CCC21CC1
CCc1ccc(S(=O)(=O)NC2c3cc(CC(=O)NC4CCCc5ccccc54)ccc3OC(C)(C)C2O)cc1
CCCCCCC(C)(C)c1ccc([C@H]2C[C@@H](O)CCN2C=O)c(OCc2ccccc2)c1
COC[C@H](C)COCc1ccc([C@@]2(O)CCN(C(=O)OC(C)(C)C)C[C@@H]2C=NO)cc1
Cc1cc(Nc2nccc(C(F)(F)F)n2)cc(-c2cnc([C@](C)(O)[C@H]3CC[C@H](C(=O)O)CC3)s2)c1
C[C@@H](O)C[C@H]1OC[C@@H](C2CCCCC2)N(c2cc(C#CC(C)(C)C)sc2C(=O)O)C1=O
C/C=C/c1c(N)nc(-c2ccc(Cl)c(OC)c2F)nc1C(=O)OC
CC(C)(C)OC(=O)N[C@@H]1c2cccnc2[C@H](O)CC[C@H]1c1cccc(F)c1F
COc1cc2c(=O)[nH]c(=O)n([C@@H]3O[C@H](CO)[C@H]4OC(C)(C)O[C@H]43)c2cc1OC
CC1=NC2(N=C1N)c1cc(Br)ccc1CCC21CC1
CC1=NC2(N=C1N)c1cc(-c3cncc(Cl)c3)ccc1CCC21CC1
CC(=O)OC1CCC2C(CO)CCC12
O[C@H]1C[C@H](c2cnn3c(N[C@H]4CCc5ccccc54)ncnc23)C=C1COCc1ccccc1
Cc1nnc(SCC2=C(C(=O)O)N3C(=O)C(NC(=O)C(O)c4csc(N)n4)[C@H]3SC2)s1
COC[C@H](C)COCc1ccc([C@@]2(O)CCN(C(=O)OC(C)(C)C)C[C@@H]2c2noc(-c3ccccc3CCNC(C)=O)c2Br)cc1
C[Si](C)(C)CCOCn1c(O[C@@H]2CO[C@H]3[C@@H]2OC[C@H]3O)nc2cc(Cl)c(-c3ccc(-c4ccc(N=S(C)(=O)N5CCC5)cc4)cc3)nc21
CCOC(=O)[C@H]1[C@H](c2ccc3c(c2)OC(F)(F)O3)C1(C)C
CCCN(CCC)Cc1cc(CO)c(=O)n2c(-c3c(C)cc(C)cc3C)cccc12
C[C@H](c1ccccc1)N1C[C@H]2CC=C[C@@]2(C(=O)OC(C)(C)C)C1
COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OC[C@@H]2C[C@H]2C(=O)O)c1
COc1cc2ncc3c(N)nc(-c4cncc(OCCN(Cc5ccc(F)cc5)C(=O)OC(C)(C)C)c4)cc3c2cc1OC
O=C(Nc1ccc(-c2cnc(C34CC5CC(CC(C(=O)O)(C5)C3)C4)s2)cc1)Nc1cccc(C(F)(F)F)c1
O=C(OCc1ccccc1)N1CC[C@H]2CCCN(CCc3ccccc3)C[C@H]21
NS(=O)(=O)OC[C@H]1C[C@@H](c2cnn3c(N[C@H]4CCc5ccccc54)ncnc23)C[C@@H]1O
C[Si](C)(C)CCOCn1c(O[C@@H]2CO[C@H]3[C@@H]2OC[C@H]3O)nc2cc(Cl)c(-c3ccc(-c4ccc(N=S(C)(=O)N5CCCC5)cc4)cc3)nc21
OC[C@H]1O[C@@H](SC(c2ccccc2)(c2ccccc2)c2ccccc2)[C@H](O)[C@@H](O)[C@@H]1O
COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCc2ccc(C#N)cc2)c1
CCCc1c(Cc2ccc(-c3ccccc3C#N)cc2F)c(=O)n([C@H]2CC[C@H](OCC3(C(C)=O)CCC3)CC2)c2ncnn12
CCOC(=O)[C@@H]1C[C@H]1COc1cc(CCCOC)cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)c1
COC(=O)CC12CCC(c3ccc(-c4ccc(NC(=O)c5nc(C)oc5C(F)(F)F)cc4)cc3)(CC1)CO2
O=S(=O)(c1cc(C(F)(F)F)ccc1Br)C1CCOC(c2ccc(Cl)cc2)C1
COCC1=CS[C@@H]2C(NC(=O)Cc3cccs3)C(=O)N2C1C(=O)O
COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OC[C@@H]2C[C@H]2CO)c1
COCCCc1cc(CN(C(=O)[C@H]2CN(C(=O)OC(C)(C)C)CC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCCC(C)(C)C(=O)OC)c1
COC[C@H](C)COCc1ccc([C@@]2(O)CCN(C(=O)OC(C)(C)C)C[C@@H]2CO)cc1
COCCCc1cc(CN(C(=O)[C@H]2CNCC[C@@H]2c2ccc(OCCOc3c(Cl)cc(C)cc3Cl)cc2)C2CC2)cc(OCC(C)(C)O)c1
COC(=O)[C@@]12C[C@H]1C=CCCCCC[C@H](NC(=O)CC1CC1)C(=O)N1C[C@H](Oc3nc(-c4ccccn4)nc4ccsc34)C[C@H]1C(=O)N2
CCN1CCN(CCCCCCOc2c(OC)ccc3c(Nc4c(Cl)cncc4Cl)cc(=O)[nH]c23)CC1
OCCN1CCC(c2ccc3c(c2)-n2nc(-c4ncnn4CC(F)(F)F)cc2CCO3)CC1
107 changes: 107 additions & 0 deletions scripts/clean_checker_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import json
import csv
from pathlib import Path

INPUT_PATH = Path("data/checker_dataset_pistachio.jsonl")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The JSONL file referenced here (data/checker_dataset_pistachio.jsonl) is missing from this PR.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

OUTPUT_PATH = Path("data/checker_dataset_clean.csv")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The CSV file referenced here (data/checker_dataset_clean.csv) is missing from this PR.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated


# Column order of the cleaned CSV; every row dict carries exactly these keys.
_FIELDNAMES = (
    "target_smiles_canonical",
    "product_smiles_canonical",
    "reactant_smiles_canonical",
    "stability_score",
    "stability_assessment",
    "stability_issues",
    "hallucination_score",
    "hallucination_severity",
)

# Assessment labels checked worst-first; anything else falls back to
# "Likely stable", matching the original worst-case rule.
_ASSESSMENTS_WORST_FIRST = ("Potentially unstable", "Moderately stable")


def _summarize_stability(reactants, stability_raw):
    """Aggregate per-reactant stability entries for one reaction step.

    Args:
        reactants: Iterable of reactant SMILES used as keys into
            ``stability_raw``.
        stability_raw: Mapping of reactant SMILES to a stability dict that
            may carry ``stability_score``, ``assessment`` and ``issues``.

    Returns:
        A ``(average_score, overall_assessment, issues_text)`` tuple.
        ``average_score`` and ``overall_assessment`` are ``None`` when no
        reactant supplied the corresponding value; ``issues_text`` is the
        "; "-joined list of issues, each prefixed with its reactant SMILES.
    """
    scores = []
    assessments = []
    issues = []

    for smiles in reactants:
        # A missing key and an explicit null entry both mean "no data".
        entry = stability_raw.get(smiles) or {}

        score = entry.get("stability_score")
        if score is not None:
            scores.append(score)

        assessment = entry.get("assessment")
        if assessment:
            assessments.append(assessment)

        # Prefix issues with the reactant SMILES so they stay attributable
        # after being flattened into one CSV cell.
        issues.extend(f"{smiles}: {issue}" for issue in entry.get("issues") or [])

    average = sum(scores) / len(scores) if scores else None

    overall = None
    if assessments:
        overall = next(
            (label for label in _ASSESSMENTS_WORST_FIRST if label in assessments),
            "Likely stable",
        )

    return average, overall, "; ".join(issues)


def _record_to_row(rec):
    """Flatten one parsed JSONL record into a CSV row dict keyed by _FIELDNAMES."""
    hallucination = rec.get("hallucination_raw") or {}

    # `or []` also covers an explicit null, which `.get(key, [])` does not.
    reactants = rec.get("reactant_smiles_canonical") or []
    if isinstance(reactants, str):
        # A bare string would otherwise be iterated character by character.
        reactants = [reactants]

    average, overall, issues = _summarize_stability(
        reactants, rec.get("stability_raw") or {}
    )

    return {
        "target_smiles_canonical": rec.get("target_smiles_canonical"),
        "product_smiles_canonical": rec.get("product_smiles_canonical"),
        # "." is the usual SMILES separator for disconnected components.
        "reactant_smiles_canonical": ".".join(reactants),
        "stability_score": average,
        "stability_assessment": overall,
        "stability_issues": issues,
        "hallucination_score": hallucination.get("score"),
        "hallucination_severity": hallucination.get("severity"),
    }


def main(input_path=None, output_path=None):
    """Convert the checker JSONL dataset into a flat CSV.

    Each non-blank input line is parsed as one JSON record and written as one
    CSV row. All rows are collected before the output file is opened, so a
    parse failure leaves any existing output untouched.

    Args:
        input_path: JSONL file to read (``str`` or ``Path``). Defaults to the
            module-level ``INPUT_PATH``.
        output_path: CSV file to write (``str`` or ``Path``); parent
            directories are created as needed. Defaults to the module-level
            ``OUTPUT_PATH``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the input cannot be read or the output cannot be written.
    """
    src = INPUT_PATH if input_path is None else Path(input_path)
    dst = OUTPUT_PATH if output_path is None else Path(output_path)

    rows = []
    with src.open("r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                rows.append(_record_to_row(json.loads(line)))

    dst.parent.mkdir(parents=True, exist_ok=True)
    # newline="" lets the csv module control line endings itself.
    with dst.open("w", encoding="utf-8", newline="") as out_f:
        writer = csv.DictWriter(out_f, fieldnames=list(_FIELDNAMES))
        writer.writeheader()
        writer.writerows(rows)

# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
8 changes: 6 additions & 2 deletions src/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,9 @@ def retrosynthesis_api():
use_protecting_group_feature=use_protecting_group_feature)
save_result(smiles, result)
except Exception as e:
print(e)
print(f"ERROR in retrosynthesis:")
print(f" EXCEPTION: {str(e)}")
print(f" TRACEBACK: {traceback.format_exc()}")
return jsonify(
{"error":
f"Error in retrosynthesis: {str(e)}. Please rerun."}), 500
Expand Down Expand Up @@ -345,7 +347,9 @@ def rerun_retrosynthesis():
# Store the result in partial.json
save_result(molecule, result)
except Exception as e:
print(e)
print(f"ERROR in rerun_retrosynthesis:")
print(f" EXCEPTION: {str(e)}")
print(f" TRACEBACK: {traceback.format_exc()}")
return jsonify(
{"error":
f"Error in retrosynthesis: {str(e)}. Please rerun."}), 500
Expand Down
15 changes: 4 additions & 11 deletions src/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,7 @@
litellm.success_callback = ["langfuse"]
litellm.drop_params = True

metadata = {
"generation_name": "prod", # set langfuse generation name
"project": "Retrosynthesis", # set langfuse project name
"version": "0.0.3", # set langfuse version
"trace_name": "prod", # set langfuse Trace Name
"trace_user_id": "sv", # set langfuse Trace User ID
"session_id": "metadata", # set langfuse Session ID
}
from src.utils.langfuse_config import get_langfuse_metadata


@cache_results
Expand Down Expand Up @@ -146,7 +139,7 @@ def reagent_llm_call(reactants: list[str],
temperature=temperature,
seed=42,
top_p=0.9,
metadata=metadata)
metadata=get_langfuse_metadata("metadata"))
res_text = response.choices[0].message.content
except Exception as e:
logger.info(f"Error in calling {LLM}: {e}")
Expand Down Expand Up @@ -263,7 +256,7 @@ def conditions_llm_call(reactants: list[str],
temperature=temperature,
seed=42,
top_p=0.9,
metadata=metadata)
metadata=get_langfuse_metadata("metadata"))
res_text = response.choices[0].message.content
except Exception as e:

Expand Down Expand Up @@ -338,7 +331,7 @@ def literature_agent(reactants: list[str],
temperature=temperature,
seed=42,
top_p=0.9,
metadata=metadata)
metadata=get_langfuse_metadata("metadata"))
res_text = response.choices[0].message.content
except Exception as e:
logger.info(f"Error in calling {LLM}: {e}")
Expand Down
Loading