구성 생성 노트

8909 단어 Python
BRICS를 사용하여 분자를 프래그먼트(fragment)로 분해하고, 그 프래그먼트로부터 새 분자 생성

test.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from rdkit import Chem
from rdkit.Chem import Draw, BRICS

def main():
    """Run the pipeline: load SMILES, fragment, rebuild, then export and draw."""
    source_df = load_data('smiles_list.csv')
    fragments = make_fragments(source_df['mol'])
    new_mols = generate_mols(fragments)
    # Both outputs are produced from the same list of generated molecules.
    generated_mols_to_csv(new_mols)
    draw_generated_mols(new_mols)

def load_data(file_name):
    """Read a CSV with a ``SMILES`` column and attach parsed molecules.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        The loaded DataFrame with an added ``mol`` column holding the result
        of ``Chem.MolFromSmiles`` for each ``SMILES`` entry.
    """
    table = pd.read_csv(file_name)
    parsed = [Chem.MolFromSmiles(smiles) for smiles in table['SMILES']]
    table['mol'] = parsed
    return table

def make_fragments(mols):
    """Decompose molecules into a de-duplicated list of BRICS fragment molecules.

    Args:
        mols: Iterable of RDKit molecules. ``None`` entries (what
            ``Chem.MolFromSmiles`` produces for SMILES it cannot parse) are
            skipped instead of crashing the decomposition.

    Returns:
        List with one molecule per unique fragment SMILES, ordered by the
        fragment SMILES string.
    """
    unique_frags = set()
    for mol in mols:
        if mol is None:
            continue
        unique_frags.update(BRICS.BRICSDecompose(mol))
    # Iteration order of a set of strings can differ between interpreter runs
    # (hash randomization); sorting makes the fragment order reproducible.
    return [Chem.MolFromSmiles(smiles) for smiles in sorted(unique_frags)]

def generate_mols(frags_mol, max_mols=1000):
    """Build new molecules by recombining BRICS fragments.

    Args:
        frags_mol: Fragment molecules handed to ``BRICS.BRICSBuild``.
        max_mols: Upper bound on how many molecules are taken from the
            builder. Defaults to 1000.

    Returns:
        List of at most ``max_mols`` built molecules, shuffled with
        ``np.random.shuffle`` (so the order follows NumPy's global seed).
    """
    builder = BRICS.BRICSBuild(frags_mol)
    print('building mols...')
    generated_mols = []
    for _ in range(max_mols):
        try:
            mol = next(builder)
        except StopIteration:
            # Builder is exhausted: stop instead of retrying for every
            # remaining iteration.
            break
        # NOTE(review): presumably required because molecules coming out of
        # the builder have no computed property cache yet -- confirm.
        mol.UpdatePropertyCache(strict=True)
        generated_mols.append(mol)
    np.random.shuffle(generated_mols)
    return generated_mols

def generated_mols_to_csv(generated_mols):
    """Write the generated molecules and their SMILES to ``generated_mols.csv``.

    Args:
        generated_mols: Molecules to export; each is converted with
            ``Chem.MolToSmiles``.
    """
    table = pd.DataFrame({'mol': generated_mols})
    table['SMILES'] = [Chem.MolToSmiles(mol) for mol in table['mol']]
    # The file is written to the current working directory.
    table.to_csv('generated_mols.csv')

def draw_generated_mols(mols):
    """Render the first 50 molecules as a grid image, save it, and display it.

    Args:
        mols: Sequence of molecules; only ``mols[:50]`` is drawn.
    """
    preview = mols[:50]
    grid = Draw.MolsToGridImage(preview, molsPerRow=3, subImgSize=(300, 150))
    grid.save('generated_mols.png')
    grid.show()

if __name__ == '__main__':
    # Seed NumPy's global RNG before running; generate_mols shuffles its
    # result with np.random.shuffle.
    np.random.seed(1234)
    main()
smiles.csv를 스크립트와 같은 디렉터리에 준비합니다. (위 코드는 'smiles_list.csv'를 읽으므로 파일 이름을 코드와 일치시켜야 합니다.)
smiles.csv
SMILES
C1CCCCC1
c1ccccc1
N[C@@H](C)C(=O)O
C/C=C/C
C1=CC2=CC3=CC=C(N3)C=C4C=CC(=N4)C=C5C=CC(=N5)C=C1N2
COC(=O)C(\C)=C\C1C(C)(C)[C@H]1C(=O)O[C@@H]2C(C)=C(C(=O)C2)CC=CC=C
CC[C@H](O1)CC[C@@]12CCCO2

좋은 웹페이지 즐겨찾기