mpld3의 도표화 비고
13769 단어 Python
test.py
import pandas as pd
import numpy as np
from rdkit import Chem
from rdkit.Chem.Draw import rdDepictor
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem import AllChem
from rdkit.Chem import DataStructs
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from mpld3 import plugins
import mpld3
# mpld3.enable_notebook()
def main():
    """Run the whole pipeline for ``generated_mols.csv``.

    Loads the molecule table, stores one SVG depiction per molecule in the
    ``svg`` column, adds the PCA coordinates and writes the interactive plot.
    """
    molecules = load_data('generated_mols.csv')
    # One SVG string per molecule; interactive_plot uses them as tooltips.
    molecules['svg'] = list(map(moltosvg, molecules['mol']))
    molecules = pca_for_df(molecules)
    interactive_plot(molecules)
def load_data(file_name):
    """Read the molecule CSV and (re)build its ``mol`` column from SMILES.

    Args:
        file_name: path of a CSV file whose first row is the header and
            whose first column is the index; it must have a ``SMILES`` column.

    Returns:
        The DataFrame with a ``mol`` column holding the result of
        ``Chem.MolFromSmiles`` for every ``SMILES`` entry. Any ``mol``
        column already present in the file is overwritten.
    """
    table = pd.read_csv(file_name, header=0, index_col=0)
    # NOTE(review): MolFromSmiles presumably yields None for SMILES it cannot
    # parse and nothing here filters such rows -- confirm downstream handling.
    table['mol'] = list(map(Chem.MolFromSmiles, table['SMILES']))
    return table
def moltosvg(mol, molSize=(200, 200), kekulize=True):
    """Render an RDKit molecule as an SVG string.

    Args:
        mol: molecule to draw. It is copied first, so the caller's object
            is never modified.
        molSize: sequence whose first two items are passed to the SVG
            drawer as its two size arguments.
        kekulize: when true, try to kekulize the working copy before
            drawing.

    Returns:
        The drawer's SVG text with every ``svg:`` substring removed.
    """
    mc = Chem.Mol(mol.ToBinary())
    if kekulize:
        try:
            Chem.Kekulize(mc)
        except Exception:
            # Best effort: if kekulization fails, draw a fresh,
            # un-kekulized copy instead. ``Exception`` (rather than a bare
            # ``except``) lets KeyboardInterrupt/SystemExit propagate.
            mc = Chem.Mol(mol.ToBinary())
    # Only generate 2D coordinates when the molecule has no conformer yet.
    if not mc.GetNumConformers():
        rdDepictor.Compute2DCoords(mc)
    drawer = rdMolDraw2D.MolDraw2DSVG(molSize[0], molSize[1])
    drawer.DrawMolecule(mc)
    drawer.FinishDrawing()
    svg = drawer.GetDrawingText()
    # Strip the namespace prefix from the tags in the generated markup.
    return svg.replace('svg:', '')
def mol2fparr(mol):
    """Return the Morgan bit-vector fingerprint of ``mol`` as a NumPy array.

    The fingerprint is requested with ``2`` as the second argument (the
    radius, per RDKit's documented signature) and library defaults otherwise.
    """
    fingerprint = AllChem.GetMorganFingerprintAsBitVect(mol, 2)
    # ConvertToNumpyArray writes its result into the array it is given,
    # so start from an empty one and hand that same object back.
    bits = np.zeros((0,))
    DataStructs.ConvertToNumpyArray(fingerprint, bits)
    return bits
def pca_for_df(df):
    """Add two PCA coordinates of the molecule fingerprints to ``df``.

    Every entry of ``df['mol']`` is converted with ``mol2fparr``; the
    resulting matrix is reduced to two components, which are stored in the
    ``PCA1`` and ``PCA2`` columns. The shapes of the fingerprint matrix and
    of the projection are printed along the way.

    Returns:
        The same DataFrame object, modified in place.
    """
    fingerprints = np.asarray(list(map(mol2fparr, df['mol'])))
    print(fingerprints.shape)
    projected = PCA(n_components=2).fit_transform(fingerprints)
    print(projected.shape)
    df['PCA1'], df['PCA2'] = projected[:, 0], projected[:, 1]
    return df
def interactive_plot(df, output_html='chemicalspace_for_generated_mols.html'):
    """Write an interactive PCA scatter plot with per-point HTML tooltips.

    Args:
        df: DataFrame with ``PCA1`` and ``PCA2`` columns for the point
            positions and an ``svg`` column whose entries are used as the
            HTML content of each point's tooltip.
        output_html: destination passed to ``mpld3.save_html``. Defaults to
            the file name that was previously hard-coded, so existing
            callers are unaffected.

    Side effects:
        Sets the global matplotlib ``font.size`` rcParam to 18 and writes
        the HTML output.
    """
    plt.rcParams["font.size"] = 18
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_xlabel('PCA1')
    ax.set_ylabel('PCA2')
    ax.set_title('chemical space')
    points = ax.scatter(
        df['PCA1'], df['PCA2'], s=200, alpha=0.5, edgecolors='none'
    )
    # One tooltip label per scatter point, in row order.
    tooltip = plugins.PointHTMLTooltip(
        points, df['svg'].values.tolist(), hoffset=10, voffset=10
    )
    plugins.connect(fig, tooltip)
    mpld3.save_html(fig, output_html)
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
SMILES 열을 포함한 다음과 같은 csv 파일을 준비합니다. 아래 예에는 이미 mol 열이 들어 있지만, mol 객체는 SMILES로부터 다시 생성하므로 이 열은 필요하지 않습니다.
generated_mols.csv
,mol,SMILES
0,<rdkit.Chem.rdchem.Mol object at 0x126093390>,C=CC=CCC1=C(C)[C@@H](OC2[C@H](OC)C2(C)C)CC1=O
1,<rdkit.Chem.rdchem.Mol object at 0x12609f3f0>,COC(=O)C(C)=C(C)C(=O)OC
2,<rdkit.Chem.rdchem.Mol object at 0x1263d34b0>,CO[C@H]1C(C(=O)O[C@H]2CC[C@@]3(CCCO3)O2)C1(C)C
3,<rdkit.Chem.rdchem.Mol object at 0x126467e10>,C=CC=C(C)C(=O)O[C@H]1C([C@H]2CC[C@@]3(CCCO3)O2)C1(C)C
4,<rdkit.Chem.rdchem.Mol object at 0x1263ccd50>,CC1(C)C(C(=O)O[C@H]2CC[C@@]3(CCCO3)O2)[C@@H]1[C@H]1CC[C@@]2(CCCO2)O1
5,<rdkit.Chem.rdchem.Mol object at 0x1263da870>,COC1[C@H](OC2[C@H]([C@H]3CC[C@@]4(CCCO4)O3)C2(C)C)C1(C)C
6,<rdkit.Chem.rdchem.Mol object at 0x125b126f0>,CC[C@H]1CC(=O)C(CC=CC2[C@H](CC)C2(C)C)=C1C
7,<rdkit.Chem.rdchem.Mol object at 0x1260b5150>,CC=C(C)C(=O)OC1[C@H](OC)C1(C)C
8,<rdkit.Chem.rdchem.Mol object at 0x126467570>,C=CC=C(C)C(=O)OC(=O)C(C)=CC=C
Reference
이 문제에 관하여(mpld3의 도표화 비고), 우리는 이곳에서 더 많은 자료를 발견하고 링크를 클릭하여 보았다: https://qiita.com/cat_lover/items/afca244ee724b30bf280 텍스트를 자유롭게 공유하거나 복사할 수 있습니다. 하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.
우수한 개발자 콘텐츠 발견에 전념 (Collection and Share based on the CC Protocol.)