新建
batched target fishing with TransformerCPI
nickkk
推荐镜像 :tcpi:notebook
推荐机型 :c12_m92_1 * NVIDIA V100
赞
2
点击“开始连接”
选择 GPU 镜像 tcpi:notebook
在右上角选择 kernel：tcpi
代码
文本
[1]
import sys
sys.path.append('./tcpi/')
import torch
from predict import pack
from featurizer import featurizer
import pandas as pd
import numpy as np

# Single source of truth for the device: the integer is handed both to
# ``Module.to`` below and to ``Tester`` (which forwards it to ``pack``).
# NOTE(review): an integer device is presumably interpreted by torch as
# a CUDA index, so this needs a GPU runtime -- confirm.
device = 0

# NOTE(review): torch.load unpickles the checkpoint, which can execute
# arbitrary code -- only load 'tcpi.pt' from a trusted source.
model = torch.load('tcpi.pt').to(device)
class Tester(object):
    """Run a model in evaluation mode over pre-featurized samples.

    The constructor stores ``model`` and ``device`` unchanged; ``device``
    is forwarded to ``pack`` for every sample.
    """

    def __init__(self, model, device):
        self.model = model
        self.device = device

    def test(self, dataset):
        """Score each sample in *dataset* and return the last prediction.

        Args:
            dataset: iterable whose items unpack as ``(atom, adj, protein)``.

        Returns:
            Whatever ``self.model`` returns for the final sample. Earlier
            predictions are overwritten.
            NOTE(review): callers in this file presumably pass
            single-sample datasets -- confirm before using larger ones.

        Raises:
            ValueError: if *dataset* yields no samples (previously this
                surfaced as an UnboundLocalError on the return line).
        """
        self.model.eval()
        got_prediction = False
        predicted_scores = None
        with torch.no_grad():
            for atom, adj, protein in dataset:
                # ``pack`` takes parallel lists, so wrap the single sample.
                batch = pack([atom], [adj], [protein], self.device)
                predicted_scores = self.model(batch)
                got_prediction = True
        if not got_prediction:
            raise ValueError('dataset is empty: nothing to score')
        return predicted_scores
# Module-level Tester instance; return_df reads this global directly
# rather than taking a tester argument.
tester = Tester(model, device)
def return_pred(seq, smiles, tester):
    """Return the model score for one ``(seq, smiles)`` pair as a float.

    ``featurizer(smiles, seq)`` must return exactly three sequences; they
    are zipped element-wise into samples and handed to ``tester.test``,
    whose result is converted with ``float``.
    NOTE(review): presumably the three sequences are compound features,
    adjacency matrices and protein features -- confirm against featurizer.
    """
    mol_feats, adj_mats, prot_feats = featurizer(smiles, seq)
    samples = list(zip(mol_feats, adj_mats, prot_feats))
    prediction = tester.test(samples)
    return float(prediction)
def process_fasta(path):
    """Parse a FASTA file into a ``{record_id: sequence}`` dict.

    A line starting with ``>`` opens a new record; its id is the first
    whitespace-delimited token after removing every ``>`` character.
    Following lines are stripped and concatenated onto that record. A
    repeated id restarts the record with an empty sequence.

    Args:
        path: location of the FASTA file.

    Returns:
        dict mapping record id to its full sequence string, in the order
        ids first appear in the file.

    Raises:
        ValueError: if a header carries no identifier, or if sequence
            data appears before the first header.
    """
    chunks = {}
    name = None
    # ``with`` guarantees the handle is closed even if parsing raises.
    with open(path) as handle:
        for line in handle:
            if line.startswith('>'):
                fields = line.replace('>', '').split()
                if not fields:
                    raise ValueError(
                        'FASTA header without an identifier in {!r}'.format(path)
                    )
                name = fields[0]
                chunks[name] = []
                continue
            residues = line.strip()
            if not residues:
                # Blank lines carry no sequence data; skipping them also
                # tolerates leading/trailing empty lines.
                continue
            if name is None:
                raise ValueError(
                    'sequence data before the first FASTA header in {!r}'.format(path)
                )
            chunks[name].append(residues)
    # Join once per record instead of repeated string concatenation.
    return {record: ''.join(parts) for record, parts in chunks.items()}
def return_df(smiles, seqs):
    """Score one molecule against every sequence and rank the results.

    Args:
        smiles: molecule string passed to ``return_pred`` for each sequence.
        seqs: mapping of sequence id to sequence string.

    Returns:
        pandas.DataFrame with columns ``seq_id``, ``seq``, ``smiles`` and
        ``score``, sorted by ``score`` in descending order. Row labels
        keep each entry's position in *seqs*.

    Uses the module-level ``tester`` for scoring.
    """
    scores = [return_pred(seq, smiles, tester) for seq in seqs.values()]
    df = pd.DataFrame({
        'seq_id': list(seqs.keys()),
        'seq': list(seqs.values()),
        'smiles': [smiles] * len(seqs),
        'score': scores,
    })
    # sort_values returns a new frame; the original code discarded it and
    # handed back the unsorted frame.
    return df.sort_values(by='score', ascending=False)
# Input locations for the screening run.
fasta_path = '/data/transformerCPI2.0/protein_sequence.fasta'
mol_path = '/data/Targetmol.txt'

# Sequence id -> sequence string for every record in the FASTA file.
seqs = process_fasta(fasta_path)

# The molecule file is read without a header row; column 0 supplies the
# molecule strings. Reads via mol_path instead of repeating the literal.
df = pd.read_csv(mol_path, header=None)
mol_list = list(df[0])
代码
文本
[2]
# Molecule string -> ranked DataFrame, for the first five molecules only.
res_dic = {}
for i, smiles in enumerate(mol_list[:5]):
    try:
        res_dic[smiles] = return_df(smiles, seqs)
    except Exception as err:
        # Best-effort: skip molecules that fail, but report why. Catching
        # Exception (not a bare except) lets Ctrl-C still stop the loop.
        print(i, 'failed', repr(err))
[14:32:37] Explicit valence for atom # 13 N, 4, is greater than permitted 1 failed
代码
文本
[3]
res_dic
{'[H]Cl.O=C(C(C1=O)=C(O)[C@@H](N(C)C)[C@]2([H])[C@@H](O)[C@]3([H])[C@](C)(O)C4=C(C(C3=C(O)[C@@]21O)=O)C(O)=CC=C4)N': seq_id seq \ 0 NP_001380844.1 MAEASSANLGSGCEEKRHEGSSSESVPPGTTISRVKLLDTMVDTFL... 1 NP_001380831.1 MSEKKQPVDLGLLEEDDEFEEFPAEDWAGLDEDEDAHVWEDNWDDD... 2 NP_001310960.2 MLRTAMGLRSWLAAPWGALPPRPPLLLLLLLLLLLQPPPPTWALSP... smiles score 0 [H]Cl.O=C(C(C1=O)=C(O)[C@@H](N(C)C)[C@]2([H])[... 0.979294 1 [H]Cl.O=C(C(C1=O)=C(O)[C@@H](N(C)C)[C@]2([H])[... 0.848804 2 [H]Cl.O=C(C(C1=O)=C(O)[C@@H](N(C)C)[C@]2([H])[... 0.939213 , 'CC1(C)[C@@H](O)CC[C@]2(C)C3=C([C@@]4(C(C[C@@H]([C@]4(CC3=O)C)[C@H](C)CC(C[C@@H](C)C(O)=O)=O)=O)C)[C@@H](O)C[C@@]12[H]': seq_id seq \ 0 NP_001380844.1 MAEASSANLGSGCEEKRHEGSSSESVPPGTTISRVKLLDTMVDTFL... 1 NP_001380831.1 MSEKKQPVDLGLLEEDDEFEEFPAEDWAGLDEDEDAHVWEDNWDDD... 2 NP_001310960.2 MLRTAMGLRSWLAAPWGALPPRPPLLLLLLLLLLLQPPPPTWALSP... smiles score 0 CC1(C)[C@@H](O)CC[C@]2(C)C3=C([C@@]4(C(C[C@@H]... 0.802814 1 CC1(C)[C@@H](O)CC[C@]2(C)C3=C([C@@]4(C(C[C@@H]... 0.933544 2 CC1(C)[C@@H](O)CC[C@]2(C)C3=C([C@@]4(C(C[C@@H]... 0.600747 , 'O[C@@]([C@H]1OC(C2=CC=CC=C2)=O)(C)[C@@H](C)CC3=CC(OC)=C(OC)C(OC)=C3C4=C1C=C5OCOC5=C4OC': seq_id seq \ 0 NP_001380844.1 MAEASSANLGSGCEEKRHEGSSSESVPPGTTISRVKLLDTMVDTFL... 1 NP_001380831.1 MSEKKQPVDLGLLEEDDEFEEFPAEDWAGLDEDEDAHVWEDNWDDD... 2 NP_001310960.2 MLRTAMGLRSWLAAPWGALPPRPPLLLLLLLLLLLQPPPPTWALSP... smiles score 0 O[C@@]([C@H]1OC(C2=CC=CC=C2)=O)(C)[C@@H](C)CC3... 0.856018 1 O[C@@]([C@H]1OC(C2=CC=CC=C2)=O)(C)[C@@H](C)CC3... 0.541609 2 O[C@@]([C@H]1OC(C2=CC=CC=C2)=O)(C)[C@@H](C)CC3... 0.202228 , 'Cc(nc1)cnc1C(N[C@@H](CCCCC/C=C\\[C@H](C1)[C@]1(C(NS(C1CC1)(=O)=O)=O)NC([C@H](C1)N2C[C@@H]1Oc1nc3ccccc3c3ccccc13)=O)C2=O)=O': seq_id seq \ 0 NP_001380844.1 MAEASSANLGSGCEEKRHEGSSSESVPPGTTISRVKLLDTMVDTFL... 1 NP_001380831.1 MSEKKQPVDLGLLEEDDEFEEFPAEDWAGLDEDEDAHVWEDNWDDD... 2 NP_001310960.2 MLRTAMGLRSWLAAPWGALPPRPPLLLLLLLLLLLQPPPPTWALSP... smiles score 0 Cc(nc1)cnc1C(N[C@@H](CCCCC/C=C\[C@H](C1)[C@]1(... 
0.890279 1 Cc(nc1)cnc1C(N[C@@H](CCCCC/C=C\[C@H](C1)[C@]1(... 0.885004 2 Cc(nc1)cnc1C(N[C@@H](CCCCC/C=C\[C@H](C1)[C@]1(... 0.211233 }
代码
文本
点个赞吧
推荐阅读
公开
Drug-target binding affinity predict with transformerCPInickkk
发布于 2023-08-03
公开
Drug-target binding affinity predict with transformerCPInickkk
发布于 2023-07-25
1 转存文件