Mendelevium

Diary
Drug Design
Field Knowledge
- Academia
  - Yang
- Biology
- Physics
Free Energy
Machine Learning & AI
Molecular Dynamics
Other
Specific Sytems
Techniques
- Linux
- Python
- Research
- Web
about
Home

Contact

Copyright © 2025 Xufan Gao | Academic Research Blog

Home > Techniques > Web

A Bunch of Biophysics is Loading ...

Web

分子动力学聚类分析与热图可视化技术
 分子动力学聚类分析与热图可视化技术聚类分析首先需要准备一个包含每个聚类中帧ID的 clus_result.dat 文件，格式如下（每个聚类的第一个数字是中心构象）： cluster 1: 3722 3946 0 1 4 10 23 33 36 41 45 46 47 51 54 59 61 62 63 66 67 69 76 80 84 85 ...... cluster 2: 489 1886 2 3 5 8 9 11 13 14 16 17 18 19 20 21 22 24 25 27 30 31 32 34 35 37 38 39 40 42 43 44 48 49.... .... 在VMD中通过以下TCL脚本生成： # http://github.com/anjibabuIITK/CLUSTER-ANALYSIS-USING-VMD-TCL set number 9 ;# number of clusters, others are tagged 'other' set rcutoff 1.5 ;# RMSD cutoff. unit: angstrom set step_size 1 set nframes [molinfo top get numframes] set inf 0 set nf $nframes set totframes [expr $nf - 1 ] set selA [atomselect top "fragment 1 and resid 149 to 156 and backbone"] ;# select the ligand set lists [measure cluster $selA num $number cutoff $rcutoff first $inf last $totframes step $step_size distfunc rmsd weight mass] set file [open "clus_result.dat" w] for {set i 1} {$i <= [llength $lists]} {incr i} { set lst [lindex $lists [expr $i-1]] puts $file [format "cluster %d: %d" $i [llength $lst]] puts $file $lst puts $file "" } close $file # save the coordinates of centroid structures set c01 [lindex [lindex $lists 0] 0] set sel [atomselect top all frame $c01] set real_frame [expr $c01+1] $sel writegro c01_${real_frame}.gro puts [format "write the centroid of 1st cluster: frame %d" $real_frame] set c02 [lindex [lindex $lists 1] 0] set sel [atomselect top all frame $c02] set real_frame [expr $c02+1] $sel writegro c02_${real_frame}.gro puts [format "write the centroid of 2nd cluster: frame %d" $real_frame] 然后使用Python进行可视化： import matplotlib.pyplot as plt import numpy as np import os def read_vmd_clus_result(file): data = [] with open(file, 'r') as f: while f.readline().strip().startswith('cluster'): line = f.readline().strip() data.append([int(fr) for fr in line.split()]) _ = f.readline() # empty return data def get_id_with_time(data): # data: output from read_vmd_clus_result() # return: a list of tuples, (frame_id, cluster_id) # cluster_id starts from 1 id_with_time = [] for i in range(len(data)): cl = data[i] id_with_time += [(fr, i + 1) for fr in cl] id_with_time.sort(key=lambda x: x[0]) return id_with_time font_le = {'family': 'Times New Roman', 'weight': 'demibold', 'size': 16} font_la = {'family': 'Times New Roman', 'fontname': 'Times New Roman', 'weight': 'demibold', 'size': 24} font_tc = {'family': 'Times New Roman', 'fontname': 'Times New Roman', 'weight': 'demibold', 'size': 20} font_ti = {'family': 'Times New Roman', 'fontname': 'Times New Roman', 'weight': 'demibold', 'size': 28} font_hu = {'family': 'Times New Roman', 'fontname': 'Times New Roman', 'weight': 'demibold', 'size': 36} # a framework of the plot def plot_common(xlabel, ylabel, thickness=2, title=None, size=(8,6), xpad=6, ypad=0, title_pad=0, ticks_size=16, tight=False, ax_color='black'): fig, ax = plt.subplots(figsize=size) # fix the xlabel overflow problem for axis in ['top', 'bottom', 'left', 'right']: ax.spines[axis].set_linewidth(thickness) ax.tick_params(width=thickness) ax.tick_params(axis='y', colors=ax_color, labelcolor=ax_color) # plt.xticks(font='Arial', size=16, weight='bold') # plt.yticks(font='Arial', size=16, weight='bold') plt.xticks(font='Times New Roman', size=ticks_size, weight='demibold') plt.yticks(font='Times New Roman', size=ticks_size, weight='demibold') plt.xlabel(xlabel, fontdict=font_la, labelpad=xpad) plt.ylabel(ylabel, fontdict=font_la, labelpad=ypad, color=ax_color) if title is not None: plt.title(title, fontdict=font_ti, pad=title_pad) if tight: plt.tight_layout() return fig, ax def plot_clustering_id_with_time(idxs, nsperframe, biggest=10, path=None, point=False, size=(8,6), ssize=1): # plot the frame_id with cluster_id. Marking the selected centroid frame (point) with a star. # nsperframe: convert frame_id to nanosecond # biggest: biggest cluster_id shown. Other frames are tagged 'other'. plot_common(xlabel='Time (ns)', ylabel='Cluster ID', size=size) biggest = min(biggest, int(max(idxs))) plt.yticks(np.arange(biggest+1), labels=np.arange(biggest).tolist()+['Other']) x = np.arange(len(idxs))*nsperframe y = [min(i, biggest) for i in idxs] plt.scatter(x, y, s=ssize) if point: plt.scatter(point*nsperframe, idxs[point], marker='*', s=50, color='r') print("The number of clusters: {0:d}".format(len(set(idxs)))) print("The biggest cluster lasted for {0:.1f} ns ({1:.1%})".format(np.sum(idxs==1)*nsperframe, np.sum(idxs==1)/len(idxs))) print("The second cluster lasted for {0:.1f} ns ({1:.1%})".format(np.sum(idxs==2)*nsperframe, np.sum(idxs==2)/len(idxs))) print("The unclustered frames (>=no. {2:d}) occupies {0:.1f} ns or {1:.1%}".format(np.sum(idxs>=biggest)*nsperframe, np.sum(idxs>=biggest)/len(idxs), biggest)) print("The centroid of the biggest cluster is at {0:.1f} ns.".format(point*nsperframe)) if path is not None: plt.savefig(os.path.join(path,'cluster.png')) plt.show() path = 'xxxxxxxxx/clus_result.dat' data = read_vmd_clus_result(path) id_with_time = get_id_with_time(data) plot_clustering_id_with_time(np.array(id_with_time)[:, 1], 0.5, path=os.path.dirname(path), point=data[0][0], ssize=1.25) FEP单点突变热图读取数据： ddG = read_single() # not provided here. customize yourselves. it's just a dictionary of mutation: ddG. # you must follow the format of E1A, E10A, etc. ddG = { 'E1A': -0.783225000000002, 'V2A': 0.379990000000001, 'T3A': -0.7186525, 'E4A': 2.6721, ..... } 然后进行热图绘制： import pandas as pd from matplotlib import pyplot as plt import numpy as np import copy import seaborn as sns # also requires the above plot_common def get_matrix(ddG): columns = sorted(list(set([key[:-1] for key in ddG.keys()])), key=lambda x: int(x[1:])) rows = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Y', 'Z'] df = pd.DataFrame(index=rows, columns=columns) df.iloc[:,:] = np.NAN # process into a matrix for key, value in ddG.items(): col, row = key[:-1], key[-1] df.loc[row, col] = value return df def heatmap_single(df): # Convert all entries to numeric values, replacing non-numeric entries with NaN df = df.apply(pd.to_numeric, errors='coerce') # NOTE: you can adjust the color here. The effect may vary with your ddG data range. cmap = sns.diverging_palette(h_neg=0, h_pos=240, n=15, as_cmap=True) # Mask for NaN values mask = df.isnull() # heatmap. no text means not done. the text color is adaptive to the background color fig, ax = plot_common('Residue Position', 'Mutant', size=(8, 10), xpad=6, ypad=0, ticks_size=14, tight=False) sns.heatmap(df, cmap=cmap, center=0, annot=True, mask=mask, fmt='.2f', cbar_kws={'label': '\u0394\u0394G (kcal/mol)', 'format': '%.2f'}, linewidths=0.5, linecolor='grey', annot_kws={'fontfamily': 'Arial'}) # Update colorbar font size cbar = ax.collections[0].colorbar cbar.ax.tick_params(labelsize=14) cbar.set_label('\u0394\u0394G (kcal/mol)', fontsize=18, weight='demibold', family='Arial') plt.show() df = get_matrix(ddG) heatmap_single(copy.deepcopy(df))
Techniques · 2025-10-08

Touch background to close