# Source code for memo_ms.visualization

import numpy as np
import scipy as sp
from itertools import cycle
from skbio.stats.ordination import pcoa
import cimcb_lite as cb
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage


def plot_pcoa_2d(
    matrix,
    df_metadata,
    filename_col,
    group_col,
    metric = 'braycurtis',
    norm = False,
    scaling = False,
    pc_to_plot = (1, 2)
    ):
    """Simple 2D PCoA plot of a MEMO matrix / Feature table using Plotly

    Only the samples found in both ``matrix.index`` and
    ``df_metadata[filename_col]`` are plotted; rows of ``matrix`` are put in
    the same order as the matching metadata rows. The figure is displayed with
    ``fig.show()``.

    Args:
        matrix (DataFrame): A Table in the MemoMatrix.memo_matrix or FeatureTable.feature_table format
        df_metadata (DataFrame): Metadata of the MEMO matrix samples
        filename_col (str): Column name in df_metadata to match memo_matrix index
        group_col (str): Column name in df_metadata to use as groups for plotting
        metric (str, optional): Distance metric to use, see https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html. Defaults to 'braycurtis'.
        norm (bool, optional): Apply samples normalization (each row divided by its sum). Defaults to False.
        scaling (bool, optional): Apply log10 transform (zeros left at 0) then pareto scaling to MEMO matrix columns. Defaults to False.
        pc_to_plot (sequence of int, optional): 1-based PCs to plot on the x and y axes. Defaults to (1, 2).

    Returns:
        None
    """
    #pylint: disable=too-many-arguments
    #pylint: disable=too-many-locals

    # Restrict both tables to their shared samples, aligned on metadata order
    df_metadata_restricted = df_metadata[df_metadata[filename_col].isin(list(matrix.index))]
    sample_names = list(df_metadata_restricted[filename_col])
    matrix = matrix[matrix.index.isin(sample_names)].reindex(sample_names)

    if norm is True:
        matrix = matrix.div(matrix.sum(axis=1), axis=0)
    if scaling is True:
        # Cast to float first: with an integer table np.zeros_like() would
        # build an integer `out` buffer that np.log10 cannot write into.
        values = matrix.to_numpy(dtype=float)
        values = np.log10(values, out=np.zeros_like(values), where=(values != 0))  # Log scale (base-10)
        matrix = cb.utils.scale(values, method='pareto')

    dm_memo = sp.spatial.distance.pdist(matrix, metric)
    pcoa_results = pcoa(dm_memo)

    x = pcoa_results.samples[f'PC{pc_to_plot[0]}']
    y = pcoa_results.samples[f'PC{pc_to_plot[1]}']

    # proportion_explained is looked up by position; .iloc makes that explicit
    # instead of relying on integer-key fallback on a labelled Series.
    explained = pcoa_results.proportion_explained
    exp_var_pc1 = round(100 * explained.iloc[pc_to_plot[0] - 1], 1)
    exp_var_pc2 = round(100 * explained.iloc[pc_to_plot[1] - 1], 1)

    fig = px.scatter(
        x=x,
        y=y,
        color=df_metadata_restricted[group_col],
        labels={
            'x': f"PC{pc_to_plot[0]} ({exp_var_pc1} %)",
            'y': f"PC{pc_to_plot[1]} ({exp_var_pc2} %)",
            'color': group_col
        },
        title="2D PCoA",
        hover_name=df_metadata_restricted[filename_col],
        template="simple_white"
    )
    fig.update_layout({'width':1000, 'height':650})
    fig.show()
def plot_pcoa_3d(
    matrix,
    df_metadata,
    filename_col,
    group_col,
    metric = 'braycurtis',
    norm = False,
    scaling = False,
    pc_to_plot = (1, 2, 3)
    ):
    """Simple 3D PCoA plot of a MEMO matrix / Feature table using Plotly

    Only the samples found in both ``matrix.index`` and
    ``df_metadata[filename_col]`` are plotted; rows of ``matrix`` are put in
    the same order as the matching metadata rows. The figure is displayed with
    ``fig.show()``.

    Args:
        matrix (DataFrame): A Table in the MemoMatrix.memo_matrix or FeatureTable.feature_table format
        df_metadata (DataFrame): Metadata of the MEMO matrix samples
        filename_col (str): Column name in df_metadata to match memo_matrix index
        group_col (str): Column name in df_metadata to use as groups for plotting
        metric (str, optional): Distance metric to use, see https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html. Defaults to 'braycurtis'.
        norm (bool, optional): Apply samples normalization (each row divided by its sum). Defaults to False.
        scaling (bool, optional): Apply log10 transform (zeros left at 0) then pareto scaling to MEMO matrix columns. Defaults to False.
        pc_to_plot (sequence of int, optional): 1-based PCs to plot on the x, y and z axes. Defaults to (1, 2, 3).

    Returns:
        None
    """
    #pylint: disable=too-many-arguments
    #pylint: disable=too-many-locals

    # Restrict both tables to their shared samples, aligned on metadata order
    df_metadata_restricted = df_metadata[df_metadata[filename_col].isin(list(matrix.index))]
    sample_names = list(df_metadata_restricted[filename_col])
    matrix = matrix[matrix.index.isin(sample_names)].reindex(sample_names)

    if norm is True:
        matrix = matrix.div(matrix.sum(axis=1), axis=0)
    if scaling is True:
        # Cast to float first: with an integer table np.zeros_like() would
        # build an integer `out` buffer that np.log10 cannot write into.
        values = matrix.to_numpy(dtype=float)
        values = np.log10(values, out=np.zeros_like(values), where=(values != 0))  # Log scale (base-10)
        matrix = cb.utils.scale(values, method='pareto')

    dm_memo = sp.spatial.distance.pdist(matrix, metric)
    pcoa_results = pcoa(dm_memo)

    x = pcoa_results.samples[f'PC{pc_to_plot[0]}']
    y = pcoa_results.samples[f'PC{pc_to_plot[1]}']
    z = pcoa_results.samples[f'PC{pc_to_plot[2]}']

    # proportion_explained is looked up by position; .iloc makes that explicit
    # instead of relying on integer-key fallback on a labelled Series.
    explained = pcoa_results.proportion_explained
    exp_var_pc1 = round(100 * explained.iloc[pc_to_plot[0] - 1], 1)
    exp_var_pc2 = round(100 * explained.iloc[pc_to_plot[1] - 1], 1)
    exp_var_pc3 = round(100 * explained.iloc[pc_to_plot[2] - 1], 1)

    fig = px.scatter_3d(
        x=x,
        y=y,
        z=z,
        color=df_metadata_restricted[group_col],
        labels={
            'x': f"PC{pc_to_plot[0]} ({exp_var_pc1} %)",
            'y': f"PC{pc_to_plot[1]} ({exp_var_pc2} %)",
            'z': f"PC{pc_to_plot[2]} ({exp_var_pc3} %)",
            'color': group_col
        },
        title="3D PCoA",
        hover_name=df_metadata_restricted[filename_col],
        template="simple_white"
    )
    fig.update_layout({'width':1000, 'height':650})
    fig.show()
def plot_hca(
    matrix,
    df_metadata,
    filename_col,
    group_col,
    plotly_discrete_cm = px.colors.qualitative.Plotly,
    linkage_method = 'ward',
    linkage_metric = 'euclidean',
    norm = False,
    scaling = False
    ):
    """Simple HCA plot of a MEMO matrix / Feature table using matplotlib

    Only the samples found in both ``matrix.index`` and
    ``df_metadata[filename_col]`` are clustered. Leaves are labelled with the
    sample's group and each label is coloured by group. The figure is
    displayed with ``plt.show()``.

    Args:
        matrix (DataFrame): A Table in the MemoMatrix.memo_matrix or FeatureTable.feature_table format
        df_metadata (DataFrame): Metadata of the MEMO matrix samples
        filename_col (str): Column name in df_metadata to match memo_matrix index
        group_col (str): Column name in df_metadata to use as groups for plotting
        plotly_discrete_cm (list of str, optional): Plotly discrete colormap to use for groups; it is cycled
            if there are more groups than colours. The colours are handed to matplotlib, so they must be in a
            format matplotlib accepts. Defaults to px.colors.qualitative.Plotly.
        linkage_method (str, optional): Linkage method to use. Defaults to 'ward'.
        linkage_metric (str, optional): Linkage metric to use. Defaults to 'euclidean'.
        norm (bool, optional): Apply samples normalization (each row divided by its sum). Defaults to False.
        scaling (bool, optional): Apply log10 transform (zeros left at 0) then pareto scaling to MEMO matrix columns. Defaults to False.

    Returns:
        None
    """
    #pylint: disable=too-many-arguments
    #pylint: disable=too-many-locals
    #pylint: disable=dangerous-default-value

    # Restrict both tables to their shared samples, aligned on metadata order
    df_metadata_restricted = df_metadata[df_metadata[filename_col].isin(list(matrix.index))]
    sample_names = list(df_metadata_restricted[filename_col])
    matrix = matrix[matrix.index.isin(sample_names)].reindex(sample_names)

    if norm is True:
        matrix = matrix.div(matrix.sum(axis=1), axis=0)
    if scaling is True:
        # Cast to float first: with an integer table np.zeros_like() would
        # build an integer `out` buffer that np.log10 cannot write into.
        values = matrix.to_numpy(dtype=float)
        values = np.log10(values, out=np.zeros_like(values), where=(values != 0))  # Log scale (base-10)
        matrix = cb.utils.scale(values, method='pareto')

    # Tick labels are read back from matplotlib as text, so both the leaf
    # labels and the colour lookup are keyed by str(group); otherwise
    # non-string groups (e.g. integers) would raise KeyError below.
    leaf_labels = [str(group) for group in df_metadata_restricted[group_col]]
    groups = df_metadata_restricted[group_col].unique()
    dic_col = {
        str(group): color
        for group, color in zip(groups, cycle(plotly_discrete_cm))
    }

    Z = linkage(matrix, method=linkage_method, metric=linkage_metric)

    plt.figure(figsize=(12, 8), dpi=80)
    dendrogram(
        Z,
        labels=leaf_labels,
        leaf_rotation=0,
        orientation='left'
    )

    # With orientation='left' the leaf labels sit on the y axis
    for lbl in plt.gca().get_yticklabels():
        lbl.set_color(dic_col[lbl.get_text()])
    plt.show()
def plot_heatmap(
    matrix,
    df_metadata,
    filename_col,
    group_col,
    plotly_discrete_cm = px.colors.qualitative.Plotly,
    linkage_method = 'ward',
    linkage_metric = 'euclidean',
    heatmap_metric = 'braycurtis',
    norm = False,
    scaling = False
    ):
    """HCA and heatmap plot of a MEMO matrix / Feature table using Plotly

    Draws a top and a side dendrogram, a heatmap of pairwise sample distances
    ordered like the dendrogram leaves, and a strip of markers coloured by
    group between the top dendrogram and the heatmap. Only the samples found
    in both ``matrix.index`` and ``df_metadata[filename_col]`` are used. The
    figure is displayed with ``fig.show()``.

    Args:
        matrix (DataFrame): A Table in the MemoMatrix.memo_matrix or FeatureTable.feature_table format
        df_metadata (DataFrame): Metadata of the MEMO matrix samples
        filename_col (str): Column name in df_metadata to match memo_matrix index
        group_col (str): Column name in df_metadata to use as groups for plotting
        plotly_discrete_cm (list of str, optional): Plotly discrete colormap to use for groups; it is cycled
            if there are more groups than colours. Defaults to px.colors.qualitative.Plotly.
        linkage_method (str, optional): Linkage method to use. Defaults to 'ward'.
        linkage_metric (str, optional): Distance metric used to build the dendrograms. Defaults to 'euclidean'.
        heatmap_metric (str, optional): Distance metric to use for heatmap. Defaults to 'braycurtis'.
        norm (bool, optional): Apply samples normalization (each row divided by its sum). Defaults to False.
        scaling (bool, optional): Apply log10 transform (zeros left at 0) then pareto scaling to MEMO matrix columns. Defaults to False.

    Returns:
        None
    """
    #pylint: disable=too-many-arguments
    #pylint: disable=too-many-locals
    #pylint: disable=dangerous-default-value

    # Restrict both tables to their shared samples, aligned on metadata order
    df_metadata_restricted = df_metadata[df_metadata[filename_col].isin(list(matrix.index))]
    sample_names = list(df_metadata_restricted[filename_col])
    matrix = matrix[matrix.index.isin(sample_names)].reindex(sample_names)

    if norm is True:
        matrix = matrix.div(matrix.sum(axis=1), axis=0)
    if scaling is True:
        # Cast to float first: with an integer table np.zeros_like() would
        # build an integer `out` buffer that np.log10 cannot write into.
        values = matrix.to_numpy(dtype=float)
        values = np.log10(values, out=np.zeros_like(values), where=(values != 0))  # Log scale (base-10)
        matrix = cb.utils.scale(values, method='pareto')

    dm_memo = sp.spatial.distance.pdist(matrix, heatmap_metric)

    # create_dendrogram applies `distfun` to the data and hands the resulting
    # condensed distances to `linkagefun`. The metric therefore has to be
    # applied in `distfun`: a `metric` argument given to linkage() is not
    # used once the input is already a condensed distance vector.
    def _distfun(data):
        return sp.spatial.distance.pdist(data, metric=linkage_metric)

    def _linkagefun(distances):
        return linkage(distances, method=linkage_method)

    # Create Top Dendrogram (leaves labelled by group)
    fig = ff.create_dendrogram(
        matrix,
        orientation='bottom',
        labels=df_metadata_restricted[group_col].to_list(),
        distfun=_distfun,
        linkagefun=_linkagefun
    )
    for trace in fig['data']:
        trace['yaxis'] = 'y2'

    # Create Side Dendrogram (default labels, i.e. the row positions)
    dendro_side = ff.create_dendrogram(
        matrix,
        orientation='right',
        distfun=_distfun,
        linkagefun=_linkagefun
    )
    for trace in dendro_side['data']:
        trace['xaxis'] = 'x2'

    # Add Side Dendrogram Data to Figure
    for trace in dendro_side['data']:
        fig.add_trace(trace)

    fig.update_layout(plot_bgcolor ='rgba(0,0,0,0)')

    # Create Heatmap: reorder the square distance matrix like the leaves
    dendro_leaves = list(map(int, dendro_side['layout']['yaxis']['ticktext']))
    heat_data = sp.spatial.distance.squareform(dm_memo)
    heat_data = heat_data[dendro_leaves, :]
    heat_data = heat_data[:, dendro_leaves]

    heatmap = go.Heatmap(
        x = fig['layout']['xaxis']['tickvals'],
        y = dendro_side['layout']['yaxis']['tickvals'],
        z = heat_data,
        colorscale = 'YlOrRd',
        colorbar=dict(
            title="Distance",
            len= 0.4,
            y= 0.2,
            ypad = 5
            ),
        reversescale = True,
    )

    # Add Heatmap Data to Figure
    fig.add_trace(heatmap)

    # Create and add scatter plot for categories.
    # drop=True: the old index is not needed, and keeping it would raise if
    # its name clashed with an existing metadata column.
    df_meta_reindex = df_metadata_restricted.reset_index(drop=True)
    df_meta_reindex = df_meta_reindex.reindex(dendro_leaves)
    df_meta_reindex['x'] = list(heatmap['x'])
    df_meta_reindex['y'] = 1

    groups = df_metadata_restricted[group_col].unique()
    dic_col = dict(zip(groups, cycle(plotly_discrete_cm)))

    scats = []
    for group in groups:
        group_rows = df_meta_reindex[df_meta_reindex[group_col] == group]
        scats.append(
            go.Scatter(
                x=group_rows['x'],
                y=group_rows['y'],
                # Trace names are text; str() keeps the legend filter below
                # consistent for non-string groups.
                name=str(group),
                marker_color = dic_col[group],
                mode='markers',
                xaxis= 'x',
                yaxis = 'y3'
            )
        )
    fig.add_traces(scats)

    # Edit Layout
    fig.update_layout({'width':1200, 'height':800,'hovermode': 'closest', 'showlegend':True})
    fig.update_layout(title_text="Heatmap", title_font_size=15, title_x=0.5)
    fig.update_layout(paper_bgcolor ='rgb(255,255,255)')

    # Settings shared by every axis of the composite figure
    bare_axis = {
        'mirror': False,
        'showgrid': False,
        'showline': False,
        'zeroline': False,
        'ticks': ""
    }
    # Edit xaxis (heatmap / top dendrogram / group markers)
    fig.update_layout(xaxis={'domain': [.15, 1], **bare_axis})
    # Edit xaxis2 (side dendrogram)
    fig.update_layout(xaxis2={'domain': [0, .14], 'showticklabels': False, **bare_axis})
    # Edit yaxis (heatmap / side dendrogram)
    fig.update_layout(yaxis={'domain': [0, .87], 'showticklabels': False, **bare_axis})
    # Edit yaxis2 (top dendrogram)
    fig.update_layout(yaxis2={'domain':[0.89, 1], 'showticklabels': False, **bare_axis})
    # Edit yaxis3 (group markers)
    fig.update_layout(yaxis3={'domain':[0.84, 0.90], 'showticklabels': False, **bare_axis})

    # Only the per-group marker traces get a legend entry
    labels_to_show_in_legend = {str(group) for group in groups}
    for trace in fig['data']:
        if trace['name'] not in labels_to_show_in_legend:
            trace['showlegend'] = False

    fig.update_layout(legend=dict(
        orientation="v",
        y = 1,
        x = 1
    ))
    fig.update_xaxes(tickangle=45)
    fig.show()