In this example we will consider the mtcars dataset. It is a numeric matrix that gives the feature of several cars. We can cluster these cars, represent their structure in a group, and color the car names following their cylinder (the ‘cyl’ column). Thus, we will know if the cylinder is responsible of this structure!
# Libraries import pandas as pd from matplotlib import pyplot as plt from scipy.cluster.hierarchy import dendrogram, linkage import numpy as np # Data set url = 'https://python-graph-gallery.com/wp-content/uploads/mtcars.csv' df = pd.read_csv(url) df = df.set_index('model') del df.index.name # Calculate the distance between each sample Z = linkage(df, 'ward') # Make the dendro dendrogram(Z, labels=df.index, leaf_rotation=0, orientation="left", color_threshold=240, above_threshold_color='grey') # Create a color palette with 3 color for the 3 cyl possibilities my_palette = plt.cm.get_cmap("Accent", 3) # transforme the 'cyl' column in a categorical variable. It will allow to put one color on each level. df['cyl']=pd.Categorical(df['cyl']) my_color=df['cyl'].cat.codes # Apply the right color to each label ax = plt.gca() xlbls = ax.get_ymajorticklabels() num=-1 for lbl in xlbls: num+=1 val=my_color[num] lbl.set_color(my_palette(val))