In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

First you import pandas, seaborn, matplotlib, and numpy.¶

In [6]:
data = pd.read_excel('gtex_integrin_7_organs.xlsx')

This reads the information in the excel file that was provided that provides data of gene expressions for integrins from Gtex data.¶

The gene expresssions are measured from tissues taken from 7 different sights. Brain, lung, ovary, etc.¶

In [4]:
data
Out[4]:
Unnamed: 0 primary_site ITGA10 ITGAD ITGAM ITGA3 ITGBL1 ITGAE ITGA2 ITGB3 ... ITGA6 ITGA2B ITGB1 ITGAL ITGA9 ITGB5 ITGA8 ITGA4 ITGA1 ITGA11
0 GTEX-13QIC-0011-R1a-SM-5O9CJ Brain 0.5763 -6.5064 2.2573 0.7832 1.0363 4.6035 2.5731 -2.8262 ... 2.8562 1.3846 5.8430 1.1316 -0.7108 3.5387 -0.0725 -0.4521 0.2029 -2.8262
1 GTEX-1399S-1726-SM-5L3DI Lung 4.9137 -3.6259 4.7307 7.1584 1.7702 4.9556 1.9149 2.6067 ... 4.2412 4.1211 7.7256 4.4900 2.9281 6.1483 5.1867 2.6185 4.7856 -0.0277
2 GTEX-PWCY-1326-SM-48TCU Ovary 2.3953 -5.0116 1.4547 4.2593 -0.7346 4.4149 0.2642 1.5216 ... 3.6816 1.5465 7.2964 -0.9406 2.7742 5.0414 2.0325 0.7579 2.2573 1.2516
3 GTEX-QXCU-0626-SM-2TC69 Lung 4.0541 -2.3147 4.5053 7.5651 4.1788 4.1772 5.3695 1.8444 ... 4.9631 1.9149 7.9947 3.3911 2.8462 6.7683 4.1636 2.7951 5.3284 1.2147
4 GTEX-ZA64-1526-SM-5CVMD Breast 2.0569 -2.4659 3.3993 3.1311 3.0074 4.4977 -1.7809 2.7139 ... 4.7340 0.6332 7.3496 -0.9406 2.5338 6.5696 1.7229 -0.6416 3.1195 1.1050
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1982 GTEX-QMRM-0826-SM-3NB33 Lung 5.3067 -3.8160 4.9065 7.5810 5.8714 4.7345 2.6185 3.1095 ... 5.6080 3.7324 8.2849 4.6201 3.6440 6.7052 5.1094 3.3364 5.8153 1.6604
1983 GTEX-YFCO-1626-SM-4W1Z3 Prostate 2.9581 -4.6082 1.1641 4.6938 1.5902 5.8625 -0.5125 1.7617 ... 3.8798 -1.4699 7.5163 -0.3752 2.9562 5.3035 4.4304 -0.9406 3.6136 0.4233
1984 GTEX-1117F-2826-SM-5GZXL Breast 4.3184 -6.5064 1.0433 4.8440 3.5498 4.6809 1.0293 3.3478 ... 5.3256 -0.0725 7.7516 1.1382 2.1411 7.1132 0.3796 0.0854 3.8650 1.0151
1985 GTEX-Q2AG-2826-SM-2HMJQ Brain 3.4622 -5.5735 1.5013 5.4835 1.7702 4.7517 0.6790 -3.1714 ... 1.1960 4.1740 4.3002 0.5470 -0.9971 3.7982 -0.2498 1.4808 -0.5125 -0.5125
1986 GTEX-XV7Q-0426-SM-4BRVN Lung 2.5585 -1.7809 6.7916 6.5865 2.7051 4.9519 4.3618 3.1892 ... 3.5779 2.8974 7.7685 4.8294 1.9149 5.9989 2.4117 2.4198 4.2080 1.0007

1987 rows × 29 columns

This displays the data of everything in the excel file.¶

In [7]:
brain_data = data[data['primary_site']=='Brain']

This shows the data specifically for the brain.¶

In [8]:
brain_data
Out[8]:
Unnamed: 0 primary_site ITGA10 ITGAD ITGAM ITGA3 ITGBL1 ITGAE ITGA2 ITGB3 ... ITGA6 ITGA2B ITGB1 ITGAL ITGA9 ITGB5 ITGA8 ITGA4 ITGA1 ITGA11
0 GTEX-13QIC-0011-R1a-SM-5O9CJ Brain 0.5763 -6.5064 2.2573 0.7832 1.0363 4.6035 2.5731 -2.8262 ... 2.8562 1.3846 5.8430 1.1316 -0.7108 3.5387 -0.0725 -0.4521 0.2029 -2.8262
8 GTEX-N7MS-2526-SM-26GMA Brain 2.2960 -9.9658 0.6608 5.2840 0.4233 4.8510 -0.2671 -0.1031 ... 1.5415 4.6623 3.4687 0.5666 -0.0130 3.0654 0.7916 1.0433 -0.7346 -0.7588
10 GTEX-N7MS-2526-SM-26GMR Brain -0.2498 -9.9658 -0.8863 3.1685 -1.6394 2.8158 -0.4719 -1.1488 ... 1.6045 0.9268 2.8055 -0.5973 0.4657 1.8918 0.3460 0.3907 -1.9942 -1.5522
12 GTEX-NPJ7-0011-R6a-SM-2I3G7 Brain 1.6045 -6.5064 2.3193 3.6335 -2.3147 5.0670 -0.8863 -0.8084 ... 3.2018 1.7575 4.6894 0.4125 -0.6643 3.6916 -0.6193 -2.2447 1.2023 -1.9942
14 GTEX-132Q8-3026-SM-5PNVG Brain 2.8974 -6.5064 1.9601 4.1836 -0.8084 4.5892 -0.5543 0.3460 ... 3.6018 2.7931 4.7274 -0.0574 1.2271 4.3793 0.8488 -0.2159 2.1378 -0.6416
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1977 GTEX-13G51-0011-R6b-SM-5LZX4 Brain -0.3383 -6.5064 1.6234 2.7487 -2.2447 5.2415 -0.8863 -2.9324 ... 2.1988 0.4016 4.5142 -1.1811 -0.8084 3.9983 -1.0862 -3.1714 -0.7588 -1.9379
1978 GTEX-YFC4-0011-R10a-SM-4SOK5 Brain 0.4447 -5.5735 0.3231 3.5237 -1.5105 4.9016 0.9419 -2.7274 ... 2.8178 1.3567 4.4621 -0.2845 1.0222 3.3336 0.1903 -1.0559 0.0300 -0.4719
1980 GTEX-13112-0011-R4b-SM-5DUXL Brain 0.6969 -6.5064 -0.9686 2.3760 -2.2447 4.0739 -0.6193 -4.0350 ... 2.7357 1.5806 4.6882 -0.9971 -0.5756 3.5136 0.9343 -1.0862 0.4340 -2.2447
1981 GTEX-1313W-0011-R1b-SM-5EQ4A Brain 0.1124 -5.0116 2.2482 2.8897 -0.5125 4.6445 0.3115 -3.6259 ... 2.1147 0.9716 5.1202 0.6608 0.4761 3.2343 0.8408 -0.0574 -0.1828 -2.5479
1985 GTEX-Q2AG-2826-SM-2HMJQ Brain 3.4622 -5.5735 1.5013 5.4835 1.7702 4.7517 0.6790 -3.1714 ... 1.1960 4.1740 4.3002 0.5470 -0.9971 3.7982 -0.2498 1.4808 -0.5125 -0.5125

1152 rows × 29 columns

In [29]:
itgs = [col for col in brain_data.columns if col.startswith("ITG")]

melted = brain_data.melt(id_vars=["Unnamed: 0", "primary_site"],
                       value_vars=itgs,
                       var_name='Gene', value_name='Expression')

gene_order_brain = melted.groupby('Gene')['Expression'].mean().sort_values(ascending=False).index

plt.figure(figsize=(16, 6))
sns.violinplot(data=melted, x='Gene', y='Expression', order=gene_order_brain, scale='width', inner='box')
plt.xticks(rotation=90)
plt.title('Expression of All ITG Genes in Brain Samples')
plt.tight_layout()
plt.show()
/var/folders/9g/x0zd2t4d7wdcyyfwxxbnmqbr0000gn/T/ipykernel_1479/2738837910.py:10: FutureWarning: 

The `scale` parameter has been renamed and will be removed in v0.15.0. Pass `density_norm='width'` for the same effect.
  sns.violinplot(data=melted, x='Gene', y='Expression', order=gene_order_brain, scale='width', inner='box')
No description has been provided for this image

This creates a violin plot of the brain data that was extracted from the excel file.¶

In [9]:
plt.figure(figsize = (16,6))
sns.violinplot(data=brain_data)
plt.title("Integrin Genes of the Brain")
plt.xlabel("Integrin Genes")
plt.ylabel("Gene Expression Levels")
plt.show()
No description has been provided for this image
In [11]:
lung_data = data[data['primary_site']=='Lung']

This displays the data for the lung found in the excel file.¶

In [25]:
lung_data
Out[25]:
Unnamed: 0 primary_site ITGA10 ITGAD ITGAM ITGA3 ITGBL1 ITGAE ITGA2 ITGB3 ... ITGA6 ITGA2B ITGB1 ITGAL ITGA9 ITGB5 ITGA8 ITGA4 ITGA1 ITGA11
1 GTEX-1399S-1726-SM-5L3DI Lung 4.9137 -3.6259 4.7307 7.1584 1.7702 4.9556 1.9149 2.6067 ... 4.2412 4.1211 7.7256 4.4900 2.9281 6.1483 5.1867 2.6185 4.7856 -0.0277
3 GTEX-QXCU-0626-SM-2TC69 Lung 4.0541 -2.3147 4.5053 7.5651 4.1788 4.1772 5.3695 1.8444 ... 4.9631 1.9149 7.9947 3.3911 2.8462 6.7683 4.1636 2.7951 5.3284 1.2147
5 GTEX-11EI6-0826-SM-5985V Lung 6.0732 -2.4659 3.9901 7.3945 4.7688 5.1157 4.3356 2.3366 ... 3.7378 4.7247 7.5016 5.1396 2.5036 6.5443 4.6531 3.8136 5.8679 0.7407
6 GTEX-S341-0326-SM-2XCAU Lung 4.2510 -5.0116 3.3076 6.1715 3.1129 5.2954 2.2960 1.1184 ... 4.7104 2.7530 7.5022 4.0730 2.6325 6.0483 5.0562 2.6962 5.1611 0.9343
7 GTEX-WY7C-0426-SM-3NB3C Lung 3.3633 -2.5479 4.8340 6.6864 3.0585 4.8294 2.6464 0.7999 ... 5.1190 1.5013 8.0260 3.6635 3.2435 5.8503 5.2991 2.8076 4.7571 -0.1345
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1962 GTEX-Q2AH-0426-SM-2I3EP Lung 5.9644 -1.3921 5.1061 6.9470 3.8973 4.8630 3.6089 3.9765 ... 5.1115 4.9041 7.9145 4.5559 3.7138 6.5782 4.7512 2.9710 5.0777 1.8444
1970 GTEX-RWS6-0226-SM-2XCA9 Lung 6.0830 -0.5756 4.3889 6.7302 4.6053 5.1065 2.8321 0.9716 ... 5.8176 2.5437 7.7929 4.9012 2.7993 6.7510 5.2204 2.8422 5.0951 -0.3201
1975 GTEX-131XE-0726-SM-5HL9K Lung 3.7971 -1.9379 4.8555 6.4052 3.9561 5.4263 3.2959 4.5199 ... 4.6697 6.5777 7.5114 5.2130 2.3816 6.6225 3.7389 3.7248 5.6809 0.8488
1982 GTEX-QMRM-0826-SM-3NB33 Lung 5.3067 -3.8160 4.9065 7.5810 5.8714 4.7345 2.6185 3.1095 ... 5.6080 3.7324 8.2849 4.6201 3.6440 6.7052 5.1094 3.3364 5.8153 1.6604
1986 GTEX-XV7Q-0426-SM-4BRVN Lung 2.5585 -1.7809 6.7916 6.5865 2.7051 4.9519 4.3618 3.1892 ... 3.5779 2.8974 7.7685 4.8294 1.9149 5.9989 2.4117 2.4198 4.2080 1.0007

288 rows × 29 columns

In [30]:
itgs = [col for col in lung_data.columns if col.startswith("ITG")]

melted_lung = lung_data.melt(id_vars=["Unnamed: 0", "primary_site"],
                       value_vars=itgs,
                       var_name='Gene', value_name='Expression')

gene_order_lung = melted_lung.groupby('Gene')['Expression'].mean().sort_values(ascending=False).index

plt.figure(figsize=(16, 6))
sns.violinplot(data=melted_lung, x='Gene', y='Expression', order=gene_order_lung, scale='width', inner='box')
plt.xticks(rotation=90)
plt.title('Expression of All ITG Genes in Lung Samples')
plt.tight_layout()
plt.show()
/var/folders/9g/x0zd2t4d7wdcyyfwxxbnmqbr0000gn/T/ipykernel_1479/2889910766.py:10: FutureWarning: 

The `scale` parameter has been renamed and will be removed in v0.15.0. Pass `density_norm='width'` for the same effect.
  sns.violinplot(data=melted_lung, x='Gene', y='Expression', order=gene_order_lung, scale='width', inner='box')
No description has been provided for this image

This shows the violin plot of the lung data that was found in the excel file.¶

In [12]:
plt.figure(figsize = (16,6))
sns.violinplot(data=lung_data)
plt.title("Integrin Genes of the Lung")
plt.xlabel("Integrin Genes")
plt.ylabel("Gene Expression Levels")
plt.show()
No description has been provided for this image
In [34]:
data_brain_lung_expression_only = brain_lung_data.iloc[:,1:]
data_brain_lung_expression_only
Out[34]:
primary_site ITGA10 ITGAD ITGAM ITGA3 ITGBL1 ITGAE ITGA2 ITGB3 ITGA7 ... ITGA6 ITGA2B ITGB1 ITGAL ITGA9 ITGB5 ITGA8 ITGA4 ITGA1 ITGA11
0 Brain 0.5763 -6.5064 2.2573 0.7832 1.0363 4.6035 2.5731 -2.8262 4.9663 ... 2.8562 1.3846 5.8430 1.1316 -0.7108 3.5387 -0.0725 -0.4521 0.2029 -2.8262
1 Lung 4.9137 -3.6259 4.7307 7.1584 1.7702 4.9556 1.9149 2.6067 3.9270 ... 4.2412 4.1211 7.7256 4.4900 2.9281 6.1483 5.1867 2.6185 4.7856 -0.0277
3 Lung 4.0541 -2.3147 4.5053 7.5651 4.1788 4.1772 5.3695 1.8444 4.5355 ... 4.9631 1.9149 7.9947 3.3911 2.8462 6.7683 4.1636 2.7951 5.3284 1.2147
5 Lung 6.0732 -2.4659 3.9901 7.3945 4.7688 5.1157 4.3356 2.3366 5.0527 ... 3.7378 4.7247 7.5016 5.1396 2.5036 6.5443 4.6531 3.8136 5.8679 0.7407
6 Lung 4.2510 -5.0116 3.3076 6.1715 3.1129 5.2954 2.2960 1.1184 5.2392 ... 4.7104 2.7530 7.5022 4.0730 2.6325 6.0483 5.0562 2.6962 5.1611 0.9343
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1980 Brain 0.6969 -6.5064 -0.9686 2.3760 -2.2447 4.0739 -0.6193 -4.0350 4.8788 ... 2.7357 1.5806 4.6882 -0.9971 -0.5756 3.5136 0.9343 -1.0862 0.4340 -2.2447
1981 Brain 0.1124 -5.0116 2.2482 2.8897 -0.5125 4.6445 0.3115 -3.6259 4.5110 ... 2.1147 0.9716 5.1202 0.6608 0.4761 3.2343 0.8408 -0.0574 -0.1828 -2.5479
1982 Lung 5.3067 -3.8160 4.9065 7.5810 5.8714 4.7345 2.6185 3.1095 5.2032 ... 5.6080 3.7324 8.2849 4.6201 3.6440 6.7052 5.1094 3.3364 5.8153 1.6604
1985 Brain 3.4622 -5.5735 1.5013 5.4835 1.7702 4.7517 0.6790 -3.1714 5.3597 ... 1.1960 4.1740 4.3002 0.5470 -0.9971 3.7982 -0.2498 1.4808 -0.5125 -0.5125
1986 Lung 2.5585 -1.7809 6.7916 6.5865 2.7051 4.9519 4.3618 3.1892 7.7121 ... 3.5779 2.8974 7.7685 4.8294 1.9149 5.9989 2.4117 2.4198 4.2080 1.0007

1440 rows × 28 columns

In [35]:
brain_lung_data_vertical = data_brain_lung_expression_only.melt(id_vars = 'primary_site', var_name = 'integrin gene', value_name = 'expression_levels')
brain_lung_data_vertical
Out[35]:
primary_site integrin gene expression_levels
0 Brain ITGA10 0.5763
1 Lung ITGA10 4.9137
2 Lung ITGA10 4.0541
3 Lung ITGA10 6.0732
4 Lung ITGA10 4.2510
... ... ... ...
38875 Brain ITGA11 -2.2447
38876 Brain ITGA11 -2.5479
38877 Lung ITGA11 1.6604
38878 Brain ITGA11 -0.5125
38879 Lung ITGA11 1.0007

38880 rows × 3 columns

In [38]:
plt.figure(figsize=(16, 6))
sns.violinplot(x = 'integrin gene', y = 'expression_levels', hue = 'primary_site', data = brain_lung_data_vertical, split = True, inner = 'quartile')
plt.title("Integrin Genes of the Brain vs. the Lung")
plt.xlabel("Integrin Gene")
plt.ylabel("Gene Expression Levels")
plt.legend(title = 'primary_site')
plt.show()
No description has been provided for this image