In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
First, import the libraries used throughout this notebook: matplotlib, numpy, pandas, and seaborn.¶
In [6]:
# Read the integrin expression workbook into a single DataFrame.
excel_path = 'gtex_integrin_7_organs.xlsx'
data = pd.read_excel(excel_path)
In [4]:
# Bare trailing expression: the notebook renders the full frame as this cell's output.
data
Out[4]:
Unnamed: 0 | primary_site | ITGA10 | ITGAD | ITGAM | ITGA3 | ITGBL1 | ITGAE | ITGA2 | ITGB3 | ... | ITGA6 | ITGA2B | ITGB1 | ITGAL | ITGA9 | ITGB5 | ITGA8 | ITGA4 | ITGA1 | ITGA11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | GTEX-13QIC-0011-R1a-SM-5O9CJ | Brain | 0.5763 | -6.5064 | 2.2573 | 0.7832 | 1.0363 | 4.6035 | 2.5731 | -2.8262 | ... | 2.8562 | 1.3846 | 5.8430 | 1.1316 | -0.7108 | 3.5387 | -0.0725 | -0.4521 | 0.2029 | -2.8262 |
1 | GTEX-1399S-1726-SM-5L3DI | Lung | 4.9137 | -3.6259 | 4.7307 | 7.1584 | 1.7702 | 4.9556 | 1.9149 | 2.6067 | ... | 4.2412 | 4.1211 | 7.7256 | 4.4900 | 2.9281 | 6.1483 | 5.1867 | 2.6185 | 4.7856 | -0.0277 |
2 | GTEX-PWCY-1326-SM-48TCU | Ovary | 2.3953 | -5.0116 | 1.4547 | 4.2593 | -0.7346 | 4.4149 | 0.2642 | 1.5216 | ... | 3.6816 | 1.5465 | 7.2964 | -0.9406 | 2.7742 | 5.0414 | 2.0325 | 0.7579 | 2.2573 | 1.2516 |
3 | GTEX-QXCU-0626-SM-2TC69 | Lung | 4.0541 | -2.3147 | 4.5053 | 7.5651 | 4.1788 | 4.1772 | 5.3695 | 1.8444 | ... | 4.9631 | 1.9149 | 7.9947 | 3.3911 | 2.8462 | 6.7683 | 4.1636 | 2.7951 | 5.3284 | 1.2147 |
4 | GTEX-ZA64-1526-SM-5CVMD | Breast | 2.0569 | -2.4659 | 3.3993 | 3.1311 | 3.0074 | 4.4977 | -1.7809 | 2.7139 | ... | 4.7340 | 0.6332 | 7.3496 | -0.9406 | 2.5338 | 6.5696 | 1.7229 | -0.6416 | 3.1195 | 1.1050 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1982 | GTEX-QMRM-0826-SM-3NB33 | Lung | 5.3067 | -3.8160 | 4.9065 | 7.5810 | 5.8714 | 4.7345 | 2.6185 | 3.1095 | ... | 5.6080 | 3.7324 | 8.2849 | 4.6201 | 3.6440 | 6.7052 | 5.1094 | 3.3364 | 5.8153 | 1.6604 |
1983 | GTEX-YFCO-1626-SM-4W1Z3 | Prostate | 2.9581 | -4.6082 | 1.1641 | 4.6938 | 1.5902 | 5.8625 | -0.5125 | 1.7617 | ... | 3.8798 | -1.4699 | 7.5163 | -0.3752 | 2.9562 | 5.3035 | 4.4304 | -0.9406 | 3.6136 | 0.4233 |
1984 | GTEX-1117F-2826-SM-5GZXL | Breast | 4.3184 | -6.5064 | 1.0433 | 4.8440 | 3.5498 | 4.6809 | 1.0293 | 3.3478 | ... | 5.3256 | -0.0725 | 7.7516 | 1.1382 | 2.1411 | 7.1132 | 0.3796 | 0.0854 | 3.8650 | 1.0151 |
1985 | GTEX-Q2AG-2826-SM-2HMJQ | Brain | 3.4622 | -5.5735 | 1.5013 | 5.4835 | 1.7702 | 4.7517 | 0.6790 | -3.1714 | ... | 1.1960 | 4.1740 | 4.3002 | 0.5470 | -0.9971 | 3.7982 | -0.2498 | 1.4808 | -0.5125 | -0.5125 |
1986 | GTEX-XV7Q-0426-SM-4BRVN | Lung | 2.5585 | -1.7809 | 6.7916 | 6.5865 | 2.7051 | 4.9519 | 4.3618 | 3.1892 | ... | 3.5779 | 2.8974 | 7.7685 | 4.8294 | 1.9149 | 5.9989 | 2.4117 | 2.4198 | 4.2080 | 1.0007 |
1987 rows × 29 columns
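This displays the full dataset loaded from the Excel file: 1,987 samples (rows) by 29 columns, consisting of a sample ID, the tissue of origin (`primary_site`), and the integrin gene expression values.¶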
In [7]:
# Keep only the rows whose tissue label is exactly 'Brain'.
is_brain = data['primary_site'].eq('Brain')
brain_data = data[is_brain]
This filters the dataset down to the brain samples only; the next cell displays the result.¶
In [8]:
# Bare trailing expression: renders the brain-only subset as this cell's output.
brain_data
Out[8]:
Unnamed: 0 | primary_site | ITGA10 | ITGAD | ITGAM | ITGA3 | ITGBL1 | ITGAE | ITGA2 | ITGB3 | ... | ITGA6 | ITGA2B | ITGB1 | ITGAL | ITGA9 | ITGB5 | ITGA8 | ITGA4 | ITGA1 | ITGA11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | GTEX-13QIC-0011-R1a-SM-5O9CJ | Brain | 0.5763 | -6.5064 | 2.2573 | 0.7832 | 1.0363 | 4.6035 | 2.5731 | -2.8262 | ... | 2.8562 | 1.3846 | 5.8430 | 1.1316 | -0.7108 | 3.5387 | -0.0725 | -0.4521 | 0.2029 | -2.8262 |
8 | GTEX-N7MS-2526-SM-26GMA | Brain | 2.2960 | -9.9658 | 0.6608 | 5.2840 | 0.4233 | 4.8510 | -0.2671 | -0.1031 | ... | 1.5415 | 4.6623 | 3.4687 | 0.5666 | -0.0130 | 3.0654 | 0.7916 | 1.0433 | -0.7346 | -0.7588 |
10 | GTEX-N7MS-2526-SM-26GMR | Brain | -0.2498 | -9.9658 | -0.8863 | 3.1685 | -1.6394 | 2.8158 | -0.4719 | -1.1488 | ... | 1.6045 | 0.9268 | 2.8055 | -0.5973 | 0.4657 | 1.8918 | 0.3460 | 0.3907 | -1.9942 | -1.5522 |
12 | GTEX-NPJ7-0011-R6a-SM-2I3G7 | Brain | 1.6045 | -6.5064 | 2.3193 | 3.6335 | -2.3147 | 5.0670 | -0.8863 | -0.8084 | ... | 3.2018 | 1.7575 | 4.6894 | 0.4125 | -0.6643 | 3.6916 | -0.6193 | -2.2447 | 1.2023 | -1.9942 |
14 | GTEX-132Q8-3026-SM-5PNVG | Brain | 2.8974 | -6.5064 | 1.9601 | 4.1836 | -0.8084 | 4.5892 | -0.5543 | 0.3460 | ... | 3.6018 | 2.7931 | 4.7274 | -0.0574 | 1.2271 | 4.3793 | 0.8488 | -0.2159 | 2.1378 | -0.6416 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1977 | GTEX-13G51-0011-R6b-SM-5LZX4 | Brain | -0.3383 | -6.5064 | 1.6234 | 2.7487 | -2.2447 | 5.2415 | -0.8863 | -2.9324 | ... | 2.1988 | 0.4016 | 4.5142 | -1.1811 | -0.8084 | 3.9983 | -1.0862 | -3.1714 | -0.7588 | -1.9379 |
1978 | GTEX-YFC4-0011-R10a-SM-4SOK5 | Brain | 0.4447 | -5.5735 | 0.3231 | 3.5237 | -1.5105 | 4.9016 | 0.9419 | -2.7274 | ... | 2.8178 | 1.3567 | 4.4621 | -0.2845 | 1.0222 | 3.3336 | 0.1903 | -1.0559 | 0.0300 | -0.4719 |
1980 | GTEX-13112-0011-R4b-SM-5DUXL | Brain | 0.6969 | -6.5064 | -0.9686 | 2.3760 | -2.2447 | 4.0739 | -0.6193 | -4.0350 | ... | 2.7357 | 1.5806 | 4.6882 | -0.9971 | -0.5756 | 3.5136 | 0.9343 | -1.0862 | 0.4340 | -2.2447 |
1981 | GTEX-1313W-0011-R1b-SM-5EQ4A | Brain | 0.1124 | -5.0116 | 2.2482 | 2.8897 | -0.5125 | 4.6445 | 0.3115 | -3.6259 | ... | 2.1147 | 0.9716 | 5.1202 | 0.6608 | 0.4761 | 3.2343 | 0.8408 | -0.0574 | -0.1828 | -2.5479 |
1985 | GTEX-Q2AG-2826-SM-2HMJQ | Brain | 3.4622 | -5.5735 | 1.5013 | 5.4835 | 1.7702 | 4.7517 | 0.6790 | -3.1714 | ... | 1.1960 | 4.1740 | 4.3002 | 0.5470 | -0.9971 | 3.7982 | -0.2498 | 1.4808 | -0.5125 | -0.5125 |
1152 rows × 29 columns
In [29]:
# Every integrin column in the brain subset (column names starting with "ITG").
itgs = [col for col in brain_data.columns if col.startswith("ITG")]

# Reshape wide -> long: one row per (sample, gene) pair, which is the layout
# seaborn needs when x/y are given as column names.
melted = brain_data.melt(
    id_vars=["Unnamed: 0", "primary_site"],
    value_vars=itgs,
    var_name='Gene',
    value_name='Expression',
)

# Order the genes on the x-axis by mean expression, highest first.
gene_order_brain = (
    melted.groupby('Gene')['Expression']
    .mean()
    .sort_values(ascending=False)
    .index
)

plt.figure(figsize=(16, 6))
# `density_norm='width'` is the replacement for the deprecated `scale='width'`
# (seaborn emits a FutureWarning for `scale` and will remove it in v0.15).
sns.violinplot(
    data=melted,
    x='Gene',
    y='Expression',
    order=gene_order_brain,
    density_norm='width',
    inner='box',
)
plt.xticks(rotation=90)  # gene names overlap when drawn horizontally
plt.title('Expression of All ITG Genes in Brain Samples')
plt.tight_layout()
plt.show()
/var/folders/9g/x0zd2t4d7wdcyyfwxxbnmqbr0000gn/T/ipykernel_1479/2738837910.py:10: FutureWarning: The `scale` parameter has been renamed and will be removed in v0.15.0. Pass `density_norm='width'` for the same effect. sns.violinplot(data=melted, x='Gene', y='Expression', order=gene_order_brain, scale='width', inner='box')
The cell above draws a violin plot of each integrin gene's expression across the brain samples, with the genes ordered from highest to lowest mean expression.¶
In [9]:
# Wide-form violin plot: the brain frame is handed to seaborn as-is, without
# naming x/y columns, and drawn on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(16, 6))
sns.violinplot(data=brain_data, ax=ax)
ax.set_title("Integrin Genes of the Brain")
ax.set_xlabel("Integrin Genes")
ax.set_ylabel("Gene Expression Levels")
plt.show()
In [11]:
# Keep only the rows whose tissue label is exactly 'Lung'.
is_lung = data['primary_site'].eq('Lung')
lung_data = data[is_lung]
This filters the dataset down to the lung samples only; the next cell displays the result.¶
In [25]:
# Bare trailing expression: renders the lung-only subset as this cell's output.
lung_data
Out[25]:
Unnamed: 0 | primary_site | ITGA10 | ITGAD | ITGAM | ITGA3 | ITGBL1 | ITGAE | ITGA2 | ITGB3 | ... | ITGA6 | ITGA2B | ITGB1 | ITGAL | ITGA9 | ITGB5 | ITGA8 | ITGA4 | ITGA1 | ITGA11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | GTEX-1399S-1726-SM-5L3DI | Lung | 4.9137 | -3.6259 | 4.7307 | 7.1584 | 1.7702 | 4.9556 | 1.9149 | 2.6067 | ... | 4.2412 | 4.1211 | 7.7256 | 4.4900 | 2.9281 | 6.1483 | 5.1867 | 2.6185 | 4.7856 | -0.0277 |
3 | GTEX-QXCU-0626-SM-2TC69 | Lung | 4.0541 | -2.3147 | 4.5053 | 7.5651 | 4.1788 | 4.1772 | 5.3695 | 1.8444 | ... | 4.9631 | 1.9149 | 7.9947 | 3.3911 | 2.8462 | 6.7683 | 4.1636 | 2.7951 | 5.3284 | 1.2147 |
5 | GTEX-11EI6-0826-SM-5985V | Lung | 6.0732 | -2.4659 | 3.9901 | 7.3945 | 4.7688 | 5.1157 | 4.3356 | 2.3366 | ... | 3.7378 | 4.7247 | 7.5016 | 5.1396 | 2.5036 | 6.5443 | 4.6531 | 3.8136 | 5.8679 | 0.7407 |
6 | GTEX-S341-0326-SM-2XCAU | Lung | 4.2510 | -5.0116 | 3.3076 | 6.1715 | 3.1129 | 5.2954 | 2.2960 | 1.1184 | ... | 4.7104 | 2.7530 | 7.5022 | 4.0730 | 2.6325 | 6.0483 | 5.0562 | 2.6962 | 5.1611 | 0.9343 |
7 | GTEX-WY7C-0426-SM-3NB3C | Lung | 3.3633 | -2.5479 | 4.8340 | 6.6864 | 3.0585 | 4.8294 | 2.6464 | 0.7999 | ... | 5.1190 | 1.5013 | 8.0260 | 3.6635 | 3.2435 | 5.8503 | 5.2991 | 2.8076 | 4.7571 | -0.1345 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1962 | GTEX-Q2AH-0426-SM-2I3EP | Lung | 5.9644 | -1.3921 | 5.1061 | 6.9470 | 3.8973 | 4.8630 | 3.6089 | 3.9765 | ... | 5.1115 | 4.9041 | 7.9145 | 4.5559 | 3.7138 | 6.5782 | 4.7512 | 2.9710 | 5.0777 | 1.8444 |
1970 | GTEX-RWS6-0226-SM-2XCA9 | Lung | 6.0830 | -0.5756 | 4.3889 | 6.7302 | 4.6053 | 5.1065 | 2.8321 | 0.9716 | ... | 5.8176 | 2.5437 | 7.7929 | 4.9012 | 2.7993 | 6.7510 | 5.2204 | 2.8422 | 5.0951 | -0.3201 |
1975 | GTEX-131XE-0726-SM-5HL9K | Lung | 3.7971 | -1.9379 | 4.8555 | 6.4052 | 3.9561 | 5.4263 | 3.2959 | 4.5199 | ... | 4.6697 | 6.5777 | 7.5114 | 5.2130 | 2.3816 | 6.6225 | 3.7389 | 3.7248 | 5.6809 | 0.8488 |
1982 | GTEX-QMRM-0826-SM-3NB33 | Lung | 5.3067 | -3.8160 | 4.9065 | 7.5810 | 5.8714 | 4.7345 | 2.6185 | 3.1095 | ... | 5.6080 | 3.7324 | 8.2849 | 4.6201 | 3.6440 | 6.7052 | 5.1094 | 3.3364 | 5.8153 | 1.6604 |
1986 | GTEX-XV7Q-0426-SM-4BRVN | Lung | 2.5585 | -1.7809 | 6.7916 | 6.5865 | 2.7051 | 4.9519 | 4.3618 | 3.1892 | ... | 3.5779 | 2.8974 | 7.7685 | 4.8294 | 1.9149 | 5.9989 | 2.4117 | 2.4198 | 4.2080 | 1.0007 |
288 rows × 29 columns
In [30]:
# Every integrin column in the lung subset (column names starting with "ITG").
itgs = [col for col in lung_data.columns if col.startswith("ITG")]

# Reshape wide -> long: one row per (sample, gene) pair, which is the layout
# seaborn needs when x/y are given as column names.
melted_lung = lung_data.melt(
    id_vars=["Unnamed: 0", "primary_site"],
    value_vars=itgs,
    var_name='Gene',
    value_name='Expression',
)

# Order the genes on the x-axis by mean expression, highest first.
gene_order_lung = (
    melted_lung.groupby('Gene')['Expression']
    .mean()
    .sort_values(ascending=False)
    .index
)

plt.figure(figsize=(16, 6))
# `density_norm='width'` is the replacement for the deprecated `scale='width'`
# (seaborn emits a FutureWarning for `scale` and will remove it in v0.15).
sns.violinplot(
    data=melted_lung,
    x='Gene',
    y='Expression',
    order=gene_order_lung,
    density_norm='width',
    inner='box',
)
plt.xticks(rotation=90)  # gene names overlap when drawn horizontally
plt.title('Expression of All ITG Genes in Lung Samples')
plt.tight_layout()
plt.show()
/var/folders/9g/x0zd2t4d7wdcyyfwxxbnmqbr0000gn/T/ipykernel_1479/2889910766.py:10: FutureWarning: The `scale` parameter has been renamed and will be removed in v0.15.0. Pass `density_norm='width'` for the same effect. sns.violinplot(data=melted_lung, x='Gene', y='Expression', order=gene_order_lung, scale='width', inner='box')
The cell above draws a violin plot of each integrin gene's expression across the lung samples, with the genes ordered from highest to lowest mean expression.¶
In [12]:
# Wide-form violin plot: the lung frame is handed to seaborn as-is, without
# naming x/y columns, and drawn on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(16, 6))
sns.violinplot(data=lung_data, ax=ax)
ax.set_title("Integrin Genes of the Lung")
ax.set_xlabel("Integrin Genes")
ax.set_ylabel("Gene Expression Levels")
plt.show()
In [34]:
# Build the combined brain + lung subset here so this cell runs on a fresh
# kernel instead of relying on a `brain_lung_data` variable that no cell in
# this notebook defines. Row order and index labels follow `data`.
brain_lung_data = data[data['primary_site'].isin(['Brain', 'Lung'])]

# Drop the first column (the sample ID, 'Unnamed: 0') so that only the tissue
# label and the expression values remain.
data_brain_lung_expression_only = brain_lung_data.iloc[:, 1:]
data_brain_lung_expression_only
Out[34]:
primary_site | ITGA10 | ITGAD | ITGAM | ITGA3 | ITGBL1 | ITGAE | ITGA2 | ITGB3 | ITGA7 | ... | ITGA6 | ITGA2B | ITGB1 | ITGAL | ITGA9 | ITGB5 | ITGA8 | ITGA4 | ITGA1 | ITGA11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Brain | 0.5763 | -6.5064 | 2.2573 | 0.7832 | 1.0363 | 4.6035 | 2.5731 | -2.8262 | 4.9663 | ... | 2.8562 | 1.3846 | 5.8430 | 1.1316 | -0.7108 | 3.5387 | -0.0725 | -0.4521 | 0.2029 | -2.8262 |
1 | Lung | 4.9137 | -3.6259 | 4.7307 | 7.1584 | 1.7702 | 4.9556 | 1.9149 | 2.6067 | 3.9270 | ... | 4.2412 | 4.1211 | 7.7256 | 4.4900 | 2.9281 | 6.1483 | 5.1867 | 2.6185 | 4.7856 | -0.0277 |
3 | Lung | 4.0541 | -2.3147 | 4.5053 | 7.5651 | 4.1788 | 4.1772 | 5.3695 | 1.8444 | 4.5355 | ... | 4.9631 | 1.9149 | 7.9947 | 3.3911 | 2.8462 | 6.7683 | 4.1636 | 2.7951 | 5.3284 | 1.2147 |
5 | Lung | 6.0732 | -2.4659 | 3.9901 | 7.3945 | 4.7688 | 5.1157 | 4.3356 | 2.3366 | 5.0527 | ... | 3.7378 | 4.7247 | 7.5016 | 5.1396 | 2.5036 | 6.5443 | 4.6531 | 3.8136 | 5.8679 | 0.7407 |
6 | Lung | 4.2510 | -5.0116 | 3.3076 | 6.1715 | 3.1129 | 5.2954 | 2.2960 | 1.1184 | 5.2392 | ... | 4.7104 | 2.7530 | 7.5022 | 4.0730 | 2.6325 | 6.0483 | 5.0562 | 2.6962 | 5.1611 | 0.9343 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1980 | Brain | 0.6969 | -6.5064 | -0.9686 | 2.3760 | -2.2447 | 4.0739 | -0.6193 | -4.0350 | 4.8788 | ... | 2.7357 | 1.5806 | 4.6882 | -0.9971 | -0.5756 | 3.5136 | 0.9343 | -1.0862 | 0.4340 | -2.2447 |
1981 | Brain | 0.1124 | -5.0116 | 2.2482 | 2.8897 | -0.5125 | 4.6445 | 0.3115 | -3.6259 | 4.5110 | ... | 2.1147 | 0.9716 | 5.1202 | 0.6608 | 0.4761 | 3.2343 | 0.8408 | -0.0574 | -0.1828 | -2.5479 |
1982 | Lung | 5.3067 | -3.8160 | 4.9065 | 7.5810 | 5.8714 | 4.7345 | 2.6185 | 3.1095 | 5.2032 | ... | 5.6080 | 3.7324 | 8.2849 | 4.6201 | 3.6440 | 6.7052 | 5.1094 | 3.3364 | 5.8153 | 1.6604 |
1985 | Brain | 3.4622 | -5.5735 | 1.5013 | 5.4835 | 1.7702 | 4.7517 | 0.6790 | -3.1714 | 5.3597 | ... | 1.1960 | 4.1740 | 4.3002 | 0.5470 | -0.9971 | 3.7982 | -0.2498 | 1.4808 | -0.5125 | -0.5125 |
1986 | Lung | 2.5585 | -1.7809 | 6.7916 | 6.5865 | 2.7051 | 4.9519 | 4.3618 | 3.1892 | 7.7121 | ... | 3.5779 | 2.8974 | 7.7685 | 4.8294 | 1.9149 | 5.9989 | 2.4117 | 2.4198 | 4.2080 | 1.0007 |
1440 rows × 28 columns
In [35]:
# Reshape to long form: every non-id column becomes a row labelled by its
# column name ('integrin gene') with its value ('expression_levels'), keeping
# the tissue label alongside each measurement.
brain_lung_data_vertical = pd.melt(
    data_brain_lung_expression_only,
    id_vars='primary_site',
    var_name='integrin gene',
    value_name='expression_levels',
)
brain_lung_data_vertical
Out[35]:
primary_site | integrin gene | expression_levels | |
---|---|---|---|
0 | Brain | ITGA10 | 0.5763 |
1 | Lung | ITGA10 | 4.9137 |
2 | Lung | ITGA10 | 4.0541 |
3 | Lung | ITGA10 | 6.0732 |
4 | Lung | ITGA10 | 4.2510 |
... | ... | ... | ... |
38875 | Brain | ITGA11 | -2.2447 |
38876 | Brain | ITGA11 | -2.5479 |
38877 | Lung | ITGA11 | 1.6604 |
38878 | Brain | ITGA11 | -0.5125 |
38879 | Lung | ITGA11 | 1.0007 |
38880 rows × 3 columns
In [38]:
# Split violins: for each gene, the two halves of the violin show the two
# tissues in `primary_site`, with quartile lines drawn inside.
fig, ax = plt.subplots(figsize=(16, 6))
sns.violinplot(
    data=brain_lung_data_vertical,
    x='integrin gene',
    y='expression_levels',
    hue='primary_site',
    split=True,
    inner='quartile',
    ax=ax,
)
ax.set_title("Integrin Genes of the Brain vs. the Lung")
ax.set_xlabel("Integrin Gene")
ax.set_ylabel("Gene Expression Levels")
ax.legend(title='primary_site')
plt.show()