Commit 11bdc035 authored by Laura Masa

Add analysis_drug_repurposing

parent 808abd87
This diff is collapsed.
This diff is collapsed.
Drugs,gsm_id,gene_symbol,gds_id,pathway_id,pathway_name,significant
CHEMBL1336,GSM107523,ARRB1,GDS2795,WP382,MAPK signaling,o
CHEMBL1336,GSM107523,AVPR2,GDS2795,WP24,Peptide GPCRs,o
CHEMBL1336,GSM107523,BCL2L11,GDS2795,WP673,ErbB signaling,o
CHEMBL1336,GSM107523,BMP1,GDS2795,WP236,Adipogenesis,o
CHEMBL1336,GSM107523,CASP2,GDS2795,WP254,Apoptosis,o
CHEMBL1336,GSM107523,CD36,GDS2795,WP4754,IL18 signaling,o
CHEMBL1336,GSM107523,CD40,GDS2795,WP288,Nod-like receptor (NLR) signaling,o
CHEMBL1336,GSM107523,CLOCK,GDS2795,WP410,Exercise-induced circadian regulation,o
CHEMBL1336,GSM107523,EGFR,GDS2795,WP306,Focal adhesion,o
CHEMBL1336,GSM107523,EPHB2,GDS2795,WP288,Nod-like receptor (NLR) signaling,o
CHEMBL1336,GSM107523,FAF1,GDS2795,WP314,Fas ligand pathway and stress induction of heat shock proteins,o
CHEMBL1336,GSM107523,GAD1,GDS2795,WP550,Biogenic amine synthesis,o
CHEMBL1336,GSM107523,GH1,GDS2795,WP236,Adipogenesis,o
CHEMBL1336,GSM107523,HSPA4,GDS2795,WP2359,Parkin-ubiquitin proteasomal system pathway,o
CHEMBL1336,GSM107523,HTR2C,GDS2795,WP732,Serotonin receptor 2 and ELK-SRF/GATA4 signaling,o
CHEMBL1336,GSM107523,ITGB3,GDS2795,WP306,Focal adhesion,o
CHEMBL1336,GSM107523,MAP2K2,GDS2795,WP673,ErbB signaling,o
CHEMBL1336,GSM107523,MBP,GDS2795,WP422,MAPK cascade,o
CHEMBL1336,GSM107523,MMP14,GDS2795,WP4754,IL18 signaling,o
CHEMBL1336,GSM107523,NOS1,GDS2795,WP2447,Amyotrophic lateral sclerosis (ALS),o
CHEMBL1336,GSM107523,OPRM1,GDS2795,WP24,Peptide GPCRs,o
CHEMBL1336,GSM107523,PDE4D,GDS2795,WP289,Myometrial relaxation and contraction pathways,o
CHEMBL1336,GSM107523,PPARA,GDS2795,WP299,Nuclear receptors in lipid metabolism and toxicity,o
CHEMBL1336,GSM107523,PRDX2,GDS2795,WP15,Selenium micronutrient network,o
CHEMBL1336,GSM107523,PTCH1,GDS2795,WP4808,Endochondral ossification with skeletal dysplasias,u
CHEMBL1336,GSM107523,PTGER3,GDS2795,WP247,Small ligand GPCRs,o
CHEMBL1336,GSM107523,PTPN1,GDS2795,WP2037,Prolactin signaling,o
CHEMBL1336,GSM107523,WT1,GDS2795,WP4758,Nephrotic syndrome,o
CHEMBL282575,GSM107525,BMP1,GDS2795,WP236,Adipogenesis,o
CHEMBL282575,GSM107525,CD40,GDS2795,WP288,Nod-like receptor (NLR) signaling,o
CHEMBL282575,GSM107525,EGFR,GDS2795,WP306,Focal adhesion,o
CHEMBL282575,GSM107525,GLP1R,GDS2795,WP334,"GPCRs, class B secretin-like",o
CHEMBL282575,GSM107525,IGF1R,GDS2795,WP306,Focal adhesion,o
CHEMBL282575,GSM107525,IL4,GDS2795,WP5422,IL19 signaling,o
CHEMBL282575,GSM107525,MBP,GDS2795,WP422,MAPK cascade,o
CHEMBL282575,GSM107525,MMP14,GDS2795,WP4754,IL18 signaling,o
CHEMBL282575,GSM107525,NOS1,GDS2795,WP2447,Amyotrophic lateral sclerosis (ALS),o
CHEMBL282575,GSM107525,PPARA,GDS2795,WP299,Nuclear receptors in lipid metabolism and toxicity,o
CHEMBL282575,GSM107525,PTPN1,GDS2795,WP2037,Prolactin signaling,o
CHEMBL282575,GSM107525,SIRT3,GDS2795,WP1541,Energy metabolism,o
CHEMBL282575,GSM107525,TBX2,GDS2795,WP304,Kit receptor signaling,o
CHEMBL411,GSM107525,BMP1,GDS2795,WP236,Adipogenesis,o
CHEMBL411,GSM107525,CD40,GDS2795,WP288,Nod-like receptor (NLR) signaling,o
CHEMBL411,GSM107525,EGFR,GDS2795,WP306,Focal adhesion,o
CHEMBL411,GSM107525,GLP1R,GDS2795,WP334,"GPCRs, class B secretin-like",o
CHEMBL411,GSM107525,IGF1R,GDS2795,WP306,Focal adhesion,o
CHEMBL411,GSM107525,IL4,GDS2795,WP5422,IL19 signaling,o
CHEMBL411,GSM107525,MBP,GDS2795,WP422,MAPK cascade,o
CHEMBL411,GSM107525,MMP14,GDS2795,WP4754,IL18 signaling,o
CHEMBL411,GSM107525,NOS1,GDS2795,WP2447,Amyotrophic lateral sclerosis (ALS),o
CHEMBL411,GSM107525,PPARA,GDS2795,WP299,Nuclear receptors in lipid metabolism and toxicity,o
CHEMBL411,GSM107525,PTPN1,GDS2795,WP2037,Prolactin signaling,o
CHEMBL411,GSM107525,SIRT3,GDS2795,WP1541,Energy metabolism,o
CHEMBL411,GSM107525,TBX2,GDS2795,WP304,Kit receptor signaling,o
CHEMBL941,GSM107525,BMP1,GDS2795,WP236,Adipogenesis,o
CHEMBL941,GSM107525,CD40,GDS2795,WP288,Nod-like receptor (NLR) signaling,o
CHEMBL941,GSM107525,EGFR,GDS2795,WP306,Focal adhesion,o
CHEMBL941,GSM107525,GLP1R,GDS2795,WP334,"GPCRs, class B secretin-like",o
CHEMBL941,GSM107525,IGF1R,GDS2795,WP306,Focal adhesion,o
CHEMBL941,GSM107525,IL4,GDS2795,WP5422,IL19 signaling,o
CHEMBL941,GSM107525,MBP,GDS2795,WP422,MAPK cascade,o
CHEMBL941,GSM107525,MMP14,GDS2795,WP4754,IL18 signaling,o
CHEMBL941,GSM107525,NOS1,GDS2795,WP2447,Amyotrophic lateral sclerosis (ALS),o
CHEMBL941,GSM107525,PPARA,GDS2795,WP299,Nuclear receptors in lipid metabolism and toxicity,o
CHEMBL941,GSM107525,PTPN1,GDS2795,WP2037,Prolactin signaling,o
CHEMBL941,GSM107525,SIRT3,GDS2795,WP1541,Energy metabolism,o
CHEMBL941,GSM107525,TBX2,GDS2795,WP304,Kit receptor signaling,o
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
def filter_cd_zscore(pnd_df, zscore_threshold=-0.15, quantile=0.75):
    """
    Select the 'unknown'-treatment rows that are candidates for repurposing.

    A row is kept when all three conditions hold:
    - its 'Treatment' value is 'unknown';
    - its 'Dc_zscore' is strictly below zscore_threshold;
    - its 'Closest distance' is at most the chosen quantile (Q3 by default)
      of 'Closest distance' computed over the rows with Treatment == 'yes'.

    Input Parameters:
    - pnd_df: DataFrame with the columns 'Treatment', 'Closest distance'
      and 'Dc_zscore'.
    - zscore_threshold: Exclusive upper bound applied to 'Dc_zscore'
      (default -0.15).
    - quantile: Quantile of the known treatments' 'Closest distance' used as
      the inclusive upper bound (default 0.75, i.e. Q3).
    Returns:
    - filtered_df: DataFrame containing only the rows that satisfy all three
      conditions. When pnd_df has no 'yes' rows the cut-off is NaN, every
      comparison is False and the result is empty.
    """
    # The distance cut-off is derived only from the known treatments.
    known_treatments = pnd_df[pnd_df['Treatment'] == 'yes']
    distance_cutoff = known_treatments['Closest distance'].quantile(quantile)

    keep = (
        (pnd_df['Dc_zscore'] < zscore_threshold)
        & (pnd_df['Closest distance'] <= distance_cutoff)
        & (pnd_df['Treatment'] == 'unknown')
    )
    filtered_df = pnd_df[keep]
    return filtered_df
def rep_pnd(pnd_df, filtered_pnd, output_path='../results/pnd_closest_distance_boxplot_filtered.png'):
    """
    Draw side-by-side boxplots of 'Closest distance' and 'PND' for three groups.

    The groups are: rows of pnd_df with Treatment == 'yes' (labelled
    'Treatment'), rows of pnd_df with Treatment == 'unknown' (labelled
    'All unknown'), and every row of filtered_pnd (labelled
    'Filtered unknown for DR').

    Input Parameters:
    - pnd_df (pd.DataFrame): DataFrame containing drug data with columns:
        - 'Treatment': The treatment status of the drugs ('yes' or 'unknown').
        - 'Closest distance': Numeric values plotted in the left panel.
        - 'PND': Numeric values plotted in the right panel.
    - filtered_pnd (pd.DataFrame): Subset to show as the third group; needs
      the 'Closest distance' and 'PND' columns.
    - output_path (str): Where the PNG is written. Defaults to the path
      previously hard-coded in this function.
    Returns:
    - None: The figure is saved to output_path at 300 dpi and then shown.
    """
    # One colour per group label, shared by both panels.
    palette = {
        'Treatment': '#FF7A7A',
        'All unknown': '#79C4FF',
        'Filtered unknown for DR': '#CAF0F8',
    }

    # Relabel 'Treatment' so that it carries the group name used on the x axis.
    combined_data = pd.concat([
        pnd_df[pnd_df['Treatment'] == 'yes'].assign(Treatment='Treatment'),
        pnd_df[pnd_df['Treatment'] == 'unknown'].assign(Treatment='All unknown'),
        filtered_pnd.assign(Treatment='Filtered unknown for DR'),
    ])

    # (column to plot, y-axis label) for the left and right panel respectively.
    # The raw string keeps the backslash of the mathtext command without
    # triggering Python's invalid-escape-sequence warning.
    panels = (
        ('Closest distance', 'Closest distance'),
        ('PND', r'Personalized Network Distance ($\mathregular{PND}$)'),
    )

    fig, axes = plt.subplots(1, 2, figsize=(14, 6))  # 1 row, 2 columns
    for ax, (column, ylabel) in zip(axes, panels):
        sns.boxplot(
            x='Treatment',
            y=column,
            data=combined_data,
            hue='Treatment',
            ax=ax,
            palette=palette,
            dodge=False,
            medianprops=dict(linewidth=2),
        )
        ax.set_ylabel(ylabel, fontsize=12)
        ax.set_xlabel('')
        for label in ax.get_xticklabels():
            label.set_fontsize(12)
        # hue duplicates the x axis, so replace the legend with an empty one.
        ax.legend([], [], frameon=False)

    plt.tight_layout()  # Adjust the layout of the subplot to avoid overlap
    plt.savefig(output_path, dpi=300)
    plt.show()
def get_gsm_ids_for_lowest_pnd_drugs(pnd_df, drugs_list, num_gsm=3):
    """
    Collect, for each drug in drugs_list, its num_gsm rows with the lowest PND.

    Input Parameters:
    - pnd_df (pd.DataFrame): DataFrame containing drug data with at least the columns:
        - 'Drugs': Identifier matched against the entries of drugs_list.
        - 'PND': Numeric value used to rank the rows of each drug.
    - drugs_list: Iterable of drug identifiers to look up.
    - num_gsm: Maximum number of rows to keep for each drug.
    Returns:
    - gsm_df: DataFrame with the selected rows, grouped in the order of
      drugs_list and sorted by ascending PND within each drug. All columns
      and the original index labels are preserved. A drug absent from pnd_df
      contributes no rows; an empty drugs_list yields an empty DataFrame with
      the columns of pnd_df.
    """
    # For every requested drug: keep its rows, rank by PND, take the lowest ones.
    filtered_rows = [
        pnd_df[pnd_df['Drugs'] == drug].sort_values(by='PND').head(num_gsm)
        for drug in drugs_list
    ]

    # pd.concat raises ValueError on an empty list, so return an empty frame
    # with the same columns instead.
    if not filtered_rows:
        return pnd_df.iloc[0:0]

    # Concatenate the selected rows into a single DataFrame
    gsm_df = pd.concat(filtered_rows)
    return gsm_df
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment