Add analysis_drug_repurposing

11bdc035 · Laura Masa · 808abd87 · 11bdc035 · 11bdc035 · 11bdc035
Commit 11bdc035 authored Jul 10, 2024 by Laura Masa
10 changed files
--- a/analysis_drug_repurposing/data/data_00_raw_data_links_gen_pat.tsv
+++ b/analysis_drug_repurposing/data/data_00_raw_data_links_gen_pat.tsv
--- a/analysis_drug_repurposing/data/data_00_raw_data_nodes_pat.tsv
+++ b/analysis_drug_repurposing/data/data_00_raw_data_nodes_pat.tsv
--- a/analysis_drug_repurposing/data/genes_gds_alzheimer_filtered_dup.csv
+++ b/analysis_drug_repurposing/data/genes_gds_alzheimer_filtered_dup.csv
--- a/analysis_drug_repurposing/data/neuro_pnd.csv
+++ b/analysis_drug_repurposing/data/neuro_pnd.csv
--- a/analysis_drug_repurposing/results/dru_gene_pat_final.csv
+++ b/analysis_drug_repurposing/results/dru_gene_pat_final.csv
+Drugs,gsm_id,gene_symbol,gds_id,pathway_id,pathway_name,significant
+CHEMBL1336,GSM107523,ARRB1,GDS2795,WP382,MAPK signaling,o
+CHEMBL1336,GSM107523,AVPR2,GDS2795,WP24,Peptide GPCRs,o
+CHEMBL1336,GSM107523,BCL2L11,GDS2795,WP673,ErbB signaling,o
+CHEMBL1336,GSM107523,BMP1,GDS2795,WP236,Adipogenesis,o
+CHEMBL1336,GSM107523,CASP2,GDS2795,WP254,Apoptosis,o
+CHEMBL1336,GSM107523,CD36,GDS2795,WP4754,IL18 signaling,o
+CHEMBL1336,GSM107523,CD40,GDS2795,WP288,Nod-like receptor (NLR) signaling,o
+CHEMBL1336,GSM107523,CLOCK,GDS2795,WP410,Exercise-induced circadian regulation,o
+CHEMBL1336,GSM107523,EGFR,GDS2795,WP306,Focal adhesion,o
+CHEMBL1336,GSM107523,EPHB2,GDS2795,WP288,Nod-like receptor (NLR) signaling,o
+CHEMBL1336,GSM107523,FAF1,GDS2795,WP314,Fas ligand pathway and stress induction of heat shock proteins,o
+CHEMBL1336,GSM107523,GAD1,GDS2795,WP550,Biogenic amine synthesis,o
+CHEMBL1336,GSM107523,GH1,GDS2795,WP236,Adipogenesis,o
+CHEMBL1336,GSM107523,HSPA4,GDS2795,WP2359,Parkin-ubiquitin proteasomal system pathway,o
+CHEMBL1336,GSM107523,HTR2C,GDS2795,WP732,Serotonin receptor 2 and ELK-SRF/GATA4 signaling,o
+CHEMBL1336,GSM107523,ITGB3,GDS2795,WP306,Focal adhesion,o
+CHEMBL1336,GSM107523,MAP2K2,GDS2795,WP673,ErbB signaling,o
+CHEMBL1336,GSM107523,MBP,GDS2795,WP422,MAPK cascade,o
+CHEMBL1336,GSM107523,MMP14,GDS2795,WP4754,IL18 signaling,o
+CHEMBL1336,GSM107523,NOS1,GDS2795,WP2447,Amyotrophic lateral sclerosis (ALS),o
+CHEMBL1336,GSM107523,OPRM1,GDS2795,WP24,Peptide GPCRs,o
+CHEMBL1336,GSM107523,PDE4D,GDS2795,WP289,Myometrial relaxation and contraction pathways,o
+CHEMBL1336,GSM107523,PPARA,GDS2795,WP299,Nuclear receptors in lipid metabolism and toxicity,o
+CHEMBL1336,GSM107523,PRDX2,GDS2795,WP15,Selenium micronutrient network,o
+CHEMBL1336,GSM107523,PTCH1,GDS2795,WP4808,Endochondral ossification with skeletal dysplasias,u
+CHEMBL1336,GSM107523,PTGER3,GDS2795,WP247,Small ligand GPCRs,o
+CHEMBL1336,GSM107523,PTPN1,GDS2795,WP2037,Prolactin signaling,o
+CHEMBL1336,GSM107523,WT1,GDS2795,WP4758,Nephrotic syndrome,o
+CHEMBL282575,GSM107525,BMP1,GDS2795,WP236,Adipogenesis,o
+CHEMBL282575,GSM107525,CD40,GDS2795,WP288,Nod-like receptor (NLR) signaling,o
+CHEMBL282575,GSM107525,EGFR,GDS2795,WP306,Focal adhesion,o
+CHEMBL282575,GSM107525,GLP1R,GDS2795,WP334,"GPCRs, class B secretin-like",o
+CHEMBL282575,GSM107525,IGF1R,GDS2795,WP306,Focal adhesion,o
+CHEMBL282575,GSM107525,IL4,GDS2795,WP5422,IL19 signaling,o
+CHEMBL282575,GSM107525,MBP,GDS2795,WP422,MAPK cascade,o
+CHEMBL282575,GSM107525,MMP14,GDS2795,WP4754,IL18 signaling,o
+CHEMBL282575,GSM107525,NOS1,GDS2795,WP2447,Amyotrophic lateral sclerosis (ALS),o
+CHEMBL282575,GSM107525,PPARA,GDS2795,WP299,Nuclear receptors in lipid metabolism and toxicity,o
+CHEMBL282575,GSM107525,PTPN1,GDS2795,WP2037,Prolactin signaling,o
+CHEMBL282575,GSM107525,SIRT3,GDS2795,WP1541,Energy metabolism,o
+CHEMBL282575,GSM107525,TBX2,GDS2795,WP304,Kit receptor signaling,o
+CHEMBL411,GSM107525,BMP1,GDS2795,WP236,Adipogenesis,o
+CHEMBL411,GSM107525,CD40,GDS2795,WP288,Nod-like receptor (NLR) signaling,o
+CHEMBL411,GSM107525,EGFR,GDS2795,WP306,Focal adhesion,o
+CHEMBL411,GSM107525,GLP1R,GDS2795,WP334,"GPCRs, class B secretin-like",o
+CHEMBL411,GSM107525,IGF1R,GDS2795,WP306,Focal adhesion,o
+CHEMBL411,GSM107525,IL4,GDS2795,WP5422,IL19 signaling,o
+CHEMBL411,GSM107525,MBP,GDS2795,WP422,MAPK cascade,o
+CHEMBL411,GSM107525,MMP14,GDS2795,WP4754,IL18 signaling,o
+CHEMBL411,GSM107525,NOS1,GDS2795,WP2447,Amyotrophic lateral sclerosis (ALS),o
+CHEMBL411,GSM107525,PPARA,GDS2795,WP299,Nuclear receptors in lipid metabolism and toxicity,o
+CHEMBL411,GSM107525,PTPN1,GDS2795,WP2037,Prolactin signaling,o
+CHEMBL411,GSM107525,SIRT3,GDS2795,WP1541,Energy metabolism,o
+CHEMBL411,GSM107525,TBX2,GDS2795,WP304,Kit receptor signaling,o
+CHEMBL941,GSM107525,BMP1,GDS2795,WP236,Adipogenesis,o
+CHEMBL941,GSM107525,CD40,GDS2795,WP288,Nod-like receptor (NLR) signaling,o
+CHEMBL941,GSM107525,EGFR,GDS2795,WP306,Focal adhesion,o
+CHEMBL941,GSM107525,GLP1R,GDS2795,WP334,"GPCRs, class B secretin-like",o
+CHEMBL941,GSM107525,IGF1R,GDS2795,WP306,Focal adhesion,o
+CHEMBL941,GSM107525,IL4,GDS2795,WP5422,IL19 signaling,o
+CHEMBL941,GSM107525,MBP,GDS2795,WP422,MAPK cascade,o
+CHEMBL941,GSM107525,MMP14,GDS2795,WP4754,IL18 signaling,o
+CHEMBL941,GSM107525,NOS1,GDS2795,WP2447,Amyotrophic lateral sclerosis (ALS),o
+CHEMBL941,GSM107525,PPARA,GDS2795,WP299,Nuclear receptors in lipid metabolism and toxicity,o
+CHEMBL941,GSM107525,PTPN1,GDS2795,WP2037,Prolactin signaling,o
+CHEMBL941,GSM107525,SIRT3,GDS2795,WP1541,Energy metabolism,o
+CHEMBL941,GSM107525,TBX2,GDS2795,WP304,Kit receptor signaling,o
--- a/analysis_drug_repurposing/results/pnd_boxplot.png
+++ b/analysis_drug_repurposing/results/pnd_boxplot.png
--- a/analysis_drug_repurposing/results/pnd_closest_distance_boxplot.png
+++ b/analysis_drug_repurposing/results/pnd_closest_distance_boxplot.png
--- a/analysis_drug_repurposing/results/pnd_closest_distance_boxplot_filtered.png
+++ b/analysis_drug_repurposing/results/pnd_closest_distance_boxplot_filtered.png
--- a/analysis_drug_repurposing/scripts/analysis_visualization.py
+++ b/analysis_drug_repurposing/scripts/analysis_visualization.py
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+def filter_cd_zscore(pnd_df, zscore_threshold=-0.15, quantile=0.75):
+    """
+    Select the rows with unknown treatment status that pass the z-score and distance cutoffs.
+
+    A row of pnd_df is kept when all three conditions hold:
+    - 'Treatment' equals 'unknown',
+    - 'Dc_zscore' is strictly below zscore_threshold,
+    - 'Closest distance' is less than or equal to the requested quantile (Q3 by default)
+      of the 'Closest distance' values of the known treatments ('Treatment' == 'yes').
+
+    Input Parameters:
+    - pnd_df: DataFrame with the columns 'Treatment', 'Closest distance' and 'Dc_zscore'.
+    - zscore_threshold: Exclusive upper bound applied to 'Dc_zscore' (default -0.15).
+    - quantile: Quantile of the known treatments used as distance cutoff (default 0.75, i.e. Q3).
+
+    Returns:
+    - filtered_df: DataFrame containing only the rows of pnd_df that satisfy all conditions.
+      It is empty when pnd_df has no known treatments, because the cutoff is then NaN
+      and every comparison against it is False.
+    """
+    # Distance cutoff derived from the drugs already known to treat the disease
+    known_treatments = pnd_df[pnd_df['Treatment'] == 'yes']
+    distance_cutoff = known_treatments['Closest distance'].quantile(quantile)
+
+    is_unknown = pnd_df['Treatment'] == 'unknown'
+    has_low_zscore = pnd_df['Dc_zscore'] < zscore_threshold
+    is_close_enough = pnd_df['Closest distance'] <= distance_cutoff
+
+    filtered_df = pnd_df[has_low_zscore & is_close_enough & is_unknown]
+    return filtered_df
+
+
+
+def rep_pnd(pnd_df, filtered_pnd, output_path='../results/pnd_closest_distance_boxplot_filtered.png'):
+    """
+    Generates side-by-side boxplots to compare 'Closest distance' and 'Personalized Network Distance (PND)'
+    across three groups of drugs.
+
+    The groups shown on the x axis are:
+    - 'Treatment': rows of pnd_df whose 'Treatment' is 'yes'.
+    - 'All unknown': rows of pnd_df whose 'Treatment' is 'unknown'.
+    - 'Filtered unknown for DR': every row of filtered_pnd.
+
+    Input Parameters:
+    - pnd_df (pd.DataFrame): DataFrame containing drug data with columns:
+      - 'Treatment': The treatment status of the drugs ('yes' or 'unknown').
+      - 'Closest distance': A numeric measure of the closest distance metric for the drugs.
+      - 'PND': A numeric measure of Personalized Network Distance for the drugs.
+    - filtered_pnd (pd.DataFrame): A subset of pnd_df filtered for a specific condition, with the same columns as pnd_df.
+    - output_path (str): File the figure is written to (saved at 300 dpi).
+
+    Returns:
+    - None: The function saves the boxplot comparison as a PNG file and displays the plot.
+    """
+    drugs_with_disease = pnd_df[pnd_df['Treatment'] == 'yes']
+    drugs_without_disease = pnd_df[pnd_df['Treatment'] == 'unknown']
+
+    # Relabel 'Treatment' so that each of the three groups gets its own box
+    combined_data = pd.concat([
+        drugs_with_disease.assign(Treatment='Treatment'),
+        drugs_without_disease.assign(Treatment='All unknown'),
+        filtered_pnd.assign(Treatment='Filtered unknown for DR'),
+    ])
+
+    palette = {
+        'Treatment': '#FF7A7A',
+        'All unknown': '#79C4FF',
+        'Filtered unknown for DR': '#CAF0F8',
+    }
+    # (column to plot, y-axis label); the raw string keeps the mathtext backslash
+    # without relying on an invalid '\m' escape sequence
+    panels = [
+        ('Closest distance', 'Closest distance'),
+        ('PND', r'Personalized Network Distance ($\mathregular{PND}$)'),
+    ]
+
+    # One figure with 1 row and 2 columns: 'Closest distance' on the left, 'PND' on the right
+    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
+
+    for ax, (column, y_label) in zip(axes, panels):
+        sns.boxplot(x='Treatment', y=column, data=combined_data, hue='Treatment', ax=ax,
+                    palette=palette, dodge=False, medianprops=dict(linewidth=2))
+        ax.set_ylabel(y_label, fontsize=12)
+        ax.set_xlabel('')
+        for label in ax.get_xticklabels():
+            label.set_fontsize(12)
+        # The hue duplicates the x axis, so the legend is hidden
+        ax.legend([], [], frameon=False)
+
+    plt.tight_layout()  # Adjust the layout of the subplot to avoid overlap
+
+    plt.savefig(output_path, dpi=300)
+    plt.show()
+
+
+
+
+
+def get_gsm_ids_for_lowest_pnd_drugs(pnd_df, drugs_list, num_gsm=3):
+    """
+    This function returns, for each drug in drugs_list, its rows with the lowest PND values.
+
+    Input Parameters:
+    - pnd_df (pd.DataFrame): DataFrame containing drug data with at least the columns:
+      - 'Drugs': The drug identifier that is matched against drugs_list.
+      - 'PND': A numeric measure of Personalized Network Distance, used for sorting.
+    - drugs_list: Iterable of drug identifiers to look up in the 'Drugs' column.
+    - num_gsm: Maximum number of rows (lowest PND first) to keep for each drug.
+
+    Returns:
+    - gsm_df: DataFrame with all columns of pnd_df, holding up to num_gsm rows per drug,
+      grouped in the order of drugs_list and sorted by ascending PND within each drug.
+      An empty DataFrame with the columns of pnd_df is returned when drugs_list is empty.
+    """
+    # For every requested drug keep its num_gsm rows with the smallest PND
+    filtered_rows = [
+        pnd_df[pnd_df['Drugs'] == drug].sort_values(by='PND').head(num_gsm)
+        for drug in drugs_list
+    ]
+
+    # pd.concat raises ValueError on an empty list, so handle "no drugs requested" explicitly
+    if not filtered_rows:
+        return pnd_df.iloc[0:0]
+
+    # Concatenate the selected rows into a single DataFrame
+    gsm_df = pd.concat(filtered_rows)
+
+    return gsm_df
+
+
+
+
+
+
+
+
+
+
+
+
+            
+            
+            
+            
+            
+            
+            
+            
+            
\ No newline at end of file
--- a/analysis_drug_repurposing/scripts/drug_repurposing_alzheimer.ipynb
+++ b/analysis_drug_repurposing/scripts/drug_repurposing_alzheimer.ipynb