Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Sign in
Toggle navigation
N
Network medicine and single cell for Alzheimer
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Disnet
Network medicine
Network medicine and single cell for Alzheimer
Commits
84259f21
Commit
84259f21
authored
Mar 28, 2025
by
Andrea
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Code update
parent
cd9227e2
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
477 additions
and
2472 deletions
+477
-2472
code/functions/functions.py
code/functions/functions.py
+228
-0
code/functions/functions_proximity.py
code/functions/functions_proximity.py
+0
-713
code/scrna_ppi_analysis.ipynb
code/scrna_ppi_analysis.ipynb
+249
-1759
No files found.
code/functions/functions.py
0 → 100644
View file @
84259f21
#! /usr/bin/env python
"""
# ---------------------------------------------------------------------------
# functions.py
# ----------------------------------------------------------------------------
"""
import
pandas
as
pd
import
numpy
as
np
import
seaborn
as
sns
import
matplotlib.pyplot
as
plt
import
networkx
as
nx
from
scipy.stats
import
norm
from
itertools
import
combinations
from
tqdm
import
tqdm
import
re
from
itertools
import
product
from
statannotations.Annotator
import
Annotator
# =================================================================================
def genes_dis(enf, file):
    """
    Return the list of genes associated with the disease "enf" in the dis_gen file.

    Parameters
    ----------
    enf : disease identifier, compared with ``==`` against every entry of
        ``file["cui"]``.
    file : table-like object exposing the columns "cui" and "gene_id"
        (for example a pandas DataFrame or a dict of equally long lists).

    Returns
    -------
    list
        The "gene_id" value of every row whose "cui" equals ``enf``, in row
        order. Duplicates are kept; the list is empty when nothing matches.
    """
    # Walk both columns in lockstep rather than indexing "gene_id" with an
    # enumerate() counter: on an integer-indexed pandas Series `col[i]` is a
    # label lookup, so a filtered or re-indexed table would raise KeyError or
    # silently return the wrong row.
    return [gen for dis, gen in zip(file["cui"], file["gene_id"]) if dis == enf]
# =================================================================================
def pro_gen_dict(gene_list, file):
    """
    Build a protein -> gene dictionary for the genes associated with a disease.

    key: protein associated with each gene in the gen_pro file
    value: gene related to the key protein in the gen_pro file

    Parameters
    ----------
    gene_list : iterable of (hashable) gene ids to keep.
    file : table-like object exposing the columns "gene_id" and "protein_id"
        (for example a pandas DataFrame or a dict of equally long lists).

    Returns
    -------
    dict
        One entry per protein whose gene is in ``gene_list``. When a protein
        occurs in several matching rows, the gene of the last such row wins.
    """
    # Build the lookup set once: testing membership against a list for every
    # row of gen_pro is quadratic on large tables.
    wanted_genes = set(gene_list)

    # Pair the two columns row by row instead of using an enumerate() counter
    # as an index, which is a label lookup on an integer-indexed pandas Series
    # and breaks on filtered/re-indexed tables.
    return {
        prot: gen
        for gen, prot in zip(file["gene_id"], file["protein_id"])
        if gen in wanted_genes
    }
# =================================================================================
def gen_pro_PPI(dict1, file):
    """
    Keep only the prot:gen relationships whose protein appears in the PPI network.

    key: proteins appearing in the PPI network
    value: genes related to the key protein

    Parameters
    ----------
    dict1 : dict mapping protein -> gene.
    file : table-like object exposing the columns "prA" and "prB" of the
        pro_pro file (the two endpoints of each interaction).

    Returns
    -------
    dict
        The entries of ``dict1`` whose key occurs in "prA" or "prB", in the
        same order as in ``dict1``.
    """
    # Collect every protein of the PPI table once. Converting both columns to
    # lists again for every protein of dict1 made the filter quadratic.
    ppi_proteins = set(file["prA"]).union(file["prB"])

    return {prot: gene for prot, gene in dict1.items() if prot in ppi_proteins}
# =================================================================================
def lcc(SG):
    """
    Return the largest connected component (LCC) of the subgraph SG.

    SG is expected to be the subgraph of the PPI network formed only with the
    proteins associated with a disease, so the LCC is the module comprising
    the largest number of disease proteins. Because those proteins were taken
    from the prot:gen dictionary, the number of proteins in the LCC is also
    the number of disease genes in the LCC.

    Returns
    -------
    set
        The nodes of the largest component as yielded by
        ``nx.connected_components``; an empty set when SG has no nodes.
    """
    # `default` keeps an empty subgraph (no disease protein found in the PPI)
    # from raising "max() arg is an empty sequence".
    return max(nx.connected_components(SG), key=len, default=set())
# =================================================================================
def nodes_by_degree(G):
    """
    Group the nodes of the network G by their degree.

    Returns a dictionary whose keys are degrees and whose values are the lists
    of all nodes having that degree, in the order given by ``G.nodes()``.
    """
    by_degree = {}
    for vertex in G.nodes():
        # Create the bucket for this degree on first use, then file the node.
        by_degree.setdefault(G.degree(vertex), []).append(vertex)
    return by_degree
# =================================================================================
def degrees_list(G):
    """
    Return two lists for the network G: its nodes and their degrees.

    The first list comes from ``G.nodes()``; the second holds the degree
    values reported by ``G.degree()``.
    """
    node_names = [*G.nodes()]
    degree_by_node = dict(G.degree())
    return node_names, [*degree_by_node.values()]
# =================================================================================
def calculate_lcc_for_cell_type(degs_cell_type, gen_pro, pro_pro, PPI):
    """
    Compute the LCC of the PPI subnetwork built from the DEGs of a cell type.

    Parameters
    ----------
    degs_cell_type : genes (DEGs) of the cell type, passed to ``pro_gen_dict``.
    gen_pro : gene/protein table, passed to ``pro_gen_dict``.
    pro_pro : protein/protein table, passed to ``gen_pro_PPI``.
    PPI : graph exposing ``subgraph``; the interactome.

    Returns
    -------
    The value returned by ``lcc`` for the induced subgraph.
    """
    # Proteins corresponding to the DEGs (protein -> gene mapping).
    deg_proteins = pro_gen_dict(degs_cell_type, gen_pro)

    # Keep only the proteins that are present in the PPI table.
    deg_proteins_in_ppi = gen_pro_PPI(deg_proteins, pro_pro)

    # Build the subnetwork for the cell type and take its largest component.
    return lcc(PPI.subgraph(deg_proteins_in_ppi))
# =================================================================================
def calculate_lcc_from_prots(prots, pro_pro, PPI):
    """
    Compute the LCC of the PPI subnetwork induced by a collection of proteins.

    Parameters
    ----------
    prots : iterable of protein ids.
    pro_pro : table-like object exposing the columns "prA" and "prB"
        (the two endpoints of each interaction).
    PPI : graph exposing ``subgraph``; the interactome.

    Returns
    -------
    The value returned by ``lcc`` for the subgraph of ``PPI`` restricted to
    the proteins of ``prots`` that occur in "prA" or "prB".
    """
    # Gather the proteins of the PPI table once; rebuilding both column lists
    # for every protein made this filter quadratic.
    ppi_proteins = set(pro_pro["prA"]).union(pro_pro["prB"])

    # Select the proteins that appear in the PPI network.
    prots_interactome = [prot for prot in prots if prot in ppi_proteins]

    return lcc(PPI.subgraph(prots_interactome))
# =================================================================================
def generate_log_bins(graph, num_bins):
    """
    Generate logarithmically spaced bin edges from the degree distribution of
    the nodes of a graph.

    Parameters
    ----------
    graph : object whose ``degree()`` yields ``(node, degree)`` pairs.
    num_bins : number of edges requested from ``np.logspace``.

    Returns
    -------
    numpy.ndarray
        ``num_bins`` values spaced evenly on a log scale between the smallest
        degree (but at least 1) and the largest degree (but at least the
        lower edge).

    Raises
    ------
    ValueError
        If the graph has no nodes.
    """
    degrees = [degree for _, degree in graph.degree()]
    if not degrees:
        raise ValueError("generate_log_bins() needs a graph with at least one node")

    # Clamp the lower edge to 1 to avoid log(0).
    min_degree = max(min(degrees), 1)
    # Clamp the upper edge too: a graph made only of isolated nodes has a
    # maximum degree of 0, which would again feed 0 into log10.
    max_degree = max(max(degrees), min_degree)

    return np.logspace(np.log10(min_degree), np.log10(max_degree), num_bins)
# =================================================================================
def group_nodes_by_bins(graph, log_bins):
    """
    Group the nodes of a graph into logarithmic bins according to their degree.

    Parameters
    ----------
    graph : object whose ``degree()`` yields ``(node, degree)`` pairs.
    log_bins : bin edges handed to ``np.digitize``.

    Returns
    -------
    dict
        Maps each bin index (``np.digitize`` result minus one) to the list of
        nodes whose degree falls in that bin.
    """
    grouped = {}
    for vertex, vertex_degree in graph.degree():
        # Shift the np.digitize result so that bin indices are 0-based;
        # degrees below the first edge therefore end up in bin -1.
        slot = np.digitize(vertex_degree, log_bins) - 1
        if slot not in grouped:
            grouped[slot] = []
        grouped[slot].append(vertex)
    return grouped
# =================================================================================
def random_subset_generator(proteins, graph_ppi, num_iterations, num_bins=10):
    """
    Generate random protein sets that match the degree bins of the input set.

    Every protein is replaced, in each iteration, by a node drawn uniformly at
    random (with ``np.random.choice``) among the *other* nodes of the network
    that share its logarithmic degree bin.

    Parameters
    ----------
    proteins : iterable of nodes of ``graph_ppi``.
    graph_ppi : graph exposing ``degree()`` / ``degree(node)``.
    num_iterations : number of random sets to draw.
    num_bins : number of logarithmic bin edges (default 10, the value that
        used to be hard-coded).

    Returns
    -------
    list of list
        One list per iteration, aligned with ``proteins``. Each entry is the
        chosen node converted to ``str``, or ``None`` when the protein has no
        other node in its bin. Iterations without any truthy entry are
        dropped, so the result may hold fewer than ``num_iterations`` lists.
    """
    # Generation of logarithmic bins
    bin_edges = generate_log_bins(graph_ppi, num_bins)
    # Group nodes in logarithmic bins
    group_nodes_bins = group_nodes_by_bins(graph_ppi, bin_edges)

    # The bin of a protein does not change between iterations, so resolve the
    # candidate replacements once. The protein itself is removed up front:
    # re-drawing until the pick differs from the protein never terminates when
    # the protein is the only node of its bin (e.g. a unique top-degree hub).
    candidates_per_protein = []
    for prot in proteins:
        # bin of the node based on its degree
        bin_index = np.digitize(graph_ppi.degree(prot), bin_edges) - 1
        same_bin_nodes = group_nodes_bins.get(bin_index, [])
        candidates_per_protein.append(
            [node for node in same_bin_nodes if node != prot]
        )

    results = []
    for _ in range(num_iterations):
        # One replacement (or None) per input protein, in input order.
        iteration_results = [
            str(np.random.choice(candidates)) if candidates else None
            for candidates in candidates_per_protein
        ]
        if any(iteration_results):
            results.append(iteration_results)
    return results
# =================================================================================
def load_and_process(files):
    """
    Load several CSV files and stack them, tagging each row with its cell type.

    The cell type is taken from the file name, which is expected to look like
    "degs_celltype_mapped.csv": it is the second "_"-separated token.

    Parameters
    ----------
    files : iterable of paths (``str`` or path-like) to CSV files.

    Returns
    -------
    pandas.DataFrame
        The concatenation (``pd.concat``) of all files, each with an added
        'cell_type' column. Row indices are kept as read from each file.

    Raises
    ------
    IndexError
        If a file name contains no "_" separator.
    ValueError
        Raised by ``pd.concat`` when ``files`` is empty.
    """
    from pathlib import Path

    df_list = []
    for file in files:
        df = pd.read_csv(file)
        # Split only the file name, not the whole path: an underscore in a
        # directory name would otherwise shift the token that is picked up.
        cell_type = Path(file).name.split("_")[1]
        # Add the 'cell_type' column to the DataFrame.
        df["cell_type"] = cell_type
        df_list.append(df)
    return pd.concat(df_list)
# =================================================================================
\ No newline at end of file
code/functions/functions_proximity.py
deleted
100644 → 0
View file @
cd9227e2
This diff is collapsed.
Click to expand it.
code/scrna_ppi_analysis.ipynb
View file @
84259f21
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment