Commit dba46f0a authored by Antonio Gil

Addition of the revised files

parent b2f66963
{
"cells": [],
"metadata": {},
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "f62b61bc",
"metadata": {},
"outputs": [],
"source": [
"!python src/main.py --thresholds"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 31,
"id": "233d5348",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PPI Network: 16918 nodes, 238361 edges\n",
" 0%| | 0/1 [00:00<?, ?it/s]DIAMOnD(): ignoring 1 of 14 seed genes that are not in the network\n",
"\n",
" results have been saved to 'first_1000_added_nodes_weight_1.txt' \n",
"\n",
"start running DOMINO...\n",
"generating graph from /tmp/tmpctgip1np\n",
"network' pkl is saved: /tmp/tmpctgip1np.pkl\n",
"done building network\n",
"generating subgraphs...\n",
"Before slicing: n of cc:7, n of nodes: 16920, n of edges, 238362\n",
"# of modules after extraction: 69\n",
"After slicing: n of cc:107, n of nodes: 7055, n of edges, 56982\n",
"subgraphs' pkl is saved\n",
"number of slices: 107\n",
"# of slices after perturbation TH: 4/107\n",
"4 relevant slices were retained with threshold 0.3\n",
"n of putative modules: 1\n",
"n of final modules: 1 (n=[13])\n",
"1 final modules are reported at /tmp/tmpqptdxutp/tmpq4jr44k8/modules.out\n",
"False\n",
"Setting the graph_diameter to the precomputed value of 8. Directly specify meta to overwrite this.\n",
"100%|█████████████████████████████████████████████| 1/1 [00:59<00:00, 59.06s/it]\n",
"100%|███████████████████████████████████████████| 1/1 [00:00<00:00, 1175.53it/s]\n",
"Saving weighted DIAMOnD: 0it [00:00, ?it/s]\n",
"PPI Network: 16918 nodes, 238361 edges\n",
"100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 15.60it/s]\n",
"PPI Network: 16918 nodes, 238361 edges\n",
"100%|███████████████████████████████████████████| 1/1 [00:00<00:00, 8405.42it/s]\n",
"PPI Network: 16918 nodes, 238361 edges\n",
"0it [00:00, ?it/s]\n",
"PPI Network: 16918 nodes, 238361 edges\n",
"0it [00:00, ?it/s]\n",
"PPI Network: 16918 nodes, 238361 edges\n",
"0it [00:00, ?it/s]\n",
"PPI Network: 16918 nodes, 238361 edges\n",
"0it [00:00, ?it/s]\n",
"PPI Network: 16918 nodes, 238361 edges\n",
"0it [00:00, ?it/s]\n",
"PPI Network: 16918 nodes, 238361 edges\n",
"0it [00:00, ?it/s]\n",
"PPI Network: 16918 nodes, 238361 edges\n",
"0it [00:00, ?it/s]\n",
"PPI Network: 16918 nodes, 238361 edges\n",
"0it [00:00, ?it/s]\n"
]
}
],
"source": [
"!python src/main.py --thresholds"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "01e63699",
"id": "f62b61bc",
"metadata": {},
"outputs": [],
"source": []
"source": [
"!python src/main.py --thresholds"
]
}
],
"metadata": {
......
......@@ -3,11 +3,10 @@ import sys
import tempfile
import shutil
# Add the folder containing DIAMOnD and DIAMOnD_mod to the path
sys.path.append(os.path.abspath('./state_of_art_repos/DIAMOnD'))
from DIAMOnD import run_diamond_from_args
from DIAMOnD_mod import run_diamond_mod_from_args
from DIAMOnD import run_diamond_from_args as run_diamond_from_args
from DIAMOnD_weighted import run_diamond_weighted_from_args
class DIAMOND:
"""
......@@ -17,59 +16,22 @@ class DIAMOND:
self.alpha = alpha
def run_diamond(self, ppi_path: str, seed_file: str, n: int):
# Execute the standard DIAMOnD with network file, seed file, and number of additions
# standard DIAMOnD (all seeds weight = self.alpha)
seed_genes, added_nodes = run_diamond_from_args([
ppi_path,
seed_file,
str(n)
str(n),
str(self.alpha)
])
# Collect unique added genes
added_genes = set(gene[0] for gene in added_nodes)
return {
'seed_nodes': list(seed_genes),
'module_nodes': list(added_genes)
}
class DIAMOND_MOD:
"""
Wrapper for the modified DIAMOnD algorithm used in thresholded runs.
This wrapper creates a temporary directory structure that DIAMOnD_mod expects,
invokes the algorithm, then cleans up.
"""
def __init__(self, alpha=1):
self.alpha = alpha
def run_diamond(self, ppi_path: str, seed_file: str, n: int):
# 1) Create a temporary directory for seeds, with a subfolder 'score_default'
tmpdir = tempfile.mkdtemp(prefix="diamondmod_")
score_dir = os.path.join(tmpdir, "score_default")
os.makedirs(score_dir, exist_ok=True)
# 2) Extract the disease name from the seed_file basename
basename = os.path.basename(seed_file) # e.g. 'seed_nodes_Disease.txt'
# Remove prefix 'seed_nodes_' and suffix '.txt'
disease = basename.replace("seed_nodes_", "").rsplit('.txt', 1)[0]
added = set(g[0] for g in added_nodes)
return {'seed_nodes': list(seed_genes), 'module_nodes': list(added)}
# 3) Copy and rename the seed file into the expected location
# DIAMOnD_mod looks for 'seed_genes_<disease>.txt' in subfolders
dest_file = os.path.join(score_dir, f"seed_genes_{disease}.txt")
shutil.copy(seed_file, dest_file)
# 4) Call the modified DIAMOnD with the directory containing score_* subfolders
seed_genes, added_nodes = run_diamond_mod_from_args([
def run_diamond_weighted(self, ppi_path: str, seed_file: str, n: int):
# weighted DIAMOnD: seed_file has "gene<TAB>weight"
seed_genes, added_nodes = run_diamond_weighted_from_args([
ppi_path,
tmpdir,
disease,
seed_file,
str(n)
])
# 5) Clean up the temporary directory
shutil.rmtree(tmpdir)
# 6) Format and return results like the original DIAMOND wrapper
added_genes = set(gene[0] for gene in added_nodes)
return {
'seed_nodes': list(seed_genes),
'module_nodes': list(added_genes)
}
added = set(g[0] for g in added_nodes)
return {'seed_nodes': list(seed_genes), 'module_nodes': list(added)}
Q9NQW8
O75051
Q9GZU5
O60840
P19087
P26441
Q6ZUT3
Q6P4Q7
Q9Y3T9
Q16281
P26367
Q13956
Q13057
P08100
Q9P2K1
P51160
Q8TC44
Q9Y5X4
P08842
P18850
Q04609
P80723
Q14524
Q15046
P49815
Q9NS23
P01112
P16070
Q92508
P14210
P13473
P12004
P42345
P35240
Q13520
Q14240
P15056
P61073
Q6ZMJ4
P01116
Q96P20
P01375
Q9UNQ0
P00403
Q9NZQ7
P10451
Q99748
Q15303
P04626
P04637
P10644
P42336
Q9HBA0
P30281
Q8N653
P10176
P23560
Q9BXM9
P29323
Q04637
P05181
P09211
P17813
Q9Y3B3
P04040
P15692
P29474
Q03135
P00533
P21860
Q14790
Q9BZC7
P06400
P39905
O14786
Q86XR7
P55011
P08253
Q02297
P10415
Q8ND04
Q92574
P38936
P32119
Q86VB7
Q99758
P60842
Q14533
P01133
P29972
P08581
P24385
P07996
P06730
P19235
P09603
P04424
P01588
Q5T4W7
P40305
O97980
P53667
P41181
O60232
P55087
P13612
P35354
P03372
O43511
Q86YT6
Q96A00
P52788
P01135
P46527
P05231
Q12824
O14901
Q13214
Q9HC07
P31749
P10275
P46013
P04179
P42771
O00399
Q12979
P19544
P09038
P21359
P51946
Q86TG7
P05230
P01137
Q9UBP5
Q9BTV5
Q86SH2
O75462
Q00987
P42224
P28482
O43790
P14780
Q8N726
P60484
O00233
O14733
Q9P0V3
P20930
P20702
P08700
Q15464
P06702
P17676
O95983
Q6GMV2
P11308
Q96GD4
Q9Y5W5
P14210
P40337
Q9Y5K2
O60260
P13726
P49841
Q9H4L4
P21980
O76064
O95059
Q9UQ74
P05161
Q9UKV0
P35573
P41226
P16442
P42338
P15391
O94907
Q9Y618
P00395
P32780
P14635
P04637
Q96AH8
Q9HBH0
P17252
P11362
P28698
P41134
P50502
Q9H3M7
P29350
P59666
Q9NRI5
Q5MNZ9
Q9Y6K9
Q9Y6Q9
Q9H165
Q9HC35
P01215
P11473
Q969V5
P05813
Q92830
P00740
P13631
Q9HBK9
O60885
P22004
Q96J66
P49736
P10827
P20585
P07900
O95352
Q96QH2
P10070
O00220
P01583
P29590
Q96BJ3
O00470
P31269
P23219
Q04206
P33527
P48023
P01111
O15067
Q15910
P40238
Q96AD5
P16220
Q03112
O14933
P19526
P05164
P56159
P23769
Q12948
Q14289
Q99856
Q13547
Q9Y4H4
Q9Y275
P08138
Q86UG4
Q06124
Q6ISU1
Q13541
P28340
P37231
Q9Y2Y4
O75874
P35354
P16278
P40933
Q9UIK4
P45983
O43175
Q7LBC6
Q13948
Q30201
Q9H4A4
Q07065
Q86T82
Q7LGC8
P01584
Q13427
P20813
P17861
P14222
P11465
P31749
O75956
Q13162
Q99835
Q86YL7
O60760
P32248
O14521
Q9UJW0
Q5T9L3
P20248
P04818
P52630
Q15004
P07355
O00144
P10276
Q02880
P17948
P11926
P0DN86
Q07812
Q13794
Q00532
P28482
Q9UIH9
P17405
P17931
Q12778
Q7RTS3
P78396
O43174
P60484
P46098
P41222
Q9Y4P8
Q15125
O14520
O95936
O15151
P30044
Q9H4X1
Q14526
O14727
P35232
Q96DB5
Q9Y3B4
P35659
Q96AH0
P01100
O96019
P52948
O43663
Q5JUW0
Q12918
P01375
O96017
Q16552
P04626
O15162
Q8NFH3
Q96HU1
Q06546
P27469
P29323
P05160
P26358
Q14469
P47736
Q9H2X6
P02545
P27361
Q14790
P06493
Q15238
P20393
Q14764
P08311
P51397
P55089
P00742
Q00978
P21941
P49639
P63165
P55073
P10415
P25445
Q9Y222
Q9P2Y5
Q9Y2Q3
Q9H9Z2
Q8IZL9
P40424
Q9Y4J8
Q13253
Q15717
P08238
Q16649
P49715
P09917
P20701
P56945
Q9H1Y0
P13501
P09683
P13591
P55273
Q14683
P48736
Q96LZ7
Q9P1W9
Q09428
P08670
P00734
Q96KQ7
O00625
O43524
Q09472
Q53EL6
Q6UX65
P16455
Q15843
Q92851
P12724
Q9Y6K1
P46100
P20138
O60341
P11215
P39748
Q8TC59
P40763
P49959
Q8NB91
P10912
P50591
Q6N021
Q9UM07
Q13478
Q6W2J9
P05231
Q01196
P50052
P07204
P10914
P31358
P54259
P26651
Q7Z5L9
P00750
P53999
P10275
P42771
O14980
P39880
P78344
Q7Z6L0
Q8N264
P55085
P48735
Q9Y4E5
Q86TG7
Q9BUK6
P49711
P05107
P46531
P19793
P54253
Q03111
Q6UN15
P25103
P42224
P15144
Q9P2R6
Q13501
Q9UBB5
P55854
P25325
Q9UL19
Q8N726
O14745
Q14739
P02786
P11274
P16871
Q9H0R6
Q9Y248
Q13084
Q9NS23
P31249
Q9BT92
Q03252
P30307
P07333
Q13564
P06731
Q5H9F3
O60682
P15056
O75376
Q02750
P22083
Q13323
P54132
P51828
Q15303
P22735
Q86SQ9
P42336
O60503
P37198
P61956
Q8N7E2
P05362
P46695
Q9H2G9
P24864
Q16665
P25791
Q9UM73
P09211
P42773
P10721
P01911
P08246
Q9ULX5
Q13526
P27708
Q02779
Q8TDQ0
Q16518
P00533
P63244
P00441
P10145
Q92913
P80370
P18887
P05204
P04141
Q16881
P26718
Q9HCS7
Q9UBN6
Q9GZP9
P78395
Q9BQQ3
P14598
P08151
P78504
O15350
P30048
Q14002
Q16539
P53355
Q9H4A3
P08571
P20700
P16473
P19484
Q8WXI8
Q9H3D4
O14497
P98160
P01133
Q03164
Q8IZQ1
Q01094
O15120
O15205
Q8WZ82
P05109
P58340
O43918
P16284
Q9BVP2
O00167
O95361
P41221
P34810
Q02556
P41182
P09382
P10826
Q9NX95
Q9UBH6
Q15672
O14798
P18440
P12314
P06748
P15531
Q9UKK6
O15297
Q03014
P46527
Q86U10
P78317
Q75NE6
P22301
P30203
Q99612
Q86V85
P05112
O60216
Q8IXJ9
P01137
P00747
P35227
Q14116
O75925
Q08722
Q9HD15
P23458
P36888
P00492
P63279
P05114
Q8WXS3
Q96TC7
Q13241
P62333
P78347
Q8WUD6
P10124
P42684
P60520
P08631
Q8N474
Q06250
P01106
Q13342
P23497
P16070
P07949
P18146
P09488
Q969K3
P28906
Q9Y2Q5
P60903
Q53EP0
Q9UMW8
P61073
P78357
P01116
P08758
P08519
Q96LR5
Q16236
Q9UIB8
Q8IUI8
Q8NFJ5
P30711
O00329
P07741
Q86UV5
P10242
P10644
Q9NPF7
P55081
Q05516
P51532
P06401
O75787
Q5T5J6
P42574
P80098
P15498
P15692
Q15744
P16949
P40198
Q9Y5Y4
Q76LX8
P42229
Q9UJU2
P51580
P05771
P52790
P39905
P42772
P02768
Q9UER7
P12643
Q13671
P04083
P06729
P50135
P00488
Q96DT0
P17947
P13928
P00519
P43004
O15550
O43315
O75844
P38936
Q14980
Q9NR12
Q9UPP1
P14652
Q96Q27
P61006
Q9BZK7
P01033
O75592
Q8NFD5
O14746
P51825
P04049
P07996
Q96RU7
Q07820
P42566
Q02363
Q13950
O15392
Q7Z7K6
P51692
Q9UBX3
Q15257
P52798
P11279
P29762
P49006
P01579
Q03169
Q07666
Q9UBW7
P28907
P08183
Q01105
P49913
P51689
P01135
Q6ZT07
Q15022
P17936
O14931
Q5VT79
P09619
P12644
P09919
P38398
P35610
Q02548
Q12829
P21579
P19544
Q15466
P08575
Q68D85
P16152
Q06455
P41212
P07954
P05451
Q8N8S7
Q92597
P14780
P12429
P60709
Q13485
P01584
P12821
Q9NP81
Q2M385
Q96JG9
O15055
P35555
Q8N157
P27352
Q6UWY2
Q9Y276
Q12948
Q15493
O95832
P12755
P49591
Q9BRG2
O75298
Q07654
O75914
P51606
O95471
Q92570
Q9NZQ3
Q5T5P2
Q5M775
P16442
Q3V6T2
O95255
Q10981
P22897
O75815
P15882
Q9UPI3
O14733
P17181
P08700
P39019
Q15596
Q13761
Q86VZ1
Q9Y6D9
Q9BXL5
P0DJD1
Q12866
P11308
Q92954
O43324
Q96GD4
Q9NQS1
Q14563
Q15468
Q16829
P29375
P25063
O60675
P35240
Q9GZY0
O75365
P33151
P14921
Q9NX94
Q13795
Q9UKV0
P18433
P11226
Q14C87
P55075
Q6P1Q0
Q8TD46
P16442
Q9Y3D8
P42338
P15391
O60353
P04637
P08842
P11362
P35237
P62324
Q9UPS8
Q9H1B5
P29350
Q9UBP4
Q9NR96
P20815
A8K2U0
Q86X10
P06881
P51826
P10599
P62244
Q99697
P08865
O95081
Q8IUH3
O60674
Q6UW56
P61254
P00740
Q14005
O60885
P13631
O00548
P42677
P20585
Q96QH2
P50225
P01583
P35475
P29590
P55199
P15976
Q7L1I2
Q86U70
P31269
O00470
Q92574
Q12884
Q15154
P33527
P61964
P01111
Q15910
P40238
P47928
Q15438
Q96EB6
P08473
Q03112
O14709
P30679
P05164
P23769
P06730
Q9NZK5
P31314
Q14289
P13051
Q53HV7
O43914
P35251
P35637
O14627
P56645
P61313
Q14994
P08138
Q86UG4
Q06124
P00540
P62854
Q8NBU5
O00255
P25800
O75874
Q13951
Q13948
Q2M3G0
Q30201
Q15306
Q9UBK8
P28908
P49789
P01584
Q5XKL5
Q9NZZ3
P19525
P20813
P41586
Q99873
Q16667
P19113
Q14865
Q8IWP9
P31749
O60760
O60641
P32248
Q5T9L3
P20248
P04818
P22681
Q9UM11
P55198
P10276
Q06945
P17948
Q07812
P09601
P40225
Q00987
Q9Y286
P28482
Q07020
P20336
P35453
P12821
P16104
O15151
Q9UP52
Q9NWQ8
Q9BR01
O14727
P54274
P55017
Q92835
P52948
P01375
Q9UNQ0
Q6J9G0
Q92820
P57679
P02787
P15516
P59282
O60936
Q06609
B0YJ81
P08174
P10398
P03999
P29323
P30291
P52823
P98177
P17813
Q8N7R0
Q92673
Q92804
Q9HD36
Q06520
Q92817
P06400
Q14764
P08311
Q9BSI4
Q9NW38
Q13625
P51397
P51878
P00742
Q01844
O14786
P01282
P42568
P08253
P10415
P25445
P04629
Q9Y222
P17693
P41217
Q9Y2Q3
Q9H9Z2
P04798
P40424
P04440
P50539
Q6RUI8
Q02083
P18031
P09683
P46777
P08620
P13591
Q8WUF5
Q15554
Q6IAA8
Q9BXW9
P55273
O95456
P48736
Q9P1W9
Q93052
P27797
Q9Y2B2
P01258
P00734
P11586
Q96KQ7
P27487
Q09472
P16455
Q9Y6K1
Q8WWQ2
Q96T37
P46100
P20138
O60341
P34925
Q15773
P47974
P35318
P40763
Q9UIQ6
P78415
Q8NB91
P50591
Q6N021
P0DP23
P05231
Q9UQ26
Q01196
Q99626
P10914
P54259
O00501
P53999
P42771
P39880
P17542
Q8N264
O15438
P78310
P18065
Q16270
P48735
Q9UGL1
Q96BS2
O00206
O95453
P46531
P09429
Q92794
P04003
Q9Y6Q2
Q92945
P15036
Q9Y2S2
Q03111
Q7L266
Q6UN15
P25103
P42224
Q9UHD8
Q9HD64
Q14774
Q13018
Q14814
Q8N726
P57723
P55157
Q14739
P02786
Q5DJT8
P11274
P16871
P14784
Q92832
Q13084
Q9Y235
P49815
P17483
Q9BT92
P46783
P62273
P07333
P23771
P12004
P15923
P40189
P04280
P15056
O75376
P22083
P13569
P20809
P54132
P20366
P51681
Q16517
P13942
Q86SQ9
P42336
O95841
Q16658
P62913
P62380
P29017
Q5VWP3
P25791
Q16665
O43711
P0DMV1
Q9UM73
P09211
P42773
P12259
P10721
P51587
P01911
O94915
P14679
Q15696
P00533
P20333
Q08257
Q07955
Q9UQQ2
P02751
Q5ZPR3
P18887
P04141
P21741
P35579
P78395
P24468
Q05923
P01889
Q8IZP0
Q7Z7C8
P08151
Q96T68
O15350
P42898
P53350
Q13422
P02788
P08571
Q07011
P01563
Q03164
Q99732
Q00597
P09874
O15527
P04004
Q13315
P04439
P58340
Q9UH73
Q99062
P41221
Q03701
P42704
Q02556
P09382
P16591
P16035
P32320
Q92793
O14508
Q05932
P04216
O75533
P06748
P10321
Q9UKK6
P15559
P0DP25
P48551
Q86U10
P28072
Q75NE6
P22301
O60264
P21439
P01374
Q9NR09
P30203
Q99612
Q969E8
Q15375
Q5T4F7
O15516
P05112
Q8IXJ9
P35226
P61163
P11912
Q14116
P25391
P23458
P36888
P00492
P63279
P16581
Q8WXS3
P62081
P12980
Q9NP58
P46087
P42768
P23528
P20273
O15164
Q70J99
Q06250
P13747
P01106
Q13426
P16234
O14763
P41180
P04275
P09488
P43626
Q9H0R1
P28906
P01562
Q9BX66
Q08050
P08708
Q13492
P61073
P78357
P62847
P01116
P04000
P05113
Q13617
O60934
P11245
P49917
P30711
O00329
P10242
P10644
Q9NPF7
P35222
P09651
P23560
P10176
P46937
P18074
Q9H0G5
P11388
P08684
P16410
Q86UK5
Q12974
Q12983
P15692
Q15744
P16949
Q05195
Q01518
Q07954
Q96RU8
P42229
Q16186
Q9NX02
Q9UJU2
P51580
P42772
P14317
Q9HC29
P55197
P13928
P00519
Q9BYW2
Q9UBN7
P36575
P43246
Q8TCX1
Q8IWZ3
Q96P11
P55265
Q2M1K9
P42766
O15524
O43490
P18077
P29965
P01344
Q8TF68
P51825
P28370
P24385
P13987
Q07820
P60568
P51692
P01241
Q13505
P00374
O60858
P25490
P0DMV2
P48357
P61353
Q9NZC2
Q13015
P41159
P13612
Q14542
O75475
P43652
P49006
P05067
P28907
P08183
P11802
P0DP24
Q01105
P06753
Q6ZT07
Q8NFT8
O60603
Q16625
O43516
Q12824
Q5VT79
Q9NZ71
P09919
O00481
P38398
Q02548
P35610
Q9UQ90
Q02952
Q9C000
P19544
Q9H3R0
Q15466
P08575
P10635
Q14457
P17812
P21359
Q06455
P01303
P41212
P35749
Q9Y251
P05451
Q9ULB4
P13611
Q9C0K0
P48061
O14544
P51816
P62857
P62979
Q8TEK3
P60709
Q9UNN4
Q9ULZ1
O76041
Q13427
Q02750
O75792
O00483
O75306
P19429
Q9H9J2
Q14116
O75112
Q969Y2
Q9NW38
O95822
O00329
P31645
Q9NPD8
P18206
Q9NX95
Q9UBP5
Q9NSU2
P14854
P01308
O75489
O15239
Q9HD89
P29323
O00400
P24844
O00622
Q06124
Q14126
Q02817
P01889
Q8N7E2
P29475
P06280
P04406
P26447
Q9UI09
Q9Y4P3
Q96P66
Q9Y6M9
Q9BYV6
Q9UQ13
P56556
Q53S33
P30084
P07951
Q8NBP7
Q969V5
Q96G97
Q9UDT6
P17655
O60936
P19105
Q92963
Q12887
P01100
P20472
Q92889
O43451
P10321
Q86SK9
P51532
P20800
O60706
P39060
O15287
Q8TCY5
Q13423
P48736
P16671
P13987
Q5JWF2
Q8IY92
P15531
Q9BU61
P10809
Q99959
P16615
Q96I36
P05305
P51790
Q05195
O14958
O76082
Q06587
Q9H8M7
P49638
P02545
P24311
P05362
Q9UH92
Q92736
P01189
P09493
O75072
P19099
Q01094
P48995
P08590
P17931
O14521
P50461
Q13093
P53667
O95390
P05976
P17936
P29460
P03905
P00403
Q9Y6C7
O43502
Q01718
P56381
O75880
Q8TB37
Q6NXT6
Q06413
P25490
Q96IL0
P48735
Q13118
P56539
P0DJ07
P04049
O43678
P48029
P52179
Q8N653
P03923
P49675
P02585
Q00872
P84996
P13533
Q96RL7
P09601
P04439
Q92558
Q9P232
P10415
Q9P032
P10599
P53597
Q86WW8
O00300
Q9UKP3
Q9H270
Q14896
Q9P0J0
P35609
Q30201
Q86TC9
Q9NQX0
Q15831
Q6P5Q4
Q96DP5
P45379
O00767
Q5JTJ3
Q02078
P31749
P08588
P28330
P16035
P00846
P19404
P08123
P07741
P35318
Q9Y2Z2
P51689
Q15746
P12883
Q9NVI7
P15056
P02511
P19634
Q92905
Q5ST30
Q9BXW9
Q5JTZ9
O00206
Q9H0R6
O60313
Q16665
Q96AC1
P09622
P02766
Q9UL12
P04440
Q9BSH4
P10144
Q6TFL4
Q16584
P41595
Q8NB12
Q14192
Q9BQ52
P37231
P83436
P12724
P12821
P04275
P50052
P62508
Q92574
P00797
P38606
Q92769
P27361
Q92570
P49821
Q2V2M9
P08559
P12829
P30556
Q14324
Q13085
Q9UBK2
Q9BZQ4
P63316
P42338
O15360
P17661
Q9BUP3
Q9BXM9
Q08289
Q16891
Q330K2
Q06609
P04637
O95817
Q15046
P22301
P40939
Q53GG5
Q8NB91
P68363
P17050
Q14524
Q9BT22
P12955
Q96HC4
P00395
P06753
P55081
P42345
Q96FA3
O43766
Q9NPI8
O00217
Q9HB96
P51787
Q15526
O00170
P41180
P31040
Q14683
P62753
P60174
P51884
Q12926
O14807
P01116
P42336
Q53H12
Q96TC7
Q8IYD8
Q92629
P01185
P05019
Q9NSE4
Q9BYX4
P17302
P28482
P49748
P62140
Q7Z406
P38398
P53701
Q9Y234
P08833
P51671
Q9P0K1
Q00325
P62736
P14410
Q9UMX1
O95319
Q3T906
Q6PCD5
Q5TEU4
P27797
P01137
P01375
P98161
O75251
Q96H96
P60709
Q0ZGT2
F8WCM5
P24723
P04216
Q9BUB7
O60260
P09001
O75052
P08235
Q8TDP1
O95467
Q86Y39
P00156
P21359
Q9BTV5
Q99731
Q9NX14
Q01105
P23560
O95299
O95997
P23946
P30988
P00414
Q9UI95
P41252
O95169
P01111
P42658
P16930
P58546
Q14164
P16278
Q03164
P54652
P08397
Q8WXI7
P40763
P68133
P15692
P13866
P50416
Q13203
Q96CU9
Q53HV7
Q5SZL2
Q92879
O75347
P13473
P49796
Q8N5M1
Q9Y3Q4
P29474
P35219
P01160
P10646
Q5RI15
Q8N183
Q9UPT6
Q8NEB9
P47972
P04792
Q16795
Q96LZ7
P42166
Q13315
Q99797
Q9Y5Y9
P10176
Q13467
Q9NZJ5
O96000
Q10713
Q13131
P10253
Q8TF45
Q96AX1
P43694
Q9NPC6
P02462
P30626
Q14692
P49888
Q9UHL9
Q86SG6
P53805
P01344
Q9Y375
Q9NPL8
P43405
Q969Q1
O95528
Q9NNW7
O15305
Q9BZV2
Q99684
P02741
Q9H1R3
Q9BX63
O60941
P08069
Q9UP38
Q13936
Q9UGJ0
Q99572
Q5VST9
P01130
O43181
Q9Y5Y7
Q9BYF1
P14780
Q13424
P08253
Q9H845
Q99873
Q9UI47
O43676
A6NFY7
Q9NS23
Q9BTV4
P04150
Q9HCF6
P07900
P26678
Q9Y376
P28331
P36507
Q16595
Q99714
P01106
Q03135
P50402
Q9BR39
Q15327
O15431
P06213
Q9Y276
P68032
Q8WXH0
Q6XR72
P50221
P35250
O15120
Q7KZN9
P78347
Q8WZ42
P10916
Q96DB5
Q09013
O43819
O75164
Q702N8
Q16836
O75380
Q6UUV9
Q00597
Q99996
P42167
Q6NZI2
Q9NYY8
P15924
Q8IZQ8
Q13642
Q96A33
O60783
P16066
O14950
P03897
Q86WJ1
P15502
O15273
Q96L96
O43543
P03372
P16442
Q9UIG0
Q16635
Q8IVG9
P51504
P21912
P49327
P03891
P12235
P02810
P55265
Q9Y3Z3
Q8IUX1
Q99988
Q14315
P03886
Q86YQ2
P49006
P01112
Q96L12
P03915
P51587
A4UGR9
P16860
Q9NVI1
P51449
Q86VF7
P42574
O14793
P04280
P82650
Q9C0B1
Q01449
Q15842
Q5TBB1
P63092
Q8NF91
P10275
P01019
Q86YC2
Q13164
Q07889
P02452
Q9H7Z6
......@@ -263,38 +263,3 @@ def run_diamond_mod_from_args(args=None):
#
# ===========================================================================
if __name__ == '__main__':
# -----------------------------------------------------
# Checking for input from the command line:
# -----------------------------------------------------
#
# [1] file providing the network in the form of an edgelist
# (tab-separated table, columns 1 & 2 will be used)
#
# [2] file with the seed genes (if table contains more than one
# column they must be tab-separated; the first column will be
# used only)
#
# [3] number of desired iterations
#
# [4] (optional) seeds weight (integer), default value is 1
# [5] (optional) name for the results file
#check if input style is correct
input_list = sys.argv
network_edgelist_file,seeds_file,max_number_of_added_nodes,alpha,outfile_name= check_input_style(input_list)
# read the network and the seed genes:
G_original,seed_genes = read_input(network_edgelist_file,seeds_file)
# run DIAMOnD
added_nodes = DIAMOnD(G_original,
seed_genes,
max_number_of_added_nodes,alpha,
outfile=outfile_name)
print("\n results have been saved to '%s' \n" %outfile_name)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
A DIAMOnD implementation with individual seed weights per gene.
Seeds file must have two columns: gene_id and weight (integer).
"""
import copy
import csv
import pickle
import sys
import time
from collections import defaultdict
import networkx as nx
import numpy as np
import scipy.stats
# =============================================================================
def print_usage():
    """Print the command-line synopsis of the weighted DIAMOnD script to stdout."""
    usage_lines = (
        ' ',
        ' usage: ./DIAMOnD_weighted network_file seed_file n outfile_name',
        ' -----------------------------------------------------------------',
        ' network_file : path to PPI edgelist (any delimiter)',
        ' seed_file : table with gene_id and weight columns',
        ' n : number of DIAMOnD genes to add',
        ' outfile_name : destination for results',
        ' ',
    )
    # One print call per line, exactly as many lines as the help text has.
    for text in usage_lines:
        print(text)
# =============================================================================
def check_input_style(input_list):
    """Validate an argv-style argument list and unpack it.

    Args:
        input_list: sequence laid out like ``sys.argv``: index 0 is the
            program name, 1 the network file, 2 the seed file, 3 the number
            of nodes to add, and optionally 4 the output file name.

    Returns:
        Tuple ``(network_file, seed_file, max_added, outfile)`` where
        ``max_added`` is an ``int``. When no output name is supplied,
        ``outfile`` defaults to ``first_<max_added>_weighted_nodes.txt``.

    Raises:
        SystemExit: after printing the usage text, when a required argument
            is missing or the node count is not an integer.
    """
    try:
        network_file = input_list[1]
        seed_file = input_list[2]
        max_added = int(input_list[3])
    except (IndexError, ValueError, TypeError):
        # Only argument-shape problems are treated as usage errors; a bare
        # ``except`` would also swallow KeyboardInterrupt/SystemExit.
        print_usage()
        # Exit status 0 is kept for backward compatibility with callers.
        sys.exit(0)

    if len(input_list) >= 5:
        outfile = input_list[4]
    else:
        outfile = f'first_{max_added}_weighted_nodes.txt'
    return network_file, seed_file, max_added, outfile
# =============================================================================
def read_input(network_file, seed_file):
    """Read the network edge list and the weighted seed genes.

    Lines starting with ``#`` and blank lines are ignored in both files.

    Args:
        network_file: path to an edge list; the delimiter is sniffed from the
            first data line and the first two columns form an edge.
        seed_file: path to a whitespace-separated table whose first column is
            the gene id and whose second column is an integer weight.

    Returns:
        Tuple ``(G, seed_weights)``: an ``nx.Graph`` holding the edges and a
        dict mapping gene id to integer weight. A missing or non-integer
        weight falls back to 1.

    Raises:
        SystemExit: with status 1 when the network file has no data line.
    """
    G = nx.Graph()

    # --- detect the delimiter from the first data line
    delim = None
    with open(network_file, 'r') as net_handle:
        for line in net_handle:
            if line.startswith('#') or not line.strip():
                continue
            delim = csv.Sniffer().sniff(line).delimiter
            break
    if delim is None:
        print('Bad network file format')
        sys.exit(1)

    # --- read the edges
    with open(network_file, 'r') as net_handle:
        for line in net_handle:
            if line.startswith('#') or not line.strip():
                continue
            parts = line.strip().split(delim)
            G.add_edge(parts[0], parts[1])

    # --- read weighted seeds
    seed_weights = {}
    with open(seed_file, 'r') as seed_handle:
        for line in seed_handle:
            if line.startswith('#'):
                continue
            parts = line.split()  # any run of whitespace, tabs included
            if not parts:
                # a blank line carries no gene id
                continue
            try:
                wt = int(parts[1])
            except (IndexError, ValueError):
                # no weight column, or weight is not an integer
                wt = 1
            seed_weights[parts[0]] = wt
    return G, seed_weights
# =============================================================================
def compute_all_gamma_ln(N):
    """Tabulate ``scipy.special.gammaln(i)`` for every integer i in 1..N.

    Returns:
        Dict mapping each integer ``i`` (1 through ``N`` inclusive) to
        ``gammaln(i)``; empty when ``N`` is smaller than 1.
    """
    return {arg: scipy.special.gammaln(arg) for arg in range(1, N + 1)}
# =============================================================================
def logchoose(n, k, gamma_ln):
    """Return the log of the binomial coefficient C(n, k) from a gammaln table.

    Args:
        n, k: integers of the binomial coefficient.
        gamma_ln: mapping from integer ``i`` to ``gammaln(i)``; it must hold
            the keys ``n + 1``, ``k + 1`` and ``n - k + 1``.

    Returns:
        ``gamma_ln[n+1] - (gamma_ln[k+1] + gamma_ln[n-k+1])``, or ``np.inf``
        when ``n - k + 1`` is not positive.
    """
    remainder = n - k + 1
    if remainder <= 0:
        return np.inf
    log_numerator = gamma_ln[n + 1]
    log_denominator = gamma_ln[k + 1] + gamma_ln[remainder]
    return log_numerator - log_denominator
# =============================================================================
def gauss_hypergeom(x, r, b, n, gamma_ln):
    """Return C(r, x) * C(b, n - x) / C(r + b, n), evaluated in log space.

    The three log-binomials come from ``logchoose`` and are combined before a
    single ``np.exp`` call.
    """
    log_hits = logchoose(r, x, gamma_ln)
    log_misses = logchoose(b, n - x, gamma_ln)
    log_total = logchoose(r + b, n, gamma_ln)
    return np.exp(log_hits + log_misses - log_total)
# =============================================================================
def pvalue(kb, k, N, s, gamma_ln):
    """Sum ``gauss_hypergeom(n, s, N - s, k)`` for n from ``kb`` up to ``min(k, s)``.

    Returns:
        The accumulated sum capped at 1; 0.0 when the range is empty.
    """
    total = 0.0
    n = kb
    # Stop at k, or earlier once n would exceed s.
    while n <= k and n <= s:
        total += gauss_hypergeom(n, s, N - s, k, gamma_ln)
        n += 1
    return min(total, 1)
# =============================================================================
def get_neighbors_and_degrees(G):
    """Snapshot every node's neighbor set and degree from graph ``G``.

    Returns:
        Tuple ``(neighbors, degrees)``: two dicts keyed by node, holding
        ``set(G.neighbors(node))`` and ``G.degree(node)`` respectively.
    """
    vertices = list(G.nodes())
    adjacency = {vertex: set(G.neighbors(vertex)) for vertex in vertices}
    degree_of = {vertex: G.degree(vertex) for vertex in vertices}
    return adjacency, degree_of
# =============================================================================
def reduce_not_in_cluster_nodes(all_degrees, neighbors, not_in_cluster, cluster_weights):
    """Reduce the candidate nodes to one representative per weighted ``kb``.

    For each candidate, ``kb`` is the sum of the weights of its neighbors that
    are in the cluster. A link to a cluster node of weight ``w`` counts ``w``
    times, so the effective degree ``k`` is inflated by the same extra amount
    (``w - 1`` per cluster neighbor). This keeps ``kb <= k``; without it a
    candidate could show more cluster links than total links.

    Args:
        all_degrees: dict node -> degree in the network.
        neighbors: dict node -> set of neighboring nodes.
        not_in_cluster: iterable of candidate nodes; these are expected not to
            be keys of ``cluster_weights``.
        cluster_weights: dict cluster node -> weight.

    Returns:
        Dict mapping the chosen node to ``(kb, k)``: for every distinct ``kb``
        the candidate with the smallest ``k``. When several candidates share
        the same ``(kb, k)``, the one iterated last is kept.
    """
    kb2k = defaultdict(dict)
    for node in not_in_cluster:
        linked_weights = [
            cluster_weights[nei]
            for nei in neighbors[node]
            if nei in cluster_weights
        ]
        # weighted connections to the cluster
        kb = sum(linked_weights)
        # inflate the degree by the same extra weight that inflated kb
        k = all_degrees[node] + kb - len(linked_weights)
        kb2k[kb][k] = node

    # select the lowest-degree candidate for each kb
    reduced = {}
    for kb, k2node in kb2k.items():
        min_k = min(k2node)
        reduced[k2node[min_k]] = (kb, min_k)
    return reduced
# =============================================================================
def diamond_iteration_of_first_X_nodes(G, seed_weights, X):
    """Core DIAMOnD loop with weighted seeds.

    Starting from the seed genes, repeatedly adds the candidate with the
    smallest p-value until ``X`` nodes were added or the best p-value exceeds
    0.05.

    Args:
        G: graph providing ``number_of_nodes()`` plus whatever
            ``get_neighbors_and_degrees`` needs.
        seed_weights: dict gene -> weight; not modified.
        X: maximum number of nodes to add.

    Returns:
        List of ``(node, k, kb, p)`` tuples in the order the nodes were added.
    """
    N = G.number_of_nodes()
    neighbors, degrees = get_neighbors_and_degrees(G)

    # initial cluster weights
    cluster_weights = seed_weights.copy()
    s0 = sum(cluster_weights.values())
    # the network size grows by the extra weight carried by the seeds
    N_adj = N + sum((w - 1) for w in seed_weights.values())
    gamma_ln = compute_all_gamma_ln(N_adj + 1)

    # candidates: every neighbor of a cluster node that is not in the cluster
    not_in_cluster = set()
    for gene in cluster_weights:
        not_in_cluster |= neighbors.get(gene, set())
    not_in_cluster.difference_update(cluster_weights)

    added_nodes = []
    all_p = {}  # p-value cache keyed by (k, kb, s0)
    while len(added_nodes) < X:
        reduced = reduce_not_in_cluster_nodes(degrees, neighbors, not_in_cluster, cluster_weights)
        pmin = 1.1  # above any p-value, so the first candidate always wins
        next_node = None
        info = {}
        for node, (kb, k) in reduced.items():
            key = (k, kb, s0)
            # Membership test rather than ``get(...) or ...``: a cached
            # p-value of 0.0 is falsy and would otherwise be recomputed.
            if key not in all_p:
                all_p[key] = pvalue(kb, k, N_adj, s0, gamma_ln)
            p = all_p[key]
            if p < pmin:
                pmin = p
                next_node = node
            info[node] = (k, kb, p)

        # also taken when there are no candidates left (pmin stays 1.1)
        if pmin > 0.05:
            print(f"Stopping early at iteration {len(added_nodes)}: p-value = {pmin:.4f}")
            break

        # record addition
        k_next, kb_next, p_next = info[next_node]
        added_nodes.append((next_node, k_next, kb_next, p_next))

        # update cluster
        weight_next = seed_weights.get(next_node, 1)
        cluster_weights[next_node] = weight_next
        s0 += weight_next
        # add only the new node's neighbors; no need to rebuild the key set
        not_in_cluster.update(
            nei for nei in neighbors[next_node] if nei not in cluster_weights
        )
        not_in_cluster.discard(next_node)
    return added_nodes
# =============================================================================
def DIAMOnD(G_original, seed_weights, max_nodes, outfile):
    """Run weighted DIAMOnD and write the ranked nodes to ``outfile``.

    Seeds absent from the network are dropped (with a printed notice) before
    the iteration starts.

    Args:
        G_original: graph whose ``nodes()`` define the known genes.
        seed_weights: dict gene -> weight.
        max_nodes: maximum number of nodes to add.
        outfile: path of the tab-separated result file (rank, node).

    Returns:
        Tuple ``(seed_list, added)``: the seeds found in the network and the
        list of ``(node, k, kb, p)`` tuples produced by the iteration.
    """
    network_genes = set(G_original.nodes())

    # keep only the seeds that exist in the network
    seeds = {}
    for gene, weight in seed_weights.items():
        if gene in network_genes:
            seeds[gene] = weight
    if len(seed_weights) > len(seeds):
        missing = set(seed_weights) - network_genes
        print(f"Ignoring {len(missing)} seeds not in network: {missing}")

    added = diamond_iteration_of_first_X_nodes(G_original, seeds, max_nodes)

    # save output: header line, then one "rank<TAB>node" line per added node
    with open(outfile, 'w') as fout:
        fout.write('#rank\tDIAMOnD_node\n')
        fout.writelines(
            f"{rank}\t{node}\n"
            for rank, (node, _k, _kb, _p) in enumerate(added, 1)
        )
    return list(seeds), added
# =============================================================================
def run_diamond_weighted_from_args(args=None):
    """Run weighted DIAMOnD from an argument list.

    Args:
        args: list ``[network_file, seed_file, n, (outfile)]`` without the
            program name; when ``None``, ``sys.argv`` is used as is.

    Returns:
        Whatever ``DIAMOnD`` returns: ``(seed_list, added_nodes)``.
    """
    # Prepend a placeholder program name so both sources share the argv layout.
    argv = sys.argv if args is None else ['DIAMOnD_weighted.py'] + args
    netf, seedf, n, outfile = check_input_style(list(argv))
    graph, weights = read_input(netf, seedf)
    return DIAMOnD(graph, weights, n, outfile)
# Script entry point: take the arguments from the command line.
if __name__ == "__main__":
    run_diamond_weighted_from_args()
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
disease,score,n_input,n_used,zscore,pvalue,lcc_size,significance
Acanthosis Nigricans,0.0,200,200,8.595609719180702,0.0,200,True
"Bronchiolitis, Viral",0.0,200,200,5.550186398118612,0.0,199,True
Acanthosis Nigricans,0.1,200,200,7.800835882837143,0.0,196,True
disease,score,n_input,n_used,zscore,pvalue,lcc_size,significance
Acanthosis Nigricans,0.0,70,70,12.232261486730968,0.0,70,True
Acanthosis Nigricans,0.1,48,48,21.549552001770667,0.0,48,True
"Bronchiolitis, Viral",0.0,13,13,8.036465189401895,0.0,13,True
disease,score,n_input,n_used,zscore,pvalue,lcc_size,significance
Acanthosis Nigricans,0.0,63,63,3.7791431444824575,0.001,27,True
Acanthosis Nigricans,0.0,27,27,7.301711700487385,0.0,27,True
"Bronchiolitis, Viral",0.0,13,13,-0.8026240370093498,1.0,1,False
"Bronchiolitis, Viral",0.0,1,1,,1.0,1,False
Acanthosis Nigricans,0.1,37,37,5.233075002125382,0.0,11,True
Acanthosis Nigricans,0.1,11,11,10.192777957538603,0.0,11,True
disease,score,n_input,n_used,zscore,pvalue,lcc_size,significance
Acanthosis Nigricans,0.0,76,76,3.1402920979607636,0.0,50,True
"Bronchiolitis, Viral",0.0,22,22,5.0654874160779215,0.001,8,True
Acanthosis Nigricans,0.1,47,47,5.365588705196455,0.0,29,True
disease,score,n_input,n_used,zscore,pvalue,lcc_size,significance
Acanthosis Nigricans,0.0,85,85,6.82410673243988,0.0,85,True
"Bronchiolitis, Viral",0.0,23,23,11.312913145300755,0.0,23,True
Acanthosis Nigricans,0.1,53,53,5.512634412249499,0.0,53,True
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment