import pandas as pd
import os
# Build an asthma cohort from the MIMIC-IV 3.1 diagnoses table by rolling
# ICD codes up to PheCodes and keeping the patients mapped to PheCode 495.
#
# NOTE: `display` is not imported in this file; it is presumably the rich
# display function that IPython/Jupyter injects into notebook sessions.

# The data and mapping folders are resolved relative to the parent of the
# current working directory.
base_directory = os.path.dirname(os.getcwd())

# Read every column as a string: ICD codes can start with zeros (e.g.
# "07070") that a numeric dtype would strip, and `icd_version` must be a
# string for the concatenation below.
diagnoses_icd = (
    pd.read_csv(
        os.path.join(
            base_directory,
            "raw_data", "physionet.org", "files", "mimiciv", "3.1", "hosp",
            "diagnoses_icd.csv",
        ),
        dtype=str,
    ).rename(columns={'icd_code': 'code'})  # match the mapping file's key name
)

# Produces "ICD9" / "ICD10", the second merge key used by the rollup file.
diagnoses_icd['coding_system'] = "ICD" + diagnoses_icd['icd_version']

print("----Raw MIMIC IV 3.1 Diagnoses ICD----")
display(diagnoses_icd)

# ICD -> PheCode rollup table, also read as strings so the codes compare
# equal to the diagnosis codes during the merge.
icd_to_phecode = pd.read_csv(
    os.path.join(base_directory, 'scripts', 'rollup_mappings', "icd_to_phecode.csv"),
    dtype=str,
)

print("----ICD to PheCode Rollup file----")
display(icd_to_phecode.head())

print("----Comprehensive Diagnoses including ICD and PheCode----")
# pd.merge defaults to an inner join, so diagnoses without a PheCode mapping
# are dropped from the result.
comprehensive_diagnoses = pd.merge(
    diagnoses_icd, icd_to_phecode, on=['coding_system', 'code']
)
display(comprehensive_diagnoses.head())

print("----Identifying the cohort of interest----")
# PheCode is compared as a string because the mapping was read with dtype=str.
comprehensive_asthma_cohort = comprehensive_diagnoses[
    comprehensive_diagnoses['PheCode'] == '495'
]
display(comprehensive_asthma_cohort)
print(comprehensive_asthma_cohort.describe())

print("----Final Asthma Cohort----")
# One row per patient; the original row index is intentionally left as is.
asthma_cohort = comprehensive_asthma_cohort[['subject_id']].drop_duplicates()
display(asthma_cohort)
----Raw MIMIC IV 3.1 Diagnoses ICD----
| | subject_id | hadm_id | seq_num | code | icd_version | coding_system |
---|---|---|---|---|---|---|
0 | 10000032 | 22595853 | 1 | 5723 | 9 | ICD9 |
1 | 10000032 | 22595853 | 2 | 78959 | 9 | ICD9 |
2 | 10000032 | 22595853 | 3 | 5715 | 9 | ICD9 |
3 | 10000032 | 22595853 | 4 | 07070 | 9 | ICD9 |
4 | 10000032 | 22595853 | 5 | 496 | 9 | ICD9 |
... | ... | ... | ... | ... | ... | ... |
6364483 | 19999987 | 23865745 | 7 | 41401 | 9 | ICD9 |
6364484 | 19999987 | 23865745 | 8 | 78039 | 9 | ICD9 |
6364485 | 19999987 | 23865745 | 9 | 0413 | 9 | ICD9 |
6364486 | 19999987 | 23865745 | 10 | 36846 | 9 | ICD9 |
6364487 | 19999987 | 23865745 | 11 | 7810 | 9 | ICD9 |
6364488 rows × 6 columns
----ICD to PheCode Rollup file----
| | code | PheCode | coding_system |
---|---|---|---|
0 | 001 | 008 | ICD9 |
1 | 0010 | 008 | ICD9 |
2 | 0011 | 008 | ICD9 |
3 | 0019 | 008 | ICD9 |
4 | 002 | 008 | ICD9 |
----Comprehensive Diagnoses including ICD and PheCode----
| | subject_id | hadm_id | seq_num | code | icd_version | coding_system | PheCode |
---|---|---|---|---|---|---|---|
0 | 10000032 | 22595853 | 1 | 5723 | 9 | ICD9 | 571.81 |
1 | 10000826 | 20032235 | 4 | 5723 | 9 | ICD9 | 571.81 |
2 | 10000826 | 28289260 | 1 | 5723 | 9 | ICD9 | 571.81 |
3 | 10005866 | 26158160 | 4 | 5723 | 9 | ICD9 | 571.81 |
4 | 10008924 | 23676183 | 7 | 5723 | 9 | ICD9 | 571.81 |
----Identifying the cohort of interest----
| | subject_id | hadm_id | seq_num | code | icd_version | coding_system | PheCode |
---|---|---|---|---|---|---|---|
2478863 | 10001725 | 25563031 | 4 | 49390 | 9 | ICD9 | 495 |
2478864 | 10001884 | 26679629 | 7 | 49390 | 9 | ICD9 | 495 |
2478865 | 10003019 | 20030125 | 5 | 49390 | 9 | ICD9 | 495 |
2478866 | 10003019 | 20277210 | 10 | 49390 | 9 | ICD9 | 495 |
2478867 | 10003019 | 20962108 | 15 | 49390 | 9 | ICD9 | 495 |
... | ... | ... | ... | ... | ... | ... | ... |
5908161 | 17892612 | 24109018 | 1 | 49382 | 9 | ICD9 | 495 |
5908162 | 17997063 | 25519468 | 11 | 49382 | 9 | ICD9 | 495 |
5908163 | 18269165 | 28966193 | 6 | 49382 | 9 | ICD9 | 495 |
5908164 | 18958101 | 23643092 | 8 | 49382 | 9 | ICD9 | 495 |
5908165 | 19757198 | 28680884 | 9 | 49382 | 9 | ICD9 | 495 |
42057 rows × 7 columns
subject_id hadm_id seq_num code icd_version coding_system PheCode
count 42057 42057 42057 42057 42057 42057 42057
unique 20316 42035 39 11 2 2 1
top 18676703 24773199 5 J45909 10 ICD10 495
freq 60 3 4105 20679 21954 21954 42057
----Final Asthma Cohort----
| | subject_id |
---|---|
2478863 | 10001725 |
2478864 | 10001884 |
2478865 | 10003019 |
2478872 | 10004457 |
2478875 | 10004749 |
... | ... |
5908156 | 16550589 |
5908158 | 17562616 |
5908161 | 17892612 |
5908162 | 17997063 |
5908165 | 19757198 |
20316 rows × 1 columns