IABP Doctor Edition¶

Install¶

In [32]:
# !pip install pandas numpy scipy openpyxl statsmodels

Import¶

In [11]:
import pandas as pd
import numpy as np
import re
from scipy.stats import chi2_contingency, fisher_exact, ttest_ind
from openpyxl import load_workbook

Load Data¶

In [12]:
file_path = "IABP thesis master sheet edited version.xlsx"

df = pd.read_excel(file_path)

Clean Column Names¶

In [13]:
df.columns = (
    df.columns
    .str.strip()
    .str.replace(r"[^\w]+", "_", regex=True)
    .str.replace(r"_+", "_", regex=True)
    .str.strip("_")
    .str.lower()
)

df = df.loc[:, ~df.columns.duplicated()]

df.columns.tolist()
Out[13]:
['s_no',
 'gender',
 'age',
 'bmi',
 'smoking',
 'diabetes',
 'systemic_hypertension',
 'dyslipidemia',
 'ccs_angina_grade',
 'nyha_grade',
 'duration_of_symptoms',
 'left_ventricular_ef',
 'left_main',
 'previous_cardiac_surgery',
 'stemi_nstemi_unstable_angina',
 'peripheral_vascular_disease',
 'rft',
 'lft',
 'inr',
 'platelets',
 'on_anti_platelets',
 'euro_score',
 'surgery_elective_urgency_emergency',
 'iabp_insertion_time',
 'reinsertion_of_iabp',
 'diagnosis',
 'type_of_operation',
 'number_of_grafts_i_ii_iii_iv',
 'type_of_grafts_used_in_cabg',
 'indications',
 'gcs',
 'power_of_all_4_limbs',
 'peripheral_pulse_of_lower_limbs',
 'echo_new_findings',
 'gi_symptoms',
 'lft_1',
 'rft_1',
 'airway_status',
 'sheath',
 'chest_x_ray_position_of_iabp',
 'heparin_infuson_given_according_to_act',
 'complications',
 'abg',
 'lactate_levels',
 'vascular_complications',
 'renal_dysfunction',
 'compartment_syndrome',
 'infection',
 'visceral_injury',
 'neuro_complications',
 'balloon_rupture',
 'day_of_iabp_removal',
 'forced_iabp_removal_due_to_complications',
 'number_of_days_in_icu',
 'number_of_days_in_in_hospital',
 'outcome']

Extract Comments (Data Dictionary)¶

In [14]:
wb = load_workbook(file_path)
ws = wb.active

comments_dict = {}

for row in ws.iter_rows():
    for cell in row:
        if cell.comment:
            col_name = ws.cell(row=1, column=cell.column).value
            comments_dict[col_name] = cell.comment.text

Parse Mappings¶

In [15]:
def parse_mapping(comment):
    mapping = {}
    if not comment:
        return mapping
    
    lines = comment.split("\n")
    
    for line in lines:
        line = line.strip()
        match = re.match(r"(\d+)\s*[-: ]\s*(.+)", line)
        if match:
            key = int(match.group(1))
            value = match.group(2).strip()
            mapping[key] = value
            
    return mapping


column_mappings = {}

for col, comment in comments_dict.items():
    parsed = parse_mapping(comment)
    if len(parsed) > 0:
        column_mappings[col.upper()] = parsed

Create Readable Dataset¶

In [16]:
df_readable = df.copy()

for col in df.columns:
    original_col = col.upper()
    
    if original_col in column_mappings:
        mapping = column_mappings[original_col]
        df_readable[col] = df[col].map(mapping).fillna(df[col])

Helper Functions¶

In [17]:
def chi_square_test(col1, col2):
    table = pd.crosstab(df[col1], df[col2])
    
    if table.shape == (2,2):
        _, p = fisher_exact(table)
    else:
        _, p, _, _ = chi2_contingency(table)
    
    return p, table


def t_test(col, outcome_col="outcome"):
    group1 = df[df[outcome_col] == 0][col].dropna()
    group2 = df[df[outcome_col] == 1][col].dropna()
    
    t_stat, p = ttest_ind(group1, group2)
    return p

Bronze -> Silver Checks¶

In [23]:
column_mappings
Out[23]:
{'GENDER': {0: 'Female', 1: 'Male'},
 'AGE': {0: '<30 Y', 1: '31 to 50', 2: '51 to 60', 3: '>61'},
 'BMI': {67: 'y'},
 'SMOKING': {0: 'no', 1: 'yes'},
 'DIABETES': {0: 'no', 1: 'yes'},
 'SYSTEMIC \nHYPERTENSION': {0: 'no', 1: 'yes'},
 'DYSLIPIDEMIA': {0: 'no', 1: 'yes'},
 'CCS ANGINA\n GRADE': {1: 'class 1',
  2: 'class 2',
  3: 'class 3',
  4: 'class 4'},
 'NYHA \n GRADE': {1: 'Grade 1', 2: 'Grade 2', 3: 'Grade 3', 4: 'Grade 4'},
 'DURATION OF \nSYMPTOMS': {0: '< 1 day',
  1: '1 day to 7 days',
  2: '7 days to 1 month',
  3: '> 1 month'},
 '  LEFT MAIN': {0: 'no LM', 1: 'LM +'},
 ' STEMI / NSTEMI \n/ UNSTABLE ANGINA': {0: 'STEMI',
  1: 'NSTEMI',
  2: 'Unstable angina',
  3: 'NA'},
 ' PERIPHERAL \nVASCULAR DISEASE ': {0: 'no', 1: 'yes'},
 ' PLATELETS ': {0: '<50k', 1: 'to to 100k', 2: '>1L'},
 ' ON ANTI-\nPLATELETS': {0: 'no',
  1: 'yes stopped 4 days before surgery',
  2: 'continued till surgery'},
 ' SURGERY ELECTIVE / URGENCY\n / EMERGENCY': {0: 'elective',
  1: 'Urgent',
  2: 'emergency'},
 'REINSERTION \nOF IABP': {0: 'no', 1: 'yes'},
 'DIAGNOSIS': {0: 'CAD',
  1: 'CAD + MR',
  2: 'CAD + AR',
  3: 'CAD + MR/AR',
  4: 'MR',
  5: 'AR',
  6: 'MS',
  7: 'AS',
  8: 'MR/AR/TR',
  9: 'CAD + VSR',
  10: 'Constritive pericaritis'},
 '·NUMBER OF GRAFTS -  \nI, II,  III, IV ': {0: 'No Graft',
  1: 'I',
  2: 'II',
  3: 'III',
  4: 'IV',
  5: 'V'},
 'TYPE OF GRAFTS USED IN CABG ': {0: 'with only venous grafts',
  1: 'with only arterial',
  2: 'CABG with both venous and arterial grafts',
  3: 'NA'},
 'GCS': {0: '13 to 15 / 8 to 10 T',
  1: '9 to 12 / 4 to 7T',
  2: '3 to 8 / 2 to 3 T'},
 ' POWER OF \nALL 4 LIMBS': {0: 'normal', 1: 'abnormal', 2: 'NA (SEDATION)'},
 'ECHO NEW \nFINDINGS': {0: 'no', 1: 'yes'},
 'GI SYMPTOMS': {0: 'no',
  1: 'Hematemesis',
  2: 'hematochezia',
  3: 'Abdominal pain,',
  4: 'Abdominal distension'},
 'INR': {0: '0.8  TO 1.2', 1: '1.3 to 2.5', 2: '> 2.5'},
 'PLATELETS': {0: '<50k', 1: 'to to 100k', 2: '>1L'},
 'AIRWAY \nSTATUS': {0: 'extubated', 1: 'intubated', 3: 'tracheostomy'},
 'SHEATH': {0: 'With out', 1: 'With'},
 ' CHEST X-RAY POSITION \nOF IABP ': {0: 'norma', 1: 'abnormal position'},
 'HEPARIN INFUSON GIVEN ACCORDING TO\nACT': {0: 'no', 1: 'yes'},
 'ABG': {0: 'normal',
  1: 'metabolic acidosis',
  2: 'metabolic alkalosis',
  3: 'respiratory acidosis',
  4: 'respiratory alkalosis'},
 'LACTATE \nLEVELS': {0: 'normal', 1: 'elivated'},
 'INFECTION': {0: 'no', 1: 'yes local', 2: 'yes systems', 3: 'yes sepsis'},
 'VISCERAL \nINJURY ': {0: 'no', 1: 'yes'},
 'NEURO COMPLICATIONS': {0: 'no',
  1: 'Paraplegia',
  2: 'Stroke',
  3: 'Foot drop'},
 'BALLOON RUPTURE ': {0: 'no', 1: 'yes'},
 'FORCED IABP REMOVAL \nDUE TO COMPLICATIONS': {0: 'no', 1: 'yes'},
 ' NUMBER OF \nDAYS IN IN HOSPITAL': {24: 'days'}}

Analysis Section¶

Demographics vs Complications¶

In [18]:
demographics = [
    "gender", "age", "bmi", "smoking",
    "diabetes", "systemic_hypertension", "dyslipidemia"
]

results = []

for col in demographics:
    try:
        p, table = chi_square_test(col, "vascular_complications")
        results.append([col, "vascular_complications", p])
    except:
        pass

pd.DataFrame(results, columns=["Variable", "Target", "P_value"])
Out[18]:
Variable Target P_value
0 gender vascular_complications 0.505165
1 age vascular_complications 0.899990
2 bmi vascular_complications 0.002730
3 smoking vascular_complications 0.689167
4 diabetes vascular_complications 0.701812
5 systemic_hypertension vascular_complications 0.691120
6 dyslipidemia vascular_complications 0.786045

Indications & Surgery vs Outcome¶

In [19]:
cols = ["indications", "type_of_operation"]

results2 = []

for col in cols:
    try:
        p, table = chi_square_test(col, "outcome")
        results2.append([col, "outcome", p])
    except:
        pass

pd.DataFrame(results2, columns=["Variable", "Target", "P_value"])
Out[19]:
Variable Target P_value
0 indications outcome 5.108896e-03
1 type_of_operation outcome 4.689308e-18

Sheath vs Complications¶

In [20]:
p, table = chi_square_test("sheath", "vascular_complications")

print("P-value:", p)
table
P-value: 0.1001158149833359
Out[20]:
vascular_complications 0 1 6
sheath
0 65 3 3
1 43 6 0

Heparin vs Complications¶

In [21]:
p, table = chi_square_test("heparin_infuson_given_according_to_act", "vascular_complications")

print("P-value:", p)
table
P-value: 0.08187145311507667
Out[21]:
vascular_complications 0 1 6
heparin_infuson_given_according_to_act
0 43 7 1
1 65 2 2

Preoperative Data vs Outcome (T-Test)¶

In [24]:
continuous_vars = [
    "left_ventricular_ef",
    "platelets",
    "inr",
    "lactate_levels"
]

results3 = []

for col in continuous_vars:
    try:
        p = t_test(col)
        results3.append([col, p])
    except:
        pass

pd.DataFrame(results3, columns=["Variable", "P_value"])
C:\Users\Asus\AppData\Local\Temp\ipykernel_41596\3643293397.py:16: SmallSampleWarning: One or more sample arguments is too small; all returned values will be NaN. See documentation for sample size requirements.
  t_stat, p = ttest_ind(group1, group2)
Out[24]:
Variable P_value
0 left_ventricular_ef NaN
1 platelets NaN
2 inr NaN
3 lactate_levels NaN

Pre vs Post Renal Failure¶

In [25]:
p, table = chi_square_test("rft", "renal_dysfunction")

print("P-value:", p)
table
P-value: 8.458793985488693e-05
Out[25]:
renal_dysfunction 0 1 2
rft
0 78 7 6
1 12 8 6
2 0 0 1
3 1 0 1

Complications Summary¶

In [26]:
complications = [
    "vascular_complications",
    "renal_dysfunction",
    "infection",
    "balloon_rupture"
]

for c in complications:
    print("\n", c)
    print(df_readable[c].value_counts())
 vascular_complications
vascular_complications
0    108
1      9
6      3
Name: count, dtype: int64

 renal_dysfunction
renal_dysfunction
0    91
1    15
2    14
Name: count, dtype: int64

 infection
infection
no            110
yes sepsis      8
yes local       2
Name: count, dtype: int64

 balloon_rupture
balloon_rupture
0    120
Name: count, dtype: int64
In [28]:
final_results = pd.concat([
    pd.DataFrame(results, columns=["Variable", "Target", "P_value"]),
    pd.DataFrame(results2, columns=["Variable", "Target", "P_value"]),
    pd.DataFrame(results3, columns=["Variable", "P_value"])
])

final_results.to_excel("final_analysis_results.xlsx", index=False)
df_readable.to_excel("clean_readable_data.xlsx", index=False)

Rough Area¶

In [31]:
comments_dict
Out[31]:
{'GENDER': 'Author:\n0 - Female\n1 - Male',
 'Age': 'Author:\n0: <30 Y\n1 : 31 to 50\n2 : 51 to 60\n3 : >61',
 'BMI': 'Author:\n67 y\nmale\nw - 83\nH - na',
 'Smoking': 'Author:\n0 - no\n1 - yes',
 'Diabetes': 'Author:\n0- no\n1- yes',
 'Systemic \nhypertension': 'Author:\n0- no\n1- yes',
 'Dyslipidemia': 'Author:\n0- no\n1- yes',
 'ccs angina\n grade': 'Author:\n1 - class 1\n2 - class 2\n3 - class 3\n4 - class 4',
 'NYHA \n grade': 'Author:\n1 - Grade 1\n2 - Grade 2\n3 - Grade 3\n4 - Grade 4',
 'duration of \nsymptoms': 'Author:\n0 - < 1 day\n1 - 1 day to 7 days\n2 - 7 days to 1 month\n3- > 1 month',
 'Left ventricular \nEF': 'Author:\n35\n',
 '  Left Main': 'Author:\n0 - no LM\n1 - LM +',
 ' Previous cardiac \nsurgery  ': 'Author:\nPost CMV',
 ' STEMI / NSTEMI \n/ Unstable angina': 'Author:\n0 - STEMI\n1- NSTEMI\n2 - Unstable angina\n3- NA',
 ' Peripheral \nvascular disease ': 'Author:\n0 - no\n1 - yes',
 'RFT': 'Author:\ncreat 1.3 \nbun 53',
 'LFT': 'Author:\ntotal 2\nindirect 0.4\ndirect 1.6\n',
 '\xa0 INR': 'Author:\n1.38',
 ' Platelets ': 'Author:\n0 - <50k\n1 - to to 100k\n2 - >1L',
 ' On Anti-\nplatelets': 'Author:\n0 - no\n1 - yes stopped 4 days before surgery\n2 - continued till surgery',
 '\xa0 Euro score': 'Author:\n6.06',
 ' surgery Elective / Urgency\n / Emergency': 'Author:\n0 - elective\n1 - Urgent\n2 - emergency',
 'IABP insertion time': 'Author:\nVF\n',
 'Reinsertion \nof IABP': 'Author:\n0 -no\n1 - yes',
 'Diagnosis': 'Author:\n0- CAD \n1- CAD + MR\n2- CAD + AR\n3- CAD + MR/AR\n 4- MR\n5- AR\n6- MS\n7- AS\n8 - MR/AR/TR\n9 - CAD + VSR\n10 - Constritive pericaritis',
 'Type of operation ': 'Author:\nCABG with MVR\n',
 '·Number of grafts -  \nI, II,  III, IV ': 'Author:\n0 - No Graft\n1- I\n2 - II \n3 - III\n4 -  IV \n5- V\n',
 'type of grafts used in CABG ': 'Author:\n0 - with only venous grafts \n1- with only arterial \ngrafts \n2 - CABG with both venous and arterial grafts\n3 - NA',
 'Indications': 'Author:\npre op pulmonary edema',
 'GCS': 'Author:\n0 - 13 to 15 / 8 to 10 T\n1 - 9 to 12 / 4 to 7T\n2 - 3 to 8 / 2 to 3 T',
 ' Power of \nall 4 limbs': 'Author:\n0 - normal\n1 - abnormal\n2- NA (SEDATION)',
 'Peripheral pulse \nof lower limbs': 'Author:\nabsent in left leg, foot amputation due to PVD in 2016, \nIABP placed in right FA',
 'Echo new \nfindings': 'Author:\n0 - no\n1 - yes\n',
 'GI symptoms': 'Author:\n0- no\n1 - Hematemesis \n2- hematochezia \n3 - Abdominal pain, \n4- Abdominal distension\n',
 'INR': 'Author:\n0-  0.8  TO 1.2\n1 - 1.3 to 2.5\n2 - > 2.5',
 'Platelets': 'Author:\n0 - <50k\n1 - to to 100k\n2 - >1L',
 'Airway \nstatus': 'Author:\n0 - extubated\n1 - intubated\n3- tracheostomy\n',
 'Sheath': 'Author:\n0 -With out\n1- With',
 ' Chest x-ray position \nof IABP ': 'Author:\n0 - norma\n1 - abnormal position',
 'Heparin infuson given according to\nACT': 'Author:\n0 -no\n1 - yes',
 'ABG': 'Author:\n0 - normal\n1 - metabolic acidosis\n2 - metabolic alkalosis\n3 - respiratory acidosis\n4 - respiratory alkalosis\n',
 'Lactate \nlevels': 'Author:\n0 - normal\n1 - elivated',
 'Vascular \ncomplications': 'Author:\nvascular spasm',
 'Renal \ndysfunction': 'Author:\nrabhdo due to compartmnet syndrm',
 'Compartment\nsyndrome ': 'Author:\nyes',
 'Infection': 'Author:\n0 - no\n1 - yes local\n2 - yes systems\n3 - yes sepsis',
 'Visceral \ninjury ': 'Author:\n0 - no\n1 -  yes',
 'Neuro complications': 'Author:\n0- no\n1 - Paraplegia \n2 -  Stroke\n3 - Foot drop\n\n',
 'Balloon rupture ': 'Author:\n0 - no\n1 -  yes',
 'Day  of IABP \nremoval': 'Author:\n3rd day of IABP\n',
 'Forced IABP removal \ndue to complications': 'Author:\n 0 - no\n1 -yes',
 ' Number of \ndays in ICU': 'Author:\n23days',
 ' Number of \ndays in in hospital': 'Author:\n24 days\n',
 'Outcome': 'Author:\nsepsis '}
In [ ]: