# Creating the NDC code to RxNorm code mapping
def roll_ndc2rxnorm(rxnsat_path, ingredient_path, output_path):
    """
    Build an NDC -> RxNorm mapping using RXNSAT.RRF and an ingredient rollup table.

    Processing steps:
      1. Read RXNSAT and keep the rows whose attribute name (ATN) is 'NDC';
         the attribute value (ATV) of those rows is used as the NDC code.
      2. Split them into "valid" rows (SAB == 'RXNORM' and SUPPRESS == 'N')
         and everything else ("obsolete"). Obsolete rows are kept only when
         ATV is exactly 11 digits.
      3. Stack valid rows ahead of obsolete rows and keep the first row per
         ATV, so a valid row wins whenever the same code appears in both.
      4. Replace each RXCUI by its ingredient from the rollup table; RXCUIs
         missing from the table are left unchanged.
      5. Write the de-duplicated (ndc, rxcui) pairs to ``output_path``.

    Args:
        rxnsat_path: Path to the pipe-delimited, header-less RXNSAT.RRF file.
        ingredient_path: Path to a CSV with at least the columns
            'base' and 'ingredient'.
        output_path: Destination CSV. Its parent directory is created when
            missing; a bare filename (no directory part) is written to the
            current working directory.

    Returns:
        DataFrame with the string columns 'ndc' and 'rxcui'
        (output schema: ndc,rxcui), identical to what is written to disk.
    """
    print("Generating NDC to RxNorm mapping table...")

    # --- Load RXNSAT (keep only required columns) ---
    cols_all = ['RXCUI', 'LUI', 'SUI', 'RXAUI', 'STYPE', 'CODE', 'ATUI',
                'SATUI', 'ATN', 'SAB', 'ATV', 'SUPPRESS', 'CVF']
    keep_cols = ['RXCUI', 'SUPPRESS', 'ATN', 'SAB', 'ATV']
    # RRF rows end with a trailing '|', which the parser reads as one extra,
    # always-empty field. Naming that field keeps the 13 real column names
    # aligned with the data instead of tripping a names/fields mismatch.
    # NOTE(review): a file without the trailing delimiter should still load
    # (short rows are padded) -- confirm on such input if it matters.
    names = cols_all + ['_TRAILING']
    rxnsat = pd.read_csv(
        rxnsat_path,
        sep='|',
        header=None,
        names=names,
        usecols=keep_cols,
        # Read everything as text so codes keep their leading zeros.
        dtype={c: str for c in keep_cols},
        engine='python'
    )
    print('total rows:', rxnsat.shape[0])

    # --- Keep only the NDC attribute rows ---
    rxnsat = rxnsat.loc[rxnsat['ATN'] == 'NDC'].copy()
    print('unique CUIs in NDC rows:', rxnsat['RXCUI'].nunique())

    # --- Split into valid rows and the remainder ---
    valid_mask = (rxnsat['SAB'] == 'RXNORM') & (rxnsat['SUPPRESS'] == 'N')
    table_valid = rxnsat.loc[valid_mask].copy()
    table_obsolete = rxnsat.loc[~valid_mask].copy()
    print('valid table size:', table_valid.shape[0])

    # Rows outside the valid set are only trusted when ATV is 11 digits.
    table_obsolete = table_obsolete.loc[
        (table_obsolete['ATV'].str.len() == 11) & (table_obsolete['ATV'].str.isdigit())
    ].copy()
    print('obsolete table size:', table_obsolete.shape[0])

    # Valid rows come first, so keep='first' prefers them for a shared ATV.
    table = pd.concat([table_valid, table_obsolete], ignore_index=True)
    table = table.drop_duplicates(subset=['ATV'], keep='first')
    print('final size:', table.shape[0])
    print('final unique CUIs:', table['RXCUI'].nunique())

    # --- Roll RXCUIs up to ingredients ---
    ing = pd.read_csv(ingredient_path, usecols=['base', 'ingredient'], dtype=str)
    # If a base occurs on several rows, the last row wins (dict semantics).
    base2ing = dict(zip(ing['base'], ing['ingredient']))
    table['RXCUI'] = table['RXCUI'].map(lambda cui: base2ing.get(cui, cui))

    out = (table.loc[:, ['ATV', 'RXCUI']]
           .rename(columns={'ATV': 'ndc', 'RXCUI': 'rxcui'})
           .drop_duplicates())

    # os.makedirs('') raises FileNotFoundError, so only create a directory
    # when the output path actually has a directory component.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    out.to_csv(output_path, index=False)
    return out
# Workspace root; every input and output below lives under the tutorial
# checkout inside it.
base_directory = "/n/data1/hsph/biostat/celehs/lab/va67/EHR_TUTORIAL_WORKSPACE"
_tutorial_dir = os.path.join(base_directory, 'scripts', 'EHR-Processing-Tutorial-main')
_rxnorm_dir = os.path.join(_tutorial_dir, 'meta_files', 'rxnorm')

# Inputs: the RXNSAT attribute file and the base -> ingredient rollup table.
rxnsat_path = os.path.join(_rxnorm_dir, 'rrf', 'RXNSAT.RRF')
ingredient_path = os.path.join(_rxnorm_dir, 'ingredient.csv')

# Output: the generated NDC -> RxNorm mapping CSV.
output_path = os.path.join(_tutorial_dir, 'Rollup_Mappings', 'NDC_to_RxNorm.csv')

# Build the mapping, then show it.
# NOTE(review): `display` is not defined in the visible code; presumably the
# notebook display helper -- confirm this runs in an IPython/Jupyter session.
ndc2rxnorm = roll_ndc2rxnorm(rxnsat_path, ingredient_path, output_path)
print('\nNDC to RxNorm mapping:')
display(ndc2rxnorm)