{
"cells": [
{
"cell_type": "markdown",
"id": "ac3c4f7b-3c6b-4698-9979-501b102b1296",
"metadata": {},
"source": [
"# Assemble CRACMM species metadata from CMAQ \n",
"---\n",
" author: Havala O.T. Pye (pye.havala@epa.gov)\n",
" date: created 2022-09-12\n",
"\n",
" updated: Nash Skipper\n",
" date: 2024-02-09\n",
"\n",
" updated: Nash Skipper\n",
" date: 2024-03-28\n",
" \n",
" updated: Havala Pye\n",
" date: 2025-02-18\n",
"\n",
" updated: Michael Pye\n",
" date: 2025-02-27 \n",
"---\n",
"## Notebook Description\n",
"This notebook collects data across the CMAQ model to create a table of species information in both csv and markdown formats. The csv version contains additional data not easily displayed in markdown. Output from this notebook is stored [here](https://github.com/USEPA/CRACMM/tree/main/metadata). After clicking the link, select the directory that corresponds to the chemical mechanism of your choice to find the correct output files. \n",
"\n",
"## Download Notebook\n",
"Click [here](https://github.com/USEPA/CRACMM/blob/main/utilities/markdown_metadata.ipynb) to download this tutorial as a Jupyter Notebook file. \n",
"\n",
"## CMAQ input files\n",
"- AE_{mech}.nml\n",
"- GC_{mech}.nml \n",
"- NR_{mech}.nml \n",
"- AERO_DATA.F \n",
"- SOA_DEFN.F \n",
"- hlconst.F \n",
"- {mech}_speciesdescription.csv\n",
"\n",
"## Mechanisms supported \n",
"- cracmm1_aq \n",
"- cracmm1amore_aq \n",
"- cracmm2 "
]
},
{
"cell_type": "markdown",
"id": "e0a836b6-4358-4206-b8e6-d22a01f94479",
"metadata": {},
"source": [
"## Setup libraries, paths, and function to prepare metadata"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "d4df66fa-7d2a-4b29-8e72-c3f63ee126a5",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5026cc63-a3e5-4fe7-8f1d-3825a2161a5c",
"metadata": {},
"outputs": [],
"source": [
"# Outputs are written to ./output under the directory the kernel is in when this cell runs.\n",
"# Create that exact directory (not ./output under workdir) so the writes in later cells\n",
"# cannot fail when the notebook is started from outside workdir.\n",
"outputfiledir = os.path.join(os.getcwd(), 'output')\n",
"os.makedirs(outputfiledir, exist_ok=True)\n",
"# CMAQ files (*.nml, *.F, {mech}_speciesdescription.csv) are expected in <workdir>/input\n",
"workdir = '/work/MOD3DEV/has/2023cracmm_ages/structurecuration/'\n",
"filepath = os.path.normpath(workdir)\n",
"os.chdir(filepath)\n",
"inputfiledir = os.path.join(os.getcwd(), 'input')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c81725dc-ee1a-4860-a9ef-33bb8c823140",
"metadata": {},
"outputs": [],
"source": [
"# Helpers and entry point used to prepare the dataframe with metadata\n",
"def _read_species_namelist(path):\n",
"    \"\"\"Read a CMAQ species namelist (GC/NR/AE_{mech}.nml) into a dataframe.\n",
"\n",
"    Applies the cleanup shared by all three namelists: skip the first 4 lines,\n",
"    drop the last parsed row, remove spaces from the column names, rename\n",
"    '!SPECIES' to 'Species', and remove quotes and spaces from species names.\n",
"    \"\"\"\n",
"    table = pd.read_csv(path, skiprows=4)\n",
"    table = table.drop([len(table) - 1])  # drop last row\n",
"    table.columns = table.columns.str.replace(' ', '', regex=False)\n",
"    table = table.rename(columns={\"!SPECIES\": \"Species\"})\n",
"    table['Species'] = table.Species.str.replace(\"'\", \"\", regex=False)\n",
"    table['Species'] = table.Species.str.replace(\" \", \"\", regex=False)\n",
"    return table\n",
"\n",
"\n",
"def _matching_lines(path, start_pattern):\n",
"    \"\"\"Return the right-stripped lines of a text file that match the regex start_pattern.\"\"\"\n",
"    with open(path) as source:  # context manager closes the file even if reading fails\n",
"        stripped = [line.rstrip() for line in source]\n",
"    return [line for line in stripped if re.search(start_pattern, line)]\n",
"\n",
"\n",
"def prep_metadata(mech):\n",
"    \"\"\"Assemble species metadata for one mechanism from the CMAQ files in inputfiledir.\n",
"\n",
"    Reads GC_{mech}.nml, NR_{mech}.nml, AE_{mech}.nml, hlconst.F, AERO_DATA.F,\n",
"    SOA_DEFN.F and {mech}_speciesdescription.csv from the notebook-level\n",
"    inputfiledir.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    mech : str\n",
"        cracmm1_aq or cracmm1amore_aq for CMAQv5.4;\n",
"        cracmm1_aq, cracmm1amore_aq, or cracmm2 for CMAQv5.5.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Gas/non-reactive and particle entries outer-merged on 'Species' and sorted\n",
"        by species name. Holds the merged source columns plus the derived 'Phase',\n",
"        'Molecular Weight (g/mol)', 'Explicit/Lumped', 'Representative', 'DTXSID',\n",
"        'SMILES', 'St' and 'Stable' columns.\n",
"\n",
"    Prints whether gas and particle molecular weights agree, and a warning for\n",
"    every species without a description.\n",
"    \"\"\"\n",
"\n",
"    ###########################################\n",
"    # Prep Gases\n",
"    filename = os.path.join(inputfiledir, 'GC_' + mech + '.nml')\n",
"    dfgc = _read_species_namelist(filename)\n",
"    dfgc['PhaseG'] = 'G'  # is it in the gas-phase?\n",
"    dfgc = dfgc.drop(['GC2AESURR','GC2AQSURR','IC','IC_FAC','BC','BC_FAC','FAC','CONC','WDEP','DDEP'], axis=1)\n",
"    dfgc['Species'] = dfgc.Species.str.replace('VROC', 'ROC', regex=False)  # drop V for matching with AE\n",
"\n",
"    ###########################################\n",
"    # Prep NR\n",
"    filename = os.path.join(inputfiledir, 'NR_' + mech + '.nml')\n",
"    dfnr = _read_species_namelist(filename)\n",
"    dfnr['PhaseG'] = 'G'\n",
"    dfnr = dfnr.drop(['NR2AESURR','NR2AQSURR','IC','IC_FAC','BC','BC_FAC','FAC','CONC','WDEP','DDEP'], axis=1)  # these won't match other nml\n",
"    # Append NR to GC\n",
"    dfgc = pd.concat([dfgc, dfnr], ignore_index=True)\n",
"    dfgc['WET-SCAVSURR'] = dfgc['WET-SCAVSURR'].str.replace(\"'\", \"\", regex=False)\n",
"    dfgc['WET-SCAVSURR'] = dfgc['WET-SCAVSURR'].str.replace(\" \", \"\", regex=False)\n",
"\n",
"    ###########################################\n",
"    #https://www.dataquest.io/wp-content/uploads/2019/03/python-regular-expressions-cheat-sheet.pdf\n",
"    # Prep hlconst, dissolution enthalpy for WET-SCAVSURR\n",
"    filename = os.path.join(inputfiledir, 'hlconst.F')\n",
"    column_names = ['hspecies','henryMatm','henryenthalpyK']\n",
"    # Parse every DATA SUBNAME( line; leading whitespace is not significant, so any amount is accepted.\n",
"    # Patterns are raw strings so the regex backslashes are not treated as (invalid) string escapes.\n",
"    henry_rows = []\n",
"    for line in _matching_lines(filename, r'^\\s*DATA\\s+SUBNAME\\('):\n",
"        hspecies = re.findall(r'\\)\\s*/\\s*\\'(.*)\\'.*!', line)[0]  # return name\n",
"        hlvalue = float(re.findall(r'DATA\\s+SUBNAME.*\\/.*,(.*),.*\\/.*!', line)[0])  # get the item between the first 2 commas between the slashes\n",
"        enthalpyK = float(re.findall(r'DATA\\s+SUBNAME.*\\/.*,.*,(.*).*\\/.*!', line)[0])\n",
"        henry_rows.append({'hspecies': hspecies, 'henryMatm': hlvalue, 'henryenthalpyK': enthalpyK})\n",
"    # Build the frame once (object dtype, as row-wise concatenation produced) instead of growing it per line\n",
"    dfhenry = pd.DataFrame(henry_rows, columns=column_names, dtype=object)\n",
"    dfhenry.hspecies = dfhenry.hspecies.str.replace(\" \", \"\", regex=False)\n",
"    dfgc = pd.merge(dfgc, dfhenry, left_on=\"WET-SCAVSURR\", right_on=\"hspecies\", how=\"left\")\n",
"\n",
"    ###########################################\n",
"    # Prep AE\n",
"    filename = os.path.join(inputfiledir, 'AE_' + mech + '.nml')\n",
"    dfae = _read_species_namelist(filename)\n",
"    dfae['PhaseP'] = 'P'  # particle phase\n",
"    dfae = dfae.drop(['AE2AQSURR','FAC.1','IC','IC_FAC','BC','BC_FAC','FAC','CONC','WDEP','DDEP','OPTICS','DRYDEPSURR','WET-SCAVSURR'], axis=1)  # these won't match other nml\n",
"\n",
"    ###########################################\n",
"    # Prep AERO_DATA and get density, kappa\n",
"    filename = os.path.join(inputfiledir, 'AERO_DATA.F')\n",
"    column_names = ['adspecies','aerodensity','aerokappa']\n",
"    # Parse every '& spcs_list_type(' continuation line: name is the quoted first field,\n",
"    # density the 6th field and kappa the last field\n",
"    aero_rows = []\n",
"    for line in _matching_lines(filename, r'^\\s*&\\s*spcs_list_type\\('):\n",
"        # one comment has () which is problematic, drop\n",
"        line = line.replace('(Black)', 'Black')\n",
"        adspecies = re.findall(r'^\\s*&\\s*spcs_list_type\\(\\'(.*)\\',.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*\\)', line)[0]  # return name\n",
"        aerodensity = float(re.findall(r'^\\s*&\\s*spcs_list_type\\(.*,.*,.*,.*,.*,\\s*(.*),.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*\\)', line)[0])\n",
"        aerokappa = float(re.findall(r'^\\s*&\\s*spcs_list_type\\(.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,(.*)\\)', line)[0])\n",
"        aero_rows.append({'adspecies': adspecies, 'aerodensity': aerodensity, 'aerokappa': aerokappa})\n",
"    dfad = pd.DataFrame(aero_rows, columns=column_names, dtype=object)\n",
"    dfad.adspecies = dfad.adspecies.str.replace(\" \", \"\", regex=False)\n",
"    dfae = pd.merge(dfae, dfad, left_on=\"Species\", right_on=\"adspecies\", how=\"left\")\n",
"\n",
"    ###########################################\n",
"    # Prep SOA_DEFN\n",
"    filename = os.path.join(inputfiledir, 'SOA_DEFN.F')\n",
"    column_names = ['oaspecies','oacstar','oaenthalpy','oaotoc','oaomoc']\n",
"    # Parse every '& oa_type(' continuation line: name is the quoted first field, followed by\n",
"    # fields 5-8 read as C*, enthalpy, O:C and OM:OC\n",
"    oa_rows = []\n",
"    for line in _matching_lines(filename, r'^\\s*&\\s*oa_type\\('):\n",
"        oaspecies = re.findall(r'^\\s*&\\s*oa_type\\(\\'(.*)\\',.*,.*,.*,.*,.*,.*,.*,.*,.*,.*,.*\\)', line)[0]  # return name\n",
"        oacstar = float(re.findall(r'^\\s*&\\s*oa_type\\(.*,.*,.*,.*,\\s*(.*),.*,.*,.*,.*,.*,.*,.*\\)', line)[0])\n",
"        oaenthalpy = float(re.findall(r'^\\s*&\\s*oa_type\\(.*,.*,.*,.*,.*,\\s*(.*),.*,.*,.*,.*,.*,.*\\)', line)[0])\n",
"        oaotoc = float(re.findall(r'^\\s*&\\s*oa_type\\(.*,.*,.*,.*,.*,.*,\\s*(.*),.*,.*,.*,.*,.*\\)', line)[0])\n",
"        oaomoc = float(re.findall(r'^\\s*&\\s*oa_type\\(.*,.*,.*,.*,.*,.*,.*,\\s*(.*),.*,.*,.*,.*\\)', line)[0])\n",
"        oa_rows.append({'oaspecies': oaspecies, 'oacstar': oacstar, 'oaenthalpy': oaenthalpy,\n",
"                        'oaotoc': oaotoc, 'oaomoc': oaomoc})\n",
"    dfoa = pd.DataFrame(oa_rows, columns=column_names, dtype=object)\n",
"    dfoa.oaspecies = dfoa.oaspecies.str.replace(\" \", \"\", regex=False)\n",
"    dfae = pd.merge(dfae, dfoa, left_on=\"Species\", right_on=\"oaspecies\", how=\"left\")\n",
"\n",
"    # Finish formatting ae.nml info\n",
"    # (removing a trailing I,J,K from species names was needed for CMAQ v5.3 but not v5.4)\n",
"    # Rename particle species so they match their gas-phase counterparts\n",
"    for particle_name, gas_name in (('AROC','ROC'), ('AHOM','HOM'), ('AELHOM','ELHOM'),\n",
"                                    ('AOP3','OP3'), ('ATRPN','TRPN'), ('AHONIT','HONIT')):\n",
"        dfae['Species'] = dfae.Species.str.replace(particle_name, gas_name, regex=False)\n",
"\n",
"    ###########################################\n",
"    # merge and add g (gas) or p (particle) suffix and do molec wt check\n",
"    dfgc = pd.merge(dfgc, dfae, on=\"Species\", how=\"outer\", suffixes=(\"_g\",\"_p\"))\n",
"    dfgc['chckmw'] = dfgc['MOLWT_g'] - dfgc['MOLWT_p']  # gas and particle molecular weights should match\n",
"    # Compare the absolute difference so a particle weight larger than the gas weight is also reported\n",
"    mw_mismatch = dfgc[dfgc['chckmw'].abs() > 0]\n",
"    if len(mw_mismatch) > 0:\n",
"        print(\">>gas and particle molecular weights have an inconsistency<<\")\n",
"        print(mw_mismatch)\n",
"    else:\n",
"        print(\">>gas and particle molecular weights match<<\")\n",
"\n",
"    ###########################################\n",
"    # bring in descriptions\n",
"    filename = os.path.join(inputfiledir, mech + '_speciesdescription.csv')\n",
"    dfdesc = pd.read_csv(filename)\n",
"    # need to remove spaces from column and species names\n",
"    dfdesc.columns = dfdesc.columns.str.replace(' ', '', regex=False)\n",
"    dfdesc['Species'] = dfdesc.Species.str.replace(' ', '', regex=False)\n",
"    dfgc = pd.merge(dfgc, dfdesc, left_on='Species', right_on='Species', how=\"left\")\n",
"    # warning if no matching species description\n",
"    missing_desc = dfgc[dfgc['Description'].isna()]\n",
"    if missing_desc.size > 0:\n",
"        for spc in missing_desc['Species']:\n",
"            print(f'Warning: {spc} species description is missing')\n",
"        print(f'Check {mech}_speciesdescription.csv for missing species descriptions')\n",
"\n",
"    ###########################################\n",
"    # Organize data sort alphabetical, take GC.nml value first\n",
"    dfgc = dfgc.sort_values(\"Species\")  # sort alphabetical\n",
"    dfgc[\"Phase\"] = dfgc[\"PhaseG\"].fillna('') + dfgc[\"PhaseP\"].fillna('')\n",
"    dfgc['Molecular Weight (g/mol)'] = dfgc['MOLWT_g'].fillna(dfgc['MOLWT_p'])\n",
"    dfgc['Explicit/Lumped'] = dfgc['ExplicitorLumped_g'].fillna(dfgc['ExplicitorLumped_p'])\n",
"    dfgc['Representative'] = dfgc['!RepCmp_g'].fillna(dfgc['!RepCmp_p'])\n",
"    dfgc['Representative'] = dfgc.Representative.str.replace(\"!\", \"\", regex=False)\n",
"    dfgc['DTXSID'] = dfgc['DTXSID_g'].fillna(dfgc['DTXSID_p'])\n",
"    dfgc['DTXSID'] = dfgc['DTXSID'].fillna('')\n",
"    dfgc['DTXSID'] = dfgc['DTXSID'].str.replace(' ', '', regex=False)\n",
"    dfgc['SMILES'] = dfgc['SMILES_g'].fillna(dfgc['SMILES_p'])\n",
"    dfgc['SMILES'] = dfgc['SMILES'].str.replace(' ', '', regex=False)\n",
"    # Diagnose stable species based on them being transported in gas or aerosol\n",
"    dfgc['St'] = dfgc['TRNS_g'].fillna('') + dfgc['TRNS_p'].fillna('')\n",
"    dfgc['St'] = dfgc['St'].str.find('Yes')\n",
"    # str.find returns the match position (0 when the text starts with 'Yes') and -1 when absent\n",
"    dfgc.loc[dfgc['St'] >= 0, 'Stable'] = 'Yes'\n",
"    dfgc.loc[dfgc['St'] < 0, 'Stable'] = 'No'\n",
"\n",
"    return dfgc"
]
},
{
"cell_type": "markdown",
"id": "35153f82-117f-470c-981d-af9d7ec9537b",
"metadata": {},
"source": [
"## Prepare metadata for mechanism\n",
"Warnings will print if molecular weights differ between the gas and particle phases. Paired gas-particle species are identified by a prepended V (gas) or A (particle)."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c64f1c3e-0c14-49c9-8072-86921d80868e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
">>gas and particle molecular weights match<<\n"
]
}
],
"source": [
"# Mechanism to process (cracmm1_aq, cracmm1amore_aq, or cracmm2) and its assembled metadata table\n",
"mech = 'cracmm2'\n",
"dfgc = prep_metadata(mech)"
]
},
{
"cell_type": "markdown",
"id": "45a28fcc-70a0-452a-94b7-3988f17f03f9",
"metadata": {},
"source": [
"### Save to Markdown File"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "74b4fb68-64b8-46f4-8c58-cc1a55555ac1",
"metadata": {},
"outputs": [],
"source": [
"###########################################\n",
"# Write out Markdown for CMAQ GitHub\n",
"###########################################\n",
"dfmarkdown = dfgc[['Species','Description','Phase','Molecular Weight (g/mol)','Explicit/Lumped','Representative','DTXSID','SMILES']].copy()\n",
"# Blank out 'NA' placeholders, then backslash-escape the brackets and parentheses in SMILES\n",
"# so markdown does not read them as link syntax\n",
"dfmarkdown['SMILES'] = dfmarkdown['SMILES'].str.replace('NA', '', regex=False)\n",
"for special in ('[', ']', '(', ')'):\n",
"    # '\\\\' is an explicit single backslash (a bare backslash before a bracket is an invalid string escape)\n",
"    dfmarkdown['SMILES'] = dfmarkdown['SMILES'].str.replace(special, '\\\\' + special, regex=False)\n",
"dfmarkdown['SMILES'] = dfmarkdown['SMILES'].fillna('')\n",
"\n",
"# Hyperlink SMILES to DTXSID entry in dashboard (only where a DTXSID longer than 5 characters exists)\n",
"smiles_link = '[' + dfmarkdown.SMILES + '](https://comptox.epa.gov/dashboard/chemical/details/' + dfmarkdown.DTXSID + ')'\n",
"maskval = dfmarkdown.DTXSID.str.len() > 5\n",
"dfmarkdown.loc[maskval, 'SMILES'] = smiles_link[maskval]\n",
"dfmarkdown = dfmarkdown.drop(['DTXSID'], axis=1)\n",
"\n",
"# assemble and format table header\n",
"headerline = ' Species | Description | Phase | Molecular Weight (g/mol) | Explicit/ Lumped | Representative Structure | SMILES '\n",
"firstmarkdownline = \"Gas (G) and particle (P) species from the namelists. SMILES link to representative structures in the EPA Chemicals Dashboard (if available).\"\n",
"secondmarkdownline = \"Note that for each particulate species in CMAQ, a letter will be appended to the name to designate the size, or mode, of the aerosol being represented: I = Aitken mode, J = Accumulation mode, K = Coarse mode. Prepending of a species with a V or A in CMAQ or the chemical mechanism files indicates the species resides in the gas or particulate phase. \"\n",
"# Semicolons become commas in the free-text columns\n",
"dfmarkdown['Representative'] = dfmarkdown.Representative.str.replace(\";\", \",\", regex=False)\n",
"dfmarkdown['Description'] = dfmarkdown.Description.str.replace(';', ',', regex=False)\n",
"# Mark up units and powers of ten with HTML sub/superscripts, which GitHub renders inside markdown.\n",
"# NOTE(review): replacing each token with itself has no effect, so the tags below are the presumed\n",
"# intent - confirm against the published species tables.\n",
"# Order matters: longer exponents (10-10 ... 10-14) are handled before 10-1.\n",
"description_markup = [\n",
"    ('ug/m3', 'μg m<sup>-3</sup>'),\n",
"    ('log10C', 'log<sub>10</sub>C'),\n",
"    ('kOH', 'k<sub>OH</sub>'),\n",
"    ('cm3', 'cm<sup>3</sup>'),\n",
"    ('s-1', 's<sup>-1</sup>'),\n",
"    ('10-10', '10<sup>-10</sup>'),\n",
"    ('10-11', '10<sup>-11</sup>'),\n",
"    ('10-12', '10<sup>-12</sup>'),\n",
"    ('10-13', '10<sup>-13</sup>'),\n",
"    ('10-14', '10<sup>-14</sup>'),\n",
"    ('10-2', '10<sup>-2</sup>'),\n",
"    ('10-1', '10<sup>-1</sup>'),\n",
"    ('10+1', '10<sup>+1</sup>'),\n",
"    ('10+2', '10<sup>+2</sup>'),\n",
"    ('10+3', '10<sup>+3</sup>'),\n",
"    ('10+4', '10<sup>+4</sup>'),\n",
"    ('10+5', '10<sup>+5</sup>'),\n",
"    ('10+6', '10<sup>+6</sup>'),\n",
"]\n",
"for plain_text, marked_up in description_markup:\n",
"    dfmarkdown['Description'] = dfmarkdown.Description.str.replace(plain_text, marked_up, regex=False)\n",
"\n",
"mdfile = mech + '_species_table.md'\n",
"filename = os.path.join(outputfiledir, mdfile)\n",
"# Title, explanatory notes (each followed by a blank line), table header and separator row\n",
"preamble = [\n",
"    mech.upper() + ' Species Table',\n",
"    firstmarkdownline,\n",
"    '',\n",
"    secondmarkdownline,\n",
"    '',\n",
"    headerline,\n",
"    ' ----- | ----- | ----- | ----- | ----- | ----- | ----- ',\n",
"]\n",
"# Context managers guarantee the file is closed before pandas appends to it, even if a write fails\n",
"with open(filename, 'w') as mdhandle:\n",
"    mdhandle.write('\\n'.join(preamble) + '\\n')\n",
"# Append the table rows as pipe-separated values\n",
"dfmarkdown.to_csv(filename, index=False, header=False, sep='|', mode='a')\n",
"# Finish with an empty line\n",
"with open(filename, 'a') as mdhandle:\n",
"    mdhandle.write('\\n')"
]
},
{
"cell_type": "markdown",
"id": "25879c81-3e52-44c2-814c-16cdc5e07ef3",
"metadata": {},
"source": [
"### Save to csv file"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "9f05eddc-41a1-48af-97e7-4a401e835f69",
"metadata": {},
"outputs": [],
"source": [
"############# Metadata CSV\n",
"# Columns exported to the csv, in output order\n",
"csv_columns = ['Species','Description','Phase','Stable','Molecular Weight (g/mol)',\n",
"               'Explicit/Lumped','Representative','SMILES','DTXSID','henryMatm',\n",
"               'henryenthalpyK','aerodensity','aerokappa','oacstar','oaenthalpy',\n",
"               'oaomoc']\n",
"dfmetadata = dfgc[csv_columns].copy()\n",
"# Non-positive kappa values and empty DTXSID entries are reported as NA\n",
"dfmetadata['aerokappa'] = dfmetadata.aerokappa.mask(dfmetadata.aerokappa <= 0, 'NA')\n",
"dfmetadata['DTXSID'] = dfmetadata.DTXSID.mask(dfmetadata.DTXSID == '', 'NA')\n",
"# Replace internal column names with descriptive headers that include units\n",
"dfmetadata = dfmetadata.rename(columns={\n",
"    'henryMatm': 'H Law (M/atm)',\n",
"    'henryenthalpyK': 'Enthalpy of solution (K)',\n",
"    'aerodensity': 'Aerosol density (kg/m3)',\n",
"    'aerokappa': 'Kappa_org',\n",
"    'oacstar': 'C* (microg/m3)',\n",
"    'oaenthalpy': 'Enthalpy of vaporization (J/mol)',\n",
"    'oaomoc': 'OM to OC (g/g)',\n",
"})\n",
"\n",
"# Any remaining missing value is written as NA\n",
"dfmetadata = dfmetadata.fillna('NA')\n",
"\n",
"metafile = mech + '_metadata.csv'\n",
"filename = os.path.join(outputfiledir, metafile)\n",
"dfmetadata.to_csv(filename, index=False, header=True, sep=',', mode='w')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "rhel8_py39",
"language": "python",
"name": "rhel8_py39"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}