# %% [markdown]
# # Assemble CRACMM species metadata from CMAQ
# ---
#     author: Havala O.T. Pye (pye.havala@epa.gov)
#     date: created 2022-09-12
#
#     updated: Nash Skipper
#     date: 2024-02-09
#
#     updated: Nash Skipper
#     date: 2024-03-28
#
#     updated: Havala Pye
#     date: 2025-02-18
#
#     updated: Michael Pye
#     date: 2025-02-27
# ---
# ## Notebook Description
# This notebook collects data across the CMAQ model to create a table of species information in both csv and markdown formats. The csv version contains additional data not easily displayed in markdown. Output from this notebook is stored [here](https://github.com/USEPA/CRACMM/tree/main/metadata). After clicking the link, select the directory that corresponds to the chemical mechanism of your choice to find the correct output files.
#
# ## Download Notebook
# Click [here](https://github.com/USEPA/CRACMM/blob/main/utilities/markdown_metadata.ipynb) to download this tutorial as a Jupyter Notebook file.
#
# ## CMAQ input files
# - AE_{mech}.nml
# - GC_{mech}.nml
# - NR_{mech}.nml
# - AERO_DATA.F
# - SOA_DEFN.F
# - hlconst.F
# - {mech}_speciesdescription.csv
#
# ## Mechanisms supported
# - cracmm1_aq
# - cracmm1amore_aq
# - cracmm2

# %% [markdown]
# ## Setup libraries, paths, and function to prepare metadata

# %%
import os
import re

import pandas as pd

# %%
# True when this file is executed as a notebook or script, False when it is
# imported as a module; the cells that touch the file system only run when True.
RUN_PIPELINE = __name__ == "__main__"

# Work directory used when neither an argument nor CRACMM_WORKDIR is supplied.
DEFAULT_WORKDIR = '/work/MOD3DEV/has/2023cracmm_ages/structurecuration/'


def configure_paths(workdir=None):
    """Change into the work directory and return its input/output directories.

    This code expects *.nml and files from CMAQ to be present in ./input and
    will output in ./output, both relative to the work directory.  The output
    directory is created when missing.  Both paths are derived *after* the
    directory change so that they always point inside the work directory.

    Parameters
    ----------
    workdir : str, optional
        Directory to work in.  Defaults to the ``CRACMM_WORKDIR`` environment
        variable when set, otherwise to ``DEFAULT_WORKDIR``.

    Returns
    -------
    tuple of str
        ``(input_dir, output_dir)`` as absolute paths.

    Raises
    ------
    OSError
        If the work directory cannot be entered or ./output cannot be created.
    """
    if workdir is None:
        workdir = os.environ.get('CRACMM_WORKDIR', DEFAULT_WORKDIR)
    os.chdir(os.path.normpath(workdir))
    base_dir = os.getcwd()
    input_dir = os.path.join(base_dir, 'input')
    output_dir = os.path.join(base_dir, 'output')
    os.makedirs(output_dir, exist_ok=True)
    return input_dir, output_dir


if RUN_PIPELINE:
    inputfiledir, outputfiledir = configure_paths()

# %%
# Columns of each namelist that are discarded (these won't match other nml).
_GC_DROP_COLUMNS = ['GC2AESURR', 'GC2AQSURR', 'IC', 'IC_FAC', 'BC', 'BC_FAC',
                    'FAC', 'CONC', 'WDEP', 'DDEP']
_NR_DROP_COLUMNS = ['NR2AESURR', 'NR2AQSURR', 'IC', 'IC_FAC', 'BC', 'BC_FAC',
                    'FAC', 'CONC', 'WDEP', 'DDEP']
_AE_DROP_COLUMNS = ['AE2AQSURR', 'FAC.1', 'IC', 'IC_FAC', 'BC', 'BC_FAC', 'FAC',
                    'CONC', 'WDEP', 'DDEP', 'OPTICS', 'DRYDEPSURR', 'WET-SCAVSURR']

# Particle-species name fragments rewritten so they match the gas-phase names.
# Applied in this order as plain substring replacements.
_PARTICLE_TO_GAS_NAMES = (
    ('AROC', 'ROC'),
    ('AHOM', 'HOM'),
    ('AELHOM', 'ELHOM'),
    ('AOP3', 'OP3'),
    ('ATRPN', 'TRPN'),
    ('AHONIT', 'HONIT'),
)


def _strip_quotes_and_spaces(series):
    """Return ``series`` with every single quote and space removed from its strings."""
    return series.str.replace("'", "", regex=False).str.replace(" ", "", regex=False)


def _read_species_namelist(path, phase_column, phase_flag, drop_columns):
    """Load the species table of one CMAQ namelist file into a dataframe.

    The first four lines are skipped, the next line provides the column names
    and the last parsed row is discarded (presumably the namelist terminator;
    confirm against the input files).  Spaces are removed from column names,
    ``!SPECIES`` becomes ``Species``, species names lose quotes and spaces,
    ``phase_column`` is set to ``phase_flag`` and ``drop_columns`` are removed.
    """
    table = pd.read_csv(path, skiprows=4)
    table = table.drop([len(table) - 1])  # drop last row
    table.columns = table.columns.str.replace(' ', '', regex=False)
    table = table.rename(columns={"!SPECIES": "Species"})
    table['Species'] = _strip_quotes_and_spaces(table['Species'])
    table[phase_column] = phase_flag
    return table.drop(drop_columns, axis=1)


def _matching_lines(path, start_pattern):
    """Return the right-stripped lines of ``path`` that match ``start_pattern``."""
    with open(path) as source:  # closed even if a later parse step fails
        stripped = [raw.rstrip() for raw in source]
    return [line for line in stripped if re.search(start_pattern, line)]


def _field_pattern(constructor, position, total, capture):
    """Build the regex that captures one field of a Fortran constructor call.

    The pattern matches a line starting with `` & constructor(`` followed by
    ``total`` comma-separated fields; field ``position`` (1-based) is replaced
    by ``capture`` and every other field is ``.*``.
    """
    fields = [".*"] * total
    fields[position - 1] = capture
    return r"^ & " + constructor + r"\(" + ",".join(fields) + r"\)"


def _records_to_frame(records, columns, name_column):
    """Turn parsed records into an object-dtype frame with space-free names.

    Building the frame once from a list avoids growing it row by row.  Object
    dtype is kept so the merged columns behave as they did when rows were
    appended one at a time.
    """
    frame = pd.DataFrame(records, columns=columns, dtype=object)
    frame[name_column] = frame[name_column].str.replace(" ", "", regex=False)
    return frame


def _read_hlconst(path):
    """Parse Henry's law value and dissolution enthalpy from hlconst.F.

    Reads lines that start with DATA SUBNAME; the name is the quoted string
    after the first slash, followed by the two numeric values.
    """
    records = []
    for line in _matching_lines(path, r"^ DATA SUBNAME\("):
        records.append({
            'hspecies': re.findall(r"\)\s*/\s*'(.*)'.*!", line)[0],  # return name
            # the item between the first 2 commas between the slashes
            'henryMatm': float(re.findall(r"DATA SUBNAME.*\/.*,(.*),.*\/.*!", line)[0]),
            'henryenthalpyK': float(re.findall(r"DATA SUBNAME.*\/.*,.*,(.*).*\/.*!", line)[0]),
        })
    return _records_to_frame(records, ['hspecies', 'henryMatm', 'henryenthalpyK'], 'hspecies')


def _read_aero_data(path):
    """Parse species name, density and kappa from AERO_DATA.F.

    Reads lines that start with `` & spcs_list_type(``, which are expected to
    hold 18 comma-separated fields: the name is field 1, the density field 6
    and kappa field 18.
    """
    name_re = _field_pattern('spcs_list_type', 1, 18, r"'(.*)'")
    density_re = _field_pattern('spcs_list_type', 6, 18, r"\s*(.*)")
    kappa_re = _field_pattern('spcs_list_type', 18, 18, r"(.*)")
    records = []
    for line in _matching_lines(path, r"^ & spcs_list_type\("):
        # one comment has () which is problematic, drop
        line = line.replace('(Black)', 'Black')
        records.append({
            'adspecies': re.findall(name_re, line)[0],
            'aerodensity': float(re.findall(density_re, line)[0]),
            'aerokappa': float(re.findall(kappa_re, line)[0]),
        })
    return _records_to_frame(records, ['adspecies', 'aerodensity', 'aerokappa'], 'adspecies')


def _read_soa_defn(path):
    """Parse organic aerosol properties from SOA_DEFN.F.

    Reads lines that start with `` & oa_type(``, which are expected to hold 12
    comma-separated fields: name (1), C* (5), enthalpy (6), O:C (7), OM:OC (8).
    """
    name_re = _field_pattern('oa_type', 1, 12, r"'(.*)'")
    value_res = {
        'oacstar': _field_pattern('oa_type', 5, 12, r"\s*(.*)"),
        'oaenthalpy': _field_pattern('oa_type', 6, 12, r"\s*(.*)"),
        'oaotoc': _field_pattern('oa_type', 7, 12, r"\s*(.*)"),
        'oaomoc': _field_pattern('oa_type', 8, 12, r"\s*(.*)"),
    }
    records = []
    for line in _matching_lines(path, r"^ & oa_type\("):
        record = {'oaspecies': re.findall(name_re, line)[0]}
        for column, pattern in value_res.items():
            record[column] = float(re.findall(pattern, line)[0])
        records.append(record)
    columns = ['oaspecies', 'oacstar', 'oaenthalpy', 'oaotoc', 'oaomoc']
    return _records_to_frame(records, columns, 'oaspecies')


# Function to prepare dataframe with metadata
def prep_metadata(mech, input_dir=None):
    """Assemble one dataframe of species metadata for a chemical mechanism.

    uses CMAQ files: AE.nml, GC.nml, NR.nml. AERO_DATA.F, SOA_DEFN.F,
    hlconst.F, {mech}_speciesdescription.csv

    Parameters
    ----------
    mech : str
        mech input string should be cracmm1_aq or cracmm1amore_aq for CMAQv5.4
        cracmm1_aq, cracmm1amore_aq, or cracmm2 for CMAQv5.5
    input_dir : str, optional
        Directory holding the input files.  Defaults to the module-level
        ``inputfiledir`` set by the path configuration cell.

    Returns
    -------
    pandas.DataFrame
        One row per species, sorted by ``Species``, holding the merged gas and
        particle columns (suffixes ``_g`` / ``_p``) plus the derived columns
        ``Phase``, ``Molecular Weight (g/mol)``, ``Explicit/Lumped``,
        ``Representative``, ``DTXSID``, ``SMILES``, ``St`` and ``Stable``.

    Notes
    -----
    Prints whether gas and particle molecular weights agree, and a warning for
    every species without a description.
    """
    if input_dir is None:
        input_dir = inputfiledir  # module-level value from the path cell

    def input_path(name):
        return os.path.join(input_dir, name)

    ###########################################
    # Prep Gases
    dfgc = _read_species_namelist(input_path('GC_' + mech + '.nml'), 'PhaseG', 'G',
                                  _GC_DROP_COLUMNS)
    dfgc['Species'] = dfgc.Species.str.replace('VROC', 'ROC', regex=False)  # drop for matching with AE

    ###########################################
    # Prep NR and append NR to GC
    dfnr = _read_species_namelist(input_path('NR_' + mech + '.nml'), 'PhaseG', 'G',
                                  _NR_DROP_COLUMNS)
    dfgc = pd.concat([dfgc, dfnr], ignore_index=True)
    dfgc['WET-SCAVSURR'] = _strip_quotes_and_spaces(dfgc['WET-SCAVSURR'])

    ###########################################
    # Prep hlconst, dissolution enthalpy for WET-SCAVSURR
    dfhenry = _read_hlconst(input_path('hlconst.F'))
    dfgc = pd.merge(dfgc, dfhenry, left_on="WET-SCAVSURR", right_on="hspecies", how="left")

    ###########################################
    # Prep AE
    dfae = _read_species_namelist(input_path('AE_' + mech + '.nml'), 'PhaseP', 'P',
                                  _AE_DROP_COLUMNS)

    ###########################################
    # Prep AERO_DATA and get density, kappa
    dfad = _read_aero_data(input_path('AERO_DATA.F'))
    dfae = pd.merge(dfae, dfad, left_on="Species", right_on="adspecies", how="left")

    ###########################################
    # Prep SOA_DEFN
    dfoa = _read_soa_defn(input_path('SOA_DEFN.F'))
    dfae = pd.merge(dfae, dfoa, left_on="Species", right_on="oaspecies", how="left")

    # Finish formatting ae.nml info: match particle names with gas names
    for particle_name, gas_name in _PARTICLE_TO_GAS_NAMES:
        dfae['Species'] = dfae.Species.str.replace(particle_name, gas_name, regex=False)

    ###########################################
    # merge and add g (gas) or p (particle) suffix and do molec wt check
    dfgc = pd.merge(dfgc, dfae, on="Species", how="outer", suffixes=("_g", "_p"))
    dfgc['chckmw'] = dfgc['MOLWT_g'] - dfgc['MOLWT_p']  # gas and particle molecular weights should match
    # Compare the magnitude so a heavier particle is reported as well; species
    # present in only one phase give NaN and are not flagged.
    mw_mismatch = dfgc['chckmw'].abs() > 0
    if mw_mismatch.any():
        print(">>gas and particle molecular weights have an inconsistency<<")
        print(dfgc[mw_mismatch])
    else:
        print(">>gas and particle molecular weights match<<")

    ###########################################
    # bring in descriptions
    dfdesc = pd.read_csv(input_path(mech + '_speciesdescription.csv'))
    dfdesc.columns = dfdesc.columns.str.replace(' ', '', regex=False)
    # need to remove spaces from species names
    dfdesc['Species'] = dfdesc.Species.str.replace(' ', '', regex=False)
    dfgc = pd.merge(dfgc, dfdesc, left_on='Species', right_on='Species', how="left")
    # warning if no matching species description
    undescribed = dfgc.loc[dfgc['Description'].isna(), 'Species']
    if len(undescribed) > 0:
        for spc in undescribed:
            print(f'Warning: {spc} species description is missing')
        print(f'Check {mech}_speciesdescription.csv for missing species descriptions')

    ###########################################
    # Organize data sort alphabetical, take GC.nml value first
    dfgc = dfgc.sort_values("Species")  # sort alphabetical
    dfgc["Phase"] = dfgc["PhaseG"].fillna('') + dfgc["PhaseP"].fillna('')
    dfgc['Molecular Weight (g/mol)'] = dfgc['MOLWT_g'].fillna(dfgc['MOLWT_p'])
    dfgc['Explicit/Lumped'] = dfgc['ExplicitorLumped_g'].fillna(dfgc['ExplicitorLumped_p'])
    dfgc['Representative'] = dfgc['!RepCmp_g'].fillna(dfgc['!RepCmp_p'])
    dfgc['Representative'] = dfgc.Representative.str.replace("!", "", regex=False)
    dfgc['DTXSID'] = dfgc['DTXSID_g'].fillna(dfgc['DTXSID_p'])
    dfgc['DTXSID'] = dfgc['DTXSID'].fillna('')
    dfgc['DTXSID'] = dfgc['DTXSID'].str.replace(' ', '', regex=False)
    dfgc['SMILES'] = dfgc['SMILES_g'].fillna(dfgc['SMILES_p'])
    dfgc['SMILES'] = dfgc['SMILES'].str.replace(' ', '', regex=False)
    # Diagnose stable species based on them being transported in gas or aerosol
    dfgc['St'] = dfgc['TRNS_g'].fillna('') + dfgc['TRNS_p'].fillna('')
    dfgc['St'] = dfgc['St'].str.find('Yes')  # -1 when 'Yes' is absent
    # A match at position 0 also counts as found.
    dfgc['Stable'] = 'No'
    dfgc.loc[dfgc['St'] >= 0, 'Stable'] = 'Yes'

    return dfgc

# %% [markdown]
# ## Prepare metadata for mechanism
# Warnings will print if molecular weights differ across gas and particle phases. Paired gas-particle species are identified by a prepended A and V.

# %%
if RUN_PIPELINE:
    mech = 'cracmm2'
    dfgc = prep_metadata(mech)

# %% [markdown]
# ### Save to Markdown File

# %%
###########################################
# Write out Markdown for CMAQ GitHub
###########################################
_MARKDOWN_COLUMNS = ['Species', 'Description', 'Phase', 'Molecular Weight (g/mol)',
                     'Explicit/Lumped', 'Representative', 'DTXSID', 'SMILES']

# Literal replacements applied, in order, to the species descriptions.
# NOTE(review): from 'log10C' onward each pair maps a string to itself, so as
# written they leave the text unchanged; presumably the targets were meant to
# carry markup (e.g. sub/superscripts) -- confirm against the published tables.
_DESCRIPTION_REPLACEMENTS = (
    (';', ','),
    ('ug/m3', 'μg m-3'),
    ('log10C', 'log10C'),
    ('kOH', 'kOH'),
    ('cm3', 'cm3'),
    ('s-1', 's-1'),
    ('10-10', '10-10'),
    ('10-11', '10-11'),
    ('10-12', '10-12'),
    ('10-13', '10-13'),
    ('10-14', '10-14'),
    ('10-2', '10-2'),
    ('10-1', '10-1'),
    ('10+1', '10+1'),
    ('10+2', '10+2'),
    ('10+3', '10+3'),
    ('10+4', '10+4'),
    ('10+5', '10+5'),
    ('10+6', '10+6'),
)


def write_markdown_table(dfgc, mech, output_dir):
    """Write ``{mech}_species_table.md`` to ``output_dir`` and return the table.

    Parameters
    ----------
    dfgc : pandas.DataFrame
        Frame returned by ``prep_metadata``.
    mech : str
        Mechanism name; used for the file name and, upper-cased, the title.
    output_dir : str
        Existing directory that receives the file.

    Returns
    -------
    pandas.DataFrame
        The rows written below the table header.  Markdown brackets in SMILES
        are escaped and, where the DTXSID is longer than five characters, the
        SMILES links to the EPA Chemicals Dashboard entry.
    """
    dfmarkdown = dfgc[_MARKDOWN_COLUMNS].copy()
    smiles = dfmarkdown['SMILES'].str.replace('NA', '', regex=False)
    for bracket in '[]()':  # escape characters that markdown treats as link syntax
        smiles = smiles.str.replace(bracket, '\\' + bracket, regex=False)
    dfmarkdown['SMILES'] = smiles.fillna('')

    # Hyperlink SMILES to DTXSID entry in dashboard
    linked = ('[' + dfmarkdown.SMILES
              + '](https://comptox.epa.gov/dashboard/chemical/details/'
              + dfmarkdown.DTXSID + ')')
    has_dtxsid = dfmarkdown.DTXSID.str.len() > 5
    dfmarkdown.loc[has_dtxsid, 'SMILES'] = linked[has_dtxsid]
    dfmarkdown = dfmarkdown.drop(['DTXSID'], axis=1)

    dfmarkdown['Representative'] = dfgc.Representative.str.replace(";", ",", regex=False)
    description = dfmarkdown.Description
    for old, new in _DESCRIPTION_REPLACEMENTS:
        description = description.str.replace(old, new, regex=False)
    dfmarkdown['Description'] = description

    # assemble and format table header
    headerline = ' Species | Description | Phase | Molecular Weight (g/mol) | Explicit/ Lumped | Representative Structure | SMILES '
    firstmarkdownline = "Gas (G) and particle (P) species from the namelists. SMILES link to representative structures in the EPA Chemicals Dashboard (if available)."
    secondmarkdownline = "Note that for each particulate species in CMAQ, a letter will be appended to the name to designate the size, or mode, of the aerosol being represented: I = Aitken mode, J = Accumulation mode, K = Coarse mode. Prepending of a species with a V or A in CMAQ or the chemical mechanism files indicates the species resides in the gas or particulate phase. "
    preamble = [
        mech.upper() + ' Species Table',
        firstmarkdownline,
        '',
        secondmarkdownline,
        '',
        headerline,
        ' ----- | ----- | ----- | ----- | ----- | ----- | ----- ',
    ]

    filename = os.path.join(output_dir, mech + '_species_table.md')
    with open(filename, 'w', encoding='utf-8') as mdfile:
        mdfile.write('\n'.join(preamble) + '\n')
    dfmarkdown.to_csv(filename, index=False, header=False, sep='|', mode='a')
    with open(filename, 'a', encoding='utf-8') as mdfile:
        mdfile.write('\n')
    return dfmarkdown


if RUN_PIPELINE:
    dfmarkdown = write_markdown_table(dfgc, mech, outputfiledir)

# %% [markdown]
# ### Save to csv file

# %%
############# Metadata CSV
_METADATA_COLUMNS = ['Species', 'Description', 'Phase', 'Stable', 'Molecular Weight (g/mol)',
                     'Explicit/Lumped', 'Representative', 'SMILES', 'DTXSID', 'henryMatm',
                     'henryenthalpyK', 'aerodensity', 'aerokappa', 'oacstar', 'oaenthalpy',
                     'oaomoc']

_METADATA_HEADINGS = {
    'henryMatm': 'H Law (M/atm)',
    'henryenthalpyK': 'Enthalpy of solution (K)',
    'aerodensity': 'Aerosol density (kg/m3)',
    'aerokappa': 'Kappa_org',
    'oacstar': 'C* (microg/m3)',
    'oaenthalpy': 'Enthalpy of vaporization (J/mol)',
    'oaomoc': 'OM to OC (g/g)',
}


def write_metadata_csv(dfgc, mech, output_dir):
    """Write ``{mech}_metadata.csv`` to ``output_dir`` and return the table.

    Non-positive kappa values, empty DTXSID strings and all missing values are
    written as ``NA``; the property columns are renamed to headings with units.

    Parameters
    ----------
    dfgc : pandas.DataFrame
        Frame returned by ``prep_metadata``.
    mech : str
        Mechanism name used for the file name.
    output_dir : str
        Existing directory that receives the file.

    Returns
    -------
    pandas.DataFrame
        The table that was written.
    """
    dfmetadata = dfgc[_METADATA_COLUMNS].copy()
    dfmetadata['aerokappa'] = dfmetadata.aerokappa.mask(dfmetadata.aerokappa <= 0, 'NA')
    dfmetadata['DTXSID'] = dfmetadata.DTXSID.mask(dfmetadata.DTXSID == '', 'NA')
    dfmetadata = dfmetadata.rename(columns=_METADATA_HEADINGS)
    dfmetadata = dfmetadata.fillna('NA')

    filename = os.path.join(output_dir, mech + '_metadata.csv')
    dfmetadata.to_csv(filename, index=False, header=True, sep=',', mode='w')
    return dfmetadata


if RUN_PIPELINE:
    dfmetadata = write_metadata_csv(dfgc, mech, outputfiledir)