Source code for gemma.gemmafiles
'''File readers for G-EMMA input files'''
from gemmaclass import gemma
from sampleclass import sample
import numpy as np
import pandas
import dateutil.parser as parser
import datetime as dt
def read_gemma_endmembers(fname='gemma_endmembers.txt'):
    '''Read a g-emma end-members file into a pandas DataFrame

    The file is parsed line by line, after removing surrounding whitespace:

    * blank lines and lines starting with ``!`` (comments) are skipped;
    * a line starting with ``[`` names the end-member that the following
      data rows belong to (brackets and spaces are stripped from the name);
    * every other line is a tab-separated data row: solute name,
      distribution label, then the numeric values, which are converted to
      float and stored in the Min, Max, Mean and Sd columns in that order.

    Parameters
    ----------
    fname : filename (default: gemma_endmembers.txt)

    Returns
    -------
    p : multi-index (end-member, solute) pandas.DataFrame with columns
        Dist, Min, Max, Mean, Sd

    Raises
    ------
    ValueError
        If a data row appears before any ``[end-member]`` line, has fewer
        than two tab-separated fields, or holds a value that cannot be
        converted to float.
    '''
    cols = ['Dist', 'Min', 'Max', 'Mean', 'Sd']
    inames = ['Em', 'Sol']
    index = []
    data = []
    endmember = None  # name from the most recent '[...]' line
    with open(fname, 'r') as f:
        for lineno, raw in enumerate(f, 1):
            line = raw.strip()
            # Blank lines carry no data; '!' introduces a comment line.
            if not line or line.startswith('!'):
                continue
            if line.startswith('['):
                endmember = line.strip('[] ')
                continue
            if endmember is None:
                raise ValueError(
                    '%s, line %d: data row found before any [end-member] line'
                    % (fname, lineno))
            fields = line.split('\t')
            if len(fields) < 2:
                raise ValueError(
                    '%s, line %d: expected tab-separated solute, '
                    'distribution and values' % (fname, lineno))
            index.append((endmember, fields[0]))
            # The distribution label stays a string; the rest are numbers.
            data.append([fields[1]] + [float(x) for x in fields[2:]])
    return pandas.DataFrame(data=data,
                            index=pandas.MultiIndex.from_tuples(index,
                                                                names=inames),
                            columns=cols)
def read_gemma_stream(fname='gemma_stream.txt'):
    '''Read a g-emma stream file into a pandas DataFrame

    The file is parsed line by line, after removing surrounding whitespace:

    * blank lines and lines starting with ``!`` (comments) are skipped;
    * until the header is found, a line containing ``Sample`` is taken as
      the tab-separated header; any other line must be an integer
      (presumably the number of samples - it is validated but not used);
    * after the header, every line is a tab-separated data row: sample
      name, a date (parsed with dateutil), then values converted to float.

    Each row is paired with the header field by field; if the two differ
    in length the extra fields are dropped. A later row with the same
    sample name replaces the earlier one.

    Parameters
    ----------
    fname : filename (default: gemma_stream.txt)

    Returns
    -------
    p : pandas.DataFrame (samples as index, header fields as columns)

    Raises
    ------
    ValueError
        If a line before the header is not an integer, a data row has
        fewer than two tab-separated fields, or a value cannot be
        converted to float. Date parsing errors from dateutil propagate.
    '''
    data = {}
    header = None  # stays None until the 'Sample' header line is seen
    with open(fname, 'r') as f:
        for lineno, raw in enumerate(f, 1):
            line = raw.strip()
            # Blank lines carry no data; '!' introduces a comment line.
            if not line or line.startswith('!'):
                continue
            if header is None:
                if 'Sample' in line:
                    header = line.split('\t')
                else:
                    # Pre-header lines must be integers; the value itself
                    # is not needed to build the frame.
                    int(line)
                continue
            fields = line.split('\t')
            if len(fields) < 2:
                raise ValueError(
                    '%s, line %d: expected tab-separated sample, date '
                    'and values' % (fname, lineno))
            name = fields[0]
            date = parser.parse(fields[1])
            row = [name, date] + [float(x) for x in fields[2:]]
            data[name] = dict(zip(header, row))
    # The dict of dicts is built column-per-sample; transpose so that each
    # sample becomes a row.
    return pandas.DataFrame(data).T
def endmembers_to_sep_dfs(df):
    '''Convert multi-index dataframe of endmembers - solute to dataframes
    per end-member of solute characteristics

    The 'Sol' index level is unstacked once, and the 'Mean', 'Sd', 'Min'
    and 'Max' column groups are then selected from that single result.

    Parameters
    ----------
    df : pandas.DataFrame, usually result of read_gemma_endmembers

    Returns
    -------
    mean : pandas.DataFrame of mean concentration end-members / solutes
    sd : pandas.DataFrame of standard deviation end-members / solutes
    min : pandas.DataFrame of minimum concentration end-members / solutes
    max : pandas.DataFrame of max concentration end-members / solutes
    '''
    # Unstack once and reuse: the reshaping is the same for all four outputs.
    wide = df.unstack('Sol')
    return wide['Mean'], wide['Sd'], wide['Min'], wide['Max']