# GEMMA python module home
#
# Source code for gemma.gemmafiles

'''File readers for G-EMMA input files'''
from gemmaclass import gemma
from sampleclass import sample
import numpy as np
import pandas
import dateutil.parser as parser
import datetime as dt

def read_gemma_endmembers(fname='gemma_endmembers.txt'):
    '''Read g-emma end-members file into a pandas DataFrame

    The file is processed line by line:

    * lines whose first character is ``!`` are comments and are skipped;
    * blank (whitespace-only) lines are skipped;
    * lines whose first character is ``[`` start a new end-member
      section; the end-member name is the line with surrounding ``[``,
      ``]``, spaces and newlines stripped;
    * every other line is a tab-separated solute record. The first field
      is the solute name, the second (``Dist``) is kept as text and all
      remaining fields are converted to ``float``. The resulting frame
      has five value columns, so a record is expected to carry
      ``Dist, Min, Max, Mean, Sd`` after the solute name.

    Parameters
    ----------
    fname : filename (default: gemma_endmembers.txt)

    Returns
    -------
    p : multi-index (end-member 'Em', solute 'Sol') pandas.DataFrame
        with columns 'Dist', 'Min', 'Max', 'Mean', 'Sd'

    Raises
    ------
    ValueError
        If a solute record appears before any ``[end-member]`` header,
        or if a numeric field cannot be converted to float.
    '''
    cols = ['Dist', 'Min', 'Max', 'Mean', 'Sd']
    inames = ['Em', 'Sol']
    index = []
    data = []
    endmember = None  # name of the section currently being read

    with open(fname, 'r') as f:
        for line in f:
            if line.startswith('!'):
                continue
            # A blank line would otherwise be parsed as a record with a
            # single empty field and fail on the missing 'Dist' column.
            if not line.strip():
                continue
            if line.startswith('['):
                endmember = line.strip('[] \n')
                continue
            if endmember is None:
                raise ValueError(
                    'solute record found before any [end-member] '
                    'header in {0}'.format(fname))
            fields = line.strip().split('\t')
            index.append((endmember, fields[0]))
            # 'Dist' stays a string; everything after it is numeric.
            data.append([fields[1]] + [float(x) for x in fields[2:]])

    return pandas.DataFrame(
        data=data,
        index=pandas.MultiIndex.from_tuples(index, names=inames),
        columns=cols)
def read_gemma_stream(fname='gemma_stream.txt'):
    '''Read g-emma stream file into a pandas DataFrame

    The file is processed line by line:

    * lines whose first character is ``!`` are comments and are skipped;
    * blank (whitespace-only) lines are skipped;
    * until a line containing the text ``Sample`` is seen, every
      remaining line must hold a single integer (it is parsed, so a
      malformed value raises, but the value itself is not used);
    * the line containing ``Sample`` is the tab-separated column header;
    * every later line is a tab-separated sample record: the first
      field is the sample name, the second is a date parsed with
      ``dateutil.parser.parse`` and all remaining fields are converted
      to ``float``.

    Records are keyed by sample name, so a repeated sample name
    replaces the earlier record.

    Parameters
    ----------
    fname : filename (default: gemma_stream.txt)

    Returns
    -------
    p : pandas.DataFrame
        one row per sample (indexed by sample name), one column per
        entry of the header line

    Raises
    ------
    ValueError
        If a pre-header line is not an integer or a numeric field
        cannot be converted to float.
    '''
    data = {}
    header = None  # column names; set once the 'Sample' line is found

    with open(fname, 'r') as f:
        for line in f:
            if line.startswith('!'):
                continue
            stripped = line.strip()
            # Blank lines would otherwise fail in int() (header part)
            # or on the missing date field (data part).
            if not stripped:
                continue
            if header is None:
                if 'Sample' in line:
                    header = stripped.split('\t')
                else:
                    # Pre-header integer line: value is unused, but the
                    # conversion is kept so bad input still raises.
                    int(stripped)
                continue
            fields = stripped.split('\t')
            sample_name = fields[0]
            date = parser.parse(fields[1])
            row = [sample_name, date] + [float(x) for x in fields[2:]]
            data[sample_name] = dict(zip(header, row))

    # DataFrame(data) has samples as columns; transpose to get one row
    # per sample.
    return pandas.DataFrame(data).T
def endmembers_to_sep_dfs(df):
    '''Convert multi-index dataframe of endmembers - solute to
    dataframes per end-member of solute characteristics

    The 'Sol' index level is unstacked into columns once, after which
    the 'Mean', 'Sd', 'Min' and 'Max' groups are selected from the
    result.

    Parameters
    ----------
    df : pandas.DataFrame, usually result of read_gemma_endmembers
        (must have an index level named 'Sol' and columns 'Mean',
        'Sd', 'Min' and 'Max')

    Returns
    -------
    mean : pandas.DataFrame of mean concentration end-members / solutes
    sd : pandas.DataFrame of standard deviation end-members / solutes
    min : pandas.DataFrame of minimum concentration end-members / solutes
    max : pandas.DataFrame of max concentration end-members / solutes
    '''
    # Unstack once and reuse the result for all four selections.
    wide = df.unstack('Sol')
    return wide['Mean'], wide['Sd'], wide['Min'], wide['Max']