Skip to content

Commit

Permalink
feat(classif): cutpoint model; rm spurious sleep
Browse files Browse the repository at this point in the history
- Extend the cut-point-based activity model.
- Remove spurious (short) sleep bouts — now applied to all models.
  • Loading branch information
chanshing committed Nov 10, 2023
1 parent 29b9e54 commit 38595f8
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 61 deletions.
2 changes: 1 addition & 1 deletion src/accelerometer/accPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
'bicycling': 'springgreen',
'tasks-light': 'darkorange',
'SB': 'red', # sedentary behaviour
'LIPA': 'darkorange', # light physical activity
'LPA': 'darkorange', # light physical activity
'MVPA': 'green', # moderate-vigorous physical activity
'MPA': 'green', # moderate physical activity
'VPA': 'springgreen', # vigorous physical activity
Expand Down
19 changes: 12 additions & 7 deletions src/accelerometer/accProcess.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,16 @@ def main(): # noqa: C901
help="""calibration sphere threshold (default
: %(default)s mg))""")
# activity parameters
parser.add_argument('--mgCutPointMVPA',
parser.add_argument('--mgCpLPA',
metavar="mg", default=45, type=int,
help="""LPA threshold for cut point based activity
definition (default : %(default)s)""")
parser.add_argument('--mgCpMPA',
metavar="mg", default=100, type=int,
help="""MVPA threshold for cut point based activity
definition (default : %(default)s)""")
parser.add_argument('--mgCutPointVPA',
metavar="mg", default=425, type=int,
help="""MPA threshold for cut point based activity
definition (default : %(default)s)""")
parser.add_argument('--mgCpVPA',
metavar="mg", default=400, type=int,
help="""VPA threshold for cut point based activity
definition (default : %(default)s)""")
parser.add_argument('--intensityDistribution',
Expand Down Expand Up @@ -328,8 +332,9 @@ def deleteIntermediateFiles():
activityClassification=args.activityClassification,
timeZone=args.timeZone, startTime=args.startTime,
endTime=args.endTime, epochPeriod=args.epochPeriod,
stationaryStd=args.stationaryStd, mgCutPointMVPA=args.mgCutPointMVPA,
mgCutPointVPA=args.mgCutPointVPA, activityModel=args.activityModel,
stationaryStd=args.stationaryStd,
mgCpLPA=args.mgCpLPA, mgCpMPA=args.mgCpMPA, mgCpVPA=args.mgCpVPA,
activityModel=args.activityModel,
intensityDistribution=args.intensityDistribution,
psd=args.psd, fourierFrequency=args.fourierFrequency,
fourierWithAcc=args.fourierWithAcc, m10l5=args.m10l5)
Expand Down
155 changes: 110 additions & 45 deletions src/accelerometer/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,13 @@
import json


def activityClassification(epoch, activityModel="walmsley"):
def activityClassification(
epoch,
activityModel: str = "walmsley",
mgCpLPA: int = 45,
mgCpMPA: int = 100,
mgCpVPA: int = 400
):
"""Perform classification of activity states from epoch feature data
Based on a balanced random forest with a Hidden Markov Model containing
Expand All @@ -40,70 +46,66 @@ def activityClassification(epoch, activityModel="walmsley"):
:rtype: list(str)
"""

use_cutpoints = 'chan' in activityModel
smooth_sleep = 'chan' in activityModel

activityModel = resolveModelPath(activityModel)
modelPath = resolveModelPath(activityModel)

featureCols = joblib.load(getFileFromTar(activityModel, 'featureCols'))
model = joblib.load(getFileFromTar(activityModel, 'model'))
hmmParams = joblib.load(getFileFromTar(activityModel, 'hmmParams'))
labels = joblib.load(getFileFromTar(activityModel, 'labels')).tolist()
featureCols = joblib.load(getFileFromTar(modelPath, 'featureCols'))
model = joblib.load(getFileFromTar(modelPath, 'model'))
hmmParams = joblib.load(getFileFromTar(modelPath, 'hmmParams'))
labels = joblib.load(getFileFromTar(modelPath, 'labels')).tolist()

X = epoch[featureCols].to_numpy()
ok = np.isfinite(X).all(axis=1)
print(f"{len(epoch) - np.sum(ok)} rows with NaN or Inf values, out of {len(epoch)}")

Y = viterbi(model.predict(X[ok]), hmmParams)

if smooth_sleep:
sleep = pd.Series(Y == 'sleep')
sleep_streak = (
sleep.ne(sleep.shift())
.cumsum()
.pipe(lambda x: x.groupby(x).transform('count') * sleep)
)
# TODO: hardcoded 120 = 1hr
Y[(Y == 'sleep') & (sleep_streak < 120)] = 'sedentary'
Y = pd.Series(index=epoch.index)
Y.loc[ok] = viterbi(model.predict(X[ok]), hmmParams)

if use_cutpoints:
# TODO: Chan's logic hardcoded here
if activityModel == 'chan':
enmo = epoch['enmoTrunc'].to_numpy()
enmo = enmo[mask]
Y[(Y == 'other') & (enmo < .1)] = 'light'
Y[(Y == 'other') & (enmo >= .1)] = 'moderate-vigorous'
other = (Y == 'other')
Y.loc[other & (enmo < 100/1000)] = 'light'
Y.loc[other & (enmo >= 100/1000)] = 'moderate'
Y.loc[other & (enmo > 400/1000)] = 'vigorous'
labels.remove('other')
labels.append('light')
labels.append('moderate-vigorous')
labels.append('moderate')
labels.append('vigorous')
del enmo
del other

# Append predicted activities to epoch dataframe
epoch["label"] = np.nan
epoch.loc[ok, "label"] = Y
Y = removeSpuriousSleep(Y, activityModel=activityModel)

# One-hot encoding
epoch.loc[ok, labels] = (Y[ok].to_numpy()[:, None] == labels).astype('float')

# MET prediction
METs = joblib.load(getFileFromTar(activityModel, 'METs'))
METs = joblib.load(getFileFromTar(modelPath, 'METs'))
if METs is not None:
epoch["MET"] = epoch["label"].replace(METs)
epoch.loc[:, "MET"] = Y.replace(METs)

# One-hot encoding
for lab in labels:
epoch[lab] = 0
epoch.loc[epoch['label'] == lab, lab] = 1
# Null values aren't one-hot encoded, so set such instances to NaN
for lab in labels:
epoch.loc[epoch[labels].sum(axis=1) == 0, lab] = np.nan
# Cut-point based classification on non-sleep epochs
YCpOneHot = cutPointModel(
epoch['enmoTrunc'],
cuts={'LPA': mgCpLPA/1000, 'MPA': mgCpMPA/1000, 'VPA': mgCpVPA/1000},
whr=~(Y == 'sleep') # Note: ~(Y == 'sleep') != (Y != 'sleep') because of NaNs
)
epoch = epoch.join(YCpOneHot)
labelsCp = list(YCpOneHot.columns)
labels.extend(labelsCp)

return epoch, labels


def trainClassificationModel(
trainingFile,
labelCol="label", participantCol="participant",
annotationCol="annotation", metCol="MET",
featuresTxt="activityModels/features.txt",
nTrees=1000, maxDepth=None, minSamplesLeaf=1,
cv=None, testParticipants=None,
outDir='model/',
nJobs=1,
trainingFile,
labelCol="label", participantCol="participant",
annotationCol="annotation", metCol="MET",
featuresTxt="activityModels/features.txt",
nTrees=1000, maxDepth=None, minSamplesLeaf=1,
cv=None, testParticipants=None,
outDir='model/',
nJobs=1,
):
"""Train model to classify activity states from epoch feature data
Expand Down Expand Up @@ -334,6 +336,69 @@ def log(x):
return viterbi_path


def removeSpuriousSleep(Y, activityModel='walmsley', sleepTol='1H'):
    """ Remove spurious sleep epochs from activity classification

    Sleep bouts shorter than `sleepTol` are relabelled as the model's
    sedentary-equivalent state; longer bouts are left untouched.

    :param Series Y: Model output
    :param str activityModel: Model identifier
    :param str sleepTol: Minimum sleep duration, e.g. '1H'

    :return: Dataframe of revised model output
    :rtype: pandas.DataFrame
    """

    # Each model names its sedentary-like state differently
    replacement = {
        'willetts': 'sit-stand',
        'doherty': 'sedentary',
        'walmsley': 'sedentary',
        'chan': 'sedentary',
    }[activityModel]

    isSleep = Y.eq('sleep')
    # Run-length encode: consecutive equal values share one group id, then
    # every epoch receives the length of its own sleep run (0 when awake)
    runId = isSleep.ne(isSleep.shift()).cumsum()
    runLength = runId.groupby(runId).transform('count') * isSleep

    # Convert the duration tolerance into a number of epochs via index freq
    minEpochs = pd.Timedelta(sleepTol) / Y.index.freq

    out = Y.copy()  # do not mutate the caller's series
    out.loc[isSleep & (runLength < minEpochs)] = replacement

    return out


def cutPointModel(enmo, cuts=None, whr=None):
    """Perform classification of activities based on cutpoints.

    :param Series enmo: Timeseries of ENMO.
    :param dict cuts: Dictionary of cutpoints for each activity.
    :param Series whr: Boolean mask of epochs eligible for classification;
        epochs outside the mask get 0 in every column.

    :return: One-hot encoded activity labels (NaN where ENMO is NaN).
    :rtype: pandas.DataFrame
    """

    if cuts is None:
        # default cutpoints
        cuts = {'LPA': 45/1000, 'MPA': 100/1000, 'VPA': 400/1000}

    if whr is None:
        whr = pd.Series(True, index=enmo.index)

    lpa, mpa, vpa = cuts['LPA'], cuts['MPA'], cuts['VPA']

    # Intensity bands; note CpMVPA deliberately overlaps CpMPA and CpVPA
    bands = {
        'CpSB': enmo <= lpa,
        'CpLPA': (enmo > lpa) & (enmo <= mpa),
        'CpMPA': (enmo > mpa) & (enmo <= vpa),
        'CpVPA': enmo > vpa,
        'CpMVPA': enmo > mpa,
    }

    Y = pd.DataFrame(
        {name: band & whr for name, band in bands.items()}
    ).astype('float')
    Y.loc[enmo.isna()] = np.nan

    return Y


def perParticipantSummaryHTML(dfParam, yTrueCol, yPredCol, pidCol, outHTML):
"""Provide HTML summary of how well activity classification model works
at the per-participant level
Expand Down
12 changes: 4 additions & 8 deletions src/accelerometer/summarisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def getActivitySummary( # noqa: C901
activityClassification=True, timeZone='Europe/London',
startTime=None, endTime=None,
epochPeriod=30, stationaryStd=13, minNonWearDuration=60,
mgCutPointMVPA=100, mgCutPointVPA=425,
mgCpLPA=45, mgCpMPA=100, mgCpVPA=400,
activityModel="walmsley",
intensityDistribution=False, imputation=True,
psd=False, fourierFrequency=False, fourierWithAcc=False, m10l5=False
Expand Down Expand Up @@ -109,10 +109,6 @@ def getActivitySummary( # noqa: C901
# enmoTrunc = max(enmo, 0)
data['acc'] = data['enmoTrunc'] * 1000 # convert enmoTrunc to milli-G units

# Cut-point based MVPA and VPA
data['cutPointMVPA'] = data['acc'] >= mgCutPointMVPA
data['cutPointVPA'] = data['acc'] >= mgCutPointVPA

# Resolve interrupts
data = resolveInterrupts(data, epochPeriod, summary)
# Resolve non-wear
Expand All @@ -121,7 +117,7 @@ def getActivitySummary( # noqa: C901
# Predict activity from features, and add label column
labels = []
if activityClassification:
data, labels = classification.activityClassification(data, activityModel)
data, labels = classification.activityClassification(data, activityModel, mgCpLPA, mgCpMPA, mgCpVPA)

# Calculate empirical cumulative distribution function of vector magnitudes
if intensityDistribution:
Expand Down Expand Up @@ -355,7 +351,7 @@ def writeMovementSummaries(data, labels, summary): # noqa: C901

# Hours of activity for each recorded day
epochInHours = pd.Timedelta(freq).total_seconds() / 3600
cols = ['wearTime', 'cutPointMVPA', 'cutPointVPA'] + labels
cols = ['wearTime'] + labels
dailyStats = (
data[cols].astype('float')
.groupby(data.index.date)
Expand All @@ -370,7 +366,7 @@ def writeMovementSummaries(data, labels, summary): # noqa: C901
# In the following, we resample, pad and impute the data so that we have a
# multiple of 24h for the stats calculations
tStart, tEnd = data.index[0], data.index[-1]
cols = ['acc', 'wearTime', 'cutPointMVPA', 'cutPointVPA'] + labels
cols = ['acc', 'wearTime'] + labels
if 'MET' in data.columns:
cols.append('MET')
data = imputeMissing(data[cols].astype('float'))
Expand Down

0 comments on commit 38595f8

Please sign in to comment.