Skip to content

Commit

Permalink
docs: update paths after src/ refactor; fix docstring errors
Browse files Browse the repository at this point in the history
  • Loading branch information
chanshing committed Jan 16, 2024
1 parent bffc6a6 commit a86acfd
Show file tree
Hide file tree
Showing 9 changed files with 225 additions and 248 deletions.
2 changes: 1 addition & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## Updating Sphinx documentation
Run the commands below:
```
$ sphinx-apidoc -f -o source/ ../accelerometer/
$ sphinx-apidoc -f -o source/ ../src/accelerometer/ # TODO: do we still need this???
$ make html
$ open build/html/index.html
```
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#
import os
import sys
sys.path.insert(0, os.path.abspath('../../'))
sys.path.insert(0, os.path.abspath('../../src'))


# -- Project information -----------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,9 @@ Licence
See `LICENSE.md <https://github.com/OxWearables/biobankAccelerometerAnalysis/blob/master/LICENSE.md>`_.


************
****************
Acknowledgements
************
****************
We would like to thank all our code contributors and manuscript co-authors.
`Contributors Graph <https://github.com/OxWearables/biobankAccelerometerAnalysis/graphs/contributors>`_.

Expand Down
38 changes: 21 additions & 17 deletions src/accelerometer/accPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,27 +98,31 @@ def plotTimeSeries( # noqa: C901
title=None,
showFirstNDays=None
):
"""Plot overall activity and classified activity types
:param pd.DataFrame data: Input DataFrame with time series data
Index: DatetimeIndex
Columns (4 class example):
Name: acc, dtype=float (optional)
Name: light, dtype=Any numeric, value=0 or 1
Name: moderate-vigorous, dtype=Any numeric, value=0 or 1
Name: sedentary, dtype=Any numeric, value=0 or 1
Name: sleep, dtype=Any numeric, value=0 or 1
:param str title: Optional plot title
:param int showFirstNDays: Only show first n days of time series (if specified)
"""
Plot acceleration traces and classified activities.
:param data: Input time-series of acceleration and activity classes. Index: DatetimeIndex. Columns (4-class example):
- Name: acc, dtype=float (optional)
- Name: light, dtype=Any numeric, value=0 or 1
- Name: moderate-vigorous, dtype=Any numeric, value=0 or 1
- Name: sedentary, dtype=Any numeric, value=0 or 1
- Name: sleep, dtype=Any numeric, value=0 or 1
:type data: pd.DataFrame
:param title: Optional plot title
:type title: str, optional
:param showFirstNDays: Only show first n days of time series (if specified)
:type showFirstNDays: int, optional
:return: pyplot Figure
:rtype: plt.Figure
:Example:
>>> from accelerometer.accPlot import plotTimeSeries
>>> df = pd.DataFrame(...)
>>> fig = plotTimeSeries(df)
>>> fig.show()
.. code-block:: python
from accelerometer.accPlot import plotTimeSeries
df = pd.DataFrame(...)
fig = plotTimeSeries(df)
fig.show()
"""

# check index is datetime
Expand Down
24 changes: 12 additions & 12 deletions src/accelerometer/circadian.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@


def calculatePSD(e, epochPeriod, fourierWithAcc, labels, summary):
"""Calculate the power spectral density from fourier analysis of a 1 day frequency
"""
Calculate the power spectral density from fourier analysis of a 1 day frequency.
:param pandas.DataFrame e: Pandas dataframe of epoch data
:param int epochPeriod: Size of epoch time window (in seconds)
:param bool fourierWithAcc:True calculates fourier done with acceleration data instead of sleep data
:param bool fourierWithAcc: If True, run the fourier analysis on acceleration data instead of sleep data
:param list(str) labels: Activity state labels
:param dict summary: Output dictionary containing all summary metrics
:param dict summary: Output dictionary containing all summary metrics. This dictionary will be modified in-place: a new key 'PSD-<W/Hz>' will be added with the calculated power spectral density as its value.
:return: Write dict <summary> keys 'PSD-<W/Hz>'
"""
if fourierWithAcc:
y = e['accImputed'].values
Expand All @@ -35,15 +35,15 @@ def calculatePSD(e, epochPeriod, fourierWithAcc, labels, summary):


def calculateFourierFreq(e, epochPeriod, fourierWithAcc, labels, summary):
"""Calculate the most prevalent frequency in a fourier analysis
"""
Calculate the most prevalent frequency in a fourier analysis.
:param pandas.DataFrame e: Pandas dataframe of epoch data
:param int epochPeriod: Size of epoch time window (in seconds)
:paran bool fourierWithAcc: True calculates fourier done with acceleration data instead of sleep data
:param bool fourierWithAcc: If True, run the fourier analysis on acceleration data instead of sleep data
:param list(str) labels: Activity state labels
:param dict summary: Output dictionary containing all summary metrics
:param dict summary: Output dictionary containing all summary metrics. This dictionary will be modified in-place: a new key 'fourier frequency-<1/days>' will be added with the calculated frequency as its value.
:return: Write dict <summary> keys 'fourier frequency-<1/days>'
"""
if fourierWithAcc:
y = e['accImputed'].values
Expand Down Expand Up @@ -71,14 +71,14 @@ def func(k):


def calculateM10L5(e, epochPeriod, summary):
"""Calculates the M10 L5 relative amplitude from the average acceleration from
the ten most active hours and 5 least most active hours
"""
Calculates the M10 L5 relative amplitude from the average acceleration from
the ten most active hours and the five least active hours.
:param pandas.DataFrame e: Pandas dataframe of epoch data
:param int epochPeriod: Size of epoch time window (in seconds)
:param dict summary: Output dictionary containing all summary metrics
:param dict summary: Output dictionary containing all summary metrics. This dictionary will be modified in-place: a new key 'M10 L5-<rel amp>' will be added with the calculated relative amplitude as its value.
:return: Write dict <summary> keys 'M10 L5-<rel amp>'
"""
TEN_HOURS = int(10 * 60 * 60 / epochPeriod)
FIVE_HOURS = int(5 * 60 * 60 / epochPeriod)
Expand Down
138 changes: 63 additions & 75 deletions src/accelerometer/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,20 @@ def activityClassification(
mgCpMPA: int = 100,
mgCpVPA: int = 400
):
"""Perform classification of activity states from epoch feature data
Based on a balanced random forest with a Hidden Markov Model containing
transitions between predicted activity states and emissions trained using a
free-living groundtruth to identify pre-defined classes of behaviour from
accelerometer data.
:param str epoch: Dataframe of processed epoch data
:param str activityModel: Input tar model file which contains random forest
"""
Perform classification of activity states from epoch feature data. Based on
a balanced random forest with a Hidden Markov Model containing transitions
between predicted activity states and emissions trained using a free-living
groundtruth to identify pre-defined classes of behaviour from accelerometer
data.
:param pandas.DataFrame epoch: Dataframe of processed epoch data
:param str activityModel: Path to input tar model file which contains random forest
pickle model, HMM priors/transitions/emissions npy files, and npy file
of METs for each activity state
:return: Pandas dataframe of activity epoch data with one-hot encoded labels
:rtype: pandas.DataFrame
:return: Activity state labels
:rtype: list(str)
:return: Tuple containing a pandas dataframe of activity epoch data with one-hot encoded labels, and a list of activity state labels
:rtype: tuple(pandas.DataFrame, list(str))
"""

modelPath = resolveModelPath(activityModel)
Expand Down Expand Up @@ -107,31 +104,27 @@ def trainClassificationModel(
outDir='model/',
nJobs=1,
):
"""Train model to classify activity states from epoch feature data
Based on a balanced random forest with a Hidden Markov Model containing
transitions between predicted activity states and emissions trained using
the input training file to identify pre-defined classes of behaviour from
"""
Train model to classify activity states from epoch feature data. Based on a
balanced random forest with a Hidden Markov Model containing transitions
between predicted activity states and emissions trained using the input
training file to identify pre-defined classes of behaviour from
accelerometer data.
:param str trainingFile: Input csv file of training data, pre-sorted by time
:param str labelCol: Input label column
:param str participantCol: Input participant column
:param str annotationCol: Input text annotation e.g. 'walking with dog'
vs. 'walking'
:param str annotationCol: Input text annotation e.g. 'walking with dog' vs. 'walking'
:param str metCol: Input MET column
:param str featuresTxt: Input txt file listing feature column names
:param int cv: Number of CV folds. If None, CV is skipped.
:param str testParticipants: Input comma separated list of participant IDs
to test on.
:param str testParticipants: Input comma separated list of participant IDs to test on.
:param int nTrees: Random forest n_estimators param.
:param int maxDepth: Random forest max_depth param.
:param int minSamplesLeaf: Random forest min_samples_leaf param.
:param str outDir: Output directory. Output files (trained model, predictions, etc.) will be written to this directory.
:param int nJobs: Number of jobs to run in parallel.
:param str outDir: Output directory
:return: Output files (trained model, predictions, etc.) written to <outDir>
:rtype: void
"""

report = {
Expand Down Expand Up @@ -259,12 +252,16 @@ def _Model(**kwargs):


def trainHMM(Y_prob, Y_true, labels=None, uniform_prior=True):
""" https://en.wikipedia.org/wiki/Hidden_Markov_model
"""
Implements a Hidden Markov Model as described in https://en.wikipedia.org/wiki/Hidden_Markov_model.
:return: Dictionary containing prior, emission and transition
matrices, and corresponding labels.
:rtype: dict
:param numpy.array Y_prob: Array of predicted probabilities for each class.
:param numpy.array Y_true: Array of true labels.
:param list(str) labels: List of class labels.
:param bool uniform_prior: If True, all labels have equal probability. If False, label probability equals empirical rate.
:return: Dictionary containing prior, emission and transition matrices, and corresponding labels.
:rtype: dict
"""

if labels is None:
Expand All @@ -291,14 +288,14 @@ def trainHMM(Y_prob, Y_true, labels=None, uniform_prior=True):


def viterbi(Y_obs, hmm_params):
""" Perform HMM smoothing over observations via Viteri algorithm
"""
Performs Hidden Markov Model (HMM) smoothing over observations using the
Viterbi algorithm. For more information on the Viterbi algorithm, see:
https://en.wikipedia.org/wiki/Viterbi_algorithm
:param dict hmm_params: Dictionary containing prior, emission and transition
matrices, and corresponding labels
:param dict hmm_params: Dictionary containing prior, emission and transition matrices, and corresponding labels.
:return: Smoothed sequence of activities
:return: Smoothed sequence of activities.
:rtype: numpy.array
"""

Expand Down Expand Up @@ -337,9 +334,10 @@ def log(x):


def removeSpuriousSleep(Y, activityModel='walmsley', sleepTol='1H'):
""" Remove spurious sleep epochs from activity classification
"""
Remove spurious sleep epochs from activity classification.
:param Series Y: Model output
:param pandas.Series Y: Model output
:param str activityModel: Model identifier
:param str sleepTol: Minimum sleep duration, e.g. '1H'
Expand Down Expand Up @@ -369,9 +367,10 @@ def removeSpuriousSleep(Y, activityModel='walmsley', sleepTol='1H'):


def cutPointModel(enmo, cuts=None, whr=None):
"""Perform classification of activities based on cutpoints.
"""
Perform classification of activities based on cutpoints.
:param Series enmo: Timeseries of ENMO.
:param pandas.Series enmo: Timeseries of ENMO.
:param dict cuts: Dictionary of cutpoints for each activity.
:return: Activity labels.
Expand Down Expand Up @@ -400,17 +399,15 @@ def cutPointModel(enmo, cuts=None, whr=None):


def perParticipantSummaryHTML(dfParam, yTrueCol, yPredCol, pidCol, outHTML):
"""Provide HTML summary of how well activity classification model works
at the per-participant level
"""
Provide HTML summary of how well activity classification model works at the per-participant level.
:param dataframe dfParam: Input pandas dataframe
:param pandas.DataFrame dfParam: Input pandas dataframe
:param str yTrueCol: Input for y_true column label
:param str yPregCol: Input for y_pred column label
:param str yPredCol: Input for y_pred column label
:param str pidCol: Input for participant ID column label
:param str outHTML: Output file to print HTML summary to
:return: HTML file reporting kappa, accuracy, and confusion matrix
:rtype: void
"""
# get kappa & accuracy on a per-participant basis
pIDs = dfParam[pidCol].unique()
Expand Down Expand Up @@ -460,15 +457,12 @@ def perParticipantSummaryHTML(dfParam, yTrueCol, yPredCol, pidCol, outHTML):


def saveToTar(tarOut, **kwargs):
"""Save objects to tar file. Objects must be passed as keyworded arguments,
then the key is used for the object name in the tar file.
"""
Save objects to a tar file. Objects must be passed as keyword arguments; each key is used as the object's name in the tar file.
:param **kwargs: Objects to be saved passed as keyworded arguments.
:param kwargs: Objects to be saved passed as keyworded arguments.
:return: tar file written to <tarOut>
:rtype: void
"""

try:

tmpdir = tempfile.mkdtemp()
Expand All @@ -491,18 +485,16 @@ def saveToTar(tarOut, **kwargs):


def getFileFromTar(tarArchive, targetFile):
"""Read file from tar
This is currently more tricky than it should be see
https://github.com/numpy/numpy/issues/7989
"""
Read file from tar. This is currently more tricky than it should be. See https://github.com/numpy/numpy/issues/7989
:param str tarArchive: Path to input tar archive
:param str targetFile: Target individual file within .tar
:return: file object byte stream
:rtype: object
"""
:rtype: io.BytesIO
"""
with tarfile.open(tarArchive, 'r') as t:
b = BytesIO()
try:
Expand All @@ -520,32 +512,28 @@ def addReferenceLabelsToNewFeatures(
outputFile,
featuresTxt="activityModels/features.txt",
labelCol="label", participantCol="participant",
annotationCol="annotation", metCol="MET"):
"""Append reference annotations to newly extracted feature data
This method helps add existing curated labels (from referenceLabelsFile)
to a file with newly extracted features (both pre-sorted by participant
and time).
annotationCol="annotation", metCol="MET"
):
"""
Append reference annotations to newly extracted feature data. This method
helps add existing curated labels (from referenceLabelsFile) to a file with
newly extracted features (both pre-sorted by participant and time).
:param str featuresFile: Input csv file of new features data, pre-sorted by time
:param str referenceLabelsFile: Input csv file of reference labelled data,
pre-sorted by time
:param str outputFile: Output csv file of new features data with refernce labels
:param str referenceLabelsFile: Input csv file of reference labelled data, pre-sorted by time
:param str outputFile: Output csv file of new features data with reference labels
:param str featuresTxt: Input txt file listing feature column names
:param str labelCol: Input label column
:param str participantCol: Input participant column
:param str annotationCol: Input text annotation e.g. 'walking with dog'
vs. 'walking'
:param str annotationCol: Input text annotation e.g. 'walking with dog' vs. 'walking'
:param str metCol: Input MET column
:return: New csv file written to <outputFile>
:rtype: void
:return: None. Writes a new csv file to <outputFile>.
.. code-block:: python
:Example:
>>> from accelerometer import accClassification
>>> accClassification.addReferenceLabelsToNewFeatures("newFeats.csv",
"refLabels.csv", "newFeatsPlusLabels.csv")
<file written to newFeatsPlusLabels.csv>
from accelerometer import classification
classification.addReferenceLabelsToNewFeatures("newFeats.csv", "refLabels.csv", "newFeatsPlusLabels.csv")
"""

# load new features file
Expand Down
Loading

0 comments on commit a86acfd

Please sign in to comment.