DataJoint pipeline: Fetching data as DataFrames#
Important
This guide assumes you have a DataJoint pipeline deployed with data already ingested.
This guide builds upon the Querying data guide and provides further examples on fetching various kinds of data as pandas.DataFrames
from the Aeon DataJoint pipeline.
You can also run this notebook online at works.datajoint.com
using the following credentials:
Username: aeondemo
Password: aeon_djworks
To access it, go to the Notebook tab at the top and, in the File Browser on the left, navigate to ucl-swc_aeon > docs > examples, where this notebook (dj_fetching_data.ipynb) is located.
Note
The examples here use the social period of the social0.2-aeon4 dataset. Since the social period spans 2 weeks, we limit retrieval to the first 3 days to keep the examples concise.
If you are using a different dataset, be sure to replace the experiment name and parameters in the code below accordingly.
Import libraries and define variables and helper functions#
from datetime import datetime
import warnings
import numpy as np
import pandas as pd
from aeon.dj_pipeline import acquisition, streams, subject, tracking
from aeon.dj_pipeline.analysis.block_analysis import (
Block,
BlockAnalysis,
BlockSubjectAnalysis,
get_foraging_bouts,
)
def ensure_ts_arr_datetime(array):
    """Coerce *array* to a numpy array with dtype ``datetime64[ns]``.

    An empty input yields an empty ``datetime64[ns]`` array so that
    downstream datetime operations never see an object-dtype placeholder.
    """
    if not len(array):
        return np.array([], dtype="datetime64[ns]")
    return np.array(array, dtype="datetime64[ns]")
# Experiment metadata: name plus the start/end boundaries of each housing period.
exp = {
    "name": "social0.2-aeon4",
    "presocial_start": "2024-01-31 11:00:00",
    "presocial_end": "2024-02-08 15:00:00",
    "social_start": "2024-02-09 17:00:00",
    "social_end": "2024-02-23 12:00:00",
    "postsocial_start": "2024-02-25 18:00:00",
    "postsocial_end": "2024-03-02 13:00:00",
}
# DataJoint restriction key for this experiment
key = {"experiment_name": exp["name"]}
# Define periods as (start, end) pairs keyed by period name
periods = {
    p: (exp[f"{p}_start"], exp[f"{p}_end"])
    for p in ("presocial", "social", "postsocial")
}
# Select the social period and limit to first 3 days for brevity
period_name = "social"
start, _ = periods[period_name]
start_dt = datetime.strptime(start, "%Y-%m-%d %H:%M:%S")
end_dt = start_dt + pd.Timedelta(days=3)
Patch data#
In this section, we will fetch foraging patch-related data for each Block. The data includes:
patch information: wheel timestamps, patch rate, and patch offset for each block
subject patch data: subjects’ interactions with patches, including information on their presence in patches (duration, timestamps, RFID detections), pellet consumption (count, timestamps), and wheel movement (distance travelled)
subject patch preferences: preferences of subjects for different patches
def load_subject_patch_data(
    key: dict[str, str], period_start: str, period_end: str
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Loads subject patch data for a specified time period.

    Args:
        key (dict): The key to filter the subject patch data.
        period_start (str): The start time for the period (a ``datetime``
            also works, as it is interpolated into the SQL restriction).
        period_end (str): The end time for the period.

    Returns:
        tuple: A tuple containing:
            - patch_info (pd.DataFrame): Information about patches.
            - block_subject_patch_data (pd.DataFrame): Data for the specified period.
            - block_subject_patch_pref (pd.DataFrame): Preference data for the specified period.

        All three values are always DataFrames (empty when no blocks match),
        unlike the earlier version which returned a raw empty list for
        ``patch_info`` when nothing matched.
    """
    # Per-block patch parameters; fetched as dicts, then normalized to a frame
    # so callers always receive a DataFrame regardless of match count.
    patch_info = pd.DataFrame(
        (
            BlockAnalysis.Patch()
            & key
            & f"block_start >= '{period_start}'"
            & f"block_start <= '{period_end}'"
        ).fetch(
            "block_start",
            "patch_name",
            "patch_rate",
            "patch_offset",
            "wheel_timestamps",
            as_dict=True,
        )
    )
    # fetch(format="frame") always returns a DataFrame with the primary key in
    # the index; reset unconditionally so columns are consistent even when empty.
    block_subject_patch_data = (
        BlockSubjectAnalysis.Patch()
        & key
        & f"block_start >= '{period_start}'"
        & f"block_start <= '{period_end}'"
    ).fetch(format="frame").reset_index()
    block_subject_patch_pref = (
        BlockSubjectAnalysis.Preference()
        & key
        & f"block_start >= '{period_start}'"
        & f"block_start <= '{period_end}'"
    ).fetch(format="frame").reset_index()
    return patch_info, block_subject_patch_data, block_subject_patch_pref
# Fetch patch info, per-subject patch data, and patch preferences for the window
block_patch_info, block_subject_patch_data, block_subject_patch_pref = load_subject_patch_data(key, start_dt, end_dt)
# Drop NaNs in preference columns
block_subject_patch_pref = block_subject_patch_pref.dropna(
    subset=["final_preference_by_time", "final_preference_by_wheel"]
)
# Validate subject count for pre/post-social blocks
if period_name in ("presocial", "postsocial") and not block_subject_patch_data.empty:
    subjects_per_block = block_subject_patch_data.groupby("block_start")["subject_name"].nunique()
    if (subjects_per_block != 1).any():
        warnings.warn(
            f"{exp['name']} {period_name} blocks have >1 subject. Data may need cleaning."
        )
# Ensure timestamp arrays are datetime64[ns]
for ts_col in ("pellet_timestamps", "in_patch_rfid_timestamps", "in_patch_timestamps"):
    if ts_col in block_subject_patch_data.columns:
        block_subject_patch_data[ts_col] = block_subject_patch_data[ts_col].apply(ensure_ts_arr_datetime)
block_patch_info
block_start | patch_name | wheel_timestamps | patch_rate | patch_offset | |
---|---|---|---|---|---|
0 | 2024-02-09 18:19:04.000000 | Patch1 | [2024-02-09T18:19:04.000000000, 2024-02-09T18:... | 0.0100 | 75.0 |
1 | 2024-02-09 18:19:04.000000 | Patch2 | [2024-02-09T18:19:04.000000000, 2024-02-09T18:... | 0.0020 | 75.0 |
2 | 2024-02-09 18:19:04.000000 | Patch3 | [2024-02-09T18:19:04.000000000, 2024-02-09T18:... | 0.0033 | 75.0 |
3 | 2024-02-09 20:07:25.041984 | Patch1 | [2024-02-09T20:07:25.060000000, 2024-02-09T20:... | 0.0020 | 75.0 |
4 | 2024-02-09 20:07:25.041984 | Patch2 | [2024-02-09T20:07:25.060000000, 2024-02-09T20:... | 0.0033 | 75.0 |
... | ... | ... | ... | ... | ... |
109 | 2024-02-12 14:31:02.005984 | Patch2 | [2024-02-12T14:31:02.020000000, 2024-02-12T14:... | 0.0033 | 75.0 |
110 | 2024-02-12 14:31:02.005984 | Patch3 | [2024-02-12T14:31:02.020000000, 2024-02-12T14:... | 0.0100 | 75.0 |
111 | 2024-02-12 16:53:14.000000 | Patch1 | [2024-02-12T16:53:14.000000000, 2024-02-12T16:... | 0.0020 | 75.0 |
112 | 2024-02-12 16:53:14.000000 | Patch2 | [2024-02-12T16:53:14.000000000, 2024-02-12T16:... | 0.0100 | 75.0 |
113 | 2024-02-12 16:53:14.000000 | Patch3 | [2024-02-12T16:53:14.000000000, 2024-02-12T16:... | 0.0033 | 75.0 |
114 rows × 5 columns
block_subject_patch_data
experiment_name | block_start | patch_name | subject_name | in_patch_timestamps | in_patch_time | in_patch_rfid_timestamps | pellet_count | pellet_timestamps | patch_threshold | wheel_cumsum_distance_travelled | period | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | social0.2-aeon4 | 2024-02-09 18:19:04 | Patch1 | BAA-1104048 | [2024-02-09T18:26:44.600000000, 2024-02-09T18:... | 756.60 | [2024-02-09T18:26:45.736672000, 2024-02-09T18:... | 39 | [2024-02-09T18:26:50.373504000, 2024-02-09T18:... | [125.10144062824004, 125.98842043772429, 133.9... | [-0.0, 0.004602223261072957, 0.007670372101788... | social |
1 | social0.2-aeon4 | 2024-02-09 18:19:04 | Patch1 | BAA-1104049 | [2024-02-09T18:21:10.200000000, 2024-02-09T18:... | 570.18 | [2024-02-09T18:21:11.452832000, 2024-02-09T18:... | 26 | [2024-02-09T18:28:57.907488000, 2024-02-09T18:... | [75.07162358109204, 186.27023735234684, 135.82... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | social |
2 | social0.2-aeon4 | 2024-02-09 18:19:04 | Patch2 | BAA-1104048 | [2024-02-09T18:20:10.400000000, 2024-02-09T18:... | 123.32 | [2024-02-09T18:20:12.097312000, 2024-02-09T18:... | 0 | [] | [] | [-0.0, -0.004602223261073846, -0.0015340744203... | social |
3 | social0.2-aeon4 | 2024-02-09 18:19:04 | Patch2 | BAA-1104049 | [2024-02-09T18:20:54.600000000, 2024-02-09T18:... | 226.80 | [2024-02-09T18:21:30.375328000, 2024-02-09T18:... | 3 | [2024-02-09T18:52:14.199488000, 2024-02-09T19:... | [1069.4286592499257, 694.8095229017808, 278.84... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | social |
4 | social0.2-aeon4 | 2024-02-09 18:19:04 | Patch3 | BAA-1104048 | [2024-02-09T18:20:03.940000000, 2024-02-09T18:... | 138.78 | [2024-02-09T18:25:59.113504000, 2024-02-09T18:... | 1 | [2024-02-09T19:30:22.688480000] | [331.8480024096391] | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | social |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
223 | social0.2-aeon4 | 2024-02-12 16:53:14 | Patch1 | BAA-1104049 | [2024-02-12T17:01:34.760000000, 2024-02-12T17:... | 94.98 | [2024-02-12T17:01:35.372416000, 2024-02-12T17:... | 0 | [] | [] | [-0.0, -0.00920444652214547, -0.00613629768143... | social |
224 | social0.2-aeon4 | 2024-02-12 16:53:14 | Patch2 | BAA-1104048 | [2024-02-12T16:58:52.540000000, 2024-02-12T16:... | 627.76 | [2024-02-12T16:58:53.758496000, 2024-02-12T16:... | 18 | [2024-02-12T17:01:47.607488000, 2024-02-12T17:... | [128.88388967189582, 98.29740841703715, 138.54... | [-0.0, 0.0030681488407182655, 0.00306814884071... | social |
225 | social0.2-aeon4 | 2024-02-12 16:53:14 | Patch2 | BAA-1104049 | [2024-02-12T16:53:55.360000000, 2024-02-12T16:... | 1215.12 | [2024-02-12T16:53:56.698656000, 2024-02-12T16:... | 34 | [2024-02-12T16:57:31.338496000, 2024-02-12T16:... | [245.09652119007265, 137.19851472663964, 129.5... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | social |
226 | social0.2-aeon4 | 2024-02-12 16:53:14 | Patch3 | BAA-1104048 | [2024-02-12T16:58:45.920000000, 2024-02-12T16:... | 101.68 | [2024-02-12T16:58:47.270432000, 2024-02-12T16:... | 0 | [] | [] | [-0.0, 0.0, -0.006136297681431202, -0.00460222... | social |
227 | social0.2-aeon4 | 2024-02-12 16:53:14 | Patch3 | BAA-1104049 | [2024-02-12T17:01:20.200000000, 2024-02-12T17:... | 48.12 | [2024-02-12T17:01:22.861888000, 2024-02-12T17:... | 0 | [] | [] | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | social |
228 rows × 12 columns
block_subject_patch_pref
experiment_name | block_start | patch_name | subject_name | cumulative_preference_by_wheel | cumulative_preference_by_time | running_preference_by_time | running_preference_by_wheel | final_preference_by_wheel | final_preference_by_time | period | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | social0.2-aeon4 | 2024-02-09 18:19:04 | Patch1 | BAA-1104048 | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | 0.758947 | 0.742711 | social |
1 | social0.2-aeon4 | 2024-02-09 18:19:04 | Patch1 | BAA-1104049 | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | 0.548170 | 0.574604 | social |
2 | social0.2-aeon4 | 2024-02-09 18:19:04 | Patch2 | BAA-1104048 | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | 0.096201 | 0.121056 | social |
3 | social0.2-aeon4 | 2024-02-09 18:19:04 | Patch2 | BAA-1104049 | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | 0.251308 | 0.228560 | social |
4 | social0.2-aeon4 | 2024-02-09 18:19:04 | Patch3 | BAA-1104048 | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | 0.144852 | 0.136232 | social |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
223 | social0.2-aeon4 | 2024-02-12 16:53:14 | Patch1 | BAA-1104049 | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | 0.038067 | 0.069930 | social |
224 | social0.2-aeon4 | 2024-02-12 16:53:14 | Patch2 | BAA-1104048 | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | 0.831808 | 0.778163 | social |
225 | social0.2-aeon4 | 2024-02-12 16:53:14 | Patch2 | BAA-1104049 | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | 0.955588 | 0.894642 | social |
226 | social0.2-aeon4 | 2024-02-12 16:53:14 | Patch3 | BAA-1104048 | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | 0.162096 | 0.126041 | social |
227 | social0.2-aeon4 | 2024-02-12 16:53:14 | Patch3 | BAA-1104049 | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | 0.006345 | 0.035429 | social |
162 rows × 11 columns
Foraging bouts#
Here we will use the block_analysis.get_foraging_bouts()
function to retrieve foraging bout data for each subject across all blocks.
def load_foraging_bouts(
    key: dict[str, str], period_start: str, period_end: str
) -> pd.DataFrame:
    """Loads foraging bout data for blocks falling within a specified time period.

    Args:
        key (dict): Key to identify experiment data (e.g., {"experiment_name": "Exp1"}).
        period_start (str): Start datetime of the time period (format: '%Y-%m-%d %H:%M:%S').
        period_end (str): End datetime of the time period (format: '%Y-%m-%d %H:%M:%S').

    Returns:
        pd.DataFrame: Concatenated dataframe of foraging bouts for all matching blocks.
            Returns an empty dataframe with predefined columns if no data found.
    """
    # Only blocks fully contained in the window (block_end bounded, not block_start)
    block_starts = (
        Block & key & f"block_start >= '{period_start}'" & f"block_end <= '{period_end}'"
    ).fetch("block_start")
    # One bout frame per block; bouts with zero pellets are excluded
    bout_frames = [
        get_foraging_bouts(key | {"block_start": str(block_start)}, min_pellets=1)
        for block_start in block_starts
    ]
    if not bout_frames:
        # Empty fallback with the same schema as a populated result
        return pd.DataFrame(
            columns=["start", "end", "n_pellets", "cum_wheel_dist", "subject"]
        )
    return pd.concat(bout_frames, ignore_index=True)
# Load foraging bouts
foraging_df = load_foraging_bouts(key, start_dt, end_dt)
# Display one row per foraging bout (start, end, pellet count, wheel distance, subject)
foraging_df
start | end | n_pellets | cum_wheel_dist | subject | |
---|---|---|---|---|---|
0 | 2024-02-09 18:26:07.280 | 2024-02-09 18:27:51.460 | 1 | 458.105303 | BAA-1104048 |
1 | 2024-02-09 18:28:26.720 | 2024-02-09 18:30:25.440 | 1 | 464.014558 | BAA-1104049 |
2 | 2024-02-09 18:36:49.760 | 2024-02-09 18:38:21.360 | 2 | 240.621107 | BAA-1104049 |
3 | 2024-02-09 18:39:17.180 | 2024-02-09 18:42:14.880 | 7 | 983.174489 | BAA-1104048 |
4 | 2024-02-09 18:43:02.440 | 2024-02-09 18:44:35.480 | 1 | 247.676315 | BAA-1104049 |
... | ... | ... | ... | ... | ... |
327 | 2024-02-12 16:24:17.660 | 2024-02-12 16:26:56.160 | 3 | 680.642741 | BAA-1104048 |
328 | 2024-02-12 16:30:04.740 | 2024-02-12 16:33:20.300 | 3 | 981.212408 | BAA-1104048 |
329 | 2024-02-12 16:38:32.840 | 2024-02-12 16:43:16.600 | 3 | 2384.391929 | BAA-1104048 |
330 | 2024-02-12 16:49:39.380 | 2024-02-12 16:51:38.920 | 1 | 321.695406 | BAA-1104049 |
331 | 2024-02-12 16:51:21.440 | 2024-02-12 16:53:12.640 | 2 | 423.237326 | BAA-1104048 |
332 rows × 5 columns
RFID data#
In this experiment, each subject is implanted with a miniature RFID microchip. RFID readers are positioned at the foraging patches, nest, and gate.
We will fetch the RFID detection data at each reader across all chunks occurring within the first 3 days of the social period.
def load_rfid_events(
    key: dict[str, str], period_start: str, period_end: str
) -> pd.DataFrame:
    """Loads RFID events data for chunks falling within a specified time period.

    Args:
        key (dict): Key to identify experiment data (e.g., {"experiment_name": "Exp1"}).
        period_start (str): Start datetime of the time period (format: '%Y-%m-%d %H:%M:%S').
        period_end (str): End datetime of the time period (format: '%Y-%m-%d %H:%M:%S').

    Returns:
        pd.DataFrame: DataFrame containing RFID events for the specified period,
            with detected RFID tags mapped to subject IDs.
            Returns an empty dataframe with predefined columns if no data found.
    """
    # Fetch RFID events within the specified period
    rfid_events_df = (
        streams.RfidReader * streams.RfidReaderRfidEvents
        & key
        & f'chunk_start >= "{period_start}"'
        & f'chunk_start <= "{period_end}"'
    ).fetch(format="frame")
    if rfid_events_df.empty:
        # Return empty DataFrame with expected columns if no data found
        return pd.DataFrame(
            columns=[
                "experiment_name",
                "chunk_start",
                "rfid_reader_name",
                "sample_count",
                "timestamps",
                "rfid",
            ]
        )
    # Get subject details for RFID mapping
    subject_detail = subject.SubjectDetail.fetch(format="frame")
    subject_detail.reset_index(inplace=True)
    # Create mapping from RFID tag (lab_id) to subject ID
    rfid_to_lab_id = dict(zip(subject_detail["lab_id"], subject_detail["subject"]))
    # Translate each per-chunk array of raw tag reads into subject IDs
    rfid_events_df["rfid"] = [
        [rfid_to_lab_id.get(str(rfid)) for rfid in rfid_array]
        for rfid_array in rfid_events_df["rfid"]
    ]
    # Extract key columns from the MultiIndex BY NAME rather than by position:
    # the previous positional access (idx[0]/idx[3]) would silently pick the
    # wrong level if the index level order ever changed.
    rfid_events_df["experiment_name"] = rfid_events_df.index.get_level_values(
        "experiment_name"
    )
    rfid_events_df["chunk_start"] = rfid_events_df.index.get_level_values(
        "chunk_start"
    )
    # Reset the index and drop the index column
    rfid_events_df = rfid_events_df.reset_index(drop=True)
    # Reorder columns to put experiment_name first and chunk_start second
    leading = ["experiment_name", "chunk_start"]
    cols = leading + [col for col in rfid_events_df.columns if col not in leading]
    return rfid_events_df[cols]
# Load RFID data
rfid_df = load_rfid_events(key, start_dt, end_dt)
# Display detections per reader and chunk; `rfid` holds subject IDs per detection
rfid_df
experiment_name | chunk_start | rfid_reader_name | sample_count | timestamps | rfid | |
---|---|---|---|---|---|---|
0 | social0.2-aeon4 | 2024-02-09 17:00:00 | Patch1Rfid | 844 | [2024-02-09T17:00:00.483007908, 2024-02-09T17:... | [BAA-1104048, BAA-1104048, BAA-1104048, BAA-11... |
1 | social0.2-aeon4 | 2024-02-09 18:00:00 | Patch1Rfid | 857 | [2024-02-09T18:01:41.768191814, 2024-02-09T18:... | [BAA-1104049, BAA-1104048, BAA-1104048, BAA-11... |
2 | social0.2-aeon4 | 2024-02-09 19:00:00 | Patch1Rfid | 1262 | [2024-02-09T19:04:08.140863895, 2024-02-09T19:... | [BAA-1104048, BAA-1104048, BAA-1104048, BAA-11... |
3 | social0.2-aeon4 | 2024-02-09 20:00:00 | Patch1Rfid | 301 | [2024-02-09T20:05:47.786528111, 2024-02-09T20:... | [BAA-1104048, BAA-1104048, BAA-1104048, BAA-11... |
4 | social0.2-aeon4 | 2024-02-09 21:00:00 | Patch1Rfid | 0 | [] | [] |
... | ... | ... | ... | ... | ... | ... |
433 | social0.2-aeon4 | 2024-02-12 13:00:00 | Patch3Rfid | 336 | [2024-02-12T13:05:10.713151932, 2024-02-12T13:... | [BAA-1104048, BAA-1104048, BAA-1104048, BAA-11... |
434 | social0.2-aeon4 | 2024-02-12 14:00:00 | Patch3Rfid | 323 | [2024-02-12T14:01:47.358272076, 2024-02-12T14:... | [BAA-1104049, BAA-1104049, BAA-1104049, BAA-11... |
435 | social0.2-aeon4 | 2024-02-12 15:00:00 | Patch3Rfid | 19 | [2024-02-12T15:12:24.588128090, 2024-02-12T15:... | [BAA-1104049, BAA-1104049, BAA-1104049, BAA-11... |
436 | social0.2-aeon4 | 2024-02-12 16:00:00 | Patch3Rfid | 139 | [2024-02-12T16:03:41.951839924, 2024-02-12T16:... | [BAA-1104048, BAA-1104048, BAA-1104048, BAA-11... |
437 | social0.2-aeon4 | 2024-02-12 17:00:00 | Patch3Rfid | 183 | [2024-02-12T17:01:05.555712223, 2024-02-12T17:... | [BAA-1104048, BAA-1104048, BAA-1104048, BAA-11... |
438 rows × 6 columns
Position data#
In this section, we will fetch SLEAP position data (centroid only) from the tracking.DenoisedTracking
table for each subject across all chunks occurring within the first 3 days of the social period.
Note
The full pose data (all tracked body parts) can be fetched from the tracking.SLEAPTracking.Part
table.
def load_position_data(
    key: dict[str, str], period_start: str, period_end: str
) -> pd.DataFrame:
    """Loads position data (centroid tracking) for a specified time period.

    Args:
        key (dict): Key to identify experiment data (e.g., {"experiment_name": "Exp1"}).
        period_start (str): Start datetime of the time period.
        period_end (str): End datetime of the time period.

    Returns:
        pd.DataFrame: DataFrame containing position data for the specified period,
            indexed by sample time, one row per subject per frame.
            Returns an empty DataFrame if no data found or on error.
    """
    try:
        print(f" Querying data from {period_start} to {period_end}...")
        # Create chunk restriction for the time period
        chunk_restriction = acquisition.create_chunk_restriction(
            key["experiment_name"], period_start, period_end
        )
        # Fetch centroid tracking data (top camera) for the specified period
        centroid_df = (
            streams.SpinnakerVideoSource * tracking.DenoisedTracking.Subject
            & key
            & {"spinnaker_video_source_name": "CameraTop"}
            & chunk_restriction
        ).fetch(format="frame")
        centroid_df = centroid_df.reset_index()
        # Harmonize column names with the naming used elsewhere in this guide
        centroid_df = centroid_df.rename(
            columns={
                "subject_name": "identity_name",
                "timestamps": "time",
                "subject_likelihood": "identity_likelihood",
            }
        )
        # Each fetched row holds per-chunk arrays; explode to one row per sample
        centroid_df = centroid_df.explode(
            ["time", "identity_likelihood", "x", "y", "likelihood"]
        )
        # Keep only the columns of interest and index by time. Note this
        # selection already excludes "spinnaker_video_source_name", so the
        # conditional drop of that column in the earlier version was
        # unreachable and has been removed.
        centroid_df = centroid_df[
            [
                "time",
                "experiment_name",
                "identity_name",
                "identity_likelihood",
                "x",
                "y",
                "likelihood",
            ]
        ].set_index("time")
        if isinstance(centroid_df, pd.DataFrame) and not centroid_df.empty:
            print(f" Retrieved {len(centroid_df)} rows of position data")
        else:
            print(" No data found for the specified period")
        return centroid_df
    except Exception as e:
        # Best-effort loader: report the failure and fall back to an empty frame
        print(
            f" Error loading position data for {key['experiment_name']} ({period_start} "
            f"to {period_end}): {e}"
        )
        return pd.DataFrame()
# Load position data
# If this takes too long, consider changing end_dt to an earlier time
position_df = load_position_data(key, start_dt, end_dt).sort_index()  # chronological order
Querying data from 2024-02-09 17:00:00 to 2024-02-12 17:00:00...
Retrieved 20211079 rows of position data
position_df
experiment_name | identity_name | identity_likelihood | x | y | likelihood | |
---|---|---|---|---|---|---|
time | ||||||
2024-02-09 16:48:10.660 | social0.2-aeon4 | BAA-1104048 | NaN | 1280.208496 | 550.073364 | 0.971199 |
2024-02-09 16:48:10.660 | social0.2-aeon4 | BAA-1104049 | 0.102343 | 1232.14624 | 558.113037 | 0.971199 |
2024-02-09 16:48:10.680 | social0.2-aeon4 | BAA-1104049 | 0.021459 | 1282.991699 | 552.229248 | 0.985731 |
2024-02-09 16:48:10.680 | social0.2-aeon4 | BAA-1104048 | NaN | 1232.227051 | 560.120911 | 0.985731 |
2024-02-09 16:48:10.700 | social0.2-aeon4 | BAA-1104048 | NaN | 1283.058105 | 552.201538 | 0.963831 |
... | ... | ... | ... | ... | ... | ... |
2024-02-12 17:59:59.460 | social0.2-aeon4 | BAA-1104049 | 0.971221 | 1213.701416 | 544.300171 | 0.709776 |
2024-02-12 17:59:59.480 | social0.2-aeon4 | BAA-1104048 | 0.997087 | 1211.169434 | 531.434387 | 0.709763 |
2024-02-12 17:59:59.480 | social0.2-aeon4 | BAA-1104049 | 0.971762 | 1213.696045 | 544.309204 | 0.709763 |
2024-02-12 17:59:59.500 | social0.2-aeon4 | BAA-1104048 | 0.996872 | 1211.168945 | 531.43396 | 0.709618 |
2024-02-12 17:59:59.500 | social0.2-aeon4 | BAA-1104049 | 0.971445 | 1213.696045 | 544.309326 | 0.709618 |
20211079 rows × 6 columns
Weight data#
A weighing scale integrated into the nest records the weight data for each subject whenever a subject is alone in the nest.
Here we will fetch the weight data for each subject across all chunks occurring within the first 3 days of the social period.
def load_weight_data(
    key: dict[str, str], period_start: str, period_end: str
) -> pd.DataFrame:
    """Loads weight data for a specified time period.

    Args:
        key (dict): Key to identify experiment data (e.g., {"experiment_name": "Exp1"}).
        period_start (str): Start datetime of the time period (format: '%Y-%m-%d %H:%M:%S').
        period_end (str): End datetime of the time period (format: '%Y-%m-%d %H:%M:%S').

    Returns:
        pd.DataFrame: Weight data for the specified period.
            Returns an empty dataframe if no data found.
    """
    try:
        # Restrict to chunks starting within the period, then fetch only the
        # weight-related attributes.
        chunks_in_period = (
            acquisition.Environment.SubjectWeight
            & key
            & f"chunk_start >= '{period_start}'"
            & f"chunk_start <= '{period_end}'"
        )
        weight_df = chunks_in_period.proj(
            "timestamps", "weight", "subject_id"
        ).fetch(format="frame")
        if not weight_df.empty and isinstance(weight_df, pd.DataFrame):
            return weight_df
        return pd.DataFrame()
    except Exception as e:
        # Best-effort loader: report the failure and return an empty frame
        print(
            f"Error loading weight data for {key} from {period_start} to {period_end}: {e}"
        )
        return pd.DataFrame()
# Load weight data for the selected period
weight_df = load_weight_data(key, start_dt, end_dt)
# Display per-chunk arrays of weight samples, timestamps, and subject IDs
weight_df
timestamps | weight | subject_id | ||
---|---|---|---|---|
experiment_name | chunk_start | |||
social0.2-aeon4 | 2024-02-09 17:00:00 | [2024-02-09T17:12:29.800000191, 2024-02-09T17:... | [23.522316, 23.522316, 23.522316, 23.522316, 2... | [BAA-1104049, BAA-1104049, BAA-1104049, BAA-11... |
2024-02-09 18:00:00 | [2024-02-09T18:07:53.019999981, 2024-02-09T18:... | [25.6000004, 25.6000004, 25.6000004, 25.600000... | [BAA-1104048, BAA-1104048, BAA-1104048, BAA-11... | |
2024-02-09 19:00:00 | [2024-02-09T19:04:08.159999847, 2024-02-09T19:... | [27.4697571, 27.4697571, 27.4697571, 27.469757... | [BAA-1104049, BAA-1104049, BAA-1104049, BAA-11... | |
2024-02-09 20:00:00 | [2024-02-09T20:20:22.920000076, 2024-02-09T20:... | [31.3124409, 11.8549995] | [BAA-1104049, BAA-1104049] | |
2024-02-09 21:00:00 | [2024-02-09T21:08:21.480000019, 2024-02-09T21:... | [21.3240242, 21.3240242, 21.3240242, 21.324024... | [BAA-1104048, BAA-1104048, BAA-1104048, BAA-11... | |
... | ... | ... | ... | |
2024-02-12 13:00:00 | [2024-02-12T13:01:20.599999905, 2024-02-12T13:... | [28.2491455, 28.2541466, 28.2541466, 28.254146... | [BAA-1104049, BAA-1104049, BAA-1104049, BAA-11... | |
2024-02-12 14:00:00 | [2024-02-12T14:15:02.300000191, 2024-02-12T14:... | [28.2345123, 28.2345123, 28.2345123, 28.234512... | [BAA-1104049, BAA-1104049, BAA-1104049, BAA-11... | |
2024-02-12 15:00:00 | [2024-02-12T15:12:41.400000095, 2024-02-12T15:... | [32.1000023, 32.1000023, 32.1000023, 32.100002... | [BAA-1104048, BAA-1104048, BAA-1104048, BAA-11... | |
2024-02-12 16:00:00 | [2024-02-12T16:12:17.960000038, 2024-02-12T16:... | [30.8999977, 30.8999977, 30.8999977, 30.899997... | [BAA-1104049, BAA-1104049, BAA-1104049, BAA-11... | |
2024-02-12 17:00:00 | [2024-02-12T17:02:20.840000153, 2024-02-12T17:... | [30.1764622, 29.99049, 29.99049, 29.99049, 29.... | [BAA-1104049, BAA-1104048, BAA-1104048, BAA-11... |
73 rows × 3 columns