from collections import defaultdict
from pathlib import Path
from typing import List
import pandas as pd
import base64
from LabGuruAPI import SESSION
from LabGuruAPI._collections import RodentStrain, Rodent, Tissue
from LabGuruAPI._datasets import Dataset
[docs]
class StudyDesign(object):
    """Represents the design of a study, including data handling and synchronization.

    This class is responsible for managing and processing study design data, including
    loading data from various sources and synchronizing with a laboratory management
    system. It handles the creation of rodents and associated tissue samples as
    part of the experimental design.

    Attributes:
        study_id (int): Unique identifier for the study.
        study_name (str): Name of the study, generated based on the study ID.
        dataset_name (str): Name of the dataset associated with the study.
        df (pd.DataFrame): DataFrame containing the study design data (empty until loaded).
        dataset (Dataset): Dataset object corresponding to the study's dataset.
    """

    def __init__(self, study_id_num: int):
        """Derive the study/dataset names and fetch the dataset handle by name.

        Args:
            study_id_num: Numeric study identifier; the study name becomes
                ``GRO-<study_id_num>``.
        """
        self.study_id = study_id_num
        self.study_name = f'GRO-{study_id_num}'
        self.dataset_name = f'{self.study_name} Study Design'
        # Instantiate an empty frame: the bare ``pd.DataFrame`` class object is not a table.
        self.df = pd.DataFrame()
        self.dataset = Dataset.from_name(self.dataset_name)

    def load_data_from_lg(self):
        """Fill ``self.df`` from ``self.dataset.get_data()``.

        When ``self.dataset`` is falsy, ``self.df`` is reset to an empty DataFrame.
        """
        self.df = self.dataset.get_data() if self.dataset else pd.DataFrame()

    def load_data_from_excel(self, workbook_path: Path):
        """Build the study design from an Excel workbook and sync it.

        Reads the "Study Design", "Milestone Dates" and "Sample Collection Schedule"
        sheets, creates one ``Rodent`` per animal listed in each group, creates one
        ``Tissue`` per rodent and scheduled collection row of its group, submits both
        batches through ``SESSION.execute_async``, and finally stores the merged
        design table in ``self.df`` and in a newly made ``Dataset``.

        Args:
            workbook_path: Path of the workbook containing the three sheets above.
        """
        design_data = pd.read_excel(workbook_path, "Study Design", index_col=0)
        milestone_data = pd.read_excel(workbook_path, "Milestone Dates", index_col=0)
        collection_schedule = pd.read_excel(workbook_path, "Sample Collection Schedule")

        # NOTE(review): ``all_dates`` is not consumed anywhere below. The lookups are kept
        # so a workbook with missing or invalid 'Start'/'End' rows still fails here as before.
        start_date = milestone_data.loc['Start', 'Date']
        end_date = milestone_data.loc['End', 'Date']
        all_dates = pd.date_range(start_date, end_date).strftime('%Y-%m-%d')

        # Generate new rodents
        # The name padding width is the digit count of the total number of animals.
        zeros = len(str(design_data['No. Animals'].sum()))
        animal_name_iter = Rodent.iter_next_names(f"{self.study_name}-", zeros)
        async_tasks = []
        for grp_id in design_data.index:
            num_rodents = design_data.loc[grp_id, 'No. Animals']
            for _ in range(num_rodents):
                async_tasks.append(Rodent.make_new(
                    name=next(animal_name_iter),
                    group=grp_id,
                    experiment=self.study_name,
                    strain=RodentStrain.from_name(design_data.loc[grp_id, 'Strain']),
                ).async_lg_sync())

        # Loop-invariant: the tissue types do not change per rodent.
        tissue_types = collection_schedule['Tissue'].unique()
        rodent_df = pd.DataFrame(index=pd.CategoricalIndex([], name='Rodent Specimen'))
        rodents_by_group: defaultdict[int, List[Rodent]] = defaultdict(list)
        cur_rodent: Rodent
        for cur_rodent in SESSION.execute_async(async_tasks):
            # Key by the integer group id so the lookup by the integer 'Group ID'
            # further down finds these rodents even if ``group`` is not an int here.
            group_id = int(cur_rodent.group)
            rodents_by_group[group_id].append(cur_rodent)
            rodent_df.at[cur_rodent.name, 'Group'] = group_id
            # Every rodent gets an (initially empty) text cell per tissue type.
            for c_tissue_type in tissue_types:
                rodent_df.at[cur_rodent.name, f'{c_tissue_type} Collection Days'] = ''

        # Add tissue samples by group
        # Reshape the per-group columns into one row per (schedule row, group) pair.
        group_columns = [c for c in collection_schedule.columns.values if 'Group' in c]
        pt = collection_schedule.melt(
            id_vars=['Tissue', 'Day', 'Code', 'Date'],
            value_vars=group_columns,
            var_name='Group',
        )
        pt['Group ID'] = pt['Group'].apply(lambda x: int(x.replace('Group ', '')))
        # Rows with any missing value (including an empty group cell) are discarded.
        pt.dropna(inplace=True)

        # (rodent name, column name) -> collection days, joined once at the end.
        collection_days = defaultdict(list)
        tissue_sample_futures = []
        for c_grp_id, c_grp_data in pt.groupby('Group ID'):
            for cur_rodent in rodents_by_group[c_grp_id]:
                prefix, rodent_num = cur_rodent.name.rsplit('-', 1)
                for _, row in c_grp_data.iterrows():
                    tissue_name = f'{prefix}-{row["Code"]}{rodent_num}'
                    tissue_sample = Tissue.make_new(
                        name=tissue_name,
                        species=cur_rodent.strain.species,
                        genotype=cur_rodent.strain.genotype,
                        tissue_type=row['Tissue'],
                        harvest_date=row['Date'].date(),
                        specimen=cur_rodent
                    )
                    # Sync the tissue sample to the LabGuru
                    tissue_sample_futures.append(tissue_sample.async_lg_sync())
                    days_key = (cur_rodent.name, f'{row["Tissue"]} Collection Days')
                    collection_days[days_key].append(str(int(row["Day"])))
        # NOTE(review): the return value is ignored; if ``execute_async`` evaluates lazily
        # its result would need to be consumed for the tasks to run - confirm.
        SESSION.execute_async(tissue_sample_futures)

        # Cells without any collection keep the '' written above.
        for (rodent_name, collection_col), days in collection_days.items():
            rodent_df.at[rodent_name, collection_col] = ', '.join(days)

        # Generate final dataset
        # set_index/reset_index moves 'Rodent Specimen' to the first column.
        self.df = design_data.merge(rodent_df.reset_index(), on='Group').set_index('Rodent Specimen').reset_index()
        dataset = Dataset.make_new(name=self.dataset_name)
        dataset.set_data(self.df)
        dataset.lg_sync()
        # Keep the handle so a later load_data_from_lg() reads the dataset just created.
        self.dataset = dataset
if __name__ == '__main__':
    # Script entry point: upload the study design for study 1645 from the template
    # workbook, then print the base64 text of b'hello'.
    template_location = (r"C:\Users\RobertWarden-Rothman\GRO Biosciences\Projects - Foundry"
                         r"\Workflow Development\ELISA\study_upload_template.xlsx")
    study = StudyDesign(1645)
    study.load_data_from_excel(Path(template_location))
    print(base64.b64encode(b'hello').decode('utf-8'))