# Source code for LabGuruAPI._in_vivo

from collections import defaultdict
from pathlib import Path
from typing import List

import pandas as pd
import base64

from LabGuruAPI import SESSION
from LabGuruAPI._collections import RodentStrain, Rodent, Tissue
from LabGuruAPI._datasets import Dataset



# [docs]
class StudyDesign(object):
    """Represents the design of a study, including data handling and synchronization.

    This class is responsible for managing and processing study design data, including
    loading data from various sources and synchronizing with a laboratory management
    system. It handles the creation of rodents and associated tissue samples as
    part of the experimental design.

    Attributes:
        study_id (int): Unique identifier for the study.
        study_name (str): Name of the study, generated based on the study ID.
        dataset_name (str): Name of the dataset associated with the study.
        df (pd.DataFrame): DataFrame containing the study design data; an empty
            frame until data has been loaded.
        dataset (Dataset): Dataset object corresponding to the study's dataset.
    """

    def __init__(self, study_id_num: int):
        """Derive the study/dataset names and look the dataset up by name.

        Args:
            study_id_num: Numeric study identifier; the study name becomes
                ``GRO-<study_id_num>``.
        """
        self.study_id = study_id_num
        self.study_name = f'GRO-{study_id_num}'
        self.dataset_name = f'{self.study_name} Study Design'

        # Start with an empty frame *instance* (not the ``pd.DataFrame`` class)
        # so DataFrame attributes such as ``.empty`` behave as expected.
        self.df = pd.DataFrame()
        self.dataset = Dataset.from_name(self.dataset_name)

    def load_data_from_lg(self) -> None:
        """Populate ``self.df`` from ``self.dataset``.

        When ``self.dataset`` is falsy, ``self.df`` is reset to an empty
        DataFrame instead.
        """
        self.df = self.dataset.get_data() if self.dataset else pd.DataFrame()

    def load_data_from_excel(self, workbook_path: Path) -> None:
        """Create the study's rodents and tissue samples from an Excel workbook.

        Reads the "Study Design", "Milestone Dates" and "Sample Collection
        Schedule" sheets, creates one rodent per animal in every design group,
        creates one tissue sample per scheduled collection for every rodent of
        the scheduled group, and finally uploads a dataset joining the design
        rows with the per-rodent collection days. The result is stored in
        ``self.df`` and the uploaded dataset in ``self.dataset``.

        The collection schedule is reshaped before any record is created, so a
        malformed schedule sheet fails before anything is synced.

        Args:
            workbook_path: Path of the workbook to read.
        """
        design_data = pd.read_excel(workbook_path, sheet_name="Study Design", index_col=0)
        milestone_data = pd.read_excel(workbook_path, sheet_name="Milestone Dates", index_col=0)
        collection_schedule = pd.read_excel(workbook_path, sheet_name="Sample Collection Schedule")

        # NOTE(review): ``all_dates`` is not used below. The lookups are kept
        # because they raise on a workbook without 'Start'/'End' milestone rows.
        start_date = milestone_data.loc['Start', 'Date']
        end_date = milestone_data.loc['End', 'Date']
        all_dates = pd.date_range(start_date, end_date).strftime('%Y-%m-%d')

        tissue_types = collection_schedule['Tissue'].unique()

        # One row per (schedule row, "Group N" column). Rows with any missing
        # value are dropped -- presumably a blank group cell means that group
        # has no collection on that row; verify against the template.
        group_columns = [c for c in collection_schedule.columns.values if 'Group' in c]
        pt = collection_schedule.melt(
            id_vars=['Tissue', 'Day', 'Code', 'Date'],
            value_vars=group_columns,
            var_name='Group',
        )
        pt['Group ID'] = pt['Group'].apply(lambda x: int(x.replace('Group ', '')))
        pt.dropna(inplace=True)

        # Generate new rodents
        # Pad the animal numbers to the width of the total animal count.
        zeros = len(str(design_data['No. Animals'].sum()))
        animal_name_iter = Rodent.iter_next_names(f"{self.study_name}-", zeros)

        async_tasks = []
        for grp_id in design_data.index:
            num_rodents = design_data.loc[grp_id, 'No. Animals']
            for _ in range(num_rodents):
                async_tasks.append(Rodent.make_new(
                    name=next(animal_name_iter),
                    group=grp_id,
                    experiment=self.study_name,
                    strain=RodentStrain.from_name(design_data.loc[grp_id, 'Strain']),
                ).async_lg_sync())

        rodent_df = pd.DataFrame(index=pd.CategoricalIndex([], name='Rodent Specimen'))
        rodents_by_group: defaultdict[int, List[Rodent]] = defaultdict(list)
        cur_rodent: Rodent
        for cur_rodent in SESSION.execute_async(async_tasks):
            # Key by int so the lookup by the integer 'Group ID' below matches
            # even if the synced record reports its group as a string.
            group_id = int(cur_rodent.group)
            rodents_by_group[group_id].append(cur_rodent)
            rodent_df.at[cur_rodent.name, 'Group'] = group_id
            for c_tissue_type in tissue_types:
                rodent_df.at[cur_rodent.name, f'{c_tissue_type} Collection Days'] = ''

        # Add tissue samples by group
        tissue_sample_futures = []
        # (rodent name, column name) -> collection days in schedule order
        collection_days = defaultdict(list)
        for c_grp_id, c_grp_data in pt.groupby('Group ID'):
            for cur_rodent in rodents_by_group.get(c_grp_id, []):
                # The tissue code is inserted between the name prefix and the
                # rodent number, i.e. '<prefix>-<code><number>'.
                prefix, rodent_num = cur_rodent.name.rsplit('-', 1)
                for _, row in c_grp_data.iterrows():
                    tissue_name = f'{prefix}-{row["Code"]}{rodent_num}'
                    tissue_sample = Tissue.make_new(
                        name=tissue_name,
                        species=cur_rodent.strain.species,
                        genotype=cur_rodent.strain.genotype,
                        tissue_type=row['Tissue'],
                        harvest_date=row['Date'].date(),
                        specimen=cur_rodent
                    )
                    tissue_sample_futures.append(tissue_sample.async_lg_sync())  # Sync the tissue sample to the LabGuru

                    days_column = f'{row["Tissue"]} Collection Days'
                    collection_days[(cur_rodent.name, days_column)].append(str(int(row['Day'])))

        # The rodent sync above iterates the result of execute_async; consume
        # it here as well so the tissue syncs run even if the call is lazy.
        list(SESSION.execute_async(tissue_sample_futures))

        # Cells without any scheduled collection keep their initial ''.
        for (rodent_name, days_column), days in collection_days.items():
            rodent_df.at[rodent_name, days_column] = ', '.join(days)

        # Generate final dataset
        # set_index/reset_index moves 'Rodent Specimen' to the first column.
        self.df = design_data.merge(rodent_df.reset_index(), on='Group').set_index('Rodent Specimen').reset_index()
        dataset = Dataset.make_new(name=self.dataset_name)
        dataset.set_data(self.df)
        dataset.lg_sync()
        # Keep the instance pointing at the dataset that was just uploaded.
        self.dataset = dataset



        
if __name__ == '__main__':
    # Manual run: upload study 1645 from the local template workbook.
    template_location = (
        r"C:\Users\RobertWarden-Rothman\GRO Biosciences\Projects - Foundry"
        r"\Workflow Development\ELISA\study_upload_template.xlsx"
    )
    workbook_path = Path(template_location)

    new_study = StudyDesign(1645)
    new_study.load_data_from_excel(workbook_path)

    # Base64-encode a fixed byte string and print it as text.
    encoded_string = base64.b64encode(b'hello')
    print(str(encoded_string, 'utf-8'))