Source code for app.model.lib.data_spreadsheet

from openpyxl import Workbook
from openpyxl.styles import Border, Side
from openpyxl.comments import Comment
from openpyxl.utils import get_column_letter

from app.model.lib.excel import export_to_xlsx

_TIME_UNITS = {
    'd': 'days',
    'h': 'hours',
    'm': 'minutes',
    's': 'seconds',
}

_TECHNIQUE_DESCRIPTIONS = {
    'fc':          "Flow-cytometry values per time-point in {units}, use a period (.) as the decimal separator",
    'fc_ps':       "FC counts values per time-point for strain {strain} in {units}, use a period (.) as the decimal separator",
    'od':          "Optical density values per time-point, use a period (.) as the decimal separator",
    'plates':      "Plate count values per time-point, use a period (.) as the decimal separator",
    'plates_ps':   "Plate count values per time-point for strain {strain}, use a period (.) as the decimal separator.",
    '16s_ps':      "Abundance values per time-point for strain {strain} in {units}, use a period (.) as the decimal separator.",
    'qpcr':        "Abundance values per time-point, use a period (.) as the decimal separator.",
    'qpcr_ps':     "Abundance values per time-point for strain {strain}, use a period (.) as the decimal separator.",
    'ph':          "pH values per time-point, use a period (.) as the decimal separator",
    'metabolites': "Concentration values per time-point for metabolite {metabolite} in {units}, use a period (.) as the decimal separator",
    'STD':         "Standard deviation if more than 1 technical replicate, use a period (.) as the decimal separator",
}



[docs]
def create_excel(submission_form):
    "Create a template data file based on the submission's study design"

    submission = submission_form.submission

    strain_names = [t.name for t in submission_form.fetch_taxa()]
    strain_names += [s['name'] for s in submission.studyDesign['custom_strains']]

    workbook = Workbook()

    short_time_units = submission.studyDesign['timeUnits']
    long_time_units = _TIME_UNITS[short_time_units]

    headers_common = {
        'Biological Replicate': 'Unique names of individual samples: a bottle, a well in a well-plate, or a mini-bioreactor.',
        'Compartment':          'A compartment within the vessel where growth is measured.',
        'Time':                 f"Measurement time-points in {long_time_units} ({short_time_units}).",
    }

    headers_bioreplicates = {**headers_common}
    headers_strains       = {**headers_common}
    headers_metabolites   = {**headers_common}

    for index, study_technique in enumerate(submission.build_techniques()):
        subject_type   = study_technique.subjectType
        technique_type = study_technique.type
        units          = study_technique.units

        for measurement_technique in study_technique.measurementTechniques:
            if subject_type == 'bioreplicate':
                technique_name = measurement_technique.csv_column_name()
                description = _TECHNIQUE_DESCRIPTIONS[technique_type].format(units=study_technique.units)

                headers_bioreplicates[technique_name] = description

                if study_technique.includeStd:
                    title = ' '.join([technique_name, 'STD'])
                    headers_bioreplicates[title] = _TECHNIQUE_DESCRIPTIONS['STD']

            elif subject_type == 'strain':
                if study_technique.includeUnknown:
                    technique_strain_names = [*strain_names, "Unknown"]
                else:
                    technique_strain_names = strain_names

                for strain_name in technique_strain_names:
                    title = measurement_technique.csv_column_name(strain_name)

                    description = _TECHNIQUE_DESCRIPTIONS[f"{technique_type}_ps"].format(
                        strain=strain_name,
                        units=units,
                    )
                    headers_strains[title] = description

                    if study_technique.includeStd:
                        title = ' '.join([title, 'STD'])
                        headers_strains[title] = _TECHNIQUE_DESCRIPTIONS['STD']

            elif subject_type == 'metabolite':
                metabolites = submission_form.fetch_metabolites_for_technique(index)

                for metabolite in metabolites:
                    title = measurement_technique.csv_column_name(metabolite.name)
                    description = _TECHNIQUE_DESCRIPTIONS['metabolites'].format(
                        metabolite=metabolite.name,
                        units=units,
                    )
                    headers_metabolites[title] = description

                    if study_technique.includeStd:
                        title = ' '.join([metabolite.name, 'STD'])
                        headers_metabolites[title] = _TECHNIQUE_DESCRIPTIONS['STD']

            else:
                raise ValueError(f"Invalid technique subject_type: {subject_type}")

    # Create sheets for each category of measurement:
    if len(headers_bioreplicates) > 3:
        _fill_sheet(workbook, "Growth data per community", headers_bioreplicates, submission)

    if len(headers_strains) > 3:
        _fill_sheet(workbook, "Growth data per strain", headers_strains, submission)

    if len(headers_metabolites) > 3:
        _fill_sheet(workbook, "Growth data per metabolite", headers_metabolites, submission)

    return export_to_xlsx(workbook)



def _add_header(sheet, index, title, description):
    cell         = sheet.cell(row=1, column=index, value=title)
    cell.comment = Comment(description, author="mGrowthDB")

    # Built-in styles:
    # <https://openpyxl.readthedocs.io/en/stable/styles.html#using-builtin-styles>
    #
    cell.style = 'Headline 3'

    # Calculate column width based on the length of the title text, plus some extra space
    column_width = len(title)
    column_letter = get_column_letter(index)
    sheet.column_dimensions[column_letter].width = column_width + 1


def _fill_sheet(workbook, sheet_title, headers, submission):
    if workbook.sheetnames == ['Sheet']:
        # Then we have a brand new workbook, use its first sheet
        sheet = workbook.active
        workbook.active.title = sheet_title
    else:
        sheet = workbook.create_sheet(title=sheet_title)

    # Add headers and descriptions to the first row and modify the width of each columns
    for index, (title, description) in enumerate(headers.items(), start=1):
        _add_header(sheet, index, title, description)

    bottom_border = Border(bottom=Side(style="thin", color="000000"))

    row_index = 2
    for experiment in submission.studyDesign['experiments']:
        for bioreplicate in experiment['bioreplicates']:
            for compartment_name in experiment['compartmentNames']:
                for _ in range(experiment['timepointCount']):
                    sheet.cell(row=row_index, column=1, value=bioreplicate['name'])
                    sheet.cell(row=row_index, column=2, value=compartment_name)

                    row_index += 1

                # Apply border after every group of time points
                for column in range(1, len(headers.keys()) + 1):
                    cell = sheet.cell(row=(row_index - 1), column=column)
                    cell.border = bottom_border

    # Freeze header and label columns
    sheet.freeze_panes = "D2"