Source code for app.model.lib.data_spreadsheet

from openpyxl import Workbook
from openpyxl.styles import Border, Side
from openpyxl.comments import Comment
from openpyxl.utils import get_column_letter

from app.model.lib.excel import export_to_xlsx

_TIME_UNITS = {
    'd': 'days',
    'h': 'hours',
    'm': 'minutes',
    's': 'seconds',
}

_TECHNIQUE_DESCRIPTIONS = {
    'fc':          "Flow-cytometry values per time-point in {units}, use a period (.) as the decimal separator",
    'fc_ps':       "FC counts values per time-point for strain {strain} in {units}, use a period (.) as the decimal separator",
    'od':          "Optical density values per time-point, use a period (.) as the decimal separator",
    'plates':      "Plate count values per time-point, use a period (.) as the decimal separator",
    'plates_ps':   "Plate count values per time-point for strain {strain}, use a period (.) as the decimal separator.",
    '16s_ps':      "Abundance values per time-point for strain {strain} in {units}, use a period (.) as the decimal separator.",
    'qpcr':        "Abundance values per time-point, use a period (.) as the decimal separator.",
    'qpcr_ps':     "Abundance values per time-point for strain {strain}, use a period (.) as the decimal separator.",
    'ph':          "pH values per time-point, use a period (.) as the decimal separator",
    'metabolites': "Concentration values per time-point for metabolite {metabolite} in {units}, use a period (.) as the decimal separator",
    'STD':         "Standard deviation if more than 1 technical replicate, use a period (.) as the decimal separator",
}


[docs] def create_excel(submission_form): "Create a template data file based on the submission's study design" submission = submission_form.submission strain_names = [t.name for t in submission_form.fetch_taxa()] strain_names += [s['name'] for s in submission.studyDesign['custom_strains']] workbook = Workbook() short_time_units = submission.studyDesign['timeUnits'] long_time_units = _TIME_UNITS[short_time_units] headers_common = { 'Biological Replicate': 'Unique names of individual samples: a bottle, a well in a well-plate, or a mini-bioreactor.', 'Compartment': 'A compartment within the vessel where growth is measured.', 'Time': f"Measurement time-points in {long_time_units} ({short_time_units}).", } headers_bioreplicates = {**headers_common} headers_strains = {**headers_common} headers_metabolites = {**headers_common} for index, study_technique in enumerate(submission.build_techniques()): subject_type = study_technique.subjectType technique_type = study_technique.type units = study_technique.units for measurement_technique in study_technique.measurementTechniques: if subject_type == 'bioreplicate': technique_name = measurement_technique.csv_column_name() description = _TECHNIQUE_DESCRIPTIONS[technique_type].format(units=study_technique.units) headers_bioreplicates[technique_name] = description if study_technique.includeStd: title = ' '.join([technique_name, 'STD']) headers_bioreplicates[title] = _TECHNIQUE_DESCRIPTIONS['STD'] elif subject_type == 'strain': if study_technique.includeUnknown: technique_strain_names = [*strain_names, "Unknown"] else: technique_strain_names = strain_names for strain_name in technique_strain_names: title = measurement_technique.csv_column_name(strain_name) description = _TECHNIQUE_DESCRIPTIONS[f"{technique_type}_ps"].format( strain=strain_name, units=units, ) headers_strains[title] = description if study_technique.includeStd: title = ' '.join([title, 'STD']) headers_strains[title] = _TECHNIQUE_DESCRIPTIONS['STD'] elif subject_type == 'metabolite': metabolites = submission_form.fetch_metabolites_for_technique(index) for metabolite in metabolites: title = measurement_technique.csv_column_name(metabolite.name) description = _TECHNIQUE_DESCRIPTIONS['metabolites'].format( metabolite=metabolite.name, units=units, ) headers_metabolites[title] = description if study_technique.includeStd: title = ' '.join([metabolite.name, 'STD']) headers_metabolites[title] = _TECHNIQUE_DESCRIPTIONS['STD'] else: raise ValueError(f"Invalid technique subject_type: {subject_type}") # Create sheets for each category of measurement: if len(headers_bioreplicates) > 3: _fill_sheet(workbook, "Growth data per community", headers_bioreplicates, submission) if len(headers_strains) > 3: _fill_sheet(workbook, "Growth data per strain", headers_strains, submission) if len(headers_metabolites) > 3: _fill_sheet(workbook, "Growth data per metabolite", headers_metabolites, submission) return export_to_xlsx(workbook)
def _add_header(sheet, index, title, description): cell = sheet.cell(row=1, column=index, value=title) cell.comment = Comment(description, author="mGrowthDB") # Built-in styles: # <https://openpyxl.readthedocs.io/en/stable/styles.html#using-builtin-styles> # cell.style = 'Headline 3' # Calculate column width based on the length of the title text, plus some extra space column_width = len(title) column_letter = get_column_letter(index) sheet.column_dimensions[column_letter].width = column_width + 1 def _fill_sheet(workbook, sheet_title, headers, submission): if workbook.sheetnames == ['Sheet']: # Then we have a brand new workbook, use its first sheet sheet = workbook.active workbook.active.title = sheet_title else: sheet = workbook.create_sheet(title=sheet_title) # Add headers and descriptions to the first row and modify the width of each columns for index, (title, description) in enumerate(headers.items(), start=1): _add_header(sheet, index, title, description) bottom_border = Border(bottom=Side(style="thin", color="000000")) row_index = 2 for experiment in submission.studyDesign['experiments']: for bioreplicate in experiment['bioreplicates']: for compartment_name in experiment['compartmentNames']: for _ in range(experiment['timepointCount']): sheet.cell(row=row_index, column=1, value=bioreplicate['name']) sheet.cell(row=row_index, column=2, value=compartment_name) row_index += 1 # Apply border after every group of time points for column in range(1, len(headers.keys()) + 1): cell = sheet.cell(row=(row_index - 1), column=column) cell.border = bottom_border # Freeze header and label columns sheet.freeze_panes = "D2"