app.py

### PACKAGE IMPORTS ###
from gevent import monkey
monkey.patch_all()

import gevent
from gevent.pywsgi import WSGIServer

import os
import re
import io
import csv
import math
import pickle
import uuid
import zipfile
import numpy as np
import pandas as pd

from datetime import datetime, timedelta

from flask import Blueprint, Flask, render_template, url_for, request, redirect, jsonify, Response, send_file, send_from_directory, abort, send_from_directory, session
from flask_cors import CORS
from werkzeug.utils import secure_filename

from config import ANALYTICS_DOMAIN, ANALYTICS_SCRIPT_URL, BASE_DIR, DATA_FOLDER, SITE_TABLES_FOLDER, RES_TABLES_FOLDER, REP_STRUCS_FOLDER, PROTS_FOLDER, ASSEMBLY_FOLDER, CIF_SIFTS_DIR, CHAIN_MAPPING_DIR, USER_JOBS_OUT_FOLDER, SESSIONS_FOLDER, SLIVKA_URL, SUBMISSIONS_ENABLED, STATIC_URL_PATH, URL_PREFIX
from config import SERVICE_STATUS, SERVICE_NOTICE, SERVICE_NOTICE_UPDATED, SERVICE_ALERT_CLASS
from filters import datetime_parse, datetime_format
from forms import LigysisForm
from logger_config import setup_logging
from session_db import initialize_db, fetch_results
from submission import SubmissionHandler
from utils.validation import is_valid_session_id, is_valid_submission_time

### FUNCTIONS ###

def load_pickle(f_in): # loads pickle and returns data
    """
    Opens the file at ``f_in`` in binary mode and returns the
    object un-pickled from it.
    """
    with open(f_in, "rb") as handle:
        return pickle.load(handle)

def convert_numpy(obj): # utility function to ensure object types are correct
    """
    Recursively converts NumPy values into built-in Python types.

    NumPy booleans, integers and floats become ``bool``, ``int`` and
    ``float``; arrays become (nested) lists; dictionaries keep their keys
    and have their values converted; lists and tuples are both returned
    as lists. Any other object is returned unchanged.
    """
    # np.bool_ is not a subclass of np.integer, so it needs its own branch;
    # without it NumPy booleans would be returned unconverted.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        return {k: convert_numpy(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [convert_numpy(v) for v in obj]
    return obj

def hex_to_rgb(hex_code): # converts hex code to RGB
    """
    Turns a hex colour string (leading '#' characters are stripped) into a
    list of three channel values, each divided by 255 and rounded to
    3 decimals.
    """
    digits = hex_code.lstrip('#')
    # parse all three two-character channels first, then normalise them
    red, green, blue = (int(digits[start:start + 2], 16) for start in (0, 2, 4))
    return [round(channel / 255.0, 3) for channel in (red, green, blue)]

def extract_open_files(cxc_in, fmt = "cif"): # extracts dictionary of order of opened files from ChimeraX .cxc
    """
    Reads a ChimeraX command file (.cxc) and collects the files it opens.

    Only lines that start with "open" are considered, and only when their
    second whitespace-separated token ends with ``.{fmt}``. Returns a
    dictionary mapping a 1-based counter (order of appearance among the
    matching lines) to the file name.
    """
    suffix = f'.{fmt}'
    open_files_dict = {}
    with open(cxc_in, 'r') as file:
        for line in file:
            if not line.startswith("open"):
                continue
            tokens = line.split()
            if len(tokens) < 2:
                # an "open" line without an argument names no file; skip it
                # instead of failing with IndexError
                continue
            file_name = tokens[1]
            if file_name.endswith(suffix):
                open_files_dict[len(open_files_dict) + 1] = file_name
    return open_files_dict

def transform_lines_3DMol(defattr_in, opened_chimX, loaded_3dmol): # gets binding site ID attribute data from .attr, .cxc and 3DMol.js
    """
    Reads a ChimeraX attribute file (.defattr) and maps each attribute line
    onto the models loaded by 3DMol.js.

    ``opened_chimX`` maps the model number found in the attribute file to a
    file name; ``loaded_3dmol`` maps a file name to the 3DMol.js model ID.

    Returns a tuple ``(data, bs_ids)`` where ``data`` is a list of
    ``(model ID, chain, residue number, binding site ID)`` tuples and
    ``bs_ids`` is the sorted list of unique binding site IDs found in the
    file (including those on lines whose model could not be resolved).
    """
    attr_line = re.compile(r'\s+#(\d+)/(\w+):(\d+)\s+(-?\d+)') # regex for .attr file lines
    bs_ids = set()
    data = []
    with open(defattr_in, 'r') as file:
        for line in file:
            match = attr_line.match(line)
            if not match:
                continue
            key, chain, resi, p_bs = match.groups() # extracts binding site information
            bs_ids.add(int(p_bs))
            # .get() so that a model number missing from opened_chimX is
            # reported like any other unresolved model instead of raising
            # KeyError
            model_file = opened_chimX.get(int(key))
            if model_file is not None and model_file in loaded_3dmol:
                data.append((loaded_3dmol[model_file], chain, int(resi), int(p_bs)))
            else:
                print("Model not found")
    return data, sorted(bs_ids)

def transform_lines_PyMol(defattr_in, open_files_dict): # gets binding site ID attribute data from .attr to format PyMol
    """
    Parses a ChimeraX attribute file (.defattr) and, for every attribute
    line whose model number is a key of ``open_files_dict``, builds a PyMol
    ``alter_state`` command that sets ``p.bs`` on the matching residue.
    The PyMol object name is the file name up to its first ".".

    Returns the list of commands and the sorted unique binding site IDs
    seen in the file.
    """
    seen_ids = []
    commands = []
    with open(defattr_in, 'r') as handle:
        for raw_line in handle:
            hit = re.match(r'\s+#(\d+)/(\w+):(\d+)\s+(-?\d+)', raw_line)
            if hit is None:
                continue
            model_key, chain, resi, p_bs = hit.groups()
            seen_ids.append(p_bs)
            model_num = int(model_key)
            if model_num not in open_files_dict:
                continue
            object_name = open_files_dict[model_num].split(".")[0]
            commands.append(
                f'alter_state 1, {object_name} & chain {chain} & resi {resi}, p.bs = {p_bs}'
            )
    unique_ids = sorted(int(item) for item in set(seen_ids))
    return commands, unique_ids

def transform_dict2(input_dict):
    """
    Regroups a dictionary by the second element of each value.

    Every value of ``input_dict`` is indexed as ``(items, group)``. The
    result maps each ``group`` to a two-element list: the original keys
    that carried that group, and the concatenation of their ``items``.
    """
    regrouped = {}
    for original_key, entry in input_dict.items():
        items = entry[0]
        group = entry[1]
        # first time a group is seen it gets an empty [keys, items] pair
        bucket = regrouped.setdefault(group, [[], []])
        bucket[0].append(original_key)
        bucket[1].extend(items)
    return regrouped

def convert_mapping_dict(d): # converts mapping dictionary to correct data type
    """
    Rebuilds a three-level nested dictionary, casting the keys and values
    of the innermost level to ``int``. The two outer levels of keys are
    kept as they are.
    """
    return {
        outer_key: {
            mid_key: {int(inner_key): int(inner_val) for inner_key, inner_val in innermost.items()}
            for mid_key, innermost in middle.items()
        }
        for outer_key, middle in d.items()
    }

def generate_pseudobond_lines_ChimeraX(df): # generates pseudobond lines for ChimeraX
    """
    Builds the lines of a ChimeraX pseudobond listing from a contacts table.

    Rows are grouped by their 'width' value. Each group contributes one
    "; radius = <width>" line followed by one line per row holding the end
    atom spec, the begin atom spec and the row's colour.
    """
    lines = []
    for radius, members in df.groupby('width'):
        lines.append(f"; radius = {radius}") # radius header for this group
        lines.extend(
            f"/{rec['auth_asym_id_end']}:{rec['auth_seq_id_end']}@{rec['auth_atom_id_end']}"
            f" /{rec['auth_asym_id_bgn']}:{rec['auth_seq_id_bgn']}@{rec['auth_atom_id_bgn']}"
            f" {rec['color']}"
            for _, rec in members.iterrows()
        )
    return lines

def generate_distance_lines_PyMol(df, mult = 1): # generates distance (dash) lines for PyMol
    """
    Builds PyMol commands that draw one distance object per row of ``df``.

    Rows are numbered from 1 in table order; row ``i`` produces four
    commands for object ``d<i>``: create the distance between the end and
    begin atoms, set its dash colour (from the row's hex colour), set its
    dash radius (the row's width times ``mult``) and hide its labels.
    """
    commands = []
    contacts = df.copy().reset_index(drop=True) # work on a copy with a clean index

    for d_idx, (_, row) in enumerate(contacts.iterrows(), start=1):
        atom_end = f"///{row['auth_asym_id_end']}/{row['auth_seq_id_end']}/{row['auth_atom_id_end']}"
        atom_bgn = f"///{row['auth_asym_id_bgn']}/{row['auth_seq_id_bgn']}/{row['auth_atom_id_bgn']}"

        commands += [
            f"distance d{d_idx}, {atom_end}, {atom_bgn}",
            f"set dash_color, {hex_to_rgb(row['color'])}, d{d_idx}",
            f"set dash_radius, {row['width']*mult}, d{d_idx}",
            f"hide labels, d{d_idx}",
        ]

    return commands

def chimeraX2PyMol(cxc_in, attr_in, fmt = 'cif'): # converts ChimeraX command and attribute files to PyMol script
    """
    Translates a ChimeraX command file and attribute file into the lines of
    a PyMol script.

    The script is assembled in this order: styling commands, one ``load``
    per model opened by the ChimeraX file, a default ``p.bs`` of -3 for all
    atoms, the per-residue binding site assignments, formatting commands,
    and finally a selection plus colour for every binding site ID other
    than -1.
    """
    model_order = extract_open_files(cxc_in, fmt = fmt) # model order from the ChimeraX command file
    pymol_attrs, bs_ids = transform_lines_PyMol(attr_in, model_order)

    script = ['# styling']
    script.extend(f'{cmd}' for cmd in pymol_looks)

    script.append('# load models')
    script.extend(f'load {model}' for model in model_order.values())

    script.append('alter all, p.bs = -3 # all atoms have BS = -3')

    script.append('# assignig binding site IDs')
    script.extend(f'{cmd}' for cmd in pymol_attrs)

    script.append('# formatting')
    script.extend(f'{cmd}' for cmd in pymol_formats)

    script.append('# colouring by binding site ID')
    script.append('color grey, unclust_ligs')
    for bs_id in bs_ids:
        if bs_id == -1: # -1 is coloured grey above via unclust_ligs
            continue
        rgb = hex_to_rgb(colors[bs_id])
        script += [
            f'select BS{bs_id}, p.bs = {bs_id}',
            f'set_color BS{bs_id}_color, {rgb}',
            f'color BS{bs_id}_color, BS{bs_id}',
        ]
    script.append('deselect')

    return script

def compute_symmetrical_log_limits(df, col_name = "MES"):
    """
    Computes the upper limit of a symmetrical log-scale axis for a column.

    The limit is the larger of ``|max|`` and ``|1 / min|`` of the non-missing
    values, rounded up to the nearest 0.5, so that an axis spanning
    ``1 / limit`` to ``limit`` contains every value.

    Special cases:
    - no non-missing values: min and max are both taken as 1;
    - a minimum of exactly 0: its reciprocal is taken as 1, so the limit is
      set by the maximum (and is never below 1);
    - an infinite limit: 2 is returned instead.

    Returns the rounded upper limit only.
    """
    values = df[col_name].dropna() # ignore missing values

    if len(values) == 0:
        min_value = max_value = 1
    else:
        min_value = values.min()
        max_value = values.max()

    # NumPy scalars do not raise on division by zero (they yield inf with a
    # warning), so the zero case has to be tested explicitly.
    if min_value == 0:
        reciprocal_of_min = 1
    else:
        reciprocal_of_min = 1 / min_value

    new_max = max(abs(max_value), abs(reciprocal_of_min))

    if math.isinf(new_max):
        return 2 # finite fallback for an unbounded limit

    return math.ceil(new_max * 2) / 2 # round up to the nearest 0.5

#### USER JOB FUNCTIONS ####

def get_all_bs_ress(results_df, job_id):
    """
    Builds a one-row-per-(residue, binding site) table from a results table.

    Rows whose 'binding_sites' value is missing are dropped (NaN != NaN),
    the remaining rows are exploded on 'binding_sites', a 'bs_id' column is
    added as "<job_id column>.<binding site>", 'UniProt_ResNum' is cast to
    int and 'RSA' is capped at 100.

    ``job_id`` is accepted for interface compatibility but not used: the
    identifier is read from the table's own 'job_id' column.
    ``results_df`` itself is not modified.
    """
    all_bs_ress = results_df.query('binding_sites == binding_sites').reset_index(drop=True)
    all_bs_ress = all_bs_ress.explode("binding_sites")
    all_bs_ress["bs_id"] = all_bs_ress["job_id"] + "." + all_bs_ress["binding_sites"].astype(str)
    all_bs_ress["UniProt_ResNum"] = all_bs_ress["UniProt_ResNum"].astype(int)
    # clip() instead of writing through .values: the array returned by
    # .values is not guaranteed to be a writable view of the column
    # (it is read-only under pandas Copy-on-Write).
    all_bs_ress["RSA"] = all_bs_ress["RSA"].clip(upper=100)
    return all_bs_ress

### SOME FIXED VARIABLES ###

# Loaded once at import time from DATA_FOLDER; chimeraX2PyMol() indexes it by
# binding site ID and passes the result to hex_to_rgb().
colors = load_pickle(os.path.join(DATA_FOLDER, "sample_colors_hex.pkl")) # sample colors

headings = ["ID", "RSA", "DS", "MES", "Size", "Cluster", "FS"] # headings of binding site table (assigned as its column names in results())

cc_new = ["UPResNum", "MSACol", "DS", "MES", "p", "AA", "RSA", "SS"] # headings of binding residue table (columns selected from the residue tables)

cc_new_sel = ["DS", "MES", "p", "RSA",] # subset of the binding residue table headings; results() checks these for "NaN" values

# One tooltip per entry of `headings`, in the same order.
bs_table_tooltips = [ # hover tooltips for binding site table
    "This is the ligand binding site identifier",
    "This is the site's avg. RSA",
    "This is the site's avg. divergence score",
    "This is the site's avg. missense enrichment score",
    "This is the site's size (in aa)",
    "This is the site's RSA cluster label",
    "This is the site's functional score",
]

# One tooltip per entry of `cc_new`, in the same order.
bs_ress_table_tooltips = [ # hover tooltips for binding residue table
    "This is the residue's UniProt number",
    "This is the residue's alignment column",
    "This is the residue's divergence score",
    "This is the residue's missense enrichment score",
    "This is the MES p-value",
    "This is the residue's amino acid",
    "This is the residue's RSA",
    "This is the residue's secondary structure",
]

# Column names of the Arpeggio contacts tables. Not referenced in the part of
# the file visible here — presumably used by the contacts routes further
# down; verify against callers.
arpeggio_cols = [
    'contact', 'distance',
    'auth_asym_id_end', 'auth_atom_id_end', 'auth_seq_id_end',
    'label_comp_id_end', 'auth_asym_id_bgn',
    'auth_atom_id_bgn', 'auth_seq_id_bgn', 'label_comp_id_bgn',
    'orig_label_asym_id_end', 'UniProt_ResNum_end',
    'coords_end', 'coords_bgn', 'width', 'color'
]

# Same list as arpeggio_cols without 'orig_label_asym_id_end'.
USER_arpeggio_cols = [
    'contact', 'distance',
    'auth_asym_id_end', 'auth_atom_id_end', 'auth_seq_id_end',
    'label_comp_id_end', 'auth_asym_id_bgn',
    'auth_atom_id_bgn', 'auth_seq_id_bgn', 'label_comp_id_bgn',
    'UniProt_ResNum_end',
    'coords_end', 'coords_bgn', 'width', 'color'
]

# Extra ChimeraX commands (background, silhouettes, what to display and how
# to colour waters and binding_site == -1). Not referenced in the part of the
# file visible here.
extra_cxc_lines = [
    "color white; set bgColor white;",
    "set silhouette ON; set silhouetteWidth 2; set silhouetteColor black;",
    "~disp; select ~protein; ~select : HOH; ~select ::binding_site==-1; disp sel; ~sel;",
    "col :HOH orange; col ::binding_site==-1 grey;",
    #"surf protein; transparency 30 s;",
]

# PyMol rendering settings; emitted first ("# styling") by chimeraX2PyMol().
pymol_looks = [
    "bg_color white",
    "space cmyk",
    "set ray_trace_fog=0",
    "set depth_cue=0",
    "set antialias, 4",
    "set hash_max, 300",
    "set ray_trace_mode, 1",
    "set ray_trace_gain, 0.005",
    "set cartoon_discrete_colors, 1",
]

# Pairs of PyMol commands per element: define a named colour, then apply it
# to atoms of that element. RGB components here are written as 0-255 integers.
pymol_atom_colors = [ # grabbed from ChimeraX which were grabbed from JMol
    "set_color N_color, [48,80,248]", "color N_color, elem N", # N - Nitrogen
    "set_color O_color, [255,13,13]", "color O_color, elem O", # O - Oxygen
    "set_color S_color, [255,255,48]", "color S_color, elem S", # S - Sulfur
    "set_color P_color, [255,128,0]", "color P_color, elem P", # P - Phosphorus
    "set_color Na_color, [171,92,242]", "color Na_color, elem Na", # Na - Sodium
    "set_color Cl_color, [31,240,31]", "color Cl_color, elem Cl", # Cl - Chlorine
    "set_color K_color, [143,64,212]", "color K_color, elem K", # K - Potassium
    "set_color Zn_color, [125,128,176]", "color Zn_color, elem Zn", # Zn - Zinc
    "set_color Mg_color, [138,255,0]", "color Mg_color, elem Mg", # Mg - Magnesium
    "set_color Ca_color, [61,255,0]", "color Ca_color, elem Ca", # Ca - Calcium
    "set_color Se_color, [255,161,0]", "color Se_color, elem Se", # Se - Selenium
    "set_color Fe_color, [224,102,51]", "color Fe_color, elem Fe", # Fe - Iron
    "set_color F_color, [144,224,80]", "color F_color, elem F", # F - Fluorine
    "set_color Mn_color, [156,122,199]", "color Mn_color, elem Mn", # Mn - Manganese
    "set_color Br_color, [166,41,41]", "color Br_color, elem Br", # Br - Bromine
    "set_color Ni_color, [80,208,80]", "color Ni_color, elem Ni", # Ni - Nickel
    "set_color Cu_color, [200,128,51]", "color Cu_color, elem Cu", # Cu - Copper
]

# PyMol display commands emitted under "# formatting" by chimeraX2PyMol().
# They rely on the p.bs property: -1 selects unclust_ligs, water is set to
# -2, and values above -1 select clust_ligs.
pymol_formats = [
    "color white, all",
    "hide everything, all",
    "select prot, polymer.protein",
    "select water, resn HOH",
    "show cartoon, prot",
    "color red, water",
    "select unclust_ligs, p.bs = -1",
    "alter_state 1, water, p.bs=-2",
    "select clust_ligs, p.bs > -1",
    "show licorice, clust_ligs",
    "deselect",
    "hide everything, water",
]

# PyMol dash settings. Not referenced in the part of the file visible here.
pymol_dash = [
    'set dash_as_cylinders, true',
    'set dash_gap, 0.15',
    'set dash_length, 0.25',
    'set dash_round_ends, false',
]

# Like pymol_formats but without the p.bs-based commands, and with water
# coloured gold instead of red. Not referenced in the part of the file
# visible here.
basic_pymol_format = [
    "color white, all",
    "hide everything, all",
    "select prot, polymer.protein",
    "select water, resn HOH",
    "show cartoon, prot",
    "color gold, water",
    "deselect",
    "hide everything, water",
]

info_file = "README.txt" # name of the info file about contacts visualisation

contacts_info = """ 
Arpeggio protein-ligand contacts visualisation

The Arpeggio colour scheme is used to visually represent different types of interactions. Below are the hex codes, their corresponding color names and the interactions they represent.

- #000000: Black - Represents 'clash' interactions.
- #999999: Dim Gray - Used for 'covalent', 'vdw_clash', 'vdw', and 'proximal' interactions.
- #f04646: Red Orange - Used for 'hbond' and 'polar' interactions.
- #fc7600: Pumpkin - Represents 'weak_hbond' and 'weak_polar' interactions.
- #3977db: Royal Blue - Indicates 'xbond' (halogen bond) interactions.
- #e3e159: Pale Goldenrod - Represents 'ionic' interactions.
- #800080: Purple - Used for 'metal_complex' interactions.
- #00ccff: Vivid Sky Blue - Indicates 'aromatic' interactions.
- #006633: Dark Green - Represents 'hydrophobic' interactions.
- #ff007f: Bright Pink - Used for 'carbonyl' interactions.

The width of the pseudobonds represents the distance between the interacting atoms. Width of 0.125 if the atoms are within VdW Clash distance, otherwise 0.0625.
""" # README text describing the contacts colour scheme and pseudobond widths

### READING INPUT DATA ###

# Protein names dictionary; results() reads the "entry", "upid" and
# "prot_name_long" fields of each protein from it.
LIGYSIS_prots_data = load_pickle(f'{DATA_FOLDER}/LIGYSIS_protein_names_dict.pkl')

# Extended lookup; index() uses it to map a submitted protein ID to an accession.
LIGYSIS_prots_dat_EXT = load_pickle(f'{DATA_FOLDER}/LIGYSIS_protein_names_dict_RF3.pkl')

# Nested chain mappings of the representative structures (auth/label asym IDs).
LIGYSIS_rep_chain_mappings = load_pickle(f'{DATA_FOLDER}/LIGYSIS_rep_chain_mappings.pkl')

# sorted unique keys of the extended lookup (offered on the home page)
prot_ids = sorted(set(LIGYSIS_prots_dat_EXT))

# sorted unique values of the extended lookup
ACCS = sorted(set(LIGYSIS_prots_dat_EXT.values()))

######################## FLASK APPLICATION ########################

# Flask application object; static files are served from ./static under STATIC_URL_PATH.
app = Flask(__name__, static_folder='static', static_url_path=STATIC_URL_PATH)
# NOTE(review): if the SECRET_KEY environment variable is unset, sessions are
# signed with the hard-coded fallback below, which is public in the source.
# Make sure SECRET_KEY is always set in deployment.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'default_secret_key')
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=60)  # Long expiry
app.config['SESSION_COOKIE_NAME'] = 'ligysis_session'

# Cross-origin requests are allowed only for paths under URL_PREFIX and only
# from the origins listed here.
CORS(app, resources={rf"{URL_PREFIX}/*": {"origins": ["http://www-dev.compbio.dundee.ac.uk",
                                                      "http://www.compbio.dundee.ac.uk",
                                                      "https://www.compbio.dundee.ac.uk"]}})


# Analytics configuration
@app.context_processor
def inject_config():
    """
    Makes the analytics settings from config available in every template.
    """
    return dict(
        ANALYTICS_DOMAIN=ANALYTICS_DOMAIN,
        ANALYTICS_SCRIPT_URL=ANALYTICS_SCRIPT_URL,
    )

@app.context_processor
def inject_globals():
    """
    Makes the SUBMISSIONS_ENABLED flag from config available in every template.
    """
    return {'SUBMISSIONS_ENABLED': SUBMISSIONS_ENABLED}

@app.context_processor
def inject_service_status():
    """
    Makes the service status settings from config available in every template.
    """
    return dict(
        SERVICE_STATUS=SERVICE_STATUS,
        SERVICE_NOTICE=SERVICE_NOTICE,
        SERVICE_NOTICE_UPDATED=SERVICE_NOTICE_UPDATED,
        SERVICE_ALERT_CLASS=SERVICE_ALERT_CLASS,
    )

# Use Blueprint to add URL prefix to serve site from a sub-directory.
# All routes below are registered on this blueprint, so their endpoints are
# referenced as 'main.<function name>' in url_for().
main = Blueprint('main', __name__, url_prefix=URL_PREFIX, static_url_path=STATIC_URL_PATH, static_folder='static')

# Make sure the sessions folder exists before the session database is initialised.
os.makedirs(SESSIONS_FOLDER, exist_ok=True)

# Import-time side effect: sets up the session database (see session_db.initialize_db).
initialize_db()

# Module-wide logger used by the route handlers.
custom_logger = setup_logging(name='app')

# Register the filters used in job status table
app.jinja_env.filters['datetime_parse'] = datetime_parse
app.jinja_env.filters['datetime_format'] = datetime_format

################### ROUTES FOR LIGYSIS RESULTS ####################

@main.route('/', methods=['POST', 'GET'])
def index(): # route for index main site
    """
    Home page and entry form.

    GET renders the home page with the list of protein IDs.

    POST handles two forms:
    - 'proteinId' present: look up the accession for the submitted ID, pick
      the lowest-numbered segment found in the accession's site table and
      redirect to its results page. Any failure renders 'error.html'.
    - otherwise: 'jobId' is read (a missing field makes Flask answer with a
      400 error) and, if a folder with that name exists in
      USER_JOBS_OUT_FOLDER, the request is redirected to the user results
      page; if not, 'USER_error.html' is rendered.
    """
    if request.method != 'POST':
        return render_template('index.html', prot_ids = prot_ids) # renders home page with all tasks

    if 'proteinId' in request.form: # this is to visualise pre-computed LIGYSIS results
        prot_id = request.form['proteinId'].strip() # get protein ID from form and strip any whitespace
        try:
            # ACC is a value of LIGYSIS_prots_dat_EXT and ACCS is built from
            # those same values, so no separate membership test is needed.
            ACC = LIGYSIS_prots_dat_EXT[prot_id]

            bss_data = pd.read_pickle(os.path.join(SITE_TABLES_FOLDER, "{}_bss.pkl".format(ACC))) # site data
            segs = {el.split("_")[1] for el in bss_data.lab.tolist()}
            # compare segments numerically, as results() does; a plain string
            # sort would put "10" before "2"
            first_seg = min(segs, key=int)

            return redirect(url_for('main.results', prot_id = ACC, seg_id = first_seg)) # renders results page
        except Exception:
            custom_logger.error(f"Error processing protein ID: {prot_id}", exc_info=True)
            return render_template('error.html', prot_id = prot_id)

    # this is to visualise user-submitted results, Input ID will be randomly generated by the app
    job_id = request.form['jobId']
    # the job ID is used as a folder name, so it must be a single plain path
    # component (no separators, not empty, not "." or "..")
    is_plain_name = job_id not in ('', '.', '..') and os.path.basename(job_id) == job_id
    if is_plain_name and os.path.isdir(os.path.join(USER_JOBS_OUT_FOLDER, job_id)):
        return redirect(url_for('main.user_results', session_id=job_id, submission_time='none')) # renders user results page

    custom_logger.warning("User job folder not found for job ID %r", job_id)
    return render_template('USER_error.html', job_id = job_id)

@main.route('/results/<prot_id>/<seg_id>', methods = ['POST', 'GET'])
def results(prot_id, seg_id): # route for results site. Takes Prot ID and Seg ID
    """
    Renders the results page ('structure.html') for one protein segment.

    Loads the protein's binding site table and the segment's binding
    residue table, the representative structure and its residue number
    mappings, the lists of available structure files and some summary
    counts, and passes everything to the template.

    Responds with 404 when the site table has no binding sites for the
    requested segment.
    """
    seg_name = prot_id + "_" + seg_id # combining UniProt ID and Segment ID into SEGMENT NAME

    bss_data = pd.read_pickle(os.path.join(SITE_TABLES_FOLDER, "{}_bss.pkl".format(prot_id))) # site data
    segs = sorted({int(el.split("_")[1]) for el in bss_data.lab.tolist()}) # segment IDs present in the site table

    bss_MES_axis_lim = compute_symmetrical_log_limits(bss_data) # computed before NaNs are replaced by strings

    bss_data = bss_data.fillna("NaN") # pre-processing could also be done before saving the pickle
    bss_data.columns = headings # changing table column names

    # Site names are "<UniProt ID>_<Segment ID>_<Binding Site ID>". Match on
    # the "<seg_name>_" prefix: a substring match on seg_name alone would
    # also pick up other segments (e.g. "X_1" is contained in "X_10_0").
    bss_prot = bss_data[bss_data.ID.str.startswith(seg_name + "_")].copy() # extracting Segment of interest rows from table
    if bss_prot.empty:
        abort(404, description="No binding sites found for segment {}".format(seg_name))

    bss_prot.ID = bss_prot.ID.str.split("_").str[2] # extracting binding site ID from binding site name
    bss_prot.ID = bss_prot.ID.astype(int) # changing binding site ID to integer data type
    bss_prot.Cluster = bss_prot.Cluster.astype(int) # changing cluster label to integer data type
    bss_prot = bss_prot.sort_values(by = "ID") # sorting binding site table rows by ID

    first_site = bss_prot.ID.unique().tolist()[0] # first binding site ID
    first_site_name = seg_name + "_" + str(first_site) # name of first binding site (data shown by default when opening page)

    bss_ress = pd.read_pickle(os.path.join(RES_TABLES_FOLDER, "{}_ress.pkl".format(seg_name))) # residue data

    bs_ress_MES_axis_lim = compute_symmetrical_log_limits(bss_ress) # computed before NaNs are replaced by strings

    bss_ress = bss_ress.fillna("NaN") # pre-processing could also be done before saving the pickle

    # first_site_name and prot_id are referenced by name inside the query strings
    first_site_data = bss_ress.query('bs_id == @first_site_name')[cc_new].to_dict(orient="list") # data of first binding site residues

    data1 = bss_prot.to_dict(orient="list") # converting table to dictionary to pass to client

    prot_ress = bss_ress.query('up_acc == @prot_id')[cc_new]
    data2 = prot_ress.to_dict(orient="list")

    prot_seg_rep_strucs = load_pickle(os.path.join(REP_STRUCS_FOLDER, "{}_coords.pkl".format(prot_id))) # representative structures dict (only successfully run segments)
    segment_reps = {k: v for k, v in prot_seg_rep_strucs[prot_id].items() if k in segs}

    # residues per binding site (keys as strings), plus all binding residues under "ALL_BINDING"
    binding_sites_dict = bss_ress.groupby('binding_sites')['UPResNum'].apply(list).to_dict()
    binding_sites_dict = {str(key): value for key, value in binding_sites_dict.items()}
    binding_sites_dict['ALL_BINDING'] = sorted(set(bss_ress['UPResNum']))
    seg_ress_dict = binding_sites_dict

    protein_atoms_dict = load_pickle(os.path.join(DATA_FOLDER, "segment_prot_struc_dict_DEF.pkl"))

    prot_atoms_rep = list(protein_atoms_dict[prot_id][seg_id].keys())[0] # first key, of the form "<PDB ID>_<chain>"
    prot_pdb_id, prot_pdb_chain = prot_atoms_rep.split("_")

    pdb2up_dict = load_pickle(f'{CIF_SIFTS_DIR}/{prot_pdb_id}_pdb2up.pkl')
    up2pdb_dict = load_pickle(f'{CIF_SIFTS_DIR}/{prot_pdb_id}_up2pdb.pkl')

    entry_name = LIGYSIS_prots_data[prot_id]["entry"]
    upid_name = LIGYSIS_prots_data[prot_id]["upid"]
    prot_long_name = LIGYSIS_prots_data[prot_id]["prot_name_long"]

    # innermost keys and values cast to int
    up2pdb_dict_converted = convert_mapping_dict(up2pdb_dict)
    pdb2up_dict_converted = convert_mapping_dict(pdb2up_dict)

    arpeggio_dir = os.path.join(PROTS_FOLDER, prot_id, str(seg_id), "arpeggio")
    arpeggio_proc_files = [f for f in os.listdir(arpeggio_dir) if f.endswith(".pkl")]
    arpeggio_proc_pdbs = [el.split("_")[0] for el in arpeggio_proc_files] # part of each file name before the first "_"

    assembly_pdb_ids = sorted(arpeggio_proc_pdbs) # sorted PDB IDs

    simple_pdbs = os.listdir(os.path.join(PROTS_FOLDER, prot_id, str(seg_id), "simple")) # simple PDB file names (single chain)
    simple_pdbs = [el for el in simple_pdbs if el.endswith(".cif")]
    simple_pdbs_full_path = [url_for('main.data_serve_file', prot_id=prot_id, seg_id=seg_id, filename=el) for el in simple_pdbs]

    n_strucs = len(arpeggio_proc_pdbs) # number of structures
    n_ligs = len(load_pickle(os.path.join(PROTS_FOLDER, prot_id, seg_id, "results", f'{prot_id}_{seg_id}_ALL_inf_ligs_fingerprints.pkl'))) # number of ligands
    n_sites = len(bss_prot) # number of binding sites
    seg_stats = {prot_id: {seg_id: {'strucs': n_strucs, 'ligs': n_ligs, 'bss': n_sites}}}

    rep_chain_ids = LIGYSIS_rep_chain_mappings[prot_id][seg_id][prot_pdb_id][prot_pdb_chain]
    rep_auth_asym_id = rep_chain_ids["auth_asym_id"]
    rep_label_asym_id = rep_chain_ids["label_asym_id"]

    # columns without any "NaN" placeholder
    headings_with_data = [heading for heading in headings if "NaN" not in data1[heading]]
    cc_new_sel_with_data = [heading for heading in cc_new_sel if "NaN" not in data2[heading]]

    return render_template(
        'structure.html', data = data1, headings = headings, data2 = data2, cc_new = cc_new, cc_new_sel = cc_new_sel, colors = colors,
        seg_ress_dict = seg_ress_dict, prot_id = prot_id, seg_id = seg_id, segment_reps = segment_reps,
        first_site_data = first_site_data, bs_table_tooltips = bs_table_tooltips, bs_ress_table_tooltips = bs_ress_table_tooltips,
        pdb2up_dict = pdb2up_dict_converted, up2pdb_dict = up2pdb_dict_converted, seg_stats = seg_stats, entry_name = entry_name, upid_name = upid_name, prot_long_name = prot_long_name,
        simple_pdbs = simple_pdbs_full_path, assembly_pdb_ids = assembly_pdb_ids, prot_atoms_rep = prot_atoms_rep, SITE_TABLES_FOLDER = SITE_TABLES_FOLDER, RES_TABLES_FOLDER = RES_TABLES_FOLDER,
        bss_MES_axis_lim = bss_MES_axis_lim, bs_ress_MES_axis_lim = bs_ress_MES_axis_lim, rep_auth_asym_id = rep_auth_asym_id, rep_label_asym_id = rep_label_asym_id,
        headings_with_data = headings_with_data, cc_new_sel_with_data = cc_new_sel_with_data,
    )

@main.route('/about')
def about():
    """
    Route for the about page: renders 'about.html'.
    """
    page = render_template('about.html')
    return page

@main.route('/help')
def help():
    """
    Route for the help page: renders 'help.html'.
    """
    page = render_template('help.html')
    return page

@main.route('/contact')
def contact():
    """
    Route for the contact page: renders 'contact.html'.
    """
    page = render_template('contact.html')
    return page

@main.route('/files/<path:filename>')
def data_serve_file(filename):
    """
    Serves a file from the "simple" folder of a protein segment.

    The protein and segment are given by the 'prot_id' and 'seg_id' query
    parameters. Responds with 400 when either is missing or is not a plain
    file-system name, and with 404 when the file does not exist.
    """
    prot_id = request.args.get('prot_id')
    seg_id = request.args.get('seg_id')

    if not prot_id or not seg_id:
        abort(400, description="Missing required parameters: prot_id and seg_id")

    # Both values become directory names below. secure_filename() leaves a
    # plain name unchanged, so any difference means the value contained path
    # separators, "..", or other characters that must not reach the path.
    if secure_filename(prot_id) != prot_id or secure_filename(seg_id) != seg_id:
        abort(400, description="Invalid parameters: prot_id and seg_id")

    # Sanitize filename
    sanitized_filename = secure_filename(filename)
    directory = os.path.join(PROTS_FOLDER, prot_id, seg_id, "simple")

    try:
        return send_from_directory(directory, sanitized_filename)
    except FileNotFoundError:
        abort(404)

@main.route('/assemblies/<path:filename>')
def serve_assembly(filename):
    """
    Serves ``filename`` from ASSEMBLY_FOLDER; answers 404 if a
    FileNotFoundError is raised while doing so.
    """
    try:
        response = send_from_directory(ASSEMBLY_FOLDER, filename)
    except FileNotFoundError:
        abort(404)
    return response

@main.route('/alignments/<prot_id>/<seg_id>/<path:filename>')
def serve_alignment(prot_id, seg_id, filename):
    """
    Serves ``filename`` from the "variants" folder of a protein segment.

    Responds with 400 when ``prot_id`` or ``seg_id`` is not a plain
    file-system name, and with 404 when the file does not exist.
    """
    # prot_id and seg_id become directory names; reject values such as ".."
    # that secure_filename() would have to alter.
    if secure_filename(prot_id) != prot_id or secure_filename(seg_id) != seg_id:
        abort(400, description="Invalid protein or segment identifier")

    ALIGNMENTS_FOLDER = os.path.join(PROTS_FOLDER, prot_id, seg_id, "variants")
    try:
        return send_from_directory(ALIGNMENTS_FOLDER, filename)
    except FileNotFoundError:
        abort(404)

@main.route('/get-table', methods=['POST'])
def get_table(): # route to get binding site residues for a given binding site
    """
    Returns, as JSON, the binding residue table rows of one binding site.

    Expects a JSON body with a 'label' of the form
    "<protein ID>_<segment ID>_<binding site ID>". Responds with 400 when
    the label is missing or malformed and with 404 when the segment has no
    residue table.
    """
    payload = request.json
    lab = payload.get('label') if isinstance(payload, dict) else None

    # The label is split into exactly three parts and used to build a file
    # name, so it must be a string with two underscores that
    # secure_filename() would leave unchanged (no separators, no "..").
    if not isinstance(lab, str) or lab.count("_") != 2 or secure_filename(lab) != lab:
        abort(400, description="Invalid or missing binding site label")

    prot_id, seg_id, _ = lab.split("_")

    seg_name = prot_id + "_" + seg_id

    table_path = os.path.join(RES_TABLES_FOLDER, "{}_ress.pkl".format(seg_name))
    if not os.path.isfile(table_path):
        abort(404)

    seg_ress = pd.read_pickle(table_path) # residue data

    seg_ress = seg_ress.fillna("NaN") # pre-processing could also be done before saving the pickle

    site_ress = seg_ress.query('bs_id == @lab')[cc_new] # `lab` is referenced by name in the query string

    site_data = site_ress.to_dict(orient="list")

    return jsonify(site_data)

def _is_inside_any(path, roots):
    """
    Returns True when ``path``, with symlinks and ".." resolved, lies inside
    at least one of the directories in ``roots``.
    """
    resolved = os.path.realpath(path)
    for root in roots:
        resolved_root = os.path.realpath(root)
        try:
            if os.path.commonpath([resolved, resolved_root]) == resolved_root:
                return True
        except ValueError: # e.g. paths on different drives
            continue
    return False

@main.route('/download-csv')
def download_csv(): # route to download .csv tables
    """
    Converts a pickled table to CSV and returns it as a download.

    The table is given by the 'filepath' query parameter. Because that
    value comes from the client and the file is un-pickled, only ".pkl"
    files located inside the table folders are accepted.

    Responds with 400 when no path is given, 403 when the path is outside
    the allowed folders or is not a ".pkl" file, and 404 when the file does
    not exist.
    """
    filepath = request.args.get('filepath', default=None, type=str)

    if filepath is None:
        return "Filepath not provided", 400

    # SECURITY: un-pickling executes code from the file, so the path must
    # never be taken from the request without restriction.
    # NOTE(review): the allowed folders below are the table folders passed
    # to the results template plus the user jobs output folder — extend the
    # tuple if downloads are requested from elsewhere.
    allowed_roots = (SITE_TABLES_FOLDER, RES_TABLES_FOLDER, USER_JOBS_OUT_FOLDER)
    if not filepath.endswith(".pkl") or not _is_inside_any(filepath, allowed_roots):
        abort(403)

    if not os.path.isfile(filepath):
        abort(404)

    df = pd.read_pickle(filepath)

    output = df.to_csv(index=False)

    # file name up to its first "." with a .csv extension
    filenameout = os.path.basename(filepath).split(".")[0] + ".csv"

    return Response(
        output,
        mimetype="text/csv",
        headers={"Content-disposition": "attachment; filename={}".format(filenameout)},
    )

@main.route('/process-model-order', methods=['POST'])
def process_model_order(): # route to process model order data from ChimeraX files
    """
    Maps the binding site attributes of a segment onto the models loaded by
    3DMol.js.

    Expects a JSON body with 'modelOrder' (dictionary of loaded file ->
    3DMol.js model ID) and 'segmentName' ("<protein ID>_<segment ID>").
    Returns JSON with 'resultTuples' (model ID, chain, residue number,
    binding site ID) and 'maxId' (largest binding site ID).

    Responds with 400 for a malformed body and with 404 when the segment's
    ChimeraX files are missing or contain no binding site attributes.
    """
    data = request.json
    if not isinstance(data, dict):
        abort(400, description="Expected a JSON object")

    loaded_order = data.get('modelOrder') # this is the order in which files have been loaded by 3DMol.js
    segment_name = data.get('segmentName') # name of the segment
    if not isinstance(loaded_order, dict) or not isinstance(segment_name, str):
        abort(400, description="Missing or invalid modelOrder / segmentName")

    # The segment name is split into exactly two parts and used to build
    # file paths, so it must have one underscore and be a plain name that
    # secure_filename() would leave unchanged.
    if segment_name.count("_") != 1 or secure_filename(segment_name) != segment_name:
        abort(400, description="Invalid segmentName")

    prot_id, seg_id = segment_name.split("_") # extracting protein ID and segment ID
    results_dir = os.path.join(PROTS_FOLDER, prot_id, seg_id, "results")
    cxc_in = os.path.join(results_dir, f'{segment_name}_ALL_inf_average_0.5.cxc') # ChimeraX command file
    attr_in = os.path.join(results_dir, f'{segment_name}_ALL_inf_average_0.5.defattr') # ChimeraX attribute file
    if not (os.path.isfile(cxc_in) and os.path.isfile(attr_in)):
        abort(404)

    model_order = extract_open_files(cxc_in) # order in which ChimeraX opens files (important for binding site attribute assignment)

    # drop any query string from the loaded file names
    loaded_order = {key.split('?')[0]: value for key, value in loaded_order.items()}
    result_tuples, bs_ids = transform_lines_3DMol(attr_in, model_order, loaded_order) # binding site attribute data list of tuples

    if not bs_ids: # max() of an empty list would raise ValueError
        abort(404, description="No binding site attributes found")

    response_data = {
        'resultTuples': result_tuples,
        'maxId': max(bs_ids) # maximum binding site ID
    }

    return jsonify(response_data) # send jsonified data back to client

@main.route('/get-contacts', methods=['POST'])
def get_contacts(): # route to get contacts data from Arpeggio table for a given assembly
    """
    Returns the filtered contacts of one assembly plus per-ligand residue data.

    Reads 'modelData', 'proteinId' and 'segmentId' from the JSON body. The
    response holds 'contacts' (JSON records string of the arpeggio_cols
    columns) and 'protein' (ligand ID -> [contacting residues, binding site]).
    """
    payload = request.json
    active_model = payload['modelData']
    prot_id = payload['proteinId']
    seg_id = payload['segmentId']

    seg_dir = f'{PROTS_FOLDER}/{prot_id}/{seg_id}'

    all_cons = pd.read_pickle(f'{seg_dir}/arpeggio/{active_model}_bio_proc.pkl')

    # keep inter-entity atom-atom contacts that are not purely "proximal" and
    # whose end atom is neither N nor O
    keep_rows = (
        (all_cons['contact'].apply(lambda x: x != ["proximal"])) &
        (all_cons['interacting_entities'] == "INTER") &
        (all_cons['type'] == "atom-atom") &
        (~all_cons['auth_atom_id_end'].isin(['N', 'O',]))
    )
    cons = all_cons[keep_rows].copy()

    json_cons = cons[arpeggio_cols].to_json(orient='records')

    membership = pd.read_pickle(f'{seg_dir}/results/{prot_id}_{seg_id}_ALL_inf_bss_membership.pkl')

    # invert site -> members into member -> site, keeping only members of the active model
    struc_ligs = {
        member: site
        for site, members in membership.items()
        for member in members
        if member.startswith(active_model)
    }

    cons["LIGAND_ID"] = cons.label_comp_id_bgn + "_" + cons.auth_asym_id_bgn + "_" + cons.auth_seq_id_bgn.astype(str)

    residue_cols = ["label_comp_id_end", "auth_asym_id_end", "auth_seq_id_end"]

    struc_prot_data = {}
    for member, site in struc_ligs.items():
        ligand_id = member.partition("_")[2] # everything after the first "_"
        ligand_rows = cons[cons.LIGAND_ID == ligand_id]
        residues = list(ligand_rows[residue_cols].drop_duplicates().itertuples(index=False, name=None))
        struc_prot_data[ligand_id] = [residues, site]

    return jsonify({
        'contacts': json_cons,
        'protein': struc_prot_data,
    }) # send jsonified data back to client

@main.route('/get-uniprot-mapping', methods=['POST'])
def get_uniprot_mapping(): # route to get UniProt residue and chain mapping for a given pdb
    """
    Returns the residue and chain mappings stored for one PDB entry.

    Reads 'pdbId', 'proteinId' and 'segmentId' from the JSON body (only the
    PDB ID is used to locate files) and responds with the keys 'pdb2up',
    'up2pdb', 'chain2acc' and 'chains'.
    """
    payload = request.json
    pdb_id = payload['pdbId']
    prot_id = payload['proteinId'] # not used below, but still required in the request
    seg_id = payload['segmentId'] # not used below, but still required in the request

    # the three pickles in CIF_SIFTS_DIR share the same naming scheme
    sifts = {
        suffix: load_pickle(f'{CIF_SIFTS_DIR}/{pdb_id}_{suffix}.pkl')
        for suffix in ("pdb2up", "up2pdb", "chain2acc")
    }

    remap_df = pd.read_pickle(f'{CHAIN_MAPPING_DIR}/{pdb_id}_bio_chain_remapping.pkl')
    chains_map = {
        new_chain: orig_chain
        for new_chain, orig_chain in zip(remap_df["new_auth_asym_id"], remap_df["orig_label_asym_id"])
    }

    return jsonify({
        'pdb2up': convert_mapping_dict(sifts["pdb2up"]),
        'up2pdb': convert_mapping_dict(sifts["up2pdb"]),
        'chain2acc': sifts["chain2acc"],
        'chains': chains_map,
    })

@main.route('/download-superposition-ChimeraX', methods=['POST'])
def download_superposition_ChimeraX(): # route to download ChimeraX script to visualise ligand superposition
    """
    Returns a zip with everything needed to view a segment's ligand superposition in ChimeraX.

    Reads 'proteinId' and 'segmentId' from the JSON body. The zip contains the
    .cif files of the segment's "simple" directory, the .defattr attribute
    file and a .cxc command file assembled here: the stored .cxc up to and
    including its "# colouring" line, then extra_cxc_lines, one colour command
    per binding site, and the final delete/save commands.

    Returns 400 if an ID is missing and 404 if a file to be zipped is absent.
    """
    data = request.get_json() # Get JSON data from the POST request

    prot_id = data.get('proteinId')
    seg_id = data.get('segmentId')

    if not prot_id or not seg_id: # Validate the received data
        return jsonify({'error': 'Missing data'}), 400

    # The segment ID may arrive as a JSON number; os.path.join only accepts
    # strings, so normalise once and use the same value for every path.
    seg_id = str(seg_id)

    simple_dir = os.path.join(PROTS_FOLDER, prot_id, seg_id, "simple")
    simple_pdbs = [f'{simple_dir}/{el}' for el in os.listdir(simple_dir) if el.endswith(".cif")]

    seg_name = f'{prot_id}_{seg_id}'
    results_dir = os.path.join(PROTS_FOLDER, prot_id, seg_id, "results")
    cxc_in = os.path.join(results_dir, f'{seg_name}_ALL_inf_average_0.5.cxc') # ChimeraX command file
    attr_in = os.path.join(results_dir, f'{seg_name}_ALL_inf_average_0.5.defattr') # ChimeraX attribute file

    bs_membership = pd.read_pickle(f'{PROTS_FOLDER}/{prot_id}/{seg_id}/results/{prot_id}_{seg_id}_ALL_inf_bss_membership.pkl')

    bs_ids = list(bs_membership.keys())

    # copy the non-empty lines of the stored script up to and including the "# colouring" line
    cxc_lines = []
    with open(cxc_in, 'r') as file:
        for line in file:
            stripped = line.strip()
            if stripped == '':
                continue
            cxc_lines.append(stripped)
            if stripped.startswith("# colouring"):
                break

    cxc_lines.extend(extra_cxc_lines)
    cxc_lines.extend(f'col ::binding_site=={bs_id} {colors[bs_id]};' for bs_id in bs_ids)
    cxc_lines.append('delete pseudobond;')
    cxc_lines.append(f'save {prot_id}_{seg_id}_ALL_inf_average_0.5.cxs;')
    cxc_lines_string = "\n".join(cxc_lines)

    cxc_file = f'{seg_name}_ALL_inf_average_0.5.cxc'

    files_to_zip = simple_pdbs + [attr_in]

    # Fail before building the archive if anything is missing; report only the
    # file name so the server's directory layout is not disclosed.
    for file_path in files_to_zip:
        if not os.path.exists(file_path):
            return f"File {os.path.basename(file_path)} not found", 404

    memory_file = io.BytesIO() # holds the in-memory zip file

    with zipfile.ZipFile(memory_file, 'w') as zf:
        for file_path in files_to_zip:
            zf.write(file_path, os.path.basename(file_path))

        # the generated command file is written straight from memory
        zf.writestr(cxc_file, cxc_lines_string.encode('utf-8'))

    memory_file.seek(0)  # Seek to the beginning of the BytesIO object before sending it

    return send_file( # Send the zip file to the client as a downloadable file
        memory_file,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'{prot_id}_{seg_id}_superposition_ChimeraX.zip'
    )

@main.route('/download-superposition-PyMol', methods=['POST'])
def download_superposition_PyMol(): # route to download PyMol script to visualise ligand superposition
    """
    Returns a zip with everything needed to view a segment's ligand superposition in PyMol.

    Reads 'proteinId' and 'segmentId' from the JSON body. The zip contains the
    .cif files of the segment's "simple" directory and a .pml script produced
    by chimeraX2PyMol from the segment's ChimeraX command and attribute files.

    Returns 400 if an ID is missing and 404 if a file to be zipped is absent.
    """
    data = request.get_json() # Get JSON data from the POST request

    prot_id = data.get('proteinId')
    seg_id = data.get('segmentId')

    if not prot_id or not seg_id: # Validate the received data
        return jsonify({'error': 'Missing data'}), 400

    # The segment ID may arrive as a JSON number; os.path.join only accepts
    # strings, so normalise once and use the same value for every path.
    seg_id = str(seg_id)

    simple_dir = os.path.join(PROTS_FOLDER, prot_id, seg_id, "simple")
    simple_pdbs = [f'{simple_dir}/{el}' for el in os.listdir(simple_dir) if el.endswith(".cif")]

    seg_name = f'{prot_id}_{seg_id}'
    results_dir = os.path.join(PROTS_FOLDER, prot_id, seg_id, "results")
    cxc_in = os.path.join(results_dir, f'{seg_name}_ALL_inf_average_0.5.cxc') # ChimeraX command file
    attr_in = os.path.join(results_dir, f'{seg_name}_ALL_inf_average_0.5.defattr') # ChimeraX attribute file

    pymol_lines = chimeraX2PyMol(cxc_in, attr_in)
    pymol_lines_string = "\n".join(pymol_lines)

    pymol_file = f'{seg_name}_ALL_inf_average_0.5.pml'

    # Fail before building the archive if anything is missing; report only the
    # file name so the server's directory layout is not disclosed.
    for file_path in simple_pdbs:
        if not os.path.exists(file_path):
            return f"File {os.path.basename(file_path)} not found", 404

    memory_file = io.BytesIO() # holds the in-memory zip file

    with zipfile.ZipFile(memory_file, 'w') as zf:
        for file_path in simple_pdbs:
            zf.write(file_path, os.path.basename(file_path))

        # the generated script is written straight from memory
        zf.writestr(pymol_file, pymol_lines_string.encode('utf-8'))

    memory_file.seek(0)  # Seek to the beginning of the BytesIO object before sending it

    return send_file( # Send the zip file to the client as a downloadable file
        memory_file,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'{prot_id}_{seg_id}_superposition_PyMol.zip'
    )

@main.route('/download-assembly-ChimeraX', methods=['POST'])
def download_assembly_ChimeraX(): # route to download ChimeraX script to visualise assembly
    """
    Returns a zip to visualise one assembly and its ligand contacts in ChimeraX.

    Reads 'proteinId', 'segmentId' and 'pdbId' from the JSON body (400 if any
    is missing). The zip holds the assembly .cif (when it exists on disk), a
    pseudobond (.pb) file built from the filtered contacts table, a generated
    .cxc command file and the contacts info file.
    """
    payload = request.get_json() # JSON data of the POST request

    prot_id = payload.get('proteinId')
    seg_id = payload.get('segmentId')
    pdb_id = payload.get('pdbId')

    if not (prot_id and seg_id and pdb_id): # all three IDs are required
        return jsonify({'error': 'Missing data'}), 400

    seg_dir = f'{PROTS_FOLDER}/{prot_id}/{seg_id}'
    assembly_file = f'{ASSEMBLY_FOLDER}/{pdb_id}_bio.cif' # assembly cif file

    all_cons = pd.read_pickle(f'{seg_dir}/arpeggio/{pdb_id}_bio_proc.pkl')

    # keep inter-entity atom-atom contacts that are not purely "proximal" and
    # whose end atom is neither N nor O
    keep_rows = (
        (all_cons['contact'].apply(lambda x: x != ["proximal"])) &
        (all_cons['interacting_entities'] == "INTER") &
        (all_cons['type'] == "atom-atom") &
        (~all_cons['auth_atom_id_end'].isin(['N', 'O',]))
    )
    cons = all_cons[keep_rows].copy()

    pseudobond_text = "\n".join(generate_pseudobond_lines_ChimeraX(cons))
    pseudobond_file = f'{prot_id}_{seg_id}_{pdb_id}.pb'

    membership = pd.read_pickle(f'{seg_dir}/results/{prot_id}_{seg_id}_ALL_inf_bss_membership.pkl')

    # invert site -> members into member -> site, keeping only members of this PDB entry
    struc_ligs = {
        member: site
        for site, members in membership.items()
        for member in members
        if member.startswith(pdb_id)
    }

    cons["LIGAND_ID"] = cons.label_comp_id_bgn + "_" + cons.auth_asym_id_bgn + "_" + cons.auth_seq_id_bgn.astype(str)

    residue_cols = ["label_comp_id_end", "auth_asym_id_end", "auth_seq_id_end"]

    # ligand ID -> [contacting residues, binding site]
    struc_prot_data = {}
    for member, site in struc_ligs.items():
        ligand_id = member.partition("_")[2] # everything after the first "_"
        ligand_rows = cons[cons.LIGAND_ID == ligand_id]
        residues = list(ligand_rows[residue_cols].drop_duplicates().itertuples(index=False, name=None))
        struc_prot_data[ligand_id] = [residues, site]

    residue_cmds = []
    ligand_cmds = []
    for ligand_id, (residues, site) in struc_prot_data.items():
        _, lig_chain, lig_resi = ligand_id.split("_")
        if residues:
            residue_sel = ' '.join([f'/{chain}:{resi}' for _, chain, resi in residues])
            residue_cmds.extend(['sel ' + residue_sel + ';', f'col sel {colors[site]}', 'disp sel'])

        ligand_cmds.extend(['sel ' + f'/{lig_chain}:{lig_resi};', f'col sel {colors[site]}', 'disp sel'])

    header_cmds = [
        f'open {pdb_id}_bio.cif',
        'rib', 'style all stick', # some structures have settings to not show cartoon and style as spheres
        'color white',
        f'open {pseudobond_file}',
        'set bgColor white',
        'set silhouette ON',
        'set silhouettewidth 2',
        '~disp',
        'transparency 30',
    ]
    footer_cmds = ['~sel', 'color byhet', 'del H']
    cxc_text = "\n".join(header_cmds + residue_cmds + ligand_cmds + footer_cmds)

    cxc_file = f'{prot_id}_{seg_id}_{pdb_id}.cxc'

    # contents of the generated files, ready to be written into the zip
    pseudobond_bytes = pseudobond_text.encode('utf-8')
    cxc_bytes = cxc_text.encode('utf-8')
    info_bytes = contacts_info.encode('utf-8')

    memory_file = io.BytesIO() # in-memory zip file sent to the client
    with zipfile.ZipFile(memory_file, 'w') as zf:
        if os.path.exists(assembly_file): # the assembly is skipped when it is not on disk
            zf.write(assembly_file, os.path.basename(assembly_file))

        zf.writestr(pseudobond_file, pseudobond_bytes)
        zf.writestr(cxc_file, cxc_bytes)
        zf.writestr(info_file, info_bytes)

    memory_file.seek(0) # rewind before sending

    return send_file( # zip file as a downloadable attachment
        memory_file,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'{prot_id}_{seg_id}_{pdb_id}_assembly_ChimeraX.zip'
    )

@main.route('/download-assembly-PyMol', methods=['POST'])
def download_assembly_PyMol(): # route to download PyMol script to visualise assembly
    """
    Returns a zip to visualise one assembly and its ligand contacts in PyMol.

    Reads 'proteinId', 'segmentId' and 'pdbId' from the JSON body (400 if any
    is missing). The zip holds the assembly .cif (when it exists on disk), a
    generated .pml script and the contacts info file.
    """
    payload = request.get_json() # JSON data of the POST request

    prot_id = payload.get('proteinId')
    seg_id = payload.get('segmentId')
    pdb_id = payload.get('pdbId')

    if not (prot_id and seg_id and pdb_id): # all three IDs are required
        return jsonify({'error': 'Missing data'}), 400

    seg_dir = f'{PROTS_FOLDER}/{prot_id}/{seg_id}'
    assembly_file = f'{ASSEMBLY_FOLDER}/{pdb_id}_bio.cif' # assembly cif file

    all_cons = pd.read_pickle(f'{seg_dir}/arpeggio/{pdb_id}_bio_proc.pkl')

    # keep inter-entity atom-atom contacts that are not purely "proximal" and
    # whose end atom is none of C, N, O
    keep_rows = (
        (all_cons['contact'].apply(lambda x: x != ["proximal"])) &
        (all_cons['interacting_entities'] == "INTER") &
        (all_cons['type'] == "atom-atom") &
        (~all_cons['auth_atom_id_end'].isin(['C', 'N', 'O',]))
    )
    cons = all_cons[keep_rows].copy()

    distance_lines = generate_distance_lines_PyMol(cons, mult = 1) #1.5 is too thick

    membership = pd.read_pickle(f'{seg_dir}/results/{prot_id}_{seg_id}_ALL_inf_bss_membership.pkl')

    # invert site -> members into member -> site, keeping only members of this PDB entry
    struc_ligs = {
        member: site
        for site, members in membership.items()
        for member in members
        if member.startswith(pdb_id)
    }

    cons["LIGAND_ID"] = cons.label_comp_id_bgn + "_" + cons.auth_asym_id_bgn + "_" + cons.auth_seq_id_bgn.astype(str)

    residue_cols = ["label_comp_id_end", "auth_asym_id_end", "auth_seq_id_end"]

    # ligand ID -> [contacting residues, binding site]
    struc_prot_data = {}
    for member, site in struc_ligs.items():
        ligand_id = member.partition("_")[2] # everything after the first "_"
        ligand_rows = cons[cons.LIGAND_ID == ligand_id]
        residues = list(ligand_rows[residue_cols].drop_duplicates().itertuples(index=False, name=None))
        struc_prot_data[ligand_id] = [residues, site]

    # regrouped so that each key is used as the site/colour key, with the
    # ligands at index 0 and the residues at index 1 of the value
    per_site = transform_dict2(struc_prot_data)

    residue_cmds = []
    ligand_cmds = []
    for site, grouped in per_site.items():
        site_ligs = grouped[0]
        site_ress = grouped[1]
        if site_ligs != []:
            lig_sels = [f'///{parts[1]}/{parts[2]}' for parts in (lig.split("_") for lig in site_ligs)]
            ligand_cmds.extend([
                f'select BS{site}_ligs, '+ ' '.join(lig_sels),
                f'set_color BS{site}_color, {hex_to_rgb(colors[site])}',
                f'color BS{site}_color, BS{site}_ligs',
                f'show licorice, BS{site}_ligs',
            ])
        if site_ress != []:
            residue_cmds.extend([
                f'select BS{site}, ' + ' '.join([f'///{res[1]}/{res[2]}' for res in site_ress]),
                f'color BS{site}_color, BS{site}',
                f'show licorice, BS{site}',
            ])

    load_line = [f'load {os.path.basename(assembly_file)}']
    closing_cmds = ["hide sticks, name N+O+C", "deselect",]

    pml_lines = pymol_looks + pymol_dash + load_line + basic_pymol_format + distance_lines + ligand_cmds + residue_cmds + closing_cmds + pymol_atom_colors
    pml_text = "\n".join(pml_lines)

    pml_file = f'{prot_id}_{seg_id}_{pdb_id}.pml'

    # contents of the generated files, ready to be written into the zip
    pml_bytes = pml_text.encode('utf-8')
    info_bytes = contacts_info.encode('utf-8')

    memory_file = io.BytesIO() # in-memory zip file sent to the client
    with zipfile.ZipFile(memory_file, 'w') as zf:
        if os.path.exists(assembly_file): # the assembly is skipped when it is not on disk
            zf.write(assembly_file, os.path.basename(assembly_file))

        zf.writestr(pml_file, pml_bytes)
        zf.writestr(info_file, info_bytes)

    memory_file.seek(0) # rewind before sending

    return send_file( # zip file as a downloadable attachment
        memory_file,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'{prot_id}_{seg_id}_{pdb_id}_assembly_PyMol.zip'
    )

@main.route('/download-all-assemblies-ChimeraX', methods=['POST'])
def download_all_assemblies_ChimeraX(): # route to download ChimeraX scripts to visualise all assemblies
    """
    Returns a zip to visualise every requested assembly of a segment in ChimeraX.

    Reads 'proteinId', 'segmentId' and 'assemblyPdbIds' (array of PDB IDs)
    from the JSON body; responds with 400 if any of them is missing. For each
    PDB ID the zip gets a folder holding the assembly .cif (when it exists on
    disk), a pseudobond (.pb) file built from the filtered contacts table and
    a generated .cxc command file. The contacts info file is added once at the
    top level of the zip.
    """
    data = request.get_json() # Get JSON data from the POST request

    prot_id = data.get('proteinId')
    seg_id = data.get('segmentId')
    assembly_pdb_ids = data.get('assemblyPdbIds')  # array of PDB IDs

    # Validate BEFORE reading anything from disk: the membership path below is
    # built from these values, so a missing ID must yield a 400 rather than an
    # unhandled error on a non-existent path.
    if not prot_id or not seg_id or not assembly_pdb_ids:
        return jsonify({'error': 'Missing data'}), 400

    bs_membership = pd.read_pickle(f'{PROTS_FOLDER}/{prot_id}/{seg_id}/results/{prot_id}_{seg_id}_ALL_inf_bss_membership.pkl')

    # invert site -> members into member -> site
    bs_membership_rev = {v: k for k, vs in bs_membership.items() for v in vs}

    residue_cols = ["label_comp_id_end", "auth_asym_id_end", "auth_seq_id_end"]

    memory_file = io.BytesIO() # Create an in-memory zip file for sending to the client
    with zipfile.ZipFile(memory_file, 'w') as zf:
        for pdb_id in assembly_pdb_ids: # one folder in the zip per assembly PDB ID
            folder_name = f'{pdb_id}'

            assembly_file = f'{ASSEMBLY_FOLDER}/{pdb_id}_bio.cif' # assembly cif file

            arpeggio_cons = pd.read_pickle(f'{PROTS_FOLDER}/{prot_id}/{seg_id}/arpeggio/{pdb_id}_bio_proc.pkl')

            # keep inter-entity atom-atom contacts that are not purely
            # "proximal" and whose end atom is neither N nor O
            arpeggio_cons_filt = arpeggio_cons[
                (arpeggio_cons['contact'].apply(lambda x: x != ["proximal"])) &
                (arpeggio_cons['interacting_entities'] == "INTER") &
                (arpeggio_cons['type'] == "atom-atom") &
                (~arpeggio_cons['auth_atom_id_end'].isin(['N', 'O',]))
            ].copy()

            pseudobond_lines = "\n".join(generate_pseudobond_lines_ChimeraX(arpeggio_cons_filt))
            pseudobond_file = f'{prot_id}_{seg_id}_{pdb_id}.pb'

            # members of this PDB entry and the binding site each belongs to
            struc_ligs = {k: v for k, v in bs_membership_rev.items() if k.startswith(pdb_id)}

            arpeggio_cons_filt["LIGAND_ID"] = arpeggio_cons_filt.label_comp_id_bgn + "_" + arpeggio_cons_filt.auth_asym_id_bgn + "_" + arpeggio_cons_filt.auth_seq_id_bgn.astype(str)

            # ligand ID -> [contacting residues, binding site]
            struc_prot_data = {}
            for k, v in struc_ligs.items():
                ligand_id = "_".join(k.split("_")[1:]) # member key without its first "_"-separated part
                ligand_rows = arpeggio_cons_filt[arpeggio_cons_filt.LIGAND_ID == ligand_id]
                struc_prot_data[ligand_id] = [
                    list(ligand_rows[residue_cols].drop_duplicates().itertuples(index=False, name=None)),
                    v
                ]

            aas_str = []
            ligs_str = []

            for k, v in struc_prot_data.items():
                _, lig_chain, lig_resi = k.split("_")
                ress = v[0]
                col_key = v[1]
                if ress:
                    prot_sel_str = 'sel ' + ' '.join([f'/{el[1]}:{el[2]}' for el in ress]) + ';'
                    prot_col_str = f'col sel {colors[col_key]}'
                    prot_disp_str = 'disp sel'
                    aas_str.extend([prot_sel_str, prot_col_str, prot_disp_str])

                lig_sel_str = 'sel ' + f'/{lig_chain}:{lig_resi};'
                lig_col_str = f'col sel {colors[col_key]}'
                lig_disp_str = 'disp sel'

                ligs_str.extend([lig_sel_str, lig_col_str, lig_disp_str])

            cxc_lines = "\n".join(
                [
                    f'open {pdb_id}_bio.cif',
                    'rib', 'style all stick', # some structures have settings to not show cartoon and style as spheres
                    'color white',
                    f'open {pseudobond_file}',
                    'set bgColor white',
                    'set silhouette ON',
                    'set silhouettewidth 2',
                    '~disp',
                    'transparency 30',
                ]  + aas_str + ligs_str + ['~sel', 'color byhet', 'del H']
            )

            cxc_file = f'{prot_id}_{seg_id}_{pdb_id}.cxc'

            if os.path.exists(assembly_file):  # the assembly is skipped when it is not on disk
                zf.write(assembly_file, os.path.join(folder_name, os.path.basename(assembly_file)))

            # the generated files are written straight from memory
            zf.writestr(os.path.join(folder_name, pseudobond_file), pseudobond_lines.encode('utf-8'))
            zf.writestr(os.path.join(folder_name, cxc_file), cxc_lines.encode('utf-8'))

        # the info file is shared by all assemblies, so it is added once
        zf.writestr(info_file, contacts_info.encode('utf-8'))

    # Seek to the beginning of the in-memory zip file before sending it
    memory_file.seek(0)

    # Send the zip file to the client as a downloadable file
    return send_file(
        memory_file,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'{prot_id}_{seg_id}_all_assemblies_ChimeraX.zip'
    )

@main.route('/download-all-assemblies-PyMol', methods=['POST'])
def download_all_assemblies_PyMol(): # route to download PyMol scripts to visualise all assemblies
    """
    Returns a zip to visualise every requested assembly of a segment in PyMol.

    Reads 'proteinId', 'segmentId' and 'assemblyPdbIds' (array of PDB IDs)
    from the JSON body; responds with 400 if any of them is missing. For each
    PDB ID the zip gets a folder holding the assembly .cif (when it exists on
    disk) and a generated .pml script. The contacts info file is added once at
    the top level of the zip.
    """
    data = request.get_json() # Get JSON data from the POST request

    prot_id = data.get('proteinId')
    seg_id = data.get('segmentId')
    assembly_pdb_ids = data.get('assemblyPdbIds')  # array of PDB IDs

    # Validate BEFORE reading anything from disk: the membership path below is
    # built from these values, so a missing ID must yield a 400 rather than an
    # unhandled error on a non-existent path.
    if not prot_id or not seg_id or not assembly_pdb_ids:
        return jsonify({'error': 'Missing data'}), 400

    bs_membership = pd.read_pickle(f'{PROTS_FOLDER}/{prot_id}/{seg_id}/results/{prot_id}_{seg_id}_ALL_inf_bss_membership.pkl')

    # invert site -> members into member -> site
    bs_membership_rev = {v: k for k, vs in bs_membership.items() for v in vs}

    residue_cols = ["label_comp_id_end", "auth_asym_id_end", "auth_seq_id_end"]

    memory_file = io.BytesIO() # Create an in-memory zip file for sending to the client
    with zipfile.ZipFile(memory_file, 'w') as zf:
        for pdb_id in assembly_pdb_ids: # one folder in the zip per assembly PDB ID
            folder_name = f'{pdb_id}'

            assembly_file = f'{ASSEMBLY_FOLDER}/{pdb_id}_bio.cif' # assembly cif file

            arpeggio_cons = pd.read_pickle(f'{PROTS_FOLDER}/{prot_id}/{seg_id}/arpeggio/{pdb_id}_bio_proc.pkl')

            # keep inter-entity atom-atom contacts that are not purely
            # "proximal" and whose end atom is none of C, N, O
            arpeggio_cons_filt = arpeggio_cons[
                (arpeggio_cons['contact'].apply(lambda x: x != ["proximal"])) &
                (arpeggio_cons['interacting_entities'] == "INTER") &
                (arpeggio_cons['type'] == "atom-atom") &
                (~arpeggio_cons['auth_atom_id_end'].isin(['C', 'N', 'O',]))
            ].copy()

            # same multiplier as the single-assembly PyMol route, where 1.5 was found too thick
            distance_lines = generate_distance_lines_PyMol(arpeggio_cons_filt, mult = 1)

            # members of this PDB entry and the binding site each belongs to
            struc_ligs = {k: v for k, v in bs_membership_rev.items() if k.startswith(pdb_id)}

            arpeggio_cons_filt["LIGAND_ID"] = arpeggio_cons_filt.label_comp_id_bgn + "_" + arpeggio_cons_filt.auth_asym_id_bgn + "_" + arpeggio_cons_filt.auth_seq_id_bgn.astype(str)

            # ligand ID -> [contacting residues, binding site]
            struc_prot_data = {}
            for k, v in struc_ligs.items():
                ligand_id = "_".join(k.split("_")[1:]) # member key without its first "_"-separated part
                ligand_rows = arpeggio_cons_filt[arpeggio_cons_filt.LIGAND_ID == ligand_id]
                struc_prot_data[ligand_id] = [
                    list(ligand_rows[residue_cols].drop_duplicates().itertuples(index=False, name=None)),
                    v
                ]

            # regrouped so that each key is used as the site/colour key, with
            # the ligands at index 0 and the residues at index 1 of the value
            struc_prot_data_rf = transform_dict2(struc_prot_data)

            aas_lines = []
            ligs_lines = []
            for col_key, v in struc_prot_data_rf.items():
                ligs = v[0]
                ress = v[1]
                if ligs != []:
                    lig_sels = []
                    for el in ligs:
                        lig_d = el.split("_")
                        lig_sels.append(f'///{lig_d[1]}/{lig_d[2]}')
                    lig_sel_str = f'select BS{col_key}_ligs, '+ ' '.join(lig_sels)
                    bs_set_col_str = f'set_color BS{col_key}_color, {hex_to_rgb(colors[col_key])}'
                    lig_col_str = f'color BS{col_key}_color, BS{col_key}_ligs'
                    lig_disp_str = f'show licorice, BS{col_key}_ligs'
                    ligs_lines.extend([lig_sel_str, bs_set_col_str, lig_col_str, lig_disp_str])
                if ress != []:
                    prot_sel_str = f'select BS{col_key}, ' + ' '.join([f'///{el[1]}/{el[2]}' for el in ress])
                    prot_col_str = f'color BS{col_key}_color, BS{col_key}'
                    prot_disp_str = f'show licorice, BS{col_key}'
                    aas_lines.extend([prot_sel_str, prot_col_str, prot_disp_str])

            load_line = [f'load {os.path.basename(assembly_file)}']

            pml_lines = pymol_looks + pymol_dash + load_line + basic_pymol_format + distance_lines + ligs_lines + aas_lines + ["hide sticks, name N+O+C", "deselect",]  + pymol_atom_colors
            pml_string = "\n".join(pml_lines)

            pml_file = f'{prot_id}_{seg_id}_{pdb_id}.pml'

            if os.path.exists(assembly_file):  # the assembly is skipped when it is not on disk
                zf.write(assembly_file, os.path.join(folder_name, os.path.basename(assembly_file)))

            # the generated script is written straight from memory
            zf.writestr(os.path.join(folder_name, pml_file), pml_string.encode('utf-8'))

        # the info file is shared by all assemblies, so it is added once
        zf.writestr(info_file, contacts_info.encode('utf-8'))

    # Seek to the beginning of the in-memory zip file before sending it
    memory_file.seek(0)

    # Send the zip file to the client as a downloadable file
    return send_file(
        memory_file,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'{prot_id}_{seg_id}_all_assemblies_PyMol.zip'
    )

@main.route('/download-assembly-contact-data', methods=['POST'])
def download_assembly_contact_data(): # route to download contacts data for a given assembly
    """
    Sends the processed Arpeggio contacts table of one assembly as a CSV attachment.

    Reads 'proteinId', 'segmentId' and 'pdbId' from the JSON request body.
    Responds with 400 when the body is not a JSON object, a field is missing, or a
    field is not a plain path component, and with 404 when the contacts pickle of
    the requested assembly does not exist.
    """
    data = request.get_json()
    if not isinstance(data, dict):
        return jsonify({'error': 'Missing data'}), 400

    prot_id = data.get('proteinId')
    seg_id = data.get('segmentId')
    pdb_id = data.get('pdbId')

    ids = (prot_id, seg_id, pdb_id)
    if any(el is None for el in ids):
        return jsonify({'error': 'Missing data'}), 400

    # The IDs are client-supplied and end up in the path of a pickle that is loaded
    # below, so each must be a single path component (no separators, no "." / "..").
    if any(str(el) in ('', '.', '..') or os.path.basename(str(el)) != str(el) for el in ids):
        return jsonify({'error': 'Invalid data'}), 400

    contacts_path = f'{PROTS_FOLDER}/{prot_id}/{seg_id}/arpeggio/{pdb_id}_bio_proc.pkl'
    if not os.path.isfile(contacts_path):
        return jsonify({'error': 'Contacts data not found'}), 404

    # NOTE: read_pickle executes pickle payloads; only files under PROTS_FOLDER may be loaded.
    arpeggio_df = pd.read_pickle(contacts_path)

    # Convert the DataFrame to CSV
    csv_data = io.StringIO()
    arpeggio_df.to_csv(csv_data, index=False)

    # Return the CSV file as a downloadable response
    return send_file(
        io.BytesIO(csv_data.getvalue().encode('utf-8')),
        mimetype='text/csv',
        as_attachment=True,
        download_name=f'{prot_id}_{seg_id}_{pdb_id}_contacts.csv'
    )

@main.route('/download-all-assemblies-contact-data', methods=['POST'])
def download_all_assemblies_contact_data(): # route to download contacts data for all assemblies
    """
    Sends a zip with one contacts CSV per requested assembly.

    Reads 'proteinId', 'segmentId' and the list 'assemblyPdbIds' from the JSON
    request body. Responds with 400 when the body is not a JSON object, a field is
    missing, or an ID is not a plain path component, and with 404 when the contacts
    pickle of one of the assemblies does not exist.
    """
    data = request.get_json()
    if not isinstance(data, dict):
        return jsonify({'error': 'Missing data'}), 400

    prot_id = data.get('proteinId')
    seg_id = data.get('segmentId')
    assembly_pdb_ids = data.get('assemblyPdbIds')

    if prot_id is None or seg_id is None or not isinstance(assembly_pdb_ids, list):
        return jsonify({'error': 'Missing data'}), 400

    # The IDs are client-supplied and end up in the path of pickles that are loaded
    # below, so each must be a single path component (no separators, no "." / "..").
    ids = [prot_id, seg_id] + assembly_pdb_ids
    if any(str(el) in ('', '.', '..') or os.path.basename(str(el)) != str(el) for el in ids):
        return jsonify({'error': 'Invalid data'}), 400

    memory_file = io.BytesIO()

    with zipfile.ZipFile(memory_file, 'w') as zf:
        for pdb_id in assembly_pdb_ids:
            contacts_path = f'{PROTS_FOLDER}/{prot_id}/{seg_id}/arpeggio/{pdb_id}_bio_proc.pkl'
            if not os.path.isfile(contacts_path):
                return jsonify({'error': f'Contacts data not found for {pdb_id}'}), 404

            # NOTE: read_pickle executes pickle payloads; only files under PROTS_FOLDER may be loaded.
            arpeggio_df = pd.read_pickle(contacts_path)

            # Convert the DataFrame to CSV
            csv_data = io.StringIO()
            arpeggio_df.to_csv(csv_data, index=False)

            # Write the CSV data to the in-memory zip file
            zf.writestr(f'{prot_id}_{seg_id}_{pdb_id}_contacts.csv', csv_data.getvalue())

    memory_file.seek(0)

    return send_file(
        memory_file,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'{prot_id}_{seg_id}_all_assemblies_contacts.zip'
    )

####################### ROUTES FOR USER JOBS #######################

@main.route('/submit', methods=['GET', 'POST'])
def submit():
    """
    Shows the job submission form and handles its submission.

    On a valid POST a SubmissionHandler is started in a greenlet and, once it has
    signalled that the metadata is available and the Slivka job was triggered, the
    client is redirected to the status page of its session. On failure the form is
    rendered again with error=True. POSTs are refused with 403 while
    SUBMISSIONS_ENABLED is falsy.
    """
    from flask import current_app  # local import: only needed here, for logging

    if 'session_id' not in session:
        session.permanent = True
        session['session_id'] = str(uuid.uuid4())

    session_id = session['session_id']
    form = LigysisForm()
    error = False

    # Block manual form submissions (e.g. crafted POSTs) while submissions are disabled
    if request.method == 'POST' and not SUBMISSIONS_ENABLED:
        return render_template('submit.html', form=form, error=True), 403

    if form.validate_on_submit():
        try:
            config = {
                'uniprot_id': request.form.get('uniprot_id', 'Q9UGL1'),
                'format': request.form.get('format', 'mmcif'),
                'variants': request.form.get('variants', True),
                'override': request.form.get('override', True),
                'clust_method': request.form.get('clust_method', 'average'),
                'clust_dist': request.form.get('clust_dist', 0.5),
                'hmm_iters': request.form.get('hmm_iters', 3),
                'file_key_override': 'input_dir'  # Custom file key for ligysis service
            }

            submission_handler = SubmissionHandler(session_id, form, service_type='ligysis', config=config, tar_upload=True)
            gevent.spawn(submission_handler.handle_submission)
            gevent.sleep(3)  # Adding a short minimum delay to ensure the user has enough time to read the modal popup
            # Presumably events set by handle_submission; wait for both before redirecting
            submission_handler.metadata_available.wait()
            submission_handler.slivka_job_triggered.wait()

            return redirect(url_for('main.status', session_id=session_id))
        except Exception:
            # Keep the user-facing behaviour (form re-rendered with an error) but do not lose the traceback
            current_app.logger.exception("Job submission failed for session %s", session_id)
            error = True
    elif request.method == 'POST':
        error = True

    return render_template('submit.html', form=form, error=error)

# Route used to download submission tar on status page
@main.route('/download/<session_id>/<submission_time>/<filename>')
def download(session_id, submission_time, filename):
    """
    Sends a file stored in the folder of a submission as an attachment.

    Responds with 400 when the session ID or submission time is not valid and
    with 404 when the (sanitised) file name does not exist in that folder.
    """
    # Both identifiers must be valid before they are used to build a path
    if not (is_valid_session_id(session_id) and is_valid_submission_time(submission_time)):
        return "Invalid input", 400

    safe_name = secure_filename(filename)  # strip anything unsafe from the requested name
    submission_dir = os.path.join(SESSIONS_FOLDER, session_id, submission_time)

    # Guard clause: nothing to send if the file is not there
    if not os.path.exists(os.path.join(submission_dir, safe_name)):
        return "File not found", 404

    return send_from_directory(directory=submission_dir, path=safe_name, as_attachment=True)

@main.route('/status/<session_id>', methods=['GET'])
def status(session_id):
    """
    Renders the results/status page listing what fetch_results returns for a session.

    Responds with 400 when the session ID is not valid.
    """
    if is_valid_session_id(session_id):
        # Look up everything recorded for this session and hand it to the template
        results = fetch_results(session_id)
        context = {
            'results': results,
            'session_id': session_id,
            'current_time': datetime.now(),
            'timedelta_24h': timedelta(days=1),
            'slivka_url': SLIVKA_URL,
        }
        return render_template('results.html', **context)

    return "Invalid input", 400

# Route used to serve structures from the user jobs for 3Dmol.js
@main.route('/user-files/<session_id>/<submission_time>/<file_type>/<path:filename>')
def serve_file(session_id, submission_time, file_type, filename):
    """
    Serves a structure file of a user job from its 'simple_cifs' or 'supp_cifs' folder.

    Responds with 400 when the session ID, submission time or file type is not
    valid, and with 404 when the file cannot be found.
    """
    # Validate session_id and submission_time
    if not is_valid_session_id(session_id) or not is_valid_submission_time(submission_time):
        return "Invalid input", 400

    if file_type not in ['simple_cifs', 'supp_cifs']:
        return "Invalid file type", 400

    directory = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", "input_structures", file_type)

    try:
        # The requested name is passed through unchanged: send_from_directory joins it
        # to the directory safely and rejects names that would escape it. (A sanitised
        # copy used to be computed here but was never used, so it has been removed.)
        return send_from_directory(directory, filename)
    except FileNotFoundError:
        abort(404)

@main.route('/user-alignments/<session_id>/<submission_time>/<job_id>/<path:filename>')
def user_serve_alignment(session_id, submission_time, job_id, filename):
    """
    Serves a file from the 'varalign' folder of a user job.

    Responds with 400 when the session ID, submission time or job ID is not valid
    and with 404 when the file cannot be found.
    """
    # Same validation as the other user-job file routes: these URL segments are
    # joined into a filesystem path, so e.g. ".." must not get through.
    if not is_valid_session_id(session_id) or not is_valid_submission_time(submission_time):
        return "Invalid input", 400
    if job_id in (".", ".."):
        return "Invalid input", 400

    alignment_dir = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", job_id, "varalign")
    try:
        return send_from_directory(alignment_dir, filename)
    except FileNotFoundError:
        abort(404)

@main.route('/user-results/<session_id>/<submission_time>', methods = ['POST', 'GET'])
def user_results(session_id, submission_time): # route for user results site. Takes Job ID
    """
    Renders the results page ('user_structure.html') of a user job.

    If a folder named after session_id exists in USER_JOBS_OUT_FOLDER, that folder
    is used and session_id doubles as the job ID (demo jobs). Otherwise the job ID
    is fixed to "input_structures" and the output is read from
    SESSIONS_FOLDER/<session_id>/<submission_time>/OUT/input_structures, after
    validating both identifiers (400 if invalid) and checking that the results
    table exists (404 page if it does not).

    The binding-site tables, residue mappings, structure lists and summary counts
    are loaded from the job output folder and passed to the template.
    """

    job_id = session_id  # trying to maintain compatibility with demo version
    job_output_dir = os.path.join(USER_JOBS_OUT_FOLDER, job_id)
    if not os.path.exists(job_output_dir):
    
        job_id = "input_structures" # job ID is fixed
        
        # Validate session_id and submission_time
        if not is_valid_session_id(session_id) or not is_valid_submission_time(submission_time):
            return "Invalid input", 400
        
        # Construct the path
        job_output_dir = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", "input_structures")

        # Check if the test file exists before trying to load the app
        check_file_path = os.path.join(job_output_dir, "results", "input_structures_results_table.pkl")

        if not os.path.exists(check_file_path):
            return render_template('errors/404.html', error_message="File not found", file_path=check_file_path), 404
                
    # Sub-folders of the job output that are read below
    job_results_dir = os.path.join(job_output_dir, "results")
    job_supp_cifs_dir = os.path.join(job_output_dir, "supp_cifs")
    job_mappings_dir = os.path.join(job_output_dir, "mappings")
    job_variants_dir = os.path.join(job_output_dir, "varalign")
    job_simple_cifs_dir = os.path.join(job_output_dir, "simple_cifs")

    results_df = pd.read_pickle(os.path.join(job_results_dir, f"{job_id}_results_table.pkl")) # results df contains all residues

    uniprot_info = pd.read_pickle(os.path.join(job_results_dir, f"{job_id}_uniprot_info.pkl")) # uniprot data

    results_df["job_id"] = job_id
    # bss_ress, bss_data = get_bss_table(results_df, job_id)
    bss_ress = get_all_bs_ress(results_df, job_id)

    # bss_data.columns = headings # changing table column names
    # bss_data["ID"] = bss_data["ID"].astype(int) # converting ID to int
    bss_data = pd.read_pickle(os.path.join(job_results_dir, f"{job_id}_bss_RSA_table.pkl")) # binding sites data

    bss_data["ID"] = bss_data["ID"].astype(int) # converting ID to int

    bss_data = bss_data.sort_values(by="ID") # sorting by ID

    # Axis limits are computed before fillna replaces missing values with the string "NaN"
    bss_MES_axis_lim = compute_symmetrical_log_limits(bss_data)

    bss_data = bss_data.fillna("NaN")

    first_site = bss_data.ID.unique().tolist()[0] # first binding site ID (raises IndexError if the table is empty)

    # Same ordering as above: limits first, then the "NaN" placeholder
    bs_ress_MES_axis_lim = compute_symmetrical_log_limits(bss_ress, col_name="oddsratio")

    bss_ress = bss_ress.fillna("NaN") # pre-processing could also be done before saving the pickle

    # Short column names used by the client-side tables
    bss_ress = bss_ress.rename(columns={
        "UniProt_ResNum": "UPResNum",
        "alignment_column": "MSACol",
        "abs_norm_shenkin": "DS",
        "oddsratio": "MES",
        "pvalue": "p",
        "AA": "AA",
        "RSA": "RSA",
        "SS": "SS"
    })

    first_site_data = bss_ress.query('binding_sites == @first_site')[cc_new].to_dict(orient="list") # data of first binding site residues

    data1 = bss_data.to_dict(orient="list") # converting table to dictionary to pass to client (all protein binding sites)

    prot_ress = bss_ress # all protein binding residues

    data2 = prot_ress.to_dict(orient="list")

    bss_ress_dict = load_pickle(os.path.join(job_results_dir, f"{job_id}_bss_ress.pkl")) 

    seg_ress_dict = bss_ress_dict#[prot_id][seg_id]
    seg_ress_dict = {str(key): value for key, value in seg_ress_dict.items()} # keys converted to strings
    seg_ress_dict["ALL_BINDING"] = sorted(list(set([el2 for el in seg_ress_dict.values() for el2 in el]))) # add key: "ALL_BINDING" and value a sorted set of all binding residues

    # First .cif file (alphabetically) in simple_cifs, without its last extension; raises IndexError if there is none
    prot_atoms_struc, ext = os.path.splitext(sorted([f for f in os.listdir(job_simple_cifs_dir) if f.endswith(".cif")])[0])

    prot_atoms_struc_name = prot_atoms_struc.split(".")[0] # everything before the first dot

    # Residue mappings of that structure (PDB -> UniProt and UniProt -> PDB, going by the file names)
    pdb2up_dict = load_pickle(f'{job_mappings_dir}/{prot_atoms_struc_name}_pdb2up.pkl')
    up2pdb_dict = load_pickle(f'{job_mappings_dir}/{prot_atoms_struc_name}_up2pdb.pkl')
    
    # Placeholders: for user jobs these three values are fixed strings
    entry_name = "UNIPROT_ENTRY_NAME"#LIGYSIS_prots_data[prot_id]["entry"]

    upid_name = "UNIPROT_IDENTIFIER"#LIGYSIS_prots_data[prot_id]["upid"]

    prot_long_name = "UNIPROT_PROTEIN_LONG_NAME"#LIGYSIS_prots_data[prot_id]["prot_name_long"]

    assembly_pdbs = os.listdir(os.path.join(job_supp_cifs_dir)) # CIF bio assembly file names
    assembly_pdbs = [el for el in assembly_pdbs if el.endswith(".cif")]

    assembly_pdb_ids = sorted(list(set([el for el in assembly_pdbs])),) # sorted unique file names (used as IDs)

    simple_cifs = os.listdir(job_simple_cifs_dir) # simple PDB file names (single chain)
    simple_cifs = [el for el in simple_cifs if el.endswith(".cif")] # TODO NEED TO FIGURE THIS OUT

    # URLs through which the client fetches each simple CIF (served by the serve_file route)
    simple_cifs_full_path = [url_for('main.serve_file', session_id=session_id, submission_time=submission_time, file_type='simple_cifs', filename=el) for el in simple_cifs]

    n_strucs = len([el for el in os.listdir(job_supp_cifs_dir) if el.endswith(".cif")]) # number of structures
    n_ligs = len(load_pickle(os.path.join(job_results_dir, f"{job_id}_ligs_fingerprints.pkl"))) # number of ligands
    n_sites = len(bss_data) # number of binding sites
    seg_stats = {'strucs': n_strucs, 'ligs': n_ligs, 'bss': n_sites}

    lig_data = pd.read_pickle(f'{job_results_dir}/{job_id}_lig_data.pkl')
    #load_pickle(os.path.join(job_results_dir, f"{job_id}_lig_data.pkl")) # ligand data
    # Number of lig_data rows per structure, keyed by struc_name up to its first dot
    struc_count = lig_data.groupby(lig_data['struc_name'].str.split('.').str[0]).size().to_dict()

    return render_template(
        'user_structure.html', data = data1, headings = headings, data2 = data2, cc_new = cc_new, cc_new_sel = cc_new_sel, colors = colors,
        seg_ress_dict = seg_ress_dict, job_id = job_id,
        first_site_data = first_site_data, bs_table_tooltips = bs_table_tooltips, bs_ress_table_tooltips = bs_ress_table_tooltips,
        pdb2up_dict = pdb2up_dict, up2pdb_dict = up2pdb_dict, seg_stats = seg_stats, entry_name = entry_name, upid_name = upid_name, prot_long_name = prot_long_name,
        simple_pdbs = simple_cifs_full_path, assembly_pdb_ids = assembly_pdb_ids, prot_atoms_struc = prot_atoms_struc,
        prot_acc = uniprot_info["up_id"], prot_entry = uniprot_info["up_entry"], prot_name = uniprot_info["prot_name"], struc_count = struc_count,
        session_id = session_id, submission_time = submission_time,
        job_results_dir = job_results_dir, job_variants_dir = job_variants_dir,  # These may not be needed outside demo jobs
        bss_MES_axis_lim = bss_MES_axis_lim, bs_ress_MES_axis_lim = bs_ress_MES_axis_lim
    )

@main.route('/user-process-model-order', methods=['POST'])
def user_process_model_order(): # route to process model order data from ChimeraX files
    """
    Returns the binding-site attribute data of a user job for the loaded models.

    Reads 'modelOrder', 'jobId', 'session_id' and 'submission_time' from the JSON
    request body. With a valid session ID the ChimeraX files of the session job
    are used (400 if the submission time is not valid); otherwise the demo job
    folder in USER_JOBS_OUT_FOLDER is used. Responds with 400 when the job ID is
    not a plain path component.

    The JSON response has the keys 'resultTuples' and 'maxId'.
    """
    data = request.json
    loaded_order = data['modelOrder'] # this is the order in which files have been loaded by 3DMol.js
    job_id = data['jobId'] # name of the segment
    session_id = data['session_id']
    submission_time = data['submission_time']

    # job_id is client-supplied and becomes part of the file paths below, so it must
    # be a single path component (no separators, no "." / "..").
    if str(job_id) in ("", ".", "..") or os.path.basename(str(job_id)) != str(job_id):
        return "Invalid input", 400

    if is_valid_session_id(session_id):
        # submission_time is part of the path as well, validate it like the other user-job routes do
        if not is_valid_submission_time(submission_time):
            return "Invalid input", 400

        # TODO: job_id = "input_structures", it's fixed for user submissions. Review if this is ideal configuration.
        cxc_in = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", "input_structures", "simple_cifs", f'{job_id}_average_0.5.cxc')  # ChimeraX command file
        attr_in = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", "input_structures", "simple_cifs", f'{job_id}_average_0.5.defattr')  # ChimeraX attribute file
    else:
        # TODO: for demo user results, remove once demo is removed
        cxc_in =f'{USER_JOBS_OUT_FOLDER}/{job_id}/simple_cifs/{job_id}_average_0.5.cxc' # ChimeraX command file
        attr_in =  f'{USER_JOBS_OUT_FOLDER}/{job_id}/simple_cifs/{job_id}_average_0.5.defattr' # ChimeraX attribute file

    model_order = extract_open_files(cxc_in, fmt = "cif") ## fix this format issue

    result_tuples, bs_ids = transform_lines_3DMol(attr_in, model_order, loaded_order) # binding site attribute data list of tuples

    max_id = max(bs_ids) # maximum binding site ID

    response_data = {
        'resultTuples': result_tuples,
        'maxId': max_id
    }

    return jsonify(response_data) # send jsonified data back to client

@main.route('/user-get-table', methods=['POST'])
def user_get_table(): # route to get binding site residues for a given binding site
    """
    Returns the residue table of one binding site of a user job as JSON.

    Reads 'label' ("<job id>_<binding site id>"), 'session_id' and
    'submission_time' from the JSON request body. If a folder named after the job
    ID exists in USER_JOBS_OUT_FOLDER it is used (demo jobs); otherwise the
    session job folder is used, with job ID "input_structures".

    Responds with 400 when the label is missing or malformed or the identifiers
    are not valid, and with the 404 page when the results table does not exist.
    """
    payload = request.json
    lab = payload.get('label', None)
    session_id = payload.get('session_id', None)
    submission_time = payload.get('submission_time', None)

    if not isinstance(lab, str):
        return "Invalid input", 400

    # Everything before the last "_" is the job ID, the rest is the binding site ID
    job_id, _, bs_label = lab.rpartition("_")
    try:
        bs_id = int(bs_label)
    except ValueError:
        return "Invalid input", 400

    # job_id is client-supplied and is joined into a path: single path component only
    if job_id in (".", "..") or os.path.basename(job_id) != job_id:
        return "Invalid input", 400

    job_output_dir = os.path.join(USER_JOBS_OUT_FOLDER, job_id)  # demo jobs, remove once demo is removed
    # An empty job ID would resolve to USER_JOBS_OUT_FOLDER itself, which is not a demo job
    if not job_id or not os.path.exists(job_output_dir):
        job_id = "input_structures"  # job ID is fixed

        # Validate session_id and submission_time
        if not is_valid_session_id(session_id) or not is_valid_submission_time(submission_time):
            return "Invalid input", 400

        # Construct the path
        job_output_dir = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", "input_structures")

        # Check if the test file exists before trying to load the app
        check_file_path = os.path.join(job_output_dir, "results", "input_structures_results_table.pkl")

        if not os.path.exists(check_file_path):
            return render_template('errors/404.html', error_message="File not found", file_path=check_file_path), 404

    job_results_dir = os.path.join(job_output_dir, "results")
    results_df = pd.read_pickle(os.path.join(job_results_dir, f"{job_id}_results_table.pkl")) # results df contains all residues

    # 'x == x' is False for NaN, so this keeps only residues that belong to a binding site
    all_bs_ress = results_df.query('binding_sites == binding_sites').reset_index(drop=True)
    all_bs_ress = all_bs_ress.explode("binding_sites")

    # Cap RSA at 100 while the column is still numeric: after fillna("NaN") a missing
    # value is a string and cannot be compared with a number. Assigning the clipped
    # column also avoids writing through .values, which may be a read-only array.
    all_bs_ress["RSA"] = all_bs_ress["RSA"].clip(upper=100)

    all_bs_ress = all_bs_ress.fillna("NaN")
    all_bs_ress["UniProt_ResNum"] = all_bs_ress["UniProt_ResNum"].astype(int)

    # Short column names used by the client-side table
    all_bs_ress = all_bs_ress.rename(columns={
        "UniProt_ResNum": "UPResNum",
        "alignment_column": "MSACol",
        "abs_norm_shenkin": "DS",
        "oddsratio": "MES",
        "pvalue": "p",
        "AA": "AA",
        "RSA": "RSA",
        "SS": "SS"
    })

    site_ress = all_bs_ress.query('binding_sites == @bs_id')[cc_new]

    site_data = site_ress.to_dict(orient="list")

    return jsonify(site_data)

@main.route('/user-get-uniprot-mapping', methods=['POST'])
def user_get_uniprot_mapping(): # route to get UniProt residue and chain mapping for a given pdb
    """
    Returns the residue mappings stored for one structure of a user job as JSON.

    Reads 'jobId', 'pdbFile', 'session_id' and 'submission_time' from the JSON
    request body. If a folder named after the job ID exists in
    USER_JOBS_OUT_FOLDER it is used (demo jobs); otherwise the session job folder
    is used, with job ID "input_structures". Responds with 400 when the job ID is
    not a plain path component or the session identifiers are not valid.

    The JSON response has the keys 'pdb2up' and 'up2pdb'.
    """
    data = request.json
    job_id = data['jobId']
    pdb_file = data['pdbFile']
    session_id = data['session_id']
    submission_time = data['submission_time']

    # job_id is client-supplied. os.path.join would let an absolute or ".." value
    # replace/escape the base folder and the pickles below would then be loaded from
    # an arbitrary directory, so only a single path component is accepted.
    if not isinstance(job_id, str) or job_id in ("", ".", "..") or os.path.basename(job_id) != job_id:
        return "Invalid input", 400

    pdb_id, _ = os.path.splitext(pdb_file)

    job_output_dir = os.path.join(USER_JOBS_OUT_FOLDER, job_id)
    if not os.path.exists(job_output_dir):
        job_id = "input_structures"

        # Validate session_id and submission_time
        if not is_valid_session_id(session_id) or not is_valid_submission_time(submission_time):
            return "Invalid input", 400

        # Construct the path
        job_output_dir = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", job_id)

    job_mappings_dir = os.path.join(job_output_dir, "mappings")

    struc_name = pdb_id.split(".")[0] # structure name is everything before the first dot

    pdb2up_map = load_pickle(f'{job_mappings_dir}/{struc_name}_pdb2up.pkl')
    up2pdb_map = load_pickle(f'{job_mappings_dir}/{struc_name}_up2pdb.pkl')

    response_data = {
        'pdb2up': pdb2up_map, # convert_mapping_dict(pdb2up_map),
        'up2pdb': up2pdb_map, # convert_mapping_dict(up2pdb_map),
    }

    return jsonify(response_data)

@main.route('/user-get-contacts', methods=['POST'])
def user_get_contacts(): # route to get contacts data from Arpeggio table for a given assembly
    """
    Returns the filtered Arpeggio contacts of one structure of a user job as JSON.

    Reads 'jobId', 'sessionId', 'submissionTime' and 'strucFile' from the JSON
    request body. Responds with 400 when the session ID or submission time is
    missing or not valid, or when the job ID is not a plain path component.

    The JSON response has two keys: 'contacts' (the filtered contacts as a JSON
    records string) and 'protein' (for each ligand of the structure, the list of
    contacting residues as (name, chain, number) tuples and its binding site).
    """

    data = request.json
    job_id = data['jobId']
    session_id = data.get('sessionId')
    submission_time = data.get('submissionTime')
    struc_file = data['strucFile']

    if not session_id or not submission_time:
        return jsonify({'error': 'Missing data'}), 400

    # These values are client-supplied and are joined into the path of pickles that
    # are loaded below, so they are validated like in the other user-job routes.
    if not is_valid_session_id(session_id) or not is_valid_submission_time(submission_time):
        return "Invalid input", 400
    if not isinstance(job_id, str) or job_id in ("", ".", "..") or os.path.basename(job_id) != job_id:
        return "Invalid input", 400

    struc_name = os.path.splitext(struc_file)[0].split(".")[0]

    # job_output_dir = os.path.join(USER_JOBS_OUT_FOLDER, job_id)
    job_output_dir = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", job_id)
    job_arpeggio_dir = os.path.join(job_output_dir, "arpeggio")
    job_results_dir = os.path.join(job_output_dir, "results")

    arpeggio_cons = pd.read_pickle(f'{job_arpeggio_dir}/{struc_name}_proc.pkl')

    # Keep inter-entity atom-atom contacts that are more than just "proximal" and
    # do not end on an atom named N or O
    arpeggio_cons_filt = arpeggio_cons[
        (arpeggio_cons['contact'].apply(lambda x: x != ["proximal"])) &
        (arpeggio_cons['interacting_entities'] == "INTER") &
        (arpeggio_cons['type'] == "atom-atom") &
        (~arpeggio_cons['auth_atom_id_end'].isin(['N', 'O',]))
    ].copy()

    json_cons = arpeggio_cons_filt[USER_arpeggio_cols].to_json(orient='records')

    bs_membership = pd.read_pickle(f'{job_results_dir}/{job_id}_bss_membership.pkl')

    # Invert binding site -> ligands into ligand -> binding site
    bs_membership_rev = {v: k for k, vs in bs_membership.items() for v in vs}

    # Ligand keys are "<structure name>_<ligand id>". Matching on the name plus the
    # underscore keeps ligands of e.g. "str10" out when the structure is "str1".
    lig_prefix = f'{struc_name}_'
    struc_ligs = {k: v for k, v in bs_membership_rev.items() if k.startswith(lig_prefix)}

    arpeggio_cons_filt["LIGAND_ID"] = arpeggio_cons_filt.label_comp_id_bgn + "_" + arpeggio_cons_filt.auth_asym_id_bgn + "_" + arpeggio_cons_filt.auth_seq_id_bgn.astype(str)

    struc_prot_data = {}
    for k, v in struc_ligs.items():
        ligand_id = k[len(lig_prefix):] # strip only the leading structure name
        ligand_site = v
        ligand_rows = arpeggio_cons_filt[arpeggio_cons_filt.LIGAND_ID == ligand_id]
        struc_prot_data[ligand_id] = [
            list(ligand_rows[["label_comp_id_end", "auth_asym_id_end", "auth_seq_id_end"]].drop_duplicates().itertuples(index=False, name=None)),
            ligand_site
        ]

    response_data = {
        'contacts': json_cons,
        'protein': struc_prot_data,
    }

    return jsonify(response_data) # send jsonified data back to client

@main.route('/user-download-all-structures-contact-data', methods=['POST'])
def user_download_all_structures_contact_data(): # route to download contacts data for all structures
    """
    Sends a zip with one contacts CSV per requested structure of a user job.

    Reads 'jobId', 'sessionId', 'submissionTime' and the list 'assemblyPdbIds'
    from the JSON request body. Structures whose contacts table cannot be loaded
    are skipped. Responds with 400 when data is missing or not valid.
    """
    data = request.get_json()
    if not isinstance(data, dict):
        return jsonify({'error': 'Missing data'}), 400

    job_id = data.get('jobId')
    session_id = data.get('sessionId')
    submission_time = data.get('submissionTime')
    assembly_pdb_ids = data.get('assemblyPdbIds')

    if not job_id or not session_id or not submission_time or not isinstance(assembly_pdb_ids, list):
        return jsonify({'error': 'Missing data'}), 400

    # These values are client-supplied and are joined into the path of pickles that
    # are loaded below, so they are validated like in the other user-job routes.
    if not is_valid_session_id(session_id) or not is_valid_submission_time(submission_time):
        return "Invalid input", 400
    if not isinstance(job_id, str) or job_id in (".", "..") or os.path.basename(job_id) != job_id:
        return "Invalid input", 400

    # job_output_dir = os.path.join(USER_JOBS_OUT_FOLDER, job_id)
    job_output_dir = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", job_id)
    job_arpeggio_dir = os.path.join(job_output_dir, "arpeggio")

    memory_file = io.BytesIO()

    with zipfile.ZipFile(memory_file, 'w') as zf:
        for pdb_id in assembly_pdb_ids:
            struc_name = os.path.splitext(pdb_id)[0].split(".")[0]
            try:
                arpeggio_df = pd.read_pickle(f'{job_arpeggio_dir}/{struc_name}_proc.pkl')

                # Convert the DataFrame to CSV
                csv_data = io.StringIO()
                arpeggio_df.to_csv(csv_data, index=False)

                # Write the CSV data to the in-memory zip file
                zf.writestr(f'{job_id}_{struc_name}_contacts.csv', csv_data.getvalue())
            except Exception:
                # Best effort: skip structures without a usable contacts table. Catching
                # Exception rather than everything lets timeouts and interpreter/greenlet
                # exit signals (BaseException subclasses) propagate.
                print(f"No Arpeggio contacts found for {pdb_id}")

    memory_file.seek(0)

    return send_file(
        memory_file,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'{job_id}_all_structures_contacts.zip'
    )

@main.route('/user-download-superposition-ChimeraX', methods=['POST'])
def user_download_superposition_ChimeraX(): # route to download ChimeraX script to visualise ligand superposition
    """
    Sends a zip to visualise the ligand superposition of a user job in ChimeraX.

    Reads 'jobId', 'sessionId' and 'submissionTime' from the JSON request body.
    The zip holds the simple CIF files of the job, its attribute (.defattr) file
    and a command (.cxc) file made of the job's own command file up to its
    "# colouring" line, followed by extra_cxc_lines and one colouring command per
    binding site.

    Responds with 400 when data is missing or not valid and with 404 when one of
    the files to zip does not exist.
    """

    data = request.get_json() # Get JSON data from the POST request

    job_id = data.get('jobId')
    session_id = data.get('sessionId')
    submission_time = data.get('submissionTime')

    if not job_id or not session_id or not submission_time: # Validate the received data
        return jsonify({'error': 'Missing data'}), 400

    # These values are client-supplied and are joined into filesystem paths (one of
    # them a pickle that is loaded), so they are validated like in the other user-job routes.
    if not is_valid_session_id(session_id) or not is_valid_submission_time(submission_time):
        return "Invalid input", 400
    if not isinstance(job_id, str) or job_id in (".", "..") or os.path.basename(job_id) != job_id:
        return "Invalid input", 400

    job_output_dir = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", job_id)
    job_simple_dir = os.path.join(job_output_dir, "simple_cifs")
    job_results_dir = os.path.join(job_output_dir, "results")

    simple_pdbs = os.listdir(job_simple_dir)
    simple_pdbs = [f'{job_simple_dir}/{el}' for el in simple_pdbs if el.endswith(".cif")]

    # seg_name = f'{prot_id}_{seg_id}'
    cxc_in =f'{job_simple_dir}/{job_id}_average_0.5.cxc' # ChimeraX command file
    attr_in =  f'{job_simple_dir}/{job_id}_average_0.5.defattr' # ChimeraX attribute file

    bs_membership = pd.read_pickle(f'{job_results_dir}/{job_id}_bss_membership.pkl')

    bs_ids = list(bs_membership.keys())

    # Copy the non-empty lines of cxc_in up to and including the "# colouring" line
    cxc_lines = []
    with open(cxc_in, 'r') as file:
        for line in file:
            stripped = line.strip()
            if stripped == '':
                continue
            cxc_lines.append(stripped)
            if stripped.startswith("# colouring"):
                break

    cxc_lines.extend(extra_cxc_lines)

    # One colouring command per binding site
    for bs_id in bs_ids:
        cxc_lines.append((f'col ::binding_site=={bs_id} {colors[bs_id]};'))

    cxc_lines.append('delete pseudobond;')
    cxc_lines.append(f'save {job_id}_average_0.5.cxs;')
    cxc_lines_string = "\n".join(cxc_lines)

    cxc_file = f'{job_id}_average_0.5.cxc'

    files_to_zip = simple_pdbs + [attr_in, ]#cxc_in]

    memory_file = io.BytesIO() # Create a BytesIO object to hold the in-memory zip file

    with zipfile.ZipFile(memory_file, 'w') as zf: # Create a ZipFile object for in-memory use
        for file_path in files_to_zip:
            if os.path.exists(file_path):  # Check if the file exists
                zf.write(file_path, os.path.basename(file_path))
            else:
                return f"File {file_path} not found", 404

        # Add the generated command file directly to the in-memory zip
        zf.writestr(cxc_file, cxc_lines_string.encode('utf-8'))

    memory_file.seek(0)  # Seek to the beginning of the BytesIO object before sending it

    return send_file( # Send the zip file to the client as a downloadable file
        memory_file,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'{job_id}_superposition_ChimeraX.zip'
    )

@main.route('/user-download-superposition-PyMol', methods=['POST'])
def user_download_superposition_PyMol(): # route to download PyMol script to visualise ligand superposition
    """
    Sends a zip to visualise the ligand superposition of a user job in PyMol.

    Reads 'jobId', 'sessionId' and 'submissionTime' from the JSON request body.
    The zip holds the simple CIF files of the job and a .pml script that
    chimeraX2PyMol builds from the job's ChimeraX command and attribute files.

    Responds with 400 when data is missing or not valid and with 404 when one of
    the files to zip does not exist.
    """

    data = request.get_json() # Get JSON data from the POST request

    job_id = data.get('jobId')
    session_id = data.get('sessionId')
    submission_time = data.get('submissionTime')

    if not job_id or not session_id or not submission_time: # Validate the received data
        return jsonify({'error': 'Missing data'}), 400

    # These values are client-supplied and are joined into filesystem paths, so they
    # are validated like in the other user-job routes.
    if not is_valid_session_id(session_id) or not is_valid_submission_time(submission_time):
        return "Invalid input", 400
    if not isinstance(job_id, str) or job_id in (".", "..") or os.path.basename(job_id) != job_id:
        return "Invalid input", 400

    job_output_dir = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", job_id)
    job_simple_dir = os.path.join(job_output_dir, "simple_cifs")
    # job_results_dir = os.path.join(job_output_dir, "results")

    simple_cifs = os.listdir(job_simple_dir)
    simple_cifs = [f'{job_simple_dir}/{el}' for el in simple_cifs if el.endswith(".cif")]

    cxc_in =f'{job_simple_dir}/{job_id}_average_0.5.cxc' # ChimeraX command file
    attr_in =  f'{job_simple_dir}/{job_id}_average_0.5.defattr' # ChimeraX attribute file

    pymol_lines = chimeraX2PyMol(cxc_in, attr_in, fmt = "cif")
    pymol_lines_string = "\n".join(pymol_lines)

    pymol_file = f'{job_id}_average_0.5.pml'

    memory_file = io.BytesIO() # Create a BytesIO object to hold the in-memory zip file

    with zipfile.ZipFile(memory_file, 'w') as zf: # Create a ZipFile object for in-memory use
        for file_path in simple_cifs:
            if os.path.exists(file_path):  # Check if the file exists
                zf.write(file_path, os.path.basename(file_path))
            else:
                return f"File {file_path} not found", 404

        # Add the generated script directly to the in-memory zip
        zf.writestr(pymol_file, pymol_lines_string.encode('utf-8'))

    memory_file.seek(0)  # Seek to the beginning of the BytesIO object before sending it

    return send_file( # Send the zip file to the client as a downloadable file
        memory_file,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'{job_id}_superposition_PyMol.zip'
    )

@main.route('/user-download-structure-ChimeraX', methods=['POST'])
def user_download_structure_ChimeraX(): # route to download ChimeraX script to visualise structure
    """
    Sends a zip to visualise one structure of a user job and its contacts in ChimeraX.

    Reads 'jobId', 'sessionId', 'submissionTime' and 'pdbId' from the JSON request
    body. The zip holds the structure's .supp.cif file (if it exists), a
    pseudobond (.pb) file built from the filtered Arpeggio contacts, a command
    (.cxc) file that colours ligands and contacting residues by binding site, and
    the contacts info file.

    Responds with 400 when data is missing or not valid.
    """
    data = request.get_json() # Get JSON data from the POST request

    job_id = data.get('jobId')
    session_id = data.get('sessionId')
    submission_time = data.get('submissionTime')
    pdb_id = data.get('pdbId')

    if not job_id or not session_id or not submission_time or not pdb_id: # Validate the received data
        return jsonify({'error': 'Missing data'}), 400

    # These values are client-supplied and are joined into the path of pickles that
    # are loaded below, so they are validated like in the other user-job routes.
    if not is_valid_session_id(session_id) or not is_valid_submission_time(submission_time):
        return "Invalid input", 400
    if not isinstance(job_id, str) or job_id in (".", "..") or os.path.basename(job_id) != job_id:
        return "Invalid input", 400

    struc_name = os.path.splitext(pdb_id)[0].split(".")[0]

    # job_output_dir = os.path.join(USER_JOBS_OUT_FOLDER, job_id)
    job_output_dir = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", job_id)
    job_supp_cifs_dir = os.path.join(job_output_dir, "supp_cifs")
    job_arpeggio_dir = os.path.join(job_output_dir, "arpeggio")
    job_results_dir = os.path.join(job_output_dir, "results")

    assembly_file = f'{job_supp_cifs_dir}/{struc_name}.supp.cif' # structure cif file

    arpeggio_cons = pd.read_pickle(f'{job_arpeggio_dir}/{struc_name}_proc.pkl')

    # Keep inter-entity atom-atom contacts that are more than just "proximal" and
    # do not end on an atom named N or O
    arpeggio_cons_filt = arpeggio_cons[
        (arpeggio_cons['contact'].apply(lambda x: x != ["proximal"])) &
        (arpeggio_cons['interacting_entities'] == "INTER") &
        (arpeggio_cons['type'] == "atom-atom") &
        (~arpeggio_cons['auth_atom_id_end'].isin(['N', 'O',]))
    ].copy()

    pseudobond_lines = "\n".join(generate_pseudobond_lines_ChimeraX(arpeggio_cons_filt))
    pseudobond_file = f'{job_id}_{struc_name}.pb'

    bs_membership = pd.read_pickle(f'{job_results_dir}/{job_id}_bss_membership.pkl')

    # Invert binding site -> ligands into ligand -> binding site
    bs_membership_rev = {v: k for k, vs in bs_membership.items() for v in vs}

    # Ligand keys are "<structure name>_<ligand id>". Matching on the name plus the
    # underscore keeps ligands of e.g. "str10" out when the structure is "str1";
    # such a key would otherwise break the three-way split of the ligand ID below.
    lig_prefix = f'{struc_name}_'
    struc_ligs = {k: v for k, v in bs_membership_rev.items() if k.startswith(lig_prefix)}

    arpeggio_cons_filt["LIGAND_ID"] = arpeggio_cons_filt.label_comp_id_bgn + "_" + arpeggio_cons_filt.auth_asym_id_bgn + "_" + arpeggio_cons_filt.auth_seq_id_bgn.astype(str)

    struc_prot_data = {}
    for k, v in struc_ligs.items():
        ligand_id = k[len(lig_prefix):] # strip only the leading structure name
        ligand_site = v
        ligand_rows = arpeggio_cons_filt[arpeggio_cons_filt.LIGAND_ID == ligand_id]
        struc_prot_data[ligand_id] = [
            list(ligand_rows[["label_comp_id_end", "auth_asym_id_end", "auth_seq_id_end"]].drop_duplicates().itertuples(index=False, name=None)),
            ligand_site
        ]

    aas_str = [] # commands for the contacting protein residues

    ligs_str = [] # commands for the ligands

    for k, v in struc_prot_data.items():
        # ligand ID is "<residue name>_<chain>_<residue number>"
        _lig_resn, lig_chain, lig_resi = k.split("_")
        ress = v[0]
        col_key = v[1]
        if ress != []:
            prot_sel_str = 'sel ' + ' '.join([f'/{el[1]}:{el[2]}' for el in ress]) + ';'
            prot_col_str = f'col sel {colors[col_key]}'
            prot_disp_str = 'disp sel'
            aas_str.extend([prot_sel_str, prot_col_str, prot_disp_str])

        lig_sel_str = 'sel ' + f'/{lig_chain}:{lig_resi};'
        lig_col_str = f'col sel {colors[col_key]}'
        lig_disp_str = 'disp sel'

        ligs_str.extend([lig_sel_str, lig_col_str, lig_disp_str])

    cxc_lines = "\n".join(
        [
            f'open {struc_name}.supp.cif',
            'rib', 'style all stick', # some structures have settings to not show cartoon and style as spheres
            'color white',
            f'open {pseudobond_file}',
            'set bgColor white',
            'set silhouette ON',
            'set silhouettewidth 2',
            '~disp',
            'transparency 30',
        ]  + aas_str + ligs_str + ['~sel', 'color byhet', 'del H']
    )

    cxc_file = f'{job_id}_{struc_name}.cxc'

    files_to_zip = [
        assembly_file,
    ]

    # Create an in-memory zip file for sending to the client
    memory_file = io.BytesIO()
    with zipfile.ZipFile(memory_file, 'w') as zf:
        # Add the existing files to the in-memory zip
        for file_path in files_to_zip:
            if os.path.exists(file_path):  # Check if the file exists
                zf.write(file_path, os.path.basename(file_path))

        # Add the generated files directly to the in-memory zip
        zf.writestr(pseudobond_file, pseudobond_lines.encode('utf-8'))
        zf.writestr(cxc_file, cxc_lines.encode('utf-8'))
        zf.writestr(info_file, contacts_info.encode('utf-8'))

    # Seek to the beginning of the in-memory zip file before sending it
    memory_file.seek(0)

    # Send the zip file to the client as a downloadable file
    return send_file(
        memory_file,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'{job_id}_{struc_name}_structure_ChimeraX.zip'
    )

@main.route('/user-download-structure-PyMol', methods=['POST'])
def user_download_structure_PyMol(): # route to download PyMol script to visualise structure
    """
    Sends a zip to view a single structure of a user job in PyMol.

    Expects a JSON object with 'jobId', 'sessionId', 'submissionTime' and 'pdbId'.
    The zip contains the structure cif (only if it exists on disk), a generated
    .pml script and the module-level `contacts_info` text stored as `info_file`.

    Returns a 400 JSON error when a field is missing ('Missing data') or cannot
    be used safely as a path component ('Invalid data').
    """
    data = request.get_json() # Get JSON data from the POST request

    if not isinstance(data, dict): # a JSON array or scalar has no .get()
        return jsonify({'error': 'Missing data'}), 400

    job_id = data.get('jobId')
    session_id = data.get('sessionId')
    submission_time = data.get('submissionTime')
    pdb_id = data.get('pdbId')

    if not job_id or not session_id or not submission_time or not pdb_id: # Validate the received data
        return jsonify({'error': 'Missing data'}), 400

    # These values come from the client and become directory/file names from which
    # pickles are loaded below. Only plain, single path components are accepted so
    # that a request cannot redirect the pickle loader (e.g. "../.." or "/tmp/x").
    # NOTE(review): is_valid_session_id / is_valid_submission_time are imported at the
    # top of this file and may be a stricter check to apply here - confirm their signatures.
    for path_part in (job_id, session_id, submission_time):
        if not isinstance(path_part, str) or path_part in (".", "..") or os.path.basename(path_part) != path_part:
            return jsonify({'error': 'Invalid data'}), 400
    if not isinstance(pdb_id, str):
        return jsonify({'error': 'Invalid data'}), 400

    # text before the first "." of pdb_id, so it can never contain ".."
    struc_name = os.path.splitext(pdb_id)[0].split(".")[0]

    # job_output_dir = os.path.join(USER_JOBS_OUT_FOLDER, job_id)
    job_output_dir = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", job_id)
    job_supp_cifs_dir = os.path.join(job_output_dir, "supp_cifs")
    job_arpeggio_dir = os.path.join(job_output_dir, "arpeggio")
    job_results_dir = os.path.join(job_output_dir, "results")

    assembly_file = f'{job_supp_cifs_dir}/{struc_name}.supp.cif' # structure cif file

    # SECURITY: read_pickle deserialises arbitrary pickles; these files must only ever be written by the job pipeline
    arpeggio_cons = pd.read_pickle(f'{job_arpeggio_dir}/{struc_name}_proc.pkl')

    # keep INTER atom-atom contacts that are not purely "proximal" and do not end on atoms named C, N or O
    # (presumably the protein backbone atoms - TODO confirm)
    arpeggio_cons_filt = arpeggio_cons[
        (arpeggio_cons['contact'].apply(lambda x: x != ["proximal"])) &
        (arpeggio_cons['interacting_entities'] == "INTER") &
        (arpeggio_cons['type'] == "atom-atom") &
        (~arpeggio_cons['auth_atom_id_end'].isin(['C', 'N', 'O',]))
    ].copy()

    distance_lines = generate_distance_lines_PyMol(arpeggio_cons_filt, mult = 1.5)

    bs_membership = pd.read_pickle(f'{job_results_dir}/{job_id}_bss_membership.pkl')

    # invert {site: [ligand names]} into {ligand name: site}
    bs_membership_rev = {v: k for k, vs in bs_membership.items() for v in vs}

    struc_ligs = {k: v for k, v in bs_membership_rev.items() if k.startswith(struc_name)}

    arpeggio_cons_filt["LIGAND_ID"] = arpeggio_cons_filt.label_comp_id_bgn + "_" + arpeggio_cons_filt.auth_asym_id_bgn + "_" + arpeggio_cons_filt.auth_seq_id_bgn.astype(str)

    # {ligand id: [unique (resname, chain, resnum) tuples in contact with it, site]}
    struc_prot_data = {}
    for k, v in struc_ligs.items():
        ligand_id = k.replace(f'{struc_name}_', "")
        ligand_site = v
        ligand_rows = arpeggio_cons_filt[arpeggio_cons_filt.LIGAND_ID == ligand_id]
        struc_prot_data[ligand_id] = [
            list(ligand_rows[["label_comp_id_end", "auth_asym_id_end", "auth_seq_id_end"]].drop_duplicates().itertuples(index=False, name=None)),
            ligand_site
        ]

    # the loop below reads the result as {site: [ligand ids, residue tuples]}
    struc_prot_data_rf = transform_dict2(struc_prot_data)

    aas_lines = []
    ligs_lines = []
    for k, v in struc_prot_data_rf.items():
        col_key = k
        ligs = v[0]
        ress = v[1]
        if ligs != []:
            lig_sels = []
            for el in ligs:
                lig_d = el.split("_")
                lig_sels.append(f'///{lig_d[1]}/{lig_d[2]}')
            lig_sel_str = f'select BS{col_key}_ligs, '+ ' '.join(lig_sels)
            bs_set_col_str = f'set_color BS{col_key}_color, {hex_to_rgb(colors[col_key])}'
            lig_col_str = f'color BS{col_key}_color, BS{col_key}_ligs'
            lig_disp_str = f'show licorice, BS{col_key}_ligs'
            ligs_lines.extend([lig_sel_str, bs_set_col_str, lig_col_str, lig_disp_str])
        if ress != []:
            prot_sel_str = f'select BS{col_key}, ' + ' '.join([f'///{el[1]}/{el[2]}' for el in ress])
            prot_col_str = f'color BS{col_key}_color, BS{col_key}'
            prot_disp_str = f'show licorice, BS{col_key}'
            aas_lines.extend([prot_sel_str, prot_col_str, prot_disp_str])

    load_line = [f'load {os.path.basename(assembly_file)}']

    # ligand lines go before residue lines: the BS<site>_color used by the residues is defined in the ligand lines
    pml_lines = pymol_looks + pymol_dash + load_line + basic_pymol_format + distance_lines + ligs_lines + aas_lines + ["hide sticks, name N+O+C", "deselect",] + pymol_atom_colors
    pml_string = "\n".join(pml_lines)

    pml_file = f'{job_id}_{struc_name}.pml'

    memory_file = io.BytesIO() # Create an in-memory zip file for sending to the client
    with zipfile.ZipFile(memory_file, 'w') as zf:
        if os.path.exists(assembly_file):  # the cif is optional: skip it when it is not on disk
            zf.write(assembly_file, os.path.basename(assembly_file))

        # generated text goes straight into the archive, no intermediate buffers needed
        zf.writestr(pml_file, pml_string.encode('utf-8'))
        zf.writestr(info_file, contacts_info.encode('utf-8'))

    memory_file.seek(0) # Seek to the beginning of the in-memory zip file before sending it

    return send_file( # Send the zip file to the client as a downloadable file
        memory_file,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'{job_id}_{struc_name}_structure_PyMol.zip'
    )

@main.route('/user-download-structure-contact-data', methods=['POST'])
def user_download_structure_contact_data(): # route to download contacts data for a given structure
    """
    Sends the processed Arpeggio contacts table of one structure as a CSV file.

    Expects a JSON object with 'jobId', 'sessionId', 'submissionTime' and 'pdbId'.
    Returns a 400 JSON error when a field is missing ('Missing data') or cannot
    be used safely as a path component ('Invalid data').
    """
    data = request.get_json()

    if not isinstance(data, dict): # a JSON array or scalar has no .get()
        return jsonify({'error': 'Missing data'}), 400

    job_id = data.get('jobId')
    session_id = data.get('sessionId')
    submission_time = data.get('submissionTime')
    pdb_id = data.get('pdbId')

    if not job_id or not session_id or not submission_time or not pdb_id: # Validate the received data
        return jsonify({'error': 'Missing data'}), 400

    # These values come from the client and become directory names from which a
    # pickle is loaded below. Only plain, single path components are accepted so
    # that a request cannot redirect the pickle loader (e.g. "../.." or "/tmp/x").
    # NOTE(review): is_valid_session_id / is_valid_submission_time are imported at the
    # top of this file and may be a stricter check to apply here - confirm their signatures.
    for path_part in (job_id, session_id, submission_time):
        if not isinstance(path_part, str) or path_part in (".", "..") or os.path.basename(path_part) != path_part:
            return jsonify({'error': 'Invalid data'}), 400
    if not isinstance(pdb_id, str):
        return jsonify({'error': 'Invalid data'}), 400

    # text before the first "." of pdb_id, so it can never contain ".."
    struc_name = os.path.splitext(pdb_id)[0].split(".")[0]

    # job_output_dir = os.path.join(USER_JOBS_OUT_FOLDER, job_id)
    job_output_dir = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", job_id)
    job_arpeggio_dir = os.path.join(job_output_dir, "arpeggio")

    # SECURITY: read_pickle deserialises arbitrary pickles; this file must only ever be written by the job pipeline
    arpeggio_df = pd.read_pickle(f'{job_arpeggio_dir}/{struc_name}_proc.pkl')

    # Convert the DataFrame to CSV
    csv_data = io.StringIO()
    arpeggio_df.to_csv(csv_data, index=False)

    # Return the CSV file as a downloadable response
    return send_file(
        io.BytesIO(csv_data.getvalue().encode('utf-8')),
        mimetype='text/csv',
        as_attachment=True,
        download_name=f'{job_id}_{struc_name}_contacts.csv'
    )

@main.route('/user-download-all-structures-ChimeraX', methods=['POST'])
def user_download_all_structures_ChimeraX(): # route to download ChimeraX scripts to visualise all structures
    """
    Sends a zip with one folder per structure to view a whole user job in ChimeraX.

    Expects a JSON object with 'jobId', 'sessionId', 'submissionTime' and
    'assemblyPdbIds' (a list of strings). Each folder holds the structure cif
    (only if it exists on disk), a .cxc script and, when the contacts of that
    structure could be processed, a .pb pseudobond file. When processing the
    contacts fails, a basic .cxc without binding site colouring is written
    instead. The module-level `contacts_info` text is stored as `info_file`.

    Returns a 400 JSON error when a field is missing ('Missing data') or cannot
    be used safely ('Invalid data').
    """
    data = request.get_json() # Get JSON data from the POST request

    if not isinstance(data, dict): # a JSON array or scalar has no .get()
        return jsonify({'error': 'Missing data'}), 400

    job_id = data.get('jobId')
    session_id = data.get('sessionId')
    submission_time = data.get('submissionTime')
    assembly_pdb_ids = data.get('assemblyPdbIds')  # This is your array

    # Validate the received data BEFORE any path is built or any pickle is read
    if not job_id or not session_id or not submission_time or not assembly_pdb_ids:
        return jsonify({'error': 'Missing data'}), 400

    # These values come from the client and become directory/file names from which
    # pickles are loaded below. Only plain, single path components are accepted so
    # that a request cannot redirect the pickle loader (e.g. "../.." or "/tmp/x").
    # NOTE(review): is_valid_session_id / is_valid_submission_time are imported at the
    # top of this file and may be a stricter check to apply here - confirm their signatures.
    for path_part in (job_id, session_id, submission_time):
        if not isinstance(path_part, str) or path_part in (".", "..") or os.path.basename(path_part) != path_part:
            return jsonify({'error': 'Invalid data'}), 400
    if not isinstance(assembly_pdb_ids, list) or not all(isinstance(el, str) for el in assembly_pdb_ids):
        return jsonify({'error': 'Invalid data'}), 400

    # job_output_dir = os.path.join(USER_JOBS_OUT_FOLDER, job_id)
    job_output_dir = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", job_id)
    job_supp_cifs_dir = os.path.join(job_output_dir, "supp_cifs")
    job_arpeggio_dir = os.path.join(job_output_dir, "arpeggio")
    job_results_dir = os.path.join(job_output_dir, "results")

    # SECURITY: read_pickle deserialises arbitrary pickles; these files must only ever be written by the job pipeline
    bs_membership = pd.read_pickle(f'{job_results_dir}/{job_id}_bss_membership.pkl')

    # invert {site: [ligand names]} into {ligand name: site}
    bs_membership_rev = {v: k for k, vs in bs_membership.items() for v in vs}

    memory_file = io.BytesIO() # Create an in-memory zip file for sending to the client
    with zipfile.ZipFile(memory_file, 'w') as zf:
        for pdb_id in assembly_pdb_ids: # Loop through each assembly PDB ID to create corresponding folders in the zip

            # text before the first "." of pdb_id, so it can never contain ".."
            struc_name = os.path.splitext(pdb_id)[0].split(".")[0]

            folder_name = f'{struc_name}'

            assembly_file = f'{job_supp_cifs_dir}/{struc_name}.supp.cif' # structure cif file

            cxc_file = f'{job_id}_{struc_name}.cxc'
            pseudobond_file = f'{job_id}_{struc_name}.pb'
            pseudobond_lines = None # stays None when the contacts cannot be processed

            # Only the contact processing is guarded; nothing is written to the zip
            # inside the try, so a failure can never leave duplicate archive members.
            try:

                arpeggio_cons = pd.read_pickle(f'{job_arpeggio_dir}/{struc_name}_proc.pkl')

                # NOTE(review): this filter excludes atoms named N and O, while the PyMol routes exclude C, N and O - confirm this is intended
                arpeggio_cons_filt = arpeggio_cons[
                    (arpeggio_cons['contact'].apply(lambda x: x != ["proximal"])) &
                    (arpeggio_cons['interacting_entities'] == "INTER") &
                    (arpeggio_cons['type'] == "atom-atom") &
                    (~arpeggio_cons['auth_atom_id_end'].isin(['N', 'O',]))
                ].copy()

                contact_lines = "\n".join(generate_pseudobond_lines_ChimeraX(arpeggio_cons_filt))

                struc_ligs = {k: v for k, v in bs_membership_rev.items() if k.startswith(struc_name)}

                arpeggio_cons_filt["LIGAND_ID"] = arpeggio_cons_filt.label_comp_id_bgn + "_" + arpeggio_cons_filt.auth_asym_id_bgn + "_" + arpeggio_cons_filt.auth_seq_id_bgn.astype(str)

                # {ligand id: [unique (resname, chain, resnum) tuples in contact with it, site]}
                struc_prot_data = {}
                for k, v in struc_ligs.items():
                    ligand_id = k.replace(f'{struc_name}_', "")
                    ligand_site = v
                    ligand_rows = arpeggio_cons_filt[arpeggio_cons_filt.LIGAND_ID == ligand_id]
                    struc_prot_data[ligand_id] = [
                        list(ligand_rows[["label_comp_id_end", "auth_asym_id_end", "auth_seq_id_end"]].drop_duplicates().itertuples(index=False, name=None)),
                        ligand_site
                    ]

                aas_str = []
                ligs_str = []

                for k, v in struc_prot_data.items():
                    # ligand ids must have exactly three "_"-separated parts; the residue name is not needed
                    _lig_resn, lig_chain, lig_resi = k.split("_")
                    ress = v[0]
                    col_key = v[1]
                    if ress != []:
                        prot_sel_str = 'sel ' + ' '.join([f'/{el[1]}:{el[2]}' for el in ress]) + ';'
                        prot_col_str = f'col sel {colors[col_key]}'
                        prot_disp_str = 'disp sel'
                        aas_str.extend([prot_sel_str, prot_col_str, prot_disp_str])

                    lig_sel_str = 'sel ' + f'/{lig_chain}:{lig_resi};'
                    lig_col_str = f'col sel {colors[col_key]}'
                    lig_disp_str = 'disp sel'

                    ligs_str.extend([lig_sel_str, lig_col_str, lig_disp_str])

                cxc_lines = "\n".join(
                    [
                        f'open {struc_name}.supp.cif',
                        'rib', 'style all stick', # some structures have settings to not show cartoon and style as spheres
                        'color white',
                        f'open {pseudobond_file}',
                        'set bgColor white',
                        'set silhouette ON',
                        'set silhouettewidth 2',
                        '~disp',
                        'transparency 30',
                    ]  + aas_str + ligs_str + ['~sel', 'color byhet', 'del H']
                )

                # set last, so a .pb file is only shipped together with the script that opens it
                pseudobond_lines = contact_lines

            except Exception as exc: # not a bare except: gevent timeouts / SystemExit must propagate
                print(f"No Arpeggio contacts found for {pdb_id} ({exc!r})")
                # need to do everything except .pb file and arpeggio bit

                cxc_lines = "\n".join(
                    [
                        f'open {struc_name}.supp.cif',
                        'rib', 'style all stick', # some structures have settings to not show cartoon and style as spheres
                        'color white',
                        'set bgColor white',
                        'set silhouette ON',
                        'set silhouettewidth 2',
                        '~disp',
                        'transparency 30',
                        'color byhet',
                        'del H'
                    ]
                )

            if os.path.exists(assembly_file):  # the cif is optional: skip it when it is not on disk
                zf.write(assembly_file, os.path.join(folder_name, os.path.basename(assembly_file)))

            # Add the generated files directly to the in-memory zip
            if pseudobond_lines is not None:
                zf.writestr(os.path.join(folder_name, pseudobond_file), pseudobond_lines.encode('utf-8'))

            zf.writestr(os.path.join(folder_name, cxc_file), cxc_lines.encode('utf-8'))

        zf.writestr(info_file, contacts_info.encode('utf-8'))

    # Seek to the beginning of the in-memory zip file before sending it
    memory_file.seek(0)

    # Send the zip file to the client as a downloadable file
    return send_file(
        memory_file,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'{job_id}_all_structures_ChimeraX.zip'
    )

@main.route('/user-download-all-structures-PyMol', methods=['POST'])
def user_download_all_structures_PyMol(): # route to download PyMol scripts to visualise all structures
    """
    Sends a zip with one folder per structure to view a whole user job in PyMol.

    Expects a JSON object with 'jobId', 'sessionId', 'submissionTime' and
    'assemblyPdbIds' (a list of strings). Each folder holds the structure cif
    (only if it exists on disk) and a .pml script. When processing the contacts
    of a structure fails, a basic .pml without distances or binding site
    colouring is written instead. The module-level `contacts_info` text is
    stored as `info_file`.

    Returns a 400 JSON error when a field is missing ('Missing data') or cannot
    be used safely ('Invalid data').
    """
    data = request.get_json() # Get JSON data from the POST request

    if not isinstance(data, dict): # a JSON array or scalar has no .get()
        return jsonify({'error': 'Missing data'}), 400

    job_id = data.get('jobId')
    session_id = data.get('sessionId')
    submission_time = data.get('submissionTime')
    assembly_pdb_ids = data.get('assemblyPdbIds')  # This is your array

    # Validate the received data BEFORE any path is built or any pickle is read
    if not job_id or not session_id or not submission_time or not assembly_pdb_ids:
        return jsonify({'error': 'Missing data'}), 400

    # These values come from the client and become directory/file names from which
    # pickles are loaded below. Only plain, single path components are accepted so
    # that a request cannot redirect the pickle loader (e.g. "../.." or "/tmp/x").
    # NOTE(review): is_valid_session_id / is_valid_submission_time are imported at the
    # top of this file and may be a stricter check to apply here - confirm their signatures.
    for path_part in (job_id, session_id, submission_time):
        if not isinstance(path_part, str) or path_part in (".", "..") or os.path.basename(path_part) != path_part:
            return jsonify({'error': 'Invalid data'}), 400
    if not isinstance(assembly_pdb_ids, list) or not all(isinstance(el, str) for el in assembly_pdb_ids):
        return jsonify({'error': 'Invalid data'}), 400

    # job_output_dir = os.path.join(USER_JOBS_OUT_FOLDER, job_id)
    job_output_dir = os.path.join(SESSIONS_FOLDER, session_id, submission_time, "OUT", job_id)
    job_supp_cifs_dir = os.path.join(job_output_dir, "supp_cifs")
    job_arpeggio_dir = os.path.join(job_output_dir, "arpeggio")
    job_results_dir = os.path.join(job_output_dir, "results")

    # SECURITY: read_pickle deserialises arbitrary pickles; these files must only ever be written by the job pipeline
    bs_membership = pd.read_pickle(f'{job_results_dir}/{job_id}_bss_membership.pkl')

    # invert {site: [ligand names]} into {ligand name: site}
    bs_membership_rev = {v: k for k, vs in bs_membership.items() for v in vs}

    memory_file = io.BytesIO() # Create an in-memory zip file for sending to the client
    with zipfile.ZipFile(memory_file, 'w') as zf:
        for pdb_id in assembly_pdb_ids: # Loop through each assembly PDB ID to create corresponding folders in the zip

            # text before the first "." of pdb_id, so it can never contain ".."
            struc_name = os.path.splitext(pdb_id)[0].split(".")[0]

            folder_name = f'{struc_name}'

            assembly_file = f'{job_supp_cifs_dir}/{struc_name}.supp.cif' # structure cif file

            load_line = [f'load {os.path.basename(assembly_file)}']
            pml_file = f'{job_id}_{struc_name}.pml'

            # Only the contact processing is guarded; nothing is written to the zip
            # inside the try, so a failure can never leave duplicate archive members.
            try:

                arpeggio_cons = pd.read_pickle(f'{job_arpeggio_dir}/{struc_name}_proc.pkl')

                # keep INTER atom-atom contacts that are not purely "proximal" and do not end on atoms named C, N or O
                arpeggio_cons_filt = arpeggio_cons[
                    (arpeggio_cons['contact'].apply(lambda x: x != ["proximal"])) &
                    (arpeggio_cons['interacting_entities'] == "INTER") &
                    (arpeggio_cons['type'] == "atom-atom") &
                    (~arpeggio_cons['auth_atom_id_end'].isin(['C', 'N', 'O',]))
                ].copy()

                distance_lines = generate_distance_lines_PyMol(arpeggio_cons_filt, mult = 1.5)

                struc_ligs = {k: v for k, v in bs_membership_rev.items() if k.startswith(struc_name)}

                arpeggio_cons_filt["LIGAND_ID"] = arpeggio_cons_filt.label_comp_id_bgn + "_" + arpeggio_cons_filt.auth_asym_id_bgn + "_" + arpeggio_cons_filt.auth_seq_id_bgn.astype(str)

                # {ligand id: [unique (resname, chain, resnum) tuples in contact with it, site]}
                struc_prot_data = {}
                for k, v in struc_ligs.items():
                    ligand_id = k.replace(f'{struc_name}_', "")
                    ligand_site = v
                    ligand_rows = arpeggio_cons_filt[arpeggio_cons_filt.LIGAND_ID == ligand_id]
                    struc_prot_data[ligand_id] = [
                        list(ligand_rows[["label_comp_id_end", "auth_asym_id_end", "auth_seq_id_end"]].drop_duplicates().itertuples(index=False, name=None)),
                        ligand_site
                    ]

                # the loop below reads the result as {site: [ligand ids, residue tuples]}
                struc_prot_data_rf = transform_dict2(struc_prot_data)

                aas_lines = []
                ligs_lines = []
                for k, v in struc_prot_data_rf.items():
                    col_key = k
                    ligs = v[0]
                    ress = v[1]
                    if ligs != []:
                        lig_sels = []
                        for el in ligs:
                            lig_d = el.split("_")
                            lig_sels.append(f'///{lig_d[1]}/{lig_d[2]}')
                        lig_sel_str = f'select BS{col_key}_ligs, '+ ' '.join(lig_sels)
                        bs_set_col_str = f'set_color BS{col_key}_color, {hex_to_rgb(colors[col_key])}'
                        lig_col_str = f'color BS{col_key}_color, BS{col_key}_ligs'
                        lig_disp_str = f'show licorice, BS{col_key}_ligs'
                        ligs_lines.extend([lig_sel_str, bs_set_col_str, lig_col_str, lig_disp_str])
                    if ress != []:
                        prot_sel_str = f'select BS{col_key}, ' + ' '.join([f'///{el[1]}/{el[2]}' for el in ress])
                        prot_col_str = f'color BS{col_key}_color, BS{col_key}'
                        prot_disp_str = f'show licorice, BS{col_key}'
                        aas_lines.extend([prot_sel_str, prot_col_str, prot_disp_str])

                # ligand lines go before residue lines: the BS<site>_color used by the residues is defined in the ligand lines
                pml_lines = pymol_looks + pymol_dash + load_line + basic_pymol_format + distance_lines + ligs_lines + aas_lines + ["hide sticks, name N+O+C", "deselect",] + pymol_atom_colors

            except Exception as exc: # not a bare except: gevent timeouts / SystemExit must propagate
                print(f"No Arpeggio contacts found for {pdb_id} ({exc!r})")

                # fallback script: just load and format the structure
                pml_lines = pymol_looks + load_line + basic_pymol_format +  ["deselect",]

            pml_string = "\n".join(pml_lines)

            if os.path.exists(assembly_file):  # the cif is optional: skip it when it is not on disk
                zf.write(assembly_file, os.path.join(folder_name, os.path.basename(assembly_file)))

            zf.writestr(os.path.join(folder_name, pml_file), pml_string.encode('utf-8'))

        zf.writestr(info_file, contacts_info.encode('utf-8'))

    # Seek to the beginning of the in-memory zip file before sending it
    memory_file.seek(0)

    # Send the zip file to the client as a downloadable file
    return send_file(
        memory_file,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'{job_id}_all_structures_PyMol.zip'
    )
# Attach every route declared on the `main` blueprint to the Flask application
app.register_blueprint(main)

######################## LAUNCHING SERVER #########################

if __name__ == "__main__":
    # Listening port is taken from the PORT environment variable (9000 when unset)
    port = int(os.getenv('PORT', 9000))
    # NOTE(review): debug=True turns on Flask's debug mode - keep this entry point for local development only
    app.run(debug=True, port=port)  # run Flask LIGYSIS app on the specified port

# the end