#! /usr/bin/python3 -t
# _*_ coding: iso-8859-1 _*_

MODULE_NAME = "mformula_fatty_acid"
MODULE_DESC = "general routines for building fatty acid structural formulas"
MODULE_VERS = "1.0"

MODULE_COPYRIGHT = "Copyright © 2020-08-08 by the State University of Campinas (UNICAMP)"

import sys
import os
import re
import math
from math import sqrt,sin,cos,pi
# sys.path[1:0] = [ sys.path[0] + '/../lib', os.path.expandvars('${STOLFIHOME}/lib'), '.' ]
# sys.stderr.write(re.sub('[,]', ',\n', "%s: path = %r\n" % (PROG_NAME, sys.path)))

import rn
import mformula
import mformula_carbon_chain_horz as mcch

# Overall shape of a fatty acid code: "{n}", optional ":{m}", then the bond lists.
_CODE_RE = re.compile(r"([0-9]+)(?:[:]([0-9]+))? *([-cistransdbltripleRS=0-9, ]*)")

# One bond list at the head of the remaining text: "{kind}-{k1},{k2},..." plus the rest.
_BOND_LIST_RE = re.compile(r" *(c|cis|t|trans|dbl|tri|triple|R|S)[-]([0-9,]+) *(.*)")

# Bond kind keyword as written in the code -> compact bond code used in {MB}.
_KIND_TO_CODE = {
    "c": "2c",
    "cis": "2c",
    "t": "2t",
    "trans": "2t",
    "dbl": "2",
    "R": "2R",
    "S": "2S",
    "tri": "3",
    "triple": "3",
}

# Isomer letter of a compact double-bond code -> stereo descriptor used in the name.
_ISOMER_TO_DESCRIPTOR = {"c": "Z", "t": "E", "R": "R", "S": "S"}


def build_unbranched(svg,code,hydro,labmult,laball,wlabs,labpos):
    """Builds the structural formula for a fatty acid molecule, given
    its fatty acid code {code}.  Currently supports only unbranched chains.

    The code {code} should be like

      "{n}:{m} cis-{c1},{c2},... trans-{t1},{t2},... tri-{p1},{p2},..."

    where

      {n} is the number of carbons in the chain, including the carboxyl
      {m} is the total number of double and triple bonds
      {c1}, {c2}, ... are the positions of 'cis' double bonds
      {t1}, {t2}, ... are the positions of 'trans' double bonds
      {p1}, {p2}, ... are the positions of triple bonds

    Each position {k} is a carbon number, counting from 1 at the carboxyl.
    The multiple bond will be between carbons {k} and {k+1}.

    The ":{m}" part can be omitted, since it is implied by the multiple
    bond list.  If there are no 'cis' bonds, the entire "cis-..." part
    should be omitted.  Same for the other bond types.

    If the chain has a set of {q >= 2} adjacent double bonds (an allene
    or cumulene chain), the type 'cis' and 'trans' should be used for
    those bonds only if the number {q} is odd; and they all should have
    the same type.  If the number {q} of adjacent double bonds is even,
    the type should be instead 'R' or 'S', to indicate the screw
    isomerism at those bonds.

    Also, the type 'dbl' may be used instead for any double bond (or set
    of cumulative double bonds) when the 'cis'/'trans' or 'R'/'S'
    isomerism is not important or does not exist (e.g. for bonds that
    include the terminal carbon).

    The boolean parameter {hydro} says whether the hydrogen atoms should
    be shown.

    If {laball} is true, shows the C-numbers of all carbons.  Otherwise,
    if {labmult} is true, shows the C-number of the first carbon of every
    multiple bond, as well as of carbons 1 and {n}.  If {wlabs} is true,
    also shows omega-numbers on labeled carbons.

    The {labpos} specifies the positions of the carbon number labels.
    See {mformula_carbon_chain_horz.build} for details.

    The molecule drawing will be a linear and horizontal zigzag, with the
    carboxyl at left.  Eventually there will be 'chemist' numbers below,
    from 1 at the carboxyl; and 'omega' numbers at top, from 1 at the
    opposite end.

    Returns the formula object produced by {mformula_carbon_chain_horz.build},
    with the carboxyl (and, if {hydro}, the hydrogens) added."""

    # NOTE(review): this object is replaced by the result of {mcch.build} below;
    # the call is kept only in case {mformula.obj} has side effects -- confirm.
    fm = mformula.obj()

    n, MB = parse_code(code)
    MB = flag_carbons_to_label(n, MB,labmult,laball)
    sys.stderr.write(f"  labmult = {labmult}\n  laball = {laball}\n  MB = {MB}\n")

    # Build a chain of carbon atoms:
    iniz = True   # Carbon 1 on the midline.
    finz = False  # Carbon {n} unconstrained.
    clabs = True  # Show C-labels on labeled carbons.
    fm = mcch.build(svg,n,MB,iniz,finz,clabs,wlabs,labpos)

    ka = 0  # Index of the carboxylic carbon.

    # The OCO angle is wider when the hydrogens are drawn.
    aoco = 120 if hydro else 60
    fm = add_carboxyl(svg,fm,ka,-1,aoco)
    if hydro:
        mcch.add_hydrogens(svg,fm)

    return fm
#----------------------------------------------------------------------

def parse_code(code):
    """Parses the code for an unbranched fatty acid.  Returns the number {n}
    of carbons and the array {MB} that specifies the multiple bonds, in sequence.

    The {code} has the form "{n}:{m} {kind}-{k1},{k2},... {kind}-..." where
    the ":{m}" part is optional and each {kind} is one of 'c', 'cis', 't',
    'trans', 'dbl', 'tri', 'triple', 'R', or 'S'.  The count {m}, if given,
    is accepted but is not checked against the bond lists.

    Each element of {MB} is a pair {(kcd, k)} where {k} is the carbon number
    and {kcd} is the compact bond code: "2c" (cis), "2t" (trans), "2" (plain
    double), "2R", "2S", or "3" (triple).  The pairs are sorted by {k}.

    Fails with {AssertionError} if the code is malformed."""

    mt = _CODE_RE.fullmatch(code)
    assert mt is not None, "invalid code \"" + code + "\""
    n = int(mt.group(1))
    mbd = mt.group(3).strip()

    MB = []
    while mbd != "":
        mt = _BOND_LIST_RE.fullmatch(mbd)
        assert mt is not None, "invalid code \"" + mbd + "\""
        kind, nums, rest = mt.groups()

        # Convert {kind} to compact code:
        kcd = _KIND_TO_CODE.get(kind)
        assert kcd is not None, "invalid bond type = \"" + kind +"\""

        # Expand {nums}, tolerating stray or repeated commas:
        for p in nums.split(','):
            p = p.strip()
            if p != "":
                MB.append((kcd, int(p),))

        # Continue parsing:
        mbd = rest.strip()

    MB.sort(key = lambda x: x[1])
    return n, MB
#----------------------------------------------------------------------

def flag_carbons_to_label(n,MB,labmult,laball):
    """Adds the "*" flag on all carbons that should be labeled.

    The list {MB} has pairs {(kcd, k)} as returned by {parse_code},
    sorted by carbon number {k}.

    If {laball} is true, flags all carbons {1..n}.  Otherwise, if {labmult}
    is true, flags only the first carbon of each multiple bond, as well as
    carbons 1 and {n}.  If both are false, returns {MB} itself, unchanged.

    A flagged multiple-bond carbon gets "*" appended to its bond code.
    A flagged carbon that is not in {MB} gets a new entry {("*", k)}.
    The result is a new list; {MB} is not modified.

    Fails with {AssertionError} if some carbon number in {MB} is not an
    integer in {1..n}."""

    if not (labmult or laball):
        return MB

    def must_label(k):
        # Says whether a carbon {k} that is not in {MB} gets a "{k}*" entry.
        return laball or k == 1 or k == n

    MBnew = []
    cnext = 1  # Next expected carbon number in {MB}.
    for kcd, cnum in MB:
        assert isinstance(cnum, int) and cnum >= 1 and cnum <= n, \
            f"invalid carbon number {cnum} in {MB}"
        # Carbons skipped between the previous entry and this one:
        MBnew.extend(("*", k,) for k in range(cnext, cnum) if must_label(k))
        # At this point {labmult} or {laball} is true, so every bond carbon is flagged:
        MBnew.append((kcd + "*", cnum,))
        cnext = cnum+1

    # Carbons after the last multiple bond, up to the end of the chain:
    MBnew.extend(("*", k,) for k in range(cnext, n+1) if must_label(k))
    return MBnew
#----------------------------------------------------------------------

def add_carboxyl(svg,fm,kc,sgnx,aoco):
    """Assumes that atom {kc} in formula {fm} is a carbon atom.
    Adds a carbonyl and a hydroxyl to it.

    The carboxyl will be at constant {Z}, the same as carbon {kc}.  The OCO
    angle will be {aoco} (degrees) and its bisector will be parallel to the
    {X} axis in the direction {sgnx} (which should be {+1} or {-1}).

    Bond lengths are taken from {svg.CObondlength}, {svg.OHbondlength},
    and {svg.rel_bond_length}.  Adds three atoms and three bonds to {fm},
    and returns {fm}."""

    # Get atom coords:
    pc = fm.atom_ctr[kc]  # {XYZ} coordinates of carbon {kc}.

    # Angle OCO in radians:
    roco = aoco*pi/180

    # Add the carbonyl:
    rco2 = roco/2  # Arg of carbonyl C->O if {sgnx} is {+1}.
    bco2 = svg.CObondlength*svg.rel_bond_length(2.0)  # Length to use for the C=O bond.
    po2 = (pc[0] + sgnx*bco2*cos(rco2), pc[1] + bco2*sin(rco2), pc[2])
    # NOTE(review): meaning of the trailing {0,0} arguments is defined by {fm.add_atom}.
    ko2 = fm.add_atom('O', po2,0,0)

    # Add the oxygen of the hydroxyl:
    rco1 = -roco/2  # Arg of hydroxyl C->O if {sgnx} is {+1}.
    bco1 = svg.CObondlength  # Length to use for the C-OH bond.
    po1 = (pc[0] + sgnx*bco1*cos(rco1), pc[1] + bco1*sin(rco1), pc[2])
    ko1 = fm.add_atom('O', po1,0,0)

    # Add the hydrogen of the hydroxyl:
    roh = rco2  # Arg of O->H in radians.
    boh = svg.OHbondlength  # Length to use for the O-H bond.
    ph = (po1[0] + sgnx*boh*cos(roh), po1[1] + boh*sin(roh), pc[2])
    kh = fm.add_atom('H', ph,0,0)

    # Connect the new atoms: C=O, C-O, and O-H.
    fm.add_bond(kc,ko2,2.0)
    fm.add_bond(kc,ko1,1.0)
    fm.add_bond(ko1,kh,1.0)

    return fm

def acid_name_from_list(n, MB):
    """Returns the IUPAC (almost) name of the fatty acid given the number of
    carbons {n} and the list {MB} of positions and types of multiple bonds.

    Each element of {MB} is a pair {(kcd, k)} as returned by {parse_code}.
    Trailing "*" label flags on {kcd} are ignored, and entries that consist
    only of a label flag (as added by {flag_carbons_to_label}) are skipped.

    The name starts with the stereo descriptors of the double bonds that
    have one, in parentheses ('cis' as "Z", 'trans' as "E", and "R" or "S"
    as themselves), followed by the chain name with the bond positions.

    Fails with {AssertionError} on an invalid bond code, or if {n} is too
    small for the given bonds."""

    descriptors = []  # First part of name: double bond types, like "9Z".
    doubles = []      # Positions of double bonds, as strings.
    triples = []      # Positions of triple bonds, as strings.
    for kcd, pos in MB:
        kcd = kcd.rstrip("*")  # Remove final "*" if any.
        if kcd == "":
            # Plain label entry, not a multiple bond.
            continue
        assert kcd[0] == "2" or kcd[0] == "3", f"invalid multiple bond code {kcd} at {pos}"
        assert len(kcd) <= 2, f"invalid multiple bond code {kcd} at {pos}"
        if kcd[0] == "2":
            doubles.append(str(pos))
            if len(kcd) == 2:
                kiu = _ISOMER_TO_DESCRIPTOR.get(kcd[1])
                assert kiu is not None, f"invalid multiple bond code {kcd} at {pos}"
                descriptors.append(str(pos) + kiu)
        else:
            triples.append(str(pos))

    kinds = "(" + ",".join(descriptors) + ") " if descriptors else ""

    # Second part: double and triple bond positions
    grnum = mformula.greek_nums()
    aknum = mformula.alkane_nums()
    ndoubles = len(doubles)
    ntriples = len(triples)

    if ndoubles > 0 and ntriples > 0:
        assert n >= 5
        # Must insert in name:
        acname = aknum[n] + \
            "-" + ",".join(doubles) + "-" + grnum[ndoubles] + "en" + \
            "-" + ",".join(triples) + "-" + grnum[ntriples] + "yn" + \
            "oic acid"
    elif ndoubles > 0:
        assert n >= 3
        # Prefix to whole name:
        acname = ",".join(doubles) + "-" + \
            aknum[n] + \
            ("a" + grnum[ndoubles] if ndoubles > 1 else "") + \
            "enoic acid"
    elif ntriples > 0:
        assert n >= 3
        # Prefix to whole name:
        acname = ",".join(triples) + "-" + \
            aknum[n] + \
            ("a" + grnum[ntriples] if ntriples > 1 else "") + \
            "ynoic acid"
    else:
        acname = aknum[n] + "anoic acid"

    return kinds + acname