#! /usr/bin/python3 -t MODULE_DESC = "general routines for building horizontal zigzag carbon chains" MODULE_COPYRIGHT = "Copyright © 2020-08-09 by the State University of Campinas (UNICAMP)" import sys, os, re import math; from math import sqrt,sin,cos,pi import rn import mformula def build(svg,n,MB,iniz,finz,clabs,wlabs,labpos): """Builds the structural formula for an unbranched chain of carbon atoms, squeezed in an 'horizontal zigzag' format. The chain will have {n} carbons. The parameter {MB} is a list of pairs that indicates the multiple-bond positions and labeled carbons. See {get_valences_bends_nums_labels} for the details. The booleans {iniz} and {finz} control the {Y} positions of the first and last carbons. If true, the corresponding carbon will be constrained to lie on {X} axis. These constraints will hold automatically if {n} is 1 or 2, or if the double and triple bonds imply that there there are no bends in the whole chain. The bent carbon atoms will be placed on horizontal rows, "bot" and "top", equidistant from the {X}-axis. Any end that is not constrained by {iniz} or {finz} will also be placed on one of those rows. The first internal bent carbon will be on the top row, and the others will alternate between the rows, except between 'cis' bonds. The boolean parameters {clabs,wlabs} ask for labels along the chain. If {clabs} is true, label with count from the left end as 1. If {wlabs} is true, labes with count from the right end as 0. The {labpos} must be 0 or {±1}. If {labpos} is 0, the carbon number labels will be placed next to the atom, above or below, depending on what seems safe in case hydrogen atoms are added. Otherwise, carbon numbers will be placed just below the bottom rail if {labpos} is {-1}, or just above the top rail if labpos is {+1}. Irrespective of {clabs,wlabs}, each sequence of overlapping double bonds with even number of double bonds that are labeled 'R' of 'S' in {MB} will have a label 'R' or 'S' on the central carbon. The label will be placed on the side opposite to the side where the carbon numbers would be placed. """ assert type(n) is int and n >= 0, f"invalid n = {n}" fm = mformula.obj(); if n == 0: return fm # First, determine the multiplicity {v[i]} (1 to 3) of each bond # between carbons {i} and {+1}, for {i} in {0..n-2}; and the relative signs # {s[i]} of the bend angle at carbon {i} in {0..n-1}. v, s, c, e = get_valences_bends_nums_labels(n,MB) # Computes the distance {dyc} between the top and bottom rails so that # a chain with alternating single bonds will have bend angles {ac}: d1 = svg.rel_bond_length(1) # Length of single C-C bonds. ac = 109.5 # Ideal angle C-C-C between two single bonds (actual: 109.5). dyc = d1*sin((90-ac/2)*3.1415926/180) sys.stderr.write(f'd1 = {d1}\n') sys.stderr.write(f'dyc = {dyc}\n') # Assign coordinates to the {n} atoms, based on bond valences and relative # bend senses: xc, yc = choose_atom_coords(svg,n,v,s,dyc,iniz,finz) # Insert the atoms and bonds: for i in range(n): fm.add_atom('C', [xc[i],yc[i]], 0,0) if i >= 1: fm.add_bond(i-1, i, v[i-1]) # Add carbon number labels: fh = 1.2 if svg.style[0] == 'B' else 0.75; # Relative font size. cfRGB = [0.000, 0.000, 0.700] # Color for C-numbers. wfRGB = [0.900, 0.100, 0.000] # Color for omega-numbers. efRGB = cfRGB; # Color for extra labels. ylabs = (None if labpos == 0 else labpos*(dyc/2 + svg.CHbondlength)) # Reference Y for carbon numbers. for i in range(n): if clabs or wlabs: if c[i]: # Must show carbon number(s): kpos = +1 # Next to the atom in "safe" diretion: if clabs: attach_label(svg,fm,n,i,str(i+1),kpos,ylabs,fh,True,cfRGB); kpos += 1 if wlabs: attach_label(svg,fm,n,i,str(n-1-i),kpos,ylabs,fh,False,wfRGB); kpos += 1 if e[i] != None: # Must show extra label: kpos = -1 # Next to the atom in the "unsafe" direction. attach_label(svg,fm,n,i,e[i],kpos,ylabs,fh,False,efRGB) return fm #---------------------------------------------------------------------- def attach_label(svg,fm,n,i,txt,kpos,ylabs,fh,bold,fRGB): """Draw a label {txt} next to atom with index {i} on the formula {fm}, that is supposed to have the skeleton of an unbranched carbon chain, in order. The label will be written with nominal font height {fh} and color {fRGB} and will be placed above or below the atom. The parameter {kpos} is a nonzero integer that indicates the relative placement of the label. Its absolute value indicates the line of a multi-line label: {±1} is the first line, {±2} for one line further away from the atom, {±3} is two text lines further away, and so on. If {ylabs} is {None}, the reference ordinate is the atom's {Y} coordinate. With a positive {kpos}, the label will be placed just above or below the atom, whichever direction seems "safer" in case hydrogens or other groups are attached to it. A negative {kpos} places it on the opposite direction. If {ylabs} is not {None}, it must be an {Y} coordinate. If {kpos} is positive, the label will be placed just above {ylabs} if {ylabs} is positive, or below {ylabs} if {ylabs} is negative. If {kpos} is negative, the label will be placed next to the atom: just ABOVE it if {ylabs} is NEGATIVE, or just BELOW it if {ylabs} is POSITIVE. """ assert kpos != 0, f"invalid kpos = {kpos}" if txt == None: return txt = txt.strip() if len(txt) == 0: return # Find the {X,Y} coords of the atom: pm = fm.atom_ctr[i][0:2]; assert fm.atom_symbol[i] == 'C', f"atom {ic} is not carbon" # Compute a safe distance from carbon atom center to label: dsafe = (1.2*svg.atomradius + 0.6*svg.atomftheight)/svg.bondlength # Displacemen for each additional line: dlin = fh*svg.atomftheight/svg.bondlength # Additional {Y} distance for each new line. sys.stderr.write(f"svg.atomradius = {svg.atomradius} svg.atomftheight = {svg.atomftheight}\n") sys.stderr.write(f"dsafe = {dsafe} dlin = {dlin}\n") # Compute the ref ordinate {yref} and sign of displacement {sgny}; consume sign of {kpos}: if ylabs == None: # Label always next to the atom: yref = pm[1] # Determine the safe {Y} displacement direction from the atom: sgny = (+1 if pm[1] < -0.2 else -1) if kpos < 0: sgny = -sgny; kpos = -kpos else: # Ref direction depends on sign of {kpos}: if kpos > 0: # Just below given {ylabs}: sgny = -1; yref = ylabs else: # Just above the atom: sgny = +1; kpos = -kpos; yref = pm[1] assert kpos > 0 pos = (pm[0], yref + sgny*(dsafe + (kpos-1)*dlin)) fm.add_label(txt,pos,fh,bold,fRGB) #---------------------------------------------------------------------- def choose_atom_coords(svg,n,v,s,dyc,iniz,finz): """Computes the carbon atom coordinates for a chain with {n} atoms with bond multiplicities {v[0..n-2]} and bend angle signs {s[0..n-1]}, as returned by {get_valences_bends_nums_labels}. Uses {svg} only to determine bond lengths. If there are no internal atoms with bends ({s[i] == 0} for {i} in {1..n-2}), all atoms will be placed on the {X}-axis. Otherwise, the atoms with bends will be placed on two horizontal rails at ordinates {±dyc/2}. The first internal atom with a bend will be placed on the top rail. The other internal bends will be assigned to either rail as determined by the signs {s[1..n-2]}. The first atom will be placed on the {X}-axis if {iniz} is true. The last atom will be placed on the {X} axis if {finz} is true. Otherwise these atoms too will be placed on the rails, as determined by the bend signs {s[i]}. Returns a pair of vectors {xc,yc[0..n-1]} of coordinates.""" # Compute cumulative chain lengths {dc[0..n-1]} and count of bent carbons {nbent} nbent = 0 # Total number of bent carbons. dc = [None]*n # Cumulative bond length up to each carbon, indexed {0..n-1}. dtot = 0 for i in range(n): if (s[i] != None) and (s[i] != 0): nbent += 1 dc[i] = dtot if i < n-1: dtot += svg.rel_bond_length(v[i]) # Assign coordinates to the carbon atoms: xc = [None]*n # Carbon {X} coordinates, indexed {0..n-1}. yc = [None]*n # Carbon {Y} coordinates, indexed {0..n-1}. if nbent == 0: # No bent carbons. Put them all on the {X}-axis: for i in range(n): xc[i] = dc[i] yc[i] = 0 else: # At least one bent carbon in {1..n-2}. Put them and maybe the ends # on two rails spaced {dyc} apart. rc = [None]*n # Rail assignment of each carbon, indexed {0..n-1}. iprev = None # Last carbon with defined rail {rc[i]} and coords {xc[i],yc[i]} for i in range(n): ic = i + 1 # Carbon number starting from 1. # Assign to each carbon a formal rail {rc[i]}, which is {-1,+1} for bot, top, # and 0 for indeterminate. The end carbons will get {+1} or {-1} # even if they are forced onto the {X}-axis. if i == 0: # First carbon. xc[i] = 0 # If not constrained by {iniz}, assume that # the first internal bent carbon will be top, so assign to bot: rc[i] = -1 yc[i] = 0 if iniz else rc[i]*dyc/2 iprev = i elif (s[i] == 0): # Straight internal carbon: skip for now. assert i < n-1 # Paranoia: {s[n-1]} should be {None}. rc[i] = None # {rc[i]} is not relevant for straight carbons. else: # Bent internal carbon, or final carbon. assert (s[i] == None) == (i == n-1) # Paranoia. assert iprev != None # Paranoia. if (s[i] != None) and (s[iprev] != None) and (s[i] == s[iprev]): # This carbon is bent in the same sense as the previous bent one. # It must stay in the same rail as carbon {iprev}. rc[i] = rc[iprev] else: # This carbon is final or is the first bent carbon or is # bent oppositely to the previous bent carbon. # It must go on the opposite rail as carbon {iprev}. rc[i] = -rc[iprev] assert rc[i] != 0 # Paranoia. # Assign {Y} coordinate of this carbon based on rail and {finz}. yc[i] = 0 if (i == n-1) and finz else rc[i]*dyc/2 # Assign {X} coordinate of this carbon by Pythagoras: ddseg = dc[i] - dc[iprev] # Length of straight segment dyseg = yc[i] - yc[iprev] # {Y} span of straight segment dxseg = sqrt(ddseg**2 - dyseg**2) # {X} span of straight segment xc[i] = xc[iprev] + dxseg # Assign coordinates to preceding straight carbons by interpolation for j in range(iprev+1,i): frac = (dc[j] - dc[iprev])/ddseg xc[j] = xc[iprev] + frac*dxseg yc[j] = yc[iprev] + frac*dyseg # Now this is the last bent carbon: iprev = i assert iprev == n-1 # Paranoia. return xc, yc #---------------------------------------------------------------------- def get_valences_bends_nums_labels(n,MB): """Parses the list {MB} of multiple bond specifications in an unbranched chain of {n} carbons, and returns three lists {v,s,c,e} that describe the valence of each bond, the signs of the bend angles at each carbon, whether each carbon atom should be labeled or not, and an extra label to be shown on each atom. The parameter {MB} is a list of pairs. Each pair is a string {typ} and a positive integer {k}, sorted by the latter; it indicates a multiple-bond position or a significant carbon in the chain. The string {typ} is '2c', '2t', '2R', '2S', or '2' to indicate a double bond, '3' for triple bond. The multiple bond will be placed between carbons {k} and {k+1}, counting the first carbon as 1. Single bonds are the default and should not be indicated. Two or more adjacent double bonds should all have the same type {typ}. If the number of such bonds is odd, the type may be '2c' or '2t' to indicate the cis or trans configuration of the adjacent single bonds. If the number of such bonds is even, the type may be '2R' or '2S' to indicate the right-handed or left-handed screw configuration of those adjacent bonds. The type may be just '2' if there is no isomerism, e.g. at the beginning or end of a hydrocarbon chain. The types '2R' and '2S' will be drawn like the 'trans' configuration, as an approximation of the actual 3D screw geometry. The unspecified type '2' will be drawn as 'trans' too. If the type is '2R' or '2S', a label 'R' or 'S' will be written near the central carbon of the cumulative sequence. The {typ} can also be '*' to indicate that the carbon {k} should have a carbon number next to it. If {typ} is any of the above followed by '*', carbon {k} will be numbered too. A carbon connected by two double bonds, or a single and triple bond, will be constrained to be 'straight' (collinear with its two neighbors). A carbon connected to one singe and one double, or two single bonds, will be constrained to be bent (NOT collinear with its two neighbors). The first and last carbons of a string of one or more adjacent double bonds, if they are not the first and last of the chain, will be constrained to be bent in the same sense if the types of those bonds are '2c', and in opposite senses otherwise. Also, two carbons connected by a single bond will be constrained to be bent in opposite senses. Element {v[i]} of the returned list {v}, for {i} in {0..n-2}, will be the multiplicity (valence) of bond between carbons with C-number {i+1} and {i+2} namely an integer in {0..3}. Note that the array is indexed from 0, not from 1, even though {MB} numbers start at 1. Element {s[i]} of the returned list {s}, for {i} in {0..n-1}, will the the sense of the bend angle at carbon {i+1}. A value of {0} means straight, and {-1,+1} denote bends in opposite senses. Which will be 'bend up' and which will be 'bend down' should be chosen by the caller. Elements {s[0]} and {s[n-1]} will be {None} since there the bending angle makes no sense there. Element {c[i]} of the returned list {c}, for {i} in {0..n-2}, is a boolean that will be {True} iff carbon {i+1} should be identified by its C-number {i+1} and/or omega-number {n-1-i}; that is, if the corresponding {typ} ended with "*". shown. Element {e[i]} of the returned list {e} is an additional label (such as "R" or "S") that may have to be written next to carbon {i}". """ # Parse the {MB} list into vectors {t[0..n-2]}, {v[0..n-2]} t = [ '1' ]*(n-1) # Bond types (minus the "*" flags). v = [ 1 ]*(n-1) # Bond multiplicities. c = [ False ]*n # Should carbon be numbered. e = [ None ]*n # Labels to attach to each carbon. kprev = 0 # Previous value of {kk} from the list. for kind, kk in MB: assert kk > kprev, f"pair out of order ('{tk}', {kk}) in {MB}" assert kk <= n, f"invalid carbon number ('{tk}', {kk}) in {MB}" tk = kind if tk[-1] == '*': # Label first carbon with the C-number: c[kk-1] = True tk = tk[:-1] if tk != '' and tk != '1': # A multiple bond: assert kk < n, f"invalid multibond position in {MB}: ('{kind}', {kk})" assert re.fullmatch(r'(1|2[ctRS]?|3)', tk), \ f"invalid kind in MB: ('{kind}->{tk}', {kk})" t[kk-1] = tk v[kk-1] = int(tk[0]) kprev = kk # Validate bond types {t[0..n-1]} and compute bend signs {s[0..n-1]}. s = [ None ]*n # Relative bend senses. sprev = +1 # Assumed sign of last non-straight bend angle (arbitrary). nmult = 0 # Number of consecutive multibonds up to first carbon. for i in range(n): ic = i + 1 # Carbon number starting from 1. # Get types and valences of incoming and outgoing bonds tprev = '0' if i == 0 else t[i-1] vprev = 0 if i == 0 else v[i-1] tnext = '0' if i == n-1 else t[i] vnext = 0 if i == n-1 else v[i] # Total valence limit check: */ vtot = vprev + vnext # Total valence of this carbon. assert vtot <= 4, f"excess valence at carbon {ic}: {vprev} + {vnext} = {vtot}" # Update count of cumulative double bonds up to this carbon: nmult = nmult + 1 if vprev == 2 else 0 # Check consistency of types in cumulative double bonds: if (vprev >= 2) and (vnext >= 2): # Cumulative double bonds must have the same type: assert tprev == tnext, f"inconsistent bond types at carbon {ic}: {tprev}, {tnext}" elif vprev == 2: # End of a string of double bonds. Type depends on parity: assert vnext <= 1 # Paranoia. if (nmult % 2) == 0: # Even number of double bonds, screw chirality: assert tprev == '2R' or tprev == '2S' or tprev == '2', \ f"invalid type {tprev} for {nmult} (even) cum bonds ending at carbon {ic}" if tprev != '2': # Label the middle carbon with 'R' or 'S': eiso = tprev[-1] im = i - nmult//2; em = e[im] e[im] = eiso if em == None else em + "\n" + eiso else: # Odd number of double bonds, cis-trans isomerism: assert tprev == '2c' or tprev == '2t' or tprev == '2', \ f"invalid type {tprev} for {nmult} (odd) cum bonds ending at carbon {ic}" # Decide sign of bend angle at this carbon: if (ic == 1) or (ic == n): # Bend sign not meaningful for first and last carbons. s[i] = None elif vtot == 4: # Single + triple or double + double: straight carbon, preserve {sprev}: s[i] = 0 else: # Bent internal carbon: assert vprev >= 1 and vnext >= 1 # Paranoia. assert vprev == 1 or vnext == 1 # Paranoia. if tprev != '2c': # After any bond except a 'cis' double bond (or odd cumulative group of such bonds), # reverse the previous bend sign: sprev = -sprev s[i] = sprev sys.stderr.write(f"v = {str(v)}\n") sys.stderr.write(f"s = {str(s)}\n") sys.stderr.write(f"c = {str(c)}\n") sys.stderr.write(f"e = {str(e)}\n") return v, s, c, e; #---------------------------------------------------------------------- def add_hydrogens(svg,fm): """Given a formula {fm}, adds hydrogen atoms to all carbon atoms that have only 3 or fewer bonds.""" n = fm.natoms; bh = svg.CHbondlength # Length of single bonds with hydrogen. # Get the valences and mean bond directions: vds = fm.compute_atom_valences_and_dirs() assert len(vds) == n for kc in range(n): v, d = vds[kc] d = d[0:2] # Discard the {Z} coordinate. sys.stderr.write("atom %2d val = %.2f dir = ( %.3f %.3f )\n" % (kc, v, d[0], d[1])) if fm.atom_symbol[kc] == 'C' and v <= 3: pc = fm.atom_ctr[kc] phs = [] if v == 0: # Add 4 hydrogens at 90 degrees: for i in range(4): ang = (45 + i*90)*pi/180; ph = (pc[0] + bh*cos(ang), pc[1] + bh*sin(ang)) + pc[2:] phs.append(ph) else: dd, dm = rn.dir(d) # Unit drection vector towards most bonds. ang = math.atan2(dd[1], dd[0]) # Angular argument of {dd} (radians). if v <= 1: # Add three hydrogens spanning 120 degrees: for j in range(3): aj = ang + (0.6667 + j*0.3333)*pi ph = (pc[0] + bh*cos(aj), pc[1] + bh*sin(aj), pc[2]) phs.append(ph) elif v <= 2: # Add two hydrogens 45 degrees apart: for j in range(2): aj = ang + (0.8750 + j*0.2500)*pi ph = (pc[0] + bh*cos(aj), pc[1] + bh*sin(aj), pc[2]) phs.append(ph) elif v <= 3: # Add only one hydrogen: ph = (pc[0] + bh*cos(ang+pi), pc[1] + bh*sin(ang+pi), pc[2]) phs.append(ph) else: assert False for ph in phs: kh = fm.add_atom('H',ph,0,0); fm.add_bond(kc,kh,1) return fm #----------------------------------------------------------------------