#! /usr/bin/python3

import re
import sys

# Enumerates the unbranched monocarboxylic acids
# given the number of carbons, double bonds, and triple bonds.
#
# A double or triple bond "at" carbon {k} is between carbons {k} and {k+1}.
#
# Each acid is described by the number {n} and an "isomer code" which is
# a string of IUPAC-like bond configuration codes, each followed by a comma.
# A double bond at carbon {k} has a bond code "{k}Z" for "cis", "{k}E"
# for "trans"; except when {k=n-1}, in which case the code is just
# "{k}".  A triple bond at {k} (even at {n-1}) has bond code "{k}Y".


def main():
    """Writes to {stdout} a table of acids with 1 to 40 carbons.

    For each feasible combination of carbon count {n}, double-bond count {m}
    and triple-bond count {t} a summary row with the isomer count and the
    generic name is written.  For small cases ({n <= 18}, {m <= 2}, {t <= 2})
    one additional row per isomer is written, with its isomer code and
    specific name.  Progress and debug lines go to {stderr}."""
    nmax = 40  # Max number of carbons.
    for n in range(1, nmax + 1):
        # Max number of triple bonds (fewer are tabulated for long chains).
        tmax = 0 if n > 30 else 1 if n > 24 else 2
        for t in range(tmax + 1):
            # Max number of double bonds (none when there are triple bonds).
            mmax = 0 if t > 0 else 1 if n > 24 else 6 if n > 18 else 8
            # The bound {(n-1-2*t)//2} discards bond counts that cannot fit
            # in the chain; it may be negative, making the range empty.
            for m in range(0, min(mmax, (n - 1 - 2*t)//2) + 1):
                sys.stderr.write(f"--------------- C{n}:{m}:{t}--------------------------------\n")
                # Assemble the generic name:
                gen_name = generic_name(n, m, t)
                # Large cases are only counted, not listed:
                count = (n > 18 or m > 2 or t > 2)
                L = enum_fatty(n, m, t, count)
                N = L if count else len(L)
                sys.stdout.write(f" | {n} | {m} | {t} | *{N} | {gen_name} |\n")
                if not count:
                    L.sort(reverse=True)
                    for cod in L:
                        cod = cod.strip(',')
                        spec_name = specific_name(n, m, t, cod)
                        if cod == "":
                            cod = "---"  # So that the entry is not empty.
                        sys.stdout.write(f" | {n} | {m} | {t} | {cod} | {spec_name}??? | \n")


def enum_fatty(n, m, t, count):
    """Enumerates or counts the isomer codes of all unbranched
    monocarboxylic acids with {n} carbons (including the carboxyl one),
    {m} non-adjacent double bonds and {t} triple bonds at carbons 2
    to n-1, considering cis- and trans- isomerism.

    If {count} is {False}, returns a list with the isomer codes of all
    isomers.  If {count} is {True}, returns only the number of such isomers.

    Also writes the result to {stderr} as a debugging aid."""
    # Single bond at {n-1}:
    L = enum_fatty_prefix(n - 1, m, t, 1, count, "")
    # A multiple bond at {n-1} needs {n-1 >= 2}; otherwise it would sit
    # on the carboxyl carbon, which is not a valid position.
    if n >= 3:
        # Double bond at {n-1} (no cis/trans isomerism at the chain end):
        L = L + enum_fatty_prefix(n - 2, m - 1, t, 2, count, str(n - 1))
        # Triple bond at {n-1}:
        L = L + enum_fatty_prefix(n - 2, m, t - 1, 2, count, str(n - 1) + "Y")
    sys.stderr.write("L = %r\n" % L)
    return L


def enum_fatty_prefix(n, m, t, k, count, suff):
    """Enumerates the isomer codes of all unbranched monocarboxylic
    acids with a backbone of {n} carbons (including the carboxyl one)
    that are terminated by a fixed "tail" group other than hydrogen,
    and have {m} non-adjacent double bonds and {t} triple bonds at
    carbons 2 to n-1, considering cis- and trans- isomerism on each
    double bond.

    The procedure assumes that the "tail" is connected to carbon {n} by
    a single bond, and consists of a chain of {k>=1} carbons, possibly
    with multiple bonds, in a fixed configuration.  Because of the
    non-trivial tail, every double bond will have cis-trans isomerism.

    If {count} is {False}, assumes that {suff} is an isomer code that
    describes the double or triple bonds in the tail and their
    configuration, and returns a list of all isomer codes, each ending
    with {suff}.

    If {count} is {True}, returns only the number of such isomers, and
    ignores {suff}."""
    if m < 0 or t < 0 or n < 0:
        # No isomers with these parameters:
        return 0 if count else []
    if m == 0 and t == 0:
        # No more choices:
        return 1 if count else [suff]
    if n < 2*(m + t) + 1:
        # Not enough carbons for these double/triple bonds:
        return 0 if count else []

    # Still some multiple bonds to place.
    pos = str(n - 1)
    # Single bond at {n-1}:
    L1 = enum_fatty_prefix(n - 1, m, t, k + 1, count, suff)
    # Triple bond at {n-1}:
    L3 = enum_fatty_prefix(n - 2, m, t - 1, k + 2, count, pos + "Y," + suff)
    if count:
        # Double bond at {n-1}; cis and trans give the same count:
        L2 = 2 * enum_fatty_prefix(n - 2, m - 1, t, k + 2, count, pos + "Z," + suff)
    else:
        # "Cis" and "trans" double bond at {n-1}:
        L2Z = enum_fatty_prefix(n - 2, m - 1, t, k + 2, count, pos + "Z," + suff)
        L2E = enum_fatty_prefix(n - 2, m - 1, t, k + 2, count, pos + "E," + suff)
        L2 = L2Z + L2E
    return L1 + L2 + L3


# Root of the name for each number of carbons, indexed by that number.
n_name = [
    "nil",
    "meth",
    "eth",
    "prop",
    "but",
    "pent",
    "hex",
    "hept",
    "oct",
    "non",
    "dec",
    "undec",
    "dodec",
    "tridec",
    "tetradec",
    "pentadec",
    "hexadec",
    "heptadec",
    "octadec",
    "nonadec",
    "eicos",
    "heneicos",
    "docos",
    "tricos",
    "tetracos",
    "pentacos",
    "hexacos",
    "heptacos",
    "octacos",
    "nonacos",
    "triacont",
    "hentriacont",
    "dotriacont",
    "tritriacont",
    "tetratriacont",
    "pentatriacont",
    "hexatriacont",
    "heptatriacont",
    "octatriacont",
    "nonatriacont",
    "tetracont",
]
assert len(n_name) == 41

# Multiplicity infix for each number of double or triple bonds, indexed
# by that number.  The entry for 1 is empty ("hexenoic", not "hexamonoenoic").
mt_name = [
    "anil",
    "",
    "adi",
    "atri",
    "atetra",
    "apenta",
    "ahexa",
    "ahepta",
    "aocta",
    "anona",
    "adeca",
    "aundeca",
    "adodeca",
    "atrideca",
    "atetradeca",
    "apentadeca",
    "ahexadeca",
    "aheptadeca",
    "aoctadeca",
    "anonadeca",
]
assert len(mt_name) == 20


def generic_name(n, m, t):
    """Builds the generic name of the fatty acid with {n} carbons,
    considering the number {m} of double bonds and {t} of triple bonds
    but not their positions and configurations.

    Requires {n} in 0..40 and {m}, {t} in 0..19 (the table sizes)."""
    if m + t == 0:
        # Alkane root + "oic":
        return n_name[n] + "anoic"
    gen_name = n_name[n]
    if m > 0:
        gen_name += mt_name[m] + "en"
    if t > 0:
        gen_name += mt_name[t] + "yn"
    return gen_name + "oic"


def specific_name(n, m, t, cod):
    """Builds the semi-IUPAC name of the fatty acid, considering the
    number of double and triple bonds and their positions and
    configurations as described by {cod}.

    The code {cod} is a comma-separated list of bond codes, as produced
    by {enum_fatty} after stripping the outer commas.  For now the name
    is defined only in simple cases: an acid with both double and triple
    bonds yields the empty string.

    Fails with an assertion error if {cod} has a double-bond item in an
    acid with only triple bonds, or has an invalid or repeated
    non-cis/trans double bond."""
    gen_name = generic_name(n, m, t)
    if m == 0 and t == 0:
        return gen_name
    if m > 0 and t > 0:
        # Mixed double and triple bonds - punt it:
        return ""
    if t > 0:
        # Indicate the positions of the triple bonds by numeric prefixes.
        # The code cannot have double-bond items:
        assert all(
            re.fullmatch(r"[0-9]+Y", bc) for bc in cod.split(",") if bc != ""
        ), "invalid triple bond code " + cod
        return cod.replace("Y", "") + "-" + gen_name

    # Some double bonds.  Get the "cis", "trans", and "none" codes:
    ciscodes = []
    transcodes = []
    nocodes = []
    for bc in cod.split(","):
        if len(bc) == 0:
            pass
        elif re.fullmatch(r"[0-9]+", bc):
            # Double bond at terminal position:
            assert int(bc) == n-1, "invalid non-EZ double bond position " + cod
            assert len(nocodes) == 0, "repated non-EZ double bond position " + cod
            nocodes.append(bc)
        elif re.fullmatch(r"[0-9]+[EZ]", bc):
            # Other double bond:
            if bc[-1] == "E":
                transcodes.append(bc)
            else:
                ciscodes.append(bc)

    cisnums = ",".join(ciscodes).replace("Z", "")
    transnums = ",".join(transcodes).replace("E", "")
    nonums = ",".join(nocodes)
    if cisnums != "":
        cisnums += "-cis"
    if transnums != "":
        transnums += "-trans"

    if cisnums == "":
        ctnums = transnums
    elif transnums == "":
        ctnums = cisnums
    else:
        # Put first the group with the lowest locant.  The comparison must
        # be numeric: as strings, "12" would sort before "9".
        cis_first = min(int(bc[:-1]) for bc in ciscodes)
        trans_first = min(int(bc[:-1]) for bc in transcodes)
        if cis_first < trans_first:
            ctnums = cisnums + "," + transnums
        else:
            ctnums = transnums + "," + cisnums

    if nonums == "":
        nums = ctnums
    elif ctnums == "":
        nums = nonums
    else:
        nums = ctnums + "," + nonums
    return nums + "-" + gen_name


if __name__ == "__main__":
    main()