@inproceedings{bol-con-10-aa-autopol,
  author    = {Boland, David and Constantinides, George A.},
  title     = {Automated Precision Analysis: {A} Polynomial Algebraic Approach},
  booktitle = {Proceedings of the 18th {IEEE} Annual International Symposium on Field-Programmable Custom Computing Machines},
  year      = 2010,
  month     = may,
  pages     = {157--164},
  doi       = {10.1109/FCCM.2010.32},
  url       = {https://ieeexplore.ieee.org/abstract/document/5474056/},
  comment   = {Describes their own method to analyze roundoff errors of FP computations. Applies to one iteration of conjugate gradient. Claims better than AA.},
  abstract  = {When migrating an algorithm onto hardware, the potential saving that can be obtained by tuning the precision used in the algorithm to meet a range or error specification is often overlooked; the major reason is that it is hard to choose a number system which can guarantee any such specification can be met. Instead, the problem is mitigated by opting to use IEEE standard single or double precision so as to be `no worse' than a software implementation. However, the flexibility in the number representation is one of the key factors that can only be exploited on FPGAs, unlike GPUs and general purpose processors, and hence ignoring this potential significantly limits the performance achievable on an FPGA. To this end, this paper describes a tool which analyses algorithms with given input ranges under a finite precision to provide information that could be used to tune the hardware to the algorithm specifications. We demonstrate the proposed procedure on an iteration of the conjugate gradient algorithm, achieving a reduction in slices of over 40\% when meeting the same error specification found by traditional methods. We also show it achieves comparable bounds to recent literature in a small fraction of the execution time, with greater scalability.},
  quotes    = {... It is argued in this paper that this method can achieve significantly tighter bounds than both interval and affine arithmetic, while running significantly faster, with better scalability, than the ...},
}