% Technical report IC-PFG-19-07: document image classification with CNNs.
% The abstract is carried inside the note field so that it is printed with
% the reference; that markup uses \selectlanguage, so the citing document
% must define it (for example by loading babel).
% NOTE(review): each author is followed by a braced short form of the name.
% This looks like a convention of the tool that produced this file; standard
% styles will read the braced group as the surname. Confirm with the
% consumer of this file before normalising to "Last, First".
@techreport{TR-IC-PFG-19-07,
   number      = {IC-PFG-19-07},
   author      = {Matheus Mortatti Diamantino {M. M. Mortatti} and
                  Helio Pedrini {H. Pedrini}},
   title       = {Document Classification Using Convolutional Neural Networks},
   month       = jul,
   year        = {2019},
   institution = {Institute of Computing, University of Campinas},
   note        = {In English, 21 pages.
    \par\selectlanguage{english}\textbf{Abstract}
       In this work, we explore different architectures of deep
       convolutional neural networks applied to the problem of
       document image classification, without the need of using OCR
       techniques or others to extract information about the document,
       letting the CNN to learn how to interpret and extract
       information about the images by itself. The popular document
       image dataset called RVL-CDIP is used to train this model to
       compare results to other approaches in this field and also to
       compare results to different architectures than the ones used
       on them. We export weights from the VGG16 ImageNet network with
       inter-domain transfer learning and apply region based training
       with intra-domain transfer learning to train a model to
       classify documents by looking only at headers, footers and
       other sections of a document. We also use a real use case
       company document dataset to train VGG16. Since RVL-CDIP is a
       fairly complex dataset, the goal was to train a model that
       would be used in a real world situation with a useful dataset
       for their context. With this dataset, a 99\% accuracy rate was
       achieved using data augmentation techniques.
  }
}