#! /bin/bash
# Last edited on 2017-12-12 22:22:40 by stolfilocal
# Fetch the first page of the user's reddit profile into raw/.start.html.

set -euo pipefail

mkdir -p raw
wget -nv "https://www.reddit.com/user/jstolfi/" -O raw/.start.html

# NOTE(review): the line below was a bare URL, which the shell would try to
# run as a command (and the unquoted '&' would background it).  Kept here as
# a comment: pagination continues via URLs of this form.
#   https://www.reddit.com/user/jstolfi/?count=50&after=t1_dpx2yhr

# Scraping scratch notes, wrapped in a python3 here-doc so this file remains
# a syntactically valid shell script.  Requires: pip install beautifulsoup4
python3 - <<'PYEOF'
# import libraries (urllib2 / `print x` were Python 2 only; ported to Python 3)
from urllib.request import urlopen
from bs4 import BeautifulSoup

# specify the url
quote_page = "http://www.bloomberg.com/quote/SPX:IND"
# query the website and return the html in the variable 'page'
page = urlopen(quote_page)
# parse the html using BeautifulSoup and store in variable `soup`
# (original used typographic quotes ‘…’, which are a syntax error in Python)
soup = BeautifulSoup(page, 'html.parser')

# Take out the <h1> with class "name" and get its value
name_box = soup.find('h1', attrs={'class': 'name'})
name = name_box.text.strip()  # strip() removes leading/trailing whitespace
print(name)

# Small prettify() demo on a literal markup string.
markup = '<a href="http://example.com/">I linked to <i>example.com</i></a>'
# Name the parser explicitly; otherwise bs4 emits a warning and the choice
# depends on what is installed.
soup = BeautifulSoup(markup, 'html.parser')
print(soup.prettify())
# Expected output with html.parser (no <html>/<body> wrapping is added
# for a fragment; that wrapping comes from lxml/html5lib):
# <a href="http://example.com/">
#  I linked to
#  <i>
#   example.com
#  </i>
# </a>
PYEOF
