#!/usr/bin/python
# Licensed under the Apache License, Version 2.0
# http://www.apache.org/licenses/LICENSE-2.0

# A script to pull all SPG Points data and output it to a CSV file

"""
Challenges:
1) Authenticating with SPG.com ***Done***
2) Scraping points information from those websites ***Done***
3) Filtering the points information into usable information ***Done***
4) Writing the info to a pipe-delimited file ***Done***
5) Printing a summary report to the command line ***Done***
6) Printing a summary report to a file ***Done***
7) Proper error handling for the summaryreport() function ***Done***
8) Compilation into a .exe file for use on other computers
9) Speed up the code by implementing a different parser
"""

import time
import datetime
import io
import re
import requests
import sys
from bs4 import BeautifulSoup
from bs4 import SoupStrainer

# NOTE: every print call below takes exactly one parenthesised string so the
# script behaves the same whether print is a statement or a function.

LOGIN_URL = 'https://www.starwoodhotels.com/preferredguest/account/sign_in.html'
ACTIVITY_URL = 'https://www.starwoodhotels.com/preferredguest/account/starpoints/index.html?bcount='
NO_ACTIVITY_TEXT = "Currently, your account doesn't have any activity"

# The activity URL is requested with bcount = 0, 16, 32, ...
PAGE_SIZE = 16
# Default cap on the number of activity pages requested per run.
MAX_PAGES = 20

DATE_FORMAT = "%m/%d/%Y"

# Text of the inline booking cell, which also carries the even/odd class and
# must be filtered out of the scraped table cells.
BOOKING_WIDGET_TEXT = u'\n\n\n\n\n\n\n\nCheck in\n\n\n\nCheck out\n\n\n\n\n\n\nRooms\n\n1\n2\n3\n4\n5\n6\n7\n8\n9\n\n\n\nAdults Per Room\n\n1\n2\n3\n4\n\n\n\n\n\n\n\n\n\n\n\nClose\n\n\n\n\n\n\n'

# A run of two or more whitespace characters inside a description cell.
_GAP = re.compile(r'\s{2,}')


def cleansoup(soup):
    """Remove every <script>, <style> and <ul> element from *soup* in place.

    Returns the same soup object so the call can be chained.
    """
    for tagname in ('script', 'style', 'ul'):
        for tree in soup(tagname):
            tree.extract()
    return soup


def _datafilename(user, date):
    """Return the name of the pipe-delimited data file for *user* and *date*."""
    return "spgpoints " + user + " " + date + ".txt"


def _cleandescription(text):
    """Strip page clutter from a description cell and split it with '|'.

    Newlines and the 'Book Now' / 'Add to favorites' link texts are removed.
    The remaining text is split on runs of whitespace rather than on every
    single space: summaryreport() compares these fields against values that
    contain single spaces (a hotel name, a "from - to" date range).
    """
    for clutter in ('\n', 'Book Now', 'Add to favorites'):
        text = text.replace(clutter, '')
    return _GAP.sub('|', text.strip())


def _parsepage(html):
    """Return the activity rows found in one page of *html*.

    Each row is a 4-tuple (Earn/Redeem, points, date posted, description).
    Cells with class 'even' are collected first, then cells with class 'odd',
    and consecutive groups of four cells form one row.
    """
    soup = cleansoup(BeautifulSoup(html, "html.parser",
                                   parse_only=SoupStrainer('body')))
    cells = []
    for cssclass in ('even', 'odd'):
        for cell in soup.find_all('td', cssclass):
            if cell.text != BOOKING_WIDGET_TEXT:
                cells.append(cell.text)

    leftover = len(cells) % 4
    if leftover:
        # An incomplete group cannot be turned into a row; skip it instead of
        # crashing half-way through the scrape.
        print("Warning: ignoring " + str(leftover) + " stray table cell(s) on this page.")

    rows = []
    for start in range(0, len(cells) - 3, 4):
        kind, points, posted, description = cells[start:start + 4]
        rows.append((kind, points, posted, _cleandescription(description)))
    return rows


def getpoints(user, pwd, maxpages=MAX_PAGES):
    """Log in, scrape the account activity pages and write them to a file.

    The rows are sorted newest-first by posting date and written, one per
    line and pipe-delimited, to "spgpoints <user> <today>.txt".  Afterwards
    summaryreport() is run for that file and the process exits with status 0.

    user     -- login name posted to the sign-in form
    pwd      -- password posted to the sign-in form
    maxpages -- maximum number of activity pages to request

    Exits with status 1 if the sign-in request is not answered with a 302.
    """
    payload = {'successPath': '/account/index.html', 'login': user, 'password': pwd}
    s = requests.session()
    # Redirects must not be followed here: the check below looks for the 302
    # itself, which would otherwise be replaced by the redirect target's status.
    r = s.post(LOGIN_URL, data=payload, allow_redirects=False)
    if r.status_code != 302:
        print("Failed! :( Check your username and password")
        sys.exit(1)
    print("Login was successful")

    today = str(datetime.date.today())
    filename = _datafilename(user, today)
    rows = []
    reachedend = False

    # Iterate through each of the pages and pull the proper data
    for page in range(maxpages):
        q = s.get(ACTIVITY_URL + str(PAGE_SIZE * page))
        if NO_ACTIVITY_TEXT in q.text:
            reachedend = True
            break
        rows.extend(_parsepage(q.text))
        # Print a status while this is looping so the users don't freak out
        print("Got page #" + str(page + 1))

    rows.sort(reverse=True, key=lambda row: time.strptime(row[2], DATE_FORMAT))
    if reachedend:
        print("No more data to grab. We've got " + str(len(rows)) + " items!")
    else:
        # The page limit was hit before the site reported the end of the
        # activity list; keep what was collected rather than dropping it.
        print("Stopped after " + str(maxpages) + " pages. We've got " + str(len(rows)) + " items!")

    # Cell text is unicode, so write it through an explicit UTF-8 encoder.
    with io.open(filename, mode='w', encoding='utf-8') as out:
        for row in rows:
            out.write(u'|'.join(row) + u'\n')
    print("They should be in the '" + filename + "' file, which is pipe-delimited.")

    summaryreport(user, today)
    sys.exit(0)


def summaryreport(user, date):
    """Summarise a data file written by getpoints() and print the result.

    Reads "spgpoints <user> <date>.txt", totals points earned and spent,
    counts stays and nights, writes the report to
    "spgpoints <user> <date> summary.txt" and prints the same lines.

    user -- user name used in the data file's name
    date -- date used in the data file's name

    Exits with status 1 if the data file cannot be opened.
    """
    date = str(date)
    filename = _datafilename(user, date)

    # Open the file and load it as a list of field lists
    data = []
    try:
        with io.open(filename, mode='r', encoding='utf-8') as infile:
            for line in infile:
                if not line.strip():
                    continue
                fields = line.rstrip('\n').split('|')
                # Rows whose description was not split have fewer than five
                # fields; pad them so the lookups below stay in range.
                if len(fields) < 5:
                    fields = fields + [''] * (5 - len(fields))
                data.append(fields)
    except IOError:
        print("I can't find this file: '" + filename + "'")
        print("Maybe you should check your input parameters.")
        sys.exit(1)

    # Initialize some variables
    totalearned = 0
    totalspent = 0
    basepoints = 0
    awardpoints = 0
    westindulles = 0
    gogreenawards = 0
    gogreenpoints = 0
    basenights = 0
    spentnights = 0
    stays = 0
    awardstays = 0
    fromdates = []
    todates = []

    # Compile some Regular Expressions
    dateRE = re.compile(r'(\d+/\d+/\d+)\s-\s(\d+/\d+/\d+)')
    gogreen = re.compile(r'SVC PROMISE (\d+) PTS')
    awardRE = re.compile(r'CAT \d-(\d) FREE \w*')

    for fields in data:
        kind = fields[0]
        points = int(fields[1])
        hotel = fields[3]
        detail = fields[4]

        if kind == 'Earn':
            totalearned += points
            stayed = dateRE.match(detail)
            if stayed:
                # Regular Nights
                basepoints += points
                fromdate = datetime.datetime.strptime(stayed.group(1), DATE_FORMAT)
                todate = datetime.datetime.strptime(stayed.group(2), DATE_FORMAT)
                fromdates.append(fromdate)
                todates.append(todate)
                # Subtract real dates so a stay spanning New Year still
                # yields a positive night count.
                basenights += (todate - fromdate).days
                stays += 1
                if hotel == 'The Westin Washington Dulles Airport':
                    westindulles += 1
            else:
                awardpoints += points
                if gogreen.match(detail):
                    gogreenawards += 1
                    gogreenpoints += points
        else:
            # Award Nights
            totalspent += points
            awardstays += 1
            match = awardRE.search(detail)
            if match:
                spentnights += int(match.group(1))
            else:
                print("Hmmm...one of the award nights doesn't look right.")

    # Calc the average stays and the date boundaries; an account with no
    # stays of a given kind reports 0.0 / n/a instead of crashing.
    avgstay = round(float(basenights) / stays, 3) if stays else 0.0
    avgawardstay = round(float(spentnights) / awardstays, 3) if awardstays else 0.0
    earliestdate = min(fromdates).strftime(DATE_FORMAT) if fromdates else "n/a"
    latestdate = max(todates).strftime(DATE_FORMAT) if todates else "n/a"

    lines = [
        "Points summary from " + str(earliestdate) + " to " + str(latestdate),
        "",
        "-------------Points Spent----------------",
        "Total Points Spent: " + str(totalspent),
        "Total Award Nights: " + str(spentnights),
        "Total Award Stays: " + str(awardstays),
        "Average Award Stay Length: " + str(avgawardstay),
        "-------------Points Earned---------------",
        "Total Points Earned: " + str(totalearned),
        "Total Nights Paid For: " + str(basenights),
        "Total Stays: " + str(stays),
        "Average Stay Length: " + str(avgstay),
        "-------------Points Detail---------------",
        "Points Earned Through Stays Alone:" + str(basepoints),
        "Bonus Points Earned: " + str(awardpoints),
    ]
    if westindulles > 0:
        lines.append("Westin Dulles Stays: " + str(westindulles))
    if gogreenawards > 0:
        lines.append("Go Green Awards: " + str(gogreenawards))
    if gogreenpoints > 0:
        lines.append("Go Green Points Earned: " + str(gogreenpoints))

    # Write the summary file, then echo the same lines to the console
    with open("spgpoints " + user + " " + date + " summary.txt", mode='w') as summary:
        summary.write("\n".join(lines) + "\n")
    for line in lines:
        print(line)


def _usage():
    """Print the command-line usage text."""
    print("To get points: spgpoints.py [username] [password]")
    print("For summary report: spgpoints.py -summary [username] [date in yyyy-mm-dd format]")


def main():
    """Dispatch on the command line: scrape points or print a summary."""
    args = sys.argv[1:]
    if args and args[0] == '-summary':
        if len(args) < 3:
            _usage()
            sys.exit(1)
        summaryreport(args[1], args[2])
        sys.exit(0)
    if len(args) != 2:
        _usage()
        sys.exit(1)
    getpoints(args[0], args[1])


if __name__ == '__main__':
    main()