import string

def printHTMLfile(body,title):
    ''' Write a standard html page (doctype, head, title, h1 header, footer)
    wrapped around body and save it as title+'.html'.

    body  -- html fragment (e.g. an html box) inserted verbatim after the header
    title -- used for the <title> tag, the <h1> header and the output file
             name; it is inserted verbatim, without html escaping

    The "Last modified" stamp in the footer is a fixed literal, not the
    current time.
    '''
    # Build the whole page before touching the file system, so a bad argument
    # fails before the output file is created or truncated.
    theStr="""
    <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
    <html> <head>
    <title>"""+title+"""</title>
    </head>

    <body>
    <h1>"""+title+'</h1>'+'\n'+body+'\n'+"""<hr>
    <address></address>
    <!-- hhmts start --> Last modified: Fri Oct 10 10:09:41 EDT 2008 <!-- hhmts end -->
    </body> </html>
    """
    # The context manager closes the file even if the write raises.
    with open(title+'.html','w') as fd:
        fd.write(theStr)

def makeHTMLbox(body):
    ''' Wrap body in a fixed-style <div>: 800px wide, light grey background,
    thin grey border, centered text. Returns the resulting html string.
    '''
    # The template is spelled out line by line; the embedded newlines and
    # four-space indents are part of the emitted markup.
    template = (
        '<div style="\n'
        '    width: 800px;\n'
        '    background-color: rgb(250,250,250);\n'
        '    border: 1px grey solid;\n'
        '    text-align: center">%s</div>\n'
        '    '
    )
    return template % body

def makeHTMLword(word,cnt,high,low,htmlBig=96,htmlLittle=14):
    ''' Make a <span> for word whose font size (in px) is scaled linearly
    between htmlLittle and htmlBig according to where cnt falls between
    low and high.

    word       -- text placed inside the span (inserted verbatim)
    cnt        -- count of this word
    high, low  -- highest and lowest counts among the words being displayed
    htmlBig    -- font size used for a word whose count equals high
    htmlLittle -- font size used for a word whose count equals low

    Returns the html string. The computed size is truncated to an integer.
    '''
    spread = high-low
    if spread == 0:
        # All words share one count, so there is no range to interpolate
        # over; show them at the big size instead of dividing by zero.
        ratio = 1.0
    else:
        # float() keeps this a true division even for integer counts.
        ratio = (cnt-low)/float(spread)
    fontsize = int(htmlBig*ratio + (1-ratio)*htmlLittle)
    wordStr = '<span style=\"font-size:%spx;\">%s</span>'
    return wordStr % (str(fontsize), word)

def cleanStr(aStr):
    ''' Normalize a token: trim surrounding whitespace, lower-case it and
    drop every character listed in string.punctuation. Returns the cleaned
    string, which may be empty.
    '''
    lowered = aStr.strip().lower()
    kept = [ch for ch in lowered if ch not in string.punctuation]
    return ''.join(kept)

def setDictionary(fields,bDict,iDict,pDict,curDict):
    ''' Pick the dictionary that the words in fields should be counted in.

    fields  -- list of whitespace-separated tokens from one transcript line
    bDict   -- dictionary returned when the line starts with 'BIDEN:'
    iDict   -- dictionary returned when the line starts with 'IFILL:'
    pDict   -- dictionary returned when the line starts with 'PALIN:'
    curDict -- dictionary currently in use; returned unchanged when the line
               does not start with a speaker tag (or fields is empty)

    When a speaker tag is found it is removed from fields in place, so the
    caller does not count the tag as a word.
    '''
    if not fields:
        # Nothing to inspect; keep counting in the current dictionary.
        return curDict
    speakers = {'IFILL:': iDict, 'PALIN:': pDict, 'BIDEN:': bDict}
    tag = fields[0]
    if tag in speakers:
        del fields[0]
        return speakers[tag]
    return curDict

def extractTopX(theDict,num):
    ''' Return the num entries of theDict with the highest counts.

    theDict -- dictionary mapping word -> count
    num     -- maximum number of entries to return; zero or a negative
               number yields an empty list

    Returns a list of (word, count) tuples in ascending order of count, with
    ties ordered by word, so the first element has the lowest count of the
    selection and the last element the highest.
    '''
    if num <= 0:
        # A slice of [-0:] would return the whole list rather than nothing.
        return []
    ranked = sorted(theDict.items(), key=lambda item: (item[1], item[0]))
    return ranked[-num:]

# Load the stop words (one per line); these are excluded from the counts.
with open('stop2.txt') as fd:
    stopList = [line.strip() for line in fd]
# Set copy for constant-time membership tests in the counting loop.
stopWords = set(stopList)

palinDict={}
bidenDict={}
ifillDict={}
# Words that appear before the first speaker tag are counted here.
presentDict={}

# Count the words of the transcript, one dictionary per speaker.
with open('debate.txt') as fd:
    for line in fd:
        fields = line.split()
        if not fields:
            continue
        # A leading speaker tag switches the active dictionary (and is removed
        # from fields); otherwise the previous speaker keeps the floor.
        presentDict = setDictionary(fields,bidenDict,ifillDict,palinDict,presentDict)
        for f in fields:
            f = cleanStr(f)
            if f and f not in stopWords:
                presentDict[f] = presentDict.get(f,0)+1

# Write one page per candidate, containing that speaker's 40 most used words
# in alphabetical order, each sized by its count.
for title,wordDict in (('biden',bidenDict),('palin',palinDict)):
    topWords = extractTopX(wordDict,40)
    if topWords:
        counts = [cnt for w,cnt in topWords]
        least = min(counts)
        most = max(counts)
        topWords.sort()
        body = ' '.join([makeHTMLword(w,cnt,most,least) for w,cnt in topWords])
    else:
        # No words recorded for this speaker: still write a page, with an
        # empty box, instead of failing on an empty list.
        body = ''
    printHTMLfile(makeHTMLbox(body),title)
