Trump tweet experiment
In [1]:
tweets = open("trump_tweets.csv")
In [2]:
tweets
Out[2]:
<open file 'trump_tweets.csv', mode 'r' at 0x106de2db0>
In [5]:
tweets = [tweet.strip() for tweet in tweets]
In [6]:
tweets[1]
Out[6]:
"763398630812311552,Morning Joe's weakness is its low ratings. I don't watch anymore but I heard he went wild against Rudy Giuliani and #2A - sad &amp; irrelevant!,Twitter for iPhone"
In [7]:
import spacy
In [9]:
# need python -m spacy.en.download
nlp = spacy.load('en')
In [11]:
doc = nlp(unicode(tweets[1]))
In [12]:
doc
Out[12]:
763398630812311552,Morning Joe's weakness is its low ratings. I don't watch anymore but I heard he went wild against Rudy Giuliani and #2A - sad &amp; irrelevant!,Twitter for iPhone
In [14]:
myents = []
In [16]:
docs = [nlp(unicode(tweet, errors='ignore')) for tweet in tweets]
In [19]:
for doc in docs:
    myents += doc.ents
In [20]:
from collections import Counter
In [25]:
Counter(map(lambda x: str(x), myents)).most_common()
Out[25]:
[('Android', 308),
 ('Trump', 89),
 ('Twitter', 72),
 ('Hillary', 57),
 ('tonight', 49),
 ('Crooked Hillary', 48),
 ('today', 45),
 ('America', 41),
 ('Trump2016', 40),
 ('tomorrow', 35),
 ('Ted Cruz', 33),
 ('Ohio', 32),
 ('Twitter Web Client', 29),
 ('Clinton', 28),
 ('Pennsylvania', 28),
 ('New York', 28),
 ('Indiana', 27),
 ('Marco Rubio', 27),
 ('Bernie', 26),
 ('Florida', 26),
 ('Obama', 26),
 ('Republican', 25),
 ('Crooked Hillary Clinton', 25),
 ('Hillary Clinton', 24),
 ('U.S.', 24),
 ('ISIS', 20),
 ('American', 20),
 ('Rubio', 18),
 ('Cruz', 17),
 ('Bernie Sanders', 17),
 ('GOP', 17),
 ('two', 17),
 ('Wisconsin', 17),
 ('Kasich', 17),
 ('last night', 16),
 ('California', 16),
 ('yesterday', 16),
 ('Arizona', 15),
 ('one', 15),
 ('millions', 15),
 ('Donald Trump', 14),
 ('Texas', 14),
 ('the Republican Party', 13),
 ('Twitter for Android', 13),
 ('Melania', 13),
 ('CNN', 13),
 ('Democrats', 13),
 ('Virginia', 13),
 ('Lyin', 12),
 ('Tuesday', 11),
 ('Colorado', 11),
 ('Dems', 10),
 ('Michigan', 10),
 ('Fox', 10),
 ('North Carolina', 9),
 ('Cleveland', 9),
 ('Orlando', 9),
 ('first', 9),
 ('NAFTA', 9),
 ('Mexico', 9),
 ('Native American', 9),
 ('Connecticut', 8),
 ('DNC', 8),
 ('Sanders', 8),
 ('Elizabeth Warren', 8),
 ('Kaine', 7),
 ('Washington', 7),
 ('News', 7),
 ('Scotland', 7),
 ('7pm', 7),
 ('7:00', 7),
 ('Romney', 7),
 ('100%', 7),
 ('Eric', 7),
 ('2', 7),
 ('Kansas', 7),
 ('AmericaFirst', 7),
 ('&amp', 7),
 ('John Kasich', 7),
 ('this morning', 7),
 ('Russia', 7),
 ('Maryland', 7),
 ('China', 7),
 ('Americans', 7),
 ('Jeb', 6),
 ('D.C.', 6),
 ('Georgia', 6),
 ('Tampa', 6),
 ('Dallas', 6),
 ('Maine', 6),
 ('thousands', 6),
 ('1', 6),
 ('TPP', 6),
 ('San Jose', 6),
 ('Nebraska', 6),
 ('Brussels', 6),
 ('Kentucky', 6),
 ('Pittsburgh', 6),
 ('Bill', 6),
 ('Mike Pence', 6),
 ('NATO', 6),
 ('Bill Clinton', 6),
 ('Nevada', 6),
 ('Mississippi', 6),
 ('third', 5),
 ('Syrian', 5),
 ("Hillary Clinton's", 5),
 ('Louisiana', 5),
 ('the United States', 5),
 ('Mitt Romney', 5),
 ('Iowa', 5),
 ('3', 5),
 ('Delaware', 5),
 ('Israel', 5),
 ('zero', 5),
 ('Lindsey Graham', 5),
 ('State', 5),
 ('San Diego', 5),
 ('V.P.', 5),
 ('Senate', 5),
 ('Ted', 5),
 ('ZERO', 5),
 ('Phoenix', 4),
 ('Monday', 4),
 ('Missouri', 4),
 ('night', 4),
 ('Pocahontas', 4),
 ('T.V.', 4),
 ('this afternoon', 4),
 ('Today', 4),
 ('AMERICA', 4),
 ('years', 4),
 ('West Virginia', 4),
 ('16', 4),
 ('Idaho', 4),
 ('Marco', 4),
 ('New Hampshire', 4),
 ('4', 4),
 ('6/11', 4),
 ('Wichita', 4),
 ('FBI', 4),
 ('5', 4),
 ('@MELANIATRUMP', 4),
 ('Richmond', 4),
 ('Columbus', 4),
 ('the NY Times', 4),
 ('Tiffany', 4),
 ('Don', 4),
 ('South Carolina', 4),
 ('Hawaii', 4),
 ('Jeb Bush', 4),
 ('Rhode Island', 4),
 ('8pm', 4),
 ('Trump University', 4),
 ('Wednesday', 4),
 ('DonaldTrump', 4),
 ('Democratic', 4),
 ('November', 4),
 ('Turnberry', 4),
 ('Utah', 4),
 ('Islamic', 4),
 ('Jacksonville', 4),
 ('four', 3),
 ('6pm', 3),
 ('second', 3),
 ('Colorado Springs', 3),
 ('7pmE', 3),
 ('Club For Growth', 3),
 ('Las Vegas', 3),
 ('Fort Worth', 3),
 ('Tim Kaine', 3),
 ('Chicago', 3),
 ('Alabama', 3),
 ('Republicans', 3),
 ('33,000', 3),
 ('me.",Twitter Web Client', 3),
 ('VA', 3),
 ('Cuba', 3),
 ('Facebook', 3),
 ('Clintons', 3),
 ('many years', 3),
 ('Debbie Wasserman Schultz', 3),
 ('1237', 3),
 ('Putin', 3),
 ('TRUMP', 3),
 ('Joe', 3),
 ('Bobby Knight', 3),
 ('Hispanic', 3),
 ('12', 3),
 ('17', 3),
 ('7:30', 3),
 ('New Mexico', 3),
 ('@realDonaldTrump', 3),
 ('Goofy Elizabeth Warren', 3),
 ('Megyn', 3),
 ('Thursday', 3),
 ('Denver', 3),
 ('Jason Greenblatt', 3),
 ('@nytimes', 3),
 ('Europe', 3),
 ('Tomorrow', 3),
 ('Illinois', 3),
 ('4pm', 3),
 ('Salt Lake City', 3),
 ('France', 3),
 ('Oregon', 3),
 ('the Republican Convention', 3),
 ('8:00 P.M.', 3),
 ('Khan', 3),
 ('the White House', 3),
 ("Crooked Hillary's", 3),
 ('Albany', 3),
 ('Cincinnati', 3),
 ('Massachusetts', 3),
 ('New Yorkers', 3),
 ('@CNN', 3),
 ('15,000', 3),
 ('38', 3),
 ('Nice', 3),
 ('10pm', 3),
 ('Buffalo', 3),
 ('Iran', 3),
 ('Donald', 3),
 ('Saturday', 3),
 ('SuperTuesday', 3),
 ('New Jersey', 3),
 ('Democrat', 3),
 ('RNC', 3),
 ('A.M. Enjoy!,Twitter', 3),
 ('Friday', 3),
 ('Chuck Todd', 3),
 ('Montana', 3),
 ('Toledo', 2),
 ('VoteTrump', 2),
 ('Redding', 2),
 ('the Democratic Convention', 2),
 ('help!",Twitter Web Client', 2),
 ('Crazy Bernie', 2),
 ('Ailsa Course', 2),
 ('10,000', 2),
 ('Muslims', 2),
 ('MakeAmericaGreatAgain', 2),
 ('Adam Scott', 2),
 ('BREXIT', 2),
 ('Palestinian', 2),
 ('TrumpPence16', 2),
 ('Ted Cruz!,Twitter', 2),
 ('Iraq', 2),
 ('550%', 2),
 ('25,000', 2),
 ('PA.', 2),
 ('Radical Islam', 2),
 ('360', 2),
 ('Ireland', 2),
 ('Anaheim', 2),
 ('U.S.A.G.', 2),
 ('Portland', 2),
 ('3pm', 2),
 ('TrumpTrain', 2),
 ('50', 2),
 ('Chris', 2),
 ('Syracuse', 2),
 ('Place', 2),
 ('Madison', 2),
 ('Brexit', 2),
 ('NBC', 2),
 ('Border Patrol Agents', 2),
 ('5,600,000', 2),
 ('April 5th', 2),
 ('another four years', 2),
 ('Atlanta', 2),
 ('Daytona Beach', 2),
 ('Libya', 2),
 ('MAGA', 2),
 ('7:00 P.M.', 2),
 ('730488544700010496,"Our', 2),
 ('1st', 2),
 ('South Bend', 2),
 ('Newt', 2),
 ('Ginsburg', 2),
 ('$50 million', 2),
 ('Hispanics', 2),
 ('Miami', 2),
 ('$$!",Twitter', 2),
 ('@FoxNews', 2),
 ('Brian France', 2),
 ('Koch', 2),
 ('200', 2),
 ('743628318402961408,"Thank', 2),
 ('people!,Twitter Web Client', 2),
 ('Sacramento', 2),
 ('MOVEMENT', 2),
 ('Lyin Ted Cruz', 2),
 ('Reagan', 2),
 ('Bethpage', 2),
 ('ON. Media', 2),
 ('Milwaukee', 2),
 ('6/10', 2),
 ('four more years', 2),
 ('FL.', 2),
 ('Millions', 2),
 ('Arkansas', 2),
 ('NH', 2),
 ('Greensboro', 2),
 ('USA', 2),
 ('Donald J. Trump",Twitter', 2),
 ('Mexican', 2),
 ('@MittRomney', 2),
 ('Rand Paul', 2),
 ('98%', 2),
 ('Constitution', 2),
 ('tomorrow morning', 2),
 ('Gonzalo Curiel', 2),
 ('Syria', 2),
 ('20', 2),
 ('22', 2),
 ('Wall Street', 2),
 ('7', 2),
 ('Democratsboth', 2),
 ('Ukraine', 2),
 ('This Week', 2),
 ('Oklahoma', 2),
 ('8:30', 2),
 ('@AP', 2),
 ('Rochester', 2),
 ('1,000,000', 2),
 ('S.C.', 2),
 ('12:00', 2),
 ('Graham', 2),
 ('@foxandfriends', 2),
 ('@IvankaTrump', 2),
 ('Atlantic City', 2),
 ('VETERANS', 2),
 ('AZ', 2),
 ('HillaryClinton', 2),
 ('@FoxNewsSunday', 2),
 ("Lyin'Ted Cruz", 2),
 ('last week', 2),
 ('CBS', 2),
 ('Indianapolis', 2),
 ('Mar-a', 2),
 ('16,500', 2),
 ('2016', 2),
 ('Michael Flynn', 2),
 ('70%', 2),
 ('705399128180789248,"""@blt21muttrades', 2),
 ('Evangelicals', 2),
 ('Air Force One', 2),
 ('the day', 2),
 ('Paris', 2),
 ('Evansville', 2),
 ('Barbara Res', 2),
 ('the Republican National Convention', 2),
 ('Crooked', 2),
 ('35M', 2),
 ('Carmel', 2),
 ('Houston', 2),
 ('Four more years', 2),
 ('Atlantic', 2),
 ('$1 million', 2),
 ('Rowanne Brewer', 2),
 ('judgment.",Twitter Web Client', 2),
 ('US', 2),
 ('UK', 2),
 ('Thousands', 2),
 ('Dayton', 2),
 ('one night', 2),
 ('WIN', 2),
 ('4/16', 2),
 ('@realDonaldTrump Trump', 2),
 ('Dustin', 2),
 ('millions of dollars', 2),
 ('Baltimore', 2),
 ('Paul Ryan', 2),
 ('the Cross Insurance Center', 1),
 ('708158520831553536,"""@adriparsonss', 1),
 ('NYT', 1),
 ('715980291672829953,"My', 1),
 ('13 year', 1),
 ('718993739348361216,"""@PennyHicks13', 1),
 ('100 years', 1),
 ('Trump Supports Rolling Thunder', 1),
 ('735871869581205504,.@kimguilfoyle-', 1),
 ('731802331923263488,"""@DistlerJoyce', 1),
 ('Rolling Thunder', 1),
 ('762790167732035585,"\'As', 1),
 ('705005540326383616,"""@JoeNBC', 1),
 ('757309616254545920,"Today', 1),
 ('757335224397225984,"Crooked Hillary Clinton', 1),
 ('GQ', 1),
 ('Rudy Giuliani', 1),
 ('702680136513155072,"""@HosierN', 1),
 ('a day', 1),
 ('7:40', 1),
 ('732734411398119424,"""@CostaKenneth', 1),
 ('Freedom Coalition', 1),
 ('Only 38,000', 1),
 ('September 11th', 1),
 ('718996943100375040,"""@Kids123Nicholas', 1),
 ('w/Paul Ryan', 1),
 ('760184664468418560,Will', 1),
 ('A.C. Others', 1),
 ('760293742381244416,"Just', 1),
 ('742469778560929793,Saudi', 1),
 ('ROLLING THUNDER.', 1),
 ('743766155093934080,MAKE', 1),
 ('Flying Trump Flag - Breitbart', 1),
 ('Collins', 1),
 ('$10.3B-$1.2M 74', 1),
 ('Harry Reid', 1),
 ('747873381077356545,"""@arnold_ziffel', 1),
 ('753209883202351104,"Thank', 1),
 ('10T', 1),
 ('Disgusting Rubio', 1),
 ('Donald Trump.",Twitter', 1),
 ('four days', 1),
 ('10%', 1),
 ('the Republican party', 1),
 ('722628702748688385,"""@bigop1', 1),
 ('738448988517453824,Crooked Hillary', 1),
 ('Trump Turnberry', 1),
 ('739998110915678208,A', 1),
 ('George S', 1),
 ('Russians', 1),
 ('West Virginia-', 1),
 ('708351381678088192,https://t.co/ZQ0osiFEJQ', 1),
 ('John Podestas Brother', 1),
 ('POTUS.', 1),
 ('20,000', 1),
 ('June', 1),
 ('750648675186147328,"Crooked Hillary Clinton', 1),
 ('702668337671708677,"""@GriceCindy', 1),
 ('Dutchess County', 1),
 ('710928141112332288,Mitt Romney', 1),
 ('762106904436961280,".@Larry_Kudlow - \'', 1),
 ('Trump U', 1),
 ('759021886055387136,Crooked Hillary Clinton', 1),
 ('Sleepy Eyes', 1),
 ('Robert C. Oaks', 1),
 ('Wall Street.",Twitter Web Client', 1),
 ('710409594204524544,Crazy', 1),
 ('710407581899079680,"Stuart Stevens', 1),
 ('Jack Morgan', 1),
 ('705173204948656129,I', 1),
 ('Donald u', 1),
 ('Huntsville/Madison', 1),
 ('next Tuesday', 1),
 ('Washington Post', 1),
 ('PrimaryDay https://t.co/9rup33Rl29",Twitter Web Client', 1),
 ('dishonest.",Twitter Web', 1),
 ('LA', 1),
 ('Couric', 1),
 ('@SpeakerRyan', 1),
 ('@TheJusticeDept', 1),
 ('51,152', 1),
 ('80k', 1),
 ('704440884029493248,"Phony Rubio', 1),
 ('732732783605469184,"""@KaceyIlliot1669', 1),
 ('702679818194841600,"""@ahernandez85a', 1),
 ('the Great Depression!",Twitter for', 1),
 ('U R', 1),
 ('757552876348645377,"If', 1),
 ('Clinton Foundation', 1),
 ('A.M. ENJOY!,Twitter', 1),
 ('749237508916056064,#AmericaFirst', 1),
 ('710452858043416577,".@WSJ', 1),
 ('8:00', 1),
 ('719124172903997441,"""@getreal1234', 1),
 ('732533409285865473,"Wow,', 1),
 ('A Strong America Endorses Donald Trump for president', 1),
 ('PORTLAND', 1),
 ('732734199678050304,"""@svhlevi', 1),
 ('40%', 1),
 ('BREXIT.', 1),
 ('Bobby', 1),
 ('749699315367612416,Crooked Hillary Clinton', 1),
 ('733778244076507136,"Great day', 1),
 ('sixteen', 1),
 ('55,000,000', 1),
 ('706674352167772160,"All', 1),
 ('730482068753424385,Goofy Elizabeth Warren', 1),
 ('713676890750509056,"""@Tytan01', 1),
 ('@cnn', 1),
 ('738385522301624323,Crooked Hillary Clinton', 1),
 ('734003305819570176,Crooked Hillary', 1),
 ('Sparks', 1),
 ('709580469323612160,"""@BrazielCarol', 1),
 ('759071976182861824,"Wow', 1),
 ('728305900415533056,"Bernie', 1),
 ('704756216157839360,MAKE', 1),
 ('BLANK', 1),
 ('713006012085760001,"These', 1),
 ('Elie Wiesel', 1),
 ('N.Y.', 1),
 ('721450217384579076,"""@Trumptbird', 1),
 ('723682291843780608,"Thank', 1),
 ('Obama plus!",Twitter', 1),
 ('this year', 1),
 ('U.K.', 1),
 ('the Cadillac World Golf Championship @TrumpDoral', 1),
 ('739814823689605120,"A', 1),
 ('725128021473132546,Thank', 1),
 ('179', 1),
 ('Trump U(', 1),
 ('752859250628648962,"Bernie', 1),
 ('Congress', 1),
 ('AmericanSamoa', 1),
 ('716811670031581185,"A great night', 1),
 ('744225952465489920,"In', 1),
 ('5,481,737', 1),
 ('West Chester', 1),
 ('GOP Convention https://t.co/WLkmYjJJR9,Twitter Web Client', 1),
 ('La Crosse', 1),
 ('Charleston', 1),
 ("The O'Reilly Factor", 1),
 ('March 5, 2016', 1),
 ('Town Hall', 1),
 ('729643519846055936,"""@NathanDWilsonFL', 1),
 ('709541660758315009,"THANK', 1),
 ('759848885900763141,"""@RealJamesWoods', 1),
 ('Primaries', 1),
 ('731801917379219456,"""@MrTohNey', 1),
 ('Christian', 1),
 ('@TomBarrackJr', 1),
 ('Donald Rumsfeld', 1),
 ('760070280932982784,"Mr', 1),
 ('763385288295055360,"""@dbongino', 1),
 ('Raul Castro', 1),
 ('400,000', 1),
 ('760185067054465024,"The', 1),
 ('740709235927977989,"""@Dale_Dangler', 1),
 ('Texans', 1),
 ('Bob Corker', 1),
 ('758731880183193601,"""@LallyRay', 1),
 ('750548959240843265,Crooked Hillary Clinton', 1),
 ('November 8', 1),
 ('PAC', 1),
 ('Tom Cotton', 1),
 ('703900742961270784,"""@ilduce2016', 1),
 ('11:30 A.M.', 1),
 ('719484455556161538,"""@TimeHasCome1', 1),
 ('John King', 1),
 ('718913705111613440,"A', 1),
 ('@JerryJrFalwell', 1),
 ('Northern Mariana', 1),
 ('Fred', 1),
 ('@ChuckGrassley', 1),
 ('756975589110779904,"""@NancyNielsenn', 1),
 ('714818473382248448,"I have', 1),
 ('Trump=Competence', 1),
 ('Realistic Trump Poll', 1),
 ('Hillary Clinton.,Twitter', 1),
 ('Ed Rollins', 1),
 ('Senators', 1),
 ('Memorial Day', 1),
 ('724050713953861632,I', 1),
 ('Corey Lewandowski', 1),
 ('3/8/2016', 1),
 ('732148805118885889,"Bernie Sanders', 1),
 ('9:00 A.M.', 1),
 ('710918088460517376,"Club For Growth', 1),
 ('726079789182808065,Thank', 1),
 ('First Amendment', 1),
 ('703915198864887810,"The Republican Establishment', 1),
 ('Listened', 1),
 ('FL - 11', 1),
 ('1%', 1),
 ('365', 1),
 ('727095549703364608,"Gov Mike Pence', 1),
 ('OREGON', 1),
 ('Lindsey', 1),
 ('737242189055594496,"""@FrankyLamouche', 1),
 ('Diamond and Silk', 1),
 ('hundreds', 1),
 ('728646402985869312,"Joe Scarborough', 1),
 ('David Perdue', 1),
 ("759070976181084160,In Hillary Clinton's", 1),
 ('Salah Abdeslam', 1),
 ('735311006826700800,"Thank', 1),
 ('712782227747311616,MAKE', 1),
 ('Laura', 1),
 ('722397728475582465,A big day', 1),
 ('VIOLENT.', 1),
 ('Ted Cruz!",Twitter', 1),
 ('759008377577967616,"Crooked Hillary Clinton', 1),
 ('the Democratic Party', 1),
 ('2nd', 1),
 ('7pm-', 1),
 ('727634574298255361,"Wow', 1),
 ('732734623285927936,"""@thydanielflores', 1),
 ('Hoosier', 1),
 ('6:15 A.M.', 1),
 ('711742734508421120,"""@BarronG510', 1),
 ('West Allis', 1),
 ('DC', 1),
 ('Bernie!,Twitter for Android', 1),
 ('730054891897536514,"WEST', 1),
 ('Fayetteville', 1),
 ('25K', 1),
 ('ASAP.', 1),
 ('Hillary Clinton - corruption', 1),
 ('738233323344920576,"Thank', 1),
 ('718993345197027328,"""@Theresa_Cali', 1),
 ('Mechanicsburg', 1),
 ('Debbie Wasserman Shultz', 1),
 ('725115824206450688,Thank', 1),
 ('seven', 1),
 ('Ben Carson-', 1),
 ('714617467839111173,"Lyin\'Ted Cruz', 1),
 ('738600538976096256,"""@LunsfordWhitney', 1),
 ('the House Task Force', 1),
 ('Nevada-', 1),
 ('HANNITY EXCLUSIVE EVENT', 1),
 ('Long Island---', 1),
 ('704379918222696449,My', 1),
 ('Harrisburg', 1),
 ('727225626575421440,Honored', 1),
 ('@morningmika - Enjoy!,Twitter', 1),
 ('4 years', 1),
 ('Hillary!,Twitter Web Client', 1),
 ('703442830211964928,"""@itsblakec', 1),
 ('JUSTICES', 1),
 ('Des Moines', 1),
 ('Sarah', 1),
 ('741645740884303872,"In', 1),
 ('USChamber', 1),
 ('735611127531347969,"Thank', 1),
 ('Sheldon', 1),
 ('731501778873749505,Great', 1),
 ('739807883521630208,Crooked Hillary Clinton', 1),
 ('703896353416617984,"""@fairess369', 1),
 ('711087670412185601,"""@grammy620', 1),
 ('Special: Meet the Trumps', 1),
 ('7% to 0%', 1),
 ('706325441028689921,"Thank', 1),
 ('716412682581053440,"A GREAT DAY', 1),
 ('Baghdad', 1),
 ('Nikki Haley!,Twitter', 1),
 ('The National Enq.were', 1),
 ('747029963702996992,"Crooked Hillary Clinton', 1),
 ('757577508346888192,"Great', 1),
 ('714899439706574848,"MAKE', 1),
 ('Piers', 1),
 ('CO GOP', 1),
 ('Fresno', 1),
 ('724633058398375936,Kasich', 1),
 ('Rules Committee', 1),
 ('761653875413618689,#MakeAmericaSafeAgain', 1),
 ('724942956306771968,"MAKE', 1),
 ('733837704312410112,Crooked Hillary Clinton', 1),
 ('VETERANS ADMINISTRATION', 1),
 ('Sentinel', 1),
 ('722635529708285952,MAKE', 1),
 ('more than @realDonaldTrump', 1),
 ('Bangladesh', 1),
 ('less than 200 - with', 1),
 ('734231223002894337,Crooked Hillary', 1),
 ('Hillarys', 1),
 ('HILLARY CLINTON', 1),
 ('ImWithYou', 1),
 ('Cruz-Lawsuit', 1),
 ('731220438530052096,"An', 1),
 ("735822406514675712,The Inspector General's", 1),
 ('6,000', 1),
 ('two hours', 1),
 ('736336010808688640,"I', 1),
 ('Bobby!",Twitter', 1),
 ('First', 1),
 ('712837228272164864,"""@tcsorr', 1),
 ('Jim Herman', 1),
 ('Haute', 1),
 ('730474838012366849,Thanks', 1),
 ('tonights', 1),
 ('702887030892658688,"Why', 1),
 ('762781826549030912,Many', 1),
 ('8 PM', 1),
 ('Making Money', 1),
 ('Long Island!",Twitter', 1),
 ('Davenport-', 1),
 ('723663455748763649,"Thank', 1),
 ('Independent.",Twitter Web Client', 1),
 ('703917643930210304,"While', 1),
 ('African-American', 1),
 ('Cleveland-', 1),
 ('711209847702749184,"If', 1),
 ('Pearl Harbor', 1),
 ('2nd A', 1),
 ('717189147497091072,"""@FoxNews', 1),
 ('739571700392722432,"The Clinton', 1),
 ('the Washington Post', 1),
 ('746052286640689157,"On', 1),
 ('7:15', 1),
 ('11/8/2016', 1),
 ('the Trump University', 1),
 ('Crooked Hillary V.P.', 1),
 ('731805070669557761,"""@tzard000', 1),
 ('756974970312462336,"""@OliMauritania', 1),
 ('702833969537142788,"Mitt Romney', 1),
 ('732535400498143232,"The', 1),
 ('ALSO', 1),
 ('762669882571980801,My', 1),
 ('727253907567726592,"THANK', 1),
 ('732788306237345794,"Paul', 1),
 ('John Allen', 1),
 ('Daytona', 1),
 ('her 29 years ago', 1),
 ('Wilmington', 1),
 ('Koran', 1),
 ('NOTHING', 1),
 ('Weeks - Breitbart', 1),
 ('704481336682266624,"Great', 1),
 ('756477153986895872,Thank', 1),
 ('7:35', 1),
 ('$25 million', 1),
 ('way!,Twitter Web Client', 1),
 ('705149907183738880,"""@HavBat22', 1),
 ('Radical', 1),
 ('Charles', 1),
 ('Rowanne Brewer Lane', 1),
 ('Radical Islamic Terror', 1),
 ('755747074939949056,"In November', 1),
 ('739080401747120128,Many', 1),
 ('2nd Amendment', 1),
 ('1:00pm', 1),
 ('19pts', 1),
 ('0', 1),
 ('755395950089211904,"""@RoxaneTancredi', 1),
 ('Donald J. Trump', 1),
 ('702155147695284224,"""@Vogelsong1', 1),
 ('705624889282068480,"MY', 1),
 ('Scranton', 1),
 ('@A Savage Nation', 1),
 ('Rand', 1),
 ('POTUS. Killer', 1),
 ('8:30 A.M.', 1),
 ('738950669927886848,Muhammad Ali', 1),
 ('democrat', 1),
 ('858,233%', 1),
 ('Kevin', 1),
 ('732733747586469888,"""@johnjohnlacca', 1),
 ('705230846249324544,Millions of dollars', 1),
 ('Mahoning County', 1),
 ('Trump",Twitter', 1),
 ('$400 million', 1),
 ('738236517949181952,"""@SCPioneer', 1),
 ('758350470402408449,"Great', 1),
 ('707377544224509952,Thank', 1),
 ('752215352122208256,"I', 1),
 ('718269255872081922,"""@DiCristo13', 1),
 ('749987869452828672,"Crooked Hillary Clinton', 1),
 ('Ford', 1),
 ('Trump Jupiter', 1),
 ('708979864704049152,"""@alextrent4', 1),
 ('L.A.', 1),
 ('women.,Twitter Web Client', 1),
 ('727637487871479808,"Thank', 1),
 ('730059920280793088,"Via', 1),
 ('741754548004458496,"Thank', 1),
 ('2:30', 1),
 ('709886343309107200,"""@J_Styborski', 1),
 ('757690132111777792,"Clinton', 1),
 ('749929565661884417,"In', 1),
 ('43%', 1),
 ('MI', 1),
 ('Michigan.",Twitter', 1),
 ('Crimea', 1),
 ('703444081251254272,"""@JerryJrFalwell', 1),
 ('10pm EST', 1),
 ('Juan Williams', 1),
 ('761025834350018561,"Thank', 1),
 ('Justice', 1),
 ('@SInow', 1),
 ('706812638215303168,"Lindsey Graham', 1),
 ('737055532960223233,The Republican Party', 1),
 ('30%', 1),
 ('Pete', 1),
 ('743765281898536960,People', 1),
 ('the U.S. Senate', 1),
 ('10 million', 1),
 ('Ben', 1),
 ('Trump &amp', 1),
 ('711088013560778752,"""@AshleyEdam', 1),
 ('740513939574951936,"""@southpaw816', 1),
 ('Hayes', 1),
 ('714093135857631232,"Wow', 1),
 ('Time', 1),
 ('MSG. Appreciate', 1),
 ('Cheryl Ann Kraft', 1),
 ('April 4th', 1),
 ('757775689525309440,"Elizabeth Warren', 1),
 ('Pope', 1),
 ('Petraeus', 1),
 ('1314.5%', 1),
 ('Costa Mesa', 1),
 ('761331433810132992,"Thank', 1),
 ('Sparks, Nevada', 1),
 ('Begala', 1),
 ('Media', 1),
 ('11:00', 1),
 ('745804574699692032,"""@justininglv', 1),
 ('761279538106097664,"Great', 1),
 ('718992948193570816,"""@vivhall3', 1),
 ('759592590106849280,Nielson Media Research', 1),
 ('Munich.",Twitter for Android', 1),
 ("Bill O'Reilly", 1),
 ('732988705531650048,"Thank', 1),
 ('702561529447763968,MAKE', 1),
 ('747873880103059457,"""@bluedogdemky', 1),
 ('Lou Holtz - a', 1),
 ('Bill Kristol', 1),
 ('@FoxBusiness 7', 1),
 ('November..vote TRUMP', 1),
 ("751405672877744128,Last night's", 1),
 ('729716712124362754,Crooked Hillary Clinton', 1),
 ('722920297393995776,"Thank', 1),
 ("745296338758270976,Hillary Clinton's", 1),
 ('Islam', 1),
 ('738495114532638720,"""@free_SA_BD', 1),
 ('732537744195522560,Crooked Hillary Clinton', 1),
 ('C.I.A.', 1),
 ('729404337164783617,"Crooked Hillary', 1),
 ('@Reince Priebus', 1),
 ('737237313034563585,"In', 1),
 ('714423264039317505,"""@nellalda', 1),
 ('Saudi Arabia', 1),
 ('755836720197795840,John Kasich', 1),
 ('719484822834581504,"""@WVTTS1017', 1),
 ('759222916387069952,"As', 1),
 ('16.Apologize",Twitter', 1),
 ('722181629528141824,Lyin', 1),
 ('702679486496706564,Ted Cruz', 1),
 ('Bernie.",Twitter for Android', 1),
 ('37 minutes', 1),
 ('710563892687196160,MAKE', 1),
 ('Senate?,Twitter Web Client', 1),
 ('Rick Scott', 1),
 ('Common Core', 1),
 ('754303051389861888,"Crooked Hillary', 1),
 ('744355251365511169,"Donald Trump\'s', 1),
 ('706530975992381441,Thank', 1),
 ('823', 1),
 ('733801387952340992,"""@montgomeriefdn', 1),
 ("Mort Zuckerman's", 1),
 ('Barack', 1),
 ('760420894271471620,The Washington Post', 1),
 ('increase!,Twitter Web Client', 1),
 ('740380953722421249,"Thank', 1),
 ('711084345356738560,"""@cmichaeld2004', 1),
 ('Q2 2016', 1),
 ('735541674793783297,"Thank', 1),
 ('745693121095303168,#Imwithyou', 1),
 ('738407796606013444,"Wow', 1),
 ('Westfield', 1),
 ('12:15', 1),
 ('708332792028008448,"Dr', 1),
 ('702367612232142848,"THANK', 1),
 ('730493295550468096,Goofy Elizabeth Warren', 1),
 ('SYRACUSE - NOON', 1),
 ('1001', 1),
 ('12 years ago', 1),
 ('Russell Moore', 1),
 ('734066397425307648,MAKE', 1),
 ('California-', 1),
 ('Diamond', 1),
 ('752618593196597248,Great', 1),
 ('704834059915104256,Thank', 1),
 ('At least 1/2', 1),
 ('702308297886400513,My', 1),
 ('Meltdown.",Twitter', 1),
 ('738132164399636480,Crooked Hillary Clinton', 1),
 ('756140881862766594,"Great', 1),
 ('740707325590274049,"""@nauthizjane', 1),
 ('the Republican National Convention #', 1),
 ('724236172181049344,.@AndreBauer', 1),
 ('733682878278696960,Crooked Hillary Clinton-', 1),
 ('April this year', 1),
 ('11', 1),
 ('740707234968113152,"""@setfire2flames', 1),
 ('Best Golf Hotel 2016.We', 1),
 ('741988371174699009,Clinton', 1),
 ('V.P. choice.",Twitter', 1),
 ('734066896794963968,"Crooked Hillary Clintons', 1),
 ('VETERANS.', 1),
 ('712300954775887876,I', 1),
 ('713687943278424064,"""@pattiandsammi', 1),
 ('734468447829004288,Crooked Hillary', 1),
 ('Philadelphia', 1),
 ('Bongino', 1),
 ('this evening', 1),
 ('735959405565378560,"My', 1),
 ('Trump for POTUS!",Twitter', 1),
 ('@USChamber', 1),
 ('20%', 1),
 ('747227503329173505,"""@brazosboys', 1),
 ('705611414128500736,"The Better Business Bureau', 1),
 ('Don King', 1),
 ('800- &amp', 1),
 ('NRA', 1),
 ('Trump Univ', 1),
 ('@PeteRose_14', 1),
 ('744519497764184064,"""@bfraser747', 1),
 ('30,000', 1),
 ('703562075616849920,"""@SassyPantsjj', 1),
 ('Chris Wallace', 1),
 ('politicians!,Twitter Web Client', 1),
 ('749698321074180096,Only', 1),
 ('Reuters', 1),
 ('709713295180169216,"Good morning', 1),
 ('761773576101953536,"Good', 1),
 ('10pmE w/ @MELANIATRUMP', 1),
 ('Hillary Just', 1),
 ('715758039836594176,The', 1),
 ('735345354992975873,"""@oasisupernova', 1),
 ('seven million', 1),
 ('earlier this year', 1),
 ('721371631118127104,"Just', 1),
 ('12:00 A.M.  ', 1),
 ('8,646,551.Trump', 1),
 ('@NASCAR', 1),
 ('739082885857173505,We', 1),
 ('716780878777556994,"I', 1),
 ('730823012463742976,Great day', 1),
 ('NewYorkValues', 1),
 ('morning', 1),
 ('3 hours', 1),
 ('VoteTrump!https://t.co/lsKdqGFyvQ",Twitter', 1),
 ('712291375958659072,MAKE', 1),
 ('732990965426888705,Thank', 1),
 ('738495529005420544,"""@TaylorEdwards99', 1),
 ('Trump National Doral', 1),
 ("Mitt Romney's", 1),
 ('Trumps Speech', 1),
 ('Robert Jeffress', 1),
 ('746289394231828480,"Self', 1),
 ('718269634814865410,"""@iamDaveK', 1),
 ('725419855265026048,"THANK', 1),
 ('Cruz-Kasich', 1),
 ('Donald J Trump', 1),
 ('1 hour', 1),
 ('Bible', 1),
 ('Hillary Clintons', 1),
 ('Robert E. Payne', 1),
 ('FOX', 1),
 ('Repubs', 1),
 ('LGBT', 1),
 ('Sheldon Adelson', 1),
 ('78%', 1),
 ('RALLY', 1),
 ('732575682367258624,"Crooked Hillary', 1),
 ('762982142783676416,"""@sprts08', 1),
 ('742437914731286530,"TERRORISM', 1),
 ('MakeAmericaGreatAgain https://t.co/ZnnaSPF5or",Twitter Web Client', 1),
 ('738599744759431168,"""@angeloftruth11', 1),
 ('69', 1),
 ('Fort Wayne', 1),
 ('WORST', 1),
 ('the U.S.A.G. So Bill', 1),
 ('723274222907301888,Cruz', 1),
 ('50K', 1),
 ('Trump Make America Great', 1),
 ('62.9%', 1),
 ('Phil', 1),
 ('JOB.",Twitter for', 1),
 ('721695114943442946,Crooked Hillary Clinton', 1),
 ('Palm Beach', 1),
 ('718761006068051968,"Bernie', 1),
 ('the United States!,Twitter for Android', 1),
 ('703252552641478656,"Wow', 1),
 ('the 2nd Amendment', 1),
 ('746272130992644096,"Just', 1),
 ('759513644258525184,"Wow', 1),
 ('POTUS', 1),
 ('one million dollars', 1),
 ('758421383063961604,"Thank', 1),
 ('737784398515441664,"Katie', 1),
 ('727252046286450689,"Thank', 1),
 ('757650904896139264,"MAKE', 1),
 ('707386022569623552,Thank', 1),
 ('Advisory Committee', 1),
 ('#Trump2016', 1),
 ('735968169001684992,"Today', 1),
 ('726045073775828992,"Crooked Hillary Clinton', 1),
 ('72.8%', 1),
 ('747030708678430720,Clinton', 1),
 ('Friday afternoon', 1),
 ('Megan Kelly', 1),
 ('702706412980453380,Great', 1),
 ('Guy Romney', 1),
 ('759029315006705664,"Crooked Hillary', 1),
 ('The Republican Establishment', 1),
 ('Crazy Bernie Sanders', 1),
 ('Reno', 1),
 ('Trump Tower-', 1),
 ('757916962269704192,"Funny', 1),
 ('Wikileakes', 1),
 ('now.,Twitter Web Client', 1),
 ('743485792585146368,"McAllen', 1),
 ('719626295248863232,Colorado Trump Delegates', 1),
 ('720821603064524800,"MAKE', 1),
 ('745204320593940480,"Crooked Hillary Clinton', 1),
 ...]
In [ ]:
 
In [32]:
obama = open("stemmed-speeches/stemmed-obama-speeches/2005.04.23.txt").read()
In [33]:
obama = obama.replace('\n', '').split('__PUNCT__')
In [39]:
word = doc[2]
In [43]:
len(obama)
Out[43]:
180
In [48]:
trump = [" ".join([d.lemma_ for d in doc]) for doc in docs]
In [50]:
trump = trump[:180]
In [51]:
from sklearn.feature_extraction.text import CountVectorizer
In [53]:
all_data = obama + trump
In [101]:
vectorizer = CountVectorizer(min_df=5, stop_words='english')
In [102]:
X = vectorizer.fit_transform(all_data)
In [56]:
X # This is aweosme sentence vectors
Out[56]:
<360x149 sparse matrix of type '<type 'numpy.int64'>'
	with 2473 stored elements in Compressed Sparse Row format>
In [58]:
y = [0] * 180 + [1] * 180
In [103]:
from sklearn.model_selection import train_test_split
In [104]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=20)
In [64]:
from sklearn.svm import SVC
In [105]:
clf = SVC(kernel='linear')
In [106]:
clf.fit(X_train, y_train)
Out[106]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
In [107]:
clf.score(X_test, y_test)
Out[107]:
0.94999999999999996
In [108]:
clf.score(X_train, y_train)
Out[108]:
0.95588235294117652
In [112]:
X_test[0]
Out[112]:
<1x84 sparse matrix of type '<type 'numpy.int64'>'
	with 1 stored elements in Compressed Sparse Row format>
In [111]:
sorted(zip(clf.coef_.toarray()[0], vectorizer.vocabulary_))[-100:]
Out[111]:
[(-1.0, u'world'),
 (-0.28319244952656503, u'__num__'),
 (-0.28319244952656503, u'just'),
 (-0.28319244952656503, u'look'),
 (-0.18382694791713999, u'trumppence16'),
 (-0.18345700549461755, u'client'),
 (-0.17418112922662032, u'florida'),
 (-0.17418112922662032, u'join'),
 (-0.1568809353700869, u'crooked'),
 (-0.092327737988591549, u'make'),
 (-0.064783997228518381, u'oil'),
 (-0.022894566237240239, u'pm'),
 (-0.0077542519757889481, u'grow'),
 (-0.0007712300919004055, u'like'),
 (-0.00071990910553879355, u'don'),
 (-0.00066533781170015815, u'people'),
 (-0.00055648685974674095, u'web'),
 (-0.0005461661752109864, u'famili'),
 (-0.00054616617521086042, u'https'),
 (-0.00045254550385532915, u'tomorrow'),
 (-0.0004494846934692924, u'great'),
 (-0.00043291120253769833, u'farm'),
 (-0.00043291120253769833, u'thank'),
 (-0.00040712522183694296, u'ar'),
 (-0.00040712522183694296, u'support'),
 (-0.00039497628945880009, u'washington'),
 (-0.00039497628945880009, u'win'),
 (-0.00033869405669320618, u'want'),
 (-0.00032093896131939442, u'rise'),
 (-0.00025659085708552951, u'convention'),
 (-2.8361911662000416e-06, u'amp'),
 (0.0, u'agricultur'),
 (0.0, u'america'),
 (0.0, u'ask'),
 (0.0, u'cnn'),
 (0.0, u'colleg'),
 (0.0, u'countri'),
 (0.0, u'dai'),
 (0.0, u'ha'),
 (0.0, u'know'),
 (0.0, u'live'),
 (0.0, u'need'),
 (0.0, u'price'),
 (0.0, u'research'),
 (0.0, u'say'),
 (0.0, u'small'),
 (0.0, u'southern'),
 (0.0, u'twitt'),
 (0.0, u'work'),
 (0.0073335627409411939, u'fuel'),
 (0.015087814716730142, u'answer'),
 (0.015087814716730142, u'crookedhillary'),
 (0.015087814716730142, u'economi'),
 (0.015087814716730142, u'twitter'),
 (0.016460148014994121, u'illinoi'),
 (0.14989283813563853, u'talk'),
 (0.15646024613523912, u'onli'),
 (0.16635298615063265, u'million'),
 (0.17883030374945327, u'imwithyou'),
 (0.1829675833219192, u'e85'),
 (0.20172486998669351, u'makeamericagreatagain'),
 (0.22759417499830309, u'year'),
 (0.30156569603239775, u'thi'),
 (0.31561572523001385, u'help'),
 (0.38277435325667736, u'iphone'),
 (0.38277435325667736, u'question'),
 (0.41252780470736772, u'hillary'),
 (0.66723343335234597, u'medium'),
 (0.83275304018883778, u'daytona'),
 (0.84884015011559932, u'job'),
 (0.84928085511754081, u'middl'),
 (0.90071508412531964, u'president'),
 (0.9158028988420498, u'clinton'),
 (0.92227982307237388, u'android'),
 (1.0, u'bad'),
 (1.0, u'obama'),
 (1.0, u'speech'),
 (1.0163246474457248, u'pennsylvania'),
 (1.077460001330748, u'wa'),
 (1.1505878876966067, u'new'),
 (1.1667564395654866, u'ethanol'),
 (1.5867089339339882, u'maga'),
 (1.9994832335460369, u'trump'),
 (1.9997191707000379, u'colorado')]
In [ ]: