Libraries

In [1]:
import requests
import os
import yaml
# Step up to the parent directory; later cells open 'config.yaml' and
# 'notebooks/sa4_center.json' relative to the working directory.
# NOTE(review): not idempotent - every re-run of this cell moves one level further up.
os.chdir('../')
In [2]:
import datetime
In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
In [4]:
# One text size for the legend, axis labels, titles and tick labels.
text_size_keys = ('legend.fontsize', 'axes.labelsize', 'axes.titlesize',
                  'xtick.labelsize', 'ytick.labelsize')
params = dict.fromkeys(text_size_keys, 14)
params['figure.figsize'] = (15, 8)
plt.rcParams.update(params)

# DataFrame display: show up to 50 columns, two decimal places.
pd.set_option('display.max_columns', 50)
pd.set_option('display.precision', 2)
In [5]:
from cloudant.view import View
from cloudant.client import Cloudant
from cloudant.document import Document
In [6]:
def unfold_keys(df, key_column='key'):
    """Expand a column of key sequences into leading ``level_N`` columns.

    Every element of ``df[key_column]`` must be an indexable sequence (list or
    tuple). The number of components is read from the first row, and component
    ``i`` of each key becomes column ``level_{i+1}``, inserted at position
    ``i``. The key column itself is removed from the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``key_column``. It is not modified.
    key_column : str, default 'key'
        Name of the column holding the key sequences.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the unfolded ``level_N`` columns first and
        ``key_column`` dropped. An empty frame is returned with only the key
        column dropped, because there is no row to read the key length from.

    Raises
    ------
    KeyError
        If ``key_column`` is not a column of ``df``.
    IndexError
        If a later key is shorter than the first one.
    """
    df = df.copy()
    if df.empty:
        del df[key_column]
        return df

    keys = df[key_column]
    # Positional lookup: the frame does not need to contain a row labelled 0.
    key_length = len(keys.iloc[0])
    for i in range(key_length):
        # Bind the position as a default so each lambda keeps its own index.
        df.insert(i, 'level_{}'.format(i + 1), keys.apply(lambda key, pos=i: key[pos]))
    del df[key_column]
    return df
In [7]:
# Read the notebook configuration from 'config.yaml' in the working directory.
with open("config.yaml", 'r') as ymlfile:
    # safe_load only builds plain Python types and needs no explicit Loader;
    # a bare yaml.load(stream) is deprecated since PyYAML 5.1 and fails on 6.0.
    cfg = yaml.safe_load(ymlfile)
In [8]:
# From here on `cfg` is only the COUCHDB section of the configuration.
cfg = cfg['COUCHDB']

# Open a session against the configured host.
client = Cloudant(cfg['user'], cfg['password'], url=cfg['host'])
client.connect()

# Handles to the two databases named in the config, plus the
# '_design/sentiment_analysis' design document of the tweets database.
tweets_db, aurin_db = (client[cfg[db_key]] for db_key in ('tweets_db', 'aurin_db'))
ddoc = Document(tweets_db, '_design/sentiment_analysis')

```python
i = 0
for document in tweets_db:
    break
    if document.json()[0] != '{':
        print(document)
    i += 1
    if divmod(i, 10000)[1] == 0:
        print(i)
```

Load data

Load tweets counters

In [9]:
# Query the 'geo_sentiment_counts' view of the design document and load every
# returned row into a DataFrame.
# NOTE(review): presumably group=True yields one row per distinct key and
# stale='ok' accepts a not-yet-refreshed view index - confirm against the
# CouchDB view query options.
view = View(ddoc, 'geo_sentiment_counts', )
with view.custom_result(group=True, stale='ok') as rslt:
    df = pd.DataFrame.from_records(rslt.all())
In [10]:
# Sum of the 'value' column over every row returned by the view.
df['value'].sum()
Out[10]:
1066680
In [11]:
geo_sentiment_columns = ['sa4_area', 'gccsa_area', 'state', 'sentiment', 'date', 'tweets_count']

# One column per key component, renamed to the geo/sentiment schema.
data_df = unfold_keys(df)
data_df.columns = geo_sentiment_columns

# Parse the 'MM/DD/YYYY' strings, then store the dates back as text.
parsed_dates = data_df['date'].map(lambda raw: datetime.datetime.strptime(raw, '%m/%d/%Y'))
data_df['date'] = parsed_dates.astype(str)

# Largest counts first.
data_df = data_df.sort_values('tweets_count', ascending=False)
In [12]:
# Move every column except 'tweets_count' into a MultiIndex.
# NOTE(review): in-place and not re-runnable - a second execution raises
# KeyError because those names are no longer columns.
data_df.set_index(geo_sentiment_columns[:-1], inplace=True)
In [13]:
# Total tweet count per value of the 'date' index level, summed over all other levels.
tweets_by_dates = data_df.groupby(level='date').sum()
In [14]:
# Area chart of the per-date totals from label '2018-01-01' onward
# (a label slice on the date strings produced earlier).
tweets_by_dates.loc['2018-01-01':].plot.area()
Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4a71924208>
In [15]:
# Pivot the 'sentiment' index level into columns; index/sentiment
# combinations that have no row are filled with 0.
sentiment_df = data_df.unstack(level='sentiment', fill_value=0)
In [16]:
# Sum the sentiment columns per GCCSA area and flatten the column index.
gccsa_totals = sentiment_df.groupby(level='gccsa_area').sum()
gccsa_totals.columns = gccsa_totals.columns.droplevel(0)

# Order the sentiment columns from most to fewest tweets overall.
column_totals = gccsa_totals.sum().sort_values(ascending=False)
sorted_columns = list(column_totals.index.values)
gccsa_sentiment_df = gccsa_totals[sorted_columns]
In [17]:
# Sum the sentiment columns per SA4 area and flatten the column index.
sa4_totals = sentiment_df.groupby(level='sa4_area').sum()
sa4_totals.columns = sa4_totals.columns.droplevel(0)

# Order the sentiment columns from most to fewest tweets overall.
column_totals = sa4_totals.sum().sort_values(ascending=False)
sorted_columns = list(column_totals.index.values)
sa4_sentiment_df = sa4_totals[sorted_columns]

Load Aurin Data

SA4 Median Income

In [18]:
# One record per feature: its 'properties' dict from the 'SA4_median_income' document.
data = [feature['properties'] for feature in aurin_db['SA4_median_income']['features']]
sa4_median_income_df = (
    pd.DataFrame.from_records(data)
    .set_index('sa4_name16')
    .drop('sa4_code16', axis=1)
)
#sa4_median_income_df.head()

SA4_Labour_Force

In [19]:
# One record per feature: its 'properties' dict from the 'SA4_Labour_Force' document.
data = [feature['properties'] for feature in aurin_db['SA4_Labour_Force']['features']]
sa4_labour_force_df = (
    pd.DataFrame.from_records(data)
    .set_index('sa4_name16')
    .drop('sa4_code16', axis=1)
)
#sa4_labour_force_df.head()

GCCSA Median Income

In [20]:
# One record per feature: its 'properties' dict from the 'GCCSA_median_income' document.
data = [feature['properties'] for feature in aurin_db['GCCSA_median_income']['features']]
gccsa_median_income_df = (
    pd.DataFrame.from_records(data)
    .set_index('gcc_name16')
    .drop('gcc_code16', axis=1)
)
#gccsa_median_income_df

GCCSA_Labour_Force

In [21]:
# One record per feature: its 'properties' dict from the 'GCCSA_Labour_Force' document.
data = [feature['properties'] for feature in aurin_db['GCCSA_Labour_Force']['features']]
gccsa_labour_force_df = (
    pd.DataFrame.from_records(data)
    .set_index('gcc_name16')
    .drop('gcc_code16', axis=1)
)
#gccsa_labour_force_df

GCCSA Data

In [22]:
# Per-GCCSA table: sentiment counts joined on the index with the median-income
# and labour-force columns, then written to 'gccsa.csv' in the working directory.
gccsa_df = gccsa_sentiment_df.join(gccsa_median_income_df).join(gccsa_labour_force_df)
gccsa_df.to_csv('gccsa.csv')

SA4 Data

In [23]:
# Per-SA4 table: sentiment counts joined on the index with the median-income
# and labour-force columns, then written to 'sa4.csv' in the working directory.
sa4_df = sa4_sentiment_df.join(sa4_median_income_df).join(sa4_labour_force_df)
sa4_df.to_csv('sa4.csv')
In [24]:
# Display the joined SA4 table (sentiment counts plus income and labour-force columns).
sa4_df
Out[24]:
Neutral Positive Negative nan median_tot_fam_inc_weekly median_tot_prsnl_inc_weekly p_tot_emp_tot p_tot_lf_tot p_tot_unemp_tot p_unem_look_ptw_tot
sa4_area
Adelaide - Central and Hills 1776 1674 325 13 1967 700 137691 147027 9332 4751
Adelaide - North 317 337 86 3 1407 563 180185 198775 18591 7125
Adelaide - South 7032 7286 3253 60 1622 635 164500 176840 12341 5512
Adelaide - West 377 252 61 2 1554 597 104681 113466 8786 3737
Australian Capital Territory 4223 4333 1686 26 2445 1000 205419 215586 10167 5305
Ballarat 902 823 467 5 1409 559 67396 72295 4899 2084
Barossa - Yorke - Mid North 325 349 164 3 1318 535 45980 49147 3170 1111
Bendigo 694 646 284 4 1443 593 66444 70699 4258 1897
Brisbane - East 440 364 77 3 1902 717 108128 115073 6948 2904
Brisbane - North 12682 13442 5848 77 1977 772 104609 111419 6802 2879
Brisbane - South 376 267 84 1 1930 700 167703 181178 13472 6393
Brisbane - West 359 227 50 0 2322 779 87898 94395 6494 3499
Brisbane Inner City 2312 1938 446 7 2495 938 140263 149531 9267 4579
Bunbury 759 634 202 3 1606 637 77663 83507 5842 2154
Cairns 1150 1134 424 8 1475 642 106148 115176 9031 3201
Capital Region 918 830 330 2 1561 655 97339 102324 4985 2028
Central Coast 704 907 229 7 1560 600 139593 149701 10105 4276
Central Queensland 447 595 132 1 1740 664 97655 106937 9283 2794
Central West 2528 2495 1055 16 1473 594 86287 91992 5707 2134
Coffs Harbour - Grafton 384 417 156 7 1249 520 52076 56542 4464 1779
Darling Downs - Maranoa 130 128 28 0 1366 582 54865 58062 3199 1036
Darwin 689 723 323 2 2386 1052 71357 74817 3466 1204
Far West and Orana 545 526 382 4 1386 590 46707 50234 3527 1215
Geelong 1247 1429 419 6 1615 616 125026 132944 7917 3621
Gold Coast 6591 7373 2205 18 1655 665 266886 286880 19985 8844
Hume 3168 3164 1391 25 1417 599 75545 79553 4011 1636
Hunter Valley exc Newcastle 1195 1167 516 5 1548 599 110966 119628 8665 3310
Illawarra 1690 1695 514 5 1679 591 127331 136719 9391 4595
Ipswich 300 275 60 1 1523 626 135965 149336 13365 4965
Latrobe - Gippsland 957 952 287 3 1338 540 110486 118717 8227 3068
... ... ... ... ... ... ... ... ... ... ...
Queensland - Outback 204 144 60 1 1540 648 33765 36651 2889 761
Richmond - Tweed 1070 1121 349 13 1333 550 96427 103710 7281 3165
Riverina 520 384 131 2 1517 643 69453 73276 3827 1461
Shepparton 568 478 145 0 1364 565 54705 58011 3306 1350
South Australia - Outback 2491 2860 1033 13 1418 594 34650 37644 2995 932
South Australia - South East 739 768 307 4 1266 544 77675 82647 4975 1806
Southern Highlands and Shoalhaven 751 764 237 5 1340 552 56899 60287 3392 1415
Sunshine Coast 1605 1967 516 9 1478 612 151648 163207 11559 5210
Sydney - Baulkham Hills and Hawkesbury 427 523 105 1 2397 801 115320 120642 5320 2781
Sydney - Blacktown 347 279 75 1 1818 672 152682 164680 12002 5265
Sydney - City and Inner South 7276 5575 1016 13 2369 904 174465 185043 10583 5487
Sydney - Eastern Suburbs 2310 1752 338 6 2756 1020 135879 142479 6600 3588
Sydney - Inner South West 470 355 84 1 1576 565 247411 267126 19722 9483
Sydney - Inner West 831 599 114 2 2282 828 150117 158859 8740 4299
Sydney - North Sydney and Hornsby 1163 923 230 2 2814 1029 205722 215480 9752 4744
Sydney - Northern Beaches 1154 1009 161 0 2528 916 129685 134320 4642 2441
Sydney - Outer South West 417 374 199 0 1794 686 121806 129960 8152 3491
Sydney - Outer West and Blue Mountains 729 619 159 2 1852 715 147331 155762 8431 3651
Sydney - Parramatta 1063 835 203 5 1631 586 191445 208788 17344 7578
Sydney - Ryde 280 246 43 0 2236 760 88601 94336 5737 3000
Sydney - South West 40429 41319 18561 200 1489 500 160218 175172 14949 6907
Sydney - Sutherland 557 406 100 2 2312 836 112434 116552 4116 2054
Toowoomba 764 848 342 3 1583 649 66386 71409 5028 2048
Townsville 2144 2090 911 12 1640 672 103917 113782 9866 3546
Warrnambool and South West 773 994 253 4 1411 582 55609 58316 2711 1089
West and North West 5 6 0 0 1274 523 44336 48009 3671 1287
Western Australia - Outback (North) 276 248 98 0 2416 1239 45541 48337 2792 761
Western Australia - Outback (South) 1345 1169 571 5 1836 732 53172 57159 3984 1242
Western Australia - Wheat Belt 395 405 149 1 1481 623 59053 62463 3412 1096
Wide Bay 617 648 227 0 1133 478 99867 111460 11596 3829

86 rows × 10 columns

In [25]:
#data_df.corr()
In [26]:
#sns.pairplot(sa4_df, kind='reg')

Sentiment by SA4 Area

In [27]:
# First rows of the per-SA4 sentiment counts.
sa4_sentiment_df.head()
Out[27]:
sentiment Neutral Positive Negative nan
sa4_area
Adelaide - Central and Hills 1776 1674 325 13
Adelaide - North 317 337 86 3
Adelaide - South 7032 7286 3253 60
Adelaide - West 377 252 61 2
Australian Capital Territory 4223 4333 1686 26
In [28]:
# Rank the SA4 areas by their 'Positive' count and keep the first 20 rows.
ranked_by_positive = sa4_sentiment_df.sort_values(by='Positive', ascending=False)
plot_df = ranked_by_positive.head(20)

# Stacked bars: one bar per area, one segment per sentiment column.
plot_df.plot.bar(stacked=True)
plt.show()
In [29]:
# Turn each row's counts into shares of that row's total.
# A single vectorised division replaces the row-by-row .loc assignment, which
# wrote float rows into a slice of an integer-typed frame one label at a time.
totals = plot_df.sum(axis=1)
plot_df = plot_df.div(totals, axis=0)
In [30]:
# Display plot_df as it stands after the preceding normalisation cell.
plot_df
Out[30]:
sentiment Neutral Positive Negative nan
sa4_area
Melbourne - Inner South 0.40 0.42 0.18 2.24e-03
Sydney - South West 0.40 0.41 0.18 1.99e-03
Brisbane - North 0.40 0.42 0.18 2.40e-03
Perth - South East 0.41 0.42 0.17 1.54e-03
Gold Coast 0.41 0.46 0.14 1.11e-03
Adelaide - South 0.40 0.41 0.18 3.40e-03
Melbourne - Inner 0.51 0.40 0.09 7.46e-04
Sydney - City and Inner South 0.52 0.40 0.07 9.37e-04
Australian Capital Territory 0.41 0.42 0.16 2.53e-03
Other Territories 0.47 0.42 0.11 2.45e-03
Hume 0.41 0.41 0.18 3.23e-03
South Australia - Outback 0.39 0.45 0.16 2.03e-03
Newcastle and Lake Macquarie 0.39 0.43 0.17 4.57e-03
Central West 0.41 0.41 0.17 2.63e-03
Townsville 0.42 0.41 0.18 2.33e-03
Sunshine Coast 0.39 0.48 0.13 2.20e-03
Brisbane Inner City 0.49 0.41 0.09 1.49e-03
Sydney - Eastern Suburbs 0.52 0.40 0.08 1.36e-03
Illawarra 0.43 0.43 0.13 1.28e-03
Adelaide - Central and Hills 0.47 0.44 0.09 3.43e-03
In [31]:
#* 100
In [32]:
# plot_df holds per-area fractions (0-1); scale to 0-100 so the bars match the
# percentage wording of the title and the y-axis label.
ax = (plot_df * 100).plot.bar(stacked=True)
# Legend goes in a strip to the right of the axes.
ax.legend(frameon=True, loc=2, bbox_to_anchor=(1.02, 0.0, .25, .95), mode='expand',
          borderaxespad=0., title='Sentiment')
# The rows are SA4 areas (index 'sa4_area'), not states.
ax.set_title('Percentage of tweets by Sentiment and SA4 Area')
ax.set_ylabel('Percentage of Tweets')
plt.show()
# The same shares rendered as whole-percent strings.
plot_df.applymap(lambda x: '{:.0f}%'.format(x * 100))
Out[32]:
sentiment Neutral Positive Negative nan
sa4_area
Melbourne - Inner South 40% 42% 18% 0%
Sydney - South West 40% 41% 18% 0%
Brisbane - North 40% 42% 18% 0%
Perth - South East 41% 42% 17% 0%
Gold Coast 41% 46% 14% 0%
Adelaide - South 40% 41% 18% 0%
Melbourne - Inner 51% 40% 9% 0%
Sydney - City and Inner South 52% 40% 7% 0%
Australian Capital Territory 41% 42% 16% 0%
Other Territories 47% 42% 11% 0%
Hume 41% 41% 18% 0%
South Australia - Outback 39% 45% 16% 0%
Newcastle and Lake Macquarie 39% 43% 17% 0%
Central West 41% 41% 17% 0%
Townsville 42% 41% 18% 0%
Sunshine Coast 39% 48% 13% 0%
Brisbane Inner City 49% 41% 9% 0%
Sydney - Eastern Suburbs 52% 40% 8% 0%
Illawarra 43% 43% 13% 0%
Adelaide - Central and Hills 47% 44% 9% 0%
In [33]:
class TweetsDB():
    """Small wrapper around one tweets database reached through a Cloudant client.

    The configuration mapping must provide the entries 'user', 'password',
    'host' and 'tweets_db'.
    """

    def __init__(self, cfg):
        """Connect with the credentials in ``cfg`` and open the tweets database."""
        self._cfg = cfg
        self._client = Cloudant(self._cfg['user'], self._cfg['password'], url=self._cfg['host'])
        self._client.connect()
        self._db = self._get_db()

    def _get_db(self):
        """Return the database named ``cfg['tweets_db']``, creating it when absent."""
        db_name = self._cfg['tweets_db']
        if db_name not in self._client.all_dbs():
            self._client.create_database(db_name)
        return self._client[db_name]

    def save_tweet(self, document):
        """Create a database document from a tweet dict.

        The document id is the existing '_id', or else the tweet's 'id_str';
        the 'id' entry is left out of the stored document. The work is done
        on a shallow copy, so the caller's dict is left untouched.

        Raises:
            KeyError: if the tweet has neither '_id' nor 'id_str'.
        """
        document = dict(document)
        if '_id' not in document:
            document['_id'] = document['id_str']
        document.pop('id', None)
        self._db.create_document(document)

    def update_document(self, document_id, attributes_dict):
        """Set the given attributes on an existing document and save it.

        Returns:
            False when ``attributes_dict`` is None (nothing is fetched or
            saved), True after the document has been saved.

        Raises:
            Whatever ``self._db[document_id]`` raises for an unknown id.
        """
        if attributes_dict is None:
            return False

        document = self._db[document_id]
        for key, value in attributes_dict.items():
            document[key] = value
        document.save()
        return True

Plotly plots

In [34]:
import plotly
import plotly.graph_objs as go
from plotly.graph_objs import Scatter, Layout
plotly.offline.init_notebook_mode(connected=True)
In [35]:
# Mapbox access token used by the scattermapbox figure. Set the
# MAPBOX_ACCESS_TOKEN environment variable to supply your own.
# NOTE(review): the fallback literal is committed with the notebook; consider
# rotating it and removing the fallback once the variable is set everywhere.
mapbox_access_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1Ijoidml0YWx5LXlha3V0ZW5rbyIsImEiOiJjamd3ZGJpMTQwcDA2MzNsNjg2dTJzeG1zIn0.shxkPXdNpbwF1UI2Gm24fg',
)
In [36]:
# Load the SA4 centre points, indexed by area name.
sa4_points_df = pd.read_json('notebooks/sa4_center.json').set_index('SA4_name')

# Each 'Centre' value is read as centre[0][0] -> lng and centre[0][1] -> lat.
first_points = sa4_points_df['Centre'].apply(lambda centre: centre[0])
sa4_points_df['lat'] = first_points.apply(lambda point: point[1])
sa4_points_df['lng'] = first_points.apply(lambda point: point[0])
sa4_points_df = sa4_points_df.drop('Centre', axis=1)
In [37]:
# Attach the columns of sa4_points_df (lat/lng) to the SA4 table.
# Columns left behind by a previous run of this cell are dropped first, so the
# cell can be re-executed; a second plain join would raise on the overlapping
# column names.
sa4_df = (
    sa4_df
    .drop(list(sa4_points_df.columns), axis=1, errors='ignore')
    .join(sa4_points_df)
)
In [38]:
# First rows of sa4_df after the join with sa4_points_df.
sa4_df.head()
Out[38]:
Neutral Positive Negative nan median_tot_fam_inc_weekly median_tot_prsnl_inc_weekly p_tot_emp_tot p_tot_lf_tot p_tot_unemp_tot p_unem_look_ptw_tot lat lng
sa4_area
Adelaide - Central and Hills 1776 1674 325 13 1967 700 137691 147027 9332 4751 -34.97 138.84
Adelaide - North 317 337 86 3 1407 563 180185 198775 18591 7125 -34.69 138.66
Adelaide - South 7032 7286 3253 60 1622 635 164500 176840 12341 5512 -35.14 138.57
Adelaide - West 377 252 61 2 1554 597 104681 113466 8786 3737 -34.87 138.53
Australian Capital Territory 4223 4333 1686 26 2445 1000 205419 215586 10167 5305 -35.49 149.00
In [39]:
# Custom colour ramp for the markers (the marker also sets reversescale=True).
scl = [
    [0, "rgb(5, 10, 172)"], [0.35, "rgb(40, 60, 190)"], [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"], [0.7, "rgb(106, 137, 247)"], [1, "rgb(220, 220, 220)"],
]

# One square marker per SA4 area at its lat/lng, coloured by the 'Positive' count.
data = [dict(
    type='scattergeo',
    lat=sa4_df.lat,
    lon=sa4_df.lng,
    text=sa4_df.index.values,
    mode='markers',
    marker=dict(
        size=8,
        opacity=0.8,
        reversescale=True,
        autocolorscale=False,
        symbol='square',
        line=dict(
            width=1,
            # Three components, so this has to be rgb(...); the previous
            # 'rgba(...)' string had no alpha value and was not a valid colour.
            color='rgb(102, 102, 102)',
        ),
        colorscale=scl,
        cmin=0,
        color=sa4_df['Positive'],
        cmax=sa4_df['Positive'].max(),
        colorbar=dict(
            title="Positive Sentiment Count"
        ),
    ),
)]

layout = dict(
    width=1000,
    height=900,
    # Plain dict instead of the deprecated go.Margin helper.
    margin=dict(l=10, r=10, b=10, t=10, pad=4),
    geo=dict(
        resolution=50,
        projection=dict(type='Mercator'),
        # Latitude/longitude window shown on the map.
        lataxis=dict(range=[-45.0, -5.0]),
        lonaxis=dict(range=[110.0, 155.0]),

        showland=True,
        showcountries=True,
        showsubunits=True,

        landcolor="rgb(250, 250, 250)",
        subunitcolor="rgb(217, 217, 217)",
    ),
)

fig = dict(data=data, layout=layout)
# validate=False: the figure is passed to plotly as plain dicts without schema validation.
plotly.offline.iplot(fig, validate=False, filename='')
In [40]:
# Display sa4_df again, now including the joined lat/lng columns.
sa4_df
Out[40]:
Neutral Positive Negative nan median_tot_fam_inc_weekly median_tot_prsnl_inc_weekly p_tot_emp_tot p_tot_lf_tot p_tot_unemp_tot p_unem_look_ptw_tot lat lng
sa4_area
Adelaide - Central and Hills 1776 1674 325 13 1967 700 137691 147027 9332 4751 -34.97 138.84
Adelaide - North 317 337 86 3 1407 563 180185 198775 18591 7125 -34.69 138.66
Adelaide - South 7032 7286 3253 60 1622 635 164500 176840 12341 5512 -35.14 138.57
Adelaide - West 377 252 61 2 1554 597 104681 113466 8786 3737 -34.87 138.53
Australian Capital Territory 4223 4333 1686 26 2445 1000 205419 215586 10167 5305 -35.49 149.00
Ballarat 902 823 467 5 1409 559 67396 72295 4899 2084 -37.38 143.73
Barossa - Yorke - Mid North 325 349 164 3 1318 535 45980 49147 3170 1111 -33.66 138.45
Bendigo 694 646 284 4 1443 593 66444 70699 4258 1897 -36.60 144.09
Brisbane - East 440 364 77 3 1902 717 108128 115073 6948 2904 -27.55 153.33
Brisbane - North 12682 13442 5848 77 1977 772 104609 111419 6802 2879 -27.37 153.06
Brisbane - South 376 267 84 1 1930 700 167703 181178 13472 6393 -27.57 153.07
Brisbane - West 359 227 50 0 2322 779 87898 94395 6494 3499 -27.49 152.91
Brisbane Inner City 2312 1938 446 7 2495 938 140263 149531 9267 4579 -27.45 153.03
Bunbury 759 634 202 3 1606 637 77663 83507 5842 2154 -33.95 116.01
Cairns 1150 1134 424 8 1475 642 106148 115176 9031 3201 -17.63 145.51
Capital Region 918 830 330 2 1561 655 97339 102324 4985 2028 -35.56 149.24
Central Coast 704 907 229 7 1560 600 139593 149701 10105 4276 -33.31 151.29
Central Queensland 447 595 132 1 1740 664 97655 106937 9283 2794 -24.11 149.33
Central West 2528 2495 1055 16 1473 594 86287 91992 5707 2134 -33.22 148.36
Coffs Harbour - Grafton 384 417 156 7 1249 520 52076 56542 4464 1779 -29.82 152.77
Darling Downs - Maranoa 130 128 28 0 1366 582 54865 58062 3199 1036 -27.32 149.36
Darwin 689 723 323 2 2386 1052 71357 74817 3466 1204 -12.52 131.11
Far West and Orana 545 526 382 4 1386 590 46707 50234 3527 1215 -30.99 145.03
Geelong 1247 1429 419 6 1615 616 125026 132944 7917 3621 -38.11 144.13
Gold Coast 6591 7373 2205 18 1655 665 266886 286880 19985 8844 -28.01 153.27
Hume 3168 3164 1391 25 1417 599 75545 79553 4011 1636 -36.78 146.41
Hunter Valley exc Newcastle 1195 1167 516 5 1548 599 110966 119628 8665 3310 -32.35 150.98
Illawarra 1690 1695 514 5 1679 591 127331 136719 9391 4595 -34.43 150.77
Ipswich 300 275 60 1 1523 626 135965 149336 13365 4965 -27.61 152.55
Latrobe - Gippsland 957 952 287 3 1338 540 110486 118717 8227 3068 -37.73 147.41
... ... ... ... ... ... ... ... ... ... ... ... ...
Queensland - Outback 204 144 60 1 1540 648 33765 36651 2889 761 -21.81 142.52
Richmond - Tweed 1070 1121 349 13 1333 550 96427 103710 7281 3165 -28.74 153.06
Riverina 520 384 131 2 1517 643 69453 73276 3827 1461 -34.62 146.77
Shepparton 568 478 145 0 1364 565 54705 58011 3306 1350 -36.25 145.17
South Australia - Outback 2491 2860 1033 13 1418 594 34650 37644 2995 932 -29.48 135.38
South Australia - South East 739 768 307 4 1266 544 77675 82647 4975 1806 -35.70 139.91
Southern Highlands and Shoalhaven 751 764 237 5 1340 552 56899 60287 3392 1415 -34.87 150.37
Sunshine Coast 1605 1967 516 9 1478 612 151648 163207 11559 5210 -26.59 152.91
Sydney - Baulkham Hills and Hawkesbury 427 523 105 1 2397 801 115320 120642 5320 2781 -33.37 150.83
Sydney - Blacktown 347 279 75 1 1818 672 152682 164680 12002 5265 -33.74 150.86
Sydney - City and Inner South 7276 5575 1016 13 2369 904 174465 185043 10583 5487 -33.92 151.19
Sydney - Eastern Suburbs 2310 1752 338 6 2756 1020 135879 142479 6600 3588 -33.92 151.25
Sydney - Inner South West 470 355 84 1 1576 565 247411 267126 19722 9483 -33.94 151.06
Sydney - Inner West 831 599 114 2 2282 828 150117 158859 8740 4299 -33.87 151.12
Sydney - North Sydney and Hornsby 1163 923 230 2 2814 1029 205722 215480 9752 4744 -33.69 151.16
Sydney - Northern Beaches 1154 1009 161 0 2528 916 129685 134320 4642 2441 -33.69 151.25
Sydney - Outer South West 417 374 199 0 1794 686 121806 129960 8152 3491 -34.12 150.68
Sydney - Outer West and Blue Mountains 729 619 159 2 1852 715 147331 155762 8431 3651 -33.87 150.38
Sydney - Parramatta 1063 835 203 5 1631 586 191445 208788 17344 7578 -33.83 151.01
Sydney - Ryde 280 246 43 0 2236 760 88601 94336 5737 3000 -33.79 151.10
Sydney - South West 40429 41319 18561 200 1489 500 160218 175172 14949 6907 -33.93 150.82
Sydney - Sutherland 557 406 100 2 2312 836 112434 116552 4116 2054 -34.08 151.06
Toowoomba 764 848 342 3 1583 649 66386 71409 5028 2048 -27.62 152.08
Townsville 2144 2090 911 12 1640 672 103917 113782 9866 3546 -20.04 145.93
Warrnambool and South West 773 994 253 4 1411 582 55609 58316 2711 1089 -37.97 142.38
West and North West 5 6 0 0 1274 523 44336 48009 3671 1287 -41.63 145.48
Western Australia - Outback (North) 276 248 98 0 2416 1239 45541 48337 2792 761 -20.04 123.72
Western Australia - Outback (South) 1345 1169 571 5 1836 732 53172 57159 3984 1242 -27.73 121.99
Western Australia - Wheat Belt 395 405 149 1 1481 623 59053 62463 3412 1096 -32.07 117.92
Wide Bay 617 648 227 0 1133 478 99867 111460 11596 3829 -25.63 151.79

86 rows × 12 columns

In [42]:
# Display sa4_df, including the joined lat/lng columns.
sa4_df
Out[42]:
Neutral Positive Negative nan median_tot_fam_inc_weekly median_tot_prsnl_inc_weekly p_tot_emp_tot p_tot_lf_tot p_tot_unemp_tot p_unem_look_ptw_tot lat lng
sa4_area
Adelaide - Central and Hills 1776 1674 325 13 1967 700 137691 147027 9332 4751 -34.97 138.84
Adelaide - North 317 337 86 3 1407 563 180185 198775 18591 7125 -34.69 138.66
Adelaide - South 7032 7286 3253 60 1622 635 164500 176840 12341 5512 -35.14 138.57
Adelaide - West 377 252 61 2 1554 597 104681 113466 8786 3737 -34.87 138.53
Australian Capital Territory 4223 4333 1686 26 2445 1000 205419 215586 10167 5305 -35.49 149.00
Ballarat 902 823 467 5 1409 559 67396 72295 4899 2084 -37.38 143.73
Barossa - Yorke - Mid North 325 349 164 3 1318 535 45980 49147 3170 1111 -33.66 138.45
Bendigo 694 646 284 4 1443 593 66444 70699 4258 1897 -36.60 144.09
Brisbane - East 440 364 77 3 1902 717 108128 115073 6948 2904 -27.55 153.33
Brisbane - North 12682 13442 5848 77 1977 772 104609 111419 6802 2879 -27.37 153.06
Brisbane - South 376 267 84 1 1930 700 167703 181178 13472 6393 -27.57 153.07
Brisbane - West 359 227 50 0 2322 779 87898 94395 6494 3499 -27.49 152.91
Brisbane Inner City 2312 1938 446 7 2495 938 140263 149531 9267 4579 -27.45 153.03
Bunbury 759 634 202 3 1606 637 77663 83507 5842 2154 -33.95 116.01
Cairns 1150 1134 424 8 1475 642 106148 115176 9031 3201 -17.63 145.51
Capital Region 918 830 330 2 1561 655 97339 102324 4985 2028 -35.56 149.24
Central Coast 704 907 229 7 1560 600 139593 149701 10105 4276 -33.31 151.29
Central Queensland 447 595 132 1 1740 664 97655 106937 9283 2794 -24.11 149.33
Central West 2528 2495 1055 16 1473 594 86287 91992 5707 2134 -33.22 148.36
Coffs Harbour - Grafton 384 417 156 7 1249 520 52076 56542 4464 1779 -29.82 152.77
Darling Downs - Maranoa 130 128 28 0 1366 582 54865 58062 3199 1036 -27.32 149.36
Darwin 689 723 323 2 2386 1052 71357 74817 3466 1204 -12.52 131.11
Far West and Orana 545 526 382 4 1386 590 46707 50234 3527 1215 -30.99 145.03
Geelong 1247 1429 419 6 1615 616 125026 132944 7917 3621 -38.11 144.13
Gold Coast 6591 7373 2205 18 1655 665 266886 286880 19985 8844 -28.01 153.27
Hume 3168 3164 1391 25 1417 599 75545 79553 4011 1636 -36.78 146.41
Hunter Valley exc Newcastle 1195 1167 516 5 1548 599 110966 119628 8665 3310 -32.35 150.98
Illawarra 1690 1695 514 5 1679 591 127331 136719 9391 4595 -34.43 150.77
Ipswich 300 275 60 1 1523 626 135965 149336 13365 4965 -27.61 152.55
Latrobe - Gippsland 957 952 287 3 1338 540 110486 118717 8227 3068 -37.73 147.41
... ... ... ... ... ... ... ... ... ... ... ... ...
Queensland - Outback 204 144 60 1 1540 648 33765 36651 2889 761 -21.81 142.52
Richmond - Tweed 1070 1121 349 13 1333 550 96427 103710 7281 3165 -28.74 153.06
Riverina 520 384 131 2 1517 643 69453 73276 3827 1461 -34.62 146.77
Shepparton 568 478 145 0 1364 565 54705 58011 3306 1350 -36.25 145.17
South Australia - Outback 2491 2860 1033 13 1418 594 34650 37644 2995 932 -29.48 135.38
South Australia - South East 739 768 307 4 1266 544 77675 82647 4975 1806 -35.70 139.91
Southern Highlands and Shoalhaven 751 764 237 5 1340 552 56899 60287 3392 1415 -34.87 150.37
Sunshine Coast 1605 1967 516 9 1478 612 151648 163207 11559 5210 -26.59 152.91
Sydney - Baulkham Hills and Hawkesbury 427 523 105 1 2397 801 115320 120642 5320 2781 -33.37 150.83
Sydney - Blacktown 347 279 75 1 1818 672 152682 164680 12002 5265 -33.74 150.86
Sydney - City and Inner South 7276 5575 1016 13 2369 904 174465 185043 10583 5487 -33.92 151.19
Sydney - Eastern Suburbs 2310 1752 338 6 2756 1020 135879 142479 6600 3588 -33.92 151.25
Sydney - Inner South West 470 355 84 1 1576 565 247411 267126 19722 9483 -33.94 151.06
Sydney - Inner West 831 599 114 2 2282 828 150117 158859 8740 4299 -33.87 151.12
Sydney - North Sydney and Hornsby 1163 923 230 2 2814 1029 205722 215480 9752 4744 -33.69 151.16
Sydney - Northern Beaches 1154 1009 161 0 2528 916 129685 134320 4642 2441 -33.69 151.25
Sydney - Outer South West 417 374 199 0 1794 686 121806 129960 8152 3491 -34.12 150.68
Sydney - Outer West and Blue Mountains 729 619 159 2 1852 715 147331 155762 8431 3651 -33.87 150.38
Sydney - Parramatta 1063 835 203 5 1631 586 191445 208788 17344 7578 -33.83 151.01
Sydney - Ryde 280 246 43 0 2236 760 88601 94336 5737 3000 -33.79 151.10
Sydney - South West 40429 41319 18561 200 1489 500 160218 175172 14949 6907 -33.93 150.82
Sydney - Sutherland 557 406 100 2 2312 836 112434 116552 4116 2054 -34.08 151.06
Toowoomba 764 848 342 3 1583 649 66386 71409 5028 2048 -27.62 152.08
Townsville 2144 2090 911 12 1640 672 103917 113782 9866 3546 -20.04 145.93
Warrnambool and South West 773 994 253 4 1411 582 55609 58316 2711 1089 -37.97 142.38
West and North West 5 6 0 0 1274 523 44336 48009 3671 1287 -41.63 145.48
Western Australia - Outback (North) 276 248 98 0 2416 1239 45541 48337 2792 761 -20.04 123.72
Western Australia - Outback (South) 1345 1169 571 5 1836 732 53172 57159 3984 1242 -27.73 121.99
Western Australia - Wheat Belt 395 405 149 1 1481 623 59053 62463 3412 1096 -32.07 117.92
Wide Bay 617 648 227 0 1133 478 99867 111460 11596 3829 -25.63 151.79

86 rows × 12 columns

In [43]:
# Colour ramp kept from the scattergeo figure; this figure uses 'Viridis' instead.
scl = [
    [0, "rgb(5, 10, 172)"], [0.35, "rgb(40, 60, 190)"], [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"], [0.7, "rgb(106, 137, 247)"], [1, "rgb(220, 220, 220)"],
]

# Positions, labels and colours all come from sa4_df. Plotly pairs these
# arrays by position, so taking lat/lon from sa4_points_df and colours from
# sa4_df (as before) only lined up if both frames had identical row order.
income_column = 'median_tot_prsnl_inc_weekly'
data = [dict(
    type='scattermapbox',
    lat=sa4_df.lat,
    lon=sa4_df.lng,
    text=sa4_df.index.values,
    mode='markers+text',
    # Plain dict instead of the deprecated go.Marker helper.
    marker=dict(
        size=12,
        opacity=0.6,
        autocolorscale=False,
        colorscale='Viridis',
        cmin=0,
        color=sa4_df[income_column],
        cmax=sa4_df[income_column].max(),
        # The colour encodes income, so the colorbar is titled accordingly.
        colorbar=dict(title="Median Weekly Personal Income"),
    ),
)]

layout = dict(
    width=1000,
    height=850,
    # Plain dict instead of the deprecated go.Margin helper.
    margin=dict(l=10, r=10, b=10, t=10, pad=4),
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        # Initial map centre and zoom level.
        center=dict(
            lat=-28,
            lon=134
        ),
        pitch=0,
        zoom=3.8
    ),
)

fig = dict(data=data, layout=layout)
# validate=False: the figure is passed to plotly as plain dicts without schema validation.
plotly.offline.iplot(fig, validate=False, filename='')