import json
import requests
import plotly.express as px
import difflib
import geopandas as gpd

import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns 
import numpy as np
import matplotlib.colors as mcolors

import holoviews as hv
from chord import Chord
from wordcloud import WordCloud


# Load the Sankey link table in long format (one FROM -> TO link with its
# VALUE per row), which is the layout the commented-out hv.Sankey call expects.
df = pd.read_csv(filepath_or_buffer="datasets/sankey.csv")
df.head(n=2)  # notebook preview of the first two links


# hv.extension('bokeh')
# df = pd.read_csv("datasets/sankey.csv")
# sakey = hv.Sankey(df, kdims=["FROM", "TO"], vdims=["VALUE"])

# sakey.opts(cmap='Spectral',
#            label_position='right',
#            edge_color='FROM', 
#            edge_line_width=0.5,
#            edge_alpha= 0.8,
#            node_alpha=0.5, 
#            node_width=10,
#            node_sort=True,

#            width=1100, height=500, 
#            bgcolor="white", 
#            margin=0, padding=0,
#            title="My Fancy Sankey Plot")


# Load the origin/destination migration matrix once (the sheet was previously
# parsed twice in a row with identical arguments).
circos = pd.read_excel("datasets/migration.xlsx", skiprows=1)

# Drop the two unnamed spreadsheet columns, then use the origin labels as the
# row index so rows and columns carry the same set of names.
circos.drop(columns=["Unnamed: 0", "Unnamed: 17"], inplace=True)
circos.rename(columns={"Unnamed: 1": "Origin"}, inplace=True)
circos.set_index("Origin", inplace=True)

print(circos.shape)  # square matrix!
circos.head(2)

(15, 15)


# names=list(circos.columns)
# df = circos.values.tolist()

# circos = Chord(df, names, 
#               font_size_large='10px', 
#               wrap_labels=False, 
#               margin=100,
#               width=800)

# # only way to display is save it and load after
# circos.to_html('circos.html')


# Base colours reused by the stacked bar chart.
colors = list(mcolors.TABLEAU_COLORS.values())

# Census table: one row per census year (column ANNI), one column per region.
df = pd.read_excel("datasets/census.xlsx")
df.drop(columns=["Unnamed: 23"], inplace=True)

# Region-per-row view of the same table: transpose, drop the first row (the
# transposed ANNI column) and name the value columns after the census years.
# .copy() makes df_bar an independent frame, so the label fixes below write to
# df_bar itself instead of to a temporary slice.
df_bar = df.transpose().reset_index().loc[1:, :].copy()
df_bar.columns = ["region"] + df.ANNI.values.tolist()

# Shorten two region labels. A single positional .iloc[row, column] assignment
# replaces the chained `df_bar.region.iloc[i] = ...`, which raised
# SettingWithCopyWarning and is not guaranteed to modify df_bar.
region_col = df_bar.columns.get_loc("region")
df_bar.iloc[1, region_col] = "Valle d'Aosta"
df_bar.iloc[4, region_col] = "Trentino-Alto Adige"
df.head(2)

c:\users\dell\venv\jupyter\lib\site-packages\pandas\core\indexing.py:1637: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


# Flip df_bar so that every region becomes a column: the first transposed row
# holds the region names and is promoted to the column header, the remaining
# rows hold the per-year values.
m = df_bar.T
m.columns = m.iloc[0]
m = m.iloc[1:, :]

# Total per region across all years, sorted ascending by that total.
sum_over_all_years = m.sum(axis=0).reset_index(name="tot")
sum_over_all_years = sum_over_all_years.sort_values(by="tot")
sum_over_all_years.head(n=2)


def _decorate_axis(ax, title, xlabel, ylabel, rotation, title_font):
    """Set the title, axis labels and x tick-label rotation of one subplot."""
    ax.set_title(title + "\n", fontdict=title_font)
    ax.set_ylabel(ylabel)
    ax.set_xlabel(xlabel)
    for tick in ax.get_xticklabels():
        tick.set_rotation(rotation)


# The seaborn style has to be active before the axes are created; setting it
# after plt.subplots() leaves the already-built axes unstyled.
sns.set_style("white")
fig, axes = plt.subplots(3, 1, figsize=(20, 60))

style_title = dict(size=20, weight='bold', style='italic')
# Legend appearance shared by the first two subplots.
legend_style = dict(fontsize=20, shadow=True, facecolor='#d4ebf2',
                    framealpha=0.8, borderpad=0.45)

# --- axes[0]: one line per region whose mean over all years exceeds 2000 ---
for region in df.columns.values[1:]:
    if df[region].mean() > 2000:
        sns.lineplot(ax=axes[0], data=df, x='ANNI', y=region, label=region)

_decorate_axis(axes[0], "LinePlot of the most highest important regions",
               xlabel="Years", ylabel="Census Value",
               rotation=45, title_font=style_title)
axes[0].legend(loc='upper left', ncol=4, **legend_style)
axes[0].set_ylim(0, 13000)

# --- axes[1]: stacked bar chart built from overlaid cumulative bars ---
# Running totals 1951, 1951+1961, ... are accumulated left to right, then drawn
# from the largest (up to 2011) to the smallest (1951) so that each shorter bar
# paints over the previous one and only the per-decade increment stays visible.
census_years = [1951, 1961, 1971, 1981, 1991, 2001, 2011]
running_totals = []
total = None
for year in census_years:
    total = df_bar[year] if total is None else total + df_bar[year]
    running_totals.append((year, total))

# Colour index per layer, largest layer first; index 6 is skipped exactly as
# in the original calls.
layer_color_idx = [0, 1, 2, 3, 4, 5, 7]
for (year, heights), color_idx in zip(reversed(running_totals), layer_color_idx):
    sns.barplot(ax=axes[1], data=df_bar, x='region', y=heights,
                color=colors[color_idx], label=f'up-to {year}')

# The x axis of both bar charts shows regions, so it is labelled "region"
# (it was previously labelled "Years").
_decorate_axis(axes[1], "Stackbarchart related to the years",
               xlabel="region", ylabel="Census Value",
               rotation=70, title_font=style_title)
axes[1].legend(loc='best', ncol=4, **legend_style)

# --- axes[2]: total over all years per region, sorted ascending ---
sns.barplot(ax=axes[2], data=sum_over_all_years, x='region', y='tot', palette='viridis')
_decorate_axis(axes[2], "Cumulative and sorted stack bar chart",
               xlabel="region", ylabel="Census Value",
               rotation=90, title_font=style_title)

# txt="Each stack represents a ten years count"
# axes[1].text(4.5, 60000, txt, wrap=True, horizontalalignment='center', 
#              fontdict=(dict(size=25,style='italic', color='tab:blue')))

plt.show()


# Iris measurements: four numeric columns plus the species in "class".
iris = pd.read_csv(filepath_or_buffer="datasets/iris.csv")
iris.head(n=2)


sns.set_style("whitegrid")

# Joint scatter of sepal length vs. sepal width with marginal plots, coloured
# by species; the joint scatter additionally varies the marker with the species.
g1 = sns.jointplot(
    data=iris,
    x="sepal length",
    y="sepal width",
    hue='class',
    height=14,
    ratio=5,
    dropna=True,
    legend=False,
    joint_kws=dict(alpha=0.8, s=200,
                   style=iris['class'],
                   markers=['o', 's', 'd']),
    marginal_kws={"alpha": 0.3},
)

# Hand-placed species captions along y=5.9 stand in for the disabled legend.
species_captions = (
    (4.7, 'Iris Setosa', 'blue'),
    (5.7, 'Iris Versicolor', 'orange'),
    (7, 'Iris Virginica', 'green'),
)
for caption_x, caption, caption_color in species_captions:
    g1.ax_joint.text(x=caption_x, y=5.9, s=caption,
                     fontdict={"size": 20, "color": caption_color})

plt.show()


# Load the per-country table used by the CPI vs. HDI scatter plot
# (the plot reads its CPI, HDI, Region and Country columns).
df = pd.read_csv(filepath_or_buffer="datasets/economist_data.csv")
df.head(n=2)


def text_scatter_xy(data, _x, _y, _s, space_x=0.3, space_y=0.2, fontdict=None, ax=None):
    """Write one left-aligned text label next to every row of ``data``.

    Args:
        data: Table indexable by column name (for example a pandas
            DataFrame); each row produces one label.
        _x: Name of the column holding the x coordinates.
        _y: Name of the column holding the y coordinates.
        _s: Name of the column holding the label values; each value is
            converted with ``str`` before drawing.
        space_x: Offset added to every x coordinate.
        space_y: Offset added to every y coordinate.
        fontdict: Text properties forwarded unchanged to the ``text`` call.
        ax: Optional object exposing a matplotlib-style ``text`` method to
            draw on. When omitted, ``plt.text`` is used, as before.
    """
    # Look up the drawing function only; nothing is drawn (and pyplot is not
    # touched when ``ax`` is given) until a row is actually labelled.
    draw_text = plt.text if ax is None else ax.text

    for x, y, label in zip(data[_x], data[_y], data[_s]):
        draw_text(x=x + space_x,
                  y=y + space_y,
                  s=str(label),
                  horizontalalignment='left',
                  fontdict=fontdict)


fig = plt.figure(figsize=(15, 10))
sns.set_style("whitegrid")

# One marker per row of df, coloured by Region. The marker is a mathtext
# circle; the raw string keeps the backslash literal instead of relying on
# the invalid "\c" escape sequence.
g1 = sns.scatterplot(data=df, x='CPI', y='HDI', hue='Region', s=350,
                     marker=r"$\circ$", edgecolor="face", linewidth=0.1)

# Order-3 polynomial fit over the same data. regplot's own scatter points are
# fully transparent (alpha=0.0), so only the red fitted line is added.
sns.regplot(data=df,
            x='CPI',
            y='HDI',
            order=3,
            ci=0,
            scatter_kws=dict(alpha=0.0),
            line_kws=dict(color='red', label='r^3'),
            )

# Tick positions and their mathtext labels.
ticks_y = list(np.arange(0.2, 1.001, .1))
label_y = [f"${str(round(i, 1))}$" for i in ticks_y]

ticks_x = list(range(1, 11))
label_x = [f"${str(round(i, 1))}$" for i in ticks_x]

# `labels=` is the keyword that sets the tick text; the previous `label=`
# was forwarded as a text property and left the x tick labels unformatted.
plt.xticks(ticks=ticks_x, labels=label_x, rotation=0)
plt.yticks(ticks=ticks_y, labels=label_y, rotation=0)

ylabel = "HDI"
xlabel = "CPI"

plt.ylabel(ylabel)
plt.xlabel(xlabel)

# Black horizontal guide lines at y = 0, 0.1, ..., 0.9 spanning x from 0 to 10.
plt.hlines(y=np.arange(0, 1, .1),
           xmin=0, xmax=10,
           color='black')

plt.legend(loc='upper left', ncol=3,
           fontsize=20, shadow=True,
           facecolor='white',
           framealpha=0.8, borderpad=0.45,
           bbox_to_anchor=(0.10, 1.2))

# NOTE(review): the nine trailing newlines presumably lift the title above the
# legend, which is anchored above the axes - confirm visually.
plt.title("Corruption and Human development" + "\n" * 9)
plt.ylim(0.2, 1)

# Label every fifth country to keep the annotations readable.
annotation_dict = dict(size=15)
text_scatter_xy(df[::5], _x="CPI", _y="HDI", _s="Country",
                space_x=0.1, space_y=0.01, fontdict=annotation_dict)

plt.show()


# Read the whole license text that feeds the word cloud.
with open('datasets/license.txt', mode='r') as f:
    text = f.read()


fig = plt.figure(figsize=(10, 10))


# Configure the generator, then build the cloud image from the text.
wordcloud = WordCloud(
    background_color='white',
    colormap='icefire',
    collocation_threshold=300,
    margin=0,
    stopwords=None,
)

word_clouded = wordcloud.generate(text)


# Show the cloud as an image with no axes or margins around it.
plt.imshow(word_clouded)
plt.margins(x=0, y=0)
plt.axis("off")
plt.title('Word Cloud\n\n', fontdict={"size": 20, "style": 'italic'})
plt.show()

	FROM	TO	VALUE
0	Brazil	Portugal	5
1	Brazil	France	1

	North America	Central America	South America	North Africa	Sub-Saharan Africa	Northern Europe	Western Europe	Southern Europe	Eastern Europe	Central Asia	Western Asia	South Asia	East Asia	South-East Asia	Oceania
Origin
North America	96102	208668	68240	4253	58827	248379	279267	551959	162959	85259	131288	158	39145	41758	52303
Central America	3233173	215008	36975	390	2414	42572	81356	190931	4962	3145	6236	14	1754	510	7541

	ANNI	Piemonte	Valle d'Aosta-Vallée d'Aoste	Liguria	Lombardia	Trentino-Alto Adige/Südtirol	Bolzano-Bozen	Trento	Veneto	Friuli-Venezia Giulia	...	Marche	Lazio	Abruzzo	Molise	Campania	Puglia	Basilicata	Calabria	Sicilia	Sardegna
0	1951	3518.00	94.000	1567.000	6566.000	729.000	334.000	395.000	3918.000	1226.000	...	1364.000	3341.000	1277.000	407.000	4346.000	3220.000	628.000	2044.000	4487.000	1276.000
1	1961	3914.25	100.959	1735.349	7406.152	785.967	373.863	412.104	3846.562	1204.298	...	1347.489	3958.957	1206.266	358.052	4760.759	3421.217	644.297	2045.047	4721.001	1419.362

	sepal length	sepal width	petal length	petal width	class
0	5.1	3.5	1.4	0.2	Iris-setosa
1	4.9	3.0	1.4	0.2	Iris-setosa

	Country	HDI.Rank	HDI	CPI	Region
0	Afghanistan	172	0.398	1.5	Asia Pacific
1	Albania	70	0.739	3.1	East EU Cemt Asia

Fancy plots

Sankey Plot¶

Circos Plot¶

Stacked barchart¶

Marginal Plot¶

Line Plot¶

Word Cloud¶

	region	tot
1	Valle d'Aosta	778.313
15	Molise	2378.313