diff --git a/Notebook/EDA.ipynb b/Notebook/EDA.ipynb index bf9e710..afc89bd 100644 --- a/Notebook/EDA.ipynb +++ b/Notebook/EDA.ipynb @@ -326,7 +326,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "0e045ad6", "metadata": {}, "outputs": [], @@ -422,7 +422,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "id": "df3d4c9f", "metadata": {}, "outputs": [], @@ -448,7 +448,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 6, "id": "5beb5971", "metadata": {}, "outputs": [ @@ -497,7 +497,7 @@ }, { "cell_type": "code", - "execution_count": 135, + "execution_count": 7, "id": "15cd2efc", "metadata": {}, "outputs": [], @@ -523,9 +523,11 @@ }, { "cell_type": "code", - "execution_count": 136, + "execution_count": 19, "id": "bd2362f4", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { @@ -652,7 +654,7 @@ "4 2020-1 " ] }, - "execution_count": 136, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -660,12 +662,14 @@ "source": [ "# cria um dataset aplicando a transformação\n", "df_time = get_time_features.transform(df_type)\n", + "df_time.loc[df_time['DT_VENDA']>='2021-1-1','MES'] = 13\n", + "df_time.loc[df_time['DT_VENDA']>='2021-2-1','MES'] = 14\n", "df_time.head()" ] }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 20, "id": "6e0a244e", "metadata": {}, "outputs": [ @@ -710,7 +714,7 @@ }, { "cell_type": "markdown", - "id": "79ecca08", + "id": "fc44fa49", "metadata": {}, "source": [ "### Valor total por dia" @@ -757,7 +761,7 @@ }, { "cell_type": "markdown", - "id": "f68edad7", + "id": "89f15828", "metadata": {}, "source": [ "### Valor mensal" @@ -765,18 +769,7 @@ }, { "cell_type": "code", - "execution_count": 146, - "id": "8a93e97c", - "metadata": {}, - "outputs": [], - "source": [ - "df_time.loc[df_time['DT_VENDA']>='2021-1-1','MES'] = 13\n", - "df_time.loc[df_time['DT_VENDA']>='2021-2-1','MES'] = 14" - ] - }, - { - "cell_type": "code", - "execution_count": 155, + "execution_count": 11, "id": "b34d60ce", "metadata": {}, "outputs": [], @@ -789,7 +782,7 @@ { "cell_type": "code", "execution_count": 233, - "id": "6db76ce9", + "id": "60b2028b", "metadata": {}, "outputs": [ { @@ -820,7 +813,7 @@ }, { "cell_type": "markdown", - "id": "b5087188", + "id": "c8536dbd", "metadata": {}, "source": [ "Os valores de compra tem desvio padrão maior que a própria média, isso é um spread significativo. O desvio padrão aparenta seguir a mesma tendência da média." @@ -829,7 +822,7 @@ { "cell_type": "code", "execution_count": 245, - "id": "07a3388a", + "id": "8952fc6c", "metadata": {}, "outputs": [ { @@ -860,7 +853,7 @@ }, { "cell_type": "markdown", - "id": "7a1e1350", + "id": "8229b5ad", "metadata": {}, "source": [ "O pico em 2020-12 ocorre tanto no valor total quanto na média, entretanto, o pico é mais acentuado quando olhamos o valor vendido total. Isso sugere que a quantidade de compras cresceu muito em dezembro. Vamos checar essa relação." @@ -868,7 +861,7 @@ }, { "cell_type": "markdown", - "id": "c905d9fd", + "id": "0253e38a", "metadata": {}, "source": [ "### Número de Compras" @@ -877,7 +870,7 @@ { "cell_type": "code", "execution_count": 249, - "id": "ce7b76e8", + "id": "ccce38ba", "metadata": {}, "outputs": [ { @@ -903,7 +896,7 @@ }, { "cell_type": "markdown", - "id": "ab09c357", + "id": "3ea571c2", "metadata": {}, "source": [ "## Valor por canal" @@ -912,7 +905,7 @@ { "cell_type": "code", "execution_count": 197, - "id": "ea598b9d", + "id": "90b69086", "metadata": {}, "outputs": [ { @@ -947,7 +940,7 @@ }, { "cell_type": "markdown", - "id": "79ef0ca1", + "id": "79f01d9f", "metadata": {}, "source": [ "### Dividindo os anos" @@ -956,7 +949,7 @@ { "cell_type": "code", "execution_count": 251, - "id": "5f8985e6", + "id": "8b4d3509", "metadata": {}, "outputs": [ { @@ -1292,10 +1285,56 @@ "test_stationarity(df_test)" ] }, + { + "cell_type": "markdown", + "id": "5748c5f1", + "metadata": {}, + "source": [ + "### Número de dias por mês\n", + "\n", + "Vamos ver quantos dias válidos temos em cada mês." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "0e9de500", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MES ANO+MES\n", + "1 2020-1 30\n", + "2 2020-2 28\n", + "3 2020-3 29\n", + "4 2020-4 26\n", + "5 2020-5 27\n", + "6 2020-6 29\n", + "7 2020-7 31\n", + "8 2020-8 31\n", + "9 2020-9 30\n", + "10 2020-10 31\n", + "11 2020-11 30\n", + "12 2020-12 30\n", + "13 2021-1 30\n", + "14 2021-2 24\n", + "Name: DIA, dtype: int64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_time.groupby(['MES','ANO+MES'])['DIA'].nunique()" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "e97f579e", + "id": "65efd8ed", "metadata": {}, "outputs": [], "source": []