Files
mailbox-science/mailbox-science.ipynb
2021-05-07 18:42:44 +02:00

243 lines
13 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "asian-tracker",
"metadata": {},
"source": [
"# Briefkastensensor: Analyse der Temperaturabhängigkeit"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "subject-cambridge",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import sqlite3"
]
},
{
"cell_type": "markdown",
"id": "tough-construction",
"metadata": {},
"source": [
"## Datengrundlage\n",
"\n",
"* Homeassistant SQLite Database (home-assistants.db)\n",
"* Entity Helligkeit: sensor.briefkasten_zda_measure\n",
"* Entity Lorapark: sensor.aussentemperatur_lorapark"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "civilian-causing",
"metadata": {},
"outputs": [
{
"ename": "DatabaseError",
"evalue": "Execution failed on sql 'SELECT state, created FROM states WHERE entity_id = 'sensor.briefkasten_zda_measure'': no such table: states",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mOperationalError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/io/sql.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1696\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1697\u001b[0;31m \u001b[0mcur\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1698\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcur\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mOperationalError\u001b[0m: no such table: states",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mDatabaseError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-11-8bcec3e0fc38>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mquery2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"SELECT state, created FROM states WHERE entity_id = 'sensor.aussentemperatur_lorapark'\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mmessung\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_sql\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquery1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcon\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparse_dates\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex_col\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'created'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0mtemperatur\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_sql\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquery2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcon\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparse_dates\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex_col\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'created'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/io/sql.py\u001b[0m in \u001b[0;36mread_sql\u001b[0;34m(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)\u001b[0m\n\u001b[1;32m 482\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 483\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpandas_sql\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSQLiteDatabase\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 484\u001b[0;31m return pandas_sql.read_query(\n\u001b[0m\u001b[1;32m 485\u001b[0m \u001b[0msql\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 486\u001b[0m \u001b[0mindex_col\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindex_col\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/io/sql.py\u001b[0m in \u001b[0;36mread_query\u001b[0;34m(self, sql, index_col, coerce_float, params, parse_dates, chunksize)\u001b[0m\n\u001b[1;32m 1741\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_convert_params\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msql\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1743\u001b[0;31m \u001b[0mcursor\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1744\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mcol_desc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcol_desc\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcursor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdescription\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1745\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.8/site-packages/pandas/io/sql.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1707\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1708\u001b[0m \u001b[0mex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDatabaseError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Execution failed on sql '{args[0]}': {exc}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1709\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mex\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1710\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1711\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mDatabaseError\u001b[0m: Execution failed on sql 'SELECT state, created FROM states WHERE entity_id = 'sensor.briefkasten_zda_measure'': no such table: states"
]
}
],
"source": [
"# SQLite lesen\n",
"con = sqlite3.connect(\"./home-assistant.db\")\n",
"\n",
"query1 = \"SELECT state, created FROM states WHERE entity_id = 'sensor.briefkasten_zda_measure'\"\n",
"query2 = \"SELECT state, created FROM states WHERE entity_id = 'sensor.aussentemperatur_lorapark'\"\n",
"\n",
"messung = pd.read_sql(query1, con, parse_dates=True, index_col='created')\n",
"temperatur = pd.read_sql(query2, con, parse_dates=True, index_col='created')\n",
"\n",
"def drop_non_numeric (df, data_columns):\n",
" df_new = (df.drop(data_columns, axis=1).join(df[data_columns].apply(pd.to_numeric, errors='coerce')))\n",
" return df_new[df_new[data_columns].notnull().all(axis=1)]\n",
"\n",
"# Drop rows where state is \"unavailable\"\n",
"messung = drop_non_numeric(messung, ['state'])\n",
"temperatur = drop_non_numeric(temperatur, ['state'])\n",
"\n",
"# Daten in Pivot Table zusammenführen\n",
"messung['sensor'] = 'brightness'\n",
"temperatur['sensor'] = 'temperature'\n",
"time_df = pd.concat([messung, temperatur], axis=0)\n",
"time_df = time_df.pivot_table(index='created', columns='sensor', values='state', aggfunc=np.sum)\n",
"\n",
"# Jetzt sind die allermeisten Datensätze nur mit einem Wert (z.B. Helligkeit), der andere (z.B. Temperatur) ist null\n",
"# Deswegen mit vorherigen Werten auffüllen\n",
"time_df = time_df.fillna(method='ffill', limit=10)"
]
},
{
"cell_type": "markdown",
"id": "unauthorized-india",
"metadata": {},
"source": [
"## Optional: Daten eingrenzen"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "adjacent-traveler",
"metadata": {},
"outputs": [],
"source": [
"# time_df = time_df[\"2021-03-23\":\"2021-03-24\"]\n",
"# time_df = time_df[\"2021-03-29\":\"2021-03-30\"]\n",
"# time_df = time_df[\"2021-04-13\":\"2021-04-14\"]"
]
},
{
"cell_type": "markdown",
"id": "junior-basketball",
"metadata": {},
"source": [
"## Optional: CSV-Export"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "silver-collins",
"metadata": {},
"outputs": [],
"source": [
"time_df.to_csv(\"export.csv\")"
]
},
{
"cell_type": "markdown",
"id": "collaborative-range",
"metadata": {},
"source": [
"## Temperatur und Brightness über die Zeit"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "proper-mounting",
"metadata": {},
"outputs": [],
"source": [
"time_df.plot(subplots=True,figsize=(20,20))\n"
]
},
{
"cell_type": "markdown",
"id": "velvet-combining",
"metadata": {},
"source": [
"## Modellierung von Regressionsfunktionen"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "emotional-respect",
"metadata": {},
"outputs": [],
"source": [
"# Nur Datensätze verwenden, wo beide Werte vorhanden sind\n",
"time_df.dropna(inplace=True)\n",
"\n",
"# Generator für Regressionsfunktionen (mit Numpy) - Degree: Grad der gewünschten ganzrationalen Funktion\n",
"def reg_func (x, y, degree=1):\n",
" d = np.polyfit(x, y, degree)\n",
" \n",
" # <--- Print out function -->\n",
" printstr = f\"\\n\"\n",
" for i, n in enumerate(d):\n",
" if (n >= 0): \n",
" n = f\"+{n}\"\n",
" if (len(d)-i-1 == 0):\n",
" printstr += f\" {n}\"\n",
" elif (len(d)-i-1 == 1):\n",
" printstr += f\" {n}x\"\n",
" else:\n",
" printstr += f\" {n}x^{len(d)-i-1}\"\n",
" print(printstr)\n",
" # <------------------------>\n",
" \n",
" f = np.poly1d(d)\n",
" return f(x)\n",
"\n",
"\n",
"# ------- Add regression functions here -------\n",
"time_df.loc[:, 'reg-01'] = reg_func(time_df['temperature'], time_df['brightness'], degree=1)\n",
"time_df.loc[:, 'reg-02'] = reg_func(time_df['temperature'], time_df['brightness'], degree=2)\n",
"time_df.loc[:, 'reg-03'] = reg_func(time_df['temperature'], time_df['brightness'], degree=3)\n",
"time_df.loc[:, 'reg-04'] = reg_func(time_df['temperature'], time_df['brightness'], degree=4)\n",
"time_df.loc[:, 'reg-05'] = reg_func(time_df['temperature'], time_df['brightness'], degree=5)\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "timely-utility",
"metadata": {},
"outputs": [],
"source": [
"ax = time_df.plot.scatter(x=\"temperature\", y=\"brightness\", figsize=(10,10))\n",
"\n",
"\n",
"# ------- Add regression functions here -------\n",
"time_df.plot(x='temperature',y='reg-01',color='Red', ax=ax)\n",
"time_df.plot(x='temperature',y='reg-02',color='Green', ax=ax)\n",
"time_df.plot(x='temperature',y='reg-03',color='Purple', ax=ax)\n",
"time_df.plot(x='temperature',y='reg-04',color='Yellow', ax=ax)\n",
"time_df.plot(x='temperature',y='reg-05',color='Orange', ax=ax)\n",
"\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}