from ipywidgets import FloatProgress
from IPython.display import display
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import datetime
# ダウンロードしたデータは新しい順（先頭が最新）に並んでいます。
# Load the water-level CSV: the first line is skipped and no header row is
# used, so the three columns are named explicitly.
filename = "sparql-2017.csv"
df_level = pd.read_csv(filename, header=None, skiprows=1)
df_level.columns = ["url", "datetime", "level"]

# Parse each timestamp, move it into the index, and sort ascending by time.
df_level["datetime"] = df_level["datetime"].map(pd.to_datetime)
df_level = df_level.set_index("datetime").sort_index()

# Quick look at the level series on a fixed 0-250 y-axis.
df_level.level.plot(figsize=(15, 5), ylim=(0, 250))
# Load the rainfall CSV (Shift-JIS encoded; the first four lines are skipped
# and the next line is consumed as the header before columns are renamed).
filename = "data-2017.csv"
df_rain = pd.read_csv(filename, encoding="SHIFT-JIS", skiprows=4)
df_rain.columns = ["datetime", "rain", "現象なし情報", "品質情報", "均質番号"]

# Parse each timestamp and use it as the index.
df_rain["datetime"] = df_rain["datetime"].map(pd.to_datetime)
df_rain = df_rain.set_index("datetime")
# Overlay water level and rainfall on one figure; rainfall is multiplied by 5
# so that it is visible on the same 0-250 axis as the level.
plt.figure(figsize=(15, 5))
plt.ylim(0, 250)
for series in (df_level["level"], df_rain["rain"] * 5):
    plt.plot(series)
# Build the supervised data set from consecutive rainfall timestamps.
#
# For every position i, two adjacent windows of water-level readings are
# taken: [ixs[i+1], ixs[i+2]] (features) and [ixs[i+2], ixs[i+3]] (target).
# Label slicing on the DatetimeIndex includes both end points.
# A sample is kept only when both windows hold more than 10 readings.
#   features: the 10 highest levels of the first window (descending)
#             followed by the rainfall value at position i
#   target  : the maximum level of the second window

# Keep only rainfall rows strictly before the last water-level timestamp.
df_rain = df_rain[df_rain.index < df_level.index.max()]
ixs = df_rain.index

rows = []
y = []
for i, (dt1, dt2, dt3) in enumerate(zip(ixs[1:], ixs[2:], ixs[3:])):
    d1 = df_level.loc[dt1:dt2, "level"].tolist()
    d2 = df_level.loc[dt2:dt3, "level"].tolist()
    if len(d1) > 10 and len(d2) > 10:
        y.append(max(d2))
        d1 = sorted(d1, reverse=True)[:10]
        # Positional lookup: `.ix` was removed in pandas 1.0; `.iloc` is the
        # explicit positional equivalent on this datetime-indexed frame.
        # NOTE(review): the rainfall used is the one at ixs[i], i.e. one step
        # before the feature window starts - confirm this lag is intended.
        d1.append(df_rain["rain"].iloc[i])
        rows.append(d1)

df = pd.DataFrame(rows)
df["y"] = y
df.shape
df.head()
# Split the samples into features/target and into train/test partitions.
# Every column except the last ("y") is a feature. The split is by position,
# not shuffled: the first 90% of the rows are used for training and the
# remaining rows for testing.
X_cols = df.columns[:-1]
# `.values` replaces `as_matrix()`, which was removed in pandas 1.0.
X = df[X_cols].values.astype("float")
y = df.y.values.astype("int").flatten()

num = int(len(X) * 0.9)
print(len(X), num, len(X)-num)

X_train, X_test = X[:num], X[num:]
y_train, y_test = y[:num], y[num:]
# Standardization
# The scaler is fitted on the training features only and then applied to both
# partitions. The bare `X_train.std()` expressions show the spread before and
# after scaling when run interactively.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(X_train)
X_train.std()
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
X_train.std()
# Model selection
# Three candidate regressors are assigned to `model` one after another, all
# with random_state=42. When this file runs top to bottom only the last
# assignment (MLPRegressor) is in effect; the earlier ones are overwritten.
# Random forest
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(random_state=42)
# Gradient boosting
from sklearn.ensemble import GradientBoostingRegressor
model = GradientBoostingRegressor(random_state=42)
# Neural network (multi-layer perceptron)
from sklearn.neural_network import MLPRegressor
model = MLPRegressor(random_state=42)
# Bare expression: shows the selected estimator when run interactively.
model
# Training and prediction
# `fit` returns the estimator itself, so training and inference are chained.
result = model.fit(X_train, y_train).predict(X_test)
result.shape

# Score on the held-out partition (scikit-learn regressors report R^2 here).
test_score = model.score(X_test, y_test)
print(test_score)
# Plot actual vs. predicted levels for the test partition, together with the
# rainfall feature (last feature column) multiplied by 5.
# NOTE(review): X_test has already been passed through `scaler.transform`, so
# the "rain" column here is the standardized value, not the raw reading -
# confirm whether the raw rainfall was intended.
pp = pd.DataFrame({'act': np.array(y_test), "pred": np.array(result), "rain": X_test[:,-1]})
# Alternative without the actual values:
#pp = pd.DataFrame({"pred": np.array(result), "rain": X_test[:,-1]})
pp.rain = pp.rain * 5

plt.figure(figsize=(15,5))
plt.ylim(0,250)
# One line is drawn per DataFrame column. The legend is created by calling
# plt.legend; assigning to `plt.legend` would overwrite the pyplot function
# and draw nothing.
handles = plt.plot(pp)
plt.legend(handles, list(pp.columns))
# What-if analysis: pick one random sample, keep its 10 level features fixed,
# sweep the rainfall feature over 0..20 and plot the predicted level.
import random

# randrange excludes the upper bound; randint(0, len(df)) could return
# len(df), which is one past the last valid row.
i = random.randrange(len(df))
# `.iloc` / `.values` replace `.ix` / `as_matrix()` (removed in pandas 1.0).
d = df.iloc[i].values.tolist()
print(i, d)

# One row per candidate rainfall value: the 10 level features + rainfall.
df_test = [d[:10] + [rain] for rain in range(21)]

# Apply the same scaling that was used for training before predicting.
d = scaler.transform(np.array(df_test).astype("float"))
test = model.predict(d)
test.tolist()

#plt.ylim(50,200)
plt.xlim(0,20)
plt.plot(test)