オープンデータ活用

福井県鯖江市
http://data.city.sabae.lg.jp/opendata-list/
上記サイトにある「水位データ(福井県鯖江市)」をダウンロード

気象庁
http://www.data.jma.go.jp/gmd/risk/obsdl/index.php
上記サイトより福井市の降水量のデータを取得

ライブラリの読込

from ipywidgets import FloatProgress
from IPython.display import display

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
import datetime

水位データの読込

ダウンロードしたデータは新しい順(先頭が最新)に並んでいるため、読み込み後に日時の昇順へ並べ替えます。

# Load the river water-level export. The accompanying note says the file
# lists the newest readings first, so the frame is sorted by timestamp once
# the datetime index is in place.
filename = "sparql-2017.csv"
# skiprows=1 drops the first line (presumably the file's own header row);
# column names are assigned explicitly below.
df_level = pd.read_csv(filename, header=None, skiprows=1)

df_level.columns = ["url", "datetime", "level"]

# Parse the whole column in one vectorized call instead of invoking
# pd.to_datetime once per row.
df_level["datetime"] = pd.to_datetime(df_level["datetime"])

df_level = df_level.set_index("datetime").sort_index()

df_level["level"].plot(figsize=(15, 5), ylim=(0, 250))

<matplotlib.axes._subplots.AxesSubplot at 0x111964dd8>

降水量データの読込

# Load the precipitation export. The file is read as Shift-JIS and its first
# four lines are skipped; the next line is consumed as a header and then
# replaced by the explicit column names below.
filename = "data-2017.csv"
df_rain = pd.read_csv(filename, encoding="SHIFT-JIS", skiprows=4)

df_rain.columns = ["datetime", "rain", "現象なし情報", "品質情報", "均質番号"]

# Parse the whole column in one vectorized call instead of invoking
# pd.to_datetime once per row.
df_rain["datetime"] = pd.to_datetime(df_rain["datetime"])
df_rain = df_rain.set_index("datetime")

# Overlay water level and rainfall on one axis. Rainfall is multiplied by 5,
# presumably only so that it is visible at the water-level scale.
plt.figure(figsize=(15, 5))
plt.ylim(0, 250)
plt.plot(df_level.level)
plt.plot(df_rain.rain * 5)

[<matplotlib.lines.Line2D at 0x11240b080>]

データ加工

# Build the supervised-learning table.
#
# For each rainfall timestamp at position i:
#   * features: the N_TOP highest water levels recorded between rainfall
#     timestamps i+1 and i+2 (sorted descending), followed by the rainfall
#     value at position i;
#   * target y: the maximum water level recorded between rainfall
#     timestamps i+2 and i+3.
# Windows that do not contain more than N_TOP level readings are skipped.

# Keep only rainfall rows strictly before the last water-level reading.
df_rain = df_rain[df_rain.index < df_level.index.max()]

ixs = df_rain.index
rain = df_rain["rain"]
levels = df_level["level"]

N_TOP = 10  # number of highest level readings kept per window

df = []
y = []

for i in range(len(ixs) - 3):
    dt1, dt2, dt3 = ixs[i + 1], ixs[i + 2], ixs[i + 3]

    # Label-based slices on the datetime index include both endpoints.
    d1 = levels.loc[dt1:dt2].tolist()
    d2 = levels.loc[dt2:dt3].tolist()

    if len(d1) > N_TOP and len(d2) > N_TOP:
        y.append(max(d2))
        features = sorted(d1, reverse=True)[:N_TOP]
        # Positional lookup: .iloc replaces the removed .ix indexer, which
        # fell back to positional access for an integer on a datetime index.
        features.append(rain.iloc[i])
        df.append(features)

df = pd.DataFrame(df)
df["y"] = y

df.shape

(6862, 12)

# Preview the first rows of the assembled feature/target table.
df.head()

機械学習

# Every column except the trailing "y" target is a model input.
X_cols = df.columns[:-1]

# .to_numpy() replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
X = df[X_cols].to_numpy().astype("float")
# A Series converts to a 1-D array, so no flatten() is needed.
y = df.y.to_numpy().astype("int")

# Split in row order (no shuffling): first 90% for training, the rest for test.
num = int(len(X) * 0.9)
print(len(X), num, len(X) - num)

X_train, X_test = X[:num], X[num:]
y_train, y_test = y[:num], y[num:]

6862 6175 687

# Standardization: fit the scaler on the training rows only.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(X_train)

# Spread of the raw training features, shown for comparison with the value
# after scaling.
X_train.std()

28.81180290625171

# Apply the already-fitted scaler to both partitions.
X_train, X_test = (scaler.transform(part) for part in (X_train, X_test))

# Spread of the training features after scaling.
X_train.std()

1.0

# Model selection
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.neural_network import MLPRegressor

# Candidate regressors keyed by a short name. Only the class chosen through
# MODEL_NAME is instantiated.
MODEL_CLASSES = {
    "random_forest": RandomForestRegressor,
    "gradient_boosting": GradientBoostingRegressor,
    "mlp": MLPRegressor,  # neural network
}
MODEL_NAME = "mlp"  # change this key to try another candidate

model = MODEL_CLASSES[MODEL_NAME](random_state=42)

model

MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=42, shuffle=True,
       solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)

# Training and prediction: fit on the training partition, then predict the
# test rows (fit() returns the estimator itself, so the calls chain).
result = model.fit(X_train, y_train).predict(X_test)
result.shape

(687,)

# Score: evaluate the fitted model on the test rows and print the result.
test_score = model.score(X_test, y_test)
print(test_score)

0.937992677762

# Compare actual and predicted levels on the test rows, with the rainfall
# feature overlaid.
pp = pd.DataFrame({
    "act": np.array(y_test),
    "pred": np.array(result),
    # Last feature column is rainfall. NOTE: X_test has already been passed
    # through the scaler, so this is the scaled value, not the raw reading.
    "rain": X_test[:, -1],
})
pp["rain"] = pp["rain"] * 5

plt.figure(figsize=(15, 5))
plt.ylim(0, 250)
for column in pp.columns:
    plt.plot(pp.index, pp[column], label=column)
# Call plt.legend(); assigning to plt.legend would overwrite the pyplot
# function and leave the figure without a legend.
plt.legend()

[<matplotlib.lines.Line2D at 0x11554a400>,
 <matplotlib.lines.Line2D at 0x11554a5c0>,
 <matplotlib.lines.Line2D at 0x11554a7b8>]

import random

# Pick one row of the feature table at random. randrange() excludes len(df),
# whereas randint(0, len(df)) could return an out-of-range position.
sample_idx = random.randrange(len(df))
# .iloc / .to_numpy() replace the removed .ix / .as_matrix() APIs.
sample = df.iloc[sample_idx].to_numpy().tolist()
print(sample_idx, sample)

# Hold the ten level features of the sample fixed and sweep the rainfall
# feature over the integers 0..20.
df_test = [sample[:10] + [rain_amount] for rain_amount in range(21)]

# The model was trained on scaled inputs, so scale the synthetic rows too.
d = scaler.transform(np.array(df_test).astype("float"))

test = model.predict(d)

# Predicted level as a function of the swept rainfall value.
plt.xlim(0, 20)
plt.plot(test)

2035 [65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 0.0, 65.0]

[<matplotlib.lines.Line2D at 0x1160a6c50>]

	0	1	2	3	4	5	6	7	8	9	y
0	66	65	65	65	65	65	65	65	64	64	64
1	64	64	64	64	64	64	64	64	64	64	64
2	64	64	64	64	63	63	63	63	63	63	64
3	64	64	64	64	64	64	63	63	63	63	65
4	65	65	65	64	64	64	64	64	64	64	65

	0	1	2	3	4	5	6	7	8	9	y
0	66	65	65	65	65	65	65	65	64	64	64
1	64	64	64	64	64	64	64	64	64	64	64
2	64	64	64	64	63	63	63	63	63	63	64
3	64	64	64	64	64	64	63	63	63	63	65
4	65	65	65	64	64	64	64	64	64	64	65

	0	1	2	3	4	5	6	7	8	9	y
0	66	65	65	65	65	65	65	65	64	64	64
1	64	64	64	64	64	64	64	64	64	64	64
2	64	64	64	64	63	63	63	63	63	63	64
3	64	64	64	64	64	64	63	63	63	63	65
4	65	65	65	64	64	64	64	64	64	64	65