From a72e27f85bf9fd1a1a8431bce4e54138a8765aff Mon Sep 17 00:00:00 2001 From: Roy Hyunjin Han Date: Mon, 18 Jan 2016 16:45:08 -0500 Subject: [PATCH] Fix scripts --- .gitignore | 5 +- CHANGES.rst | 0 README.rst | 0 TODO.goals | 2 +- ...ted-regression-extrapolated-function.ipynb | 449 ++++++++++++++++++ forecast_demographic_from_series.py | 8 +- infrastructure_planning/demography/linear.py | 17 +- .../electricity/consumption/linear.py | 9 +- infrastructure_planning/growth.py | 42 -- infrastructure_planning/growth/__init__.py | 6 + infrastructure_planning/growth/fitted.py | 14 +- .../growth/interpolated.py | 2 +- setup.cfg | 2 + setup.py | 26 + tests/test_growth_fitted.py | 23 + 15 files changed, 545 insertions(+), 60 deletions(-) create mode 100644 CHANGES.rst create mode 100644 README.rst delete mode 100644 infrastructure_planning/growth.py create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 tests/test_growth_fitted.py diff --git a/.gitignore b/.gitignore index 2554066..83915e9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ -*.swp +*.egg-info *.pyc +*.swp +.cache +.eggs .ipynb_checkpoints diff --git a/CHANGES.rst b/CHANGES.rst new file mode 100644 index 0000000..e69de29 diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..e69de29 diff --git a/TODO.goals b/TODO.goals index 6a79de0..7068dcc 100644 --- a/TODO.goals +++ b/TODO.goals @@ -1,4 +1,4 @@ -= Prototype tool to get population-consumption curve points by world, region, income group +Prototype tool to get population-consumption curve points by world, region, income group Make forecast_population use new spline fitting model diff --git a/experiments/get-spline-interpolated-regression-extrapolated-function.ipynb b/experiments/get-spline-interpolated-regression-extrapolated-function.ipynb index 312741a..44e8483 100644 --- a/experiments/get-spline-interpolated-regression-extrapolated-function.ipynb +++ b/experiments/get-spline-interpolated-regression-extrapolated-function.ipynb @@ -460,6 +460,455 @@ "print f(0)\n", "print f(1)" ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "xs = 1, 2, 3\n", + "ys = 4, 5, 6" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[(1, 4), (2, 5), (3, 6)]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "zip(xs, ys)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "m = LinearRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "m.fit?" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/lib64/python2.7/site-packages/sklearn/utils/validation.py:386: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. 
Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.\n", + " DeprecationWarning)\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Found arrays with inconsistent numbers of samples: [1 2]", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mxs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mys\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mxs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mys\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m/usr/lib64/python2.7/site-packages/sklearn/linear_model/base.pyc\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m 425\u001b[0m \u001b[0mn_jobs_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 426\u001b[0m X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],\n\u001b[1;32m--> 427\u001b[1;33m y_numeric=True, multi_output=True)\n\u001b[0m\u001b[0;32m 428\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 429\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msample_weight\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0matleast_1d\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m/usr/lib64/python2.7/site-packages/sklearn/utils/validation.pyc\u001b[0m in \u001b[0;36mcheck_X_y\u001b[1;34m(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m 518\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfloat64\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 519\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 520\u001b[1;33m \u001b[0mcheck_consistent_length\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 521\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 522\u001b[0m \u001b[1;32mreturn\u001b[0m 
\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m/usr/lib64/python2.7/site-packages/sklearn/utils/validation.pyc\u001b[0m in \u001b[0;36mcheck_consistent_length\u001b[1;34m(*arrays)\u001b[0m\n\u001b[0;32m 174\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0muniques\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 175\u001b[0m raise ValueError(\"Found arrays with inconsistent numbers of samples: \"\n\u001b[1;32m--> 176\u001b[1;33m \"%s\" % str(uniques))\n\u001b[0m\u001b[0;32m 177\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 178\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mValueError\u001b[0m: Found arrays with inconsistent numbers of samples: [1 2]" + ] + } + ], + "source": [ + "import numpy as np\n", + "xs = np.array([0, 0])\n", + "ys = np.array([0, 1])\n", + "m.fit(xs.reshape(-1, 1), ys)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "xs, ys = zip(*[(0, 1), (1, 0), (2, 2), (3, 1)])" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m.fit(np.array(xs).reshape(-1, 1), ys)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0.9])" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m.predict(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0]])" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array([0]).reshape(-1, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[0, 0]])" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = np.array([\n", + " (0, 0),\n", + "])\n", + "print len(x)\n", + "x" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0, 0]])" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x.reshape(1, -1)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "x = np.array([0, 1, 2])" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 2, 3])" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x + 1" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + 
"text/plain": [ + "array([0, 2, 4])" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x * 2" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.70000000000000007" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m.intercept_" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 0.]\n", + "[ 0.2]\n", + "[ 0.]\n", + "[ 0.5]\n" + ] + } + ], + "source": [ + "m.coef_ = np.array([0.2])\n", + "print m.predict(0)\n", + "print m.predict(1)\n", + "m.coef_ = np.array([0.5])\n", + "print m.predict(0)\n", + "print m.predict(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m.fit(np.array([[0]]), [0])" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0.])" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m.coef_" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0, 1],\n", + " [2, 3]])" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = np.array([[0, 1], [2, 3]])\n", + "x" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The slowest run took 6.11 times longer than the fastest. This could mean that an intermediate result is being cached \n", + "100000 loops, best of 3: 2.61 µs per loop\n" + ] + } + ], + "source": [ + "timeit zip(*x)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The slowest run took 11.98 times longer than the fastest. 
This could mean that an intermediate result is being cached \n", + "1000000 loops, best of 3: 577 ns per loop\n" + ] + } + ], + "source": [ + "timeit x[:, 0], x[:, 1]" + ] } ], "metadata": { diff --git a/forecast_demographic_from_series.py b/forecast_demographic_from_series.py index 040f985..970cad9 100644 --- a/forecast_demographic_from_series.py +++ b/forecast_demographic_from_series.py @@ -16,7 +16,13 @@ def run( demographic_by_year_table_population_column, default_yearly_population_growth_percent): d = [] - demographic_by_year_table = forecast_demographic_from_series(*args) + demographic_by_year_table = forecast_demographic_from_series( + target_year, + demographic_by_year_table, + demographic_by_year_table_name_column, + demographic_by_year_table_year_column, + demographic_by_year_table_population_column, + default_yearly_population_growth_percent) demographic_by_year_table_path = join( target_folder, 'demographic-by-year.csv') demographic_by_year_table.to_csv( diff --git a/infrastructure_planning/demography/linear.py b/infrastructure_planning/demography/linear.py index 4c46430..2e8ffde 100644 --- a/infrastructure_planning/demography/linear.py +++ b/infrastructure_planning/demography/linear.py @@ -2,7 +2,8 @@ from collections import defaultdict from pandas import DataFrame, concat -from ..growth import get_future_years, get_linear_model, prepare_xs +from ..growth import get_future_years +from ..growth.fitted import get_fitted_linear_function make_whole_number = lambda x: int(x) if x > 0 else 0 @@ -30,12 +31,12 @@ def forecast_demographic_from_series( demographic_by_year_table_year_column, demographic_by_year_table_population_column) - growth_models = [get_linear_model( + estimate_populations = [get_fitted_linear_function( year_packs, default_yearly_population_growth_percent, ) for name, year_packs in name_packs] name_packs = _estimate_future_population_counts( - target_year, name_packs, growth_models) + target_year, name_packs, estimate_populations) return concat([demographic_by_year_table, _get_demographic_by_year_table( name_packs, @@ -60,15 +61,15 @@ def _get_name_packs( def _estimate_future_population_counts( - target_year, name_packs, growth_models): + target_year, name_packs, estimate_populations): extended_name_packs = [] make_whole_numbers = np.vectorize(make_whole_number) - for (name, year_packs), growth_model in zip(name_packs, growth_models): - years = get_future_years(target_year, year_packs) + for (name, year_packs), estimate_population in zip( + name_packs, estimate_populations): + years = get_future_years(target_year, [x[0] for x in year_packs]) if not years: continue - populations = make_whole_numbers( - growth_model.predict(prepare_xs(years))) + populations = make_whole_numbers(estimate_population(years)) extended_name_packs.append((name, zip(years, populations))) return extended_name_packs diff --git a/infrastructure_planning/electricity/consumption/linear.py b/infrastructure_planning/electricity/consumption/linear.py index 5644708..b67fc6b 100644 --- a/infrastructure_planning/electricity/consumption/linear.py +++ b/infrastructure_planning/electricity/consumption/linear.py @@ -1,6 +1,7 @@ from pandas import DataFrame, concat, merge -from ...growth import get_future_years, get_linear_model, prepare_xs +from ...growth import get_future_years +from ...growth.fitted import get_fitted_linear_function def estimate_electricity_consumption_from_series( @@ -58,13 +59,13 @@ def forecast_electricity_consumption_per_capita_from_series( 
electricity_consumption_per_capita_by_year_table_consumption_per_capita_column, # noqa ]].values - growth_model = get_linear_model( + estimate_electricity_consumption = get_fitted_linear_function( year_packs, default_yearly_electricity_consumption_growth_percent) - years = get_future_years(target_year, year_packs) + years = get_future_years(target_year, [x[0] for x in year_packs]) if not years: return electricity_consumption_per_capita_by_year_table - values = growth_model.predict(prepare_xs(years)) + values = estimate_electricity_consumption(years) return concat([ electricity_consumption_per_capita_by_year_table, diff --git a/infrastructure_planning/growth.py b/infrastructure_planning/growth.py deleted file mode 100644 index f70291b..0000000 --- a/infrastructure_planning/growth.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np -from sklearn.linear_model import LinearRegression - - -prepare_xs = lambda x: np.array(x).reshape(-1, 1) - - -def get_future_years(target_year, year_packs): - past_years = sorted(int(x[0]) for x in year_packs) - future_years = range(past_years[-1] + 1, target_year) - if target_year not in past_years: - future_years.append(target_year) - return future_years - - -def get_linear_model(time_series, default_growth_percent): - 'Fit linear model on time_series' - time_packs = _get_time_packs(time_series) - default_growth_factor = 1 + default_growth_percent / 100. - model = LinearRegression() - if len(time_packs) == 1: - time, value = time_packs[-1] - times = [time, time + 1] - values = [value, value * default_growth_factor] - else: - time_array = np.array(time_packs) - times = time_array[:, 0] - values = time_array[:, 1] - model.fit(prepare_xs(times), values) - return model - - -def _get_time_packs(time_series): - 'Convert series into list of tuples' - if hasattr(time_series, 'values'): - time_count = len(time_series) - xs = time_series.index.values.reshape(time_count, 1) - ys = time_series.values - time_packs = zip(xs, ys) - else: - time_packs = time_series - return time_packs diff --git a/infrastructure_planning/growth/__init__.py b/infrastructure_planning/growth/__init__.py index e69de29..531ddc2 100644 --- a/infrastructure_planning/growth/__init__.py +++ b/infrastructure_planning/growth/__init__.py @@ -0,0 +1,6 @@ +def get_future_years(target_year, past_years): + past_years = sorted(past_years) + future_years = range(int(past_years[-1]) + 1, target_year) + if target_year not in past_years: + future_years.append(target_year) + return future_years diff --git a/infrastructure_planning/growth/fitted.py b/infrastructure_planning/growth/fitted.py index e38445e..7128002 100644 --- a/infrastructure_planning/growth/fitted.py +++ b/infrastructure_planning/growth/fitted.py @@ -1,2 +1,12 @@ -def get_fitted_linear_function(): - pass +import numpy as np +from sklearn.linear_model import LinearRegression + + +def get_fitted_linear_function(xys, default_slope=0): + xys = np.array(xys) + xs, ys = xys[:, 0], xys[:, 1] + model = LinearRegression() + model.fit(xs.reshape(-1, 1), ys) + slope = model.coef_[0] if len(set(xs)) > 1 else default_slope + y_intercept = model.intercept_ + return lambda x: slope * np.array(x) + y_intercept diff --git a/infrastructure_planning/growth/interpolated.py b/infrastructure_planning/growth/interpolated.py index 6aa3a3e..09c0257 100644 --- a/infrastructure_planning/growth/interpolated.py +++ b/infrastructure_planning/growth/interpolated.py @@ -1,2 +1,2 @@ -def get_interpolated_spline_extrapolated_linear_function(): +def 
get_interpolated_spline_extrapolated_linear_function(xys, default_slope): pass diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..b7e4789 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[aliases] +test=pytest diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..1adcaf9 --- /dev/null +++ b/setup.py @@ -0,0 +1,26 @@ +from os.path import abspath, dirname, join +from setuptools import find_packages, setup + + +FOLDER = dirname(abspath(__file__)) +DESCRIPTION = '\n\n'.join(open(join(FOLDER, x)).read().strip() for x in [ + 'README.rst', 'CHANGES.rst']) +setup( + name='infrastructure-planning', + version='0.0.1', + description='Infrastructure planning tools', + long_description=DESCRIPTION, + classifiers=[ + 'Programming Language :: Python', + 'License :: OSI Approved :: MIT License', + ], + author='Roy Hyunjin Han', + author_email='rhh@crosscompute.com', + url='http://crosscompute.com', + keywords='crosscompute', + packages=find_packages(), + include_package_data=True, + zip_safe=False, + setup_requires=['pytest-runner'], + install_requires=[], + tests_require=['pytest']) diff --git a/tests/test_growth_fitted.py b/tests/test_growth_fitted.py new file mode 100644 index 0000000..1e5c4e1 --- /dev/null +++ b/tests/test_growth_fitted.py @@ -0,0 +1,23 @@ +import numpy as np +from infrastructure_planning.growth.fitted import get_fitted_linear_function + + +def test_get_fitted_linear_function(): + # If there is only one unique x, expect default_slope + f = get_fitted_linear_function([(0, 0)], default_slope=5) + assert np.isclose(f(0), 0) + assert np.isclose(f(1), 5) + f = get_fitted_linear_function([(0, 0), (0, 1)], default_slope=5) + assert np.isclose(f(0), 0.5) + assert np.isclose(f(1), 5.5) + # If there is more than one unique x, expect fitted model + f = get_fitted_linear_function([(0, 0), (1, 1)]) + assert np.isclose(f(0), 0) + assert np.isclose(f(1), 1) + f = get_fitted_linear_function([(0, 0), (0, 1), (1, 1), (1, 2)]) + assert np.isclose(f(0), 0.5) + assert np.isclose(f(1), 1.5) + f = get_fitted_linear_function([(0, 1), (1, 0), (2, 2), (3, 1)]) + assert np.isclose(f(0), 0.7) + assert np.isclose(f(1), 0.9) + assert np.allclose(f([0, 1]), [0.7, 0.9]) # Given xs, return ys
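
For reference, a minimal usage sketch of the two growth helpers introduced by this patch, mirroring how the new caller in infrastructure_planning/demography/linear.py invokes them. The year/population pairs and the target year below are hypothetical sample data, not values from the repository; default_slope is only used when every x value (year) is identical, per get_fitted_linear_function.

    # Illustrative sketch only: sample data is made up (Python 2, as used by this repository)
    from infrastructure_planning.growth import get_future_years
    from infrastructure_planning.growth.fitted import get_fitted_linear_function

    year_packs = [(2000, 100), (2005, 110), (2010, 125)]  # (year, population)
    # Fit a linear trend; default_slope=2 would apply only if all years were identical
    estimate_population = get_fitted_linear_function(year_packs, default_slope=2)
    # Years strictly after the last observed year, up to and including the target year
    future_years = get_future_years(2015, [x[0] for x in year_packs])
    print(zip(future_years, estimate_population(future_years)))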