diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..58461f2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.ipynb_checkpoints
\ No newline at end of file
diff --git a/.pytest_cache/v/cache/nodeids b/.pytest_cache/v/cache/nodeids
new file mode 100644
index 0000000..0637a08
--- /dev/null
+++ b/.pytest_cache/v/cache/nodeids
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/src/.pytest_cache/v/cache/nodeids b/src/.pytest_cache/v/cache/nodeids
new file mode 100644
index 0000000..0637a08
--- /dev/null
+++ b/src/.pytest_cache/v/cache/nodeids
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/__pycache__/__init__.cpython-36.pyc b/src/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..d051bdd
Binary files /dev/null and b/src/__pycache__/__init__.cpython-36.pyc differ
diff --git a/src/__pycache__/assessment.cpython-36.pyc b/src/__pycache__/assessment.cpython-36.pyc
new file mode 100644
index 0000000..b957338
Binary files /dev/null and b/src/__pycache__/assessment.cpython-36.pyc differ
diff --git a/assessment.py b/src/assessment.py
similarity index 72%
rename from assessment.py
rename to src/assessment.py
index 281675d..410b283 100644
--- a/assessment.py
+++ b/src/assessment.py
@@ -14,7 +14,13 @@ def count_characters(string):
     Characters which with a count of 0 should not be included in the
     output dictionary.
     '''
-    pass
+    char_dict = {}
+    for char in string:
+        if char not in char_dict:
+            char_dict[char] = 1
+        else:
+            char_dict[char] += 1
+    return char_dict
 
 
 def invert_dictionary(d):
@@ -28,7 +34,13 @@ def invert_dictionary(d):
     the set of d's keys which shared the same value.
     e.g. {'a': 2, 'b': 4, 'c': 2} => {2: {'a', 'c'}, 4: {'b'}}
     '''
-    pass
+    new_d = {}
+    for k,v in d.items():
+        if v not in new_d:
+            new_d[v] = set(k)
+        else:
+            new_d[v].update(k)
+    return new_d
 
 
 def word_count(filename):
@@ -44,7 +56,17 @@ def word_count(filename):
       2. number of words (broken by whitespace)
       3. number of characters
     '''
-    pass
+    line_count = 0
+    word_count = 0
+    char_count = 0
+    with open(filename, 'r') as f:
+        for line in f:
+            words = line.split()
+            line_count += 1
+            word_count += len(words)
+            char_count += len(line)
+
+    return line_count, word_count, char_count
 
 
 def matrix_multiplication(A, B):
@@ -67,7 +89,24 @@ def matrix_multiplication(A, B):
 
     Please do not use numpy. Write your solution in straight python.
     '''
-    pass
+    
+    new_matrix = []
+
+    # for Arow in range(len(A)):
+    #     #new_matrix     row.append ETC [row]
+    #     for Bcol in range(len(B[0])):
+    #         for Brow in range(len(B)):
+    #             new_matrix[Arow][Bcol] += A[Arow][Brow] * B[Brow][Bcol]
+
+    new_matrix = [[sum(a*b for a,b in zip(A_row, B_col))
+               for B_col in zip(*B)] for A_row in A]
+
+    return new_matrix
+
+# a = [[5, 3, 8], [9, 0, 1],[6,9,3]]
+# b = [[1, 2, 3], [4, 5, 6],[6,1,2]]
+# print(matrix_multiplication(a,b))
+
 
 
 # NumPy SECTION
@@ -89,7 +128,8 @@ def array_work(rows, cols, scalar, matrixA):
             [5, 6],   *   [5, 5, 5]]
             [7, 8]]
     '''
-    pass
+    matrixNew = np.full((rows, cols), scalar)
+    return np.dot(matrixA, matrixNew)
 
 
 def boolean_indexing(arr, minimum):
@@ -105,7 +145,7 @@ def boolean_indexing(arr, minimum):
     In [1]: boolean_indexing([[3, 4, 5], [6, 7, 8]], 7)
     Out[1]: array([7, 8])
     '''
-    pass
+    return arr[arr >= minimum]
 
 
 # Pandas SECTION
@@ -128,7 +168,9 @@ def make_series(start, length, index):
     c    7
     dtype: int64
     '''
-    pass
+    return pd.Series(range(start,start+length), index=index)
+
+# print (make_series(5, 3, ['a', 'b', 'c']))
 
 
 def data_frame_work(df, colA, colB, colC):
@@ -139,4 +181,4 @@ def data_frame_work(df, colA, colB, colC):
     Insert a column (colC) into the dataframe that is the sum of colA and colB.
     Assume that df contains columns colA and colB and that these are numeric.
     '''
-    pass
+    df[colC] = df[colA] + df[colB]
diff --git a/test/.pytest_cache/v/cache/lastfailed b/test/.pytest_cache/v/cache/lastfailed
new file mode 100644
index 0000000..c878883
--- /dev/null
+++ b/test/.pytest_cache/v/cache/lastfailed
@@ -0,0 +1,3 @@
+{
+  "testing.py::test_word_count": true
+}
\ No newline at end of file
diff --git a/test/.pytest_cache/v/cache/nodeids b/test/.pytest_cache/v/cache/nodeids
new file mode 100644
index 0000000..86b78dc
--- /dev/null
+++ b/test/.pytest_cache/v/cache/nodeids
@@ -0,0 +1,3 @@
+[
+  "testing.py::test_matrix_multiplication"
+]
\ No newline at end of file
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/__pycache__/__init__.cpython-36.pyc b/test/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..0cae296
Binary files /dev/null and b/test/__pycache__/__init__.cpython-36.pyc differ
diff --git a/test/__pycache__/testing.cpython-36-PYTEST.pyc b/test/__pycache__/testing.cpython-36-PYTEST.pyc
new file mode 100644
index 0000000..ac7882f
Binary files /dev/null and b/test/__pycache__/testing.cpython-36-PYTEST.pyc differ
diff --git a/test/alice.txt b/test/alice.txt
new file mode 100644
index 0000000..84bf3cc
--- /dev/null
+++ b/test/alice.txt
@@ -0,0 +1,17 @@
+Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversations?'
+So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her.
+There was nothing so VERY remarkable in that; nor did Alice think it so VERY much out of the way to hear the Rabbit say to itself, 'Oh dear! Oh dear! I shall be late!' (when she thought it over afterwards, it occurred to her that she ought to have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually TOOK A WATCH OUT OF ITS WAISTCOAT-POCKET, and looked at it, and then hurried on, Alice started to her feet, for it flashed across her mind that she had never before seen a rabbit with either a waistcoat-pocket, or a watch to take out of it, and burning with curiosity, she ran across the field after it, and fortunately was just in time to see it pop down a large rabbit-hole under the hedge.
+In another moment down went Alice after it, never once considering how in the world she was to get out again.
+The rabbit-hole went straight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not a moment to think about stopping herself before she found herself falling down a very deep well.
+Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cupboards and book-shelves; here and there she saw maps and pictures hung upon pegs. She took down a jar from one of the shelves as she passed; it was labelled 'ORANGE MARMALADE', but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it.
+'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! How brave they'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' (Which was very likely true.)
+Down, down, down. Would the fall NEVER come to an end! 'I wonder how many miles I've fallen by this time?' she said aloud. 'I must be getting somewhere near the centre of the earth. Let me see: that would be four thousand miles down, I think—' (for, you see, Alice had learnt several things of this sort in her lessons in the schoolroom, and though this was not a VERY good opportunity for showing off her knowledge, as there was no one to listen to her, still it was good practice to say it over) '—yes, that's about the right distance—but then I wonder what Latitude or Longitude I've got to?' (Alice had no idea what Latitude was, or Longitude either, but thought they were nice grand words to say.)
+Presently she began again. 'I wonder if I shall fall right THROUGH the earth! How funny it'll seem to come out among the people that walk with their heads downward! The Antipathies, I think—' (she was rather glad there WAS no one listening, this time, as it didn't sound at all the right word) '—but I shall have to ask them what the name of the country is, you know. Please, Ma'am, is this New Zealand or Australia?' (and she tried to curtsey as she spoke—fancy CURTSEYING as you're falling through the air! Do you think you could manage it?) 'And what an ignorant little girl she'll think me for asking! No, it'll never do to ask: perhaps I shall see it written up somewhere.'
+Down, down, down. There was nothing else to do, so Alice soon began talking again. 'Dinah'll miss me very much to-night, I should think!' (Dinah was the cat.) 'I hope they'll remember her saucer of milk at tea-time. Dinah my dear! I wish you were down here with me! There are no mice in the air, I'm afraid, but you might catch a bat, and that's very like a mouse, you know. But do cats eat bats, I wonder?' And here Alice began to get rather sleepy, and went on saying to herself, in a dreamy sort of way, 'Do cats eat bats? Do cats eat bats?' and sometimes, 'Do bats eat cats?' for, you see, as she couldn't answer either question, it didn't much matter which way she put it. She felt that she was dozing off, and had just begun to dream that she was walking hand in hand with Dinah, and saying to her very earnestly, 'Now, Dinah, tell me the truth: did you ever eat a bat?' when suddenly, thump! thump! down she came upon a heap of sticks and dry leaves, and the fall was over.
+Alice was not a bit hurt, and she jumped up on to her feet in a moment: she looked up, but it was all dark overhead; before her was another long passage, and the White Rabbit was still in sight, hurrying down it. There was not a moment to be lost: away went Alice like the wind, and was just in time to hear it say, as it turned a corner, 'Oh my ears and whiskers, how late it's getting!' She was close behind it when she turned the corner, but the Rabbit was no longer to be seen: she found herself in a long, low hall, which was lit up by a row of lamps hanging from the roof.
+There were doors all round the hall, but they were all locked; and when Alice had been all the way down one side and up the other, trying every door, she walked sadly down the middle, wondering how she was ever to get out again.
+Suddenly she came upon a little three-legged table, all made of solid glass; there was nothing on it except a tiny golden key, and Alice's first thought was that it might belong to one of the doors of the hall; but, alas! either the locks were too large, or the key was too small, but at any rate it would not open any of them. However, on the second time round, she came upon a low curtain she had not noticed before, and behind it was a little door about fifteen inches high: she tried the little golden key in the lock, and to her great delight it fitted!
+Alice opened the door and found that it led into a small passage, not much larger than a rat-hole: she knelt down and looked along the passage into the loveliest garden you ever saw. How she longed to get out of that dark hall, and wander about among those beds of bright flowers and those cool fountains, but she could not even get her head through the doorway; 'and even if my head would go through,' thought poor Alice, 'it would be of very little use without my shoulders. Oh, how I wish I could shut up like a telescope! I think I could, if I only knew how to begin.' For, you see, so many out-of-the-way things had happened lately, that Alice had begun to think that very few things indeed were really impossible.
+There seemed to be no use in waiting by the little door, so she went back to the table, half hoping she might find another key on it, or at any rate a book of rules for shutting people up like telescopes: this time she found a little bottle on it, ('which certainly was not here before,' said Alice,) and round the neck of the bottle was a paper label, with the words 'DRINK ME' beautifully printed on it in large letters.
+It was all very well to say 'Drink me,' but the wise little Alice was not going to do THAT in a hurry. 'No, I'll look first,' she said, 'and see whether it's marked "poison" or not'; for she had read several nice little histories about children who had got burnt, and eaten up by wild beasts and other unpleasant things, all because they WOULD not remember the simple rules their friends had taught them: such as, that a red-hot poker will burn you if you hold it too long; and that if you cut your finger VERY deeply with a knife, it usually bleeds; and she had never forgotten that, if you drink much from a bottle marked 'poison,' it is almost certain to disagree with you, sooner or later.
+However, this bottle was NOT marked 'poison,' so Alice ventured to taste it, and finding it very nice, (it had, in fact, a sort of mixed flavour of cherry-tart, custard, pine-apple, roast turkey, toffee, and hot buttered toast,) she very soon finished it off.
diff --git a/test/testing.py b/test/testing.py
new file mode 100644
index 0000000..5d0af2b
--- /dev/null
+++ b/test/testing.py
@@ -0,0 +1,87 @@
+import sys
+sys.path.append('../')
+from src import assessment #as a
+# from src.vector import Vector
+import numpy as np
+import pandas as pd
+from pytest import approx
+
+
+# def test_count_characters():
+#     string = "abafdcggfaabe"
+#     answer = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2}
+#     result = assessment.count_characters(string)
+#     assert result == answer
+
+
+# def test_invert_dictionary():
+#     d = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2}
+#     result = {4: {'a'}, 2: {'b', 'f', 'g'}, 1: {'c', 'd', 'e'}}
+#     assert assessment.invert_dictionary(d) == result
+
+
+# def test_word_count():
+#     assert assessment.word_count('alice.txt') == (17, 1615, 8449)
+### not sure why failing this one; just off on char_count by 12 chars
+
+def test_matrix_multiplication():
+    A = [[2, 3, 4], [6, 4, 2], [-1, 2, 0]]
+    B = [[8, -3, 1], [-7, 3, 2], [0, 3, 3]]
+    answer = [[-5, 15, 20], [20, 0, 20], [-22, 9, 3]]
+    assert assessment.matrix_multiplication(A, B) == answer
+### can't get this to work
+
+
+# def test_array_work():
+#     matrixA = np.array([[-4, -2],
+#                         [0, -3],
+#                         [-4, -1],
+#                         [-1, 1],
+#                         [-3, 0]])
+#     answer1 = np.array([[-24, -24, -24],
+#                         [-12, -12, -12],
+#                         [-20, -20, -20],
+#                         [0, 0, 0],
+#                         [-12, -12, -12]])
+#     result1 = assessment.array_work(2, 3, 4, matrixA)
+#     assert np.all(answer1 == result1)
+
+#     answer2 = np.array([[-36, -36],
+#                         [-18, -18],
+#                         [-30, -30],
+#                         [0, 0],
+#                         [-18, -18]])
+    # result2 = assessment.array_work(2, 2, 6, matrixA)
+    # assert np.all(answer2 == result2)
+
+
+# def test_make_series():
+#     result = assessment.make_series(7, 4, ['a', 'b', 'c', 'd'])
+#     assert isinstance(result, pd.Series)
+#     assert result['a'] == 7
+#     assert result['d'] == 10
+
+#     result = assessment.make_series(22, 5, ['a', 'b', 'c', 'd', 'hi'])
+#     assert result['a'] == 22
+#     assert result['d'] == 25
+#     assert result['hi'] == 26
+
+
+# def test_data_frame_work():
+#     df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
+#     colA, colB, colC = ('a', 'b', 'c')
+#     assessment.data_frame_work(df, colA, colB, colC)
+#     assert colC in df.columns.tolist()
+#     assert df[colC].tolist(), [5, 7, 9]
+
+
+# def test_boolean_indexing():
+#     arr = np.array([[-4, -4, -3],
+#                     [-1, 16, -4],
+#                     [-3, 6, 4]])
+#     result1 = assessment.boolean_indexing(arr, 0)
+#     answer1 = np.array([16, 6, 4])
+#     assert np.all(result1 == answer1)
+#     result2 = assessment.boolean_indexing(arr, 10)
+#     answer2 = np.array([16])
+#     assert np.all(result2 == answer2)
diff --git a/testing.py b/testing.py
deleted file mode 100644
index c68b010..0000000
--- a/testing.py
+++ /dev/null
@@ -1,77 +0,0 @@
-def test_count_characters(self):
-    string = "abafdcggfaabe"
-    answer = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2}
-    result = a.count_characters(string)
-    self.assertEqual(result, answer)
-
-
-def test_invert_dictionary(self):
-    d = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2}
-    result = {4: {'a'}, 2: {'b', 'f', 'g'}, 1: {'c', 'd', 'e'}}
-    self.assertEqual(a.invert_dictionary(d), result)
-
-
-def test_word_count(self):
-    self.assertEqual(a.word_count('data/alice.txt'), (17, 1615, 8449))
-
-
-def test_matrix_multiplication(self):
-    A = [[2, 3, 4], [6, 4, 2], [-1, 2, 0]]
-    B = [[8, -3, 1], [-7, 3, 2], [0, 3, 3]]
-    answer = [[-5, 15, 20], [20, 0, 20], [-22, 9, 3]]
-    self.assertEqual(a.matrix_multiplication(A, B), answer)
-
-
-def test_array_work(self):
-    matrixA = np.array([[-4, -2],
-                        [0, -3],
-                        [-4, -1],
-                        [-1, 1],
-                        [-3, 0]])
-    answer1 = np.array([[-24, -24, -24],
-                        [-12, -12, -12],
-                        [-20, -20, -20],
-                        [0, 0, 0],
-                        [-12, -12, -12]])
-    result1 = a.array_work(2, 3, 4, matrixA)
-    self.assertTrue(np.all(answer1 == result1))
-
-    answer2 = np.array([[-36, -36],
-                        [-18, -18],
-                        [-30, -30],
-                        [0, 0],
-                        [-18, -18]])
-    result2 = a.array_work(2, 2, 6, matrixA)
-    self.assertTrue(np.all(answer2 == result2))
-
-
-def test_make_series(self):
-    result = a.make_series(7, 4, ['a', 'b', 'c', 'd'])
-    self.assertTrue(isinstance(result, pd.Series))
-    self.assertEqual(result['a'], 7)
-    self.assertEqual(result['d'], 10)
-
-    result = a.make_series(22, 5, ['a', 'b', 'c', 'd', 'hi'])
-    self.assertEqual(result['a'], 22)
-    self.assertEqual(result['d'], 25)
-    self.assertEqual(result['hi'], 26)
-
-
-def test_data_frame_work(self):
-    df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
-    colA, colB, colC = ('a', 'b', 'c')
-    a.data_frame_work(df, colA, colB, colC)
-    self.assertTrue(colC in df.columns.tolist())
-    self.assertEqual(df[colC].tolist(), [5, 7, 9])
-
-
-def test_boolean_indexing(self):
-    arr = np.array([[-4, -4, -3],
-                    [-1, 16, -4],
-                    [-3, 6, 4]])
-    result1 = a.boolean_indexing(arr, 0)
-    answer1 = np.array([16, 6, 4])
-    self.assertTrue(np.all(result1 == answer1))
-    result2 = a.boolean_indexing(arr, 10)
-    answer2 = np.array([16])
-    self.assertTrue(np.all(result2 == answer2))
diff --git a/titanic.ipynb b/titanic.ipynb
new file mode 100644
index 0000000..481a567
--- /dev/null
+++ b/titanic.ipynb
@@ -0,0 +1,2868 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1226,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import statsmodels.api as sm\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.metrics import r2_score\n",
+    "from pandas.plotting import scatter_matrix\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1227,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv('train.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1228,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>count</th>\n",
+       "      <th>mean</th>\n",
+       "      <th>std</th>\n",
+       "      <th>min</th>\n",
+       "      <th>25%</th>\n",
+       "      <th>50%</th>\n",
+       "      <th>75%</th>\n",
+       "      <th>max</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>PassengerId</th>\n",
+       "      <td>891.0</td>\n",
+       "      <td>446.000000</td>\n",
+       "      <td>257.353842</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>223.5000</td>\n",
+       "      <td>446.0000</td>\n",
+       "      <td>668.5</td>\n",
+       "      <td>891.0000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Survived</th>\n",
+       "      <td>891.0</td>\n",
+       "      <td>0.383838</td>\n",
+       "      <td>0.486592</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Pclass</th>\n",
+       "      <td>891.0</td>\n",
+       "      <td>2.308642</td>\n",
+       "      <td>0.836071</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>2.0000</td>\n",
+       "      <td>3.0000</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Age</th>\n",
+       "      <td>714.0</td>\n",
+       "      <td>29.699118</td>\n",
+       "      <td>14.526497</td>\n",
+       "      <td>0.42</td>\n",
+       "      <td>20.1250</td>\n",
+       "      <td>28.0000</td>\n",
+       "      <td>38.0</td>\n",
+       "      <td>80.0000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SibSp</th>\n",
+       "      <td>891.0</td>\n",
+       "      <td>0.523008</td>\n",
+       "      <td>1.102743</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>8.0000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Parch</th>\n",
+       "      <td>891.0</td>\n",
+       "      <td>0.381594</td>\n",
+       "      <td>0.806057</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>6.0000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Fare</th>\n",
+       "      <td>891.0</td>\n",
+       "      <td>32.204208</td>\n",
+       "      <td>49.693429</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>7.9104</td>\n",
+       "      <td>14.4542</td>\n",
+       "      <td>31.0</td>\n",
+       "      <td>512.3292</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             count        mean         std   min       25%       50%    75%  \\\n",
+       "PassengerId  891.0  446.000000  257.353842  1.00  223.5000  446.0000  668.5   \n",
+       "Survived     891.0    0.383838    0.486592  0.00    0.0000    0.0000    1.0   \n",
+       "Pclass       891.0    2.308642    0.836071  1.00    2.0000    3.0000    3.0   \n",
+       "Age          714.0   29.699118   14.526497  0.42   20.1250   28.0000   38.0   \n",
+       "SibSp        891.0    0.523008    1.102743  0.00    0.0000    0.0000    1.0   \n",
+       "Parch        891.0    0.381594    0.806057  0.00    0.0000    0.0000    0.0   \n",
+       "Fare         891.0   32.204208   49.693429  0.00    7.9104   14.4542   31.0   \n",
+       "\n",
+       "                  max  \n",
+       "PassengerId  891.0000  \n",
+       "Survived       1.0000  \n",
+       "Pclass         3.0000  \n",
+       "Age           80.0000  \n",
+       "SibSp          8.0000  \n",
+       "Parch          6.0000  \n",
+       "Fare         512.3292  "
+      ]
+     },
+     "execution_count": 1228,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.describe().T"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1229,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Survived</th>\n",
+       "      <th>Pclass</th>\n",
+       "      <th>Sex</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>SibSp</th>\n",
+       "      <th>Parch</th>\n",
+       "      <th>Fare</th>\n",
+       "      <th>Embarked</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>male</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7.2500</td>\n",
+       "      <td>S</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>female</td>\n",
+       "      <td>38.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>71.2833</td>\n",
+       "      <td>C</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>female</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7.9250</td>\n",
+       "      <td>S</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>female</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>53.1000</td>\n",
+       "      <td>S</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>male</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>8.0500</td>\n",
+       "      <td>S</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Survived  Pclass     Sex   Age  SibSp  Parch     Fare Embarked\n",
+       "0         0       3    male  22.0      1      0   7.2500        S\n",
+       "1         1       1  female  38.0      1      0  71.2833        C\n",
+       "2         1       3  female  26.0      0      0   7.9250        S\n",
+       "3         1       1  female  35.0      1      0  53.1000        S\n",
+       "4         0       3    male  35.0      0      0   8.0500        S"
+      ]
+     },
+     "execution_count": 1229,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df = df.drop(columns=['PassengerId','Name','Cabin','Ticket'])\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1230,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 891 entries, 0 to 890\n",
+      "Data columns (total 8 columns):\n",
+      "Survived    891 non-null int64\n",
+      "Pclass      891 non-null int64\n",
+      "Sex         891 non-null object\n",
+      "Age         714 non-null float64\n",
+      "SibSp       891 non-null int64\n",
+      "Parch       891 non-null int64\n",
+      "Fare        891 non-null float64\n",
+      "Embarked    889 non-null object\n",
+      "dtypes: float64(2), int64(4), object(2)\n",
+      "memory usage: 55.8+ KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "df.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1231,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[df.Age.isna()]\n",
+    "age_median = df.Age.median()\n",
+    "df['Age'] = df.Age.fillna(age_median)\n",
+    "# also try using mean?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1232,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[df.Embarked.isna()]\n",
+    "embarked_mode = df.Embarked.mode()\n",
+    "df['Embarked'] = df.Embarked.fillna(embarked_mode)\n",
+    "# also try dropping if embarked is important???"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1233,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# df['Pclass'] = pd.to_string(df.Pclass, errors='coerce')\n",
+    "df['Pclass'] = df['Pclass'].astype(str)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1234,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# baseline is guess of non-survival\n",
+    "# or probability of survival based on whole pop, .38 = (342 / (342+549))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1235,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Pclass</th>\n",
+       "      <th>Sex</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>SibSp</th>\n",
+       "      <th>Parch</th>\n",
+       "      <th>Fare</th>\n",
+       "      <th>Embarked</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>3</td>\n",
+       "      <td>male</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7.2500</td>\n",
+       "      <td>S</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>female</td>\n",
+       "      <td>38.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>71.2833</td>\n",
+       "      <td>C</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>female</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7.9250</td>\n",
+       "      <td>S</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>female</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>53.1000</td>\n",
+       "      <td>S</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>3</td>\n",
+       "      <td>male</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>8.0500</td>\n",
+       "      <td>S</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  Pclass     Sex   Age  SibSp  Parch     Fare Embarked\n",
+       "0      3    male  22.0      1      0   7.2500        S\n",
+       "1      1  female  38.0      1      0  71.2833        C\n",
+       "2      3  female  26.0      0      0   7.9250        S\n",
+       "3      1  female  35.0      1      0  53.1000        S\n",
+       "4      3    male  35.0      0      0   8.0500        S"
+      ]
+     },
+     "execution_count": 1235,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# df.groupby([int(a)//2 for a in 'Age'])['Survived'].count()\n",
+    "y = df.Survived\n",
+    "X = df.drop(columns=['Survived'])\n",
+    "X.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1236,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>const</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>SibSp</th>\n",
+       "      <th>Parch</th>\n",
+       "      <th>Fare</th>\n",
+       "      <th>Pclass_2</th>\n",
+       "      <th>Pclass_3</th>\n",
+       "      <th>Sex_male</th>\n",
+       "      <th>Embarked_Q</th>\n",
+       "      <th>Embarked_S</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7.2500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>38.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>71.2833</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7.9250</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>53.1000</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>8.0500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   const   Age  SibSp  Parch     Fare  Pclass_2  Pclass_3  Sex_male  \\\n",
+       "0    1.0  22.0      1      0   7.2500         0         1         1   \n",
+       "1    1.0  38.0      1      0  71.2833         0         0         0   \n",
+       "2    1.0  26.0      0      0   7.9250         0         1         0   \n",
+       "3    1.0  35.0      1      0  53.1000         0         0         0   \n",
+       "4    1.0  35.0      0      0   8.0500         0         1         1   \n",
+       "\n",
+       "   Embarked_Q  Embarked_S  \n",
+       "0           0           1  \n",
+       "1           0           0  \n",
+       "2           0           1  \n",
+       "3           0           1  \n",
+       "4           0           1  "
+      ]
+     },
+     "execution_count": 1236,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X =pd.get_dummies(X)\n",
+    "# prob drop PassengerID and will need to drop one and dummy Sex, Embarked, Pclass\n",
+    "X = X.drop(columns=['Sex_female','Embarked_C','Pclass_1'])\n",
+    "X = sm.add_constant(X)\n",
+    "X.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1237,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    549\n",
+       "1    342\n",
+       "Name: Survived, dtype: int64"
+      ]
+     },
+     "execution_count": 1237,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAENRJREFUeJzt3X+s3XV9x/Hn21bUta5Fqjek7VYWayKRqHhDuphst9a4ggvlD1gwOApp1sSxxU2y2W3J3K8/YAtjgRj1bjiKQQtzc22AzZDCCXNZme1QCjLDFTu4ltBpy92uqFvne3+cT8213HK/9/y4397PfT6Sm/P9fr6fcz6f9+nt637v55zzvZGZSJLq9aq2JyBJGi6DXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klS55W1PAGDNmjW5YcOGnu773e9+lxUrVgx2Qmc5a14arHlp6KfmQ4cOfTsz3zhXv7Mi6Dds2MDBgwd7um+n02FsbGywEzrLWfPSYM1LQz81R8R/NOnn0o0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXurPhkbD8Of2uK63bd38rYR256fyvjStJ8eEYvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekirXKOgj4khEHI6Ir0TEwdL2hoh4MCKeLrfnlvaIiNsiYiIiHo+Ii4dZgCTplc3njH5zZr4jM0fL/i5gf2ZuBPaXfYBLgY3layfwiUFNVpI0f/0s3WwDdpft3cAVM9rvyq4DwOqIOL+PcSRJfYjMnLtTxDeBE0ACn8rM8Yh4MTNXz+hzIjPPjYj7gJsy80ulfT/w0cw8eNpj7qR7xs/IyMi79uzZ01MBx45P8cL3erpr3y5au6qVcaenp1m5cmUrY7fFmpcGa56fzZs3H5qxynJGTf+U4Lsz82hEvAl4MCL+/RX6xixtL/tpkpnjwDjA6Ohojo2NNZzKj7v97r3ccridv4h45JqxVsbtdDr0+nwtVta8NFjzcDRausnMo+X2GPAF4BLghVNLMuX2WOk+Cayfcfd1wNFBTViSND9zBn1ErIiI15/aBt4HPAHsA7aXbtuBvWV7H3BteffNJmAqM58f+MwlSY00WfMYAb4QEaf6fzYz/zEivgzcGxE7gGeBq0r/B4DLgAngJeD6gc9aktTYnEGfmc8Ab5+l/TvAllnaE7hhILOTJPXNT8ZKUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpco2DPiKWRcRjEXFf2b8gIh6NiKcj4p6IOKe0v6bsT5TjG4YzdUlSE/M5o/8w8NSM/ZuBWzNzI3AC2FHadwAnMvPNwK2lnySpJY2CPiLWAe8H/qrsB/Ae4POly27girK9rexTjm8p/SVJLWh6Rv8XwG8DPyz75wEvZubJsj8JrC3ba4HnAMrxqdJfktSC5XN1iIhfBI5l5qGIGDvVPEvXbHBs5uPuBHYCjIyM0Ol0msz3ZUZeBzdedHLujkPQ65z7NT093drYbbHmpcGah2POoAfeDVweEZcBrwV+ku4Z/uqIWF7O2tcBR0v/SWA9MBkRy4FVwPHTHzQzx4FxgNHR0RwbG+upgNvv3ssth5uUMXhHrhlrZdxOp0Ovz9diZc1LgzUPx5xLN5n5O5m5LjM3AFcDD2XmNcDDwJWl23Zgb9neV/Ypxx/KzJed0UuSFkY/76P/KPCRiJiguwZ/R2m/AzivtH8E2NXfFCVJ/ZjXmkdmdoBO2X4GuGSWPt8HrhrA3CRJA+AnYyWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJaly7fxVbUk6i2zYdX9rY9+5dcXQx/CMXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqtycQR8Rr42If42Ir0bEkxHxh6X9goh4NCKejoh7IuKc0v6asj9Rjm8YbgmSpFfS5Iz+B8B7MvPtwDuArRGxCbgZuDUzNwIngB2l/w7gRGa+Gbi19JMktWTOoM+u6bL76vKVwHuAz5f23cAVZXtb2acc3xIRMbAZS5LmpdEafUQsi4ivAMeAB4FvAC9m5snSZRJYW7bXAs8BlONTwHmDnLQkqbnIzOadI1YDXwB+H/jrsjxDRKwHHsjMiyLiSeAXMnOyHPsGcElmfue0x9oJ7AQYGRl51549e3oq4NjxKV74Xk937dtFa1e1Mu709DQrV65sZey2WPPS0FbNh781teBjnnLBqmU917x58+ZDmTk6V795/eGRzHwxIjrAJmB1RCwvZ+3rgKOl2ySwHpiMiOXAKuD4LI81DowDjI6O5tjY2Hym8iO3372XWw638/dTjlwz1sq4nU6HXp+vxcqal4a2ar6u5T88Muyam7zr5o3lTJ6IeB3wXuAp4GHgytJtO7C3bO8r+5TjD+V8fm2QJA1Uk1Ph84HdEbGM7g+GezPzvoj4GrAnIv4EeAy4o/S/A/hMREzQPZO/egjzliQ1NGfQZ+bjwDtnaX8GuGSW9u8DVw1kdpKkvvnJWEmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekys0Z9BGxPiIejoinIuLJiPhwaX9DRDwYEU+X23NLe0TEbRExERGPR8TFwy5CknRmTc7oTwI3ZuZbgU3ADRFxIbAL2J+ZG4H9ZR/gUmBj+doJfGLgs5YkNTZn0Gfm85n5b2X7v4GngLXANmB36bYbuKJsbwPuyq4DwOqIOH/gM5ckNRKZ2bxzxAbgEeBtwLOZuXrGsROZeW5E3AfclJlfKu37gY9m5sHTHmsn3TN+RkZG3rVnz56eCjh2fIoXvtfTXft20dpVrYw7PT3NypUrWxm7Lda8NLRV8+FvTS34mKdcsGpZzzVv3rz5UGaOztVvedMHjIiVwN8Cv5GZ/xURZ+w6S9vLfppk5jgwDjA6OppjY2NNp/Jjbr97L7ccblzGQB25ZqyVcTudDr0+X4uVNS8NbdV83a77F3zMU+7cumLoNTd6101EvJpuyN+dmX9Xml84tSRTbo+V9klg/Yy7rwOODma6kqT5avKumwDuAJ7KzD+fcWgfsL1sbwf2zmi/trz7ZhMwlZnPD3DOkqR5aLLm8W7gl4HDEfGV0va7wE3AvRGxA3gWuKocewC4DJgAXgKuH+iMJUnzMmfQlxdVz7Qgv2WW/gnc0Oe8JEkD4idjJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVLk5gz4iPh0RxyLiiRltb4iIByPi6XJ7bmmPiLgtIiYi4vGIuHiYk5ckza3JGf2dwNbT2nYB+zNzI7C/7ANcCmwsXzuBTwxmmpKkXs0Z9Jn5CHD8tOZtwO6yvRu4Ykb7Xdl1AFgdEecParKSpPnrdY1+JDOfByi3byrta4HnZvSbLG2SpJYsH/DjxSxtOWvHiJ10l3cYGRmh0+n0NODI6+DGi072dN9+9Trnfk1PT7c2dluseWloq+a2MgQWpuZeg/6FiDg/M58vSzPHSvsksH5Gv3XA0dkeIDPHgXGA0dHRHBsb62kit9+9l1sOD/rnVTNHrhlrZdxOp0Ovz9diZc1LQ1s1X7fr/gUf85Q7t64Yes29Lt3sA7aX7e3A3hnt15Z332wCpk4t8UiS2jHnqXBEfA4YA9ZExCTwMeAm4N6I2AE8C1xVuj8AXAZMAC8B1w9hzpKkeZgz6DPzA2c4tGWWvgnc0O+kJEmD4ydjJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqN5Sgj4itEfH1iJiIiF3DGEOS1MzAgz4ilgEfBy4FLgQ+EBEXDnocSVIzwzijvwSYyMxnMvN/gD3AtiGMI0lqYBhBvxZ4bsb+ZGmTJLVg+RAeM2Zpy5d1itgJ7Cy70xHx9R7HWwN8u8f79iVubmNUoMWaW2TNS8OSq3nzzX3V/NNNOg0j6CeB9TP21wFHT++UmePAeL+DRcTBzBzt93EWE2teGqx5aViImoexdPNlYGNEXBAR5wBXA/uGMI4kqYGBn9Fn5smI+DXgi8Ay4NOZ+eSgx5EkNTOMpRsy8wHggWE89iz6Xv5ZhKx5abDmpWHoNUfmy14nlSRVxEsgSFLlFk3Qz3VZhYh4TUTcU44/GhEbFn6Wg9Wg5o9ExNci4vGI2B8Rjd5qdTZrevmMiLgyIjIiFv07NJrUHBG/VP6tn4yIzy70HAetwff2T0XEwxHxWPn+vqyNeQ5KRHw6Io5FxBNnOB4RcVt5Ph6PiIsHOoHMPOu/6L6o+w3gZ4BzgK8CF57W51eBT5btq4F72p73AtS8GfiJsv2hpVBz6fd64BHgADDa9rwX4N95I/AYcG7Zf1Pb816AmseBD5XtC4Ejbc+7z5p/DrgYeOIMxy8D/oHu55A2AY8OcvzFckbf5LIK24DdZfvzwJaImO3DW4vFnDVn5sOZ+VLZPUD3MwuLWdPLZ/wx8KfA9xdyckPSpOZfAT6emScAMvPYAs9x0JrUnMBPlu1VzPJZnMUkMx8Bjr9Cl23AXdl1AFgdEecPavzFEvRNLqvwoz6ZeRKYAs5bkNkNx3wvJbGD7hnBYjZnzRHxTmB9Zt63kBMboib/zm8B3hIR/xwRByJi64LNbjia1PwHwAcjYpLuO/h+fWGm1pqhXjpmKG+vHIIml1VodOmFRaRxPRHxQWAU+Pmhzmj4XrHmiHgVcCtw3UJNaAE0+XdeTnf5Zozub23/FBFvy8wXhzy3YWlS8weAOzPzloj4WeAzpeYfDn96rRhqfi2WM/oml1X4UZ+IWE73171X+lXpbNfoUhIR8V7g94DLM/MHCzS3YZmr5tcDbwM6EXGE7lrmvkX+gmzT7+29mfm/mflN4Ot0g3+xalLzDuBegMz8F+C1dK+DU6tG/997tViCvsllFfYB28v2lcBDWV7lWKTmrLksY3yKbsgv9nVbmKPmzJzKzDWZuSEzN9B9XeLyzDzYznQHosn39t/TfeGdiFhDdynnmQWd5WA1qflZYAtARLyVbtD/54LOcmHtA64t777ZBExl5vODevBFsXSTZ7isQkT8EXAwM/cBd9D99W6C7pn81e3NuH8Na/4zYCXwN+V152cz8/LWJt2nhjVXpWHNXwTeFxFfA/4P+K3M/E57s+5Pw5pvBP4yIn6T7hLGdYv5xC0iPkd36W1Ned3hY8CrATLzk3Rfh7gMmABeAq4f6PiL+LmTJDWwWJZuJEk9MuglqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6Sarc/wOuFeo7bmQhLgAAAABJRU5ErkJggg==\n",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "#df['Survived'].hist()\n",
+    "y.hist()\n",
+    "y.value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1238,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Age        -0.064910\n",
+       "SibSp      -0.035322\n",
+       "Parch       0.081629\n",
+       "Fare        0.257307\n",
+       "Survived    1.000000\n",
+       "Name: Survived, dtype: float64"
+      ]
+     },
+     "execution_count": 1238,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.corr()['Survived'].sort_values()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1239,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1240,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(596, 10)"
+      ]
+     },
+     "execution_count": 1240,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_train.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1241,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table class=\"simpletable\">\n",
+       "<caption>OLS Regression Results</caption>\n",
+       "<tr>\n",
+       "  <th>Dep. Variable:</th>        <td>Survived</td>     <th>  R-squared:         </th> <td>   0.374</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.364</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   38.84</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Date:</th>             <td>Sun, 22 Jul 2018</td> <th>  Prob (F-statistic):</th> <td>3.69e-54</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Time:</th>                 <td>15:14:03</td>     <th>  Log-Likelihood:    </th> <td> -273.13</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>No. Observations:</th>      <td>   596</td>      <th>  AIC:               </th> <td>   566.3</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Df Residuals:</th>          <td>   586</td>      <th>  BIC:               </th> <td>   610.2</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Df Model:</th>              <td>     9</td>      <th>                     </th>     <td> </td>   \n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
+       "</tr>\n",
+       "</table>\n",
+       "<table class=\"simpletable\">\n",
+       "<tr>\n",
+       "       <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>const</th>      <td>    1.0607</td> <td>    0.076</td> <td>   13.892</td> <td> 0.000</td> <td>    0.911</td> <td>    1.211</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Age</th>        <td>   -0.0040</td> <td>    0.001</td> <td>   -2.935</td> <td> 0.003</td> <td>   -0.007</td> <td>   -0.001</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>SibSp</th>      <td>   -0.0337</td> <td>    0.015</td> <td>   -2.271</td> <td> 0.023</td> <td>   -0.063</td> <td>   -0.005</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Parch</th>      <td>   -0.0203</td> <td>    0.023</td> <td>   -0.872</td> <td> 0.384</td> <td>   -0.066</td> <td>    0.025</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Fare</th>       <td>    0.0006</td> <td>    0.000</td> <td>    1.615</td> <td> 0.107</td> <td>   -0.000</td> <td>    0.001</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Pclass_2</th>   <td>   -0.0339</td> <td>    0.056</td> <td>   -0.600</td> <td> 0.549</td> <td>   -0.145</td> <td>    0.077</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Pclass_3</th>   <td>   -0.2488</td> <td>    0.053</td> <td>   -4.714</td> <td> 0.000</td> <td>   -0.352</td> <td>   -0.145</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Sex_male</th>   <td>   -0.4913</td> <td>    0.035</td> <td>  -13.881</td> <td> 0.000</td> <td>   -0.561</td> <td>   -0.422</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Embarked_Q</th> <td>   -0.0509</td> <td>    0.068</td> <td>   -0.743</td> <td> 0.458</td> <td>   -0.185</td> <td>    0.084</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Embarked_S</th> <td>   -0.1261</td> <td>    0.044</td> <td>   -2.862</td> <td> 0.004</td> <td>   -0.213</td> <td>   -0.040</td>\n",
+       "</tr>\n",
+       "</table>\n",
+       "<table class=\"simpletable\">\n",
+       "<tr>\n",
+       "  <th>Omnibus:</th>       <td>32.662</td> <th>  Durbin-Watson:     </th> <td>   2.036</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Prob(Omnibus):</th> <td> 0.000</td> <th>  Jarque-Bera (JB):  </th> <td>  36.901</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Skew:</th>          <td> 0.608</td> <th>  Prob(JB):          </th> <td>9.71e-09</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Kurtosis:</th>      <td> 3.081</td> <th>  Cond. No.          </th> <td>    378.</td>\n",
+       "</tr>\n",
+       "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
+      ],
+      "text/plain": [
+       "<class 'statsmodels.iolib.summary.Summary'>\n",
+       "\"\"\"\n",
+       "                            OLS Regression Results                            \n",
+       "==============================================================================\n",
+       "Dep. Variable:               Survived   R-squared:                       0.374\n",
+       "Model:                            OLS   Adj. R-squared:                  0.364\n",
+       "Method:                 Least Squares   F-statistic:                     38.84\n",
+       "Date:                Sun, 22 Jul 2018   Prob (F-statistic):           3.69e-54\n",
+       "Time:                        15:14:03   Log-Likelihood:                -273.13\n",
+       "No. Observations:                 596   AIC:                             566.3\n",
+       "Df Residuals:                     586   BIC:                             610.2\n",
+       "Df Model:                           9                                         \n",
+       "Covariance Type:            nonrobust                                         \n",
+       "==============================================================================\n",
+       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
+       "------------------------------------------------------------------------------\n",
+       "const          1.0607      0.076     13.892      0.000       0.911       1.211\n",
+       "Age           -0.0040      0.001     -2.935      0.003      -0.007      -0.001\n",
+       "SibSp         -0.0337      0.015     -2.271      0.023      -0.063      -0.005\n",
+       "Parch         -0.0203      0.023     -0.872      0.384      -0.066       0.025\n",
+       "Fare           0.0006      0.000      1.615      0.107      -0.000       0.001\n",
+       "Pclass_2      -0.0339      0.056     -0.600      0.549      -0.145       0.077\n",
+       "Pclass_3      -0.2488      0.053     -4.714      0.000      -0.352      -0.145\n",
+       "Sex_male      -0.4913      0.035    -13.881      0.000      -0.561      -0.422\n",
+       "Embarked_Q    -0.0509      0.068     -0.743      0.458      -0.185       0.084\n",
+       "Embarked_S    -0.1261      0.044     -2.862      0.004      -0.213      -0.040\n",
+       "==============================================================================\n",
+       "Omnibus:                       32.662   Durbin-Watson:                   2.036\n",
+       "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               36.901\n",
+       "Skew:                           0.608   Prob(JB):                     9.71e-09\n",
+       "Kurtosis:                       3.081   Cond. No.                         378.\n",
+       "==============================================================================\n",
+       "\n",
+       "Warnings:\n",
+       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
+       "\"\"\""
+      ]
+     },
+     "execution_count": 1241,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = sm.OLS(y_train, X_train)\n",
+    "results = model.fit()\n",
+    "results.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1242,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
+       "          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
+       "          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
+       "          verbose=0, warm_start=False)"
+      ]
+     },
+     "execution_count": 1242,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = LogisticRegression()\n",
+    "model.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1243,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n",
+       "       1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,\n",
+       "       1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,\n",
+       "       0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1,\n",
+       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n",
+       "       1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0,\n",
+       "       0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1,\n",
+       "       0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0,\n",
+       "       0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,\n",
+       "       1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,\n",
+       "       0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,\n",
+       "       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,\n",
+       "       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n",
+       "       1, 0, 0, 0, 0, 0, 1, 1, 0], dtype=int64)"
+      ]
+     },
+     "execution_count": 1243,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1244,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>const</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>SibSp</th>\n",
+       "      <th>Parch</th>\n",
+       "      <th>Fare</th>\n",
+       "      <th>Pclass_2</th>\n",
+       "      <th>Pclass_3</th>\n",
+       "      <th>Sex_male</th>\n",
+       "      <th>Embarked_Q</th>\n",
+       "      <th>Embarked_S</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>709</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>28.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>15.2458</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>439</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>31.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>10.5000</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>840</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7.9250</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>720</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>33.0000</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>39</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>11.2417</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     const   Age  SibSp  Parch     Fare  Pclass_2  Pclass_3  Sex_male  \\\n",
+       "709    1.0  28.0      1      1  15.2458         0         1         1   \n",
+       "439    1.0  31.0      0      0  10.5000         1         0         1   \n",
+       "840    1.0  20.0      0      0   7.9250         0         1         1   \n",
+       "720    1.0   6.0      0      1  33.0000         1         0         0   \n",
+       "39     1.0  14.0      1      0  11.2417         0         1         0   \n",
+       "\n",
+       "     Embarked_Q  Embarked_S  \n",
+       "709           0           0  \n",
+       "439           0           1  \n",
+       "840           0           1  \n",
+       "720           0           1  \n",
+       "39            0           0  "
+      ]
+     },
+     "execution_count": 1244,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_test.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1245,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8067796610169492"
+      ]
+     },
+     "execution_count": 1245,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.score(X_test, y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1246,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['const', 'Age', 'SibSp', 'Parch', 'Fare', 'Pclass_2', 'Pclass_3',\n",
+       "       'Sex_male', 'Embarked_Q', 'Embarked_S'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 1246,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# scatter_matrix(X);\n",
+    "X.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1247,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>const</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>SibSp</th>\n",
+       "      <th>Parch</th>\n",
+       "      <th>Fare</th>\n",
+       "      <th>Sex_male</th>\n",
+       "      <th>Embarked_Q</th>\n",
+       "      <th>Embarked_S</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7.2500</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>38.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>71.2833</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7.9250</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>53.1000</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>8.0500</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   const   Age  SibSp  Parch     Fare  Sex_male  Embarked_Q  Embarked_S\n",
+       "0    1.0  22.0      1      0   7.2500         1           0           1\n",
+       "1    1.0  38.0      1      0  71.2833         0           0           0\n",
+       "2    1.0  26.0      0      0   7.9250         0           0           1\n",
+       "3    1.0  35.0      1      0  53.1000         0           0           1\n",
+       "4    1.0  35.0      0      0   8.0500         1           0           1"
+      ]
+     },
+     "execution_count": 1247,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X = X.drop(columns=['Pclass_2', 'Pclass_3'])\n",
+    "X.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1248,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>const</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>SibSp</th>\n",
+       "      <th>Parch</th>\n",
+       "      <th>Fare</th>\n",
+       "      <th>Sex_male</th>\n",
+       "      <th>Embarked_Q</th>\n",
+       "      <th>Embarked_S</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7.2500</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>38.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>71.2833</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7.9250</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>53.1000</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>8.0500</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   const   Age  SibSp  Parch     Fare  Sex_male  Embarked_Q  Embarked_S\n",
+       "0    1.0  22.0      1      0   7.2500         1           0           1\n",
+       "1    1.0  38.0      1      0  71.2833         0           0           0\n",
+       "2    1.0  26.0      0      0   7.9250         0           0           1\n",
+       "3    1.0  35.0      1      0  53.1000         0           0           1\n",
+       "4    1.0  35.0      0      0   8.0500         1           0           1"
+      ]
+     },
+     "execution_count": 1248,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X =pd.get_dummies(X)\n",
+    "# prob drop PassengerID and will need to drop one and dummy Sex, Embarked, Pclass\n",
+    "X = sm.add_constant(X)\n",
+    "X.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1249,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1250,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table class=\"simpletable\">\n",
+       "<caption>OLS Regression Results</caption>\n",
+       "<tr>\n",
+       "  <th>Dep. Variable:</th>        <td>Survived</td>     <th>  R-squared:         </th> <td>   0.334</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.326</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   42.10</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Date:</th>             <td>Sun, 22 Jul 2018</td> <th>  Prob (F-statistic):</th> <td>3.94e-48</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Time:</th>                 <td>15:14:03</td>     <th>  Log-Likelihood:    </th> <td> -291.45</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>No. Observations:</th>      <td>   596</td>      <th>  AIC:               </th> <td>   598.9</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Df Residuals:</th>          <td>   588</td>      <th>  BIC:               </th> <td>   634.0</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Df Model:</th>              <td>     7</td>      <th>                     </th>     <td> </td>   \n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
+       "</tr>\n",
+       "</table>\n",
+       "<table class=\"simpletable\">\n",
+       "<tr>\n",
+       "       <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>const</th>      <td>    0.8716</td> <td>    0.061</td> <td>   14.316</td> <td> 0.000</td> <td>    0.752</td> <td>    0.991</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Age</th>        <td>   -0.0020</td> <td>    0.001</td> <td>   -1.494</td> <td> 0.136</td> <td>   -0.005</td> <td>    0.001</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>SibSp</th>      <td>   -0.0420</td> <td>    0.015</td> <td>   -2.754</td> <td> 0.006</td> <td>   -0.072</td> <td>   -0.012</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Parch</th>      <td>   -0.0331</td> <td>    0.024</td> <td>   -1.390</td> <td> 0.165</td> <td>   -0.080</td> <td>    0.014</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Fare</th>       <td>    0.0015</td> <td>    0.000</td> <td>    4.405</td> <td> 0.000</td> <td>    0.001</td> <td>    0.002</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Sex_male</th>   <td>   -0.5209</td> <td>    0.036</td> <td>  -14.432</td> <td> 0.000</td> <td>   -0.592</td> <td>   -0.450</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Embarked_Q</th> <td>   -0.1391</td> <td>    0.069</td> <td>   -2.025</td> <td> 0.043</td> <td>   -0.274</td> <td>   -0.004</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Embarked_S</th> <td>   -0.1363</td> <td>    0.045</td> <td>   -3.050</td> <td> 0.002</td> <td>   -0.224</td> <td>   -0.049</td>\n",
+       "</tr>\n",
+       "</table>\n",
+       "<table class=\"simpletable\">\n",
+       "<tr>\n",
+       "  <th>Omnibus:</th>       <td>25.798</td> <th>  Durbin-Watson:     </th> <td>   2.007</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Prob(Omnibus):</th> <td> 0.000</td> <th>  Jarque-Bera (JB):  </th> <td>  28.434</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Skew:</th>          <td> 0.535</td> <th>  Prob(JB):          </th> <td>6.69e-07</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Kurtosis:</th>      <td> 2.966</td> <th>  Cond. No.          </th> <td>    328.</td>\n",
+       "</tr>\n",
+       "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
+      ],
+      "text/plain": [
+       "<class 'statsmodels.iolib.summary.Summary'>\n",
+       "\"\"\"\n",
+       "                            OLS Regression Results                            \n",
+       "==============================================================================\n",
+       "Dep. Variable:               Survived   R-squared:                       0.334\n",
+       "Model:                            OLS   Adj. R-squared:                  0.326\n",
+       "Method:                 Least Squares   F-statistic:                     42.10\n",
+       "Date:                Sun, 22 Jul 2018   Prob (F-statistic):           3.94e-48\n",
+       "Time:                        15:14:03   Log-Likelihood:                -291.45\n",
+       "No. Observations:                 596   AIC:                             598.9\n",
+       "Df Residuals:                     588   BIC:                             634.0\n",
+       "Df Model:                           7                                         \n",
+       "Covariance Type:            nonrobust                                         \n",
+       "==============================================================================\n",
+       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
+       "------------------------------------------------------------------------------\n",
+       "const          0.8716      0.061     14.316      0.000       0.752       0.991\n",
+       "Age           -0.0020      0.001     -1.494      0.136      -0.005       0.001\n",
+       "SibSp         -0.0420      0.015     -2.754      0.006      -0.072      -0.012\n",
+       "Parch         -0.0331      0.024     -1.390      0.165      -0.080       0.014\n",
+       "Fare           0.0015      0.000      4.405      0.000       0.001       0.002\n",
+       "Sex_male      -0.5209      0.036    -14.432      0.000      -0.592      -0.450\n",
+       "Embarked_Q    -0.1391      0.069     -2.025      0.043      -0.274      -0.004\n",
+       "Embarked_S    -0.1363      0.045     -3.050      0.002      -0.224      -0.049\n",
+       "==============================================================================\n",
+       "Omnibus:                       25.798   Durbin-Watson:                   2.007\n",
+       "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               28.434\n",
+       "Skew:                           0.535   Prob(JB):                     6.69e-07\n",
+       "Kurtosis:                       2.966   Cond. No.                         328.\n",
+       "==============================================================================\n",
+       "\n",
+       "Warnings:\n",
+       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
+       "\"\"\""
+      ]
+     },
+     "execution_count": 1250,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = sm.OLS(y_train, X_train)\n",
+    "results = model.fit()\n",
+    "results.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1251,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
+       "          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
+       "          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
+       "          verbose=0, warm_start=False)"
+      ]
+     },
+     "execution_count": 1251,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = LogisticRegression()\n",
+    "model.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1252,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n",
+       "       1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,\n",
+       "       1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,\n",
+       "       0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,\n",
+       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n",
+       "       1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0,\n",
+       "       0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,\n",
+       "       0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,\n",
+       "       0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,\n",
+       "       1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,\n",
+       "       0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,\n",
+       "       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,\n",
+       "       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n",
+       "       1, 0, 0, 0, 0, 0, 1, 1, 0], dtype=int64)"
+      ]
+     },
+     "execution_count": 1252,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1253,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8033898305084746"
+      ]
+     },
+     "execution_count": 1253,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.score(X_test, y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1254,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Optimization terminated successfully.\n",
+      "         Current function value: 0.478634\n",
+      "         Iterations 6\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "const         8.193931e-07\n",
+       "Age           1.201037e-01\n",
+       "SibSp         4.293742e-03\n",
+       "Parch         1.026211e-01\n",
+       "Fare          1.949962e-04\n",
+       "Sex_male      1.307261e-28\n",
+       "Embarked_Q    6.099504e-02\n",
+       "Embarked_S    5.288578e-03\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 1254,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "logitModel = sm.Logit(y_train, X_train)\n",
+    "logitModel_fit = logitModel.fit()\n",
+    "logitModel_fit.pvalues"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1255,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['const', 'Age', 'SibSp', 'Parch', 'Fare', 'Sex_male', 'Embarked_Q',\n",
+       "       'Embarked_S'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 1255,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1256,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>const</th>\n",
+       "      <th>SibSp</th>\n",
+       "      <th>Parch</th>\n",
+       "      <th>Sex_male</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   const  SibSp  Parch  Sex_male\n",
+       "0    1.0      1      0         1\n",
+       "1    1.0      1      0         0\n",
+       "2    1.0      0      0         0\n",
+       "3    1.0      1      0         0\n",
+       "4    1.0      0      0         1"
+      ]
+     },
+     "execution_count": 1256,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X = X.drop(columns=['Age', 'Fare', 'Embarked_Q', 'Embarked_S'])\n",
+    "X.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1257,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1258,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table class=\"simpletable\">\n",
+       "<caption>OLS Regression Results</caption>\n",
+       "<tr>\n",
+       "  <th>Dep. Variable:</th>        <td>Survived</td>     <th>  R-squared:         </th> <td>   0.290</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.286</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   80.57</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Date:</th>             <td>Sun, 22 Jul 2018</td> <th>  Prob (F-statistic):</th> <td>1.02e-43</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Time:</th>                 <td>15:14:03</td>     <th>  Log-Likelihood:    </th> <td> -310.50</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>No. Observations:</th>      <td>   596</td>      <th>  AIC:               </th> <td>   629.0</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Df Residuals:</th>          <td>   592</td>      <th>  BIC:               </th> <td>   646.6</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Df Model:</th>              <td>     3</td>      <th>                     </th>     <td> </td>   \n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
+       "</tr>\n",
+       "</table>\n",
+       "<table class=\"simpletable\">\n",
+       "<tr>\n",
+       "      <td></td>        <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>const</th>    <td>    0.7630</td> <td>    0.032</td> <td>   23.732</td> <td> 0.000</td> <td>    0.700</td> <td>    0.826</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>SibSp</th>    <td>   -0.0357</td> <td>    0.015</td> <td>   -2.342</td> <td> 0.020</td> <td>   -0.066</td> <td>   -0.006</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Parch</th>    <td>   -0.0200</td> <td>    0.024</td> <td>   -0.836</td> <td> 0.404</td> <td>   -0.067</td> <td>    0.027</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Sex_male</th> <td>   -0.5538</td> <td>    0.036</td> <td>  -15.174</td> <td> 0.000</td> <td>   -0.625</td> <td>   -0.482</td>\n",
+       "</tr>\n",
+       "</table>\n",
+       "<table class=\"simpletable\">\n",
+       "<tr>\n",
+       "  <th>Omnibus:</th>       <td>22.278</td> <th>  Durbin-Watson:     </th> <td>   2.020</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Prob(Omnibus):</th> <td> 0.000</td> <th>  Jarque-Bera (JB):  </th> <td>  24.206</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Skew:</th>          <td> 0.491</td> <th>  Prob(JB):          </th> <td>5.54e-06</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Kurtosis:</th>      <td> 2.904</td> <th>  Cond. No.          </th> <td>    4.58</td>\n",
+       "</tr>\n",
+       "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
+      ],
+      "text/plain": [
+       "<class 'statsmodels.iolib.summary.Summary'>\n",
+       "\"\"\"\n",
+       "                            OLS Regression Results                            \n",
+       "==============================================================================\n",
+       "Dep. Variable:               Survived   R-squared:                       0.290\n",
+       "Model:                            OLS   Adj. R-squared:                  0.286\n",
+       "Method:                 Least Squares   F-statistic:                     80.57\n",
+       "Date:                Sun, 22 Jul 2018   Prob (F-statistic):           1.02e-43\n",
+       "Time:                        15:14:03   Log-Likelihood:                -310.50\n",
+       "No. Observations:                 596   AIC:                             629.0\n",
+       "Df Residuals:                     592   BIC:                             646.6\n",
+       "Df Model:                           3                                         \n",
+       "Covariance Type:            nonrobust                                         \n",
+       "==============================================================================\n",
+       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
+       "------------------------------------------------------------------------------\n",
+       "const          0.7630      0.032     23.732      0.000       0.700       0.826\n",
+       "SibSp         -0.0357      0.015     -2.342      0.020      -0.066      -0.006\n",
+       "Parch         -0.0200      0.024     -0.836      0.404      -0.067       0.027\n",
+       "Sex_male      -0.5538      0.036    -15.174      0.000      -0.625      -0.482\n",
+       "==============================================================================\n",
+       "Omnibus:                       22.278   Durbin-Watson:                   2.020\n",
+       "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               24.206\n",
+       "Skew:                           0.491   Prob(JB):                     5.54e-06\n",
+       "Kurtosis:                       2.904   Cond. No.                         4.58\n",
+       "==============================================================================\n",
+       "\n",
+       "Warnings:\n",
+       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
+       "\"\"\""
+      ]
+     },
+     "execution_count": 1258,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = sm.OLS(y_train, X_train)\n",
+    "results = model.fit()\n",
+    "results.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1259,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
+       "          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
+       "          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
+       "          verbose=0, warm_start=False)"
+      ]
+     },
+     "execution_count": 1259,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = LogisticRegression()\n",
+    "model.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1260,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n",
+       "       1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,\n",
+       "       1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,\n",
+       "       0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,\n",
+       "       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n",
+       "       1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0,\n",
+       "       0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1,\n",
+       "       0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,\n",
+       "       1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,\n",
+       "       1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0,\n",
+       "       0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,\n",
+       "       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,\n",
+       "       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n",
+       "       1, 0, 0, 0, 0, 0, 1, 1, 0], dtype=int64)"
+      ]
+     },
+     "execution_count": 1260,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1261,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7966101694915254"
+      ]
+     },
+     "execution_count": 1261,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.score(X_test, y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1262,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Pclass</th>\n",
+       "      <th>Sex</th>\n",
+       "      <th>SibSp</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>3</td>\n",
+       "      <td>male</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>female</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>female</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>female</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>3</td>\n",
+       "      <td>male</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  Pclass     Sex  SibSp\n",
+       "0      3    male      1\n",
+       "1      1  female      1\n",
+       "2      3  female      0\n",
+       "3      1  female      1\n",
+       "4      3    male      0"
+      ]
+     },
+     "execution_count": 1262,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X = df.drop(columns=['Age', 'Fare', 'Embarked', 'Survived', 'Parch'])\n",
+    "X.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1263,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>const</th>\n",
+       "      <th>SibSp</th>\n",
+       "      <th>Pclass_1</th>\n",
+       "      <th>Pclass_2</th>\n",
+       "      <th>Pclass_3</th>\n",
+       "      <th>Sex_female</th>\n",
+       "      <th>Sex_male</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   const  SibSp  Pclass_1  Pclass_2  Pclass_3  Sex_female  Sex_male\n",
+       "0    1.0      1         0         0         1           0         1\n",
+       "1    1.0      1         1         0         0           1         0\n",
+       "2    1.0      0         0         0         1           1         0\n",
+       "3    1.0      1         1         0         0           1         0\n",
+       "4    1.0      0         0         0         1           0         1"
+      ]
+     },
+     "execution_count": 1263,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X =pd.get_dummies(X)\n",
+    "X = sm.add_constant(X)\n",
+    "X.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1264,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = X.drop(columns = ['Pclass_3', 'Sex_male'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1265,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1266,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table class=\"simpletable\">\n",
+       "<caption>OLS Regression Results</caption>\n",
+       "<tr>\n",
+       "  <th>Dep. Variable:</th>        <td>Survived</td>     <th>  R-squared:         </th> <td>   0.347</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.343</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   78.63</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Date:</th>             <td>Sun, 22 Jul 2018</td> <th>  Prob (F-statistic):</th> <td>1.81e-53</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Time:</th>                 <td>15:14:03</td>     <th>  Log-Likelihood:    </th> <td> -285.38</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>No. Observations:</th>      <td>   596</td>      <th>  AIC:               </th> <td>   580.8</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Df Residuals:</th>          <td>   591</td>      <th>  BIC:               </th> <td>   602.7</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Df Model:</th>              <td>     4</td>      <th>                     </th>     <td> </td>   \n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
+       "</tr>\n",
+       "</table>\n",
+       "<table class=\"simpletable\">\n",
+       "<tr>\n",
+       "       <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>const</th>      <td>    0.1110</td> <td>    0.025</td> <td>    4.443</td> <td> 0.000</td> <td>    0.062</td> <td>    0.160</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>SibSp</th>      <td>   -0.0298</td> <td>    0.013</td> <td>   -2.250</td> <td> 0.025</td> <td>   -0.056</td> <td>   -0.004</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Pclass_1</th>   <td>    0.2707</td> <td>    0.040</td> <td>    6.689</td> <td> 0.000</td> <td>    0.191</td> <td>    0.350</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Pclass_2</th>   <td>    0.1928</td> <td>    0.041</td> <td>    4.658</td> <td> 0.000</td> <td>    0.112</td> <td>    0.274</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Sex_female</th> <td>    0.5118</td> <td>    0.034</td> <td>   14.935</td> <td> 0.000</td> <td>    0.444</td> <td>    0.579</td>\n",
+       "</tr>\n",
+       "</table>\n",
+       "<table class=\"simpletable\">\n",
+       "<tr>\n",
+       "  <th>Omnibus:</th>       <td>33.990</td> <th>  Durbin-Watson:     </th> <td>   2.035</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Prob(Omnibus):</th> <td> 0.000</td> <th>  Jarque-Bera (JB):  </th> <td>  38.802</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Skew:</th>          <td> 0.625</td> <th>  Prob(JB):          </th> <td>3.75e-09</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Kurtosis:</th>      <td> 3.017</td> <th>  Cond. No.          </th> <td>    4.59</td>\n",
+       "</tr>\n",
+       "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
+      ],
+      "text/plain": [
+       "<class 'statsmodels.iolib.summary.Summary'>\n",
+       "\"\"\"\n",
+       "                            OLS Regression Results                            \n",
+       "==============================================================================\n",
+       "Dep. Variable:               Survived   R-squared:                       0.347\n",
+       "Model:                            OLS   Adj. R-squared:                  0.343\n",
+       "Method:                 Least Squares   F-statistic:                     78.63\n",
+       "Date:                Sun, 22 Jul 2018   Prob (F-statistic):           1.81e-53\n",
+       "Time:                        15:14:03   Log-Likelihood:                -285.38\n",
+       "No. Observations:                 596   AIC:                             580.8\n",
+       "Df Residuals:                     591   BIC:                             602.7\n",
+       "Df Model:                           4                                         \n",
+       "Covariance Type:            nonrobust                                         \n",
+       "==============================================================================\n",
+       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
+       "------------------------------------------------------------------------------\n",
+       "const          0.1110      0.025      4.443      0.000       0.062       0.160\n",
+       "SibSp         -0.0298      0.013     -2.250      0.025      -0.056      -0.004\n",
+       "Pclass_1       0.2707      0.040      6.689      0.000       0.191       0.350\n",
+       "Pclass_2       0.1928      0.041      4.658      0.000       0.112       0.274\n",
+       "Sex_female     0.5118      0.034     14.935      0.000       0.444       0.579\n",
+       "==============================================================================\n",
+       "Omnibus:                       33.990   Durbin-Watson:                   2.035\n",
+       "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               38.802\n",
+       "Skew:                           0.625   Prob(JB):                     3.75e-09\n",
+       "Kurtosis:                       3.017   Cond. No.                         4.59\n",
+       "==============================================================================\n",
+       "\n",
+       "Warnings:\n",
+       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
+       "\"\"\""
+      ]
+     },
+     "execution_count": 1266,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = sm.OLS(y_train, X_train)\n",
+    "results = model.fit()\n",
+    "results.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1267,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
+       "          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
+       "          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
+       "          verbose=0, warm_start=False)"
+      ]
+     },
+     "execution_count": 1267,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = LogisticRegression()\n",
+    "model.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1268,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "survival_predict = model.predict(X_test)  #[:10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1269,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8135593220338984"
+      ]
+     },
+     "execution_count": 1269,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.score(X_test, y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1270,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['const', 'SibSp', 'Pclass_1', 'Pclass_2', 'Sex_female'], dtype='object')"
+      ]
+     },
+     "execution_count": 1270,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1271,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# best combo of variables I've gotten so far based on model score is above, \n",
+    "# including only siblings/spouses, passenger class, and sex"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1272,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# wanted to try the age bins\n",
+    "# first, getting an idea of where to bin"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1273,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr:last-of-type th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th>Survived</th>\n",
+       "      <th colspan=\"2\" halign=\"left\">0</th>\n",
+       "      <th colspan=\"2\" halign=\"left\">1</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Sex</th>\n",
+       "      <th>female</th>\n",
+       "      <th>male</th>\n",
+       "      <th>female</th>\n",
+       "      <th>male</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Age</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0.42</th>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>0.92</th>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4.00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>5</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8.00</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12.00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15.00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19.00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>16</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22.00</th>\n",
+       "      <td>2</td>\n",
+       "      <td>14</td>\n",
+       "      <td>10</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24.50</th>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28.00</th>\n",
+       "      <td>19</td>\n",
+       "      <td>124</td>\n",
+       "      <td>41</td>\n",
+       "      <td>18</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30.50</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33.00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>9</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36.00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>11</td>\n",
+       "      <td>7</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>39.00</th>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>42.00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>7</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>45.50</th>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>49.00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>53.00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>56.00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>60.00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>64.00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>70.50</th>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Survived      0           1     \n",
+       "Sex      female male female male\n",
+       "Age                             \n",
+       "0.42          0    0      0    1\n",
+       "0.92          0    0      0    1\n",
+       "4.00          0    3      5    2\n",
+       "8.00          1    1      1    1\n",
+       "12.00         0    0      0    1\n",
+       "15.00         0    1      4    0\n",
+       "19.00         0   16      7    2\n",
+       "22.00         2   14     10    1\n",
+       "24.50         0    1      0    0\n",
+       "28.00        19  124     41   18\n",
+       "30.50         1    1      0    0\n",
+       "33.00         0    9      6    0\n",
+       "36.00         0   11      7    4\n",
+       "39.00         2    7      4    1\n",
+       "42.00         0    7      3    3\n",
+       "45.50         0    2      0    0\n",
+       "49.00         0    2      2    2\n",
+       "53.00         0    0      1    0\n",
+       "56.00         0    2      1    1\n",
+       "60.00         0    2      1    1\n",
+       "64.00         0    2      0    0\n",
+       "70.50         0    1      0    0"
+      ]
+     },
+     "execution_count": 1273,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.pivot_table(index=['Age'], columns=('Survived','Sex'), aggfunc='size', fill_value=0)[::4]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1274,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>const</th>\n",
+       "      <th>SibSp</th>\n",
+       "      <th>Pclass_1</th>\n",
+       "      <th>Pclass_2</th>\n",
+       "      <th>Sex_female</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   const  SibSp  Pclass_1  Pclass_2  Sex_female\n",
+       "0    1.0      1         0         0           0\n",
+       "1    1.0      1         1         0           1\n",
+       "2    1.0      0         0         0           1\n",
+       "3    1.0      1         1         0           1\n",
+       "4    1.0      0         0         0           0"
+      ]
+     },
+     "execution_count": 1274,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X =pd.get_dummies(X)\n",
+    "X = sm.add_constant(X)\n",
+    "X.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1279,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>const</th>\n",
+       "      <th>SibSp</th>\n",
+       "      <th>Pclass_1</th>\n",
+       "      <th>Pclass_2</th>\n",
+       "      <th>Sex_female</th>\n",
+       "      <th>age_bin</th>\n",
+       "      <th>age_bin</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   const  SibSp  Pclass_1  Pclass_2  Sex_female age_bin age_bin\n",
+       "0    1.0      1         0         0           0     NaN       2\n",
+       "1    1.0      1         1         0           1     NaN       2\n",
+       "2    1.0      0         0         0           1     NaN       2\n",
+       "3    1.0      1         1         0           1     NaN       2\n",
+       "4    1.0      0         0         0           0     NaN       2"
+      ]
+     },
+     "execution_count": 1279,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# variablizing age bins for testing\n",
+    "a = 0\n",
+    "b = 2\n",
+    "c = 82\n",
+    "# d = 82\n",
+    "age_bins = [a,b,c]\n",
+    "# age_bins = [0,1,16,82]\n",
+    "age_series = pd.cut(df['Age'], bins=age_bins, labels=age_bins[:-1])\n",
+    "age_series.name = 'age_bin'\n",
+    "X = pd.concat([X, age_series], axis=1)\n",
+    "X.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1276,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1277,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# model = sm.OLS(y_train, X_train)\n",
+    "# results = model.fit()\n",
+    "# results.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1278,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "Input contains NaN, infinity or a value too large for dtype('float64').",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "\u001b[1;32m<ipython-input-1278-a002323288ce>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[0mmodel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mLogisticRegression\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m   1214\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1215\u001b[0m         X, y = check_X_y(X, y, accept_sparse='csr', dtype=_dtype,\n\u001b[1;32m-> 1216\u001b[1;33m                          order=\"C\")\n\u001b[0m\u001b[0;32m   1217\u001b[0m         \u001b[0mcheck_classification_targets\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1218\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclasses_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0munique\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py\u001b[0m in \u001b[0;36mcheck_X_y\u001b[1;34m(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m    571\u001b[0m     X = check_array(X, accept_sparse, dtype, order, copy, force_all_finite,\n\u001b[0;32m    572\u001b[0m                     \u001b[0mensure_2d\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mallow_nd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mensure_min_samples\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 573\u001b[1;33m                     ensure_min_features, warn_on_dtype, estimator)\n\u001b[0m\u001b[0;32m    574\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    575\u001b[0m         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,\n",
+      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m    451\u001b[0m                              % (array.ndim, estimator_name))\n\u001b[0;32m    452\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mforce_all_finite\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 453\u001b[1;33m             \u001b[0m_assert_all_finite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    454\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    455\u001b[0m     \u001b[0mshape_repr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_shape_repr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py\u001b[0m in \u001b[0;36m_assert_all_finite\u001b[1;34m(X)\u001b[0m\n\u001b[0;32m     42\u001b[0m             and not np.isfinite(X).all()):\n\u001b[0;32m     43\u001b[0m         raise ValueError(\"Input contains NaN, infinity\"\n\u001b[1;32m---> 44\u001b[1;33m                          \" or a value too large for %r.\" % X.dtype)\n\u001b[0m\u001b[0;32m     45\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     46\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;31mValueError\u001b[0m: Input contains NaN, infinity or a value too large for dtype('float64')."
+     ]
+    }
+   ],
+   "source": [
+    "model = LogisticRegression()\n",
+    "model.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "survival_predict = model.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.score(X_test, y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# best score only slightly better with bins for ages of 0, 1, 16, and 82"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}