|
19 | 19 |
|
# Map every user id to an (initially empty) list of friend ids.
# NOTE(review): `users` and `friendship_pairs` are defined earlier in the
# file, outside this section.
friendships = {user["id"]: [] for user in users}

print(*users)
print(friendships)

# Fill in the friend lists: each pair is recorded in both directions.
for i, j in friendship_pairs:
    friendships[i].append(j)  # Add j as a friend of user i
    friendships[j].append(i)  # Add i as a friend of user j

print(friendships)
31 | 31 |
|
def number_of_friends(user):
    """Return how many friends `user` has.

    The count is the length of the friend-id list stored in the
    module-level `friendships` mapping under `user["id"]`.
    """
    user_id = user["id"]
    friend_ids = friendships[user_id]
    return len(friend_ids)
35 | 37 |
|
total_connections = sum(number_of_friends(user) for user in users)

num_users = len(users)  # length of the users list
# Original note: 24 / 10 = 2.4.  The guard keeps an empty `users` list from
# raising ZeroDivisionError.
avg_connections = total_connections / num_users if num_users else 0.0

# Build a list of (user id, number of friends) pairs ...
num_friends_by_id = [(user["id"], number_of_friends(user)) for user in users]

# ... and sort it by number of friends, in descending order.
num_friends_by_id.sort(
    key=lambda id_and_friends: id_and_friends[1],  # by num_friends
    reverse=True)
| 50 | + |
def foaf_ids_bad(user):
    """foaf is short for "friend of a friend".

    Return, for every friend of `user`, every id in that friend's friend
    list, in lookup order.  Nothing is filtered out, so the result can
    contain `user`'s own id, ids of direct friends, and repeated ids.
    """
    return [foaf_id
            for friend_id in friendships[user["id"]]
            for foaf_id in friendships[friend_id]]
| 56 | + |
assert foaf_ids_bad(users[0]) == [0, 2, 3, 0, 1, 3]

print(friendships[0])  # [1, 2]
print(friendships[1])  # [0, 2, 3]
print(friendships[2])  # [0, 1, 3]

assert friendships[0] == [1, 2]
assert friendships[1] == [0, 2, 3]
assert friendships[2] == [0, 1, 3]

from collections import Counter  # not loaded by default
| 69 | + |
def friends_of_friends(user):
    """Count the friends-of-friends of `user`.

    Returns a Counter mapping each friend-of-a-friend id to the number of
    `user`'s friends whose friend list contains it.  `user`'s own id and
    the ids of `user`'s direct friends are left out.
    """
    user_id = user["id"]
    direct_friends = friendships[user_id]
    # Build the exclusion set once instead of re-scanning the friend list
    # for every candidate.
    excluded = {user_id, *direct_friends}
    return Counter(
        foaf_id
        for friend_id in direct_friends        # For each of my friends,
        for foaf_id in friendships[friend_id]  # find their friends
        if foaf_id not in excluded             # who aren't me or my friends.
    )
| 79 | + |
| 80 | + |
print(friends_of_friends(users[3]))  # Counter({0: 2, 5: 1})

assert friends_of_friends(users[3]) == Counter({0: 2, 5: 1})
| 85 | + |
# (user_id, interest) pairs, grouped by user id.
interests = [
    (0, "Hadoop"), (0, "Big Data"), (0, "HBase"), (0, "Java"),
    (0, "Spark"), (0, "Storm"), (0, "Cassandra"),
    (1, "NoSQL"), (1, "MongoDB"), (1, "Cassandra"), (1, "HBase"),
    (1, "Postgres"),
    (2, "Python"), (2, "scikit-learn"), (2, "scipy"),
    (2, "numpy"), (2, "statsmodels"), (2, "pandas"),
    (3, "R"), (3, "Python"),
    (3, "statistics"), (3, "regression"), (3, "probability"),
    (4, "machine learning"), (4, "regression"), (4, "decision trees"),
    (4, "libsvm"),
    (5, "Python"), (5, "R"), (5, "Java"), (5, "C++"),
    (5, "Haskell"), (5, "programming languages"),
    (6, "statistics"), (6, "probability"), (6, "mathematics"), (6, "theory"),
    (7, "machine learning"), (7, "scikit-learn"), (7, "Mahout"),
    (7, "neural networks"),
    (8, "neural networks"), (8, "deep learning"),
    (8, "Big Data"), (8, "artificial intelligence"),
    (9, "Hadoop"), (9, "Java"), (9, "MapReduce"), (9, "Big Data"),
]
| 102 | + |
def data_scientists_who_like(target_interest):
    """Find the ids of all users who like the target interest.

    Scans the module-level `interests` list of (user_id, interest) pairs
    and returns the matching user ids in list order.  The comparison is an
    exact string match.
    """
    return [user_id
            for user_id, user_interest in interests
            if user_interest == target_interest]
| 108 | + |
from collections import defaultdict

# Keys are interests, values are lists of user_ids with that interest
user_ids_by_interest = defaultdict(list)

# Keys are user_ids, values are lists of interests for that user_id.
interests_by_user_id = defaultdict(list)

# One pass over the pairs fills both indexes.
for user_id, interest in interests:
    user_ids_by_interest[interest].append(user_id)
    interests_by_user_id[user_id].append(interest)
| 122 | + |
def most_common_interests_with(user):
    """Count how many interests each other user shares with `user`.

    Returns a Counter mapping another user's id to the number of `user`'s
    interests that user also has.  `user`'s own id is left out.
    """
    user_id = user["id"]
    return Counter(
        interested_user_id
        for interest in interests_by_user_id[user_id]
        for interested_user_id in user_ids_by_interest[interest]
        if interested_user_id != user_id
    )
| 130 | + |
# (salary, tenure) pairs.
salaries_and_tenures = [(83000, 8.7), (88000, 8.1),
                        (48000, 0.7), (76000, 6),
                        (69000, 6.5), (76000, 7.5),
                        (60000, 2.5), (83000, 10),
                        (48000, 1.9), (63000, 4.2)]

# Keys are years, values are lists of the salaries for each tenure.
salary_by_tenure = defaultdict(list)

for salary, tenure in salaries_and_tenures:
    salary_by_tenure[tenure].append(salary)

# Keys are years, each value is average salary for that tenure.
average_salary_by_tenure = {
    tenure: sum(salaries) / len(salaries)
    for tenure, salaries in salary_by_tenure.items()
}

# Every tenure in the sample data is distinct, so each average is simply
# that one person's salary.
assert average_salary_by_tenure == {
    0.7: 48000.0,
    1.9: 48000.0,
    2.5: 60000.0,
    4.2: 63000.0,
    6: 76000.0,
    6.5: 69000.0,
    7.5: 76000.0,
    8.1: 88000.0,
    8.7: 83000.0,
    10: 83000.0,
}
| 173 | + |
def tenure_bucket(tenure):
    """Return the bucket label for a tenure value.

    Values below 2 map to "less than two", values from 2 up to (but not
    including) 5 map to "between two and five", and everything else,
    including exactly 5, maps to "more than five".
    """
    if tenure < 2:
        return "less than two"
    if tenure < 5:
        return "between two and five"
    return "more than five"
| 181 | + |
# Keys are tenure buckets, values are lists of salaries for that bucket.
salary_by_tenure_bucket = defaultdict(list)

for salary, tenure in salaries_and_tenures:
    bucket = tenure_bucket(tenure)
    salary_by_tenure_bucket[bucket].append(salary)

# Keys are tenure buckets, values are average salary for that bucket.
# The loop variable is named `bucket_name` so it does not shadow the
# `tenure_bucket` function.
average_salary_by_bucket = {
    bucket_name: sum(salaries) / len(salaries)
    for bucket_name, salaries in salary_by_tenure_bucket.items()
}

assert average_salary_by_bucket == {
    'between two and five': 61500.0,
    'less than two': 48000.0,
    'more than five': 79166.66666666667,
}
| 205 | + |
def predict_paid_or_unpaid(years_experience):
    """Return "paid" or "unpaid" for the given years of experience.

    The result is "unpaid" when `years_experience` is at least 3.0 and
    below 8.5, and "paid" otherwise (below 3.0, or 8.5 and above).
    """
    if 3.0 <= years_experience < 8.5:
        return "unpaid"
    return "paid"
| 213 | + |
# (user_id, interest) pairs.  This repeats the `interests` list defined
# earlier in the file with identical contents.
interests = [
    (0, "Hadoop"), (0, "Big Data"), (0, "HBase"), (0, "Java"),
    (0, "Spark"), (0, "Storm"), (0, "Cassandra"),
    (1, "NoSQL"), (1, "MongoDB"), (1, "Cassandra"), (1, "HBase"),
    (1, "Postgres"),
    (2, "Python"), (2, "scikit-learn"), (2, "scipy"),
    (2, "numpy"), (2, "statsmodels"), (2, "pandas"),
    (3, "R"), (3, "Python"),
    (3, "statistics"), (3, "regression"), (3, "probability"),
    (4, "machine learning"), (4, "regression"), (4, "decision trees"),
    (4, "libsvm"),
    (5, "Python"), (5, "R"), (5, "Java"), (5, "C++"),
    (5, "Haskell"), (5, "programming languages"),
    (6, "statistics"), (6, "probability"), (6, "mathematics"), (6, "theory"),
    (7, "machine learning"), (7, "scikit-learn"), (7, "Mahout"),
    (7, "neural networks"),
    (8, "neural networks"), (8, "deep learning"),
    (8, "Big Data"), (8, "artificial intelligence"),
    (9, "Hadoop"), (9, "Java"), (9, "MapReduce"), (9, "Big Data"),
]

# Count every lowercased, whitespace-separated word across all interests.
words_and_counts = Counter(word
                           for _user_id, interest in interests
                           for word in interest.lower().split())

# Print only the words that occur more than once, most frequent first.
for word, count in words_and_counts.most_common():
    if count > 1:
        print(word, count)
0 commit comments