Skip to content

Commit 4c653cd

Browse files
committed
программирование бпла
1 parent 7108e64 commit 4c653cd

File tree

6 files changed

+699
-9
lines changed

6 files changed

+699
-9
lines changed

data_sci_from_scratch/1.py

+201-4
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,219 @@
1919

2020
# Map every user id to an (initially empty) list of friend ids.
friendships = {user["id"]: [] for user in users}

print(*users)
print(friendships)  # every friend list is still empty at this point

# Populate the friend lists; each pair is recorded in both directions.
for i, j in friendship_pairs:
    friendships[i].append(j)  # Add j as a friend of user i
    friendships[j].append(i)  # Add i as a friend of user j

print(friendships)
3131

3232
def number_of_friends(user):
    """Return how many friends *user* has.

    The count is the length of the user's entry in the module-level
    ``friendships`` mapping, looked up by ``user["id"]``. A user id that
    is missing from ``friendships`` raises ``KeyError``.
    """
    return len(friendships[user["id"]])
3537

3638
total_connections = sum(number_of_friends(user) for user in users)

num_users = len(users)  # Length of the users list
avg_connections = total_connections / num_users  # 24 / 10 = 2.4

# Build a list of (user id, number of friends) pairs.
num_friends_by_id = [(user["id"], number_of_friends(user)) for user in users]

# Sort the list in place by number of friends, in descending order.
num_friends_by_id.sort(
    key=lambda id_and_friends: id_and_friends[1],  # by num_friends
    reverse=True,
)
50+
51+
def foaf_ids_bad(user):
    """Return the ids of the friends of *user*'s friends ("foaf").

    This is the naive version: the result is a flat list that keeps
    duplicates and does not filter out the user or the user's direct
    friends.
    """
    return [
        foaf_id
        for friend_id in friendships[user["id"]]
        for foaf_id in friendships[friend_id]
    ]
56+
57+
# User 0 shows up in its own foaf list, and users reachable through several
# friends are repeated.
assert foaf_ids_bad(users[0]) == [0, 2, 3, 0, 1, 3]

print(friendships[0])  # [1, 2]
print(friendships[1])  # [0, 2, 3]
print(friendships[2])  # [0, 1, 3]

assert friendships[0] == [1, 2]
assert friendships[1] == [0, 2, 3]
assert friendships[2] == [0, 1, 3]
67+
68+
from collections import Counter # not loaded by default
69+
70+
def friends_of_friends(user):
    """Count how often each friend-of-a-friend is reachable from *user*.

    Returns a ``Counter`` keyed by user id. Each value is the number of
    *user*'s friends whose friend list contains that id. The user and the
    user's direct friends are left out.
    """
    user_id = user["id"]
    friend_ids = friendships[user_id]
    # Built once so each candidate is rejected with a set lookup instead of
    # re-scanning the friend list.
    excluded = set(friend_ids) | {user_id}
    return Counter(
        foaf_id
        for friend_id in friend_ids            # For each of my friends,
        for foaf_id in friendships[friend_id]  # find their friends
        if foaf_id not in excluded             # who aren't me or my friends.
    )
79+
80+
81+
print(friends_of_friends(users[3]))  # Counter({0: 2, 5: 1})

assert friends_of_friends(users[3]) == Counter({0: 2, 5: 1})
85+
86+
# (user_id, interest) pairs, grouped here by user id.
interests = [
    (0, "Hadoop"), (0, "Big Data"), (0, "HBase"), (0, "Java"),
    (0, "Spark"), (0, "Storm"), (0, "Cassandra"),
    (1, "NoSQL"), (1, "MongoDB"), (1, "Cassandra"), (1, "HBase"),
    (1, "Postgres"),
    (2, "Python"), (2, "scikit-learn"), (2, "scipy"),
    (2, "numpy"), (2, "statsmodels"), (2, "pandas"),
    (3, "R"), (3, "Python"),
    (3, "statistics"), (3, "regression"), (3, "probability"),
    (4, "machine learning"), (4, "regression"), (4, "decision trees"),
    (4, "libsvm"),
    (5, "Python"), (5, "R"), (5, "Java"), (5, "C++"),
    (5, "Haskell"), (5, "programming languages"),
    (6, "statistics"), (6, "probability"), (6, "mathematics"), (6, "theory"),
    (7, "machine learning"), (7, "scikit-learn"), (7, "Mahout"),
    (7, "neural networks"),
    (8, "neural networks"), (8, "deep learning"),
    (8, "Big Data"), (8, "artificial intelligence"),
    (9, "Hadoop"), (9, "Java"), (9, "MapReduce"), (9, "Big Data"),
]
102+
103+
def data_scientists_who_like(target_interest):
    """Find the ids of all users who like the target interest.

    Scans the module-level ``interests`` list of (user_id, interest) pairs
    and keeps the ids whose interest equals ``target_interest`` exactly
    (plain ``==`` comparison), in list order.
    """
    return [
        user_id
        for user_id, user_interest in interests
        if user_interest == target_interest
    ]
108+
109+
from collections import defaultdict
110+
111+
# Keys are interests, values are lists of user_ids with that interest
user_ids_by_interest = defaultdict(list)

# Keys are user_ids, values are lists of interests for that user_id.
interests_by_user_id = defaultdict(list)

# One pass over the pairs fills both indexes.
for user_id, interest in interests:
    user_ids_by_interest[interest].append(user_id)
    interests_by_user_id[user_id].append(interest)
122+
123+
def most_common_interests_with(user):
    """Count the interests each other user shares with *user*.

    Returns a ``Counter`` keyed by the other users' ids. Each value is the
    number of *user*'s interests that the other user is also listed under
    in ``user_ids_by_interest``. The user's own id is excluded.
    """
    user_id = user["id"]
    return Counter(
        interested_user_id
        for interest in interests_by_user_id[user_id]
        for interested_user_id in user_ids_by_interest[interest]
        if interested_user_id != user_id
    )
130+
131+
# (salary, tenure) pairs.
salaries_and_tenures = [
    (83000, 8.7), (88000, 8.1),
    (48000, 0.7), (76000, 6),
    (69000, 6.5), (76000, 7.5),
    (60000, 2.5), (83000, 10),
    (48000, 1.9), (63000, 4.2),
]

# Keys are years, values are lists of the salaries for each tenure.
salary_by_tenure = defaultdict(list)

for salary, tenure in salaries_and_tenures:
    salary_by_tenure[tenure].append(salary)

# Keys are years, each value is average salary for that tenure.
average_salary_by_tenure = {
    tenure: sum(salaries) / len(salaries)
    for tenure, salaries in salary_by_tenure.items()
}

# Every tenure value occurs once in the data, so each "average" is simply
# that one salary.
assert average_salary_by_tenure == {
    0.7: 48000.0,
    1.9: 48000.0,
    2.5: 60000.0,
    4.2: 63000.0,
    6: 76000.0,
    6.5: 69000.0,
    7.5: 76000.0,
    8.1: 88000.0,
    8.7: 83000.0,
    10: 83000.0,
}
173+
174+
def tenure_bucket(tenure: float) -> str:
    """Return the label of the tenure bucket that *tenure* falls into.

    Values below 2 map to "less than two", values from 2 up to (but not
    including) 5 map to "between two and five", and everything else maps
    to "more than five".
    """
    if tenure < 2:
        return "less than two"
    if tenure < 5:
        return "between two and five"
    return "more than five"
181+
182+
# Keys are tenure buckets, values are lists of salaries for that bucket.
salary_by_tenure_bucket = defaultdict(list)

for salary, tenure in salaries_and_tenures:
    bucket = tenure_bucket(tenure)
    salary_by_tenure_bucket[bucket].append(salary)

# Keys are tenure buckets, values are average salary for that bucket.
# The loop variable is called `label` so it does not reuse the name of the
# tenure_bucket() function.
average_salary_by_bucket = {
    label: sum(salaries) / len(salaries)
    for label, salaries in salary_by_tenure_bucket.items()
}

assert average_salary_by_bucket == {
    'between two and five': 61500.0,
    'less than two': 48000.0,
    'more than five': 79166.66666666667,
}
205+
206+
def predict_paid_or_unpaid(years_experience: float) -> str:
    """Return "paid" or "unpaid" from years of experience alone.

    The result is "paid" below 3.0 years and from 8.5 years upwards, and
    "unpaid" for anything in between (3.0 inclusive, 8.5 exclusive).
    """
    if years_experience < 3.0:
        return "paid"
    if years_experience < 8.5:
        return "unpaid"
    return "paid"
213+
214+
# Reuses the module-level `interests` list of (user_id, interest) pairs
# defined earlier in this file.

# Count every lower-cased, whitespace-separated word across all interests.
words_and_counts = Counter(
    word
    for _user_id, interest in interests
    for word in interest.lower().split()
)

# Print only the words that occur more than once, most frequent first.
for word, count in words_and_counts.most_common():
    if count > 1:
        print(word, count)

0 commit comments

Comments
 (0)