Skip to content

Commit da16a76

Browse files
author
precious
committed
first commit
0 parents  commit da16a76

File tree

11 files changed

+8476
-0
lines changed

11 files changed

+8476
-0
lines changed

error.log

Whitespace-only changes.

imdb_find_movie.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/usr/bin/python
2+
3+
import urllib
4+
# import urllib2
5+
import sys
6+
import json
7+
import imdb_find_movie2
8+
from romanization import romanize
9+
10+
11+
def get_movie_url(movie_data):
    """Return the IMDb title URL for a movie record, or None when not found.

    movie_data is a '*'-separated string with at least three fields.
    Field 1 is used as the search title when it is non-empty, otherwise
    the romanized field 0 is used; field 2 is sent as the year.

    The www.imdbapi.com web API is tried first.  If it is unreachable,
    answers with something that is not a JSON object, or has no 'ID' key,
    the lookup falls back to imdb_find_movie2.get_movie_url.  When both
    fail, the searched title is appended to error.log and None is returned.

    Raises ValueError if movie_data has fewer than three fields.
    """
    url = 'http://www.imdbapi.com/'
    data_list = movie_data.split('*')
    if len(data_list) < 3:
        raise ValueError('invalid movie data')

    params_dict = {
        't': data_list[1] if data_list[1] else romanize(data_list[0]),
        'y': data_list[2],
    }

    # first trying to find movie using api
    try:
        response = urllib.urlopen(url + '?' + urllib.urlencode(params_dict))
        try:
            response_dict = json.loads(response.read())
        finally:
            # always release the connection, even if the body is not JSON
            response.close()
    except (IOError, ValueError):
        # API down or returned garbage: treat it as a miss so the
        # fallback below still gets a chance
        response_dict = {}
    if isinstance(response_dict, dict) and 'ID' in response_dict:
        return 'http://www.imdb.com/title/' + response_dict['ID'] + '/'

    # then trying own function
    movie_url = imdb_find_movie2.get_movie_url(movie_data)
    if movie_url:
        return movie_url

    # the log is only ever appended to, so plain 'a' is enough
    with open('error.log', 'a') as err_file:
        err_file.write(params_dict['t'])
        err_file.write(' | movie not found\n')
    return None
35+
36+
37+
if __name__ == "__main__":
    # Records come from the command line when any are given, otherwise
    # from stdin, one record per line.
    cli_records = sys.argv[1:]
    if cli_records:
        for record in cli_records:
            found = get_movie_url(record)
            if found:
                print(found)
    else:
        # An empty result after strip() means EOF or a blank line;
        # either one ends the run.
        line = sys.stdin.readline().strip()
        while line != '':
            found = get_movie_url(line)
            if found:
                print(found)
            line = sys.stdin.readline().strip()
49+

imdb_find_movie2.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#!/usr/bin/python
2+
3+
import urllib
4+
import urllib2
5+
import re
6+
import sys
7+
from romanization import romanize
8+
from parser import get_between, remove_tags
9+
10+
imdb_url_str = 'http://www.imdb.com'
11+
12+
def get_response(path, headers=None):
    """Open imdb_url_str + path and return the urllib2 response object.

    A browser-like User-Agent header is always sent.  Entries in the
    optional headers mapping are added to the request and override the
    default User-Agent when they use the same key.

    The caller is responsible for closing the returned response.
    """
    headers_dict = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1"}
    if headers:
        # previously this argument was accepted but never used
        headers_dict.update(headers)
    request = urllib2.Request(imdb_url_str + path, None, headers_dict)
    return urllib2.urlopen(request)
17+
18+
def check_imdb_movie_year(path, year):
    """Return True if year occurs in the page's <h1 class="header"> element.

    path is fetched through get_response (so it is relative to
    imdb_url_str).  Returns False when the page contains no such h1
    element instead of failing on the missing match.
    """
    response = get_response(path)
    try:
        page = response.read()
    finally:
        response.close()

    re_html_h1_header = re.compile('(?P<h1><h1.*?class="header".*?</h1>)', re.U | re.M | re.I | re.S)
    h1_match = re_html_h1_header.search(page)
    if h1_match is None:
        # layout changed or not a title page: the year cannot be confirmed
        return False
    return year in h1_match.group('h1')
24+
25+
def get_movie_url(movie_data):
    """Find a movie through the IMDb site search; return its URL or None.

    movie_data is a '*'-separated string with at least three fields.
    Field 1 is the query when non-empty, otherwise the romanized field 0;
    field 2 is the year used to confirm candidates.

    Three strategies are tried in order:
      1. the search request ended up on a /title/... URL -> return that URL;
      2. the first /title/... link in the result page, accepted only if
         check_imdb_movie_year confirms the year;
      3. the first row of the 'Titles (Exact Matches)' table whose markup
         contains the year.

    Raises ValueError if movie_data has fewer than three fields.
    """
    data_list = movie_data.split('*')
    if len(data_list) < 3:
        # same contract as imdb_find_movie.get_movie_url, instead of an
        # IndexError from the field accesses below
        raise ValueError('invalid movie data')

    params_dict = {
        's': 'tt',
        'q': data_list[1] if data_list[1] else romanize(data_list[0]),
    }
    re_movie_url = re.compile(r'(?P<path>/title/[\d\w]+/)')

    response = get_response('/find?' + urllib.urlencode(params_dict))
    try:
        # first check whether response is desired movie page
        response_url = response.geturl()
        if re_movie_url.search(response_url):
            return response_url
        response_str = response.read()
    finally:
        response.close()

    # then check 1st link to movie in response page
    url_match = re_movie_url.search(response_str)
    if url_match and check_imdb_movie_year(url_match.group('path'), data_list[2]):
        return imdb_url_str + url_match.group('path')

    # finally try to find movie in exact matches table
    marker_pos = response_str.find('Titles (Exact Matches)')
    if marker_pos != -1:
        # NOTE(review): get_between comes from the project's parser module;
        # presumably it returns the text between the two delimiters,
        # searching from marker_pos - confirm against its definition.
        table_str = get_between(response_str, '<table>', '</table>', marker_pos)
        for row in re.findall(r'<tr>.*?</tr>', table_str, flags=re.I | re.M | re.U | re.S):
            if data_list[2] in row:
                return imdb_url_str + get_between(row, 'href="', '"')

    return None
52+

index.html

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
2+
<html>
3+
<head>
4+
<title>КиноПоиск.ru - Все фильмы планеты</title>
5+
<style>
6+
td {
7+
font:100 13px tahoma, verdana;
8+
}
9+
a {color:#000055}
10+
</style>
11+
<META HTTP-EQUIV="Pragma" CONTENT="no-cache">
12+
</head>
13+
<body><center>
14+
<table cellpadding=5 cellspacing=0 width=500>
15+
<tr>
16+
<td><img src="/images/logonew2.gif" width=210 height=55></td>
17+
</tr>
18+
<tr>
19+
<td style="padding-left:10px">
20+
<br>
21+
Если вы видите эту страницу, значит с вашего IP-адреса поступило необычно много запросов.
22+
Система защиты от роботов (СЗоР) решила, что вместо вас действует программа, и ограничила доступ.
23+
<br><br><small>Если вы парсите сайт ради рейтинга, пожалуйста используйте <a href='http://www.kinopoisk.ru/rating/462666.xml' target='_blank'>xml</a> версию рейтинга.</small>
24+
<!-- <br><br>Чтобы продолжить работу, введите число, изображенное ниже. На это роботы не способны, по крайней мере, большинство из них. -->
25+
26+
<form action=? method=post>
27+
<center>
28+
</form>
29+
30+
<!--<br><br><br><a href="javascript:" onclick="document.getElementById('mailto').style.display='block'" style="color:#777">если ничего не помогает >></a>--><br><br>
31+
<div id="mailto" style="width:480px; text-align:left; display:block; background-color:#f2f2f2; padding:15px">
32+
Отправьте письмо на адрес: <a href="mailto:[email protected]">[email protected]</a>, с указанием этих данных:<br><br>
33+
<b>IP адрес:</b> 46.118.131.189
34+
<br>
35+
<b>UserAgent:</b> Wget/1.13.4 (linux-gnu)
36+
<br>
37+
<b>Referer:</b>
38+
<br>
39+
<b>Ht:</b> 1
40+
</div>
41+
<br>
42+
</td>
43+
</tr>
44+
<tr>
45+
<td style="padding:25px 10px;font-size:11px;color:999"><hr style="height:1px">2012 (c) КиноПоиск.ru
46+
</td>
47+
</tr>
48+
</table>
49+
<[email protected] COUNTER--><script language="JavaScript" type="text/javascript"><!--
50+
d=document;var a='';a+=';r='+escape(d.referrer)
51+
js=10//--></script><script language="JavaScript1.1" type="text/javascript"><!--
52+
a+=';j='+navigator.javaEnabled()
53+
js=11//--></script><script language="JavaScript1.2" type="text/javascript"><!--
54+
s=screen;a+=';s='+s.width+'*'+s.height
55+
a+=';d='+(s.colorDepth?s.colorDepth:s.pixelDepth)
56+
js=12//--></script><script language="JavaScript1.3" type="text/javascript"><!--
57+
js=13//--></script><script language="JavaScript" type="text/javascript"><!--
58+
d.write('<a href="http://top.mail.ru/jump?from=925296"'+
59+
' target=_top><img src="http://top.list.ru/counter'+
60+
'?id=925296;t=51;js='+js+a+';rand='+Math.random()+
61+
'" alt="Ðåéòèíã@Mail.ru"'+' border=0 height=1 width=1/><\/a>')
62+
if(11<js)d.write('<'+'!-- ')//--></script><noscript><a
63+
target=_top href="http://top.mail.ru/jump?from=925296"><img
64+
src="http://top.list.ru/counter?js=na;id=925296;t=51"
65+
border=0 height=1 width=1
66+
alt="Ðåéòèíã@Mail.ru"/></a></noscript><script language="JavaScript" type="text/javascript"><!--
67+
if(11<js)d.write('--'+'>')//--></script><!--/COUNTER-->
68+
69+
<!--LiveInternet counter--><script type="text/javascript"><!--
70+
document.write("<img src='http://counter.yadro.ru/hit?r"+
71+
escape(document.referrer)+((typeof(screen)=="undefined")?"":
72+
";s"+screen.width+"*"+screen.height+"*"+(screen.colorDepth?
73+
screen.colorDepth:screen.pixelDepth))+";u"+escape(document.URL)+
74+
";"+Math.random()+
75+
"' width=1 height=1 alt=''>")//--></script><!--/LiveInternet-->
76+
77+
<!-- tns-counter.ru -->
78+
<script language="JavaScript">
79+
var img = new Image();
80+
img.src = 'http://www.tns-counter.ru/V13a***R>' + document.referrer.replace(/\*/g,'%2a') + '*kinopoisk_ru/ru/CP1251/tmsec=kinopoisk_total/';
81+
</script>
82+
<noscript>
83+
<img src="http://www.tns-counter.ru/V13a****kinopoisk_ru/ru/CP1251/tmsec=kinopoisk_total/" width="1" height="1" alt="" />
84+
</noscript>
85+
<!--/ tns-counter.ru -->
86+
87+
<script type="text/javascript">
88+
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
89+
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
90+
</script>
91+
<script type="text/javascript">
92+
var pageTracker = _gat._getTracker("UA-1078348-1");
93+
pageTracker._initData();
94+
pageTracker._trackPageview();
95+
</script>
96+
</body>
97+
</html>

0 commit comments

Comments
 (0)