<?php

/*
Author: Florian Bersier (xpressyoo)
Date: September 2011
URL: https://github.com/xpressyoo/ScrAPI

===========================================

This work is licensed under the Creative Commons Attribution 2.0 UK: England & Wales License. To view a copy of this license, visit http://creativecommons.org/licenses/by/2.0/uk/ or send a letter to Creative Commons, 444 Castro Street, Suite 900, Mountain View, California, 94041, USA.

*/

session_start();

/**
 * Escapes a value for safe inclusion in HTML text or attribute context.
 *
 * @param mixed $value Value to print; it is cast to string first.
 * @return string HTML-escaped text.
 */
function scrapi_escape($value)
{
    return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
}

/**
 * Reads the number held by the first <b>...</b> element of a page.
 *
 * Thousands separators (",") are removed before the value is validated.
 *
 * @param string|false $html Page markup, or false when the download failed.
 * @return int|null The number, or null when the markup is missing, has no
 *                  <b> element, or that element does not hold a plain number.
 */
function scrapi_extract_count($html)
{
    if (!is_string($html) || !preg_match('#<b>(.*?)</b>#i', $html, $match)) {
        return null;
    }

    $digits = str_replace(',', '', trim($match[1]));

    return preg_match('/^[0-9]+$/', $digits) ? (int) $digits : null;
}

/**
 * Computes how many review pages are needed for a given number of reviews.
 *
 * @param int $reviewCount Total number of reviews.
 * @param int $perPage     Reviews per page (the script assumes 20).
 * @return int Page count, rounded up; 0 when there is nothing to fetch.
 */
function scrapi_page_count($reviewCount, $perPage = 20)
{
    if ($reviewCount <= 0 || $perPage <= 0) {
        return 0;
    }

    return (int) ceil($reviewCount / $perPage);
}

/**
 * Counts the characters of a review, ignoring whitespace, digits and the
 * punctuation characters " ( ) [ ] = >.
 *
 * The count is taken on the review text itself, so no serialisation
 * scaffolding or HTML-entity expansion can leak into the statistic.
 *
 * @param string $text Review text.
 * @return int Number of remaining characters (bytes if $text is not valid UTF-8).
 */
function scrapi_count_characters($text)
{
    $stripped = preg_replace('/[\s0-9"()\[\]=>]+/u', '', $text);
    if ($stripped === null) {
        // Invalid UTF-8 makes the /u pattern fail; fall back to byte-wise handling.
        return strlen((string) preg_replace('/[\s0-9"()\[\]=>]+/', '', $text));
    }

    $length = preg_match_all('/./su', $stripped, $unused);

    return $length === false ? strlen($stripped) : $length;
}

/**
 * Downloads one review page and sums the character counts of its reviews.
 *
 * Blocks whose class contains "reply" are skipped so that comments written
 * by the add-on's developer are not included.
 *
 * @param string $pageUrl URL of the review page.
 * @return int Sum of scrapi_count_characters() over the reviews found;
 *             0 when the page cannot be loaded.
 */
function scrapi_count_page_characters($pageUrl)
{
    $document = new DOMDocument();
    if (!$document->loadHTMLFile($pageUrl)) {
        return 0;
    }

    $xpath = new DOMXPath($document);
    $blocks = $xpath->query("//div[contains(@class, 'review') and not(contains(@class, 'reply'))]");
    if ($blocks === false) {
        return 0;
    }

    $total = 0;
    foreach ($blocks as $block) {
        // Relative query: first review body below this block, if there is one.
        $bodies = $xpath->query(".//p[@class='review-body']", $block);
        $body = $bodies === false ? null : $bodies->item(0);
        if ($body === null) {
            continue;
        }
        $total += scrapi_count_characters(trim($body->nodeValue));
    }

    return $total;
}

/**
 * Builds the HTML report for one add-on.
 *
 * @param mixed  $slug      Add-on name as submitted by the form (untrusted).
 * @param string $userAgent User-Agent header of the requester (untrusted).
 * @return string HTML fragment: the statistics, or a short error paragraph.
 */
function scrapi_build_report($slug, $userAgent)
{
    $slug = is_string($slug) ? trim($slug, " \t\n\r\0\x0B/") : '';
    if ($slug === '') {
        return '<p>Please provide the name of an add-on.</p>';
    }

    // Encode the slug so user input cannot add path segments or a query string.
    $url = 'https://addons.mozilla.org/en-US/firefox/addon/' . rawurlencode($slug);

    $nbreviews = scrapi_extract_count(file_get_contents($url . '/reviews/'));
    $nbusers = scrapi_extract_count(file_get_contents($url));
    if ($nbreviews === null || $nbusers === null) {
        return '<p>Unable to retrieve data for ' . scrapi_escape($url) . '.</p>';
    }

    $pages = scrapi_page_count($nbreviews);

    // Real-world markup is rarely valid; keep libxml warnings out of the page.
    $previous = libxml_use_internal_errors(true);
    $add = 0;
    for ($page = 1; $page <= $pages; $page++) {
        $add += scrapi_count_page_characters($url . '/reviews/?page=' . $page);
        libxml_clear_errors();
    }
    libxml_use_internal_errors($previous);

    // Both divisors can legitimately be zero (no users / no reviews yet).
    $ratio = $nbusers > 0 ? $nbreviews / $nbusers : 'n/a';
    $final = $nbreviews > 0 ? round($add / $nbreviews, 6) : 'n/a';

    $date = date('j/m/Y');
    $time = date('G:i');

    return "<h4>" . scrapi_escape($url) . "</h4><ul><li>Number of Reviews<br />Number of Pages<br />Number of Users<br />Ratio Reviews/Users<br />Total number of strings<br />Avg number of string per review</li><li>"
        . $nbreviews . "<br />" . $pages . "<br />" . $nbusers . "<br />" . $ratio . "<br />" . $add . "<br />" . $final
        . "</li></ul><br /><p>Data retrieved on " . $date . " at " . $time . " from " . scrapi_escape($userAgent) . "</p>";
}

?>
<html>
<head>
<style type="text/css">
body{margin:4% 0 0 3%}
ul li{list-style:none;display:inline-block;margin:0;padding:0;font-family:Arial,sans-serif;font-size:16px}
li:first-child{margin-right:20px;border-right:1px solid #333;width:250px;color:#666}
li{list-style:none;display:inline-block;padding:5px 10px;line-height:30px;vertical-align:top}
h4{background-color:lightyellow;padding:3px 7px}p{font-size:14px;color:#999}
</style>
</head>
<body>
<?php

// The add-on name is retrieved from the "url" INPUT field of the submitted form.
if (isset($_POST['submit'])) {
    echo scrapi_build_report(
        isset($_POST['url']) ? $_POST['url'] : '',
        isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : ''
    );
}

?>
</body>
</html>
0 commit comments