This repository was archived by the owner on Jun 12, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathScrape.php
More file actions
75 lines (56 loc) · 1.43 KB
/
Scrape.php
File metadata and controls
75 lines (56 loc) · 1.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
<?php
/**
 * Minimal HTML scraping helper: downloads a page with cURL, extracts the text
 * found between two markers, trims unwanted leading/trailing items and
 * serialises the result as tag-free JSON.
 */
class Scrape
{
    /** @var string URL of the page to download. */
    protected $source;

    /** @var string|false|null Body of the last download (false on failure, null before any download). */
    protected $html;

    /**
     * @param string $url Target page
     */
    public function __construct($url)
    {
        $this->source = $url;
    }

    /**
     * Downloads the target page and remembers its body in $this->html.
     *
     * @return string|false The response body, or false when the transfer fails.
     */
    public function getHtml()
    {
        $ch = curl_init();
        if ($ch === false) {
            $this->html = false;

            return false;
        }

        curl_setopt($ch, CURLOPT_URL, $this->source);
        // Without this option curl_exec() echoes the page and only returns true,
        // so the HTML would never reach $this->html or the caller.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        $this->html = curl_exec($ch);

        // cURL handles are objects that are freed automatically since PHP 8.0;
        // an explicit close is only needed for the older resource-based handles.
        if (PHP_VERSION_ID < 80000) {
            curl_close($ch);
        }

        return $this->html;
    }

    /**
     * Collects everything found between each $starttag ... $endtag pair.
     *
     * Both markers are inserted into the regular expression as-is (they are not
     * quoted), and matching is case-insensitive with "." also matching newlines.
     *
     * @param string $resource Html that requires processing
     * @param string $starttag First target tag/string to search by
     * @param string $endtag   Last target tag/string to search by
     * @return array Contents found between the markers (empty when nothing
     *               matches or the resulting pattern is invalid)
     */
    public function getItemsByTag($resource, $starttag, $endtag)
    {
        // The historical delimiter is a single quote, which breaks the pattern
        // as soon as a marker contains one (e.g. <p class='x'>). Pick the first
        // candidate that occurs in neither marker; the quote stays first so
        // patterns that worked before are built exactly as before.
        $markers = $starttag . $endtag;
        $delimiter = "'";
        foreach (["'", '~', '#', '%', '@', '!', "\x01", "\x02"] as $candidate) {
            if (strpos($markers, $candidate) === false) {
                $delimiter = $candidate;
                break;
            }
        }

        $pattern = $delimiter . $starttag . '(.*?)' . $endtag . $delimiter . 'si';
        $found = preg_match_all($pattern, $resource, $result);

        // preg_match_all() returns false on a broken pattern; keep the documented
        // array return type instead of reading an index that does not exist.
        if ($found === false || !isset($result[1])) {
            return [];
        }

        return $result[1];
    }

    /**
     * Drops items from both ends of a list.
     *
     * Numeric keys are re-indexed from 0 only when at least one item is removed
     * from the beginning; string keys are always preserved.
     *
     * @param array $data      Items to trim
     * @param int   $beginning Number of items to remove from the beginning
     * @param int   $end       Number of items to remove from the end
     * @return array The trimmed data (empty when more items are removed than exist)
     */
    public function trimData($data, $beginning, $end)
    {
        // Non-positive counts remove nothing; fractional counts round up.
        $skip = $beginning > 0 ? (int) ceil((float) $beginning) : 0;
        $drop = $end > 0 ? (int) ceil((float) $end) : 0;

        $keep = count($data) - $skip - $drop;
        if ($keep <= 0) {
            return [];
        }

        // One slice instead of repeated array_shift(), which re-indexes the
        // whole array on every call.
        return array_slice($data, $skip, $keep, $skip === 0);
    }

    /**
     * Converts scraped data to JSON with newlines and HTML tags removed from
     * every string value.
     *
     * The values are cleaned before encoding. Cleaning the encoded JSON text
     * instead lets strip_tags() swallow quotes and brackets after an unclosed
     * "<", and turns a literal backslash followed by "n" into an invalid escape.
     * Array keys and non-string values are left untouched.
     *
     * @param array $data Raw scraped data
     * @return string JSON document, or an empty string when encoding fails
     */
    public function cleanData($data)
    {
        $json = json_encode($this->cleanValue($data));

        return $json === false ? '' : $json;
    }

    /**
     * Recursively removes newlines and HTML tags from string values.
     *
     * @param mixed $value Value to clean
     * @return mixed The cleaned value
     */
    private function cleanValue($value)
    {
        if (is_string($value)) {
            return strip_tags(str_replace("\n", '', $value));
        }

        if (is_array($value)) {
            foreach ($value as $key => $item) {
                $value[$key] = $this->cleanValue($item);
            }
        }

        return $value;
    }
}
?>