Here's a PHP script that grabs a list of all films ever made (according to en.wikipedia.org):
<?php
/**
 * Collects film titles from Wikipedia's alphabetical "List of films" pages.
 *
 * Every index page is downloaded with cURL. Each list item that opens with
 * an italicised span (<li><i>...</i>) is treated as one film: the markup
 * inside the span is stripped and HTML entities are decoded.
 *
 * If any page cannot be downloaded the script is terminated with die().
 *
 * @return array Film titles (strings), in the order the pages list them.
 */
function all_movies()
{
    // Wikipedia index pages, one per letter (or group of letters).
    $urls = array(
        "https://en.wikipedia.org/wiki/List_of_films:_numbers",
        "https://en.wikipedia.org/wiki/List_of_films:_A",
        "https://en.wikipedia.org/wiki/List_of_films:_B",
        "https://en.wikipedia.org/wiki/List_of_films:_C",
        "https://en.wikipedia.org/wiki/List_of_films:_D",
        "https://en.wikipedia.org/wiki/List_of_films:_E",
        "https://en.wikipedia.org/wiki/List_of_films:_F",
        "https://en.wikipedia.org/wiki/List_of_films:_G",
        "https://en.wikipedia.org/wiki/List_of_films:_H",
        "https://en.wikipedia.org/wiki/List_of_films:_I",
        "https://en.wikipedia.org/wiki/List_of_films:_J-K",
        "https://en.wikipedia.org/wiki/List_of_films:_L",
        "https://en.wikipedia.org/wiki/List_of_films:_M",
        "https://en.wikipedia.org/wiki/List_of_films:_N-O",
        "https://en.wikipedia.org/wiki/List_of_films:_P",
        "https://en.wikipedia.org/wiki/List_of_films:_Q-R",
        "https://en.wikipedia.org/wiki/List_of_films:_S",
        "https://en.wikipedia.org/wiki/List_of_films:_T",
        "https://en.wikipedia.org/wiki/List_of_films:_U-V-W",
        "https://en.wikipedia.org/wiki/List_of_films:_X-Y-Z");

    // Accumulated titles from all pages.
    $titles = array();

    foreach ($urls as $url)
    {
        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $url);
        // Return the page body from curl_exec() instead of printing it.
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl, CURLOPT_TIMEOUT, 2);
        $buffer = curl_exec($curl);

        if (curl_errno($curl))
        {
            // curl_error() must be given the handle to report its last error.
            die ("An error occurred:".curl_error($curl));
        }

        // The lazy quantifier stops at the FIRST closing </i>, so a line that
        // contains several italic spans does not merge them into one title.
        preg_match_all("/<li><i>(.*?)<\/i>/", $buffer, $matches);

        foreach ($matches[1] as $title)
        {
            // Titles are usually wrapped in a link; drop the markup, then
            // decode entities (including quotes) as UTF-8 so the result does
            // not depend on the PHP version's default flags and charset.
            $titles[] = html_entity_decode(strip_tags($title), ENT_QUOTES, 'UTF-8');
        }
    }

    return $titles;
}
// Print the list only when this file is the script being executed directly,
// not when it is included from another file.
// SCRIPT_FILENAME can be a relative path (for example when started from the
// command line) while __FILE__ is always absolute, so resolve it first.
if (__FILE__ === realpath($_SERVER['SCRIPT_FILENAME']))
{
    header('Content-type: text/plain; charset=utf-8');
    // One title per line.
    echo implode("\n", all_movies());
}
?>
This defines the function all_movies(). When the script above is called directly, it outputs every title on its own line as plain text.