commit c346c0ecaf4d5ed214896f4544a987690773667c
Author: Gary Buhrmaster <gary.buhrmaster(a)gmail.com>
Date: Tue Jan 26 17:38:10 2021 +0000
update to recent upstream patches
0001-Reenable-tv_grab_ch_search.patch | 65 +
0002-README.md-update-TOC.patch | 26 +
...EADME.md-refresh-req-d-rec-d-modules-list.patch | 32 +
0004-tv_imdb-fix-some-typos.patch | 48 +
0005-tv_imdb-use-warnings.patch | 30 +
0006-tv_imdb-refresh-short-description-POD.patch | 29 +
0007-tv_grab_zz_sdjson_sqlite-fix-a-typo.patch | 26 +
...-Update-tv_grab_eu_xmltvse-to-use-SSL-116.patch | 50 +
...e-version-for-cherry-pick-typo-correction.patch | 33 +
...me-detect-parental-level-with-white-space.patch | 27 +
0011-Remove-swedb-grabber-117.patch | 678 +++
0012-ampparit-add-missing-empty-title-check.patch | 39 +
0013-telsu-add-missing-empty-title-check.patch | 77 +
...tvguide-fix-for-missing-form-options-in-c.patch | 31 +
...oid-break-when-website-object-missing-125.patch | 77 +
...-debug-is-on-stdout-should-be-on-stderr-1.patch | 28 +
...ogramme-data-via-SSL-avoids-301-redirects.patch | 33 +
...atability-with-older-versions-of-Perl-122.patch | 25 +
...tional-file-if-its-prepStage-is-specifica.patch | 26 +
0020-Unbreak-parsing-of-keywords-file.patch | 25 +
...emory-consumption-in-building-database-63.patch | 79 +
...ce-spaces-with-tabs-and-prettify-the-code.patch | 6375 ++++++++++++++++++++
0023-iltapulu-fix-channel-parser.patch | 77 +
0024-iltapulu-fix-grab-parser.patch | 199 +
...source-avoid-name-clashes-between-modules.patch | 65 +
...onf-update-to-latest-list-channels-output.patch | 266 +
...l-ids-compliant-with-the-DTD.-Use-legacyc.patch | 66 +
0028-Change-whitespace-to-tabs.patch | 434 ++
0029-Add-info-message-about-frozen-IMDb-data.patch | 48 +
...ented-sample-option-to-limit-records-proc.patch | 104 +
...uce-memory-usage-during-final-build-stage.patch | 25 +
...emove-tv-episodes-from-intermediate-files.patch | 76 +
0033-eu_xmltvse-refresh-test.conf.patch | 254 +
...ble-grabber-after-source-site-disappeared.patch | 42 +
...ite-for-the-change-to-episode-handling-63.patch | 24 +
...ows-xmltv.exe-to-use-PAR-Packer-rather-th.patch | 251 +
0037-extend-scope-of-title-person-qualifier.patch | 77 +
0038-eu-epgdata-Add-channel-IDs.patch | 21 +
0039-whitespace-changes.patch | 62 +
...ory-usage-during-database-build-bug-fixes.patch | 3074 ++++++++++
0041-bugfixes-in-augment-function.patch | 401 ++
0042-Add-tests-for-edge-cases.patch | 1112 ++++
0043-Use-disc-sort-to-reduce-memory-usage-63.patch | 840 +++
...exclude-tv-series-from-the-database-build.patch | 74 +
0045-fix-broken-url-to-imdb-website.patch | 915 +++
0046-fix-minor-typo-in-example.patch | 25 +
0047-fix-broken-tests.patch | 47 +
...n-channel-id-exp-to-filter-by-regex-on-ch.patch | 105 +
...added-tests-for-new-option-channel-id-exp.patch | 689 +++
...ndows-xmltv.exe-s-PAR-Packer-based-build-.patch | 145 +
xmltv.spec | 57 +-
51 files changed, 17433 insertions(+), 1 deletion(-)
---
diff --git a/0001-Reenable-tv_grab_ch_search.patch b/0001-Reenable-tv_grab_ch_search.patch
new file mode 100644
index 0000000..db47b83
--- /dev/null
+++ b/0001-Reenable-tv_grab_ch_search.patch
@@ -0,0 +1,65 @@
+From cb029fc6ea0b7ec688d7b881806e699119353458 Mon Sep 17 00:00:00 2001
+From: Patric Mueller <bhaak(a)gmx.net>
+Date: Sun, 23 Aug 2020 00:40:38 +0200
+Subject: [PATCH 01/50] Reenable tv_grab_ch_search
+
+Site layout slightly changed by adding new class names to existing class
+attributes.
+---
+ Makefile.PL | 26 +++++++++++++-------------
+ grab/ch_search/tv_grab_ch_search.in | 2 +-
+ 2 files changed, 14 insertions(+), 14 deletions(-)
+
+diff --git a/Makefile.PL b/Makefile.PL
+index 3eb12a33..5d3be741 100644
+--- a/Makefile.PL
++++ b/Makefile.PL
+@@ -277,19 +277,19 @@ my @opt_components
+ 'HTTP::Cookies' => 0, },
+ },
+
+- # { name => 'tv_grab_ch_search',
+- # blurb => 'Grabber for Switzerland',
+- # exes => [ 'grab/ch_search/tv_grab_ch_search' ],
+- # deps => [ 'grab/ch_search/tv_grab_ch_search' => [ 'grab/ch_search/tv_grab_ch_search.in' ] ],
+- # pl_files => { 'grab/ch_search/tv_grab_ch_search.PL' => 'grab/ch_search/tv_grab_ch_search' },
+- # to_clean => [ 'grab/ch_search/tv_grab_ch_search' ],
+- # grab_need_share => [ 'ch_search' ],
+- # prereqs => { 'HTML::Entities' => 1.27,
+- # 'HTML::TreeBuilder' => 0,
+- # 'HTTP::Cookies' => 0,
+- # 'URI::Escape' => 0,
+- # 'URI::URL' => 0, },
+- # },
++ { name => 'tv_grab_ch_search',
++ blurb => 'Grabber for Switzerland',
++ exes => [ 'grab/ch_search/tv_grab_ch_search' ],
++ deps => [ 'grab/ch_search/tv_grab_ch_search' => [ 'grab/ch_search/tv_grab_ch_search.in' ] ],
++ pl_files => { 'grab/ch_search/tv_grab_ch_search.PL' => 'grab/ch_search/tv_grab_ch_search' },
++ to_clean => [ 'grab/ch_search/tv_grab_ch_search' ],
++ grab_need_share => [ 'ch_search' ],
++ prereqs => { 'HTML::Entities' => 1.27,
++ 'HTML::TreeBuilder' => 0,
++ 'HTTP::Cookies' => 0,
++ 'URI::Escape' => 0,
++ 'URI::URL' => 0, },
++ },
+
+ { name => 'tv_grab_dk_dr',
+ blurb => 'Grabber for Denmark (dr.dk)',
+diff --git a/grab/ch_search/tv_grab_ch_search.in b/grab/ch_search/tv_grab_ch_search.in
+index 3bc1dbf6..f3502df3 100755
+--- a/grab/ch_search/tv_grab_ch_search.in
++++ b/grab/ch_search/tv_grab_ch_search.in
+@@ -374,7 +374,7 @@ sub parse_page {
+ foreach my $tv_channel ( $tb->look_down('class' => 'sl-card tv-index-channel') ) {
+ my $channel_id = substr($tv_channel->attr('id'), 3); # tv-sf1 -> sf1
+ if ( defined($channel_id) ) {
+- foreach my $tv_show ( $tv_channel ->look_down('class' => 'tv-tooltip') ) {
++ foreach my $tv_show ( $tv_channel ->look_down('class', qr/(^| )tv-tooltip( |$)/) ) {
+ my %show;
+ $show{channel} = channel_id($channel_id);
+
+--
+2.29.2
+
diff --git a/0002-README.md-update-TOC.patch b/0002-README.md-update-TOC.patch
new file mode 100644
index 0000000..5a35aee
--- /dev/null
+++ b/0002-README.md-update-TOC.patch
@@ -0,0 +1,26 @@
+From 8b5913a1457358d65bfe4b0187b3fc4d4e96abf4 Mon Sep 17 00:00:00 2001
+From: Nick Morrott <knowledgejunkie(a)gmail.com>
+Date: Mon, 7 Sep 2020 16:00:51 +0100
+Subject: [PATCH 02/50] README.md: update TOC
+
+---
+ README.md | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/README.md b/README.md
+index b34384b4..dbabddf4 100644
+--- a/README.md
++++ b/README.md
+@@ -7,7 +7,8 @@
+ # XMLTV 0.6.3
+
+ ## Table of Contents
+-- [XMLTV 0.6.2](#xmltv-063)
++
++- [XMLTV](#xmltv)
+ * [Description](#description)
+ * [Changes](#changes)
+ * [Installation (Package)](#installation-package)
+--
+2.29.2
+
diff --git a/0003-README.md-refresh-req-d-rec-d-modules-list.patch b/0003-README.md-refresh-req-d-rec-d-modules-list.patch
new file mode 100644
index 0000000..f2129f0
--- /dev/null
+++ b/0003-README.md-refresh-req-d-rec-d-modules-list.patch
@@ -0,0 +1,32 @@
+From 47d346da47b3a8afefc435e0d6439c2a2ae63115 Mon Sep 17 00:00:00 2001
+From: Nick Morrott <knowledgejunkie(a)gmail.com>
+Date: Tue, 8 Sep 2020 02:17:02 +0100
+Subject: [PATCH 03/50] README.md: refresh req'd/rec'd modules list
+
+---
+ README.md | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/README.md b/README.md
+index dbabddf4..a63b19d5 100644
+--- a/README.md
++++ b/README.md
+@@ -166,6 +166,7 @@ Time::Seconds (tv_grab_huro, core module since 5.9.5)
+ Tk (tv_check)
+ Tk::TableMatrix (tv_check)
+ URI (for some of the grabbers, part of URI)
++URI::Encode (tv_grab_pt_vodafone)
+ URI::Escape (for some of the grabbers, part of URI)
+ XML::DOM (tv_grab_is)
+ XML::LibXSLT (tv_grab_is)
+@@ -185,6 +186,7 @@ Log::TraceMessages (useful for debugging, not needed for normal us
+ PerlIO::gzip (can make tv_imdb a bit faster)
+ Term::ProgressBar (displays pretty progress bars)
+ Unicode::String (improved character handling in tv_to_latex)
++URI::Escape::XS (faster URI handling)
+ ```
+
+ ### JSON libraries
+--
+2.29.2
+
diff --git a/0004-tv_imdb-fix-some-typos.patch b/0004-tv_imdb-fix-some-typos.patch
new file mode 100644
index 0000000..0bce4fc
--- /dev/null
+++ b/0004-tv_imdb-fix-some-typos.patch
@@ -0,0 +1,48 @@
+From ea01fcb293b1d95ce89fe055bef38b54253de26e Mon Sep 17 00:00:00 2001
+From: Nick Morrott <knowledgejunkie(a)gmail.com>
+Date: Tue, 8 Sep 2020 03:33:16 +0100
+Subject: [PATCH 04/50] tv_imdb: fix some typos
+
+---
+ filter/tv_imdb | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/filter/tv_imdb b/filter/tv_imdb
+index ee0b2d88..0addb452 100755
+--- a/filter/tv_imdb
++++ b/filter/tv_imdb
+@@ -32,9 +32,9 @@ XMLTV::IMDB package.
+
+ B<--output FILE> write to FILE rather than standard output.
+
+-B<--with-keywords> include IDMb keywords in the output file.
++B<--with-keywords> include IMDb keywords in the output file.
+
+-B<--with-plot> include IDMb plot summary in the output file.
++B<--with-plot> include IMDb plot summary in the output file.
+
+ B<--actors NUMBER> number of actors from IMDb to add (default=3).
+
+@@ -115,7 +115,7 @@ the '--download' flag and be prompted for what you need to download by
+ hand. See <http://www.imdb.com/interfaces> for the download sites.
+ Then once you have the files rerun without '--download'.
+
+-Note: '--prepStage' sucks a bit of memeory, but you can run each
++Note: '--prepStage' sucks a bit of memory, but you can run each
+ prepStage separately by running --prepStage with each of the stages
+ (see --help for details).
+
+@@ -158,8 +158,8 @@ to define what a "close" match is. For instance does a movie by the
+ same title with a date out by 1 year or 2 years considered a match
+ (currently weE<39>re using 2).
+
+-Nice to haves include: verification/addition of programe MPAA/VCHIP ratings,
+-addition of imdb.com user ratings (by votes) to programes. Potenially we
++Nice to haves include: verification/addition of programme MPAA/VCHIP ratings,
++addition of imdb.com user ratings (by votes) to programmes. Potentially we
+ could expand to include "country of origin", "description", "writer" and
+ "producer" credits, maybe even "commentator".
+
+--
+2.29.2
+
diff --git a/0005-tv_imdb-use-warnings.patch b/0005-tv_imdb-use-warnings.patch
new file mode 100644
index 0000000..6b325c4
--- /dev/null
+++ b/0005-tv_imdb-use-warnings.patch
@@ -0,0 +1,30 @@
+From 4556b5bb04792894a8eac4aebae5b1b038b7128c Mon Sep 17 00:00:00 2001
+From: Nick Morrott <knowledgejunkie(a)gmail.com>
+Date: Tue, 8 Sep 2020 03:34:08 +0100
+Subject: [PATCH 05/50] tv_imdb: use warnings
+
+---
+ filter/tv_imdb | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/filter/tv_imdb b/filter/tv_imdb
+index 0addb452..bb7206f2 100755
+--- a/filter/tv_imdb
++++ b/filter/tv_imdb
+@@ -1,4 +1,4 @@
+-#!/usr/bin/perl -w
++#!/usr/bin/perl
+
+ =pod
+
+@@ -177,6 +177,7 @@ Jerry Veldhuis, jerry(a)matilda.com
+ =cut
+
+ use strict;
++use warnings;
+ use XMLTV;
+ use XMLTV::Version "$XMLTV::VERSION";
+ use Data::Dumper;
+--
+2.29.2
+
diff --git a/0006-tv_imdb-refresh-short-description-POD.patch b/0006-tv_imdb-refresh-short-description-POD.patch
new file mode 100644
index 0000000..74aa1df
--- /dev/null
+++ b/0006-tv_imdb-refresh-short-description-POD.patch
@@ -0,0 +1,29 @@
+From b6dd5a2c315e4529a7f2e0e4f55799c2ea42fe6a Mon Sep 17 00:00:00 2001
+From: Nick Morrott <knowledgejunkie(a)gmail.com>
+Date: Tue, 8 Sep 2020 03:35:45 +0100
+Subject: [PATCH 06/50] tv_imdb: refresh short description (POD)
+
+---
+ filter/tv_imdb | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/filter/tv_imdb b/filter/tv_imdb
+index bb7206f2..4e8652ce 100755
+--- a/filter/tv_imdb
++++ b/filter/tv_imdb
+@@ -25,10 +25,9 @@ tv_imdb --imdbdir <dir>
+
+ =head1 DESCRIPTION
+
+-Very similar to tv_cat in semantics (see tv_cat),
++tv_imdb is very similar to tv_cat in semantics (see tv_cat),
+ except whenever a programme appears with "date" entry the
+-title and date are used to look up extra data by using the
+-XMLTV::IMDB package.
++title and date are used to look up extra data using XMLTV::IMDB.
+
+ B<--output FILE> write to FILE rather than standard output.
+
+--
+2.29.2
+
diff --git a/0007-tv_grab_zz_sdjson_sqlite-fix-a-typo.patch b/0007-tv_grab_zz_sdjson_sqlite-fix-a-typo.patch
new file mode 100644
index 0000000..028b8a7
--- /dev/null
+++ b/0007-tv_grab_zz_sdjson_sqlite-fix-a-typo.patch
@@ -0,0 +1,26 @@
+From 0996952a3792c2ec981262dbfa0382548f1c4924 Mon Sep 17 00:00:00 2001
+From: Nick Morrott <knowledgejunkie(a)gmail.com>
+Date: Tue, 8 Sep 2020 03:37:07 +0100
+Subject: [PATCH 07/50] tv_grab_zz_sdjson_sqlite: fix a typo
+
+Detected by lintian on Debian
+---
+ grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite b/grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite
+index f552ad0d..656c86f4 100644
+--- a/grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite
++++ b/grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite
+@@ -6129,7 +6129,7 @@ this is nominally useful only in offline situations.
+
+ B<--force-download> Deletes most existing local database data and
+ forces a download of the data. If there is a suspicion that the
+-data is currupt (and not being automatically corrected), forcing
++data is corrupt (and not being automatically corrected), forcing
+ a new download might be necessary.
+
+ B<--days N> When grabbing, grab N days rather than all available days.
+--
+2.29.2
+
diff --git a/0008-Update-tv_grab_eu_xmltvse-to-use-SSL-116.patch b/0008-Update-tv_grab_eu_xmltvse-to-use-SSL-116.patch
new file mode 100644
index 0000000..7b2cddd
--- /dev/null
+++ b/0008-Update-tv_grab_eu_xmltvse-to-use-SSL-116.patch
@@ -0,0 +1,50 @@
+From f04d109936bcc5a5ed52731579539b590d6d18c7 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Joakim=20Nyl=C3=A9n?= <git(a)joakim.nylen.nu>
+Date: Tue, 20 Oct 2020 20:57:37 +0200
+Subject: [PATCH 08/50] Update tv_grab_eu_xmltvse to use SSL (#116)
+
+---
+ grab/eu_xmltvse/tv_grab_eu_xmltvse | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/grab/eu_xmltvse/tv_grab_eu_xmltvse b/grab/eu_xmltvse/tv_grab_eu_xmltvse
+index f570513b..f1fe994d 100644
+--- a/grab/eu_xmltvse/tv_grab_eu_xmltvse
++++ b/grab/eu_xmltvse/tv_grab_eu_xmltvse
+@@ -79,11 +79,11 @@ it might be necessary to set HOME to a path without spaces in it.
+
+ =head1 SUPPORTED CHANNELS
+
+-For information on supported channels, see http://xmltv.xmltv.se
++For information on supported channels, see https://xmltv.xmltv.se
+
+ =head1 AUTHOR
+
+-Joakim Nylén, me -at- jnylen -dot- nu. This script is a modified version
++Joakim Nylén, joakim -at- pixelmonster -dot- ee. This script is a modified version
+ of tv_grab_se_swedb by Mattias Holmlund, mattias -at- holmlund -dot- se.
+ While the documentation and parts of the code copied from tv_grab_uk by
+ Ed Avis, ed -at- membled -dot- com.
+@@ -125,8 +125,8 @@ use XMLTV::Memoize; XMLTV::Memoize::check_argv('getuncompressed');
+
+ sub t;
+
+-my $default_root_url = 'http://xmltv.xmltv.se/';
+-my $default_main_url = 'http://xmltv.se/';
++my $default_root_url = 'https://xmltv.xmltv.se/';
++my $default_main_url = 'https://xmltv.se/';
+ my $default_cachedir = get_default_cachedir();
+
+ my( $opt, $conf ) = ParseOptions( {
+@@ -473,7 +473,7 @@ sub write_channel_list
+ if( not exists $channels->{$channel_id} )
+ {
+ print STDERR "Unknown channel $channel_id." .
+- " See http://xmltv.xmltv.se" .
++ " See https://xmltv.xmltv.se" .
+ " for a list of available channels or run" .
+ " tv_grab_eu_xmltvse --configure to reconfigure.\n";
+ next;
+--
+2.29.2
+
diff --git a/0009-update-version-for-cherry-pick-typo-correction.patch b/0009-update-version-for-cherry-pick-typo-correction.patch
new file mode 100644
index 0000000..f2bc3d6
--- /dev/null
+++ b/0009-update-version-for-cherry-pick-typo-correction.patch
@@ -0,0 +1,33 @@
+From 8ce679b822966481d87ccdc98d6576c2b66f44d7 Mon Sep 17 00:00:00 2001
+From: Gary Buhrmaster <gary.buhrmaster(a)gmail.com>
+Date: Tue, 15 Sep 2020 21:39:00 +0000
+Subject: [PATCH 09/50] update version for cherry-pick typo correction
+
+---
+ grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite b/grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite
+index 656c86f4..ce73bdcb 100644
+--- a/grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite
++++ b/grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite
+@@ -44,6 +44,7 @@
+ #
+ # Version history:
+ #
++# 2020/09/15 - 1.102 - update for cherry-pick typo correction
+ # 2020/06/21 - 1.101 - rename scaledownload to scale-download
+ # 2020/06/20 - 1.100 - add support for --scaledownload
+ # 2020/06/12 - 1.99 - include programID in metadata
+@@ -193,7 +194,7 @@ use sort 'stable';
+ my $RFC2838_COMPLIANT = 1; # RFC2838 compliant station ids, which makes XMLTV
+ # validate even though the docs say "SHOULD" not "MUST"
+
+-my $SCRIPT_VERSION = '$Id: tv_grab_zz_sdjson_sqlite,v 1.101 2020/06/21 20:30:00 gtb Exp ed $';
++my $SCRIPT_VERSION = '$Id: tv_grab_zz_sdjson_sqlite,v 1.102 2020/09/15 21:30:00 gtb Exp ed $';
+ my $SCRIPT_URL = 'https://github.com/garybuhrmaster/tv_grab_zz_sdjson_sqlite';
+ my $SCRIPT_NAME = basename("$0");
+ my $SCRIPT_NAME_DIR = dirname("$0");
+--
+2.29.2
+
diff --git a/0010-programme-detect-parental-level-with-white-space.patch b/0010-programme-detect-parental-level-with-white-space.patch
new file mode 100644
index 0000000..b31b7db
--- /dev/null
+++ b/0010-programme-detect-parental-level-with-white-space.patch
@@ -0,0 +1,27 @@
+From 34122a921c07c881a195625d431617a140a2f59c Mon Sep 17 00:00:00 2001
+From: Stefan Becker <chemobejk(a)gmail.com>
+Date: Sun, 25 Oct 2020 14:53:46 +0200
+Subject: [PATCH 10/50] programme: detect parental level with white space
+
+It seems that some programme sources now use e.g. "( S )" instead of
+"(S)". Add the optional white space to the regex.
+---
+ grab/fi/fi/programme.pm | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/grab/fi/fi/programme.pm b/grab/fi/fi/programme.pm
+index 2d83c345..8a60c5e6 100644
+--- a/grab/fi/fi/programme.pm
++++ b/grab/fi/fi/programme.pm
+@@ -160,7 +160,7 @@ sub dump {
+ # Programme post-processing
+ #
+ # Parental level removal (catch also the duplicates)
+- $title =~ s/(?:\s+\((?:S|T|K?7|K?9|K?12|K?16|K?18)\))+\s*$//
++ $title =~ s/(?:\s+\(\s*(?:S|T|K?7|K?9|K?12|K?16|K?18)\s*\))+\s*$//
+ if $title_strip_parental;
+ #
+ # Title mapping
+--
+2.29.2
+
diff --git a/0011-Remove-swedb-grabber-117.patch b/0011-Remove-swedb-grabber-117.patch
new file mode 100644
index 0000000..e5ffa17
--- /dev/null
+++ b/0011-Remove-swedb-grabber-117.patch
@@ -0,0 +1,678 @@
+From 64e0fe74b752fbc1286fe6f27db12ca247fb3c7c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Joakim=20Nyl=C3=A9n?= <git(a)joakim.nylen.nu>
+Date: Sun, 1 Nov 2020 03:01:15 +0100
+Subject: [PATCH 11/50] Remove swedb grabber (#117)
+
+---
+ MANIFEST | 3 -
+ Makefile.PL | 14 -
+ grab/se_swedb/test.conf | 100 -------
+ grab/se_swedb/tv_grab_se_swedb.PL | 24 --
+ grab/se_swedb/tv_grab_se_swedb.in | 478 ------------------------------
+ 5 files changed, 619 deletions(-)
+ delete mode 100644 grab/se_swedb/test.conf
+ delete mode 100644 grab/se_swedb/tv_grab_se_swedb.PL
+ delete mode 100755 grab/se_swedb/tv_grab_se_swedb.in
+
+diff --git a/MANIFEST b/MANIFEST
+index 599065c7..d96fc2fe 100644
+--- a/MANIFEST
++++ b/MANIFEST
+@@ -119,9 +119,6 @@ grab/pt_meo/test.conf
+ grab/pt_meo/tv_grab_pt_meo
+ grab/pt_vodafone/test.conf
+ grab/pt_vodafone/tv_grab_pt_vodafone
+-grab/se_swedb/test.conf
+-grab/se_swedb/tv_grab_se_swedb.PL
+-grab/se_swedb/tv_grab_se_swedb.in
+ grab/test_grabbers
+ grab/tr/test.conf
+ grab/tr/tv_grab_tr
+diff --git a/Makefile.PL b/Makefile.PL
+index 5d3be741..270add4c 100644
+--- a/Makefile.PL
++++ b/Makefile.PL
+@@ -472,20 +472,6 @@ my @opt_components
+ 'URI::Encode' => 0, },
+ },
+
+- # { name => 'tv_grab_se_swedb',
+- # blurb => 'Grabber for Sweden',
+- # exes => [ 'grab/se_swedb/tv_grab_se_swedb' ],
+- # pl_files => { 'grab/se_swedb/tv_grab_se_swedb.PL'
+- # => 'grab/se_swedb/tv_grab_se_swedb' },
+- # to_clean => [ 'grab/se_swedb/tv_grab_se_swedb' ],
+- # deps => [ 'grab/se_swedb/tv_grab_se_swedb'
+- # => [ 'grab/se_swedb/tv_grab_se_swedb.in' ] ],
+- # prereqs => { 'Compress::Zlib' => 0,
+- # 'HTTP::Cache::Transparent' => 0,
+- # 'IO::Scalar' => 0,
+- # 'XML::LibXML' => 0, },
+- # },
+-
+ { name => 'tv_grab_tr',
+ blurb => 'Grabber for Turkey (Digiturk)',
+ exes => [ 'grab/tr/tv_grab_tr' ],
+diff --git a/grab/se_swedb/test.conf b/grab/se_swedb/test.conf
+deleted file mode 100644
+index 702f9d72..00000000
+--- a/grab/se_swedb/test.conf
++++ /dev/null
+@@ -1,100 +0,0 @@
+-root-url=http://xmltv.tvsajten.com/xmltv/channels.xml.gz
+-cachedir=/tmp/.xmltv/cache
+-channel!14hd.viasat.se
+-channel!action.cmore.se
+-channel!action.viasat.se
+-channel!axess.se
+-channel!classic.viasat.se
+-channel!comedy.viasat.se
+-channel!dance.mtv.se
+-channel!disneychannel.se
+-channel=dr1.dr.dk
+-channel!dr2.dr.dk
+-channel!drama.viasat.se
+-channel!elva.tv
+-channel!emotion.cmore.se
+-channel!eurosport.com
+-channel!eurosport2.eurosport.com
+-channel!explorer.viasat.se
+-channel!extreme.cmore.se
+-channel!fakta.tv4.se
+-channel!family.viasat.se
+-channel!film.tv4.se
+-channel!film.viasat.se
+-channel!first.cmore.se
+-channel!firsthd.cmore.se
+-channel!fotboll-hockey-kids.cmore.se
+-channel!fotboll.cmore.se
+-channel!fotboll.viasat.se
+-channel!fotbollhd.viasat.se
+-channel!fxl.tv4.se
+-channel!golf.viasat.se
+-channel!guld.tv4.se
+-channel!hd.animalplanet.discovery.com
+-channel!hd.dr.dk
+-channel!hd.ngcsverige.com
+-channel!hdshowcase.discovery.com
+-channel!history.viasat.se
+-channel!hits.cmore.se
+-channel!hits.mtv.se
+-channel!hitshd.cmore.se
+-channel!hockey.cmore.se
+-channel!hockey.viasat.se
+-channel!investigation.discovery.com
+-channel!jr.nickelodeon.se
+-channel!kanal5.se
+-channel!kanal9.se
+-channel!kids.cmore.se
+-channel!komedi.tv4.se
+-channel!kunskapskanalen.svt.se
+-channel!live.cmore.se
+-channel!live2.cmore.se
+-channel!live3.cmore.se
+-channel!live4.cmore.se
+-channel!livehd.cmore.se
+-channel!motor.viasat.se
+-channel!motorhd.viasat.se
+-channel!mtv.se
+-channel!nature.viasat.se
+-channel!ngcsverige.com
+-channel!nickelodeon.se
+-channel!nordic.animalplanet.discovery.com
+-channel!nordic.discovery.com
+-channel!nordic.science.discovery.com
+-channel!nordic.viasat.se
+-channel!p1.sr.se
+-channel!p2.sr.se
+-channel!p3.sr.se
+-channel!playhouse.disneychannel.se
+-channel!premierleaguehd.viasat.se
+-channel!rocks.mtv.se
+-channel!se.comedycentral.tv
+-channel!series.cmore.se
+-channel!serieshd.cmore.se
+-channel!sf.cmore.se
+-channel!showcasehd.discovery.com
+-channel!sjuan.tv4.se
+-channel!sport-hd.cmore.se
+-channel!sport.cmore.se
+-channel!sport.tv4.se
+-channel!sport.viasat.se
+-channel!sport1-sf.cmore.se
+-channel!sporthd.cmore.se
+-channel=svt1.svt.se
+-channel!svt2.svt.se
+-channel!svt24.svt.se
+-channel!svtb.svt.se
+-channel!tennis.cmore.se
+-channel!tlc.discovery.com
+-channel!tnt7.se
+-channel!tv10.viasat.se
+-channel!tv12.tv4.se
+-channel!tv3.viasat.se
+-channel!tv4.se
+-channel!tv6.viasat.se
+-channel!tv8.viasat.se
+-channel!vh1.com
+-channel!world.discovery.com
+-channel!world.svt.se
+-channel!xd.disneychannel.se
+-channel!xtra.viasat.se
+diff --git a/grab/se_swedb/tv_grab_se_swedb.PL b/grab/se_swedb/tv_grab_se_swedb.PL
+deleted file mode 100644
+index 6f9598a1..00000000
+--- a/grab/se_swedb/tv_grab_se_swedb.PL
++++ /dev/null
+@@ -1,24 +0,0 @@
+-# Generate tv_grab_se_swedb from tv_grab_se_swedb.in. This is done
+-# to allow grabbers for other countries to use the same code.
+-#
+-
+-use strict;
+-
+-use IO::File;
+-my $out = shift @ARGV; die "no output file given" if not defined $out;
+-my $in = 'grab/se_swedb/tv_grab_se_swedb.in';
+-my $in_fh = new IO::File "< $in" or die "cannot read $in: $!";
+-my $out_fh = new IO::File "> $out" or die "cannot write to $out: $!";
+-my $seen = 0;
+-while (<$in_fh>) {
+- s/\@\@name/tv_grab_se_swedb/;
+- s/\@\@nspc/ /;
+- s/\@\@country/Sweden/;
+- s/\@\@desc/Sweden (swedb\/tvsajten)/;
+- s%\@\@url%http://xmltv.tvsajten.com/channels.xml.gz%;
+- s%\@\@site%http://xmltv.tvsajten.com/%;
+- print $out_fh $_;
+-}
+-close $out_fh or die "cannot close $out: $!";
+-close $in_fh or die "cannot close $in: $!";
+-
+diff --git a/grab/se_swedb/tv_grab_se_swedb.in b/grab/se_swedb/tv_grab_se_swedb.in
+deleted file mode 100755
+index 23fe1738..00000000
+--- a/grab/se_swedb/tv_grab_se_swedb.in
++++ /dev/null
+@@ -1,478 +0,0 @@
+-#!/usr/bin/perl -w
+-
+-=pod
+-
+-=head1 NAME
+-
+-@@name - Grab TV listings for @@country.
+-
+-=head1 SYNOPSIS
+-
+-@@name --help
+-
+-@@name --configure [--config-file FILE] [--gui OPTION]
+-
+-@@name [--config-file FILE]
+-@@nspc [--days N] [--offset N]
+-@@nspc [--output FILE] [--quiet] [--debug]
+-
+-@@name --list-channels [--config-file FILE]
+-@@nspc [--output FILE] [--quiet] [--debug]
+-
+-
+-=head1 DESCRIPTION
+-
+-Output TV and listings in XMLTV format for many stations
+-available in @@country.
+-
+-First you must run B<@@name --configure> to choose which stations
+-you want to receive.
+-
+-Then running B<@@name> with no arguments will get a listings for
+-the stations you chose for five days including today.
+-
+-=head1 OPTIONS
+-
+-B<--configure> Prompt for which stations to download and write the
+-configuration file.
+-
+-B<--config-file FILE> Set the name of the configuration file, the
+-default is B<~/.xmltv/@@name.conf>. This is the file written by
+-B<--configure> and read when grabbing.
+-
+-B<--gui OPTION> Use this option to enable a graphical interface to be used.
+-OPTION may be 'Tk', or left blank for the best available choice.
+-Additional allowed values of OPTION are 'Term' for normal terminal output
+-(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.
+-
+-B<--output FILE> When grabbing, write output to FILE rather than
+-standard output.
+-
+-B<--days N> When grabbing, grab N days rather than 5.
+-
+-B<--offset N> Start grabbing at today + N days. N may be negative.
+-
+-B<--quiet> Suppress the progress-bar normally shown on standard error.
+-
+-B<--debug> Provide more information on progress to stderr to help in
+-debugging.
+-
+-B<--list-channels> Output a list of all channels that data is available
+- for. The list is in xmltv-format.
+-
+-B<--version> Show the version of the grabber.
+-
+-B<--help> Print a help message and exit.
+-
+-=head1 ERROR HANDLING
+-
+-If the grabber fails to download data for some channel on a specific day,
+-it will print an errormessage to STDERR and then continue with the other
+-channels and days. The grabber will exit with a status code of 1 to indicate
+-that the data is incomplete.
+-
+-=head1 ENVIRONMENT VARIABLES
+-
+-The environment variable HOME can be set to change where configuration
+-files are stored. All configuration is stored in $HOME/.xmltv/. On Windows,
+-it might be necessary to set HOME to a path without spaces in it.
+-
+-=head1 SUPPORTED CHANNELS
+-
+-For information on supported channels, see @@site
+-
+-=head1 AUTHOR
+-
+-Mattias Holmlund, mattias -at- holmlund -dot- se. This documentation
+-and parts of the code copied from tv_grab_uk by
+-Ed Avis, ed -at- membled -dot- com.
+-
+-=head1 BUGS
+-
+-=cut
+-
+-use strict;
+-
+-use XMLTV;
+-use XMLTV::ProgressBar;
+-use XMLTV::Options qw/ParseOptions/;
+-use XMLTV::Configure::Writer;
+-
+-use XML::LibXML;
+-use Date::Manip;
+-use Compress::Zlib;
+-use File::Path;
+-use File::Basename;
+-use IO::Scalar;
+-use LWP;
+-
+-my $ua;
+-$ua = LWP::UserAgent->new();
+-$ua->agent("xmltv/$XMLTV::VERSION");
+-$ua->env_proxy();
+-
+-use HTTP::Cache::Transparent;
+-
+-# Although we use HTTP::Cache::Transparent, this undocumented --cache
+-# option for debugging is still useful since it will _always_ use a
+-# cached copy of a page, without contacting the server at all.
+-#
+-use XMLTV::Memoize; XMLTV::Memoize::check_argv('getuncompressed');
+-
+-sub t;
+-
+-my $default_root_url = '@@url';
+-my $default_cachedir = get_default_cachedir();
+-
+-my( $opt, $conf ) = ParseOptions( {
+- grabber_name => "@@name",
+- capabilities => [qw/baseline manualconfig tkconfig apiconfig cache/],
+- stage_sub => \&config_stage,
+- listchannels_sub => \&list_channels,
+- load_old_config_sub => \&load_old_config,
+- version => "$XMLTV::VERSION",
+- description => "@@desc",
+-
+-} );
+-
+-if (not defined( $conf->{cachedir} )) {
+- print STDERR "No cachedir defined in configfile " .
+- $opt->{'config-file'} . "\n" .
+- "Please run the grabber with --configure.\n";
+- exit 1;
+-}
+-
+-if (not defined( $conf->{'root-url'} )) {
+- print STDERR "No root-url defined in configfile " .
+- $opt->{'config-file'} . "\n" .
+- "Please run the grabber with --configure.\n";
+- exit 1;
+-}
+-
+-if (not defined( $conf->{'channel'} )) {
+- print STDERR "No channels selected in configfile " .
+- $opt->{'config-file'} . "\n" .
+- "Please run the grabber with --configure.\n";
+- exit 1;
+-}
+-
+-init_cachedir( $conf->{cachedir}->[0] );
+-HTTP::Cache::Transparent::init( {
+- BasePath => $conf->{cachedir}->[0],
+- NoUpdate => 15*60,
+- Verbose => $opt->{debug},
+- } );
+-
+-binmode (STDOUT);
+-
+-my($xmldecl, $channels) = load_channels( $conf->{'root-url'}->[0] );
+-
+-my( $odoc, $root );
+-my $warnings = 0;
+-
+-write_header( $xmldecl );
+-
+-write_channel_list( $conf->{channel} );
+-
+-my $now = ParseDate( 'now' );
+-my $date =$now;
+-$date = DateCalc( $now, "+$opt->{offset} days" )
+- if( $opt->{offset} );
+-
+-my $bar = undef;
+-$bar = new XMLTV::ProgressBar( {
+- name => 'downloading listings',
+- count => $opt->{days} * @{$conf->{channel}},
+- }) if (not $opt->{quiet}) && (not $opt->{debug});
+-
+-for( my $i=0; $i < $opt->{days}; $i++ )
+-{
+- t "Date: $date";
+- foreach my $channel_id (@{$conf->{channel}})
+- {
+- # We have already warned the user if the channel doesn't exist.
+- if( exists $channels->{$channel_id} )
+- {
+- t " $channel_id";
+- my( $channel_name, $url ) = @{$channels->{$channel_id}};
+- print_data( $url, $channel_id, $date )
+- or warning( "Failed to download data for $channel_id on " .
+- UnixDate( $date, "%Y-%m-%d" ) . "." );
+- }
+- $bar->update() if defined( $bar );
+- }
+- $date = DateCalc( $date, "+1 days" );
+-}
+-
+-$bar->finish() if defined $bar;
+-
+-write_footer();
+-
+-# Signal that something went wrong if there were warnings.
+-exit(1) if $warnings;
+-
+-# All data fetched ok.
+-t "Exiting without warnings.";
+-exit(0);
+-
+-sub t
+-{
+- my( $message ) = @_;
+- print STDERR $message . "\n" if $opt->{debug};
+-}
+-
+-sub warning
+-{
+- my( $message ) = @_;
+- print STDERR $message . "\n";
+- $warnings++;
+-}
+-
+-sub list_channels
+-{
+- my( $conf, $opt ) = @_;
+-
+- ( $xmldecl, $channels ) = load_channels( $conf->{'root-url'}->[0] );
+-
+- my $result="";
+- my $fh = new IO::Scalar \$result;
+- my $oldfh = select( $fh );
+- write_header( $xmldecl );
+- write_channel_list( [sort keys %{$channels}] );
+- write_footer();
+- select( $oldfh );
+- $fh->close();
+-
+- return $result;
+-}
+-
+-sub config_stage
+-{
+- my( $stage, $conf ) = @_;
+-
+- die "Unknown stage $stage" if $stage ne "start";
+-
+- my $result;
+- my $writer = new XMLTV::Configure::Writer( OUTPUT => \$result,
+- encoding => 'iso-8859-1' );
+- $writer->start( { grabber => '@@name' } );
+- $writer->write_string( {
+- id => 'root-url',
+- title => [ [ 'Root URL for grabbing data', 'en' ] ],
+- description => [
+- [ 'The file at this URL describes which channels are available and ' .
+- 'where data can be found for them. ', 'en' ] ],
+- default => $default_root_url,
+- } );
+- $writer->write_string( {
+- id => 'cachedir',
+- title => [ [ 'Directory to store the cache in', 'en' ] ],
+- description => [
+- [ '@@name uses a cache with files that it has already '.
+- 'downloaded. Please specify where the cache shall be stored. ',
+- 'en' ] ],
+- default => $default_cachedir,
+- } );
+-
+- $writer->end( 'select-channels' );
+-
+- return $result;
+-}
+-
+-#
+-# Load a configuration file in the old format.
+-#
+-
+-sub load_old_config
+-{
+- my( $config_file ) = @_;
+-
+- my @lines = XMLTV::Config_file::read_lines( $config_file );
+-
+- my $conf = {};
+- $conf->{cachedir}->[0] = $default_cachedir;
+- $conf->{'root-url'}->[0] = $default_root_url;
+- $conf->{channel} = [];
+-
+- foreach my $line (@lines)
+- {
+- next unless defined $line;
+-
+- my( $command, $param ) = split( /\s+/, $line, 2 );
+- $param =~ tr/\n\r//d;
+- $param =~ s/\s+$//;
+-
+- if ( $command =~ /^\s*root-url\s*$/) {
+- $conf->{'root-url'}->[0] = $param;
+- } elsif ( $command =~ /^\s*channel\s*$/) {
+- push @{$conf->{channel}}, $param;
+- } elsif ( $command eq 'cache-dir' ) {
+- $conf->{'cachedir'}->[0] = $param;
+- } else {
+- die "Unknown command $command in config-file $config_file"
+- }
+- }
+-
+- return $conf;
+-}
+-
+-sub get_default_cachedir
+-{
+- my $winhome = $ENV{HOMEDRIVE} . $ENV{HOMEPATH}
+- if defined( $ENV{HOMEDRIVE} )
+- and defined( $ENV{HOMEPATH} );
+-
+- my $home = $ENV{HOME} || $winhome || ".";
+- return "$home/.xmltv/cache";
+-}
+-
+-sub init_cachedir
+-{
+- my( $path ) = @_;
+- if( not -d $path )
+- {
+- mkpath( $path ) or die "Failed to create cache-directory $path: $@";
+- }
+-}
+-
+-sub load_channels
+-{
+- my( $url ) = @_;
+-
+- my %channels;
+-
+- my $xmldata = getuncompressed( $url );
+-
+- defined( $xmldata ) or die "Failed to fetch $url";
+-
+- my $xml = XML::LibXML->new;
+-
+- my $doc = $xml->parse_string($xmldata);
+-
+- my $xmldecl = "<?xml version='" . $doc->version() . "'
" .
+- "encoding='" . $doc->encoding() . "'?>\n";
+-
+- my $ns = $doc->find( "//channel" );
+-
+- foreach my $node ($ns->get_nodelist)
+- {
+- my $id = $node->findvalue( '@id' );
+- my $name = $node->findvalue( 'display-name[1]' );
+- my $url = $node->findvalue( 'base-url' );
+- my $urlns = $node->find( './base-url' );
+- foreach my $urlnode ($urlns->get_nodelist)
+- {
+- $node->removeChild( $urlnode );
+- }
+- $channels{$id} = [ $name, $url, $node->toString(0, 1) ];
+- }
+-
+- return ($xmldecl, \%channels);
+-}
+-
+-sub print_data
+-{
+- my( $rooturl, $channel_id, $date ) = @_;
+-
+- my $url = $rooturl . $channel_id . "_" . UnixDate( $date,
"%Y-%m-%d" ) .
+- ".xml.gz";
+-
+- my $xmldata = getuncompressed( $url );
+-
+- defined $xmldata or return 0;
+-
+- my $in = new IO::Scalar \$xmldata;
+- while( my $line = $in->getline() )
+- {
+- last if $line =~ /<tv/;
+- }
+-
+- while( my $line = $in->getline() )
+- {
+- last if $line =~ /<\/tv>/;
+- print $line;
+- }
+-
+- return 1;
+-}
+-
+-sub write_header
+-{
+- my( $xmldecl ) = @_;
+-
+- # Use the same xml declaration as the one in
+- # channels.xml
+- print $xmldecl;
+- print '<!DOCTYPE tv SYSTEM "xmltv.dtd">' . "\n";
+- print "<tv>\n";
+-}
+-
+-sub write_channel_list
+-{
+- my( $channel_list ) = @_;
+-
+- # Write list of channels.
+- t 'Writing list of channels.';
+-
+- foreach my $channel_id (@{$channel_list})
+- {
+- if( not exists $channels->{$channel_id} )
+- {
+- print STDERR "Unknown channel $channel_id." .
+- " See @@site" .
+- " for a list of available channels or run" .
+- " @@name --configure to reconfigure.\n";
+- next;
+- }
+-
+- my( $channel_name, $url, $def ) = @{$channels->{$channel_id}};
+- print " $def\n";
+- }
+-}
+-
+-sub write_footer
+-{
+- print "</tv>\n";
+-}
+-
+-sub getuncompressed {
+- my( $url ) = @_;
+-
+- my $response = $ua->get($url);
+-
+- return undef
+- unless $response->is_success;
+-
+- my $compressed = $response->content
+- or return undef;
+-
+- # Since LWP 5.827, the result from get() is already
+- # uncompressed.
+-
+- my $uncompressed;
+-
+- eval {
+- $uncompressed = Compress::Zlib::memGunzip( \$compressed );
+- };
+-
+- $uncompressed = $compressed if not defined $uncompressed;
+-
+- return $uncompressed;
+-}
+-
+-### Setup indentation in Emacs
+-## Local Variables:
+-## perl-indent-level: 4
+-## perl-continued-statement-offset: 4
+-## perl-continued-brace-offset: 0
+-## perl-brace-offset: -4
+-## perl-brace-imaginary-offset: 0
+-## perl-label-offset: -2
+-## cperl-indent-level: 4
+-## cperl-brace-offset: 0
+-## cperl-continued-brace-offset: 0
+-## cperl-label-offset: -2
+-## cperl-extra-newline-before-brace: t
+-## cperl-merge-trailing-else: nil
+-## cperl-continued-statement-offset: 2
+-## End:
+--
+2.29.2
+
diff --git a/0012-ampparit-add-missing-empty-title-check.patch
b/0012-ampparit-add-missing-empty-title-check.patch
new file mode 100644
index 0000000..6dd5d59
--- /dev/null
+++ b/0012-ampparit-add-missing-empty-title-check.patch
@@ -0,0 +1,39 @@
+From 68126caa72a652a3d396d62ce64c215c680e2f79 Mon Sep 17 00:00:00 2001
+From: Stefan Becker <chemobejk(a)gmail.com>
+Date: Mon, 16 Nov 2020 21:40:55 +0200
+Subject: [PATCH 12/50] ampparit: add missing empty title check
+
+Programmes must have a valid title.
+---
+ grab/fi/fi/source/ampparit.pm | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+diff --git a/grab/fi/fi/source/ampparit.pm b/grab/fi/fi/source/ampparit.pm
+index 8d2768e9..223a582a 100644
+--- a/grab/fi/fi/source/ampparit.pm
++++ b/grab/fi/fi/source/ampparit.pm
+@@ -135,13 +135,16 @@ sub grab {
+ if (my($hour, $minute) =
+ $start->as_text() =~ /^(\d{2})[:.](\d{2})$/) {
+ $title = $title->as_text();
+- $desc = $desc->as_text();
+
+- debug(3, "List entry ${id} ($hour:$minute) $title");
+- debug(4, $desc) if $desc;
++ if (length($title)) {
++ $desc = $desc->as_text();
+
+- my $object = appendProgramme($opaque, $hour, $minute, $title);
+- $object->description($desc);
++ debug(3, "List entry ${id} ($hour:$minute) $title");
++ debug(4, $desc) if $desc;
++
++ my $object = appendProgramme($opaque, $hour, $minute, $title);
++ $object->description($desc);
++ }
+ }
+ }
+ }
+--
+2.29.2
+
diff --git a/0013-telsu-add-missing-empty-title-check.patch
b/0013-telsu-add-missing-empty-title-check.patch
new file mode 100644
index 0000000..4998f9b
--- /dev/null
+++ b/0013-telsu-add-missing-empty-title-check.patch
@@ -0,0 +1,77 @@
+From 2befcce245a92ff458940246ac6677e7ad6ecb35 Mon Sep 17 00:00:00 2001
+From: Stefan Becker <chemobejk(a)gmail.com>
+Date: Mon, 16 Nov 2020 21:42:22 +0200
+Subject: [PATCH 13/50] telsu: add missing empty title check
+
+Programmes must have a valid title.
+
+Fixes #121
+---
+ grab/fi/fi/source/telsu.pm | 49 ++++++++++++++++++++------------------
+ 1 file changed, 26 insertions(+), 23 deletions(-)
+
+diff --git a/grab/fi/fi/source/telsu.pm b/grab/fi/fi/source/telsu.pm
+index d77303eb..7708415f 100644
+--- a/grab/fi/fi/source/telsu.pm
++++ b/grab/fi/fi/source/telsu.pm
+@@ -118,31 +118,34 @@ sub grab {
+ if (my($new, $start_h, $start_m, $end_h, $end_m) =
+ $time->as_text() =~ /^(.+)\s(\d{2})[:.](\d{2})\s-\s(\d{2})[:.](\d{2})/) {
+ $title = $title->as_text();
+- $desc = $desc->as_text();
+
+- # Detect day change
+- if ($new ne $current) {
+- $current = $new;
+- shift(@offsets);
++ if (length($title)) {
++ $desc = $desc->as_text();
++
++ # Detect day change
++ if ($new ne $current) {
++ $current = $new;
++ shift(@offsets);
++ }
++ my $start = timeToEpoch($offsets[0], $start_h, $start_m);
++ my $end = timeToEpoch($offsets[0], $end_h, $end_m);
++
++ # Detect end time on next day
++ if ($end < $start) {
++ # Are there enough day offsets left to handle a day change?
++ # No -> more programmes than we asked for, exit loop
++ last if @offsets < 2;
++ $end = timeToEpoch($offsets[1], $end_h, $end_m);
++ }
++
++ debug(3, "List entry ${id} ($start -> $end) $title");
++ debug(4, $desc) if $desc;
++
++ # Create program object
++ my $object = fi::programme->new($id, "fi", $title, $start, $end);
++ $object->description($desc);
++ push(@objects, $object);
+ }
+- my $start = timeToEpoch($offsets[0], $start_h, $start_m);
+- my $end = timeToEpoch($offsets[0], $end_h, $end_m);
+-
+- # Detect end time on next day
+- if ($end < $start) {
+- # Are there enough day offsets left to handle a day change?
+- # No -> more programmes than we asked for, exit loop
+- last if @offsets < 2;
+- $end = timeToEpoch($offsets[1], $end_h, $end_m);
+- }
+-
+- debug(3, "List entry ${id} ($start -> $end) $title");
+- debug(4, $desc) if $desc;
+-
+- # Create program object
+- my $object = fi::programme->new($id, "fi", $title, $start, $end);
+- $object->description($desc);
+- push(@objects, $object);
+ }
+ }
+ }
+--
+2.29.2
+
diff --git a/0014-tv_grab_uk_tvguide-fix-for-missing-form-options-in-c.patch
b/0014-tv_grab_uk_tvguide-fix-for-missing-form-options-in-c.patch
new file mode 100644
index 0000000..506550a
--- /dev/null
+++ b/0014-tv_grab_uk_tvguide-fix-for-missing-form-options-in-c.patch
@@ -0,0 +1,31 @@
+From fdb2097e41630def708e1ca3786f79fa6dec7fac Mon Sep 17 00:00:00 2001
+From: Ian Cameron <1661072+mkbloke(a)users.noreply.github.com>
+Date: Mon, 21 Dec 2020 11:23:47 +0000
+Subject: [PATCH 14/50] tv_grab_uk_tvguide: fix for missing form options in
+ channel listings (#125)
+
+---
+ grab/uk_tvguide/tv_grab_uk_tvguide | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/grab/uk_tvguide/tv_grab_uk_tvguide b/grab/uk_tvguide/tv_grab_uk_tvguide
+index 8838ec31..208e6a03 100755
+--- a/grab/uk_tvguide/tv_grab_uk_tvguide
++++ b/grab/uk_tvguide/tv_grab_uk_tvguide
+@@ -206,6 +206,13 @@ sub fetch_listings {
+ if ($tree) {
+ my $channelname = $tree->look_down('_tag' => 'option',
'value' => $channel_id);
+
++ # Try a fallback method if the form options are missing
++ if (!defined $channelname) {
++ my $fallback = $tree->look_down('_tag' => 'input',
'name' => 'cTime');
++ $fallback = $fallback->look_up('_tag', 'tr') if $fallback;
++ $channelname = $fallback->look_down('_tag' => 'span',
'class' => 'programmeheading') if $fallback;
++ }
++
+ # tvguide website can be very slow - try to avoid barfing when no response
+ if (!defined $channelname) {
+ warning "Unable to retrieve web page for $channel_id";
+--
+2.29.2
+
diff --git a/0015-avoid-break-when-website-object-missing-125.patch
b/0015-avoid-break-when-website-object-missing-125.patch
new file mode 100644
index 0000000..b2111e0
--- /dev/null
+++ b/0015-avoid-break-when-website-object-missing-125.patch
@@ -0,0 +1,77 @@
+From e63cab357a41b77bde693946e5ca32452a48f668 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Mon, 21 Dec 2020 12:04:49 +0000
+Subject: [PATCH 15/50] avoid break when website object missing (#125)
+
+---
+ grab/uk_tvguide/tv_grab_uk_tvguide | 35 ++++++++++++++++++++++++------
+ 1 file changed, 28 insertions(+), 7 deletions(-)
+
+diff --git a/grab/uk_tvguide/tv_grab_uk_tvguide b/grab/uk_tvguide/tv_grab_uk_tvguide
+index 208e6a03..9fd62d44 100755
+--- a/grab/uk_tvguide/tv_grab_uk_tvguide
++++ b/grab/uk_tvguide/tv_grab_uk_tvguide
+@@ -124,6 +124,9 @@ my $bar = new XMLTV::ProgressBar({
+ my $programmes = ();
+ my $channels = ();
+
++# Store channel names during fetch
++my $channames = undef;
++
+ # Get the schedule(s) from TV Guide
+ fetch_listings();
+
+@@ -204,25 +207,43 @@ sub fetch_listings {
+
+ # Scrub the page
+ if ($tree) {
+- my $channelname = $tree->look_down('_tag' => 'option',
'value' => $channel_id);
+-
+- # Try a fallback method if the form options are missing
++ my $channelname = undef;
++
++ # Store the channel ids in a list (do this only once per program run)
++ if (!defined $channames) {
++ #debug 'fetching options tags';
++ my $choptions = $tree->look_down('_tag' => 'select',
'name' => 'ch');
++ if (defined $choptions) {
++ my @choptionslist = $choptions->look_down('_tag' =>
'option');
++ if (@choptionslist) {
++ foreach my $choption (@choptionslist) {
++ $channames->{$choption->attr('value')} = $choption->as_text;
++ }
++ }
++ }
++ }
++
++ $channelname = $channames->{$channel_id} if $channames;
++
++ # Try a fallback method if the form options are missing [Credit mkbloke]
+ if (!defined $channelname) {
++ #debug 'using fallback method';
+ my $fallback = $tree->look_down('_tag' => 'input',
'name' => 'cTime');
+ $fallback = $fallback->look_up('_tag', 'tr') if $fallback;
+ $channelname = $fallback->look_down('_tag' => 'span',
'class' => 'programmeheading') if $fallback;
++ $channelname = $channelname->as_text if $channelname;
+ }
+-
++
++ #debug 'found channel name: '.$channelname;
++
+ # tvguide website can be very slow - try to avoid barfing when no response
+ if (!defined $channelname) {
+ warning "Unable to retrieve web page for $channel_id";
+ next;
+ }
+
+- $channelname = $channelname->as_text;
+-
+ # <table border="0" cellpadding="0"
style="background:black;border-collapse: collapse;background-image:
url(http://i.g8.tv/HighlightImages/Large/);background-repeat: no-repeat;"
width="677">
+-
++ #
+ my @shows = $tree->look_down('_tag' => 'table',
'border' => '0', 'cellpadding' => '0',
'style' => qr/background:\s*black;border-collapse:\s*collapse;/);
+
+ if (@shows) {
+--
+2.29.2
+
diff --git a/0016-fix-UA-page-debug-is-on-stdout-should-be-on-stderr-1.patch
b/0016-fix-UA-page-debug-is-on-stdout-should-be-on-stderr-1.patch
new file mode 100644
index 0000000..36e4e8e
--- /dev/null
+++ b/0016-fix-UA-page-debug-is-on-stdout-should-be-on-stderr-1.patch
@@ -0,0 +1,28 @@
+From c92c6da845c19afb82b1ffd7bac74c665902e75b Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Mon, 21 Dec 2020 16:15:59 +0000
+Subject: [PATCH 16/50] fix UA page debug is on stdout (should be on stderr)
+ (#124)
+
+---
+ grab/na_dtv/tv_grab_na_dtv | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/grab/na_dtv/tv_grab_na_dtv b/grab/na_dtv/tv_grab_na_dtv
+index 0adc76c8..5bd741a3 100755
+--- a/grab/na_dtv/tv_grab_na_dtv
++++ b/grab/na_dtv/tv_grab_na_dtv
+@@ -323,8 +323,8 @@ sub getBrowser {
+ if $conf->{proxy}->[0];
+
+ if ($DEBUG && $VERBOSE) {
+- $ua->add_handler("request_send", sub { print "Request:\n";
shift->dump; return });
+- $ua->add_handler("response_done", sub { print "Response:\n";
shift->dump; return });
++ $ua->add_handler("request_send", sub { print STDERR
"Request:\n" . shift->dump; return });
++ $ua->add_handler("response_done", sub { print STDERR
"Response:\n" . shift->dump; return });
+ }
+
+ return $ua;
+--
+2.29.2
+
diff --git a/0017-Fetch-programme-data-via-SSL-avoids-301-redirects.patch
b/0017-Fetch-programme-data-via-SSL-avoids-301-redirects.patch
new file mode 100644
index 0000000..94cea8f
--- /dev/null
+++ b/0017-Fetch-programme-data-via-SSL-avoids-301-redirects.patch
@@ -0,0 +1,33 @@
+From 074d069414ee12946685a77e93c8aeec8716b6b3 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Mon, 21 Dec 2020 16:20:00 +0000
+Subject: [PATCH 17/50] Fetch programme data via SSL (avoids 301 redirects)
+
+---
+ grab/na_dtv/tv_grab_na_dtv | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/grab/na_dtv/tv_grab_na_dtv b/grab/na_dtv/tv_grab_na_dtv
+index 5bd741a3..a4cdf860 100755
+--- a/grab/na_dtv/tv_grab_na_dtv
++++ b/grab/na_dtv/tv_grab_na_dtv
+@@ -141,6 +141,7 @@ $TMP_FILEBASE .= '/na_dtv_';
+ my $queue_filename = "$TMP_FILEBASE" . "q";
+
+ my $SITEBASE = "http://www.directv.com";
++my $SITEBASESSL = "https://www.directv.com";
+
+ # URL for grabbing channel list
+ my $CHANNEL_LIST_URL = "$SITEBASE/json/channels";
+@@ -149,7 +150,7 @@ my $CHANNEL_LIST_URL = "$SITEBASE/json/channels";
+ my $SCHEDULE_URL = "$SITEBASE/json/channelschedule";
+
+ # Each program ID will be appended to this URL to get its details.
+-my $DETAILS_URL = "$SITEBASE/json/program/flip";
++my $DETAILS_URL = "$SITEBASESSL/json/program/flip";
+
+ my $XML_PRELUDE =
+ '<?xml version="1.0" encoding="ISO-8859-1"?>' .
"\n"
+--
+2.29.2
+
diff --git a/0018-for-compatability-with-older-versions-of-Perl-122.patch
b/0018-for-compatability-with-older-versions-of-Perl-122.patch
new file mode 100644
index 0000000..04d9d8b
--- /dev/null
+++ b/0018-for-compatability-with-older-versions-of-Perl-122.patch
@@ -0,0 +1,25 @@
+From d8b8bb60ff35c94e2dd5421d26d53d2bad348131 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Wed, 23 Dec 2020 11:51:25 +0000
+Subject: [PATCH 18/50] for compatability with older versions of Perl (#122)
+
+---
+ grab/pt_vodafone/tv_grab_pt_vodafone | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/grab/pt_vodafone/tv_grab_pt_vodafone b/grab/pt_vodafone/tv_grab_pt_vodafone
+index 1ca953ee..a0f2bea3 100755
+--- a/grab/pt_vodafone/tv_grab_pt_vodafone
++++ b/grab/pt_vodafone/tv_grab_pt_vodafone
+@@ -271,7 +271,7 @@ sub get_epg
+
+ if ( ! $epgSource ){
+ die("Bad EPG download, probably channel list is outdated, rerun the
grabber configure to update the list.\n" ); }
+- elsif ( $epgSource->{data}->@* == 0 ){
++ elsif ( !$epgSource->{data} || scalar @{$epgSource->{data}} == 0 ){
+ print( STDERR " Empty EPG download for ".$channel.",
probably channel list is outdated or no API data for that channel\n" .
+ " Rerun the grabber configure to update the list or check for the
channel EPG in the Vodafone app.\n" );
+ next;
+--
+2.29.2
+
diff --git a/0019-download-optional-file-if-its-prepStage-is-specifica.patch
b/0019-download-optional-file-if-its-prepStage-is-specifica.patch
new file mode 100644
index 0000000..2dbba96
--- /dev/null
+++ b/0019-download-optional-file-if-its-prepStage-is-specifica.patch
@@ -0,0 +1,26 @@
+From 4a9d4274b056ed039e9dcf894a77d179e249e22b Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Wed, 23 Dec 2020 16:49:41 +0000
+Subject: [PATCH 19/50] download optional file if its prepStage is specifically
+ requested
+
+---
+ lib/IMDB.pm | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/IMDB.pm b/lib/IMDB.pm
+index 1996a115..1e794ff9 100644
+--- a/lib/IMDB.pm
++++ b/lib/IMDB.pm
+@@ -1439,7 +1439,7 @@ sub checkFiles () {
+ if ( not $filenameExists and not $filenameGzExists ) {
+ # Just report one of the filenames, keep the message simple.
+ warn "$filenameGz does not exist\n";
+- if ( $self->{optionalStages}{$file} ) {
++ if ( $self->{optionalStages}{$file} && lc($self->{stageToRun})
eq 'all' ) {
+ warn "$file will not be added to database\n";
+ } else {
+ $missingListFiles{$file}=$filenameGz;
+--
+2.29.2
+
diff --git a/0020-Unbreak-parsing-of-keywords-file.patch
b/0020-Unbreak-parsing-of-keywords-file.patch
new file mode 100644
index 0000000..99200bd
--- /dev/null
+++ b/0020-Unbreak-parsing-of-keywords-file.patch
@@ -0,0 +1,25 @@
+From 8f61ae35af813ecd0e381fc4d9c199381c8d10d6 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Wed, 23 Dec 2020 17:02:09 +0000
+Subject: [PATCH 20/50] Unbreak parsing of keywords file
+
+---
+ lib/IMDB.pm | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/IMDB.pm b/lib/IMDB.pm
+index 1e794ff9..2c36b2b9 100644
+--- a/lib/IMDB.pm
++++ b/lib/IMDB.pm
+@@ -2007,7 +2007,7 @@ sub readKeywords($$$$)
+ }
+ last;
+ }
+- elsif ( $lineCount > 100000 ) {
++ elsif ( $lineCount > 150000 ) { # line 101935 as at 2020-12-23
+ $self->error("$file: stopping at line $lineCount, didn't see \"THE
KEYWORDS LIST\" line");
+ closeMaybeGunzip($file, $fh);
+ return(-1);
+--
+2.29.2
+
diff --git a/0021-Reduce-memory-consumption-in-building-database-63.patch
b/0021-Reduce-memory-consumption-in-building-database-63.patch
new file mode 100644
index 0000000..1256ce1
--- /dev/null
+++ b/0021-Reduce-memory-consumption-in-building-database-63.patch
@@ -0,0 +1,79 @@
+From 49473786312f0d0e60ed98c4ac9fcbf5c8a68605 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Wed, 23 Dec 2020 17:38:20 +0000
+Subject: [PATCH 21/50] Reduce memory consumption in building database (#63)
+
+---
+ lib/IMDB.pm | 21 ++++++++-------------
+ 1 file changed, 8 insertions(+), 13 deletions(-)
+
+diff --git a/lib/IMDB.pm b/lib/IMDB.pm
+index 2c36b2b9..f143eaf3 100644
+--- a/lib/IMDB.pm
++++ b/lib/IMDB.pm
+@@ -2788,8 +2788,8 @@ sub invokeStage($$)
+
+ if ( 1 ) {
+ # fill in default for movies we didn't have a director for
+- for my $key (keys %movies) {
+- if ( !length($movies{$key})) {
++ while (my ($key, $val) = each (%movies)) {
++ if (!length($val)) {
+ $movies{$key}="<>";
+ }
+ }
+@@ -2880,8 +2880,8 @@ sub invokeStage($$)
+ }
+ if ( 1 ) {
+ # fill in placeholder if no actors were found
+- for my $key (keys %movies) {
+- if ( !($movies{$key}=~m/$tab/o) ) {
++ while (my ($key, $val) = each (%movies)) {
++ if ( !($val=~m/$tab/o) ) {
+ $movies{$key}.=$tab."<>";
+ }
+ }
+@@ -2928,8 +2928,7 @@ sub invokeStage($$)
+
+ if ( 1 ) {
+ # fill in placeholder if no genres were found
+- for my $key (keys %movies) {
+- my $val=$movies{$key};
++ while (my ($key, $val) = each (%movies)) {
+ my $t=index($val, $tab);
+ if ( $t == -1 ) {
+ die "corrupt entry '$key' '$val'";
+@@ -2982,9 +2981,7 @@ sub invokeStage($$)
+
+ if ( 1 ) {
+ # fill in placeholder if no genres were found
+- for my $key (keys %movies) {
+- my $val=$movies{$key};
+-
++ while (my ($key, $val) = each (%movies)) {
+ my $t=index($val, $tab);
+ if ( $t == -1 ) {
+ die "corrupt entry '$key' '$val'";
+@@ -3040,8 +3037,7 @@ sub invokeStage($$)
+
+ if ( 1 ) {
+ # fill in default for movies we didn't have any keywords for
+- for my $key (keys %movies) {
+- my $val=$movies{$key};
++ while (my ($key, $val) = each (%movies)) {
+ #keyword is 6th entry
+ my $t = 0;
+ for my $i (0..4) {
+@@ -3097,8 +3093,7 @@ sub invokeStage($$)
+ }
+ if ( 1 ) {
+ # fill in default for movies we didn't have any plot for
+- for my $key (keys %movies) {
+- my $val=$movies{$key};
++ while (my ($key, $val) = each (%movies)) {
+ #plot is 7th entry
+ my $t = 0;
+ for my $i (0..5) {
+--
+2.29.2
+
diff --git a/0022-replace-spaces-with-tabs-and-prettify-the-code.patch
b/0022-replace-spaces-with-tabs-and-prettify-the-code.patch
new file mode 100644
index 0000000..1c7fcd4
--- /dev/null
+++ b/0022-replace-spaces-with-tabs-and-prettify-the-code.patch
@@ -0,0 +1,6375 @@
+From bfca82acc4eb2da44f1e628e7221d1395611e04c Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Thu, 24 Dec 2020 11:38:50 +0000
+Subject: [PATCH 22/50] replace spaces with tabs, and prettify the code
+
+---
+ lib/IMDB.pm | 5893 ++++++++++++++++++++++++++-------------------------
+ 1 file changed, 2947 insertions(+), 2946 deletions(-)
+
+diff --git a/lib/IMDB.pm b/lib/IMDB.pm
+index f143eaf3..123ceb03 100644
+--- a/lib/IMDB.pm
++++ b/lib/IMDB.pm
+@@ -1,29 +1,29 @@
+ # The IMDB file contains two packages:
+ # 1. XMLTV::IMDB::Cruncher package which parses and manages IMDB "lists"
files
+-# from
ftp.imdb.com
++# from
ftp.imdb.com
+ # 2. XMLTV::IMDB package that uses data files from the Cruncher package to
+-# update/add details to XMLTV programme nodes.
++# update/add details to XMLTV programme nodes.
+ #
+ # FUTURE - multiple hits on the same 'title only' could try and look for
+-# character names matching from description to
imdb.com character
+-# names.
++# character names matching from description to
imdb.com character
++# names.
+ #
+ # FUTURE - multiple hits on 'title only' should probably pick latest
+-# tv series over any older ones. May make for better guesses.
++# tv series over any older ones. May make for better guesses.
+ #
+ # BUG - we identify 'presenters' by the word "Host" appearing in the
character
+-# description. For some movies, character names include the word Host.
+-# ex. Animal, The (2001) has a character named "Badger Milk Host".
++# description. For some movies, character names include the word Host.
++# ex. Animal, The (2001) has a character named "Badger Milk Host".
+ #
+ # BUG - if there is a matching title with > 1 entry (say made for tv-movie and
+-# at tv-mini series) made in the same year (or even "close" years) it
is
+-# possible for us to pick the wrong one we should pick the one with the
+-# closest year, not just the first closest match based on the result ordering
+-# for instance Ghost Busters was made in 1984, and into a tv series in
+-# 1986. if we have a list of GhostBusters 1983, we should pick the 1984 movie
+-# and not 1986 tv series...maybe :) but currently we'll pick the first
+-# returned close enough match instead of trying the closest date match of
+-# the approx hits.
++# at tv-mini series) made in the same year (or even "close" years) it is
++# possible for us to pick the wrong one we should pick the one with the
++# closest year, not just the first closest match based on the result ordering
++# for instance Ghost Busters was made in 1984, and into a tv series in
++# 1986. if we have a list of GhostBusters 1983, we should pick the 1984 movie
++# and not 1986 tv series...maybe :) but currently we'll pick the first
++# returned close enough match instead of trying the closest date match of
++# the approx hits.
+ #
+
+ use strict;
+@@ -36,313 +36,314 @@ use open ':encoding(iso-8859-1)'; # try to enforce file
encoding (does this wo
+ # HISTORY
+ # .6 = what was here for the longest time
+ # .7 = fixed file size est calculations
+-# = moviedb.info now includes _file_size_uncompressed values for each downloaded
file
++# = moviedb.info now includes _file_size_uncompressed values for each downloaded file
+ # .8 = updated file size est calculations
+-# = moviedb.dat directors and actors list no longer include repeated names (which
mostly
+-# occured in episodic tv programs (reported by Alexy Khrabrov)
++# = moviedb.dat directors and actors list no longer include repeated names (which
mostly
++# occured in episodic tv programs (reported by Alexy Khrabrov)
+ # .9 = added keywords data
+ # .10 = added plot data
+ #
+-our $VERSION = '0.10'; # version number of database
++our $VERSION = '0.10'; # version number of database
+
+ sub new
+ {
+- my ($type) = shift;
+- my $self={ @_ }; # remaining args become attributes
++ my ($type) = shift;
++ my $self={ @_ }; # remaining args become attributes
+
+- for ('imdbDir', 'verbose') {
++ for ('imdbDir', 'verbose') {
+ die "invalid usage - no $_" if ( !defined($self->{$_}));
+- }
+- #$self->{verbose}=2;
+- $self->{replaceDates}=0 if ( !defined($self->{replaceDates}));
+- $self->{replaceTitles}=0 if ( !defined($self->{replaceTitles}));
+- $self->{replaceCategories}=0 if ( !defined($self->{replaceCategories}));
+- $self->{replaceKeywords}=0 if ( !defined($self->{replaceKeywords}));
+- $self->{replaceURLs}=0 if ( !defined($self->{replaceURLs}));
+- $self->{replaceDirectors}=1 if ( !defined($self->{replaceDirectors}));
+- $self->{replaceActors}=0 if ( !defined($self->{replaceActors}));
+- $self->{replacePresentors}=1 if ( !defined($self->{replacePresentors}));
+- $self->{replaceCommentators}=1 if ( !defined($self->{replaceCommentators}));
+- $self->{replaceStarRatings}=0 if ( !defined($self->{replaceStarRatings}));
+- $self->{replacePlot}=0 if ( !defined($self->{replacePlot}));
+-
+- $self->{updateDates}=1 if ( !defined($self->{updateDates}));
+- $self->{updateTitles}=1 if ( !defined($self->{updateTitles}));
+- $self->{updateCategories}=1 if ( !defined($self->{updateCategories}));
+- $self->{updateCategoriesWithGenres}=1 if (
!defined($self->{updateCategoriesWithGenres}));
+- $self->{updateKeywords}=0 if ( !defined($self->{updateKeywords}));
# default is to NOT add keywords
+- $self->{updateURLs}=1 if ( !defined($self->{updateURLs}));
+- $self->{updateDirectors}=1 if ( !defined($self->{updateDirectors}));
+- $self->{updateActors}=1 if ( !defined($self->{updateActors}));
+- $self->{updatePresentors}=1 if ( !defined($self->{updatePresentors}));
+- $self->{updateCommentators}=1 if ( !defined($self->{updateCommentators}));
+- $self->{updateStarRatings}=1 if ( !defined($self->{updateStarRatings}));
+- $self->{updatePlot}=0 if ( !defined($self->{updatePlot})); #
default is to NOT add plot
+-
+- $self->{numActors}=3 if ( !defined($self->{numActors})); #
default is to add top 3 actors
+-
+- $self->{moviedbIndex}="$self->{imdbDir}/moviedb.idx";
+- $self->{moviedbData}="$self->{imdbDir}/moviedb.dat";
+- $self->{moviedbInfo}="$self->{imdbDir}/moviedb.info";
+- $self->{moviedbOffline}="$self->{imdbDir}/moviedb.offline";
+-
+- # default is not to cache lookups
+- $self->{cacheLookups}=0 if ( !defined($self->{cacheLookups}) );
+- $self->{cacheLookupSize}=0 if ( !defined($self->{cacheLookupSize}) );
+-
+- $self->{cachedLookups}->{tv_series}->{_cacheSize_}=0;
+-
+- bless($self, $type);
+-
+- $self->{categories}={'movie' =>'Movie',
+- 'tv_movie' =>'TV Movie', # made for tv
+- 'video_movie' =>'Video Movie', # went straight to video or was
made for it
+- 'tv_series' =>'TV Series',
++ }
++ #$self->{verbose}=2;
++ $self->{replaceDates}=0 if ( !defined($self->{replaceDates}));
++ $self->{replaceTitles}=0 if ( !defined($self->{replaceTitles}));
++ $self->{replaceCategories}=0 if ( !defined($self->{replaceCategories}));
++ $self->{replaceKeywords}=0 if ( !defined($self->{replaceKeywords}));
++ $self->{replaceURLs}=0 if ( !defined($self->{replaceURLs}));
++ $self->{replaceDirectors}=1 if ( !defined($self->{replaceDirectors}));
++ $self->{replaceActors}=0 if ( !defined($self->{replaceActors}));
++ $self->{replacePresentors}=1 if ( !defined($self->{replacePresentors}));
++ $self->{replaceCommentators}=1 if ( !defined($self->{replaceCommentators}));
++ $self->{replaceStarRatings}=0 if ( !defined($self->{replaceStarRatings}));
++ $self->{replacePlot}=0 if ( !defined($self->{replacePlot}));
++
++ $self->{updateDates}=1 if ( !defined($self->{updateDates}));
++ $self->{updateTitles}=1 if ( !defined($self->{updateTitles}));
++ $self->{updateCategories}=1 if ( !defined($self->{updateCategories}));
++ $self->{updateCategoriesWithGenres}=1 if (
!defined($self->{updateCategoriesWithGenres}));
++ $self->{updateKeywords}=0 if ( !defined($self->{updateKeywords})); # default is
to NOT add keywords
++ $self->{updateURLs}=1 if ( !defined($self->{updateURLs}));
++ $self->{updateDirectors}=1 if ( !defined($self->{updateDirectors}));
++ $self->{updateActors}=1 if ( !defined($self->{updateActors}));
++ $self->{updatePresentors}=1 if ( !defined($self->{updatePresentors}));
++ $self->{updateCommentators}=1 if ( !defined($self->{updateCommentators}));
++ $self->{updateStarRatings}=1 if ( !defined($self->{updateStarRatings}));
++ $self->{updatePlot}=0 if ( !defined($self->{updatePlot})); # default is to
NOT add plot
++
++ $self->{numActors}=3 if ( !defined($self->{numActors})); # default is to
add top 3 actors
++
++ $self->{moviedbIndex}="$self->{imdbDir}/moviedb.idx";
++ $self->{moviedbData}="$self->{imdbDir}/moviedb.dat";
++ $self->{moviedbInfo}="$self->{imdbDir}/moviedb.info";
++ $self->{moviedbOffline}="$self->{imdbDir}/moviedb.offline";
++
++ # default is not to cache lookups
++ $self->{cacheLookups}=0 if ( !defined($self->{cacheLookups}) );
++ $self->{cacheLookupSize}=0 if ( !defined($self->{cacheLookupSize}) );
++
++ $self->{cachedLookups}->{tv_series}->{_cacheSize_}=0;
++
++ bless($self, $type);
++
++ $self->{categories}={'movie' =>'Movie',
++ 'tv_movie' =>'TV Movie', # made for tv
++ 'video_movie' =>'Video Movie', # went straight to video or was
made for it
++ 'tv_series' =>'TV Series',
+ 'tv_mini_series' =>'TV Mini Series'};
+
+- $self->{stats}->{programCount}=0;
++ $self->{stats}->{programCount}=0;
+
+- for my $cat (keys %{$self->{categories}}) {
+- $self->{stats}->{perfect}->{$cat}=0;
+- $self->{stats}->{close}->{$cat}=0;
+- }
+- $self->{stats}->{perfectMatches}=0;
+- $self->{stats}->{closeMatches}=0;
++ for my $cat (keys %{$self->{categories}}) {
++ $self->{stats}->{perfect}->{$cat}=0;
++ $self->{stats}->{close}->{$cat}=0;
++ }
++ $self->{stats}->{perfectMatches}=0;
++ $self->{stats}->{closeMatches}=0;
+
+- $self->{stats}->{startTime}=time();
++ $self->{stats}->{startTime}=time();
+
+- return($self);
++ return($self);
+ }
+
+ sub loadDBInfo($)
+ {
+- my $file=shift;
+- my $info;
++ my $file=shift;
++ my $info;
+
+- open(INFO, "< $file") || return("imdbDir index file
\"$file\":$!\n");
+- while(<INFO>) {
++ open(INFO, "< $file") || return("imdbDir index file
\"$file\":$!\n");
++ while(<INFO>) {
+ chop();
+ if ( s/^([^:]+)://o ) {
+- $info->{$1}=$_;
++ $info->{$1}=$_;
+ }
+- }
+- close(INFO);
+- return($info);
++ }
++ close(INFO);
++ return($info);
+ }
+
+ sub checkIndexesOkay($)
+ {
+- my $self=shift;
+- if ( ! -d "$self->{imdbDir}" ) {
+- return("imdbDir \"$self->{imdbDir}\" does not exist\n");
+- }
++ my $self=shift;
++ if ( ! -d "$self->{imdbDir}" ) {
++ return("imdbDir \"$self->{imdbDir}\" does not exist\n");
++ }
+
+- if ( -f "$self->{moviedbOffline}" ) {
+- return("imdbDir index offline: check $self->{moviedbOffline} for
details");
+- }
++ if ( -f "$self->{moviedbOffline}" ) {
++ return("imdbDir index offline: check $self->{moviedbOffline} for
details");
++ }
+
+- for my $file ($self->{moviedbIndex}, $self->{moviedbData},
$self->{moviedbInfo}) {
+- if ( ! -f "$file" ) {
+- return("imdbDir index file \"$file\" does not exist\n");
++ for my $file ($self->{moviedbIndex}, $self->{moviedbData},
$self->{moviedbInfo}) {
++ if ( ! -f "$file" ) {
++ return("imdbDir index file \"$file\" does not exist\n");
++ }
+ }
+- }
+
+- $VERSION=~m/^(\d+)\.(\d+)$/o || die "package corrupt, VERSION string invalid
($VERSION)";
+- my ($major, $minor)=($1, $2);
++ $VERSION=~m/^(\d+)\.(\d+)$/o || die "package corrupt, VERSION string invalid
($VERSION)";
++ my ($major, $minor)=($1, $2);
+
+- my $info=loadDBInfo($self->{moviedbInfo});
+- return($info) if ( ref $info eq 'SCALAR' );
++ my $info=loadDBInfo($self->{moviedbInfo});
++ return($info) if ( ref $info eq 'SCALAR' );
+
+- if ( !defined($info->{db_version}) ) {
+- return("imdbDir index db missing version information, rerun --prepStage
all\n");
+- }
+- if ( $info->{db_version}=~m/^(\d+)\.(\d+)$/o ) {
+- if ( $1 != $major || $2 < $minor ) {
+- return("imdbDir index db requires updating, rerun --prepStage all\n");
+- }
+- if ( $1 == 0 && $2 == 1 ) {
+- return("imdbDir index db requires update, rerun --prepStage 5 (bug:actresses
never appear)\n");
+- }
+- if ( $1 == 0 && $2 == 2 ) {
+- # 0.2 -> 0.3 upgrade requires prepStage 5 to be re-run
+- return("imdbDir index db requires minor reindexing, rerun --prepStage 3 and
5\n");
++ if ( !defined($info->{db_version}) ) {
++ return("imdbDir index db missing version information, rerun --prepStage
all\n");
+ }
+- if ( $1 == 0 && $2 == 3 ) {
+- # 0.2 -> 0.3 upgrade requires prepStage 5 to be re-run
+- return("imdbDir index db requires major reindexing, rerun --prepStage 2 and
new prepStages 5,6,7,8 and 9\n");
++ if ( $info->{db_version}=~m/^(\d+)\.(\d+)$/o ) {
++ if ( $1 != $major || $2 < $minor ) {
++ return("imdbDir index db requires updating, rerun --prepStage all\n");
++ }
++ if ( $1 == 0 && $2 == 1 ) {
++ return("imdbDir index db requires update, rerun --prepStage 5 (bug:actresses
never appear)\n");
++ }
++ if ( $1 == 0 && $2 == 2 ) {
++ # 0.2 -> 0.3 upgrade requires prepStage 5 to be re-run
++ return("imdbDir index db requires minor reindexing, rerun --prepStage 3 and
5\n");
++ }
++ if ( $1 == 0 && $2 == 3 ) {
++ # 0.2 -> 0.3 upgrade requires prepStage 5 to be re-run
++ return("imdbDir index db requires major reindexing, rerun --prepStage 2 and new
prepStages 5,6,7,8 and 9\n");
++ }
++ if ( $1 == 0 && $2 == 4 ) {
++ # 0.2 -> 0.3 upgrade requires prepStage 5 to be re-run
++ return("imdbDir index db corrupt (got version 0.4), rerun --prepStage
all\n");
++ }
++ # okay
++ return(undef);
+ }
+- if ( $1 == 0 && $2 == 4 ) {
+- # 0.2 -> 0.3 upgrade requires prepStage 5 to be re-run
+- return("imdbDir index db corrupt (got version 0.4), rerun --prepStage
all\n");
++ else {
++ return("imdbDir index version of '$info->{db_version}' is invalid,
rerun --prepStage all\n".
++ "if problem persists, submit bug report to xmltv-devel\(a)lists.sf.net\n");
+ }
+- # okay
+- return(undef);
+- }
+- else {
+- return("imdbDir index version of '$info->{db_version}' is invalid,
rerun --prepStage all\n".
+- "if problem persists, submit bug report to
xmltv-devel\(a)lists.sf.net\n");
+- }
+ }
+
+ sub basicVerificationOfIndexes($)
+ {
+- my $self=shift;
+-
+- # check that the imdbdir is invalid and up and running
+- my $title="Army of Darkness";
+- my $year=1992;
+-
+- $self->openMovieIndex() || return("basic verification of indexes
failed\n".
+- "database index isn't readable");
+-
+- my $verbose = $self->{verbose}; $self->{verbose} = 0;
+- my $res=$self->getMovieMatches($title, $year);
+- $self->{verbose} = $verbose; undef $verbose;
+- if ( !defined($res) ) {
+- $self->closeMovieIndex();
+- return("basic verification of indexes failed\n".
+- "no match for basic verification of movie \"$title,
$year\"\n");
+- }
+- if ( !defined($res->{exactMatch}) ) {
+- $self->closeMovieIndex();
+- return("basic verification of indexes failed\n".
+- "no exact match for movie \"$title, $year\"\n");
+- }
+- if ( scalar(@{$res->{exactMatch}})!= 1) {
+- $self->closeMovieIndex();
+- return("basic verification of indexes failed\n".
+- "got more than one exact match for movie \"$title,
$year\"\n");
+- }
+- my @exact=@{$res->{exactMatch}};
+- if ( $exact[0]->{title} ne $title ) {
+- $self->closeMovieIndex();
+- return("basic verification of indexes failed\n".
+- "title associated with key \"$title, $year\" is bad\n");
+- }
+-
+- if ( $exact[0]->{year} ne "$year" ) {
+- $self->closeMovieIndex();
+- return("basic verification of indexes failed\n".
+- "year associated with key \"$title, $year\" is bad\n");
+- }
+-
+- my $id=$exact[0]->{id};
+- $res=$self->getMovieIdDetails($id);
+- if ( !defined($res) ) {
+- $self->closeMovieIndex();
+- return("basic verification of indexes failed\n".
+- "no movie details for movie \"$title, $year\" (id=$id)\n");
+- }
+-
+- if ( !defined($res->{directors}) ) {
+- $self->closeMovieIndex();
+- return("basic verification of indexes failed\n".
+- "movie details didn't provide any director for movie \"$title,
$year\" (id=$id)\n");
+- }
+- if ( !$res->{directors}[0]=~m/Raimi/o ) {
+- $self->closeMovieIndex();
+- return("basic verification of indexes failed\n".
+- "movie details didn't show Raimi as the main director for movie
\"$title, $year\" (id=$id)\n");
+- }
+- if ( !defined($res->{actors}) ) {
+- $self->closeMovieIndex();
+- return("basic verification of indexes failed\n".
+- "movie details didn't provide any cast movie \"$title,
$year\" (id=$id)\n");
+- }
+- if ( !$res->{actors}[0]=~m/Campbell/o ) {
+- $self->closeMovieIndex();
+- return("basic verification of indexes failed\n".
+- "movie details didn't show Bruce Campbell as the main actor in movie
\"$title, $year\" (id=$id)\n");
+- }
+- my $matches=0;
+- for (@{$res->{genres}}) {
+- if ( $_ eq "Action" ||
+- $_ eq "Comedy" ||
+- $_ eq "Fantasy" ||
+- $_ eq "Horror" ||
+- $_ eq "Romance" ) {
+- $matches++;
+- }
+- }
+- if ( $matches == 0 ) {
+- $self->closeMovieIndex();
+- return("basic verification of indexes failed\n".
+- "movie details didn't show genres correctly for movie \"$title,
$year\" (id=$id)\n");
+- }
+- if ( !defined($res->{ratingDist}) ||
++ my $self=shift;
++
++ # check that the imdbdir is invalid and up and running
++ my $title="Army of Darkness";
++ my $year=1992;
++
++ $self->openMovieIndex() || return("basic verification of indexes
failed\n".
++ "database index isn't readable");
++
++ my $verbose = $self->{verbose}; $self->{verbose} = 0;
++ my $res=$self->getMovieMatches($title, $year);
++ $self->{verbose} = $verbose; undef $verbose;
++ if ( !defined($res) ) {
++ $self->closeMovieIndex();
++ return("basic verification of indexes failed\n".
++ "no match for basic verification of movie \"$title,
$year\"\n");
++ }
++ if ( !defined($res->{exactMatch}) ) {
++ $self->closeMovieIndex();
++ return("basic verification of indexes failed\n".
++ "no exact match for movie \"$title, $year\"\n");
++ }
++ if ( scalar(@{$res->{exactMatch}})!= 1) {
++ $self->closeMovieIndex();
++ return("basic verification of indexes failed\n".
++ "got more than one exact match for movie \"$title,
$year\"\n");
++ }
++ my @exact=@{$res->{exactMatch}};
++ if ( $exact[0]->{title} ne $title ) {
++ $self->closeMovieIndex();
++ return("basic verification of indexes failed\n".
++ "title associated with key \"$title, $year\" is bad\n");
++ }
++
++ if ( $exact[0]->{year} ne "$year" ) {
++ $self->closeMovieIndex();
++ return("basic verification of indexes failed\n".
++ "year associated with key \"$title, $year\" is bad\n");
++ }
++
++ my $id=$exact[0]->{id};
++ $res=$self->getMovieIdDetails($id);
++ if ( !defined($res) ) {
++ $self->closeMovieIndex();
++ return("basic verification of indexes failed\n".
++ "no movie details for movie \"$title, $year\" (id=$id)\n");
++ }
++
++ if ( !defined($res->{directors}) ) {
++ $self->closeMovieIndex();
++ return("basic verification of indexes failed\n".
++ "movie details didn't provide any director for movie \"$title,
$year\" (id=$id)\n");
++ }
++ if ( !$res->{directors}[0]=~m/Raimi/o ) {
++ $self->closeMovieIndex();
++ return("basic verification of indexes failed\n".
++ "movie details didn't show Raimi as the main director for movie
\"$title, $year\" (id=$id)\n");
++ }
++ if ( !defined($res->{actors}) ) {
++ $self->closeMovieIndex();
++ return("basic verification of indexes failed\n".
++ "movie details didn't provide any cast movie \"$title, $year\"
(id=$id)\n");
++ }
++ if ( !$res->{actors}[0]=~m/Campbell/o ) {
++ $self->closeMovieIndex();
++ return("basic verification of indexes failed\n".
++ "movie details didn't show Bruce Campbell as the main actor in movie
\"$title, $year\" (id=$id)\n");
++ }
++ my $matches=0;
++ for (@{$res->{genres}}) {
++ if ( $_ eq "Action" ||
++ $_ eq "Comedy" ||
++ $_ eq "Fantasy" ||
++ $_ eq "Horror" ||
++ $_ eq "Romance" ) {
++ $matches++;
++ }
++ }
++ if ( $matches == 0 ) {
++ $self->closeMovieIndex();
++ return("basic verification of indexes failed\n".
++ "movie details didn't show genres correctly for movie \"$title,
$year\" (id=$id)\n");
++ }
++ if ( !defined($res->{ratingDist}) ||
+ !defined($res->{ratingVotes}) ||
+ !defined($res->{ratingRank}) ) {
++ $self->closeMovieIndex();
++ return("basic verification of indexes failed\n".
++ "movie details didn't show imdbratings for movie \"$title,
$year\" (id=$id)\n");
++ }
++
+ $self->closeMovieIndex();
+- return("basic verification of indexes failed\n".
+- "movie details didn't show imdbratings for movie \"$title,
$year\" (id=$id)\n");
+- }
+- $self->closeMovieIndex();
+- return(undef);
++ return(undef);
+
+ }
+
+ sub sanityCheckDatabase($)
+ {
+- my $self=shift;
+- my $errline;
++ my $self=shift;
++ my $errline;
+
+- $errline=$self->checkIndexesOkay();
+- return($errline) if ( defined($errline) );
+- $errline=$self->basicVerificationOfIndexes();
+- return($errline) if ( defined($errline) );
++ $errline=$self->checkIndexesOkay();
++ return($errline) if ( defined($errline) );
++ $errline=$self->basicVerificationOfIndexes();
++ return($errline) if ( defined($errline) );
+
+- # all okay
+- return(undef);
++ # all okay
++ return(undef);
+ }
+
+ sub error($$)
+ {
+- print STDERR "tv_imdb: $_[1]\n";
++ print STDERR "tv_imdb: $_[1]\n";
+ }
+
+ sub status($$)
+ {
+- if ( $_[0]->{verbose} ) {
++ if ( $_[0]->{verbose} ) {
+ print STDERR "tv_imdb: $_[1]\n";
+- }
++ }
+ }
+
+ sub debug($$)
+ {
+- my $self=shift;
+- my $mess=shift;
+- if ( $self->{verbose} > 1 ) {
++ my $self=shift;
++ my $mess=shift;
++ if ( $self->{verbose} > 1 ) {
+ print STDERR "tv_imdb: $mess\n";
+- }
++ }
+ }
+
+ use Search::Dict;
+
+ sub openMovieIndex($)
+ {
+- my $self=shift;
++ my $self=shift;
+
+- if ( !open($self->{INDEX_FD}, "< $self->{moviedbIndex}") ) {
+- return(undef);
+- }
+- if ( !open($self->{DBASE_FD}, "< $self->{moviedbData}") ) {
+- close($self->{INDEX_FD});
+- return(undef);
+- }
+- return(1);
++ if ( !open($self->{INDEX_FD}, "< $self->{moviedbIndex}") ) {
++ return(undef);
++ }
++ if ( !open($self->{DBASE_FD}, "< $self->{moviedbData}") ) {
++ close($self->{INDEX_FD});
++ return(undef);
++ }
++ return(1);
+ }
+
+ sub closeMovieIndex($)
+ {
+- my $self=shift;
++ my $self=shift;
+
+- close($self->{INDEX_FD});
+- delete($self->{INDEX_FD});
++ close($self->{INDEX_FD});
++ delete($self->{INDEX_FD});
+
+- close($self->{DBASE_FD});
+- delete($self->{DBASE_FD});
++ close($self->{DBASE_FD});
++ delete($self->{DBASE_FD});
+
+- return(1);
++ return(1);
+ }
+
+ # moviedbIndex file has the format:
+@@ -350,565 +351,565 @@ sub closeMovieIndex($)
+ # where key is a url encoded title followed by the year of production and a colon
+ sub getMovieMatches($$$)
+ {
+- my $self=shift;
+- my $title=shift;
+- my $year=shift;
+-
+- # Articles are put at the end of a title ( in all languages )
+- #$match=~s/^(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)\s+(.*)$/$2,
$1/og;
+-
+- my $match="$title";
+- if ( defined($year) ) {
+- $match.=" ($year)";
+- }
+-
+- # to encode s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg
+- # to decode s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) :
utf8_chr(hex($2))/oge;
+-
+- # url encode
+- $match=lc($match);
+- $match=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg;
+-
+- $self->debug("looking for \"$match\" in
$self->{moviedbIndex}");
+- if ( !$self->{INDEX_FD} ) {
+- die "internal error: index not open";
+- }
+-
+- my $FD=$self->{INDEX_FD};
+- Search::Dict::look(*{$FD}, $match, 0, 0);
+- my $results;
+- while (<$FD>) {
++ my $self=shift;
++ my $title=shift;
++ my $year=shift;
++
++ # Articles are put at the end of a title ( in all languages )
++ #$match=~s/^(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)\s+(.*)$/$2, $1/og;
++
++ my $match="$title";
++ if ( defined($year) ) {
++ $match.=" ($year)";
++ }
++
++ # to encode s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg
++ # to decode s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) :
utf8_chr(hex($2))/oge;
++
++ # url encode
++ $match=lc($match);
++ $match=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg;
++
++ $self->debug("looking for \"$match\" in
$self->{moviedbIndex}");
++ if ( !$self->{INDEX_FD} ) {
++ die "internal error: index not open";
++ }
++
++ my $FD=$self->{INDEX_FD};
++ Search::Dict::look(*{$FD}, $match, 0, 0);
++ my $results;
++ while (<$FD>) {
+ last if ( !m/^$match/ );
+
+ chop();
+ my @arr=split('\t', $_);
+ if ( scalar(@arr) != 5 ) {
+- warn "$self->{moviedbIndex} corrupt (correct key:$_)";
+- next;
++ warn "$self->{moviedbIndex} corrupt (correct key:$_)";
++ next;
+ }
+
+ if ( $arr[0] eq $match ) {
+- # return title and id
+- #$arr[1]=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2
$1/og;
+-
+- #$arr[0]=~s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) :
utf8_chr(hex($2))/oge;
+- #$self->debug("exact:$arr[1] ($arr[2]) qualifier=$arr[3]
id=$arr[4]");
+- my $title=$arr[1];
+- if ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\)$//o ) {
+- }
+- elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVX]+\)$//o ) {
+- }
+- else {
+- die "unable to decode year from title key \"$title\", report to
xmltv-devel\(a)lists.sf.net";
+- }
+- $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2
$1/og;
+- $self->debug("exact:$title ($arr[2]) qualifier=$arr[3] id=$arr[4]");
+- push(@{$results->{exactMatch}}, {'key'=> $arr[1],
+- 'title'=>$title,
+- 'year'=>$arr[2],
+- 'qualifier'=>$arr[3],
+- 'id'=>$arr[4]});
++ # return title and id
++ #$arr[1]=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2
$1/og;
++
++ #$arr[0]=~s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) :
utf8_chr(hex($2))/oge;
++ #$self->debug("exact:$arr[1] ($arr[2]) qualifier=$arr[3] id=$arr[4]");
++ my $title=$arr[1];
++ if ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\)$//o ) {
++ }
++ elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVX]+\)$//o ) {
++ }
++ else {
++ die "unable to decode year from title key \"$title\", report to
xmltv-devel\(a)lists.sf.net";
++ }
++ $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og;
++ $self->debug("exact:$title ($arr[2]) qualifier=$arr[3] id=$arr[4]");
++ push(@{$results->{exactMatch}}, {'key'=> $arr[1],
++ 'title'=>$title,
++ 'year'=>$arr[2],
++ 'qualifier'=>$arr[3],
++ 'id'=>$arr[4]});
+ }
+ else {
+- # decode
+- #s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) :
utf8_chr(hex($2))/oge;
+- # return title
+- #$arr[1]=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2
$1/og;
+- #$arr[0]=~s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) :
utf8_chr(hex($2))/oge;
+- #$self->debug("close:$arr[1] ($arr[2]) qualifier=$arr[3]
id=$arr[4]");
+- my $title=$arr[1];
+-
+- if ( $title=~m/^\"/o && $title=~m/\"\s*\(/o ) { #"
+- $title=~s/^\"//o; #"
+- $title=~s/\"(\s*\()/$1/o; #"
+- }
+-
+- if ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\)$//o ) {
+- }
+- elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVX]+\)$//o ) {
+- }
+- else {
+- die "unable to decode year from title key \"$title\", report to
xmltv-devel\(a)lists.sf.net";
+- }
+- $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2
$1/og;
+- $self->debug("close:$title ($arr[2]) qualifier=$arr[3] id=$arr[4]");
+- push(@{$results->{closeMatch}}, {'key'=> $arr[1],
+- 'title'=>$title,
+- 'year'=>$arr[2],
+- 'qualifier'=>$arr[3],
+- 'id'=>$arr[4]});
+- }
+- }
+- #print "MovieMatches on ($match) = ".Dumper($results)."\n";
+- return($results);
++ # decode
++ #s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) :
utf8_chr(hex($2))/oge;
++ # return title
++ #$arr[1]=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2
$1/og;
++ #$arr[0]=~s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) :
utf8_chr(hex($2))/oge;
++ #$self->debug("close:$arr[1] ($arr[2]) qualifier=$arr[3] id=$arr[4]");
++ my $title=$arr[1];
++
++ if ( $title=~m/^\"/o && $title=~m/\"\s*\(/o ) { #"
++ $title=~s/^\"//o; #"
++ $title=~s/\"(\s*\()/$1/o; #"
++ }
++
++ if ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\)$//o ) {
++ }
++ elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVX]+\)$//o ) {
++ }
++ else {
++ die "unable to decode year from title key \"$title\", report to
xmltv-devel\(a)lists.sf.net";
++ }
++ $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og;
++ $self->debug("close:$title ($arr[2]) qualifier=$arr[3] id=$arr[4]");
++ push(@{$results->{closeMatch}}, {'key'=> $arr[1],
++ 'title'=>$title,
++ 'year'=>$arr[2],
++ 'qualifier'=>$arr[3],
++ 'id'=>$arr[4]});
++ }
++ }
++ #print "MovieMatches on ($match) = ".Dumper($results)."\n";
++ return($results);
+ }
+
+ sub getMovieExactMatch($$$)
+ {
+- my $self=shift;
+- my $title=shift;
+- my $year=shift;
+- my $res=$self->getMovieMatches($title, $year);
++ my $self=shift;
++ my $title=shift;
++ my $year=shift;
++ my $res=$self->getMovieMatches($title, $year);
+
+- return(undef) if ( !defined($res) );
+- if ( !defined($res->{exactMatch}) ) {
+- return(undef);
+- }
+- if ( scalar(@{$res->{exactMatch}}) != 1 ) {
+- return(undef);
+- }
+- return($res->{exactMatch}[0]);
++ return(undef) if ( !defined($res) );
++ if ( !defined($res->{exactMatch}) ) {
++ return(undef);
++ }
++ if ( scalar(@{$res->{exactMatch}}) != 1 ) {
++ return(undef);
++ }
++ return($res->{exactMatch}[0]);
+ }
+
+ sub getMovieCloseMatches($$)
+ {
+- my $self=shift;
+- my $title=shift;
+-
+- my $res=$self->getMovieMatches($title, undef) || return(undef);
+-
+- if ( defined($res->{exactMatch})) {
+- die "corrupt imdb database - hit on \"$title\"";
+- }
+- return(undef) if ( !defined($res->{closeMatch}) );
+- my @arr=@{$res->{closeMatch}};
+- #print "CLOSE DUMP=".Dumper((a)arr)."\n";
+- return(@arr);
++ my $self=shift;
++ my $title=shift;
++
++ my $res=$self->getMovieMatches($title, undef) || return(undef);
++
++ if ( defined($res->{exactMatch})) {
++ die "corrupt imdb database - hit on \"$title\"";
++ }
++ return(undef) if ( !defined($res->{closeMatch}) );
++ my @arr=@{$res->{closeMatch}};
++ #print "CLOSE DUMP=".Dumper((a)arr)."\n";
++ return(@arr);
+ }
+
+ sub getMovieIdDetails($$)
+ {
+- my $self=shift;
+- my $id=shift;
+-
+- if ( !$self->{DBASE_FD} ) {
+- die "internal error: index not open";
+- }
+- my $results;
+- my $FD=$self->{DBASE_FD};
+- Search::Dict::look(*{$FD}, "$id:", 0, 0);
+- while (<$FD>) {
++ my $self=shift;
++ my $id=shift;
++
++ if ( !$self->{DBASE_FD} ) {
++ die "internal error: index not open";
++ }
++ my $results;
++ my $FD=$self->{DBASE_FD};
++ Search::Dict::look(*{$FD}, "$id:", 0, 0);
++ while (<$FD>) {
+ last if ( !m/^$id:/ );
+ chop();
+ if ( s/^$id:// ) {
+- my ($directors, $actors, $genres, $ratingDist, $ratingVotes, $ratingRank,
$keywords, $plot)=split('\t', $_);
+- if ( $directors ne "<>" ) {
+- for my $name (split('\|', $directors)) {
+- # remove (I) etc from
imdb.com names (kept in place for reference)
+- $name=~s/\s\([IVX]+\)$//o;
+- # switch name around to be surname last
+- $name=~s/^([^,]+),\s*(.*)$/$2 $1/o;
+- push(@{$results->{directors}}, $name);
+- }
+- }
+- if ( $actors ne "<>" ) {
+- for my $name (split('\|', $actors)) {
+- # remove (I) etc from
imdb.com names (kept in place for reference)
+- my $HostNarrator;
+- if ( $name=~s/\[([^\]]+)\]$//o ) {
+- $HostNarrator=$1;
+- }
+- $name=~s/\s\([IVX]+\)$//o;
+-
+- # switch name around to be surname last
+- $name=~s/^([^,]+),\s*(.*)$/$2 $1/o;
+- if ( $HostNarrator ) {
+- if ( $HostNarrator=~s/,*Host//o ) {
+- push(@{$results->{presenter}}, $name);
+- }
+- if ( $HostNarrator=~s/,*Narrator//o ) {
+- push(@{$results->{commentator}}, $name);
+- }
+- }
+- else {
+- push(@{$results->{actors}}, $name);
+- }
+- }
+- }
+- if ( $genres ne "<>" ) {
+- push(@{$results->{genres}}, split('\|', $genres));
+- }
+- if ( $keywords ne "<>" ) {
+- push(@{$results->{keywords}}, split(',', $keywords));
+- }
+- $results->{ratingDist}=$ratingDist if ( $ratingDist ne "<>" );
+- $results->{ratingVotes}=$ratingVotes if ( $ratingVotes ne "<>"
);
+- $results->{ratingRank}=$ratingRank if ( $ratingRank ne "<>" );
+- $results->{plot}=$plot if ( $plot ne "<>" );
++ my ($directors, $actors, $genres, $ratingDist, $ratingVotes, $ratingRank, $keywords,
$plot)=split('\t', $_);
++ if ( $directors ne "<>" ) {
++ for my $name (split('\|', $directors)) {
++ # remove (I) etc from
imdb.com names (kept in place for reference)
++ $name=~s/\s\([IVX]+\)$//o;
++ # switch name around to be surname last
++ $name=~s/^([^,]+),\s*(.*)$/$2 $1/o;
++ push(@{$results->{directors}}, $name);
++ }
++ }
++ if ( $actors ne "<>" ) {
++ for my $name (split('\|', $actors)) {
++ # remove (I) etc from
imdb.com names (kept in place for reference)
++ my $HostNarrator;
++ if ( $name=~s/\[([^\]]+)\]$//o ) {
++ $HostNarrator=$1;
++ }
++ $name=~s/\s\([IVX]+\)$//o;
++
++ # switch name around to be surname last
++ $name=~s/^([^,]+),\s*(.*)$/$2 $1/o;
++ if ( $HostNarrator ) {
++ if ( $HostNarrator=~s/,*Host//o ) {
++ push(@{$results->{presenter}}, $name);
++ }
++ if ( $HostNarrator=~s/,*Narrator//o ) {
++ push(@{$results->{commentator}}, $name);
++ }
++ }
++ else {
++ push(@{$results->{actors}}, $name);
++ }
++ }
++ }
++ if ( $genres ne "<>" ) {
++ push(@{$results->{genres}}, split('\|', $genres));
++ }
++ if ( $keywords ne "<>" ) {
++ push(@{$results->{keywords}}, split(',', $keywords));
++ }
++ $results->{ratingDist}=$ratingDist if ( $ratingDist ne "<>" );
++ $results->{ratingVotes}=$ratingVotes if ( $ratingVotes ne "<>" );
++ $results->{ratingRank}=$ratingRank if ( $ratingRank ne "<>" );
++ $results->{plot}=$plot if ( $plot ne "<>" );
+ }
+ else {
+- warn "lookup of movie (id=$id) resulted in garbage ($_)";
+- }
+- }
+- if ( !defined($results) ) {
+- # some movies we don't have any details for
+- $results->{noDetails}=1;
+- }
+- #print "MovieDetails($id) = ".Dumper($results)."\n";
+- return($results);
++ warn "lookup of movie (id=$id) resulted in garbage ($_)";
++ }
++ }
++ if ( !defined($results) ) {
++ # some movies we don't have any details for
++ $results->{noDetails}=1;
++ }
++ #print "MovieDetails($id) = ".Dumper($results)."\n";
++ return($results);
+ }
+
+ #
+ # FUTURE - close hit could be just missing or extra
+-# punctuation:
+-# "Run Silent, Run Deep" for imdb's "Run Silent Run Deep"
+-# "Cherry, Harry and Raquel" for imdb's "Cherry, Harry and
Raquel!"
+-# "Cat Women of the Moon" for imdb's "Cat-Women of the
Moon"
+-# "Baywatch Hawaiian Wedding" for imdb's "Baywatch: Hawaiian
Wedding" :)
++# punctuation:
++# "Run Silent, Run Deep" for imdb's "Run Silent Run Deep"
++# "Cherry, Harry and Raquel" for imdb's "Cherry, Harry and
Raquel!"
++# "Cat Women of the Moon" for imdb's "Cat-Women of the Moon"
++# "Baywatch Hawaiian Wedding" for imdb's "Baywatch: Hawaiian
Wedding" :)
+ #
+ # FIXED - "Victoria and Albert" appears for imdb's "Victoria &
Albert" (and -> &)
+ # FIXED - "Columbo Cries Wolf" appears instead of "Columbo:Columbo Cries
Wolf"
+ # FIXED - Place the article last, for multiple languages. For instance
+-# Los amantes del c�rculo polar -> amantes del c�rculo polar, Los
++# Los amantes del c�rculo polar -> amantes del c�rculo polar, Los
+ # FIXED - common international vowel changes. For instance
+-# "Anna Kar�nin" (�->e)
++# "Anna Kar�nin" (�->e)
+ #
+ sub alternativeTitles($)
+ {
+- my $title=shift;
+- my @titles;
+-
+- push(@titles, $title);
+-
+- # try the & -> and conversion
+- if ( $title=~m/\&/o ) {
+- my $t=$title;
+- while ( $t=~s/(\s)\&(\s)/$1and$2/o ) {
+- push(@titles, $t);
+- }
+- }
+-
+- # try the and -> & conversion
+- if ( $title=~m/\sand\s/io ) {
+- my $t=$title;
+- while ( $t=~s/(\s)and(\s)/$1\&$2/io ) {
+- push(@titles, $t);
+- }
+- }
+-
+- # try the "Columbo: Columbo cries Wolf" -> "Columbo cries
Wolf" conversion
+- my $max=scalar(@titles);
+- for (my $i=0; $i<$max ; $i++) {
+- my $t=$titles[$i];
+- if ( $t=~m/^[^:]+:.+$/io ) {
+- while ( $t=~s/^[^:]+:\s*(.+)\s*$/$1/io ) {
+- push(@titles, $t);
+- }
+- }
+- }
+-
+- # Place the articles last
+- $max=scalar(@titles);
+- for (my $i=0; $i<$max ; $i++) {
+- my $t=$titles[$i];
+- if ( $t=~m/^(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)\s+(.*)$/io )
{
+- $t=~s/^(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)\s+(.*)$/$2,
$1/iog;
+- push(@titles, $t);
+- }
+- if ( $t=~m/^(.+),\s*(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)$/io
) {
+- $t=~s/^(.+),\s*(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)$/$2
$1/iog;
+- push(@titles, $t);
+- }
+- }
+-
+- # convert all the special language characters
+- $max=scalar(@titles);
+- for (my $i=0; $i<$max ; $i++) {
+- my $t=$titles[$i];
+- if ( $t=~m/[����������������������������������������������������������]/io ) {
+- $t=~s/[������������]/a/gio;
+- $t=~s/[��������]/e/gio;
+- $t=~s/[��������]/i/gio;
+- $t=~s/[������������]/o/gio;
+- $t=~s/[��������]/u/gio;
+- $t=~s/[��]/ae/gio;
+- $t=~s/[��]/c/gio;
+- $t=~s/[��]/n/gio;
+- $t=~s/[�]/ss/gio;
+- $t=~s/[���]/y/gio;
+- $t=~s/[�]//gio;
+- push(@titles, $t);
+- }
+- }
+-
+- # optional later possible titles include removing the '.' from titles
+- # ie "Project V.I.P.E.R." matching imdb "Project VIPER"
+- $max=scalar(@titles);
+- for (my $i=0; $i<$max ; $i++) {
+- my $t=$titles[$i];
+- if ( $t=~s/\.//go ) {
+- push(@titles,$t);
+- }
+- }
+- return(\@titles);
+-}
++ my $title=shift;
++ my @titles;
+
+-sub findMovieInfo($$$$)
+-{
+- my ($self, $title, $year, $exact)=@_;
++ push(@titles, $title);
+
+- my @titles=@{alternativeTitles($title)};
++ # try the & -> and conversion
++ if ( $title=~m/\&/o ) {
++ my $t=$title;
++ while ( $t=~s/(\s)\&(\s)/$1and$2/o ) {
++ push(@titles, $t);
++ }
++ }
+
+- if ( $exact == 1 ) {
+- # try an exact match first :)
+- for my $mytitle ( @titles ) {
+- my $info=$self->getMovieExactMatch($mytitle, $year);
+- if ( defined($info) ) {
+- if ( $info->{qualifier} eq "movie" ) {
+- $self->status("perfect hit on movie \"$info->{key}\"");
+- $info->{matchLevel}="perfect";
+- return($info);
++ # try the and -> & conversion
++ if ( $title=~m/\sand\s/io ) {
++ my $t=$title;
++ while ( $t=~s/(\s)and(\s)/$1\&$2/io ) {
++ push(@titles, $t);
+ }
+- elsif ( $info->{qualifier} eq "tv_movie" ) {
+- $self->status("perfect hit on made-for-tv-movie
\"$info->{key}\"");
+- $info->{matchLevel}="perfect";
+- return($info);
++ }
++
++ # try the "Columbo: Columbo cries Wolf" -> "Columbo cries Wolf"
conversion
++ my $max=scalar(@titles);
++ for (my $i=0; $i<$max ; $i++) {
++ my $t=$titles[$i];
++ if ( $t=~m/^[^:]+:.+$/io ) {
++ while ( $t=~s/^[^:]+:\s*(.+)\s*$/$1/io ) {
++ push(@titles, $t);
++ }
+ }
+- elsif ( $info->{qualifier} eq "video_movie" ) {
+- $self->status("perfect hit on made-for-video-movie
\"$info->{key}\"");
+- $info->{matchLevel}="perfect";
+- return($info);
++ }
++
++ # Place the articles last
++ $max=scalar(@titles);
++ for (my $i=0; $i<$max ; $i++) {
++ my $t=$titles[$i];
++ if ( $t=~m/^(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)\s+(.*)$/io ) {
++ $t=~s/^(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)\s+(.*)$/$2, $1/iog;
++ push(@titles, $t);
+ }
+- elsif ( $info->{qualifier} eq "video_game" ) {
+- next;
++ if ( $t=~m/^(.+),\s*(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)$/io ) {
++ $t=~s/^(.+),\s*(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/iog;
++ push(@titles, $t);
+ }
+- elsif ( $info->{qualifier} eq "tv_series" ) {
++ }
++
++ # convert all the special language characters
++ $max=scalar(@titles);
++ for (my $i=0; $i<$max ; $i++) {
++ my $t=$titles[$i];
++ if ( $t=~m/[����������������������������������������������������������]/io ) {
++ $t=~s/[������������]/a/gio;
++ $t=~s/[��������]/e/gio;
++ $t=~s/[��������]/i/gio;
++ $t=~s/[������������]/o/gio;
++ $t=~s/[��������]/u/gio;
++ $t=~s/[��]/ae/gio;
++ $t=~s/[��]/c/gio;
++ $t=~s/[��]/n/gio;
++ $t=~s/[�]/ss/gio;
++ $t=~s/[���]/y/gio;
++ $t=~s/[�]//gio;
++ push(@titles, $t);
+ }
+- elsif ( $info->{qualifier} eq "tv_mini_series" ) {
++ }
++
++ # optional later possible titles include removing the '.' from titles
++ # ie "Project V.I.P.E.R." matching imdb "Project VIPER"
++ $max=scalar(@titles);
++ for (my $i=0; $i<$max ; $i++) {
++ my $t=$titles[$i];
++ if ( $t=~s/\.//go ) {
++ push(@titles,$t);
+ }
+- else {
+- $self->error("$self->{moviedbIndex} responded with wierd entry for
\"$info->{key}\"");
+- $self->error("weird trailing qualifier
\"$info->{qualifier}\"");
+- $self->error("submit bug report to xmltv-devel\(a)lists.sf.net");
++ }
++ return(\@titles);
++}
++
++sub findMovieInfo($$$$)
++{
++ my ($self, $title, $year, $exact)=@_;
++
++ my @titles=@{alternativeTitles($title)};
++
++ if ( $exact == 1 ) {
++ # try an exact match first :)
++ for my $mytitle ( @titles ) {
++ my $info=$self->getMovieExactMatch($mytitle, $year);
++ if ( defined($info) ) {
++ if ( $info->{qualifier} eq "movie" ) {
++ $self->status("perfect hit on movie \"$info->{key}\"");
++ $info->{matchLevel}="perfect";
++ return($info);
++ }
++ elsif ( $info->{qualifier} eq "tv_movie" ) {
++ $self->status("perfect hit on made-for-tv-movie
\"$info->{key}\"");
++ $info->{matchLevel}="perfect";
++ return($info);
++ }
++ elsif ( $info->{qualifier} eq "video_movie" ) {
++ $self->status("perfect hit on made-for-video-movie
\"$info->{key}\"");
++ $info->{matchLevel}="perfect";
++ return($info);
++ }
++ elsif ( $info->{qualifier} eq "video_game" ) {
++ next;
++ }
++ elsif ( $info->{qualifier} eq "tv_series" ) {
++ }
++ elsif ( $info->{qualifier} eq "tv_mini_series" ) {
++ }
++ else {
++ $self->error("$self->{moviedbIndex} responded with wierd entry for
\"$info->{key}\"");
++ $self->error("weird trailing qualifier
\"$info->{qualifier}\"");
++ $self->error("submit bug report to xmltv-devel\(a)lists.sf.net");
++ }
++ }
++ $self->debug("no exact title/year hit on \"$mytitle
($year)\"");
++ }
++ return(undef);
++ }
++ elsif ( $exact == 2 ) {
++ # looking for first exact match on the title, don't have a year to compare
++
++ for my $mytitle ( @titles ) {
++ # try close hit if only one :)
++ my $cnt=0;
++ my @closeMatches=$self->getMovieCloseMatches("$mytitle");
++
++ # we traverse the hits twice, first looking for success,
++ # then again to produce warnings about missed close matches
++ for my $info (@closeMatches) {
++ next if ( !defined($info) );
++ $cnt++;
++
++ # within one year with exact match good enough
++ if ( lc($mytitle) eq lc($info->{title}) ) {
++
++ if ( $info->{qualifier} eq "movie" ) {
++ $self->status("close enough hit on movie \"$info->{key}\"
(since no 'date' field present)");
++ $info->{matchLevel}="close";
++ return($info);
++ }
++ elsif ( $info->{qualifier} eq "tv_movie" ) {
++ $self->status("close enough hit on made-for-tv-movie
\"$info->{key}\" (since no 'date' field present)");
++ $info->{matchLevel}="close";
++ return($info);
++ }
++ elsif ( $info->{qualifier} eq "video_movie" ) {
++ $self->status("close enough hit on made-for-video-movie
\"$info->{key}\" (since no 'date' field present)");
++ $info->{matchLevel}="close";
++ return($info);
++ }
++ elsif ( $info->{qualifier} eq "video_game" ) {
++ next;
++ }
++ elsif ( $info->{qualifier} eq "tv_series" ) {
++ }
++ elsif ( $info->{qualifier} eq "tv_mini_series" ) {
++ }
++ else {
++ $self->error("$self->{moviedbIndex} responded with wierd entry for
\"$info->{key}\"");
++ $self->error("weird trailing qualifier
\"$info->{qualifier}\"");
++ $self->error("submit bug report to xmltv-devel\(a)lists.sf.net");
++ }
++ }
++ }
+ }
+- }
+- $self->debug("no exact title/year hit on \"$mytitle
($year)\"");
++ # nothing worked
++ return(undef);
+ }
+- return(undef);
+- }
+- elsif ( $exact == 2 ) {
+- # looking for first exact match on the title, don't have a year to compare
+
++ # otherwise we're looking for a title match with a close year
+ for my $mytitle ( @titles ) {
+- # try close hit if only one :)
+- my $cnt=0;
+- my @closeMatches=$self->getMovieCloseMatches("$mytitle");
+-
+- # we traverse the hits twice, first looking for success,
+- # then again to produce warnings about missed close matches
+- for my $info (@closeMatches) {
+- next if ( !defined($info) );
+- $cnt++;
+-
+- # within one year with exact match good enough
+- if ( lc($mytitle) eq lc($info->{title}) ) {
+-
+- if ( $info->{qualifier} eq "movie" ) {
+- $self->status("close enough hit on movie \"$info->{key}\" (since
no 'date' field present)");
+- $info->{matchLevel}="close";
+- return($info);
+- }
+- elsif ( $info->{qualifier} eq "tv_movie" ) {
+- $self->status("close enough hit on made-for-tv-movie
\"$info->{key}\" (since no 'date' field present)");
+- $info->{matchLevel}="close";
+- return($info);
+- }
+- elsif ( $info->{qualifier} eq "video_movie" ) {
+- $self->status("close enough hit on made-for-video-movie
\"$info->{key}\" (since no 'date' field present)");
+- $info->{matchLevel}="close";
+- return($info);
+- }
+- elsif ( $info->{qualifier} eq "video_game" ) {
+- next;
+- }
+- elsif ( $info->{qualifier} eq "tv_series" ) {
+- }
+- elsif ( $info->{qualifier} eq "tv_mini_series" ) {
+- }
+- else {
+- $self->error("$self->{moviedbIndex} responded with wierd entry for
\"$info->{key}\"");
+- $self->error("weird trailing qualifier
\"$info->{qualifier}\"");
+- $self->error("submit bug report to xmltv-devel\(a)lists.sf.net");
+- }
+- }
+- }
+- }
+- # nothing worked
+- return(undef);
+- }
+-
+- # otherwise we're looking for a title match with a close year
+- for my $mytitle ( @titles ) {
+- # try close hit if only one :)
+- my $cnt=0;
+- my @closeMatches=$self->getMovieCloseMatches("$mytitle");
+-
+- # we traverse the hits twice, first looking for success,
+- # then again to produce warnings about missed close matches
+- for my $info (@closeMatches) {
+- next if ( !defined($info) );
+- $cnt++;
+-
+- # within one year with exact match good enough
+- if ( lc($mytitle) eq lc($info->{title}) ) {
+- my $yearsOff=abs(int($info->{year})-$year);
+-
+- $info->{matchLevel}="close";
+-
+- if ( $yearsOff <= 2 ) {
+- my $showYear=int($info->{year});
+-
+- if ( $info->{qualifier} eq "movie" ) {
+- $self->status("close enough hit on movie \"$info->{key}\" (off
by $yearsOff years)");
+- return($info);
+- }
+- elsif ( $info->{qualifier} eq "tv_movie" ) {
+- $self->status("close enough hit on made-for-tv-movie
\"$info->{key}\" (off by $yearsOff years)");
+- return($info);
+- }
+- elsif ( $info->{qualifier} eq "video_movie" ) {
+- $self->status("close enough hit on made-for-video-movie
\"$info->{key}\" (off by $yearsOff years)");
+- return($info);
+- }
+- elsif ( $info->{qualifier} eq "video_game" ) {
+- $self->status("ignoring close hit on video-game
\"$info->{key}\"");
+- next;
+- }
+- elsif ( $info->{qualifier} eq "tv_series" ) {
+- $self->status("ignoring close hit on tv series
\"$info->{key}\"");
+- #$self->status("close enough hit on tv series \"$info->{key}\"
(off by $yearsOff years)");
+- }
+- elsif ( $info->{qualifier} eq "tv_mini_series" ) {
+- $self->status("ignoring close hit on tv mini-series
\"$info->{key}\"");
+- #$self->status("close enough hit on tv mini-series
\"$info->{key}\" (off by $yearsOff years)");
+- }
+- else {
+- $self->error("$self->{moviedbIndex} responded with wierd entry for
\"$info->{key}\"");
+- $self->error("weird trailing qualifier
\"$info->{qualifier}\"");
+- $self->error("submit bug report to xmltv-devel\(a)lists.sf.net");
+- }
+- }
+- }
+- }
+-
+- # if we found at least something, but nothing matched
+- # produce warnings about missed, but close matches
+- for my $info (@closeMatches) {
+- next if ( !defined($info) );
+-
+- # within one year with exact match good enough
+- if ( lc($mytitle) eq lc($info->{title}) ) {
+- my $yearsOff=abs(int($info->{year})-$year);
+- if ( $yearsOff <= 2 ) {
+- #die "internal error: key \"$info->{key}\" failed to be
processed properly";
+- }
+- elsif ( $yearsOff <= 5 ) {
+- # report these as status
+- $self->status("ignoring close, but not good enough hit on
\"$info->{key}\" (off by $yearsOff years)");
++ # try close hit if only one :)
++ my $cnt=0;
++ my @closeMatches=$self->getMovieCloseMatches("$mytitle");
++
++ # we traverse the hits twice, first looking for success,
++ # then again to produce warnings about missed close matches
++ for my $info (@closeMatches) {
++ next if ( !defined($info) );
++ $cnt++;
++
++ # within one year with exact match good enough
++ if ( lc($mytitle) eq lc($info->{title}) ) {
++ my $yearsOff=abs(int($info->{year})-$year);
++
++ $info->{matchLevel}="close";
++
++ if ( $yearsOff <= 2 ) {
++ my $showYear=int($info->{year});
++
++ if ( $info->{qualifier} eq "movie" ) {
++ $self->status("close enough hit on movie \"$info->{key}\"
(off by $yearsOff years)");
++ return($info);
++ }
++ elsif ( $info->{qualifier} eq "tv_movie" ) {
++ $self->status("close enough hit on made-for-tv-movie
\"$info->{key}\" (off by $yearsOff years)");
++ return($info);
++ }
++ elsif ( $info->{qualifier} eq "video_movie" ) {
++ $self->status("close enough hit on made-for-video-movie
\"$info->{key}\" (off by $yearsOff years)");
++ return($info);
++ }
++ elsif ( $info->{qualifier} eq "video_game" ) {
++ $self->status("ignoring close hit on video-game
\"$info->{key}\"");
++ next;
++ }
++ elsif ( $info->{qualifier} eq "tv_series" ) {
++ $self->status("ignoring close hit on tv series
\"$info->{key}\"");
++ #$self->status("close enough hit on tv series
\"$info->{key}\" (off by $yearsOff years)");
++ }
++ elsif ( $info->{qualifier} eq "tv_mini_series" ) {
++ $self->status("ignoring close hit on tv mini-series
\"$info->{key}\"");
++ #$self->status("close enough hit on tv mini-series
\"$info->{key}\" (off by $yearsOff years)");
++ }
++ else {
++ $self->error("$self->{moviedbIndex} responded with wierd entry for
\"$info->{key}\"");
++ $self->error("weird trailing qualifier
\"$info->{qualifier}\"");
++ $self->error("submit bug report to xmltv-devel\(a)lists.sf.net");
++ }
++ }
++ }
+ }
+- else {
+- # report these as debug messages
+- $self->debug("ignoring close hit on \"$info->{key}\" (off by
$yearsOff years)");
++
++ # if we found at least something, but nothing matched
++ # produce warnings about missed, but close matches
++ for my $info (@closeMatches) {
++ next if ( !defined($info) );
++
++ # within one year with exact match good enough
++ if ( lc($mytitle) eq lc($info->{title}) ) {
++ my $yearsOff=abs(int($info->{year})-$year);
++ if ( $yearsOff <= 2 ) {
++ #die "internal error: key \"$info->{key}\" failed to be processed
properly";
++ }
++ elsif ( $yearsOff <= 5 ) {
++ # report these as status
++ $self->status("ignoring close, but not good enough hit on
\"$info->{key}\" (off by $yearsOff years)");
++ }
++ else {
++ # report these as debug messages
++ $self->debug("ignoring close hit on \"$info->{key}\" (off by
$yearsOff years)");
++ }
++ }
++ else {
++ $self->debug("ignoring close hit on \"$info->{key}\" (title did
not match)");
++ }
+ }
+- }
+- else {
+- $self->debug("ignoring close hit on \"$info->{key}\" (title did
not match)");
+- }
+ }
+- }
+- #$self->status("failed to lookup \"$title ($year)\"");
+- return(undef);
++ #$self->status("failed to lookup \"$title ($year)\"");
++ return(undef);
+ }
+
+ sub findTVSeriesInfo($$)
+ {
+- my ($self, $title)=@_;
++ my ($self, $title)=@_;
+
+- if ( $self->{cacheLookups} ) {
++ if ( $self->{cacheLookups} ) {
+ my $id=$self->{cachedLookups}->{tv_series}->{$title};
+
+ if ( defined($id) ) {
+- #print STDERR "REF= (".ref($id).")\n";
+- if ( $id ne '' ) {
+- return($id);
+- }
+- return(undef);
++ #print STDERR "REF= (".ref($id).")\n";
++ if ( $id ne '' ) {
++ return($id);
++ }
++ return(undef);
++ }
+ }
+- }
+-
+- my @titles=@{alternativeTitles($title)};
+-
+- # try an exact match first :)
+- my $idInfo;
+-
+- for my $mytitle ( @titles ) {
+- # try close hit if only one :)
+- my $cnt=0;
+- my @closeMatches=$self->getMovieCloseMatches("$mytitle");
+-
+- for my $info (@closeMatches) {
+- next if ( !defined($info) );
+- $cnt++;
+
+- if ( lc($mytitle) eq lc($info->{title}) ) {
++ my @titles=@{alternativeTitles($title)};
+
+- $info->{matchLevel}="perfect";
++ # try an exact match first :)
++ my $idInfo;
+
+- if ( $info->{qualifier} eq "movie" ) {
+- #$self->status("ignoring close hit on movie
\"$info->{key}\"");
+- }
+- elsif ( $info->{qualifier} eq "tv_movie" ) {
+- #$self->status("ignoring close hit on tv movie
\"$info->{key}\"");
+- }
+- elsif ( $info->{qualifier} eq "video_movie" ) {
+- #$self->status("ignoring close hit on made-for-video-movie
\"$info->{key}\"");
+- }
+- elsif ( $info->{qualifier} eq "video_game" ) {
+- #$self->status("ignoring close hit on made-for-video-movie
\"$info->{key}\"");
+- next;
++ for my $mytitle ( @titles ) {
++ # try close hit if only one :)
++ my $cnt=0;
++ my @closeMatches=$self->getMovieCloseMatches("$mytitle");
++
++ for my $info (@closeMatches) {
++ next if ( !defined($info) );
++ $cnt++;
++
++ if ( lc($mytitle) eq lc($info->{title}) ) {
++
++ $info->{matchLevel}="perfect";
++
++ if ( $info->{qualifier} eq "movie" ) {
++ #$self->status("ignoring close hit on movie
\"$info->{key}\"");
++ }
++ elsif ( $info->{qualifier} eq "tv_movie" ) {
++ #$self->status("ignoring close hit on tv movie
\"$info->{key}\"");
++ }
++ elsif ( $info->{qualifier} eq "video_movie" ) {
++ #$self->status("ignoring close hit on made-for-video-movie
\"$info->{key}\"");
++ }
++ elsif ( $info->{qualifier} eq "video_game" ) {
++ #$self->status("ignoring close hit on made-for-video-movie
\"$info->{key}\"");
++ next;
++ }
++ elsif ( $info->{qualifier} eq "tv_series" ) {
++ $idInfo=$info;
++ $self->status("perfect hit on tv series
\"$info->{key}\"");
++ last;
++ }
++ elsif ( $info->{qualifier} eq "tv_mini_series" ) {
++ $idInfo=$info;
++ $self->status("perfect hit on tv mini-series
\"$info->{key}\"");
++ last;
++ }
++ else {
++ $self->error("$self->{moviedbIndex} responded with wierd entry for
\"$info->{key}\"");
++ $self->error("weird trailing qualifier
\"$info->{qualifier}\"");
++ $self->error("submit bug report to xmltv-devel\(a)lists.sf.net");
++ }
++ }
+ }
+- elsif ( $info->{qualifier} eq "tv_series" ) {
+- $idInfo=$info;
+- $self->status("perfect hit on tv series
\"$info->{key}\"");
+- last;
++ last if ( defined($idInfo) );
++ }
++
++ if ( $self->{cacheLookups} ) {
++ # flush cache after this lookup if its gotten too big
++ if ( $self->{cachedLookups}->{tv_series}->{_cacheSize_} >
++ $self->{cacheLookupSize} ) {
++ delete($self->{cachedLookups}->{tv_series});
++ $self->{cachedLookups}->{tv_series}->{_cacheSize_}=0;
+ }
+- elsif ( $info->{qualifier} eq "tv_mini_series" ) {
+- $idInfo=$info;
+- $self->status("perfect hit on tv mini-series
\"$info->{key}\"");
+- last;
++ if ( defined($idInfo) ) {
++ $self->{cachedLookups}->{tv_series}->{$title}=$idInfo;
+ }
+ else {
+- $self->error("$self->{moviedbIndex} responded with wierd entry for
\"$info->{key}\"");
+- $self->error("weird trailing qualifier
\"$info->{qualifier}\"");
+- $self->error("submit bug report to xmltv-devel\(a)lists.sf.net");
++ $self->{cachedLookups}->{tv_series}->{$title}="";
+ }
+- }
+- }
+- last if ( defined($idInfo) );
+- }
+-
+- if ( $self->{cacheLookups} ) {
+- # flush cache after this lookup if its gotten too big
+- if ( $self->{cachedLookups}->{tv_series}->{_cacheSize_} >
+- $self->{cacheLookupSize} ) {
+- delete($self->{cachedLookups}->{tv_series});
+- $self->{cachedLookups}->{tv_series}->{_cacheSize_}=0;
++ $self->{cachedLookups}->{tv_series}->{_cacheSize_}++;
+ }
+ if ( defined($idInfo) ) {
+- $self->{cachedLookups}->{tv_series}->{$title}=$idInfo;
++ return($idInfo);
+ }
+ else {
+- $self->{cachedLookups}->{tv_series}->{$title}="";
+- }
+- $self->{cachedLookups}->{tv_series}->{_cacheSize_}++;
+- }
+- if ( defined($idInfo) ) {
+- return($idInfo);
+- }
+- else {
+- #$self->status("failed to lookup tv series \"$title\"");
+- return(undef);
+- }
++ #$self->status("failed to lookup tv series \"$title\"");
++ return(undef);
++ }
+ }
+
+ #
+@@ -921,405 +922,405 @@ sub findTVSeriesInfo($$)
+ # todo - producer
+ # todo - running time (duration)
+ # todo - identify 'Host' and 'Narrator's and put them in as
+-# credits:presenter and credits:commentator resp.
++# credits:presenter and credits:commentator resp.
+ # todo - check program length - probably a warning if longer ?
+-# can we update length (separate from runnning time in the output ?)
++# can we update length (separate from runnning time in the output ?)
+ # todo - icon - url from
www.imdb.com of programme image ?
+-# this could be done by scraping for the hyper linked poster
+-# <a name="poster"><img
src="http://ia.imdb.com/media/imdb/01/I/60/69/80m.jpg" height="139"
width="99" border="0"></a>
+-# and grabbin' out the img entry. (BTW ..../npa.jpg seems to line up with no
poster available)
++# this could be done by scraping for the hyper linked poster
++# <a name="poster"><img
src="http://ia.imdb.com/media/imdb/01/I/60/69/80m.jpg" height="139"
width="99" border="0"></a>
++# and grabbin' out the img entry. (BTW ..../npa.jpg seems to line up with no poster
available)
+ #
+ #
+ sub applyFound($$$)
+ {
+- my ($self, $prog, $idInfo)=@_;
++ my ($self, $prog, $idInfo)=@_;
+
+- my $title=$prog->{title}->[0]->[0];
++ my $title=$prog->{title}->[0]->[0];
+
+- if ( $self->{updateDates} ) {
+- my $date;
++ if ( $self->{updateDates} ) {
++ my $date;
+
+- # don't add dates only fix them for tv_series
+- if ( $idInfo->{qualifier} eq "movie" ||
+- $idInfo->{qualifier} eq "video_movie" ||
+- $idInfo->{qualifier} eq "tv_movie" ) {
+- #$self->debug("adding 'date' field
(\"$idInfo->{year}\") on \"$title\"");
+- $date=int($idInfo->{year});
+- }
+- else {
+- #$self->debug("not adding 'date' field to $idInfo->{qualifier}
\"$title\"");
+- $date=undef;
+- }
++ # don't add dates only fix them for tv_series
++ if ( $idInfo->{qualifier} eq "movie" ||
++ $idInfo->{qualifier} eq "video_movie" ||
++ $idInfo->{qualifier} eq "tv_movie" ) {
++ #$self->debug("adding 'date' field (\"$idInfo->{year}\")
on \"$title\"");
++ $date=int($idInfo->{year});
++ }
++ else {
++ #$self->debug("not adding 'date' field to $idInfo->{qualifier}
\"$title\"");
++ $date=undef;
++ }
+
+- if ( $self->{replaceDates} ) {
+- if ( defined($prog->{date}) && defined($date) ) {
+- $self->debug("replacing 'date' field");
+- delete($prog->{date});
+- $prog->{date}=$date;
+- }
++ if ( $self->{replaceDates} ) {
++ if ( defined($prog->{date}) && defined($date) ) {
++ $self->debug("replacing 'date' field");
++ delete($prog->{date});
++ $prog->{date}=$date;
++ }
++ }
++ else {
++ # only set date if not already defined
++ if ( !defined($prog->{date}) && defined($date) ) {
++ $prog->{date}=$date;
++ }
++ }
+ }
+- else {
+- # only set date if not already defined
+- if ( !defined($prog->{date}) && defined($date) ) {
+- $prog->{date}=$date;
+- }
++
++ if ( $self->{updateTitles} ) {
++ if ( $idInfo->{title} ne $title ) {
++ if ( $self->{replaceTitles} ) {
++ $self->debug("replacing (all) 'title' from \"$title\" to
\"$idInfo->{title}\"");
++ delete($prog->{title});
++ }
++
++ my @list;
++
++ push(@list, [$idInfo->{title}, undef]);
++
++ if ( defined($prog->{title}) ) {
++ my $name=$idInfo->{title};
++ my $found=0;
++ for my $v (@{$prog->{title}}) {
++ if ( lc($v->[0]) eq lc($name) ) {
++ $found=1;
++ }
++ else {
++ push(@list, $v);
++ }
++ }
++ }
++ $prog->{title}=\@list;
++ }
+ }
+- }
+
+- if ( $self->{updateTitles} ) {
+- if ( $idInfo->{title} ne $title ) {
+- if ( $self->{replaceTitles} ) {
+- $self->debug("replacing (all) 'title' from \"$title\" to
\"$idInfo->{title}\"");
+- delete($prog->{title});
+- }
++ if ( $self->{updateURLs} ) {
++ if ( $self->{replaceURLs} ) {
++ if ( defined($prog->{url}) ) {
++ $self->debug("replacing (all) 'url'");
++ delete($prog->{url});
++ }
++ }
+
+- my @list;
++ # add url to programme on
www.imdb.com
++ my $url=$idInfo->{key};
+
+- push(@list, [$idInfo->{title}, undef]);
++ $url=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg;
++ $url="http://us.imdb.com/M/title-exact?".$url;
+
+- if ( defined($prog->{title}) ) {
+- my $name=$idInfo->{title};
+- my $found=0;
+- for my $v (@{$prog->{title}}) {
+- if ( lc($v->[0]) eq lc($name) ) {
+- $found=1;
+- }
+- else {
+- push(@list, $v);
+- }
++ if ( defined($prog->{url}) ) {
++ my @rep;
++ push(@rep, $url);
++ for (@{$prog->{url}}) {
++ # skip urls for
imdb.com that we're probably safe to replace
++ if ( !m;^http://us.imdb.com/M/title-exact;o ) {
++ push(@rep, $_);
++ }
++ }
++ $prog->{url}=\@rep;
+ }
+- }
+- $prog->{title}=\@list;
++ else {
++ push(@{$prog->{url}}, $url);
++ }
++ }
++
++ # squirrel away movie qualifier so its first on the list of replacements
++ my @categories;
++ push(@categories, [$self->{categories}->{$idInfo->{qualifier}},
'en']);
++ if ( !defined($self->{categories}->{$idInfo->{qualifier}}) ) {
++ die "how did we get here with an invalid qualifier
'$idInfo->{qualifier}'";
+ }
+- }
+
+- if ( $self->{updateURLs} ) {
+- if ( $self->{replaceURLs} ) {
+- if ( defined($prog->{url}) ) {
+- $self->debug("replacing (all) 'url'");
+- delete($prog->{url});
+- }
++ my $details=$self->getMovieIdDetails($idInfo->{id});
++ if ( $details->{noDetails} ) {
++ # we don't have any details on this movie
+ }
++ else {
++ # add directors list
++ if ( $self->{updateDirectors} && defined($details->{directors}) ) {
++ # only update directors if we have exactly one or if
++ # its a movie of some kind, add more than one.
++ if ( scalar(@{$details->{directors}}) == 1 ||
++ $idInfo->{qualifier} eq "movie" ||
++ $idInfo->{qualifier} eq "video_movie" ||
++ $idInfo->{qualifier} eq "tv_movie" ) {
++
++ if ( $self->{replaceDirectors} ) {
++ if ( defined($prog->{credits}->{director}) ) {
++ $self->debug("replacing director(s)");
++ delete($prog->{credits}->{director});
++ }
++ }
++
++ my @list;
++ # add top 3 billing directors list form
www.imdb.com
++ for my $name (splice(@{$details->{directors}},0,3)) {
++ push(@list, $name);
++ }
++
++ # preserve all existing directors listed if we did't already have them.
++ if ( defined($prog->{credits}->{director}) ) {
++ for my $name (@{$prog->{credits}->{director}}) {
++ my $found=0;
++ for(@list) {
++ if ( lc eq lc($name) ) {
++ $found=1;
++ }
++ }
++ if ( !$found ) {
++ push(@list, $name);
++ }
++ }
++ }
++ $prog->{credits}->{director}=\@list;
++ }
++ else {
++ $self->debug("not adding 'director' field to $idInfo->{qualifier}
\"$title\"");
++ }
++ }
++
++ if ( $self->{updateActors} && defined($details->{actors}) ) {
++ if ( $self->{replaceActors} ) {
++ if ( defined($prog->{credits}->{actor}) ) {
++ $self->debug("replacing actor(s) on $idInfo->{qualifier}
\"$idInfo->{key}\"");
++ delete($prog->{credits}->{actor});
++ }
++ }
++
++ my @list;
++ # add top billing actors (default = 3) from
www.imdb.com
++ for my $name (splice(@{$details->{actors}},0,$self->{numActors})) {
++ push(@list, $name);
++ }
++ # preserve all existing actors listed if we did't already have them.
++ if ( defined($prog->{credits}->{actor}) ) {
++ for my $name (@{$prog->{credits}->{actor}}) {
++ my $found=0;
++ for(@list) {
++ if ( lc eq lc($name) ) {
++ $found=1;
++ }
++ }
++ if ( !$found ) {
++ push(@list, $name);
++ }
++ }
++ }
++ $prog->{credits}->{actor}=\@list;
++ }
+
+- # add url to programme on
www.imdb.com
+- my $url=$idInfo->{key};
++ if ( $self->{updatePresentors} && defined($details->{presenter}) ) {
++ if ( $self->{replacePresentors} ) {
++ if ( defined($prog->{credits}->{presenter}) ) {
++ $self->debug("replacing presentor");
++ delete($prog->{credits}->{presenter});
++ }
++ }
++ $prog->{credits}->{presenter}=$details->{presenter};
++ }
++ if ( $self->{updateCommentators} && defined($details->{commentator}) )
{
++ if ( $self->{replaceCommentators} ) {
++ if ( defined($prog->{credits}->{commentator}) ) {
++ $self->debug("replacing commentator");
++ delete($prog->{credits}->{commentator});
++ }
++ }
++ $prog->{credits}->{commentator}=$details->{commentator};
++ }
+
+- $url=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg;
+- $url="http://us.imdb.com/M/title-exact?".$url;
++ # push genres as categories
++ if ( $self->{updateCategoriesWithGenres} ) {
++ if ( defined($details->{genres}) ) {
++ for (@{$details->{genres}}) {
++ push(@categories, [$_, 'en']);
++ }
++ }
++ }
+
+- if ( defined($prog->{url}) ) {
+- my @rep;
+- push(@rep, $url);
+- for (@{$prog->{url}}) {
+- # skip urls for
imdb.com that we're probably safe to replace
+- if ( !m;^http://us.imdb.com/M/title-exact;o ) {
+- push(@rep, $_);
++ if ( $self->{updateStarRatings} && defined($details->{ratingRank}) ) {
++ if ( $self->{replaceStarRatings} ) {
++ if ( defined($prog->{'star-rating'}) ) {
++ $self->debug("replacing 'star-rating'");
++ delete($prog->{'star-rating'});
++ }
++ unshift( @{$prog->{'star-rating'}}, [ $details->{ratingRank} .
"/10", 'IMDB User Rating' ] );
++ }
++ else {
++ # add IMDB User Rating in front of all other star-ratings
++ unshift( @{$prog->{'star-rating'}}, [ $details->{ratingRank} .
"/10", 'IMDB User Rating' ] );
++ }
++ }
++
++ if ( $self->{updateKeywords} ) {
++ my @keywords;
++ if ( defined($details->{keywords}) ) {
++ for (@{$details->{keywords}}) {
++ push(@keywords, [$_, 'en']);
++ }
++ }
++
++ if ( $self->{replaceKeywords} ) {
++ if ( defined($prog->{keywords}) ) {
++ $self->debug("replacing (all) 'keywords'");
++ delete($prog->{keywords});
++ }
++ }
++ if ( defined($prog->{keyword}) ) {
++ for my $value (@{$prog->{keyword}}) {
++ my $found=0;
++ for my $k (@keywords) {
++ if ( lc($k->[0]) eq lc($value->[0]) ) {
++ $found=1;
++ }
++ }
++ if ( !$found ) {
++ push(@keywords, $value);
++ }
++ }
++ }
++ $prog->{keyword}=\@keywords;
++ }
++
++ if ( $self->{updatePlot} ) {
++ # plot is held as a <desc> entity
++ # if 'replacePlot' then delete all existing <desc> entities and add
new
++ # else add this plot as an additional <desc> entity
++ #
++ if ( $self->{replacePlot} ) {
++ if ( defined($prog->{desc}) ) {
++ $self->debug("replacing (all) 'desc'");
++ delete($prog->{desc});
++ }
++ }
++ if ( defined($details->{plot}) ) {
++ # check it's not already there
++ my $found = 0;
++ for my $_desc ( @{$prog->{desc}} ) {
++ $found = 1 if ( @{$_desc}[0] eq $details->{plot} );
++ }
++ push @{$prog->{desc}}, [ $details->{plot}, 'en' ] if !$found;
++ }
+ }
+- }
+- $prog->{url}=\@rep;
++
+ }
+- else {
+- push(@{$prog->{url}}, $url);
+- }
+- }
+-
+- # squirrel away movie qualifier so its first on the list of replacements
+- my @categories;
+- push(@categories, [$self->{categories}->{$idInfo->{qualifier}},
'en']);
+- if ( !defined($self->{categories}->{$idInfo->{qualifier}}) ) {
+- die "how did we get here with an invalid qualifier
'$idInfo->{qualifier}'";
+- }
+-
+- my $details=$self->getMovieIdDetails($idInfo->{id});
+- if ( $details->{noDetails} ) {
+- # we don't have any details on this movie
+- }
+- else {
+- # add directors list
+- if ( $self->{updateDirectors} && defined($details->{directors}) ) {
+- # only update directors if we have exactly one or if
+- # its a movie of some kind, add more than one.
+- if ( scalar(@{$details->{directors}}) == 1 ||
+- $idInfo->{qualifier} eq "movie" ||
+- $idInfo->{qualifier} eq "video_movie" ||
+- $idInfo->{qualifier} eq "tv_movie" ) {
+-
+- if ( $self->{replaceDirectors} ) {
+- if ( defined($prog->{credits}->{director}) ) {
+- $self->debug("replacing director(s)");
+- delete($prog->{credits}->{director});
+- }
+- }
+-
+- my @list;
+- # add top 3 billing directors list form
www.imdb.com
+- for my $name (splice(@{$details->{directors}},0,3)) {
+- push(@list, $name);
+- }
+-
+- # preserve all existing directors listed if we did't already have them.
+- if ( defined($prog->{credits}->{director}) ) {
+- for my $name (@{$prog->{credits}->{director}}) {
+- my $found=0;
+- for(@list) {
+- if ( lc eq lc($name) ) {
+- $found=1;
+- }
+- }
+- if ( !$found ) {
+- push(@list, $name);
+- }
+- }
+- }
+- $prog->{credits}->{director}=\@list;
+- }
+- else {
+- $self->debug("not adding 'director' field to $idInfo->{qualifier}
\"$title\"");
+- }
+- }
+-
+- if ( $self->{updateActors} && defined($details->{actors}) ) {
+- if ( $self->{replaceActors} ) {
+- if ( defined($prog->{credits}->{actor}) ) {
+- $self->debug("replacing actor(s) on $idInfo->{qualifier}
\"$idInfo->{key}\"");
+- delete($prog->{credits}->{actor});
+- }
+- }
+-
+- my @list;
+- # add top billing actors (default = 3) from
www.imdb.com
+- for my $name (splice(@{$details->{actors}},0,$self->{numActors})) {
+- push(@list, $name);
+- }
+- # preserve all existing actors listed if we did't already have them.
+- if ( defined($prog->{credits}->{actor}) ) {
+- for my $name (@{$prog->{credits}->{actor}}) {
+- my $found=0;
+- for(@list) {
+- if ( lc eq lc($name) ) {
+- $found=1;
+- }
+- }
+- if ( !$found ) {
+- push(@list, $name);
+- }
+- }
+- }
+- $prog->{credits}->{actor}=\@list;
+- }
+-
+- if ( $self->{updatePresentors} && defined($details->{presenter}) ) {
+- if ( $self->{replacePresentors} ) {
+- if ( defined($prog->{credits}->{presenter}) ) {
+- $self->debug("replacing presentor");
+- delete($prog->{credits}->{presenter});
+- }
+- }
+- $prog->{credits}->{presenter}=$details->{presenter};
+- }
+- if ( $self->{updateCommentators} && defined($details->{commentator}) ) {
+- if ( $self->{replaceCommentators} ) {
+- if ( defined($prog->{credits}->{commentator}) ) {
+- $self->debug("replacing commentator");
+- delete($prog->{credits}->{commentator});
+- }
+- }
+- $prog->{credits}->{commentator}=$details->{commentator};
+- }
+-
+- # push genres as categories
+- if ( $self->{updateCategoriesWithGenres} ) {
+- if ( defined($details->{genres}) ) {
+- for (@{$details->{genres}}) {
+- push(@categories, [$_, 'en']);
+- }
+- }
+- }
+-
+- if ( $self->{updateStarRatings} && defined($details->{ratingRank}) ) {
+- if ( $self->{replaceStarRatings} ) {
+- if ( defined($prog->{'star-rating'}) ) {
+- $self->debug("replacing 'star-rating'");
+- delete($prog->{'star-rating'});
+- }
+- unshift( @{$prog->{'star-rating'}}, [ $details->{ratingRank} .
"/10", 'IMDB User Rating' ] );
+- }
+- else {
+- # add IMDB User Rating in front of all other star-ratings
+- unshift( @{$prog->{'star-rating'}}, [ $details->{ratingRank} .
"/10", 'IMDB User Rating' ] );
+- }
+- }
+-
+- if ( $self->{updateKeywords} ) {
+- my @keywords;
+- if ( defined($details->{keywords}) ) {
+- for (@{$details->{keywords}}) {
+- push(@keywords, [$_, 'en']);
+- }
+- }
+-
+- if ( $self->{replaceKeywords} ) {
+- if ( defined($prog->{keywords}) ) {
+- $self->debug("replacing (all) 'keywords'");
+- delete($prog->{keywords});
+- }
+- }
+- if ( defined($prog->{keyword}) ) {
+- for my $value (@{$prog->{keyword}}) {
+- my $found=0;
+- for my $k (@keywords) {
+- if ( lc($k->[0]) eq lc($value->[0]) ) {
+- $found=1;
+- }
+- }
+- if ( !$found ) {
+- push(@keywords, $value);
+- }
+- }
+- }
+- $prog->{keyword}=\@keywords;
+- }
+-
+- if ( $self->{updatePlot} ) {
+- # plot is held as a <desc> entity
+- # if 'replacePlot' then delete all existing <desc> entities
and add new
+- # else add this plot as an additional <desc> entity
+- #
+- if ( $self->{replacePlot} ) {
+- if ( defined($prog->{desc}) ) {
+- $self->debug("replacing (all) 'desc'");
+- delete($prog->{desc});
+- }
+- }
+- if ( defined($details->{plot}) ) {
+- # check it's not already there
+- my $found = 0;
+- for my $_desc ( @{$prog->{desc}} ) {
+- $found = 1 if ( @{$_desc}[0] eq $details->{plot} );
+- }
+- push @{$prog->{desc}}, [ $details->{plot}, 'en' ] if
!$found;
+- }
+- }
+-
+- }
+-
+- if ( $self->{updateCategories} ) {
+- if ( $self->{replaceCategories} ) {
+- if ( defined($prog->{category}) ) {
+- $self->debug("replacing (all) 'category'");
+- delete($prog->{category});
+- }
+- }
+- if ( defined($prog->{category}) ) {
+- for my $value (@{$prog->{category}}) {
+- my $found=0;
+- #print "checking category $value->[0] with $mycategory\n";
+- for my $c (@categories) {
+- if ( lc($c->[0]) eq lc($value->[0]) ) {
+- $found=1;
+- }
+- }
+- if ( !$found ) {
+- push(@categories, $value);
+- }
+- }
+- }
+- $prog->{category}=\@categories;
+- }
+-
+- return($prog);
++
++ if ( $self->{updateCategories} ) {
++ if ( $self->{replaceCategories} ) {
++ if ( defined($prog->{category}) ) {
++ $self->debug("replacing (all) 'category'");
++ delete($prog->{category});
++ }
++ }
++ if ( defined($prog->{category}) ) {
++ for my $value (@{$prog->{category}}) {
++ my $found=0;
++ #print "checking category $value->[0] with $mycategory\n";
++ for my $c (@categories) {
++ if ( lc($c->[0]) eq lc($value->[0]) ) {
++ $found=1;
++ }
++ }
++ if ( !$found ) {
++ push(@categories, $value);
++ }
++ }
++ }
++ $prog->{category}=\@categories;
++ }
++
++ return($prog);
+ }
+
+ sub augmentProgram($$$)
+ {
+- my ($self, $prog, $movies_only)=@_;
++ my ($self, $prog, $movies_only)=@_;
+
+- $self->{stats}->{programCount}++;
++ $self->{stats}->{programCount}++;
+
+- # assume first title in first language is the one we want.
+- my $title=$prog->{title}->[0]->[0];
++ # assume first title in first language is the one we want.
++ my $title=$prog->{title}->[0]->[0];
+
+- if ( defined($prog->{date}) && $prog->{date}=~m/^\d\d\d\d$/o ) {
++ if ( defined($prog->{date}) && $prog->{date}=~m/^\d\d\d\d$/o ) {
+
+- # for programs with dates we try:
+- # - exact matches on movies
+- # - exact matches on tv series
+- # - close matches on movies
+- my $id=$self->findMovieInfo($title, $prog->{date}, 1); # exact match
+- if ( !defined($id) ) {
+- $id=$self->findTVSeriesInfo($title);
+- if ( !defined($id) ) {
+- $id=$self->findMovieInfo($title, $prog->{date}, 0); # close match
+- }
+- }
+- if ( defined($id) ) {
+- $self->{stats}->{$id->{matchLevel}."Matches"}++;
+- $self->{stats}->{$id->{matchLevel}}->{$id->{qualifier}}++;
+- return($self->applyFound($prog, $id));
++ # for programs with dates we try:
++ # - exact matches on movies
++ # - exact matches on tv series
++ # - close matches on movies
++ my $id=$self->findMovieInfo($title, $prog->{date}, 1); # exact match
++ if ( !defined($id) ) {
++ $id=$self->findTVSeriesInfo($title);
++ if ( !defined($id) ) {
++ $id=$self->findMovieInfo($title, $prog->{date}, 0); # close match
++ }
++ }
++ if ( defined($id) ) {
++ $self->{stats}->{$id->{matchLevel}."Matches"}++;
++ $self->{stats}->{$id->{matchLevel}}->{$id->{qualifier}}++;
++ return($self->applyFound($prog, $id));
++ }
++ $self->status("failed to find a match for movie \"$title
($prog->{date})\"");
++ return(undef);
++ # fall through and try again as a tv series
++ }
++
++ if ( !$movies_only ) {
++ my $id=$self->findTVSeriesInfo($title);
++ if ( defined($id) ) {
++ $self->{stats}->{$id->{matchLevel}."Matches"}++;
++ $self->{stats}->{$id->{matchLevel}}->{$id->{qualifier}}++;
++ return($self->applyFound($prog, $id));
++ }
++
++ if ( 0 ) {
++ # this has hard to support 'close' results, unless we know
++ # for certain we're looking for a movie (ie duration etc)
++ # this is a bad idea.
++ my $id=$self->findMovieInfo($title, undef, 2); # any title match
++ if ( defined($id) ) {
++ $self->{stats}->{$id->{matchLevel}."Matches"}++;
++ $self->{stats}->{$id->{matchLevel}}->{$id->{qualifier}}++;
++ return($self->applyFound($prog, $id));
++ }
++ }
++ $self->status("failed to find a match for show \"$title\"");
+ }
+- $self->status("failed to find a match for movie \"$title
($prog->{date})\"");
+ return(undef);
+- # fall through and try again as a tv series
+- }
+-
+- if ( !$movies_only ) {
+- my $id=$self->findTVSeriesInfo($title);
+- if ( defined($id) ) {
+- $self->{stats}->{$id->{matchLevel}."Matches"}++;
+- $self->{stats}->{$id->{matchLevel}}->{$id->{qualifier}}++;
+- return($self->applyFound($prog, $id));
+- }
+-
+- if ( 0 ) {
+- # this has hard to support 'close' results, unless we know
+- # for certain we're looking for a movie (ie duration etc)
+- # this is a bad idea.
+- my $id=$self->findMovieInfo($title, undef, 2); # any title match
+- if ( defined($id) ) {
+- $self->{stats}->{$id->{matchLevel}."Matches"}++;
+- $self->{stats}->{$id->{matchLevel}}->{$id->{qualifier}}++;
+- return($self->applyFound($prog, $id));
+- }
+- }
+- $self->status("failed to find a match for show \"$title\"");
+- }
+- return(undef);
+ }
+
+ #
+ # todo - add in stats on other things added (urls ?, actors, directors,categories)
+-# separate out from what was added or updated
++# separate out from what was added or updated
+ #
+ sub getStatsLines($)
+ {
+- my $self=shift;
+- my $totalChannelsParsed=shift;
++ my $self=shift;
++ my $totalChannelsParsed=shift;
+
+- my $endTime=time();
+- my %stats=%{$self->{stats}};
++ my $endTime=time();
++ my %stats=%{$self->{stats}};
+
+- my $ret=sprintf("Checked %d programs, on %d channels\n",
$stats{programCount}, $totalChannelsParsed);
++ my $ret=sprintf("Checked %d programs, on %d channels\n",
$stats{programCount}, $totalChannelsParsed);
+
+- for my $cat (sort keys %{$self->{categories}}) {
+- $ret.=sprintf(" found %d %s titles",
$stats{perfect}->{$cat}+$stats{close}->{$cat},
+- $self->{categories}->{$cat});
+- if ( $stats{close}->{$cat} != 0 ) {
+- if ( $stats{close}->{$cat} == 1 ) {
+- $ret.=sprintf(" (%d was not perfect)", $stats{close}->{$cat});
+- }
+- else {
+- $ret.=sprintf(" (%d were not perfect)", $stats{close}->{$cat});
+- }
++ for my $cat (sort keys %{$self->{categories}}) {
++ $ret.=sprintf(" found %d %s titles",
$stats{perfect}->{$cat}+$stats{close}->{$cat},
++ $self->{categories}->{$cat});
++ if ( $stats{close}->{$cat} != 0 ) {
++ if ( $stats{close}->{$cat} == 1 ) {
++ $ret.=sprintf(" (%d was not perfect)", $stats{close}->{$cat});
++ }
++ else {
++ $ret.=sprintf(" (%d were not perfect)", $stats{close}->{$cat});
++ }
++ }
++ $ret.="\n";
+ }
+- $ret.="\n";
+- }
+
+- $ret.=sprintf(" augmented %.2f%% of the programs, parsing %.2f
programs/sec\n",
++ $ret.=sprintf(" augmented %.2f%% of the programs, parsing %.2f
programs/sec\n",
+
($stats{programCount}!=0)?(($stats{perfectMatches}+$stats{closeMatches})*100)/$stats{programCount}:0,
+ ($endTime!=$stats{startTime} && $stats{programCount} != 0)?
+ $stats{programCount}/($endTime-$stats{startTime}):0);
+
+- return($ret);
++ return($ret);
+ }
+
+ 1;
+@@ -1334,8 +1335,8 @@ use open ':encoding(iso-8859-1)'; # try to enforce file
encoding (does this wo
+
+ # Use Term::ProgressBar if installed.
+ use constant Have_bar => eval {
+- require Term::ProgressBar;
+- $Term::ProgressBar::VERSION >= 2;
++ require Term::ProgressBar;
++ $Term::ProgressBar::VERSION >= 2;
+ };
+
+ #
+@@ -1344,8 +1345,8 @@ use constant Have_bar => eval {
+ # details)
+ #
+ # I might, given time build a download manager that:
+-# - downloads the latest plain text files
+-# - understands how to download each week's diffs and apply them
++# - downloads the latest plain text files
++# - understands how to download each week's diffs and apply them
+ # Currently, the 'downloadMissingFiles' flag in the hash of attributes
+ # passed triggers a simple-minded downloader.
+ #
+@@ -1356,128 +1357,129 @@ use constant Have_bar => eval {
+
+ sub new
+ {
+- my ($type) = shift;
+- my $self={ @_ }; # remaining args become attributes
+- for ($self->{downloadMissingFiles}) {
+- $_=0 if not defined; # default
+- }
++ my ($type) = shift;
++ my $self={ @_ }; # remaining args become attributes
++ for ($self->{downloadMissingFiles}) {
++ $_=0 if not defined; # default
++ }
+
+- for ('imdbDir', 'verbose') {
+- die "invalid usage - no $_" if ( !defined($self->{$_}));
+- }
++ for ('imdbDir', 'verbose') {
++ die "invalid usage - no $_" if ( !defined($self->{$_}));
++ }
+
+- $self->{stageLast} = 9; # set the final stage in the build - i.e. the one
which builds the final database
+- $self->{stages} = { 1=>'movies', 2=>'directors',
3=>'actors', 4=>'actresses', 5=>'genres',
6=>'ratings', 7=>'keywords', 8=>'plot' };
+- $self->{optionalStages} = { 'keywords' => 7, 'plot' => 8 };
# list of optional stages - no need to download files for these
++ $self->{stageLast} = 9; # set the final stage in the build - i.e. the one which
builds the final database
++ $self->{stages} = { 1=>'movies', 2=>'directors',
3=>'actors', 4=>'actresses', 5=>'genres',
6=>'ratings', 7=>'keywords', 8=>'plot' };
++ $self->{optionalStages} = { 'keywords' => 7, 'plot' => 8 }; #
list of optional stages - no need to download files for these
+
+- $self->{moviedbIndex}="$self->{imdbDir}/moviedb.idx";
+- $self->{moviedbData}="$self->{imdbDir}/moviedb.dat";
+- $self->{moviedbInfo}="$self->{imdbDir}/moviedb.info";
+- $self->{moviedbOffline}="$self->{imdbDir}/moviedb.offline";
++ $self->{moviedbIndex}="$self->{imdbDir}/moviedb.idx";
++ $self->{moviedbData}="$self->{imdbDir}/moviedb.dat";
++ $self->{moviedbInfo}="$self->{imdbDir}/moviedb.info";
++ $self->{moviedbOffline}="$self->{imdbDir}/moviedb.offline";
+
+- # only leave progress bar on if its available
+- if ( !Have_bar ) {
+- $self->{showProgressBar}=0;
+- }
++ # only leave progress bar on if its available
++ if ( !Have_bar ) {
++ $self->{showProgressBar}=0;
++ }
+
+- bless($self, $type);
++ bless($self, $type);
+
+- if ( $self->{stageToRun} ne $self->{stageLast} ) {
+- # unless this is the last stage, check we have the necessary files
+- return(undef) if ( $self->checkFiles() != 0 );
+- }
++ if ( $self->{stageToRun} ne $self->{stageLast} ) {
++ # unless this is the last stage, check we have the necessary files
++ return(undef) if ( $self->checkFiles() != 0 );
++ }
+
+- return($self);
++ return($self);
+ }
+
+
+ sub checkFiles () {
+
+- my ($self)=@_;
++ my ($self)=@_;
+
+- if ( ! -d "$self->{imdbDir}" ) {
+- if ( $self->{downloadMissingFiles} ) {
+- warn "creating directory $self->{imdbDir}\n";
+- mkdir $self->{imdbDir}, 0777
+- or die "cannot mkdir $self->{imdbDir}: $!";
++ if ( ! -d "$self->{imdbDir}" ) {
++ if ( $self->{downloadMissingFiles} ) {
++ warn "creating directory $self->{imdbDir}\n";
++ mkdir $self->{imdbDir}, 0777
++ or die "cannot mkdir $self->{imdbDir}: $!";
++ }
++ else {
++ die "$self->{imdbDir}:does not exist";
++ }
+ }
+- else {
+- die "$self->{imdbDir}:does not exist";
++ my $listsDir = "$self->{imdbDir}/lists";
++ if ( ! -d $listsDir ) {
++ mkdir $listsDir, 0777 or die "cannot mkdir $listsDir: $!";
+ }
+- }
+- my $listsDir = "$self->{imdbDir}/lists";
+- if ( ! -d $listsDir ) {
+- mkdir $listsDir, 0777 or die "cannot mkdir $listsDir: $!";
+- }
+
+ CHECK_FILES:
+- my %missingListFiles; # maps 'movies' to filename ...movies.gz
+-
+- FILES_CHECK:
+- while ( my( $key, $value ) = each %{ $self->{stages} } ) {
+- # don't check *all* files - only the ones we are crunching
+- next FILES_CHECK if ( lc($self->{stageToRun}) ne 'all' &&
$key != int($self->{stageToRun}) );
+- my $file=$value;
+- my $filename="$listsDir/$file.list";
+- my $filenameGz="$filename.gz";
+- my $filenameExists = -f $filename;
+- my $filenameSize = -s $filename;
+- my $filenameGzExists = -f $filenameGz;
+- my $filenameGzSize = -s $filenameGz;
+-
+- if ( $filenameExists and not $filenameSize ) {
+- warn "removing zero-length $filename\n";
+- unlink $filename or die "cannot unlink $filename: $!";
+- $filenameExists = 0;
+- }
+- if ( $filenameGzExists and not $filenameGzSize ) {
+- warn "removing zero-length $filenameGz\n";
+- unlink $filenameGz or die "cannot unlink $filenameGz: $!";
+- $filenameGzExists = 0;
+- }
+-
+- if ( not $filenameExists and not $filenameGzExists ) {
+- # Just report one of the filenames, keep the message simple.
+- warn "$filenameGz does not exist\n";
+- if ( $self->{optionalStages}{$file} && lc($self->{stageToRun})
eq 'all' ) {
+- warn "$file will not be added to database\n";
+- } else {
+- $missingListFiles{$file}=$filenameGz;
+- }
+- }
+- elsif ( not $filenameExists and $filenameGzExists ) {
+- $self->{imdbListFiles}->{$file}=$filenameGz;
+- }
+- elsif ( $filenameExists and not $filenameGzExists ) {
+- $self->{imdbListFiles}->{$file}=$filename;
+- }
+- elsif ( $filenameExists and $filenameGzExists ) {
+- die "both $filename and $filenameGz exist, remove one of them\n";
+- }
+- else { die }
+- }
+- if ( $self->{downloadMissingFiles} ) {
+- my $baseUrl = 'ftp://ftp.fu-berlin.de/pub/misc/movies/database/frozendata';
+- foreach ( sort keys %missingListFiles ) {
+- my $url = "$baseUrl/$_.list.gz";
+- my $filename = delete $missingListFiles{$_};
+- my $partial = "$filename.partial";
+- if (-e $partial) {
+- if (not -s $partial) {
+- print STDERR "removing empty $partial\n";
+- unlink $partial or die "cannot unlink $partial: $!";
++ my %missingListFiles; # maps 'movies' to filename ...movies.gz
++
++ FILES_CHECK:
++ while ( my( $key, $value ) = each %{ $self->{stages} } ) {
++ # don't check *all* files - only the ones we are crunching
++ next FILES_CHECK if ( lc($self->{stageToRun}) ne 'all' && $key !=
int($self->{stageToRun}) );
++ my $file=$value;
++ my $filename="$listsDir/$file.list";
++ my $filenameGz="$filename.gz";
++ my $filenameExists = -f $filename;
++ my $filenameSize = -s $filename;
++ my $filenameGzExists = -f $filenameGz;
++ my $filenameGzSize = -s $filenameGz;
++
++ if ( $filenameExists and not $filenameSize ) {
++ warn "removing zero-length $filename\n";
++ unlink $filename or die "cannot unlink $filename: $!";
++ $filenameExists = 0;
++ }
++ if ( $filenameGzExists and not $filenameGzSize ) {
++ warn "removing zero-length $filenameGz\n";
++ unlink $filenameGz or die "cannot unlink $filenameGz: $!";
++ $filenameGzExists = 0;
++ }
++
++ if ( not $filenameExists and not $filenameGzExists ) {
++ # Just report one of the filenames, keep the message simple.
++ warn "$filenameGz does not exist\n";
++ if ( $self->{optionalStages}{$file} && lc($self->{stageToRun}) eq
'all' ) {
++ warn "$file will not be added to database\n";
++ } else {
++ $missingListFiles{$file}=$filenameGz;
++ }
+ }
+- else {
+- die <<END
++ elsif ( not $filenameExists and $filenameGzExists ) {
++ $self->{imdbListFiles}->{$file}=$filenameGz;
++ }
++ elsif ( $filenameExists and not $filenameGzExists ) {
++ $self->{imdbListFiles}->{$file}=$filename;
++ }
++ elsif ( $filenameExists and $filenameGzExists ) {
++ die "both $filename and $filenameGz exist, remove one of them\n";
++ }
++ else { die }
++ }
++
++ if ( $self->{downloadMissingFiles} ) {
++ my $baseUrl = 'ftp://ftp.fu-berlin.de/pub/misc/movies/database/frozendata';
++ foreach ( sort keys %missingListFiles ) {
++ my $url = "$baseUrl/$_.list.gz";
++ my $filename = delete $missingListFiles{$_};
++ my $partial = "$filename.partial";
++ if (-e $partial) {
++ if (not -s $partial) {
++ print STDERR "removing empty $partial\n";
++ unlink $partial or die "cannot unlink $partial: $!";
++ }
++ else {
++ die <<END
+ $partial already exists, remove it or try renaming to $filename and
+ resuming the download of <$url> by hand.
+
+ END
+ ;
+- }
+- }
++ }
++ }
+
+- print STDERR <<END
++ print STDERR <<END
+ Trying to download <$url>.
+ With a slow network link this could fail; it might be better to
+ download the file by hand and save it as
+@@ -1485,93 +1487,94 @@ $filename.
+
+ END
+ ;
+- # For downloading we use LWP
+- #
+- my $ua = LWP::UserAgent->new();
+- $ua->env_proxy();
+- $ua->show_progress(1);
+-
+- my $req = HTTP::Request->new(GET => $url);
+- $req->authorization_basic('anonymous', 'tv_imdb');
+-
+- my $resp = $ua->request($req, $filename);
+- my $got_size = -s $filename;
+- if (defined $resp and $resp->is_success ) {
+- die if not $got_size;
+- print STDERR "<$url>\n\t-> $filename, success\n\n";
+- }
+- else {
+- my $msg = "failed to download $url to $filename";
+- $msg .= ", http response code: ".$resp->status_line if defined $resp;
+- warn $msg;
+- if ($got_size) {
+- warn "renaming $filename -> $partial\n";
+- rename $filename, $partial
+- or die "cannot rename $filename to $partial: $!";
+- warn "You might try continuing the download of <$url>
manually.\n";
+- }
+- exit(1);
+- }
+- }
+- $self->{downloadMissingFiles} = 0;
+- goto CHECK_FILES;
+- }
+-
+- if ( %missingListFiles ) {
+- print STDERR "tv_imdb: requires you to download the above files from
ftp.imdb.com\n";
+- print STDERR " see
http://www.imdb.com/interfaces for details\n";
+- print STDERR " or try the --download option\n";
+- #return(undef);
+- return 1;
+- }
+-
+- return 0;
++ # For downloading we use LWP
++ #
++ my $ua = LWP::UserAgent->new();
++ $ua->env_proxy();
++ $ua->show_progress(1);
++
++ my $req = HTTP::Request->new(GET => $url);
++ $req->authorization_basic('anonymous', 'tv_imdb');
++
++ my $resp = $ua->request($req, $filename);
++ my $got_size = -s $filename;
++ if (defined $resp and $resp->is_success ) {
++ die if not $got_size;
++ print STDERR "<$url>\n\t-> $filename, success\n\n";
++ }
++ else {
++ my $msg = "failed to download $url to $filename";
++ $msg .= ", http response code: ".$resp->status_line if defined $resp;
++ warn $msg;
++ if ($got_size) {
++ warn "renaming $filename -> $partial\n";
++ rename $filename, $partial
++ or die "cannot rename $filename to $partial: $!";
++ warn "You might try continuing the download of <$url>
manually.\n";
++ }
++ exit(1);
++ }
++ }
++
++ $self->{downloadMissingFiles} = 0;
++ goto CHECK_FILES;
++ }
++
++ if ( %missingListFiles ) {
++ print STDERR "tv_imdb: requires you to download the above files from
ftp.imdb.com\n";
++ print STDERR " see
http://www.imdb.com/interfaces for details\n";
++ print STDERR " or try the --download option\n";
++ #return(undef);
++ return 1;
++ }
++
++ return 0;
+ }
+
+ sub redirect($$)
+ {
+- my ($self, $file)=@_;
+-
+- if ( defined($file) ) {
+- if ( !open($self->{logfd}, "> $file") ) {
+- print STDERR "$file:$!\n";
+- return(0);
+- }
+- $self->{errorCountInLog}=0;
+- }
+- else {
+- close($self->{logfd});
+- $self->{logfd}=undef;
+- }
+- return(1);
++ my ($self, $file)=@_;
++
++ if ( defined($file) ) {
++ if ( !open($self->{logfd}, "> $file") ) {
++ print STDERR "$file:$!\n";
++ return(0);
++ }
++ $self->{errorCountInLog}=0;
++ }
++ else {
++ close($self->{logfd});
++ $self->{logfd}=undef;
++ }
++ return(1);
+ }
+
+ sub error($$)
+ {
+- my $self=shift;
+- if ( defined($self->{logfd}) ) {
+- print {$self->{logfd}} $_[0]."\n";
+- $self->{errorCountInLog}++;
+- }
+- else {
+- print STDERR $_[0]."\n";
+- }
++ my $self=shift;
++ if ( defined($self->{logfd}) ) {
++ print {$self->{logfd}} $_[0]."\n";
++ $self->{errorCountInLog}++;
++ }
++ else {
++ print STDERR $_[0]."\n";
++ }
+ }
+
+ sub status($$)
+ {
+- my $self=shift;
++ my $self=shift;
+
+- if ( $self->{verbose} ) {
+- print STDERR $_[0]."\n";
+- }
++ if ( $self->{verbose} ) {
++ print STDERR $_[0]."\n";
++ }
+ }
+
+ sub withThousands ($)
+ {
+- my ($val) = @_;
+- $val =~ s/(\d{1,3}?)(?=(\d{3})+$)/$1,/g;
+- return $val;
++ my ($val) = @_;
++ $val =~ s/(\d{1,3}?)(?=(\d{3})+$)/$1,/g;
++ return $val;
+ }
+
+ use XMLTV::Gunzip;
+@@ -1579,1854 +1582,1852 @@ use IO::File;
+
+ sub openMaybeGunzip($)
+ {
+- for ( shift ) {
+- return gunzip_open($_) if m/\.gz$/;
+- return new IO::File("< $_");
+- }
++ for ( shift ) {
++ return gunzip_open($_) if m/\.gz$/;
++ return new IO::File("< $_");
++ }
+ }
+
+ sub closeMaybeGunzip($$)
+ {
+- if ( $_[0]=~m/\.gz$/o ) {
+- # Would close($fh) but that causes segfaults on my system.
+- # Investigating, but in the meantime just leave it open.
+- #
+- #return gunzip_close($_[1]);
+- }
+-
+- # Apparently this can also segfault (wtf?).
+- #return close($_[1]);
++ if ( $_[0]=~m/\.gz$/o ) {
++ # Would close($fh) but that causes segfaults on my system.
++ # Investigating, but in the meantime just leave it open.
++ #
++ #return gunzip_close($_[1]);
++ }
++
++ # Apparently this can also segfault (wtf?).
++ #return close($_[1]);
+ }
+
+ sub readMoviesOrGenres($$$$)
+ {
+- my ($self, $whichMoviesOrGenres, $countEstimate, $file)=@_;
+- my $startTime=time();
+- my $header;
+- my $whatAreWeParsing;
+- my $lineCount=0;
+-
+- if ( $whichMoviesOrGenres eq "Movies" ) {
+- $header="MOVIES LIST";
+- $whatAreWeParsing=1;
+- }
+- elsif ( $whichMoviesOrGenres eq "Genres" ) {
+- $header="8: THE GENRES LIST";
+- $whatAreWeParsing=2;
+- }
+- my $fh = openMaybeGunzip($file) || return(-2);
+- while(<$fh>) {
+- $lineCount++;
+- if ( m/^$header/ ) {
+- if ( !($_=<$fh>) || !m/^===========/o ) {
+- $self->error("missing ======= after $header at line $lineCount");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- if ( !($_=<$fh>) || !m/^\s*$/o ) {
+- $self->error("missing empty line after ======= at line $lineCount");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- last;
+- }
+- elsif ( $lineCount > 1000 ) {
+- $self->error("$file: stopping at line $lineCount, didn't see
\"$header\" line");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- }
+-
+- my $progress=Term::ProgressBar->new({name => "parsing
$whichMoviesOrGenres",
++ my ($self, $whichMoviesOrGenres, $countEstimate, $file)=@_;
++ my $startTime=time();
++ my $header;
++ my $whatAreWeParsing;
++ my $lineCount=0;
++
++ if ( $whichMoviesOrGenres eq "Movies" ) {
++ $header="MOVIES LIST";
++ $whatAreWeParsing=1;
++ }
++ elsif ( $whichMoviesOrGenres eq "Genres" ) {
++ $header="8: THE GENRES LIST";
++ $whatAreWeParsing=2;
++ }
++ my $fh = openMaybeGunzip($file) || return(-2);
++ while(<$fh>) {
++ $lineCount++;
++ if ( m/^$header/ ) {
++ if ( !($_=<$fh>) || !m/^===========/o ) {
++ $self->error("missing ======= after $header at line $lineCount");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ if ( !($_=<$fh>) || !m/^\s*$/o ) {
++ $self->error("missing empty line after ======= at line $lineCount");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ last;
++ }
++ elsif ( $lineCount > 1000 ) {
++ $self->error("$file: stopping at line $lineCount, didn't see
\"$header\" line");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ }
++
++ my $progress=Term::ProgressBar->new({name => "parsing
$whichMoviesOrGenres",
+ count => $countEstimate,
+ ETA => 'linear'})
+- if ( $self->{showProgressBar} );
++ if ( $self->{showProgressBar} );
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ my $count=0;
++ while(<$fh>) {
++ $lineCount++;
++ my $line=$_;
++ #print "read line $lineCount:$line\n";
++
++ # end is line consisting of only '-'
++ last if ( $line=~m/^\-\-\-\-\-\-\-+/o );
++
++ $line=~s/\n$//o;
++
++ my $tab=index($line, "\t");
++ if ( $tab != -1 ) {
++ my $mkey=substr($line, 0, $tab);
++
++ next if ($mkey=~m/\s*\{\{SUSPENDED\}\}/o);
++
++ if ( $whatAreWeParsing == 2 ) {
++ # don't see what these are...?
++ # ignore {{SUSPENDED}}
++ $mkey=~s/\s*\{\{SUSPENDED\}\}//o;
++
++ # ignore {Twelve Angry Men (1954)}
++ $mkey=~s/\s*\{[^\}]+\}//go;
++
++ # skip enties that have {} in them since they're tv episodes
++ #next if ( $mkey=~s/\s*\{[^\}]+\}$//o );
++
++ my $genre=substr($line, $tab);
++
++ # genres sometimes has more than one tab
++ $genre=~s/^\t+//og;
++ if ( defined($self->{movies}{$mkey}) ) {
++ $self->{movies}{$mkey}.="|".$genre;
++ }
++ else {
++ $self->{movies}{$mkey}=$genre;
++ # returned count is number of unique titles found
++ $count++;
++ }
++ }
++ else {
++ push(@{$self->{movies}}, $mkey);
++ # returned count is number of titles found
++ $count++;
++ }
++
++ if ( $self->{showProgressBar} ) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $count > $countEstimate ) {
++ $countEstimate = $progress->target($count+1000);
++ $next_update=$progress->update($count);
++ }
++ elsif ( $count > $next_update ) {
++ $next_update=$progress->update($count);
++ }
++ }
++ }
++ else {
++ $self->error("$file:$lineCount: unrecognized format (missing tab)");
++ $next_update=$progress->update($count) if ($self->{showProgressBar});
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
+
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
++ $self->status(sprintf("parsing $whichMoviesOrGenres found
".withThousands($count)." titles in ".
++ withThousands($lineCount)." lines in %d seconds",time()-$startTime));
+
+- my $count=0;
+- while(<$fh>) {
+- $lineCount++;
+- my $line=$_;
+- #print "read line $lineCount:$line\n";
++ closeMaybeGunzip($file, $fh);
++ return($count);
++}
+
+- # end is line consisting of only '-'
+- last if ( $line=~m/^\-\-\-\-\-\-\-+/o );
++sub readCastOrDirectors($$$)
++{
++ my ($self, $whichCastOrDirector, $castCountEstimate, $file)=@_;
++ my $startTime=time();
+
+- $line=~s/\n$//o;
++ my $header;
++ my $whatAreWeParsing;
++ my $lineCount=0;
+
+- my $tab=index($line, "\t");
+- if ( $tab != -1 ) {
+- my $mkey=substr($line, 0, $tab);
++ if ( $whichCastOrDirector eq "Actors" ) {
++ $header="THE ACTORS LIST";
++ $whatAreWeParsing=1;
++ }
++ elsif ( $whichCastOrDirector eq "Actresses" ) {
++ $header="THE ACTRESSES LIST";
++ $whatAreWeParsing=2;
++ }
++ elsif ( $whichCastOrDirector eq "Directors" ) {
++ $header="THE DIRECTORS LIST";
++ $whatAreWeParsing=3;
++ }
++ else {
++ die "why are we here ?";
++ }
+
+- next if ($mkey=~m/\s*\{\{SUSPENDED\}\}/o);
++ my $fh = openMaybeGunzip($file) || return(-2);
++ my $progress=Term::ProgressBar->new({name => "parsing
$whichCastOrDirector",
++ count => $castCountEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ while(<$fh>) {
++ $lineCount++;
++ if ( m/^$header/ ) {
++ if ( !($_=<$fh>) || !m/^===========/o ) {
++ $self->error("missing ======= after $header at line $lineCount");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ if ( !($_=<$fh>) || !m/^\s*$/o ) {
++ $self->error("missing empty line after ======= at line $lineCount");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ if ( !($_=<$fh>) || !m/^Name\s+Titles\s*$/o ) {
++ $self->error("missing name/titles line after ======= at line
$lineCount");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ if ( !($_=<$fh>) || !m/^[\s\-]+$/o ) {
++ $self->error("missing name/titles suffix line after ======= at line
$lineCount");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ last;
++ }
++ elsif ( $lineCount > 1000 ) {
++ $self->error("$file: stopping at line $lineCount, didn't see
\"$header\" line");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ }
++
++ my $cur_name;
++ my $count=0;
++ my $castNames=0;
++ while(<$fh>) {
++ $lineCount++;
++ my $line=$_;
++ $line=~s/\n$//o;
++ #$self->status("read line $lineCount:$line");
++
++ # end is line consisting of only '-'
++ last if ( $line=~m/^\-\-\-\-\-\-\-+/o );
++
++ next if ( length($line) == 0 );
++
++ if ( $line=~s/^([^\t]+)\t+//o ) {
++ $cur_name=$1;
++ $castNames++;
++
++ if ( $self->{showProgressBar} ) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $castNames > $castCountEstimate ) {
++ $castCountEstimate = $progress->target($castNames+100);
++ $next_update=$progress->update($castNames);
++ }
++ elsif ( $castNames > $next_update ) {
++ $next_update=$progress->update($castNames);
++ }
++ }
++ }
++
++ my $billing;
++ my $HostNarrator="";
++ if ( $whatAreWeParsing < 3 ) {
++ # actors or actresses
++ $billing="9999";
++ if ( $line=~s/\s*<(\d+)>//o ) {
++ $billing=sprintf("%04d", int($1));
++ }
++
++ if ( (my $start=index($line, " [")) != -1 ) {
++ #my $end=rindex($line, "]");
++ my $ex=substr($line, $start+1);
++
++ if ( $ex=~s/Host//o ) {
++ if ( length($HostNarrator) ) {
++ $HostNarrator.=",";
++ }
++ $HostNarrator.="Host";
++ }
++ if ( $ex=~s/Narrator//o ) {
++ if ( length($HostNarrator) ) {
++ $HostNarrator.=",";
++ }
++ $HostNarrator.="Narrator";
++ }
++ $line=substr($line, 0, $start);
++ # ignore character name
++ }
++ }
++ # try ignoring these
++ next if ($line=~m/\s*\{\{SUSPENDED\}\}/o);
+
+- if ( $whatAreWeParsing == 2 ) {
+ # don't see what these are...?
+ # ignore {{SUSPENDED}}
+- $mkey=~s/\s*\{\{SUSPENDED\}\}//o;
++ $line=~s/\s*\{\{SUSPENDED\}\}//o;
+
+- # ignore {Twelve Angry Men (1954)}
+- $mkey=~s/\s*\{[^\}]+\}//go;
++ # [honir] this is wrong - this puts cast from all the episodes as though they are in
the entire series!
++ # ##ignore {Twelve Angry Men (1954)}
++ $line=~s/\s*\{[^\}]+\}//o;
+
+- # skip enties that have {} in them since they're tv episodes
+- #next if ( $mkey=~s/\s*\{[^\}]+\}$//o );
+-
+- my $genre=substr($line, $tab);
++ if ( $whatAreWeParsing < 3 ) {
++ if ( $line=~s/\s*\(aka ([^\)]+)\).*$//o ) {
++ # $attr=$1;
++ }
++ }
++ if ( $line=~s/ (\(.*)$//o ) {
++ # $attrs=$1;
++ }
++ $line=~s/^\s+//og;
++ $line=~s/\s+$//og;
+
+- # genres sometimes has more than one tab
+- $genre=~s/^\t+//og;
+- if ( defined($self->{movies}{$mkey}) ) {
+- $self->{movies}{$mkey}.="|".$genre;
++ if ( $whatAreWeParsing < 3 ) {
++ if ( $line=~s/\s+Narrator$//o ) {
++ # ignore
++ }
+ }
+- else {
+- $self->{movies}{$mkey}=$genre;
+- # returned count is number of unique titles found
+- $count++;
+- }
+- }
+- else {
+- push(@{$self->{movies}}, $mkey);
+- # returned count is number of titles found
+- $count++;
+- }
+
+- if ( $self->{showProgressBar} ) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+1000);
+- $next_update=$progress->update($count);
++ my $val=$self->{movies}{$line};
++ my $name=$cur_name;
++ if ( length($HostNarrator) ) {
++ $name.="[$HostNarrator]";
+ }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
++ if ( defined($billing) ) {
++ if ( defined($val) ) {
++ $self->{movies}{$line}=$val."|$billing:$name";
++ }
++ else {
++ $self->{movies}{$line}="$billing:$name";
++ }
+ }
+- }
+- }
+- else {
+- $self->error("$file:$lineCount: unrecognized format (missing tab)");
+- $next_update=$progress->update($count) if ($self->{showProgressBar});
++ else {
++ if ( defined($val) ) {
++ $self->{movies}{$line}=$val."|$name";
++ }
++ else {
++ $self->{movies}{$line}=$name;
++ }
++ }
++ $count++;
+ }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+-
+- $self->status(sprintf("parsing $whichMoviesOrGenres found
".withThousands($count)." titles in ".
+- withThousands($lineCount)." lines in %d seconds",time()-$startTime));
+-
+- closeMaybeGunzip($file, $fh);
+- return($count);
+-}
++ $progress->update($castCountEstimate) if ($self->{showProgressBar});
+
+-sub readCastOrDirectors($$$)
+-{
+- my ($self, $whichCastOrDirector, $castCountEstimate, $file)=@_;
+- my $startTime=time();
+-
+- my $header;
+- my $whatAreWeParsing;
+- my $lineCount=0;
+-
+- if ( $whichCastOrDirector eq "Actors" ) {
+- $header="THE ACTORS LIST";
+- $whatAreWeParsing=1;
+- }
+- elsif ( $whichCastOrDirector eq "Actresses" ) {
+- $header="THE ACTRESSES LIST";
+- $whatAreWeParsing=2;
+- }
+- elsif ( $whichCastOrDirector eq "Directors" ) {
+- $header="THE DIRECTORS LIST";
+- $whatAreWeParsing=3;
+- }
+- else {
+- die "why are we here ?";
+- }
+-
+- my $fh = openMaybeGunzip($file) || return(-2);
+- my $progress=Term::ProgressBar->new({name => "parsing
$whichCastOrDirector",
+- count => $castCountEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+- while(<$fh>) {
+- $lineCount++;
+- if ( m/^$header/ ) {
+- if ( !($_=<$fh>) || !m/^===========/o ) {
+- $self->error("missing ======= after $header at line $lineCount");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- if ( !($_=<$fh>) || !m/^\s*$/o ) {
+- $self->error("missing empty line after ======= at line $lineCount");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- if ( !($_=<$fh>) || !m/^Name\s+Titles\s*$/o ) {
+- $self->error("missing name/titles line after ======= at line
$lineCount");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- if ( !($_=<$fh>) || !m/^[\s\-]+$/o ) {
+- $self->error("missing name/titles suffix line after ======= at line
$lineCount");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- last;
+- }
+- elsif ( $lineCount > 1000 ) {
+- $self->error("$file: stopping at line $lineCount, didn't see
\"$header\" line");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- }
+-
+- my $cur_name;
+- my $count=0;
+- my $castNames=0;
+- while(<$fh>) {
+- $lineCount++;
+- my $line=$_;
+- $line=~s/\n$//o;
+- #$self->status("read line $lineCount:$line");
+-
+- # end is line consisting of only '-'
+- last if ( $line=~m/^\-\-\-\-\-\-\-+/o );
+-
+- next if ( length($line) == 0 );
+-
+- if ( $line=~s/^([^\t]+)\t+//o ) {
+- $cur_name=$1;
+- $castNames++;
+-
+- if ( $self->{showProgressBar} ) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $castNames > $castCountEstimate ) {
+- $castCountEstimate = $progress->target($castNames+100);
+- $next_update=$progress->update($castNames);
+- }
+- elsif ( $castNames > $next_update ) {
+- $next_update=$progress->update($castNames);
+- }
+- }
+- }
+-
+- my $billing;
+- my $HostNarrator="";
+- if ( $whatAreWeParsing < 3 ) {
+- # actors or actresses
+- $billing="9999";
+- if ( $line=~s/\s*<(\d+)>//o ) {
+- $billing=sprintf("%04d", int($1));
+- }
+-
+- if ( (my $start=index($line, " [")) != -1 ) {
+- #my $end=rindex($line, "]");
+- my $ex=substr($line, $start+1);
+-
+- if ( $ex=~s/Host//o ) {
+- if ( length($HostNarrator) ) {
+- $HostNarrator.=",";
+- }
+- $HostNarrator.="Host";
+- }
+- if ( $ex=~s/Narrator//o ) {
+- if ( length($HostNarrator) ) {
+- $HostNarrator.=",";
+- }
+- $HostNarrator.="Narrator";
+- }
+- $line=substr($line, 0, $start);
+- # ignore character name
+- }
+- }
+- # try ignoring these
+- next if ($line=~m/\s*\{\{SUSPENDED\}\}/o);
+-
+- # don't see what these are...?
+- # ignore {{SUSPENDED}}
+- $line=~s/\s*\{\{SUSPENDED\}\}//o;
+-
+- # [honir] this is wrong - this puts cast from all the episodes as though they are in
the entire series!
+- # ##ignore {Twelve Angry Men (1954)}
+- $line=~s/\s*\{[^\}]+\}//o;
+-
+- if ( $whatAreWeParsing < 3 ) {
+- if ( $line=~s/\s*\(aka ([^\)]+)\).*$//o ) {
+- # $attr=$1;
+- }
+- }
+- if ( $line=~s/ (\(.*)$//o ) {
+- # $attrs=$1;
+- }
+- $line=~s/^\s+//og;
+- $line=~s/\s+$//og;
+-
+- if ( $whatAreWeParsing < 3 ) {
+- if ( $line=~s/\s+Narrator$//o ) {
+- # ignore
+- }
+- }
+-
+- my $val=$self->{movies}{$line};
+- my $name=$cur_name;
+- if ( length($HostNarrator) ) {
+- $name.="[$HostNarrator]";
+- }
+- if ( defined($billing) ) {
+- if ( defined($val) ) {
+- $self->{movies}{$line}=$val."|$billing:$name";
+- }
+- else {
+- $self->{movies}{$line}="$billing:$name";
+- }
+- }
+- else {
+- if ( defined($val) ) {
+- $self->{movies}{$line}=$val."|$name";
+- }
+- else {
+- $self->{movies}{$line}=$name;
+- }
+- }
+- $count++;
+- }
+- $progress->update($castCountEstimate) if ($self->{showProgressBar});
+-
+- $self->status(sprintf("parsing $whichCastOrDirector found
".withThousands($castNames)." names, ".
++ $self->status(sprintf("parsing $whichCastOrDirector found
".withThousands($castNames)." names, ".
+ withThousands($count)." titles in ".withThousands($lineCount)."
lines in %d seconds",time()-$startTime));
+
+- closeMaybeGunzip($file, $fh);
++ closeMaybeGunzip($file, $fh);
+
+- return($castNames);
++ return($castNames);
+ }
+
+ sub readRatings($$$$)
+ {
+- my ($self, $countEstimate, $file)=@_;
+- my $startTime=time();
+- my $lineCount=0;
+-
+- my $fh = openMaybeGunzip($file) || return(-2);
+- while(<$fh>) {
+- $lineCount++;
+- if ( m/^MOVIE RATINGS REPORT/o ) {
+- if ( !($_=<$fh>) || !m/^\s*$/o) {
+- $self->error("missing empty line after \"MOVIE RATINGS REPORT\" at
line $lineCount");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- if ( !($_=<$fh>) || !m/^New Distribution Votes Rank Title/o ) {
+- $self->error("missing \"New Distribution Votes Rank Title\" at
line $lineCount");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- last;
+- }
+- elsif ( $lineCount > 1000 ) {
+- $self->error("$file: stopping at line $lineCount, didn't see
\"MOVIE RATINGS REPORT\" line");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- }
+-
+- my $progress=Term::ProgressBar->new({name => "parsing Ratings",
++ my ($self, $countEstimate, $file)=@_;
++ my $startTime=time();
++ my $lineCount=0;
++
++ my $fh = openMaybeGunzip($file) || return(-2);
++ while(<$fh>) {
++ $lineCount++;
++ if ( m/^MOVIE RATINGS REPORT/o ) {
++ if ( !($_=<$fh>) || !m/^\s*$/o) {
++ $self->error("missing empty line after \"MOVIE RATINGS REPORT\" at
line $lineCount");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ if ( !($_=<$fh>) || !m/^New Distribution Votes Rank Title/o ) {
++ $self->error("missing \"New Distribution Votes Rank Title\" at
line $lineCount");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ last;
++ }
++ elsif ( $lineCount > 1000 ) {
++ $self->error("$file: stopping at line $lineCount, didn't see \"MOVIE
RATINGS REPORT\" line");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ }
++
++ my $progress=Term::ProgressBar->new({name => "parsing Ratings",
+ count => $countEstimate,
+ ETA => 'linear'})
+- if ($self->{showProgressBar});
+-
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- my $count=0;
+- while(<$fh>) {
+- $lineCount++;
+- my $line=$_;
+- #print "read line $lineCount:$line";
+-
+- $line=~s/\n$//o;
+-
+- # skip empty lines (only really appear right before last line ending with ----
+- next if ( $line=~m/^\s*$/o );
+- # end is line consisting of only '-'
+- last if ( $line=~m/^\-\-\-\-\-\-\-+/o );
+-
+- # e.g. New Distribution Votes Rank Title
+- # 0000000133 225568 8.9 12 Angry Men (1957)
+- if ( $line=~s/^\s+([\.|\*|\d]+)\s+(\d+)\s+(\d+)\.(\d+)\s+//o ) {
+- $self->{movies}{$line}=[$1,$2,"$3.$4"];
+- $count++;
+- if ( $self->{showProgressBar} ) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+1000);
+- $next_update=$progress->update($count);
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ my $count=0;
++ while(<$fh>) {
++ $lineCount++;
++ my $line=$_;
++ #print "read line $lineCount:$line";
++
++ $line=~s/\n$//o;
++
++ # skip empty lines (only really appear right before last line ending with ----
++ next if ( $line=~m/^\s*$/o );
++ # end is line consisting of only '-'
++ last if ( $line=~m/^\-\-\-\-\-\-\-+/o );
++
++ # e.g. New Distribution Votes Rank Title
++ # 0000000133 225568 8.9 12 Angry Men (1957)
++ if ( $line=~s/^\s+([\.|\*|\d]+)\s+(\d+)\s+(\d+)\.(\d+)\s+//o ) {
++ $self->{movies}{$line}=[$1,$2,"$3.$4"];
++ $count++;
++ if ( $self->{showProgressBar} ) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $count > $countEstimate ) {
++ $countEstimate = $progress->target($count+1000);
++ $next_update=$progress->update($count);
++ }
++ elsif ( $count > $next_update ) {
++ $next_update=$progress->update($count);
++ }
++ }
+ }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
++ else {
++ $self->error("$file:$lineCount: unrecognized format");
++ $next_update=$progress->update($count) if ($self->{showProgressBar});
+ }
+- }
+- }
+- else {
+- $self->error("$file:$lineCount: unrecognized format");
+- $next_update=$progress->update($count) if ($self->{showProgressBar});
+ }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
++ $progress->update($countEstimate) if ($self->{showProgressBar});
+
+- $self->status(sprintf("parsing Ratings found
".withThousands($count)." titles in ".
++ $self->status(sprintf("parsing Ratings found
".withThousands($count)." titles in ".
+ withThousands($lineCount)." lines in %d seconds",time()-$startTime));
+
+- closeMaybeGunzip($file, $fh);
+- return($count);
++ closeMaybeGunzip($file, $fh);
++ return($count);
+ }
+
+ sub readKeywords($$$$)
+ {
+- my ($self, $countEstimate, $file)=@_;
+- my $startTime=time();
+- my $lineCount=0;
+-
+- my $fh = openMaybeGunzip($file) || return(-2);
+- while(<$fh>) {
+- $lineCount++;
+-
+- if ( m/THE KEYWORDS LIST/ ) {
+- if ( !($_=<$fh>) || !m/^===========/o ) {
+- $self->error("missing ======= after \"THE KEYWORDS LIST\" at line
$lineCount");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- if ( !($_=<$fh>) || !m/^\s*$/o ) {
+- $self->error("missing empty line after ======= at line $lineCount");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- last;
+- }
+- elsif ( $lineCount > 150000 ) { # line 101935 as at 2020-12-23
+- $self->error("$file: stopping at line $lineCount, didn't see \"THE
KEYWORDS LIST\" line");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- }
+-
+- my $progress=Term::ProgressBar->new({name => "parsing keywords",
++ my ($self, $countEstimate, $file)=@_;
++ my $startTime=time();
++ my $lineCount=0;
++
++ my $fh = openMaybeGunzip($file) || return(-2);
++ while(<$fh>) {
++ $lineCount++;
++
++ if ( m/THE KEYWORDS LIST/ ) {
++ if ( !($_=<$fh>) || !m/^===========/o ) {
++ $self->error("missing ======= after \"THE KEYWORDS LIST\" at line
$lineCount");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ if ( !($_=<$fh>) || !m/^\s*$/o ) {
++ $self->error("missing empty line after ======= at line $lineCount");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ last;
++ }
++ elsif ( $lineCount > 150000 ) { # line 101935 as at 2020-12-23
++ $self->error("$file: stopping at line $lineCount, didn't see \"THE
KEYWORDS LIST\" line");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ }
++
++ my $progress=Term::ProgressBar->new({name => "parsing keywords",
+ count => $countEstimate,
+ ETA => 'linear'})
+- if ($self->{showProgressBar});
+-
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- my $count=0;
+- while(<$fh>) {
+- $lineCount++;
+- my $line=$_;
+- chomp($line);
+- next if ($line =~ m/^\s*$/);
+- my ($title, $keyword) = ($line =~ m/^(.*)\s+(\S+)\s*$/);
+- if ( defined($title) and defined($keyword) ) {
+-
+- my ($episode) = $title =~ m/^.*\s+(\{.*\})$/;
+-
+- # ignore anything which is an episode (e.g. "{Doctor Who
(#10.22)}" )
+- if ( !defined $episode || $episode eq '' )
+- {
+- if ( defined($self->{movies}{$title}) ) {
+- $self->{movies}{$title}.=",".$keyword;
+- } else {
+- $self->{movies}{$title}=$keyword;
+- # returned count is number of unique titles found
+- $count++;
+- }
+- }
+-
+- if ( $self->{showProgressBar} ) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+1000);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- } else {
+- $self->error("$file:$lineCount: unrecognized format
\"$line\"");
+- $next_update=$progress->update($count) if ($self->{showProgressBar});
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+-
+- $self->status(sprintf("parsing Keywords found
".withThousands($count)." titles in ".
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ my $count=0;
++ while(<$fh>) {
++ $lineCount++;
++ my $line=$_;
++ chomp($line);
++ next if ($line =~ m/^\s*$/);
++ my ($title, $keyword) = ($line =~ m/^(.*)\s+(\S+)\s*$/);
++ if ( defined($title) and defined($keyword) ) {
++
++ my ($episode) = $title =~ m/^.*\s+(\{.*\})$/;
++
++ # ignore anything which is an episode (e.g. "{Doctor Who (#10.22)}" )
++ if ( !defined $episode || $episode eq '' )
++ {
++ if ( defined($self->{movies}{$title}) ) {
++ $self->{movies}{$title}.=",".$keyword;
++ } else {
++ $self->{movies}{$title}=$keyword;
++ # returned count is number of unique titles found
++ $count++;
++ }
++ }
++
++ if ( $self->{showProgressBar} ) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $count > $countEstimate ) {
++ $countEstimate = $progress->target($count+1000);
++ $next_update=$progress->update($count);
++ }
++ elsif ( $count > $next_update ) {
++ $next_update=$progress->update($count);
++ }
++ }
++ } else {
++ $self->error("$file:$lineCount: unrecognized format
\"$line\"");
++ $next_update=$progress->update($count) if ($self->{showProgressBar});
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++
++ $self->status(sprintf("parsing Keywords found
".withThousands($count)." titles in ".
+ withThousands($lineCount)." lines in %d seconds",time()-$startTime));
+
+- closeMaybeGunzip($file, $fh);
+- return($count);
++ closeMaybeGunzip($file, $fh);
++ return($count);
+ }
+
+ sub readPlots($$$$)
+ {
+- my ($self, $countEstimate, $file)=@_;
+- my $startTime=time();
+- my $lineCount=0;
+-
+- my $fh = openMaybeGunzip($file) || return(-2);
+- while(<$fh>) {
+- $lineCount++;
+-
+- if ( m/PLOT SUMMARIES LIST/ ) {
+- if ( !($_=<$fh>) || !m/^===========/o ) {
+- $self->error("missing ======= after \"PLOT SUMMARIES LIST\" at line
$lineCount");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- if ( !($_=<$fh>) || !m/^-----------/o ) {
+- $self->error("missing ------- line after ======= at line $lineCount");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- last;
+- }
+- elsif ( $lineCount > 500 ) {
+- $self->error("$file: stopping at line $lineCount, didn't see
\"PLOT SUMMARIES LIST\" line");
+- closeMaybeGunzip($file, $fh);
+- return(-1);
+- }
+- }
+-
+- my $progress=Term::ProgressBar->new({name => "parsing plots",
++ my ($self, $countEstimate, $file)=@_;
++ my $startTime=time();
++ my $lineCount=0;
++
++ my $fh = openMaybeGunzip($file) || return(-2);
++ while(<$fh>) {
++ $lineCount++;
++
++ if ( m/PLOT SUMMARIES LIST/ ) {
++ if ( !($_=<$fh>) || !m/^===========/o ) {
++ $self->error("missing ======= after \"PLOT SUMMARIES LIST\" at
line $lineCount");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ if ( !($_=<$fh>) || !m/^-----------/o ) {
++ $self->error("missing ------- line after ======= at line $lineCount");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ last;
++ }
++ elsif ( $lineCount > 500 ) {
++ $self->error("$file: stopping at line $lineCount, didn't see \"PLOT
SUMMARIES LIST\" line");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ }
++
++ my $progress=Term::ProgressBar->new({name => "parsing plots",
+ count => $countEstimate,
+ ETA => 'linear'})
+- if ($self->{showProgressBar});
+-
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- my $count=0;
+- while(<$fh>) {
+- $lineCount++;
+- my $line=$_;
+- chomp($line);
+- next if ($line =~ m/^\s*$/);
+- my ($title, $episode) = ($line =~ m/^MV:\s(.*?)\s?(\{.*\})?$/);
+- if ( defined($title) ) {
+-
+- # ignore anything which is an episode (e.g. "{Doctor Who
(#10.22)}" )
+- if ( !defined $episode || $episode eq '' )
+- {
+- my $plot = '';
+- LOOP:
+- while (1) {
+- if ( $line = <$fh> ) {
+- $lineCount++;
+- chomp($line);
+- next if ($line =~ m/^\s*$/);
+- if ( $line =~ m/PL:\s(.*)$/ ) { # plot summary is a number
of lines starting "PL:"
+- $plot .= ($plot ne ''?' ':'') . $1;
+- }
+- last LOOP if ( $line =~ m/BY:\s(.*)$/ ); # the author line
"BY:" signals the end of the plot summary
+- } else {
+- last LOOP;
+- }
+- }
+-
+- if ( !defined($self->{movies}{$title}) ) {
+- # ensure there's no tab chars in the plot or else the db stage
will barf
+- $plot =~ s/\t//og;
+- $self->{movies}{$title}=$plot;
+- # returned count is number of unique titles found
+- $count++;
+- }
+- }
+-
+- if ( $self->{showProgressBar} ) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+1000);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- } else {
+- # skip lines up to the next "MV:"
+- if ($line !~ m/^(---|PL:|BY:)/ ) {
+- $self->error("$file:$lineCount: unrecognized format
\"$line\"");
+- }
+- $next_update=$progress->update($count) if ($self->{showProgressBar});
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+-
+- $self->status(sprintf("parsing Plots found $count
".withThousands($count)." in ".
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ my $count=0;
++ while(<$fh>) {
++ $lineCount++;
++ my $line=$_;
++ chomp($line);
++ next if ($line =~ m/^\s*$/);
++ my ($title, $episode) = ($line =~ m/^MV:\s(.*?)\s?(\{.*\})?$/);
++ if ( defined($title) ) {
++
++ # ignore anything which is an episode (e.g. "{Doctor Who (#10.22)}" )
++ if ( !defined $episode || $episode eq '' )
++ {
++ my $plot = '';
++ LOOP:
++ while (1) {
++ if ( $line = <$fh> ) {
++ $lineCount++;
++ chomp($line);
++ next if ($line =~ m/^\s*$/);
++ if ( $line =~ m/PL:\s(.*)$/ ) { # plot summary is a number of lines starting
"PL:"
++ $plot .= ($plot ne ''?' ':'') . $1;
++ }
++ last LOOP if ( $line =~ m/BY:\s(.*)$/ ); # the author line "BY:"
signals the end of the plot summary
++ } else {
++ last LOOP;
++ }
++ }
++
++ if ( !defined($self->{movies}{$title}) ) {
++ # ensure there's no tab chars in the plot or else the db stage will barf
++ $plot =~ s/\t//og;
++ $self->{movies}{$title}=$plot;
++ # returned count is number of unique titles found
++ $count++;
++ }
++ }
++
++ if ( $self->{showProgressBar} ) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $count > $countEstimate ) {
++ $countEstimate = $progress->target($count+1000);
++ $next_update=$progress->update($count);
++ }
++ elsif ( $count > $next_update ) {
++ $next_update=$progress->update($count);
++ }
++ }
++ } else {
++ # skip lines up to the next "MV:"
++ if ($line !~ m/^(---|PL:|BY:)/ ) {
++ $self->error("$file:$lineCount: unrecognized format
\"$line\"");
++ }
++ $next_update=$progress->update($count) if ($self->{showProgressBar});
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++
++ $self->status(sprintf("parsing Plots found $count
".withThousands($count)." in ".
+ withThousands($lineCount)." lines in %d seconds",time()-$startTime));
+
+- closeMaybeGunzip($file, $fh);
+- return($count);
++ closeMaybeGunzip($file, $fh);
++ return($count);
+ }
+
+ sub stageComplete($)
+ {
+- my ($self, $stage)=@_;
++ my ($self, $stage)=@_;
+
+- if ( -f "$self->{imdbDir}/stage$stage.data" ) {
+- return(1);
+- }
+- return(0);
++ if ( -f "$self->{imdbDir}/stage$stage.data" ) {
++ return(1);
++ }
++ return(0);
+ }
+
+ sub dbinfoLoad($)
+ {
+- my $self=shift;
++ my $self=shift;
+
+- my $ret=XMLTV::IMDB::loadDBInfo($self->{moviedbInfo});
+- if ( ref $ret eq 'SCALAR' ) {
+- return($ret);
+- }
+- $self->{dbinfo}=$ret;
+- return(undef);
++ my $ret=XMLTV::IMDB::loadDBInfo($self->{moviedbInfo});
++ if ( ref $ret eq 'SCALAR' ) {
++ return($ret);
++ }
++ $self->{dbinfo}=$ret;
++ return(undef);
+ }
+
+ sub dbinfoAdd($$$)
+ {
+- my ($self, $key, $value)=@_;
+- $self->{dbinfo}->{$key}=$value;
++ my ($self, $key, $value)=@_;
++ $self->{dbinfo}->{$key}=$value;
+ }
+
+ sub dbinfoGet($$$)
+ {
+- my ($self, $key, $defaultValue)=@_;
+- if ( defined($self->{dbinfo}->{$key}) ) {
+- return($self->{dbinfo}->{$key});
+- }
+- return($defaultValue);
++ my ($self, $key, $defaultValue)=@_;
++ if ( defined($self->{dbinfo}->{$key}) ) {
++ return($self->{dbinfo}->{$key});
++ }
++ return($defaultValue);
+ }
+
+ sub dbinfoSave($)
+ {
+- my $self=shift;
+- open(INFO, "> $self->{moviedbInfo}") || return(1);
+- for (sort keys %{$self->{dbinfo}}) {
+- print INFO "".$_.":".$self->{dbinfo}->{$_}."\n";
+- }
+- close(INFO);
+- return(0);
++ my $self=shift;
++ open(INFO, "> $self->{moviedbInfo}") || return(1);
++ for (sort keys %{$self->{dbinfo}}) {
++ print INFO "".$_.":".$self->{dbinfo}->{$_}."\n";
++ }
++ close(INFO);
++ return(0);
+ }
+
+ sub dbinfoGetFileSize($$)
+ {
+- my ($self, $key)=@_;
++ my ($self, $key)=@_;
+
+- if ( !defined($self->{imdbListFiles}->{$key}) ) {
+- die ("invalid call");
+- }
+- my $fileSize=int(-s "$self->{imdbListFiles}->{$key}");
++ if ( !defined($self->{imdbListFiles}->{$key}) ) {
++ die ("invalid call");
++ }
++ my $fileSize=int(-s "$self->{imdbListFiles}->{$key}");
+
+- # if compressed, then attempt to run gzip -l
+- if ( $self->{imdbListFiles}->{$key}=~m/.gz$/) {
+- if ( open(my $fd, "gzip -l
".$self->{imdbListFiles}->{$key}."|") ) {
+- # if parse fails, then defalt to wild ass guess of compression of 65%
+- $fileSize=int(($fileSize*100)/(100-65));
++ # if compressed, then attempt to run gzip -l
++ if ( $self->{imdbListFiles}->{$key}=~m/.gz$/) {
++ if ( open(my $fd, "gzip -l
".$self->{imdbListFiles}->{$key}."|") ) {
++ # if parse fails, then defalt to wild ass guess of compression of 65%
++ $fileSize=int(($fileSize*100)/(100-65));
+
+- while(<$fd>) {
+- if ( m/^\s*\d+\s+(\d+)/ ) {
+- $fileSize=$1;
++ while(<$fd>) {
++ if ( m/^\s*\d+\s+(\d+)/ ) {
++ $fileSize=$1;
++ }
++ }
++ close($fd);
++ }
++ else {
++ # wild ass guess of compression of 65%
++ $fileSize=int(($fileSize*100)/(100-65));
+ }
+- }
+- close($fd);
+- }
+- else {
+- # wild ass guess of compression of 65%
+- $fileSize=int(($fileSize*100)/(100-65));
+ }
+- }
+- return($fileSize);
++ return($fileSize);
+ }
+
+ sub dbinfoCalcEstimate($$$)
+ {
+- my ($self, $key, $estimateSizePerEntry)=@_;
++ my ($self, $key, $estimateSizePerEntry)=@_;
+
+- my $fileSize=$self->dbinfoGetFileSize($key);
++ my $fileSize=$self->dbinfoGetFileSize($key);
+
+- my $countEstimate=int($fileSize/$estimateSizePerEntry);
++ my $countEstimate=int($fileSize/$estimateSizePerEntry);
+
+- $self->dbinfoAdd($key."_list_file",
$self->{imdbListFiles}->{$key});
+- $self->dbinfoAdd($key."_list_file_size", int(-s
"$self->{imdbListFiles}->{$key}"));
+- $self->dbinfoAdd($key."_list_file_size_uncompressed", $fileSize);
+- $self->dbinfoAdd($key."_list_count_estimate", $countEstimate);
+- return($countEstimate);
++ $self->dbinfoAdd($key."_list_file",
$self->{imdbListFiles}->{$key});
++ $self->dbinfoAdd($key."_list_file_size", int(-s
"$self->{imdbListFiles}->{$key}"));
++ $self->dbinfoAdd($key."_list_file_size_uncompressed", $fileSize);
++ $self->dbinfoAdd($key."_list_count_estimate", $countEstimate);
++ return($countEstimate);
+ }
+
+ sub dbinfoCalcBytesPerEntry($$$)
+ {
+- my ($self, $key, $calcActualForThisNumber)=@_;
++ my ($self, $key, $calcActualForThisNumber)=@_;
+
+- my $fileSize=$self->dbinfoGetFileSize($key);
++ my $fileSize=$self->dbinfoGetFileSize($key);
+
+- return(int($fileSize/$calcActualForThisNumber));
++ return(int($fileSize/$calcActualForThisNumber));
+ }
+
+ sub invokeStage($$)
+ {
+- my ($self, $stage)=@_;
+-
+- my $startTime=time();
+- if ( $stage == 1 ) {
+- $self->status("parsing Movies list for stage $stage..");
+- my $countEstimate=$self->dbinfoCalcEstimate("movies", 47);
+-
+- my $num=$self->readMoviesOrGenres("Movies", $countEstimate,
"$self->{imdbListFiles}->{movies}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{movies} from
ftp.imdb.com");
+- }
+- return(1);
+- }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
+- my $better=$self->dbinfoCalcBytesPerEntry("movies", $num);
+- $self->status("ARG estimate of $countEstimate for movies needs updating,
found $num ($better bytes/entry)");
+- }
+- $self->dbinfoAdd("db_stat_movie_count", "$num");
+-
+- $self->status("writing stage1 data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing titles",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+- my $count=0;
+- for my $movie (@{$self->{movies}}) {
+- print OUT "$movie\n";
++ my ($self, $stage)=@_;
+
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
+- }
+- }
+- elsif ( $stage == 2 ) {
+- $self->status("parsing Directors list for stage $stage..");
+-
+- my $countEstimate=$self->dbinfoCalcEstimate("directors", 258);
+-
+- my $num=$self->readCastOrDirectors("Directors", $countEstimate,
"$self->{imdbListFiles}->{directors}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{directors}
from
ftp.imdb.com (see
http://www.imdb.com/interfaces)");
+- }
+- return(1);
+- }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
+- my $better=$self->dbinfoCalcBytesPerEntry("directors", $num);
+- $self->status("ARG estimate of $countEstimate for directors needs updating,
found $num ($better bytes/entry)");
+- }
+- $self->dbinfoAdd("db_stat_director_count", "$num");
+-
+- $self->status("writing stage2 data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing directors",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- my $count=0;
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+- for my $key (keys %{$self->{movies}}) {
+- my %dir;
+- for (split('\|', $self->{movies}{$key})) {
+- $dir{$_}++;
+- }
+- my @list;
+- for (keys %dir) {
+- push(@list, sprintf("%03d:%s", $dir{$_}, $_));
+- }
+- my $value="";
+- for my $c (reverse sort {$a cmp $b} @list) {
+- my ($num, $name)=split(':', $c);
+- $value.=$name."|";
+- }
+- $value=~s/\|$//o;
+- print OUT "$key\t$value\n";
++ my $startTime=time();
++ if ( $stage == 1 ) {
++ $self->status("parsing Movies list for stage $stage..");
++ my $countEstimate=$self->dbinfoCalcEstimate("movies", 47);
+
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
+- }
+- #unlink("$self->{imdbDir}/stage1.data");
+- }
+- elsif ( $stage == 3 ) {
+- $self->status("parsing Actors list for stage $stage..");
+-
+- #print "re-reading movies into memory for reverse lookup..\n";
+- my $countEstimate=$self->dbinfoCalcEstimate("actors", 449);
+-
+- my $num=$self->readCastOrDirectors("Actors", $countEstimate,
"$self->{imdbListFiles}->{actors}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{actors} from
ftp.imdb.com (see
http://www.imdb.com/interfaces)");
+- }
+- return(1);
+- }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
+- my $better=$self->dbinfoCalcBytesPerEntry("actors", $num);
+- $self->status("ARG estimate of $countEstimate for actors needs updating,
found $num ($better bytes/entry)");
+- }
+- $self->dbinfoAdd("db_stat_actor_count", "$num");
+-
+- $self->status("writing stage3 data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing actors",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- my $count=0;
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+- for my $key (keys %{$self->{movies}}) {
+- print OUT "$key\t$self->{movies}{$key}\n";
++ my $num=$self->readMoviesOrGenres("Movies", $countEstimate,
"$self->{imdbListFiles}->{movies}");
++ if ( $num < 0 ) {
++ if ( $num == -2 ) {
++ $self->error("you need to download $self->{imdbListFiles}->{movies}
from ftp.imdb.com");
++ }
++ return(1);
++ }
++ elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
++ my $better=$self->dbinfoCalcBytesPerEntry("movies", $num);
++ $self->status("ARG estimate of $countEstimate for movies needs updating,
found $num ($better bytes/entry)");
++ }
++ $self->dbinfoAdd("db_stat_movie_count", "$num");
++
++ $self->status("writing stage1 data ..");
++ {
++ my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
++ my $progress=Term::ProgressBar->new({name => "writing titles",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
++ my $count=0;
++ for my $movie (@{$self->{movies}}) {
++ print OUT "$movie\n";
++
++ $count++;
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $count > $countEstimate ) {
++ $countEstimate = $progress->target($count+100);
++ $next_update=$progress->update($count);
++ }
++ elsif ( $count > $next_update ) {
++ $next_update=$progress->update($count);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(OUT);
++ delete($self->{movies});
++ }
++ }
++ elsif ( $stage == 2 ) {
++ $self->status("parsing Directors list for stage $stage..");
+
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
+- }
+- }
+- elsif ( $stage == 4 ) {
+- $self->status("parsing Actresses list for stage $stage..");
+-
+- my $countEstimate=$self->dbinfoCalcEstimate("actresses", 483);
+- my $num=$self->readCastOrDirectors("Actresses", $countEstimate,
"$self->{imdbListFiles}->{actresses}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{actresses}
from
ftp.imdb.com (see
http://www.imdb.com/interfaces)");
+- }
+- return(1);
+- }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
+- my $better=$self->dbinfoCalcBytesPerEntry("actresses", $num);
+- $self->status("ARG estimate of $countEstimate for actresses needs updating,
found $num ($better bytes/entry)");
+- }
+- $self->dbinfoAdd("db_stat_actress_count", "$num");
+-
+- $self->status("writing stage4 data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing actresses",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- my $count=0;
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+- for my $key (keys %{$self->{movies}}) {
+- print OUT "$key\t$self->{movies}{$key}\n";
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
+- }
+- #unlink("$self->{imdbDir}/stage3.data");
+- }
+- elsif ( $stage == 5 ) {
+- $self->status("parsing Genres list for stage $stage..");
+- my $countEstimate=$self->dbinfoCalcEstimate("genres", 68);
+-
+- my $num=$self->readMoviesOrGenres("Genres", $countEstimate,
"$self->{imdbListFiles}->{genres}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{genres} from
ftp.imdb.com");
+- }
+- return(1);
+- }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
+- my $better=$self->dbinfoCalcBytesPerEntry("genres", $num);
+- $self->status("ARG estimate of $countEstimate for genres needs updating,
found $num ($better bytes/entry)");
+- }
+- $self->dbinfoAdd("db_stat_genres_count", "$num");
+-
+- $self->status("writing stage5 data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_genres_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing genres",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+- my $count=0;
+- for my $movie (keys %{$self->{movies}}) {
+- print OUT "$movie\t$self->{movies}->{$movie}\n";
++ my $countEstimate=$self->dbinfoCalcEstimate("directors", 258);
+
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
+- }
+- }
+- elsif ( $stage == 6 ) {
+- $self->status("parsing Ratings list for stage $stage..");
+- my $countEstimate=$self->dbinfoCalcEstimate("ratings", 68);
+-
+- my $num=$self->readRatings($countEstimate,
"$self->{imdbListFiles}->{ratings}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{ratings}
from ftp.imdb.com");
+- }
+- return(1);
+- }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
+- my $better=$self->dbinfoCalcBytesPerEntry("ratings", $num);
+- $self->status("ARG estimate of $countEstimate for ratings needs updating,
found $num ($better bytes/entry)");
+- }
+- $self->dbinfoAdd("db_stat_ratings_count", "$num");
+-
+- $self->status("writing stage6 data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_ratings_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing ratings",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+- my $count=0;
+- for my $movie (keys %{$self->{movies}}) {
+- my @value=@{$self->{movies}->{$movie}};
+- print OUT "$movie\t$value[0]\t$value[1]\t$value[2]\n";
++ my $num=$self->readCastOrDirectors("Directors", $countEstimate,
"$self->{imdbListFiles}->{directors}");
++ if ( $num < 0 ) {
++ if ( $num == -2 ) {
++ $self->error("you need to download $self->{imdbListFiles}->{directors}
from
ftp.imdb.com (see
http://www.imdb.com/interfaces)");
++ }
++ return(1);
++ }
++ elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
++ my $better=$self->dbinfoCalcBytesPerEntry("directors", $num);
++ $self->status("ARG estimate of $countEstimate for directors needs updating,
found $num ($better bytes/entry)");
++ }
++ $self->dbinfoAdd("db_stat_director_count", "$num");
++
++ $self->status("writing stage2 data ..");
++ {
++ my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
++ my $progress=Term::ProgressBar->new({name => "writing directors",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ my $count=0;
++ open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
++ for my $key (keys %{$self->{movies}}) {
++ my %dir;
++ for (split('\|', $self->{movies}{$key})) {
++ $dir{$_}++;
++ }
++ my @list;
++ for (keys %dir) {
++ push(@list, sprintf("%03d:%s", $dir{$_}, $_));
++ }
++ my $value="";
++ for my $c (reverse sort {$a cmp $b} @list) {
++ my ($num, $name)=split(':', $c);
++ $value.=$name."|";
++ }
++ $value=~s/\|$//o;
++ print OUT "$key\t$value\n";
++
++ $count++;
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $count > $countEstimate ) {
++ $countEstimate = $progress->target($count+100);
++ $next_update=$progress->update($count);
++ }
++ elsif ( $count > $next_update ) {
++ $next_update=$progress->update($count);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(OUT);
++ delete($self->{movies});
++ }
++ #unlink("$self->{imdbDir}/stage1.data");
++ }
++ elsif ( $stage == 3 ) {
++ $self->status("parsing Actors list for stage $stage..");
+
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
+- }
+- }
+- elsif ( $stage == 7 ) {
+- $self->status("parsing Keywords list for stage $stage..");
+-
+- if ( !defined($self->{imdbListFiles}->{keywords}) ) {
+- $self->status("no keywords file downloaded, see --with-keywords details in
documentation");
+- return(0);
+- }
+-
+- my $countEstimate=5630000;
+- my $num=$self->readKeywords($countEstimate,
"$self->{imdbListFiles}->{keywords}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{keywords}
from ftp.imdb.com");
+- }
+- return(1);
+- }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.05 ) {
+- $self->status("ARG estimate of $countEstimate for keywords needs updating,
found $num");
+- }
+- $self->dbinfoAdd("keywords_list_file",
"$self->{imdbListFiles}->{keywords}");
+- $self->dbinfoAdd("keywords_list_file_size", -s
"$self->{imdbListFiles}->{keywords}");
+- $self->dbinfoAdd("db_stat_keywords_count", "$num");
+-
+- $self->status("writing stage$stage data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_keywords_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing keywords",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+-
+- my $count=0;
+- for my $movie (keys %{$self->{movies}}) {
+- print OUT "$movie\t$self->{movies}->{$movie}\n";
++ #print "re-reading movies into memory for reverse lookup..\n";
++ my $countEstimate=$self->dbinfoCalcEstimate("actors", 449);
+
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
+- }
+- }
+- elsif ( $stage == 8 ) {
+- $self->status("parsing Plot list for stage $stage..");
+-
+- if ( !defined($self->{imdbListFiles}->{plot}) ) {
+- $self->status("no plot file downloaded, see --with-plot details in
documentation");
+- return(0);
+- }
+-
+- my $countEstimate=222222;
+- my $num=$self->readPlots($countEstimate,
"$self->{imdbListFiles}->{plot}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{plot} from
ftp.imdb.com");
+- }
+- return(1);
+- }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.05 ) {
+- $self->status("ARG estimate of $countEstimate for plots needs updating,
found $num");
+- }
+- $self->dbinfoAdd("plots_list_file",
"$self->{imdbListFiles}->{plot}");
+- $self->dbinfoAdd("plots_list_file_size", -s
"$self->{imdbListFiles}->{plot}");
+- $self->dbinfoAdd("db_stat_plots_count", "$num");
+-
+- $self->status("writing stage$stage data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_plots_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing plots",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+-
+- my $count=0;
+- for my $movie (keys %{$self->{movies}}) {
+- print OUT "$movie\t$self->{movies}->{$movie}\n";
++ my $num=$self->readCastOrDirectors("Actors", $countEstimate,
"$self->{imdbListFiles}->{actors}");
++ if ( $num < 0 ) {
++ if ( $num == -2 ) {
++ $self->error("you need to download $self->{imdbListFiles}->{actors}
from
ftp.imdb.com (see
http://www.imdb.com/interfaces)");
++ }
++ return(1);
++ }
++ elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
++ my $better=$self->dbinfoCalcBytesPerEntry("actors", $num);
++ $self->status("ARG estimate of $countEstimate for actors needs updating,
found $num ($better bytes/entry)");
++ }
++ $self->dbinfoAdd("db_stat_actor_count", "$num");
++
++ $self->status("writing stage3 data ..");
++ {
++ my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
++ my $progress=Term::ProgressBar->new({name => "writing actors",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ my $count=0;
++ open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
++ for my $key (keys %{$self->{movies}}) {
++ print OUT "$key\t$self->{movies}{$key}\n";
++
++ $count++;
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $count > $countEstimate ) {
++ $countEstimate = $progress->target($count+100);
++ $next_update=$progress->update($count);
++ }
++ elsif ( $count > $next_update ) {
++ $next_update=$progress->update($count);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(OUT);
++ delete($self->{movies});
++ }
++ }
++ elsif ( $stage == 4 ) {
++ $self->status("parsing Actresses list for stage $stage..");
+
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
+- }
+- }
+- elsif ( $stage == $self->{stageLast} ) {
+- my $tab=sprintf("\t");
+-
+- $self->status("indexing all previous stage's data for stage
".$self->{stageLast}."..");
+-
+- $self->status("parsing stage 1 data (movie list)..");
+- my %movies;
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "reading titles",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(IN, "< $self->{imdbDir}/stage1.data") || die
"$self->{imdbDir}/stage1.data:$!";
+- while(<IN>) {
+- chop();
+- $movies{$_}="";
+-
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
+- }
+- close(IN);
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- }
+-
+- $self->status("merging in stage 2 data (directors)..");
+- if ( 1 ) {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "merging directors",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(IN, "< $self->{imdbDir}/stage2.data") || die
"$self->{imdbDir}/stage2.data:$!";
+- while(<IN>) {
+- chop();
+- s/^([^\t]+)\t//o;
+- if ( !defined($movies{$1}) ) {
+- $self->error("directors list references unidentified title
'$1'");
+- next;
+- }
+- $movies{$1}=$_;
+-
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(IN);
+- }
+-
+- if ( 1 ) {
+- # fill in default for movies we didn't have a director for
+- while (my ($key, $val) = each (%movies)) {
+- if (!length($val)) {
+- $movies{$key}="<>";
+- }
+- }
+- }
+-
+- $self->status("merging in stage 3 data (actors)..");
+- if ( 1 ) {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "merging actors",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(IN, "< $self->{imdbDir}/stage3.data") || die
"$self->{imdbDir}/stage3.data:$!";
+- while(<IN>) {
+- chop();
+- s/^([^\t]+)\t//o;
+- my $dbkey=$1;
+- my $val=$movies{$dbkey};
+- if ( !defined($val) ) {
+- $self->error("actors list references unidentified title
'$dbkey'");
+- next;
+- }
+- if ( $val=~m/$tab/o ) {
+- $movies{$dbkey}=$val."|".$_;
++ my $countEstimate=$self->dbinfoCalcEstimate("actresses", 483);
++ my $num=$self->readCastOrDirectors("Actresses", $countEstimate,
"$self->{imdbListFiles}->{actresses}");
++ if ( $num < 0 ) {
++ if ( $num == -2 ) {
++ $self->error("you need to download $self->{imdbListFiles}->{actresses}
from
ftp.imdb.com (see
http://www.imdb.com/interfaces)");
++ }
++ return(1);
++ }
++ elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
++ my $better=$self->dbinfoCalcBytesPerEntry("actresses", $num);
++ $self->status("ARG estimate of $countEstimate for actresses needs updating,
found $num ($better bytes/entry)");
++ }
++ $self->dbinfoAdd("db_stat_actress_count", "$num");
++
++ $self->status("writing stage4 data ..");
++ {
++ my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
++ my $progress=Term::ProgressBar->new({name => "writing actresses",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ my $count=0;
++ open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
++ for my $key (keys %{$self->{movies}}) {
++ print OUT "$key\t$self->{movies}{$key}\n";
++ $count++;
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $count > $countEstimate ) {
++ $countEstimate = $progress->target($count+100);
++ $next_update=$progress->update($count);
++ }
++ elsif ( $count > $next_update ) {
++ $next_update=$progress->update($count);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(OUT);
++ delete($self->{movies});
+ }
+- else {
+- $movies{$dbkey}=$val.$tab.$_;
+- }
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(IN);
+- }
+-
+- $self->status("merging in stage 4 data (actresses)..");
+- if ( 1 ) {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "merging actresses",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(IN, "< $self->{imdbDir}/stage4.data") || die
"$self->{imdbDir}/stage4.data:$!";
+- while(<IN>) {
+- chop();
+- s/^([^\t]+)\t//o;
+- my $dbkey=$1;
+- my $val=$movies{$dbkey};
+- if ( !defined($val) ) {
+- $self->error("actresses list references unidentified title
'$dbkey'");
+- next;
+- }
+- if ( $val=~m/$tab/o ) {
+- $movies{$dbkey}=$val."|".$_;
++ #unlink("$self->{imdbDir}/stage3.data");
++ }
++ elsif ( $stage == 5 ) {
++ $self->status("parsing Genres list for stage $stage..");
++ my $countEstimate=$self->dbinfoCalcEstimate("genres", 68);
++
++ my $num=$self->readMoviesOrGenres("Genres", $countEstimate,
"$self->{imdbListFiles}->{genres}");
++ if ( $num < 0 ) {
++ if ( $num == -2 ) {
++ $self->error("you need to download $self->{imdbListFiles}->{genres}
from ftp.imdb.com");
++ }
++ return(1);
++ }
++ elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
++ my $better=$self->dbinfoCalcBytesPerEntry("genres", $num);
++ $self->status("ARG estimate of $countEstimate for genres needs updating,
found $num ($better bytes/entry)");
++ }
++ $self->dbinfoAdd("db_stat_genres_count", "$num");
++
++ $self->status("writing stage5 data ..");
++ {
++ my $countEstimate=$self->dbinfoGet("db_stat_genres_count", 0);
++ my $progress=Term::ProgressBar->new({name => "writing genres",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
++ my $count=0;
++ for my $movie (keys %{$self->{movies}}) {
++ print OUT "$movie\t$self->{movies}->{$movie}\n";
++
++ $count++;
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $count > $countEstimate ) {
++ $countEstimate = $progress->target($count+100);
++ $next_update=$progress->update($count);
++ }
++ elsif ( $count > $next_update ) {
++ $next_update=$progress->update($count);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(OUT);
++ delete($self->{movies});
+ }
+- else {
+- $movies{$dbkey}=$val.$tab.$_;
+- }
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(IN);
+- }
+- if ( 1 ) {
+- # fill in placeholder if no actors were found
+- while (my ($key, $val) = each (%movies)) {
+- if ( !($val=~m/$tab/o) ) {
+- $movies{$key}.=$tab."<>";
+- }
+- }
+- }
+-
+- $self->status("merging in stage 5 data (genres)..");
+- if ( 1 ) {
+- my $countEstimate=$self->dbinfoGet("db_stat_genres_count", 1); #
'1' prevents the spurious "(nothing to do)" msg
+- my $progress=Term::ProgressBar->new({name => "merging genres",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(IN, "< $self->{imdbDir}/stage5.data") || die
"$self->{imdbDir}/stage5.data:$!";
+- while(<IN>) {
+- chop();
+- s/^([^\t]+)\t//o;
+- my $dbkey=$1;
+- my $genres=$_;
+- my $val=$movies{$dbkey};
+- if ( !defined($val) ) {
+- $self->error("genres list references unidentified title
'$1'");
+- next;
+- }
+- $movies{$dbkey}.=$tab.$genres;
+-
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(IN);
+- }
+-
+- if ( 1 ) {
+- # fill in placeholder if no genres were found
+- while (my ($key, $val) = each (%movies)) {
+- my $t=index($val, $tab);
+- if ( $t == -1 ) {
+- die "corrupt entry '$key' '$val'";
+- }
+- if ( index($val, $tab, $t+1) == -1 ) {
+- $movies{$key}.=$tab."<>";
+- }
+- }
+- }
+-
+- $self->status("merging in stage 6 data (ratings)..");
+- if ( 1 ) {
+- my $countEstimate=$self->dbinfoGet("db_stat_ratings_count", 1); #
'1' prevents the spurious "(nothing to do)" msg
+- my $progress=Term::ProgressBar->new({name => "merging ratings",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(IN, "< $self->{imdbDir}/stage6.data") || die
"$self->{imdbDir}/stage6.data:$!";
+- while(<IN>) {
+- chop();
+- s/^([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)$//o;
+- my $dbkey=$1;
+- my ($ratingDist, $ratingVotes, $ratingRank)=($2,$3,$4);
+-
+- my $val=$movies{$dbkey};
+- if ( !defined($val) ) {
+- $self->error("ratings list references unidentified title
'$1'");
+- next;
+- }
+- $movies{$dbkey}.=$tab.$ratingDist.$tab.$ratingVotes.$tab.$ratingRank;
+-
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(IN);
+- }
+-
+- if ( 1 ) {
+- # fill in placeholder if no genres were found
+- while (my ($key, $val) = each (%movies)) {
+- my $t=index($val, $tab);
+- if ( $t == -1 ) {
+- die "corrupt entry '$key' '$val'";
+- }
+- my $j=index($val, $tab, $t+1);
+- if ( $j == -1 ) {
+- die "corrupt entry '$key' '$val'";
+- }
+- if ( index($val, $tab, $j+1) == -1 ) {
+-
$movies{$key}.=$tab."<>".$tab."<>".$tab."<>";
+- }
+- }
+- }
+-
+- $self->status("merging in stage 7 data (keywords)..");
+- #if ( 1 ) { # this stage is optional
+- if ( -f "$self->{imdbDir}/stage7.data" ) {
+- my $countEstimate=$self->dbinfoGet("db_stat_keywords_count", 1); #
'1' prevents the spurious "(nothing to do)" msg
+- my $progress=Term::ProgressBar->new({name => "merging keywords",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(IN, "< $self->{imdbDir}/stage7.data") || die
"$self->{imdbDir}/stage7.data:$!";
+- while(<IN>) {
+- chop();
+- s/^([^\t]+)\t+//o;
+- my $dbkey=$1;
+- my $keywords=$_;
+- if ( !defined($movies{$dbkey}) ) {
+- $self->error("keywords list references unidentified title
'$1'");
+- next;
+- }
+- $movies{$dbkey}.=$tab.$keywords;
+-
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(IN);
+- }
+-
+- if ( 1 ) {
+- # fill in default for movies we didn't have any keywords for
+- while (my ($key, $val) = each (%movies)) {
+- #keyword is 6th entry
+- my $t = 0;
+- for my $i (0..4) {
+- $t=index($val, $tab, $t);
+- if ( $t == -1 ) {
+- die "Corrupt entry '$key' '$val'";
+- }
+- $t+=1;
+- }
+- if ( index($val, $tab, $t) == -1 ) {
+- $movies{$key}.=$tab."<>";
+- }
+- }
+- }
+-
+- $self->status("merging in stage 8 data (plots)..");
+- #if ( 1 ) { # this stage is optional
+- if ( -f "$self->{imdbDir}/stage8.data" ) {
+- my $countEstimate=$self->dbinfoGet("db_stat_plots_count", 1); #
'1' prevents the spurious "(nothing to do)" msg
+- my $progress=Term::ProgressBar->new({name => "merging plots",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(IN, "< $self->{imdbDir}/stage8.data") || die
"$self->{imdbDir}/stage8.data:$!";
+- while(<IN>) {
+- chop();
+- s/^([^\t]+)\t+//o;
+- my $dbkey=$1;
+- my $plot=$_;
+- if ( !defined($movies{$dbkey}) ) {
+- $self->error("plot list references unidentified title
'$1'");
+- next;
+- }
+- $movies{$dbkey}.=$tab.$plot;
+-
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(IN);
+- }
+- if ( 1 ) {
+- # fill in default for movies we didn't have any plot for
+- while (my ($key, $val) = each (%movies)) {
+- #plot is 7th entry
+- my $t = 0;
+- for my $i (0..5) {
+- $t=index($val, $tab, $t);
+- if ( $t == -1 ) {
+- die "Corrupt entry '$key' '$val'";
+- }
+- $t+=1;
+- }
+- if ( index($val, $tab, $t) == -1 ) {
+- $movies{$key}.=$tab."<>";
+- }
+- }
+- }
+-
+- #unlink("$self->{imdbDir}/stage1.data");
+- #unlink("$self->{imdbDir}/stage2.data");
+- #unlink("$self->{imdbDir}/stage3.data");
+-
+- #
---------------------------------------------------------------------------------------
+-
+-
+- #
+- # note: not all movies end up with a cast, but we include them anyway.
+- #
+-
+- my %nmovies;
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "computing index",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- my $count=0;
+- for my $key (keys %movies) {
+- my $dbkey=$key;
+-
+- # drop episode information - ex: {Twelve Angry Men (1954)}
+- $dbkey=~s/\s*\{[^\}]+\}//go;
+-
+- # todo - this would make things easier
+- # change double-quotes around title to be (made-for-tv) suffix instead
+- if ( $dbkey=~m/^\"/o && #"
+- $dbkey=~m/\"\s*\(/o ) { #"
+- $dbkey.=" (tv_series)";
+- }
+- # how rude, some entries have (TV) appearing more than once.
+- $dbkey=~s/\(TV\)\s*\(TV\)$/(TV)/o;
+-
+- my $qualifier;
+- if ( $dbkey=~s/\s+\(TV\)$//o ) {
+- $qualifier="tv_movie";
+- }
+- elsif ( $dbkey=~s/\s+\(mini\) \(tv_series\)$// ) {
+- $qualifier="tv_mini_series";
+- }
+- elsif ( $dbkey=~s/\s+\(tv_series\)$// ) {
+- $qualifier="tv_series";
+- }
+- elsif ( $dbkey=~s/\s+\(mini\)$//o ) {
+- $qualifier="tv_mini_series";
+- }
+- elsif ( $dbkey=~s/\s+\(V\)$//o ) {
+- $qualifier="video_movie";
+- }
+- elsif ( $dbkey=~s/\s+\(VG\)$//o ) {
+- #$qualifier="video_game";
+- delete($movies{$key});
+- next;
++ }
++ elsif ( $stage == 6 ) {
++ $self->status("parsing Ratings list for stage $stage..");
++ my $countEstimate=$self->dbinfoCalcEstimate("ratings", 68);
++
++ my $num=$self->readRatings($countEstimate,
"$self->{imdbListFiles}->{ratings}");
++ if ( $num < 0 ) {
++ if ( $num == -2 ) {
++ $self->error("you need to download $self->{imdbListFiles}->{ratings}
from ftp.imdb.com");
++ }
++ return(1);
++ }
++ elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
++ my $better=$self->dbinfoCalcBytesPerEntry("ratings", $num);
++ $self->status("ARG estimate of $countEstimate for ratings needs updating,
found $num ($better bytes/entry)");
++ }
++ $self->dbinfoAdd("db_stat_ratings_count", "$num");
++
++ $self->status("writing stage6 data ..");
++ {
++ my $countEstimate=$self->dbinfoGet("db_stat_ratings_count", 0);
++ my $progress=Term::ProgressBar->new({name => "writing ratings",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
++ my $count=0;
++ for my $movie (keys %{$self->{movies}}) {
++ my @value=@{$self->{movies}->{$movie}};
++ print OUT "$movie\t$value[0]\t$value[1]\t$value[2]\n";
++
++ $count++;
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $count > $countEstimate ) {
++ $countEstimate = $progress->target($count+100);
++ $next_update=$progress->update($count);
++ }
++ elsif ( $count > $next_update ) {
++ $next_update=$progress->update($count);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(OUT);
++ delete($self->{movies});
+ }
+- else {
+- $qualifier="movie";
++ }
++ elsif ( $stage == 7 ) {
++ $self->status("parsing Keywords list for stage $stage..");
++
++ if ( !defined($self->{imdbListFiles}->{keywords}) ) {
++ $self->status("no keywords file downloaded, see --with-keywords details in
documentation");
++ return(0);
+ }
+- #if ( $dbkey=~s/\s+\((tv_series|tv_mini_series|tv_movie|video_movie|video_game)\)$//o
) {
+- # $qualifier=$1;
+- #}
+- my $year;
+- my $title=$dbkey;
+
+- if ( $title=~m/^\"/o && $title=~m/\"\s*\(/o ) { #"
+- $title=~s/^\"//o; #"
+- $title=~s/\"(\s*\()/$1/o; #"
++ my $countEstimate=5630000;
++ my $num=$self->readKeywords($countEstimate,
"$self->{imdbListFiles}->{keywords}");
++ if ( $num < 0 ) {
++ if ( $num == -2 ) {
++ $self->error("you need to download $self->{imdbListFiles}->{keywords}
from ftp.imdb.com");
++ }
++ return(1);
++ }
++ elsif ( abs($num - $countEstimate) > $countEstimate*.05 ) {
++ $self->status("ARG estimate of $countEstimate for keywords needs updating,
found $num");
++ }
++ $self->dbinfoAdd("keywords_list_file",
"$self->{imdbListFiles}->{keywords}");
++ $self->dbinfoAdd("keywords_list_file_size", -s
"$self->{imdbListFiles}->{keywords}");
++ $self->dbinfoAdd("db_stat_keywords_count", "$num");
++
++ $self->status("writing stage$stage data ..");
++ {
++ my $countEstimate=$self->dbinfoGet("db_stat_keywords_count", 0);
++ my $progress=Term::ProgressBar->new({name => "writing keywords",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
++
++ my $count=0;
++ for my $movie (keys %{$self->{movies}}) {
++ print OUT "$movie\t$self->{movies}->{$movie}\n";
++
++ $count++;
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $count > $countEstimate ) {
++ $countEstimate = $progress->target($count+100);
++ $next_update=$progress->update($count);
++ }
++ elsif ( $count > $next_update ) {
++ $next_update=$progress->update($count);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(OUT);
++ delete($self->{movies});
+ }
++ }
++ elsif ( $stage == 8 ) {
++ $self->status("parsing Plot list for stage $stage..");
+
+- if ( $title=~s/\s+\((\d\d\d\d)\)$//o ||
+- $title=~s/\s+\((\d\d\d\d)\/[IVX]+\)$//o ) {
+- $year=$1;
++ if ( !defined($self->{imdbListFiles}->{plot}) ) {
++ $self->status("no plot file downloaded, see --with-plot details in
documentation");
++ return(0);
++ }
++
++ my $countEstimate=222222;
++ my $num=$self->readPlots($countEstimate,
"$self->{imdbListFiles}->{plot}");
++ if ( $num < 0 ) {
++ if ( $num == -2 ) {
++ $self->error("you need to download $self->{imdbListFiles}->{plot} from
ftp.imdb.com");
++ }
++ return(1);
++ }
++ elsif ( abs($num - $countEstimate) > $countEstimate*.05 ) {
++ $self->status("ARG estimate of $countEstimate for plots needs updating, found
$num");
++ }
++ $self->dbinfoAdd("plots_list_file",
"$self->{imdbListFiles}->{plot}");
++ $self->dbinfoAdd("plots_list_file_size", -s
"$self->{imdbListFiles}->{plot}");
++ $self->dbinfoAdd("db_stat_plots_count", "$num");
++
++ $self->status("writing stage$stage data ..");
++ {
++ my $countEstimate=$self->dbinfoGet("db_stat_plots_count", 0);
++ my $progress=Term::ProgressBar->new({name => "writing plots",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
++
++ my $count=0;
++ for my $movie (keys %{$self->{movies}}) {
++ print OUT "$movie\t$self->{movies}->{$movie}\n";
++
++ $count++;
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $count > $countEstimate ) {
++ $countEstimate = $progress->target($count+100);
++ $next_update=$progress->update($count);
++ }
++ elsif ( $count > $next_update ) {
++ $next_update=$progress->update($count);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(OUT);
++ delete($self->{movies});
++ }
++ }
++ elsif ( $stage == $self->{stageLast} ) {
++ my $tab=sprintf("\t");
++
++ $self->status("indexing all previous stage's data for stage
".$self->{stageLast}."..");
++
++ $self->status("parsing stage 1 data (movie list)..");
++ my %movies;
++ {
++ my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
++ my $progress=Term::ProgressBar->new({name => "reading titles",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ open(IN, "< $self->{imdbDir}/stage1.data") || die
"$self->{imdbDir}/stage1.data:$!";
++ while(<IN>) {
++ chop();
++ $movies{$_}="";
++
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $. > $countEstimate ) {
++ $countEstimate = $progress->target($.+100);
++ $next_update=$progress->update($.);
++ }
++ elsif ( $. > $next_update ) {
++ $next_update=$progress->update($.);
++ }
++ }
++ }
++ close(IN);
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ }
++
++ $self->status("merging in stage 2 data (directors)..");
++ if ( 1 ) {
++ my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
++ my $progress=Term::ProgressBar->new({name => "merging directors",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ open(IN, "< $self->{imdbDir}/stage2.data") || die
"$self->{imdbDir}/stage2.data:$!";
++ while(<IN>) {
++ chop();
++ s/^([^\t]+)\t//o;
++ if ( !defined($movies{$1}) ) {
++ $self->error("directors list references unidentified title
'$1'");
++ next;
++ }
++ $movies{$1}=$_;
++
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $. > $countEstimate ) {
++ $countEstimate = $progress->target($.+100);
++ $next_update=$progress->update($.);
++ }
++ elsif ( $. > $next_update ) {
++ $next_update=$progress->update($.);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(IN);
+ }
+- elsif ( $title=~s/\s+\((\?\?\?\?)\)$//o ||
+- $title=~s/\s+\((\?\?\?\?)\/[IVX]+\)$//o ) {
+- $year="0000";
++
++ if ( 1 ) {
++ # fill in default for movies we didn't have a director for
++ while (my ($key, $val) = each (%movies)) {
++ if (!length($val)) {
++ $movies{$key}="<>";
++ }
++ }
+ }
+- else {
+- $self->error("movie list format failed to decode year from title
'$title'");
+- $year="0000";
++
++ $self->status("merging in stage 3 data (actors)..");
++ if ( 1 ) {
++ my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
++ my $progress=Term::ProgressBar->new({name => "merging actors",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ open(IN, "< $self->{imdbDir}/stage3.data") || die
"$self->{imdbDir}/stage3.data:$!";
++ while(<IN>) {
++ chop();
++ s/^([^\t]+)\t//o;
++ my $dbkey=$1;
++ my $val=$movies{$dbkey};
++ if ( !defined($val) ) {
++ $self->error("actors list references unidentified title
'$dbkey'");
++ next;
++ }
++ if ( $val=~m/$tab/o ) {
++ $movies{$dbkey}=$val."|".$_;
++ }
++ else {
++ $movies{$dbkey}=$val.$tab.$_;
++ }
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $. > $countEstimate ) {
++ $countEstimate = $progress->target($.+100);
++ $next_update=$progress->update($.);
++ }
++ elsif ( $. > $next_update ) {
++ $next_update=$progress->update($.);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(IN);
++ }
++
++ $self->status("merging in stage 4 data (actresses)..");
++ if ( 1 ) {
++ my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
++ my $progress=Term::ProgressBar->new({name => "merging actresses",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ open(IN, "< $self->{imdbDir}/stage4.data") || die
"$self->{imdbDir}/stage4.data:$!";
++ while(<IN>) {
++ chop();
++ s/^([^\t]+)\t//o;
++ my $dbkey=$1;
++ my $val=$movies{$dbkey};
++ if ( !defined($val) ) {
++ $self->error("actresses list references unidentified title
'$dbkey'");
++ next;
++ }
++ if ( $val=~m/$tab/o ) {
++ $movies{$dbkey}=$val."|".$_;
++ }
++ else {
++ $movies{$dbkey}=$val.$tab.$_;
++ }
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $. > $countEstimate ) {
++ $countEstimate = $progress->target($.+100);
++ $next_update=$progress->update($.);
++ }
++ elsif ( $. > $next_update ) {
++ $next_update=$progress->update($.);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(IN);
++ }
++ if ( 1 ) {
++ # fill in placeholder if no actors were found
++ while (my ($key, $val) = each (%movies)) {
++ if ( !($val=~m/$tab/o) ) {
++ $movies{$key}.=$tab."<>";
++ }
++ }
+ }
+- $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og;
+
+- my $hashkey=lc("$title ($year)");
+- $hashkey=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg;
++ $self->status("merging in stage 5 data (genres)..");
++ if ( 1 ) {
++ my $countEstimate=$self->dbinfoGet("db_stat_genres_count", 1); #
'1' prevents the spurious "(nothing to do)" msg
++ my $progress=Term::ProgressBar->new({name => "merging genres",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ open(IN, "< $self->{imdbDir}/stage5.data") || die
"$self->{imdbDir}/stage5.data:$!";
++ while(<IN>) {
++ chop();
++ s/^([^\t]+)\t//o;
++ my $dbkey=$1;
++ my $genres=$_;
++ my $val=$movies{$dbkey};
++ if ( !defined($val) ) {
++ $self->error("genres list references unidentified title
'$1'");
++ next;
++ }
++ $movies{$dbkey}.=$tab.$genres;
++
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $. > $countEstimate ) {
++ $countEstimate = $progress->target($.+100);
++ $next_update=$progress->update($.);
++ }
++ elsif ( $. > $next_update ) {
++ $next_update=$progress->update($.);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(IN);
++ }
++
++ if ( 1 ) {
++ # fill in placeholder if no genres were found
++ while (my ($key, $val) = each (%movies)) {
++ my $t=index($val, $tab);
++ if ( $t == -1 ) {
++ die "corrupt entry '$key' '$val'";
++ }
++ if ( index($val, $tab, $t+1) == -1 ) {
++ $movies{$key}.=$tab."<>";
++ }
++ }
++ }
+
+- if ( defined($movies{$hashkey}) ) {
+- die "unable to place moviedb key for $key, report to
xmltv-devel\(a)lists.sf.net";
++ $self->status("merging in stage 6 data (ratings)..");
++ if ( 1 ) {
++ my $countEstimate=$self->dbinfoGet("db_stat_ratings_count", 1); #
'1' prevents the spurious "(nothing to do)" msg
++ my $progress=Term::ProgressBar->new({name => "merging ratings",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ open(IN, "< $self->{imdbDir}/stage6.data") || die
"$self->{imdbDir}/stage6.data:$!";
++ while(<IN>) {
++ chop();
++ s/^([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)$//o;
++ my $dbkey=$1;
++ my ($ratingDist, $ratingVotes, $ratingRank)=($2,$3,$4);
++
++ my $val=$movies{$dbkey};
++ if ( !defined($val) ) {
++ $self->error("ratings list references unidentified title
'$1'");
++ next;
++ }
++ $movies{$dbkey}.=$tab.$ratingDist.$tab.$ratingVotes.$tab.$ratingRank;
++
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $. > $countEstimate ) {
++ $countEstimate = $progress->target($.+100);
++ $next_update=$progress->update($.);
++ }
++ elsif ( $. > $next_update ) {
++ $next_update=$progress->update($.);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(IN);
++ }
++
++ if ( 1 ) {
++ # fill in placeholder if no genres were found
++ while (my ($key, $val) = each (%movies)) {
++ my $t=index($val, $tab);
++ if ( $t == -1 ) {
++ die "corrupt entry '$key' '$val'";
++ }
++ my $j=index($val, $tab, $t+1);
++ if ( $j == -1 ) {
++ die "corrupt entry '$key' '$val'";
++ }
++ if ( index($val, $tab, $j+1) == -1 ) {
++ $movies{$key}.=$tab."<>".$tab."<>".$tab."<>";
++ }
++ }
+ }
+- die "title \"$title\" contains a tab" if ( $title=~m/\t/o );
+- #print
"key:$dbkey\n\ttitle=$title\n\tyear=$year\n\tqualifier=$qualifier\n";
+- #print "key $key: value=\"$movies{$key}\"\n";
+
+- $nmovies{$hashkey}=$dbkey.$tab.$year.$tab.$qualifier.$tab.delete($movies{$key});
+- $count++;
++ $self->status("merging in stage 7 data (keywords)..");
++ #if ( 1 ) { # this stage is optional
++ if ( -f "$self->{imdbDir}/stage7.data" ) {
++ my $countEstimate=$self->dbinfoGet("db_stat_keywords_count", 1); #
'1' prevents the spurious "(nothing to do)" msg
++ my $progress=Term::ProgressBar->new({name => "merging keywords",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ open(IN, "< $self->{imdbDir}/stage7.data") || die
"$self->{imdbDir}/stage7.data:$!";
++ while(<IN>) {
++ chop();
++ s/^([^\t]+)\t+//o;
++ my $dbkey=$1;
++ my $keywords=$_;
++ if ( !defined($movies{$dbkey}) ) {
++ $self->error("keywords list references unidentified title
'$1'");
++ next;
++ }
++ $movies{$dbkey}.=$tab.$keywords;
++
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $. > $countEstimate ) {
++ $countEstimate = $progress->target($.+100);
++ $next_update=$progress->update($.);
++ }
++ elsif ( $. > $next_update ) {
++ $next_update=$progress->update($.);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(IN);
++ }
++
++ if ( 1 ) {
++ # fill in default for movies we didn't have any keywords for
++ while (my ($key, $val) = each (%movies)) {
++ #keyword is 6th entry
++ my $t = 0;
++ for my $i (0..4) {
++ $t=index($val, $tab, $t);
++ if ( $t == -1 ) {
++ die "Corrupt entry '$key' '$val'";
++ }
++ $t+=1;
++ }
++ if ( index($val, $tab, $t) == -1 ) {
++ $movies{$key}.=$tab."<>";
++ }
++ }
++ }
+
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+-
+- if ( scalar(keys %movies) != 0 ) {
+- die "what happened, we have keys left ?";
+- }
+- undef(%movies);
+- }
+-
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing database",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(IDX, "> $self->{moviedbIndex}") || die
"$self->{moviedbIndex}:$!";
+- open(DAT, "> $self->{moviedbData}") || die
"$self->{moviedbData}:$!";
+- my $count=0;
+- for my $key (sort {$a cmp $b} keys %nmovies) {
+- my $val=delete($nmovies{$key});
+- #print "movie $key: $val\n";
+- #$val=~s/^([^\t]+)\t([^\t]+)\t([^\t]+)\t//o || die "internal failure
($key:$val)";
+- my ($dbkey, $year, $qualifier,$directors,$actors,@rest)=split('\t', $val);
+- #die ("no 1") if ( !defined($dbkey));
+- #die ("no 2") if ( !defined($year));
+- #die ("no 3") if ( !defined($qualifier));
+- #die ("no 4") if ( !defined($directors));
+- #die ("no 5") if ( !defined($actors));
+- #print "key:$key\n\ttitle=$dbkey\n\tyear=$year\n\tqualifier=$qualifier\n";
+-
+- #my ($directors, $actors)=split('\t', $val);
+-
+- my $details="";
+-
+- if ( $directors eq "<>" ) {
+- $details.="<>";
++ $self->status("merging in stage 8 data (plots)..");
++ #if ( 1 ) { # this stage is optional
++ if ( -f "$self->{imdbDir}/stage8.data" ) {
++ my $countEstimate=$self->dbinfoGet("db_stat_plots_count", 1); #
'1' prevents the spurious "(nothing to do)" msg
++ my $progress=Term::ProgressBar->new({name => "merging plots",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ open(IN, "< $self->{imdbDir}/stage8.data") || die
"$self->{imdbDir}/stage8.data:$!";
++ while(<IN>) {
++ chop();
++ s/^([^\t]+)\t+//o;
++ my $dbkey=$1;
++ my $plot=$_;
++ if ( !defined($movies{$dbkey}) ) {
++ $self->error("plot list references unidentified title '$1'");
++ next;
++ }
++ $movies{$dbkey}.=$tab.$plot;
++
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $. > $countEstimate ) {
++ $countEstimate = $progress->target($.+100);
++ $next_update=$progress->update($.);
++ }
++ elsif ( $. > $next_update ) {
++ $next_update=$progress->update($.);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(IN);
++ }
++
++ if ( 1 ) {
++ # fill in default for movies we didn't have any plot for
++ while (my ($key, $val) = each (%movies)) {
++ #plot is 7th entry
++ my $t = 0;
++ for my $i (0..5) {
++ $t=index($val, $tab, $t);
++ if ( $t == -1 ) {
++ die "Corrupt entry '$key' '$val'";
++ }
++ $t+=1;
++ }
++ if ( index($val, $tab, $t) == -1 ) {
++ $movies{$key}.=$tab."<>";
++ }
++ }
+ }
+- else {
+- # sort directors by last name, removing duplicates
+- my $last='';
+- for my $name (sort {$a cmp $b} split('\|', $directors)) {
+- if ( $name ne $last ) {
+- $details.="$name|";
+- $last=$name;
++
++ #unlink("$self->{imdbDir}/stage1.data");
++ #unlink("$self->{imdbDir}/stage2.data");
++ #unlink("$self->{imdbDir}/stage3.data");
++
++ #
---------------------------------------------------------------------------------------
++
++
++ #
++ # note: not all movies end up with a cast, but we include them anyway.
++ #
++
++ my %nmovies;
++ {
++ my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
++ my $progress=Term::ProgressBar->new({name => "computing index",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ my $count=0;
++ for my $key (keys %movies) {
++ my $dbkey=$key;
++
++ # drop episode information - ex: {Twelve Angry Men (1954)}
++ $dbkey=~s/\s*\{[^\}]+\}//go;
++
++ # todo - this would make things easier
++ # change double-quotes around title to be (made-for-tv) suffix instead
++ if ( $dbkey=~m/^\"/o && #"
++ $dbkey=~m/\"\s*\(/o ) { #"
++ $dbkey.=" (tv_series)";
++ }
++ # how rude, some entries have (TV) appearing more than once.
++ $dbkey=~s/\(TV\)\s*\(TV\)$/(TV)/o;
++
++ my $qualifier;
++ if ( $dbkey=~s/\s+\(TV\)$//o ) {
++ $qualifier="tv_movie";
++ }
++ elsif ( $dbkey=~s/\s+\(mini\) \(tv_series\)$// ) {
++ $qualifier="tv_mini_series";
++ }
++ elsif ( $dbkey=~s/\s+\(tv_series\)$// ) {
++ $qualifier="tv_series";
++ }
++ elsif ( $dbkey=~s/\s+\(mini\)$//o ) {
++ $qualifier="tv_mini_series";
++ }
++ elsif ( $dbkey=~s/\s+\(V\)$//o ) {
++ $qualifier="video_movie";
++ }
++ elsif ( $dbkey=~s/\s+\(VG\)$//o ) {
++ #$qualifier="video_game";
++ delete($movies{$key});
++ next;
++ }
++ else {
++ $qualifier="movie";
++ }
++ #if (
$dbkey=~s/\s+\((tv_series|tv_mini_series|tv_movie|video_movie|video_game)\)$//o ) {
++ # $qualifier=$1;
++ #}
++ my $year;
++ my $title=$dbkey;
++
++ if ( $title=~m/^\"/o && $title=~m/\"\s*\(/o ) { #"
++ $title=~s/^\"//o; #"
++ $title=~s/\"(\s*\()/$1/o; #"
++ }
++
++ if ( $title=~s/\s+\((\d\d\d\d)\)$//o ||
++ $title=~s/\s+\((\d\d\d\d)\/[IVX]+\)$//o ) {
++ $year=$1;
++ }
++ elsif ( $title=~s/\s+\((\?\?\?\?)\)$//o ||
++ $title=~s/\s+\((\?\?\?\?)\/[IVX]+\)$//o ) {
++ $year="0000";
++ }
++ else {
++ $self->error("movie list format failed to decode year from title
'$title'");
++ $year="0000";
++ }
++ $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2
$1/og;
++
++ my $hashkey=lc("$title ($year)");
++ $hashkey=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg;
++
++ if ( defined($movies{$hashkey}) ) {
++ die "unable to place moviedb key for $key, report to
xmltv-devel\(a)lists.sf.net";
++ }
++ die "title \"$title\" contains a tab" if ( $title=~m/\t/o );
++ #print
"key:$dbkey\n\ttitle=$title\n\tyear=$year\n\tqualifier=$qualifier\n";
++ #print "key $key: value=\"$movies{$key}\"\n";
++
++ $nmovies{$hashkey}=$dbkey.$tab.$year.$tab.$qualifier.$tab.delete($movies{$key});
++ $count++;
++
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $count > $countEstimate ) {
++ $countEstimate = $progress->target($count+100);
++ $next_update=$progress->update($count);
++ }
++ elsif ( $count > $next_update ) {
++ $next_update=$progress->update($count);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++
++ if ( scalar(keys %movies) != 0 ) {
++ die "what happened, we have keys left ?";
+ }
+- }
+- $details=~s/\|$//o;
++ undef(%movies);
++ }
++
++ {
++ my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
++ my $progress=Term::ProgressBar->new({name => "writing database",
++ count => $countEstimate,
++ ETA => 'linear'})
++ if ($self->{showProgressBar});
++ $progress->minor(0) if ($self->{showProgressBar});
++ $progress->max_update_rate(1) if ($self->{showProgressBar});
++ my $next_update=0;
++
++ open(IDX, "> $self->{moviedbIndex}") || die
"$self->{moviedbIndex}:$!";
++ open(DAT, "> $self->{moviedbData}") || die
"$self->{moviedbData}:$!";
++ my $count=0;
++ for my $key (sort {$a cmp $b} keys %nmovies) {
++ my $val=delete($nmovies{$key});
++ #print "movie $key: $val\n";
++ #$val=~s/^([^\t]+)\t([^\t]+)\t([^\t]+)\t//o || die "internal failure
($key:$val)";
++ my ($dbkey, $year, $qualifier,$directors,$actors,@rest)=split('\t', $val);
++ #die ("no 1") if ( !defined($dbkey));
++ #die ("no 2") if ( !defined($year));
++ #die ("no 3") if ( !defined($qualifier));
++ #die ("no 4") if ( !defined($directors));
++ #die ("no 5") if ( !defined($actors));
++ #print
"key:$key\n\ttitle=$dbkey\n\tyear=$year\n\tqualifier=$qualifier\n";
++
++ #my ($directors, $actors)=split('\t', $val);
++
++ my $details="";
++
++ if ( $directors eq "<>" ) {
++ $details.="<>";
++ }
++ else {
++ # sort directors by last name, removing duplicates
++ my $last='';
++ for my $name (sort {$a cmp $b} split('\|', $directors)) {
++ if ( $name ne $last ) {
++ $details.="$name|";
++ $last=$name;
++ }
++ }
++ $details=~s/\|$//o;
++ }
++
++ #print " $dbkey: $val\n";
++ if ( $actors eq "<>" ) {
++ $details.=$tab."<>";
++ }
++ else {
++ $details.=$tab;
++
++ # sort actors by billing, removing repeated entries
++ # be warned, two actors may have the same billing level
++ my $last='';
++ for my $c (sort {$a cmp $b} split('\|', $actors)) {
++ my ($billing, $name)=split(':', $c);
++ # remove Host/Narrators from end
++ # BUG - should remove (I)'s from actors/actresses names when details are
generated
++ $name=~s/\s\([IVX]+\)\[/\[/o;
++ $name=~s/\s\([IVX]+\)$//o;
++
++ if ( $name ne $last ) {
++ $details.="$name|";
++ $last=$name;
++ }
++ #print " $c: split gives'$billing' and '$name'\n";
++ }
++ $details=~s/\|$//o;
++ }
++ $count++;
++ my $lineno=sprintf("%07d", $count);
++ print IDX
$key."\t".$dbkey."\t".$year."\t".$qualifier."\t".$lineno."\n";
++ print DAT $lineno.":".$details."\t".join($tab,
@rest)."\n";
++
++ if ($self->{showProgressBar}) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $count > $countEstimate ) {
++ $countEstimate = $progress->target($count+100);
++ $next_update=$progress->update($count);
++ }
++ elsif ( $count > $next_update ) {
++ $next_update=$progress->update($count);
++ }
++ }
++ }
++ $progress->update($countEstimate) if ($self->{showProgressBar});
++ close(DAT);
++ close(IDX);
+ }
+
+- #print " $dbkey: $val\n";
+- if ( $actors eq "<>" ) {
+- $details.=$tab."<>";
++ $self->dbinfoAdd("db_version", $XMLTV::IMDB::VERSION);
++
++ if ( $self->dbinfoSave() ) {
++ $self->error("$self->{moviedbInfo}:$!");
++ return(1);
+ }
+- else {
+- $details.=$tab;
+-
+- # sort actors by billing, removing repeated entries
+- # be warned, two actors may have the same billing level
+- my $last='';
+- for my $c (sort {$a cmp $b} split('\|', $actors)) {
+- my ($billing, $name)=split(':', $c);
+- # remove Host/Narrators from end
+- # BUG - should remove (I)'s from actors/actresses names when details are
generated
+- $name=~s/\s\([IVX]+\)\[/\[/o;
+- $name=~s/\s\([IVX]+\)$//o;
+-
+- if ( $name ne $last ) {
+- $details.="$name|";
+- $last=$name;
+- }
+- #print " $c: split gives'$billing' and '$name'\n";
+- }
+- $details=~s/\|$//o;
++
++ $self->status("running quick sanity check on database indexes...");
++ my $imdb=new XMLTV::IMDB('imdbDir' => $self->{imdbDir},
++ 'verbose' => $self->{verbose});
++
++ if ( -e "$self->{moviedbOffline}" ) {
++ unlink("$self->{moviedbOffline}");
+ }
+- $count++;
+- my $lineno=sprintf("%07d", $count);
+- print IDX
$key."\t".$dbkey."\t".$year."\t".$qualifier."\t".$lineno."\n";
+- print DAT $lineno.":".$details."\t".join($tab,
@rest)."\n";
+
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
++ if ( my $errline=$imdb->sanityCheckDatabase() ) {
++ open(OFF, "> $self->{moviedbOffline}") || die
"$self->{moviedbOffline}:$!";
++ print OFF $errline."\n";
++ print OFF "one of the prep stages' must have produced corrupt data\n";
++ print OFF "report the following details to xmltv-devel\(a)lists.sf.net\n";
++
++ my $info=XMLTV::IMDB::loadDBInfo($self->{moviedbInfo});
++ if ( ref $info eq 'SCALAR' ) {
++ print OFF "\tdbinfo file corrupt\n";
++ print OFF "\t$info";
++ }
++ else {
++ for my $key (sort keys %{$info}) {
++ print OFF "\t$key:$info->{$key}\n";
++ }
++ }
++ print OFF "database taken offline\n";
++ close(OFF);
++ open(OFF, "< $self->{moviedbOffline}") || die
"$self->{moviedbOffline}:$!";
++ while(<OFF>) {
++ chop();
++ $self->error($_);
++ }
++ close(OFF);
++ return(1);
+ }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(DAT);
+- close(IDX);
++ $self->status("sanity intact :)");
++ }
++ else {
++ $self->error("tv_imdb: invalid stage $stage: only
1-".$self->{stageLast}." are valid");
++ return(1);
+ }
+
+- $self->dbinfoAdd("db_version", $XMLTV::IMDB::VERSION);
+-
++ $self->dbinfoAdd("seconds_to_complete_prep_stage_$stage",
(time()-$startTime));
+ if ( $self->dbinfoSave() ) {
+- $self->error("$self->{moviedbInfo}:$!");
+- return(1);
+- }
+-
+- $self->status("running quick sanity check on database indexes...");
+- my $imdb=new XMLTV::IMDB('imdbDir' => $self->{imdbDir},
+- 'verbose' => $self->{verbose});
+-
+- if ( -e "$self->{moviedbOffline}" ) {
+- unlink("$self->{moviedbOffline}");
+- }
+-
+- if ( my $errline=$imdb->sanityCheckDatabase() ) {
+- open(OFF, "> $self->{moviedbOffline}") || die
"$self->{moviedbOffline}:$!";
+- print OFF $errline."\n";
+- print OFF "one of the prep stages' must have produced corrupt
data\n";
+- print OFF "report the following details to xmltv-devel\(a)lists.sf.net\n";
+-
+- my $info=XMLTV::IMDB::loadDBInfo($self->{moviedbInfo});
+- if ( ref $info eq 'SCALAR' ) {
+- print OFF "\tdbinfo file corrupt\n";
+- print OFF "\t$info";
+- }
+- else {
+- for my $key (sort keys %{$info}) {
+- print OFF "\t$key:$info->{$key}\n";
+- }
+- }
+- print OFF "database taken offline\n";
+- close(OFF);
+- open(OFF, "< $self->{moviedbOffline}") || die
"$self->{moviedbOffline}:$!";
+- while(<OFF>) {
+- chop();
+- $self->error($_);
+- }
+- close(OFF);
+- return(1);
+- }
+- $self->status("sanity intact :)");
+- }
+- else {
+- $self->error("tv_imdb: invalid stage $stage: only
1-".$self->{stageLast}." are valid");
+- return(1);
+- }
+-
+- $self->dbinfoAdd("seconds_to_complete_prep_stage_$stage",
(time()-$startTime));
+- if ( $self->dbinfoSave() ) {
+- $self->error("$self->{moviedbInfo}:$!");
+- return(1);
+- }
+- return(0);
++ $self->error("$self->{moviedbInfo}:$!");
++ return(1);
++ }
++ return(0);
+ }
+
+ sub crunchStage($$)
+ {
+- my ($self, $stage)=@_;
+-
+- if ( $stage == $self->{stageLast} ) {
+- # check all the pre-requisite stages have been run
+- for (my $st=1 ; $st < $self->{stageLast}; $st++ ) {
+- if ( !$self->stageComplete($st) ) {
+- #$self->error("prep stages must be run in
sequence..");
+- $self->error("prepStage $st either has never been run or failed");
+- if ( grep { $_ == $st } values %{$self->{optionalStages}} ) {
+- $self->error("data for this stage will NOT be added");
+- } else {
+- $self->error("rerun tv_imdb with --prepStage=$st");
+- return(1);
+- }
+- }
+- }
+- }
+-
+- if ( -f "$self->{moviedbInfo}" && $stage != 1 ) {
+- my $ret=$self->dbinfoLoad();
+- if ( $ret ) {
+- $self->error($ret);
+- return(1);
+- }
+- }
+-
+- $self->redirect("$self->{imdbDir}/stage$stage.log") || return(1);
+- my $ret=$self->invokeStage($stage);
+- $self->redirect(undef);
+-
+- if ( $ret == 0 ) {
+- if ( $self->{errorCountInLog} == 0 ) {
+- $self->status("prep stage $stage succeeded with no errors");
++ my ($self, $stage)=@_;
++
++ if ( $stage == $self->{stageLast} ) {
++ # check all the pre-requisite stages have been run
++ for (my $st=1 ; $st < $self->{stageLast}; $st++ ) {
++ if ( !$self->stageComplete($st) ) {
++ #$self->error("prep stages must be run in sequence..");
++ $self->error("prepStage $st either has never been run or failed");
++ if ( grep { $_ == $st } values %{$self->{optionalStages}} ) {
++ $self->error("data for this stage will NOT be added");
++ } else {
++ $self->error("rerun tv_imdb with --prepStage=$st");
++ return(1);
++ }
++ }
++ }
+ }
+- else {
+- $self->status("prep stage $stage succeeded with $self->{errorCountInLog}
errors in $self->{imdbDir}/stage$stage.log");
+- if ( $stage == $self->{stageLast} && $self->{errorCountInLog} > 30
&& $self->{errorCountInLog} < 80 ) {
+- $self->status("this stage commonly produces around 60 (or so) warnings because
of imdb");
+- $self->status("list file inconsistancies, they can usually be safely
ignored");
+- }
++
++ if ( -f "$self->{moviedbInfo}" && $stage != 1 ) {
++ my $ret=$self->dbinfoLoad();
++ if ( $ret ) {
++ $self->error($ret);
++ return(1);
++ }
+ }
+- }
+- else {
+- if ( $self->{errorCountInLog} == 0 ) {
+- $self->status("prep stage $stage failed (with no logged errors)");
++
++ $self->redirect("$self->{imdbDir}/stage$stage.log") || return(1);
++ my $ret=$self->invokeStage($stage);
++ $self->redirect(undef);
++
++ if ( $ret == 0 ) {
++ if ( $self->{errorCountInLog} == 0 ) {
++ $self->status("prep stage $stage succeeded with no errors");
++ }
++ else {
++ $self->status("prep stage $stage succeeded with $self->{errorCountInLog}
errors in $self->{imdbDir}/stage$stage.log");
++ if ( $stage == $self->{stageLast} && $self->{errorCountInLog} > 30
&& $self->{errorCountInLog} < 80 ) {
++ $self->status("this stage commonly produces around 60 (or so) warnings
because of imdb");
++ $self->status("list file inconsistancies, they can usually be safely
ignored");
++ }
++ }
+ }
+ else {
+- $self->status("prep stage $stage failed with $self->{errorCountInLog}
errors in $self->{imdbDir}/stage$stage.log");
++ if ( $self->{errorCountInLog} == 0 ) {
++ $self->status("prep stage $stage failed (with no logged errors)");
++ }
++ else {
++ $self->status("prep stage $stage failed with $self->{errorCountInLog}
errors in $self->{imdbDir}/stage$stage.log");
++ }
+ }
+- }
+- return($ret);
++ return($ret);
+ }
+
+ 1;
+--
+2.29.2
+
diff --git a/0023-iltapulu-fix-channel-parser.patch
b/0023-iltapulu-fix-channel-parser.patch
new file mode 100644
index 0000000..2a830bd
--- /dev/null
+++ b/0023-iltapulu-fix-channel-parser.patch
@@ -0,0 +1,77 @@
+From 43402a23dee06d7d6304cf6e07e9f634cccf7820 Mon Sep 17 00:00:00 2001
+From: Stefan Becker <chemobejk(a)gmail.com>
+Date: Sat, 26 Dec 2020 18:16:35 +0200
+Subject: [PATCH 23/50] iltapulu: fix channel() parser
+
+The HTML layout on the page has changed.
+
+NOTE: Channel numbers have changed!
+---
+ grab/fi/fi/source/iltapulu.pm | 43 +++++++++++++++++++----------------
+ 1 file changed, 24 insertions(+), 19 deletions(-)
+
+diff --git a/grab/fi/fi/source/iltapulu.pm b/grab/fi/fi/source/iltapulu.pm
+index 6ea298f6..a9c553fe 100644
+--- a/grab/fi/fi/source/iltapulu.pm
++++ b/grab/fi/fi/source/iltapulu.pm
+@@ -32,33 +32,38 @@ sub channels {
+ my %channels;
+
+ # Fetch & parse HTML
+- my $root = fetchTree("https://www.iltapulu.fi/?&all=1");
++ my $root = fetchTree("https://www.iltapulu.fi/kaikki-kanavat",
++ undef, undef, 1);
+ if ($root) {
+ #
+- # Channel list can be found in table rows
++ # Channel list can be found in sections
+ #
+- # <table class="channel-row">
+- # <tbody>
+- # <tr>
+- # <td class="channel-name">...</td>
+- # <td class="channel-name">...</td>
++ # <div id="content">
++ # <div id="programtable" class="programtable-running">
++ # <section id="channel-1" ...>
++ # <a href="/kanava/yle-tv1">
++ # <h2 class="channel-logo">
++ # <img src="/static/img/kanava/yle_tv1.png" alt="YLE TV1
tv-ohjelmat 26.12.2020">
++ # </h2>
++ # </a>
+ # ...
+- # </tr>
+- # </tbody>
+- # ...
+- # </table>
+- # ...
++ # </section>
++ # ...
++ # </div>
++ # </div>
+ #
+- if (my @tables = $root->look_down("class" =>
"channel-row")) {
+- foreach my $table (@tables) {
+- if (my @cells = $table->look_down("class" => "channel-name"))
{
+- foreach my $cell (@cells) {
+- if (my $image = $cell->find("img")) {
++ if (my $table = $root->look_down("id" => "programtable"))
{
++ if (my @sections = $table->look_down("_tag" =>
"section",
++ "id" => qr/^channel-\d+$/)) {
++ foreach my $section (@sections) {
++ if (my $header = $section->look_down("class" =>
"channel-logo")) {
++ if (my $image = $header->find("img")) {
+ my $name = $image->attr("alt");
+- $name =~ s/\s+tv-ohjelmat$//;
++ $name =~ s/\s+tv-ohjelmat.*$//;
+
+ if (defined($name) && length($name)) {
+- my $channel_id = (scalar(keys %channels) + 1) . ".iltapulu.fi";
++ my($channel_id) = $section->attr("id") =~ /(\d+)$/;
++ $channel_id .= ".iltapulu.fi";
+ debug(3, "channel '$name' ($channel_id)");
+ $channels{$channel_id} = "fi $name";
+ }
+--
+2.29.2
+
diff --git a/0024-iltapulu-fix-grab-parser.patch b/0024-iltapulu-fix-grab-parser.patch
new file mode 100644
index 0000000..d3c8b8d
--- /dev/null
+++ b/0024-iltapulu-fix-grab-parser.patch
@@ -0,0 +1,199 @@
+From ffc8b16daa4733e0a2c9c28019b84f942d33d85c Mon Sep 17 00:00:00 2001
+From: Stefan Becker <chemobejk(a)gmail.com>
+Date: Sat, 26 Dec 2020 18:49:23 +0200
+Subject: [PATCH 24/50] iltapulu: fix grab() parser
+
+The HTML layout on the pages has changed.
+---
+ grab/fi/fi/source/iltapulu.pm | 148 +++++++++++++++-------------------
+ 1 file changed, 65 insertions(+), 83 deletions(-)
+
+diff --git a/grab/fi/fi/source/iltapulu.pm b/grab/fi/fi/source/iltapulu.pm
+index a9c553fe..e2ffaad5 100644
+--- a/grab/fi/fi/source/iltapulu.pm
++++ b/grab/fi/fi/source/iltapulu.pm
+@@ -23,6 +23,20 @@ use Carp;
+
+ # Import from internal modules
+ fi::common->import();
++fi::programmeStartOnly->import();
++
++# Category mapping
++our %categories = (
++ e => "elokuvat",
++ f => "fakta",
++ kf => "kotimainen fiktio",
++ l => "lapsi",
++ nan => undef, # ??? e.g. "Astral TV"
++ u => "uutiset",
++ ur => "urheilu",
++ us => "ulkomaiset sarjat",
++ vm => "viihde", # "ja musiikki"???
++);
+
+ # Description
+ sub description { 'iltapulu.fi' }
+@@ -89,98 +103,65 @@ sub grab {
+ return unless my($channel) = ($id =~ /^([-\w]+)\.iltapulu\.fi$/);
+
+ # Fetch & parse HTML
+- my $root = fetchTree("https://www.iltapulu.fi/?all=1&date=" .
$today->ymdd());
++ my $root = fetchTree("https://www.iltapulu.fi/" . $today->ymdd(),
++ undef, undef, 1);
+ if ($root) {
+- my $count = 0;
+- my @objects;
++ my $opaque = startProgrammeList($id, "fi");
+
+ #
+- # Programme data is contained inside a div class="<full-row>"
++ # Programme data is contained inside a li class="g-<category>"
+ #
+- # <table class="channel-row">
+- # <tbody>
+- # <tr>
+- # <td class="channel-name">...</td>
+- # <td class="channel-name">...</td>
+- # ...
+- # </tr>
+- # <tr class="full-row...">
+- # <td>
+- # <div class="schedule">
+- # <div class="full-row" data-starttime="1424643300"
data-endtime="1424656800">
+- # <table>
+- # <tr>
+- # <td class="time">00.15</td>
+- # <td class="title[ movie]">
+- # <a class="program-open..." ... title="... description
...">
+- # Uutisikkuna
+- # </a>
+- # </td>
+- # </tr>
+- # </table>
+- # </div>
+- # </div>
++ # <div id="content">
++ # <div id="programtable" class="programtable-running">
++ # <section id="channel-1" ...>
++ # <a href="/kanava/yle-tv1">
++ # <h2 class="channel-logo">
++ # <img src="/static/img/kanava/yle_tv1.png" alt="YLE TV1
tv-ohjelmat 26.12.2020">
++ # </h2>
++ # </a>
++ # <ul>
++ # <li class="running g-e">
++ # <time
datetime="2020-12-26T15:20:00+02:00">15.20</time>
++ # <b class="pl">
++ # <a href="/joulumaa" class="op" ... title="...
description ...">
++ # Joulumaa
++ # </a>
++ # ...
++ # </b>
++ # ...
++ # </li>
+ # ...
+- # </td>
+- # ...
+- # </tr>
+- # ...
+- # </tbody>
+- # </table>
+- # ...
++ # </ul>
++ # <ul>
+ #
+- if (my @tables = $root->look_down("class" =>
"channel-row")) {
+-
+- TABLES:
+- foreach my $table (@tables) {
+- if (my @cells = $table->look_down("class" => "channel-name"))
{
+-
+- # Channel in this table?
+- my $index = $channel - $count - 1;
+- $count += @cells;
+- if ($channel <= $count) {
+-
+- # Extract from each row the div's from the same index
+- my @divs;
+- if (my @rows = $table->look_down("_tag" => "tr",
+- "class" => qr/full-row/)) {
+- foreach my $row (@rows) {
+- my $children = $row->content_array_ref;
+- if ($children) {
+- my $td = $children->[$index];
+- push(@divs, $td->look_down("class" => qr/full-row/))
+- if defined($td);
+- }
+- }
+- }
+-
+- for my $div (@divs) {
+- my $start = $div->attr("data-starttime");
+- my $end = $div->attr("data-endtime");
+- my $link = $div->look_down("class" => qr/program-open/);
+-
+- if ($start && $end && $link) {
+- my $title = $link->as_text();
+-
+- if (length($title)) {
+- my $desc = $link->attr("title");
+- my $category = ($link->parent()->attr("class") =~ /movie/) ?
"elokuvat" : undef;
+-
+- debug(3, "List entry ${id} ($start -> $end) $title");
++ if (my $table = $root->look_down("id" => "programtable"))
{
++ if (my $section = $table->look_down("_tag" =>
"section",
++ "id" => qr/^channel-${channel}/)) {
++ if (my @entries = $section->look_down("_tag" => "li")) {
++ foreach my $entry (@entries) {
++ my $start = $entry->look_down("_tag" => "time");
++ my $link = $entry->look_down("class" => "op");
++
++ if ($start && $link) {
++ if (my($hour, $minute) =
++ $start->as_text() =~ /^(\d{2})[:.](\d{2})$/) {
++ my $title = $link->as_text();
++
++ if (length($title)) {
++ my $desc = $link->attr("title");
++ my($category) = ($entry->attr("class") =~ /g-(\w+)$/);
++ $category = $categories{$category} if $category;
++
++ debug(3, "List entry ${id} ($hour:$minute) $title");
+ debug(4, $desc) if $desc;
+ debug(4, $category) if defined $category;
+
+- # Create program object
+- my $object = fi::programme->new($id, "fi", $title, $start, $end);
+- $object->category($category);
++ my $object = appendProgramme($opaque, $hour, $minute, $title);
+ $object->description($desc);
+- push(@objects, $object);
++ $object->category($category);
+ }
+ }
+ }
+-
+- # skip the rest of the data
+- last TABLES;
+ }
+ }
+ }
+@@ -189,10 +170,11 @@ sub grab {
+ # Done with the HTML tree
+ $root->delete();
+
+- # Fix overlapping programmes
+- fi::programme->fixOverlaps(\@objects);
+-
+- return(\@objects);
++ # Convert list to program objects
++ #
++ # First entry always starts on $yesterday
++ # Last entry always ends on $tomorrow.
++ return(convertProgrammeList($opaque, $yesterday, $today, $tomorrow));
+ }
+
+ return;
+--
+2.29.2
+
diff --git a/0025-source-avoid-name-clashes-between-modules.patch
b/0025-source-avoid-name-clashes-between-modules.patch
new file mode 100644
index 0000000..8590ab8
--- /dev/null
+++ b/0025-source-avoid-name-clashes-between-modules.patch
@@ -0,0 +1,65 @@
+From b1a03a4a3a982596284806b37b37901589f87710 Mon Sep 17 00:00:00 2001
+From: Stefan Becker <chemobejk(a)gmail.com>
+Date: Sat, 26 Dec 2020 18:59:44 +0200
+Subject: [PATCH 25/50] source: avoid name clashes between modules
+
+Don't use lexically-scoped variables on the top-level of a module, as
+they will clash with same-named variables in another module in the
+merged script. Although they are private use "our" instead of "my".
+---
+ grab/fi/fi/source/foxtv.pm | 2 +-
+ grab/fi/fi/source/telkku.pm | 4 ++--
+ grab/fi/fi/source/yle.pm | 2 +-
+ 3 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/grab/fi/fi/source/foxtv.pm b/grab/fi/fi/source/foxtv.pm
+index 0ecd2b2c..f42d97d4 100644
+--- a/grab/fi/fi/source/foxtv.pm
++++ b/grab/fi/fi/source/foxtv.pm
+@@ -20,7 +20,7 @@ fi::common->import();
+ fi::programmeStartOnly->import();
+
+ # Cleanup filter regexes
+-my $cleanup_match =
qr!\s*(?:(?:\d+\.\s+)?(?:Kausi|Jakso|Osa)\.?(?:\s+(:?\d+/)?\d+\.\s+)?){1,2}!i;
++our $cleanup_match =
qr!\s*(?:(?:\d+\.\s+)?(?:Kausi|Jakso|Osa)\.?(?:\s+(:?\d+/)?\d+\.\s+)?){1,2}!i;
+
+ # Description
+ sub description { 'foxtv.fi' }
+diff --git a/grab/fi/fi/source/telkku.pm b/grab/fi/fi/source/telkku.pm
+index b852da13..99dd8c13 100644
+--- a/grab/fi/fi/source/telkku.pm
++++ b/grab/fi/fi/source/telkku.pm
+@@ -23,7 +23,7 @@ fi::common->import();
+ # Description
+ sub description { 'telkku.com' }
+
+-my %categories = (
++our %categories = (
+ SPORTS => "urheilu",
+ MOVIE => "elokuvat",
+ );
+@@ -52,7 +52,7 @@ sub _getJSON($) {
+ }
+
+ # cache for group name to API ID mapping
+-my %group2id;
++our %group2id;
+
+ # Grab channel list
+ sub channels {
+diff --git a/grab/fi/fi/source/yle.pm b/grab/fi/fi/source/yle.pm
+index 78e611c2..6dbaadc3 100644
+--- a/grab/fi/fi/source/yle.pm
++++ b/grab/fi/fi/source/yle.pm
+@@ -22,7 +22,7 @@ fi::common->import();
+ # Description
+ sub description { 'yle.fi' }
+
+-my %languages = (
++our %languages = (
+ "fi" => [ "areena", "opas" ],
+ "sv" => [ "arenan", "guide" ],
+ );
+--
+2.29.2
+
diff --git a/0026-test.conf-update-to-latest-list-channels-output.patch
b/0026-test.conf-update-to-latest-list-channels-output.patch
new file mode 100644
index 0000000..b9f167e
--- /dev/null
+++ b/0026-test.conf-update-to-latest-list-channels-output.patch
@@ -0,0 +1,266 @@
+From e88322ebc9b0ed070b20b6e89b6e538e325443db Mon Sep 17 00:00:00 2001
+From: Stefan Becker <chemobejk(a)gmail.com>
+Date: Sat, 26 Dec 2020 20:43:44 +0200
+Subject: [PATCH 26/50] test.conf: update to latest list channels output
+
+---
+ grab/fi/test.conf | 147 ++++++++++++++++++++--------------------------
+ 1 file changed, 64 insertions(+), 83 deletions(-)
+
+diff --git a/grab/fi/test.conf b/grab/fi/test.conf
+index 220f9854..188fc833 100644
+--- a/grab/fi/test.conf
++++ b/grab/fi/test.conf
+@@ -14,63 +14,55 @@
+ # NOTE: ##channel are those channels that should not be unmasked during testing
+ #
+ #channel 10.iltapulu.fi AVA
+-#channel 11.iltapulu.fi TV5
+-#channel 12.iltapulu.fi Kutonen
+-#channel 13.iltapulu.fi Hero
+-##channel 14.iltapulu.fi FRII
+-##channel 15.iltapulu.fi TLC
+-##channel 16.iltapulu.fi National Geographic
+-##channel 17.iltapulu.fi MTV
+-##channel 18.iltapulu.fi Discovery Channel
+-##channel 19.iltapulu.fi Disney Channel
++#channel 11.iltapulu.fi Yle Teema & Fem
++##channel 12.iltapulu.fi C More Juniori
++#channel 13.iltapulu.fi Liv
++##channel 17.iltapulu.fi C More Max
+ channel 1.iltapulu.fi YLE TV1
+-##channel 20.iltapulu.fi Eurosport
+-##channel 21.iltapulu.fi Eurosport 2
+-##channel 22.iltapulu.fi C More Max
+-##channel 23.iltapulu.fi C More Juniori
+-##channel 24.iltapulu.fi C More Sport 1
+-##channel 25.iltapulu.fi C More Sport 2
+-##channel 26.iltapulu.fi C More First
+-##channel 27.iltapulu.fi C More Series
+-##channel 28.iltapulu.fi C More Hits
+-##channel 29.iltapulu.fi C More Stars
++##channel 26.iltapulu.fi C More Sport 2
++##channel 28.iltapulu.fi C More First
++##channel 29.iltapulu.fi C More Hits
+ channel 2.iltapulu.fi YLE TV2
+-##channel 30.iltapulu.fi SF-kanalen
+-##channel 31.iltapulu.fi C More First HD
+-##channel 32.iltapulu.fi C More Golf HD
+-##channel 33.iltapulu.fi C More Hits HD
+-##channel 34.iltapulu.fi C More Series HD
+-##channel 35.iltapulu.fi Ruutu+ Lapset
+-##channel 36.iltapulu.fi Ruutu+ Leffat ja Sarjat
+-##channel 37.iltapulu.fi Ruutu+ Dokkarit
+-##channel 38.iltapulu.fi Ruutu+ Urheilu 1
+-##channel 39.iltapulu.fi Ruutu+ Urheilu 2
++##channel 32.iltapulu.fi C More Series
++##channel 34.iltapulu.fi C More Sport 1
+ channel 3.iltapulu.fi MTV3
+-##channel 40.iltapulu.fi Viasat Sport
+-##channel 41.iltapulu.fi Viasat Golf
+-##channel 42.iltapulu.fi Viasat Hockey
+-##channel 43.iltapulu.fi Viasat Urheilu HD
+-##channel 44.iltapulu.fi Viasat Jalkapallo HD
+-##channel 45.iltapulu.fi Viasat Jaakiekko HD
+-##channel 46.iltapulu.fi Viasat Sport Premium
+-##channel 47.iltapulu.fi Viasat Fotboll
+-##channel 48.iltapulu.fi Viasat Film
+-##channel 49.iltapulu.fi Viasat Film Action
++##channel 41.iltapulu.fi SF-kanalen
++##channel 42.iltapulu.fi V Film Premiere
++##channel 43.iltapulu.fi V Film Action
++##channel 46.iltapulu.fi V Film Family
++##channel 49.iltapulu.fi V Sport 1
+ #channel 4.iltapulu.fi Nelonen
+-##channel 50.iltapulu.fi Viasat Film Family
+-##channel 51.iltapulu.fi Viasat Film Comedy
+-##channel 52.iltapulu.fi Viasat Film Hits
+-##channel 53.iltapulu.fi Nickelodeon
+-##channel 54.iltapulu.fi TV7
+-##channel 55.iltapulu.fi RTL
+-#channel 5.iltapulu.fi Sub
+-#channel 6.iltapulu.fi JIM
+-#channel 7.iltapulu.fi Liv
++##channel 51.iltapulu.fi V Sport Golf
++##channel 52.iltapulu.fi V Sport Hockey
++##channel 58.iltapulu.fi Discovery Channel
++##channel 59.iltapulu.fi Eurosport
++#channel 5.iltapulu.fi TV5
++##channel 60.iltapulu.fi Eurosport 2
++##channel 61.iltapulu.fi MTV Finland
++#channel 62.iltapulu.fi Kutonen
++#channel 6.iltapulu.fi Sub
++##channel 70.iltapulu.fi V Sport Urheilu HD
++#channel 73.iltapulu.fi Hero
++##channel 74.iltapulu.fi FRII
++##channel 76.iltapulu.fi V Film Hits
++##channel 77.iltapulu.fi V Sport Jalkapallo HD
++##channel 78.iltapulu.fi V Sport Jaakiekko HD
++##channel 79.iltapulu.fi V Sport Premium
++#channel 7.iltapulu.fi JIM
++##channel 80.iltapulu.fi V Sport Football
++##channel 81.iltapulu.fi TLC
++##channel 82.iltapulu.fi National Geographic
++##channel 83.iltapulu.fi C More Stars
++##channel 85.iltapulu.fi AlfaTV
++##channel 86.iltapulu.fi Paramount Network Finland
++##channel 87.iltapulu.fi Viaplay Urheilu
++##channel 88.iltapulu.fi Cmore
+ #channel 8.iltapulu.fi FOX
+-#channel 9.iltapulu.fi Yle Teema & Fem
++##channel 90.iltapulu.fi Veikkaus TV
++##channel 91.iltapulu.fi Ruutu
+ ##channel
alfatv.ampparit.com AlfaTV
+ ##channel AlfaTV.fi.yle.fi AlfaTV
+-##channel
alfatv.muut.telkku.com AlfaTV
++##channel
alfatv.peruskanavat.telkku.com AlfaTV
+ ##channel AlfaTV.sv.yle.fi AlfaTV
+ ##channel alfatv.telsu.fi AlfaTV
+ ##channel
al-jazeera.uutiset.telkku.com Al Jazeera
+@@ -91,19 +83,16 @@ channel 3.iltapulu.fi MTV3
+ ##channel
cartoon-network.lapset.telkku.com Cartoon Network
+ ##channel
c-more-first.ampparit.com C More First
+ ##channel
cmore-first.elokuvat.telkku.com C More First
+-##channel
cmore-first-hd.elokuvat.telkku.com C More First HD
+ ##channel cmore_first.telsu.fi C More First
+-##channel cmore_golfhd.telsu.fi C More Golf HD
+-##channel
cmore-golf-hd.urheilu.telkku.com C More Golf HD
+ ##channel
c-more-hits.ampparit.com C More Hits
+ ##channel
cmore-hits.elokuvat.telkku.com C More Hits
+-##channel
cmore-hits-hd.elokuvat.telkku.com C More Hits HD
+ ##channel cmore_hits.telsu.fi C More Hits
+ ##channel
c-more-juniori.ampparit.com C More Juniori
++##channel cmore_max2.telsu.fi C More MAX 2
++##channel
cmore-max-2.urheilu.telkku.com C More Max 2
+ ##channel
c-more-max.ampparit.com C More MAX
+ ##channel
c-more-series.ampparit.com C More Series
+ ##channel
cmore-series.elokuvat.telkku.com C More Series
+-##channel
cmore-series-hd.elokuvat.telkku.com C More Series HD
+ ##channel cmore_series.telsu.fi C More Series
+ ##channel cmore_sfkanalen.telsu.fi SF-Kanalen
+ ##channel
c-more-sport-1.ampparit.com C More Sport 1
+@@ -111,19 +100,16 @@ channel 3.iltapulu.fi MTV3
+ ##channel
c-more-stars.ampparit.com C More Stars
+ ##channel
cmore-stars.elokuvat.telkku.com C More Stars
+ ##channel cmore_stars.telsu.fi C More Stars
+-##channel
cmore-tennis.urheilu.telkku.com C More Tennis
+ ##channel
cnbc.uutiset.telkku.com CNBC
+ ##channel cnn.telsu.fi CNN
+ ##channel
cnn.viasat-kulta.telkku.com CNN
+ ##channel
deutsche-welle.uutiset.telkku.com Deutsche Welle
+ ##channel
discovery-channel.ampparit.com Discovery Channel
+ ##channel
discovery-channel.dokumentit.telkku.com Discovery Channel
+-##channel
discovery-hd-showcase.dokumentit.telkku.com Discovery HD Showcase
+ ##channel
discovery-science.ampparit.com Discovery Science
+ ##channel
discovery-science.dokumentit.telkku.com Discovery Science
+ ##channel discovery.telsu.fi Discovery Channel
+ ##channel
discovery-world.ampparit.com Discovery World
+-##channel
discovery-world.dokumentit.telkku.com Discovery World
+ ##channel discoveryworld.telsu.fi Discovery World
+ ##channel
disney-channel.lapset.telkku.com Disney Channel
+ ##channel
disney-junior.lapset.telkku.com Disney Junior
+@@ -210,12 +196,7 @@ channel mtv3.telsu.fi MTV3
+ ##channel nationalgeo.telsu.fi National Geographic Channel
+ #channel
nelonen.ampparit.com Nelonen
+ #channel Nelonen.fi.yle.fi Nelonen
+-##channel
nelonen-maailma.ruutu.telkku.com Ruutu+ Dokkarit
+-##channel
nelonen-nappula.lapset.telkku.com Ruutu+ Lapset
+ #channel
nelonen.peruskanavat.telkku.com Nelonen
+-##channel
nelonen-prime.elokuvat.telkku.com Ruutu+ Leffat ja Sarjat
+-##channel
nelonen-pro-1.urheilu.telkku.com Ruutu+ Urheilu 1
+-##channel
nelonen-pro-2.urheilu.telkku.com Ruutu+ Urheilu 2
+ #channel Nelonen.sv.yle.fi Nelonen
+ #channel nelonen.telsu.fi Nelonen
+ ##channel
nickelodeon.lapset.telkku.com Nickelodeon
+@@ -223,8 +204,9 @@ channel mtv3.telsu.fi MTV3
+ ##channel
nick-jr.lapset.telkku.com Nick Jr
+ ##channel nickjr.telsu.fi Nick Jr
+ ##channel
outdoor-channel.lifestyle.telkku.com Outdoor Channel
+-##channel
paramount-network.muut.telkku.com Paramount Network
+-##channel
playboy-tv.lifestyle.telkku.com Playboy TV
++##channel
paramount-network.ampparit.com Paramount Network
++##channel
paramount-network.peruskanavat.telkku.com Paramount Network
++##channel paramount.telsu.fi Paramount Network
+ ##channel rtl2.telsu.fi RTL II
+ ##channel
rtl.muut.telkku.com RTL
+ ##channel rtl.telsu.fi RTL
+@@ -269,59 +251,58 @@ channel mtv3.telsu.fi MTV3
+ ##channel TV-Finland.sv.yle.fi TV Finland
+ ##channel tvfinland.telsu.fi TV Finland
+ #channel
tv-viisi.ampparit.com TV Viisi
++##channel v_film_action.telsu.fi V film action
++##channel v_film_family.telsu.fi V film family
++##channel v_film_hits.telsu.fi V film hits
++##channel v_film_premiere.telsu.fi V film premiere
+ ##channel
vh1-classic.musiikki.telkku.com VH1 Classic
+ ##channel
vh1.musiikki.telkku.com VH1
+-##channel viasat_action.telsu.fi Viasat Film Action
+-##channel
viasat-esport-tv.ampparit.com Viasat eSportsTV
+ ##channel
viasat-explore.ampparit.com Viasat Explore
+ ##channel viasat_explore.telsu.fi Viasat Explore
+ ##channel
viasat-explore.viasat-kulta.telkku.com Viasat Explore
+-##channel viasat_family.telsu.fi Viasat Film Family
+ ##channel
viasat-film-action.ampparit.com Viasat Film Action
+ ##channel
viasat-film-action.elokuvat.telkku.com Viasat Film Action
+ ##channel
viasat-film.elokuvat.telkku.com Viasat Film Premiere
+ ##channel
viasat-film-family.ampparit.com Viasat Film Family
+ ##channel
viasat-film-family.elokuvat.telkku.com Viasat Film Family
+ ##channel
viasat-film-hits.ampparit.com Viasat Film Hits
+-#channel
viasat-film-hits.viasat-kulta.telkku.com Viasat Film Hits
++#channel
viasat-film-hits.elokuvat.telkku.com Viasat Film Hits
+ ##channel
viasat-film-premiere.ampparit.com Viasat Film Premiere
+-##channel
viasat-fotboll.ampparit.com Viasat Fotboll
++##channel
viasat-fotboll.ampparit.com Viasat Football
+ ##channel
viasat-fotboll-hd.urheilu.telkku.com Viasat Fotboll HD
+-##channel viasat_fotboll.telsu.fi Viasat Fotboll HD
+ ##channel
viasat-golf.ampparit.com Viasat Golf
+-##channel viasat_golf.telsu.fi Viasat Golf
+ ##channel
viasat-golf.urheilu.telkku.com Viasat Golf
+ ##channel
viasat-history.ampparit.com Viasat History
+ ##channel viasat_history.telsu.fi Viasat History
+ ##channel
viasat-history.viasat-kulta.telkku.com Viasat History
+-##channel viasat_hits.telsu.fi Viasat Film Hits
+ ##channel
viasat-hockey.ampparit.com Viasat Hockey
+-##channel
viasat-hockey-finland.urheilu.telkku.com Viasat Urheilu HD
+-##channel viasat_hockey.telsu.fi Viasat Hockey
+ ##channel
viasat-hockey.urheilu.telkku.com Viasat Hockey
+ ##channel
viasat-jaakiekko.ampparit.com Viasat Jääkiekko
+ ##channel
viasat-jaakiekko-hd.urheilu.telkku.com Viasat Jääkiekko HD
+-##channel viasat_jaakiekko.telsu.fi Viasat jääkiekko HD
+ ##channel
viasat-jalkapallo.ampparit.com Viasat Jalkapallo
+ ##channel
viasat-jalkapallo-hd.urheilu.telkku.com Viasat Jalkapallo HD
+-##channel viasat_jalkapallo.telsu.fi Viasat jalkapallo HD
+ ##channel
viasat-nature.ampparit.com Viasat Nature
+ ##channel
viasat-nature-crime.viasat-kulta.telkku.com Viasat Nature/Crime
+ ##channel viasat_nature.telsu.fi Viasat Nature
+-##channel viasat_premiere.telsu.fi Viasat Film Premiere
+-##channel
viasat-sport.ampparit.com Viasat Sport
+-##channel viasat_sport_fi.telsu.fi Viasat Urheilu HD
++##channel
viasat-sport.ampparit.com Viasat Sport1
+ ##channel
viasat-sport-premium.ampparit.com Viasat Sport Premium
+ ##channel
viasat-sport-premium-hd.urheilu.telkku.com Viasat Sport Premium HD
+-##channel viasat_sport_premium.telsu.fi Viasat Sport Premium HD
+-##channel viasat_sport.telsu.fi Viasat Sport
+ ##channel
viasat-sport.urheilu.telkku.com Viasat Sport
+-##channel viasat_sport_xtra.telsu.fi Viasat Sport Xtra
+ ##channel
viasat-ultra.ampparit.com Viasat Ultra HD
+ ##channel
viasat-ultra-hd.viasat-kulta.telkku.com Viasat Ultra HD
+ ##channel
viasat-urheilu.ampparit.com Viasat Urheilu
++##channel
viasat-urheilu-hd.urheilu.telkku.com Viasat Urheilu HD
+ #channel viisi.telsu.fi TV5
+ ##channel
viron-etv.muut.telkku.com Viron ETV
++##channel v_sport_fi.telsu.fi V sport urheilu
++##channel v_sport_football.telsu.fi V sport football
++##channel v_sport_golf.telsu.fi V sport golf
++##channel v_sport_hockey.telsu.fi V sport hockey
++##channel v_sport_jaakiekko.telsu.fi V sport jääkiekko
++##channel v_sport_jalkapallo.telsu.fi V sport jalkapallo
++##channel v_sport_premium.telsu.fi V sport premium
++##channel v_sport.telsu.fi V sport 1
++##channel v_sport_ultra.telsu.fi V sport ultra HD
+ channel yle1.telsu.fi Yle TV1
+ channel yle2.telsu.fi Yle TV2
+ ##channel Yle-Areena.fi.yle.fi Yle Areena
+--
+2.29.2
+
diff --git a/0027-Make-channel-ids-compliant-with-the-DTD.-Use-legacyc.patch
b/0027-Make-channel-ids-compliant-with-the-DTD.-Use-legacyc.patch
new file mode 100644
index 0000000..4264207
--- /dev/null
+++ b/0027-Make-channel-ids-compliant-with-the-DTD.-Use-legacyc.patch
@@ -0,0 +1,66 @@
+From f25ff2b9b98cd7931f37272c64896da412a08d35 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Mon, 28 Dec 2020 08:17:56 +0000
+Subject: [PATCH 27/50] Make channel ids compliant with the DTD. Use
+ --legacychannels for previous format
+
+---
+ grab/uk_tvguide/tv_grab_uk_tvguide | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/grab/uk_tvguide/tv_grab_uk_tvguide b/grab/uk_tvguide/tv_grab_uk_tvguide
+index 9fd62d44..fb8d4329 100755
+--- a/grab/uk_tvguide/tv_grab_uk_tvguide
++++ b/grab/uk_tvguide/tv_grab_uk_tvguide
+@@ -74,7 +74,7 @@ my ($opt, $conf) = ParseOptions({
+ listchannels_sub => \&fetch_channels,
+ version => $VERSION,
+ description => $GRABBER_DESC,
+- extra_options => [qw/nodetailspage/],
++ extra_options => [qw/nodetailspage legacychannels/],
+ });
+
+ #print Dumper($conf); exit;
+@@ -198,7 +198,8 @@ sub fetch_listings {
+
+ # If we need to map the fetched channel_id to a different value
+ my $xmlchannel_id = $channel_id;
+- if (defined(&map_channel_id)) { $xmlchannel_id = map_channel_id($channel_id); }
++ $xmlchannel_id .= '.tvguide.co.uk' unless $opt->{legacychannels}; # make
channel RFC2838 compliant
++ if (defined(&map_channel_id)) { $xmlchannel_id = map_channel_id($xmlchannel_id);
}
+
+ # Fetch the page
+ # my $tree = XMLTV::Get_nice::get_nice_tree($url);
+@@ -237,6 +238,7 @@ sub fetch_listings {
+ #debug 'found channel name: '.$channelname;
+
+ # tvguide website can be very slow - try to avoid barfing when no response
++ # if no channelname then assume we got no response from website
+ if (!defined $channelname) {
+ warning "Unable to retrieve web page for $channel_id";
+ next;
+@@ -723,8 +725,8 @@ sub text_to_num {
+
+ sub map_channel_id {
+ # Map the fetched channel_id to a different value (e.g. our PVR needs specific channel
ids)
+- # mapped channels should be stored in a file called tv_grab_uk_guardian.map.conf
+- # containing lines of the form: map==fromchan==tochan e.g.
'map==5-star==5STAR'
++ # mapped channels should be stored in a file called tv_grab_uk_tvguide.map.conf
++ # containing lines of the form: map==fromchan==tochan e.g. 'map==109==BBC4'
+ #
+ my ($channel_id) = @_;
+ my $mapchannels = \%mapchannelhash;
+@@ -974,6 +976,10 @@ format for the channels you chose for available days including
today.
+
+ Please see B<tv_grab_uk_tvguide --help>
+
++Additional options may be specified on the commandline.
++Use --nodetailspage to fetch only the main details of the programme schedule. (May be
useful if you have problems accessing the tvguide website.)
++Channel ids were made compliant with the XMLTV specification in December 2020. Use
--legacychannels to output channel ids in the previous format (i.e. number only).
++
+ =head1 INSTALLATION
+
+ The file F<tv_grab_uk_tvguide.map.conf> has two purposes. Firstly you can map the
channel ids used by the site into something more meaningful to your PVR. E.g.
+--
+2.29.2
+
diff --git a/0028-Change-whitespace-to-tabs.patch b/0028-Change-whitespace-to-tabs.patch
new file mode 100644
index 0000000..cd7c0c9
--- /dev/null
+++ b/0028-Change-whitespace-to-tabs.patch
@@ -0,0 +1,434 @@
+From 09a29faf43b2a93c06642f80738457b6b3d87fd8 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Mon, 28 Dec 2020 08:47:49 +0000
+Subject: [PATCH 28/50] Change whitespace to tabs
+
+---
+ filter/tv_imdb | 340 ++++++++++++++++++++++++-------------------------
+ 1 file changed, 170 insertions(+), 170 deletions(-)
+
+diff --git a/filter/tv_imdb b/filter/tv_imdb
+index 4e8652ce..83570b4b 100755
+--- a/filter/tv_imdb
++++ b/filter/tv_imdb
+@@ -193,168 +193,168 @@ END
+ use XMLTV::IMDB;
+
+ my ($opt_help,
+- $opt_output,
+- $opt_prepStage,
+- $opt_imdbDir,
+- $opt_quiet,
+- $opt_download,
+- $opt_stats,
+- $opt_debug,
+- $opt_movies_only,
+- $opt_with_keywords,
+- $opt_with_plot,
+- $opt_num_actors,
+- $opt_validate_title,
+- $opt_validate_year,
++ $opt_output,
++ $opt_prepStage,
++ $opt_imdbDir,
++ $opt_quiet,
++ $opt_download,
++ $opt_stats,
++ $opt_debug,
++ $opt_movies_only,
++ $opt_with_keywords,
++ $opt_with_plot,
++ $opt_num_actors,
++ $opt_validate_title,
++ $opt_validate_year,
+ );
+
+-GetOptions('help' => \$opt_help,
+- 'output=s' => \$opt_output,
+- 'prepStage=s' => \$opt_prepStage,
+- 'imdbdir=s' => \$opt_imdbDir,
+- 'with-keywords' => \$opt_with_keywords,
+- 'with-plot' => \$opt_with_plot,
+- 'movies-only' => \$opt_movies_only,
+- 'actors=s' => \$opt_num_actors,
+- 'quiet' => \$opt_quiet,
+- 'download' => \$opt_download,
+- 'stats' => \$opt_stats,
+- 'debug+' => \$opt_debug,
+- 'validate-title=s' => \$opt_validate_title,
+- 'validate-year=s' => \$opt_validate_year,
+- ) or usage(0);
++GetOptions('help' => \$opt_help,
++ 'output=s' => \$opt_output,
++ 'prepStage=s' => \$opt_prepStage,
++ 'imdbdir=s' => \$opt_imdbDir,
++ 'with-keywords' => \$opt_with_keywords,
++ 'with-plot' => \$opt_with_plot,
++ 'movies-only' => \$opt_movies_only,
++ 'actors=s' => \$opt_num_actors,
++ 'quiet' => \$opt_quiet,
++ 'download' => \$opt_download,
++ 'stats' => \$opt_stats,
++ 'debug+' => \$opt_debug,
++ 'validate-title=s' => \$opt_validate_title,
++ 'validate-year=s' => \$opt_validate_year,
++ ) or usage(0);
+
+ usage(1) if $opt_help;
+ usage(1) if ( not defined($opt_imdbDir) );
+
+-$opt_with_keywords=0 if ( !defined($opt_with_keywords) );
+-$opt_with_plot=0 if ( !defined($opt_with_plot) );
+-$opt_num_actors=3 if ( !defined($opt_num_actors) );
+-$opt_movies_only=0 if ( !defined($opt_movies_only) );
+-$opt_debug=0 if ( !defined($opt_debug) );
++$opt_with_keywords=0 if ( !defined($opt_with_keywords) );
++$opt_with_plot=0 if ( !defined($opt_with_plot) );
++$opt_num_actors=3 if ( !defined($opt_num_actors) );
++$opt_movies_only=0 if ( !defined($opt_movies_only) );
++$opt_debug=0 if ( !defined($opt_debug) );
+
+ $opt_quiet=(defined($opt_quiet));
+ if ( !defined($opt_stats) ) {
+- $opt_stats=!$opt_quiet;
++ $opt_stats=!$opt_quiet;
+ }
+ else {
+- $opt_stats=(defined($opt_stats));
++ $opt_stats=(defined($opt_stats));
+ }
+ $opt_debug=0 if $opt_quiet;
+
+ if ( defined($opt_prepStage) ) {
+- print STDERR <<END
++ print STDERR <<END
+ Building indices. Be warned, this needs a lot of memory for the final stage
+ (working set about 250 megabytes).
+
+ END
+- if ( ! $opt_quiet ) ;
+-
+- my %options =
+- ('imdbDir' => $opt_imdbDir,
+- 'verbose' => !$opt_quiet,
+- 'showProgressBar' => !$opt_quiet,
+- 'stageToRun' => $opt_prepStage,
+- 'downloadMissingFiles' => $opt_download,
+- );
+-
+- if ( $opt_prepStage eq "all" ) {
+- for (my $stage=1 ; $stage <= 9 ; $stage++ ) {
+- my $n=new XMLTV::IMDB::Crunch(%options);
+- if ( !$n ) {
+- exit(1);
+- }
+- my $ret=$n->crunchStage($stage);
+- if ( $ret != 0 ) {
++ if ( ! $opt_quiet ) ;
++
++ my %options =
++ ('imdbDir' => $opt_imdbDir,
++ 'verbose' => !$opt_quiet,
++ 'showProgressBar' => !$opt_quiet,
++ 'stageToRun' => $opt_prepStage,
++ 'downloadMissingFiles' => $opt_download,
++ );
++
++ if ( $opt_prepStage eq "all" ) {
++ for (my $stage=1 ; $stage <= 9 ; $stage++ ) {
++ my $n=new XMLTV::IMDB::Crunch(%options);
++ if ( !$n ) {
++ exit(1);
++ }
++ my $ret=$n->crunchStage($stage);
++ if ( $ret != 0 ) {
++ exit($ret);
++ }
++ }
++ print STDERR "database load complete, let the games begin !\n" if ( !$opt_quiet);
++ exit(0);
++ }
++ else {
++ my $n=new XMLTV::IMDB::Crunch(%options);
++ if ( !$n ) {
++ exit(1);
++ }
++ my $ret=$n->crunchStage(int($opt_prepStage));
++ if ( $ret == 0 && int($opt_prepStage) == 9 ) {
++ print STDERR "database load complete, let the games begin !\n" if ( !$opt_quiet);
++ }
+ exit($ret);
+- }
+ }
+- print STDERR "database load complete, let the games begin !\n" if ( !$opt_quiet);
+- exit(0);
+- }
+- else {
++}
++elsif ( $opt_download ) {
++ my %options =
++ ('imdbDir' => $opt_imdbDir,
++ 'verbose' => !$opt_quiet,
++ 'showProgressBar' => !$opt_quiet,
++ 'stageToRun' => 'all',
++ 'downloadMissingFiles' => $opt_download,
++ );
++
+ my $n=new XMLTV::IMDB::Crunch(%options);
+ if ( !$n ) {
+- exit(1);
+- }
+- my $ret=$n->crunchStage(int($opt_prepStage));
+- if ( $ret == 0 && int($opt_prepStage) == 9 ) {
+- print STDERR "database load complete, let the games begin !\n" if ( !$opt_quiet);
++ exit(1);
+ }
+- exit($ret);
+- }
+-}
+-elsif ( $opt_download ) {
+- my %options =
+- ('imdbDir' => $opt_imdbDir,
+- 'verbose' => !$opt_quiet,
+- 'showProgressBar' => !$opt_quiet,
+- 'stageToRun' => 'all',
+- 'downloadMissingFiles' => $opt_download,
+- );
+-
+- my $n=new XMLTV::IMDB::Crunch(%options);
+- if ( !$n ) {
+- exit(1);
+- }
+- exit(0);
++ exit(0);
+ }
+
+-my $imdb=new XMLTV::IMDB('imdbDir' => $opt_imdbDir,
+- 'verbose' => $opt_debug,
+- 'cacheLookups' => 1,
+- 'cacheLookupSize' => 1000,
+- 'updateKeywords' => $opt_with_keywords,
+- 'updatePlot' => $opt_with_plot,
+- 'numActors' => $opt_num_actors,
+- );
++my $imdb=new XMLTV::IMDB('imdbDir' => $opt_imdbDir,
++ 'verbose' => $opt_debug,
++ 'cacheLookups' => 1,
++ 'cacheLookupSize' => 1000,
++ 'updateKeywords' => $opt_with_keywords,
++ 'updatePlot' => $opt_with_plot,
++ 'numActors' => $opt_num_actors,
++ );
+
+ #$imdb->{verbose}++;
+
+ if ( my $errline=$imdb->sanityCheckDatabase() ) {
+- print STDERR "$errline";
+- print STDERR "tv_imdb: you need to use --prepStage to rebuild\n";
+- exit(1);
++ print STDERR "$errline";
++ print STDERR "tv_imdb: you need to use --prepStage to rebuild\n";
++ exit(1);
+ }
+
+ if ( !$imdb->openMovieIndex() ) {
+- print STDERR "tv_imdb: open database failed\n";
+- exit(1);
++ print STDERR "tv_imdb: open database failed\n";
++ exit(1);
+ }
+
+ if ( defined($opt_validate_title) != defined($opt_validate_year) ) {
+- print STDERR "tv_imdb: both --validate-title and --validate-year must be used together\n";
+- exit(1);
++ print STDERR "tv_imdb: both --validate-title and --validate-year must be used together\n";
++ exit(1);
+ }
+
+ if ( defined($opt_validate_title) && defined($opt_validate_year) ) {
+- my $prog;
+-
+- $prog->{title}->[0]->[0]=$opt_validate_title;
+- $prog->{date}=$opt_validate_year;
+- $imdb->{updateTitles}=0;
+-
+- #print Dumper($prog);
+- my $n=$imdb->augmentProgram($prog, $opt_movies_only);
+- if ( $n ) {
+- $Data::Dumper::Sortkeys = 1; # ensure consistent order of dumped hash
+- #my $encoding;
+- #my $w = new XMLTV::Writer((), encoding => $encoding);
+- #$w->start(shift);
+- #$w->write_programme($n);
+- print Dumper($n);
+- #$w->end();
+- }
+- $imdb->closeMovieIndex();
+- exit(0);
++ my $prog;
++
++ $prog->{title}->[0]->[0]=$opt_validate_title;
++ $prog->{date}=$opt_validate_year;
++ $imdb->{updateTitles}=0;
++
++ #print Dumper($prog);
++ my $n=$imdb->augmentProgram($prog, $opt_movies_only);
++ if ( $n ) {
++ $Data::Dumper::Sortkeys = 1; # ensure consistent order of dumped hash
++ #my $encoding;
++ #my $w = new XMLTV::Writer((), encoding => $encoding);
++ #$w->start(shift);
++ #$w->write_programme($n);
++ print Dumper($n);
++ #$w->end();
++ }
++ $imdb->closeMovieIndex();
++ exit(0);
+ }
+
+ # test that movie database works okay
+ my %w_args = ();
+ if (defined $opt_output) {
+- my $fh = new IO::File ">$opt_output";
+- die "cannot write to $opt_output\n" if not $fh;
+- %w_args = (OUTPUT => $fh);
++ my $fh = new IO::File ">$opt_output";
++ die "cannot write to $opt_output\n" if not $fh;
++ %w_args = (OUTPUT => $fh);
+ }
+
+ my $numberOfSeenChannels=0;
+@@ -363,78 +363,78 @@ my $w;
+ my $encoding; # store encoding of input file
+
+ sub encoding_cb( $ ) {
+- die if defined $w;
+- $encoding = shift; # callback returns the file's encoding
+- $w = new XMLTV::Writer(%w_args, encoding => $encoding);
++ die if defined $w;
++ $encoding = shift; # callback returns the file's encoding
++ $w = new XMLTV::Writer(%w_args, encoding => $encoding);
+ }
+
+ sub credits_cb( $ ) {
+- $w->start(shift);
++ $w->start(shift);
+ }
+
+ my %seen_ch;
+ sub channel_cb( $ ) {
+- my $c = shift;
+- my $id = $c->{id};
+- $Data::Dumper::Sortkeys = 1; # ensure consistent order of dumped hash
+- if (not defined $seen_ch{$id}) {
+- $w->write_channel($c);
+- $seen_ch{$id} = $c;
+- $numberOfSeenChannels++;
+- }
+- elsif (Dumper($seen_ch{$id}) eq Dumper($c)) {
+- # They're identical, okay.
+- }
+- else {
+- warn "channel $id may differ between two files, "
+- . "picking one arbitrarily\n";
+- }
++ my $c = shift;
++ my $id = $c->{id};
++ $Data::Dumper::Sortkeys = 1; # ensure consistent order of dumped hash
++ if (not defined $seen_ch{$id}) {
++ $w->write_channel($c);
++ $seen_ch{$id} = $c;
++ $numberOfSeenChannels++;
++ }
++ elsif (Dumper($seen_ch{$id}) eq Dumper($c)) {
++ # They're identical, okay.
++ }
++ else {
++ warn "channel $id may differ between two files, "
++ . "picking one arbitrarily\n";
++ }
+ }
+
+ sub programme_cb( $ ) {
+- my $prog=shift;
+-
+- # The database made by IMDB.pm is read as iso-8859-1. The xml file may be different (e.g. utf-8).
+- # IMDB::augmentProgram does not re-encode the data it adds, so the output file has invalid characters (bug #440).
+-
+- my $orig_prog = $prog;
+- if (lc($encoding) ne 'iso-8859-1') {
+- # decode the incoming programme
+- $prog = XMLTV::Data::Recursive::Encode->decode($encoding, $prog);
+- }
+-
+- # augmentProgram will now add imdb data as iso-8859-1
+- my $nprog=$imdb->augmentProgram($prog, $opt_movies_only);
+- if ( $nprog ) {
+- if (lc($encoding) ne 'iso-8859-1') {
+- # re-code the modified programme back to original encoding
+- $nprog = XMLTV::Data::Recursive::Encode->encode($encoding, $nprog);
+- }
++ my $prog=shift;
++
++ # The database made by IMDB.pm is read as iso-8859-1. The xml file may be different (e.g. utf-8).
++ # IMDB::augmentProgram does not re-encode the data it adds, so the output file has invalid characters (bug #440).
++
++ my $orig_prog = $prog;
++ if (lc($encoding) ne 'iso-8859-1') {
++ # decode the incoming programme
++ $prog = XMLTV::Data::Recursive::Encode->decode($encoding, $prog);
++ }
++
++ # augmentProgram will now add imdb data as iso-8859-1
++ my $nprog=$imdb->augmentProgram($prog, $opt_movies_only);
++ if ( $nprog ) {
++ if (lc($encoding) ne 'iso-8859-1') {
++ # re-code the modified programme back to original encoding
++ $nprog = XMLTV::Data::Recursive::Encode->encode($encoding, $nprog);
++ }
+ $prog=$nprog;
+- }
+- else {
+- $prog = $orig_prog;
+- }
+-
+- # we only add movie information to programmes
+- # that have a 'date' element defined (since we need
+- # a year to work with when verifing we got the correct
+- # hit in the imdb data)
+- $w->write_programme($prog);
++ }
++ else {
++ $prog = $orig_prog;
++ }
++
++ # we only add movie information to programmes
++ # that have a 'date' element defined (since we need
++ # a year to work with when verifing we got the correct
++ # hit in the imdb data)
++ $w->write_programme($prog);
+ }
+
+ @ARGV = ('-') if not @ARGV;
+
+-XMLTV::parsefiles_callback(\&encoding_cb, \&credits_cb,
+- \&channel_cb, \&programme_cb,
+- @ARGV);
++XMLTV::parsefiles_callback( \&encoding_cb, \&credits_cb,
++ \&channel_cb, \&programme_cb,
++ @ARGV );
+ # we only get a Writer if the encoding callback gets called
+ if ( $w ) {
+- $w->end();
++ $w->end();
+ }
+
+ if ( $opt_stats ) {
+- print STDERR $imdb->getStatsLines($numberOfSeenChannels);
++ print STDERR $imdb->getStatsLines($numberOfSeenChannels);
+ }
+ $imdb->closeMovieIndex();
+ exit(0);
+--
+2.29.2
+
diff --git a/0029-Add-info-message-about-frozen-IMDb-data.patch b/0029-Add-info-message-about-frozen-IMDb-data.patch
new file mode 100644
index 0000000..8249ff3
--- /dev/null
+++ b/0029-Add-info-message-about-frozen-IMDb-data.patch
@@ -0,0 +1,48 @@
+From 92ed6ce8d0f9662ce4362c95a05d9cb6f9b4eff5 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Mon, 28 Dec 2020 08:51:36 +0000
+Subject: [PATCH 29/50] Add info message about frozen IMDb data
+
+---
+ filter/tv_imdb | 23 +++++++++++++++++++----
+ 1 file changed, 19 insertions(+), 4 deletions(-)
+
+diff --git a/filter/tv_imdb b/filter/tv_imdb
+index 83570b4b..914a421b 100755
+--- a/filter/tv_imdb
++++ b/filter/tv_imdb
+@@ -243,12 +243,27 @@ else {
+ $opt_debug=0 if $opt_quiet;
+
+ if ( defined($opt_prepStage) ) {
+- print STDERR <<END
+-Building indices. Be warned, this needs a lot of memory for the final stage
+-(working set about 250 megabytes).
++ if ( ! $opt_quiet ) {
++ print STDERR <<END;
++Building data files.
+
++The IMDb data files used by XMLTV were frozen by Amazon in December 2017.
++No updates will be made to the data files after this date.
++No new films will be added.
++
++If you have a successful build of these data files then there is no reason
++to build them again unless you suspect your existing files are corrupt.
++
++END
++ if ($opt_prepStage eq 'all') {
++ print STDERR <<END;
++Do you wish to continue with a new IMDb data build? (y/n)
+ END
+- if ( ! $opt_quiet ) ;
++ my $yn = <>; # ask for user input
++ chomp($yn);
++ exit(1) if (lc($yn) ne "y");
++ }
++ }
+
+ my %options =
+ ('imdbDir' => $opt_imdbDir,
+--
+2.29.2
+
diff --git a/0030-Add-undocumented-sample-option-to-limit-records-proc.patch b/0030-Add-undocumented-sample-option-to-limit-records-proc.patch
new file mode 100644
index 0000000..24d6b5b
--- /dev/null
+++ b/0030-Add-undocumented-sample-option-to-limit-records-proc.patch
@@ -0,0 +1,104 @@
+From 12fd70a6adf1b96846ae47d00d43b4ec1a3514c3 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Mon, 28 Dec 2020 09:45:17 +0000
+Subject: [PATCH 30/50] Add undocumented --sample option to limit records
+ processed (for debugging use only)
+
+---
+ filter/tv_imdb | 14 +++++++++-----
+ lib/IMDB.pm | 5 +++++
+ 2 files changed, 14 insertions(+), 5 deletions(-)
+
+diff --git a/filter/tv_imdb b/filter/tv_imdb
+index 914a421b..b4f1eb8d 100755
+--- a/filter/tv_imdb
++++ b/filter/tv_imdb
+@@ -206,6 +206,7 @@ my ($opt_help,
+ $opt_num_actors,
+ $opt_validate_title,
+ $opt_validate_year,
++ $opt_sample,
+ );
+
+ GetOptions('help' => \$opt_help,
+@@ -222,6 +223,7 @@ GetOptions('help' => \$opt_help,
+ 'debug+' => \$opt_debug,
+ 'validate-title=s' => \$opt_validate_title,
+ 'validate-year=s' => \$opt_validate_year,
++ 'sample=s' => \$opt_sample,
+ ) or usage(0);
+
+ usage(1) if $opt_help;
+@@ -232,6 +234,7 @@ $opt_with_plot=0 if ( !defined($opt_with_plot) );
+ $opt_num_actors=3 if ( !defined($opt_num_actors) );
+ $opt_movies_only=0 if ( !defined($opt_movies_only) );
+ $opt_debug=0 if ( !defined($opt_debug) );
++$opt_sample=0 if ( !defined($opt_sample) );
+
+ $opt_quiet=(defined($opt_quiet));
+ if ( !defined($opt_stats) ) {
+@@ -266,11 +269,12 @@ END
+ }
+
+ my %options =
+- ('imdbDir' => $opt_imdbDir,
+- 'verbose' => !$opt_quiet,
+- 'showProgressBar' => !$opt_quiet,
+- 'stageToRun' => $opt_prepStage,
+- 'downloadMissingFiles' => $opt_download,
++ ('imdbDir' => $opt_imdbDir,
++ 'verbose' => !$opt_quiet,
++ 'showProgressBar' => !$opt_quiet,
++ 'stageToRun' => $opt_prepStage,
++ 'downloadMissingFiles' => $opt_download,
++ 'sample' => $opt_sample,
+ );
+
+ if ( $opt_prepStage eq "all" ) {
+diff --git a/lib/IMDB.pm b/lib/IMDB.pm
+index 123ceb03..f5d25a15 100644
+--- a/lib/IMDB.pm
++++ b/lib/IMDB.pm
+@@ -1653,6 +1653,7 @@ sub readMoviesOrGenres($$$$)
+ $lineCount++;
+ my $line=$_;
+ #print "read line $lineCount:$line\n";
++ last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); # undocumented option (used in debugging)
+
+ # end is line consisting of only '-'
+ last if ( $line=~m/^\-\-\-\-\-\-\-+/o );
+@@ -1794,6 +1795,7 @@ sub readCastOrDirectors($$$)
+ my $line=$_;
+ $line=~s/\n$//o;
+ #$self->status("read line $lineCount:$line");
++ last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); # undocumented option (used in debugging)
+
+ # end is line consisting of only '-'
+ last if ( $line=~m/^\-\-\-\-\-\-\-+/o );
+@@ -1948,6 +1950,7 @@ sub readRatings($$$$)
+ $lineCount++;
+ my $line=$_;
+ #print "read line $lineCount:$line";
++ last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); # undocumented option (used in debugging)
+
+ $line=~s/\n$//o;
+
+@@ -2027,6 +2030,7 @@ sub readKeywords($$$$)
+ my $count=0;
+ while(<$fh>) {
+ $lineCount++;
++ last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); # undocumented option (used in debugging)
+ my $line=$_;
+ chomp($line);
+ next if ($line =~ m/^\s*$/);
+@@ -2112,6 +2116,7 @@ sub readPlots($$$$)
+ my $count=0;
+ while(<$fh>) {
+ $lineCount++;
++ last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); # undocumented option (used in debugging)
+ my $line=$_;
+ chomp($line);
+ next if ($line =~ m/^\s*$/);
+--
+2.29.2
+
diff --git a/0031-Reduce-memory-usage-during-final-build-stage.patch b/0031-Reduce-memory-usage-during-final-build-stage.patch
new file mode 100644
index 0000000..705eaa6
--- /dev/null
+++ b/0031-Reduce-memory-usage-during-final-build-stage.patch
@@ -0,0 +1,25 @@
+From c4f600054cbd1b912d2ba0bd7e1c533484b0e5c7 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Tue, 29 Dec 2020 12:06:50 +0000
+Subject: [PATCH 31/50] Reduce memory usage during final build stage
+
+---
+ lib/IMDB.pm | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/IMDB.pm b/lib/IMDB.pm
+index f5d25a15..0aa8e6e3 100644
+--- a/lib/IMDB.pm
++++ b/lib/IMDB.pm
+@@ -3138,7 +3138,7 @@ sub invokeStage($$)
+ my $next_update=0;
+
+ my $count=0;
+- for my $key (keys %movies) {
++ while (my ($key, $val) = each (%movies)) {
+ my $dbkey=$key;
+
+ # drop episode information - ex: {Twelve Angry Men (1954)}
+--
+2.29.2
+
diff --git a/0032-Remove-tv-episodes-from-intermediate-files.patch b/0032-Remove-tv-episodes-from-intermediate-files.patch
new file mode 100644
index 0000000..ac4e371
--- /dev/null
+++ b/0032-Remove-tv-episodes-from-intermediate-files.patch
@@ -0,0 +1,76 @@
+From c365e3b0fc2c7303f961657a4994e71b40e5b135 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Wed, 30 Dec 2020 13:31:56 +0000
+Subject: [PATCH 32/50] Remove tv episodes from intermediate files
+
+---
+ lib/IMDB.pm | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/lib/IMDB.pm b/lib/IMDB.pm
+index 0aa8e6e3..da688aee 100644
+--- a/lib/IMDB.pm
++++ b/lib/IMDB.pm
+@@ -1672,10 +1672,10 @@ sub readMoviesOrGenres($$$$)
+ $mkey=~s/\s*\{\{SUSPENDED\}\}//o;
+
+ # ignore {Twelve Angry Men (1954)}
+- $mkey=~s/\s*\{[^\}]+\}//go;
++ #$mkey=~s/\s*\{[^\}]+\}//go;
+
+ # skip enties that have {} in them since they're tv episodes
+- #next if ( $mkey=~s/\s*\{[^\}]+\}$//o );
++ next if ( $mkey=~m/\s*\{[^\}]+\}$/ );
+
+ my $genre=substr($line, $tab);
+
+@@ -1691,7 +1691,8 @@ sub readMoviesOrGenres($$$$)
+ }
+ }
+ else {
+- push(@{$self->{movies}}, $mkey);
++ push(@{$self->{movies}}, $mkey) unless ( $mkey=~m/\s*\{[^\}]+\}$/ ); # skip tv episodes
++
+ # returned count is number of titles found
+ $count++;
+ }
+@@ -1856,7 +1857,8 @@ sub readCastOrDirectors($$$)
+
+ # [honir] this is wrong - this puts cast from all the episodes as though they are in the entire series!
+ # ##ignore {Twelve Angry Men (1954)}
+- $line=~s/\s*\{[^\}]+\}//o;
++ #$line=~s/\s*\{[^\}]+\}//o;
++ next if ( $line=~m/\s*\{[^\}]+\}/ ); # skip tv episodes
+
+ if ( $whatAreWeParsing < 3 ) {
+ if ( $line=~s/\s*\(aka ([^\)]+)\).*$//o ) {
+@@ -1959,6 +1961,8 @@ sub readRatings($$$$)
+ # end is line consisting of only '-'
+ last if ( $line=~m/^\-\-\-\-\-\-\-+/o );
+
++ next if ( $line=~m/\s*\{[^\}]+\}$/ ); # skip tv episodes
++
+ # e.g. New Distribution Votes Rank Title
+ # 0000000133 225568 8.9 12 Angry Men (1957)
+ if ( $line=~s/^\s+([\.|\*|\d]+)\s+(\d+)\s+(\d+)\.(\d+)\s+//o ) {
+@@ -2037,6 +2041,8 @@ sub readKeywords($$$$)
+ my ($title, $keyword) = ($line =~ m/^(.*)\s+(\S+)\s*$/);
+ if ( defined($title) and defined($keyword) ) {
+
++ next if ( $title=~m/\s*\{[^\}]+\}$/ ); # skip tv episodes
++
+ my ($episode) = $title =~ m/^.*\s+(\{.*\})$/;
+
+ # ignore anything which is an episode (e.g. "{Doctor Who (#10.22)}" )
+@@ -2123,6 +2129,8 @@ sub readPlots($$$$)
+ my ($title, $episode) = ($line =~ m/^MV:\s(.*?)\s?(\{.*\})?$/);
+ if ( defined($title) ) {
+
++ next if ( $title=~m/\s*\{[^\}]+\}$/ ); # skip tv episodes
++
+ # ignore anything which is an episode (e.g. "{Doctor Who (#10.22)}" )
+ if ( !defined $episode || $episode eq '' )
+ {
+--
+2.29.2
+
diff --git a/0033-eu_xmltvse-refresh-test.conf.patch b/0033-eu_xmltvse-refresh-test.conf.patch
new file mode 100644
index 0000000..023069f
--- /dev/null
+++ b/0033-eu_xmltvse-refresh-test.conf.patch
@@ -0,0 +1,254 @@
+From c743c5a38d9717eb33f7d5361369a970f96c0499 Mon Sep 17 00:00:00 2001
+From: Nick Morrott <knowledgejunkie(a)gmail.com>
+Date: Thu, 31 Dec 2020 00:20:34 +0000
+Subject: [PATCH 33/50] eu_xmltvse: refresh test.conf
+
+---
+ grab/eu_xmltvse/test.conf | 73 +++++++++++++++++++++++++++------------
+ 1 file changed, 51 insertions(+), 22 deletions(-)
+
+diff --git a/grab/eu_xmltvse/test.conf b/grab/eu_xmltvse/test.conf
+index 800f9130..628b06e9 100644
+--- a/grab/eu_xmltvse/test.conf
++++ b/grab/eu_xmltvse/test.conf
+@@ -8,29 +8,30 @@ channel!2bar.dazn.de
+ channel!3.bluemovie.de
+ channel!360tunebox.spi.pl
+ channel!3sat.de
++channel!aachen.wdr.daserste.de
+ channel!action.sky.de
+ channel!actionhd.sky.de
+ channel!adultchannel.co.uk
+ channel!ae-tv.de
+ channel!animalplanet.discovery.de
+ channel!ar.france24.com
+-channel=arte.de
++channel!arte.de
+ channel!arthouse.spi.pl
+ channel!arts.sky.de
+ channel!asien.dw.de
+-channel!at.viva.tv
+ channel!atlantic.sky.de
+ channel!atlantichd.sky.de
+ channel!atlanticp1.sky.de
+ channel!atv.at
+ channel!atv2.at
+-channel!bangutv.com
+ channel!bbcworldnews.com
+ channel!beate-uhse.tv
+ channel!bfs.daserste.de
+ channel!bibeltv.de
+-channel!blizztv.de
++channel!bielefeld.wdr.daserste.de
++channel!bonn.wdr.daserste.de
+ channel!br-alpha.daserste.de
++channel!br-klassik.daserste.de
+ channel!brandnew.mtv.de
+ channel!brazzerstveurope.com
+ channel!bundesliga1.sky.de
+@@ -58,14 +59,15 @@ channel!bw.swr.daserste.de
+ channel!canal24h.rtve.es
+ channel!cbsreality.tv
+ channel!cinema.sky.de
+-channel!cinemagic.disneychannel.de
+ channel!cinemahd.sky.de
++channel!classic.vh1.se
+ channel!classica.de
+ channel!classics.kabel1.de
+ channel!classicshd.kabel1.de
+ channel!comedy.sky.de
+-channel!comedycentral.at
++channel=comedycentral.at
+ channel!comedycentral.de
++channel!crime.rtl.de
+ channel!crimehd.rtl.de
+ channel=daserste.de
+ channel!de.eonline.com
+@@ -76,8 +78,10 @@ channel!dmax.discovery.de
+ channel!dmaxhd.discovery.de
+ channel!docubox.spi.pl
+ channel!doku.kabel1.de
++channel!dortmund.wdr.daserste.de
+ channel!dr1.dr.dk
+-channel!dr1hd.dr.dk
++channel!duisburg.wdr.daserste.de
++channel!dusseldorf.wdr.daserste.de
+ channel!dw.de
+ channel!eins.sky.de
+ channel!einsextra.daserste.de
+@@ -85,6 +89,9 @@ channel!einsfestival.daserste.de
+ channel!einshd.sky.de
+ channel!emotion.sky.de
+ channel!emotions.sat1.de
++channel!erox.spi.pl
++channel!eroxxx.spi.pl
++channel!essen.wdr.daserste.de
+ channel!euronews.com
+ channel!europa.tve.es
+ channel!europe.bloomberg.com
+@@ -94,20 +101,30 @@ channel!europe.realitykings.com
+ channel!extremesports.com
+ channel!family.cinema.sky.de
+ channel!familyhd.cinema.sky.de
+-channel!familytv.de
+ channel!fashionbox.spi.pl
+ channel!fastandfun.spi.pl
+ channel!fightbox.spi.pl
++channel!filmboxbasic.spi.pl
++channel!filmboxfamily.spi.pl
++channel!filmboxhd.spi.pl
++channel!filmboxplus.spi.pl
++channel!filmboxpremium.spi.pl
+ channel!fixundfoxi.tv
++channel!fm4.orf.at
+ channel!foxchannel.de
+ channel!fr.france24.com
+ channel!france24.com
+ channel!fsf.fightsports.tv
+-channel!ftv.com
++channel!fuel.tv
+ channel!fun.prosieben.de
++channel!geo-television.de
++channel!ginx.tv
++channel!god.tv
+ channel!gold.sat1.de
+ channel!goldhd.sat1.de
++channel!gospel.tv
+ channel!hd.13thstreet.de
++channel!hd.3sat.de
+ channel!hd.anixehd.tv
+ channel!hd.arte.de
+ channel!hd.bibeltv.de
+@@ -116,26 +133,31 @@ channel!hd.daserste.de
+ channel!hd.deluxemusic.tv
+ channel!hd.discovery.de
+ channel!hd.disneychannel.de
++channel!hd.eurosport.de
+ channel!hd.foxchannel.de
+-channel!hd.ftv.com
+ channel!hd.historytv.de
+ channel!hd.kabel1.de
+ channel!hd.kinowelt.tv
++channel!hd.mezzo.tv
++channel!hd.mtv.de
++channel!hd.mtv.se
+ channel!hd.n-tv.de
+ channel!hd.natgeo.de
+ channel!hd.nick.de
+ channel!hd.prosieben.de
+ channel!hd.rtl.de
+-channel!hd.rtl2.de
+ channel!hd.sat1.de
+ channel!hd.servustv.com
++channel!hd.servustv.de
+ channel!hd.sixx.de
+ channel!hd.spiegel-geschichte.tv
+ channel!hd.syfy.de
+ channel!hd.tele5.de
+-channel!hd.tv2.dk
++channel!hd.viva.tv
+ channel!hd.vox.de
++channel!hd.zdf.de
+ channel!heimatkanal.de
++channel!hgtv.discovery.de
+ channel!hits.sky.de
+ channel!hitshd.sky.de
+ channel!hr.daserste.de
+@@ -148,7 +170,6 @@ channel!infokanal.zdf.de
+ channel!int.kinopolska.pl
+ channel!int.kinopolskamuzyka.pl
+ channel!international.rt.com
+-channel!international.skynews.com
+ channel!jr.disneychannel.de
+ channel!jr.nick.de
+ channel!jukebox-tv.de
+@@ -158,24 +179,32 @@ channel!kika.daserste.de
+ channel!kikahd.daserste.de
+ channel!kinowelt.tv
+ channel!krimi.sky.de
++channel!living.rtl.de
+ channel!maxx.prosieben.de
+ channel!maxxhd.prosieben.de
+ channel!mdr.daserste.de
+ channel!mdrhd.daserste.de
++channel!mezzo.tv
+ channel!motorvision.de
++channel!munster.wdr.daserste.de
++channel!mv.ndr.daserste.de
+ channel!n-tv.de
+ channel!n24doku.de
+ channel!natgeo.de
+ channel!ndr.daserste.de
+ channel!ndrhd.daserste.de
++channel!nds.ndr.daserste.de
+ channel!neo.zdf.de
+ channel!neohd.zdf.de
+ channel!nick.de
+-channel!nickelodeon.at
++channel=nickelodeon.at
+ channel!nitro.rtl.de
+ channel!nitrohd.rtl.de
++channel!nord.bfs.daserste.de
+ channel!nostalgie.sky.de
+-channel=orf1.orf.at
++channel!oe1.orf.at
++channel!oe3.orf.at
++channel!orf1.orf.at
+ channel!orf2.orf.at
+ channel!orf3.orf.at
+ channel!outdoorchannel.com
+@@ -192,7 +221,8 @@ channel!polonia.tvp.pl
+ channel!prosieben.de
+ channel!protv.ro
+ channel!puls4.at
+-channel!rbb.daserste.de
++channel!radiob.orf.at
++channel!radiobremen.tv
+ channel!rbb.rbb-online.de
+ channel!rbbberl.rbb-online.de
+ channel!rbbbra.rbb-online.de
+@@ -207,14 +237,15 @@ channel!rtlplus.de
+ channel!rts1.rts.ch
+ channel!rts2.rts.ch
+ channel!russia.rt.com
++channel!s-anhalt.mdr.daserste.de
++channel!sachsen.mdr.daserste.de
+ channel!sat1.de
+ channel!sd.anixehd.tv
+ channel!select.sky.de
+ channel!selecthd.sky.de
+ channel!servustv.com
+-channel!sf1.srf.ch
+-channel!sf2.srf.ch
+-channel!sfi.srf.ch
++channel!sh.ndr.daserste.de
++channel!siegen.wdr.daserste.de
+ channel!sixx.de
+ channel!spiegel-geschichte.tv
+ channel!sport1.sky.de
+@@ -232,7 +263,6 @@ channel!sportaustria.sky.de
+ channel!sportdigital.tv
+ channel!sporthd1.sky.de
+ channel!sporthd10.sky.de
+-channel!sporthd11.sky.de
+ channel!sporthd2.sky.de
+ channel!sporthd3.sky.de
+ channel!sporthd4.sky.de
+@@ -253,7 +283,6 @@ channel!tele5.de
+ channel!tlc.discovery.de
+ channel!travelchanneltv.eu
+ channel!tv2.dk
+-channel!tv5monde.org
+ channel!universalchannel.de
+ channel!urbanint.trace.tv
+ channel!vh1.eu
+@@ -263,5 +292,5 @@ channel!wdrhd.daserste.de
+ channel!welt.de
+ channel!wild.natgeo.de
+ channel!wildhd.natgeo.de
++channel!wuppertal.wdr.daserste.de
+ channel!xd.disneychannel.de
+-channel!zdf.de
+--
+2.29.2
+
diff --git a/0034-dk_dr-disable-grabber-after-source-site-disappeared.patch b/0034-dk_dr-disable-grabber-after-source-site-disappeared.patch
new file mode 100644
index 0000000..e9c4a53
--- /dev/null
+++ b/0034-dk_dr-disable-grabber-after-source-site-disappeared.patch
@@ -0,0 +1,42 @@
+From 4ddda342b379fe16efaea7cb206f560f34a6129d Mon Sep 17 00:00:00 2001
+From: Nick Morrott <knowledgejunkie(a)gmail.com>
+Date: Thu, 31 Dec 2020 00:44:11 +0000
+Subject: [PATCH 34/50] dk_dr: disable grabber after source site disappeared.
+
+A new source site [1] with a JSON API was mentioned in the bug report
+(see below). This may be able to provide a replacement service, terms
+and conditions allowing.
+
+ [1] https://www.dr-massive.com/drtv/tv-guide
+
+Refs: #119
+---
+ Makefile.PL | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/Makefile.PL b/Makefile.PL
+index 270add4c..c85c976e 100644
+--- a/Makefile.PL
++++ b/Makefile.PL
+@@ -291,12 +291,12 @@ my @opt_components
+ 'URI::URL' => 0, },
+ },
+
+- { name => 'tv_grab_dk_dr',
+- blurb => 'Grabber for Denmark (dr.dk)',
+- exes => [ 'grab/dk_dr/tv_grab_dk_dr' ],
+- prereqs => { 'DateTime' => 0,
+- 'IO::Scalar' => 0, },
+- },
++ # { name => 'tv_grab_dk_dr',
++ # blurb => 'Grabber for Denmark (dr.dk)',
++ # exes => [ 'grab/dk_dr/tv_grab_dk_dr' ],
++ # prereqs => { 'DateTime' => 0,
++ # 'IO::Scalar' => 0, },
++ # },
+
+ { name => 'tv_grab_eu_epgdata',
+ blurb => '$$ Grabber for some European countries (epgdata.com)',
+--
+2.29.2
+
diff --git a/0035-Fix-testsuite-for-the-change-to-episode-handling-63.patch b/0035-Fix-testsuite-for-the-change-to-episode-handling-63.patch
new file mode 100644
index 0000000..14a363f
--- /dev/null
+++ b/0035-Fix-testsuite-for-the-change-to-episode-handling-63.patch
@@ -0,0 +1,24 @@
+From 077bcec488eac50741d3ac71b889cc3cd3beb394 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Thu, 31 Dec 2020 16:25:09 +0000
+Subject: [PATCH 35/50] Fix testsuite for the change to episode handling (#63)
+
+---
+ t/data-tv_imdb/lists/movies.list | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/t/data-tv_imdb/lists/movies.list b/t/data-tv_imdb/lists/movies.list
+index 545bde76..89379710 100644
+--- a/t/data-tv_imdb/lists/movies.list
++++ b/t/data-tv_imdb/lists/movies.list
+@@ -30,6 +30,7 @@ Het Movie19 (1991) 1991
+ Een Movie20 (1991) 1991
+ Movie21 aeiouaecnssy (1991) 1991
+ Movie22 dots (1991) 1991
++"The Show1" (2002) 1991
+ "The Show1" (2002) {Episode title1 (#1.1)} 1991
+ "The Show1" (2002) {Episode title2 (#1.2)} 1991
+ "The Show1" (2002) {Episode title1 (#2.1)} 1991
+--
+2.29.2
+
diff --git a/0036-update-windows-xmltv.exe-to-use-PAR-Packer-rather-th.patch b/0036-update-windows-xmltv.exe-to-use-PAR-Packer-rather-th.patch
new file mode 100644
index 0000000..3872e8b
--- /dev/null
+++ b/0036-update-windows-xmltv.exe-to-use-PAR-Packer-rather-th.patch
@@ -0,0 +1,251 @@
+From 1747b204af78a0ed1672525a9b277ad1b2f9b906 Mon Sep 17 00:00:00 2001
+From: Robert Eden <rmeden(a)gmail.com>
+Date: Fri, 1 Jan 2021 20:45:24 -0600
+Subject: [PATCH 36/50] update windows xmltv.exe to use PAR::Packer rather than
+ discontinued PerlApp
+
+---
+ Makefile.PL | 9 +--
+ lib/exe_opt.pl | 107 ++++++++++++++++++++--------------
+ lib/{exe_wrap.pl => xmltv.pl} | 30 ++++++++--
+ 3 files changed, 90 insertions(+), 56 deletions(-)
+ rename lib/{exe_wrap.pl => xmltv.pl} (84%)
+
+diff --git a/Makefile.PL b/Makefile.PL
+index c85c976e..f5c86683 100644
+--- a/Makefile.PL
++++ b/Makefile.PL
+@@ -915,14 +915,11 @@ END
+ #
+ $inherited .= q{
+
+-xmltv.exe :: $(EXE_FILES) lib/exe_wrap.pl lib/exe_opt.pl
++xmltv.exe :: $(EXE_FILES) lib/xmltv.pl lib/exe_opt.pl
+ echo $(EXE_FILES) >exe_files.txt
+ perl lib/exe_opt.pl $(VERSION) >exe_opt.txt
+- echo -lib $(INST_ARCHLIB) --lib $(INST_LIB) >>exe_opt.txt
+- echo -add "$(EXE_FILES)" >>exe_opt.txt
+- echo -bind exe_files.txt >>exe_opt.txt
+- echo -exe xmltv.exe >>exe_opt.txt
+- perlapp @exe_opt.txt lib/exe_wrap.pl
++ echo -a exe_files.txt >>exe_opt.txt
++ pp_autolink -o xmltv.exe --cachedeps=pp.cache --reusable @exe_opt.txt lib/xmltv.pl
$(EXE_FILES)
+ $(RM_F) exe_files.txt
+ $(RM_F) exe_opt.txt
+
+diff --git a/lib/exe_opt.pl b/lib/exe_opt.pl
+index 62f9e5aa..57a285f2 100755
+--- a/lib/exe_opt.pl
++++ b/lib/exe_opt.pl
+@@ -11,39 +11,57 @@ use File::Spec;
+ #
+ # output constants
+ #
+-print '-nologo
+--force
+--add=XMLTV::
+--add=Date::Manip::
+--add DateTime::
+--add Params::Validate::**
+--add Date::Language::
+--add Class::MethodMaker::
+--add Class::MethodMaker::Engine
+--add arybase
+--bind=libexpat-1_.dll[file=C:\strawberry\c\bin\libexpat-1_.dll,extract]
+--bind=libxml2-2_.dll[file=C:\strawberry\c\bin\libxml2-2_.dll,extract]
+--bind=libiconv-2_.dll[file=C:\strawberry\c\bin\libiconv-2_.dll,extract]
+--bind=liblzma-5_.dll[file=C:\strawberry\c\bin\liblzma-5_.dll,extract]
+--bind=zlib1_.dll[file=C:\strawberry\c\bin\zlib1_.dll,extract]
+--bind=libgcc_x86_470.dll[file=C:\strawberry\perl\bin\libgcc_x86_470.dll,extract]
+--bind=libeay32_.dll[file=C:\strawberry\c\bin\libeay32_.dll,extract]
+--bind=SSLeay32_.dll[file=C:\strawberry\c\bin\SSLeay32_.dll,extract]
+--bind
DateTime/Format/Builder/Parser/Regex.pm[file=c:\Strawberry\Perl\site\lib\DateTime\Format\Builder\Parser\Regex.pm,extract]
+--trim=Class::MethodMaker::Scalar
+--trim=Class::MethodMaker::Engine
+--trim=JSON::PP58
+--trim=Test::Builder::IO::Scalar;
+--trim=Win32::Console
+--info CompanyName="XMLTV Project
http://www.xmltv.org"
+--info FileDescription="EXE bundle of XMLTV tools to manage TV Listings"
+--info InternalName=xmltv.exe
+--info OriginalFilename=xmltv.exe
+--info ProductName=xmltv
+--info LegalCopyright="GNU General Public License
http://www.gnu.org/licenses/gpl.txt"
+--icon xmltv_logo.ico
++print '
++-M XMLTV::
++-M Date::Manip::
++-M DateTime::
++-M Params::Validate::
++-M Date::Language::
++-M Class::MethodMaker::
++-X JSON::PP58
++-X Test::Builder::IO::Scalar
++-X Win32::Console
+ ';
+
++#-l C:/strawberry/c/bin/libexpat-1__.dll
++#-l C:/strawberry/c/bin/libxml2-2__.dll
++#-l C:/strawberry/c/bin/libiconv-2__.dll
++#-l C:/strawberry/c/bin/liblzma-5__.dll
++#-l C:/strawberry/c/bin/zlib1__.dll
++
++# not found
++#-l C:/strawberry/perl/bin/libgcc__x86__470.dll
++#-l C:/strawberry/c/bin/libeay32__.dll
++#-l C:/strawberry/c/bin/SSLeay32__.dll
++#-M arybase
++
++# add executable scripts
++open(FILE,"exe_files.txt");
++foreach (split(/ /,<FILE>)) {
++ chomp;
++ next unless $_;
++# print "-a $_\n";
++# print "-c $_\n"; # -a doesn't scan for dependancies
++}
++close FILE;
++
++#-info CompanyName="XMLTV Project
http://www.xmltv.org"
++#-info FileDescription="EXE bundle of XMLTV tools to manage TV Listings"
++#-info InternalName=xmltv.exe
++#-info OriginalFilename=xmltv.exe
++#-info ProductName=xmltv
++#-info LegalCopyright="GNU General Public License
http://www.gnu.org/licenses/gpl.txt"
++#-icon xmltv_logo.ico
++#-l libexpat-1_.dll[file=C:\strawberry\c\bin\libexpat-1_.dll
++#-l libxml2-2_.dll[file=C:\strawberry\c\bin\libxml2-2_.dll
++#-l libiconv-2_.dll[file=C:\strawberry\c\bin\libiconv-2_.dll
++#-l liblzma-5_.dll[file=C:\strawberry\c\bin\liblzma-5_.dll
++#-l zlib1_.dll[file=C:\strawberry\c\bin\zlib1_.dll
++#-l libgcc_x86_470.dll[file=C:\strawberry\perl\bin\libgcc_x86_470.dll
++#-l libeay32_.dll[file=C:\strawberry\c\bin\libeay32_.dll
++#-l SSLeay32_.dll[file=C:\strawberry\c\bin\SSLeay32_.dll
++#-bind
DateTime/Format/Builder/Parser/Regex.pm[file=c:\Strawberry\Perl\site\lib\DateTime\Format\Builder\Parser\Regex.pm
++
+ #
+ # Add XML\Parser\encodings
+ #
+@@ -56,20 +74,21 @@ foreach $dir (@Encoding_Path) {
+ while ($file = readdir DIR)
+ {
+ next unless $file =~ /.enc$/i;
+- print
"-bind=XML/Parser/Encodings/${file}[file=$dir/${file},extract]\n";
++# print "-l XML/Parser/Encodings/${file}[file=$dir/${file}\n";
++# print "-a
c:/Strawberry/perl/vendor/lib/XML/Parser/Encodings/${file}\n";
+ }
+ }
+
+-#
+-# put date in file version field
+-#
+-@date=localtime; $date[4]++; $date[5]+=1900;
+-printf "-info FileVersion=%4d.%d.%d.%d\n",@date[5,4,3,2];
++##
++## put date in file version field
++##
++#@date=localtime; $date[4]++; $date[5]+=1900;
++#printf "-info FileVersion=%4d.%d.%d.%d\n",@date[5,4,3,2];
+
+-#
+-# last fields in product version should ommitable, but it doesn't work.
+-#
+-$version=shift;
+-(a)_=split(/\./,$version);
+-map {$_=0 unless defined $_} @_[0..4];
+-printf "-info ProductVersion=%d.%d.%d.%d\n",@_;
++##
++## last fields in product version should ommitable, but it doesn't work.
++##
++#$version=shift;
++#(a)_=split(/\./,$version);
++#map {$_=0 unless defined $_} @_[0..4];
++#printf "-info ProductVersion=%d.%d.%d.%d\n",@_;
+diff --git a/lib/exe_wrap.pl b/lib/xmltv.pl
+similarity index 84%
+rename from lib/exe_wrap.pl
+rename to lib/xmltv.pl
+index 5ddf73d0..8540dff6 100755
+--- a/lib/exe_wrap.pl
++++ b/lib/xmltv.pl
+@@ -16,6 +16,13 @@
+
+ use File::Basename;
+ use Carp;
++use XMLTV;
++use Date::Manip;
++use DateTime;
++use Params::Validate;
++use Date::Language;
++use Class::MethodMaker;
++use Class::MethodMaker::Engine;
+
+ $Carp::MaxEvalLen=40; # limit confess output
+
+@@ -68,7 +75,7 @@ print STDERR "Timezone is $ENV{TZ}\n" unless $opt_quiet;
+ $cmd = shift || "";
+
+ # --version (and abbreviations thereof)
+-my $VERSION = '0.6.3';
++my $VERSION = '0.6.1';
+ if (index('--version', $cmd) == 0 and length $cmd >= 3) {
+ print "xmltv $VERSION\n";
+ exit;
+@@ -83,9 +90,8 @@ if ($cmd eq 'tv_grab_na_dd',
+ {
+ unless (grep(/^--share/i,@ARGV)) # don't add our --share if one supplied
+ {
+- my $dir = dirname(PerlApp::exe()); # get full program path
++ my $dir = dirname($0); # get full program path
+ $dir =~ s!\\!/!g; # use / not \
+-# die "EXE path contains spaces. This is known to cause problems.\nPlease
move xmltv.exe to a different directory\n" if $dir =~ / /;
+ $dir .= "/share/xmltv";
+ unless (-d $dir )
+ {
+@@ -109,8 +115,11 @@ if ($cmd eq 'exec')
+ {
+ my $exe=shift;
+ $0=$exe;
+- do $exe;
++ print "doing $exe\n";
++ print STDERR "STDERR doing $exe\n";
++ do "./$exe";
+ print STDERR $@ if length($@);
++ print "STDOUT $@" if length($@);
+ exit 1 if length($@);
+ exit 0;
+ }
+@@ -118,7 +127,10 @@ if ($cmd eq 'exec')
+ #
+ # scan through attached files and execute program if found
+ #
+-$files=PerlApp::get_bound_file("exe_files.txt");
++
++#main thread!
++
++$files=PAR::read_file("exe_files.txt");
+ foreach my $exe (split(/ /,$files))
+ {
+ next unless length($exe)>3; #ignore trash
+@@ -128,11 +140,17 @@ foreach my $exe (split(/ /,$files))
+
+ next unless $cmd eq $_;
+
++ $exe="script/$cmd";
++
+ #
+ # execute our command
+ #
+- $0 = $_; # set $0 to our script
++# $0 = $_; # set $0 to our script
++# print STDERR "STDERR about to execute $exe\n";
++# print STDOUT "STDOUT about to execute $exe\n";
+ do $exe;
++# print STDERR "STDERR got <$!> <$?> <$^E> <$@>\n";
++# print STDOUT "STDOUT got <$!> <$?> <$^E> <$@>\n";
+ print STDERR $@ if length($@);
+ exit 1 if length($@);
+ exit 0;
+--
+2.29.2
+
diff --git a/0037-extend-scope-of-title-person-qualifier.patch
b/0037-extend-scope-of-title-person-qualifier.patch
new file mode 100644
index 0000000..2e33edc
--- /dev/null
+++ b/0037-extend-scope-of-title-person-qualifier.patch
@@ -0,0 +1,77 @@
+From 27a9a4d873573938f961ecbd090e3bbe2578ddc7 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Sun, 10 Jan 2021 12:57:39 +0000
+Subject: [PATCH 37/50] extend scope of title/person qualifier
+
+---
+ lib/IMDB.pm | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/lib/IMDB.pm b/lib/IMDB.pm
+index da688aee..62cc665e 100644
+--- a/lib/IMDB.pm
++++ b/lib/IMDB.pm
+@@ -397,7 +397,7 @@ sub getMovieMatches($$$)
+ my $title=$arr[1];
+ if ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\)$//o ) {
+ }
+- elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVX]+\)$//o ) {
++ elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVXL]+\)$//o ) {
+ }
+ else {
+ die "unable to decode year from title key \"$title\", report to
xmltv-devel\(a)lists.sf.net";
+@@ -426,7 +426,7 @@ sub getMovieMatches($$$)
+
+ if ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\)$//o ) {
+ }
+- elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVX]+\)$//o ) {
++ elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVXL]+\)$//o ) {
+ }
+ else {
+ die "unable to decode year from title key \"$title\", report to
xmltv-devel\(a)lists.sf.net";
+@@ -496,7 +496,7 @@ sub getMovieIdDetails($$)
+ if ( $directors ne "<>" ) {
+ for my $name (split('\|', $directors)) {
+ # remove (I) etc from
imdb.com names (kept in place for reference)
+- $name=~s/\s\([IVX]+\)$//o;
++ $name=~s/\s\([IVXL]+\)$//o;
+ # switch name around to be surname last
+ $name=~s/^([^,]+),\s*(.*)$/$2 $1/o;
+ push(@{$results->{directors}}, $name);
+@@ -509,7 +509,7 @@ sub getMovieIdDetails($$)
+ if ( $name=~s/\[([^\]]+)\]$//o ) {
+ $HostNarrator=$1;
+ }
+- $name=~s/\s\([IVX]+\)$//o;
++ $name=~s/\s\([IVXL]+\)$//o;
+
+ # switch name around to be surname last
+ $name=~s/^([^,]+),\s*(.*)$/$2 $1/o;
+@@ -3197,11 +3197,11 @@ sub invokeStage($$)
+ }
+
+ if ( $title=~s/\s+\((\d\d\d\d)\)$//o ||
+- $title=~s/\s+\((\d\d\d\d)\/[IVX]+\)$//o ) {
++ $title=~s/\s+\((\d\d\d\d)\/[IVXL]+\)$//o ) {
+ $year=$1;
+ }
+ elsif ( $title=~s/\s+\((\?\?\?\?)\)$//o ||
+- $title=~s/\s+\((\?\?\?\?)\/[IVX]+\)$//o ) {
++ $title=~s/\s+\((\?\?\?\?)\/[IVXL]+\)$//o ) {
+ $year="0000";
+ }
+ else {
+@@ -3300,8 +3300,8 @@ sub invokeStage($$)
+ my ($billing, $name)=split(':', $c);
+ # remove Host/Narrators from end
+ # BUG - should remove (I)'s from actors/actresses names when details are
generated
+- $name=~s/\s\([IVX]+\)\[/\[/o;
+- $name=~s/\s\([IVX]+\)$//o;
++ $name=~s/\s\([IVXL]+\)\[/\[/o;
++ $name=~s/\s\([IVXL]+\)$//o;
+
+ if ( $name ne $last ) {
+ $details.="$name|";
+--
+2.29.2
+
diff --git a/0038-eu-epgdata-Add-channel-IDs.patch
b/0038-eu-epgdata-Add-channel-IDs.patch
new file mode 100644
index 0000000..38e8446
--- /dev/null
+++ b/0038-eu-epgdata-Add-channel-IDs.patch
@@ -0,0 +1,21 @@
+From 8ac9e1e5da7b5dbf4aceb16f337e65be76aeb4ae Mon Sep 17 00:00:00 2001
+From: Philipp Matthias Hahn <pmhahn(a)pmhahn.de>
+Date: Sun, 10 Jan 2021 16:18:22 +0100
+Subject: [PATCH 38/50] eu-epgdata: Add channel IDs
+
+---
+ grab/eu_epgdata/channel_ids | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/grab/eu_epgdata/channel_ids b/grab/eu_epgdata/channel_ids
+index efa8fe9c..ce0d65e9 100644
+--- a/grab/eu_epgdata/channel_ids
++++ b/grab/eu_epgdata/channel_ids
+@@ -180,3 +180,4 @@
+ 12046;sky1.sky.de:;Sky 1
+ 12102;esports1.sport1.de;eSports1 (ESPO1)
+ 12125;voxup.vox.de;VOXup (VOXUP)
++12052;sr.de;SR Fernsehen
+--
+2.29.2
+
diff --git a/0039-whitespace-changes.patch b/0039-whitespace-changes.patch
new file mode 100644
index 0000000..b91ec88
--- /dev/null
+++ b/0039-whitespace-changes.patch
@@ -0,0 +1,62 @@
+From d8ad8d2adbad6061610b4c77bb8d9a6f91c67214 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Wed, 13 Jan 2021 16:52:59 +0000
+Subject: [PATCH 39/50] whitespace changes
+
+---
+ filter/tv_imdb | 26 +++++++++++++-------------
+ 1 file changed, 13 insertions(+), 13 deletions(-)
+
+diff --git a/filter/tv_imdb b/filter/tv_imdb
+index b4f1eb8d..d134fcf9 100755
+--- a/filter/tv_imdb
++++ b/filter/tv_imdb
+@@ -206,24 +206,24 @@ my ($opt_help,
+ $opt_num_actors,
+ $opt_validate_title,
+ $opt_validate_year,
+- $opt_sample,
+- );
++ $opt_sample,
++ );
+
+-GetOptions('help' => \$opt_help,
++GetOptions('help' => \$opt_help,
+ 'output=s' => \$opt_output,
+- 'prepStage=s' => \$opt_prepStage,
+- 'imdbdir=s' => \$opt_imdbDir,
+- 'with-keywords' => \$opt_with_keywords,
+- 'with-plot' => \$opt_with_plot,
+- 'movies-only' => \$opt_movies_only,
+- 'actors=s' => \$opt_num_actors,
+- 'quiet' => \$opt_quiet,
+- 'download' => \$opt_download,
++ 'prepStage=s' => \$opt_prepStage,
++ 'imdbdir=s' => \$opt_imdbDir,
++ 'with-keywords' => \$opt_with_keywords,
++ 'with-plot' => \$opt_with_plot,
++ 'movies-only' => \$opt_movies_only,
++ 'actors=s' => \$opt_num_actors,
++ 'quiet' => \$opt_quiet,
++ 'download' => \$opt_download,
+ 'stats' => \$opt_stats,
+ 'debug+' => \$opt_debug,
+ 'validate-title=s' => \$opt_validate_title,
+ 'validate-year=s' => \$opt_validate_year,
+- 'sample=s' => \$opt_sample,
++ 'sample=s' => \$opt_sample,
+ ) or usage(0);
+
+ usage(1) if $opt_help;
+@@ -234,7 +234,7 @@ $opt_with_plot=0 if ( !defined($opt_with_plot) );
+ $opt_num_actors=3 if ( !defined($opt_num_actors) );
+ $opt_movies_only=0 if ( !defined($opt_movies_only) );
+ $opt_debug=0 if ( !defined($opt_debug) );
+-$opt_sample=0 if ( !defined($opt_sample) );
++$opt_sample=0 if ( !defined($opt_sample) );
+
+ $opt_quiet=(defined($opt_quiet));
+ if ( !defined($opt_stats) ) {
+--
+2.29.2
+
diff --git a/0040-Reduce-memory-usage-during-database-build-bug-fixes.patch
b/0040-Reduce-memory-usage-during-database-build-bug-fixes.patch
new file mode 100644
index 0000000..c4d1061
--- /dev/null
+++ b/0040-Reduce-memory-usage-during-database-build-bug-fixes.patch
@@ -0,0 +1,3074 @@
+From 20a58f8363c8316284f56b143f3b8cfc7b6ca63e Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Wed, 13 Jan 2021 17:01:55 +0000
+Subject: [PATCH 40/50] Reduce memory usage during database build + bug fixes
+
+---
+ filter/tv_imdb | 8 +-
+ lib/IMDB.pm | 2639 +++++++++++++++++++++++++-----------------------
+ 2 files changed, 1359 insertions(+), 1288 deletions(-)
+
+diff --git a/filter/tv_imdb b/filter/tv_imdb
+index d134fcf9..ba8b804d 100755
+--- a/filter/tv_imdb
++++ b/filter/tv_imdb
+@@ -278,11 +278,11 @@ END
+ );
+
+ if ( $opt_prepStage eq "all" ) {
++ my $n=new XMLTV::IMDB::Crunch(%options);
++ if ( !$n ) {
++ exit(1);
++ }
+ for (my $stage=1 ; $stage <= 9 ; $stage++ ) {
+- my $n=new XMLTV::IMDB::Crunch(%options);
+- if ( !$n ) {
+- exit(1);
+- }
+ my $ret=$n->crunchStage($stage);
+ if ( $ret != 0 ) {
+ exit($ret);
+diff --git a/lib/IMDB.pm b/lib/IMDB.pm
+index 62cc665e..cd805b35 100644
+--- a/lib/IMDB.pm
++++ b/lib/IMDB.pm
+@@ -42,8 +42,16 @@ use open ':encoding(iso-8859-1)'; # try to enforce file
encoding (does this wo
+ # occured in episodic tv programs (reported by Alexy Khrabrov)
+ # .9 = added keywords data
+ # .10 = added plot data
++# .11 = revised method for database creation to reduce memory use
++# bug: remove duplicated genres
++# bug: if TV-version and movie in same year then one (random) was lost
++# bug: multiple films with same title in same year then one was lost
++# bug: movies with (aka...) in title not handled properly
++# bug: incorrect data generated for a tv series (only the last episode found is
stored)
++# bug: genres and cast are rolled-up from all episodes to the series record
(misleading)
+ #
+-our $VERSION = '0.10'; # version number of database
++#
++our $VERSION = '0.11'; # version number of database
+
+ sub new
+ {
+@@ -1329,7 +1337,10 @@ sub getStatsLines($)
+ #
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ package XMLTV::IMDB::Crunch;
++
+ use LWP;
++use XMLTV::Gunzip;
++use IO::File;
+
+ use open ':encoding(iso-8859-1)'; # try to enforce file encoding (does this
work in Perl <5.8.1? )
+
+@@ -1339,6 +1350,10 @@ use constant Have_bar => eval {
+ $Term::ProgressBar::VERSION >= 2;
+ };
+
++my $VERSION = '0.11'; # version number of database
++
++my %titlehash = ();
++
+ #
+ # This package parses and manages to index imdb plain text files from
+ #
ftp.imdb.com/interfaces. (see
http://www.imdb.com/interfaces for
+@@ -1355,6 +1370,11 @@ use constant Have_bar => eval {
+ # the imdb file formats.
+ #
+
++# [honir] 2020-12-27 An undocumented option --sample n will fetch only n records from
each IMDb data file
++# Note the output will not be valid (since the n records will not cross-reference from
the different files)
++# it's simply a way to avoid having to process all 4.5 million titles when you are
debugging!
++
++
+ sub new
+ {
+ my ($type) = shift;
+@@ -1521,8 +1541,8 @@ END
+ }
+
+ if ( %missingListFiles ) {
+- print STDERR "tv_imdb: requires you to download the above files from
ftp.imdb.com\n";
+- print STDERR " see
http://www.imdb.com/interfaces for details\n";
++ print STDERR "tv_imdb: requires you to download the above files from
ftp.fu-berlin.de \n";
++ #print STDERR " see
http://www.imdb.com/interfaces for details\n";
+ print STDERR " or try the --download option\n";
+ #return(undef);
+ return 1;
+@@ -1577,9 +1597,6 @@ sub withThousands ($)
+ return $val;
+ }
+
+-use XMLTV::Gunzip;
+-use IO::File;
+-
+ sub openMaybeGunzip($)
+ {
+ for ( shift ) {
+@@ -1601,24 +1618,189 @@ sub closeMaybeGunzip($$)
+ #return close($_[1]);
+ }
+
+-sub readMoviesOrGenres($$$$)
++sub beginProgressBar($$$)
++{
++ my ($self, $what, $countEstimate)=@_;
++ print STDERR $what.' '.$countEstimate;
++ if ($self->{showProgressBar}) {
++ $self->{progress} = Term::ProgressBar->new({name => "$what",
++ count => $countEstimate*1.01,
++ ETA => 'linear'});
++ $self->{progress}->minor(0) if ($self->{showProgressBar});
++ $self->{progress}->max_update_rate(1) if ($self->{showProgressBar});
++ $self->{count_estimate} = $countEstimate;
++ $self->{next_update} = 0;
++ }
++}
++
++sub updateProgressBar($$$)
++{
++ my ($self, $what, $count)=@_;
++
++ if ( $self->{showProgressBar} ) {
++ # re-adjust target so progress bar doesn't seem too wonky
++ if ( $count > $self->{count_estimate} ) {
++ $self->{count_estimate} = $self->{progress}->target($count*1.05);
++ $self->{next_update} = $self->{progress}->update($count);
++ }
++ elsif ( $count > $self->{next_update} ) {
++ $self->{next_update} = $self->{progress}->update($count);
++ }
++ }
++}
++
++sub endProgressBar($$$)
++{
++ my ($self, $what, $count)=@_;
++
++ if ( $self->{showProgressBar} ) {
++ $self->{progress}->update($self->{count_estimate});
++ }
++}
++
++sub makeTitleKey($$)
++{
++ # make a unique key for each prog title. Also determine the prog type.
++
++ # some edge cases we need to handle:
++ # 1] multiple titles with same year, e.g.
++ # '83 (2017/I)
++ # '83 (2017/II)
++ #
++ # 2] multiple films with same year but different type, e.g.
++ # Journey to the Center of the Earth (2008) # cinema release
++ # Journey to the Center of the Earth (2008) (TV) # TV movie
++ # Journey to the Center of the Earth (2008) (V) # straight to video
++ #
++ # 3] tv series and film with same year, e.g.
++ # "Ashes to Ashes" (2008) # tv series
++ # Ashes to Ashes (2008) # movie
++ #
++ # 4] titles without a year, e.g.
++ # California Cornflakes (????)
++ # Zed (????/II)
++ #
++ # 5] titles including alternatiove title, e.g.
++ # Family Prayers (aka Karim & Suha) (2010)
++ #
++
++ my ($self, $progtitle)=@_;
++
++ # tidy the film title, and extract the prog type
++ #
++ my $dbkey = $progtitle;
++ my $progtype;
++
++ # drop episode information - ex: "Supernatural" (2005) {A Very Supernatural
Christmas (#3.8)}
++ my $isepisode = $dbkey=~s/\s*\{[^\}]+\}//go;
++
++ # remove 'aka' details from prog-title
++ $dbkey =~ s/\s*\((?:aka|as) ([^\)]+)\)//o;
++
++ # todo - this would make things easier
++ # change double-quotes around title to be (made-for-tv) suffix instead
++ if ( $dbkey=~m/^\"/o && #"
++ $dbkey=~m/\"\s*\(/o ) { #"
++ $dbkey.=" (tv_series)";
++ $progtype=4;
++ }
++ # how rude, some entries have (TV) appearing more than once.
++ $dbkey=~s/\(TV\)\s*\(TV\)$/(TV)/o;
++
++ my $qualifier;
++ if ( $dbkey=~m/\s+\(TV\)$/ ) { # don't strip from title - it's considered part
of the title: so we need it for matching against other source files
++ $qualifier="tv_movie";
++ $progtype=2;
++ }
++ elsif ( $dbkey=~m/\s+\(V\)$/ ) { # ditto
++ $qualifier="video_movie";
++ $progtype=3;
++ }
++ elsif ( $dbkey=~m/\s+\(VG\)$/ ) { # ditto
++ $qualifier="video_game";
++ $progtype=5;
++ }
++ elsif ( $dbkey=~s/\s+\(mini\) \(tv_series\)$// ) { # but strip the rest
++ $qualifier="tv_mini_series";
++ $progtype=4;
++ }
++ elsif ( $dbkey=~s/\s+\(tv_series\)$// ) {
++ $qualifier="tv_series";
++ $progtype=4;
++ }
++ elsif ( $dbkey=~s/\s+\(mini\)$//o ) {
++ $qualifier="tv_mini_series";
++ $progtype=4;
++ }
++ else {
++ $qualifier="movie";
++ $progtype=1;
++ }
++
++
++ # make a key from the title
++ #
++ my $year; my $yearcount;
++ my $title = $dbkey;
++
++ if ( $title=~m/^\"/o && $title=~m/\"\s*\(/o ) { # remove " marks
around title
++ $title=~s/^\"//o; #"
++ $title=~s/\"(\s*\()/$1/o; #"
++ }
++
++ # strip the above progtypes from the hashkey
++ $title=~s/\s*\((TV|V|VG)\)$//;
++
++ # extract the year from the title
++ if ( $title=~s/\s+\((\d\d\d\d)\)$//o ||
++ $title=~s/\s+\((\d\d\d\d)\/([IVXL]+)\)$//o ) {
++ $year=$1;
++ }
++ elsif ( $title=~s/\s+\((\?\?\?\?)\)$//o ||
++ $title=~s/\s+\((\?\?\?\?)\/([IVXL]+)\)$//o ) {
++ $year="0000";
++ }
++ else {
++ $self->error("movie list format failed to decode year from title
'$title'");
++ $year="0000";
++ }
++ $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og; #
move definite article to front of title
++
++ $title=~s/\t/ /g; # remove tab chars (there shouldn't be any but it will corrupt
our data output if we find one)
++
++ my $hashkey=lc("$title ($year)"); # use calculated year to avoid things like
"72 Hours (????/I)"
++
++ $hashkey=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg;
++
++ #print STDERR
"input:$dbkey\n\tdbkey:$hashkey\n\ttitle=$title\n\tyear=$year\n\tcounter=$yearcount\n\tqualifier=$qualifier\n";
++
++ return ( $hashkey, $dbkey, $year, $yearcount, $qualifier, $progtype, $isepisode );
++}
++
++sub readMovies($$$$$)
+ {
+- my ($self, $whichMoviesOrGenres, $countEstimate, $file)=@_;
++ # build %movieshash from movies.list source file
++
++ my ($self, $which, $countEstimate, $file, $stage)=@_;
+ my $startTime=time();
+ my $header;
+ my $whatAreWeParsing;
+ my $lineCount=0;
+
+- if ( $whichMoviesOrGenres eq "Movies" ) {
++ if ( $which eq "Movies" ) {
+ $header="MOVIES LIST";
+ $whatAreWeParsing=1;
+ }
+- elsif ( $whichMoviesOrGenres eq "Genres" ) {
+- $header="8: THE GENRES LIST";
+- $whatAreWeParsing=2;
+- }
++
++ $self->beginProgressBar('parsing '.$which, $countEstimate);
++
++
++ #-----------------------------------------------------------
++ # find the start of the actual data
++
+ my $fh = openMaybeGunzip($file) || return(-2);
+ while(<$fh>) {
++ chomp();
+ $lineCount++;
+ if ( m/^$header/ ) {
+ if ( !($_=<$fh>) || !m/^===========/o ) {
+@@ -1633,130 +1815,127 @@ sub readMoviesOrGenres($$$$)
+ }
+ last;
+ }
+- elsif ( $lineCount > 1000 ) {
++ elsif ( $lineCount > 1000 ) { # didn't find the header within the first 1000
lines in the file! (wrong file? file corrupt? data changed?)
+ $self->error("$file: stopping at line $lineCount, didn't see
\"$header\" line");
+ closeMaybeGunzip($file, $fh);
+ return(-1);
+ }
+ }
+
+- my $progress=Term::ProgressBar->new({name => "parsing
$whichMoviesOrGenres",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ( $self->{showProgressBar} );
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+
+- my $count=0;
++
++ #-----------------------------------------------------------
++ # read the movies data, and create the db IDX file (as a temporary file called
stage1.data)
++ # input data are "film-name year" separated by one or more tabs
++ # Army of Darkness (1992) 1992
++
++ my $count=0; my $countout=0;
+ while(<$fh>) {
++ chomp();
+ $lineCount++;
+ my $line=$_;
+- #print "read line $lineCount:$line\n";
+- last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); #
undocumented option (used in debugging)
+-
+- # end is line consisting of only '-'
+- last if ( $line=~m/^\-\-\-\-\-\-\-+/o );
+-
+- $line=~s/\n$//o;
+-
+- my $tab=index($line, "\t");
+- if ( $tab != -1 ) {
+- my $mkey=substr($line, 0, $tab);
+-
+- next if ($mkey=~m/\s*\{\{SUSPENDED\}\}/o);
+-
+- if ( $whatAreWeParsing == 2 ) {
+- # don't see what these are...?
+- # ignore {{SUSPENDED}}
+- $mkey=~s/\s*\{\{SUSPENDED\}\}//o;
+-
+- # ignore {Twelve Angry Men (1954)}
+- #$mkey=~s/\s*\{[^\}]+\}//go;
++ next if ( length($line) == 0 );
++ last if ( $self->{sample} != 0 && $self->{sample} < $count ); #
undocumented option (used in debugging)
++ #$self->status("read line $lineCount:$line");
+
+- # skip enties that have {} in them since they're tv episodes
+- next if ( $mkey=~m/\s*\{[^\}]+\}$/ );
++ # end of data is line consisting of only '-'
++ last if ( $line =~ m/^\-\-\-\-\-\-\-+/o );
+
+- my $genre=substr($line, $tab);
++ my $tabstop = index($line, "\t"); # there is always at least one tabstop in
the incoming data
++ if ( $tabstop != -1 ) {
++ my ($mtitle, $myear) = $line =~ m/^(.*?)\t+(.*)$/;
+
+- # genres sometimes has more than one tab
+- $genre=~s/^\t+//og;
+- if ( defined($self->{movies}{$mkey}) ) {
+- $self->{movies}{$mkey}.="|".$genre;
+- }
+- else {
+- $self->{movies}{$mkey}=$genre;
+- # returned count is number of unique titles found
+- $count++;
+- }
+- }
+- else {
+- push(@{$self->{movies}}, $mkey) unless ( $mkey=~m/\s*\{[^\}]+\}$/ ); # skip tv
episodes
+-
+- # returned count is number of titles found
+- $count++;
+- }
++ next if ($mtitle =~ m/\s*\{\{SUSPENDED\}\}/o);
++
++ # returned count is number of titles found
++ $count++;
+
+- if ( $self->{showProgressBar} ) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+1000);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
++ # compute the data we need for the IDX file
++ # key title year title id
++ #
++ my ($hashkey, $title, $year, $yearcount, $qualifier, $progtype, $isepisode) =
$self->makeTitleKey($mtitle);
++
++ # we don't want "video games"
++ if ($qualifier eq "video_game") { next; }
++
++ # we don't keep episode information TODO: enhancement: change tv_imdb to do
episodes?
++ if ($isepisode == 1) { next; }
++
++ # store the title in a hash of $key=>{$title}
++ if ( defined($self->{movieshash}{$hashkey}) ) { # check for duplicates
++ #
++ # there's a lot (c. 9,000!) instances of duplicate titles in the movies.list
file
++ # so only report where titles are different
++ if ( defined $self->{movieshash}{$hashkey}{$title} &&
$self->{movieshash}{$hashkey}{$title} ne $year."\t".$qualifier ) { #
{."\t".$progtype}
++ $self->error("duplicate moviedb key computed $hashkey - this programme will
be ignored $mtitle");
++ #$self->error(" ".$self->{movieshash}{$hashkey}{$title});
++ next;
+ }
+ }
++
++ # the output IDX and DAT files must be sorted by dbkey (because of the way the
searching is done)
++ # so we need to store all the incoming 4 million records and then sort them TODO:
do the sorting on disc in external call
++ #
++ $self->{movieshash}{$hashkey}{$title} = $year."\t".$qualifier; # we
don't currently use the progtype flag so don't print it
{."\t".$progtype}
++
++ # return number of titles kept
++ $countout++;
++
++ $self->updateProgressBar('', $lineCount);
+ }
+ else {
+ $self->error("$file:$lineCount: unrecognized format (missing tab)");
+- $next_update=$progress->update($count) if ($self->{showProgressBar});
++ $self->updateProgressBar('', $lineCount);
+ }
+ }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
++
++ $self->endProgressBar();
+
+- $self->status(sprintf("parsing $whichMoviesOrGenres found
".withThousands($count)." titles in ".
++ $self->status(sprintf("parsing $which found
".withThousands($countout)." titles in ".
+ withThousands($lineCount)." lines in %d seconds",time()-$startTime));
+
+ closeMaybeGunzip($file, $fh);
+- return($count);
++
++ #-----------------------------------------------------------
++ return($count, $countout);
+ }
+
+-sub readCastOrDirectors($$$)
++sub readCastOrDirectors($$$$$)
+ {
+- my ($self, $whichCastOrDirector, $castCountEstimate, $file)=@_;
++ my ($self, $which, $countEstimate, $file, $stage)=@_;
+ my $startTime=time();
+-
+ my $header;
+ my $whatAreWeParsing;
+ my $lineCount=0;
+
+- if ( $whichCastOrDirector eq "Actors" ) {
++ if ( $which eq "Actors" ) {
+ $header="THE ACTORS LIST";
+ $whatAreWeParsing=1;
+ }
+- elsif ( $whichCastOrDirector eq "Actresses" ) {
++ elsif ( $which eq "Actresses" ) {
+ $header="THE ACTRESSES LIST";
+ $whatAreWeParsing=2;
+ }
+- elsif ( $whichCastOrDirector eq "Directors" ) {
++ elsif ( $which eq "Directors" ) {
+ $header="THE DIRECTORS LIST";
+ $whatAreWeParsing=3;
+ }
+ else {
+ die "why are we here ?";
+ }
++
++ $self->beginProgressBar('parsing '.$which, $countEstimate);
+
+- my $fh = openMaybeGunzip($file) || return(-2);
+- my $progress=Term::ProgressBar->new({name => "parsing
$whichCastOrDirector",
+- count => $castCountEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
++ #
++ # note: not all movies end up with a cast, but we include these movies anyway.
++ #
++
++ #-----------------------------------------------------------
++ # find the start of the actual data
+
++ my $fh = openMaybeGunzip($file) || return(-2);
+ while(<$fh>) {
++ chomp();
+ $lineCount++;
+ if ( m/^$header/ ) {
+ if ( !($_=<$fh>) || !m/^===========/o ) {
+@@ -1788,361 +1967,710 @@ sub readCastOrDirectors($$$)
+ }
+ }
+
+- my $cur_name;
++
++ #-----------------------------------------------------------
++ # read the cast or directors data, and create the stagex.data file
++ # input data are "person-name film-title" separated by one or more tabs
++ # Raimi,Sam Army of Darkness (1992)
++ # person name appears only once for multiple film entries
++
+ my $count=0;
+- my $castNames=0;
++ my $countnames=0;
++ my $cur_name;
+ while(<$fh>) {
++ chomp();
+ $lineCount++;
+ my $line=$_;
+- $line=~s/\n$//o;
++ next if ( length($line) == 0 );
++ last if ( $self->{sample} != 0 && $self->{sample} < $count ); #
undocumented option (used in debugging)
+ #$self->status("read line $lineCount:$line");
+- last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); #
undocumented option (used in debugging)
+
+ # end is line consisting of only '-'
+- last if ( $line=~m/^\-\-\-\-\-\-\-+/o );
+-
+- next if ( length($line) == 0 );
+-
+- if ( $line=~s/^([^\t]+)\t+//o ) {
+- $cur_name=$1;
+- $castNames++;
+-
+- if ( $self->{showProgressBar} ) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $castNames > $castCountEstimate ) {
+- $castCountEstimate = $progress->target($castNames+100);
+- $next_update=$progress->update($castNames);
+- }
+- elsif ( $castNames > $next_update ) {
+- $next_update=$progress->update($castNames);
+- }
+- }
+- }
++ last if ( $line =~ m/^\-\-\-\-\-\-\-+/o );
++
++ my $tabstop = index($line, "\t"); # there is always at least one tabstop in
the incoming data
++ if ( $tabstop != -1 ) {
++ my ($mname, $mtitle) = $line =~ m/^(.*?)\t+(.*)$/; # get person-name (everything up
to the first tab)
+
+- my $billing;
+- my $HostNarrator="";
+- if ( $whatAreWeParsing < 3 ) {
+- # actors or actresses
+- $billing="9999";
+- if ( $line=~s/\s*<(\d+)>//o ) {
+- $billing=sprintf("%04d", int($1));
+- }
++ next if ($mtitle=~m/\s*\{\{SUSPENDED\}\}/o);
++
++ # skip enties that have {} in them since they're tv episodes
++ next if ($mtitle=~m/\s*\{[^\}]+\}$/ );
++
++ # skip "video games"
++ next if ($mtitle=~m/\s+\(VG\)(\s|$)/ );
++ # note may not be end of line e.g. "Ahad, Alex (I) Skullgirls (2012) (VG)
(creative director)"
+
+- if ( (my $start=index($line, " [")) != -1 ) {
+- #my $end=rindex($line, "]");
+- my $ex=substr($line, $start+1);
+
+- if ( $ex=~s/Host//o ) {
+- if ( length($HostNarrator) ) {
+- $HostNarrator.=",";
+- }
+- $HostNarrator.="Host";
++ # returned count is number of directors found
++ $count++;
++
++ $mname =~ s/^\s+|\s+$//g; # trim
++
++ # person name appears only on the first record in a group for this person
++ if ($mname ne '') {
++ $countnames++;
++ $cur_name = $mname;
++ }
++
++
++ # Directors' processing
++ # A. Guggenheim, Sonia After Maiko (2015) (as Sonia Guggenheim)
++ # Journey (2015/III) (as Sonia Guggenheim)
++ # A. Solla, Ricardo "7 vidas" (1999) {(#2.37)}
++ # "7 vidas" (1999) {Atahualpa Yupanqui (#6.20)}
++ #
++
++ # Actors' processing
++ # -Gradowska, Kasia Lewandowska Who are the WWP Women? (2015) (V) [Herself]
<1>
++ # 'Rovel' Torres, Crystal "The Tonight Show Starring Jimmy Fallon"
(2014) {Ice T/Andrew Rannells/Lupe Fiasco (#2.105)} [Herself - Musical Support]
++ # 's Gravemade, Nienke A Short Tour & Farewell (2015)
++ # Tweeduizendseks (2010) (TV) [Yolanda van der Graaf]
++ # Bennett, Mollie "Before the Snap" (2011) (voice) [Narrator]
++ # 'Twinkie' Bird, Tracy "Casting Qs" (2010) {An Interview with
Tracy 'Twinkie' Byrd (#2.14)} (as Twinkie Byrd) [Herself]
++ # Abbott, Tasha (I) "Electives" (2018) [Julie] <41>
++ #
++
++ my $billing;
++ my $hostnarrator;
++ if ( $whatAreWeParsing < 3 ) { # actors or actresses
++
++ # extract/strip the billing
++ $billing="9999";
++ if ( $mtitle =~ s/\s*<(\d+)>//o ) { # e.g. <41>
++ $billing = sprintf("%04d", int($1));
+ }
+- if ( $ex=~s/Narrator//o ) {
+- if ( length($HostNarrator) ) {
+- $HostNarrator.=",";
++
++ # extract/strip the role/character
++ if ( $mtitle =~ s/\s*\[(.*?)\]//o ) { # e.g. [Julie] or [Narrator]
++ if ( $1 =~ m/(Host|Narrator)/ ) { # also picks up "Hostess",
"Co-Host"
++ $hostnarrator = $1;
+ }
+- $HostNarrator.="Narrator";
+ }
+- $line=substr($line, 0, $start);
+- # ignore character name
+ }
+- }
+- # try ignoring these
+- next if ($line=~m/\s*\{\{SUSPENDED\}\}/o);
+-
+- # don't see what these are...?
+- # ignore {{SUSPENDED}}
+- $line=~s/\s*\{\{SUSPENDED\}\}//o;
++
+
+- # [honir] this is wrong - this puts cast from all the episodes as though they are in
the entire series!
+- # ##ignore {Twelve Angry Men (1954)}
+- #$line=~s/\s*\{[^\}]+\}//o;
+- next if ( $line=~m/\s*\{[^\}]+\}/ ); # skip tv episodes
++ #-------------------------------------------------------
++ # tidy the title
+
+- if ( $whatAreWeParsing < 3 ) {
+- if ( $line=~s/\s*\(aka ([^\)]+)\).*$//o ) {
++ # remove the episode if a series
++ if ( $mtitle =~ s/\s*\{[^\}]+\}//o ) { #redundant
+ # $attr=$1;
++ next; # skip tv episodes (we only output main titles so don't store episode
data against the main title)
+ }
+- }
+- if ( $line=~s/ (\(.*)$//o ) {
+- # $attrs=$1;
+- }
+- $line=~s/^\s+//og;
+- $line=~s/\s+$//og;
+-
+- if ( $whatAreWeParsing < 3 ) {
+- if ( $line=~s/\s+Narrator$//o ) {
+- # ignore
++
++ # remove 'aka' details from prog-title
++ if ( $mtitle =~ s/\s*\((?:aka|as) ([^\)]+)\)//o ) {
++ # $attr=$1;
+ }
+- }
+-
+- my $val=$self->{movies}{$line};
+- my $name=$cur_name;
+- if ( length($HostNarrator) ) {
+- $name.="[$HostNarrator]";
+- }
+- if ( defined($billing) ) {
+- if ( defined($val) ) {
+- $self->{movies}{$line}=$val."|$billing:$name";
++
++ # remove prog type (e.g. "(V)" or "(TV)" )
++ # no: don't strip from title - it's considered part of the title: so we need
it for matching against movies.list
++ ##if ( $mtitle =~ s/\s(\((TV|V|VG)\))//o ) {
++ # $attrs=$1;
++ ##}
++
++ # junk everything after " (" (e.g. " (collaborating director)"
)
++ if ( $mtitle =~ s/ (\(.*)$//o ) {
++ # $attrs=$1;
+ }
+- else {
+- $self->{movies}{$line}="$billing:$name";
++
++ $mtitle =~ s/^\s+|\s+$//g; # trim
++
++
++ #-------------------------------------------------------
++ # $mtitle should now contain the programme's title
++ my $title = $mtitle;
++
++ # find the IDX id from the hash of titles ($title=>$lineno) created in stage 1
++ my $idxid = $self->{titleshash}{$title};
++
++ if (!$idxid ) {
++ ## no, don't print errors where we can't match the incoming title - there
are 100s of these in the incoming data
++ ## often where the year on the actor record is 1 year out
++ ## people will get worried if we report over 1000 errors and there's nothing we
can sensibly do about them
++ ##$self->error("$file:$lineCount: cannot find $title in titles list");
++ next;
++ }
++
++
++ #-------------------------------------------------------
++ # the output ".data" files must be sorted by id so they can be merged in
stage final
++ # so we need to store all the incoming records and then sort them
++ #
++ my $mperson = '';
++ $mperson = "$billing:" if ( defined($billing) );
++ $mperson .= $cur_name;
++ $mperson .= " [$hostnarrator]" if ( defined($hostnarrator) ); # this is
wrong: incoming data are "lastname, firstname" so this creates "Huwyler,
Fabio [Host]"
++
++ my $h = "stage${stage}hash";
++ if (defined( $self->{$h}{$idxid} )) {
++ $self->{$h}{$idxid} .= "|".$mperson;
++ } else {
++ $self->{$h}{$idxid} = $mperson;
+ }
++
++
++ $self->updateProgressBar('', $lineCount);
+ }
+ else {
+- if ( defined($val) ) {
+- $self->{movies}{$line}=$val."|$name";
+- }
+- else {
+- $self->{movies}{$line}=$name;
+- }
++ $self->error("$file:$lineCount: unrecognized format (missing tab)");
++ $self->updateProgressBar('', $lineCount);
+ }
+- $count++;
+ }
+- $progress->update($castCountEstimate) if ($self->{showProgressBar});
+-
+- $self->status(sprintf("parsing $whichCastOrDirector found
".withThousands($castNames)." names, ".
++
++ $self->endProgressBar();
++
++ $self->status(sprintf("parsing $which found
".withThousands($countnames)." names, ".
+ withThousands($count)." titles in ".withThousands($lineCount)."
lines in %d seconds",time()-$startTime));
+
+ closeMaybeGunzip($file, $fh);
+
+- return($castNames);
++ #-----------------------------------------------------------
++ return($count);
+ }
+
+-sub readRatings($$$$)
++sub readGenres($$$$$)
+ {
+- my ($self, $countEstimate, $file)=@_;
++ my ($self, $which, $countEstimate, $file, $stage)=@_;
+ my $startTime=time();
++ my $header;
++ my $whatAreWeParsing;
+ my $lineCount=0;
+
++ if ( $which eq "Genres" ) {
++ $header="8: THE GENRES LIST";
++ $whatAreWeParsing=1;
++ }
++
++ $self->beginProgressBar('parsing '.$which, $countEstimate);
++
++
++ #-----------------------------------------------------------
++ # find the start of the actual data
++
+ my $fh = openMaybeGunzip($file) || return(-2);
+ while(<$fh>) {
++ chomp();
+ $lineCount++;
+- if ( m/^MOVIE RATINGS REPORT/o ) {
+- if ( !($_=<$fh>) || !m/^\s*$/o) {
+- $self->error("missing empty line after \"MOVIE RATINGS REPORT\" at
line $lineCount");
++ if ( m/^$header/ ) {
++ if ( !($_=<$fh>) || !m/^===========/o ) {
++ $self->error("missing ======= after $header at line $lineCount");
+ closeMaybeGunzip($file, $fh);
+ return(-1);
+ }
+- if ( !($_=<$fh>) || !m/^New Distribution Votes Rank Title/o ) {
+- $self->error("missing \"New Distribution Votes Rank Title\" at
line $lineCount");
++ if ( !($_=<$fh>) || !m/^\s*$/o ) {
++ $self->error("missing empty line after ======= at line $lineCount");
+ closeMaybeGunzip($file, $fh);
+ return(-1);
+ }
+ last;
+ }
+ elsif ( $lineCount > 1000 ) {
+- $self->error("$file: stopping at line $lineCount, didn't see \"MOVIE
RATINGS REPORT\" line");
++ $self->error("$file: stopping at line $lineCount, didn't see
\"$header\" line");
+ closeMaybeGunzip($file, $fh);
+ return(-1);
+ }
+ }
+
+- my $progress=Term::ProgressBar->new({name => "parsing Ratings",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
++
++ #-----------------------------------------------------------
++ # read the genres data, and create the stagex.data file
++ # input data are "film-title genre" separated by one or more tabs
++ # multiple genres are searated by |
++ # Army of Darkness (1992) Horror
++ # King Jeff (2009) Comedy|Short
++
+ my $count=0;
+ while(<$fh>) {
++ chomp();
+ $lineCount++;
+ my $line=$_;
+- #print "read line $lineCount:$line";
++ next if ( length($line) == 0 );
+ last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); #
undocumented option (used in debugging)
++ #$self->status("read line $lineCount:$line");
+
+- $line=~s/\n$//o;
+-
+- # skip empty lines (only really appear right before last line ending with ----
+- next if ( $line=~m/^\s*$/o );
+ # end is line consisting of only '-'
+ last if ( $line=~m/^\-\-\-\-\-\-\-+/o );
++
++ my $tabstop = index($line, "\t"); # there is always at least one tabstop in
the incoming data
++ if ( $tabstop != -1 ) {
++ my ($mtitle, $mgenres) = $line =~ m/^(.*?)\t+(.*)$/; # get film-title (everything up
to the first tab)
++
++ next if ($mtitle=~m/\s*\{\{SUSPENDED\}\}/o);
++
++ # skip enties that have {} in them since they're tv episodes
++ next if ($mtitle=~m/\s*\{[^\}]+\}/ );
++
++ # skip "video games"
++ next if ($mtitle=~m/\s+\(VG\)$/ );
++
++ # returned count is number of titles found
++ $count++;
++
++ if ( $whatAreWeParsing == 1 ) { # genres
++
++ # genres sometimes contains tabs
++ $mgenres=~s/^\t+//og;
++
++ }
++
+
+- next if ( $line=~m/\s*\{[^\}]+\}$/ ); # skip tv episodes
++ #-------------------------------------------------------
++ # tidy the title
+
+- # e.g. New Distribution Votes Rank Title
+- # 0000000133 225568 8.9 12 Angry Men (1957)
+- if ( $line=~s/^\s+([\.|\*|\d]+)\s+(\d+)\s+(\d+)\.(\d+)\s+//o ) {
+- $self->{movies}{$line}=[$1,$2,"$3.$4"];
+- $count++;
+- if ( $self->{showProgressBar} ) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+1000);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
++ # remove the episode if a series
++ if ( $mtitle =~ s/\s*\{[^\}]+\}//o ) { #redundant
++ # $attr=$1;
++ }
++
++ # remove 'aka' details from prog-title
++ if ( $mtitle =~ s/\s*\((?:aka|as) ([^\)]+)\)//o ) {
++ # $attr=$1;
+ }
++
++ $mtitle =~ s/^\s+|\s+$//g; # trim
++
++
++ #-------------------------------------------------------
++ # $mtitle should now contain the programme's title
++ my $title = $mtitle;
++
++ # find the IDX id from the hash of titles ($title=>$lineno) created in stage 1
++ my $idxid = $self->{titleshash}{$title};
++
++ if (!$idxid ) {
++ ## no, don't print errors where we can't match the incoming title - there
are 100s of these in the incoming data
++ ## often where the year on the actor record is 1 year out
++ ##$self->error("$file:$lineCount: cannot find $title in titles list");
++ next;
++ }
++
++
++ #-------------------------------------------------------
++ # the output ".data" files must be sorted by id so they can be merged in
stage final
++ # so we need to store all the incoming records and then sort them
++ #
++ my $h = "stage${stage}hash";
++ if (defined( $self->{$h}{$idxid} )) {
++ $self->{$h}{$idxid} .= "|".$mgenres;
++ } else {
++ $self->{$h}{$idxid} = $mgenres;
++ }
++
++
++ $self->updateProgressBar('', $lineCount);
+ }
+ else {
+- $self->error("$file:$lineCount: unrecognized format");
+- $next_update=$progress->update($count) if ($self->{showProgressBar});
++ $self->error("$file:$lineCount: unrecognized format (missing tab)");
++ $self->updateProgressBar('', $lineCount);
+ }
+ }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+-
+- $self->status(sprintf("parsing Ratings found
".withThousands($count)." titles in ".
++
++ $self->endProgressBar();
++
++ $self->status(sprintf("parsing $which found ".withThousands($count)."
titles in ".
+ withThousands($lineCount)." lines in %d seconds",time()-$startTime));
+
+ closeMaybeGunzip($file, $fh);
++
++ #-----------------------------------------------------------
+ return($count);
+ }
+
+-sub readKeywords($$$$)
++sub readRatings($$$$$)
+ {
+- my ($self, $countEstimate, $file)=@_;
++ my ($self, $which, $countEstimate, $file, $stage)=@_;
+ my $startTime=time();
++ my $header;
++ my $whatAreWeParsing;
+ my $lineCount=0;
+
++ if ( $which eq "Ratings" ) {
++ $header="MOVIE RATINGS REPORT";
++ $whatAreWeParsing=1;
++ }
++
++ $self->beginProgressBar('parsing '.$which, $countEstimate);
++
++
++ #-----------------------------------------------------------
++ # find the start of the actual data
++
+ my $fh = openMaybeGunzip($file) || return(-2);
+ while(<$fh>) {
++ chomp();
+ $lineCount++;
+-
+- if ( m/THE KEYWORDS LIST/ ) {
+- if ( !($_=<$fh>) || !m/^===========/o ) {
+- $self->error("missing ======= after \"THE KEYWORDS LIST\" at line
$lineCount");
++ if ( m/^$header/ ) {
++ # there is no ====== in ratings data!
++ if ( !($_=<$fh>) || !m/^\s*$/o ) {
++ $self->error("missing empty line after $header at line $lineCount");
+ closeMaybeGunzip($file, $fh);
+ return(-1);
+ }
+- if ( !($_=<$fh>) || !m/^\s*$/o ) {
+- $self->error("missing empty line after ======= at line $lineCount");
++ if ( !($_=<$fh>) || !m/^New Distribution Votes Rank Title/o ) {
++ $self->error("missing \"New Distribution Votes Rank Title\" at
line $lineCount");
+ closeMaybeGunzip($file, $fh);
+ return(-1);
+ }
+ last;
+ }
+- elsif ( $lineCount > 150000 ) { # line 101935 as at 2020-12-23
+- $self->error("$file: stopping at line $lineCount, didn't see \"THE
KEYWORDS LIST\" line");
++ elsif ( $lineCount > 1000 ) {
++ $self->error("$file: stopping at line $lineCount, didn't see
\"$header\" line");
+ closeMaybeGunzip($file, $fh);
+ return(-1);
+ }
+ }
+
+- my $progress=Term::ProgressBar->new({name => "parsing keywords",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
++
++ #-----------------------------------------------------------
++ # read the ratings data, and create the stagex.data file
++ # input data are "flag-new disribution votes rank film-title" separated
by one or more spaces
++ # 0000002211 000001 9.9 Army of Darkness (1992)
++ # 0000000133 225568 8.9 12 Angry Men (1957)
++
+ my $count=0;
+ while(<$fh>) {
++ chomp();
+ $lineCount++;
+- last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); #
undocumented option (used in debugging)
+ my $line=$_;
+- chomp($line);
+- next if ($line =~ m/^\s*$/);
+- my ($title, $keyword) = ($line =~ m/^(.*)\s+(\S+)\s*$/);
+- if ( defined($title) and defined($keyword) ) {
++ next if ( length($line) == 0 );
++ last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); #
undocumented option (used in debugging)
++ #$self->status("read line $lineCount:$line");
+
+- next if ( $title=~m/\s*\{[^\}]+\}$/ ); # skip tv episodes
++ # skip empty lines (only really appear right before last line ending with ----
++ next if ( $line=~m/^\s*$/o );
++ # end is line consisting of only '-'
++ last if ( $line=~m/^\-\-\-\-\-\-\-+/o );
++
++ my $tabstop = index($line, " "); # there is always at least one space in the
incoming data
++ if ( $tabstop != -1 ) {
++ my ($mdistrib, $mvotes, $mrank, $mtitle) = $line =~
m/^\s+([\.|\*|\d]+)\s+(\d+)\s+(\d+\.\d+)\s+(.*)$/;
++
++ next if ($mtitle=~m/\s*\{\{SUSPENDED\}\}/o);
+
+- my ($episode) = $title =~ m/^.*\s+(\{.*\})$/;
++ next if ($mtitle=~m/\s*\{[^\}]+\}/ ); # skip tv episodes
++
++ next if ($mtitle=~m/\s+\(VG\)$/ ); # we don't want "video games"
+
+- # ignore anything which is an episode (e.g. "{Doctor Who (#10.22)}" )
+- if ( !defined $episode || $episode eq '' )
+- {
+- if ( defined($self->{movies}{$title}) ) {
+- $self->{movies}{$title}.=",".$keyword;
+- } else {
+- $self->{movies}{$title}=$keyword;
+- # returned count is number of unique titles found
+- $count++;
+- }
++ # returned count is number of titles found
++ $count++;
++
++ if ( $whatAreWeParsing == 1 ) { # ratings
++ # null
+ }
++
+
+- if ( $self->{showProgressBar} ) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+1000);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
++ #-------------------------------------------------------
++ # tidy the title
++
++ # remove the episode if a series
++ if ( $mtitle =~ s/\s*\{[^\}]+\}//o ) { #redundant
++ # $attr=$1;
++ }
++
++ # remove 'aka' details from prog-title
++ if ( $mtitle =~ s/\s*\((?:aka|as) ([^\)]+)\)//o ) {
++ # $attr=$1;
++ }
++
++ $mtitle =~ s/^\s+|\s+$//g; # trim
++
++
++ #-------------------------------------------------------
++ # $mtitle should now contain the programme's title
++ my $title = $mtitle;
++
++ # find the IDX id from the hash of titles ($title=>$lineno) created in stage 1
++ my $idxid = $self->{titleshash}{$title};
++
++ if (!$idxid ) {
++ ## no, don't print errors where we can't match the incoming title - there
are 100s of these in the incoming data
++ ## often where the year on the actor record is 1 year out
++ ##$self->error("$file:$lineCount: cannot find $title in titles list");
++ next;
++ }
++
++
++ #-------------------------------------------------------
++ # the output ".data" files must be sorted by id so they can be merged in
stage final
++ # so we need to store all the incoming records and then sort them
++ #
++ my $h = "stage${stage}hash";
++ if (defined( $self->{$h}{$idxid} )) {
++ # we shouldn't get duplicates
++ $self->error("$file: duplicate film found at line $lineCount - this rating
will be ignored $mtitle");
++ } else {
++ $self->{$h}{$idxid} = "$mdistrib;$mvotes;$mrank";
+ }
+- } else {
+- $self->error("$file:$lineCount: unrecognized format
\"$line\"");
+- $next_update=$progress->update($count) if ($self->{showProgressBar});
++
++
++ $self->updateProgressBar('', $lineCount);
++ }
++ else {
++ $self->error("$file:$lineCount: unrecognized format (missing tab)");
++ $self->updateProgressBar('', $lineCount);
+ }
+ }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+-
+- $self->status(sprintf("parsing Keywords found
".withThousands($count)." titles in ".
++
++ $self->endProgressBar();
++
++ $self->status(sprintf("parsing $which found ".withThousands($count)."
titles in ".
+ withThousands($lineCount)." lines in %d seconds",time()-$startTime));
+
+ closeMaybeGunzip($file, $fh);
++
++ #-----------------------------------------------------------
+ return($count);
+ }
+
+-sub readPlots($$$$)
++sub readKeywords($$$$$)
+ {
+- my ($self, $countEstimate, $file)=@_;
++ my ($self, $which, $countEstimate, $file, $stage)=@_;
+ my $startTime=time();
++ my $header;
++ my $whatAreWeParsing;
+ my $lineCount=0;
+
++ if ( $which eq "Keywords" ) {
++ $header="8: THE KEYWORDS LIST";
++ $whatAreWeParsing=1;
++ }
++
++ $self->beginProgressBar('parsing '.$which, $countEstimate);
++
++
++ #-----------------------------------------------------------
++ # find the start of the actual data
++
+ my $fh = openMaybeGunzip($file) || return(-2);
+ while(<$fh>) {
++ chomp();
+ $lineCount++;
+-
+- if ( m/PLOT SUMMARIES LIST/ ) {
++ if ( m/^$header/ ) {
+ if ( !($_=<$fh>) || !m/^===========/o ) {
+- $self->error("missing ======= after \"PLOT SUMMARIES LIST\" at
line $lineCount");
++ $self->error("missing ======= after $header at line $lineCount");
+ closeMaybeGunzip($file, $fh);
+ return(-1);
+ }
+- if ( !($_=<$fh>) || !m/^-----------/o ) {
+- $self->error("missing ------- line after ======= at line $lineCount");
++ if ( !($_=<$fh>) || !m/^\s*$/o ) {
++ $self->error("missing empty line after ======= at line $lineCount");
+ closeMaybeGunzip($file, $fh);
+ return(-1);
+ }
+ last;
+ }
+- elsif ( $lineCount > 500 ) {
+- $self->error("$file: stopping at line $lineCount, didn't see \"PLOT
SUMMARIES LIST\" line");
++ elsif ( $lineCount > 150000 ) { # line 101935 as at 2020-12-23
++ $self->error("$file: stopping at line $lineCount, didn't see
\"$header\" line");
+ closeMaybeGunzip($file, $fh);
+ return(-1);
+ }
+ }
+
+- my $progress=Term::ProgressBar->new({name => "parsing plots",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
++
++ #-----------------------------------------------------------
++ # read the keywords data, and create the stagex.data file
++ # input data are "film-title keyword" separated by one or more tabs
++ # multiple keywords are searated by |
++ # Army of Darkness (1992) Horror
++ # King Jeff (2009) Comedy|Short
++
+ my $count=0;
+ while(<$fh>) {
++ chomp();
+ $lineCount++;
++ my $line=$_;
++ next if ( length($line) == 0 );
+ last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); #
undocumented option (used in debugging)
++ #$self->status("read line $lineCount:$line");
++
++ # end is line consisting of only '-'
++ last if ( $line=~m/^\-\-\-\-\-\-\-+/o );
++
++ my $tabstop = index($line, "\t"); # there is always at least one tabstop in
the incoming data
++ if ( $tabstop != -1 ) {
++ my ($mtitle, $mkeywords) = $line =~ m/^(.*?)\t+(.*)$/; # get film-title (everything
up to the first tab)
++
++ next if ($mtitle=~m/\s*\{\{SUSPENDED\}\}/o);
++
++ next if ($mtitle=~m/\s*\{[^\}]+\}/ ); # skip tv episodes
++
++ next if ($mtitle=~m/\s+\(VG\)$/ ); # we don't want "video games"
++
++ # returned count is number of titles found
++ $count++;
++
++ if ( $whatAreWeParsing == 1 ) { # genres
++
++ # ignore anything which is an episode (e.g. "{Doctor Who (#10.22)}" )
++ next if $mtitle =~ m/^.*\s+(\{.*\})$/;
++
++ }
++
++
++ #-------------------------------------------------------
++ # tidy the title
++
++ # remove the episode if a series
++ # [honir] this is wrong - this puts all the keywords as though they are in the entire
series!
++ if ( $mtitle =~ s/\s*\{[^\}]+\}//o ) { #redundant
++ # $attr=$1;
++ }
++
++ # remove 'aka' details from prog-title
++ if ( $mtitle =~ s/\s*\((?:aka|as) ([^\)]+)\)//o ) {
++ # $attr=$1;
++ }
++
++ $mtitle =~ s/^\s+|\s+$//g; # trim
++
++
++ #-------------------------------------------------------
++ # $mtitle should now contain the programme's title
++ my $title = $mtitle;
++
++ # find the IDX id from the hash of titles ($title=>$lineno) created in stage 1
++ my $idxid = $self->{titleshash}{$title};
++
++ if (!$idxid ) {
++ ## no, don't print errors where we can't match the incoming title - there
are 100s of these in the incoming data
++ ## often where the year on the actor record is 1 year out
++ ##$self->error("$file:$lineCount: cannot find $title in titles list");
++ next;
++ }
++
++
++ #-------------------------------------------------------
++ # the output ".data" files must be sorted by id so they can be merged in
stage final
++ # so we need to store all the incoming records and then sort them
++ #
++ my $h = "stage${stage}hash";
++ if (defined( $self->{$h}{$idxid} )) {
++ $self->{$h}{$idxid} .= "|".$mkeywords;
++ } else {
++ $self->{$h}{$idxid} = $mkeywords;
++ }
++
++
++ $self->updateProgressBar('', $lineCount);
++ }
++ else {
++ $self->error("$file:$lineCount: unrecognized format (missing tab)");
++ $self->updateProgressBar('', $lineCount);
++ }
++ }
++
++ $self->endProgressBar();
++
++ $self->status(sprintf("parsing $which found ".withThousands($count)."
titles in ".
++ withThousands($lineCount)." lines in %d seconds",time()-$startTime));
++
++ closeMaybeGunzip($file, $fh);
++
++ #-----------------------------------------------------------
++ return($count);
++}
++
++sub readPlots($$$$$)
++{
++ my ($self, $which, $countEstimate, $file, $stage)=@_;
++ my $startTime=time();
++ my $header;
++ my $whatAreWeParsing;
++ my $lineCount=0;
++
++ if ( $which eq "Plot" ) {
++ $header="PLOT SUMMARIES LIST";
++ $whatAreWeParsing=1;
++ }
++
++ $self->beginProgressBar('parsing '.$which, $countEstimate);
++
++
++ #-----------------------------------------------------------
++ # find the start of the actual data
++
++ my $fh = openMaybeGunzip($file) || return(-2);
++ while(<$fh>) {
++ chomp();
++ $lineCount++;
++ if ( m/^$header/ ) {
++ if ( !($_=<$fh>) || !m/^===========/o ) {
++ $self->error("missing ======= after $header at line $lineCount");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ # no blank line in plot data!
++ ##if ( !($_=<$fh>) || !m/^\s*$/o ) {
++ ## $self->error("missing empty line after ======= at line $lineCount");
++ ## closeMaybeGunzip($file, $fh);
++ ## return(-1);
++ ##}
++ last;
++ }
++ elsif ( $lineCount > 1000 ) {
++ $self->error("$file: stopping at line $lineCount, didn't see
\"$header\" line");
++ closeMaybeGunzip($file, $fh);
++ return(-1);
++ }
++ }
++
++
++ #-----------------------------------------------------------
++ # read the plot data, and create the stagex.data file
++ # input data are "flag-new disribution votes rank film-title" separated
by one or more spaces
++ # there can be multiple entries for each film
++ # -------------------------------------------------------------------------------
++ # MV: Army of Darkness (1992)
++ #
++ # PL: Ash is transported with his car to 1,300 A.D., where he is captured by Lord
++ # PL: Arthur and turned slave with Duke Henry the Red and a couple of his men.
++ # [...]
++ # PL: battle between Ash's 20th Century tactics and the minions of darkness.
++ #
++ # BY: David Thiel <d-thiel(a)uiuc.edu>
++ #
++ # PL: Ash finds himself stranded in the year 1300 AD with his car, his shotgun,
++ # PL: and his chainsaw. Soon he is discovered and thought to be a spy for a rival
++ # [...]
++ # PL: forces at play in the land. Ash accidentally releases the Army of Darkness
++ # PL: when retrieving the book, and a fight to the finish ensues.
++ #
++ # BY: Ed Sutton <esutton(a)mindspring.com>
++
++ my $count=0;
++ while(<$fh>) {
++ chomp();
++ $lineCount++;
+ my $line=$_;
+- chomp($line);
+- next if ($line =~ m/^\s*$/);
+- my ($title, $episode) = ($line =~ m/^MV:\s(.*?)\s?(\{.*\})?$/);
+- if ( defined($title) ) {
++ next if ( length($line) == 0 );
++ last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); #
undocumented option (used in debugging)
++ #$self->status("read line $lineCount:$line");
++
++ # skip empty lines
++ next if ( $line=~m/^\s*$/o );
++
++ next if ( $line=~m/\s*\{[^\}]+\}/ ); # skip tv episodes
++
++ next if ( $line=~m/\s+\(VG\)$/ ); # skip "video games"
+
+- next if ( $title=~m/\s*\{[^\}]+\}$/ ); # skip tv episodes
++ # process a data block - starts with "MV:"
++ #
++ my ($mtitle, $mepisode) = ($line =~ m/^MV:\s(.*?)\s?(\{.*\})?$/);
++ if ( defined($mtitle) ) {
++ my $mplot = '';
+
+ # ignore anything which is an episode (e.g. "{Doctor Who (#10.22)}" )
+- if ( !defined $episode || $episode eq '' )
++ if ( !defined $mepisode || $mepisode eq '' )
+ {
+- my $plot = '';
+ LOOP:
+ while (1) {
+ if ( $line = <$fh> ) {
+ $lineCount++;
+ chomp($line);
+ next if ($line =~ m/^\s*$/);
+- if ( $line =~ m/PL:\s(.*)$/ ) { # plot summary is a number of lines starting
"PL:"
+- $plot .= ($plot ne ''?' ':'') . $1;
++ if ( $line =~ m/PL:\s(.*)$/ ) { # plot summary is a number of lines starting
"PL:"
++ $mplot .= ($mplot ne ''?' ':'') . $1;
+ }
+ last LOOP if ( $line =~ m/BY:\s(.*)$/ ); # the author line "BY:"
signals the end of the plot summary
+ } else {
+@@ -2150,39 +2678,77 @@ sub readPlots($$$$)
+ }
+ }
+
+- if ( !defined($self->{movies}{$title}) ) {
+- # ensure there's no tab chars in the plot or else the db stage will barf
+- $plot =~ s/\t//og;
+- $self->{movies}{$title}=$plot;
+- # returned count is number of unique titles found
+- $count++;
+- }
++ # ensure there's no tab chars in the plot or else the db stage will barf
++ $mplot =~ s/\t//og;
++
++ # returned count is number of unique titles found
++ $count++;
+ }
++
++
++ #-------------------------------------------------------
++ # tidy the title
+
+- if ( $self->{showProgressBar} ) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+1000);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
++ # remove the episode if a series
++ if ( $mtitle =~ s/\s*\{[^\}]+\}//o ) { #redundant
++ # $attr=$1;
++ }
++
++ # remove 'aka' details from prog-title
++ if ( $mtitle =~ s/\s*\((?:aka|as) ([^\)]+)\)//o ) {
++ # $attr=$1;
++ }
++
++ $mtitle =~ s/^\s+|\s+$//g; # trim
++
++
++ #-------------------------------------------------------
++ # $mtitle should now contain the programme's title
++ my $title = $mtitle;
++
++ # find the IDX id from the hash of titles ($title=>$lineno) created in stage 1
++ my $idxid = $self->{titleshash}{$title};
++
++ if (!$idxid ) {
++ ## no, don't print errors where we can't match the incoming title - there
are 100s of these in the incoming data
++ ## often where the year on the actor record is 1 year out
++ ##$self->error("$file:$lineCount: cannot find $title in titles list");
++ next;
++ }
++
++
++ #-------------------------------------------------------
++ # the output ".data" files must be sorted by id so they can be merged in
stage final
++ # so we need to store all the incoming records and then sort them
++ #
++ my $h = "stage${stage}hash";
++ if (defined( $self->{$h}{$idxid} )) {
++ # we shouldn't get duplicates
++ $self->error("$file: duplicate film found at line $lineCount - this plot
will be ignored $mtitle");
++ } else {
++ $self->{$h}{$idxid} = $mplot;
+ }
+- } else {
+- # skip lines up to the next "MV:"
++
++
++ $self->updateProgressBar('', $lineCount);
++ }
++ else {
++ # skip lines up to the next "MV:" (this means we only get the first plot
summary for each film)
+ if ($line !~ m/^(---|PL:|BY:)/ ) {
+ $self->error("$file:$lineCount: unrecognized format
\"$line\"");
+ }
+- $next_update=$progress->update($count) if ($self->{showProgressBar});
++ $self->updateProgressBar('', $lineCount);
+ }
+ }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+-
+- $self->status(sprintf("parsing Plots found $count
".withThousands($count)." in ".
++
++ $self->endProgressBar();
++
++ $self->status(sprintf("parsing $which found ".withThousands($count)."
in ".
+ withThousands($lineCount)." lines in %d seconds",time()-$startTime));
+
+ closeMaybeGunzip($file, $fh);
++
++ #-----------------------------------------------------------
+ return($count);
+ }
+
+@@ -2288,1050 +2854,554 @@ sub dbinfoCalcBytesPerEntry($$$)
+ return(int($fileSize/$calcActualForThisNumber));
+ }
+
+-sub invokeStage($$)
++sub gettitleshash($$)
+ {
+- my ($self, $stage)=@_;
+-
++ # load the titles list (stage1.data) into memory
++
++ my ($self, $countEstimate)=@_;
+ my $startTime=time();
+- if ( $stage == 1 ) {
+- $self->status("parsing Movies list for stage $stage..");
+- my $countEstimate=$self->dbinfoCalcEstimate("movies", 47);
+-
+- my $num=$self->readMoviesOrGenres("Movies", $countEstimate,
"$self->{imdbListFiles}->{movies}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{movies}
from ftp.imdb.com");
+- }
+- return(1);
+- }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
+- my $better=$self->dbinfoCalcBytesPerEntry("movies", $num);
+- $self->status("ARG estimate of $countEstimate for movies needs updating,
found $num ($better bytes/entry)");
+- }
+- $self->dbinfoAdd("db_stat_movie_count", "$num");
+-
+- $self->status("writing stage1 data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing titles",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
++ my $lineCount=0;
+
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+- my $count=0;
+- for my $movie (@{$self->{movies}}) {
+- print OUT "$movie\n";
++ undef $self->{titleshash};
++
++ $self->beginProgressBar('loading titles list', $countEstimate);
+
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
++ open(IN, "< $self->{imdbDir}/stage1.data") || die
"$self->{imdbDir}/stage1.data:$!";
++ my $count=0;
++ my $maxidxid=0;
++ while(<IN>) {
++ chomp();
++ my $line=$_;
++ next if ( length($line) == 0 );
++ #$self->status("read line $lineCount:$line");
++ $lineCount++;
++
++ # check the database version number
++ if ($lineCount == 1) {
++ if ( m/^0000000:version ([\d\.]*)$/ ) {
++ if ($1 ne $VERSION) {
++ $self->error("incorrect database version");
++ return(1);
++ } else {
++ next;
+ }
++ } else {
++ $self->error("missing database version at line $lineCount");
++ return(1);
+ }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
+ }
+- }
+- elsif ( $stage == 2 ) {
+- $self->status("parsing Directors list for stage $stage..");
++
+
+- my $countEstimate=$self->dbinfoCalcEstimate("directors", 258);
++ if (index($line, ":") != -1 ) {
++ $count++;
++
++ # extract the title-idx-id and the film-title
++ # 0000002:army%20of%20darkness%20%281992%29 Army of Darkness
(1992) 1992 movie 0000002
++ #
++ my ($midxid, $mhashkey, $mtitle) = $line =~ m/^(\d*):(.*?)\t+(.*?)\t/;
+
+- my $num=$self->readCastOrDirectors("Directors", $countEstimate,
"$self->{imdbListFiles}->{directors}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{directors}
from
ftp.imdb.com (see
http://www.imdb.com/interfaces)");
++ if ($midxid && $mtitle) {
++ $self->{titleshash}{$mtitle} = int($midxid); # build the hash
++
++ $maxidxid = $midxid if ( $midxid > $maxidxid );
+ }
+- return(1);
++
++ $self->updateProgressBar('', $lineCount);
+ }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
+- my $better=$self->dbinfoCalcBytesPerEntry("directors", $num);
+- $self->status("ARG estimate of $countEstimate for directors needs updating,
found $num ($better bytes/entry)");
+- }
+- $self->dbinfoAdd("db_stat_director_count", "$num");
+-
+- $self->status("writing stage2 data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing directors",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- my $count=0;
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+- for my $key (keys %{$self->{movies}}) {
+- my %dir;
+- for (split('\|', $self->{movies}{$key})) {
+- $dir{$_}++;
+- }
+- my @list;
+- for (keys %dir) {
+- push(@list, sprintf("%03d:%s", $dir{$_}, $_));
+- }
+- my $value="";
+- for my $c (reverse sort {$a cmp $b} @list) {
+- my ($num, $name)=split(':', $c);
+- $value.=$name."|";
+- }
+- $value=~s/\|$//o;
+- print OUT "$key\t$value\n";
+-
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
++ else {
++ $self->error("$lineCount: unrecognized format (missing tab)");
++ $self->updateProgressBar('', $lineCount);
+ }
+- #unlink("$self->{imdbDir}/stage1.data");
+ }
+- elsif ( $stage == 3 ) {
+- $self->status("parsing Actors list for stage $stage..");
+-
+- #print "re-reading movies into memory for reverse lookup..\n";
+- my $countEstimate=$self->dbinfoCalcEstimate("actors", 449);
+-
+- my $num=$self->readCastOrDirectors("Actors", $countEstimate,
"$self->{imdbListFiles}->{actors}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{actors}
from
ftp.imdb.com (see
http://www.imdb.com/interfaces)");
+- }
+- return(1);
+- }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
+- my $better=$self->dbinfoCalcBytesPerEntry("actors", $num);
+- $self->status("ARG estimate of $countEstimate for actors needs updating,
found $num ($better bytes/entry)");
+- }
+- $self->dbinfoAdd("db_stat_actor_count", "$num");
+-
+- $self->status("writing stage3 data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing actors",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- my $count=0;
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+- for my $key (keys %{$self->{movies}}) {
+- print OUT "$key\t$self->{movies}{$key}\n";
++
++ $self->endProgressBar();
++
++ $self->status(sprintf("found ".withThousands($count)." titles in
".
++ withThousands($lineCount-1)." lines in %d
seconds",time()-$startTime)); # drop 1 for the "version" line
+
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
+- }
+- }
+- elsif ( $stage == 4 ) {
+- $self->status("parsing Actresses list for stage $stage..");
++ close(IN);
+
+- my $countEstimate=$self->dbinfoCalcEstimate("actresses", 483);
+- my $num=$self->readCastOrDirectors("Actresses", $countEstimate,
"$self->{imdbListFiles}->{actresses}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{actresses}
from
ftp.imdb.com (see
http://www.imdb.com/interfaces)");
+- }
+- return(1);
+- }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
+- my $better=$self->dbinfoCalcBytesPerEntry("actresses", $num);
+- $self->status("ARG estimate of $countEstimate for actresses needs updating,
found $num ($better bytes/entry)");
+- }
+- $self->dbinfoAdd("db_stat_actress_count", "$num");
+-
+- $self->status("writing stage4 data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing actresses",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- my $count=0;
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+- for my $key (keys %{$self->{movies}}) {
+- print OUT "$key\t$self->{movies}{$key}\n";
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
+- }
+- #unlink("$self->{imdbDir}/stage3.data");
+- }
+- elsif ( $stage == 5 ) {
+- $self->status("parsing Genres list for stage $stage..");
+- my $countEstimate=$self->dbinfoCalcEstimate("genres", 68);
++ #-----------------------------------------------------------
++ return($count, $maxidxid);
++}
+
+- my $num=$self->readMoviesOrGenres("Genres", $countEstimate,
"$self->{imdbListFiles}->{genres}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{genres}
from ftp.imdb.com");
+- }
+- return(1);
+- }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
+- my $better=$self->dbinfoCalcBytesPerEntry("genres", $num);
+- $self->status("ARG estimate of $countEstimate for genres needs updating,
found $num ($better bytes/entry)");
+- }
+- $self->dbinfoAdd("db_stat_genres_count", "$num");
+-
+- $self->status("writing stage5 data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_genres_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing genres",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+- my $count=0;
+- for my $movie (keys %{$self->{movies}}) {
+- print OUT "$movie\t$self->{movies}->{$movie}\n";
++sub dedupe($$$)
++{
++ # basic deduping of data entries
++
++ my ($self, $data, $sep)=@_;
+
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
++ my @outarr;
++ my @arr = split( ($sep eq '|' ? '\|' : $sep) , $$data);
++ my %out;
++
++ foreach my $v (@arr) {
++ my ($a, $b) = $v =~ m/^(\d*):?(.*)\s*$/;
++ if (!defined $out{$b}) {
++ push @outarr, $v;
++ $out{$b} = $v;
+ }
+ }
+- elsif ( $stage == 6 ) {
+- $self->status("parsing Ratings list for stage $stage..");
+- my $countEstimate=$self->dbinfoCalcEstimate("ratings", 68);
++
++ $$data = join($sep, @outarr);
++ return;
++}
+
+- my $num=$self->readRatings($countEstimate,
"$self->{imdbListFiles}->{ratings}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{ratings}
from ftp.imdb.com");
+- }
+- return(1);
+- }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
+- my $better=$self->dbinfoCalcBytesPerEntry("ratings", $num);
+- $self->status("ARG estimate of $countEstimate for ratings needs updating,
found $num ($better bytes/entry)");
+- }
+- $self->dbinfoAdd("db_stat_ratings_count", "$num");
+-
+- $self->status("writing stage6 data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_ratings_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing ratings",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+- my $count=0;
+- for my $movie (keys %{$self->{movies}}) {
+- my @value=@{$self->{movies}->{$movie}};
+- print OUT "$movie\t$value[0]\t$value[1]\t$value[2]\n";
++sub stripbilling($$$)
++{
++ # strip the billing from the names
++ # also strip the "(I)" etc suffix from names
++
++ my ($self, $data, $sep)=@_;
+
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
+- }
++ my @outarr;
++ my @arr = split( ($sep eq '|' ? '\|' : $sep) , $$data);
++
++ foreach my $v (@arr) {
++ my ($a, $b) = $v =~ m/^(\d*):?(.*)\s*$/;
++ $b=~s/\s\([IVXL]+\)\[/\[/o;
++ $b=~s/\s\([IVXL]+\)$//o;
++ push @outarr, $b;
+ }
+- elsif ( $stage == 7 ) {
+- $self->status("parsing Keywords list for stage $stage..");
++
++ $$data = join($sep, @outarr);
++ return;
++}
+
+- if ( !defined($self->{imdbListFiles}->{keywords}) ) {
+- $self->status("no keywords file downloaded, see --with-keywords details in
documentation");
+- return(0);
+- }
++sub sortnames($$$)
++{
++ # basic sorting of names
++
++ my ($self, $data, $sep)=@_;
++
++ my @arr = split( ($sep eq '|' ? '\|' : $sep) , $$data);
+
+- my $countEstimate=5630000;
+- my $num=$self->readKeywords($countEstimate,
"$self->{imdbListFiles}->{keywords}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{keywords}
from ftp.imdb.com");
+- }
+- return(1);
+- }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.05 ) {
+- $self->status("ARG estimate of $countEstimate for keywords needs updating,
found $num");
+- }
+- $self->dbinfoAdd("keywords_list_file",
"$self->{imdbListFiles}->{keywords}");
+- $self->dbinfoAdd("keywords_list_file_size", -s
"$self->{imdbListFiles}->{keywords}");
+- $self->dbinfoAdd("db_stat_keywords_count", "$num");
++ $$data = join($sep, sort(@arr) );
++ return;
++}
+
+- $self->status("writing stage$stage data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_keywords_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing keywords",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
++sub stripprogtype($$)
++{
++ # strip the (TV) or (V) or (VG) suffix from title
++
++ my ($self, $data)=@_;
++
++ my ($midx, $mtitle, $mrest) = $$data =~ m/^(.*?)\t(.*?)\t(.*)$/;
++
++ $mtitle =~ s/\s(\((TV|V|VG)\))//;
++
++ $$data = $midx ."\t". $mtitle ."\t". $mrest;
++ return;
++}
+
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
++sub readfilesbyidxid($$$$)
++{
++ # read lines from the data files 2..8 looking for matches on a passed idxid
++ # (don't use this for stage1 data - use a call to readdatafile to simply get the
next record
++
++ my ($self, $fhs, $fdat, $idxid)=@_;
+
+- my $count=0;
+- for my $movie (keys %{$self->{movies}}) {
+- print OUT "$movie\t$self->{movies}->{$movie}\n";
++ while (my ($stage, $fh) = each ( %$fhs )) {
+
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
++ $fdat->{$stage} = { k=>0, v=>'' } if !defined
$fdat->{$stage}{k};
++
++ if ($fdat->{$stage}{k} < $idxid) {
++ #print STDERR "fetching from $stage ".$fdat->{$stage}{k}."
< $idxid \n";
++
++ my ($fstage, $fidxid, $fdata) = $self->readdatafile( $fhs->{$stage}, $stage,
$idxid );
++
++ # store the file record
++ $fdat->{$stage} = { k=>$fidxid, v=>$fdata };
+ }
+ }
+- elsif ( $stage == 8 ) {
+- $self->status("parsing Plot list for stage $stage..");
+-
+- if ( !defined($self->{imdbListFiles}->{plot}) ) {
+- $self->status("no plot file downloaded, see --with-plot details in
documentation");
+- return(0);
+- }
++
++
++ # here's a fudge: we need to merge the actors (stage 3) and actresses (stage 4)
together
++ my @pnames;
++ push ( @pnames, $fdat->{3}{v} ) if ( $fdat->{3}{k} == $idxid );
++ push ( @pnames, $fdat->{4}{v} ) if ( $fdat->{4}{k} == $idxid );
++
++ if (scalar @pnames) {
++ # join the two data values, sort, strip...
++ my $pnames = join('|', @pnames);
+
+- my $countEstimate=222222;
+- my $num=$self->readPlots($countEstimate,
"$self->{imdbListFiles}->{plot}");
+- if ( $num < 0 ) {
+- if ( $num == -2 ) {
+- $self->error("you need to download $self->{imdbListFiles}->{plot} from
ftp.imdb.com");
++ $self->sortnames(\$pnames, '|'); # sorts by "billing:name"
++ $self->stripbilling(\$pnames, '|'); # strip "billing:" and
"(I)" on name
++
++ ### ...and then store in one of the actors/actresses value while nulling the other
++ if ( $fdat->{3}{k} == $idxid ) {
++ $fdat->{3}{v} = $pnames;
++ $fdat->{4}{v} = ':::' if ( $fdat->{4}{k} == $idxid );
++ }
++ elsif ( $fdat->{4}{k} == $idxid ) {
++ $fdat->{4}{v} = $pnames;
++ $fdat->{3}{v} = ':::' if ( $fdat->{3}{k} == $idxid );
+ }
+- return(1);
+- }
+- elsif ( abs($num - $countEstimate) > $countEstimate*.05 ) {
+- $self->status("ARG estimate of $countEstimate for plots needs updating, found
$num");
+ }
+- $self->dbinfoAdd("plots_list_file",
"$self->{imdbListFiles}->{plot}");
+- $self->dbinfoAdd("plots_list_file_size", -s
"$self->{imdbListFiles}->{plot}");
+- $self->dbinfoAdd("db_stat_plots_count", "$num");
+-
+- $self->status("writing stage$stage data ..");
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_plots_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing plots",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
++ # end fudge
+
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+-
+- my $count=0;
+- for my $movie (keys %{$self->{movies}}) {
+- print OUT "$movie\t$self->{movies}->{$movie}\n";
++ return;
++}
++
++sub readdatafile($$$$)
++{
++ my ($self, $fh, $stage, $idxid)=@_;
+
+- $count++;
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(OUT);
+- delete($self->{movies});
+- }
++ # read a line from a file
++
++ if ( eof($fh) ) {
++ return ($stage, 9999999, '');
+ }
+- elsif ( $stage == $self->{stageLast} ) {
+- my $tab=sprintf("\t");
++
++ defined( my $line = readline $fh ) or die "readline failed on file for stage
$stage : $!";
++
++
++ # extract the idxid from the start of each line
++ # 0000002:army%20of%20darkness%20%281992%29 Army of Darkness
(1992) 1992 movie 0000002
++ my ($midxid, $mdata) = $line =~ m/^(\d*):(.*)$/;
++
++ if ($midxid) {
+
+- $self->status("indexing all previous stage's data for stage
".$self->{stageLast}."..");
+-
+- $self->status("parsing stage 1 data (movie list)..");
+- my %movies;
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "reading titles",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(IN, "< $self->{imdbDir}/stage1.data") || die
"$self->{imdbDir}/stage1.data:$!";
+- while(<IN>) {
+- chop();
+- $movies{$_}="";
++ # there should not be any records in datafile n which are not in datafile 1
++ if ($midxid < $idxid) {
++ $self->error("unexpected record in stage $stage data file at $midxid
(expected $idxid)");
+
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
+- }
+- close(IN);
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+ }
+-
+- $self->status("merging in stage 2 data (directors)..");
+- if ( 1 ) {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "merging directors",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(IN, "< $self->{imdbDir}/stage2.data") || die
"$self->{imdbDir}/stage2.data:$!";
+- while(<IN>) {
+- chop();
+- s/^([^\t]+)\t//o;
+- if ( !defined($movies{$1}) ) {
+- $self->error("directors list references unidentified title
'$1'");
+- next;
+- }
+- $movies{$1}=$_;
+-
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
++ else {
++ # processing on the data for each interim file
++
++ # movies #1 : strip the (TV) (V) markers from the movie title
++ # directors #2 : (i) dedupe (ii) sort into name order (not correct but there's no
sequencing in the imdb data)
++ # actors/actresses #3,#4 : (i) dedeupe (ii) sort into billing order (iii) strip
billing id Note: need to merge actors and actresses
++ # genres #5 : (i) dedupe
++ # ratings #6 : (i) split elements and separate by tabs
++ # keywords #7 : (i) dedupe, (ii) replace separator with comma
++ # plots #8 :
++ #
++ if ($stage == 1) {
++ $self->stripprogtype(\$mdata);
++
++ } elsif ($stage == 2) {
++ $self->dedupe(\$mdata, '|');
++ $self->stripbilling(\$mdata, '|');
++ $self->sortnames(\$mdata, '|'); # sorts by "lastname,
firstname"
++
++ } elsif ($stage == 3 || $stage == 4) {
++ $self->dedupe(\$mdata, '|');
++ # defer sorting and strip billing deferred until after we have joined actors +
actresses
++ ## $self->sortnames(\$mdata, '|'); # sorts by "billing:name"
++ ## $self->stripbilling(\$mdata, '|');
++
++ } elsif ($stage == 5) {
++ $self->dedupe(\$mdata, '|');
++
++ } elsif ($stage == 6) {
++ $mdata =~ s/;/\t/g;
++
++ } elsif ($stage == 7) {
++ $self->dedupe(\$mdata, '|');
++ $mdata =~ s/\|/,/g;
++
++ } elsif ($stage == 8) {
++ # noop
+ }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(IN);
++
+ }
++ }
++
++ return ($stage, $midxid, $mdata);
++}
+
+- if ( 1 ) {
+- # fill in default for movies we didn't have a director for
+- while (my ($key, $val) = each (%movies)) {
+- if (!length($val)) {
+- $movies{$key}="<>";
+- }
+- }
+- }
++sub invokeStage($$)
++{
++ my ($self, $stage)=@_;
+
+- $self->status("merging in stage 3 data (actors)..");
+- if ( 1 ) {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "merging actors",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
++ my $startTime=time();
+
+- open(IN, "< $self->{imdbDir}/stage3.data") || die
"$self->{imdbDir}/stage3.data:$!";
+- while(<IN>) {
+- chop();
+- s/^([^\t]+)\t//o;
+- my $dbkey=$1;
+- my $val=$movies{$dbkey};
+- if ( !defined($val) ) {
+- $self->error("actors list references unidentified title
'$dbkey'");
+- next;
+- }
+- if ( $val=~m/$tab/o ) {
+- $movies{$dbkey}=$val."|".$_;
+- }
+- else {
+- $movies{$dbkey}=$val.$tab.$_;
+- }
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(IN);
+- }
++ #----------------------------------------------------------------------------
++ if ( $stage == 1 ) {
+
+- $self->status("merging in stage 4 data (actresses)..");
+- if ( 1 ) {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "merging actresses",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
++ $self->status("parsing Movies list for stage $stage ...");
++ my $countEstimate=$self->dbinfoCalcEstimate("movies", 45);
+
+- open(IN, "< $self->{imdbDir}/stage4.data") || die
"$self->{imdbDir}/stage4.data:$!";
+- while(<IN>) {
+- chop();
+- s/^([^\t]+)\t//o;
+- my $dbkey=$1;
+- my $val=$movies{$dbkey};
+- if ( !defined($val) ) {
+- $self->error("actresses list references unidentified title
'$dbkey'");
+- next;
+- }
+- if ( $val=~m/$tab/o ) {
+- $movies{$dbkey}=$val."|".$_;
+- }
+- else {
+- $movies{$dbkey}=$val.$tab.$_;
+- }
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(IN);
+- }
+- if ( 1 ) {
+- # fill in placeholder if no actors were found
+- while (my ($key, $val) = each (%movies)) {
+- if ( !($val=~m/$tab/o) ) {
+- $movies{$key}.=$tab."<>";
+- }
++ my ($num, $numout) = $self->readMovies("Movies", $countEstimate,
"$self->{imdbListFiles}->{movies}", $stage);
++ if ( $num < 0 ) {
++ if ( $num == -2 ) {
++ $self->error("you need to download $self->{imdbListFiles}->{movies}
from the ftp site, or use the --download option");
+ }
++ return(1);
+ }
+-
+- $self->status("merging in stage 5 data (genres)..");
+- if ( 1 ) {
+- my $countEstimate=$self->dbinfoGet("db_stat_genres_count", 1); #
'1' prevents the spurious "(nothing to do)" msg
+- my $progress=Term::ProgressBar->new({name => "merging genres",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(IN, "< $self->{imdbDir}/stage5.data") || die
"$self->{imdbDir}/stage5.data:$!";
+- while(<IN>) {
+- chop();
+- s/^([^\t]+)\t//o;
+- my $dbkey=$1;
+- my $genres=$_;
+- my $val=$movies{$dbkey};
+- if ( !defined($val) ) {
+- $self->error("genres list references unidentified title
'$1'");
+- next;
+- }
+- $movies{$dbkey}.=$tab.$genres;
+-
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(IN);
++ elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
++ my $better=$self->dbinfoCalcBytesPerEntry("movies", $num);
++ $self->status("ARG estimate of $countEstimate for movies needs updating,
found $num ($better bytes/entry)");
+ }
+-
+- if ( 1 ) {
+- # fill in placeholder if no genres were found
+- while (my ($key, $val) = each (%movies)) {
+- my $t=index($val, $tab);
+- if ( $t == -1 ) {
+- die "corrupt entry '$key' '$val'";
+- }
+- if ( index($val, $tab, $t+1) == -1 ) {
+- $movies{$key}.=$tab."<>";
+- }
++ $self->dbinfoAdd("db_stat_movie_count", "$numout");
++
++ #use Data::Dumper;print STDERR Dumper($self->{movieshash});
++ #use Data::Dumper;my $_h="stage${stage}hash";print STDERR Dumper(
$self->{$_h} );
++
++
++ #-----------------------------------------------------------
++ # sort the title keys and write the stage1.data file
++ #
++ $self->beginProgressBar("writing stage $stage data", $num);
++
++ open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
++ print OUT '0000000:version '.$VERSION."\n";
++
++ my $count=0;
++ foreach my $k (sort keys( %{$self->{movieshash}} )) {
++
++ while ( my ($k2, $v2) = each %{$self->{movieshash}{$k}} ) { # movieshash is a hash
of hashes
++
++ $count++;
++ my $idxid=sprintf("%07d", $count);
++
++ # the following equates to
++ # print OUT
$idxid.":".$dbkey."\t".$title."\t".$year."\t".$qualifier."\t".$lineno."\n";
++ print OUT
$idxid.':'.$k."\t".$k2."\t".$v2."\t".$idxid."\n";
++
++ # and create a shared hash of $title=>$lineno (i.e. IDX 'id')
++ $self->{titleshash}{$k2} = $count; # store the int version of the id for this
title
++ # (note multiple titles may have the same hashkey)
+ }
++
++ delete( $self->{movieshash}{$k} );
++
++ $self->updateProgressBar('', $count);
+ }
+-
+- $self->status("merging in stage 6 data (ratings)..");
+- if ( 1 ) {
+- my $countEstimate=$self->dbinfoGet("db_stat_ratings_count", 1); #
'1' prevents the spurious "(nothing to do)" msg
+- my $progress=Term::ProgressBar->new({name => "merging ratings",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(IN, "< $self->{imdbDir}/stage6.data") || die
"$self->{imdbDir}/stage6.data:$!";
+- while(<IN>) {
+- chop();
+- s/^([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)$//o;
+- my $dbkey=$1;
+- my ($ratingDist, $ratingVotes, $ratingRank)=($2,$3,$4);
+-
+- my $val=$movies{$dbkey};
+- if ( !defined($val) ) {
+- $self->error("ratings list references unidentified title
'$1'");
+- next;
+- }
+- $movies{$dbkey}.=$tab.$ratingDist.$tab.$ratingVotes.$tab.$ratingRank;
+-
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
++
++ $self->endProgressBar();
++
++ $self->{maxid} = $count; # remember the largest values of title id (for loop
stop)
++
++ close(OUT);
++
++ #use Data::Dumper;print STDERR Dumper( $self->{titleshash} );die;
++
++ }
++
++
++ #----------------------------------------------------------------------------
++ elsif ( $stage >= 2 && $stage < $self->{stageLast} ) {
++
++ # these stages need the hash of film-title=>idxid
++ # if we have come from stage 1 (i.e. "prep-stage=all" then we will have
that from stage=1
++ # otherwise we will need to build *.e.g "prep-stage=2"
++ #
++ if (!defined( $self->{titleshash} ) ) {
++ my $countEstimate = $self->dbinfoGet("db_stat_movie_count", 0);
++ my ($titlecount, $maxid) = $self->gettitleshash($countEstimate);
++ if ($titlecount == -1) {
++ $self->error('could not make title list - quitting');
++ return(1);
+ }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(IN);
++ $self->{maxid} = $maxid; # remember the largest values of title id (for loop
stop)
++ #use Data::Dumper;print STDERR Dumper( $self->{titleshash} );
+ }
++
++ # nb: {stages} = { 1=>'movies', 2=>'directors',
3=>'actors', 4=>'actresses', 5=>'genres',
6=>'ratings', 7=>'keywords', 8=>'plot' };
++ my $stagename = $self->{stages}{$stage};
++ my $stagenametext = ucfirst $self->{stages}{$stage};
+
+- if ( 1 ) {
+- # fill in placeholder if no genres were found
+- while (my ($key, $val) = each (%movies)) {
+- my $t=index($val, $tab);
+- if ( $t == -1 ) {
+- die "corrupt entry '$key' '$val'";
+- }
+- my $j=index($val, $tab, $t+1);
+- if ( $j == -1 ) {
+- die "corrupt entry '$key' '$val'";
+- }
+- if ( index($val, $tab, $j+1) == -1 ) {
+- $movies{$key}.=$tab."<>".$tab."<>".$tab."<>";
+- }
+- }
++ $self->status("parsing $stagenametext list for stage $stage ...");
++
++ # skip optional stages
++ if ( ( !defined $self->{imdbListFiles}->{$stagename} ) && ( defined
$self->{optionalStages}->{$stagename} ) ) {
++ return(0);
+ }
++
++ # approx average record length for each incoming data file (used to guesstimate number
of records in file)
++ my %countestimates = ( 1=>'45', 2=> '80', 3=> '60',
4=> '60', 5=> '35', 6=> '115', 7=> '20',
8=> '50' );
++ my $countEstimate = $self->dbinfoCalcEstimate($stagename,
$countestimates{$stage});
+
+- $self->status("merging in stage 7 data (keywords)..");
+- #if ( 1 ) { # this stage is optional
+- if ( -f "$self->{imdbDir}/stage7.data" ) {
+- my $countEstimate=$self->dbinfoGet("db_stat_keywords_count", 1); #
'1' prevents the spurious "(nothing to do)" msg
+- my $progress=Term::ProgressBar->new({name => "merging keywords",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
++ my %stagefunctions = ( 1=>\&readMovies, 2=>\&readCastOrDirectors,
++ 3=>\&readCastOrDirectors, 4=>\&readCastOrDirectors,
++ 5=>\&readGenres, 6=>\&readRatings,
++ 7=>\&readKeywords, 8=>\&readPlots
++ );
+
+- open(IN, "< $self->{imdbDir}/stage7.data") || die
"$self->{imdbDir}/stage7.data:$!";
+- while(<IN>) {
+- chop();
+- s/^([^\t]+)\t+//o;
+- my $dbkey=$1;
+- my $keywords=$_;
+- if ( !defined($movies{$dbkey}) ) {
+- $self->error("keywords list references unidentified title
'$1'");
+- next;
+- }
+- $movies{$dbkey}.=$tab.$keywords;
++ my $num=$stagefunctions{$stage}->($self, $stagenametext, $countEstimate,
"$self->{imdbListFiles}->{$stagename}", $stage);
+
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
++ if ( $num < 0 ) {
++ if ( $num == -2 ) {
++ $self->error("you need to download
$self->{imdbListFiles}->{$stagename} from the ftp site, or use the --download
option");
+ }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(IN);
++ return(1);
+ }
++ elsif ( $num > 0 && abs($num - $countEstimate) > $countEstimate*.10 ) {
++ my $better=$self->dbinfoCalcBytesPerEntry($stagename, $num);
++ $self->status("ARG estimate of $countEstimate for $stagename needs updating,
found $num ($better bytes/entry)");
++ }
++ $self->dbinfoAdd("db_stat_${stagename}_count", "$num");
++
++
++
++ #-----------------------------------------------------------
++ # print the title keys in IDX id order : write the stagex.data file
++ #
++ #use Data::Dumper;my $_h="stage${stage}hash";print STDERR Dumper(
$self->{$_h} );
++
++ $self->beginProgressBar("writing stage $stage data", $num);
++
++ open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
++ print OUT '0000000:version '.$VERSION."\n";
++
++ # don't sort the hash keys - that will just cost memory. Just pull them out in
numerical order.
++ my $h = "stage${stage}hash";
++ #
++ # read the stage data hash in idxid order
++ for (my $i = 0; $i <= $self->{maxid}; $i++){
++
++ # write the extracted imdb data to a temporary file, preceeded by the IDX id for each
record
++ my $k = sprintf("%07d", $i);
+
+- if ( 1 ) {
+- # fill in default for movies we didn't have any keywords for
+- while (my ($key, $val) = each (%movies)) {
+- #keyword is 6th entry
+- my $t = 0;
+- for my $i (0..4) {
+- $t=index($val, $tab, $t);
+- if ( $t == -1 ) {
+- die "Corrupt entry '$key' '$val'";
+- }
+- $t+=1;
+- }
+- if ( index($val, $tab, $t) == -1 ) {
+- $movies{$key}.=$tab."<>";
+- }
++ if ( $self->{$h}{$i} ) {
++ my $v = $self->{$h}{$i};
++ delete ( $self->{$h}{$i} );
++ #
++ print OUT $k.':'.$v."\n";
+ }
++
++ $self->updateProgressBar('', $i);
+ }
++
++ $self->endProgressBar();
++
++ close(OUT);
++
++ #use Data::Dumper;print STDERR "leftovers: $stage ".Dumper( $self->{$h}
)."\n";
++
++ delete ( $self->{$h} );
++
++ #use Data::Dumper;print STDERR Dumper( $self->{titleshash} );
++ }
++
++
++ #----------------------------------------------------------------------------
++ elsif ( $stage == $self->{stageLast} ) {
++
++ # delete existing IDX; trim stage1.data to IDX; merge stage 2-8.data into DAT
++
++ # free up some memory
++ undef $self->{titleshash};
++
++ my $tab=sprintf("\t");
+
+- $self->status("merging in stage 8 data (plots)..");
+- #if ( 1 ) { # this stage is optional
+- if ( -f "$self->{imdbDir}/stage8.data" ) {
+- my $countEstimate=$self->dbinfoGet("db_stat_plots_count", 1); #
'1' prevents the spurious "(nothing to do)" msg
+- my $progress=Term::ProgressBar->new({name => "merging plots",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
++ $self->status("indexing all previous stage's data for stage
".$self->{stageLast}."...");
++
++
++ #----------------------------------------------------------------------
++ # read all the parsed data files created in stages 1-8 and merges them
++ # read one record at a time from each file!
++
++ my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
++
++ $self->beginProgressBar('writing database', $countEstimate);
+
+- open(IN, "< $self->{imdbDir}/stage8.data") || die
"$self->{imdbDir}/stage8.data:$!";
+- while(<IN>) {
+- chop();
+- s/^([^\t]+)\t+//o;
+- my $dbkey=$1;
+- my $plot=$_;
+- if ( !defined($movies{$dbkey}) ) {
+- $self->error("plot list references unidentified title '$1'");
++ open(IDX, "> $self->{moviedbIndex}") || die
"$self->{moviedbIndex}:$!";
++ open(DAT, "> $self->{moviedbData}") || die
"$self->{moviedbData}:$!";
++
++ my $i;
++ my %fh;
++ for $i (1..($self->{stageLast}-1)) {
++ # skip optional files if they don't exist
++ if ( ($i == 7 && !( -f "$self->{imdbDir}/stage7.data" ))
++ || ($i == 8 && !( -f "$self->{imdbDir}/stage8.data" )) ) {
++ next;
++ }
++ #
++ open($fh{$i}, "< $self->{imdbDir}/stage$i.data") || die
"$self->{imdbDir}/stage$i.data:$!";
++ }
++
++ # check the file version numbers
++ while (my ($k, $v) = each (%fh)) {
++ $_ = readline $v;
++ if ( m/^0000000:version ([\d\.]*)$/ ) {
++ if ($1 ne $VERSION) {
++ $self->error("incorrect database version in stage $k file");
++ return(1);
++ } else {
+ next;
+ }
+- $movies{$dbkey}.=$tab.$plot;
+-
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $. > $countEstimate ) {
+- $countEstimate = $progress->target($.+100);
+- $next_update=$progress->update($.);
+- }
+- elsif ( $. > $next_update ) {
+- $next_update=$progress->update($.);
+- }
+- }
++ } else {
++ $self->error("missing database version in stage $k file");
++ return(1);
+ }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(IN);
+ }
++
++
++ #----------------------------------------------------------------------
++ my %fdat;
+
+- if ( 1 ) {
+- # fill in default for movies we didn't have any plot for
+- while (my ($key, $val) = each (%movies)) {
+- #plot is 7th entry
+- my $t = 0;
+- for my $i (0..5) {
+- $t=index($val, $tab, $t);
+- if ( $t == -1 ) {
+- die "Corrupt entry '$key' '$val'";
+- }
+- $t+=1;
+- }
+- if ( index($val, $tab, $t) == -1 ) {
+- $movies{$key}.=$tab."<>";
+- }
+- }
+- }
+-
+- #unlink("$self->{imdbDir}/stage1.data");
+- #unlink("$self->{imdbDir}/stage2.data");
+- #unlink("$self->{imdbDir}/stage3.data");
+-
+- #
---------------------------------------------------------------------------------------
+-
+-
+- #
+- # note: not all movies end up with a cast, but we include them anyway.
+- #
+-
+- my %nmovies;
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "computing index",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- my $count=0;
+- while (my ($key, $val) = each (%movies)) {
+- my $dbkey=$key;
+-
+- # drop episode information - ex: {Twelve Angry Men (1954)}
+- $dbkey=~s/\s*\{[^\}]+\}//go;
+-
+- # todo - this would make things easier
+- # change double-quotes around title to be (made-for-tv) suffix instead
+- if ( $dbkey=~m/^\"/o && #"
+- $dbkey=~m/\"\s*\(/o ) { #"
+- $dbkey.=" (tv_series)";
+- }
+- # how rude, some entries have (TV) appearing more than once.
+- $dbkey=~s/\(TV\)\s*\(TV\)$/(TV)/o;
+-
+- my $qualifier;
+- if ( $dbkey=~s/\s+\(TV\)$//o ) {
+- $qualifier="tv_movie";
+- }
+- elsif ( $dbkey=~s/\s+\(mini\) \(tv_series\)$// ) {
+- $qualifier="tv_mini_series";
+- }
+- elsif ( $dbkey=~s/\s+\(tv_series\)$// ) {
+- $qualifier="tv_series";
+- }
+- elsif ( $dbkey=~s/\s+\(mini\)$//o ) {
+- $qualifier="tv_mini_series";
+- }
+- elsif ( $dbkey=~s/\s+\(V\)$//o ) {
+- $qualifier="video_movie";
+- }
+- elsif ( $dbkey=~s/\s+\(VG\)$//o ) {
+- #$qualifier="video_game";
+- delete($movies{$key});
+- next;
+- }
+- else {
+- $qualifier="movie";
+- }
+- #if (
$dbkey=~s/\s+\((tv_series|tv_mini_series|tv_movie|video_movie|video_game)\)$//o ) {
+- # $qualifier=$1;
+- #}
+- my $year;
+- my $title=$dbkey;
+-
+- if ( $title=~m/^\"/o && $title=~m/\"\s*\(/o ) { #"
+- $title=~s/^\"//o; #"
+- $title=~s/\"(\s*\()/$1/o; #"
+- }
++ my $count=0;
++ my $go=1;
++ while ($go) {
+
+- if ( $title=~s/\s+\((\d\d\d\d)\)$//o ||
+- $title=~s/\s+\((\d\d\d\d)\/[IVXL]+\)$//o ) {
+- $year=$1;
+- }
+- elsif ( $title=~s/\s+\((\?\?\?\?)\)$//o ||
+- $title=~s/\s+\((\?\?\?\?)\/[IVXL]+\)$//o ) {
+- $year="0000";
+- }
+- else {
+- $self->error("movie list format failed to decode year from title
'$title'");
+- $year="0000";
+- }
+- $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2
$1/og;
+-
+- my $hashkey=lc("$title ($year)");
+- $hashkey=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg;
+-
+- if ( defined($movies{$hashkey}) ) {
+- die "unable to place moviedb key for $key, report to
xmltv-devel\(a)lists.sf.net";
+- }
+- die "title \"$title\" contains a tab" if ( $title=~m/\t/o );
+- #print
"key:$dbkey\n\ttitle=$title\n\tyear=$year\n\tqualifier=$qualifier\n";
+- #print "key $key: value=\"$movies{$key}\"\n";
++ last if ( eof($fh{1}) ); # I suppose we ought to check if there any recs remaining in
the other files (todo)
++
++ # read a movie record
++ my ($fstage, $fidxid, $fdata) = $self->readdatafile($fh{1}, 1, -1);
+
+- $nmovies{$hashkey}=$dbkey.$tab.$year.$tab.$qualifier.$tab.delete($movies{$key});
++ $fdat{$fstage} = { k=>$fidxid, v=>$fdata };
++
++ if ($fidxid) {
+ $count++;
+
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
++ # get matching records from other data files
++ $self->readfilesbyidxid(\%fh, \%fdat, $fidxid);
++
++ # merge data from other records
++ my $mdata = $fidxid.':';
++
++ for $i (2..($self->{stageLast}-1)) {
++
++ # we can join actors and actresses - only 1 of them will have data now
++ next if ( $fdat{$i}{k} == $fidxid && $fdat{$i}{v} eq ':::' );
++ # only output either actors or actresses but not both (otherwise we'll get an
extra marker in the output
++ next if ($i == 3) && ( $fdat{3}{k} != $fidxid );
++ next if ($i == 4) && ( $fdat{4}{k} != $fidxid ) && ( $fdat{3}{k}
== $fidxid ); # dont output marker if we've just done it for actors
++ # drop through if actresses (#4) and no actors (#3) for this film
++
++
++ if ( $fdat{$i}{k} == $fidxid ) {
++ $mdata .= $fdat{$i}{v};
+ }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
++ else {
++ $mdata .= '<>';
++ if ($i == 6) { $mdata .=
"\t".'<>'."\t".'<>'; } # fudge to add
extra spacers in ratings data
+ }
++
++ $mdata .= "\t" unless $i == ($self->{stageLast}-1);
+ }
++
++ #print STDERR "mdata ".$mdata."\n";
++
++ # write the DAT record
++ print DAT $mdata ."\n";
++
++ # write the IDX record
++ print IDX $fdata ."\n";
+ }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+-
+- if ( scalar(keys %movies) != 0 ) {
+- die "what happened, we have keys left ?";
+- }
+- undef(%movies);
+- }
+-
+- {
+- my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0);
+- my $progress=Term::ProgressBar->new({name => "writing database",
+- count => $countEstimate,
+- ETA => 'linear'})
+- if ($self->{showProgressBar});
+- $progress->minor(0) if ($self->{showProgressBar});
+- $progress->max_update_rate(1) if ($self->{showProgressBar});
+- my $next_update=0;
+-
+- open(IDX, "> $self->{moviedbIndex}") || die
"$self->{moviedbIndex}:$!";
+- open(DAT, "> $self->{moviedbData}") || die
"$self->{moviedbData}:$!";
+- my $count=0;
+- for my $key (sort {$a cmp $b} keys %nmovies) {
+- my $val=delete($nmovies{$key});
+- #print "movie $key: $val\n";
+- #$val=~s/^([^\t]+)\t([^\t]+)\t([^\t]+)\t//o || die "internal failure
($key:$val)";
+- my ($dbkey, $year, $qualifier,$directors,$actors,@rest)=split('\t', $val);
+- #die ("no 1") if ( !defined($dbkey));
+- #die ("no 2") if ( !defined($year));
+- #die ("no 3") if ( !defined($qualifier));
+- #die ("no 4") if ( !defined($directors));
+- #die ("no 5") if ( !defined($actors));
+- #print
"key:$key\n\ttitle=$dbkey\n\tyear=$year\n\tqualifier=$qualifier\n";
+-
+- #my ($directors, $actors)=split('\t', $val);
+-
+- my $details="";
+-
+- if ( $directors eq "<>" ) {
+- $details.="<>";
+- }
+- else {
+- # sort directors by last name, removing duplicates
+- my $last='';
+- for my $name (sort {$a cmp $b} split('\|', $directors)) {
+- if ( $name ne $last ) {
+- $details.="$name|";
+- $last=$name;
+- }
+- }
+- $details=~s/\|$//o;
+- }
+
+- #print " $dbkey: $val\n";
+- if ( $actors eq "<>" ) {
+- $details.=$tab."<>";
+- }
+- else {
+- $details.=$tab;
+-
+- # sort actors by billing, removing repeated entries
+- # be warned, two actors may have the same billing level
+- my $last='';
+- for my $c (sort {$a cmp $b} split('\|', $actors)) {
+- my ($billing, $name)=split(':', $c);
+- # remove Host/Narrators from end
+- # BUG - should remove (I)'s from actors/actresses names when details are
generated
+- $name=~s/\s\([IVXL]+\)\[/\[/o;
+- $name=~s/\s\([IVXL]+\)$//o;
+-
+- if ( $name ne $last ) {
+- $details.="$name|";
+- $last=$name;
+- }
+- #print " $c: split gives'$billing' and '$name'\n";
+- }
+- $details=~s/\|$//o;
+- }
+- $count++;
+- my $lineno=sprintf("%07d", $count);
+- print IDX
$key."\t".$dbkey."\t".$year."\t".$qualifier."\t".$lineno."\n";
+- print DAT $lineno.":".$details."\t".join($tab,
@rest)."\n";
+-
+- if ($self->{showProgressBar}) {
+- # re-adjust target so progress bar doesn't seem too wonky
+- if ( $count > $countEstimate ) {
+- $countEstimate = $progress->target($count+100);
+- $next_update=$progress->update($count);
+- }
+- elsif ( $count > $next_update ) {
+- $next_update=$progress->update($count);
+- }
+- }
+- }
+- $progress->update($countEstimate) if ($self->{showProgressBar});
+- close(DAT);
+- close(IDX);
++
++ $self->updateProgressBar('', $count);
+ }
++
++ $self->endProgressBar();
++
++ $self->status(sprintf("wrote ".withThousands($count)." titles in %d
seconds",time()-$startTime));
+
++ close(IDX);
++ close(IN);
++ while (my ($k, $v) = each (%fh)) {
++ close($v);
++ }
++
++
++
++ #
---------------------------------------------------------------------------------------
++
+ $self->dbinfoAdd("db_version", $XMLTV::IMDB::VERSION);
+
+ if ( $self->dbinfoSave() ) {
+@@ -3399,7 +3469,7 @@ sub crunchStage($$)
+ #$self->error("prep stages must be run in sequence..");
+ $self->error("prepStage $st either has never been run or failed");
+ if ( grep { $_ == $st } values %{$self->{optionalStages}} ) {
+- $self->error("data for this stage will NOT be added");
++ $self->error("data for this stage will NOT be added"); ####### todo:
unless flag present
+ } else {
+ $self->error("rerun tv_imdb with --prepStage=$st");
+ return(1);
+@@ -3416,6 +3486,7 @@ sub crunchStage($$)
+ }
+ }
+
++ # open stage logfile and run the requested stage
+ $self->redirect("$self->{imdbDir}/stage$stage.log") || return(1);
+ my $ret=$self->invokeStage($stage);
+ $self->redirect(undef);
+@@ -3425,7 +3496,7 @@ sub crunchStage($$)
+ $self->status("prep stage $stage succeeded with no errors");
+ }
+ else {
+- $self->status("prep stage $stage succeeded with $self->{errorCountInLog}
errors in $self->{imdbDir}/stage$stage.log");
++ $self->status("prep stage $stage succeeded with $self->{errorCountInLog}
errors in $self->{imdbDir}/stage$stage.log");
+ if ( $stage == $self->{stageLast} && $self->{errorCountInLog} > 30
&& $self->{errorCountInLog} < 80 ) {
+ $self->status("this stage commonly produces around 60 (or so) warnings
because of imdb");
+ $self->status("list file inconsistancies, they can usually be safely
ignored");
+--
+2.29.2
+
diff --git a/0041-bugfixes-in-augment-function.patch
b/0041-bugfixes-in-augment-function.patch
new file mode 100644
index 0000000..89e5765
--- /dev/null
+++ b/0041-bugfixes-in-augment-function.patch
@@ -0,0 +1,401 @@
+From 2f8939e54e89c01a03a9a2d2495002fb0deb7c95 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Wed, 13 Jan 2021 17:13:12 +0000
+Subject: [PATCH 41/50] bugfixes in augment() function
+
+---
+ lib/IMDB.pm | 255 +++++++++++++++++++++++++++-------------------------
+ 1 file changed, 135 insertions(+), 120 deletions(-)
+
+diff --git a/lib/IMDB.pm b/lib/IMDB.pm
+index cd805b35..4592a29e 100644
+--- a/lib/IMDB.pm
++++ b/lib/IMDB.pm
+@@ -30,6 +30,8 @@ use strict;
+
+ package XMLTV::IMDB;
+
++use Search::Dict;
++
+ use open ':encoding(iso-8859-1)'; # try to enforce file encoding (does this
work in Perl <5.8.1? )
+
+ #
+@@ -49,6 +51,8 @@ use open ':encoding(iso-8859-1)'; # try to enforce file
encoding (does this wo
+ # bug: movies with (aka...) in title not handled properly
+ # bug: incorrect data generated for a tv series (only the last episode found is
stored)
+ # bug: genres and cast are rolled-up from all episodes to the series record
(misleading)
++# bug: multiple matches can sometimes extract the first one it comes across as a
'hit'
++# (potentially wrong - it should not augment incoming prog when multiple matches)
+ #
+ #
+ our $VERSION = '0.11'; # version number of database
+@@ -129,10 +133,10 @@ sub loadDBInfo($)
+
+ open(INFO, "< $file") || return("imdbDir index file
\"$file\":$!\n");
+ while(<INFO>) {
+- chop();
+- if ( s/^([^:]+)://o ) {
+- $info->{$1}=$_;
+- }
++ chomp();
++ if ( s/^([^:]+)://o ) {
++ $info->{$1}=$_;
++ }
+ }
+ close(INFO);
+ return($info);
+@@ -196,7 +200,7 @@ sub basicVerificationOfIndexes($)
+ {
+ my $self=shift;
+
+- # check that the imdbdir is invalid and up and running
++ # check that the imdbdir is valid and up and running
+ my $title="Army of Darkness";
+ my $year=1992;
+
+@@ -286,6 +290,7 @@ sub basicVerificationOfIndexes($)
+ }
+
+ $self->closeMovieIndex();
++ # all okay
+ return(undef);
+
+ }
+@@ -325,8 +330,6 @@ sub debug($$)
+ }
+ }
+
+-use Search::Dict;
+-
+ sub openMovieIndex($)
+ {
+ my $self=shift;
+@@ -354,9 +357,9 @@ sub closeMovieIndex($)
+ return(1);
+ }
+
+-# moviedbIndex file has the format:
+-# title:lineno
+-# where key is a url encoded title followed by the year of production and a colon
++# moviedbIndex is a TSV file with the format:
++# searchtitle title year progtype lineno
++#
+ sub getMovieMatches($$$)
+ {
+ my $self=shift;
+@@ -387,66 +390,66 @@ sub getMovieMatches($$$)
+ Search::Dict::look(*{$FD}, $match, 0, 0);
+ my $results;
+ while (<$FD>) {
+- last if ( !m/^$match/ );
++ last if ( !m/^$match/ );
+
+- chop();
+- my @arr=split('\t', $_);
+- if ( scalar(@arr) != 5 ) {
+- warn "$self->{moviedbIndex} corrupt (correct key:$_)";
+- next;
+- }
++ chomp();
++ my @arr=split('\t', $_);
++ if ( scalar(@arr) != 5 ) {
++ warn "$self->{moviedbIndex} corrupt (correct key:$_)";
++ next;
++ }
+
+- if ( $arr[0] eq $match ) {
+- # return title and id
+- #$arr[1]=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2
$1/og;
++ if ( $arr[0] eq $match ) {
++ # return title and id
++ #$arr[1]=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2
$1/og;
+
+- #$arr[0]=~s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) :
utf8_chr(hex($2))/oge;
+- #$self->debug("exact:$arr[1] ($arr[2]) qualifier=$arr[3] id=$arr[4]");
+- my $title=$arr[1];
+- if ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\)$//o ) {
+- }
+- elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVXL]+\)$//o ) {
++ #$arr[0]=~s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) :
utf8_chr(hex($2))/oge;
++ #$self->debug("exact:$arr[1] ($arr[2]) qualifier=$arr[3] id=$arr[4]");
++ my $title=$arr[1];
++ if ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\)$//o ) {
++ }
++ elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVXL]+\)$//o ) {
++ }
++ else {
++ die "unable to decode year from title key \"$title\", report to
xmltv-devel\(a)lists.sf.net";
++ }
++ $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og;
++ $self->debug("exact:$title ($arr[2]) qualifier=$arr[3] id=$arr[4]");
++ push(@{$results->{exactMatch}}, {'key'=> $arr[1],
++ 'title'=>$title,
++ 'year'=>$arr[2],
++ 'qualifier'=>$arr[3],
++ 'id'=>$arr[4]});
+ }
+ else {
+- die "unable to decode year from title key \"$title\", report to
xmltv-devel\(a)lists.sf.net";
+- }
+- $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og;
+- $self->debug("exact:$title ($arr[2]) qualifier=$arr[3] id=$arr[4]");
+- push(@{$results->{exactMatch}}, {'key'=> $arr[1],
+- 'title'=>$title,
+- 'year'=>$arr[2],
+- 'qualifier'=>$arr[3],
+- 'id'=>$arr[4]});
+- }
+- else {
+- # decode
+- #s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) :
utf8_chr(hex($2))/oge;
+- # return title
+- #$arr[1]=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2
$1/og;
+- #$arr[0]=~s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) :
utf8_chr(hex($2))/oge;
+- #$self->debug("close:$arr[1] ($arr[2]) qualifier=$arr[3] id=$arr[4]");
+- my $title=$arr[1];
++ # decode
++ #s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) :
utf8_chr(hex($2))/oge;
++ # return title
++ #$arr[1]=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2
$1/og;
++ #$arr[0]=~s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) :
utf8_chr(hex($2))/oge;
++ #$self->debug("close:$arr[1] ($arr[2]) qualifier=$arr[3] id=$arr[4]");
++ my $title=$arr[1];
+
+- if ( $title=~m/^\"/o && $title=~m/\"\s*\(/o ) { #"
+- $title=~s/^\"//o; #"
+- $title=~s/\"(\s*\()/$1/o; #"
+- }
++ if ( $title=~m/^\"/o && $title=~m/\"\s*\(/o ) { #"
++ $title=~s/^\"//o; #"
++ $title=~s/\"(\s*\()/$1/o; #"
++ }
+
+- if ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\)$//o ) {
+- }
+- elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVXL]+\)$//o ) {
+- }
+- else {
+- die "unable to decode year from title key \"$title\", report to
xmltv-devel\(a)lists.sf.net";
++ if ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\)$//o ) {
++ }
++ elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVXL]+\)$//o ) {
++ }
++ else {
++ die "unable to decode year from title key \"$title\", report to
xmltv-devel\(a)lists.sf.net";
++ }
++ $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og;
++ $self->debug("close:$title ($arr[2]) qualifier=$arr[3] id=$arr[4]");
++ push(@{$results->{closeMatch}}, {'key'=> $arr[1],
++ 'title'=>$title,
++ 'year'=>$arr[2],
++ 'qualifier'=>$arr[3],
++ 'id'=>$arr[4]});
+ }
+- $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og;
+- $self->debug("close:$title ($arr[2]) qualifier=$arr[3] id=$arr[4]");
+- push(@{$results->{closeMatch}}, {'key'=> $arr[1],
+- 'title'=>$title,
+- 'year'=>$arr[2],
+- 'qualifier'=>$arr[3],
+- 'id'=>$arr[4]});
+- }
+ }
+ #print "MovieMatches on ($match) = ".Dumper($results)."\n";
+ return($results);
+@@ -459,14 +462,14 @@ sub getMovieExactMatch($$$)
+ my $year=shift;
+ my $res=$self->getMovieMatches($title, $year);
+
+- return(undef) if ( !defined($res) );
++ return(undef, 0) if ( !defined($res) );
+ if ( !defined($res->{exactMatch}) ) {
+- return(undef);
++ return(undef, 0);
+ }
+ if ( scalar(@{$res->{exactMatch}}) != 1 ) {
+- return(undef);
++ return(undef, scalar(@{$res->{exactMatch}}));
+ }
+- return($res->{exactMatch}[0]);
++ return($res->{exactMatch}[0], 1);
+ }
+
+ sub getMovieCloseMatches($$)
+@@ -485,6 +488,9 @@ sub getMovieCloseMatches($$)
+ return(@arr);
+ }
+
++# moviedbData file is a TSV file with the format:
++# lineno:directors actors genres ratingDist ratingVotes ratingRank keywords plot
++#
+ sub getMovieIdDetails($$)
+ {
+ my $self=shift;
+@@ -497,57 +503,57 @@ sub getMovieIdDetails($$)
+ my $FD=$self->{DBASE_FD};
+ Search::Dict::look(*{$FD}, "$id:", 0, 0);
+ while (<$FD>) {
+- last if ( !m/^$id:/ );
+- chop();
+- if ( s/^$id:// ) {
+- my ($directors, $actors, $genres, $ratingDist, $ratingVotes, $ratingRank, $keywords,
$plot)=split('\t', $_);
+- if ( $directors ne "<>" ) {
+- for my $name (split('\|', $directors)) {
+- # remove (I) etc from
imdb.com names (kept in place for reference)
+- $name=~s/\s\([IVXL]+\)$//o;
+- # switch name around to be surname last
+- $name=~s/^([^,]+),\s*(.*)$/$2 $1/o;
+- push(@{$results->{directors}}, $name);
+- }
+- }
+- if ( $actors ne "<>" ) {
+- for my $name (split('\|', $actors)) {
+- # remove (I) etc from
imdb.com names (kept in place for reference)
+- my $HostNarrator;
+- if ( $name=~s/\[([^\]]+)\]$//o ) {
+- $HostNarrator=$1;
++ last if ( !m/^$id:/ );
++ chomp();
++ if ( s/^$id:// ) {
++ my ($directors, $actors, $genres, $ratingDist, $ratingVotes, $ratingRank, $keywords,
$plot)=split('\t', $_);
++ if ( $directors ne "<>" ) {
++ for my $name (split('\|', $directors)) {
++ # remove (I) etc from imdb.com names (kept in place for reference)
++ $name=~s/\s\([IVXL]+\)$//o;
++ # switch name around to be surname last
++ $name=~s/^([^,]+),\s*(.*)$/$2 $1/o;
++ push(@{$results->{directors}}, $name);
+ }
+- $name=~s/\s\([IVXL]+\)$//o;
++ }
++ if ( $actors ne "<>" ) {
++ for my $name (split('\|', $actors)) {
++ # remove (I) etc from imdb.com names (kept in place for reference)
++ my $HostNarrator;
++ if ( $name=~s/\s?\[([^\]]+)\]$//o ) {
++ $HostNarrator=$1;
++ }
++ $name=~s/\s\([IVXL]+\)$//o;
+
+- # switch name around to be surname last
+- $name=~s/^([^,]+),\s*(.*)$/$2 $1/o;
+- if ( $HostNarrator ) {
+- if ( $HostNarrator=~s/,*Host//o ) {
+- push(@{$results->{presenter}}, $name);
++ # switch name around to be surname last
++ $name=~s/^([^,]+),\s*(.*)$/$2 $1/o;
++ if ( $HostNarrator ) {
++ if ( $HostNarrator=~s/,*Host//o ) {
++ push(@{$results->{presenter}}, $name);
++ }
++ if ( $HostNarrator=~s/,*Narrator//o ) {
++ push(@{$results->{commentator}}, $name);
++ }
+ }
+- if ( $HostNarrator=~s/,*Narrator//o ) {
+- push(@{$results->{commentator}}, $name);
++ else {
++ push(@{$results->{actors}}, $name);
+ }
+ }
+- else {
+- push(@{$results->{actors}}, $name);
+- }
+ }
++ if ( $genres ne "<>" ) {
++ push(@{$results->{genres}}, split('\|', $genres));
++ }
++ if ( $keywords ne "<>" ) {
++ push(@{$results->{keywords}}, split(',', $keywords));
++ }
++ $results->{ratingDist}=$ratingDist if ( $ratingDist ne "<>" );
++ $results->{ratingVotes}=$ratingVotes if ( $ratingVotes ne "<>" );
++ $results->{ratingRank}=$ratingRank if ( $ratingRank ne "<>" );
++ $results->{plot}=$plot if ( $plot ne "<>" );
+ }
+- if ( $genres ne "<>" ) {
+- push(@{$results->{genres}}, split('\|', $genres));
+- }
+- if ( $keywords ne "<>" ) {
+- push(@{$results->{keywords}}, split(',', $keywords));
++ else {
++ warn "lookup of movie (id=$id) resulted in garbage ($_)";
+ }
+- $results->{ratingDist}=$ratingDist if ( $ratingDist ne "<>" );
+- $results->{ratingVotes}=$ratingVotes if ( $ratingVotes ne "<>" );
+- $results->{ratingRank}=$ratingRank if ( $ratingRank ne "<>" );
+- $results->{plot}=$plot if ( $plot ne "<>" );
+- }
+- else {
+- warn "lookup of movie (id=$id) resulted in garbage ($_)";
+- }
+ }
+ if ( !defined($results) ) {
+ # some movies we don't have any details for
+@@ -661,7 +667,12 @@ sub findMovieInfo($$$$)
+ if ( $exact == 1 ) {
+ # try an exact match first :)
+ for my $mytitle ( @titles ) {
+- my $info=$self->getMovieExactMatch($mytitle, $year);
++ my ($info,$matchcount) = $self->getMovieExactMatch($mytitle, $year);
++ if ($matchcount > 1) {
++ # if multiple records exactly match title+year then we don't know which one is correct
++ $self->status("multiple hits on movie \"$mytitle ($year)\"");
++ return(undef, $matchcount);
++ }
+ if ( defined($info) ) {
+ if ( $info->{qualifier} eq "movie" ) {
+ $self->status("perfect hit on movie \"$info->{key}\"");
+@@ -834,15 +845,15 @@ sub findTVSeriesInfo($$)
+ my ($self, $title)=@_;
+
+ if ( $self->{cacheLookups} ) {
+- my $id=$self->{cachedLookups}->{tv_series}->{$title};
++ my $id=$self->{cachedLookups}->{tv_series}->{$title};
+
+- if ( defined($id) ) {
+- #print STDERR "REF= (".ref($id).")\n";
+- if ( $id ne '' ) {
+- return($id);
++ if ( defined($id) ) {
++ #print STDERR "REF= (".ref($id).")\n";
++ if ( $id ne '' ) {
++ return($id);
++ }
++ return(undef);
+ }
+- return(undef);
+- }
+ }
+
+ my @titles=@{alternativeTitles($title)};
+@@ -1254,11 +1265,15 @@ sub augmentProgram($$$)
+ # - exact matches on movies
+ # - exact matches on tv series
+ # - close matches on movies
+- my $id=$self->findMovieInfo($title, $prog->{date}, 1); # exact match
++ my ($id, $matchcount) = $self->findMovieInfo($title, $prog->{date}, 1); # exact match
++ if (defined $matchcount && $matchcount > 1) {
++ $self->status("failed to find a sole match for movie \"$title ($prog->{date})\"");
++ return(undef);
++ }
+ if ( !defined($id) ) {
+ $id=$self->findTVSeriesInfo($title);
+ if ( !defined($id) ) {
+- $id=$self->findMovieInfo($title, $prog->{date}, 0); # close match
++ ($id, $matchcount) = $self->findMovieInfo($title, $prog->{date}, 0); # close match
+ }
+ }
+ if ( defined($id) ) {
+@@ -1283,7 +1298,7 @@ sub augmentProgram($$$)
+ # this has hard to support 'close' results, unless we know
+ # for certain we're looking for a movie (ie duration etc)
+ # this is a bad idea.
+- my $id=$self->findMovieInfo($title, undef, 2); # any title match
++ my ($id, $matchcount) = $self->findMovieInfo($title, undef, 2); # any title match
+ if ( defined($id) ) {
+ $self->{stats}->{$id->{matchLevel}."Matches"}++;
+ $self->{stats}->{$id->{matchLevel}}->{$id->{qualifier}}++;
+--
+2.29.2
+
diff --git a/0042-Add-tests-for-edge-cases.patch b/0042-Add-tests-for-edge-cases.patch
new file mode 100644
index 0000000..d683c4f
--- /dev/null
+++ b/0042-Add-tests-for-edge-cases.patch
@@ -0,0 +1,1112 @@
+From c32d625fb7fa32f3ed2f7cf6eff386a3d1cba8a0 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Wed, 13 Jan 2021 17:29:53 +0000
+Subject: [PATCH 42/50] Add tests for edge cases
+
+---
+ MANIFEST | 64 +++++++++++++++----
+ t/data-tv_imdb/After-data-freeze.xml | 8 +++
+ t/data-tv_imdb/After-data-freeze.xml-expected | 9 +++
+ t/data-tv_imdb/Cast-actor-with-generation.xml | 8 +++
+ .../Cast-actor-with-generation.xml-expected | 14 ++++
+ t/data-tv_imdb/Cast-actors-and-actresses.xml | 8 +++
+ .../Cast-actors-and-actresses.xml-expected | 15 +++++
+ t/data-tv_imdb/Cast-billing.xml | 8 +++
+ t/data-tv_imdb/Cast-billing.xml-expected | 16 +++++
+ t/data-tv_imdb/Cast-duplicate.xml | 8 +++
+ t/data-tv_imdb/Cast-duplicate.xml-expected | 14 ++++
+ t/data-tv_imdb/Cast-host-or-narrator.xml | 28 ++++++++
+ .../Cast-host-or-narrator.xml-expected | 59 +++++++++++++++++
+ t/data-tv_imdb/Cast-name-with-suffix.xml | 9 +++
+ .../Cast-name-with-suffix.xml-expected | 17 +++++
+ t/data-tv_imdb/Cast-role.xml | 12 ++++
+ t/data-tv_imdb/Cast-role.xml-expected | 25 ++++++++
+ ...ector-multiple-and-duplicate-directors.xml | 12 ++++
+ ...tiple-and-duplicate-directors.xml-expected | 24 +++++++
+ t/data-tv_imdb/Director-name-with-suffix.xml | 8 +++
+ .../Director-name-with-suffix.xml-expected | 14 ++++
+ t/data-tv_imdb/Director-with-generation.xml | 8 +++
+ .../Director-with-generation.xml-expected | 14 ++++
+ t/data-tv_imdb/Genres-duplicate.xml | 8 +++
+ t/data-tv_imdb/Genres-duplicate.xml-expected | 14 ++++
+ t/data-tv_imdb/Genres-multiple.xml | 8 +++
+ t/data-tv_imdb/Genres-multiple.xml-expected | 14 ++++
+ t/data-tv_imdb/Genres-single.xml | 8 +++
+ t/data-tv_imdb/Genres-single.xml-expected | 12 ++++
+ .../Movie-same-year-movie-and-series.xml | 14 ++++
+ ...ie-same-year-movie-and-series.xml-expected | 15 +++++
+ t/data-tv_imdb/Movie-startswith-hyphen.xml | 8 +++
+ .../Movie-startswith-hyphen.xml-expected | 11 ++++
+ t/data-tv_imdb/Movie-two-in-same-year.xml | 9 +++
+ .../Movie-two-in-same-year.xml-expected | 10 +++
+ t/data-tv_imdb/Movie-with-aka.xml | 8 +++
+ t/data-tv_imdb/Movie-with-aka.xml-expected | 11 ++++
+ t/data-tv_imdb/Movie-with-unknown-year.xml | 14 ++++
+ .../Movie-with-unknown-year.xml-expected | 15 +++++
+ t/data-tv_imdb/Ratings.xml | 8 +++
+ t/data-tv_imdb/Ratings.xml-expected | 14 ++++
+ t/data-tv_imdb/lists/actors.list | 14 ++++
+ t/data-tv_imdb/lists/actresses.list | 21 ++++++
+ t/data-tv_imdb/lists/directors.list | 12 ++++
+ t/data-tv_imdb/lists/genres.list | 8 +++
+ t/data-tv_imdb/lists/movies.list | 41 ++++++++++++
+ t/data-tv_imdb/lists/ratings.list | 1 +
+ 47 files changed, 688 insertions(+), 12 deletions(-)
+ create mode 100644 t/data-tv_imdb/After-data-freeze.xml
+ create mode 100644 t/data-tv_imdb/After-data-freeze.xml-expected
+ create mode 100644 t/data-tv_imdb/Cast-actor-with-generation.xml
+ create mode 100644 t/data-tv_imdb/Cast-actor-with-generation.xml-expected
+ create mode 100644 t/data-tv_imdb/Cast-actors-and-actresses.xml
+ create mode 100644 t/data-tv_imdb/Cast-actors-and-actresses.xml-expected
+ create mode 100644 t/data-tv_imdb/Cast-billing.xml
+ create mode 100644 t/data-tv_imdb/Cast-billing.xml-expected
+ create mode 100644 t/data-tv_imdb/Cast-duplicate.xml
+ create mode 100644 t/data-tv_imdb/Cast-duplicate.xml-expected
+ create mode 100644 t/data-tv_imdb/Cast-host-or-narrator.xml
+ create mode 100644 t/data-tv_imdb/Cast-host-or-narrator.xml-expected
+ create mode 100644 t/data-tv_imdb/Cast-name-with-suffix.xml
+ create mode 100644 t/data-tv_imdb/Cast-name-with-suffix.xml-expected
+ create mode 100644 t/data-tv_imdb/Cast-role.xml
+ create mode 100644 t/data-tv_imdb/Cast-role.xml-expected
+ create mode 100644 t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml
+ create mode 100644 t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml-expected
+ create mode 100644 t/data-tv_imdb/Director-name-with-suffix.xml
+ create mode 100644 t/data-tv_imdb/Director-name-with-suffix.xml-expected
+ create mode 100644 t/data-tv_imdb/Director-with-generation.xml
+ create mode 100644 t/data-tv_imdb/Director-with-generation.xml-expected
+ create mode 100644 t/data-tv_imdb/Genres-duplicate.xml
+ create mode 100644 t/data-tv_imdb/Genres-duplicate.xml-expected
+ create mode 100644 t/data-tv_imdb/Genres-multiple.xml
+ create mode 100644 t/data-tv_imdb/Genres-multiple.xml-expected
+ create mode 100644 t/data-tv_imdb/Genres-single.xml
+ create mode 100644 t/data-tv_imdb/Genres-single.xml-expected
+ create mode 100644 t/data-tv_imdb/Movie-same-year-movie-and-series.xml
+ create mode 100644 t/data-tv_imdb/Movie-same-year-movie-and-series.xml-expected
+ create mode 100644 t/data-tv_imdb/Movie-startswith-hyphen.xml
+ create mode 100644 t/data-tv_imdb/Movie-startswith-hyphen.xml-expected
+ create mode 100644 t/data-tv_imdb/Movie-two-in-same-year.xml
+ create mode 100644 t/data-tv_imdb/Movie-two-in-same-year.xml-expected
+ create mode 100644 t/data-tv_imdb/Movie-with-aka.xml
+ create mode 100644 t/data-tv_imdb/Movie-with-aka.xml-expected
+ create mode 100644 t/data-tv_imdb/Movie-with-unknown-year.xml
+ create mode 100644 t/data-tv_imdb/Movie-with-unknown-year.xml-expected
+ create mode 100644 t/data-tv_imdb/Ratings.xml
+ create mode 100644 t/data-tv_imdb/Ratings.xml-expected
+
+diff --git a/MANIFEST b/MANIFEST
+index d96fc2fe..fcf6a34e 100644
+--- a/MANIFEST
++++ b/MANIFEST
+@@ -911,20 +911,40 @@ t/data-tv_imdb/lists/keywords.list
+ t/data-tv_imdb/lists/movies.list
+ t/data-tv_imdb/lists/plot.list
+ t/data-tv_imdb/lists/ratings.list
++t/data-tv_imdb/After-data-freeze.xml
++t/data-tv_imdb/After-data-freeze.xml-expected
++t/data-tv_imdb/Cast-actor-with-generation.xml
++t/data-tv_imdb/Cast-actor-with-generation.xml-expected
++t/data-tv_imdb/Cast-actors-and-actresses.xml
++t/data-tv_imdb/Cast-actors-and-actresses.xml-expected
++t/data-tv_imdb/Cast-billing.xml
++t/data-tv_imdb/Cast-billing.xml-expected
++t/data-tv_imdb/Cast-duplicate.xml
++t/data-tv_imdb/Cast-duplicate.xml-expected
++t/data-tv_imdb/Cast-host-or-narrator.xml
++t/data-tv_imdb/Cast-host-or-narrator.xml-expected
++t/data-tv_imdb/Cast-name-with-suffix.xml
++t/data-tv_imdb/Cast-name-with-suffix.xml-expected
++t/data-tv_imdb/Cast-role.xml
++t/data-tv_imdb/Cast-role.xml-expected
++t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml
++t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml-expected
++t/data-tv_imdb/Director-name-with-suffix.xml
++t/data-tv_imdb/Director-name-with-suffix.xml-expected
++t/data-tv_imdb/Director-with-generation.xml
++t/data-tv_imdb/Director-with-generation.xml-expected
++t/data-tv_imdb/Genres-duplicate.xml
++t/data-tv_imdb/Genres-duplicate.xml-expected
++t/data-tv_imdb/Genres-multiple.xml
++t/data-tv_imdb/Genres-multiple.xml-expected
++t/data-tv_imdb/Genres-single.xml
++t/data-tv_imdb/Genres-single.xml-expected
++t/data-tv_imdb/Movie1.xml
++t/data-tv_imdb/Movie1.xml-expected
+ t/data-tv_imdb/Movie1-case-insensitive.xml
+ t/data-tv_imdb/Movie1-case-insensitive.xml-expected
+ t/data-tv_imdb/Movie1-movies-only.xml
+ t/data-tv_imdb/Movie1-movies-only.xml-expected
+-t/data-tv_imdb/Movie1.xml
+-t/data-tv_imdb/Movie1.xml-expected
+-t/data-tv_imdb/Movie100-years.xml
+-t/data-tv_imdb/Movie100-years.xml-expected
+-t/data-tv_imdb/Movie101-movie-and-tv.xml
+-t/data-tv_imdb/Movie101-movie-and-tv.xml-expected
+-t/data-tv_imdb/Movie21-accents.xml
+-t/data-tv_imdb/Movie21-accents.xml-expected
+-t/data-tv_imdb/Movie22-dots.xml
+-t/data-tv_imdb/Movie22-dots.xml-expected
+ t/data-tv_imdb/Movie3-and-amp.xml
+ t/data-tv_imdb/Movie3-and-amp.xml-expected
+ t/data-tv_imdb/Movie5-ignore-punc.xml
+@@ -933,10 +953,30 @@ t/data-tv_imdb/Movie5-with-punc.xml
+ t/data-tv_imdb/Movie5-with-punc.xml-expected
+ t/data-tv_imdb/Movie6-articles.xml
+ t/data-tv_imdb/Movie6-articles.xml-expected
+-t/data-tv_imdb/Show1-movies-only.xml
+-t/data-tv_imdb/Show1-movies-only.xml-expected
++t/data-tv_imdb/Movie21-accents.xml
++t/data-tv_imdb/Movie21-accents.xml-expected
++t/data-tv_imdb/Movie22-dots.xml
++t/data-tv_imdb/Movie22-dots.xml-expected
++t/data-tv_imdb/Movie100-years.xml
++t/data-tv_imdb/Movie100-years.xml-expected
++t/data-tv_imdb/Movie101-movie-and-tv.xml
++t/data-tv_imdb/Movie101-movie-and-tv.xml-expected
++t/data-tv_imdb/Movie-same-year-movie-and-series.xml
++t/data-tv_imdb/Movie-same-year-movie-and-series.xml-expected
++t/data-tv_imdb/Movie-startswith-hyphen.xml
++t/data-tv_imdb/Movie-startswith-hyphen.xml-expected
++t/data-tv_imdb/Movie-two-in-same-year.xml
++t/data-tv_imdb/Movie-two-in-same-year.xml-expected
++t/data-tv_imdb/Movie-with-aka.xml
++t/data-tv_imdb/Movie-with-aka.xml-expected
++t/data-tv_imdb/Movie-with-unknown-year.xml
++t/data-tv_imdb/Movie-with-unknown-year.xml-expected
++t/data-tv_imdb/Ratings.xml
++t/data-tv_imdb/Ratings.xml-expected
+ t/data-tv_imdb/Show1.xml
+ t/data-tv_imdb/Show1.xml-expected
++t/data-tv_imdb/Show1-movies-only.xml
++t/data-tv_imdb/Show1-movies-only.xml-expected
+ t/test_tv_imdb.t
+ t/data/tv_sort_all_UTF8.expected
+ t/data/tv_sort_amp_xml_amp_xml.expected
+diff --git a/t/data-tv_imdb/After-data-freeze.xml b/t/data-tv_imdb/After-data-freeze.xml
+new file mode 100644
+index 00000000..4f0e1aac
+--- /dev/null
++++ b/t/data-tv_imdb/After-data-freeze.xml
+@@ -0,0 +1,8 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>Unarmed Man</title>
++ <date>2019</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/After-data-freeze.xml-expected b/t/data-tv_imdb/After-data-freeze.xml-expected
+new file mode 100644
+index 00000000..24b1003f
+--- /dev/null
++++ b/t/data-tv_imdb/After-data-freeze.xml-expected
+@@ -0,0 +1,9 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>Unarmed Man</title>
++ <date>2019</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Cast-actor-with-generation.xml b/t/data-tv_imdb/Cast-actor-with-generation.xml
+new file mode 100644
+index 00000000..076b8ed1
+--- /dev/null
++++ b/t/data-tv_imdb/Cast-actor-with-generation.xml
+@@ -0,0 +1,8 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>Murder101</title>
++ <date>2014</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Cast-actor-with-generation.xml-expected b/t/data-tv_imdb/Cast-actor-with-generation.xml-expected
+new file mode 100644
+index 00000000..0ba748c2
+--- /dev/null
++++ b/t/data-tv_imdb/Cast-actor-with-generation.xml-expected
+@@ -0,0 +1,14 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>Murder101</title>
++ <credits>
++ <actor>Percy Daggs III</actor>
++ </credits>
++ <date>2014</date>
++ <category lang="en">Movie</category>
++ <url>http://us.imdb.com/M/title-exact?Murder101%20%282014%29</url>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Cast-actors-and-actresses.xml b/t/data-tv_imdb/Cast-actors-and-actresses.xml
+new file mode 100644
+index 00000000..307f6bb1
+--- /dev/null
++++ b/t/data-tv_imdb/Cast-actors-and-actresses.xml
+@@ -0,0 +1,8 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>Titanic</title>
++ <date>1997</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Cast-actors-and-actresses.xml-expected b/t/data-tv_imdb/Cast-actors-and-actresses.xml-expected
+new file mode 100644
+index 00000000..ce979dc3
+--- /dev/null
++++ b/t/data-tv_imdb/Cast-actors-and-actresses.xml-expected
+@@ -0,0 +1,15 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>Titanic</title>
++ <credits>
++ <actor>Leonardo DiCaprio</actor>
++ <actor>Kate Winslet</actor>
++ </credits>
++ <date>1997</date>
++ <category lang="en">Movie</category>
++ <url>http://us.imdb.com/M/title-exact?Titanic%20%281997%29</url>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Cast-billing.xml b/t/data-tv_imdb/Cast-billing.xml
+new file mode 100644
+index 00000000..9c237686
+--- /dev/null
++++ b/t/data-tv_imdb/Cast-billing.xml
+@@ -0,0 +1,8 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>#Rip</title>
++ <date>2013</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Cast-billing.xml-expected b/t/data-tv_imdb/Cast-billing.xml-expected
+new file mode 100644
+index 00000000..9ff477c0
+--- /dev/null
++++ b/t/data-tv_imdb/Cast-billing.xml-expected
+@@ -0,0 +1,16 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>#Rip</title>
++ <credits>
++ <actor>Marilyn Ghigliotti</actor>
++ <actor>Missi Pyle</actor>
++ <actor>Naomi Grossman</actor>
++ </credits>
++ <date>2013</date>
++ <category lang="en">Movie</category>
++ <url>http://us.imdb.com/M/title-exact?%23Rip%20%282013%29</url>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Cast-duplicate.xml b/t/data-tv_imdb/Cast-duplicate.xml
+new file mode 100644
+index 00000000..3e82dbcc
+--- /dev/null
++++ b/t/data-tv_imdb/Cast-duplicate.xml
+@@ -0,0 +1,8 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>#SketchPack</title>
++ <date>2015</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Cast-duplicate.xml-expected b/t/data-tv_imdb/Cast-duplicate.xml-expected
+new file mode 100644
+index 00000000..8e2c59ed
+--- /dev/null
++++ b/t/data-tv_imdb/Cast-duplicate.xml-expected
+@@ -0,0 +1,14 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>#SketchPack</title>
++ <credits>
++ <actor>Lucy Scott-Smith</actor>
++ </credits>
++ <date>2015</date>
++ <category lang="en">TV Series</category>
++ <url>http://us.imdb.com/M/title-exact?%22%23SketchPack%22%20%282015%29</url>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Cast-host-or-narrator.xml b/t/data-tv_imdb/Cast-host-or-narrator.xml
+new file mode 100644
+index 00000000..0eedc3a5
+--- /dev/null
++++ b/t/data-tv_imdb/Cast-host-or-narrator.xml
+@@ -0,0 +1,28 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>Bookclub</title>
++ <date>2015</date>
++ </programme>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>LolliLove</title>
++ <date>2004</date>
++ </programme>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>Breaking Genres</title>
++ <date>2015</date>
++ </programme>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>The Jean Bowring Show</title>
++ <date>1957</date>
++ </programme>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>New Now Next Awards</title>
++ <date>2008</date>
++ </programme>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>3 Weeks in Yerevan</title>
++ <date>2016</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Cast-host-or-narrator.xml-expected b/t/data-tv_imdb/Cast-host-or-narrator.xml-expected
+new file mode 100644
+index 00000000..eba20fa5
+--- /dev/null
++++ b/t/data-tv_imdb/Cast-host-or-narrator.xml-expected
+@@ -0,0 +1,59 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>Bookclub</title>
++ <credits>
++ <presenter>Fabio Huwyler</presenter>
++ </credits>
++ <date>2015</date>
++ <category lang="en">TV Series</category>
++ <url>http://us.imdb.com/M/title-exact?%22Bookclub%22%20%282015%29</url>
++ </programme>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>LolliLove</title>
++ <credits>
++ <commentator>Peter Alton</commentator>
++ </credits>
++ <date>2004</date>
++ <category lang="en">Movie</category>
++ <url>http://us.imdb.com/M/title-exact?LolliLove%20%282004%29</url>
++ </programme>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>Breaking Genres</title>
++ <credits>
++ <presenter>Amrit Singh</presenter>
++ </credits>
++ <date>2015</date>
++ <category lang="en">TV Movie</category>
++ <url>http://us.imdb.com/M/title-exact?Breaking%20Genres%20%282015%29</url>
++ </programme>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>The Jean Bowring Show</title>
++ <credits>
++ <presenter>Jean Bowring</presenter>
++ </credits>
++ <date>1957</date>
++ <category lang="en">TV Series</category>
++ <url>http://us.imdb.com/M/title-exact?%22The%20Jean%20Bowring%20Show%22%20%281957%29</url>
++ </programme>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>New Now Next Awards</title>
++ <credits>
++ <presenter>Gloria Bigelow</presenter>
++ </credits>
++ <date>2008</date>
++ <category lang="en">TV Movie</category>
++ <url>http://us.imdb.com/M/title-exact?New%20Now%20Next%20Awards%20%282008%29</url>
++ </programme>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>3 Weeks in Yerevan</title>
++ <credits>
++ <presenter>Mary Asatryan</presenter>
++ </credits>
++ <date>2016</date>
++ <category lang="en">Movie</category>
++ <url>http://us.imdb.com/M/title-exact?3%20Weeks%20in%20Yerevan%20%282016%29</url>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Cast-name-with-suffix.xml b/t/data-tv_imdb/Cast-name-with-suffix.xml
+new file mode 100644
+index 00000000..3b8d11c2
+--- /dev/null
++++ b/t/data-tv_imdb/Cast-name-with-suffix.xml
+@@ -0,0 +1,9 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>#Selfie</title>
++ <desc>cast: Elizabeth Kent should appear twice</desc>
++ <date>2015</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Cast-name-with-suffix.xml-expected b/t/data-tv_imdb/Cast-name-with-suffix.xml-expected
+new file mode 100644
+index 00000000..3c8441e3
+--- /dev/null
++++ b/t/data-tv_imdb/Cast-name-with-suffix.xml-expected
+@@ -0,0 +1,17 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>#Selfie</title>
++ <desc>cast: Elizabeth Kent should appear twice</desc>
++ <credits>
++ <actor>Karina Cornwell</actor>
++ <actor>Elizabeth Kent</actor>
++ <actor>Elizabeth Kent</actor>
++ </credits>
++ <date>2015</date>
++ <category lang="en">Movie</category>
++ <url>http://us.imdb.com/M/title-exact?%23Selfie%20%282015%29</url>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Cast-role.xml b/t/data-tv_imdb/Cast-role.xml
+new file mode 100644
+index 00000000..51dcc97a
+--- /dev/null
++++ b/t/data-tv_imdb/Cast-role.xml
+@@ -0,0 +1,12 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>#REV</title>
++ <date>2015</date>
++ </programme>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>Titanic</title>
++ <date>1997</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Cast-role.xml-expected b/t/data-tv_imdb/Cast-role.xml-expected
+new file mode 100644
+index 00000000..1553f0c3
+--- /dev/null
++++ b/t/data-tv_imdb/Cast-role.xml-expected
+@@ -0,0 +1,25 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>#REV</title>
++ <credits>
++ <actor>Poroma Banerjee</actor>
++ <actor>Sharon Zachariah</actor>
++ </credits>
++ <date>2015</date>
++ <category lang="en">Movie</category>
++ <url>http://us.imdb.com/M/title-exact?%23REV%20%282015%29</url>
++ </programme>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>Titanic</title>
++ <credits>
++ <actor>Leonardo DiCaprio</actor>
++ <actor>Kate Winslet</actor>
++ </credits>
++ <date>1997</date>
++ <category lang="en">Movie</category>
++ <url>http://us.imdb.com/M/title-exact?Titanic%20%281997%29</url>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml b/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml
+new file mode 100644
+index 00000000..0600b9f9
+--- /dev/null
++++ b/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml
+@@ -0,0 +1,12 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>#Illusion</title>
++ <date>2014</date>
++ </programme>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>#iScream</title>
++ <date>2014</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml-expected b/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml-expected
+new file mode 100644
+index 00000000..d159117f
+--- /dev/null
++++ b/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml-expected
+@@ -0,0 +1,24 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>#Illusion</title>
++ <credits>
++ <director>Teodora Berglund</director>
++ <director>Alexandra Jousset</director>
++ </credits>
++ <date>2014</date>
++ <category lang="en">Movie</category>
++ <url>http://us.imdb.com/M/title-exact?%23Illusion%20%282014%29</url>
++ </programme>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>#iScream</title>
++ <credits>
++ <director>Gibran Tanwir</director>
++ </credits>
++ <date>2014</date>
++ <category lang="en">Movie</category>
++ <url>http://us.imdb.com/M/title-exact?%23iScream%20%282014%29</url>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Director-name-with-suffix.xml b/t/data-tv_imdb/Director-name-with-suffix.xml
+new file mode 100644
+index 00000000..8d1c547f
+--- /dev/null
++++ b/t/data-tv_imdb/Director-name-with-suffix.xml
+@@ -0,0 +1,8 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>Grease Monkeys</title>
++ <date>1979</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Director-name-with-suffix.xml-expected b/t/data-tv_imdb/Director-name-with-suffix.xml-expected
+new file mode 100644
+index 00000000..e0654ee4
+--- /dev/null
++++ b/t/data-tv_imdb/Director-name-with-suffix.xml-expected
+@@ -0,0 +1,14 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>Grease Monkeys</title>
++ <credits>
++ <director>Mark Aaron</director>
++ </credits>
++ <date>1979</date>
++ <category lang="en">Movie</category>
++ <url>http://us.imdb.com/M/title-exact?Grease%20Monkeys%20%281979%29</url>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Director-with-generation.xml b/t/data-tv_imdb/Director-with-generation.xml
+new file mode 100644
+index 00000000..1e067899
+--- /dev/null
++++ b/t/data-tv_imdb/Director-with-generation.xml
+@@ -0,0 +1,8 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>The Meek</title>
++ <date>2017</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Director-with-generation.xml-expected b/t/data-tv_imdb/Director-with-generation.xml-expected
+new file mode 100644
+index 00000000..d033ef0b
+--- /dev/null
++++ b/t/data-tv_imdb/Director-with-generation.xml-expected
+@@ -0,0 +1,14 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>The Meek</title>
++ <credits>
++ <director>Harold Jackson III</director>
++ </credits>
++ <date>2017</date>
++ <category lang="en">Movie</category>
++ <url>http://us.imdb.com/M/title-exact?The%20Meek%20%282017%29</url>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Genres-duplicate.xml b/t/data-tv_imdb/Genres-duplicate.xml
+new file mode 100644
+index 00000000..a852100c
+--- /dev/null
++++ b/t/data-tv_imdb/Genres-duplicate.xml
+@@ -0,0 +1,8 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>'C'-Man</title>
++ <date>1949</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Genres-duplicate.xml-expected b/t/data-tv_imdb/Genres-duplicate.xml-expected
+new file mode 100644
+index 00000000..9787f5b5
+--- /dev/null
++++ b/t/data-tv_imdb/Genres-duplicate.xml-expected
+@@ -0,0 +1,14 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>'C'-Man</title>
++ <date>1949</date>
++ <category lang="en">Movie</category>
++ <category lang="en">Crime</category>
++ <category lang="en">Drama</category>
++ <category lang="en">Film-Noir</category>
++ <url>http://us.imdb.com/M/title-exact?%27C%27-Man%20%281949%29</url>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Genres-multiple.xml b/t/data-tv_imdb/Genres-multiple.xml
+new file mode 100644
+index 00000000..e5923e70
+--- /dev/null
++++ b/t/data-tv_imdb/Genres-multiple.xml
+@@ -0,0 +1,8 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>[Film #9 Title]</title>
++ <date>2015</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Genres-multiple.xml-expected b/t/data-tv_imdb/Genres-multiple.xml-expected
+new file mode 100644
+index 00000000..60cc308a
+--- /dev/null
++++ b/t/data-tv_imdb/Genres-multiple.xml-expected
+@@ -0,0 +1,14 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>[Film #9 Title]</title>
++ <date>2015</date>
++ <category lang="en">Movie</category>
++ <category lang="en">Comedy</category>
++ <category lang="en">Fantasy</category>
++ <category lang="en">Short</category>
++ <url>http://us.imdb.com/M/title-exact?%5BFilm%20%239%20Title%5D%20%282015%29</url>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Genres-single.xml b/t/data-tv_imdb/Genres-single.xml
+new file mode 100644
+index 00000000..aeb16a46
+--- /dev/null
++++ b/t/data-tv_imdb/Genres-single.xml
+@@ -0,0 +1,8 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>(Mon) Jour de chance</title>
++ <date>2004</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Genres-single.xml-expected b/t/data-tv_imdb/Genres-single.xml-expected
+new file mode 100644
+index 00000000..bebe755b
+--- /dev/null
++++ b/t/data-tv_imdb/Genres-single.xml-expected
+@@ -0,0 +1,12 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>(Mon) Jour de chance</title>
++ <date>2004</date>
++ <category lang="en">Movie</category>
++ <category lang="en">Short</category>
++
<
url>http://us.imdb.com/M/title-exact?%28Mon%29%20Jour%20de%20chance%20...
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Movie-same-year-movie-and-series.xml b/t/data-tv_imdb/Movie-same-year-movie-and-series.xml
+new file mode 100644
+index 00000000..b99be2d8
+--- /dev/null
++++ b/t/data-tv_imdb/Movie-same-year-movie-and-series.xml
+@@ -0,0 +1,14 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>Journey to the Center of the Earth</title>
++ <desc>Multiple titles (movie,video,tv) with same title+year</desc>
++ <date>2008</date>
++ </programme>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>Ashes to Ashes</title>
++ <desc>Movie and tv-series with same title+year</desc>
++ <date>2008</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Movie-same-year-movie-and-series.xml-expected b/t/data-tv_imdb/Movie-same-year-movie-and-series.xml-expected
+new file mode 100644
+index 00000000..2ef612ba
+--- /dev/null
++++ b/t/data-tv_imdb/Movie-same-year-movie-and-series.xml-expected
+@@ -0,0 +1,15 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>Journey to the Center of the Earth</title>
++ <desc>Multiple titles (movie,video,tv) with same title+year</desc>
++ <date>2008</date>
++ </programme>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>Ashes to Ashes</title>
++ <desc>Movie and tv-series with same title+year</desc>
++ <date>2008</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Movie-startswith-hyphen.xml b/t/data-tv_imdb/Movie-startswith-hyphen.xml
+new file mode 100644
+index 00000000..f70ca6a1
+--- /dev/null
++++ b/t/data-tv_imdb/Movie-startswith-hyphen.xml
+@@ -0,0 +1,8 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>-1: Minus One</title>
++ <date>2016</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Movie-startswith-hyphen.xml-expected b/t/data-tv_imdb/Movie-startswith-hyphen.xml-expected
+new file mode 100644
+index 00000000..467c72c7
+--- /dev/null
++++ b/t/data-tv_imdb/Movie-startswith-hyphen.xml-expected
+@@ -0,0 +1,11 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>-1: Minus One</title>
++ <date>2016</date>
++ <category lang="en">Movie</category>
++
<
url>http://us.imdb.com/M/title-exact?-1%3A%20Minus%20One%20%282016%29&...
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Movie-two-in-same-year.xml b/t/data-tv_imdb/Movie-two-in-same-year.xml
+new file mode 100644
+index 00000000..cdc65bd6
+--- /dev/null
++++ b/t/data-tv_imdb/Movie-two-in-same-year.xml
+@@ -0,0 +1,9 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>'83</title>
++ <desc>tv_imdb cannot identify a sole hit - two films in same year with this title</desc>
++ <date>2017</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Movie-two-in-same-year.xml-expected b/t/data-tv_imdb/Movie-two-in-same-year.xml-expected
+new file mode 100644
+index 00000000..f420c42a
+--- /dev/null
++++ b/t/data-tv_imdb/Movie-two-in-same-year.xml-expected
+@@ -0,0 +1,10 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>'83</title>
++ <desc>tv_imdb cannot identify a sole hit - two films in same year with this title</desc>
++ <date>2017</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Movie-with-aka.xml b/t/data-tv_imdb/Movie-with-aka.xml
+new file mode 100644
+index 00000000..c7afdebf
+--- /dev/null
++++ b/t/data-tv_imdb/Movie-with-aka.xml
+@@ -0,0 +1,8 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>Family Prayers</title>
++ <date>2010</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Movie-with-aka.xml-expected b/t/data-tv_imdb/Movie-with-aka.xml-expected
+new file mode 100644
+index 00000000..cfd756c2
+--- /dev/null
++++ b/t/data-tv_imdb/Movie-with-aka.xml-expected
+@@ -0,0 +1,11 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>Family Prayers</title>
++ <date>2010</date>
++ <category lang="en">Movie</category>
++
<
url>http://us.imdb.com/M/title-exact?Family%20Prayers%20%282010%29<...
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Movie-with-unknown-year.xml b/t/data-tv_imdb/Movie-with-unknown-year.xml
+new file mode 100644
+index 00000000..319bd6b6
+--- /dev/null
++++ b/t/data-tv_imdb/Movie-with-unknown-year.xml
+@@ -0,0 +1,14 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>Zed</title>
++ </programme>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>Zed</title>
++ <date>2010</date>
++ </programme>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>California Cornflakes</title>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Movie-with-unknown-year.xml-expected b/t/data-tv_imdb/Movie-with-unknown-year.xml-expected
+new file mode 100644
+index 00000000..09481a82
+--- /dev/null
++++ b/t/data-tv_imdb/Movie-with-unknown-year.xml-expected
+@@ -0,0 +1,15 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>Zed</title>
++ </programme>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>Zed</title>
++ <date>2010</date>
++ </programme>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>California Cornflakes</title>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Ratings.xml b/t/data-tv_imdb/Ratings.xml
+new file mode 100644
+index 00000000..b445c8ba
+--- /dev/null
++++ b/t/data-tv_imdb/Ratings.xml
+@@ -0,0 +1,8 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++<tv>
++ <programme channel="channel0" start="20010829000500 MST">
++ <title>#nitTWITS</title>
++ <date>2011</date>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/Ratings.xml-expected b/t/data-tv_imdb/Ratings.xml-expected
+new file mode 100644
+index 00000000..d4e0885c
+--- /dev/null
++++ b/t/data-tv_imdb/Ratings.xml-expected
+@@ -0,0 +1,14 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <programme start="20010829000500 MST" channel="channel0">
++ <title>#nitTWITS</title>
++ <date>2011</date>
++ <category lang="en">TV Series</category>
++
<
url>http://us.imdb.com/M/title-exact?%22%23nitTWITS%22%20%282011%29<...
++ <star-rating system="IMDB User Rating">
++ <value>7.0/10</value>
++ </star-rating>
++ </programme>
++</tv>
+diff --git a/t/data-tv_imdb/lists/actors.list b/t/data-tv_imdb/lists/actors.list
+index 90a787a0..e9f75e9f 100644
+--- a/t/data-tv_imdb/lists/actors.list
++++ b/t/data-tv_imdb/lists/actors.list
+@@ -11,3 +11,17 @@ Name Titles
+ Campbell, Bruce (I) Army of Darkness (1992) [Ash] <1>
+ Actor, Bruce (I) Movie1 (1990) [Ash] <1>
+ Movie2 (1991) [Ash] <1>
++Dibnah, Fred A Tribute to Fred Dibnah (2004) (TV) (archive footage) [Himself]
<2>
++ Dig with Dibnah (2004) (TV) [Himself - Presenter] <1>
++ Fred Dibnah: Steeplejack (1979) (TV) [Himself] <1>
++DiCaprio, Leonardo 'Catch Me If You Can': Behind the Camera (2003) (V)
[Himself] <6>
++ Titanic (1997) [Jack Dawson] <1>
++Huwyler, Fabio "Bookclub" (2015) [Himself - Host]
++Daggs III, Percy Murder101 (2014) [Carlyle] <9>
++Alton, Peter LolliLove (2004) (voice) [Narrator] <3>
++Singh, Amrit (I) 2016 Winter Film Awards (2016) (TV) [Presenter]
++ A Social Conversation with Bernie (2016) (TV) [Himself - Host]
++ Breaking Genres (2015) (TV) [Himself - Host]
++Singh, Amit (I) Corporate (2006)
++
++
+diff --git a/t/data-tv_imdb/lists/actresses.list b/t/data-tv_imdb/lists/actresses.list
+index 446e779d..77bc7953 100644
+--- a/t/data-tv_imdb/lists/actresses.list
++++ b/t/data-tv_imdb/lists/actresses.list
+@@ -10,3 +10,24 @@ Name Titles
+ ---- ------
+ Actor, Betty (I) Movie1 (1990) [Betty] <1>
+ Movie2 (1991) [Betty] <1>
++Banerjee, Poroma (II) #REV (2015) [Cinematographer]
++Zachariah, Sharon #REV (2015) [Interviewee]
++Ghigliotti, Marilyn #Rip (2013) (voice) [Lydia Walters] <1>
++Griffin, Martina #Rip (2013) [Juanita] <10>
++Grossman, Naomi (II) #Rip (2013) [Bella Tiavas] <3>
++Lee, Michelle (XXXVI) #Rip (2013) [Female News Anchor] <11>
++Leonards, Ammie #Rip (2013) [CourtNay] <6>
++Pyle, Missi #Rip (2013) [Lydia Walters] <2>
++Shea, Beth #Rip (2013) [Liz Tanner] <4>
++Cornwell, Karina (II) #Selfie (2015) (as Karina Cornell) [Robot Girl]
++Kent, Elizabeth (V) #Selfie (2015) [The Woman]
++Kent, Elizabeth (VI) #Selfie (2015) [The Woman]
++Winslet, Kate 11th Annual Screen Actors Guild Awards (2005) (TV) [Herself - Nominee
& Presenter]
++ Titanic (1997) [Rose Dewitt Bukater] <2>
++ Reflections on Titanic (2012) [Herself] <3>
++Asatryan, Mary 3 Weeks in Yerevan (2016) [Radio Host #2]
++Bowring, Jean "The Jean Bowring Show" (1957) [Herself - Hostess]
++Bigelow, Gloria New Now Next Awards (2008) (TV) [Herself - Host]
++Haze, Roxxy "#BedTimeBitchin" (2014) [Herself - Host]
++Scott-Smith, Lucy "#SketchPack" (2015)
++Scott-Smith, Lucy "#SketchPack" (2015) [Various (2015)]
+diff --git a/t/data-tv_imdb/lists/directors.list b/t/data-tv_imdb/lists/directors.list
+index 03de5934..deb541f3 100644
+--- a/t/data-tv_imdb/lists/directors.list
++++ b/t/data-tv_imdb/lists/directors.list
+@@ -14,3 +14,15 @@ Director,Joe Movie1 (1990)
+ Director,In1915 Movie100 (1915)
+ Director,In1943 Movie100 (1943)
+ Director,In1953 Movie100 (1953)
++Aaron, Mark (I) Grease Monkeys (1979)
++ The Rivermen (1980)
++Berglund, Teodora (II) #Illusion (2014)
++ #Illusion (2014) (co-director)
++Jousset, Alexandra #Illusion (2014)
++ #Illusion (2014) (co-director)
++Tanwir, Gibran #iScream (2014) (segment "Beauty Boarding")
++ #iScream (2014) (segment "Caller ID")
++ #iScream (2014) (segment "Nightmare")
++ #iScream (2014) (segment "The Anniversary")
++ #iScream (2014) (segment "VooDoo")
++Jackson III, Harold The Meek (2017)
+diff --git a/t/data-tv_imdb/lists/genres.list b/t/data-tv_imdb/lists/genres.list
+index 5fc0c0af..96d43647 100644
+--- a/t/data-tv_imdb/lists/genres.list
++++ b/t/data-tv_imdb/lists/genres.list
+@@ -9,5 +9,13 @@
+ Army of Darkness (1992) Horror
+ Movie1 (1990) Horror
+ Movie2 (1991) Mystery
++[Film #9 Title] (2015) Comedy
++[Film #9 Title] (2015) Fantasy
++[Film #9 Title] (2015) Short
++(Mon) Jour de chance (2004) Short
++'C'-Man (1949) Crime
++'C'-Man (1949) Drama
++'C'-Man (1949) Film-Noir
++'C'-Man (1949) Crime
+
+
+diff --git a/t/data-tv_imdb/lists/movies.list b/t/data-tv_imdb/lists/movies.list
+index 89379710..5bc32861 100644
+--- a/t/data-tv_imdb/lists/movies.list
++++ b/t/data-tv_imdb/lists/movies.list
+@@ -46,3 +46,44 @@ Movie101 (1992) 1992
+ Movie101 (1993) (V) 1993
+ "Movie101" (1988) 1988
+ "Movie101" (1988) {Episode1 Part 1 (#8.1)} 1992
++'83 (2017/I) 2017
++'83 (2017/II) 2017
++Journey to the Center of the Earth (2008) 2008
++Journey to the Center of the Earth (2008) (TV) 2008
++Journey to the Center of the Earth (2008) (V) 2008
++"Ashes to Ashes" (2008) 2008
++Ashes to Ashes (2008) 2008
++California Cornflakes (????) ????
++Zed (????/II) ????
++Family Prayers (aka Karim & Suha) (2010) 2010
++"Grease Monkeys" (2003) 2003-????
++"Grease Monkeys" (2003) {Almost Blue (#1.4)} 2003
++Grease Monkeys (1979) 1979
++#Illusion (2014) 2014
++#iScream (2014) 2014
++#REV (2015) 2015
++#Rip (2013) 2013
++#Selfie (2015) 2015
++Titanic (1997) 1997
++Titanic (2012) 2012
++Fred Dibnah: Steeplejack (1979) (TV) 1979
++"Bookclub" (2015) 2015-????
++Murder101 (2014) 2014
++LolliLove (2004) 2004
++Breaking Genres (2015) (TV) 2015
++Corporate (2006) 2006
++3 Weeks in Yerevan (2016) 2016
++"The Jean Bowring Show" (1957) 1957-1960
++New Now Next Awards (2008) (TV) 2008
++"#BedTimeBitchin" (2014) 2014-????
++#ClivesClub: The Somers Solstice (2015) 2015
++"#SketchPack" (2015) 2015-????
++[Film #9 Title] (2015) 2015
++(Mon) Jour de chance (2004) 2004
++'C'-Man (1949) 1949
++"#nitTWITS" (2011) 2011-????
++The Meek (2015) 2015
++The Meek (2017) 2017
++-1: Minus One (2016) 2016
++
++
+diff --git a/t/data-tv_imdb/lists/ratings.list b/t/data-tv_imdb/lists/ratings.list
+index ee7b97c2..e114eafa 100644
+--- a/t/data-tv_imdb/lists/ratings.list
++++ b/t/data-tv_imdb/lists/ratings.list
+@@ -9,3 +9,4 @@ New Distribution Votes Rank Title
+ 0000002211 000001 9.9 Army of Darkness (1992)
+ 0000002211 000001 1.0 Movie1 (1990)
+ 0000002211 000002 1.1 Movie2 (1991)
++ 1.1..2...5 8 7.0 "#nitTWITS" (2011)
+--
+2.29.2
+
diff --git a/0043-Use-disc-sort-to-reduce-memory-usage-63.patch b/0043-Use-disc-sort-to-reduce-memory-usage-63.patch
new file mode 100644
index 0000000..8578ee7
--- /dev/null
+++ b/0043-Use-disc-sort-to-reduce-memory-usage-63.patch
@@ -0,0 +1,840 @@
+From 0b757d4fce17bda5453fc870647f52ebe5e55e1e Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Wed, 13 Jan 2021 18:17:57 +0000
+Subject: [PATCH 43/50] Use disc sort to reduce memory usage (#63)
+
+---
+ filter/tv_imdb | 33 ++--
+ lib/IMDB.pm | 516 ++++++++++++++++++++++++++++++++++++-------------
+ 2 files changed, 400 insertions(+), 149 deletions(-)
+
+diff --git a/filter/tv_imdb b/filter/tv_imdb
+index ba8b804d..859d6c57 100755
+--- a/filter/tv_imdb
++++ b/filter/tv_imdb
+@@ -9,6 +9,7 @@ tv_imdb - Augment XMLTV listings files with imdb.com data.
+ =head1 SYNOPSIS
+
+ tv_imdb --imdbdir <dir> [--help] [--quiet] [--download]
++ [--filesort] [--nosystemsort]
+ [--prepStage (1-9,all)]
+
+ tv_imdb --imdbdir <dir> [--help] [--quiet]
+@@ -114,14 +115,20 @@ the '--download' flag and be prompted for what you need to
download by
+ hand. See <http://www.imdb.com/interfaces> for the download sites.
+ Then once you have the files rerun without '--download'.
+
+-Note: '--prepStage' sucks a bit of memory, but you can run each
+-prepStage separately by running --prepStage with each of the stages
+-(see --help for details).
++Note: '--prepStage' requires up to 520MB of memory. This can be reduced a little
++by running each prepStage separately, using --prepStage with each of the stages
++individually (see --help for details).
++Memory use can be reduced further by using --filesort option when building the
++database. This will try to use the operating system to sort the interim data files
++rather than sorting in memory. If this system sort does not work for you then you
++can use the File::Sort package if it is installed on your system, by also adding the
++option --nosystemsort (however this method of sorting is very slow). If you specify
++neither option then Perl will sort the files in memory.
+
+ B<3.> Once you have the database loaded try
+ E<39>cat tv.xml | tv_imdb --imdbdir <dir> > tv1.xmlE<39>.
+
+-Feel free to report any problems with these steps to xmltv-devel(a)lists.sf.net.
++Feel free to report any problems with these steps at https://github.com/XMLTV/xmltv/issues.
+
+ =head1 TESTING
+
+@@ -132,12 +139,6 @@ information in the tv_imdb database. For exmple:
+
+ =head1 BUGS
+
+-The '--prepStage' needs a lot of memory to run at a reasonable speed,
+-over 250 megabytes with the current imdb data files. For there to be
+-250 megabytes free for tv_imdb, the system will need at least 512 megabytes
+-of RAM. Running with less can take hours (or days!) - although fortunately
+-this stage needs to be run only once after downloading the data files.
+-
+ Could use a --configure step just like the grabbers so you do not have
+ to specify the --imdbdir on the command line every time. Also this could
+ step you through the prep stages with more description of what is being
+@@ -185,7 +186,7 @@ use Getopt::Long;
+ use XMLTV::Data::Recursive::Encode;
+ use XMLTV::Usage <<END
+ $0: augment listings with data from imdb.com
+-$0 --imdbdir <dir> [--help] [--quiet] [--download] [--prepStage (1-9,all)]
++$0 --imdbdir <dir> [--help] [--quiet] [--download] [--filesort] [--prepStage
(1-9,all)]
+ $0 --imdbdir <dir> [--help] [--quiet] [--download] [--with-keywords] [--with-plot]
[--movies-only] [--actors NUMBER] [--stats] [--debug] [--output FILE] [FILE...]
+
+ END
+@@ -207,6 +208,8 @@ my ($opt_help,
+ $opt_validate_title,
+ $opt_validate_year,
+ $opt_sample,
++ $opt_filesort,
++ $opt_systemsort,
+ );
+
+ GetOptions('help' => \$opt_help,
+@@ -219,11 +222,13 @@ GetOptions('help' => \$opt_help,
+ 'actors=s' => \$opt_num_actors,
+ 'quiet' => \$opt_quiet,
+ 'download' => \$opt_download,
+- 'stats' => \$opt_stats,
++ 'stats' => \$opt_stats,
+ 'debug+' => \$opt_debug,
+ 'validate-title=s' => \$opt_validate_title,
+ 'validate-year=s' => \$opt_validate_year,
+ 'sample=s' => \$opt_sample,
++ 'filesort!' => \$opt_filesort,
++ 'systemsort!' => \$opt_systemsort,
+ ) or usage(0);
+
+ usage(1) if $opt_help;
+@@ -235,6 +240,8 @@ $opt_num_actors=3 if ( !defined($opt_num_actors) );
+ $opt_movies_only=0 if ( !defined($opt_movies_only) );
+ $opt_debug=0 if ( !defined($opt_debug) );
+ $opt_sample=0 if ( !defined($opt_sample) );
++$opt_filesort=0 if ( !defined($opt_filesort) );
++$opt_systemsort=1 if ( !defined($opt_systemsort) );
+
+ $opt_quiet=(defined($opt_quiet));
+ if ( !defined($opt_stats) ) {
+@@ -275,6 +282,8 @@ END
+ 'stageToRun' => $opt_prepStage,
+ 'downloadMissingFiles' => $opt_download,
+ 'sample' => $opt_sample,
++ 'filesort' => $opt_filesort,
++ 'systemsort' => $opt_systemsort,
+ );
+
+ if ( $opt_prepStage eq "all" ) {
+diff --git a/lib/IMDB.pm b/lib/IMDB.pm
+index 4592a29e..3ee44c7a 100644
+--- a/lib/IMDB.pm
++++ b/lib/IMDB.pm
+@@ -53,6 +53,8 @@ use open ':encoding(iso-8859-1)'; # try to enforce file
encoding (does this wo
+ # bug: genres and cast are rolled-up from all episodes to the series record
(misleading)
+ # bug: multiple matches can sometimes extract the first one it comes across as a
'hit'
+ # (potentially wrong - it should not augment incoming prog when multiple matches)
++# dbbuild: --filesort to sort interim data on disc rather than in memory
++# dbbuild: --nosystemsort to use File::Sort rather than operating system shell's
'sort' command
+ #
+ #
+ our $VERSION = '0.11'; # version number of database
+@@ -317,7 +319,7 @@ sub error($$)
+ sub status($$)
+ {
+ if ( $_[0]->{verbose} ) {
+- print STDERR "tv_imdb: $_[1]\n";
++ print STDERR "tv_imdb: $_[1]\n";
+ }
+ }
+
+@@ -326,7 +328,7 @@ sub debug($$)
+ my $self=shift;
+ my $mess=shift;
+ if ( $self->{verbose} > 1 ) {
+- print STDERR "tv_imdb: $mess\n";
++ print STDERR "tv_imdb: $mess\n";
+ }
+ }
+
+@@ -1357,6 +1359,12 @@ use LWP;
+ use XMLTV::Gunzip;
+ use IO::File;
+
++# is system sort available?
++use constant HAS_SYSTEMSORT => ($^O=~'linux|cygwin|MSWin32');
++
++# is File::Sort available?
++use constant HAS_FILESORT => defined eval { require File::Sort };
++
+ use open ':encoding(iso-8859-1)'; # try to enforce file encoding (does this
work in Perl <5.8.1? )
+
+ # Use Term::ProgressBar if installed.
+@@ -1417,6 +1425,13 @@ sub new
+ }
+
+ bless($self, $type);
++
++ if ( $self->{filesort} && !( HAS_FILESORT || HAS_SYSTEMSORT ) ) {
++ $self->error("filesort requested but not available");
++ return(undef);
++ }
++ $self->{usefilesort} = ( (HAS_FILESORT || HAS_SYSTEMSORT) &&
$self->{filesort} ); # --filesort => 1 --nofilesort => 0
++ $self->{usesystemsort} = ( HAS_SYSTEMSORT && $self->{filesort} &&
$self->{systemsort}); # use linux sort in preference to File::Sort as it is sooo much
faster on big files
+
+ if ( $self->{stageToRun} ne $self->{stageLast} ) {
+ # unless this is the last stage, check we have the necessary files
+@@ -1566,6 +1581,56 @@ END
+ return 0;
+ }
+
++sub sortfile ($$$) {
++ my ($self, $stage, $file)=@_;
++
++ # file already written : sort it using (1) system sort command, or (2) File::Sort
package
++
++ my $f=$file;
++ my $st = time;
++ my $res;
++
++ if ($self->{usesystemsort}) { # use shell sort if we can (much faster on big
files)
++ $self->status("using system sort on stage $stage");
++
++ # which OS are we on?
++ if ($^O=~'linux|cygwin') { # TODO: untested on cygwin
++ if ($stage == 1) {
++ $res = system( "sort", "-t", "\t", qw(-k 1 -o),
"$f.sorted", "$f" );
++ } else {
++ $res = system( "sort", qw(-t : -k 1n -o), "$f.sorted",
"$f" );
++ }
++ if ($? == -1) { $self->error("failed to execute: $! \n"); }
++ elsif ( $? & 127 || $? & 128 ) { $self->error("system call died with
signal %d \n"); }
++ else { $res = $? >> 8; }
++ $res = 1 if $res == 0; # successful call returns 0 in $?
++
++ } elsif ($^O=~'MSWin32') { # TODO: untested on Windows
++ $res = system( "sort", "/O ", "$f.sorted",
"$f");
++ $res = 1 if $res == 0; # successful call returns 0 in $?
++ }
++
++ } else {
++ $self->status("using filesort on stage $stage (this might take up to 1
hour)");
++ if ($stage == 1) {
++ $res = File::Sort::sort_file({ t =>"\t", k=>'1',
y=>200000, I=>"$f", o=>"$f.sorted" });
++ } else {
++ $res = File::Sort::sort_file({ t =>':', k=>'1n', y=>200000,
I=>"$f", o=>"$f.sorted" });
++ }
++ }
++
++ $self->status("sorting took ".(int(((time - $st)/60)*10)/10)."
minutes") if (time - $st > 60);
++
++ if (!$res) {
++ die "Filesort failed on $f";
++ } else {
++ unlink($f);
++ rename "$f.sorted", $f or die "Cannot rename file: $!";
++ }
++
++ return($res);
++}
++
+ sub redirect($$)
+ {
+ my ($self, $file)=@_;
+@@ -1876,23 +1941,32 @@ sub readMovies($$$$$)
+ # we don't keep episode information TODO: enhancement: change tv_imdb to do
episodes?
+ if ($isepisode == 1) { next; }
+
+- # store the title in a hash of $key=>{$title}
+- if ( defined($self->{movieshash}{$hashkey}) ) { # check for duplicates
+- #
+- # there's a lot (c. 9,000!) instances of duplicate titles in the movies.list
file
+- # so only report where titles are different
+- if ( defined $self->{movieshash}{$hashkey}{$title} &&
$self->{movieshash}{$hashkey}{$title} ne $year."\t".$qualifier ) { #
{."\t".$progtype}
+- $self->error("duplicate moviedb key computed $hashkey - this programme will
be ignored $mtitle");
+- #$self->error(" ".$self->{movieshash}{$hashkey}{$title});
+- next;
++
++ # store the movies data
++ if ($self->{usefilesort}) {
++ # if sorting on disc then write the extracted movies data to an interim file
++ print {$self->{fhdata}}
$hashkey."\t".$title."\t".$year."\t".$qualifier."\n";
++
++ } else {
++ # store the title in a hash of $key=>{$title}
++ if ( defined($self->{movieshash}{$hashkey}) ) { # check for duplicates
++ #
++ # there's a lot (c. 9,000!) instances of duplicate titles in the movies.list
file
++ # so only report where titles are different
++ if ( defined $self->{movieshash}{$hashkey}{$title} &&
$self->{movieshash}{$hashkey}{$title} ne $year."\t".$qualifier ) { #
{."\t".$progtype}
++ $self->error("duplicate moviedb key computed $hashkey - this programme
will be ignored $mtitle");
++ #$self->error(" ".$self->{movieshash}{$hashkey}{$title});
++ next;
++ }
+ }
++
++ # the output IDX and DAT files must be sorted by dbkey (because of the way the
searching is done)
++ # so we need to store all the incoming 4 million records and then sort them
++ #
++ $self->{movieshash}{$hashkey}{$title} = $year."\t".$qualifier; # we
don't currently use the progtype flag so don't print it
{."\t".$progtype}
++
+ }
+
+- # the output IDX and DAT files must be sorted by dbkey (because of the way the
searching is done)
+- # so we need to store all the incoming 4 million records and then sort them TODO:
do the sorting on disc in external call
+- #
+- $self->{movieshash}{$hashkey}{$title} = $year."\t".$qualifier; # we
don't currently use the progtype flag so don't print it
{."\t".$progtype}
+-
+ # return number of titles kept
+ $countout++;
+
+@@ -2118,11 +2192,18 @@ sub readCastOrDirectors($$$$$)
+ $mperson .= $cur_name;
+ $mperson .= " [$hostnarrator]" if ( defined($hostnarrator) ); # this is
wrong: incoming data are "lastname, firstname" so this creates "Huwyler,
Fabio [Host]"
+
+- my $h = "stage${stage}hash";
+- if (defined( $self->{$h}{$idxid} )) {
+- $self->{$h}{$idxid} .= "|".$mperson;
++ if ($self->{usefilesort}) {
++ # write the extracted imdb data to a temporary file, preceeded by the IDX id for
each record
++ my $k = sprintf("%07d", $idxid);
++ print {$self->{fhdata}} $k.':'.$mperson."\n";
++
+ } else {
+- $self->{$h}{$idxid} = $mperson;
++ my $h = "stage${stage}hash";
++ if (defined( $self->{$h}{$idxid} )) {
++ $self->{$h}{$idxid} .= "|".$mperson;
++ } else {
++ $self->{$h}{$idxid} = $mperson;
++ }
+ }
+
+
+@@ -2266,11 +2347,18 @@ sub readGenres($$$$$)
+ # the output ".data" files must be sorted by id so they can be merged in
stage final
+ # so we need to store all the incoming records and then sort them
+ #
+- my $h = "stage${stage}hash";
+- if (defined( $self->{$h}{$idxid} )) {
+- $self->{$h}{$idxid} .= "|".$mgenres;
++ if ($self->{usefilesort}) {
++ # write the extracted imdb data to a temporary file, preceeded by the IDX id for
each record
++ my $k = sprintf("%07d", $idxid);
++ print {$self->{fhdata}} $k.':'.$mgenres."\n";
++
+ } else {
+- $self->{$h}{$idxid} = $mgenres;
++ my $h = "stage${stage}hash";
++ if (defined( $self->{$h}{$idxid} )) {
++ $self->{$h}{$idxid} .= "|".$mgenres;
++ } else {
++ $self->{$h}{$idxid} = $mgenres;
++ }
+ }
+
+
+@@ -2411,12 +2499,19 @@ sub readRatings($$$$$)
+ # the output ".data" files must be sorted by id so they can be merged in
stage final
+ # so we need to store all the incoming records and then sort them
+ #
+- my $h = "stage${stage}hash";
+- if (defined( $self->{$h}{$idxid} )) {
+- # we shouldn't get duplicates
+- $self->error("$file: duplicate film found at line $lineCount - this rating
will be ignored $mtitle");
++ if ($self->{usefilesort}) {
++ # write the extracted imdb data to a temporary file, preceeded by the IDX id for
each record
++ my $k = sprintf("%07d", $idxid);
++ print {$self->{fhdata}}
$k.':'."$mdistrib;$mvotes;$mrank"."\n";
++
+ } else {
+- $self->{$h}{$idxid} = "$mdistrib;$mvotes;$mrank";
++ my $h = "stage${stage}hash";
++ if (defined( $self->{$h}{$idxid} )) {
++ # we shouldn't get duplicates
++ $self->error("$file: duplicate film found at line $lineCount - this rating
will be ignored $mtitle");
++ } else {
++ $self->{$h}{$idxid} = "$mdistrib;$mvotes;$mrank";
++ }
+ }
+
+
+@@ -2559,11 +2654,18 @@ sub readKeywords($$$$$)
+ # the output ".data" files must be sorted by id so they can be merged in
stage final
+ # so we need to store all the incoming records and then sort them
+ #
+- my $h = "stage${stage}hash";
+- if (defined( $self->{$h}{$idxid} )) {
+- $self->{$h}{$idxid} .= "|".$mkeywords;
++ if ($self->{usefilesort}) {
++ # write the extracted imdb data to a temporary file, preceeded by the IDX id for
each record
++ my $k = sprintf("%07d", $idxid);
++ print {$self->{fhdata}} $k.':'.$mkeywords."\n";
++
+ } else {
+- $self->{$h}{$idxid} = $mkeywords;
++ my $h = "stage${stage}hash";
++ if (defined( $self->{$h}{$idxid} )) {
++ $self->{$h}{$idxid} .= "|".$mkeywords;
++ } else {
++ $self->{$h}{$idxid} = $mkeywords;
++ }
+ }
+
+
+@@ -2736,12 +2838,19 @@ sub readPlots($$$$$)
+ # the output ".data" files must be sorted by id so they can be merged in
stage final
+ # so we need to store all the incoming records and then sort them
+ #
+- my $h = "stage${stage}hash";
+- if (defined( $self->{$h}{$idxid} )) {
+- # we shouldn't get duplicates
+- $self->error("$file: duplicate film found at line $lineCount - this plot
will be ignored $mtitle");
++ if ($self->{usefilesort}) {
++ # write the extracted imdb data to a temporary file, preceeded by the IDX id for
each record
++ my $k = sprintf("%07d", $idxid);
++ print {$self->{fhdata}} $k.':'.$mplot."\n";
++
+ } else {
+- $self->{$h}{$idxid} = $mplot;
++ my $h = "stage${stage}hash";
++ if (defined( $self->{$h}{$idxid} )) {
++ # we shouldn't get duplicates
++ $self->error("$file: duplicate film found at line $lineCount - this plot
will be ignored $mtitle");
++ } else {
++ $self->{$h}{$idxid} = $mplot;
++ }
+ }
+
+
+@@ -3023,7 +3132,24 @@ sub readfilesbyidxid($$$$)
+ if ($fdat->{$stage}{k} < $idxid) {
+ #print STDERR "fetching from $stage ".$fdat->{$stage}{k}."
< $idxid \n";
+
+- my ($fstage, $fidxid, $fdata) = $self->readdatafile( $fhs->{$stage}, $stage,
$idxid );
++ my ($fstage, $fidxid, $fdata) = $self->readdatafile( $fhs->{$stage}, $stage,
$idxid, -1);
++
++ if ($self->{usefilesort}) {
++ # if we are using filesort then there will be multiple records with the same idxid
++ # we need to fetch all of these and combine them
++ my $_fidxid = $fidxid;
++ while ( $_fidxid == $fidxid && $_fidxid != 9999999 ) {
++ # read next record
++ (my $_fstage, $_fidxid, my $_fdata) = $self->readdatafile( $fhs->{$stage},
$stage, $idxid, $_fidxid );
++ if ($_fidxid == $fidxid) {
++ $fdata .= '|' . $_fdata;
++ }
++ }
++
++ # need to dedupe our merged data
++ ($fstage, $fidxid, $fdata) = $self->tidydatafile( $fstage, $fidxid, $fdata );
++
++ }
+
+ # store the file record
+ $fdat->{$stage} = { k=>$fidxid, v=>$fdata };
+@@ -3058,19 +3184,26 @@ sub readfilesbyidxid($$$$)
+ return;
+ }
+
+-sub readdatafile($$$$)
++sub readdatafile($$$$$)
+ {
+- my ($self, $fh, $stage, $idxid)=@_;
++ my ($self, $fh, $stage, $idxid, $lidxid)=@_;
+
+ # read a line from a file
++
++ my $line;
+
+- if ( eof($fh) ) {
+- return ($stage, 9999999, '');
++ # if we have a parked record then use that one
++ if ( defined $self->{datafile}{$stage} ) {
++ $line = $self->{datafile}{$stage};
++ undef $self->{datafile}{$stage};
++
++ } else {
++ if ( eof($fh) ) {
++ return ($stage, 9999999, '');
++ }
++ defined( $line = readline $fh ) or die "readline failed on file for stage $stage
: $!";
+ }
+
+- defined( my $line = readline $fh ) or die "readline failed on file for stage
$stage : $!";
+-
+-
+ # extract the idxid from the start of each line
+ # 0000002:army%20of%20darkness%20%281992%29 Army of Darkness
(1992) 1992 movie 0000002
+ my ($midxid, $mdata) = $line =~ m/^(\d*):(.*)$/;
+@@ -3078,50 +3211,70 @@ sub readdatafile($$$$)
+ if ($midxid) {
+
+ # there should not be any records in datafile n which are not in datafile 1
+- if ($midxid < $idxid) {
++ if ( $midxid < $idxid ) {
+ $self->error("unexpected record in stage $stage data file at $midxid
(expected $idxid)");
+-
+ }
+ else {
+ # processing on the data for each interim file
++ ($stage, $midxid, $mdata) = $self->tidydatafile( $stage, $midxid, $mdata );
++ }
+
+- # movies #1 : strip the (TV) (V) markers from the movie title
+- # directors #2 : (i) dedupe (ii) sort into name order (not correct but there's no
sequencing in the imdb data)
+- # actors/actresses #3,#4 : (i) dedeupe (ii) sort into billing order (iii) strip
billing id Note: need to merge actors and actresses
+- # genres #5 : (i) dedupe
+- # ratings #6 : (i) split elements and separate by tabs
+- # keywords #7 : (i) dedupe, (ii) replace separator with comma
+- # plots #8 :
+- #
+- if ($stage == 1) {
+- $self->stripprogtype(\$mdata);
+-
+- } elsif ($stage == 2) {
+- $self->dedupe(\$mdata, '|');
+- $self->stripbilling(\$mdata, '|');
+- $self->sortnames(\$mdata, '|'); # sorts by "lastname,
firstname"
+-
+- } elsif ($stage == 3 || $stage == 4) {
+- $self->dedupe(\$mdata, '|');
+- # defer sorting and strip billing deferred until after we have joined actors +
actresses
+- ## $self->sortnames(\$mdata, '|'); # sorts by "billing:name"
+- ## $self->stripbilling(\$mdata, '|');
+-
+- } elsif ($stage == 5) {
+- $self->dedupe(\$mdata, '|');
+-
+- } elsif ($stage == 6) {
+- $mdata =~ s/;/\t/g;
+-
+- } elsif ($stage == 7) {
+- $self->dedupe(\$mdata, '|');
+- $mdata =~ s/\|/,/g;
+-
+- } elsif ($stage == 8) {
+- # noop
+- }
+-
++ # if the incoming idxid has changed then park the record
++ if ( $lidxid != -1 && $midxid != $lidxid ) {
++ $self->{datafile}{$stage} = $line;
+ }
++
++ }
++
++ return ($stage, $midxid, $mdata);
++}
++
++sub tidydatafile($$$$)
++{
++ my ($self, $stage, $midxid, $mdata)=@_;
++
++ # tidy/reformat the data from a stagex.data file
++
++ if ($midxid) {
++
++ # processing on the data for each interim file
++
++ # movies #1 : strip the (TV) (V) markers from the movie title
++ # directors #2 : (i) dedupe (ii) sort into name order (not correct but there's no
sequencing in the imdb data)
++ # actors/actresses #3,#4 : (i) dedeupe (ii) sort into billing order (iii) strip
billing id Note: need to merge actors and actresses
++ # genres #5 : (i) dedupe
++ # ratings #6 : (i) split elements and separate by tabs
++ # keywords #7 : (i) dedupe, (ii) replace separator with comma
++ # plots #8 :
++ #
++ if ($stage == 1) {
++ $self->stripprogtype(\$mdata);
++
++ } elsif ($stage == 2) {
++ $self->dedupe(\$mdata, '|');
++ $self->stripbilling(\$mdata, '|');
++ $self->sortnames(\$mdata, '|'); # sorts by "lastname,
firstname"
++
++ } elsif ($stage == 3 || $stage == 4) {
++ $self->dedupe(\$mdata, '|');
++ # defer sorting and strip billing deferred until after we have joined actors +
actresses
++ ## $self->sortnames(\$mdata, '|'); # sorts by "billing:name"
++ ## $self->stripbilling(\$mdata, '|');
++
++ } elsif ($stage == 5) {
++ $self->dedupe(\$mdata, '|');
++
++ } elsif ($stage == 6) {
++ $mdata =~ s/;/\t/g; # replace ";" separator with tabs
++
++ } elsif ($stage == 7) {
++ $self->dedupe(\$mdata, '|');
++ $mdata =~ s/\|/,/g;
++
++ } elsif ($stage == 8) {
++ # noop
++ }
++
+ }
+
+ return ($stage, $midxid, $mdata);
+@@ -3139,7 +3292,17 @@ sub invokeStage($$)
+ $self->status("parsing Movies list for stage $stage ...");
+ my $countEstimate=$self->dbinfoCalcEstimate("movies", 45);
+
++ # if we are using --filesort then write output file direct (and not use a hash)
++ if ($self->{usefilesort}) {
++ open($self->{fhdata}, ">",
"$self->{imdbDir}/stage$stage.data.tmp") || die
"$self->{imdbDir}/stage$stage.data.tmp:$!";
++ }
++
+ my ($num, $numout) = $self->readMovies("Movies", $countEstimate,
"$self->{imdbListFiles}->{movies}", $stage);
++
++ if ($self->{usefilesort}) {
++ close($self->{fhdata});
++ }
++
+ if ( $num < 0 ) {
+ if ( $num == -2 ) {
+ $self->error("you need to download $self->{imdbListFiles}->{movies}
from the ftp site, or use the --download option");
+@@ -3148,7 +3311,7 @@ sub invokeStage($$)
+ }
+ elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) {
+ my $better=$self->dbinfoCalcBytesPerEntry("movies", $num);
+- $self->status("ARG estimate of $countEstimate for movies needs updating,
found $num ($better bytes/entry)");
++ ##not accurate: $self->status("ARG estimate of $countEstimate for movies
needs updating, found $num ($better bytes/entry)");
+ }
+ $self->dbinfoAdd("db_stat_movie_count", "$numout");
+
+@@ -3159,38 +3322,94 @@ sub invokeStage($$)
+ #-----------------------------------------------------------
+ # sort the title keys and write the stage1.data file
+ #
+- $self->beginProgressBar("writing stage $stage data", $num);
+-
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+- print OUT '0000000:version '.$VERSION."\n";
+-
+- my $count=0;
+- foreach my $k (sort keys( %{$self->{movieshash}} )) {
++ # if we are using --filesort then write output file direct (and not use a hash)
++ if ($self->{usefilesort}) {
++
++ $self->beginProgressBar("writing stage $stage data",
$self->dbinfoGet("db_stat_movie_count", 0) );
+
+- while ( my ($k2, $v2) = each %{$self->{movieshash}{$k}} ) { # movieshash is a hash
of hashes
+-
++ # movies are in an interim file (stage1.data.tmp).
++ # We need to (1) sort the file,
++ # (2) translate to stage1.data (adding the idxid)
++ # (3) store in %titleshash
++ my $res;
++
++ # (1) sort the file in situ
++ $res = $self->sortfile($stage,
"$self->{imdbDir}/stage$stage.data.tmp");
++ # if (!$res) { do something? }
++
++ # (2) & (3) read the sorted file and create the stage1.data while building
titleshash hash
++ undef $self->{titleshash};
++
++ open(IN, "< $self->{imdbDir}/stage$stage.data.tmp") || die
"$self->{imdbDir}/stage$stage.data.tmp:$!";
++ open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
++ print OUT '0000000:version '.$VERSION."\n";
++
++ my $count=0;
++ while(<IN>) {
++ my $line=$_;
++
+ $count++;
+ my $idxid=sprintf("%07d", $count);
+-
++
++ my ($k, $k2, $v2) = $line =~ m/^(.*?)\t(.*?)\t(.*?)$/;
++
+ # the following equates to
+ # print OUT
$idxid.":".$dbkey."\t".$title."\t".$year."\t".$qualifier."\t".$lineno."\n";
+ print OUT
$idxid.':'.$k."\t".$k2."\t".$v2."\t".$idxid."\n";
+
+ # and create a shared hash of $title=>$lineno (i.e. IDX 'id')
+- $self->{titleshash}{$k2} = $count; # store the int version of the id for this
title
+- # (note multiple titles may have the same hashkey)
++ $self->{titleshash}{$k2} = $count; # store the idx id for this title
++
++
++ $self->updateProgressBar('', $count);
+ }
++ $self->endProgressBar();
+
+- delete( $self->{movieshash}{$k} );
++ $self->{maxid} = $count; # remember the largest values of title id (for loop
stop)
+
+- $self->updateProgressBar('', $count);
+- }
++ close(OUT);
++ close(IN);
+
+- $self->endProgressBar();
+-
+- $self->{maxid} = $count; # remember the largest values of title id (for loop
stop)
++ unlink "$self->{imdbDir}/stage$stage.data.tmp";
++
++
++ } else {
++
++ # movies data are in a hash (%movieshash) so we need to write that to disc
(stage1.data)
++
++ $self->beginProgressBar("writing stage $stage data", $num);
++
++ open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
++ print OUT '0000000:version '.$VERSION."\n";
++
++ my $count=0;
++ foreach my $k (sort keys( %{$self->{movieshash}} )) {
+
+- close(OUT);
++ while ( my ($k2, $v2) = each %{$self->{movieshash}{$k}} ) { # movieshash is a
hash of hashes
++
++ $count++;
++ my $idxid=sprintf("%07d", $count);
++
++ # the following equates to
++ # print OUT
$idxid.":".$dbkey."\t".$title."\t".$year."\t".$qualifier."\t".$lineno."\n";
++ print OUT
$idxid.':'.$k."\t".$k2."\t".$v2."\t".$idxid."\n";
++
++ # and create a shared hash of $title=>$lineno (i.e. IDX 'id')
++ $self->{titleshash}{$k2} = $count; # store the int version of the id for this
title
++ # (note multiple titles may have the same hashkey)
++ }
++
++ delete( $self->{movieshash}{$k} );
++
++ $self->updateProgressBar('', $count);
++ }
++
++ $self->endProgressBar();
++
++ $self->{maxid} = $count; # remember the largest values of title id (for loop
stop)
++
++ close(OUT);
++ }
+
+ #use Data::Dumper;print STDERR Dumper( $self->{titleshash} );die;
+
+@@ -3227,7 +3446,7 @@ sub invokeStage($$)
+ }
+
+ # approx average record length for each incoming data file (used to guesstimate number
of records in file)
+- my %countestimates = ( 1=>'45', 2=> '80', 3=> '60',
4=> '60', 5=> '35', 6=> '115', 7=> '20',
8=> '50' );
++ my %countestimates = ( 1=>'45', 2=> '40', 3=> '55',
4=> '55', 5=> '35', 6=> '65', 7=> '20', 8=>
'50' );
+ my $countEstimate = $self->dbinfoCalcEstimate($stagename,
$countestimates{$stage});
+
+ my %stagefunctions = ( 1=>\&readMovies, 2=>\&readCastOrDirectors,
+@@ -3236,7 +3455,18 @@ sub invokeStage($$)
+ 7=>\&readKeywords, 8=>\&readPlots
+ );
+
++
++ # if we are using --filesort then write output file direct (and not use a hash)
++ if ($self->{usefilesort}) {
++ open($self->{fhdata}, ">",
"$self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
++ print {$self->{fhdata}} '0000000:version '.$VERSION."\n";
++ }
++
+ my $num=$stagefunctions{$stage}->($self, $stagenametext, $countEstimate,
"$self->{imdbListFiles}->{$stagename}", $stage);
++
++ if ($self->{usefilesort}) {
++ close($self->{fhdata});
++ }
+
+ if ( $num < 0 ) {
+ if ( $num == -2 ) {
+@@ -3255,39 +3485,50 @@ sub invokeStage($$)
+ #-----------------------------------------------------------
+ # print the title keys in IDX id order : write the stagex.data file
+ #
+- #use Data::Dumper;my $_h="stage${stage}hash";print STDERR Dumper(
$self->{$_h} );
+-
+- $self->beginProgressBar("writing stage $stage data", $num);
+-
+- open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
+- print OUT '0000000:version '.$VERSION."\n";
+-
+- # don't sort the hash keys - that will just cost memory. Just pull them out in
numerical order.
+- my $h = "stage${stage}hash";
+- #
+- # read the stage data hash in idxid order
+- for (my $i = 0; $i <= $self->{maxid}; $i++){
+-
+- # write the extracted imdb data to a temporary file, preceeded by the IDX id for each
record
+- my $k = sprintf("%07d", $i);
+-
+- if ( $self->{$h}{$i} ) {
+- my $v = $self->{$h}{$i};
+- delete ( $self->{$h}{$i} );
+- #
+- print OUT $k.':'.$v."\n";
++ if ($self->{usefilesort}) {
++
++ # file already written : just needs sorting (in situ)
++ my $f="$self->{imdbDir}/stage$stage.data";
++ my $res = $self->sortfile($stage, $f);
++ # todo: check the reply?
++
++ } else {
++ #use Data::Dumper;my $_h="stage${stage}hash";print STDERR Dumper(
$self->{$_h} );
++
++ # write the stage.data file from the memory hash
++
++ $self->beginProgressBar("writing stage $stage data", $num);
++
++ open(OUT, "> $self->{imdbDir}/stage$stage.data") || die
"$self->{imdbDir}/stage$stage.data:$!";
++ print OUT '0000000:version '.$VERSION."\n";
++
++ # don't sort the hash keys - that will just cost memory. Just pull them out in
numerical order.
++ my $h = "stage${stage}hash";
++ #
++ # read the stage data hash in idxid order
++ for (my $i = 0; $i <= $self->{maxid}; $i++){
++
++ # write the extracted imdb data to a temporary file, preceded by the IDX id for
each record
++ my $k = sprintf("%07d", $i);
++
++ if ( $self->{$h}{$i} ) {
++ my $v = $self->{$h}{$i};
++ delete ( $self->{$h}{$i} );
++ #
++ print OUT $k.':'.$v."\n";
++ }
++
++ $self->updateProgressBar('', $i);
+ }
+
+- $self->updateProgressBar('', $i);
+- }
++ $self->endProgressBar();
++
++ close(OUT);
+
+- $self->endProgressBar();
+-
+- close(OUT);
+-
+- #use Data::Dumper;print STDERR "leftovers: $stage ".Dumper( $self->{$h}
)."\n";
+-
+- delete ( $self->{$h} );
++ #use Data::Dumper;print STDERR "leftovers: $stage ".Dumper( $self->{$h}
)."\n";
++
++ delete ( $self->{$h} );
++ }
+
+ #use Data::Dumper;print STDERR Dumper( $self->{titleshash} );
+ }
+@@ -3356,7 +3597,7 @@ sub invokeStage($$)
+ last if ( eof($fh{1}) ); # I suppose we ought to check if there any recs remaining in
the other files (todo)
+
+ # read a movie record
+- my ($fstage, $fidxid, $fdata) = $self->readdatafile($fh{1}, 1, -1);
++ my ($fstage, $fidxid, $fdata) = $self->readdatafile($fh{1}, 1, -1, -1);
+
+ $fdat{$fstage} = { k=>$fidxid, v=>$fdata };
+
+@@ -3375,7 +3616,7 @@ sub invokeStage($$)
+ next if ( $fdat{$i}{k} == $fidxid && $fdat{$i}{v} eq ':::' );
+ # only output either actors or actresses but not both (otherwise we'll get an
extra marker in the output
+ next if ($i == 3) && ( $fdat{3}{k} != $fidxid );
+- next if ($i == 4) && ( $fdat{4}{k} != $fidxid ) && ( $fdat{3}{k}
== $fidxid ); # dont output marker if we've just done it for actors
++ next if ($i == 4) && ( $fdat{4}{k} != $fidxid ) && ( $fdat{3}{k}
== $fidxid ); # don't output marker if we've just done it for actors
+ # drop through if actresses (#4) and no actors (#3) for this film
+
+
+@@ -3383,6 +3624,7 @@ sub invokeStage($$)
+ $mdata .= $fdat{$i}{v};
+ }
+ else {
++ # no data for this stage ($i) so just print the 'empty' marker
+ $mdata .= '<>';
+ if ($i == 6) { $mdata .=
"\t".'<>'."\t".'<>'; } # fudge to add
extra spacers in ratings data
+ }
+--
+2.29.2
+
diff --git a/0044-Option-to-exclude-tv-series-from-the-database-build.patch
b/0044-Option-to-exclude-tv-series-from-the-database-build.patch
new file mode 100644
index 0000000..f29dbe4
--- /dev/null
+++ b/0044-Option-to-exclude-tv-series-from-the-database-build.patch
@@ -0,0 +1,74 @@
+From f78958aaeea75341039cdfee9ee22f038061e6a3 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Wed, 13 Jan 2021 18:23:24 +0000
+Subject: [PATCH 44/50] Option to exclude tv-series from the database build
+
+---
+ filter/tv_imdb | 7 ++++++-
+ lib/IMDB.pm | 4 ++++
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/filter/tv_imdb b/filter/tv_imdb
+index 859d6c57..d9e660d4 100755
+--- a/filter/tv_imdb
++++ b/filter/tv_imdb
+@@ -9,7 +9,7 @@ tv_imdb - Augment XMLTV listings files with
imdb.com data.
+ =head1 SYNOPSIS
+
+ tv_imdb --imdbdir <dir> [--help] [--quiet] [--download]
+- [--filesort] [--nosystemsort]
++ [--movies-only] [--filesort] [--nosystemsort]
+ [--prepStage (1-9,all)]
+
+ tv_imdb --imdbdir <dir> [--help] [--quiet]
+@@ -125,6 +125,10 @@ can use the File::Sort package if it is installed on your system, by
also adding
+ option --nosystemsort (however this method of sorting is very slow). If you specify
+ neither option then Perl will sort the files in memory.
+
++If you are only interested in movies, you can reduce the memory required and the
++size of the database by passing the --movies-only option to the database build,
++which will exclude tv-series from the database.
++
+ B<3.> Once you have the database loaded try
+ E<39>cat tv.xml | tv_imdb --imdbdir <dir> > tv1.xmlE<39>.
+
+@@ -284,6 +288,7 @@ END
+ 'sample' => $opt_sample,
+ 'filesort' => $opt_filesort,
+ 'systemsort' => $opt_systemsort,
++ 'moviesonly' => $opt_movies_only,
+ );
+
+ if ( $opt_prepStage eq "all" ) {
+diff --git a/lib/IMDB.pm b/lib/IMDB.pm
+index 3ee44c7a..7db3c981 100644
+--- a/lib/IMDB.pm
++++ b/lib/IMDB.pm
+@@ -55,6 +55,7 @@ use open ':encoding(iso-8859-1)'; # try to enforce file
encoding (does this wo
+ # (potentially wrong - it should not augment incoming prog when multiple matches)
+ # dbbuild: --filesort to sort interim data on disc rather than in memory
+ # dbbuild: --nosystemsort to use File::Sort rather than operating system shell's
'sort' command
++# dbbuild: --movies-only to exclude tv-series (etc.) from database build
+ #
+ #
+ our $VERSION = '0.11'; # version number of database
+@@ -1941,6 +1942,8 @@ sub readMovies($$$$$)
+ # we don't keep episode information TODO: enhancement: change tv_imdb to do
episodes?
+ if ($isepisode == 1) { next; }
+
++ next if ($self->{moviesonly} && ($progtype != 1 && $progtype != 2)
); # user requested movies_only
++
+
+ # store the movies data
+ if ($self->{usefilesort}) {
+@@ -2179,6 +2182,7 @@ sub readCastOrDirectors($$$$$)
+ ## often where the year on the actor record is 1 year out
+ ## people will get worried if we report over 1000 errors and there's nothing we
can sensibly do about them
+ ##$self->error("$file:$lineCount: cannot find $title in titles list");
++ ### if we reinstate this test then we'd need to allow for 'moviesonly'
option (i.e. a lot of titles will have been deliberately excluded)
+ next;
+ }
+
+--
+2.29.2
+
diff --git a/0045-fix-broken-url-to-imdb-website.patch
b/0045-fix-broken-url-to-imdb-website.patch
new file mode 100644
index 0000000..9da53a2
--- /dev/null
+++ b/0045-fix-broken-url-to-imdb-website.patch
@@ -0,0 +1,915 @@
+From 348578ad8bc437fc93148276995ab905b0eecea5 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Thu, 14 Jan 2021 13:06:19 +0000
+Subject: [PATCH 45/50] fix broken url to imdb website
+
+---
+ lib/IMDB.pm | 4 +-
+ .../Cast-actor-with-generation.xml-expected | 2 +-
+ .../Cast-actors-and-actresses.xml-expected | 2 +-
+ t/data-tv_imdb/Cast-billing.xml-expected | 2 +-
+ t/data-tv_imdb/Cast-duplicate.xml-expected | 2 +-
+ .../Cast-host-or-narrator.xml-expected | 12 ++--
+ .../Cast-name-with-suffix.xml-expected | 2 +-
+ t/data-tv_imdb/Cast-role.xml-expected | 4 +-
+ ...tiple-and-duplicate-directors.xml-expected | 4 +-
+ .../Director-name-with-suffix.xml-expected | 2 +-
+ .../Director-with-generation.xml-expected | 2 +-
+ t/data-tv_imdb/Genres-duplicate.xml-expected | 2 +-
+ t/data-tv_imdb/Genres-multiple.xml-expected | 2 +-
+ t/data-tv_imdb/Genres-single.xml-expected | 2 +-
+ .../Movie-startswith-hyphen.xml-expected | 2 +-
+ t/data-tv_imdb/Movie-with-aka.xml-expected | 2 +-
+ .../Movie1-case-insensitive.xml-expected | 2 +-
+ .../Movie1-movies-only.xml-expected | 2 +-
+ t/data-tv_imdb/Movie1.xml-expected | 2 +-
+ t/data-tv_imdb/Movie100-years.xml-expected | 16 ++---
+ .../Movie101-movie-and-tv.xml-expected | 8 +--
+ t/data-tv_imdb/Movie21-accents.xml-expected | 26 ++++-----
+ t/data-tv_imdb/Movie22-dots.xml-expected | 6 +-
+ t/data-tv_imdb/Movie3-and-amp.xml-expected | 8 +--
+ .../Movie5-ignore-punc.xml-expected | 6 +-
+ t/data-tv_imdb/Movie5-with-punc.xml-expected | 2 +-
+ t/data-tv_imdb/Movie6-articles.xml-expected | 58 +++++++++----------
+ t/data-tv_imdb/Ratings.xml-expected | 2 +-
+ t/data-tv_imdb/Show1.xml-expected | 4 +-
+ 29 files changed, 95 insertions(+), 95 deletions(-)
+
+diff --git a/lib/IMDB.pm b/lib/IMDB.pm
+index 7db3c981..1ad31ad8 100644
+--- a/lib/IMDB.pm
++++ b/lib/IMDB.pm
+@@ -1028,14 +1028,14 @@ sub applyFound($$$)
+ my $url=$idInfo->{key};
+
+ $url=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg;
+- $url="http://us.imdb.com/M/title-exact?".$url;
++ $url="https://www.imdb.com/find?q=".$url."&s=tt&exact=true";
+
+ if ( defined($prog->{url}) ) {
+ my @rep;
+ push(@rep, $url);
+ for (@{$prog->{url}}) {
+ # skip urls for
imdb.com that we're probably safe to replace
+- if ( !m;^http://us.imdb.com/M/title-exact;o ) {
++ if ( !m;^http://us.imdb.com/M/title-exact;o &&
!m;^https://www.imdb.com/find;o ) {
+ push(@rep, $_);
+ }
+ }
+diff --git a/t/data-tv_imdb/Cast-actor-with-generation.xml-expected
b/t/data-tv_imdb/Cast-actor-with-generation.xml-expected
+index 0ba748c2..9a658083 100644
+--- a/t/data-tv_imdb/Cast-actor-with-generation.xml-expected
++++ b/t/data-tv_imdb/Cast-actor-with-generation.xml-expected
+@@ -9,6 +9,6 @@
+ </credits>
+ <date>2014</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Murder101%20%282014%29</url>
++
<
url>https://www.imdb.com/find?q=Murder101%20%282014%29&s=tt&am...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Cast-actors-and-actresses.xml-expected
b/t/data-tv_imdb/Cast-actors-and-actresses.xml-expected
+index ce979dc3..208c8ef4 100644
+--- a/t/data-tv_imdb/Cast-actors-and-actresses.xml-expected
++++ b/t/data-tv_imdb/Cast-actors-and-actresses.xml-expected
+@@ -10,6 +10,6 @@
+ </credits>
+ <date>1997</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Titanic%20%281997%29</url>
++
<
url>https://www.imdb.com/find?q=Titanic%20%281997%29&s=tt&...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Cast-billing.xml-expected
b/t/data-tv_imdb/Cast-billing.xml-expected
+index 9ff477c0..eea80883 100644
+--- a/t/data-tv_imdb/Cast-billing.xml-expected
++++ b/t/data-tv_imdb/Cast-billing.xml-expected
+@@ -11,6 +11,6 @@
+ </credits>
+ <date>2013</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?%23Rip%20%282013%29</url>
++
<
url>https://www.imdb.com/find?q=%23Rip%20%282013%29&s=tt&a...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Cast-duplicate.xml-expected
b/t/data-tv_imdb/Cast-duplicate.xml-expected
+index 8e2c59ed..88378821 100644
+--- a/t/data-tv_imdb/Cast-duplicate.xml-expected
++++ b/t/data-tv_imdb/Cast-duplicate.xml-expected
+@@ -9,6 +9,6 @@
+ </credits>
+ <date>2015</date>
+ <category lang="en">TV Series</category>
+-
<
url>http://us.imdb.com/M/title-exact?%22%23SketchPack%22%20%282015%29&...
++
<
url>https://www.imdb.com/find?q=%22%23SketchPack%22%20%282015%29&a...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Cast-host-or-narrator.xml-expected
b/t/data-tv_imdb/Cast-host-or-narrator.xml-expected
+index eba20fa5..8e62386b 100644
+--- a/t/data-tv_imdb/Cast-host-or-narrator.xml-expected
++++ b/t/data-tv_imdb/Cast-host-or-narrator.xml-expected
+@@ -9,7 +9,7 @@
+ </credits>
+ <date>2015</date>
+ <category lang="en">TV Series</category>
+- <
url>http://us.imdb.com/M/title-exact?%22Bookclub%22%20%282015%29</u...
++
<
url>https://www.imdb.com/find?q=%22Bookclub%22%20%282015%29&s=...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>LolliLove</title>
+@@ -18,7 +18,7 @@
+ </credits>
+ <date>2004</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?LolliLove%20%282004%29</url>
++
<
url>https://www.imdb.com/find?q=LolliLove%20%282004%29&s=tt&am...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Breaking Genres</title>
+@@ -27,7 +27,7 @@
+ </credits>
+ <date>2015</date>
+ <category lang="en">TV Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Breaking%20Genres%20%282015%29<...
++
<
url>https://www.imdb.com/find?q=Breaking%20Genres%20%282015%29&...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>The Jean Bowring Show</title>
+@@ -36,7 +36,7 @@
+ </credits>
+ <date>1957</date>
+ <category lang="en">TV Series</category>
+-
<
url>http://us.imdb.com/M/title-exact?%22The%20Jean%20Bowring%20Show%22...
++
<
url>https://www.imdb.com/find?q=%22The%20Jean%20Bowring%20Show%22%20%2...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>New Now Next Awards</title>
+@@ -45,7 +45,7 @@
+ </credits>
+ <date>2008</date>
+ <category lang="en">TV Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?New%20Now%20Next%20Awards%20%2820...
++
<
url>https://www.imdb.com/find?q=New%20Now%20Next%20Awards%20%282008%29...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>3 Weeks in Yerevan</title>
+@@ -54,6 +54,6 @@
+ </credits>
+ <date>2016</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?3%20Weeks%20in%20Yerevan%20%28201...
++
<
url>https://www.imdb.com/find?q=3%20Weeks%20in%20Yerevan%20%282016%29&...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Cast-name-with-suffix.xml-expected
b/t/data-tv_imdb/Cast-name-with-suffix.xml-expected
+index 3c8441e3..7fe2023f 100644
+--- a/t/data-tv_imdb/Cast-name-with-suffix.xml-expected
++++ b/t/data-tv_imdb/Cast-name-with-suffix.xml-expected
+@@ -12,6 +12,6 @@
+ </credits>
+ <date>2015</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?%23Selfie%20%282015%29</url>
++
<
url>https://www.imdb.com/find?q=%23Selfie%20%282015%29&s=tt&am...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Cast-role.xml-expected
b/t/data-tv_imdb/Cast-role.xml-expected
+index 1553f0c3..54e8a0b6 100644
+--- a/t/data-tv_imdb/Cast-role.xml-expected
++++ b/t/data-tv_imdb/Cast-role.xml-expected
+@@ -10,7 +10,7 @@
+ </credits>
+ <date>2015</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?%23REV%20%282015%29</url>
++
<
url>https://www.imdb.com/find?q=%23REV%20%282015%29&s=tt&a...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Titanic</title>
+@@ -20,6 +20,6 @@
+ </credits>
+ <date>1997</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Titanic%20%281997%29</url>
++
<
url>https://www.imdb.com/find?q=Titanic%20%281997%29&s=tt&...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml-expected
b/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml-expected
+index d159117f..c1fb3408 100644
+--- a/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml-expected
++++ b/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml-expected
+@@ -10,7 +10,7 @@
+ </credits>
+ <date>2014</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?%23Illusion%20%282014%29</url>
++
<
url>https://www.imdb.com/find?q=%23Illusion%20%282014%29&s=tt&...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>#iScream</title>
+@@ -19,6 +19,6 @@
+ </credits>
+ <date>2014</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?%23iScream%20%282014%29</url>
++
<
url>https://www.imdb.com/find?q=%23iScream%20%282014%29&s=tt&a...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Director-name-with-suffix.xml-expected
b/t/data-tv_imdb/Director-name-with-suffix.xml-expected
+index e0654ee4..87a99344 100644
+--- a/t/data-tv_imdb/Director-name-with-suffix.xml-expected
++++ b/t/data-tv_imdb/Director-name-with-suffix.xml-expected
+@@ -9,6 +9,6 @@
+ </credits>
+ <date>1979</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Grease%20Monkeys%20%281979%29<...
++
<
url>https://www.imdb.com/find?q=Grease%20Monkeys%20%281979%29&...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Director-with-generation.xml-expected
b/t/data-tv_imdb/Director-with-generation.xml-expected
+index d033ef0b..3ce1ada8 100644
+--- a/t/data-tv_imdb/Director-with-generation.xml-expected
++++ b/t/data-tv_imdb/Director-with-generation.xml-expected
+@@ -9,6 +9,6 @@
+ </credits>
+ <date>2017</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?The%20Meek%20%282017%29</url>
++
<
url>https://www.imdb.com/find?q=The%20Meek%20%282017%29&s=tt&a...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Genres-duplicate.xml-expected
b/t/data-tv_imdb/Genres-duplicate.xml-expected
+index 9787f5b5..e500fd44 100644
+--- a/t/data-tv_imdb/Genres-duplicate.xml-expected
++++ b/t/data-tv_imdb/Genres-duplicate.xml-expected
+@@ -9,6 +9,6 @@
+ <category lang="en">Crime</category>
+ <category lang="en">Drama</category>
+ <category lang="en">Film-Noir</category>
+- <
url>http://us.imdb.com/M/title-exact?%27C%27-Man%20%281949%29</url>
++
<
url>https://www.imdb.com/find?q=%27C%27-Man%20%281949%29&s=tt&...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Genres-multiple.xml-expected
b/t/data-tv_imdb/Genres-multiple.xml-expected
+index 60cc308a..6f985aba 100644
+--- a/t/data-tv_imdb/Genres-multiple.xml-expected
++++ b/t/data-tv_imdb/Genres-multiple.xml-expected
+@@ -9,6 +9,6 @@
+ <category lang="en">Comedy</category>
+ <category lang="en">Fantasy</category>
+ <category lang="en">Short</category>
+-
<
url>http://us.imdb.com/M/title-exact?%5BFilm%20%239%20Title%5D%20%2820...
++
<
url>https://www.imdb.com/find?q=%5BFilm%20%239%20Title%5D%20%282015%29...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Genres-single.xml-expected
b/t/data-tv_imdb/Genres-single.xml-expected
+index bebe755b..c89bd481 100644
+--- a/t/data-tv_imdb/Genres-single.xml-expected
++++ b/t/data-tv_imdb/Genres-single.xml-expected
+@@ -7,6 +7,6 @@
+ <date>2004</date>
+ <category lang="en">Movie</category>
+ <category lang="en">Short</category>
+-
<
url>http://us.imdb.com/M/title-exact?%28Mon%29%20Jour%20de%20chance%20...
++
<
url>https://www.imdb.com/find?q=%28Mon%29%20Jour%20de%20chance%20%2820...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Movie-startswith-hyphen.xml-expected
b/t/data-tv_imdb/Movie-startswith-hyphen.xml-expected
+index 467c72c7..f164c192 100644
+--- a/t/data-tv_imdb/Movie-startswith-hyphen.xml-expected
++++ b/t/data-tv_imdb/Movie-startswith-hyphen.xml-expected
+@@ -6,6 +6,6 @@
+ <title>-1: Minus One</title>
+ <date>2016</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?-1%3A%20Minus%20One%20%282016%29&...
++
<
url>https://www.imdb.com/find?q=-1%3A%20Minus%20One%20%282016%29&a...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Movie-with-aka.xml-expected
b/t/data-tv_imdb/Movie-with-aka.xml-expected
+index cfd756c2..7e905b6e 100644
+--- a/t/data-tv_imdb/Movie-with-aka.xml-expected
++++ b/t/data-tv_imdb/Movie-with-aka.xml-expected
+@@ -6,6 +6,6 @@
+ <title>Family Prayers</title>
+ <date>2010</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Family%20Prayers%20%282010%29<...
++
<
url>https://www.imdb.com/find?q=Family%20Prayers%20%282010%29&...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Movie1-case-insensitive.xml-expected
b/t/data-tv_imdb/Movie1-case-insensitive.xml-expected
+index 753376b3..18d831cf 100644
+--- a/t/data-tv_imdb/Movie1-case-insensitive.xml-expected
++++ b/t/data-tv_imdb/Movie1-case-insensitive.xml-expected
+@@ -15,7 +15,7 @@
+ <category lang="en">Horror</category>
+ <keyword lang="en">Horror</keyword>
+ <keyword lang="en">Mystery</keyword>
+- <
url>http://us.imdb.com/M/title-exact?Movie1%20%281990%29</url>
++
<
url>https://www.imdb.com/find?q=Movie1%20%281990%29&s=tt&a...
+ <star-rating system="IMDB User Rating">
+ <value>1.0/10</value>
+ </star-rating>
+diff --git a/t/data-tv_imdb/Movie1-movies-only.xml-expected
b/t/data-tv_imdb/Movie1-movies-only.xml-expected
+index f8061ec9..d159e5d4 100644
+--- a/t/data-tv_imdb/Movie1-movies-only.xml-expected
++++ b/t/data-tv_imdb/Movie1-movies-only.xml-expected
+@@ -15,7 +15,7 @@
+ <category lang="en">Horror</category>
+ <keyword lang="en">Horror</keyword>
+ <keyword lang="en">Mystery</keyword>
+- <
url>http://us.imdb.com/M/title-exact?Movie1%20%281990%29</url>
++
<
url>https://www.imdb.com/find?q=Movie1%20%281990%29&s=tt&a...
+ <star-rating system="IMDB User Rating">
+ <value>1.0/10</value>
+ </star-rating>
+diff --git a/t/data-tv_imdb/Movie1.xml-expected b/t/data-tv_imdb/Movie1.xml-expected
+index 753376b3..18d831cf 100644
+--- a/t/data-tv_imdb/Movie1.xml-expected
++++ b/t/data-tv_imdb/Movie1.xml-expected
+@@ -15,7 +15,7 @@
+ <category lang="en">Horror</category>
+ <keyword lang="en">Horror</keyword>
+ <keyword lang="en">Mystery</keyword>
+- <
url>http://us.imdb.com/M/title-exact?Movie1%20%281990%29</url>
++
<
url>https://www.imdb.com/find?q=Movie1%20%281990%29&s=tt&a...
+ <star-rating system="IMDB User Rating">
+ <value>1.0/10</value>
+ </star-rating>
+diff --git a/t/data-tv_imdb/Movie100-years.xml-expected
b/t/data-tv_imdb/Movie100-years.xml-expected
+index 3362cce2..343f8624 100644
+--- a/t/data-tv_imdb/Movie100-years.xml-expected
++++ b/t/data-tv_imdb/Movie100-years.xml-expected
+@@ -9,7 +9,7 @@
+ </credits>
+ <date>1915</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Movie100%20%281915%29</url>
++
<
url>https://www.imdb.com/find?q=Movie100%20%281915%29&s=tt&...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie100</title>
+@@ -18,7 +18,7 @@
+ </credits>
+ <date>1914</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Movie100%20%281915%29</url>
++
<
url>https://www.imdb.com/find?q=Movie100%20%281915%29&s=tt&...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie100</title>
+@@ -27,7 +27,7 @@
+ </credits>
+ <date>1913</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Movie100%20%281915%29</url>
++
<
url>https://www.imdb.com/find?q=Movie100%20%281915%29&s=tt&...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie100</title>
+@@ -40,7 +40,7 @@
+ </credits>
+ <date>1916</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Movie100%20%281915%29</url>
++
<
url>https://www.imdb.com/find?q=Movie100%20%281915%29&s=tt&...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie100</title>
+@@ -49,7 +49,7 @@
+ </credits>
+ <date>1917</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Movie100%20%281915%29</url>
++
<
url>https://www.imdb.com/find?q=Movie100%20%281915%29&s=tt&...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie100</title>
+@@ -62,7 +62,7 @@
+ </credits>
+ <date>1943</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Movie100%20%281943%29</url>
++
<
url>https://www.imdb.com/find?q=Movie100%20%281943%29&s=tt&...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie100</title>
+@@ -71,7 +71,7 @@
+ </credits>
+ <date>1953</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Movie100%20%281953%29</url>
++
<
url>https://www.imdb.com/find?q=Movie100%20%281953%29&s=tt&...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie100</title>
+@@ -81,6 +81,6 @@
+ <title>Movie100</title>
+ <date>1993</date>
+ <category lang="en">Video Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Movie100%20%281993%29</url>
++
<
url>https://www.imdb.com/find?q=Movie100%20%281993%29&s=tt&...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Movie101-movie-and-tv.xml-expected
b/t/data-tv_imdb/Movie101-movie-and-tv.xml-expected
+index b6224688..92857e43 100644
+--- a/t/data-tv_imdb/Movie101-movie-and-tv.xml-expected
++++ b/t/data-tv_imdb/Movie101-movie-and-tv.xml-expected
+@@ -6,23 +6,23 @@
+ <title>Movie101</title>
+ <date>1992</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Movie101%20%281992%29</url>
++
<
url>https://www.imdb.com/find?q=Movie101%20%281992%29&s=tt&...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie101</title>
+ <date>1993</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Movie101%20%281993%29</url>
++
<
url>https://www.imdb.com/find?q=Movie101%20%281993%29&s=tt&...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie101</title>
+ <date>1988</date>
+ <category lang="en">TV Series</category>
+- <
url>http://us.imdb.com/M/title-exact?%22Movie101%22%20%281988%29</u...
++
<
url>https://www.imdb.com/find?q=%22Movie101%22%20%281988%29&s=...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie101</title>
+ <category lang="en">TV Series</category>
+- <
url>http://us.imdb.com/M/title-exact?%22Movie101%22%20%281988%29</u...
++
<
url>https://www.imdb.com/find?q=%22Movie101%22%20%281988%29&s=...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Movie21-accents.xml-expected
b/t/data-tv_imdb/Movie21-accents.xml-expected
+index c03bb38c..65712cbc 100644
+--- a/t/data-tv_imdb/Movie21-accents.xml-expected
++++ b/t/data-tv_imdb/Movie21-accents.xml-expected
+@@ -6,90 +6,90 @@
+ <title>Movie21 aeiouaecnssy</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%...
++
<
url>https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&am...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie21 aeiouaecnssy</title>
+ <title>Movie21 Àeiouaecnssy</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%...
++
<
url>https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&am...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie21 aeiouaecnssy</title>
+ <title>Movie21 aÈiouaecnssy</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%...
++
<
url>https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&am...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie21 aeiouaecnssy</title>
+ <title>Movie21 aeÌouaecnssy</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%...
++
<
url>https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&am...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie21 aeiouaecnssy</title>
+ <title>Movie21 aeiÒuaecnssy</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%...
++
<
url>https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&am...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie21 aeiouaecnssy</title>
+ <title>Movie21 aeioÙaecnssy</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%...
++
<
url>https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&am...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie21 aeiouaecnssy</title>
+ <title>Movie21 aeiouÆcnssy</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%...
++
<
url>https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&am...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie21 aeiouaecnssy</title>
+ <title>Movie21 aeiouaeÇnssy</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%...
++
<
url>https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&am...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie21 aeiouaecnssy</title>
+ <title>Movie21 aeiouaecÑssy</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%...
++
<
url>https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&am...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie21 aeiouaecnssy</title>
+ <title>Movie21 aeiouaecnßy</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%...
++
<
url>https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&am...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie21 aeiouaecnssy</title>
+ <title>Movie21 aeiouaecnssÝ</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%...
++
<
url>https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&am...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie21 aeiouaecnssy</title>
+ <title>Movie21 ÀÈÌÒÙæÇÑßÝ</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%...
++
<
url>https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&am...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie21 aeiouaecnssy</title>
+ <title>Movie21 ÀÈÌÒÙæÇÑßÝ¿</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%...
++
<
url>https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&am...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Movie22-dots.xml-expected
b/t/data-tv_imdb/Movie22-dots.xml-expected
+index 2a0a193a..ca62634a 100644
+--- a/t/data-tv_imdb/Movie22-dots.xml-expected
++++ b/t/data-tv_imdb/Movie22-dots.xml-expected
+@@ -6,20 +6,20 @@
+ <title>Movie22 dots</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Movie22%20dots%20%281991%29</u...
++
<
url>https://www.imdb.com/find?q=Movie22%20dots%20%281991%29&s=...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie22 dots</title>
+ <title>M.o.v.i.e.2.2. dots</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Movie22%20dots%20%281991%29</u...
++
<
url>https://www.imdb.com/find?q=Movie22%20dots%20%281991%29&s=...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie22 dots</title>
+ <title>Movie22 d.o.t.s.</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Movie22%20dots%20%281991%29</u...
++
<
url>https://www.imdb.com/find?q=Movie22%20dots%20%281991%29&s=...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Movie3-and-amp.xml-expected
b/t/data-tv_imdb/Movie3-and-amp.xml-expected
+index b9242687..f735291c 100644
+--- a/t/data-tv_imdb/Movie3-and-amp.xml-expected
++++ b/t/data-tv_imdb/Movie3-and-amp.xml-expected
+@@ -6,26 +6,26 @@
+ <title>Movie3 and more</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie3%20and%20more%20%281991%29&...
++
<
url>https://www.imdb.com/find?q=Movie3%20and%20more%20%281991%29&a...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie3 and more</title>
+ <title>Movie3 & more</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie3%20and%20more%20%281991%29&...
++
<
url>https://www.imdb.com/find?q=Movie3%20and%20more%20%281991%29&a...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie4 & more</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie4%20%26%20more%20%281991%29&...
++
<
url>https://www.imdb.com/find?q=Movie4%20%26%20more%20%281991%29&a...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie4 & more</title>
+ <title>Movie4 and more</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie4%20%26%20more%20%281991%29&...
++
<
url>https://www.imdb.com/find?q=Movie4%20%26%20more%20%281991%29&a...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Movie5-ignore-punc.xml-expected
b/t/data-tv_imdb/Movie5-ignore-punc.xml-expected
+index 24d9e10b..65c760e1 100644
+--- a/t/data-tv_imdb/Movie5-ignore-punc.xml-expected
++++ b/t/data-tv_imdb/Movie5-ignore-punc.xml-expected
+@@ -6,20 +6,20 @@
+ <title>Movie5 no punctuation</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie5%20no%20punctuation%20%2819...
++
<
url>https://www.imdb.com/find?q=Movie5%20no%20punctuation%20%281991%29...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie5 no punctuation</title>
+ <title>Movie5 no .....punctuation</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie5%20no%20punctuation%20%2819...
++
<
url>https://www.imdb.com/find?q=Movie5%20no%20punctuation%20%281991%29...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Movie5 no punctuation</title>
+ <title>Movie5:Movie5 no punctuation</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie5%20no%20punctuation%20%2819...
++
<
url>https://www.imdb.com/find?q=Movie5%20no%20punctuation%20%281991%29...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Movie5-with-punc.xml-expected
b/t/data-tv_imdb/Movie5-with-punc.xml-expected
+index 870f038c..fe5ff9dc 100644
+--- a/t/data-tv_imdb/Movie5-with-punc.xml-expected
++++ b/t/data-tv_imdb/Movie5-with-punc.xml-expected
+@@ -6,6 +6,6 @@
+ <title>Movie5's with punctuation</title>
+ <date>1992</date>
+ <category lang="en">Movie</category>
+-
<
url>http://us.imdb.com/M/title-exact?Movie5%27s%20with%20punctuation%2...
++
<
url>https://www.imdb.com/find?q=Movie5%27s%20with%20punctuation%20%281...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Movie6-articles.xml-expected
b/t/data-tv_imdb/Movie6-articles.xml-expected
+index 41d00b3c..0b3cfddf 100644
+--- a/t/data-tv_imdb/Movie6-articles.xml-expected
++++ b/t/data-tv_imdb/Movie6-articles.xml-expected
+@@ -6,188 +6,188 @@
+ <title>The Movie6</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?The%20Movie6%20%281991%29</url...
++
<
url>https://www.imdb.com/find?q=The%20Movie6%20%281991%29&s=tt...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>The Movie7</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?The%20Movie7%20%281991%29</url...
++
<
url>https://www.imdb.com/find?q=The%20Movie7%20%281991%29&s=tt...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>The Movie7</title>
+ <title>Movie7, The</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?The%20Movie7%20%281991%29</url...
++
<
url>https://www.imdb.com/find?q=The%20Movie7%20%281991%29&s=tt...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>A Movie8</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?A%20Movie8%20%281991%29</url>
++
<
url>https://www.imdb.com/find?q=A%20Movie8%20%281991%29&s=tt&a...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>A Movie8</title>
+ <title>Movie8, A</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?A%20Movie8%20%281991%29</url>
++
<
url>https://www.imdb.com/find?q=A%20Movie8%20%281991%29&s=tt&a...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Une Movie9</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Une%20Movie9%20%281991%29</url...
++
<
url>https://www.imdb.com/find?q=Une%20Movie9%20%281991%29&s=tt...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Une Movie9</title>
+ <title>Movie9, Une</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Une%20Movie9%20%281991%29</url...
++
<
url>https://www.imdb.com/find?q=Une%20Movie9%20%281991%29&s=tt...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Les Movie10</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Les%20Movie10%20%281991%29</ur...
++
<
url>https://www.imdb.com/find?q=Les%20Movie10%20%281991%29&s=t...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Les Movie10</title>
+ <title>Movie10, Les</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Les%20Movie10%20%281991%29</ur...
++
<
url>https://www.imdb.com/find?q=Les%20Movie10%20%281991%29&s=t...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Los Movie11</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Los%20Movie11%20%281991%29</ur...
++
<
url>https://www.imdb.com/find?q=Los%20Movie11%20%281991%29&s=t...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Los Movie11</title>
+ <title>Movie11, Los</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Los%20Movie11%20%281991%29</ur...
++
<
url>https://www.imdb.com/find?q=Los%20Movie11%20%281991%29&s=t...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Las Movie12</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Las%20Movie12%20%281991%29</ur...
++
<
url>https://www.imdb.com/find?q=Las%20Movie12%20%281991%29&s=t...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Las Movie12</title>
+ <title>Movie12, Las</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Las%20Movie12%20%281991%29</ur...
++
<
url>https://www.imdb.com/find?q=Las%20Movie12%20%281991%29&s=t...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>L' Movie13</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?L%27%20Movie13%20%281991%29</u...
++
<
url>https://www.imdb.com/find?q=L%27%20Movie13%20%281991%29&s=...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>L' Movie13</title>
+ <title>Movie13, L'</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?L%27%20Movie13%20%281991%29</u...
++
<
url>https://www.imdb.com/find?q=L%27%20Movie13%20%281991%29&s=...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Le Movie14</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Le%20Movie14%20%281991%29</url...
++
<
url>https://www.imdb.com/find?q=Le%20Movie14%20%281991%29&s=tt...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Le Movie14</title>
+ <title>Movie14, Le</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Le%20Movie14%20%281991%29</url...
++
<
url>https://www.imdb.com/find?q=Le%20Movie14%20%281991%29&s=tt...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>La Movie15</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?La%20Movie15%20%281991%29</url...
++
<
url>https://www.imdb.com/find?q=La%20Movie15%20%281991%29&s=tt...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>La Movie15</title>
+ <title>Movie15, La</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?La%20Movie15%20%281991%29</url...
++
<
url>https://www.imdb.com/find?q=La%20Movie15%20%281991%29&s=tt...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>El Movie16</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?El%20Movie16%20%281991%29</url...
++
<
url>https://www.imdb.com/find?q=El%20Movie16%20%281991%29&s=tt...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>El Movie16</title>
+ <title>Movie16, El</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?El%20Movie16%20%281991%29</url...
++
<
url>https://www.imdb.com/find?q=El%20Movie16%20%281991%29&s=tt...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Das Movie17</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Das%20Movie17%20%281991%29</ur...
++
<
url>https://www.imdb.com/find?q=Das%20Movie17%20%281991%29&s=t...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Das Movie17</title>
+ <title>Movie17, Das</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Das%20Movie17%20%281991%29</ur...
++
<
url>https://www.imdb.com/find?q=Das%20Movie17%20%281991%29&s=t...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>De Movie18</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?De%20Movie18%20%281991%29</url...
++
<
url>https://www.imdb.com/find?q=De%20Movie18%20%281991%29&s=tt...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>De Movie18</title>
+ <title>Movie18, De</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?De%20Movie18%20%281991%29</url...
++
<
url>https://www.imdb.com/find?q=De%20Movie18%20%281991%29&s=tt...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Het Movie19</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Het%20Movie19%20%281991%29</ur...
++
<
url>https://www.imdb.com/find?q=Het%20Movie19%20%281991%29&s=t...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Het Movie19</title>
+ <title>Movie19, Het</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Het%20Movie19%20%281991%29</ur...
++
<
url>https://www.imdb.com/find?q=Het%20Movie19%20%281991%29&s=t...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Een Movie20</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Een%20Movie20%20%281991%29</ur...
++
<
url>https://www.imdb.com/find?q=Een%20Movie20%20%281991%29&s=t...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>Een Movie20</title>
+ <title>Movie20, Een</title>
+ <date>1991</date>
+ <category lang="en">Movie</category>
+- <
url>http://us.imdb.com/M/title-exact?Een%20Movie20%20%281991%29</ur...
++
<
url>https://www.imdb.com/find?q=Een%20Movie20%20%281991%29&s=t...
+ </programme>
+ </tv>
+diff --git a/t/data-tv_imdb/Ratings.xml-expected b/t/data-tv_imdb/Ratings.xml-expected
+index d4e0885c..21c8b733 100644
+--- a/t/data-tv_imdb/Ratings.xml-expected
++++ b/t/data-tv_imdb/Ratings.xml-expected
+@@ -6,7 +6,7 @@
+ <title>#nitTWITS</title>
+ <date>2011</date>
+ <category lang="en">TV Series</category>
+-
<
url>http://us.imdb.com/M/title-exact?%22%23nitTWITS%22%20%282011%29<...
++
<
url>https://www.imdb.com/find?q=%22%23nitTWITS%22%20%282011%29&...
+ <star-rating system="IMDB User Rating">
+ <value>7.0/10</value>
+ </star-rating>
+diff --git a/t/data-tv_imdb/Show1.xml-expected b/t/data-tv_imdb/Show1.xml-expected
+index 48fe3e89..4924f352 100644
+--- a/t/data-tv_imdb/Show1.xml-expected
++++ b/t/data-tv_imdb/Show1.xml-expected
+@@ -5,12 +5,12 @@
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>The Show1</title>
+ <category lang="en">TV Series</category>
+-
<
url>http://us.imdb.com/M/title-exact?%22The%20Show1%22%20%282002%29<...
++
<
url>https://www.imdb.com/find?q=%22The%20Show1%22%20%282002%29&...
+ </programme>
+ <programme start="20010829000500 MST" channel="channel0">
+ <title>The Show1</title>
+ <date>1990</date>
+ <category lang="en">TV Series</category>
+-
<
url>http://us.imdb.com/M/title-exact?%22The%20Show1%22%20%282002%29<...
++
<
url>https://www.imdb.com/find?q=%22The%20Show1%22%20%282002%29&...
+ </programme>
+ </tv>
+--
+2.29.2
+
diff --git a/0046-fix-minor-typo-in-example.patch b/0046-fix-minor-typo-in-example.patch
new file mode 100644
index 0000000..4b14e50
--- /dev/null
+++ b/0046-fix-minor-typo-in-example.patch
@@ -0,0 +1,25 @@
+From 40ce66847e8ba575d38ee6c6cd081d70cf4d6f67 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Thu, 14 Jan 2021 13:08:14 +0000
+Subject: [PATCH 46/50] fix minor typo in example
+
+---
+ filter/tv_imdb | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/filter/tv_imdb b/filter/tv_imdb
+index d9e660d4..2ca3f625 100755
+--- a/filter/tv_imdb
++++ b/filter/tv_imdb
+@@ -139,7 +139,7 @@ Feel free to report any problems with these steps at
https://github.com/XMLTV/xm
+ The --validate-title and --validate-year flags can be used to validate the
+ information in the tv_imdb database. For exmple:
+
+- tv_imdb --imdbdir . --validate-title 'Army of Darness' --validate-year 1994
++ tv_imdb --imdbdir . --validate-title 'Army of Darkness' --validate-year 1994
+
+ =head1 BUGS
+
+--
+2.29.2
+
diff --git a/0047-fix-broken-tests.patch b/0047-fix-broken-tests.patch
new file mode 100644
index 0000000..200eb09
--- /dev/null
+++ b/0047-fix-broken-tests.patch
@@ -0,0 +1,47 @@
+From 14df38c7d1618051119899efd67f8a413f11ec78 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Thu, 14 Jan 2021 20:23:54 +0000
+Subject: [PATCH 47/50] fix broken tests
+
+---
+ t/data/clump_extract.xml | 3 +++
+ t/data/test_empty.xml | 3 +++
+ t/data/test_remove_some_overlapping.xml | 3 +++
+ 3 files changed, 9 insertions(+)
+
+diff --git a/t/data/clump_extract.xml b/t/data/clump_extract.xml
+index 46c235d9..f6353842 100644
+--- a/t/data/clump_extract.xml
++++ b/t/data/clump_extract.xml
+@@ -1,3 +1,6 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
+ <tv>
+ <programme start="20021004090000 BST" stop="20021004100000 BST"
channel="south-east.bbc1.bbc.co.uk" clumpidx="0/2">
+ <title lang="en">Kilroy</title>
+diff --git a/t/data/test_empty.xml b/t/data/test_empty.xml
+index 47bf5fca..95331b99 100644
+--- a/t/data/test_empty.xml
++++ b/t/data/test_empty.xml
+@@ -1,3 +1,6 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
+ <tv>
+ <programme start="20020420131000" channel="foo.com">
+ <title>A programme with empty stuff that should not be written out
again</title>
+diff --git a/t/data/test_remove_some_overlapping.xml
b/t/data/test_remove_some_overlapping.xml
+index e97431c5..6ca52be3 100644
+--- a/t/data/test_remove_some_overlapping.xml
++++ b/t/data/test_remove_some_overlapping.xml
+@@ -1,3 +1,6 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
+ <tv>
+ <programme start="20031025070000" stop="20031025074500"
channel="3">
+ <title>Container A</title>
+--
+2.29.2
+
diff --git a/0048-Added-option-channel-id-exp-to-filter-by-regex-on-ch.patch
b/0048-Added-option-channel-id-exp-to-filter-by-regex-on-ch.patch
new file mode 100644
index 0000000..30414bc
--- /dev/null
+++ b/0048-Added-option-channel-id-exp-to-filter-by-regex-on-ch.patch
@@ -0,0 +1,105 @@
+From f0becb5ecbbdbe4cbad3c6827e6878d2a58e8ac5 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Fri, 15 Jan 2021 17:42:50 +0000
+Subject: [PATCH 48/50] Added option --channel-id-exp to filter by regex on
+ channel id (#12)
+
+---
+ filter/tv_grep.in | 41 +++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 39 insertions(+), 2 deletions(-)
+
+diff --git a/filter/tv_grep.in b/filter/tv_grep.in
+index 124133fc..b81f16e6 100755
+--- a/filter/tv_grep.in
++++ b/filter/tv_grep.in
+@@ -76,7 +76,7 @@ convenient to use the special tests described below.
+
+ =head2 CHANNEL TESTS
+
+-There are two tests for channels. These filter both <programme> and
++There are three tests for channels. These filter both <programme> and
+ <channel> elements: if a channel is filtered out then all programmes
+ on that channel are too.
+
+@@ -84,6 +84,8 @@ B<--channel-name REGEXP> True if the channel has a <name>
whose content matches
+
+ B<--channel-id CHANNEL_ID> True if the channelE<39>s XMLTV id is exactly
equal to CHANNEL_ID.
+
++B<--channel-id-exp REGEXP> True if the channel has a <id> whose content
matches REGEXP.
++
+ =head2 TIME TESTS
+
+ Normally you donE<39>t want to test time strings with a regular
+@@ -229,12 +231,14 @@ my (@chan_conjs, @curr_chan_conj);
+ # Hash mapping regexp -> channel id -> true/undef (see later)
+ my %ch_name;
+ my @ch_regexps; # regexps to populate %ch_name with
++my @chid_regexps; # regexps for matching with channel id
++
+
+ # Prepare an OptionAbbrev object with all the long options we expect
+ # to find.
+ #
+ my $oa = new OptionAbbrev(qw(--ignore-case --help --output
+- --channel-id --channel-name
++ --channel-id --channel-name --channel-id-exp
+ --on-after --on-before --eval
+ --and --or --not));
+
+@@ -413,6 +417,19 @@ while (@ARGV) {
+ next;
+ }
+
++ if (defined $lo and $lo eq '--channel-id-exp') {
++ my $regexp = shift @ARGV;
++ die "--channel-id-exp requires an argument, a Perl regular expression\n"
++ if not defined $regexp;
++ # reuses some --channel-name processing
++ #
++ $add_to_prog_conj->(sub { $ch_name{$regexp}->{$_->{channel}} });
++ $add_to_chan_conj->(sub { $ch_name{$regexp}->{$_->{id}} });
++ $not = 0;
++ push @chid_regexps, $regexp;
++ next;
++ }
++
+ if (defined $lo and $lo eq '--channel-name') {
+ my $regexp = shift @ARGV;
+ die "--channel name requires an argument, a Perl regular expression\n"
+@@ -550,6 +567,25 @@ foreach my $ch_id (keys %$ch) {
+ }
+ }
+
++# Prepare the channel id lookup.
++my %seen_chid_id;
++foreach my $ch_id (keys %$ch) {
++ $seen_chid_id{$ch_id}++ && die "duplicate channel id $ch_id\n";
++ my $ch = $ch->{$ch_id}; die if not defined $ch;
++ my %seen_re;
++ foreach my $re (@chid_regexps) {
++ next if $seen_re{$re}++;
++ my $matched = 0;
++ if ($re eq ''
++ or ($ignore_case ? $ch_id =~ /$re/i : $ch_id =~ /$re/)) {
++ $matched = 1;
++ }
++ if ($matched) {
++ $ch_name{$re}->{$ch_id}++ && die;
++ }
++ }
++}
++
+ # Filter channels. This has an effect only for the --channel-id and
+ # --channel-name predicates; we do not drop channels simply because no
+ # programmes remained on them after filtering.
+@@ -644,6 +680,7 @@ END
+ (channel matches)
+ --channel-name REGEXP
+ --channel-id CHANNEL_ID
++ --channel-id-exp REGEXP
+ (special tests)
+ --on-after DATE
+ --on-before DATE
+--
+2.29.2
+
diff --git a/0049-tv_grep-added-tests-for-new-option-channel-id-exp.patch
b/0049-tv_grep-added-tests-for-new-option-channel-id-exp.patch
new file mode 100644
index 0000000..e4f16bd
--- /dev/null
+++ b/0049-tv_grep-added-tests-for-new-option-channel-id-exp.patch
@@ -0,0 +1,689 @@
+From 13c79da0ef34e20134368caed41bc598189a2836 Mon Sep 17 00:00:00 2001
+From: Honir <honir(a)c4b.co.uk>
+Date: Fri, 15 Jan 2021 17:52:23 +0000
+Subject: [PATCH 49/50] tv_grep : added tests for new option --channel-id-exp
+
+---
+ MANIFEST | 41 ++++++++++++
+ ..._grep_channel_id_exp_sat_all_UTF8.expected | 35 +++++++++++
+ ...v_grep_channel_id_exp_sat_amp_xml.expected | 4 ++
+ ...hannel_id_exp_sat_amp_xml_amp_xml.expected | 4 ++
+ ...nnel_id_exp_sat_amp_xml_clump_xml.expected | 4 ++
+ ...annel_id_exp_sat_amp_xml_dups_xml.expected | 4 ++
+ ...nnel_id_exp_sat_amp_xml_empty_xml.expected | 4 ++
+ ...xml_empty_xml_empty_xml_clump_xml.expected | 4 ++
+ ...grep_channel_id_exp_sat_attrs_xml.expected | 4 ++
+ ...el_id_exp_sat_clump_extract_1_xml.expected | 4 ++
+ ...nnel_id_exp_sat_clump_extract_xml.expected | 4 ++
+ ...grep_channel_id_exp_sat_clump_xml.expected | 4 ++
+ ...nnel_id_exp_sat_clump_xml_amp_xml.expected | 4 ++
+ ...el_id_exp_sat_clump_xml_clump_xml.expected | 4 ++
+ ...nel_id_exp_sat_clump_xml_dups_xml.expected | 4 ++
+ ...el_id_exp_sat_clump_xml_empty_xml.expected | 4 ++
+ ..._grep_channel_id_exp_sat_dups_xml.expected | 4 ++
+ ...annel_id_exp_sat_dups_xml_amp_xml.expected | 4 ++
+ ...nel_id_exp_sat_dups_xml_clump_xml.expected | 4 ++
+ ...nnel_id_exp_sat_dups_xml_dups_xml.expected | 4 ++
+ ...nel_id_exp_sat_dups_xml_empty_xml.expected | 4 ++
+ ...grep_channel_id_exp_sat_empty_xml.expected | 4 ++
+ ...nnel_id_exp_sat_empty_xml_amp_xml.expected | 4 ++
+ ...el_id_exp_sat_empty_xml_clump_xml.expected | 4 ++
+ ...nel_id_exp_sat_empty_xml_dups_xml.expected | 4 ++
+ ...el_id_exp_sat_empty_xml_empty_xml.expected | 4 ++
+ ..._channel_id_exp_sat_intervals_xml.expected | 4 ++
+ ...rep_channel_id_exp_sat_length_xml.expected | 4 ++
+ ...ep_channel_id_exp_sat_overlap_xml.expected | 4 ++
+ ...rep_channel_id_exp_sat_simple_xml.expected | 4 ++
+ ...exp_sat_simple_xml_x_whatever_xml.expected | 4 ++
+ ...grep_channel_id_exp_sat_sort1_xml.expected | 4 ++
+ ...grep_channel_id_exp_sat_sort2_xml.expected | 4 ++
+ ..._grep_channel_id_exp_sat_sort_xml.expected | 4 ++
+ ...channel_id_exp_sat_test_empty_xml.expected | 4 ++
+ ...channel_id_exp_sat_test_livre_xml.expected | 4 ++
+ ..._test_remove_some_overlapping_xml.expected | 4 ++
+ ..._exp_sat_test_sort_by_channel_xml.expected | 4 ++
+ ..._grep_channel_id_exp_sat_test_xml.expected | 35 +++++++++++
+ ...nnel_id_exp_sat_test_xml_test_xml.expected | 62 +++++++++++++++++++
+ ...channel_id_exp_sat_whitespace_xml.expected | 4 ++
+ ...channel_id_exp_sat_x_whatever_xml.expected | 4 ++
+ t/test_filters.t | 1 +
+ 43 files changed, 326 insertions(+)
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_all_UTF8.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_amp_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_amp_xml_amp_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_amp_xml_clump_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_amp_xml_dups_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml.expected
+ create mode 100644
t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml_empty_xml_clump_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_attrs_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_clump_extract_1_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_clump_extract_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_clump_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_clump_xml_amp_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_clump_xml_clump_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_clump_xml_dups_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_clump_xml_empty_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_dups_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_dups_xml_amp_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_dups_xml_clump_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_dups_xml_dups_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_dups_xml_empty_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_empty_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_empty_xml_amp_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_empty_xml_clump_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_empty_xml_dups_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_empty_xml_empty_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_intervals_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_length_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_overlap_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_simple_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_simple_xml_x_whatever_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_sort1_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_sort2_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_sort_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_test_empty_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_test_livre_xml.expected
+ create mode 100644
t/data/tv_grep_channel_id_exp_sat_test_remove_some_overlapping_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_test_sort_by_channel_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_test_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_test_xml_test_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_whitespace_xml.expected
+ create mode 100644 t/data/tv_grep_channel_id_exp_sat_x_whatever_xml.expected
+
+diff --git a/MANIFEST b/MANIFEST
+index fcf6a34e..2f0bbcee 100644
+--- a/MANIFEST
++++ b/MANIFEST
+@@ -1414,6 +1414,47 @@
t/data/tv_grep_on_after_200302161330_UTC_test_remove_some_overlapping_xml.expect
+ t/data/tv_grep_on_before_200302161330_UTC_test_remove_some_overlapping_xml.expected
+ t/data/tv_grep_premiere_test_remove_some_overlapping_xml.expected
+ t/data/tv_grep_previously_shown_test_remove_some_overlapping_xml.expected
++t/data/tv_grep_channel_id_exp_sat_all_UTF8.expected
++t/data/tv_grep_channel_id_exp_sat_amp_xml.expected
++t/data/tv_grep_channel_id_exp_sat_amp_xml_amp_xml.expected
++t/data/tv_grep_channel_id_exp_sat_amp_xml_clump_xml.expected
++t/data/tv_grep_channel_id_exp_sat_amp_xml_dups_xml.expected
++t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml.expected
++t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml_empty_xml_clump_xml.expected
++t/data/tv_grep_channel_id_exp_sat_attrs_xml.expected
++t/data/tv_grep_channel_id_exp_sat_clump_extract_1_xml.expected
++t/data/tv_grep_channel_id_exp_sat_clump_extract_xml.expected
++t/data/tv_grep_channel_id_exp_sat_clump_xml.expected
++t/data/tv_grep_channel_id_exp_sat_clump_xml_amp_xml.expected
++t/data/tv_grep_channel_id_exp_sat_clump_xml_clump_xml.expected
++t/data/tv_grep_channel_id_exp_sat_clump_xml_dups_xml.expected
++t/data/tv_grep_channel_id_exp_sat_clump_xml_empty_xml.expected
++t/data/tv_grep_channel_id_exp_sat_dups_xml.expected
++t/data/tv_grep_channel_id_exp_sat_dups_xml_amp_xml.expected
++t/data/tv_grep_channel_id_exp_sat_dups_xml_clump_xml.expected
++t/data/tv_grep_channel_id_exp_sat_dups_xml_dups_xml.expected
++t/data/tv_grep_channel_id_exp_sat_dups_xml_empty_xml.expected
++t/data/tv_grep_channel_id_exp_sat_empty_xml.expected
++t/data/tv_grep_channel_id_exp_sat_empty_xml_amp_xml.expected
++t/data/tv_grep_channel_id_exp_sat_empty_xml_clump_xml.expected
++t/data/tv_grep_channel_id_exp_sat_empty_xml_dups_xml.expected
++t/data/tv_grep_channel_id_exp_sat_empty_xml_empty_xml.expected
++t/data/tv_grep_channel_id_exp_sat_intervals_xml.expected
++t/data/tv_grep_channel_id_exp_sat_length_xml.expected
++t/data/tv_grep_channel_id_exp_sat_overlap_xml.expected
++t/data/tv_grep_channel_id_exp_sat_simple_xml.expected
++t/data/tv_grep_channel_id_exp_sat_simple_xml_x_whatever_xml.expected
++t/data/tv_grep_channel_id_exp_sat_sort1_xml.expected
++t/data/tv_grep_channel_id_exp_sat_sort2_xml.expected
++t/data/tv_grep_channel_id_exp_sat_sort_xml.expected
++t/data/tv_grep_channel_id_exp_sat_test_empty_xml.expected
++t/data/tv_grep_channel_id_exp_sat_test_livre_xml.expected
++t/data/tv_grep_channel_id_exp_sat_test_remove_some_overlapping_xml.expected
++t/data/tv_grep_channel_id_exp_sat_test_sort_by_channel_xml.expected
++t/data/tv_grep_channel_id_exp_sat_test_xml.expected
++t/data/tv_grep_channel_id_exp_sat_test_xml_test_xml.expected
++t/data/tv_grep_channel_id_exp_sat_whitespace_xml.expected
++t/data/tv_grep_channel_id_exp_sat_x_whatever_xml.expected
+ t/data/tv_remove_some_overlapping_all_UTF8.expected
+ t/data/tv_remove_some_overlapping_amp_xml.expected
+ t/data/tv_remove_some_overlapping_amp_xml_amp_xml.expected
+diff --git a/t/data/tv_grep_channel_id_exp_sat_all_UTF8.expected
b/t/data/tv_grep_channel_id_exp_sat_all_UTF8.expected
+new file mode 100644
+index 00000000..eddf9c37
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_all_UTF8.expected
+@@ -0,0 +1,35 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv>
++ <channel id="3sat.de">
++ <display-name lang="de">3SAT</display-name>
++ </channel>
++ <programme start="200006031633" channel="3sat.de">
++ <title lang="de">blah</title>
++ <title lang="en">blah</title>
++ <desc lang="de">Blah Blah Blah.</desc>
++ <credits>
++ <director>blah</director>
++ <actor>a</actor>
++ <actor>b</actor>
++ </credits>
++ <date>19901011</date>
++ <country>ES</country>
++ <episode-num system="xmltv_ns">2 . 9 . 0/1</episode-num>
++ <video>
++ <aspect>16:9</aspect>
++ </video>
++ <last-chance />
++ <subtitles type="teletext">
++ <language lang="en">English</language>
++ </subtitles>
++ <rating system="MPAA">
++ <value>PG</value>
++ </rating>
++ <star-rating>
++ <value>3/3</value>
++ <icon src="stars.png" />
++ </star-rating>
++ </programme>
++</tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_amp_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_amp_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_amp_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_amp_xml_amp_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_amp_xml_amp_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_amp_xml_amp_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_amp_xml_clump_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_amp_xml_clump_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_amp_xml_clump_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_amp_xml_dups_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_amp_xml_dups_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_amp_xml_dups_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git
a/t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml_empty_xml_clump_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml_empty_xml_clump_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml_empty_xml_clump_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_attrs_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_attrs_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_attrs_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_clump_extract_1_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_clump_extract_1_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_clump_extract_1_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_clump_extract_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_clump_extract_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_clump_extract_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_clump_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_clump_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_clump_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_clump_xml_amp_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_clump_xml_amp_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_clump_xml_amp_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_clump_xml_clump_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_clump_xml_clump_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_clump_xml_clump_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_clump_xml_dups_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_clump_xml_dups_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_clump_xml_dups_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_clump_xml_empty_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_clump_xml_empty_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_clump_xml_empty_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_dups_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_dups_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_dups_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_dups_xml_amp_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_dups_xml_amp_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_dups_xml_amp_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_dups_xml_clump_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_dups_xml_clump_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_dups_xml_clump_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_dups_xml_dups_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_dups_xml_dups_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_dups_xml_dups_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_dups_xml_empty_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_dups_xml_empty_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_dups_xml_empty_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_empty_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_empty_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_empty_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_empty_xml_amp_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_empty_xml_amp_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_empty_xml_amp_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_empty_xml_clump_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_empty_xml_clump_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_empty_xml_clump_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_empty_xml_dups_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_empty_xml_dups_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_empty_xml_dups_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_empty_xml_empty_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_empty_xml_empty_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_empty_xml_empty_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_intervals_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_intervals_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_intervals_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_length_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_length_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_length_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_overlap_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_overlap_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_overlap_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_simple_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_simple_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_simple_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_simple_xml_x_whatever_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_simple_xml_x_whatever_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_simple_xml_x_whatever_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_sort1_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_sort1_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_sort1_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_sort2_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_sort2_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_sort2_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_sort_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_sort_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_sort_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_test_empty_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_test_empty_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_test_empty_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_test_livre_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_test_livre_xml.expected
+new file mode 100644
+index 00000000..8764a7b2
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_test_livre_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="ISO-8859-1"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_test_remove_some_overlapping_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_test_remove_some_overlapping_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_test_remove_some_overlapping_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_test_sort_by_channel_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_test_sort_by_channel_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_test_sort_by_channel_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_test_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_test_xml.expected
+new file mode 100644
+index 00000000..ed09d03d
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_test_xml.expected
+@@ -0,0 +1,35 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv generator-info-name="my listings generator">
++ <channel id="3sat.de">
++ <display-name lang="de">3SAT</display-name>
++ </channel>
++ <programme start="200006031633" channel="3sat.de">
++ <title lang="de">blah</title>
++ <title lang="en">blah</title>
++ <desc lang="de">Blah Blah Blah.</desc>
++ <credits>
++ <director>blah</director>
++ <actor>a</actor>
++ <actor>b</actor>
++ </credits>
++ <date>19901011</date>
++ <country>ES</country>
++ <episode-num system="xmltv_ns">2 . 9 . 0/1</episode-num>
++ <video>
++ <aspect>16:9</aspect>
++ </video>
++ <last-chance />
++ <subtitles type="teletext">
++ <language lang="en">English</language>
++ </subtitles>
++ <rating system="MPAA">
++ <value>PG</value>
++ </rating>
++ <star-rating>
++ <value>3/3</value>
++ <icon src="stars.png" />
++ </star-rating>
++ </programme>
++</tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_test_xml_test_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_test_xml_test_xml.expected
+new file mode 100644
+index 00000000..05fa6883
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_test_xml_test_xml.expected
+@@ -0,0 +1,62 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv generator-info-name="my listings generator">
++ <channel id="3sat.de">
++ <display-name lang="de">3SAT</display-name>
++ </channel>
++ <programme start="200006031633" channel="3sat.de">
++ <title lang="de">blah</title>
++ <title lang="en">blah</title>
++ <desc lang="de">Blah Blah Blah.</desc>
++ <credits>
++ <director>blah</director>
++ <actor>a</actor>
++ <actor>b</actor>
++ </credits>
++ <date>19901011</date>
++ <country>ES</country>
++ <episode-num system="xmltv_ns">2 . 9 . 0/1</episode-num>
++ <video>
++ <aspect>16:9</aspect>
++ </video>
++ <last-chance />
++ <subtitles type="teletext">
++ <language lang="en">English</language>
++ </subtitles>
++ <rating system="MPAA">
++ <value>PG</value>
++ </rating>
++ <star-rating>
++ <value>3/3</value>
++ <icon src="stars.png" />
++ </star-rating>
++ </programme>
++ <programme start="200006031633" channel="3sat.de">
++ <title lang="de">blah</title>
++ <title lang="en">blah</title>
++ <desc lang="de">Blah Blah Blah.</desc>
++ <credits>
++ <director>blah</director>
++ <actor>a</actor>
++ <actor>b</actor>
++ </credits>
++ <date>19901011</date>
++ <country>ES</country>
++ <episode-num system="xmltv_ns">2 . 9 . 0/1</episode-num>
++ <video>
++ <aspect>16:9</aspect>
++ </video>
++ <last-chance />
++ <subtitles type="teletext">
++ <language lang="en">English</language>
++ </subtitles>
++ <rating system="MPAA">
++ <value>PG</value>
++ </rating>
++ <star-rating>
++ <value>3/3</value>
++ <icon src="stars.png" />
++ </star-rating>
++ </programme>
++</tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_whitespace_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_whitespace_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_whitespace_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/data/tv_grep_channel_id_exp_sat_x_whatever_xml.expected
b/t/data/tv_grep_channel_id_exp_sat_x_whatever_xml.expected
+new file mode 100644
+index 00000000..e20fb42a
+--- /dev/null
++++ b/t/data/tv_grep_channel_id_exp_sat_x_whatever_xml.expected
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE tv SYSTEM "xmltv.dtd">
++
++<tv></tv>
+diff --git a/t/test_filters.t b/t/test_filters.t
+index 5c6d5468..82388ba6 100755
+--- a/t/test_filters.t
++++ b/t/test_filters.t
+@@ -86,6 +86,7 @@ if ($full) {
+ [ [ 'tv_grep', '--category', 'g', '--or',
'--title', 'h' ], 1 ],
+ [ [ 'tv_grep', '-i', '--category', 'i',
'--title', 'j' ], 1 ],
+ [ [ 'tv_grep', '-i', '--category', 'i',
'--title', 'h' ], 1 ],
++ [ [ 'tv_grep', '--channel-id-exp', 'sat'
], 1 ],
+ );
+ }
+
+--
+2.29.2
+
diff --git a/0050-clean-up-windows-xmltv.exe-s-PAR-Packer-based-build-.patch
b/0050-clean-up-windows-xmltv.exe-s-PAR-Packer-based-build-.patch
new file mode 100644
index 0000000..f418cbb
--- /dev/null
+++ b/0050-clean-up-windows-xmltv.exe-s-PAR-Packer-based-build-.patch
@@ -0,0 +1,145 @@
+From bb01ecc1a6902a7f4eab2e0cb661176841c6973f Mon Sep 17 00:00:00 2001
+From: Robert Eden <rmeden(a)gmail.com>
+Date: Mon, 18 Jan 2021 20:20:43 -0600
+Subject: [PATCH 50/50] clean up windows xmltv.exe's PAR::Packer based build
+ files.
+
+---
+ lib/exe_opt.pl | 63 --------------------------------------------------
+ lib/xmltv.pl | 25 +++++++++++++-------
+ 2 files changed, 17 insertions(+), 71 deletions(-)
+
+diff --git a/lib/exe_opt.pl b/lib/exe_opt.pl
+index 57a285f2..a93b09d6 100755
+--- a/lib/exe_opt.pl
++++ b/lib/exe_opt.pl
+@@ -1,8 +1,6 @@
+ #!perl -w
+ #
+ # This is a simple script to generate options so PerlApp can make the EXE
+-# it needs time values, so might as well put it in a perl script!
+-# (windows has a limited date function)
+ #
+ # Robert Eden rmeden(a)yahoo.com
+
+@@ -23,72 +21,11 @@ print '
+ -X Win32::Console
+ ';
+
+-#-l C:/strawberry/c/bin/libexpat-1__.dll
+-#-l C:/strawberry/c/bin/libxml2-2__.dll
+-#-l C:/strawberry/c/bin/libiconv-2__.dll
+-#-l C:/strawberry/c/bin/liblzma-5__.dll
+-#-l C:/strawberry/c/bin/zlib1__.dll
+-
+-# not found
+-#-l C:/strawberry/perl/bin/libgcc__x86__470.dll
+-#-l C:/strawberry/c/bin/libeay32__.dll
+-#-l C:/strawberry/c/bin/SSLeay32__.dll
+-#-M arybase
+-
+ # add executable scripts
+ open(FILE,"exe_files.txt");
+ foreach (split(/ /,<FILE>)) {
+ chomp;
+ next unless $_;
+-# print "-a $_\n";
+-# print "-c $_\n"; # -a doesn't scan for dependancies
+ }
+ close FILE;
+
+-#-info CompanyName="XMLTV Project
http://www.xmltv.org"
+-#-info FileDescription="EXE bundle of XMLTV tools to manage TV Listings"
+-#-info InternalName=xmltv.exe
+-#-info OriginalFilename=xmltv.exe
+-#-info ProductName=xmltv
+-#-info LegalCopyright="GNU General Public License
http://www.gnu.org/licenses/gpl.txt"
+-#-icon xmltv_logo.ico
+-#-l libexpat-1_.dll[file=C:\strawberry\c\bin\libexpat-1_.dll
+-#-l libxml2-2_.dll[file=C:\strawberry\c\bin\libxml2-2_.dll
+-#-l libiconv-2_.dll[file=C:\strawberry\c\bin\libiconv-2_.dll
+-#-l liblzma-5_.dll[file=C:\strawberry\c\bin\liblzma-5_.dll
+-#-l zlib1_.dll[file=C:\strawberry\c\bin\zlib1_.dll
+-#-l libgcc_x86_470.dll[file=C:\strawberry\perl\bin\libgcc_x86_470.dll
+-#-l libeay32_.dll[file=C:\strawberry\c\bin\libeay32_.dll
+-#-l SSLeay32_.dll[file=C:\strawberry\c\bin\SSLeay32_.dll
+-#-bind
DateTime/Format/Builder/Parser/Regex.pm[file=c:\Strawberry\Perl\site\lib\DateTime\Format\Builder\Parser\Regex.pm
+-
+-#
+-# Add XML\Parser\encodings
+-#
+-@Encoding_Path = (grep(-d $_,
+- map(File::Spec->catdir($_, qw(XML Parser Encodings)),
+- @INC)
+- ));
+-foreach $dir (@Encoding_Path) {
+- opendir DIR,$dir || die "Can't open encoding path directory\n";
+- while ($file = readdir DIR)
+- {
+- next unless $file =~ /.enc$/i;
+-# print "-l XML/Parser/Encodings/${file}[file=$dir/${file}\n";
+-# print "-a
c:/Strawberry/perl/vendor/lib/XML/Parser/Encodings/${file}\n";
+- }
+-}
+-
+-##
+-## put date in file version field
+-##
+-#@date=localtime; $date[4]++; $date[5]+=1900;
+-#printf "-info FileVersion=%4d.%d.%d.%d\n",@date[5,4,3,2];
+-
+-##
+-## last fields in product version should ommitable, but it doesn't work.
+-##
+-#$version=shift;
+-#(a)_=split(/\./,$version);
+-#map {$_=0 unless defined $_} @_[0..4];
+-#printf "-info ProductVersion=%d.%d.%d.%d\n",@_;
+diff --git a/lib/xmltv.pl b/lib/xmltv.pl
+index 8540dff6..3f6d3d3d 100755
+--- a/lib/xmltv.pl
++++ b/lib/xmltv.pl
+@@ -3,13 +3,23 @@
+ # This is a quick XMLTV shell routing to use with the windows exe
+ #
+ # A single EXE is needed to allow sharing of modules and dlls of all the
+-# programs. If PerlAPP was run on each one, the total size would be more than
+-# 12MB, even leaving out PERL56.DLL!
++# programs.
++#
++# Now users PAR::Packer to build the exe. It takes a very long time on first run, which
can
++# appear to be a problem.
++#
++# There currently isn't a way for PAR::Packer to warn users about a first time run.
++# I've modified the boot.c file in Par::Packer to do that. It's not great as it
also
++# displays when building, but it's good enough. Here's what the change is (for
documenation purposes)
++# I'm trying to work with the PAR::Packer folks for a better fix.
++#
++# boot.c:188
++# rc = my_mkdir(stmpdir, 0700);
++#// 2021-01-18 rmeden hack to print a message on first run
++# if ( rc == 0 ) fprintf(stderr,"Note: This will take a while on first
run\n");
++#// rmeden
++# if ( rc == -1 && errno != EEXIST) {
+ #
+-# Perlapp allows you to attach pathed files, but you need the same path
+-# to access them. The Makefile creates a text file of these files which is
+-# used to build a translation table, allowing users to just type the app name
+-# and not the development path.
+ #
+ # Robert Eden rmeden(a)yahoo.com
+ #
+@@ -75,9 +85,8 @@ print STDERR "Timezone is $ENV{TZ}\n" unless $opt_quiet;
+ $cmd = shift || "";
+
+ # --version (and abbreviations thereof)
+-my $VERSION = '0.6.1';
+ if (index('--version', $cmd) == 0 and length $cmd >= 3) {
+- print "xmltv $VERSION\n";
++ print "xmltv $XMLTV::VERSION\n";
+ exit;
+ }
+
+--
+2.29.2
+
diff --git a/xmltv.spec b/xmltv.spec
index 033abc4..d08dc40 100644
--- a/xmltv.spec
+++ b/xmltv.spec
@@ -1,12 +1,64 @@
Name: xmltv
Version: 0.6.3
-Release: 2%{?dist}
+Release: 3%{?dist}
Summary: A set of utilities to manage your TV viewing
License: GPLv2+
URL: http://xmltv.org/wiki/
Source0: https://github.com/XMLTV/xmltv/archive/v%{version}/xmltv-v%{version}.tar.gz
+# Upstream patches since release
+Patch0001: 0001-Reenable-tv_grab_ch_search.patch
+Patch0002: 0002-README.md-update-TOC.patch
+Patch0003: 0003-README.md-refresh-req-d-rec-d-modules-list.patch
+Patch0004: 0004-tv_imdb-fix-some-typos.patch
+Patch0005: 0005-tv_imdb-use-warnings.patch
+Patch0006: 0006-tv_imdb-refresh-short-description-POD.patch
+Patch0007: 0007-tv_grab_zz_sdjson_sqlite-fix-a-typo.patch
+Patch0008: 0008-Update-tv_grab_eu_xmltvse-to-use-SSL-116.patch
+Patch0009: 0009-update-version-for-cherry-pick-typo-correction.patch
+Patch0010: 0010-programme-detect-parental-level-with-white-space.patch
+Patch0011: 0011-Remove-swedb-grabber-117.patch
+Patch0012: 0012-ampparit-add-missing-empty-title-check.patch
+Patch0013: 0013-telsu-add-missing-empty-title-check.patch
+Patch0014: 0014-tv_grab_uk_tvguide-fix-for-missing-form-options-in-c.patch
+Patch0015: 0015-avoid-break-when-website-object-missing-125.patch
+Patch0016: 0016-fix-UA-page-debug-is-on-stdout-should-be-on-stderr-1.patch
+Patch0017: 0017-Fetch-programme-data-via-SSL-avoids-301-redirects.patch
+Patch0018: 0018-for-compatability-with-older-versions-of-Perl-122.patch
+Patch0019: 0019-download-optional-file-if-its-prepStage-is-specifica.patch
+Patch0020: 0020-Unbreak-parsing-of-keywords-file.patch
+Patch0021: 0021-Reduce-memory-consumption-in-building-database-63.patch
+Patch0022: 0022-replace-spaces-with-tabs-and-prettify-the-code.patch
+Patch0023: 0023-iltapulu-fix-channel-parser.patch
+Patch0024: 0024-iltapulu-fix-grab-parser.patch
+Patch0025: 0025-source-avoid-name-clashes-between-modules.patch
+Patch0026: 0026-test.conf-update-to-latest-list-channels-output.patch
+Patch0027: 0027-Make-channel-ids-compliant-with-the-DTD.-Use-legacyc.patch
+Patch0028: 0028-Change-whitespace-to-tabs.patch
+Patch0029: 0029-Add-info-message-about-frozen-IMDb-data.patch
+Patch0030: 0030-Add-undocumented-sample-option-to-limit-records-proc.patch
+Patch0031: 0031-Reduce-memory-usage-during-final-build-stage.patch
+Patch0032: 0032-Remove-tv-episodes-from-intermediate-files.patch
+Patch0033: 0033-eu_xmltvse-refresh-test.conf.patch
+Patch0034: 0034-dk_dr-disable-grabber-after-source-site-disappeared.patch
+Patch0035: 0035-Fix-testsuite-for-the-change-to-episode-handling-63.patch
+Patch0036: 0036-update-windows-xmltv.exe-to-use-PAR-Packer-rather-th.patch
+Patch0037: 0037-extend-scope-of-title-person-qualifier.patch
+Patch0038: 0038-eu-epgdata-Add-channel-IDs.patch
+Patch0039: 0039-whitespace-changes.patch
+Patch0040: 0040-Reduce-memory-usage-during-database-build-bug-fixes.patch
+Patch0041: 0041-bugfixes-in-augment-function.patch
+Patch0042: 0042-Add-tests-for-edge-cases.patch
+Patch0043: 0043-Use-disc-sort-to-reduce-memory-usage-63.patch
+Patch0044: 0044-Option-to-exclude-tv-series-from-the-database-build.patch
+Patch0045: 0045-fix-broken-url-to-imdb-website.patch
+Patch0046: 0046-fix-minor-typo-in-example.patch
+Patch0047: 0047-fix-broken-tests.patch
+Patch0048: 0048-Added-option-channel-id-exp-to-filter-by-regex-on-ch.patch
+Patch0049: 0049-tv_grep-added-tests-for-new-option-channel-id-exp.patch
+Patch0050: 0050-clean-up-windows-xmltv.exe-s-PAR-Packer-based-build-.patch
+
BuildArch: noarch
BuildRequires: perl-interpreter
@@ -251,6 +303,9 @@ make test
%changelog
+* Sat Jan 23 2021 Gary Buhrmaster <gary.buhrmaster(a)gmail.com> - 0.6.3-3
+- update to recent upstream patches
+
* Wed Nov 04 2020 Gary Buhrmaster <gary.buhrmaster(a)gmail.com> - 0.6.3-2
- Add BR make