aisa/scripts/admin/oneoff/match_team_to_region.pl
2025-11-26 11:31:54 +02:00

338 lines
12 KiB
Perl

#!/usr/bin/perl
BEGIN { use lib '/usr/home/cfg' ; require push_inc ; }
require cfg ;
use CGI::Carp qw(fatalsToBrowser) ;
use Text::CSV ;
# use Date::Calc qw(:all);
# use DBI;
# https://itvadmin.co.za/cgi-bin/scripts/admin/oneoff/match_team_to_region.pl?1
#-------------------------------------------------------------------------------
my @ARGV = split(/\&/, $ENV{'QUERY_STRING'});
our $debug = $ARGV[0] ;
# our $username = 'rory' ;
if ($debug) { print "Content-type: text/html\n\n"; }
#------------------------------------------------------------------------------------------
&today ;
&db_open_upd ;
# &db_open_ro ;
our $db_ignore_open_close = 1 ; # do one open and one close instead of repeating it
&load_vars ;
$db_ignore_open_close = 0 ; # do one open and one close instead of repeating it
&db_close_conn ;
if ($debug) { print "Done."; }
exit;
#------------------------------------------------------------------------------------------
sub load_vars {
my $folder = '/home/libs/data/logs/schools_csv' ; # change this to your folder path
my $filename = 'South_Africa_Schools.csv' ; # change this to your CSV file name
my $filepath = "$folder/$filename";
open my $fh, '<', $filepath or die "Could not open file '$filepath': $!";
my $csv = Text::CSV->new({
binary => 1, # required for binary mode
auto_diag => 1, # report irregularities immediately
}) or die "Cannot use CSV: " . Text::CSV->error_diag();
use utf8;
my %seen_school_name = () ; my %multiple_region_names = () ;
while (my $row = $csv->getline($fh)) {
next if @$row[7] eq 'Institution_Name' ;
my $school_name = @$row[7] ;
my $school_type = @$row[11] ;
$school_name =~ s/\b$school_type\b//g ;
$school_name =~ s/HIGH SCHOOL//g ;
$school_name =~ s/High School//g ;
$school_name =~ s/INTERMEDIATE SCHOOL//g ;
$school_name =~ s/HOëRSKOOL//g ;
$school_name =~ s/PRIMÊRE SKOOL//g ;
$school_name =~ s/HOËRSKOOL//g ;
$school_name =~ s/PRIMARY SCHOOL//g ;
$school_name =~ s/PRIVAATSKOOL//g ;
$school_name =~ s/PRIVATE SCHOOL//g ;
$school_name =~ s/PRIMÊRE SKOOL//g ;
$school_name =~ s/SCHOOL//g ;
$school_name =~ s/SCHOOL//g ;
$school_name =~ s/\(PTY\) LTD//g ;
$school_name =~ s/\'//g ;
$school_name =~ s/\'//g ;
$school_name =~ s/\ LAERSKOOL//g ;
$school_name =~ s/Secondary School//g ;
$school_name =~ s/PRIMARY//g ;
$school_name =~ s/INTERMEDIATE//g ;
$school_name =~ s/COMBINED//g ;
$school_name =~ s/Preparatory School//g ;
$school_name =~ s/SENIOR//g ;
$school_name =~ s/HIGHER//g ;
$school_name =~ s/SECONDARY//g ;
$school_name =~ s/Primary School//g ;
$school_name =~ s/JUNIOR//g ;
$school_name =~ s/PUBLIC//g ;
$school_name =~ s/PRIAMARY//g ;
$school_name =~ s/PRIMÊRE SKOOL//g ;
$school_name =~ s/Combined School//g ;
$school_name =~ s/MIDDLE SCOOL//g ;
$school_name =~ s/High school//g ;
$school_name =~ s/Primary School//g ;
$school_name =~ s/Intermediate School//g ;
$school_name =~ s/LAERSKOOL//g ;
$school_name =~ s/Middle School//g ;
$school_name =~ s/Primary school//g ;
$school_name =~ s/Primary//g ;
$school_name =~ s/Gereformeerde Akademie//g ;
$school_name =~ s/INDEPENDENT//g ;
$school_name =~ s/Christian Academy//g ;
$school_name =~ s/Christian College//g ;
$school_name =~ s/Christian School//g ;
$school_name =~ s/Secondary School//g ;
$school_name =~ s/GEKOMBINEERDE SKOOL//g ;
$school_name =~ s/CHRISTELIKE VOLKSKOOL//g ;
$school_name =~ s/COLLEGE//g ;
$school_name =~ s/ACADEMIA//g ;
$school_name =~ s/SEKONDÊRE SKOOL//g ;
$school_name =~ s/SEKONDÊR//g ;
$school_name =~ s/GIMNASIUM//g ;
$school_name =~ s/SEKONDÊR//g ;
$school_name =~ s/VOORBEREIDINGSKOOL//g ;
$school_name =~ s/CHRISTELIKE //g ;
$school_name =~ s/NGK//g ;
$school_name =~ s/PREPARATORY//g ;
$school_name =~ s/COMPREHENSIVE//g ;
$school_name =~ s/Middle school//g ;
$school_name =~ s/Laerskool//g ;
$school_name =~ s/HOËR TEGNIESE SKOOL//g ;
$school_name =~ s/Akademie\/Academy//g ;
$school_name =~ s/Hoërskool//g ;
$school_name =~ s/Intermediate School//g ;
$school_name =~ s/SKOOL GEKOMBINEERD//g ;
$school_name =~ s/LEARNING SITE//g ;
$school_name =~ s/Muslim School//g ;
$school_name =~ s/Technical and Commercial//g ;
$school_name =~ s/PRIMERE SKOOL//g ;
$school_name =~ s/TECHNICAL HIGH//g ;
$school_name =~ s/S P//g ;
$school_name =~ s/STATE AIDED//g ;
$school_name =~ s/& TECHNICAL INSTITUTE//g ;
$school_name =~ s/& TECHNICAL//g ;
$school_name =~ s/\s*MIDDLE\s*$//;
$school_name =~ s/\s*LOWER\s*$//;
$school_name =~ s/\s*TECHNICAL\s*$//;
$school_name =~ s/^\s+|\s+$//g;
# print "\n schools_region |$school_name| , @$row[5] , $multiple_region_names{$school_name}" if lc $school_name eq 'langalibalele' ;
$multiple_region_names{$school_name} = 1 if @$row[5] && !$multiple_region_names{$school_name} && $seen_school_name{$school_name} && $seen_school_name{$school_name} ne @$row[5] ;
$seen_school_name{$school_name} = @$row[5] ;
$schools_region{$school_name}{$school_type} = @$row[5] ;
}
# my %seen_school_name = () ; my %multiple_region_names = () ;
# foreach my $school_name (sort { $schools_region{$a} cmp $schools_region{$b} } keys %schools_region) {
# foreach my $school_type (keys %{$schools_region{$school_name}}) {
# $multiple_region_names{$school_name} = 1 if $seen_school_name{$seen_school_name} && $schools_region{$school_name}{$school_type} && $seen_school_name{$seen_school_name} ne $schools_region{$school_name}{$school_type} ;
# $seen_school_name{$seen_school_name} = $schools_region{$school_name}{$school_type} ;
# }
# }
$csv->eof or $csv->error_diag();
close $fh;
$region_id{"MP"} = 6 ;
$region_id{"EC"} = 1 ;
$region_id{"GT"} = 3 ;
$region_id{"KZN"} = 4 ;
$region_id{"WC"} = 9 ;
$region_id{"LP"} = 5 ;
$region_id{"NC"} = 8 ;
$region_id{"NW"} = 7 ;
$region_id{"FS"} = 2 ;
&db_switch_conn('sss');
&db_min_ro('teams',"id,name","region_id = '0' AND matched_region_id = '0' AND (name like '% senior %' OR name like 'senior %' OR name like '% senior' OR name like '% junior %' OR name like 'junior %' OR name like '% junior' OR name like '% PS' OR name like 'PS %' OR name like '% HS' OR name like 'HS %' OR name like '% Primary' OR name like '% SECONDARY' OR name like 'SECONDARY%' OR name like 'SENIOR SECONDARY%' OR name like 'PS%' OR name like '%college%' OR name like '%school%' OR name like '%skool%' OR name like '%gimnasium%' OR name like '%akademia%' OR name like '%academy%' OR name like '%akademie%')",'','') ;
foreach my $school_name (sort { $seen_school_name{$a} cmp $seen_school_name{$b} } keys %schools_region) {
next if $multiple_region_names{$school_name} || length($school_name) < 5 || lc $school_name eq 'christian' || lc $school_name eq 'elite' || lc $school_name eq 'global' || lc $school_name eq 'curro' || lc $school_name eq 'northern' ;
# next if length($school_name) < 5 || lc $school_name eq 'christian' || lc $school_name eq 'elite' || lc $school_name eq 'global' || lc $school_name eq 'curro' || lc $school_name eq 'northern' ;
foreach my $type (keys %{$schools_region{$school_name}}) {
my $region = $schools_region{$school_name}{$type};
foreach my $team (keys %{$db{teams}}) {
my $team2 = lc $db{teams}{$team}{name} ;
$team2 =~ s/\'//g ;
my $school_name2 = lc $school_name ;
if ($team2 =~ /\b$school_name2\b/) {
print "\n UPDATE teams SET matched_region_id = '$region_id{$region}' WHERE id = '$team';" ;
}
}
}
}
$region_id{"Mpumalanga"} = 6 ;
$region_id{"Eastern Cape"} = 1 ;
$region_id{"Gauteng"} = 3 ;
$region_id{"KwaZulu-Natal"} = 4 ;
$region_id{"KwaZulu Natal"} = 4 ;
$region_id{"Western Cape"} = 9 ;
$region_id{"Limpopo"} = 5 ;
$region_id{"Northern Cape"} = 8 ;
$region_id{"North West"} = 7 ;
$region_id{"Free State"} = 2 ;
# $filename = 'South_African_cities.csv' ; # change this to your CSV file name
# my $filepath = "$folder/$filename";
# open my $fh, '<', $filepath or die "Could not open file '$filepath': $!";
# my $csv = Text::CSV->new({
# binary => 1, # required for binary mode
# auto_diag => 1, # report irregularities immediately
# }) or die "Cannot use CSV: " . Text::CSV->error_diag();
# while (my $row = $csv->getline($fh)) {
# next if @$row[0] eq 'city' ;
# my $school_name = @$row[0] ;
# $city_name{$school_name} = $region_id{@$row[5]} ;
# }
# $csv->eof or $csv->error_diag();
# close $fh;
# # # $filename = 'South_African_cities_2.csv' ; # change this to your CSV file name
# # # my $filepath = "$folder/$filename";
# # # open my $fh, '<', $filepath or die "Could not open file '$filepath': $!";
# # # my $csv = Text::CSV->new({
# # # binary => 1, # required for binary mode
# # # auto_diag => 1, # report irregularities immediately
# # # }) or die "Cannot use CSV: " . Text::CSV->error_diag();
# # # my %seen_city_name = () ;
# # # while (my $row = $csv->getline($fh)) {
# # # next if @$row[0] eq 'City' ;
# # # my $cityname = lc @$row[1] ;
# # # if ($seen_city_name{$cityname} && $seen_city_name{$cityname} ne $region_id{@$row[2]}) {
# # # $city_name{$cityname} = 0 ;
# # # next ;
# # # }
# # # $seen_city_name{$cityname} = $region_id{@$row[2]} ;
# # # $city_name{$cityname} = $region_id{@$row[2]} ;
# # # }
# # # $csv->eof or $csv->error_diag();
# # # close $fh;
# # # &db_switch_conn('sss');
# # # &db_min_ro('teams',"id,name","region_id = '0' AND matched_region_id = '0' AND id > 5000 AND name not like '%school%' AND name not like '%skool%'",'','') ;
# # # # &db_switch_conn('aisa');
# # # my %prev_match = () ;
# # # foreach my $team_id (keys %{$db{teams}}) {
# # # my $team_name = lc $db{teams}{$team_id}{name} ;
# # # $team_name =~ s/\'//g ;
# # # foreach my $cityname (keys %city_name) {
# # # my $regionid = $city_name{$cityname} ;
# # # next unless $regionid ;
# # # $cityname = lc $cityname ;
# # # $cityname =~ s/\'//g ;
# # # if ($team_name =~ /\b$cityname\b/) {
# # # if ($team_name =~ /\bst $cityname\b/) {
# # # $city_name{"st $cityname"} = $region_id{$cityname} ;
# # # $cityname = "st $cityname" ;
# # # }
# # # if ($team_name =~ /\b$cityname park\b/ && $cityname !~ /\bpark\b/ && length($cityname) < 10) {
# # # next ;
# # # }
# # # if ($prev_match{$team_id} && length($prev_match{$team_id}) > length($cityname)) {
# # # print "\n Do NOT MATCH $cityname WITH $team_name" ;
# # # next ;
# # # }
# # # $prev_match{$team_id} = $cityname ;
# # # # print "\n , $team_name , $city_name , $regionid, UPDATE `teams` SET `matched_region_id`='$regionid' WHERE `id`=' $team_id';" ;
# # # # print "\n city_name,$city_name : team_name,$team_name" ;
# # # }
# # # }
# # # }
# # # foreach (keys %prev_match) {
# # # next if length($prev_match{$_}) < 5 || !$city_name{$prev_match{$_}} ;
# # # print "\n UPDATE teams SET matched_region_id = '$city_name{$prev_match{$_}}' WHERE id = '$_' ; " ;
# # # }
# &db_min_ro('cities','id,name,region_id','','','') ;
# &db_min_ro('regions','id,name','','','') ;
# foreach my $team_id (keys %{$db{teams}}) {
# my $team_name = lc $db{teams}{$team_id}{name} ;
# next unless lc $team_name =~ /kimberly/ ;
# foreach my $city_id (keys %{$db{cities}}) {
# my $city_name = lc $db{cities}{$city_id}{name} ;
# print "\n team_name : $team_name , city_name : $city_name" ;
# # $city_name =~ s/\'//g ;
# # $team_name =~ s/\'//g ;
# if ($team_name =~ /\b$city_name\b/) {
# print "\n $team_name UPDATE teams SET matched_region_id='$db{cities}{$city_id}{region_id}' WHERE id = '$team_id' ; " ;
# }
# }
# }
} #-------------------------------------------------------------------------------
use db ;
use today ;
use common ;
use common_min ;