#!n:\perl\bin
###############################
# Filename: clarinToImdi.pl
# SVN location:
#
# Author: Evelyn Richter
# Last modified: 13 Aug 2009
#
# General information:
# This script converts Clarin resource metadata saved in a mySQL database into Catalogue imdi files.
# Additionally, parent corpus files are produced that link to the respective catalogue file.
# Finally, a top node with links to all corpus files is produced.
#
# Requirements:
# - has to be run on Clarin server or via an SSH tunnel (ssh -L 33060:localhost:3306 USERNAME@clarin.mpi.nl)
# - Clarin database login (username, password) with select permissions
# - the module XML::Smart::Data has to be modified a bit to avoid base64 encoding of UTF-8 characters in attributes (see sub _data in Data.pm)
# - the following command line arguments:
#   1. Clarin db username
#   2. Clarin db password
#   3. the current ISO 639.3 code list from the ISO authority
#   4. the country/continent mapping from LINGUIST List
#   5. an XML input file with XML header
#   6. an output directory
###############################

use strict;
use warnings;
use DBD::mysql;
use DBI;
use Encode;
use XML::Smart;

# error message when command line arguments missing
my $error = "Please enter the following command line arguments: Clarin db username & password, the current ISO 639.3 code list from the ISO authority, the country/continent mapping from LINGUIST List, an XML input file with XML header and an output directory.\n";

# reading in command line arguments (all six are required and must be non-empty)
foreach my $arg (@ARGV[0 .. 5]) {
    die $error unless $arg;
}
my ($dbUsername, $dbPassword, $langCodesFile, $LLcountryInfoFile, $XmlImportFile, $outputDir) = @ARGV;

# the output directory is used as a filename prefix, so it must end in a slash
unless ($outputDir =~ m/\/$/) {
    $outputDir .= "/";
}

# database handle to the clarin drupal database
# RaiseError makes every failed query die with the DBI message instead of
# returning undef, which would later surface as an obscure dereference error.
my $dsn = 'dbi:mysql:drupal:127.0.0.1:33060';
my $dbh = DBI->connect($dsn, $dbUsername, $dbPassword, { RaiseError => 1, PrintError => 0 })
    or die $DBI::errstr;
$dbh->do("set character set utf8");    # essential to ensure proper UTF-8 extraction

# required resource columns; makeImdi() addresses them by position, so the
# position of each column in this list (shown on the right) must not change
my @resourceColumns = (
    'ctr.nid',                                  #  0
    'ctr.vid',                                  #  1
    'n.title',                                  #  2
    'ctr.field_languages_other_value',          #  3
    'ctr.field_description_value',              #  4
    'ctr.field_institute_value',                #  5
    'ctr.field_creator_value',                  #  6
    'ctr.field_year_value',                     #  7
    'ctr.field_end_creation_date_value',        #  8
    'ctr.field_format_value',                   #  9
    'ctr.field_metadata_link_url',              # 10
    'ctr.field_publications_value',             # 11
    'ctr.field_reference_link_url',             # 12
    'ctr.field_resource_available_value',       # 13
    'ctr.field_ethical_reference_value',        # 14
    'ctr.field_legal_reference_value',          # 15
    'ctr.field_license_type_value',             # 16
    'ctr.field_description_0_value',            # 17
    'ctr.field_contact_person_value',           # 18
    'ctr.field_longterm_preservation_value',    # 19
    'ctr.field_location_0_value',               # 20
    'ctr.field_content_type_value',             # 21
    'ctr.field_format_detailed_value',          # 22
    'ctr.field_quality_value',                  # 23
    'ctr.field_applications_value',             # 24
    'ctr.field_project_value',                  # 25
    'ctr.field_size_value',                     # 26
    'ctr.field_distribution_form_value',        # 27
    'ctr.field_access_value',                   # 28
    'ctr.field_source_0_value',                 # 29
    'ctr.field_date_1_value',                   # 30
    'ctr.field_type_value',                     # 31
    'ctr.field_format_detailed_1_value',        # 32
    'ctr.field_schema_reference_value',         # 33
    'ctr.field_size_0_value',                   # 34
    'ctr.field_access_2_value',                 # 35
    'ctr.field_resource_urlcheck_value',        # 36
);
my $res_sql = "SELECT " . join(", ", @resourceColumns)
    . " FROM node AS n, content_type_resource AS ctr WHERE ctr.nid = n.nid and ctr.vid = n.vid ORDER BY ctr.nid, ctr.vid";

# per-resource detail queries; the two placeholders are always (nid, vid)
my $types_sql = "SELECT field_resource_type_value FROM content_field_resource_type AS cfrt WHERE cfrt.nid = ? AND cfrt.vid = ?"
    . " AND TRIM(field_resource_type_value) IS NOT NULL AND TRIM(field_resource_type_value) != 'Web Service' ORDER BY field_resource_type_value";
my $langs_sql = "SELECT field_languages_value FROM content_field_languages AS cfl WHERE cfl.nid = ? AND cfl.vid = ?"
    . " AND TRIM(field_languages_value) IS NOT NULL ORDER BY field_languages_value";
my $inst_sql = "SELECT field_org_institution_value, field_org_workingunit_value, cto.nid, cto.vid FROM content_type_organisation AS cto, content_field_institute_fromlist AS cfif"
    . " WHERE cto.nid = cfif.field_institute_fromlist_nid AND cfif.nid = ? AND cfif.vid = ?";
my $countries_sql = "SELECT field_country_value FROM content_field_country AS cfc WHERE cfc.nid = ? AND cfc.vid = ?"
    . " AND TRIM(field_country_value) IS NOT NULL ORDER BY field_country_value";
my $worklang_sql = "SELECT field_working_languages_value FROM content_field_working_languages AS cfwl WHERE cfwl.nid = ? AND cfwl.vid = ?"
    . " AND TRIM(field_working_languages_value) IS NOT NULL ORDER BY field_working_languages_value";
my $worklang0_sql = "SELECT field_working_languages_0_value FROM content_field_working_languages_0 AS cfwl WHERE cfwl.nid = ? AND cfwl.vid = ?"
    . " AND TRIM(field_working_languages_0_value) IS NOT NULL ORDER BY field_working_languages_0_value";
my $coords_sql = "SELECT latitude, longitude FROM location AS lo, location_instance AS li WHERE li.nid = ? AND li.vid = ? AND li.lid = lo.lid";

# select information from resource table
my $resources = $dbh->selectall_arrayref($res_sql);

# get reference language codes
my $langcodes = getCodes($langCodesFile);

# get country information for mapping to continents/iso codes
my $LLcountryinfo = getCountryInfo($LLcountryInfoFile);

# begin top node XML
my $topnode = makeXmlHeader($XmlImportFile, "Corpus");
$topnode->{Name}->content("Clarin_LRT_Inventory");
$topnode->{Title}->content("Clarin LRT Inventory");
$topnode->{Description}->content("One of the goals of CLARIN is to setup a full-fledged registry to which one can add any collection of language resources and services (WP2). A second goal is to get a good overview about language resources and technology (WP5) and a third goal is to get a deep overview about the rights and license situation (WP7). Therefore, we have set up an online system for creating an inventory of LRT which can be used for generating these overviews, but which can be re-used to later bootstrap the real registry and which can be dynamically extended.");
$topnode->{Description}{LanguageId} = "ISO639-3:eng";

# index of the next CorpusLink element in the top node
my $linkCount = 0;

foreach my $res (@{$resources}) {
    my ($nid, $vid) = @{$res}[0, 1];

    # To debug a single resource, skip all others here, e.g.:
    #   next unless $nid == 1355;

    # get the resource types, countries, languages from the database for the respective resource
    my $res_types     = $dbh->selectall_arrayref($types_sql,     undef, $nid, $vid);
    my $res_langs     = $dbh->selectall_arrayref($langs_sql,     undef, $nid, $vid);
    my $res_inst      = $dbh->selectall_arrayref($inst_sql,      undef, $nid, $vid);
    my $res_countries = $dbh->selectall_arrayref($countries_sql, undef, $nid, $vid);
    my $res_worklang  = $dbh->selectall_arrayref($worklang_sql,  undef, $nid, $vid);
    my $res_worklang0 = $dbh->selectall_arrayref($worklang0_sql, undef, $nid, $vid);

    # coordinates are looked up for the first linked organisation only
    my $coords;
    if (@{$res_inst}) {
        $coords = $dbh->selectall_arrayref($coords_sql, undef, $res_inst->[0][2], $res_inst->[0][3]);
    }

    # resources without any (non "Web Service") resource type are not exported
    next unless @{$res_types};

    # create IMDI file
    my ($path, $name) = makeImdi($res, $res_types, $res_langs, $res_inst, $res_countries, $res_worklang,
        $res_worklang0, $coords, $langcodes, $LLcountryinfo, $XmlImportFile, $outputDir);

    # NOTE(review): as written this substitution replaces a double quote with
    # itself. It looks like the replacement was once an XML entity (probably
    # &quot;) that got lost - confirm against version control.
    $name =~ s/"/"/g;

    $topnode->{CorpusLink}[$linkCount]->content($path);
    $topnode->{CorpusLink}[$linkCount]{Name} = $name;
    $linkCount++;
}

my $toppath = $outputDir . "clarinLRT-" . getTodayDate() . ".imdi";
printFile($toppath, $topnode->data());

$dbh->disconnect;

###############################
# Finds ISO 639.3 code for a given language name, otherwise sets the code to "Unknown"
#
# The name is compared with column 6 of every code row; the code is taken
# from column 0. If more than one row carries the name, "ambig" is printed
# to STDOUT and "Unknown" is returned.
#
# Arguments: language name, array ref with language code data (see getCodes)
# Returns: "ISO639-3:<code>" or "Unknown"
###############################
sub findCode {
    my ($langname, $langcodes) = @_;

    my $code;
    foreach my $lc (@{$langcodes}) {
        next unless defined $lc->[6] && $langname eq $lc->[6];
        if ($code) {
            # there are two languages with this name, so the code cannot be determined
            print "ambig\n";
            return "Unknown";
        }
        $code = "ISO639-3:" . $lc->[0];
    }

    return $code ? $code : "Unknown";
}

###############################
# Finds the continent for a country by comparing it to the information from the LINGUIST List database
#
# The country name is compared with column 1 of every row; columns 2 and 3
# hold the first and (optional) second region. The first matching row decides.
# If both regions map to different continents, both are returned joined by
# ", " (this will not validate against the closed continent list - open issue).
#
# Arguments: country name, array ref of LL db info (see getCountryInfo)
# Returns: continent name(s) or "Unknown"
###############################
sub findContinent {
    my ($country, $info) = @_;

    foreach my $row (@{$info}) {
        next unless defined $row->[1] && $country eq $row->[1];

        my $cont1 = _regionToContinent($row->[2]);
        my $cont2 = $row->[3] ? _regionToContinent($row->[3]) : "Unknown";

        # first region gives no continent (e.g. "Middle East"): rely on the second one
        return $cont2 if $cont1 eq "Unknown";
        return $cont1 if $cont2 eq "Unknown" || $cont2 eq $cont1;

        # will not validate, how to solve this?
        return $cont1 . ", " . $cont2;
    }

    return "Unknown";
}

###############################
# Maps one LINGUIST List region name to a continent name from the list in
# testIsoContinents(), including the known region aliases.
#
# Arguments: region name (may be undefined)
# Returns: continent name or "Unknown" (also for "Middle East", which spans continents)
###############################
sub _regionToContinent {
    my $region = shift;
    return "Unknown" unless defined $region;

    my $cont = testIsoContinents($region);
    return $cont if $cont ne "Unknown";

    my %continentFor = (
        "Australasia"     => "Australia",
        "West Indies"     => "Middle-America",
        "Central America" => "Middle-America",
        "Pacific Islands" => "Oceania",
    );
    return exists $continentFor{$region} ? $continentFor{$region} : "Unknown";
}

###############################
# Reads in an input file with language codes
#
# Lines starting with "#" are skipped; all other lines are split at tabs.
#
# Arguments: input filename
# Returns: matrix with data (array ref of rows, each row an array ref of cells)
###############################
sub getCodes {
    my $filename = shift;
    return _readDelimitedFile($filename, qr/\t/, 1);
}

###############################
# Reads in an input file with country information extracted from LINGUIST List database
#
# Every line is split at commas.
#
# Arguments: input filename
# Returns: matrix with data (array ref of rows, each row an array ref of cells)
###############################
sub getCountryInfo {
    my $filename = shift;
    return _readDelimitedFile($filename, qr/,/, 0);
}

###############################
# Reads a delimited text file into a matrix.
#
# Arguments: input filename, separator regex, flag: skip lines starting with "#"
# Returns: array ref of rows (empty array ref for an empty file)
###############################
sub _readDelimitedFile {
    my ($filename, $separator, $skipComments) = @_;

    open(my $in, '<', $filename) or die "Cannot read '$filename': $!";
    my @rows;
    while (my $line = <$in>) {
        next if $skipComments && $line =~ m/^#/;

        # line breaks and carriage returns deleted from line
        $line =~ s/\r?\n//g;
        push @rows, [ split $separator, $line ];
    }
    close($in);

    return \@rows;
}

###############################
# Gets today's date and returns a formatted string (YYYY-MM-DD)
#
# Arguments: nothing
# Returns: date string
###############################
sub getTodayDate {
    my ($day, $month, $year) = (localtime)[3 .. 5];    # grabs day/month/year values
    return sprintf "%d-%02d-%02d", $year + 1900, $month + 1, $day;
}

###############################
# Creates the XML structure and content for the imdi file from one resource entry
#
# Arguments: resource information, resource types, languages, institution, countries, working languages (2),
#            geo coordinates, langcodes (for comparison), countryinfo (incl iso codes, for comparison),
#            XML template file, output directory (with trailing slash)
# Returns: whatever makeParentCorpus() returns, i.e. (corpus filename, corpus name);
#          the catalogue file itself is written via printFile()
#
# Other requirements: testXMLimport.xml with xml declaration line incl encoding set to utf-8 and root element
###############################
sub makeImdi {
    my ($info, $types, $langs, $inst, $countries, $wlangs, $wlangs0,
        $geocoords, $langcodes, $countryinfo, $baseFile, $filepath) = @_;

    # Create XML object and load imdi template file
    my $p = makeXmlHeader($baseFile, "Catalogue");

    # Name
    $p->{Name}->content($info->[2]);

    # Title
    $p->{Title}->content($info->[2]);

    # Id - always empty, set by Lamus?
    $p->{Id}->set_node();

    # Description
    $p->{Description}->content($info->[4]);
    $p->{Description}{LanguageId} = "Unspecified";

    # Document Languages - the first working language list wins, the second is the fallback
    my $docLangs = @{$wlangs} ? $wlangs : $wlangs0;
    if (@{$docLangs}) {
        _addLanguages($p, 'DocumentLanguages', $docLangs, $langcodes, 0);
    }
    else {
        $p->{DocumentLanguages}->set_node();
    }

    # Subject Languages
    if (@{$langs}) {
        _addLanguages($p, 'SubjectLanguages', $langs, $langcodes, 1);
    }
    else {
        $p->{SubjectLanguages}->set_node();
    }

    # Location - multiple, might need to be changed depending on schema decision
    if (@{$countries}) {
        for my $i (0 .. $#{$countries}) {
            my $country = $countries->[$i][0];
            $p->{Location}[$i]{Continent}->content(findContinent($country, $countryinfo));
            $p->{Location}[$i]{Continent}{Link} = "http://www.mpi.nl/IMDI/Schema/Continents.xml";
            $p->{Location}[$i]{Continent}{Type} = "OpenVocabulary";
            $p->{Location}[$i]{Country}->content($country);
            $p->{Location}[$i]{Country}{Link} = "http://www.mpi.nl/IMDI/Schema/Countries.xml";
            $p->{Location}[$i]{Country}{Type} = "ClosedVocabulary";
        }
    }
    else {
        $p->{Location}->set_node();
        $p->{Location}{Continent}->set_node();
        $p->{Location}{Continent}{Link} = "http://www.mpi.nl/IMDI/Schema/Continents.xml";
        $p->{Location}{Continent}{Type} = "OpenVocabulary";
        $p->{Location}{Country}->set_node();
        $p->{Location}{Country}{Link} = "http://www.mpi.nl/IMDI/Schema/Countries.xml";
        $p->{Location}{Country}{Type} = "ClosedVocabulary";
    }

    # ContentType
    for my $i (0 .. $#{$types}) {
        $p->{ContentType}[$i]->content($types->[$i][0]);
    }

    # Format
    if ($info->[9]) {
        $p->{Format}{Text}->content($info->[9]);
    }
    else {
        $p->{Format}->set_node();
    }

    # Quality
    $p->{Quality}->set_node();

    # Smallest Annotation Unit
    $p->{SmallestAnnotationUnit}->set_node();

    # Applications
    if ($info->[24]) {
        $p->{Applications}->content($info->[24]);
    }
    else {
        $p->{Applications}->set_node();
    }

    # Date - "start/end", or whichever of the two is filled in
    $p->{Date}->set_node();
    my @date = grep { $_ } @{$info}[7, 8];
    if (@date) {
        $p->{Date}->content(join('/', @date));
    }

    # Project
    if ($info->[25]) {
        $p->{Project}{Name}->content($info->[25]);
        $p->{Project}{Title}->content($info->[25]);
    }
    else {
        $p->{Project}->set_node();
        $p->{Project}{Name}->set_node();
        $p->{Project}{Title}->set_node();
    }
    $p->{Project}{Id}->set_node();
    $p->{Project}{Contact}->set_node();

    # Publisher (institution/organisation name)
    if (@{$inst}) {
        # Each row also carries the organisation's nid/vid (columns 2 and 3),
        # which are only needed for the coordinate lookup and do not belong
        # into the publisher text. There is a single Publisher element, so
        # with several organisations the last one is the one that remains.
        foreach my $publ (@{$inst}) {
            my @parts = grep { defined $_ && $_ ne '' } @{$publ}[0, 1];
            $p->{Publisher}->content(join(', ', @parts));
        }
    }
    elsif ($info->[5]) {
        $p->{Publisher}->content($info->[5]);
    }
    else {
        $p->{Publisher}->set_node();
    }

    # Author
    $p->{Author}->set_node();

    # Size - decision with Dieter: will contain both collection and lexicon size with
    # respective prefix and ; as delimiter (hence ; inside a value becomes ,)
    my @sizes;
    if ($info->[26]) {
        (my $collectionSize = $info->[26]) =~ s/;/,/g;
        push @sizes, "Collection: " . $collectionSize;
    }
    if ($info->[34]) {
        (my $lexiconSize = $info->[34]) =~ s/;/,/g;
        push @sizes, "Lexicon: " . $lexiconSize;
    }
    if (@sizes) {
        $p->{Size}->content(join(';', @sizes));
    }
    else {
        $p->{Size}->set_node();
    }

    # Distribution Form
    if ($info->[27]) {
        $p->{DistributionForm}->content($info->[27]);
    }
    else {
        $p->{DistributionForm}->set_node();
    }

    # Access
    if ($info->[13]) {
        $p->{Access}{Availability}->content("available on the internet");
    }
    else {
        $p->{Access}{Availability}->set_node();
    }
    $p->{Access}{Date}->set_node();
    $p->{Access}{Owner}->set_node();
    $p->{Access}{Publisher}->set_node();
    $p->{Access}{Contact}->set_node();

    # Pricing
    $p->{Pricing}->set_node();

    # Contact Person - optional
    if ($info->[6]) {
        $p->{ContactPerson}->content($info->[6]);
    }

    # Reference Link - optional, left out when the url check column names this field
    if ($info->[12] && !_failedUrlCheck($info, 'field_reference_link_url')) {
        $p->{ReferenceLink}->content($info->[12]);
    }

    # Metadata Link - optional, left out when the url check column names this field
    if ($info->[10] && !_failedUrlCheck($info, 'field_metadata_link_url')) {
        $p->{MetadataLink}->content($info->[10]);
    }

    # Publications - optional
    if ($info->[11]) {
        $p->{Publications}->content($info->[11]);
    }

    # Key-value pairs - collected first as [name, value], written afterwards
    # so that every key gets its own consecutive index
    my @keys = (
        [ "NodeId",    $info->[0] ],
        [ "VersionId", $info->[1] ],
    );

    # Latitude & Longitude - only when the coordinate query returned a row
    if ($geocoords && @{$geocoords}) {
        push @keys, [ "Latitude",  $geocoords->[0][0] ];
        push @keys, [ "Longitude", $geocoords->[0][1] ];
    }

    # all remaining keys are optional: [key name, column index in the resource row]
    my @optionalKeys = (
        [ "OtherLanguages",                   3 ],    # no way to include that in proper language elements
        [ "IPREthicalReference",             14 ],
        [ "IPRLegalReference",               15 ],
        [ "IPRLicenseType",                  16 ],
        [ "IPRDescription",                  17 ],
        [ "IPRContactPerson",                18 ],
        [ "CollectionLongtermPreservationBy", 19 ],
        [ "CollectionLocation",              20 ],
        [ "CollectionContentType",           21 ],
        [ "CollectionFormatDetailed",        22 ],
        [ "CollectionQuality",               23 ],
        [ "CollectionAccess",                28 ],
        [ "CollectionSource",                29 ],
        [ "LexiconDate",                     30 ],
        [ "LexiconType",                     31 ],
        [ "LexiconFormatDetailed",           32 ],
        [ "LexiconSchemaReference",          33 ],
        [ "LexiconAccess",                   35 ],
    );
    foreach my $spec (@optionalKeys) {
        my ($keyName, $column) = @{$spec};
        next unless $info->[$column];

        # the schema reference is a link and is left out when the url check names it
        next if $column == 33 && _failedUrlCheck($info, 'field_schema_reference_value');

        push @keys, [ $keyName, $info->[$column] ];
    }

    for my $k (0 .. $#keys) {
        $p->{Keys}{Key}[$k]{Name} = $keys[$k][0];
        $p->{Keys}{Key}[$k]->content($keys[$k][1]);
    }

    # filename clarin-catalogue-(nid)-(vid).imdi
    my $filename = "clarin-catalogue-" . $info->[0] . "-" . $info->[1] . ".imdi";
    printFile(($filepath . $filename), $p->data());

    return makeParentCorpus($filepath, $filename, $p, $baseFile);
}

###############################
# Fills a language container element (DocumentLanguages or SubjectLanguages)
# with one Language element per row.
#
# Arguments: catalogue XML object, container element name, array ref of rows
#            (language name in column 0), langcodes (for findCode),
#            flag: also add Dominant/SourceLanguage/TargetLanguage ("Unknown")
# Returns: nothing
###############################
sub _addLanguages {
    my ($p, $container, $rows, $langcodes, $withRoles) = @_;

    for my $i (0 .. $#{$rows}) {
        my $langname = $rows->[$i][0];
        $p->{$container}{Language}[$i]{Id}->content(findCode($langname, $langcodes));
        $p->{$container}{Language}[$i]{Name}->content($langname);
        $p->{$container}{Language}[$i]{Name}{Link} = "http://www.mpi.nl/IMDI/Schema/MPI-Languages.xml";
        $p->{$container}{Language}[$i]{Name}{Type} = "OpenVocabulary";

        next unless $withRoles;
        $p->{$container}{Language}[$i]{Dominant}->content("Unknown");
        $p->{$container}{Language}[$i]{SourceLanguage}->content("Unknown");
        $p->{$container}{Language}[$i]{TargetLanguage}->content("Unknown");
    }
    return;
}

###############################
# Tells whether the url check column (index 36) of a resource row mentions
# the given field name.
#
# A plain substring test is used on purpose: a m//g match in boolean context
# would remember its position between calls and could miss a later field.
#
# Arguments: resource information, field name
# Returns: 1 if the field name occurs in the url check value, otherwise 0
###############################
sub _failedUrlCheck {
    my ($info, $field) = @_;
    return 0 unless defined $info->[36];
    return index($info->[36], $field) >= 0 ? 1 : 0;
}

###############################
# Creates the parent corpus node of the catalogue file and prints it next to
# the catalogue file ("catalogue" in the filename is replaced by "corpus")
#
# Arguments: output directory, catalogue filename, xml of the catalogue file, XML template file
# Returns: list of (corpus filename, corpus name)
###############################
sub makeParentCorpus {
    my ($filepath, $filename, $xml, $baseFile) = @_;

    my $corpus = makeXmlHeader($baseFile, "Corpus");
    $corpus->{CatalogueLink} = $filename;

    my $name  = $xml->{Name}->content;
    my $title = $xml->{Title}->content;
    $corpus->{Name}->content($name);
    $corpus->{Title}->content($title);
    $corpus->{Description}->set_node();

    $filename =~ s/catalogue/corpus/g;
    printFile(($filepath . $filename), $corpus->data());

    my $corpusName = $corpus->{Name}->content;
    return ($filename, $corpusName);
}

###############################
# Makes the header of the Corpus or Catalogue XML object
#
# Arguments:
# - template file (required because XML Smart does not set encoding to UTF-8 unless the previous file said so),
# - imdi type (Corpus, Catalogue; will be used as element name, so should be written exactly the way the element name has to be)
# Returns: XML object pointing to Catalogue/Corpus element in the XML tree
###############################
sub makeXmlHeader {
    my ($startfile, $imditype) = @_;

    my $doc  = XML::Smart->new($startfile);
    my $root = $doc->{METATRANSCRIPT};

    # filling attributes of METATRANSCRIPT
    $root->{xmlns}                = "http://www.mpi.nl/IMDI/Schema/IMDI";
    $root->{'xmlns:xsi'}          = "http://www.w3.org/2001/XMLSchema-instance";
    $root->{Date}                 = getTodayDate();
    $root->{FormatId}             = "IMDI 3.0";
    $root->{Originator}           = "admin";
    $root->{Type}                 = uc($imditype);
    $root->{Version}              = "12";
    $root->{'xsi:schemaLocation'} = "http://www.mpi.nl/IMDI/Schema/IMDI ./IMDI_3.0.xsd";

    # History
    $root->{History}->content("CLARIN to IMDI, DATE: " . getTodayDate());

    return $root->{$imditype};
}

###############################
# Prints the array reference, can be used at different stages
# in the script to check whether a certain function was executed
# on the array ref properly
#
# Arguments: array ref of file matrix
# Returns: nothing
###############################
sub printArrayRef {
    my $aref = shift;

    foreach my $row (@{$aref}) {
        my $k = 0;
        foreach my $cell (@{$row}) {
            print $k . ": " . (defined $cell ? $cell : "") . "\t";
            $k++;
        }
        print "\n";
    }
    return;
}

###############################
# Prints an XML string to a file (an existing file is overwritten)
#
# The generator processing instruction added by XML::Smart is removed first.
#
# Arguments: file path for output file, XML string
# Returns: nothing; dies if the file cannot be written
###############################
sub printFile {
    my ($path, $str) = @_;

    # remove the XML::Smart generator line, whatever module/Perl version wrote it
    $str =~ s/<\?meta name="GENERATOR" content="XML::Smart\/[^"]*" \?>\n//g;

    # NOTE(review): as written this substitution replaces "&#" with itself.
    # It looks like the pattern was once an escaped form (probably &amp;#)
    # that got lost - confirm against version control.
    $str =~ s/&#/&#/g;

    open(my $out, '>', $path) or die "Cannot write '$path': $!";
    print {$out} $str or die "Cannot write '$path': $!";
    close($out) or die "Cannot close '$path': $!";
    return;
}

###############################
# Tests against the continent list specified in http://www.mpi.nl/IMDI/Schema/Continents.xml
#
# Arguments: region name from LL db info
# Returns: continent name or "Unknown"
###############################
sub testIsoContinents {
    my $reg = shift;
    return "Unknown" unless defined $reg;

    foreach my $c ("Africa", "Asia", "Europe", "Australia", "Oceania", "North-America", "Middle-America", "South-America") {
        return $reg if $reg eq $c;
    }
    return "Unknown";
}