3 # Copyright 2006 (C) LibLime
4 # Joshua Ferraro <jmf@liblime.com>
5 # Portions Copyright 2009 Chris Cormack and the Koha Dev Team
6 # This file is part of Koha.
8 # Koha is free software; you can redistribute it and/or modify it under the
9 # terms of the GNU General Public License as published by the Free Software
10 # Foundation; either version 2 of the License, or (at your option) any later
13 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
14 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with Koha; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 #use warnings; FIXME - Bug 2505
26 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
29 if (C4::Context->ismemcached) {
30 require Memoize::Memcached;
31 import Memoize::Memcached qw(memoize_memcached);
33 memoize_memcached('getTranslatedLanguages', memcached => C4::Context->memcached);
34 memoize_memcached('getFrameworkLanguages' , memcached => C4::Context->memcached);
35 memoize_memcached('getAllLanguages', memcached => C4::Context->memcached);
40 $VERSION = 3.07.00.049;
44 &getFrameworkLanguages
45 &getTranslatedLanguages
49 @EXPORT_OK = qw(getFrameworkLanguages getTranslatedLanguages getAllLanguages getLanguages get_bidi regex_lang_subtags language_get_description accept_language);
55 C4::Languages - Perl Module containing language list functions for Koha
67 =head2 getFrameworkLanguages
69 Returns a reference to an array of hashes:
71 my $languages = getFrameworkLanguages();
72 for my $language(@$languages) {
73 print "$language->{language_code}\n"; # language code in iso 639-2
74 print "$language->{language_name}\n"; # language name in native script
75 print "$language->{language_locale_name}\n"; # language name in current locale
80 sub getFrameworkLanguages {
81 # get a hash with all language codes, names, and locale names
82 my $all_languages = getAllLanguages();
85 # find the available directory names
86 my $dir=C4::Context->config('intranetdir')."/installer/data/";
88 my @listdir= grep { !/^\.|CVS/ && -d "$dir/$_"} readdir(MYDIR);
91 # pull out all data for the dir names that exist
92 for my $dirname (@listdir) {
93 for my $language_set (@$all_languages) {
95 if ($dirname eq $language_set->{language_code}) {
97 'language_code'=>$dirname,
98 'language_description'=>$language_set->{language_description},
99 'native_descrition'=>$language_set->{language_native_description} }
106 =head2 getTranslatedLanguages
108 Returns a reference to an array of hashes:
110 my $languages = getTranslatedLanguages();
111 print "Available translated languages:\n";
112 for my $language(@$trlanguages) {
113 print "$language->{language_code}\n"; # language code in iso 639-2
114 print "$language->{language_name}\n"; # language name in native script
115 print "$language->{language_locale_name}\n"; # language name in current locale
120 sub getTranslatedLanguages {
121 my ($interface, $theme, $current_language, $which) = @_;
124 my @enabled_languages;
126 if ($interface && $interface eq 'opac' ) {
127 @enabled_languages = split ",", C4::Context->preference('opaclanguages');
128 $htdocs = C4::Context->config('opachtdocs');
129 if ( $theme and -d "$htdocs/$theme" ) {
130 (@languages) = _get_language_dirs($htdocs,$theme);
133 for my $theme ( _get_themes('opac') ) {
134 push @languages, _get_language_dirs($htdocs,$theme);
138 elsif ($interface && $interface eq 'intranet' ) {
139 @enabled_languages = split ",", C4::Context->preference('language');
140 $htdocs = C4::Context->config('intrahtdocs');
141 if ( $theme and -d "$htdocs/$theme" ) {
142 @languages = _get_language_dirs($htdocs,$theme);
145 foreach my $theme ( _get_themes('intranet') ) {
146 push @languages, _get_language_dirs($htdocs,$theme);
151 @enabled_languages = split ",", C4::Context->preference('opaclanguages');
152 my $htdocs = C4::Context->config('intrahtdocs');
153 foreach my $theme ( _get_themes('intranet') ) {
154 push @languages, _get_language_dirs($htdocs,$theme);
156 $htdocs = C4::Context->config('opachtdocs');
157 foreach my $theme ( _get_themes('opac') ) {
158 push @languages, _get_language_dirs($htdocs,$theme);
161 $seen{$_}++ for @languages;
162 @languages = keys %seen;
164 return _build_languages_arrayref(\@languages,$current_language,\@enabled_languages);
167 =head2 getAllLanguages
169 Returns a reference to an array of hashes:
171 my $alllanguages = getAllLanguages();
172 print "Available translated languages:\n";
173 for my $language(@$alllanguages) {
174 print "$language->{language_code}\n";
175 print "$language->{language_name}\n";
176 print "$language->{language_locale_name}\n";
181 sub getAllLanguages {
182 return getLanguages(shift);
187 Returns a reference to an array of hashes.
188 Extracted from getAllLanguages to limit effect on the code base.
189 This new function (name) will allow for more arguments to customize the values returned.
191 - If no parameter is passed to the function, it returns english languages names
192 - If a $lang parameter conforming to RFC4646 syntax is passed, the function returns languages names translated in $lang
193 If a language name is not translated in $lang in database, the function returns english language name
194 - If $isFiltered is set to true, only the detail of the languages selected in system preferences AdvanceSearchLanguages is returned.
200 my $isFiltered = shift;
203 my $dbh=C4::Context->dbh;
204 my $default_language = 'en';
205 my $current_language = $default_language;
206 my $language_list = $isFiltered ? C4::Context->preference("AdvancedSearchLanguages") : undef;
208 $current_language = regex_lang_subtags($lang)->{'language'};
210 my $sth = $dbh->prepare('SELECT * FROM language_subtag_registry WHERE type=\'language\'');
212 while (my $language_subtag_registry = $sth->fetchrow_hashref) {
214 # check if language name is stored in current language
215 my $sth4= $dbh->prepare("SELECT description FROM language_descriptions WHERE type='language' AND subtag =? AND lang = ?");
216 $sth4->execute($language_subtag_registry->{subtag},$current_language);
217 while (my $language_desc = $sth4->fetchrow_hashref) {
218 $desc=$language_desc->{description};
220 my $sth2= $dbh->prepare("SELECT * FROM language_descriptions LEFT JOIN language_rfc4646_to_iso639 on language_rfc4646_to_iso639.rfc4646_subtag = language_descriptions.subtag WHERE type='language' AND subtag =? AND language_descriptions.lang = ?");
222 $sth2->execute($language_subtag_registry->{subtag},$current_language);
225 $sth2->execute($language_subtag_registry->{subtag},$default_language);
227 my $sth3 = $dbh->prepare("SELECT description FROM language_descriptions WHERE type='language' AND subtag=? AND lang=?");
228 # add the correct description info
229 while (my $language_descriptions = $sth2->fetchrow_hashref) {
230 $sth3->execute($language_subtag_registry->{subtag},$language_subtag_registry->{subtag});
231 my $native_description;
232 while (my $description = $sth3->fetchrow_hashref) {
233 $native_description = $description->{description};
236 # fill in the ISO6329 code
237 $language_subtag_registry->{iso639_2_code} = $language_descriptions->{iso639_2_code};
238 # fill in the native description of the language, as well as the current language's translation of that if it exists
239 if ($native_description) {
240 $language_subtag_registry->{language_description} = $native_description;
241 $language_subtag_registry->{language_description}.=" ($language_descriptions->{description})" if $language_descriptions->{description};
244 $language_subtag_registry->{language_description} = $language_descriptions->{description};
247 if ( !$language_list || index ( $language_list, $language_subtag_registry->{ iso639_2_code } ) >= 0) {
248 push @languages_loop, $language_subtag_registry;
251 return \@languages_loop;
256 Internal function, returns an array of all available themes.
258 (@themes) = &_get_themes('opac');
259 (@themes) = &_get_themes('intranet');
264 my $interface = shift;
267 if ( $interface eq 'intranet' ) {
268 $htdocs = C4::Context->config('intrahtdocs');
271 $htdocs = C4::Context->config('opachtdocs');
273 opendir D, "$htdocs";
274 my @dirlist = readdir D;
275 foreach my $directory (@dirlist) {
276 # if there's an en dir, it's a valid theme
277 -d "$htdocs/$directory/en" and push @themes, $directory;
282 =head2 _get_language_dirs
284 Internal function, returns an array of directory names, excluding non-language directories
288 sub _get_language_dirs {
289 my ($htdocs,$theme) = @_;
291 opendir D, "$htdocs/$theme";
292 for my $lang_string ( readdir D ) {
293 next if $lang_string =~/^\./;
294 next if $lang_string eq 'all';
295 next if $lang_string =~/png$/;
296 next if $lang_string =~/js$/;
297 next if $lang_string =~/css$/;
298 next if $lang_string =~/CVS$/;
299 next if $lang_string =~/\.txt$/i; #Don't read the readme.txt !
300 next if $lang_string =~/img|images|famfam|js|less|lib|sound|pdf/;
301 push @lang_strings, $lang_string;
303 return (@lang_strings);
306 =head2 _build_languages_arrayref
308 Internal function for building the ref to array of hashes
310 FIXME: this could be rewritten and simplified using map
314 sub _build_languages_arrayref {
315 my ($translated_languages,$current_language,$enabled_languages) = @_;
316 my @translated_languages = @$translated_languages;
317 my @languages_loop; # the final reference to an array of hashrefs
318 my @enabled_languages = @$enabled_languages;
319 # how many languages are enabled, if one, take note, some contexts won't need to display it
320 my %seen_languages; # the language tags we've seen
323 my $track_language_groups;
324 my $current_language_regex = regex_lang_subtags($current_language);
325 # Loop through the translated languages
326 for my $translated_language (@translated_languages) {
327 # separate the language string into its subtag types
328 my $language_subtags_hashref = regex_lang_subtags($translated_language);
330 # is this language string 'enabled'?
331 for my $enabled_language (@enabled_languages) {
332 #warn "Checking out if $translated_language eq $enabled_language";
333 $language_subtags_hashref->{'enabled'} = 1 if $translated_language eq $enabled_language;
336 # group this language, key by langtag
337 $language_subtags_hashref->{'sublanguage_current'} = 1 if $translated_language eq $current_language;
338 $language_subtags_hashref->{'rfc4646_subtag'} = $translated_language;
339 $language_subtags_hashref->{'native_description'} = language_get_description($language_subtags_hashref->{language},$language_subtags_hashref->{language},'language');
340 $language_subtags_hashref->{'script_description'} = language_get_description($language_subtags_hashref->{script},$language_subtags_hashref->{'language'},'script');
341 $language_subtags_hashref->{'region_description'} = language_get_description($language_subtags_hashref->{region},$language_subtags_hashref->{'language'},'region');
342 $language_subtags_hashref->{'variant_description'} = language_get_description($language_subtags_hashref->{variant},$language_subtags_hashref->{'language'},'variant');
343 $track_language_groups->{$language_subtags_hashref->{'language'}}++;
344 push ( @{ $language_groups->{$language_subtags_hashref->{language}} }, $language_subtags_hashref );
346 # $key is a language subtag like 'en'
347 while( my ($key, $value) = each %$language_groups) {
349 # is this language group enabled? are any of the languages within it enabled?
351 for my $enabled_language (@enabled_languages) {
352 my $regex_enabled_language = regex_lang_subtags($enabled_language);
353 $enabled = 1 if $key eq $regex_enabled_language->{language};
355 push @languages_loop, {
356 # this is only use if there is one
357 rfc4646_subtag => @$value[0]->{rfc4646_subtag},
358 native_description => language_get_description($key,$key,'language'),
360 sublanguages_loop => $value,
361 plural => $track_language_groups->{$key} >1 ? 1 : 0,
362 current => $current_language_regex->{language} eq $key ? 1 : 0,
363 group_enabled => $enabled,
366 return \@languages_loop;
369 sub language_get_description {
370 my ($script,$lang,$type) = @_;
371 my $dbh = C4::Context->dbh;
373 my $sth = $dbh->prepare("SELECT description FROM language_descriptions WHERE subtag=? AND lang=? AND type=?");
374 #warn "QUERY: SELECT description FROM language_descriptions WHERE subtag=$script AND lang=$lang AND type=$type";
375 $sth->execute($script,$lang,$type);
376 while (my $descriptions = $sth->fetchrow_hashref) {
377 $desc = $descriptions->{'description'};
380 $sth = $dbh->prepare("SELECT description FROM language_descriptions WHERE subtag=? AND lang=? AND type=?");
381 $sth->execute($script,'en',$type);
382 while (my $descriptions = $sth->fetchrow_hashref) {
383 $desc = $descriptions->{'description'};
388 =head2 regex_lang_subtags
390 This internal sub takes a string composed according to RFC 4646 as
391 an input and returns a reference to a hash containing keys and values
392 for ( language, script, region, variant, extension, privateuse )
396 sub regex_lang_subtags {
399 # Regex for recognizing RFC 4646 well-formed tags
400 # http://www.rfc-editor.org/rfc/rfc4646.txt
402 # regexes based on : http://unicode.org/cldr/data/tools/java/org/unicode/cldr/util/data/langtagRegex.txt
403 # The structure requires no forward references, so it reverses the order.
404 # The uppercase comments are fragments copied from RFC 4646
406 # Note: the tool requires that any real "=" or "#" or ";" in the regex be escaped.
408 my $alpha = qr/[a-zA-Z]/ ; # ALPHA
409 my $digit = qr/[0-9]/ ; # DIGIT
410 my $alphanum = qr/[a-zA-Z0-9]/ ; # ALPHA / DIGIT
411 my $x = qr/[xX]/ ; # private use singleton
412 my $singleton = qr/[a-w y-z A-W Y-Z]/ ; # other singleton
413 my $s = qr/[-]/ ; # separator -- lenient parsers will use [-_]
415 # Now do the components. The structure is slightly different to allow for capturing the right components.
416 # The notation (?:....) is a non-capturing version of (...): so the "?:" can be deleted if someone doesn't care about capturing.
418 my $extlang = qr{(?: $s $alpha{3} )}x ; # *3("-" 3ALPHA)
419 my $language = qr{(?: $alpha{2,3} | $alpha{4,8} )}x ;
420 #my $language = qr{(?: $alpha{2,3}$extlang{0,3} | $alpha{4,8} )}x ; # (2*3ALPHA [ extlang ]) / 4ALPHA / 5*8ALPHA
422 my $script = qr{(?: $alpha{4} )}x ; # 4ALPHA
424 my $region = qr{(?: $alpha{2} | $digit{3} )}x ; # 2ALPHA / 3DIGIT
426 my $variantSub = qr{(?: $digit$alphanum{3} | $alphanum{5,8} )}x ; # *("-" variant), 5*8alphanum / (DIGIT 3alphanum)
427 my $variant = qr{(?: $variantSub (?: $s$variantSub )* )}x ; # *("-" variant), 5*8alphanum / (DIGIT 3alphanum)
429 my $extensionSub = qr{(?: $singleton (?: $s$alphanum{2,8} )+ )}x ; # singleton 1*("-" (2*8alphanum))
430 my $extension = qr{(?: $extensionSub (?: $s$extensionSub )* )}x ; # singleton 1*("-" (2*8alphanum))
432 my $privateuse = qr{(?: $x (?: $s$alphanum{1,8} )+ )}x ; # ("x"/"X") 1*("-" (1*8alphanum))
434 # Define certain grandfathered codes, since otherwise the regex is pretty useless.
435 # Since these are limited, this is safe even later changes to the registry --
436 # the only oddity is that it might change the type of the tag, and thus
437 # the results from the capturing groups.
438 # http://www.iana.org/assignments/language-subtag-registry
439 # Note that these have to be compared case insensitively, requiring (?i) below.
441 my $grandfathered = qr{(?: (?i)
443 | i $s (?: ami | bnn | default | enochian | hak | klingon | lux | mingo | navajo | pwn | tao | tay | tsu )
444 | sgn $s (?: BE $s fr | BE $s nl | CH $s de)
447 # For well-formedness, we don't need the ones that would otherwise pass, so they are commented out here
451 # | en $s (?: boont | GB $s oed | scouse )
452 # | no $s (?: bok | nyn)
453 # | zh $s (?: cmn | cmn $s Hans | cmn $s Hant | gan | guoyu | hakka | min | min $s nan | wuu | xiang | yue)
455 # Here is the final breakdown, with capturing groups for each of these components
456 # The language, variants, extensions, grandfathered, and private-use may have interior '-'
458 #my $root = qr{(?: ($language) (?: $s ($script) )? 40% (?: $s ($region) )? 40% (?: $s ($variant) )? 10% (?: $s ($extension) )? 5% (?: $s ($privateuse) )? 5% ) 90% | ($grandfathered) 5% | ($privateuse) 5% };
460 $string =~ qr{^ (?:($language)) (?:$s($script))? (?:$s($region))? (?:$s($variant))? (?:$s($extension))? (?:$s($privateuse))? $}xi; # |($grandfathered) | ($privateuse) $}xi;
462 'rfc4646_subtag' => $string,
473 # Script Direction Resources:
474 # http://www.w3.org/International/questions/qa-scripts
476 my ($language_script)= @_;
477 my $dbh = C4::Context->dbh;
479 my $sth = $dbh->prepare('SELECT bidi FROM language_script_bidi WHERE rfc4646_subtag=?');
480 $sth->execute($language_script);
481 while (my $result = $sth->fetchrow_hashref) {
482 $bidi = $result->{'bidi'};
487 sub accept_language {
488 # referenced http://search.cpan.org/src/CGILMORE/I18N-AcceptLanguage-1.04/lib/I18N/AcceptLanguage.pm
489 my ($clientPreferences,$supportedLanguages) = @_;
491 if ($clientPreferences) {
492 # There should be no whitespace anways, but a cleanliness/sanity check
493 $clientPreferences =~ s/\s//g;
494 # Prepare the list of client-acceptable languages
495 foreach my $tag (split(/,/, $clientPreferences)) {
496 my ($language, $quality) = split(/\;/, $tag);
497 $quality =~ s/^q=//i if $quality;
498 $quality = 1 unless $quality;
499 next if $quality <= 0;
500 # We want to force the wildcard to be last
501 $quality = 0 if ($language eq '*');
502 # Pushing lowercase language here saves processing later
503 push(@languages, { quality => $quality,
504 language => $language,
505 lclanguage => lc($language) });
508 carp "accept_language(x,y) called with no clientPreferences (x).";
510 # Prepare the list of server-supported languages
511 my %supportedLanguages = ();
512 my %secondaryLanguages = ();
513 foreach my $language (@$supportedLanguages) {
514 # warn "Language supported: " . $language->{language};
515 my $subtag = $language->{rfc4646_subtag};
516 $supportedLanguages{lc($subtag)} = $subtag;
517 if ( $subtag =~ /^([^-]+)-?/ ) {
518 $secondaryLanguages{lc($1)} = $subtag;
522 # Reverse sort the list, making best quality at the front of the array
523 @languages = sort { $b->{quality} <=> $a->{quality} } @languages;
524 my $secondaryMatch = '';
525 foreach my $tag (@languages) {
526 if (exists($supportedLanguages{$tag->{lclanguage}})) {
527 # Client en-us eq server en-us
528 return $supportedLanguages{$tag->{language}} if exists($supportedLanguages{$tag->{language}});
529 return $supportedLanguages{$tag->{lclanguage}};
530 } elsif (exists($secondaryLanguages{$tag->{lclanguage}})) {
531 # Client en eq server en-us
532 return $secondaryLanguages{$tag->{language}} if exists($secondaryLanguages{$tag->{language}});
533 return $supportedLanguages{$tag->{lclanguage}};
534 } elsif ($tag->{lclanguage} =~ /^([^-]+)-/ && exists($secondaryLanguages{$1}) && $secondaryMatch eq '') {
535 # Client en-gb eq server en-us
536 $secondaryMatch = $secondaryLanguages{$1};
537 } elsif ($tag->{lclanguage} =~ /^([^-]+)-/ && exists($secondaryLanguages{$1}) && $secondaryMatch eq '') {
538 # FIXME: We just checked the exact same conditional!
539 # Client en-us eq server en
540 $secondaryMatch = $supportedLanguages{$1};
541 } elsif ($tag->{lclanguage} eq '*') {
542 # * matches every language not already specified.
543 # It doesn't care which we pick, so let's pick the default,
544 # if available, then the first in the array.
545 #return $acceptor->defaultLanguage() if $acceptor->defaultLanguage();
546 return $supportedLanguages->[0];
549 # No primary matches. Secondary? (ie, en-us requested and en supported)
550 return $secondaryMatch if $secondaryMatch;
551 return undef; # else, we got nothing.