User:GreenC bot/Job 12/source

#!/usr/bin/gawk -bE

#
# austria - a bot to add {{tlx|Austria population Wikidata}} to about 2100 infoboxes
#           Home: https://en.wikipedia.org/wiki/User:GreenC_bot/Job_12
#           Dependencies: BotWikiAwk (GitHub)
#

# The MIT License (MIT)
#
# Copyright (c) March 2019
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

BEGIN {
  # Bot identifier. It is assigned in its own BEGIN rule placed ahead of the
  # @include lines (presumably so the included BotWikiAwk code can read it
  # during its own startup -- confirm against botwiki.awk).
  BotName = "austria"
}

@include "botwiki.awk"
@include "library.awk"

BEGIN {

  # Mode: "find" means search only and exit with a 1 (found something) or 0 (found nothing);
  #        in "find" mode, run via 'project -s' to search local cache for articles containing actionable matches.
  #       Any other value means process the article.
  Mode = "bot"

  # Case-insensitive matching for the whole run.
  IGNORECASE = 1

  # Loose run of whitespace/newlines allowed between "{{" and a template name.
  ReSpace = "[\n\r\t]*[ ]*[\n\r\t]*[ ]*[\n\r\t]*"

  # Command-line options:
  #   -s <file>   article.txt source to process
  #   -l <dir/>   directory where logging is sent (a trailing "/" is added if missing)
  #   -n <name>   Wikipedia name of article
  #   -h          show usage and exit
  Optind = Opterr = 1
  while ((C = getopt(ARGC, ARGV, "hs:l:n:")) != -1) {
      opts++
      if (C == "h") {
        usage()
        exit
      }
      else if (C == "s")
        articlename = verifyval(Optarg)
      else if (C == "l")
        logdir = verifyval(Optarg)
      else if (C == "n")
        wikiname = verifyval(Optarg)
  }

  # Without any option, or without a source file, there is nothing to do: report "0" changes.
  if ( ! opts || articlename == "" ) {
    stdErr("Error in austria.awk (1)")
    print "0"
    exit
  }

  # Logging goes to <logdir>/logaustria only when both the article name and a log directory are known.
  if (wikiname != "" && logdir != "") {
    if (logdir !~ /\/$/)
      logdir = logdir "/"
    Logfile = logdir "logaustria"
  }
  else
    Logfile = "/dev/null"

  Count = 0
  main()

}

#
# main - read the article named by the global 'articlename', run austria() on it and,
#        when something changed, write the new article text and an edit summary next
#        to the source file.
#
#   . prints the number of changes (global Count) on stdout, or "0" when nothing was
#     written, then exits.
#   . output files replace the trailing basename of 'articlename' with
#     "article.austria.txt" and "editsummary.austria.txt".
#
function main(  article,articlenew,articlenewname,editsummaryname,bn,summary) {

  checkexists(articlename, "austria.awk main()", "exit")
  article = readfile(articlename)
  if(length(article) < 10) {      # too short to be a real article
    print "0"
    exit
  }

  articlenew = austria(article)

  if(article != articlenew && length(articlenew) > 10 && Count > 0) {

    articlenewname = editsummaryname = articlename

    # Regex matching the source file's basename at the end of the path
    bn = basename(articlename) "$"

    gsub(bn, "article.austria.txt", articlenewname)
    printf("%s", articlenew) > articlenewname
    close(articlenewname)

    gsub(bn, "editsummary.austria.txt", editsummaryname)

    # Customize the edit summary to be more specific.
    # The text is emitted through "%s" so it is never interpreted as a printf format
    # (the original passed it as the format together with an unused Count argument).
    summary = "Add {{[[Template:Austria metadata Wikidata|Austria metadata Wikidata]]}} (via [[User:GreenC bot/Job 12|austria bot]])"
    printf("%s", summary) > editsummaryname
    close(editsummaryname)

    print Count
    exit

  }
  print "0"
  exit

}

#
# austria - main function
#
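#   . in an article containing {{Infobox settlement}}, point the population/area fields at
#     {{Austria population Wikidata}} and move them next to population_total (area fields
#     next to area_metro_km2 when present). Return modified article.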
#
function austria(article,  i,a,G,k,point_area,point_pop,c,re,z,N,fp) {

  # Parameters: article = full wikitext. Returns the rewritten wikitext, or the input
  # unchanged when the article is skipped. Increments the global Count on a rewrite.
  # Skipped articles are logged by name (global wikiname) under the global logdir.

  re = "[{]{2}" ReSpace "Infobox settlement"
  if(article !~ re) {
    # Redirection target parenthesized: an unparenthesized concatenation there is ambiguous.
    print wikiname >> (logdir "lognobox")
    return article
  }

  # population_total needed to orient where to insert fields. Skip and log if missing.

  if(article !~ /[|][ ]*population_total[ ]*[=][ ]*/) {
    print wikiname >> (logdir "lognopop")
    return article
  }

  delete G

  # Existing fields default values

  G["population_total"] =     "| population_total = {{Austria population Wikidata|population_total}}"
  G["population_as_of"] =     "| population_as_of = {{Austria population Wikidata|population_as_of}}"
  G["population_footnotes"] = "| population_footnotes = {{Austria population Wikidata|population_footnotes}}"
  G["area_footnotes"] =       "| area_footnotes   = {{Austria population Wikidata|area_footnotes}}"
  G["area_total_km2"] =       "| area_total_km2   = {{Austria population Wikidata|area_total_km2}}"

  # Existing fields actual values (if they exist)
  #
  # The whole line is captured, including a field with an empty value. The previous
  # pattern ("[^$]*[^$]") required at least one character after "=" and stopped at a
  # literal "$", so empty fields were left in place next to the newly inserted ones
  # (duplicate parameters) and values containing "$" were truncated.
  # When a field occurs on several lines the last one wins.

  for(i = 1; i <= splitn(article, a, i); i++) {
    for(k in G) {
      if(a[i] ~ ("^[ ]*[|][ ]*" k "[ ]*[=]")) {
        G[k] = a[i]
        break
      }
    }
  }

  # New fields values
  #
  # Each new line temporarily carries the field name with a "2" suffix so it cannot be
  # confused with the original line while the original is being deleted further down.
  # NOTE(review): subs() comes from the included library; it is assumed to be a literal
  # (non-regex) first-occurrence substitution -- confirm.
  # PROCINFO["sorted_in"] is set here and left set; it fixes the order of the
  # "for(z in ...)" loops below.

  PROCINFO["sorted_in"] = "@ind_str_asc"
  for(k in G) {
    if(G[k] !~ /Austria population Wikidata/) {
      # Keep everything up to and including the first "=" and replace the value.
      # Taking the prefix (instead of removing the first occurrence of the value text)
      # avoids damaging the field name when the value also occurs inside it,
      # e.g. "|area_total_km2=2".
      N[k] = substr(G[k], 1, index(G[k], "=")) " {{Austria population Wikidata|" k "}}"
    }
    else {
      N[k] = G[k]
    }
    N[k] = subs(k, k "2", N[k])
  }

  i = split(article, a, "\n")

  # Find location of population_total
  #
  # The line count i doubles as the "not found" marker, so a match on the very last
  # line is also treated as not found. That is kept on purpose: a last line has no
  # trailing "\n" and the deletion step below could not remove it.

  re = "^[ ]*[|][ ]*population_total[ ]*[=][ ]*"
  point_pop = i
  for(c = 1; c <= i; c++) {
    if(a[c] ~ re) {
      point_pop = c
    }
  }
  if(point_pop >= i) {
    print wikiname >> (logdir "lognopop")
    return article
  }

  # Find location of area_metro_km2 (0 = none)

  re = "^[ ]*[|][ ]*area_metro_km2[ ]*[=][ ]*"
  point_area = i
  for(c = 1; c <= i; c++) {
    if(a[c] ~ re) {
      point_area = c
    }
  }
  if(point_area >= i)
    point_area = 0

  # rebuild article with new fields in correct location within infobox

  for(c = 1; c <= i; c++) {
    if(c == point_pop) {
      if(point_area == 0) {      # No area_metro_km2, add all fields together
        for(z in N)
          fp = fp "\n" N[z]
      }
      else {
        for(z in N) {            # area_metro_km2 exists, add only the population fields
          if(z ~ /population/)
            fp = fp "\n" N[z]
        }
      }
      fp = fp "\n" a[c]
    }
    else if(c == point_area) {   # area_metro_km2 exists, add only the area fields
      for(z in N) {
        if(z ~ /area/)
          fp = fp "\n" N[z]
      }
      fp = fp "\n" a[c]
    }
    else if(c == 1)              # first line, don't add extra \n
      fp = a[1]
    else
      fp = fp "\n" a[c]
  }

  # delete the original fields (defaults that never existed in the article match nothing)

  for(z in G)
    fp = subs(G[z] "\n", "", fp)

  # remove the trailing "2" from new fields

  for(z in G)
    fp = subs(z "2", z, fp)

  # print fp > "o"

  Count++
  article = fp
  return article

}