<!DOCTYPE html>
<html>
<head>
<meta charset='utf-8' />
<meta http-equiv="X-UA-Compatible" content="chrome=1" />
<meta name="description" content="Aama.github.io : Afro-Asiatic Morphology Archive" />
<link rel="stylesheet" type="text/css" media="screen" href="stylesheets/stylesheet.css">
<title>aama</title>
</head>
<body>
<!-- HEADER -->
<div id="header_wrap" class="outer">
<header class="inner">
<a id="forkme_banner" href="https://github.com/aama">View on GitHub</a>
<h1 id="project_title">aama</h1>
<h2 id="project_tagline">Afro-Asiatic Morphology Archive</h2>
</header>
</div>
<!-- MAIN CONTENT -->
<div id="main_content_wrap" class="outer">
<section id="main_content" class="inner">
<h3>
<a name="welcome-to-aama---the-afro-asiatic-morphology-archive" class="anchor" href="#welcome-to-aama---the-afro-asiatic-morphology-archive"><span class="octicon octicon-link"></span></a>Welcome to AAMA - the Afro-Asiatic Morphology Archive.</h3>
<h4>Getting Started (v8)</h4>
<h5>Overview</h5>
<ul style="list-style-type:none;">
<li><a href="#project">1. The AAMA Project</a></li>
<li><a href="#software">2. Install required software</a></li>
<li><a href="#config">3. Configure Application</a></li>
<li><a href="#aamaquery">4. Run Application</a></li>
<li><a href="#aamaupdate">5. Remote Data and Webapp Update</a></li>
</ul>
<p><a href="#dataschema">Appendix 1: The Data Schema</a></p>
<p><a href="#datafilelist">Appendix 2: The Data Files</a></p>
<p><a href="#aamabibliography">Appendix 3: Source Bibliography</a></p>
<h5>Details</h5>
<ul style="list-style-type:none;">
<li id="project">
<h4>1. Introduction: The AAMA Project</h4>
<p>The purpose of the AAMA Project is to create a morphological
archive whose data can be:
<ul>
<li>curated (edited/created) -- and hopefully shared!</li>
<li>inspected</li>
<li>manipulated</li>
<li>queried</li>
<li>compared</li>
</ul>
on individual browsers.</p>
<p>
The archive itself is neutral as far as the use it is put to is concerned:
comparison, reference, diachronic and synchronic research, pedagogical,
. . . But ultimately it is hoped that the archive will make available and
comparable the major morphological paradigms of a wide variety of
Afroasiatic languages in all the major families, and in the longer term help
situate these morphologies with respect to one another within Afroasiatic.
Ultimately we hope also that the archive and its accompanying software may serve
as a tool for exploration of typology and structure of the form of linguistic
organization known as the paradigm.
</p>
<p>As presently configured the AAMA project consists of
three interconnected modules:</p>
<ul>
<li><a href="#datafiles">Data Files</a></li>
<li><a href="#rdfdatastore">RDF Datastore</a></li>
<li><a href="#qinterface">Provisional Query/Display Interface</a></li>
</ul>
<ul style="list-style-type:none;">
<li id="datafiles">
<h4>1.1 Data Files</h4>
<p>
The base component of the archive is an extensible collection of morphological
paradigms in a consistent and comparable format from a large selection of
Afroasiatic languages. The data in itself is application-neutral, and could
be cast into any plausible datastore format, and used in conjunction with tools
and query-and-display applications constructed using any appropriate programming
tools.
</p>
<p>As an initial step presently archived data-files cover principally the verbal and
pronominal morphological paradigms of thirty-three Cushitic and
six Omotic languages as represented in a selection of currently available monographic
and scholarly publications (see bibliography). In addition there are files
with parallel sample data covering five Semitic languages and two varieties
of Egyptian taken for the moment from standard texts -- limited Berber and Chadic data is
in the process of being entered. The intention behind the project is,
with the help of collaborators, to extend the scope
of the archive to include eventually as complete a
representation as possible of all branches of
the Afroasiatic language complex.</p>
<p> Nominal paradigms are systematically included in the archive whenever
they have been present in the underlying monographic source. However
we have found that Cushitic-Omotic nominal morphosyntax does not lend
itself as exhaustively to straightforward word-level paradigmatic
treatment as pronominal and verbal. We are experimenting with various
consistent ways to systematically treat at least case, number,
focus morphosyntax across the archive.</p>
<h4>The Paradigm</h4>
<p>Informally we can define "Paradigm" in its simplest and most obvious
sense as:
<ul>
<li><em>Any presentation of one or more linguistic forms
("tokens": words, affixes, clitics, stems, etc.), which share
a set of morphological property/value pairs, and which vary
systematically along the values of another set of
properties. </em></li>
</ul>
<p>For consistency within the archive, we are using JSON as
normative/persistent paradigm format, which allows
a reasonable, human-readable/-editable
approximation to traditional paradigm notation. To illustrate what is
by far the most common data structure in the archive, the paradigm,
consider what traditionally would be termed:</p>
<ul>
<li> <em>the number, person gender paradigm of the imperfect
affirmative of the Burunge glide-stem verb xaw-'come'</em></li>
</ul>
<p>In table form:</p>
<table>
<thead>
<tr>
<th>Number </th><th>Person</th><th>Gender</th><th>Token</th>
</tr>
</thead>
<tbody>
<tr>
<td>Singular</td><td>Person1</td><td>Common</td><td><em>xaw</em></td>
</tr>
<tr>
<td>Singular</td><td>Person2</td><td>Common</td><td><em>xaydă</em></td>
</tr>
<tr>
<td>Singular</td><td>Person3</td><td>Masc</td><td><em>xay</em></td>
</tr>
<tr>
<td>Singular</td><td>Person3</td><td>Fem</td><td><em>xaydă</em></td>
</tr>
<tr>
<td>Plural</td><td>Person1</td><td>Common</td><td><em>xaynă</em></td>
</tr>
<tr>
<td>Plural</td><td>Person2</td><td>Common</td><td><em>xayday</em></td>
</tr>
<tr>
<td>Plural</td><td>Person3</td><td>Common</td><td><em>xayay</em></td>
</tr>
</tbody>
</table>
<p>
Our normative/persistent data format, JSON, is a rigorously defined system
of terms (term), strings ("string"), ordered lists
(<code>[a, b, c, d]</code>), maps/"dictionaries" (<code>{a: b, c: d, . . .}</code> read "the value
of property a is b; of property c is d, . . .") and unordered sets
(<code>#{a, b, c, d}</code>). It is reliably transformable into a consistent
Resource Description Framework (RDF) notation, while at the same time
providing a human-readable natural format for data-entry and inspection.
</p>
<p>
Our current JSON structure (cf. below)
while open to extension and revision,
seems to provide a natural notation for the verbal and
pronominal inflectional paradigms encountered in Afroasiatic,
and perhaps for inflectional paradigms generally.
</p>
<p>
Paradigms are formally rendered in AAMA's JSON format by a nested
data-structure we call a "termcluster", where entities are either
labels/indices or data strings (enclosed in quotes), and
where square brackets ("[ ]") enclose lists and braces ("{ }") enclose
indexed lists ("dictionaries"), so that the paradigm just seen in table form would be
rendered by the following data structure: </p>
<pre>
{"termcluster": {"label": "burunge-VBaseImperfGlideStemBaseForm-xaw",
                 "note": "Kiessling1994 ## 7.2.2,7.2.3",
                 "common": {
                     "tam": "Imperfect",
                     "polarity": "Affirmative",
                     "stemClass": "GlideStem",
                     "pos": "Verb",
                     "lexeme": "xaw"
                 },
                 "terms": [["number", "person", "gender", "token"],
                           ["Singular", "Person1", "Common", "xaw"],
                           ["Singular", "Person2", "Common", "xaydă"],
                           ["Singular", "Person3", "Masc", "xay"],
                           ["Singular", "Person3", "Fem", "xaydă"],
                           ["Plural", "Person1", "Common", "xaynă"],
                           ["Plural", "Person2", "Common", "xayday"],
                           ["Plural", "Person3", "Common", "xayay"]]
                }
}
</pre>
<p>
Here, "termcluster" is itself an indexed list, with a unique "label"
and a "note" property, which always indicates at least the paradigm's
published source, in addition to other possible properties; "common"
is an indexed list of the morphological property=value pairs common
to every member of the paradigm ("tam=Imperfect", "polarity=Affirmative", "stemClass=GlideStem",
"pos=Verb", "lexeme=xaw"), and the list of lists "terms" has as its first member
a list of the paradigm term <em>properties</em> (= paradigm column heads), while
each subsequent member list contains the <em>values</em>, in order, of the
properties. You will observe that we rigorously follow the convention of
referring to properties in lower-case, while values are always
capitalized.
</p>
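<p>To make the structure concrete, such a termcluster can be read back and tabulated with a few lines of Python (a sketch using only the standard library; the abbreviated paradigm shown is taken from the example above):</p>

```python
import json

# An abbreviated termcluster in the AAMA JSON format (two rows of the
# Burunge example above).
raw = """
{"termcluster": {"label": "burunge-VBaseImperfGlideStemBaseForm-xaw",
                 "common": {"tam": "Imperfect", "polarity": "Affirmative",
                            "stemClass": "GlideStem", "pos": "Verb",
                            "lexeme": "xaw"},
                 "terms": [["number", "person", "gender", "token"],
                           ["Singular", "Person1", "Common", "xaw"],
                           ["Plural", "Person3", "Common", "xayay"]]}}
"""

cluster = json.loads(raw)["termcluster"]
header, *rows = cluster["terms"]

# Pair the column-head properties of the first "terms" list with the
# values in each subsequent row.
for row in rows:
    term = dict(zip(header, row))
    print(term["number"], term["person"], term["gender"], term["token"])
```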
<p><em> Any or all of the data files
can be downloaded from the AAMA site, and
corrections to the existing files and submission, for on-line
sharing, of new language files are hereby solicited!</em></p>
</li>
<li ><h4>Note on Paradigm Labels in AAMA</h4>
<p>
In this application, for the purposes of display, comparison,
modification, in the various select-lists, checkbox-lists, and
text-input fields, paradigms are labeled as a comma-separated string of
the shared value components of the paradigm. The property associated with
each value is automatic, since we follow the convention that no two property names share
an identical value name. If the properties whose values constitute the rows
of the paradigm are not <code>number, person, case, gender</code> (far and away the most frequent case),
they are given in a comma-separated list after a delimiter '%'.
In the, frequently long, paradigm lists automatically
generated from the JSON file by the "Create Paradigm Lists" utility, for
ease in processing the first two properties are always pos
(part-of-speech) and morphClass, and, for ease in reading the
'property=' part of the label is omitted. Thus the full form of the label of the paradigm
illustrated above would be:
</p>
<pre>
pos=Verb,lex=xaw,polarity=Affirmative,stemClass=Glide,tam=Imperfect%number,person,gender
</pre>
In practical paradigm lists it would appear as:
<pre>
Verb,xaw,Affirmative,Glide,Imperfect
</pre>
<p> and might occur in a list as:</p>
<pre>
. . .
Verb,qadid,Affirmative,DentalStem,Perfect
Verb,qadid,Affirmative,DentalStem,Subjunctive
Verb,xaw,Affirmative,GlideStem,Imperfect
Verb,xaw,Affirmative,GlideStem,Perfect
Verb,xaw,Affirmative,GlideStem,Subjunctive
. . .
</pre>
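<p>Mechanically, such a label is just the values of the paradigm's "common" section joined in a fixed property order. A minimal sketch (the property order and the <code>short_label</code> helper are illustrative assumptions, not the actual <code>pdgmPropOrder</code> logic):</p>

```python
# Build the short comma-separated paradigm label used in pick-lists:
# values only, with pos first, in an assumed fixed property order.
def short_label(common,
                prop_order=("pos", "lexeme", "polarity", "stemClass", "tam")):
    return ",".join(common[p] for p in prop_order if p in common)

common = {"tam": "Imperfect", "polarity": "Affirmative",
          "stemClass": "GlideStem", "pos": "Verb", "lexeme": "xaw"}

print(short_label(common))  # Verb,xaw,Affirmative,GlideStem,Imperfect
```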
</li>
<li id="rdfdatastore">
<h4>1.2 A Resource Description Framework (RDF) Datastore and Related Tools</h4>
<p>The data archive will hopefully serve a number of
research and reference purposes. One such purpose is the creation of a
query-able datastore, which will enable easy manipulation and combination and comparison
of morphological information within and between different languages and language families.
To this end we have elected to set up such a
datastore using the W3C-sanctioned
<a href="https://www.w3.org/RDF">RDF</a> format. </p>
<p>Very good introductions to RDF datastores and the associated
SPARQL query
language can be found on their respective
<a href="https://www.w3.org/TR/sparql11-overview/">W3C</a> home pages. But, very
basically, RDF involves:</p>
<ol>
<li><em>Identifying units of information, and assigning them URL-like
unique Uniform Resource Identifiers (<a href="https://www.rfc-editor.org/rfc/rfc3986">URI</a>)
-- on URI, see especially
<a href=https://www.rfc-editor.org/rfc/rfc1630>Tim Berners-Lee 1994</a>
and much subsequent literature
on the value and necessity of assigning to each resource a unique
identifier. </em>
<p>For example, in a paradigm cited above from the burunge-pdgms.json
file one of the possible values of the property<em> tam </em>
(TenseAspectMode) is <em>Imperfect</em>. In the corresponding
full rdf/xml-format file <em>burunge-pdgms.rdf</em>, we have assigned to the property
<em>tam</em> the full URI:</p>
<pre>
<em>&lt;http://id.oi.uchicago.edu/aama/2013/burunge/tam&gt;</em>
</pre>
<p>where the first part of this URI will be common to all Burunge morphological
properties and values. In the more readable
<a href="https://www.w3.org/TeamSubmission/turtle/">TTL</a>
RDF notation format,
this URI would be notated:
<pre>
<em>brn:tam</em>
</pre>
while the Burunge TTL file would
contain in a brief abbreviation section (typically five to ten items) the entry:</p>
<pre>
<em>@prefix brn: &lt;http://id.oi.uchicago.edu/aama/2013/burunge/&gt; .</em>
</pre>
<p>Similarly, the value <em>Imperfect</em>, which has the URI:</p>
<pre>
<em>&lt;http://id.oi.uchicago.edu/aama/2013/burunge/Imperfect&gt;</em>
</pre>
<p>would be, in the more widely used <code>ttl</code> notation:
<pre>
<em>brn:Imperfect</em>
</pre>
</p>
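<p>The relation between the abbreviated <code>ttl</code> form and the full URI is a simple prefix substitution, which can be sketched as:</p>

```python
# Expand a ttl-style prefixed name (e.g. "brn:Imperfect") to its full URI,
# using the @prefix abbreviations declared at the top of the ttl file.
prefixes = {"brn": "http://id.oi.uchicago.edu/aama/2013/burunge/",
            "aama": "http://id.oi.uchicago.edu/aama/2013/"}

def expand(qname):
    prefix, _, local = qname.partition(":")
    return "<" + prefixes[prefix] + local + ">"

print(expand("brn:tam"))        # <http://id.oi.uchicago.edu/aama/2013/burunge/tam>
print(expand("brn:Imperfect"))  # <http://id.oi.uchicago.edu/aama/2013/burunge/Imperfect>
```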
</li>
<li><em>Representing the complex pieces of information involving these
concepts by organizing these conceptual units into tripartite
statements called 'triples' </em>
<p>Triples are conventionally noted:</p>
<pre>
<em>s p o . </em>
</pre>
<p>and usually, but without semantic prejudice, read: </p>
<pre>
'subject' 'predicate' 'object' .
</pre>
<p>For example, as one might expect, an extremely common triple in a
datastore like AAMA is of the form:</p>
<pre>
paradigmTermID-s hasProperty-p withValue-o .
</pre>
<p>Thus if the first term of the JSON paradigm given above had the
pdgmTermID <em>aama:d3c483b1</em> one of the (many) triples
describing it would be (in the <code>ttl</code> notation):</p>
<pre>
aama:d3c483b1 brn:tam brn:Imperfect .
</pre>
<p>Where <em>aama:</em> is the <code>ttl</code> abbreviation for </p>
<pre>
<em>&lt;http://id.oi.uchicago.edu/aama/2013/&gt;</em>
</pre>
<p>And the ttl representation of the first row of the paradigm might be:</p>
<pre>
<em>aama:d3c483b1 brn:number brn:Singular .
aama:d3c483b1 brn:person brn:Person1 .
aama:d3c483b1 brn:gender brn:Common .
aama:d3c483b1 brn:token "xaw" .</em>
</pre>
<p>each stating, for example, that 'the :person property of the term has the value
:Person1'</p>
<p>. . . and so forth. A good way to see practically the relation between
the JSON data file and its RDF transform is to take a look at a paradigm
of interest in the
JSON and TTL versions of a language data file of interest: e.g.
<pre>
burunge-pdgms:{termclusters:[{label:"brn-VerbGlideStem-xaw-ImperfectAffirmative"}]}
</pre>
and its corresponding RDF transformation in the <em>burunge-pdgms.ttl</em> file.</p>
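<p>The core of that transform — one triple per property of each paradigm term — can be sketched as follows (a simplification; the actual <code>pdgmDict-json2ttl.py</code> also handles prefix declarations, term-ID generation, and escaping, and the function name here is hypothetical):</p>

```python
def row_to_triples(term_id, header, row, lang_prefix="brn"):
    """Emit one ttl triple per property of a paradigm term.
    The string-valued token is quoted; other values are prefixed names."""
    triples = []
    for prop, value in zip(header, row):
        obj = '"%s"' % value if prop == "token" else "%s:%s" % (lang_prefix, value)
        triples.append("aama:%s %s:%s %s ." % (term_id, lang_prefix, prop, obj))
    return triples

header = ["number", "person", "gender", "token"]
for t in row_to_triples("d3c483b1", header,
                        ["Singular", "Person1", "Common", "xaw"]):
    print(t)
```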
</li>
</ol>
<p>Not surprisingly it takes a very large number of triples to describe
even a moderately large datastore (AAMA on a recent count had 987,911).
But they are very rapidly produced and indexed (a few seconds per
language using the AAMA pdgmDict-json2ttl.py program), efficiently stored,
and permit extremely quick access to information for display, comparison,
manipulation, and reasoning. As mentioned, among the RDF tools in the on-line
material, there is a Python script for
transforming the (JSON) data files into appropriate RDF datastore
(ttl) format, and a set of scripts to upload data files to a local
Fuseki RDF server.</p>
<p>Transformation of morphological properties and values to formal URIs, and organization
into sets of triples is necessary
in order to build a SPARQL-queriable datastore, and also
valuable for distinguishing terminologies and
building nomenclatures and ontologies. But in practice, although RDF is an extremely interesting topic in itself,
running the
relevant scripts for transforming, adding to, or correcting archive-data from the json
files (usually done via an application menu choice), requires no special
knowledge about RDF datastores. Some knowledge of the structure of an
RDF datastore and the SQL-like SPARQL query language however IS required
if you want, for example, to submit a new genre of
query to the datastore in order to extract new information.</p>
<p>Pending an on-line publicly accessible datastore, you
can set one up on your own computer. Instructions
are given below for setting up the Fuseki RDF server on an individual
machine, and loading the data into it.</p>
</li>
<li id="qinterface" ><h4>1.3 Query/Display User Interface</h4>
<p> The directory 'webappy' contains a set of Python scripts which constitute
the elements of a rather basic 'proof-of-concept' application with a prototype interface:</p>
<ol>
<li>A set of Python scripts which index the paradigm files, set up the material for the menu
and select lists and input forms, and programmatically transform the JSON files into
ttl. These are principally:
<pre>
pdgmDict-schemata.py
pdgmDict-lexemes.py
pdgmDict-pvlists.py
pdgmDict-json2ttl.py
</pre>
</li>
<li>A set of shell scripts to launch the Fuseki datastore and add new or corrected data to it, and
to upload or download new or corrected data to or from the remote repository:
<pre>
fuseki.sh
aama-datastore-update.sh
aama-cp2lngrepo.sh
aama-pulldata.sh
</pre>
</li>
<li>
A set of Python scripts to choose, display, and manipulate morphological material within
and between language families. For the moment we are using the native Python
Tcl/tk-derived tkinter graphic library, although we plan to return to a unified menu-based
browser application, similar to our earlier Clojure-based application. The principal Python
scripts in this version are:
<pre>
pdgmDisp-baseApp-PDGM.py
pdgmDisp-baseApp-GPDGM.py
pdgmDispUI-formsearch.py
</pre>
These scripts generally work as follows:
<ol>
<li>They gather requested language and morphological property and value
information via an array of form selection-list, checkbox,
and text-input mechanisms;</li>
<li>formulate them into a SPARQL query, </li>
<li>which is submitted to the datastore, returning a CSV response,</li>
<li>which in turn is typically formatted into one or more tables
using 'pandas' and other Python libraries.</li>
</ol>
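<p>The last two steps — a CSV response formatted into a table — can be sketched with the standard library alone (the scripts themselves use 'pandas'; the sample response below is illustrative):</p>

```python
import csv
import io

# A CSV response of the kind Fuseki returns for a paradigm query.
csv_response = """number,person,gender,token
Singular,Person1,Common,xaw
Plural,Person3,Common,xayay
"""

table = list(csv.reader(io.StringIO(csv_response)))
header, data = table[0], table[1:]

# Fixed-width rendering of the result table.
widths = [max(len(row[i]) for row in table) for i in range(len(header))]
for row in table:
    print("  ".join(cell.ljust(w) for cell, w in zip(row, widths)))
```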
<p>Below we give instructions for downloading, launching, and
initializing the app. More details on the scripts are available in the
aama/webappy
<a href="https://github.com/aama/webappy/edit/main/README.md">README</a> .
Also, a brief demo video of an earlier HTML/Clojure version can be seen at
<a href="https://youtu.be/JNCJsR28SEE">AAMA DEMO</a>.</p>
</li>
</ol>
</li>
<li id="software">
<h4>2. How to Install the Required Software</h4>
<p>Although we plan for the AAMA digital application to be hosted on a site
where its data can be consulted, and to a certain extent manipulated, online we anticipate that
most users will want to download the application and a selection (or all!) of the data, and work
with it on their own machine, perhaps including data of their own, which they might wish to propose uploading
to the home site, along with proposals for modifications and additions to the data-manipulation software.</p>
<p>At the moment, pending the creation of an appropriate executable, what we can propose, in addition
to the downloading of a choice of the data files of interest, is the downloading and running of the set of
scripts which constitute the application.</p>
<p><h5>Note on Git client</h5>
The AAMA project uses <a
href="http://github.com">GitHub</a> to store data
and tools; you will need a git client in order to
download the tools repository and the data
repositories you are interested in. Follow the
instructions at
<a href="https://help.github.com/articles/set-up-git#platform-mac">Set Up Git</a>.
</p>
<p>Note that you do not need to create a GitHub
account unless you want to edit the data or code.
Instructions for how to do that are below.
</p>
</li>
<ul style="list-style-type:none;">
<li>
<h4>2.1 Set up aama directory</h4>
<p>
We will assume that the data is placed in a directory called 'aama-data'
and application software is to be placed in a directory called
'webappy'. So create and switch to an <code>aama</code> directory
structure on your local drive, e.g.
<pre>
~/ $ mkdir aama-data
~/ $ mkdir webappy
~/ $ cd webappy
~/webappy $ mkdir bin
~/webappy $ cd ../aama-data
~/aama-data $ mkdir data
</pre>
</p>
</li>
<li>
<h4>2.2 Install Apache Jena Fuseki</h4>
<p>
Fuseki is the SPARQL server we are using to query the dataset. <a
href="http://jena.apache.org/download/index.cgi">Download</a>
the current <code>apache-jena-fuseki-n-n-n distribution</code>
distribution (either the zip file or the tar file; NB, make
sure your Java JDK is up-to-date with the download)
and store it in a convenient location.
<code>~/jena</code> is a good place.
The following steps will install the <code>aama</code> dataset
and verify that it runs. Further information about Fuseki,
as well as information and links about RDF linked data and
the SPARQL query language can be found at the
<a href="http://jena.apache.org/index.html"> Apache Jena</a>
site. </p>
</li>
<li id="datatools">
<h4>2.3. Download data</h4>
<p>Take a look at the <a
href="https://github.com/aama">Aama repositories</a> and
decide which languages interest you. In general we use
one repository per language, or in some cases, language
variety, e.g. <a
href="https://github.com/aama/beja-hud">beja-hud</a> is the
variety of Beja described by Richard Hudson in
<a href="#HDp">Hudson1967</a>, while
<a href="https://github.com/aama/beja-van">beja-van</a> is the
variety of Beja described by Martine Vanhove in
<a href="#MV14">Vanhove2014</a>,
etc.
</p>
<p>Now you need to download the data to your local
hard drive. Create a <code>data</code> directory
inside the <code>aama-data</code> directory,
e.g. <code>~/aama-data $ mkdir data</code>. Then clone
each language repository into the data directory:
<pre>
~/ $ cd aama-data/data
~/aama-data/data $ git clone https://github.com/aama/afar.git
~/aama-data/data $ git clone https://github.com/aama/geez.git
~/aama-data/data $ git clone https://github.com/aama/yemsa.git</pre>
</p>
<p>
Alternatively, you can create a personal github
account, <i>fork</i> the aama repositories (copy them
to your account), and then clone your repositories to
your local drive. See <a
href="https://help.github.com/articles/fork-a-repo">Fork
a Repo</a> for details.
</p>
</li>
<li>
<h4>2.4. Download Application Code</h4>
<p>
From your home directory, clone the
aama GitHub application repository into <code>~/webappy</code>:
<pre>
~/ $ git clone https://github.com/aama/webappy.git
</pre>
</p>
<p>
The Python scripts (<code>. . . .py</code>) will remain in this directory, while the
shell and query scripts (<code>. . . .sh</code>, <code>. . . .qr</code>) should be moved to
the <code>~/webappy/bin</code> subdirectory.
</p>
</li>
</ul>
<p>
When you have finished, your directory structure should look
like this (assuming you have cloned afar, geez, and
yemsa):
<pre>
~/
|-aama-data/
|--data/
|---afar/ <em>afar.json, afar.ttl </em>
|---geez/ <em>geez.json, geez.ttl</em>
|---yemsa/ <em>yemsa.json, yemsa.ttl</em>
|-jena/
|--apache-jena-fuseki-n.n.n/ <em>aamaconfig.ttl, . . . </em>
|-webappy/ <em>pdgmDict-schemata.py, . . . , pdgmDisp-baseApp.py, . . .</em>
|--bin/ <em>fuseki.sh, . . . list-graphs.rq, . . .</em>
</pre>
</p>
</li>
<h4>3. Configuring the Application</h4>
<li id="config">
<h4>3.1 Prepare data lists and indices</h4>
<p>
The <code>schemata</code> and <code>lexemes</code> sections of the downloaded
json files can be transformed to RDF and incorporated into the datastore
as is. However, if any changes are made to the property, value, or lexeme inventories,
the relevant <code>pdgmDict</code> scripts should be run. The <code>pdgmDict-pvlist</code>
script must be run when a language-file is first incorporated into the datastore in order to
generate a pick-list of paradigms.
</p>
<ol>
<li><em>pdgmDict-schemata.py</em>
<p>
This script generates a json
property:value-list dictionary <code>pvlists/[LANG]-schemata.json</code> to be
substituted into the <code>[LANG]-pdgms.json</code> file every time a property
or value is added or changed in any way. It also generates a more
succinct version of the schemata dictionary, <code>pvlists/[LANG]-pdgm-PVN.txt</code>,
which is used in the basic paradigm display, along with the current
value of the <code>pdgmPropOrder</code> variable from the <code>[LANG]-pdgms.json</code> file,
and which determines the order of the properties whose values constitute
the paradigm 'name'.
</p>
</li>
<li><em>pdgmDict-lexemes.py</em>
<p>
The lexemes associated with paradigms in the <code>LANG-pdgms.json</code> (and
corresponding <code>LANG-pdgms.ttl</code>) files are identified by a <code>lexemeID</code> which
could in principle be any arbitrary alphanumeric symbol sequence, but which in
practice and for memnonic convenience are lower-ascii approximations of the
lexeme's lemma. This script, in conjunction with <code>pdgmDict-lexCheck.py</code> and
<code>pdgmDict-lexRev.py</code>, generates a <code>lexemes</code> section consisting of dummy
lexeme entries (e.g., <code>lemma = '[x]', gloss = '[y]'</code>) for every lexemeID.
This 'lexemes' section must be subsequently filled out by hand (or in an ideal
case be linked programmatically with a digital lexicon of the language in
question).
</p>
</li>
<li><em>pdgmDict-pvlists.py</em>
<p>
This script generates a file containing, for
each paradigm, a comma-separated list of the values of
each of the morphological properties enumerated in the <code>common</code> section
of the term cluster - thus uniquely identifying each paradigm within
the language, and, with the addition of a language designation, within
the whole AAMA paradigm corpus. It also generates two <code>db</code> files,
<code>pvlists/[LANG]-pdgmdb.db</code> and <code>pvlists/[LANG]-labldb.db</code>, which link
each paradigm 'name' respectively with a full 'property:value' list
and a paradigm 'label' more-or-less arbitrarily assigned to the paradigm
when the json file was first created.
</p>
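<p>Such <code>db</code> files can be created and consulted with Python's standard <code>dbm</code> module; a sketch with illustrative keys and values (not the actual file contents):</p>

```python
import dbm
import os
import tempfile

# Write and read back a small name -> property:value-list mapping, in the
# spirit of pvlists/[LANG]-pdgmdb.db (key and value are illustrative).
path = os.path.join(tempfile.mkdtemp(), "burunge-pdgmdb")
name = "Verb,xaw,Affirmative,GlideStem,Imperfect"
pvlist = "pos=Verb,lexeme=xaw,polarity=Affirmative,stemClass=GlideStem,tam=Imperfect"

with dbm.open(path, "c") as db:
    db[name] = pvlist

with dbm.open(path, "r") as db:
    print(db[name].decode())
```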
</li>
</ol>
<h4>3.2 Generate RDF data from morphological data files</h4>
<p>
In order to convert JSON-format data files to TTL ("turtle"
-- a more easily human-readable RDF format), you will
use the <code>pdgmDict-json2ttl.py</code> file in the
<code>webappy</code> directory. The <code>aama-datastore-update.sh</code>
shell script will call <code>aama-ttl2fuseki.sh</code> which in turn
will convert the .ttl file to the rdf-xml which is needed for
uploading to the Fuseki SPARQL service.
</p>
<p>
For convenience, an already-generated TTL version is included
with each language's JSON file.
Since the JSON file is the normative/persistent data format,
any corrections or additions you want to make should be made
in this file; and if any changes are made to the JSON file, you must then generate new TTL/RDF
files to be uploaded to the SPARQL server. And in fact, as
long as you observe the above structure for JSON
files, you can create any number of new language files of your
own, transform them to RDF format, and upload them to the
SPARQL server for querying.
</p>
<h4>3.3 Upload RDF data to SPARQL service</h4>
<p>
In order to upload the RDF files to Fuseki, you must
first start the server by running:
<pre>
~/aama $ webappy/bin/fuseki.sh
</pre>
This script, like the following, assumes that the current version
of Fuseki, for the moment <code>apache-jena-fuseki-3.16.0</code>, has
been placed in the <code>jena</code> directory, and that
the file <code>aamaconfig.ttl</code> has been copied
to the Fuseki version directory; the
scripts should be edited for the correct locations if this
is not the case. When run for the first time, you will notice
that the script, which references the configuration file
<code>aamaconfig.ttl</code>, will have placed a (for the
moment empty) data
sub-directory <code>aama</code> in the
<code>jena/apache-jena-fuseki-3.16.0/</code> directory.
</p>
<p>
The following script:
<pre>
~/aama $ webappy/bin/aama-datastore-update.sh "../aama-data/data/[LANG]"
</pre>
will load the relevant LANG-pdgms.ttl file in <code>aama-data/data/[LANG]</code>
into the Fuseki server.
</p>
<p>
It also automatically runs the queries <code>count-triples.rq</code>
("How many triples are there in the datastore?") and
<code>list-graphs.rq</code> ("What are the URIs of the
language subgraphs?"), from the directory
<code>webappy/bin</code>.
If the upload has been successful, you will see an output such as
the following (assuming again that afar, geez, and yemsa are the
languages which have been cloned into aama-data/data/).
<pre>
Query: bin/fuquery-gen.sh bin/count-triples.rq
?sTotal
33871
Query: bin/fuquery-gen.sh bin/list-graphs.rq
?g
&lt;http://oi.uchicago.edu/aama/2013/graph/afar&gt;
&lt;http://oi.uchicago.edu/aama/2013/graph/geez&gt;
&lt;http://oi.uchicago.edu/aama/2013/graph/yemsa&gt;
</pre>
</p>
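<p>For scripting outside the shell wrappers, the same queries can be submitted to Fuseki over HTTP. The sketch below is hypothetical: the <code>/aama/query</code> endpoint path follows Fuseki's usual convention for a dataset named <code>/aama</code>, and the query body is a guess consistent with the <code>?sTotal</code> column shown above, not the actual contents of <code>count-triples.rq</code>.</p>

```python
import urllib.parse
import urllib.request

# Fuseki conventionally exposes a dataset's SPARQL endpoint at
# http://localhost:3030/<dataset>/query; /aama is the dataset name
# configured in aamaconfig.ttl.
ENDPOINT = "http://localhost:3030/aama/query"

# A guess at the count-triples query, consistent with the ?sTotal
# result column shown in the sample output above.
COUNT_TRIPLES = "SELECT (COUNT(*) AS ?sTotal) WHERE { GRAPH ?g { ?s ?p ?o } }"

def build_query_request(query: str) -> urllib.request.Request:
    """Build a POST request asking for CSV results, the format the
    pdgmDisp-* scripts print to the terminal."""
    data = urllib.parse.urlencode({"query": query}).encode("utf-8")
    return urllib.request.Request(
        ENDPOINT,
        data=data,
        headers={"Accept": "text/csv"},
        method="POST",
    )

req = build_query_request(COUNT_TRIPLES)
# Against a running server one would then do:
#   with urllib.request.urlopen(req) as resp:
#       print(resp.read().decode("utf-8"))
print(req.full_url, req.get_method())
```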
</li>
<li id="aamaquery">
<h4>4. Running the Application</h4>
The SPARQL service can be accessed to explore the morphological
data via two distinct interfaces:
<ul style="list-style-type:none;">
<li>
<h4>4.1 The Apache Jena Fuseki interface</h4>
<p>
You can see this on your browser at
<code>localhost:3030</code> after you launch Fuseki.
SPARQL queries can be run directly
against the datastore
in the Fuseki Control Panel on the
<code>localhost:3030/dataset.html</code> page
(select the <code>/aama</code>
dataset when prompted). Note that the <code>pdgmDisp-...</code>
scripts automatically write to the terminal all SPARQL queries generated
in the course of the computation. These queries can be copied and
pasted into the Fuseki panel for inspection and debugging.
</p>
</li>
<li>
<h4>4.2 An application specifically oriented to AAMA data</h4>
<p>
A preliminary menu-driven GUI application will have
already been downloaded following the instructions outlined
above in <a href="#datatools">Download data, tools, and application code</a>.
This application demonstrates the
use of SPARQL query templates for display and comparison of
paradigms and morphosyntactic properties and categories.
It is written in Python, which has a very engaged
community of users who have created a formidable
and constantly growing set of libraries. However, essentially
the same functionality could be achieved by any software
framework which can provide a web interface for handling
SPARQL queries submitted to an RDF datastore.
<p>There are at present three major options for paradigm/morphology
display and manipulation, governed by three Python scripts. They can
be invoked through a menu choice, or run independently as Python scripts.
All of them involve lining up a data framework, converting this into a
SPARQL query by one of the functions in <code>pdgmDispQuery.py</code>, running
the SPARQL query, and displaying the result in some appropriate format.</p>
<ol>
<li> <em>A basic paradigm display function</em>
<p>
<pre>
pdgmDisp-baseApp-PDGM.py
</pre>
has as its graphic setup a two-column
display with, in the left column, a language select-list; a language
choice results in a middle-box display of the property-value inventory
of the language's paradigm set, along with an indication of the order of
properties in the value-list names of the paradigm, followed by a
select-list of paradigm 'names'
(<a href="../images/screenshot-PDGM-afr-som-pres.png">baseApp-PDGM-screen</a>).
A 'Display Paradigm' button at the
bottom of the column results in a sequentially numbered display, in
the right text-box, of the paradigm name, source, notes if present,
and the paradigm itself. This can be seen in
<img src="../images/screenshot-PDGM-afr-som-pres.png" alt="baseApp-PDGM-screen" width="500" height="600">
</p>
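<p>The way a paradigm 'name' is assembled from property values in the order given by <code>pdgmPropOrder</code> can be sketched as follows. This is a hypothetical reconstruction: the separator, the function name, and the property and value names are illustrative, not taken from an actual <code>LANG-pdgms.json</code> file.</p>

```python
def pdgm_label(common: dict, prop_order: list) -> str:
    """Join a paradigm's common property values in pdgmPropOrder
    order, skipping properties the paradigm does not use."""
    return "-".join(common[p] for p in prop_order if p in common)

# Illustrative values only; not drawn from an actual language file.
common = {"pos": "Verb", "tam": "Perfect", "polarity": "Affirmative"}
order = ["pos", "polarity", "tam"]
print(pdgm_label(common, order))  # → Verb-Affirmative-Perfect
```

Reordering <code>order</code> is, in effect, what the <code>pdgmDisp-pnames.py</code> script described below lets the user experiment with.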
</li>
<li><em>A generalization of the notion 'paradigm'</em>
<p>
<pre>
pdgmDisp-baseApp-GPDGM.py
</pre>
Here we have the same structure of a set of 'common'
property-value pairs and a "table" of 'terms', except that for the
properties <code>common</code> and
<code>terms</code> the set of property-values associated with each is
completely at the discretion of the investigator. Obviously the regular
paradigms of the 'LANG-pdgms.json' files are a special case of GPDGM displays,
as are a very large number of <code>common</code>-<code>term</code> combinations which do not
correspond to any possible or occurring form. But by exploring occurring
combinations of interest, including ones which involve distinct languages,
this display routine opens up the possibility of many potentially interesting
and relevant form tables.
</p>
</li>
<li> <em>A function to search morphological features across the whole datastore</em>
<p>
<pre>
pdgmDisp-formSearch.py
</pre>
This serves for the comparison of
realizations of a given property/value combination in different
languages. Two or more languages can be selected from the upper-right
language-selection list. In an Entry box below a list of
desired property=value combinations can be entered. A <code>Find Form</code> button
will display a list of paradigms in the designated languages where
the property=value combination list occurs. One or more of these
paradigms can be selected and registered by the 'Choose Paradigm' button,
and the 'Display Paradigm' button will display the chosen paradigms in
the lower right-side text box. Finally, the paradigms in question can
then be combined as in the baseApp script.
</p>
<p>
For example, one might want to see whether second-person feminine
pronominal forms, singular or plural, are distinguished in Arabic and
Coptic-Sahidic, and compare the way they are marked. One would choose
Arabic and Coptic-Sahidic in the language select-list, enter
<code>person=Person2,gender=Fem,pos=Pronoun,number=?number</code> in the 'Entry'
box, push the 'Find Forms' button and see in the upper right select
box the relevant forms, and the label of the paradigm in which each
form occurs. One could then select any paradigms of interest and see
the full paradigm, sequentially numbered, in the lower right-hand text
box. For more precise comparison, one could then go on to 'combine'
the paradigms as in the baseApp script.
</p>
</ol>
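<p>The property=value entry format used by the form-search script can be illustrated with a small parser sketch. This is hypothetical code, not the script's actual implementation; it simply separates fixed values from SPARQL-style <code>?variable</code> values left open for the query.</p>

```python
def parse_property_list(entry: str):
    """Split a comma-separated property=value entry string into
    fixed bindings and properties left as SPARQL query variables."""
    fixed, variables = {}, []
    for pair in entry.split(","):
        prop, _, value = pair.strip().partition("=")
        if value.startswith("?"):
            variables.append(prop)   # value left open as ?variable
        else:
            fixed[prop] = value
    return fixed, variables

# The entry string from the Arabic/Coptic-Sahidic example above.
fixed, variables = parse_property_list(
    "person=Person2,gender=Fem,pos=Pronoun,number=?number")
print(fixed, variables)
```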
<p>
In addition there are:
</p>
<ul>
<li><em>A paradigm-name generation function</em>
<p>
<pre>
pdgmDisp-pnames.py
</pre>
simply enables the user to generate and display, presumably on
an experimental basis, a "paradigm-name" list in a different order from that
dictated by the <code>pdgmPropOrder</code> feature of the <code>LANG-pdgms.json</code> file.
</p>
</li>
<li><em>The Query-Generating Function</em>
<p>
This function is called by each of the above scripts to generate the
appropriate SPARQL queries.
<pre>
pdgmDispQuery.py
</pre>
Each of the display scripts described above
finds the data it displays by running a SPARQL query against the AAMA datastore.
These queries are formed from the display data request by one or more of the
<code>query()</code> functions contained in this script and which have been imported
into the display script. The query itself, and its CSV output, are
for the moment printed to the terminal (or eventually to a log file).
</p>
</li>
</ul>
<li id="aamaupdate">
<h4>Remote Data and Webapp Update</h4>
<p>AAMA is an on-going project. Its data is constantly being updated,
corrected, and added to; the accompanying web application
is in a process of constant revision.
To ensure that your data and web app are up-to-date you should
periodically run the following shell scripts, which assume that
git has been installed and that the data and webapp have been
cloned from the master version in the manner outlined above.
</p>
<p>The following script:
<pre>
~/aama $ webappy/bin/aama-pulldata.sh data/[LANG]
</pre>
will update the JSON language data file in the
data/[LANG] directory.
</p>
<p>While:
<pre>
~/aama $ webappy/bin/aama-pulldata.sh "data/*"
</pre>
will update the JSON language data files in all the
data/[LANG] directories.
</p>
<p>Once revised (or new) JSON files have been installed, remember
to run the appropriate scripts to transform them to ttl
format and to load them into the SPARQL server, as outlined above.
</p>
<p>
Finally, the script:
<pre>
~/aama $ tools/bin/aama-pullwebappy.sh
</pre>
will update the files of the web application.
</p>
</li>
</ul>
<h4 id="dataschema"> Appendix 1: The Data Schema </h4>
<p>Basic structure:</p>
<p>In outline
each language JSON file has the following structure (see any of
the LANGUAGE-pdgms.json files for a concrete example, and see below
for explanation of terms):
</p>
<pre>
{
|-"lang:" "language name"
|-"subfamily:" "language subfamily name"
|-"lgpref:" "string representing 3-character ns prefix used for the
URI of language-specific morphosyntactic properties
and values"
|-"datasource:" "bibliographic source(s) for the data in the file"
|-"datasourceNotes:" "remarks, if necessary, about datasource"
|-"transcription:" "remarks, if necessary, about how the transcription should be interpreted
w.r.t. normative AAMA transcription. (The transcription of the 'LANG-pdgms.json' file is always that of the datasource.)"
|-"geodemoURL:" "on-line geo-/demo-graphical information about the language"
|-"geodemoTXT:" "short textual summary of geo-/demographical information"
|-"schemata:" {
"associative map of each morphosyntactic property used in the inflectional paradigms with a list of its values"
}
|-"lexemes:" {
"associative map of each paradigmatic 'lexeme'-ID with an indication of
its lemma, gloss, part-of-speech and possibly other properties relevant to the
collection of paradigms -- a provisional stand-in for a reference to a true digital lexicon".
}
|-"pdgmPropOrder:" "list of paradigm properties in the order their values are to be listed in paradigm labels (can be programmatically altered)"
|-"termclusters:" [
"label-ordered list of term-clusters/paradigms,
each of which has the structure:"
{
|----"label:" "descriptive label assigned to the term-cluster at data-entry"
|----"note:" "bibliographic reference to source of paradigm data; plus other
remarks, if necessary"
|----"common:" {
"map of property-value pairs which all members of the
termcluster have in common"
}
|----"terms:" [[
"list of lists, the first of which enumerates the
properties which differentiate individual terms, while the others list, in order, the value of the i-th
property -- in fact, a property-value-table realization of the distinct property-
value pairs of the lexeme in question"
]
]
}
...
]
}
</pre>
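<p>As a concrete, entirely invented minimal instance of this schema, the following Python snippet builds and serializes such a structure. All language, property, and value names are illustrative, and the row-wise reading of <code>terms</code> (first row = differentiating properties, later rows = one term each) is one interpretation of the notes above.</p>

```python
import json

# A minimal, invented instance of the LANG-pdgms.json schema; nothing
# here is drawn from an actual AAMA data file.
minimal = {
    "lang": "samplelang",
    "subfamily": "Sample Subfamily",
    "lgpref": "smp",
    "datasource": "Author, Title (Year)",
    "schemata": {"person": ["Person1", "Person2", "Person3"],
                 "number": ["Sing", "Plur"]},
    "lexemes": {"lex1": {"lemma": "kitab", "gloss": "book", "pos": "Noun"}},
    "pdgmPropOrder": ["pos", "number"],
    "termclusters": [
        {
            "label": "Noun-Sing",
            "note": "Invented example paradigm",
            "common": {"pos": "Noun", "number": "Sing"},
            # First row names the differentiating properties; each later
            # row gives one term's values for those properties, in order.
            "terms": [["person", "token"],
                      ["Person1", "kitab-i"],
                      ["Person2", "kitab-ka"]],
        }
    ],
}
print(json.dumps(minimal, indent=1)[:60], "...")
```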
<h4 id="datafilelist"> Appendix 2: The Data Files </h4>
<p>At present the following data files are available:</p>
<ul>
<li><a href="https://github.com/aama/aari">Aari</a></li>
<li><a href="https://github.com/aama/afar">Afar</a></li>
<li><a href="https://github.com/aama/akkadian-ob">Akkadian-ob</a></li>
<li><a href="https://github.com/aama/alaaba">Alaaba</a></li>
<li><a href="https://github.com/aama/arabic">Arabic</a></li>
<li><a href="https://github.com/aama/arbore">Arbore</a></li>
<li><a href="https://github.com/aama/awngi">Awngi</a></li>
<li><a href="https://github.com/aama/bayso">Bayso</a></li>
<li><a href="https://github.com/aama/beja-alm">Beja-alm</a></li>
<li><a href="https://github.com/aama/beja-hud">Beja-hud</a></li>
<li><a href="https://github.com/aama/beja-rei">Beja-rei</a></li>
<li><a href="https://github.com/aama/beja-rop">Beja-rop</a></li>
<li><a href="https://github.com/aama/beja-van">Beja-van</a></li>
<li><a href="https://github.com/aama/beja-wed">Beja-wed</a></li>
<li><a href="https://github.com/aama/berber-ghadames">Berber-ghadames</a></li>
<li><a href="https://github.com/aama/bilin">Bilin</a></li>
<li><a href="https://github.com/aama/boni-jara">Boni-jara</a></li>
<li><a href="https://github.com/aama/boni-kijee-bala">Boni-kijee-bala</a></li>
<li><a href="https://github.com/aama/boni-kilii">Boni-kilii</a></li>
<li><a href="https://github.com/aama/burji">Burji</a></li>
<li><a href="https://github.com/aama/burunge">Burunge</a></li>
<li><a href="https://github.com/aama/coptic-sahidic">Coptic-sahidic</a></li>
<li><a href="https://github.com/aama/dahalo">Dahalo</a></li>
<li><a href="https://github.com/aama/dhaasanac">Dhaasanac</a></li>
<li><a href="https://github.com/aama/dizi">Dizi</a></li>
<li><a href="https://github.com/aama/egyptian-middle">Egyptian-middle</a></li>
<li><a href="https://github.com/aama/elmolo">Elmolo</a></li>
<li><a href="https://github.com/aama/gawwada">Gawwada</a></li>
<li><a href="https://github.com/aama/gedeo">Gedeo</a></li>
<li><a href="https://github.com/aama/geez">Geez</a></li>
<li><a href="https://github.com/aama/hadiyya">Hadiyya</a></li>
<li><a href="https://github.com/aama/hausa">Hausa</a></li>
<li><a href="https://github.com/aama/hdi">Hdi</a></li>
<li><a href="https://github.com/aama/hebrew">Hebrew</a></li>
<li><a href="https://github.com/aama/iraqw">Iraqw</a></li>
<li><a href="https://github.com/aama/kambaata">Kambaata</a></li>
<li><a href="https://github.com/aama/kemant">Kemant</a></li>
<li><a href="https://github.com/aama/khamtanga">Khamtanga</a></li>
<li><a href="https://github.com/aama/koorete">Koorete</a></li>
<li><a href="https://github.com/aama/maale">Maale</a></li>
<li><a href="https://github.com/aama/mubi">Mubi</a></li>