source: asadb/groups/load_people.py

stablestage
Last change on this file was 89165c1, checked in by Alex Dehnert <adehnert@…>, 12 years ago

Track student status with moira "affiliation"

Moira recently added an "affiliation" field with information about whether a
person is a student, staff, etc. This changes the ASA DB to use that field by
default to determine student status. For some people with suppressed directory
information ("secret people"), however, affiliation will be "affiliate" but
should be "student". Those people can have their record changed to use a "loose
student" algorithm that considers somebody a student if their affiliation or
account class indicates that they're a student.

  • Property mode set to 100755
File size: 5.8 KB
Line 
1#!/usr/bin/python
2
3import sys
4import os
5
if __name__ == '__main__':
    # Script mode only: put the parent of this file's directory and that
    # directory's own parent on sys.path, and select the Django settings
    # module, before the project imports further down are attempted.
    cur_file = os.path.abspath(__file__)
    django_dir = os.path.abspath(
        os.path.join(os.path.dirname(cur_file), os.pardir))
    proj_dir = os.path.abspath(os.path.join(django_dir, os.pardir))
    sys.path.extend([django_dir, proj_dir])
    os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
13
14import groups.models
15
16import collections
17import datetime
18
19from django.db import transaction
20
fields = [
    # Django field, in order matching the input fields
    'username',
    'mit_id',
    'first_name',
    'last_name',
    'account_class',
    'affiliation_basic',
    'affiliation_detailed',
]


def load_dcm(dcm_stream):
    """Parse a tab-separated DCM dump into a dict keyed by username.

    Each non-blank line of ``dcm_stream`` must hold at least ``len(fields)``
    tab-separated columns, in the order given by ``fields``. Extra columns
    are ignored. Blank (whitespace-only) lines are skipped.

    Args:
        dcm_stream: iterable of text lines (e.g. an open file or stdin).

    Returns:
        dict mapping username to a dict of ``{field name: column value}``.
        If a username occurs more than once, the last line wins.

    Raises:
        ValueError: if a non-blank line has fewer columns than ``fields``.
    """
    dcm_people = {}
    for line_number, raw_line in enumerate(dcm_stream, 1):
        # Trim spaces and line terminators only. Tabs are the column
        # separator, so stripping them would silently drop trailing empty
        # columns (e.g. a record with no affiliation) and break the parse.
        line = raw_line.strip(" \r\n")
        if not line.strip():
            continue
        field_list = line.split("\t")
        if len(field_list) < len(fields):
            raise ValueError(
                "DCM line %d has %d field(s); expected at least %d"
                % (line_number, len(field_list), len(fields)))
        # zip() stops at the end of `fields`, so surplus columns are ignored.
        field_dict = dict(zip(fields, field_list))
        dcm_people[field_dict['username']] = field_dict
    return dcm_people
42
def _sync_people(dcm_people):
    """Reconcile AthenaMoiraAccount rows against ``dcm_people``.

    Does the actual work for ``load_people``. ``dcm_people`` is consumed:
    entries matched to an existing account are removed, and whatever is left
    is inserted as new accounts. Commits every 100 loop iterations and after
    each of the two passes; rolling back on failure is left to the caller.
    """
    django_people = groups.models.AthenaMoiraAccount.objects.all()
    stats = dict.fromkeys(
        ('loops', 'changed', 'mut_ign', 'unchanged',
         'del', 'pre_del', 'undel', 'add'),
        0)
    stats['django_people'] = len(django_people)
    stats['dcm_people'] = len(dcm_people)
    stat_people = collections.defaultdict(list)

    # Pass 1: update, undelete or delete the accounts Django already has.
    for django_person in django_people:
        stats['loops'] += 1
        if stats['loops'] % 100 == 0:
            transaction.commit()
        mutable = django_person.mutable
        username = django_person.username

        if username in dcm_people:
            # Still in the dump; pop it so only new people remain afterwards.
            dcm_person = dcm_people.pop(username)
            changed = False
            changes = []

            # Check for changes: first fields, then deletions
            for key in fields:
                old_value = getattr(django_person, key)
                new_value = dcm_person[key]
                if old_value != new_value:
                    changed = True
                    if key == 'mit_id':
                        # Keep ID numbers out of the change report.
                        changes.append((key, '[redacted]', '[redacted]', ))
                    else:
                        changes.append((key, old_value, new_value))
                    if mutable:
                        setattr(django_person, key, new_value)
            if django_person.del_date is not None:
                # Previously marked deleted but back in the dump.
                changed = True
                if mutable:
                    django_person.del_date = None
                    stats['undel'] += 1
                    changes.append(('[account]', '[deleted]', '[undeleted]', ))
                    # Reported here and again under 'changed' below.
                    stat_people['undel'].append((username, changes))

            if not changed:
                stats['unchanged'] += 1
            elif mutable:
                django_person.mod_date = datetime.date.today()
                django_person.save()
                stats['changed'] += 1
                stat_people['changed'].append((username, changes))
            else:
                # Immutable record: report the difference but do not save it.
                stats['mut_ign'] += 1
                stat_people['mut_ign'].append((username, changes))

        elif django_person.del_date is not None:
            # Not in the dump, and already marked deleted earlier.
            stats['pre_del'] += 1

        else:
            # Not in the dump: mark as deleted (unless immutable).
            changes = [('account_class', django_person.account_class, '[deleted]')]
            if mutable:
                django_person.del_date = datetime.date.today()
                django_person.save()
                stats['del'] += 1
                stat_people['del'].append((username, changes))
            else:
                stats['mut_ign'] += 1
                stat_people['mut_ign'].append((username, changes))

    transaction.commit()

    # Pass 2: import new people from the DCM
    for dcm_person in dcm_people.values():
        stats['loops'] += 1
        if stats['loops'] % 100 == 0:
            transaction.commit()
        django_person = groups.models.AthenaMoiraAccount()
        for key in fields:
            setattr(django_person, key, dcm_person[key])
        django_person.add_date = datetime.date.today()
        stats['add'] += 1
        changes = [('account_class', '[missing]', dcm_person['account_class'], )]
        stat_people['add'].append((django_person.username, changes))
        django_person.save()
    transaction.commit()

    return stats, stat_people


@transaction.commit_manually
def load_people(dcm_people):
    """Synchronize AthenaMoiraAccount rows with a parsed DCM dump.

    For every existing account:
      * present in the dump: copy over any differing ``fields`` values,
        clear ``del_date`` if it was set, and stamp ``mod_date``;
      * absent from the dump and not yet deleted: set ``del_date`` to today.
    Accounts whose ``mutable`` attribute is false are never saved; their
    pending changes are only counted and reported under ``mut_ign``.
    Dump entries with no matching account are inserted with ``add_date`` set
    to today.

    Work is committed in batches of 100 loop iterations, so a failure part
    way through leaves earlier batches committed.

    Args:
        dcm_people: dict mapping username to a dict holding every name in
            ``fields`` (the shape produced by ``load_dcm``). It is not
            modified; a shallow copy is consumed internally.

    Returns:
        ``(stats, stat_people)``. ``stats`` maps the counter names
        ``loops``, ``django_people``, ``dcm_people``, ``changed``,
        ``mut_ign``, ``unchanged``, ``del``, ``pre_del``, ``undel`` and
        ``add`` to ints. ``stat_people`` maps a category name (``changed``,
        ``mut_ign``, ``del``, ``undel``, ``add``) to a list of
        ``(username, changes)`` pairs, where ``changes`` is a list of
        ``(field, old, new)`` tuples.

    Raises:
        Whatever the underlying model or database operations raise; the
        uncommitted part of the transaction is rolled back first.
    """
    try:
        # Shallow copy so the caller's dict is not emptied as a side effect.
        return _sync_people(dict(dcm_people))
    except BaseException:
        # Under commit_manually every exit path must end in an explicit
        # commit or rollback; without this, Django raises
        # TransactionManagementError on the way out and hides the real error.
        transaction.rollback()
        raise
150
151
if __name__ == '__main__':
    # Phase 1: parse the DCM dump arriving on standard input.
    print("Phase 1 (DCM parsing): starting at %s" % (datetime.datetime.now(), ))
    dcm_people = load_dcm(sys.stdin)
    print("Phase 1 (DCM parsing): complete at %s" % (datetime.datetime.now(), ))

    # Phase 2: apply the parsed dump to the Django database.
    print("Phase 2 (Django updating): starting at %s" % (datetime.datetime.now(), ))
    stats, stat_people = load_people(dcm_people)
    print("Phase 2 (Django updating): complete at %s" % (datetime.datetime.now(), ))

    # Summary counters, one per line.
    summary_template = """
Loop iterations:    %(loops)6d
Initial in Django:  %(django_people)6d
People in DCM:      %(dcm_people)6d
Already Deleted:    %(pre_del)6d
Unchanged:          %(unchanged)6d
Changed:            %(changed)6d
Change ignored:     %(mut_ign)6d
Deleted:            %(del)6d
Undeleted:          %(undel)6d
Added:              %(add)6d
"""
    print(summary_template % stats)

    # Per-person detail, grouped by change category with a blank line
    # after each group.
    for change_type in stat_people:
        for person, changes in stat_people[change_type]:
            print("%12s\t%12s\t%s" % (change_type, person, changes, ))
        print("")
Note: See TracBrowser for help on using the repository browser.