source: asadb/util/previews.py

fysm-4-1, space-access, stable, stage, test-hooks
Last change on this file was 0265c6d, checked in by Alex Dehnert <adehnert@…>, 15 years ago

Add code to actually update previews

  • Property mode set to 100755
File size: 2.8 KB
Line 
1#!/usr/bin/python
2import sys
3import os, errno
4from subprocess import Popen, PIPE
5import tempfile
6import re
7import traceback
8
# Whitelist of characters that may appear in a value embedded between double
# quotes on a wkhtmltopdf --read-args-from-stdin line: printable ASCII (space
# through '~') except '"' and '\'.
# The pattern is anchored with \Z rather than $, because $ also matches just
# before a trailing newline and would accept "url\n", splitting the argument
# line that is later fed to wkhtmltopdf.
wkhtml_safe_pattern = re.compile(r'^[] !#-[^-~]+\Z')
# Extra wkhtmltopdf options: set all four page margins to 0mm.
wkhtml_args = ['--margin-bottom', '0mm', '--margin-top', '0mm', '--margin-left', '0mm', '--margin-right', '0mm', ]

def is_safe_for_wkhtml(url):
    """
    Check whether a string can be safely quoted on a wkhtmltopdf argument line.

    Returns a truthy match object when the whole of `url` is non-empty and
    consists only of characters allowed by wkhtml_safe_pattern; returns None
    otherwise (including for empty strings and strings containing newlines,
    double quotes or backslashes).
    """
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Checking %s" % (url, ))
    return wkhtml_safe_pattern.match(url)
15
def convert_pdf_to_jpg(pdf, dest, ):
    """
    Render the PDF at path `pdf` to a cropped, scaled JPEG at path `dest`.

    Any existing file at `dest` is removed first.  The conversion is done by
    an external pipeline equivalent to:

        pdftoppm < $pdf | pnmcrop | pnmscale 0.75 | pnmtojpeg --optimize > $dest

    Raises IOError/OSError if `pdf` cannot be opened, `dest` cannot be
    replaced, or one of the tools cannot be started.  Exit statuses of the
    tools are not checked, so a successful return does not guarantee a valid
    JPEG was written.
    """
    stages = (
        ['pdftoppm'],
        ['pnmcrop'],
        ['pnmscale', '0.75', ],
        ['pnmtojpeg', '--optimize', ],
    )
    # Both files carry binary data, so open them in binary mode; the with
    # blocks make sure the handles are closed even if a tool fails to start.
    with open(pdf, 'rb') as pdffile:
        try:
            os.unlink(dest)
        except OSError as exc:
            # A missing destination is fine; anything else is a real error.
            if exc.errno != errno.ENOENT:
                raise
        with open(dest, 'wb') as jpgfile:
            procs = []
            try:
                source = pdffile
                for index, argv in enumerate(stages):
                    is_last = (index == len(stages) - 1)
                    sink = jpgfile if is_last else PIPE
                    proc = Popen(argv, stdin=source, stdout=sink)
                    procs.append(proc)
                    if source is not pdffile:
                        # Drop the parent's copy of the pipe so the upstream
                        # tool gets SIGPIPE if this stage exits early.
                        source.close()
                    source = proc.stdout
            finally:
                # Reap every stage that was started (not just the last one)
                # so no zombie processes are left behind.
                for proc in procs:
                    if proc.stdout is not None:
                        proc.stdout.close()
                    proc.wait()
    print("Theoretically, wrote JPG to '%s'" % (dest, ))
32
def generate_webpage_previews(websites):
    """
    Generate previews of websites.

    Takes one argument --- a list of (source url, destination image
    location) pairs.

    Each safe URL is rendered to a temporary PDF by a single wkhtmltopdf
    run (invoked as 'util/wkhtmltopdf', i.e. relative to the current
    working directory), and each PDF is then converted to a JPG at its
    destination.  The temporary PDFs are removed before returning.

    Returns a list of (url, errmsg, ) pairs indicating failed conversions.
    URLs rejected by is_safe_for_wkhtml and exceptions raised while
    converting a PDF to JPG are reported this way rather than raised.
    """

    preview_requests = []
    jpg_convert_requests = []
    tmpfiles = []
    failures = []
    try:
        for url, dest in websites:
            if not is_safe_for_wkhtml(url):
                failures.append((url, "URL '%s' not safe for wkhtml" % (url, ), ))
                continue
            tmpfile = tempfile.NamedTemporaryFile(delete=False)
            tmpfile.close()
            # Track the file before anything else can fail so it is cleaned up.
            tmpfiles.append(tmpfile.name)
            # Explicit check instead of assert: asserts vanish under -O, and
            # the name is embedded in the wkhtmltopdf argument line below.
            if not is_safe_for_wkhtml(tmpfile.name):
                failures.append((
                    url,
                    "Temporary file '%s' not safe for wkhtml" % (tmpfile.name, ),
                ))
                continue
            preview_requests.append('"%s" "%s"' % (url, tmpfile.name,))
            jpg_convert_requests.append((url, tmpfile.name, dest,))

        # Nothing to render: do not spawn wkhtmltopdf at all.
        if preview_requests:
            wkhtml = Popen(['util/wkhtmltopdf', '--read-args-from-stdin', ] + wkhtml_args, stdin=PIPE, )
            # Every request passed the safety check, so it is printable ASCII;
            # encoding yields the bytes the pipe expects on Python 2 and 3.
            wkhtml.communicate("\n".join(preview_requests).encode('ascii'))

        for url, pdf, dest in jpg_convert_requests:
            try:
                convert_pdf_to_jpg(pdf, dest)
            except Exception:
                # Record the failure and carry on with the remaining sites.
                failures.append((
                    url,
                    "URL '%s' not JPGized:\n%s" % (url, traceback.format_exc()),
                ))
    finally:
        # The temp files were created with delete=False, so remove them here.
        for name in tmpfiles:
            try:
                os.unlink(name)
            except OSError as exc:
                if exc.errno != errno.ENOENT:
                    raise

    return failures
71
def generate_webpage_preview(url, dest):
    """
    Generate a preview of a single website.

    Wraps generate_webpage_previews for one (url, dest) pair.  Returns the
    error message of the first reported failure, or None if no failure was
    reported.
    """
    problems = generate_webpage_previews([(url, dest), ])
    return problems[0][1] if problems else None
78
if __name__ == '__main__':
    # Manual smoke test: render two MIT sites to JPGs under /tmp.
    print("In main")
    generate_webpage_previews([
        ("http://ua.mit.edu/", "/tmp/uamitedu.jpg", ),
        ("http://scripts.mit.edu/", "/tmp/scripts.jpg", ),
    ])
Note: See TracBrowser for help on using the repository browser.