[RARSLAVE] Add PAR2 Parser
author Ira W. Snyder <devel@irasnyder.com>
Mon, 25 Dec 2006 01:45:56 +0000 (17:45 -0800)
committer Ira W. Snyder <devel@irasnyder.com>
Mon, 25 Dec 2006 01:45:56 +0000 (17:45 -0800)
Add the class par2parser which parses PAR2 files to extract the list of
files which are "protected" by them.

Signed-off-by: Ira W. Snyder <devel@irasnyder.com>
par2parser.py [new file with mode: 0644]

diff --git a/par2parser.py b/par2parser.py
new file mode 100644 (file)
index 0000000..784bcea
--- /dev/null
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+# vim: set ts=4 sts=4 sw=4 textwidth=92:
+
+################################################################################
+# The PAR2 Parser
+#
+# This was stolen from cfv (see http://cfv.sourceforge.net/ for a copy)
+################################################################################
+
+import struct, errno, os
+
+def get_full_filename (dir, file):
+    """Join dir and file, expand a leading '~', and return the absolute path."""
+    joined = os.path.join (dir, file)
+    expanded = os.path.expanduser (joined)
+    return os.path.abspath (expanded)
+
+def chompnulls(line):
+    """Return line cut off just before its first NUL character.
+
+    A line that contains no NUL character is returned unchanged.
+    """
+    nul_at = line.find('\0')
+    if nul_at >= 0:
+        return line[:nul_at]
+    return line
+
+def get_protected_files (dir, filename):
+    """Return the names of the files protected by a PAR2 file.
+
+    dir and filename are combined by get_full_filename() to locate the PAR2 file.
+    The file is read packet by packet; every packet's MD5 hash is verified, the
+    file names are taken from the file description packets, and the file IDs
+    listed in the first main packet must all have a description packet.
+
+    Returns a list of file names exactly as stored in the PAR2 file (trailing NUL
+    padding removed), one per distinct file ID, in order of first appearance.
+    If the PAR2 file cannot be opened, a message is printed and [] is returned.
+
+    Raises EnvironmentError (errno.EINVAL) when the file is corrupt: a truncated
+    or undersized packet, a packet hash mismatch, a missing main packet, or an
+    expected file description packet that never appears.
+    """
+
+    # hashlib replaces the deprecated md5 module; fall back for old interpreters.
+    # Resolved once here instead of re-importing for every packet.
+    try:
+        from hashlib import md5 as md5_new
+    except ImportError:
+        from md5 import new as md5_new
+
+    full_filename = get_full_filename (dir, filename)
+
+    # Only I/O failures mean "could not open"; a bare except would also swallow
+    # KeyboardInterrupt and SystemExit.
+    try:
+        par2_file = open(full_filename, 'rb')
+    except (IOError, OSError):
+        print 'Could not open %s' % (full_filename, )
+        return []
+
+    # Packet header: magic, packet length, packet md5, set id, packet type
+    pkt_header_fmt = '< 8s Q 16s 16s 16s'
+    pkt_header_size = struct.calcsize(pkt_header_fmt)
+    # File description body: file id, file md5, md5 of first 16k, file size,
+    # followed by the file name
+    file_pkt_fmt = '< 16s 16s 16s Q'
+    file_pkt_size = struct.calcsize(file_pkt_fmt)
+    # Main body: slice size, number of files, followed by 16-byte file ids
+    main_pkt_fmt = '< Q I'
+    main_pkt_size = struct.calcsize(main_pkt_fmt)
+    file_id_size = 16
+
+    seen_file_ids = set()
+    expected_file_ids = None
+    filenames = []
+
+    # try/finally guarantees the file is closed on every exit path, including
+    # the corruption errors raised below.
+    try:
+        while 1:
+            header = par2_file.read(pkt_header_size)
+            if not header:
+                break
+
+            # A partial header would otherwise surface as a struct.error
+            if len(header) < pkt_header_size:
+                raise EnvironmentError(errno.EINVAL,
+                    "corrupt par2 file - truncated packet header")
+
+            magic, pkt_len, pkt_md5, set_id, pkt_type = \
+                struct.unpack(pkt_header_fmt, header)
+
+            # A length smaller than the header would make read() receive a
+            # negative count and slurp the rest of the file.
+            if pkt_len < pkt_header_size:
+                raise EnvironmentError(errno.EINVAL,
+                    "corrupt par2 file - bad packet length")
+
+            body = par2_file.read(pkt_len - pkt_header_size)
+
+            # The packet hash covers everything after the hash field itself:
+            # the tail of the header (from offset 0x20) plus the whole body.
+            # A short read of the body is caught here as a hash mismatch.
+            control_md5 = md5_new()
+            control_md5.update(header[0x20:])
+            control_md5.update(body)
+
+            if control_md5.digest() != pkt_md5:
+                raise EnvironmentError(errno.EINVAL,
+                    "corrupt par2 file - bad packet hash")
+
+            if pkt_type == 'PAR 2.0\0FileDesc':
+                if len(body) < file_pkt_size:
+                    raise EnvironmentError(errno.EINVAL,
+                        "corrupt par2 file - truncated file description packet")
+
+                file_id = struct.unpack(file_pkt_fmt, body[:file_pkt_size])[0]
+
+                # Only the first description seen for a file id contributes a name
+                if file_id not in seen_file_ids:
+                    seen_file_ids.add(file_id)
+                    filenames.append(chompnulls(body[file_pkt_size:]))
+
+            elif pkt_type == "PAR 2.0\0Main\0\0\0\0":
+                # Only the first main packet is used
+                if expected_file_ids is None:
+                    if len(body) < main_pkt_size:
+                        raise EnvironmentError(errno.EINVAL,
+                            "corrupt par2 file - truncated main packet")
+
+                    # Every complete 16-byte id after the fixed fields is expected
+                    # (recovery-set and non-recovery-set ids alike).
+                    id_count = (len(body) - main_pkt_size) // file_id_size
+                    ids_end = main_pkt_size + id_count * file_id_size
+                    expected_file_ids = [body[pos:pos + file_id_size]
+                        for pos in range(main_pkt_size, ids_end, file_id_size)]
+
+            # Any other packet type has already been read and verified; skip it.
+    finally:
+        par2_file.close()
+
+    if expected_file_ids is None:
+        raise EnvironmentError(errno.EINVAL,
+            "corrupt or unsupported par2 file - no main packet found")
+
+    for expected_id in expected_file_ids:
+        if expected_id not in seen_file_ids:
+            raise EnvironmentError(errno.EINVAL,
+                "corrupt or unsupported par2 file - "
+                "expected file description packet not found")
+
+    return filenames
+
+def main ():
+    """Placeholder entry point; it currently does nothing."""
+    return None
+
+# Call main() only when this file is run as a script, not when it is imported.
+if __name__ == '__main__':
+       main ()
+