| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
|
|---|
| 4 |
|
|---|
| 5 |
|
|---|
| 6 |
|
|---|
| 7 |
|
|---|
| 8 |
|
|---|
| 9 |
|
|---|
| 10 |
|
|---|
| 11 |
|
|---|
| 12 |
|
|---|
| 13 |
|
|---|
| 14 |
|
|---|
| 15 |
|
|---|
| 16 |
|
|---|
| 17 |
|
|---|
| 18 |
|
|---|
| 19 |
|
|---|
| 20 |
|
|---|
| 21 |
|
|---|
| 22 |
|
|---|
| 23 |
|
|---|
| 24 |
|
|---|
| 25 |
|
|---|
| 26 |
__doc__ = """ |
|---|
| 27 |
This module demonstrates how you can use amplee to mirror an existing AtomPub service to your local machine, indexing each entry as it is imported. |
|---|
| 28 |
|
|---|
| 29 |
Basically just run the module as-is after changing the service document URI and username and password if required. |
|---|
| 30 |
|
|---|
| 31 |
This will fetch the service document, look up each collection it defines and import each of that collection's members. |
|---|
| 32 |
|
|---|
| 33 |
This module is not foolproof: it may break if the service document or the atom entries are doing something funny, but it might be a good starting point for anyone. |
|---|
| 34 |
|
|---|
| 35 |
Once the import is done, you'll have a new directory called 'repository' whose sub-directories reflect each collection found in the service document. |
|---|
| 36 |
|
|---|
| 37 |
You will also have a file named 'index.p' representing the indexed atom entries. You can then do something such as, from a python interpreter: |
|---|
| 38 |
|
|---|
| 39 |
>>> from miror import setup_index |
|---|
| 40 |
>>> i = setup_index() |
|---|
| 41 |
>>> from datetime import datetime |
|---|
| 42 |
>>> ui = i.retrieve('ui') |
|---|
| 43 |
>>> r0 = ui.between(datetime(2007, 10, 24, 19, 50), datetime(2007, 10, 24, 20, 10)) |
|---|
| 44 |
>>> ci = i.retrieve('ci') |
|---|
| 45 |
>>> r1 = ci.lookup(term='test') |
|---|
| 46 |
>>> ai = i.retrieve('ai') |
|---|
| 47 |
>>> r2 = ai.lookup('some name') |
|---|
| 48 |
>>> r0 & r1 | r2 |
|---|
| 49 |
""" |
|---|
| 50 |
|
|---|
| 51 |
__author__ = ['Sylvain Hellegouarch'] |
|---|
| 52 |
__license__ = 'BSD' |
|---|
| 53 |
__version__ = '0.1.0' |
|---|
| 54 |
|
|---|
| 55 |
from urlparse import urlparse |
|---|
| 56 |
import os |
|---|
| 57 |
|
|---|
| 58 |
base_dir = os.getcwd() |
|---|
| 59 |
|
|---|
| 60 |
import httplib2 |
|---|
| 61 |
import amara |
|---|
| 62 |
from amplee.loader import AtomPubLoader |
|---|
| 63 |
from amplee.atompub.member import MemberResource |
|---|
| 64 |
from amplee.utils import extract_url_trail |
|---|
| 65 |
from amplee.indexer import * |
|---|
| 66 |
|
|---|
| 67 |
class ResourceWrapper(MemberResource):
    """Member resource whose identifier is derived from its edit link."""

    def generate_resource_id(self, entry=None, slug=None, info=None):
        """Build the resource id from the entry's ``rel="edit"`` link.

        The id is the trailing part of the edit link's URL, as returned
        by ``extract_url_trail``, with ``'.atom'`` appended.  ``slug``
        and ``info`` are accepted but not used.

        Raises IndexError when the entry carries no ``rel="edit"`` link.
        """
        edit_link = entry.xml_xpath('atom:link[@rel="edit"]')[0]
        trail = extract_url_trail(edit_link.href)
        return trail + '.atom'
|---|
| 71 |
|
|---|
| 72 |
def run(service_uri, username=None, password=None):
    """Mirror the AtomPub service described at ``service_uri``.

    Fetches the service document, imports (and indexes) the members of
    every collection it declares, then writes the service document out
    as ``service.xml`` under ``base_dir``.

    service_uri -- URI of the remote service document.
    username, password -- optional credentials; they are registered
        with the HTTP client only when ``username`` is truthy.

    Propagates whatever ``load_service_document`` raises when the
    service document cannot be retrieved.
    """
    # HTTP responses are cached in a '.cache' directory under base_dir.
    h = httplib2.Http(os.path.join(base_dir, '.cache'))
    if username:
        h.add_credentials(username, password)

    index = setup_index()
    # Only the first element of the returned pair is needed here.
    service, _ = load_service_document(h, service_uri)
    import_members(h, service, index)

    print("Saving service document as service.xml")
    f = open(os.path.join(base_dir, 'service.xml'), 'w')
    try:
        f.write(service.to_service().xml(indent=True))
    finally:
        # Release the handle even if serialising or writing fails.
        f.close()
|---|
| 85 |
|
|---|
| 86 |
def setup_index():
    """Create the indexer used while importing entries.

    Five indexes are registered, all sharing one ``ShelveContainer``
    stored at ``index.p`` under ``base_dir``:

    'pi' -- PublishedIndex, day granularity
    'ui' -- UpdatedIndex, minute granularity
    'ei' -- EditedIndex, minute granularity
    'ai' -- AuthorIndex, also indexing e-mail and URI
    'ci' -- CategoryIndex

    Returns the populated ``Indexer``.
    """
    indexer = Indexer()
    shelf = ShelveContainer(os.path.join(base_dir, 'index.p'))

    # (index class, registration name, extra keyword arguments), in the
    # order in which they are built and registered.
    specs = (
        (PublishedIndex, 'pi', {'granularity': DateIndex.day}),
        (UpdatedIndex, 'ui', {'granularity': DateIndex.minute}),
        (EditedIndex, 'ei', {'granularity': DateIndex.minute}),
        (AuthorIndex, 'ai', {'index_email': True, 'index_uri': True}),
        (CategoryIndex, 'ci', {}),
    )
    for index_class, name, options in specs:
        indexer.register(index_class(name, container=shelf, **options))

    return indexer
|---|
| 96 |
|
|---|
| 97 |
def load_service_document(h, service_uri):
    """Fetch the service document and hand it to an ``AtomPubLoader``.

    h -- HTTP client exposing ``request(uri)`` and returning a
        ``(response, content)`` pair (``run`` passes an
        ``httplib2.Http`` instance).
    service_uri -- URI of the service document to retrieve.

    Returns the result of ``AtomPubLoader.load`` called with the path of
    ``config.xml`` under ``base_dir`` and the fetched content; ``run``
    unpacks that result as a two-element pair.

    Raises StandardError when the response status is neither '200' nor
    '304'.
    """
    response, body = h.request(service_uri)
    if response['status'] not in ['200', '304']:
        raise StandardError("Could not retrieve '%s'" % service_uri)

    loader = AtomPubLoader(base_dir)
    config_path = os.path.join(base_dir, 'config.xml')
    return loader.load(config_path, body)
|---|
| 104 |
|
|---|
| 105 |
def _normalized_content_type(response):
    """Return the response's Content-Type lowercased and without spaces.

    A missing header yields an empty string, so the result can be
    compared against a list of accepted media types without a KeyError.
    Assumes ``response`` offers a dict-style ``get`` (httplib2 responses
    do) -- TODO confirm for any other client passed in.
    """
    return response.get('content-type', '').lower().replace(' ', '')


def _fetch_media_content(h, entry_doc):
    """Return the body behind the entry's first edit-media link.

    Returns None when the entry has no ``rel="edit-media"`` link or when
    the request does not answer with status '200' or '304'.
    """
    media = entry_doc.entry.xml_xpath('//atom:link[@rel="edit-media"]')
    if not media:
        return None

    r, c = h.request(unicode(media[0].href))
    if r['status'] in ['200', '304']:
        return c
    return None


def import_members(h, service, index):
    """Import every member of every collection declared by ``service``.

    For each collection the indexer is attached, the collection feed is
    fetched from its base edit URI, and every ``rel="edit"`` link found
    in the feed is retrieved, wrapped in a ``ResourceWrapper``, attached
    to the collection (together with its edit-media content, if any) and
    committed to the collection's store.

    h -- HTTP client exposing ``request(uri)`` and returning a
        ``(response, content)`` pair.
    service -- object exposing ``get_collections()``.
    index -- indexer added to each collection before importing.

    Collections and entries whose response is not a '200'/'304' Atom
    document are skipped silently.
    """
    for collection in service.get_collections():
        collection.add_indexer(index)

        uri = collection.get_base_edit_uri()
        r, c = h.request(uri)

        # Normalise the header the same way as for entries below, so
        # that e.g. "application/atom+xml; type=feed" is accepted too.
        if r['status'] not in ['200', '304'] or \
           _normalized_content_type(r) not in [
               'application/atom+xml',
               'application/atom+xml;type=feed']:
            continue

        print("Loading: %s" % uri)

        # The collection is renamed after the URI path, and a storage
        # container of the same name is created for it.
        path_info = urlparse(uri)[2].strip('/')
        collection.name_or_id = path_info
        collection.store.storage.create_container(path_info)

        feed_doc = amara.parse(c)
        feed_doc.xmlns_prefixes['app'] = "http://www.w3.org/2007/app"
        feed_doc.xmlns_prefixes['atom'] = "http://www.w3.org/2005/Atom"

        # Every edit link in the feed points at one member entry.
        edit_links = feed_doc.feed.xml_xpath('//atom:link[@rel="edit"]')

        for edit_link in edit_links:
            entry_uri = unicode(edit_link.href)
            r, c = h.request(entry_uri)
            if r['status'] not in ['200', '304'] or \
               _normalized_content_type(r) not in [
                   'application/atom+xml',
                   'application/atom+xml;type=entry']:
                continue

            print("  Entry: %s" % entry_uri)

            entry_doc = amara.parse(c)
            entry_doc.xmlns_prefixes['atom'] = "http://www.w3.org/2005/Atom"

            media_content = _fetch_media_content(h, entry_doc)

            member = ResourceWrapper(collection,
                                     'application/atom+xml;type=entry')
            member.from_entry(entry_doc.entry)
            collection.attach(member, member_content=member.atom.xml(),
                              media_content=media_content)
            collection.store.commit(message='Adding %s' % member.member_id)
|---|
| 161 |
|
|---|
| 162 |
if __name__ == "__main__":
    # Demo invocation; change the service document URI and the
    # credentials to point at the service you want to mirror.
    run('http://snellspace.dyndns.org:9080/weblogs/services/atom',
        username='test', password='test')
|---|