Skip to content

Instantly share code, notes, and snippets.

@SamPenrose
Created September 23, 2015 21:19
Show Gist options
  • Save SamPenrose/34369a647be2e286753c to your computer and use it in GitHub Desktop.
Save SamPenrose/34369a647be2e286753c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{"nbformat_minor": 0, "cells": [{"execution_count": 1, "cell_type": "code", "source": "from moztelemetry import get_pings\npings = get_pings(sc, app=\"Firefox\", channel=\"beta\", \n schema=\"v4\", doc_type=\"main\", fraction=0.1)", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 2, "cell_type": "code", "source": "def extract_dates(d):\n    cd = d.get('creationDate', '')[:10]\n    sd = d.get('meta', {}).get('submissionDate', '')\n    if not (cd and sd):\n        return None\n    return {'creation': cd, 'submission': sd}\naugust = pings.map(lambda d: extract_dates(d)).filter(lambda d: d and d['creation'].startswith('2015-08'))", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 3, "cell_type": "code", "source": "august.first()", "outputs": [{"execution_count": 3, "output_type": "execute_result", "data": {"text/plain": "{'creation': u'2015-08-02', 'submission': u'20150719'}"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 4, "cell_type": "code", "source": "import datetime as DT\nbounds = range(5)\ndef setup():\n    # for values under 5, count in place\n    counters = [sc.accumulator(0) for i in bounds]\n    def extract_delta(d):\n        cd = DT.datetime.strptime(d['creation'], \"%Y-%m-%d\").date()\n        sd = DT.datetime.strptime(d['submission'], \"%Y%m%d\").date()\n        delta = (sd - cd).days\n        if delta in bounds:\n            counters[delta].add(1)\n            return None\n        return delta\n    return counters, extract_delta", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 5, "cell_type": "code", "source": "counters, extractor = setup()\ndeltas = august.map(extractor)", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 8, "cell_type": "code", "source": "deltas = deltas.filter(lambda i: i)\nresults = deltas.collect()", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 9, "cell_type": "code", "source": "print counters\nprint len(results)\nprint results[-10:]", "outputs": [{"output_type": "stream", "name": "stdout", "text": "[Accumulator<id=0, value=16492159>, Accumulator<id=1, value=8686657>, Accumulator<id=2, value=2821738>, Accumulator<id=3, value=1442018>, Accumulator<id=4, value=798156>]\n2946868\n[6, 6, 6, 6, 6, 5, 5, -1, -1, -1]\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 10, "cell_type": "code", "source": "results.sort()\nprint results[:10], results[-10:]", "outputs": [{"output_type": "stream", "name": "stdout", "text": "[-93, -93, -93, -93, -93, -92, -92, -92, -92, -92] [53, 53, 53, 53, 53, 53, 53, 53, 53, 53]\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 11, "cell_type": "code", "source": "results.index(5)", "outputs": [{"execution_count": 11, "output_type": "execute_result", "data": {"text/plain": "560352"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 13, "cell_type": "code", "source": "total = sum([a.value for a in counters]+[len(results)])\nprint total", "outputs": [{"output_type": "stream", "name": "stdout", "text": "33187596\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 17, "cell_type": "code", "source": "total *= 1.0\nindex = 0\nfor i in range(len(bounds)):\n    index += counters[i].value\n    print \"%d days old: %02f\" % (i, index/total)", "outputs": [{"output_type": "stream", "name": "stdout", "text": "0 days old: 0.496937\n1 days old: 0.758682\n2 days old: 0.843705\n3 days old: 0.887156\n4 days old: 0.911206\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 18, "cell_type": "code", "source": "high_count = len(results)\nfor i in range(5, 22):\n    high_index = results.index(i)\n    overall_index = total - (high_count-high_index)\n    print \"%d days old: %02f\" % (i, overall_index/total)", "outputs": [{"output_type": "stream", "name": "stdout", "text": "5 days old: 0.928090\n6 days old: 0.943891\n7 days old: 0.955970\n8 days old: 0.966862\n9 days old: 0.974510\n10 days old: 0.979967\n11 days old: 0.984380\n12 days old: 0.988069\n13 days old: 0.991246\n14 days old: 0.994102\n15 days old: 0.995861\n16 days old: 0.996295\n17 days old: 0.996655\n18 days old: 0.996986\n19 days old: 0.997279\n20 days old: 0.997553\n21 days old: 0.997826\n"}], "metadata": {"collapsed": false, "trusted": true}}], "nbformat": 4, "metadata": {"kernelspec": {"display_name": "Python 2", "name": "python2", "language": "python"}, "language_info": {"mimetype": "text/x-python", "nbconvert_exporter": "python", "version": "2.7.9", "name": "python", "file_extension": ".py", "pygments_lexer": "ipython2", "codemirror_mode": {"version": 2, "name": "ipython"}}}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment