Ten Jupyter/IPython essentials¶
Using IPython as an extended shell¶
In [1]:
%pwd
Out[1]:
'/home/cyrille/minibook/chapter1'
In [2]:
!wget https://raw.githubusercontent.com/ipython-books/minibook-2nd-data/master/facebook.zip
In [3]:
%ls
Out[3]:
facebook.zip [...]
In [4]:
!unzip facebook.zip
In [5]:
%ls
Out[5]:
facebook facebook.zip [...]
In [6]:
%cd facebook
Out[6]:
/home/cyrille/minibook/chapter1/facebook
In [7]:
%bookmark fbdata
In [8]:
%ls
Out[8]:
0.circles 1684.circles 3437.circles 3980.circles 686.circles 0.edges 1684.edges 3437.edges 3980.edges 686.edges 107.circles 1912.circles 348.circles 414.circles 698.circles 107.edges 1912.edges 348.edges 414.edges 698.edges
In [9]:
files = !ls -1 -S | grep .edges
In [10]:
files
Out[10]:
['1912.edges', '107.edges', '1684.edges', '3437.edges', '348.edges', '0.edges', '414.edges', '686.edges', '698.edges', '3980.edges']
In [11]:
import os
from operator import itemgetter
# Pair each .edges entry of the current directory with its size in bytes.
edge_sizes = [(name, os.stat(name).st_size)
              for name in os.listdir('.')
              if name.endswith('.edges')]
# Order the pairs by size (second item), largest first; the sort is
# stable, so entries of equal size keep their listing order.
edge_sizes.sort(key=itemgetter(1), reverse=True)
# Drop the sizes and keep the file names, in that same order.
files = [name for (name, _size) in edge_sizes]
In [12]:
!head -n5 {files[0]}
Out[12]:
2290 2363 2346 2025 2140 2428 2201 2506 2425 2557
Learning magic commands¶
In [13]:
%lsmagic
Out[13]:
Available line magics: %alias %alias_magic %autocall %automagic %autosave %bookmark %cat %cd %clear %colors %config %connect_info %cp %debug %dhist %dirs %doctest_mode %ed %edit %env %gui %hist %history %install_default_config %install_ext %install_profiles %killbgscripts %ldir %less %lf %lk %ll %load %load_ext %loadpy %logoff %logon %logstart %logstate %logstop %ls %lsmagic %lx %macro %magic %man %matplotlib %mkdir %more %mv %notebook %page %pastebin %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %popd %pprint %precision %profile %prun %psearch %psource %pushd %pwd %pycat %pylab %qtconsole %quickref %recall %rehashx %reload_ext %rep %rerun %reset %reset_selective %rm %rmdir %run %save %sc %set_env %store %sx %system %tb %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode Available cell magics: %%! %%HTML %%SVG %%bash %%capture %%debug %%file %%html %%javascript %%latex %%perl %%prun %%pypy %%python %%python2 %%python3 %%ruby %%script %%sh %%svg %%sx %%system %%time %%timeit %%writefile Automagic is ON, % prefix IS NOT needed for line magics.
In [14]:
%history?
In [15]:
%history -l 5
Out[15]:
files = !ls -1 -S | grep .edges
files
!head -n5 {files[0]}
%lsmagic
%history?
In [16]:
# how many minutes in a day?
24 * 60
Out[16]:
1440
In [17]:
# and in a year?
_ * 365
Out[17]:
525600
In [18]:
%%capture output
%ls
In [19]:
output.stdout
Out[19]:
0.circles 1684.circles 3437.circles 3980.circles 686.circles 0.edges 1684.edges 3437.edges 3980.edges 686.edges 107.circles 1912.circles 348.circles 414.circles 698.circles 107.edges 1912.edges 348.edges 414.edges 698.edges
In [20]:
%%bash
cd ..
touch _HEY
ls
rm _HEY
cd facebook
Out[20]:
_HEY facebook facebook.zip [...]
In [21]:
%%script ghci
putStrLn "Hello world!"
Out[21]:
GHCi, version 7.6.3: http://www.haskell.org/ghc/ :? for help Loading package ghc-prim ... linking ... done. Loading package integer-gmp ... linking ... done. Loading package base ... linking ... done. Prelude> Hello world! Prelude> Leaving GHCi.
In [22]:
%%writefile myfile.txt
Hello world!
Out[22]:
Writing myfile.txt
In [23]:
!more myfile.txt
Out[23]:
Hello world!
In [24]:
!rm myfile.txt
Mastering tab completion¶
In [25]:
%cd fbdata
%ls
Out[25]:
(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook /home/cyrille/minibook/chapter1/facebook 0.circles 1684.circles 3437.circles 3980.circles 686.circles 0.edges 1684.edges 3437.edges 3980.edges 686.edges 107.circles 1912.circles 348.circles 414.circles 698.circles 107.edges 1912.edges 348.edges 414.edges 698.edges
Writing interactive documents in the Notebook with Markdown¶
Creating interactive widgets in the Notebook¶
In [26]:
from IPython.display import YouTubeVideo
YouTubeVideo('j9YpkSX7NNM')
In [27]:
from ipywidgets import interact # IPython.html.widgets before IPython 4.0
@interact(x=(0, 10))
def square(x):
    """Return a sentence stating the square of ``x``.

    The function is registered with ``interact`` using ``x=(0, 10)``
    (presumably an integer slider over that range -- confirm against
    the ipywidgets documentation).
    """
    squared = x ** 2
    return "The square of %d is %d." % (x, squared)
Out[27]:
'The square of 7 is 49.'
Running Python scripts from IPython¶
In [28]:
%cd fbdata
%cd ..
Out[28]:
(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook /home/cyrille/minibook/chapter1/facebook
In [29]:
%%writefile egos.py
import sys
import os

# The folder to scan is the first positional command-line argument
# when one is given; otherwise `folder` has to be defined already
# in the namespace this script runs in.
if sys.argv[1:]:
    folder = sys.argv[1]

# Names of all the entries found in that folder.
files = os.listdir(folder)

# identifiers holds the integer that precedes the first dot
# of every file name.
identifiers = [int(file.split('.', 1)[0]) for file in files]

# ids is the list of identifiers with duplicates removed by set(),
# put in ascending order.
ids = list(set(identifiers))
ids.sort()
Out[29]:
Overwriting egos.py
In [30]:
%run egos.py facebook
In [31]:
ids
Out[31]:
[0, 107, 348, 414, 686, 698, 1684, 1912, 3437, 3980]
In [32]:
folder = 'facebook'
In [33]:
%run egos.py
In [34]:
%run -i egos.py
In [35]:
ids
Out[35]:
[0, 107, 348, 414, 686, 698, 1684, 1912, 3437, 3980]
Introspecting Python objects¶
In [36]:
import networkx
In [37]:
networkx.Graph?
Debugging Python code¶
Benchmarking Python code¶
In [38]:
%cd fbdata
Out[38]:
(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook /home/cyrille/minibook/chapter1/facebook
In [39]:
import networkx
In [40]:
graph = networkx.read_edgelist('107.edges')
In [41]:
len(graph.nodes()), len(graph.edges())
Out[41]:
(1034, 26749)
In [42]:
networkx.is_connected(graph)
Out[42]:
True
In [43]:
%timeit networkx.is_connected(graph)
Out[43]:
100 loops, best of 3: 5.92 ms per loop
Profiling Python code¶
In [44]:
import networkx
In [45]:
def ncomponents(file):
    """Return the number of connected components of the graph in ``file``.

    ``file`` is handed to ``networkx.read_edgelist`` and the resulting
    graph to ``networkx.number_connected_components``.
    """
    return networkx.number_connected_components(
        networkx.read_edgelist(file))
In [46]:
import glob
def ncomponents_files():
    """Compute ``ncomponents`` for every ``*.edges`` file in the current directory.

    Returns a list of ``(file, count)`` tuples, ordered by sorted file
    name, where ``count`` is the value of ``ncomponents(file)``.
    """
    results = []
    for file in sorted(glob.glob('*.edges')):
        results.append((file, ncomponents(file)))
    return results
In [47]:
# Print one line per .edges file: the file name left-justified in a
# 12-character column, followed by its number of connected components.
for file, n in ncomponents_files():
    label = file.ljust(12)
    print(label, n, 'component(s)')
Out[47]:
0.edges 5 component(s) 107.edges 1 component(s) 1684.edges 4 component(s) 1912.edges 2 component(s) 3437.edges 2 component(s) 348.edges 1 component(s) 3980.edges 4 component(s) 414.edges 2 component(s) 686.edges 1 component(s) 698.edges 3 component(s)
In [48]:
%timeit ncomponents_files()
Out[48]:
1 loops, best of 3: 634 ms per loop
In [49]:
%prun -s cumtime ncomponents_files()
Out[49]:
2391070 function calls in 1.038 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 1.038 1.038 {built-in method exec}
1 0.000 0.000 1.038 1.038 <string>:1(<module>)
10 0.000 0.000 0.995 0.100 <string>:1(read_edgelist)
10 0.000 0.000 0.995 0.100 decorators.py:155(_open_file)
10 0.376 0.038 0.995 0.099 edgelist.py:174(parse_edgelist)
170174 0.279 0.000 0.350 0.000 graph.py:648(add_edge)
170184 0.059 0.000 0.095 0.000 edgelist.py:366(<genexpr>)
10 0.000 0.000 0.021 0.002 connected.py:98(number_connected_components)
35 0.001 0.000 0.021 0.001 connected.py:22(connected_components)