\documentclass[11pt,a4paper]{article} %%\documentclass[8pt,a4paper,twocolumn]{article} \usepackage[a4paper,left=1cm,right=1cm,top=1.0cm,bottom=2.5cm]{geometry} \usepackage[turkish]{babel} \usepackage{times} \usepackage{graphicx} \usepackage{natbib} \usepackage{algorithm} \usepackage{algorithmic} \usepackage[utf8]{inputenc} \usepackage{nomencl} \usepackage{commath} \usepackage{url} \usepackage{subfig} \usepackage[colorinlistoftodos]{todonotes} \usepackage{epstopdf} \usepackage{amsmath} \usepackage{mathtools} \usepackage{paralist} \usepackage{hyperref} \usepackage{pythonhighlight} \usepackage{listings} \definecolor{codegreen}{rgb}{0,0.6,0} \definecolor{codegray}{rgb}{0.5,0.5,0.5} \definecolor{codepurple}{rgb}{0.58,0,0.82} \definecolor{backcolour}{rgb}{0.95,0.95,0.92} \lstdefinestyle{mystyle}{ backgroundcolor=\color{backcolour}, commentstyle=\color{codegreen}, keywordstyle=\color{magenta}, numberstyle=\tiny\color{codegray}, stringstyle=\color{codepurple}, basicstyle=\footnotesize, breakatwhitespace=false, breaklines=true, captionpos=b, keepspaces=true, numbers=left, numbersep=5pt, showspaces=false, showstringspaces=false, showtabs=false, tabsize=2 } \def\ExtendVersion{1} \newcommand{\credit}[1]{Thanks to: #1} \input{taypack.tex} %turning on/off comments \usepackage{comment} \includecomment{comment} %show comments %\excludecomment{comment} %do not show comments \lstset{style=mystyle} \begin{document} \lstset{language=Python} % first the title is needed \title{\centering{Python: From Basics to the Extreme}} % the name(s) of the author(s) follow(s) next %\author{Keke\c{c}} %\author{İbrâhim Taygun Keke\c{c}} %\author{İbrâhim Taygun Keke\c{c}} %\author{UL} \maketitle %\begin{abstract} %\end{abstract} \newcounter{Madde}[section] \newenvironment{Madde}[1][]{\refstepcounter{Madde}\par\medskip \textbf{Md ~\theMadde. #1} \rmfamily}{\medskip} \newcommand{\citemd}[1]{(\textbf{M. 
{#1}})} \tableofcontents \section{Introduction} This reference book contains very frequently used concepts in Python language. The reader is assumed to be familiar with the abstract concepts of variables, loops, functions and such. At each section, a working code example is provided. \textbf{Correct usage of the material.} \begin{itemize} \item analyze the code example \item copy/paste and run the example \item check the output of the code whether it makes sense. \end{itemize} \section{Basics} \subsection{Variables and Data Types} You can declare integer and string variables. \begin{python} x = 5 y = "John" print(x) #5 print(y) #John \end{python} Declare string, integer or float. \begin{python} x = str(3) # x will be '3' y = int(3) # y will be 3 z = float(3) # z will be 3.0 print(type(x)) # print(type(y)) # \end{python} Different variable types exist. But for now, focus on string, integer, float, list, tuple, dictionary, and sets. \begin{python} Text Type: str Numeric Types: int, float, complex Sequence Types: list, tuple, range Mapping Type: dict Set Types: set, frozenset Boolean Type: bool Binary Types: bytes, bytearray, memoryview None Type: NoneType \end{python} List is a collection which is ordered and changeable. Allows duplicate members. Tuple is a collection which is ordered and unchangeable. Allows duplicate members. Set is a collection which is unordered, unchangeable, and unindexed. No duplicate members. Dictionary is a collection which is ordered and changeable. No duplicate members. \subsection{List} List is a python data type. Lists can store multiple variables in a single variable. 
\begin{python} thislist = ["apple", "banana", "cherry"] print(thislist) #['apple', 'banana', 'cherry'] \end{python} \subsubsection{access list items} \begin{python} thislist = ["apple", "banana", "cherry"] print(thislist[1]) #banana \end{python} \subsubsection{change list item value} \begin{python} thislist = ["apple", "banana", "cherry"] thislist[1] = "blackcurrant" print(thislist) #['apple', 'blackcurrant', 'cherry'] \end{python} \subsubsection{change a range of list item values} \begin{python} thislist = ["apple", "banana", "cherry", "orange", "kiwi", "mango"] thislist[1:3] = ["blackcurrant", "watermelon"] print(thislist) #['apple', 'blackcurrant', 'watermelon', 'orange', 'kiwi', 'mango'] \end{python} \subsubsection{add list items} \begin{python} thislist = ["apple", "banana", "cherry"] thislist.append("orange") print(thislist) #['apple', 'banana', 'cherry', 'orange'] \end{python} \subsubsection{remove list items} \begin{python} thislist = ["apple", "banana", "cherry"] thislist.remove("banana") print(thislist) #['apple', 'cherry'] \end{python} \paragraph{remove duplicates from the list} If new element is seen, first adds it to the list, and then returns it. \begin{python} a = [1,2,3,2,1,5,6,5,5,5] seen = set() uniq = [x for x in a if x not in seen and not seen.add(x)] print(uniq) #[1, 2, 3, 5, 6] \end{python} \subsubsection{looping a list} \begin{python} thislist = ["apple", "banana", "cherry"] for x in thislist: print(x) #apple #banana #cherry \end{python} \subsubsection{list slicing} you can access multiple elements with slicing operation. \begin{python} thislist = ["apple", "banana", "cherry", "watermelon", "grape", "kiwi"] print(thislist[1:4] ) #['banana', 'cherry', 'watermelon'] ## Here, we access each 2nd element from 1st to 6th element of the list. print(thislist[1:6:2]) #['banana', 'watermelon', 'kiwi'] ## Reverse slicing: access elements from end to beginning of the list. 
print(thislist[6:1:-1]) #['kiwi', 'grape', 'watermelon', 'cherry'] \end{python} \subsubsection{list comprehension} This powerful concept can single-line the traditional loops. \begin{python} fruits = ["apple", "banana", "cherry", "kiwi", "mango"] newlist = [] for x in fruits: if "a" in x: newlist.append(x) print(newlist) # ['apple', 'banana', 'mango'] fruits = ["apple", "banana", "cherry", "kiwi", "mango"] newlist = [x for x in fruits if "a" in x] print(newlist) # ['apple', 'banana', 'mango'] \end{python} \subsubsection{list comprehension: condition + operation} \begin{python} fruits = ["apple", "banana", "cherry", "kiwi", "mango"] newlist = [x.upper() for x in fruits] print(newlist) #['APPLE', 'BANANA', 'CHERRY', 'KIWI', 'MANGO'] \end{python} \subsubsection{sorting a list} \begin{python} thislist = ["orange", "mango", "kiwi", "pineapple", "banana"] thislist.sort() print(thislist) #['banana', 'kiwi', 'mango', 'orange', 'pineapple'] \end{python} \subsubsection{copying a list} \begin{python} thislist = ["apple", "banana", "cherry"] mylist = thislist.copy() print(mylist) #['apple', 'banana', 'cherry'] \end{python} \subsubsection{join two lists} \begin{python} list1 = ["a", "b", "c"] list2 = [1, 2, 3] list3 = list1 + list2 print(list3) #['a', 'b', 'c', 1, 2, 3] \end{python} \subsubsection{looping through multiple Lists} \begin{python} a = ['a1', 'a2', 'a3'] b = ['b1', 'b2'] for x, y in zip(a, b): print(x, y) #a1 b1 #a2 b2 \end{python} \subsubsection{other list methods} \begin{python} Method Description append() Adds an element at the end of the list clear() Removes all the elements from the list copy() Returns a copy of the list count() Returns the number of elements with the specified value extend() Add the elements of a list (or any iterable), to the end of the current list index() Returns the index of the first element with the specified value insert() Adds an element at the specified position pop() Removes the element at the specified position remove() Removes the 
item with the specified value reverse() Reverses the order of the list sort() Sorts the list \end{python} \subsection{Dictionary} Unlike lists, in dictionary each entry has a key, and a value. These key, value pairs constitute the dictionary content. \begin{python} thisdict = { "brand": "Ford", "model": "Mustang", "year": 1964 } print(thisdict) #{'brand': 'Ford', 'model': 'Mustang', 'year': 1964} \end{python} \subsubsection{access dictionary values} \begin{python} thisdict = { "brand": "Ford", "model": "Mustang", "year": 1964 } print(thisdict["brand"]) #Ford \end{python} \subsubsection{access dictionary keys or values} \begin{python} car = { "brand": "Ford", "model": "Mustang", "year": 1964 } x = car.keys() print(x) #dict_keys(['brand', 'model', 'year']) v = car.values() print(v) #dict_values(['Ford', 'Mustang', 1964]) \end{python} \subsubsection{inverse dictionary lookup} \begin{python} d = {"a":0, "b":1, "c":2} dict(zip(d.values(), d.keys())) #{0: 'a', 1: 'b', 2: 'c'} \end{python} \subsubsection{change dictionary items} \begin{python} car = { "brand": "Ford", "model": "Mustang", "year": 1964 } x = car.items() print(x) #before the change #dict_items([('brand', 'Ford'), ('model', 'Mustang'), ('year', 1964)]) car["year"] = 2020 print(x) #after the change #dict_items([('brand', 'Ford'), ('model', 'Mustang'), ('year', 2020)]) \end{python} \subsubsection{add dictionary items} \begin{python} thisdict = { "brand": "Ford", "model": "Mustang", "year": 1964 } thisdict["color"] = "red" print(thisdict) #{'brand': 'Ford', 'model': 'Mustang', 'year': 1964, 'color': 'red'} \end{python} \subsubsection{remove dictionary item} \begin{python} thisdict = { "brand": "Ford", "model": "Mustang", "year": 1964 } del thisdict["model"] print(thisdict) #{'brand': 'Ford', 'year': 1964} \end{python} \subsubsection{looping a dictionary} \begin{python} thisdict={'brand': 'Ford', 'year': 1964} for x, y in thisdict.items(): print(x, y) #brand Ford #year 1964 \end{python} \subsubsection{copy a 
dictionary}
\begin{python}
thisdict = {
    "brand": "Ford",
    "model": "Mustang",
    "year": 1964
}
# copy() returns a new (shallow) dictionary; the original is left untouched
mydict = thisdict.copy()
print(mydict)
#{'brand': 'Ford', 'model': 'Mustang', 'year': 1964}
\end{python}

\subsubsection{intersections of two dictionaries}
\begin{python}
some_dict = {'zope': 'zzz', 'python': 'rocks'}
another_dict = {'python': 'rocks', 'perl': 'interesting'}
# keys that are present in both dictionaries
print("Intersects:", [k for k in some_dict if k in another_dict])
#Intersects: ['python']
\end{python}

\subsubsection{other dictionary operations}
\begin{python}
clear()       Removes all the elements from the dictionary
copy()        Returns a copy of the dictionary
fromkeys()    Returns a dictionary with the specified keys and value
get()         Returns the value of the specified key
items()       Returns a view of the (key, value) pairs of the dictionary
keys()        Returns a view of the dictionary's keys
pop()         Removes the element with the specified key
popitem()     Removes the last inserted key-value pair
setdefault()  Returns the value of the specified key.
              If the key does not exist: insert the key, with the specified value
update()      Updates the dictionary with the specified key-value pairs
values()      Returns a view of the dictionary's values
\end{python}

\subsection{Sets}
Python sets are like lists: they are used to store multiple items in a single variable.
Set elements are unordered.
An item cannot be changed in place after it has been added, but items can be added to and removed from the set.
Unlike lists, sets cannot contain two identical elements.
\begin{python}
thisset = {"apple", "banana", "cherry"}
print(thisset)
#{'apple', 'cherry', 'banana'}   (the order may differ between runs)
##length
print(len(thisset))
#3
\end{python}
A set can hold elements of different types.
\begin{python}
set1 = {"abc", 34, True, 40, "male"}
\end{python}

\subsubsection{access elements}
Sets are unindexed, so the elements are reached by looping over the set.
\begin{python}
thisset = {"apple", "banana", "cherry"}
for x in thisset:
    print(x)
#apple
#cherry
#banana
# (the order is arbitrary and may differ between runs)
\end{python}

\subsubsection{check element existence}
\begin{python}
thisset = {"apple", "banana", "cherry"}
print("banana" in thisset)
#True
\end{python}

\subsubsection{add elements}
\begin{python}
thisset = {"apple", "banana", "cherry"}
thisset.add("orange")
print(thisset)
#{'orange', 'apple', 'cherry', 'banana'}
\end{python}

\subsubsection{remove elements}
We can remove a set element with the \texttt{remove()} function.
\begin{python}
thisset = {"apple", "banana", "cherry"}
thisset.remove("banana")
print(thisset)
#{'apple', 'cherry'}
\end{python}
Alternatively, the \texttt{pop()} function removes and returns an arbitrary element.
Sets are unordered, so there is no ``last'' element and you cannot know in advance which item is removed.
\begin{python}
thisset = {"apple", "banana", "cherry"}
x = thisset.pop()
print(x)
#apple   (could be any of the three elements)
print(thisset)
#{'cherry', 'banana'}
\end{python}

\subsubsection{looping sets}
\begin{python}
thisset = {"apple", "banana", "cherry"}
for x in thisset:
    print(x)
#apple
#cherry
#banana
\end{python}

\subsubsection{joining/combining (union) of sets}
\begin{python}
set1 = {"a", "b" , "c"}
set2 = {1, 2, 3}
# union() returns a new set with the elements of both sets
set3 = set1.union(set2)
print(set3)
#{'a', 'b', 1, 2, 3, 'c'}
\end{python}

\subsubsection{common elements (intersection) of sets}
\begin{python}
x = {"apple", "banana", "cherry"}
y = {"google", "microsoft", "apple"}
# intersection() returns a new set with the elements present in both sets
z = x.intersection(y)
print(z)
#{'apple'}
\end{python}

\subsection{Tuples}
Tuples are used to store multiple items in a single variable.
Elements of tuples are ordered and unchangeable.
\begin{python} thistuple = ("apple", "banana", "cherry") print(thistuple) #('apple', 'banana', 'cherry') print(len(thistuple)) #3 ## a tuple with different objects tuple1 = ("abc", 34, True, 40, "male") \end{python} \subsubsection{accessing elements} \begin{python} thistuple = ("apple", "banana", "cherry") print(thistuple[1]) #banana ##negative indexing print(thistuple[-1]) #cherry ##slicing thistuple = ("apple", "banana", "cherry", "orange", "kiwi", "melon", "mango") print(thistuple[2:5]) #('cherry', 'orange', 'kiwi') print(thistuple[:4]) #('apple', 'banana', 'cherry', 'orange') \end{python} \subsubsection{check element existence} \begin{python} thistuple = ("apple", "banana", "cherry") if "apple" in thistuple: print("Yes, 'apple' is in the fruits tuple") #Yes, 'apple' is in the fruits tuple \end{python} \subsubsection{unpack elements} \begin{python} fruits = ("apple", "banana", "cherry", "strawberry", "raspberry") (green, yellow, *red) = fruits print(green) #apple print(yellow) #banana print(red) #['cherry', 'strawberry', 'raspberry'] \end{python} \subsubsection{looping elements} \begin{python} thistuple = ("apple", "banana", "cherry") for x in thistuple: print(x) #apple #banana #cherry \end{python} \subsubsection{joining two tuples} \begin{python} tuple1 = ("a", "b" , "c") tuple2 = (1, 2, 3) tuple3 = tuple1 + tuple2 print(tuple3) #('a', 'b', 'c', 1, 2, 3) \end{python} \subsubsection{multiply tuples} \begin{python} fruits = ("apple", "banana", "cherry") mytuple = fruits * 2 print(mytuple) #('apple', 'banana', 'cherry', 'apple', 'banana', 'cherry') \end{python} \subsection{If conditional} If conditional checks for cases in your program. \begin{python} if 5 > 2: print("Five is greater than two!") #Five is greater than two! 
\end{python} \subsubsection{Elif conditional} \begin{python} a = 33 b = 33 if b > a: print("b is greater than a") elif a == b: print("a and b are equal") #a and b are equal \end{python} \subsubsection{Else-If conditional} \begin{python} a = 200 b = 33 if b > a: print("b is greater than a") elif a == b: print("a and b are equal") else: print("a is greater than b") #a is greater than b \end{python} \subsubsection{Short If} \begin{python} if a > b: print("a is greater than b") #a is greater than b \end{python} \subsubsection{Short If-Else} \begin{python} a = 2 b = 330 print("A") if a > b else print("B") # B \end{python} \subsubsection{And or conditional} \begin{python} a = 200 b = 33 c = 500 if a > b and c > a: print("Both conditions are True") # Both conditions are True if a > b or a > c: print("At least one of the conditions is True") #At least one of the conditions is True \end{python} \subsubsection{Nested if} With nested if's you can create branches in your program. \begin{python} x = 41 if x > 10: print("Above ten,") if x > 20: print("and also above 20!") else: print("but not above 20.") #Above ten, #and also above 20! \end{python} \subsubsection{Pass} if statements cannot be empty, if you need to have empty statment, use pass statement to avoid getting an error. \begin{python} a = 33 b = 200 if b > a: pass \end{python} \subsection{While Loop} \begin{python} i = 1 while i < 4: print(i) i += 1 #1 #2 #3 \end{python} \subsubsection{Break statement} You can exit the loops immediately with break statement. \begin{python} i = 1 while i < 6: print(i) if i == 3: break i += 1 #1 #2 #3 \end{python} \subsubsection{Continue statement} With continue statement we can stop the current iteration, and continue with the next. 
\begin{python} i = 0 while i < 6: i += 1 if i == 3: continue print(i) #1 #2 #4 #5 #6 \end{python} \subsubsection{While-Else statement} \begin{python} i = 1 while i < 6: print(i) i += 1 else: print("i is no longer less than 6") #1 #2 #3 #4 #5 #i is no longer less than 6 \end{python} \subsection{For Loop} \begin{python} fruits = ["apple", "banana", "cherry"] for x in fruits: print(x) #apple #banana #cherry \end{python} \subsubsection{Looping integers} \begin{python} for x in range(4): print(x) #0 #1 #2 #3 \end{python} \subsubsection{Looping a string variable} \begin{python} for x in "car": print(x) #c #a #r \end{python} \subsubsection{breaking the loop} \begin{python} fruits = ["apple", "banana", "cherry"] for x in fruits: print(x) if x == "banana": break #apple #banana \end{python} \subsubsection{for loop nested} \begin{python} adj = ["red", "big", "tasty"] fruits = ["apple", "banana", "cherry"] for x in adj: for y in fruits: print(x, y) #red apple #red banana #red cherry #big apple #big banana #big cherry #tasty apple #tasty banana #tasty cherry \end{python} \subsubsection{for loop pass statement} \begin{python} for x in [0, 1, 2]: pass \end{python} \subsection{Function} In python, you can declare functions. Functions are code pieces that you can execute multiple times easily. 
\subsubsection{defining a function}
\begin{python}
# define the function
def my_function():
    print("Hello from a function")
\end{python}

\subsubsection{calling a function}
\begin{python}
def my_function():
    print("Hello from a function")

my_function()
#Hello from a function
\end{python}

\subsubsection{giving function an argument}
\begin{python}
def my_function(fname):
    print(fname + " Refsnes")

my_function("Emil")
#Emil Refsnes
my_function("Tobias")
#Tobias Refsnes
my_function("Linus")
#Linus Refsnes
\end{python}

\subsubsection{giving function multiple arguments}
\begin{python}
def my_function(fname, lname):
    print(fname + " " + lname)

my_function("Emil", "Refsnes")
#Emil Refsnes
\end{python}

\subsubsection{giving function with default parameters}
\begin{python}
# "Norway" is used whenever the caller does not pass a country
def my_function(country = "Norway"):
    print("I am from " + country)

my_function("Sweden")
#I am from Sweden
my_function("India")
#I am from India
my_function()
#I am from Norway
my_function("Brazil")
#I am from Brazil
\end{python}

\subsubsection{giving function unknown number of parameters}
\begin{python}
# *kids collects all positional arguments into a tuple
def my_function(*kids):
    print("The youngest child is " + kids[2])

my_function("Emil", "Tobias", "Linus")
#The youngest child is Linus
\end{python}

\subsubsection{function returning values}
\begin{python}
def my_function(x):
    return 5 * x

print(my_function(3))
#15
print(my_function(5))
#25
print(my_function(9))
#45
\end{python}

\subsubsection{function returning multiple values}
\begin{python}
# several comma-separated return values are packed into one tuple
def my_function(x):
    return 5 * x, x * x

print(my_function(3))
#(15, 9)
print(my_function(10))
#(50, 100)
print(type(my_function(10)))
#<class 'tuple'>
\end{python}

\subsubsection{shortcut function: lambda}
Lambda functions are anonymous one-line functions.
Sometimes you need to define such a small function inside another one, or pass it as an argument; a lambda makes this very easy.
\begin{python}
x = lambda a : a + 10
print(x(5))
#15
x = lambda a, b : a * b
print(x(5, 6))
#30
x = lambda a, b, c : a + b + c
print(x(5, 6, 2))
#13
\end{python}

\subsection{Modules}
Python modules are libraries (sets of functions, classes and variables), written by you or by others.
Using these code pieces helps you to re-use code that is already written.

\subsubsection{writing a module}
Save the following to a file named \texttt{mymodule.py}.
\begin{python}
def greeting(name):
    print("Hello, " + name)

person1 = {
    "name": "John",
    "age": 36,
    "country": "Norway"
}
\end{python}

\subsubsection{use functions of a module}
\begin{python}
import mymodule

mymodule.greeting("Jonathan")
#Hello, Jonathan
\end{python}

\subsubsection{custom naming a module}
\begin{python}
import mymodule as mx

a = mx.person1["age"]
print(a)
#36
\end{python}

\subsubsection{partial import of a module}
Sometimes you need just one function or one object from a module.
In that case, import only that part; there is no need to import the whole module.
This keeps the namespace small and shows clearly what the program depends on.
\begin{python}
from mymodule import person1

print(person1["age"])
#36
\end{python}

\subsubsection{listing functions of the module}
You can list all the names (functions, classes, variables) defined in a module with the \texttt{dir} command.
\begin{python}
import platform

x = dir(platform)
print(x)
# too long to write the output... check your program.
\end{python}

\subsection{Classes}
%Python supports object oriented programming (OOP). In default, what we are doing is functional programming. That is,
%the program is a composition of functions. In OOP, similar to the real world, we have objects. These objects are represented by classes in the program.
%A class has attributes, and methods (in other word class functions). We create instances from that class using class constructor.
\begin{python}
class Person:
    # __init__ is the constructor: it runs when Person(...) is called.
    # The first parameter of every method is the instance itself,
    # conventionally named "self".
    def __init__(self, name, age):
        self.name = name
        self.age = age

    def myfunc(self):
        print("Hello my name is " + self.name)

p1 = Person("John", 36)
p1.myfunc()
#Hello my name is John
\end{python}

\subsection{Uncategorized}
\subsubsection{add padding to opencv image}
\begin{python}
import cv2

#cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
# img_dilated is the input image, loaded or computed earlier in your program
padSize = 500
# the color must be passed as value=...; the 7th positional parameter is "dst"
ib = cv2.copyMakeBorder(img_dilated, padSize, padSize, padSize, padSize,
                        cv2.BORDER_CONSTANT, value=(0, 0, 0))
\end{python}

\subsubsection{find neighboring pairs in list}
\begin{python}
A = [1, 2, 3, 4]
# pair every element with its successor
B = list(zip(A, A[1:]))
print(B)
#[(1, 2), (2, 3), (3, 4)]
\end{python}

\subsubsection{find most frequent elements in list}
\begin{python}
from collections import Counter

mylist = [1,1,1,1,3,4,5,5,5,6,6,7,8,9]
counter = Counter(mylist)
most_common = counter.most_common(2)
print(most_common)
#[(1, 4), (5, 3)]
# 4 ones, 3 fives seen.
\end{python}

\subsubsection{cartesian product of two lists}
\begin{python}
import itertools

a = [1,2,3]
b = [4,5,6]
for i in itertools.product(a, b):
    print(i)
#(1, 4)
#(1, 5)
#...
#(3, 6)
# 9 pairs in total
\end{python}

\subsubsection{pad integer to have zeros}
\begin{python}
# x is the number given as a string, e.g. NDigited('7') returns '007'
def NDigited(x, n=3):
    return (n - len(x)) * '0' + x
\end{python}
\paragraph{optimized version} \credit{jnmbk}
\begin{python}
def NDigited(x, n=3):
    return x.zfill(n)
\end{python}

\subsubsection{matplotlib display strings on y-axis}
\begin{python}
import numpy as np
import matplotlib.pyplot as plt

plt.yticks(np.arange(5), ('String1', 'String2', 'String3', '4', '5'))
\end{python}

\subsubsection{broadcast image or matrix channels}
This operation is usually needed to turn a single-channel (2D) image into a multi-channel (3D) image.
\begin{python}
import numpy as np

# m is a 2D array (rows x columns)
nChannels = 3
m3d = np.repeat(m.reshape(m.shape[0], m.shape[1], 1), nChannels, axis=2)
\end{python}
Another identical way to do it.
\begin{python}
import numpy as np

nChannels = 3
m3d = np.tile(m[:, :, None], [1, 1, nChannels])
\end{python}

\subsubsection{1d interpolation of x-y values}
Given a set of 2D points, we fit a curve to these points.
\begin{python}
import numpy as np
from scipy.interpolate import interp1d

xdata = [0, 1, 2, 3, 4, 5]
ydata = [0, 1, 4, 9, 16, 25]
f2 = interp1d(xdata, ydata, kind='quadratic')
# by default interp1d raises ValueError outside the range of xdata,
# so the curve is evaluated only between the first and the last x value
xnew = np.linspace(min(xdata), max(xdata), 1000)
ynew = f2(xnew)
\end{python}

\subsection{useful numpy functions}
\begin{python}
import numpy as np

## remove empty dimension
x = np.array([[[0], [1], [2]]])
print(x.shape)
#(1, 3, 1)
x = np.squeeze(x)
print(x.shape)
#(3,)

## stack a list of matrices depth-wise (along the third axis)
## possibleCurves is a list of arrays with the same shape
x = np.dstack(possibleCurves)

## randomly choose 5 distinct values from 0, 1, ..., 99
randIdxs = np.random.choice(100, 5, replace=False)

## reshape 1D data for one feature problems
X = x.reshape(-1, 1)
\end{python}

\section{String Manipulation, Searching, Sorting}\label{sect:strings}
\subsection{substring search}
\begin{python}
word = 'cart for supermarket'
##substring search: find first occurrence (find returns -1 if there is none)
result = word.find('supermarket')
print("Substring 'supermarket' found at index:", result)
#Substring 'supermarket' found at index: 9

##substring search with start end specification: searched in word[4:12], i.e. ' for sup'.
print(word.find('su', 4, 12))
#9
\end{python}

\subsubsection{string between two substrings}
\begin{python}
import re

s = 'asdf=5;iwantthis123jasd'
# group(1) is the text captured by the parentheses
result = re.search('asdf=5;(.*)123jasd', s)
print(result.group(1))
#iwantthis
\end{python}

\subsubsection{Create index for strings}
\begin{python}
a = ['a', 'b', 'c']
# map every string to its position in the list
b = {value: index for index, value in enumerate(a)}
print(b)
#{'a': 0, 'b': 1, 'c': 2}
\end{python}

\subsection{string concatenation}
\begin{python}
s1 = "myStrFirst"
s2 = "secondString"
s3 = s1 + " " + s2
print(s3)
#myStrFirst secondString
\end{python}

\subsection{string splitting}
\begin{python}
## simple string splitting
txt = "apple#banana#cherry#orange"
x = txt.split("#")
print(x)
#['apple', 'banana', 'cherry', 'orange']

## setting the maxsplit parameter to 1, will return a list with 2 elements!
txt = "apple#banana#cherry#orange"
x = txt.split("#", 1)
print(x)
#['apple', 'banana#cherry#orange']
\end{python}

\subsection{stripping string}
Remove leading and trailing spaces, or specific characters, at the beginning and at the end of a string.
Characters in the middle of the string are never removed.
\begin{python}
txt = " banana sss "
x = txt.strip()
print("of all fruits", x, "is my favorite")
#of all fruits banana sss is my favorite

txt = ",,,,,rrttgg.....banana....rrr"
# the argument is a set of characters to strip, not a prefix/suffix
x = txt.strip(",.grt")
print(x)
#banana
\end{python}

\subsection{combining list of strings}
\begin{python}
text = ['Python', 'is', 'a', 'fun', 'programming', 'language']
print(' '.join(text))
# Python is a fun programming language
\end{python}

\section{Input Output Operations}\label{sect:io}
\subsection{create a file}
\begin{python}
f = open("demofile3.txt", "w")
f.write("I have added content!")
f.close()
\end{python}

\subsection{write to a file: fast shortcut}
In this version, you don't have to remember to close the file: the \texttt{with} statement closes it automatically.
\begin{python}
with open("demofile3.txt", "w") as fp:
    fp.write("I have added content!")
\end{python}

\subsection{create directory}
The following program checks for a directory, and creates it if not present.
\begin{python}
import os

directory = "newDirectory"
parent_dir = "/home/User/Documents"
path = os.path.join(parent_dir, directory)
# exist_ok=True: no error is raised if the directory already exists
os.makedirs(path, exist_ok=True)
\end{python}

\subsection{remove file}
\begin{python}
import os

fileName = 'myFile.txt'  # File name
location = "/home/User/Documents"
path = os.path.join(location, fileName)
if os.path.exists(path):
    os.remove(path)
else:
    print("The file does not exist")
\end{python}

\subsection{save and load pickle file}
Pickle is the built-in binary storage format of Python.
It can store almost any type of Python object.
Only load pickle files that come from a source you trust: loading a pickle can execute arbitrary code.
\begin{python}
import pickle

a = {'hello': 'world'}
with open('filename.pkl', 'wb') as handle:
    pickle.dump(a, handle, protocol=pickle.HIGHEST_PROTOCOL)
with open('filename.pkl', 'rb') as handle:
    b = pickle.load(handle)
print(a == b)
#True
\end{python}
\begin{python}
import pickle

# name is the file name without the '.pkl' extension
def load_obj(name):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)

# writes to '<name>.pkl', the same file name that load_obj(name) reads
def write_obj(name, data):
    with open(name + '.pkl', 'wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
\end{python}

\subsection{joining paths}
\begin{python}
import os

path = "/home"
## Join various path components
print(os.path.join(path, "User/Desktop", "file.txt"))
#/home/User/Desktop/file.txt
\end{python}

\subsection{check file existence}
\begin{python}
import os

path = 'D:/Pycharm/USER/testFile.txt'
isFile = os.path.isfile(path)
print(isFile)
#False
\end{python}

\subsection{list files in a directory}
\begin{python}
import os

path = "/"
dir_list = os.listdir(path)
print(dir_list)
# too much output, please run the code yourself.
\end{python}

\subsection{iterate (traverse) files in a folder}
\begin{python}
import os

# os.walk visits the folder 'Test' and all of its sub-folders
for (root, dirs, files) in os.walk('Test', topdown=True):
    print(root)
    print(dirs)
    print(files)
# too much output, please run the code yourself.
\end{python}

\subsection{sort files by date}
\begin{python}
import os

search_dir = "/mydir/"
files = os.listdir(search_dir)
files = [os.path.join(search_dir, f) for f in files]
# oldest file first (sorted by last modification time)
files.sort(key=lambda x: os.path.getmtime(x))
\end{python}

\subsection{write to CSV file}
\begin{python}
import csv

# newline='' lets the csv module control line endings (avoids blank lines on Windows)
with open('employee_file.csv', mode='w', newline='') as employee_file:
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"')
    employee_writer.writerow(['name', 'department', 'birthday month'])
    employee_writer.writerow(['John Smith', 'Accounting', 'November'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March'])
    employee_writer.writerow(['Monica Barker', 'HR', 'December'])

## open the employee_file.csv and you will see:
## name,department,birthday month
## John Smith,Accounting,November
## Erica Meyers,IT,March
## Monica Barker,HR,December
\end{python}

\subsection{read CSV file example}
\begin{python}
import csv

## first create the file that will be read
with open('employee_file.csv', mode='w', newline='') as employee_file:
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"')
    employee_writer.writerow(['name', 'department', 'birthday month'])
    employee_writer.writerow(['John Smith', 'Accounting', 'November'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March'])
    employee_writer.writerow(['Monica Barker', 'HR', 'December'])

## read back the same file
with open('employee_file.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        if line_count == 0:
            # the first row holds the column names
            print(f'Column names are {", ".join(row)}')
            line_count += 1
        else:
            print(f'\t{row[0]} works in the {row[1]} department, and was born in {row[2]}.')
            line_count += 1
    print(f'Processed {line_count} lines.')
\end{python}

\subsubsection{read CSV into list}
\begin{python}
import csv

## first create the file that will be read
with open('employee_file.csv', mode='w', newline='') as employee_file:
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"')
    employee_writer.writerow(['name', 'department', 'birthday month'])
    employee_writer.writerow(['John Smith', 'Accounting', 'November'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March'])
    employee_writer.writerow(['Monica Barker', 'HR', 'December'])

# returns the rows of the CSV file as a list of lists;
# with discardHeader=True the first row (column names) is dropped
def readCSVIntoList(fileName, discardHeader=False):
    with open(fileName, newline='') as csv_file:
        rows = list(csv.reader(csv_file, delimiter=','))
    if discardHeader and rows:
        del rows[0]
    return rows

rows = readCSVIntoList('employee_file.csv', discardHeader=True)
print(rows)
#[['John Smith', 'Accounting', 'November'], ['Erica Meyers', 'IT', 'March'], ['Monica Barker', 'HR', 'December']]
\end{python}

\subsection{Adding Command Line Arguments}
The following program accepts command line arguments.
If they are not provided, it uses default values.
\begin{python}
## run with python sourcefile.py --keyword mykeyword --page 1
## or python sourcefile.py
import argparse

parser = argparse.ArgumentParser(description="Just an example",
                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# the defaults are used for every argument that is missing on the command line
parser.add_argument("-k", "--keyword", type=str, default="googleit", help="query keywords")
parser.add_argument("-p", "--page", type=str, default="0", help="query page")
args = parser.parse_args()

searchKeyword = args.keyword
searchPage = args.page
print("SearchKeyword is " + searchKeyword + " Search Page is " + searchPage )
\end{python}

\section{Time and Date}
\subsection{Get current date}
\begin{python}
##Get the current date in DD-MM-YYYY-HR-MM-SS format:
from datetime import datetime

now = datetime.now()
curDate = now.strftime("%d-%m-%Y-%H-%M-%S")
print(curDate)
# 18-06-2022-10-40-59
\end{python}

\subsection{Convert Unix time to datetime}
\begin{python}
from datetime import datetime, timezone

x = 1656100252345  # Unix time in milliseconds
# fromtimestamp expects seconds; tz=timezone.utc gives the time in UTC
d = datetime.fromtimestamp(x / 1000, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
print(d)
#2022-06-24 19:50:52
\end{python}

\section{Algorithms}
\subsection{Remove elements}
\subsection{Find middle coordinates of a coordinate array}
\begin{python}
[ (linesProc2[i] +
linesProc2[i+1]) / 2 for i,x in enumerate( linesProc2[:-1] ) ] \end{python} \section{Regex} \subsection{Nongreedy regex search} Default behaviour of regex is to greedy matching (searches the longest sequence up to the end). To search nongreedy: \begin{python} text="From: test: test", regex="^F.+:" -> match="From: test:" regex="^F.+?:" -> match="From:" \end{python} \section{Network}\label{sect:network} \subsection{Single threaded to multi threaded} Python programs are by default single threaded. This source is a multi-threaded example: \begin{python} ### ---------------------- ### ### The following program is single threaded, it takes approximately six seconds. ### ---------------------- ### from time import sleep, perf_counter def task(): print('Starting a task...') sleep(3) print('done') start_time = perf_counter() task() task() end_time = perf_counter() print(f'It took {end_time- start_time: 0.2f} second(s) to complete.') # result depends on your cpu. please run the code! ### ---------------------- ### ### The following program is multi-threaded and it takes approximately 3 seconds. ### ---------------------- ### from time import sleep, perf_counter from threading import Thread def task(): print('Starting a task...') sleep(3) print('done') start_time = perf_counter() # create two new threads t1 = Thread(target=task) t2 = Thread(target=task) # start the threads t1.start() t2.start() # wait for the threads to complete t1.join() t2.join() end_time = perf_counter() print(f'It took {end_time- start_time: 0.2f} second(s) to complete.') # result depends on your cpu. please run the code! 
\end{python}

\subsection{Multi-thread with argument}
\begin{python}
from time import sleep, perf_counter
from threading import Thread

def task(id):
    print(f'Starting the task {id}...')
    sleep(1)
    print(f'The task {id} completed')

start_time = perf_counter()
## create and start 10 threads
threads = []
for n in range(1, 11):
    ## args must be a tuple, hence the trailing comma
    t = Thread(target=task, args=(n,))
    threads.append(t)
    t.start()
## wait for the threads to complete
for t in threads:
    t.join()
end_time = perf_counter()
print(f'It took {end_time- start_time: 0.2f} second(s) to complete.')
# Please run the program on your computer to see the output!
\end{python}

\section{Web}\label{sect:web}
\subsection{Scrape HTML with Beautiful Soup}
This example code scrapes an HTML page and searches for HTML div tags inside it.
\begin{python}
import requests
from bs4 import BeautifulSoup

URL = "https://edition.cnn.com/"
page = requests.get(URL)
soup = BeautifulSoup(page.content, "html.parser")
## print the HTML content
print(soup.prettify())
## find first div element in the page
myDiv = soup.find("div")
## find all div elements in the page
myDivs = soup.find_all("div")
## find all divs with class equal to the following string.
job_elements = soup.find_all("div", attrs={"class":"card-content"} )
## get the link target (href attribute) of the first "a" element.
soup.find('a')['href']
\end{python}

\subsection{Make request until success}
\begin{python}
import time
from requests import get
from bs4 import BeautifulSoup

def retryResponseGetSoup(url):
    """Request url until it succeeds and return the parsed page.

    The request is repeated every 5 seconds while the status code is not
    200 or the page text equals the error text 'Baglanti hatasi.'.
    """
    while True:
        response = get(url)
        bs = BeautifulSoup(response.content, "html.parser")
        if bs.text != 'Baglanti hatasi.' and response.status_code == 200:
            return bs
        print('retrying...')
        time.sleep(5)

## requests needs the scheme (https://) in the URL
url = "https://www.google.com"
s = retryResponseGetSoup(url)
\end{python}

\subsection{Selenium}
\subsubsection{Access attribute of an element}
\begin{python}
### selenium python: read an attribute of every matched element
### (driver is assumed to be an already started WebDriver)
from selenium.webdriver.common.by import By
elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'navigationPane')]/a")
for e in elements:
    print(e.get_attribute("outerHTML"))
\end{python}

\subsubsection{Scroll to element}
\begin{python}
### selenium python scroll to element's location
desired_y = element.location['y']
## y coordinate that is currently at the vertical center of the window
current_y = (driver.execute_script('return window.innerHeight') / 2) + driver.execute_script('return window.pageYOffset')
## scroll by the difference, so that the element ends up at the center
scroll_y_by = desired_y - current_y
driver.execute_script("window.scrollBy(0, arguments[0]);", scroll_y_by)
\end{python}

\subsubsection{Access pure HTML of the element}
\begin{python}
tableElems = driver.find_element(By.XPATH, "//table[contains(@class, 'morphologyTable')]//tbody")
tableElems.get_attribute("outerHTML")
\end{python}

\subsubsection{Save cropped screenshot}
\begin{python}
from PIL import Image
## NOTE: x1, w1, y1, y2 and h2 must be defined beforehand
## (presumably the locations and sizes of the elements to keep)
driver.save_screenshot('shot.png')
im = Image.open('shot.png')
## crop box is (left, upper, right, lower)
im = im.crop((int(x1-5),int(0), int(x1+w1+5), int(y2-y1+h2)))
im.save('shot.png')
\end{python}

\subsection{Download files}
\begin{python}
import requests
image_url = "https://www.python.org/static/community_logos/python-logo-master-v3-TM.png"
r = requests.get(image_url)
r.raise_for_status()  ## do not save an error page as the image
with open("python_logo.png",'wb') as f:
    f.write(r.content)
\end{python}

\section{Pandas}
Pandas is a library for working with tabular data; its central type, the DataFrame, is a table with labeled rows and columns.
It is useful because loading, cleaning, filtering and aggregating data each take only a few lines of code.
\subsection{read CSV}
We can use the Pandas library to read CSV files easily. The content goes into a DataFrame, a type of the Pandas library.
\begin{python}
import csv
## newline='' is the mode the csv module recommends for its files
with open('employee_file.csv', mode='w+', newline='') as employee_file:
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"')
    employee_writer.writerow(['name', 'department', 'birthday month'])
    employee_writer.writerow(['John Smith', 'Accounting', 'November'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March'])
    employee_writer.writerow(['Monica Barker', 'HR', 'December'])

import pandas as pd
data= pd.read_csv("employee_file.csv")
print(data)
#            name  department birthday month
#0     John Smith  Accounting       November
#1   Erica Meyers          IT          March
#2  Monica Barker          HR       December
\end{python}

\subsection{analyze and clean data}
\begin{python}
import csv
with open('employee_file.csv', mode='w+', newline='') as employee_file:
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"')
    employee_writer.writerow(['name', 'department', 'birthday month','salary','gender'])
    employee_writer.writerow(['John Smith', 'Accounting', 'November', '100', 'm'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March', '200', 'f'])
    employee_writer.writerow(['Monica Barker', '', '', '400', 'f'])

import pandas as pd
df= pd.read_csv("employee_file.csv")
## print head of the data
print(df.head(10))
## print tail of the data
print(df.tail())
## info about the data (info() prints by itself and returns None)
df.info()
## get column data types
df.dtypes
## drop empty rows
new_df = df.dropna()
print(new_df.to_string())
## drop unnecessary columns (drop returns a new frame, so keep the result)
df_small = df.drop(columns=['gender'])

## The fills below are alternatives: pick the one that fits the data.
## Assigning the result back works in every pandas version, while
## df["salary"].fillna(..., inplace=True) may leave df unchanged.
## replace empty places
df.fillna(130, inplace = True)
## only replace specific columns
df["salary"] = df["salary"].fillna(130)
## substitute column mean to the empty places
x = df["salary"].mean()
df["salary"] = df["salary"].fillna(x)
## substitute column median to the empty places
x = df["salary"].median()
df["salary"] = df["salary"].fillna(x)
## substitute column mode to the empty places
x = df["salary"].mode()[0]
df["salary"] = df["salary"].fillna(x)
## remove rows with an empty department
df.dropna(subset=['department'], inplace = True)
\end{python}

\subsection{basic
functionality: access, sampling, filtering}
\begin{python}
import pandas as pd
df = pd.read_csv("../pokemon_data.txt", delimiter="\t")
## Read headers
df.columns
## list the frequency of each Generation field
df['Generation'].value_counts()
## give how many uniques are in the dataset
df["Generation"].nunique()

## Read each column
df["Speed"]
df["Speed"][0:5]
df.Speed                 # attribute access: df.ColumnName
df[ ["Speed", "HP"] ]
## Read each row
df.iloc[1]
df.iloc[1:4]
[row for index, row in df.iterrows()]
## Read a specific location (R,C)
df.iloc[2,1]
## Select rows (the comparison goes outside the column brackets)
df.loc[ df["Type 1"] == "Fire" ]

## Sorting data
df.sort_values("Speed")
df.sort_values("Speed", ascending=False)
## one flag per column: HP ascending, Speed descending
df.sort_values(["HP", "Speed"], ascending=[True, False])

## Making changes to the data
df['Total'] = df['Total'] - 5
## row-wise mean of several columns
df['Total'] = df[ ['CA','CB','CC'] ].mean(axis=1)

def f(x,y):
    return x+y

## Iterating over one column
result = [x for x in df['End']]
## Iterating over two columns, use `zip`
result = [f(x, y) for x, y in zip(df['Start'], df['End'])]
## Iterating over multiple columns - same data type
result = [f(row[0], row[1]) for row in df[['Start', 'End']].to_numpy()]
## Iterating over multiple columns - differing data type
result = [f(row[0], row[1]) for row in zip(df['Start'], df['End'])]
## row-by-row iteration is the slowest option. Avoid it when one of the forms above works.
for row in df.itertuples():
    print(row)

##Removing columns
df.drop( columns=['Total'], inplace=True)
##Keeping rows conditionally (rows that fail the condition are removed)
df.query("salary > 20")
## Summing each row over the numeric columns
df.sum(axis=1, numeric_only=True)
## Drop repeating entries
df.drop_duplicates(inplace = True)
## save results to csv
df.to_csv("myCsvFile.csv", index=False)
\end{python}

\subsection{advanced: multi column access, contains, groupby}
\begin{python}
#####################
### Advanced
import pandas as pd
df = pd.read_csv("../pokemon_data.txt", delimiter="\t")
df.loc[ df["Type 1"] == "Grass" ]
## Sample using multiple condition (each condition needs its own parentheses)
new_df = df.loc[ (df["Type 1"] == "Grass") & (df["Type 2"] == "Poison") ]
## After filtering, index stays.
## You have to reset index then.
new_df = new_df.reset_index()           # keeps the old index as a column
new_df = new_df.reset_index(drop=True)  # removes old idx

## Using contains
df.loc[ df["Name"].str.contains("Mega") ]
df.loc[ ~df["Name"].str.contains("Mega") ] # take other set
df.loc[ df["Name"].str.contains("Fire|Grass", regex=True) ]
df.loc[ df["Name"].str.contains("pi[a-z]*", regex=True) ]

#### Conditional Changes
## Change Type1 column having entry "fire" to "flamer"
df.loc[ df["Type 1"] == "Fire", "Type 1" ] = "Flamer"
## Change two columns at the same time.
df.loc[ df["Total"] > 500, ["Generation", "Legendary"] ] = ["Test 1", "Test2"]

## Aggregate data using groupby
## numeric_only=True skips the text columns, which have no mean
df.groupby( ["Type 1"] ).mean(numeric_only=True)
df.groupby("Type 1")['HP'].sum()
df.groupby( ["Type 1"] ).mean(numeric_only=True).sort_values("Defense", ascending=False)
df.groupby( ["Type 1"] ).count()

### Working with large data
## chunksize makes read_csv yield frames of at most 5000 rows each
for chunk in pd.read_csv("modified.csv", chunksize=5000):
    print(chunk)
\end{python}

\subsection{calculate column cumulatives}
\begin{python}
import pandas as pd
df = pd.DataFrame(data=[[1, 2, 7, 10], [10, 22, 1, 30], [30, 42, 2, 10], [100,142, 22,1]], columns=['Start','End','Value1','Value2'])
## running total of each selected column
df2 = df[['Value1', 'Value2']].cumsum()
df2.rename(columns={'Value1': 'Cumulative Value1', 'Value2': 'Cumulative Value2'}, inplace=True)
print(df2)
\end{python}

\subsection{operations on two data frames}
\begin{python}
import numpy as np
import pandas as pd
df = pd.DataFrame(data=[[1, 2, 7, 10], [10, 22, 1, 30], [30, 42, 2, 10], [100,142, 22,1]], columns=['Value1','Value2','Value3','Value4'])
df2 = pd.DataFrame(data=[[10, 20, 30, 40], [5, 1, 6, 32], [143, 152, 2, 10], [np.nan, 162, 12, 11]], columns=['Value1','Value2','Value3','Value4'])
## add dataframes
df + df2
## replaces missing values with 0 while adding
df.add(df2, fill_value=0)
## check whether df > df2. Result is a boolean filled data frame.
## eq, ne, lt, gt, le, and ge are the functions here.
## their usage is the same.
df.gt(df2)
\end{python}

\subsubsection{Concat join rows}
\begin{python}
import pandas as pd
df = pd.DataFrame(data=[[10, 20, 30], [11, 21, 31]], columns=['Key1','Key2', 'Key3'] )
df2 = pd.DataFrame(data=[[5, 6, 7], [5, 8, 12]], columns=['Key1','Key2', 'Key3'] )
df3 = pd.concat([df, df2], axis=0)
## reset_index returns a new frame, so keep the result
df3 = df3.reset_index(drop=True) # otherwise indexes get mixed
print(df3)
#   Key1  Key2  Key3
#0    10    20    30
#1    11    21    31
#2     5     6     7
#3     5     8    12
\end{python}

\subsubsection{Concat join rows with different columns}
\begin{python}
import pandas as pd
df = pd.DataFrame(data=[[10, 20, 30], [11, 21, 31]], columns=['Key1','Key2', 'Key3'] )
df2 = pd.DataFrame(data=[[5, "Lazy"], [5, "Hardworking"]],columns=['Key4','Key5'] )
## columns missing in one frame are filled with NaN
pd.concat([df, df2], axis=0)
#   Key1  Key2  Key3  Key4         Key5
#0  10.0  20.0  30.0   NaN          NaN
#1  11.0  21.0  31.0   NaN          NaN
#0   NaN   NaN   NaN   5.0         Lazy
#1   NaN   NaN   NaN   5.0  Hardworking
\end{python}

\subsubsection{Concat join columns}
\begin{python}
import pandas as pd
df = pd.DataFrame(data=[[10, 20, 30], [11, 21, 31]], columns=['Key1','Key2', 'Key3'] )
df2 = pd.DataFrame(data=[[5, "Lazy"], [5, "Hardworking"]], columns=['Key4','Key5'] )
pd.concat([df, df2], axis=1)
#   Key1  Key2  Key3  Key4         Key5
#0    10    20    30     5         Lazy
#1    11    21    31     5  Hardworking
\end{python}

\subsection{applying a function to dataFrame rows or columns}
\begin{python}
import numpy as np
import pandas as pd
df = pd.DataFrame(data=[["Kevin", 2, 6.], ["Frank", 22, 8.], ["Sarah", 4, 5.], ["Galvin", 3, 10.]], columns=['Name','Years','Ability'])
print(df)
#     Name  Years  Ability
#0   Kevin      2      6.0
#1   Frank     22      8.0
#2   Sarah      4      5.0
#3  Galvin      3     10.0

## axis=0: the function receives each column, i.e. it sums down the rows
df1 = df.apply(np.sum, axis=0)
print(df1)
#Name       KevinFrankSarahGalvin
#Years                         31
#Ability                     29.0

## axis=1: the function receives each row, i.e. it sums across the columns
df2 = df[["Years", "Ability"]].apply(np.sum, axis=1)
print(df2)
#0     8.0
#1    30.0
#2     9.0
#3    13.0
\end{python}

\subsection{plot values with dates on x axis}
\begin{python}
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(data=[["10-06-2022", 5], ["09-06-2022", 3], ["11-06-2022", 20],
                        ["13-06-2022", 12],["12-06-2022", 15], ["14-06-2022", 7]], columns=['Date','Sales'])
df["time"] = pd.to_datetime(df['Date'], format='%d-%m-%Y')
##df["time"] = pd.to_datetime(df['Date'], format='%Y-%m-%d %H:%M:%S.%f')
df.set_index(['time'],inplace=True)
## the rows are not in date order; sort them so that the line runs left to right
df.sort_index(inplace=True)
df.plot()
plt.show()
\end{python}

\subsection{Example code 1:}
\begin{python}
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv("examples/4weeks_date.csv")
##see the columns
df.columns
##strip the column names to remove extra whitespaces
df = df.rename(columns=lambda x: x.strip())
##check dataset
df.describe()
df.info()
print("initial length of dataset %d " % len(df) )
##drop rows when ENTRIES or EXITS is zero
df_clean = df.query("ENTRIES > 0")
df_clean = df_clean.query("EXITS > 0")
print("length after cleaning 1 %d " % len(df_clean) )
##drop entries bigger than 5M
df_clean = df_clean.query("ENTRIES < 5000000")
df_clean = df_clean.query("EXITS < 5000000")
df_clean.reset_index(drop = True, inplace = True)
print("length after cleaning 2 %d " % len(df_clean) )
##compute total activity : ENTRIES + EXITS
df_clean["TA"] = df_clean["ENTRIES"] + df_clean["EXITS"]
##combine date and time columns.
## Then convert to pdDate
df_clean["DT"] = df_clean["DATE"] + " " + df_clean["TIME"]
df_clean["DATETIME"] = pd.to_datetime(df_clean['DT'], format='%m/%d/%Y %H:%M:%S')
##select a station and sum same day activities
df_clean["LINENAME"].value_counts()
usedLineName = "1237ACENQRS"
myLineDF = df_clean[ df_clean["LINENAME"] == usedLineName].reset_index(drop = True)
##sort rows chronologically (DATE is a mm/dd/yyyy string, so sort on the parsed DATETIME)
myLineDF = myLineDF.sort_values("DATETIME").reset_index(drop = True)
##add rows with identical DATETIME
## numeric_only=True skips the text columns; the DATETIME index is kept for the x axis
myLineDFGrouped = myLineDF.groupby("DATETIME").sum(numeric_only=True)
## Create figure and plot space
fig, ax = plt.subplots(figsize=(10, 10))
ax.plot(myLineDFGrouped.index , myLineDFGrouped["TA"], color='purple')
ax.set(xlabel="Date", ylabel="Total activity", title="Total activity for line: %s" % usedLineName )
plt.show()
## Alternatively use default Plotter
myLineDFGrouped["TA"].plot()
plt.show()
\end{python}

\section{Matplotlib Visualization}
\subsection{Basics}
\subsubsection{Basic plotting}
\begin{python}
from matplotlib import pyplot as plt
plt.figure(figsize = (20,9))
## plot y values against x values
plt.plot([1, 2, 3, 4], [1, 4, 9, 16])
plt.show()
\end{python}

\section{Seaborn Visualization}
Seaborn is a visualization library built on top of Matplotlib. It handles Pandas data frames more comfortably than Matplotlib does.
\subsection{Plotting a histogram}
\begin{python}
import matplotlib.pyplot as plt
import seaborn as sns
## distplot is deprecated; kdeplot and histplot replace it
## plot only density
sns.kdeplot([0, 1, 2, 3, 4, 5])
plt.show()
## plot density + histogram
sns.histplot([0, 1, 2, 3, 3, 3, 3, 4, 5, 7], kde=True, stat="density")
plt.show()
\end{python}

\subsection{Lineplot}
\begin{python}
import matplotlib.pyplot as plt
import seaborn as sns
## loading dataset
data = sns.load_dataset("iris") # [150 rows x 5 columns]
### draw lineplot
sns.lineplot(x="sepal_length", y="sepal_width", data=data)
## setting the lower x limit of the plot
plt.xlim(5)
plt.show()
\end{python}

\subsection{Scatterplot}
\begin{python}
# importing packages
import seaborn as sns
import matplotlib.pyplot as plt
# loading dataset
data = sns.load_dataset("iris")
sns.scatterplot(x='sepal_length', y='sepal_width', data=data)
plt.show()
\end{python}

\section{OpenCV}
\subsection{Basics}
\subsubsection{Typecast PIL Image to OpenCV image}
\begin{python}
from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt
pillowImage = Image.open("kiz-kulesi.jpg")
## float RGB array in [0, 1], the form matplotlib displays
rgb_image_float= np.asarray(pillowImage,dtype=float)/255.0
plt.imshow(rgb_image_float)
## OpenCV works on BGR channel order (this assumes the file is an RGB image)
bgr_image = cv2.cvtColor(np.asarray(pillowImage), cv2.COLOR_RGB2BGR)
\end{python}

\subsubsection{Draw circle on PIL Image}
\begin{python}
from PIL import Image, ImageDraw
image = Image.new('RGBA', (200, 200))
draw = ImageDraw.Draw(image)
# bounding box coordinates for the ellipse topleft, bot right (x1, y1, x2, y2)
draw.ellipse((20, 20, 180, 180), fill = 'blue', outline ='blue')
draw.point((100, 100), 'red')
image.save('test.png')
\end{python}

\subsubsection{Draw text on PIL Image}
\begin{python}
from PIL import Image, ImageDraw, ImageFont
image = Image.new('RGBA', (200, 200))
draw = ImageDraw.Draw(image)
# drawing text size
text = "hello"
## adjust the path to a font file that exists on your machine
font = ImageFont.truetype(r'C:\Users\System-Pc\Desktop\arial.ttf', 20)
## ubuntu -- font can be found with fc-list command
draw.text((5, 5), text, fill ="red", font = font, align ="right")
image.save('test.png')
\end{python}

\subsubsection{Flip, resize, rotate, crop images}
\begin{python}
import cv2
import scipy.ndimage
import numpy as np
import matplotlib.pyplot as plt

original_image = cv2.imread("kiz-kulesi.jpg", cv2.IMREAD_GRAYSCALE)
flipud_image=np.flipud(original_image)
fliplr_image=np.fliplr(original_image)
rotated_image=scipy.ndimage.rotate(original_image,45)
## scipy.misc.imresize was removed from SciPy; cv2.resize does the same job
resized_image=cv2.resize(original_image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR)
rows,cols=original_image.shape
## keep the middle third of the rows and the middle half of the columns
croped_image = original_image[int(rows / 3): -int(rows / 3), int(cols / 4): - int(cols / 4)]

fig1, axes_array = plt.subplots(2, 3)
fig1.set_size_inches(9,6)
image_plot = axes_array[0][0].imshow(original_image ,cmap=plt.cm.gray)
axes_array[0][0].set(title='Original')
image_plot = axes_array[0][1].imshow(flipud_image,cmap=plt.cm.gray)
axes_array[0][1].set(title='Flipped up-down')
image_plot = axes_array[0][2].imshow(fliplr_image,cmap=plt.cm.gray)
axes_array[0][2].set(title='Flipped left-right')
image_plot = axes_array[1][0].imshow(rotated_image,cmap=plt.cm.gray)
axes_array[1][0].set(title='Rotated')
image_plot = axes_array[1][1].imshow(resized_image,cmap=plt.cm.gray)
axes_array[1][1].set(title='Resized')
image_plot = axes_array[1][2].imshow(croped_image,cmap=plt.cm.gray)
axes_array[1][2].set(title='Cropped')
plt.show()
\end{python}

\subsubsection{Operating on HSV colorspace}
\begin{python}
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from PIL import Image

def demo_rgb_to_hsv(original_image,reduce_intensity_factor=0.5):
    """Show an image next to a copy whose HSV value channel is scaled.

    original_image          -- image convertible to an array with values 0..255
    reduce_intensity_factor -- factor applied to the V (intensity) channel
    """
    original_rgb_float= np.asarray(original_image,dtype=float)/255.0
    original_rgb_float = original_rgb_float[:,:,:3]  # drop an alpha channel if present
    hsv_image=matplotlib.colors.rgb_to_hsv(original_rgb_float)
    hsv_image_processed=hsv_image.copy()
    hsv_image_processed[:,: ,2]=hsv_image[:,: ,2]*reduce_intensity_factor
    rgb_image_processed=matplotlib.colors.hsv_to_rgb(hsv_image_processed)
    fig1, axes_array = plt.subplots(1, 2)
    fig1.set_size_inches(8,4)
    image_plot = axes_array[0].imshow(original_rgb_float) # Show the RGB image
    axes_array[0].axis('off')
    axes_array[0].set(title='RGB Image')
    image_plot = axes_array[1].imshow(rgb_image_processed) # Show the intensity reduced image
    axes_array[1].axis('off')
    axes_array[1].set(title='Intensity Reduced Image')
    plt.show()

rgb_image_int = Image.open("kiz-kulesi.jpg")
demo_rgb_to_hsv(rgb_image_int)
\end{python}

\subsubsection{1d Gaussian Kernel}
\begin{python}
import numpy as np
import matplotlib.pyplot as plt

def display_1d_gaussian(mean=0.0,sigma=0.5):
    """Plot the Gaussian density with the given mean and sigma on [-10, 10]."""
    x=np.linspace(-10,10,1000)
    y= (1/np.sqrt(2*np.pi*sigma**2))*np.exp(-((x-mean)**2)/(2*sigma**2))
    fig, axes1 = plt.subplots(1, 1)
    fig.set_size_inches(6,3)
    axes1.set(xlabel="X",ylabel="Y",title='Gaussian Curve',ylim=(0,1))
    plt.grid(True)
    axes1.plot(x,y,color='gray')
    plt.fill_between(x,y,0,color='#c0f0c0')
    plt.show()

display_1d_gaussian()
\end{python}

\subsubsection{2d Gaussian Kernel Image}
\begin{python}
import scipy.stats
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import LinearLocator, FormatStrFormatter

def display_gaussian_kernel(sigma=1.0):
    """Show a 2d Gaussian with standard deviation sigma as an image and as a surface."""
    X = np.linspace(-5, 5, 400)
    Y = np.linspace(-5, 5, 400)
    X, Y = np.meshgrid(X, Y)
    mu = np.array([0.0, 0.0])
    covariance = np.diag(np.array([sigma, sigma])**2)
    ## evaluate the density at every grid point, then restore the grid shape
    XY = np.column_stack([X.flat, Y.flat])
    z = scipy.stats.multivariate_normal.pdf(XY, mean=mu, cov=covariance)
    Z = z.reshape(X.shape)
    # Plot the surface.
    fig = plt.figure()
    fig.set_size_inches(8,4)
    ax1 = fig.add_subplot(121)
    ax1.imshow(Z)
    ax2 = fig.add_subplot(122, projection='3d')
    surf = ax2.plot_surface(X, Y, Z, cmap=plt.cm.coolwarm,
                            linewidth=0, antialiased=False)
    # Customize the z axis.
    ax2.set_zlim(0, .2)
    ax2.zaxis.set_major_locator(LinearLocator(10))
    ax2.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
    # Add a color bar which maps values to colors.
    fig.colorbar(surf, shrink=0.5, aspect=5)
    plt.show()

display_gaussian_kernel()
\end{python}

\subsubsection{find horizontal lines}
\begin{python}
import cv2
import matplotlib.pyplot as plt
# Load image, convert to grayscale, Otsu's threshold
image = cv2.imread('kiz-kulesi.jpg')
result = image.copy()
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Detect horizontal lines
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40,1))
detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
## findContours returns 2 values in OpenCV 4 and 3 values in OpenCV 3
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(result, [c], -1, (36,255,12), 2)
plt.figure(figsize = (20,9))
## OpenCV images are BGR, matplotlib expects RGB
plt.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
\end{python}

\subsubsection{Drawing text on image}
\begin{python}
import cv2
import matplotlib.pyplot as plt
# path
path = r'kiz-kulesi.jpg'
# Reading an image in default mode
image = cv2.imread(path)
# font
font = cv2.FONT_HERSHEY_SIMPLEX
# position
org = (40, 40)
# fontScale
fontScale = 1
# Green color in BGR
color = (0, 255, 0)
# Line thickness of 2 px
thickness = 2
image = cv2.putText(image, 'Hello', org, font,
                    fontScale, color, thickness, cv2.LINE_AA)
# Displaying the image (converted from BGR to RGB for matplotlib)
img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.figure(figsize = (20,9))
plt.imshow(img_rgb)
\end{python}

\subsection{Template Matching}
\begin{python}
from matplotlib import pyplot as plt
import numpy as np
import cv2
import imutils

def multiscaleTemplateMatching(imFileToLoad,templateFileToLoad):
    template = cv2.imread(templateFileToLoad)
    template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    template = cv2.Canny(template, 50, 200)
    (tH, tW) = template.shape[:2]
    # loop over the images to find the template in
    image = cv2.imread(imFileToLoad)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    found = None
    # loop over the scales of the image
    for scale in np.linspace(0.2, 1.0, 20)[::-1]:
        # resize
# the image according to the scale, and keep track
        # of the ratio of the resizing
        resized = imutils.resize(gray, width = int(gray.shape[1] * scale))
        r = gray.shape[1] / float(resized.shape[1])
        # if the resized image is smaller than the template, then break
        # from the loop
        if resized.shape[0] < tH or resized.shape[1] < tW:
            break
        # detect edges in the resized, grayscale image and apply template
        # matching to find the template in the image
        edged = cv2.Canny(resized, 50, 200)
        result = cv2.matchTemplate(edged, template, cv2.TM_CCOEFF)
        (_, maxVal, _, maxLoc) = cv2.minMaxLoc(result)
        # check to see if the iteration should be visualized
        # (debug aid: the clone is drawn on but not displayed here)
        if True:
            # draw a bounding box around the detected region
            clone = np.dstack([edged, edged, edged])
            cv2.rectangle(clone, (maxLoc[0], maxLoc[1]),
                          (maxLoc[0] + tW, maxLoc[1] + tH), (0, 0, 255), 2)
        # if we have found a new maximum correlation value, then update
        # the bookkeeping variable
        if found is None or maxVal > found[0]:
            found = (maxVal, maxLoc, r)
    # no scale was tried when the template is larger than the full-size image
    if found is None:
        raise ValueError("template is larger than the image")
    # unpack the bookkeeping variable and compute the (x, y) coordinates
    # of the bounding box based on the resized ratio
    (_, maxLoc, r) = found
    (startX, startY) = (int(maxLoc[0] * r), int(maxLoc[1] * r))
    (endX, endY) = (int((maxLoc[0] + tW) * r), int((maxLoc[1] + tH) * r))
    # return the bounding box of the detected result
    return [startX, startY, endX, endY]

## example file names: replace them with your own image and template
shot = "shot.png"
templateFileToLoad = "template.png"
img_bgr = cv2.imread(shot)
startX, startY, endX, endY = multiscaleTemplateMatching(shot,templateFileToLoad)
cv2.rectangle(img_bgr, (startX, startY), (endX, endY), (0, 0, 255), 2)
## OpenCV images are BGR, matplotlib expects RGB
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
plt.figure(figsize = (20,9))
plt.imshow(img_rgb)
\end{python}

\subsection{Overlapping bounding box removal: nonmaxima suppression}
\begin{python}
import numpy as np

def NMS(boxes, overlapThresh = 0.4):
    # Return an empty list, if no boxes given
    if len(boxes) == 0:
        return []
    x1 = boxes[:, 0] # x coordinate of the top-left corner
    y1 = boxes[:, 1] # y coordinate of the top-left corner
    x2 = boxes[:, 2] # x coordinate of the bottom-right corner
    y2 = boxes[:, 3] # y coordinate
# of the bottom-right corner
    # Compute the area of the bounding boxes
    areas = (x2 - x1 + 1) * (y2 - y1 + 1) # We add 1, because the pixel at the start as well as at the end counts
    # The indices of all boxes at start. We will remove redundant indices one by one.
    indices = np.arange(len(x1))
    for i,box in enumerate(boxes):
        # Create temporary indices: the boxes still kept, except box i itself
        temp_indices = indices[indices!=i]
        # Find out the coordinates of the intersection box
        xx1 = np.maximum(box[0], boxes[temp_indices,0])
        yy1 = np.maximum(box[1], boxes[temp_indices,1])
        xx2 = np.minimum(box[2], boxes[temp_indices,2])
        yy2 = np.minimum(box[3], boxes[temp_indices,3])
        # Find out the width and the height of the intersection box
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        # compute the ratio of overlap
        overlap = (w * h) / areas[temp_indices]
        # if the current bounding box has an overlap bigger than the threshold
        # with any other box, remove its index. The comparison must happen
        # element-wise inside np.any: np.any(overlap) > overlapThresh would
        # compare True/False with the threshold.
        if np.any(overlap > overlapThresh):
            indices = indices[indices != i]
    #return only the boxes at the remaining indices
    return boxes[indices].astype(int)
\end{python}

\subsection{SingleScale Multiple Template Matching}
\begin{python}
import cv2
import numpy as np
import matplotlib.pyplot as plt

def singleScaleMultipleTemplateMatching(imageFileName, templateFileName):
    print("[INFO] loading images...")
    image = cv2.imread(imageFileName)
    img_rgb = image.copy()
    template = cv2.imread(templateFileName)
    (tH, tW) = template.shape[:2]
    # convert both the image and template to grayscale
    imageGray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    templateGray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    # perform template matching
    print("[INFO] performing template matching...")
    result = cv2.matchTemplate(imageGray, templateGray,
                               cv2.TM_CCOEFF_NORMED)
    # keep every location whose normalized score is at least 0.75
    (yCoords, xCoords) = np.where(result >= 0.75)
    clone = image.copy()
    print("[INFO] {} matched locations *before* NMS".format(len(yCoords)))
    # loop over our starting (x, y)-coordinates
    for (x, y) in zip(xCoords,
yCoords):
        # draw the bounding box on the image
        cv2.rectangle(clone, (x, y), (x + tW, y + tH),
                      (255, 0, 0), 3)
    # initialize our list of rectangles
    rects = []
    # loop over the starting (x, y)-coordinates again
    for (x, y) in zip(xCoords, yCoords):
        # update our list of rectangles
        rects.append((x, y, x + tW, y + tH))
    # apply non-maxima suppression to the rectangles
    pick = NMS(np.array(rects))
    # pick = rects
    print("[INFO] {} matched locations *after* NMS".format(len(pick)))
    # loop over the final bounding boxes
    for (startX, startY, endX, endY) in pick:
        # draw the bounding box on the image
        cv2.rectangle(img_rgb, (startX, startY), (endX, endY),
                      (0, 255, 0), 2)
    return pick, img_rgb

pick , img_rgb = singleScaleMultipleTemplateMatching("cropped2.png","template.png")
plt.figure(figsize = (20,9))
## the returned image is in OpenCV's BGR order; convert it for matplotlib
plt.imshow(cv2.cvtColor(img_rgb, cv2.COLOR_BGR2RGB))
\end{python}

\subsection{Finding and Plotting Contours}
\begin{python}
import cv2

def findAndPlotContours(fileName, blob_area_thresh=20):
    img = cv2.imread(fileName, cv2.IMREAD_COLOR)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ## adaptive threshold with a 101x101 neighbourhood and offset 3
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 101, 3)
    ### following morphology open and close can be applied.
#kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))
    #blob = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
    #kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9,9))
    #blob = cv2.morphologyEx(blob, cv2.MORPH_CLOSE, kernel)
    blob = thresh
    # invert blob
    blob = (255 - blob)
    # Get contours
    cnts = cv2.findContours(blob, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    ## findContours returns 2 values in OpenCV 4 and 3 values in OpenCV 3
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    ## select the contours with an area larger than blob_area_thresh
    cnts = [c for c in cnts if cv2.contourArea(c) > blob_area_thresh]
    #big_contour = max(cnts, key=cv2.contourArea)
    ## draw the contours on a white buffer image of the same size
    result = img.copy()
    result[:,:,0] = 255
    result[:,:,1] = 255
    result[:,:,2] = 255
    for c in cnts:
        cv2.drawContours(result, [c], -1, (0,0,255), 1)
    ## return buffer image and all selected contours
    ## (the list is empty when nothing passes the area threshold)
    return result, cnts

result, cnts = findAndPlotContours("kiz-kulesi.jpg",20)
\end{python}

\subsection{Circle Detection}
\begin{python}
import matplotlib.pyplot as plt
import numpy as np
import cv2
img = cv2.imread("cropped2.png", cv2.IMREAD_COLOR)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
detected_circles = cv2.HoughCircles(gray,
                   cv2.HOUGH_GRADIENT, 1, 20, param1 = 50,
                   param2 = 30, minRadius = 1, maxRadius = 400)
## HoughCircles returns None when no circle is found
if detected_circles is not None:
    ## the results are floats, but cv2.circle needs integer coordinates
    detected_circles = np.uint16(np.around(detected_circles))
    for pt in detected_circles[0, :]:
        a, b, r = pt[0], pt[1], pt[2]
        # Draw the circumference of the circle.
        cv2.circle(img, (a, b), r, (0, 255, 0), 2)
        # Draw a small circle (of radius 1) to show the center.
        cv2.circle(img, (a, b), 1, (0, 0, 255), 3)
plt.figure(figsize = (20,9))
plt.imshow(img)
\end{python}

\subsection{Connected Components Analysis}
\begin{python}
import matplotlib.pyplot as plt
import numpy as np
import cv2
img = cv2.imread("cropped2.png", cv2.IMREAD_COLOR)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
threshold = cv2.threshold(gray, 0, 255,
    cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
## 4-connectivity, 32-bit integer label image
analysis = cv2.connectedComponentsWithStats(threshold, 4, cv2.CV_32S)
(totalLabels, label_ids, values, centroid) = analysis
#plt.figure(figsize = (20,9))
#plt.imshow(threshold)
# Loop through each component (label 0 is skipped)
output = np.zeros(gray.shape, dtype="uint8")
for i in range(1, totalLabels):
    area = values[i, cv2.CC_STAT_AREA]
    if (area > 110) and (area < 900):
        # Labels stores all the IDs of the components on the each pixel
        # It has the same dimension as the threshold
        # So we'll check the component
        # then convert it to 255 value to mark it white
        componentMask = (label_ids == i).astype("uint8") * 255
        # Creating the Final output mask
        output = cv2.bitwise_or(output, componentMask)
plt.figure(figsize = (20,9))
plt.imshow(output)
\end{python}

\subsection{Fit ellipses to objects}
\begin{python}
import matplotlib.pyplot as plt
import cv2
img = cv2.imread("cropped2.png", cv2.IMREAD_COLOR)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
## neighbourhood size of the adaptive threshold: must be odd and larger than 1
blockSize = 11
gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, blockSize, 7.0)
cnts = cv2.findContours(gray,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
## findContours returns 2 values in OpenCV 4 and 3 values in OpenCV 3
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
ellipses = []
for cnt in cnts:
    ## fitEllipse needs at least 5 points
    if len(cnt) >= 5:
        ellipse=cv2.fitEllipse(cnt)
        print(ellipse)
        ellipses.append(ellipse)
        centCoord = ( int(ellipse[0][0]), int(ellipse[0][1]) )
        ## fitEllipse reports full axis lengths, cv2.ellipse expects half axes
        axisLen = ( int(ellipse[1][0] / 2), int(ellipse[1][1] / 2) )
        angle = ellipse[2]
        ## angle filtering
        #offSet = np.min( np.fabs( [angle, angle-90, angle-180, angle-270, angle-360]) )
        #if offSet < 5:
        img = cv2.ellipse(img, centCoord, axisLen, angle, 0, 360, (0,0,255))
# cv2.drawContours(img,cnts,-1,(150,10,255),2)
plt.figure(figsize = (20,9))
plt.imshow(img)
\end{python}

\section{Numpy}
\subsection{Fitting}
\subsubsection{Curve fitting}
\begin{python}
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

def generateQuadraticData():
    """Return 100 points of y = a*x^2 + b*x + c plus uniform noise in [0, 0.1)."""
    x = np.random.rand(100) - 0.5
    u = 0.1 * np.random.rand(100)
    a = 1
    b = 0.0
    c = 0.1
    y = a * np.multiply(x,x) + np.multiply(x,b) + c + u
    #plt.scatter(x,y)
    return x,y

x,y = generateQuadraticData()
X = x.reshape(-1,1) # for one feature problems
###We compare nonlinear regression here with different power
regr = LinearRegression()
quadratic = PolynomialFeatures(degree=2)
cubic = PolynomialFeatures(degree=3)
X_quad = quadratic.fit_transform(X)
X_cubic = cubic.fit_transform(X)
X_fit = np.arange(X.min(), X.max(), 0.05)[:, np.newaxis]

regr = regr.fit(X, y)
y_lin_fit = regr.predict(X_fit)
linear_r2 = r2_score(y, regr.predict(X))

## the transformers are already fitted on X: only transform the evaluation grid
regr = regr.fit(X_quad, y)
y_quad_fit = regr.predict(quadratic.transform(X_fit))
quadratic_r2 = r2_score(y, regr.predict(X_quad))

regr = regr.fit(X_cubic, y)
y_cubic_fit = regr.predict(cubic.transform(X_fit))
cubic_r2 = r2_score(y, regr.predict(X_cubic))

plt.scatter(X, y, label='training points', color='lightgray')
plt.plot(X_fit, y_lin_fit, label='linear (d=1), $R^2=%.2f$' % linear_r2, color='blue', lw=2, linestyle=':')
plt.plot(X_fit, y_quad_fit, label='quadratic (d=2), $R^2=%.2f$' % quadratic_r2, color='red', lw=2, linestyle='-')
plt.plot(X_fit, y_cubic_fit, label='cubic (d=3), $R^2=%.2f$' % cubic_r2, color='green', lw=2, linestyle='--')
plt.legend(loc='upper right')
plt.tight_layout()
plt.show()
\end{python}

\subsubsection{Ransac curve fitting}
\begin{python}
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn import linear_model, datasets

def generateQuadraticData(noiseFraction = 0.2):
    """Return quadratic data followed by uniformly random outliers.

    noiseFraction -- number of outliers as a fraction of the 100 data points
    """
    nDataPoints = 100
    nNoisePoints = int(nDataPoints * noiseFraction)
    ## data
    a, b, c = 1, 0.0, 0.1
    x = np.random.rand(nDataPoints) - 0.5
    y = a * np.multiply(x,x) + np.multiply(x,b) + c
    ## outliers
    u = np.random.rand(nNoisePoints, 2)
    x = np.concatenate( (x,u[:,0]), axis=0)
    y = np.concatenate( (y,u[:,1]), axis=0)
    # plt.scatter(x,y)
    return x,y

noiseFraction = 0.7
x,y = generateQuadraticData(noiseFraction)
## 1D data reshape.
X = x.reshape(-1,1) # for one feature problems
quadratic = PolynomialFeatures(degree=2)
X_quad = quadratic.fit_transform(X)
X_fit = np.arange(2*np.min(X), 2*np.max(X), 0.05)[:, np.newaxis] #evaluation interval

regr = linear_model.RANSACRegressor()
regr = regr.fit(X_quad, y)
## the transformer is already fitted on X: only transform the evaluation grid
y_quad_fit = regr.predict(quadratic.transform(X_fit))
quadratic_r2 = r2_score(y, regr.predict(X_quad))
## points that RANSAC used for the fit, and the rest
inlier_mask = regr.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)

plt.figure(figsize = (10,9))
plt.plot(X_fit, y_quad_fit, label='quadratic (d=2), $R^2=%.2f$' % quadratic_r2,color='blue', lw=1,linestyle='-')
plt.scatter(X[inlier_mask], y[inlier_mask], color="green", marker=".", label="Inliers")
plt.scatter(X[outlier_mask], y[outlier_mask], color="red", marker=".", label="Outliers")
plt.legend(loc='upper right')
plt.tight_layout()
plt.show()
\end{python}

\section{Scikit-Learn}
Scikit-learn provides implementations of many machine learning models.
\subsection{Linear Regression}
\begin{python}
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Load the diabetes dataset
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)
# Use only one feature (column 2); np.newaxis keeps the array two-dimensional
diabetes_X = diabetes_X[:, np.newaxis, 2]
# Split the data into training/testing sets (the last 20 samples are the test set)
diabetes_X_train = diabetes_X[:-20]
diabetes_X_test = diabetes_X[-20:]
# Split the targets into training/testing sets
diabetes_y_train = diabetes_y[:-20]
diabetes_y_test = diabetes_y[-20:]
# Create linear regression object
regr = linear_model.LinearRegression()
# Train the model using the training sets
regr.fit(diabetes_X_train, diabetes_y_train)
# Make predictions using the testing set
diabetes_y_pred = regr.predict(diabetes_X_test)
# The coefficients
print("Coefficients: \n", regr.coef_)
# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(diabetes_y_test, diabetes_y_pred))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(diabetes_y_test, diabetes_y_pred))
# Plot outputs: test samples as points, the fitted line on top
plt.scatter(diabetes_X_test, diabetes_y_test, color="black")
plt.plot(diabetes_X_test, diabetes_y_pred, color="blue", linewidth=3)
# Empty tuples remove the tick marks
plt.xticks(())
plt.yticks(())
plt.show()
\end{python}

%\section*{Appendix}
%\bibliographystyle{plain}
%\bibliography{references}
\end{document}