\documentclass [11pt,a4paper] { article}
%%\documentclass[8pt,a4paper,twocolumn]{article}
\usepackage [a4paper,left=1cm,right=1cm,top=1.0cm,bottom=2.5cm] { geometry}
\usepackage [turkish] { babel}
\usepackage { times}
\usepackage { graphicx}
\usepackage { natbib}
\usepackage { algorithm}
\usepackage { algorithmic}
\usepackage [utf8] { inputenc}
\usepackage { nomencl}
\usepackage { commath}
\usepackage { url}
\usepackage { subfig}
\usepackage [colorinlistoftodos] { todonotes}
\usepackage { epstopdf}
\usepackage { amsmath}
\usepackage { mathtools}
\usepackage { paralist}
\usepackage { hyperref}
\usepackage { pythonhighlight}
\usepackage { listings}
% Colour palette referenced by the listing style "mystyle" defined below.
\definecolor { codegreen} { rgb} { 0,0.6,0}
\definecolor { codegray} { rgb} { 0.5,0.5,0.5}
\definecolor { codepurple} { rgb} { 0.58,0,0.82}
\definecolor { backcolour} { rgb} { 0.95,0.95,0.92}
% Listing style "mystyle": tinted background, coloured comments/keywords/strings,
% footnote-sized code, tiny line numbers on the left, long lines are wrapped,
% and captions are placed below the listing.
\lstdefinestyle { mystyle} {
backgroundcolor=\color { backcolour} ,
commentstyle=\color { codegreen} ,
keywordstyle=\color { magenta} ,
numberstyle=\tiny \color { codegray} ,
stringstyle=\color { codepurple} ,
basicstyle=\footnotesize ,
breakatwhitespace=false,
breaklines=true,
captionpos=b,
keepspaces=true,
numbers=left,
numbersep=5pt,
showspaces=false,
showstringspaces=false,
showtabs=false,
tabsize=2
}
\def \ExtendVersion { 1}
\newcommand { \credit } [1]{ Thanks to: #1}
\input { taypack.tex}
%turning on/off comments
\usepackage { comment}
\includecomment { comment} %show comments
%\excludecomment{comment} %do not show comments
\lstset { style=mystyle}
\begin { document}
\lstset { language=Python}
% first the title is needed
\title{Python: From Basics to the Extreme}
% the name(s) of the author(s) follow(s) next
%\author{Keke\c{c}}
%\author{İbrâhim Taygun Keke\c{c}}
%\author{İbrâhim Taygun Keke\c{c}}
%\author{UL}
\maketitle
%\begin{abstract}
%\end{abstract}
% Counter for the numbered "Madde" items; it is reset at every \section.
\newcounter { Madde} [section]
% Environment Madde[<title>]: steps the counter, starts a new paragraph and
% prints the bold label "Md <number>. <title>"; the body follows in roman type.
\newenvironment { Madde} [1][]{ \refstepcounter { Madde} \par \medskip
\textbf { Md ~\theMadde . #1} \rmfamily } { \medskip }
% \citemd{<n>}: prints a parenthesised reference "(M. <n>)" with the label in bold.
\newcommand { \citemd } [1]{ (\textbf { M. { #1} } )}
\tableofcontents
\section { Introduction}
This reference book covers the most frequently used concepts of the Python language.
The reader is assumed to be familiar with the abstract concepts of variables, loops, functions and such.
At each section, a working code example is provided.
\textbf { Correct usage of the material.}
\begin { itemize}
\item analyze the code example
\item copy/paste and run the example
\item check whether the output of the code makes sense.
\end { itemize}
\section { Basics}
\subsection { Variables and Data Types}
You can declare integer and string variables.
\begin { python}
x = 5
y = "John"
print(x)
#5
print(y)
#John
\end { python}
Declare string, integer or float.
\begin { python}
x = str(3) # x will be '3'
y = int(3) # y will be 3
z = float(3) # z will be 3.0
print(type(x))
#<class 'str'>
print(type(y))
#<class 'int'>
\end { python}
Different variable types exist. But for now, focus on string, integer, float, list, tuple, dictionary, and sets.
\begin { python}
Text Type: str
Numeric Types: int, float, complex
Sequence Types: list, tuple, range
Mapping Type: dict
Set Types: set, frozenset
Boolean Type: bool
Binary Types: bytes, bytearray, memoryview
None Type: NoneType
\end { python}
A list is a collection which is ordered and changeable. It allows duplicate members.
A tuple is a collection which is ordered and unchangeable. It allows duplicate members.
A set is a collection which is unordered and unindexed. Its items cannot be changed in place, but items can be added and removed. It allows no duplicate members.
A dictionary is a collection of key--value pairs which is ordered (by insertion order, since Python 3.7) and changeable. It allows no duplicate keys.
\subsection { List}
A list is a built-in Python data type. A list can store multiple values in a single variable.
\begin { python}
thislist = ["apple", "banana", "cherry"]
print(thislist)
#['apple', 'banana', 'cherry']
\end { python}
\subsubsection { access list items}
\begin { python}
thislist = ["apple", "banana", "cherry"]
print(thislist[1])
#banana
\end { python}
\subsubsection { change list item value}
\begin { python}
thislist = ["apple", "banana", "cherry"]
thislist[1] = "blackcurrant"
print(thislist)
#['apple', 'blackcurrant', 'cherry']
\end { python}
\subsubsection { change a range of list item values}
\begin { python}
thislist = ["apple", "banana", "cherry", "orange", "kiwi", "mango"]
thislist[1:3] = ["blackcurrant", "watermelon"]
print(thislist)
#['apple', 'blackcurrant', 'watermelon', 'orange', 'kiwi', 'mango']
\end { python}
\subsubsection { add list items}
\begin { python}
thislist = ["apple", "banana", "cherry"]
thislist.append("orange")
print(thislist)
#['apple', 'banana', 'cherry', 'orange']
\end { python}
\subsubsection { remove list items}
\begin { python}
thislist = ["apple", "banana", "cherry"]
thislist.remove("banana")
print(thislist)
#['apple', 'cherry']
\end { python}
\paragraph { remove duplicates from the list}
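An element is kept only the first time it is seen: if it is not yet in the set of seen elements, it is added to that set (\texttt{seen.add(x)} returns \texttt{None}, so \texttt{not seen.add(x)} is always true) and appended to the result list.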
\begin { python}
a = [1,2,3,2,1,5,6,5,5,5]
seen = set()
uniq = [x for x in a if x not in seen and not seen.add(x)]
print(uniq)
#[1, 2, 3, 5, 6]
\end { python}
\subsubsection { looping a list}
\begin { python}
thislist = ["apple", "banana", "cherry"]
for x in thislist:
print(x)
#apple
#banana
#cherry
\end { python}
\subsubsection { list slicing}
You can access multiple elements at once with the slicing operation.
\begin { python}
thislist = ["apple", "banana", "cherry", "watermelon", "grape", "kiwi"]
print(thislist[1:4] )
#['banana', 'cherry', 'watermelon']
## Here, we access each 2nd element from 1st to 6th element of the list.
print(thislist[1:6:2])
#['banana', 'watermelon', 'kiwi']
## Reverse slicing: access elements from end to beginning of the list.
print(thislist[6:1:-1])
#['kiwi', 'grape', 'watermelon', 'cherry']
\end { python}
\subsubsection { list comprehension}
This powerful construct lets you write a traditional loop in a single line. The two programs below produce the same result.
\begin { python}
fruits = ["apple", "banana", "cherry", "kiwi", "mango"]
newlist = []
for x in fruits:
if "a" in x:
newlist.append(x)
print(newlist)
# ['apple', 'banana', 'mango']
fruits = ["apple", "banana", "cherry", "kiwi", "mango"]
newlist = [x for x in fruits if "a" in x]
print(newlist)
# ['apple', 'banana', 'mango']
\end { python}
\subsubsection { list comprehension: condition + operation}
\begin { python}
fruits = ["apple", "banana", "cherry", "kiwi", "mango"]
## condition: keep only the fruits containing "a"; operation: upper-case them
newlist = [x.upper() for x in fruits if "a" in x]
print(newlist)
#['APPLE', 'BANANA', 'MANGO']
\end { python}
\subsubsection { sorting a list}
\begin { python}
thislist = ["orange", "mango", "kiwi", "pineapple", "banana"]
thislist.sort()
print(thislist)
#['banana', 'kiwi', 'mango', 'orange', 'pineapple']
\end { python}
\subsubsection { copying a list}
\begin { python}
thislist = ["apple", "banana", "cherry"]
mylist = thislist.copy()
print(mylist)
#['apple', 'banana', 'cherry']
\end { python}
\subsubsection { join two lists}
\begin { python}
list1 = ["a", "b", "c"]
list2 = [1, 2, 3]
list3 = list1 + list2
print(list3)
#['a', 'b', 'c', 1, 2, 3]
\end { python}
\subsubsection { looping through multiple Lists}
\begin { python}
a = ['a1', 'a2', 'a3']
b = ['b1', 'b2']
for x, y in zip(a, b):
print(x, y)
#a1 b1
#a2 b2
\end { python}
\subsubsection { other list methods}
\begin { python}
Method Description
append() Adds an element at the end of the list
clear() Removes all the elements from the list
copy() Returns a copy of the list
count() Returns the number of elements with the specified value
extend() Add the elements of a list (or any iterable), to the end of the current list
index() Returns the index of the first element with the specified value
insert() Adds an element at the specified position
pop() Removes the element at the specified position
remove() Removes the item with the specified value
reverse() Reverses the order of the list
sort() Sorts the list
\end { python}
\subsection { Dictionary}
Unlike in a list, each entry of a dictionary has a key and a value. These key--value pairs constitute the content of the dictionary.
\begin { python}
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
print(thisdict)
#{ 'brand': 'Ford', 'model': 'Mustang', 'year': 1964}
\end { python}
\subsubsection { access dictionary values}
\begin { python}
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
print(thisdict["brand"])
#Ford
\end { python}
\subsubsection { access dictionary keys or values}
\begin { python}
car = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
x = car.keys()
print(x)
#dict_ keys(['brand', 'model', 'year'])
v = car.values()
print(v)
#dict_ values(['Ford', 'Mustang', 1964])
\end { python}
\subsubsection { inverse dictionary lookup}
\begin { python}
d = { "a":0, "b":1, "c":2}
dict(zip(d.values(), d.keys()))
#{ 0: 'a', 1: 'b', 2: 'c'}
\end { python}
\subsubsection { change dictionary items}
\begin { python}
car = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
x = car.items()
print(x) #before the change
#dict_ items([('brand', 'Ford'), ('model', 'Mustang'), ('year', 1964)])
car["year"] = 2020
print(x) #after the change
#dict_ items([('brand', 'Ford'), ('model', 'Mustang'), ('year', 2020)])
\end { python}
\subsubsection { add dictionary items}
\begin { python}
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
thisdict["color"] = "red"
print(thisdict)
#{ 'brand': 'Ford', 'model': 'Mustang', 'year': 1964, 'color': 'red'}
\end { python}
\subsubsection { remove dictionary item}
\begin { python}
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
del thisdict["model"]
print(thisdict)
#{ 'brand': 'Ford', 'year': 1964}
\end { python}
\subsubsection { looping a dictionary}
\begin { python}
thisdict={ 'brand': 'Ford', 'year': 1964}
for x, y in thisdict.items():
print(x, y)
#brand Ford
#year 1964
\end { python}
\subsubsection { copy a dictionary}
\begin { python}
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
mydict = thisdict.copy()
print(mydict)
#{ 'brand': 'Ford', 'model': 'Mustang', 'year': 1964}
\end { python}
\subsubsection { intersections of two dictionaries}
\begin { python}
some_dict = {'zope': 'zzz', 'python': 'rocks'}
another_dict = {'python': 'rocks', 'perl': 'interesting'}
## keys that are present in both dictionaries
common = [k for k in some_dict if k in another_dict]
print("Intersects:", common)
#Intersects: ['python']
\end { python}
\subsubsection { other dictionary operations}
\begin { python}
clear() Removes all the elements from the dictionary
copy() Returns a copy of the dictionary
fromkeys() Returns a dictionary with the specified keys and value
get() Returns the value of the specified key
items() Returns a list containing a tuple for each key value pair
keys() Returns a list containing the dictionary's keys
pop() Removes the element with the specified key
popitem() Removes the last inserted key-value pair
setdefault() Returns the value of the specified key. If the key does not exist: insert the key, with the specified value
update() Updates the dictionary with the specified key-value pairs
values() Returns a list of all the values in the dictionary
\end { python}
\subsection { Sets}
Python sets are like lists: they are used to store multiple items in a single variable. Set elements are unordered. Items are unchangeable, i.e.\ an item cannot be altered after creation, but we can add elements to the set and remove elements from it. Unlike lists, sets cannot contain two identical elements.
\begin { python}
thisset = { "apple", "banana", "cherry"}
print(thisset)
#{ 'apple', 'cherry', 'banana'}
##length
print(len(thisset))
#3
\end { python}
Sets can have different objects as elements.
\begin { python}
set1 = { "abc", 34, True, 40, "male"}
\end { python}
\subsubsection { access elements}
\begin { python}
thisset = { "apple", "banana", "cherry"}
for x in thisset:
print(x)
#apple
#cherry
#banana
\end { python}
\subsubsection { check element existence}
\begin { python}
thisset = { "apple", "banana", "cherry"}
print("banana" in thisset)
#True
\end { python}
\subsubsection { add elements}
\begin { python}
thisset = { "apple", "banana", "cherry"}
thisset.add("orange")
print(thisset)
#{ 'orange', 'apple', 'cherry', 'banana'}
\end { python}
\subsubsection { remove elements}
We can remove the set elements with remove() function.
\begin { python}
thisset = { "apple", "banana", "cherry"}
thisset.remove("banana")
print(thisset)
#{ 'apple', 'cherry'}
\end { python}
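Alternatively, we can remove and return an arbitrary element using the pop() function. Sets are unordered, so there is no ``last'' element and the result may differ on your machine.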
\begin { python}
thisset = { "apple", "banana", "cherry"}
x = thisset.pop()
print(x)
#apple
print(thisset)
#{ 'cherry', 'banana'}
\end { python}
\subsubsection { looping sets}
\begin { python}
thisset = { "apple", "banana", "cherry"}
for x in thisset:
print(x)
#apple
#cherry
#banana
\end { python}
\subsubsection { joining/combining/ (union) of sets}
\begin { python}
set1 = { "a", "b" , "c"}
set2 = { 1, 2, 3}
set3 = set1.union(set2)
print(set3)
#{ 'a', 'b', 1, 2, 3, 'c'}
\end { python}
\subsubsection { merge (intersection) of sets}
\begin { python}
x = { "apple", "banana", "cherry"}
y = { "google", "microsoft", "apple"}
z = x.intersection(y)
print(z)
#{ 'apple'}
\end { python}
\subsection { Tuples}
Tuples are used to store multiple items in a single variable.
Elements of tuples are ordered and unchangeable.
\begin { python}
thistuple = ("apple", "banana", "cherry")
print(thistuple)
#('apple', 'banana', 'cherry')
print(len(thistuple))
#3
## a tuple with different objects
tuple1 = ("abc", 34, True, 40, "male")
\end { python}
\subsubsection { accessing elements}
\begin { python}
thistuple = ("apple", "banana", "cherry")
print(thistuple[1])
#banana
##negative indexing
print(thistuple[-1])
#cherry
##slicing
thistuple = ("apple", "banana", "cherry", "orange", "kiwi", "melon", "mango")
print(thistuple[2:5])
#('cherry', 'orange', 'kiwi')
print(thistuple[:4])
#('apple', 'banana', 'cherry', 'orange')
\end { python}
\subsubsection { check element existence}
\begin { python}
thistuple = ("apple", "banana", "cherry")
if "apple" in thistuple:
print("Yes, 'apple' is in the fruits tuple")
#Yes, 'apple' is in the fruits tuple
\end { python}
\subsubsection { unpack elements}
\begin { python}
fruits = ("apple", "banana", "cherry", "strawberry", "raspberry")
(green, yellow, *red) = fruits
print(green)
#apple
print(yellow)
#banana
print(red)
#['cherry', 'strawberry', 'raspberry']
\end { python}
\subsubsection { looping elements}
\begin { python}
thistuple = ("apple", "banana", "cherry")
for x in thistuple:
print(x)
#apple
#banana
#cherry
\end { python}
\subsubsection { joining two tuples}
\begin { python}
tuple1 = ("a", "b" , "c")
tuple2 = (1, 2, 3)
tuple3 = tuple1 + tuple2
print(tuple3)
#('a', 'b', 'c', 1, 2, 3)
\end { python}
\subsubsection { multiply tuples}
\begin { python}
fruits = ("apple", "banana", "cherry")
mytuple = fruits * 2
print(mytuple)
#('apple', 'banana', 'cherry', 'apple', 'banana', 'cherry')
\end { python}
\subsection { If conditional}
If conditional checks for cases in your program.
\begin { python}
if 5 > 2:
print("Five is greater than two!")
#Five is greater than two!
\end { python}
\subsubsection { Elif conditional}
\begin { python}
a = 33
b = 33
if b > a:
print("b is greater than a")
elif a == b:
print("a and b are equal")
#a and b are equal
\end { python}
\subsubsection { Else-If conditional}
\begin { python}
a = 200
b = 33
if b > a:
print("b is greater than a")
elif a == b:
print("a and b are equal")
else:
print("a is greater than b")
#a is greater than b
\end { python}
\subsubsection { Short If}
\begin { python}
if a > b: print("a is greater than b")
#a is greater than b
\end { python}
\subsubsection { Short If-Else}
\begin { python}
a = 2
b = 330
print("A") if a > b else print("B")
# B
\end { python}
\subsubsection { And or conditional}
\begin { python}
a = 200
b = 33
c = 500
if a > b and c > a:
print("Both conditions are True")
# Both conditions are True
if a > b or a > c:
print("At least one of the conditions is True")
#At least one of the conditions is True
\end { python}
\subsubsection { Nested if}
With nested if's you can create branches in your program.
\begin { python}
x = 41
if x > 10:
print("Above ten,")
if x > 20:
print("and also above 20!")
else:
print("but not above 20.")
#Above ten,
#and also above 20!
\end { python}
\subsubsection { Pass}
An \texttt{if} statement cannot have an empty body. If you need an empty statement, use the \texttt{pass} statement to avoid getting an error.
\begin { python}
a = 33
b = 200
if b > a:
pass
\end { python}
\subsection { While Loop}
\begin { python}
i = 1
while i < 4:
print(i)
i += 1
#1
#2
#3
\end { python}
\subsubsection { Break statement}
You can exit the loops immediately with break statement.
\begin { python}
i = 1
while i < 6:
print(i)
if i == 3:
break
i += 1
#1
#2
#3
\end { python}
\subsubsection { Continue statement}
With continue statement we can stop the current iteration, and continue with the next.
\begin { python}
i = 0
while i < 6:
i += 1
if i == 3:
continue
print(i)
#1
#2
#4
#5
#6
\end { python}
\subsubsection { While-Else statement}
\begin { python}
i = 1
while i < 6:
print(i)
i += 1
else:
print("i is no longer less than 6")
#1
#2
#3
#4
#5
#i is no longer less than 6
\end { python}
\subsection { For Loop}
\begin { python}
fruits = ["apple", "banana", "cherry"]
for x in fruits:
print(x)
#apple
#banana
#cherry
\end { python}
\subsubsection { Looping integers}
\begin { python}
for x in range(4):
print(x)
#0
#1
#2
#3
\end { python}
\subsubsection { Looping a string variable}
\begin { python}
for x in "car":
print(x)
#c
#a
#r
\end { python}
\subsubsection { breaking the loop}
\begin { python}
fruits = ["apple", "banana", "cherry"]
for x in fruits:
print(x)
if x == "banana":
break
#apple
#banana
\end { python}
\subsubsection { for loop nested}
\begin { python}
adj = ["red", "big", "tasty"]
fruits = ["apple", "banana", "cherry"]
for x in adj:
for y in fruits:
print(x, y)
#red apple
#red banana
#red cherry
#big apple
#big banana
#big cherry
#tasty apple
#tasty banana
#tasty cherry
\end { python}
\subsubsection { for loop pass statement}
\begin { python}
for x in [0, 1, 2]:
pass
\end { python}
\subsection { Function}
In Python, you can define functions. A function is a named piece of code that you can execute as many times as you need.
\subsubsection { defining a function}
\begin { python}
# define the function
def my_ function():
print("Hello from a function")
\end { python}
\subsubsection { calling a function}
\begin { python}
def my_ function():
print("Hello from a function")
my_ function()
#Hello from a function
\end { python}
\subsubsection { giving function an argument}
\begin { python}
def my_ function(fname):
print(fname + " Refsnes")
my_ function("Emil")
#Emil Refsnes
my_ function("Tobias")
#Tobias Refsnes
my_ function("Linus")
#Linus Refsnes
\end { python}
\subsubsection { giving function multiple arguments}
\begin { python}
def my_ function(fname, lname):
print(fname + " " + lname)
my_ function("Emil", "Refsnes")
#Emil Refsnes
\end { python}
\subsubsection { giving function with default parameters}
\begin { python}
def my_ function(country = "Norway"):
print("I am from " + country)
my_ function("Sweden")
#I am from Sweden
my_ function("India")
#I am from India
my_ function()
#I am from Norway
my_ function("Brazil")
#I am from Brazil
\end { python}
\subsubsection { giving function unknown number of parameters}
\begin { python}
def my_ function(*kids):
print("The youngest child is " + kids[2])
my_ function("Emil", "Tobias", "Linus")
#The youngest child is Linus
\end { python}
\subsubsection { function returning values}
\begin { python}
def my_ function(x):
return 5 * x
print(my_ function(3))
#15
print(my_ function(5))
#25
print(my_ function(9))
#45
\end { python}
\subsubsection { function returning multiple values}
\begin { python}
def my_ function(x):
return 5 * x, x * x
print(my_ function(3))
#(15, 9)
print(my_ function(10))
#(50, 100)
print(type(my_ function(10)))
#<class 'tuple'>
\end { python}
\subsubsection { shortcut function: lambda}
Lambda functions are anonymous one-line functions. They are convenient when you need a small function inside another expression or function.
\begin { python}
x = lambda a : a + 10
print(x(5))
#15
x = lambda a, b : a * b
print(x(5, 6))
#30
x = lambda a, b, c : a + b + c
print(x(5, 6, 2))
#13
\end { python}
\subsection { Modules}
Python modules are libraries (sets of functions and objects), written by you or by others. Using them lets you re-use code that has already been written.
\subsubsection { writing a module}
Save the following to a file named mymodule.py .
\begin { python}
def greeting(name):
print("Hello, " + name)
person1 = {
"name": "John",
"age": 36,
"country": "Norway"
}
\end { python}
\subsubsection { use functions of a module}
\begin { python}
import mymodule
mymodule.greeting("Jonathan")
#Hello, Jonathan
\end { python}
\subsubsection { custom naming a module}
\begin { python}
import mymodule as mx
a = mx.person1["age"]
print(a)
#36
\end { python}
\subsubsection { partial import of a module}
Sometimes you need just one function or one object from a module. In that case you can import only that part; there is no need to import the whole module. This is usually better practice.
\begin { python}
from mymodule import person1
print (person1["age"])
#36
\end { python}
\subsubsection { listing functions of the module}
You can list all the function names in a module with dir command.
\begin { python}
import platform
x = dir(platform)
# too long to write the output... check your program.
\end { python}
\subsection { Classes}
%Python supports object oriented programming (OOP). In default, what we are doing is functional programming. That is,
%the program is a composition of functions. In OOP, similar to the real world, we have objects. These objects are represented by classes in the program.
%A class has attributes, and methods (in other word class functions). We create instances from that class using class constructor.
\begin { python}
class Person:
    """A person with a name and an age."""

    def __init__(self, name, age):
        # the constructor stores its arguments as instance attributes
        self.name = name
        self.age = age

    def myfunc(self):
        """Print a greeting that contains the person's name."""
        print("Hello my name is " + self.name)

## create an instance with the constructor and call its method
p1 = Person("John", 36)
p1.myfunc()
#Hello my name is John
\end { python}
\subsection { Uncategorized}
\subsubsection { add padding to opencv image}
\begin { python}
#cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
## img_dilated is assumed to be an image (numpy array) created earlier in your program
padSize = 500
## pass the colour by keyword: the 7th positional parameter is dst, not value
ib = cv2.copyMakeBorder(img_dilated, padSize, padSize, padSize, padSize, cv2.BORDER_CONSTANT, value=(0, 0, 0))
\end { python}
\subsubsection { find neighboring pairs in list}
\begin { python}
A = [1, 2, 3, 4]
B = [(i,j) for i,j in zip(A, A[1:])]
#[(1, 2), (2, 3), (3, 4)]
\end { python}
\subsubsection { find most frequent elements in list}
\begin { python}
from collections import Counter
mylist = [1,1,1,1,3,4,5,5,5,6,6,7,8,9]
counter = Counter(mylist)
most_ common = counter.most_ common(2)
print(most_ common)
#[(1, 4), (5, 3)] # 4 ones, 3 fives seen.
\end { python}
\subsubsection { cartesian product of two lists}
\begin { python}
import itertools
a = [1,2,3]
b = [4,5,6]
for i in itertools.product( a, b ):
print(i)
\end { python}
\subsubsection { pad integer to have zeros}
\begin { python}
def NDigited(x, n=3):
    """Left-pad x with zeros to at least n characters; x may be an int or a str."""
    s = str(x)  # len() needs a string, so integers are converted first
    return (n - len(s)) * '0' + s
\end { python}
\paragraph { optimized version}
\credit { jnmbk}
\begin { python}
def NDigited(x, n=3):
    """Left-pad x with zeros to at least n characters; x may be an int or a str."""
    # str.zfill does the padding and never truncates longer inputs
    return str(x).zfill(n)
\end { python}
\subsubsection { matplotlib display strings on y-axis}
\begin { python}
yticks(np.arange(5), ('String1', 'String2', 'String3', '4', '5'))
\end { python}
\subsubsection { broadcast image or matrix channels}
This operation is usually needed to turn a single-channel (2D) image into a three-channel (3D) image by repeating the channel.
\begin { python}
nChannels = 3
m3d = np.repeat( m.reshape(m.shape[0], m.shape[1], 1), nChannels, axis=2)
\end { python}
Another identical way to do it.
\begin { python}
nChannels = 3
m3d = np.tile(m[:, :, None], [1, 1, nChannels])
\end { python}
\subsubsection { 1d interpolation of x-y values}
Given a set of 2D points, we fit a curve to these points.
\begin { python}
import numpy as np
from scipy.interpolate import interp1d
xdata = [0,1,2,3,4,5]
ydata = [0,1,4,9,16,25]
f2 = interp1d(xdata, ydata, kind='quadratic')
## interp1d only interpolates: the new x values must stay inside
## [min(xdata), max(xdata)], otherwise a ValueError is raised
## (pass fill_value="extrapolate" to interp1d if you need values outside)
xnew = np.linspace(0, 5, 1000)
ynew = f2(xnew)
\end { python}
\subsection { useful numpy functions}
\begin { python}
## remove empty (size-1) dimensions
import numpy as np
x = np.array([[[0], [1], [2]]])
print(x.shape)
#(1, 3, 1)
x = np.squeeze(x)
print(x.shape)
#(3,)
## stack a list of matrices depth-wise (along the third axis)
x = np.dstack( possibleCurves )
## randomly choose 5 distinct values from the integers 0-99
randIdxs = np.random.choice(100 , 5, replace=False)
## reshape the data into a single column (needed for one-feature problems)
X = x.reshape(-1,1)
\end { python}
\section { String Manipulation, Searching, Sorting} \label { sect:strings}
\subsection { substring search}
\begin { python}
word = 'cart for supermarket'
##substring search: find first occurrence (find returns -1 when nothing is found)
result = word.find('supermarket')
print("Substring 'supermarket' found at index:", result)
#Substring 'supermarket' found at index: 9
##substring search with start/end specification: only word[4:12] (' for sup') is searched
print(word.find('su', 4, 12))
#9
\end { python}
\subsubsection { string between two substrings}
\begin { python}
import re
s = 'asdf=5;iwantthis123jasd'
result = re.search('asdf=5;(.*)123jasd', s)
print(result.group(1))
#iwantthis
\end { python}
\subsubsection { Create index for strings}
\begin { python}
a = ['a', 'b', 'c']
b = dict(map(lambda t: (t[1], t[0]), enumerate(a)))
#{ 'a':0, 'b':1, 'c':2}
\end { python}
\subsection { string concatenation}
\begin { python}
s1 = "myStrFirst"
s2 = "secondString"
s3 = s1 + " " + s2
print(s3)
#myStrFirst secondString
\end { python}
\subsection { string splitting}
\begin { python}
## simple string splitting
txt = "apple#banana#cherry#orange"
x = txt.split("#")
print(x)
#['apple', 'banana', 'cherry', 'orange']
## setting the maxsplit parameter to 1, will return a list with 2 elements!
txt = "apple#banana#cherry#orange"
x = txt.split("#", 1)
#['apple', 'banana#cherry#orange']
\end { python}
\subsection { stripping string}
Remove leading and trailing spaces and specific characters at the beginning and at the end of a string.
\begin { python}
txt = " banana sss "
x = txt.strip()
print("of all fruits", x, "is my favorite")
#of all fruits banana is my favorite
txt = ",,,,,rrttgg.....banana....rrr"
x = txt.strip(",.grt")
print(x)
#banana
\end { python}
\subsection { combining list of strings}
\begin { python}
text = ['Python', 'is', 'a', 'fun', 'programming', 'language']
print(' '.join(text))
# Python is a fun programming language
\end { python}
\section { Input Output Operations} \label { sect:io}
\subsection { create a file}
\begin { python}
f = open("demofile3.txt", "w")
f.write("I have added content!")
f.close()
\end { python}
\subsection { write to a file: fast shortcut}
In this version, you don't have to remember to close the file: it is closed automatically when the \texttt{with} block ends.
\begin { python}
with open("demofile3.txt", "w") as fp:
    # the file is closed automatically when the with-block ends
    fp.write("I have added content!")
\end { python}
\subsection { create directory}
The following program checks for a directory, and creates it if not present.
\begin { python}
import os
directory = "newDirectory"
parent_ dir = "/home/User/Documents"
path = os.path.join(parent_ dir, directory)
os.makedirs(path,exist_ ok = True)
\end { python}
\subsection { remove file}
\begin { python}
import os
fileName = 'myFile.txt' # File name
location = "/home/User/Documents"
path = os.path.join(location, fileName)
if os.path.exists(path):
os.remove(path)
else:
print("The file does not exist")
\end { python}
\subsection { save and load pickle file}
Pickle is Python's built-in binary serialization format. It can store most Python objects. Only load pickle files that you trust, because unpickling can execute arbitrary code.
\begin { python}
import pickle
a = { 'hello': 'world'}
with open('filename.pkl', 'wb') as handle:
pickle.dump(a, handle, protocol=pickle.HIGHEST_ PROTOCOL)
with open('filename.pkl', 'rb') as handle:
b = pickle.load(handle)
print(a == b)
\end { python}
\begin { python}
import pickle

def load_obj(name):
    """Load and return the object stored in the file name + '.pkl'.

    Only load files you trust: unpickling can execute arbitrary code.
    """
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)

def write_obj(name, data):
    """Store data in the file name + '.pkl', so that load_obj(name) reads it back."""
    with open(name + '.pkl', 'wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
\end { python}
\subsection { joining paths}
\begin { python}
import os
path = "/home"
## Join various path components
print(os.path.join(path, "User/Desktop", "file.txt"))
#/home/User/Desktop/file.txt
\end { python}
\subsection { check file existence}
\begin { python}
import os
path = 'D:/Pycharm/USER/testFile.txt'
## isfile returns True only if the path exists and is a regular file
isFile = os.path.isfile(path)
print(isFile)
#False
\end { python}
\subsection { list files in a directory}
\begin { python}
import os
path = "/"
dir_ list = os.listdir(path)
print(dir_ list)
# too many output, please run the code yourself.
\end { python}
\subsection { iterate (traverse) files in a folder}
\begin { python}
import os
## os.walk visits the folder 'Test' and all of its sub-folders;
## topdown=True reports a folder before its sub-folders
for (root, dirs, files) in os.walk('Test', topdown=True):
    print(root)   # path of the folder being visited
    print(dirs)   # names of its sub-folders
    print(files)  # names of the files it contains
# too many output, please run the code yourself.
\end { python}
\subsection { sort files by date}
\begin { python}
import os
search_ dir = "/mydir/"
files = os.listdir(search_ dir)
files = [os.path.join(search_ dir, f) for f in files]
files.sort(key=lambda x: os.path.getmtime(x))
\end { python}
\subsection { write to CSV file}
\begin { python}
import csv
## newline='' lets the csv module control the line endings
## (otherwise extra blank lines can appear on Windows)
with open('employee_file.csv', mode='w', newline='') as employee_file:
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"')
    employee_writer.writerow(['name', 'department', 'birthday month'])
    employee_writer.writerow(['John Smith', 'Accounting', 'November'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March'])
    employee_writer.writerow(['Monica Barker', 'HR', 'December'])
## open the employee_file.csv and you will see:
## name,department,birthday month
## John Smith,Accounting,November
## Erica Meyers,IT,March
## Monica Barker,HR,December
\end { python}
\subsection { read CSV file example}
\begin { python}
import csv
## first create the file that is read below
with open('employee_file.csv', mode='w', newline='') as employee_file:
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"')
    employee_writer.writerow(['name', 'department', 'birthday month'])
    employee_writer.writerow(['John Smith', 'Accounting', 'November'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March'])
    employee_writer.writerow(['Monica Barker', 'HR', 'December'])
## now read the same file back, row by row
with open('employee_file.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        if line_count == 0:
            # the first row holds the column names
            print(f'Column names are {", ".join(row)}')
        else:
            print(f'\t{row[0]} works in the {row[1]} department, and was born in {row[2]}.')
        line_count += 1
    print(f'Processed {line_count} lines.')
#Column names are name, department, birthday month
#	John Smith works in the Accounting department, and was born in November.
#	Erica Meyers works in the IT department, and was born in March.
#	Monica Barker works in the HR department, and was born in December.
#Processed 4 lines.
\end { python}
\subsubsection { read CSV into list}
\begin { python}
import csv
## first create a small CSV file to read
with open('employee_file.csv', mode='w', newline='') as employee_file:
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"')
    employee_writer.writerow(['name', 'department', 'birthday month'])
    employee_writer.writerow(['John Smith', 'Accounting', 'November'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March'])
    employee_writer.writerow(['Monica Barker', 'HR', 'December'])
##
def readCSVIntoList(fileName, discardHeader=False):
    """Return the rows of a CSV file as a list of lists.

    If discardHeader is True, the first row (the column names) is left out.
    """
    with open(fileName, newline='') as csv_file:
        rows = list(csv.reader(csv_file, delimiter=','))
    # slicing drops the first row by position and is also safe for an empty file
    return rows[1:] if discardHeader else rows

rows = readCSVIntoList('employee_file.csv', discardHeader=True)
print(rows)
#[['John Smith', 'Accounting', 'November'], ['Erica Meyers', 'IT', 'March'], ['Monica Barker', 'HR', 'December']]
\end { python}
\subsection { Adding Command Line Arguments}
The following program expects command line arguments. If not provided, uses default arguments.
\begin { python}
## run with python sourcefile.py --keyword mykeyword --page 1
## or python sourcefile.py
import argparse

def parse_args(argv=None):
    """Parse the command line (sys.argv when argv is None) and return the arguments.

    Arguments that are not given on the command line fall back to their
    default values, which are also shown in the --help output.
    """
    parser = argparse.ArgumentParser(description="Just an example", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-k", "--keyword", type=str, default="googleit", help="query keywords")
    parser.add_argument("-p", "--page", type=str, default="0", help="query page")
    return parser.parse_args(argv)

if __name__ == "__main__":
    args = parse_args()
    config = vars(args)  # the same arguments as a dictionary
    #print(config)
    searchKeyword = args.keyword
    searchPage = args.page  # the attribute is named after the long option --page
    print("SearchKeyword is " + searchKeyword + " Search Page is " + searchPage )
\end { python}
\section { Time and Date}
\subsection { Get current date }
\begin { python}
## Print the current local date and time as DD-MM-YYYY-HH-MM-SS.
from datetime import datetime

now = datetime.now()
## a format specification inside an f-string is handed to strftime
curDate = f"{now:%d-%m-%Y-%H-%M-%S}"
print(curDate)
# e.g. 18-06-2022-10-40-59
\end { python}
\subsection { Convert Unix time to datetime}
\begin { python}
from datetime import datetime, timezone

x = 1656100252345  # Unix time in milliseconds
## fromtimestamp expects seconds; tz=timezone.utc gives an aware UTC datetime
## (datetime.utcfromtimestamp is deprecated since Python 3.12)
d = datetime.fromtimestamp(x / 1000, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
print(d)
#2022-06-24 19:50:52
\end { python}
\section { Algorithms}
\subsection { Remove elements}
\subsection { Find middle coordinates of a coordinate array}
\begin { python}
## linesProc2 holds the coordinates in order, e.g. the positions of detected lines
linesProc2 = [0, 10, 30, 60]
## pair every coordinate with its successor and take the midpoint of each pair
middles = [(left + right) / 2 for left, right in zip(linesProc2, linesProc2[1:])]
print(middles)
# [5.0, 20.0, 45.0]
\end { python}
\section { Regex}
\subsection { Nongreedy regex search}
By default, regex quantifiers are greedy: they match the longest possible sequence. To match the shortest possible sequence instead (nongreedy matching), append \texttt{?} to the quantifier:
\begin { python}
import re

text = "From: test: test"
## greedy: .+ grabs as much as possible, so the match ends at the LAST colon
greedy = re.search(r"^F.+:", text).group()
print(greedy)
# From: test:
## nongreedy: .+? grabs as little as possible, so the match ends at the FIRST colon
nongreedy = re.search(r"^F.+?:", text).group()
print(nongreedy)
# From:
\end { python}
\section { Network} \label { sect:network}
\subsection { Single threaded to multi threaded}
Python programs are single threaded by default. The following listing shows the same program first in a single-threaded and then in a multi-threaded version:
\begin { python}
### ---------------------- ###
### Single-threaded version: the two tasks run one after the other,
### so the program takes approximately six seconds.
### ---------------------- ###
from time import sleep, perf_counter

def task():
    # simulated work: the thread just waits for three seconds
    print('Starting a task...')
    sleep(3)
    print('done')

start_time = perf_counter()
for _ in range(2):
    task()
end_time = perf_counter()
elapsed = end_time - start_time
print(f'It took {elapsed: 0.2f} second(s) to complete.')
# the exact figure varies a little between runs. please run the code!

### ---------------------- ###
### Multi-threaded version: both tasks wait at the same time,
### so the program takes approximately 3 seconds.
### ---------------------- ###
from time import sleep, perf_counter
from threading import Thread

def task():
    # simulated work: the thread just waits for three seconds
    print('Starting a task...')
    sleep(3)
    print('done')

start_time = perf_counter()
# create two new threads
t1, t2 = Thread(target=task), Thread(target=task)
# start the threads
for worker in (t1, t2):
    worker.start()
# wait for the threads to complete
for worker in (t1, t2):
    worker.join()
end_time = perf_counter()
elapsed = end_time - start_time
print(f'It took {elapsed: 0.2f} second(s) to complete.')
# the exact figure varies a little between runs. please run the code!
\end { python}
\subsection { Multi-thread with argument}
\begin { python}
from time import sleep, perf_counter
from threading import Thread

def task(id):
    # simulated work for the task with the given id: wait one second
    print(f'Starting the task {id}...')
    sleep(1)
    print(f'The task {id} completed')

start_time = perf_counter()
## create 10 threads; args must be a tuple, hence the trailing comma
threads = [Thread(target=task, args=(n,)) for n in range(1, 11)]
## start them in creation order
for t in threads:
    t.start()
## wait for the threads to complete
for t in threads:
    t.join()
end_time = perf_counter()
elapsed = end_time - start_time
print(f'It took {elapsed: 0.2f} second(s) to complete.')
# Please run the program on your computer to see the output!
\end { python}
\section { Web} \label { sect:web}
\subsection { Scrape HTML with Beautiful Soup}
This example code scrapes an HTML page and searches for HTML div tags inside it.
\begin { python}
import requests
from bs4 import BeautifulSoup

URL = "https://edition.cnn.com/"
page = requests.get(URL, timeout=10)
page.raise_for_status()  # stop on an HTTP error instead of parsing an error page
soup = BeautifulSoup(page.content, "html.parser")
## print the HTML content
print(soup.prettify())
## find first div element in the page
myDiv = soup.find("div")
## find all div elements in the page (find_all is the current name of findAll)
myDivs = soup.find_all("div")
## find all divs with class equal to the following string.
job_elements = soup.find_all("div", attrs={"class": "card-content"})
## get the link target (href attribute) of the first "a" element, if there is one
firstLink = soup.find("a")
if firstLink is not None:
    print(firstLink.get("href"))
\end { python}
\subsection { Make request until success}
\begin { python}
import time
import requests
from bs4 import BeautifulSoup

def retryResponseGetSoup(url, delay=5, maxRetries=None):
    """Request url repeatedly until it succeeds and return the parsed page.

    A request counts as successful when the status code is 200 and the page
    text is not the error text 'Baglanti hatasi.'.

    url        -- full address including the scheme, e.g. "https://..."
    delay      -- seconds to wait between two attempts
    maxRetries -- give up with RuntimeError after this many retries;
                  None (the default) retries forever
    """
    retries = 0
    while True:
        try:
            response = requests.get(url, timeout=10)
            bs = BeautifulSoup(response.content, "html.parser")
            if response.status_code == 200 and bs.text != 'Baglanti hatasi.':
                return bs
        except (requests.ConnectionError, requests.Timeout) as err:
            # temporary network problems are retried as well
            print('request failed:', err)
        if maxRetries is not None and retries >= maxRetries:
            raise RuntimeError("no successful response from " + url)
        retries += 1
        print('retrying...')
        time.sleep(delay)

## requests needs the scheme (https://) in front of the host name
url = "https://www.google.com"
s = retryResponseGetSoup(url)
\end { python}
\subsection { Selenium}
\subsubsection { Access attribute of an element}
\begin { python}
### selenium python: read an attribute of every matching element
## NOTE: `driver` is assumed to be an already created selenium webdriver
elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'navigationPane')]/a")
for e in elements:
    # get_attribute returns the value; print it so that it is not lost
    print(e.get_attribute("outerHTML"))
\end { python}
\subsubsection { Scroll to element}
\begin { python}
### selenium python scroll to element's location
## NOTE: `driver` and `element` are assumed to exist already
## vertical position of the element inside the page
desired_y = element.location['y']
## vertical position of the middle of the visible window inside the page
current_y = (driver.execute_script('return window.innerHeight') / 2) + driver.execute_script('return window.pageYOffset')
## scrollBy is relative, so scroll by the difference between the two
scroll_y_by = desired_y - current_y
driver.execute_script("window.scrollBy(0, arguments[0]);", scroll_y_by)
\end { python}
\subsubsection { Access pure HTML of the element}
\begin { python}
## locate the body of the table whose class contains 'morphologyTable'
## and ask the browser for its raw HTML (the element itself included)
tableBodyXPath = "//table[contains(@class, 'morphologyTable')]//tbody"
tableElems = driver.find_element(By.XPATH, tableBodyXPath)
tableElems.get_attribute("outerHTML")
\end { python}
\subsubsection { Save cropped screenshot}
\begin { python}
from PIL import Image

## NOTE: x1, y1, w1 and y2, h2 are not defined in this snippet. They look like
## the position and size of two page elements (e.g. taken from element.location
## and element.size) - define them before running this code.
driver.save_screenshot('shot.png')
im = Image.open('shot.png')
## crop takes a box (left, upper, right, lower) in pixels
im = im.crop((int(x1-5), int(0), int(x1+w1+5), int(y2-y1+h2)))
im.save('shot.png')
\end { python}
\subsection { Download files}
\begin { python}
import requests

image_url = "https://www.python.org/static/community_logos/python-logo-master-v3-TM.png"
r = requests.get(image_url, timeout=30)
## without this check an error page would be saved as if it were the image
r.raise_for_status()
## 'wb': the content is binary, so write it as bytes
with open("python_logo.png", 'wb') as f:
    f.write(r.content)
\end { python}
\section { Pandas}
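Pandas is an open-source Python library that provides labelled, table-like data structures (\texttt{Series} and \texttt{DataFrame}) together with tools for reading, cleaning, transforming and aggregating data. It is useful because common data-analysis steps, such as loading a CSV file, filtering rows, filling in missing values or grouping, can each be written in one or two lines instead of hand-written loops.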
\subsection { read CSV }
We can use the Pandas library to read CSV files easily. The content is loaded into a Pandas \texttt{DataFrame}.
\begin { python}
import csv
import pandas as pd

## write a small example file first
with open('employee_file.csv', mode='w', newline='') as employee_file:
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"')
    employee_writer.writerow(['name', 'department', 'birthday month'])
    employee_writer.writerow(['John Smith', 'Accounting', 'November'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March'])
    employee_writer.writerow(['Monica Barker', 'HR', 'December'])

## the first line of the file becomes the column names of the DataFrame
data = pd.read_csv("employee_file.csv")
print(data)
#            name  department birthday month
#0     John Smith  Accounting       November
#1   Erica Meyers          IT          March
#2  Monica Barker          HR       December
\end { python}
\subsection { analyze and clean data }
\begin { python}
import csv
import pandas as pd

## example file; the last row has an empty department and birthday month
with open('employee_file.csv', mode='w', newline='') as employee_file:
    employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"')
    employee_writer.writerow(['name', 'department', 'birthday month', 'salary', 'gender'])
    employee_writer.writerow(['John Smith', 'Accounting', 'November', '100', 'm'])
    employee_writer.writerow(['Erica Meyers', 'IT', 'March', '200', 'f'])
    employee_writer.writerow(['Monica Barker', '', '', '400', 'f'])

df = pd.read_csv("employee_file.csv")
## print head of the data (here: at most the first 10 rows)
print(df.head(10))
## print tail of the data
print(df.tail())
## info about the data; info() prints by itself and returns None
df.info()
## get column data types
print(df.dtypes)
## drop rows that contain empty cells (returns a new frame)
new_df = df.dropna()
print(new_df.to_string())
## drop unnecessary columns (returns a new frame, so keep the result)
df_no_gender = df.drop(columns=['gender'])
## replace empty places; a dictionary gives each column a fitting value.
## Done on a copy so that df still has empty cells for the examples below.
filled_df = df.fillna({'department': 'unknown', 'birthday month': 'unknown'})
## only replace specific columns: assign the result back to the column.
## fillna(inplace=True) on df["salary"] may act on a temporary copy.
df["salary"] = df["salary"].fillna(130)
## substitute column mean to the empty places
x = df["salary"].mean()
df["salary"] = df["salary"].fillna(x)
## substitute column median to the empty places
x = df["salary"].median()
df["salary"] = df["salary"].fillna(x)
## substitute column mode to the empty places
x = df["salary"].mode()[0]
df["salary"] = df["salary"].fillna(x)
## remove rows with an empty 'department' cell
df.dropna(subset=['department'], inplace=True)
\end { python}
\subsection { basic functionality: access, sampling, filtering}
\begin { python}
import pandas as pd
## NOTE: some column names below ('Total', 'CA', 'Start', 'End', 'salary')
## are placeholders; use columns that exist in your own data.
df = pd.read_csv("../pokemon_data.txt", delimiter="\t")
## Read headers
df.columns
## list the frequency of each Generation field
df['Generation'].value_counts()
## give how many unique values are in the column
df["Generation"].nunique()
## Read each column
df["Speed"]
df["Speed"][0:5]
df.Speed                  # attribute access; only for names that are valid identifiers
df[["Speed", "HP"]]       # a list of names selects several columns
## Read each row
df.iloc[1]
df.iloc[1:4]
[row for index, row in df.iterrows()]
## Read a specific location (R,C)
df.iloc[2, 1]
## Select rows: the comparison builds a boolean mask, loc keeps the True rows
df.loc[df["Type 1"] == "Fire"]
## Sorting data
df.sort_values("Speed")
df.sort_values("Speed", ascending=False)
## one flag per column: HP ascending, Speed descending
df.sort_values(["HP", "Speed"], ascending=[True, False])
## Making changes to the data
df['Total'] = df['Total'] - 5
## axis=1 takes the mean across the columns, i.e. one value per row
df['Total'] = df[['CA', 'CB', 'CC']].mean(axis=1)

def f(x, y):
    return x + y

## Iterating over one column
result = [x for x in df['End']]
## Iterating over two columns, use `zip`
result = [f(x, y) for x, y in zip(df['Start'], df['End'])]
## Iterating over multiple columns - same data type
result = [f(row[0], row[1]) for row in df[['Start', 'End']].to_numpy()]
## Iterating over multiple columns - differing data type
result = [f(row[0], row[1]) for row in zip(df['Start'], df['End'])]
## Row by row iteration (itertuples, and even more so iterrows) is slow
## on large frames. Prefer the column based forms above.
for row in df.itertuples():
    print(row)
## Removing columns (the keyword is inplace, without an underscore)
df.drop(columns=['Total'], inplace=True)
## Keeping only the rows that satisfy a condition
df.query("salary > 20")
## Summing the numeric columns of each row
df.sum(axis=1, numeric_only=True)
## Drop repeating entries
df.drop_duplicates(inplace=True)
## save results to csv
df.to_csv("myCsvFile.csv", index=False)
\end { python}
\subsection { advanced: multi column access, contains, groupby}
\begin { python}
#####################
### Advanced
import pandas as pd
df = pd.read_csv("../pokemon_data.txt", delimiter="\t")
df.loc[df["Type 1"] == "Grass"]
## Sample using multiple conditions: combine the boolean masks with & (and),
## | (or); every condition needs its own parentheses
new_df = df.loc[(df["Type 1"] == "Grass") & (df["Type 2"] == "Poison")]
## After filtering, the old index values stay. You have to reset the index then.
## Without drop=True the old index would be kept as an extra column.
new_df = new_df.reset_index(drop=True)
## Using contains
df.loc[df["Name"].str.contains("Mega")]
df.loc[~df["Name"].str.contains("Mega")]  # take other set
df.loc[df["Name"].str.contains("Fire|Grass", regex=True)]
df.loc[df["Name"].str.contains("pi[a-z]*", regex=True)]
#### Conditional Changes
## Change Type 1 column having entry "Fire" to "Flamer":
## loc[row mask, column] selects the cells that are assigned
df.loc[df["Type 1"] == "Fire", "Type 1"] = "Flamer"
## Change two columns at the same time.
df.loc[df["Total"] > 500, ["Generation", "Legendary"]] = ["Test 1", "Test2"]
## Aggregate data using groupby
## numeric_only=True leaves text columns out of the mean
df.groupby(["Type 1"]).mean(numeric_only=True)
df.groupby("Type 1")['HP'].sum()
df.groupby(["Type 1"]).mean(numeric_only=True).sort_values("Defense", ascending=False)
df.groupby(["Type 1"]).count()
### Working with large data: read the file in pieces of 5000 rows
for chunk in pd.read_csv("modified.csv", chunksize=5000):
    print(chunk)
\end { python}
\subsection { calculate column cumulatives}
\begin { python}
import pandas as pd

## example frame with two value columns
records = [[1, 2, 7, 10], [10, 22, 1, 30],
           [30, 42, 2, 10], [100, 142, 22, 1]]
df = pd.DataFrame(data=records, columns=['Start', 'End', 'Value1', 'Value2'])
## running totals of the value columns, relabelled in the same step
value_columns = ['Value1', 'Value2']
new_names = {name: 'Cumulative ' + name for name in value_columns}
df2 = df[value_columns].cumsum().rename(columns=new_names)
print(df2)
\end { python}
\subsection { operations on two data frames}
\begin { python}
import numpy as np
import pandas as pd

## both frames share the same column labels, so they are combined cell by cell
value_names = ['Value1', 'Value2', 'Value3', 'Value4']
df = pd.DataFrame(columns=value_names,
                  data=[[1, 2, 7, 10], [10, 22, 1, 30],
                        [30, 42, 2, 10], [100, 142, 22, 1]])
## the second frame has one missing value (NaN) in its last row
df2 = pd.DataFrame(columns=value_names,
                   data=[[10, 20, 30, 40], [5, 1, 6, 32],
                         [143, 152, 2, 10], [np.nan, 162, 12, 11]])
## add dataframes; a missing value on either side gives NaN
df + df2
## replaces missing values with 0 while adding
df.add(df2, fill_value=0)
## check whether df > df2. Result is a boolean filled data frame.
## eq, ne, lt, gt, le, and ge all work the same way.
df.gt(df2)
\end { python}
\subsubsection { Concat join rows}
\begin { python}
import pandas as pd

df = pd.DataFrame(data=[[10, 20, 30], [11, 21, 31]], columns=['Key1', 'Key2', 'Key3'])
df2 = pd.DataFrame(data=[[5, 6, 7], [5, 8, 12]], columns=['Key1', 'Key2', 'Key3'])
## axis=0 puts the rows of df2 below the rows of df.
## ignore_index=True numbers the result 0..n-1; otherwise the old
## indexes (0, 1, 0, 1) would be kept.
df3 = pd.concat([df, df2], axis=0, ignore_index=True)
print(df3)
#   Key1  Key2  Key3
#0    10    20    30
#1    11    21    31
#2     5     6     7
#3     5     8    12
\end { python}
\subsubsection { Concat join rows with different columns}
\begin { python}
import pandas as pd

## the two frames have no column in common
df = pd.DataFrame(columns=['Key1', 'Key2', 'Key3'], data=[[10, 20, 30], [11, 21, 31]])
df2 = pd.DataFrame(columns=['Key4', 'Key5'], data=[[5, "Lazy"], [5, "Hardworking"]])
## stacking rows: cells of columns a frame does not have become NaN
frames = [df, df2]
pd.concat(frames, axis=0)
#   Key1  Key2  Key3  Key4         Key5
#0  10.0  20.0  30.0   NaN          NaN
#1  11.0  21.0  31.0   NaN          NaN
#0   NaN   NaN   NaN   5.0         Lazy
#1   NaN   NaN   NaN   5.0  Hardworking
\end { python}
\subsubsection { Concat join columns}
\begin { python}
import pandas as pd

df = pd.DataFrame(columns=['Key1', 'Key2', 'Key3'], data=[[10, 20, 30], [11, 21, 31]])
df2 = pd.DataFrame(columns=['Key4', 'Key5'], data=[[5, "Lazy"], [5, "Hardworking"]])
## axis=1 places the frames side by side; rows are matched by their index
frames = [df, df2]
pd.concat(frames, axis=1)
#   Key1  Key2  Key3  Key4         Key5
#0    10    20    30     5         Lazy
#1    11    21    31     5  Hardworking
\end { python}
\subsection { applying a function to dataFrame rows or columns}
\begin { python}
import numpy as np
import pandas as pd

df = pd.DataFrame(data=[["Kevin", 2, 6.], ["Frank", 22, 8.],
                        ["Sarah", 4, 5.], ["Galvin", 3, 10.]],
                  columns=['Name', 'Years', 'Ability'])
print(df)
#     Name  Years  Ability
#0   Kevin      2        6
#1   Frank     22        8
#2   Sarah      4        5
#3  Galvin      3       10
## axis=0: the function gets one COLUMN at a time, so this gives one
## total per column (strings are concatenated)
df1 = df.apply(np.sum, axis=0)
print(df1)
#Name       KevinFrankSarahGalvin
#Years                         31
#Ability                       29
## axis=1: the function gets one ROW at a time, so this gives one
## total per row; only the numeric columns are used here
df2 = df[["Years", "Ability"]].apply(np.sum, axis=1)
print(df2)
#0     8.0
#1    30.0
#2     9.0
#3    13.0
\end { python}
\subsection { plot values with dates on x axis}
\begin { python}
import pandas as pd
import matplotlib.pyplot as plt

df = pd.DataFrame(data=[["10-06-2022", 5], ["09-06-2022", 3], ["11-06-2022", 20],
                        ["13-06-2022", 12], ["12-06-2022", 15], ["14-06-2022", 7]],
                  columns=['Date', 'Sales'])
## turn the text dates into real timestamps; format describes the text layout
df["time"] = pd.to_datetime(df['Date'], format='%d-%m-%Y')
##df["time"] = pd.to_datetime(df['Date'], format='%Y-%m-%d %H:%M:%S.%f')
df.set_index(['time'], inplace=True)
## the rows above are not in date order; sort them so the line runs left to right
df.sort_index(inplace=True)
df.plot()
plt.show()
\end { python}
\subsection { Example code 1:}
\begin { python}
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df = pd.read_csv("examples/4weeks_date.csv")
##see the columns
df.columns
##strip the column names to remove extra whitespaces
df = df.rename(columns=lambda x: x.strip())
##check dataset
df.describe()
df.info()
print("initial length of dataset %d " % len(df))
##drop rows when ENTRIES or EXITS is zero
df_clean = df.query("ENTRIES > 0 and EXITS > 0")
print("length after cleaning 1 %d " % len(df_clean))
##drop entries bigger than 5M
## .copy() makes df_clean an independent frame, so the columns added below
## are not written into a slice of df
df_clean = df_clean.query("ENTRIES < 5000000 and EXITS < 5000000").copy()
df_clean.reset_index(drop=True, inplace=True)
print("length after cleaning 2 %d " % len(df_clean))
##compute total activity : ENTRIES + EXITS
df_clean["TA"] = df_clean["ENTRIES"] + df_clean["EXITS"]
##combine date and time columns. Then convert to pdDate
df_clean["DT"] = df_clean["DATE"] + " " + df_clean["TIME"]
df_clean["DATETIME"] = pd.to_datetime(df_clean['DT'], format='%m/%d/%Y %H:%M:%S')
##select a station and sum same day activities
df_clean["LINENAME"].value_counts()
usedLineName = "1237ACENQRS"
myLineDF = df_clean[df_clean["LINENAME"] == usedLineName]
##sort rows chronologically
myLineDF = myLineDF.sort_values("DATETIME").reset_index(drop=True)
##add rows with identical DATETIME (only the numeric activity columns)
myLineDFGrouped = myLineDF.groupby("DATETIME")[["ENTRIES", "EXITS", "TA"]].sum()
## DATETIME is kept as the index so that the x axis shows real dates
## Create figure and plot space
fig, ax = plt.subplots(figsize=(10, 10))
ax.plot(myLineDFGrouped.index, myLineDFGrouped["TA"], color='purple')
ax.set(xlabel="Date", ylabel="Total activity", title="Total entries for line: %s" % usedLineName)
plt.show()
## Alternatively use default Plotter
myLineDFGrouped.plot()
plt.show()
\end { python}
\section { Matplotlib Visualization}
\subsection { Basics}
\subsubsection { Basic plotting}
\begin { python}
from matplotlib import pyplot as plt

## figsize is (width, height) in inches
plt.figure(figsize=(20, 9))
## plot() draws the y values against the x values as a line
x = [0, 1, 2, 3, 4]
y = [0, 1, 4, 9, 16]
plt.plot(x, y)
## show() opens the window with the figure
plt.show()
\end { python}
\section { Seaborn Visualization}
Seaborn is a visualization library built on top of Matplotlib. It works directly with Pandas data frames, which makes plotting tabular data more convenient.
\subsection{Plotting a histogram}
\begin { python}
import matplotlib.pyplot as plt
import seaborn as sns

## distplot is deprecated in seaborn; kdeplot and histplot replace it
## plot only density
sns.kdeplot([0, 1, 2, 3, 4, 5])
plt.show()
## plot density + histogram
## stat="density" puts the bars on the same scale as the density curve
sns.histplot([0, 1, 2, 3, 3, 3, 3, 4, 5, 7], kde=True, stat="density")
plt.show()
\end { python}
\subsection { Lineplot}
\begin { python}
import matplotlib.pyplot as plt
import seaborn as sns

## loading dataset
data = sns.load_dataset("iris")
# [150 rows x 5 columns]
### draw lineplot
sns.lineplot(x="sepal_length", y="sepal_width", data=data)
## setting the lower x limit of the plot; the upper limit stays automatic
plt.xlim(left=5)
plt.show()
\end { python}
\subsection { Scatterplot}
\begin { python}
# packages: seaborn draws, matplotlib shows the figure
import seaborn as sns
import matplotlib.pyplot as plt

# the iris example data set that ships with seaborn
data = sns.load_dataset("iris")
# one point per row of the data set
sns.scatterplot(data=data, x='sepal_length', y='sepal_width')
plt.show()
\end { python}
\section { OpenCV}
\subsection { Basics}
\subsubsection{Typecast PIL Image to OpenCV image}
\begin { python}
from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt

pillowImage = Image.open("kiz-kulesi.jpg")
## PIL keeps the channels in RGB order while OpenCV functions expect BGR,
## so convert to an array and swap the channel order
opencv_image = cv2.cvtColor(np.asarray(pillowImage.convert("RGB")), cv2.COLOR_RGB2BGR)
## matplotlib expects RGB; floats must be in the range 0..1
rgb_image_float = np.asarray(pillowImage, dtype=float) / 255.0
plt.imshow(rgb_image_float)
plt.show()
\end { python}
\subsubsection { Draw circle on PIL Image}
\begin { python}
from PIL import Image, ImageDraw

## empty 200 x 200 image with an alpha channel
canvas_size = (200, 200)
image = Image.new('RGBA', canvas_size)
draw = ImageDraw.Draw(image)
# the ellipse is given by its bounding box: top left and bottom right (x1, y1, x2, y2)
bounding_box = (20, 20, 180, 180)
draw.ellipse(bounding_box, fill='blue', outline='blue')
# mark the centre with a single red pixel
draw.point((100, 100), 'red')
image.save('test.png')
\end { python}
\subsubsection { Draw text on PIL Image}
\begin { python}
from PIL import Image, ImageDraw, ImageFont

image = Image.new('RGBA', (200, 200))
draw = ImageDraw.Draw(image)
# text to draw
text = "hello"
# font file and size in points; change the path to a font on your machine
fontPath = r'C:\Users\System-Pc\Desktop\arial.ttf'
try:
    font = ImageFont.truetype(fontPath, 20)
except OSError:
    # the font file could not be opened: use the built-in default font
    font = ImageFont.load_default()
# (5, 5) is the top left corner of the text
draw.text((5, 5), text, fill="red", font=font, align="right")
image.save('test.png')
\end { python}
\subsubsection { Flip, resize, rotate, crop images}
\begin { python}
import cv2
import scipy.ndimage
import numpy as np
import matplotlib.pyplot as plt

original_image = cv2.imread("kiz-kulesi.jpg", cv2.IMREAD_GRAYSCALE)
flipud_image = np.flipud(original_image)
fliplr_image = np.fliplr(original_image)
rotated_image = scipy.ndimage.rotate(original_image, 45)
## scipy.misc.imresize no longer exists in SciPy; cv2.resize with
## fx / fy scales both directions by a factor (here: half size)
resized_image = cv2.resize(original_image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR)
rows, cols = original_image.shape
## keep the middle third of the rows and the middle half of the columns
croped_image = original_image[int(rows / 3): -int(rows / 3), int(cols / 4): -int(cols / 4)]
fig1, axes_array = plt.subplots(2, 3)
fig1.set_size_inches(9, 6)
## one (image, title) pair per panel, filled row by row
panels = [(original_image, 'Original'),
          (flipud_image, 'Flipped up-down'),
          (fliplr_image, 'Flipped left-right'),
          (rotated_image, 'Rotated'),
          (resized_image, 'Resized'),
          (croped_image, 'Cropped')]
for axis, (panel_image, panel_title) in zip(axes_array.flat, panels):
    axis.imshow(panel_image, cmap=plt.cm.gray)
    axis.set(title=panel_title)
plt.show()
\end { python}
\subsubsection { Operating on HSV colorspace}
\begin { python}
import numpy as np
import matplotlib
import matplotlib.colors
import matplotlib.pyplot as plt
from PIL import Image

def demo_rgb_to_hsv(original_image, reduce_intensity_factor=0.5):
    """Show an RGB image next to a copy whose brightness is reduced.

    The image is converted to HSV, the V (value) channel is multiplied by
    reduce_intensity_factor and the result is converted back to RGB.
    original_image is converted with np.asarray and divided by 255.
    """
    original_rgb_float = np.asarray(original_image, dtype=float) / 255.0
    original_rgb_float = original_rgb_float[:, :, :3]  # keep the first three channels only
    hsv_image = matplotlib.colors.rgb_to_hsv(original_rgb_float)
    hsv_image_processed = hsv_image.copy()
    # channel 2 of an HSV image is V
    hsv_image_processed[:, :, 2] = hsv_image[:, :, 2] * reduce_intensity_factor
    rgb_image_processed = matplotlib.colors.hsv_to_rgb(hsv_image_processed)
    fig1, axes_array = plt.subplots(1, 2)
    fig1.set_size_inches(8, 4)
    axes_array[0].imshow(original_rgb_float)  # Show the RGB image
    axes_array[0].axis('off')
    axes_array[0].set(title='RGB Image')
    axes_array[1].imshow(rgb_image_processed)  # Show the intensity reduced image
    axes_array[1].axis('off')
    axes_array[1].set(title='Intensity Reduced Image')
    plt.show()

rgb_image_int = Image.open("kiz-kulesi.jpg")
demo_rgb_to_hsv(rgb_image_int)
\end { python}
\subsubsection { 1d Gaussian Kernel}
\begin { python}
import numpy as np
import matplotlib.pyplot as plt

def display_1d_gaussian(mean=0.0, sigma=0.5):
    """Plot the Gaussian (normal) density with the given mean and sigma on [-10, 10]."""
    x = np.linspace(-10, 10, 1000)
    y = (1 / np.sqrt(2 * np.pi * sigma**2)) * np.exp(-((x - mean)**2) / (2 * sigma**2))
    fig, axes1 = plt.subplots(1, 1)
    fig.set_size_inches(6, 3)
    # the peak grows above 1 for small sigma; enlarge the y range then
    # so that the curve is not cut off
    y_top = max(1.0, 1.05 * float(y.max()))
    axes1.set(xlabel="X", ylabel="Y", title='Gaussian Curve', ylim=(0, y_top))
    plt.grid(True)
    axes1.plot(x, y, color='gray')
    plt.fill_between(x, y, 0, color='#c0f0c0')
    plt.show()
\end { python}
\subsubsection { 2d Gaussian Kernel Image}
\begin { python}
import scipy.stats
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # old matplotlib versions need this import for projection='3d'
from matplotlib.ticker import LinearLocator, FormatStrFormatter

def display_gaussian_kernel(sigma=1.0):
    """Show a 2d Gaussian with standard deviation sigma as an image and as a 3d surface."""
    # 400 x 400 grid of points covering [-5, 5] in both directions
    X = np.linspace(-5, 5, 400)
    Y = np.linspace(-5, 5, 400)
    X, Y = np.meshgrid(X, Y)
    mu = np.array([0.0, 0.0])
    # same variance in both directions, no correlation
    covariance = np.diag(np.array([sigma, sigma])**2)
    # one (x, y) row per grid point, as expected by multivariate_normal.pdf
    XY = np.column_stack([X.flat, Y.flat])
    z = scipy.stats.multivariate_normal.pdf(XY, mean=mu, cov=covariance)
    Z = z.reshape(X.shape)
    # Plot the surface.
    fig = plt.figure()
    fig.set_size_inches(8, 4)
    ax1 = fig.add_subplot(121)
    ax1.imshow(Z)
    ax2 = fig.add_subplot(122, projection='3d')
    surf = ax2.plot_surface(X, Y, Z, cmap=plt.cm.coolwarm, linewidth=0, antialiased=False)
    # Customize the z axis.
    ax2.set_zlim(0, .2)
    ax2.zaxis.set_major_locator(LinearLocator(10))
    ax2.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
    # Add a color bar which maps values to colors.
    fig.colorbar(surf, shrink=0.5, aspect=5)
    plt.show()

display_gaussian_kernel()
\end { python}
\subsubsection { find horizontal lines}
\begin { python}
import cv2
import matplotlib.pyplot as plt

# Load image, convert to grayscale, Otsu's threshold
image = cv2.imread('kiz-kulesi.jpg')
result = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Detect horizontal lines: opening with a 40 x 1 rectangle as kernel
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1))
detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(result, [c], -1, (36, 255, 12), 2)
# OpenCV images are BGR, matplotlib expects RGB
plt.figure(figsize=(20, 9))
plt.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
plt.show()
\end { python}
\subsubsection { Drawing text on image}
\begin { python}
import cv2
import matplotlib.pyplot as plt

# path
path = r'kiz-kulesi.jpg'
# Reading an image in default mode
image = cv2.imread(path)
# font
font = cv2.FONT_HERSHEY_SIMPLEX
# position
org = (40, 40)
# fontScale
fontScale = 1
# Green color in BGR
color = (0, 255, 0)
# Line thickness of 2 px
thickness = 2
image = cv2.putText(image, 'Hello', org, font, fontScale, color, thickness, cv2.LINE_AA)
# Displaying the image: convert from BGR (OpenCV) to RGB (matplotlib) first
img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(20, 9))
plt.imshow(img_rgb)
plt.show()
\end { python}
\subsection { Template Matching}
\begin { python}
from matplotlib import pyplot as plt
import numpy as np
import cv2
import imutils

def multiscaleTemplateMatching(imFileToLoad, templateFileToLoad):
    """Find a template in an image by comparing edge maps at several image scales.

    imFileToLoad       -- path of the image to search in
    templateFileToLoad -- path of the template image to look for
    Returns [startX, startY, endX, endY], the best match in pixel
    coordinates of the original image.
    Raises ValueError when the template is larger than the image.
    """
    template = cv2.imread(templateFileToLoad)
    template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    template = cv2.Canny(template, 50, 200)
    (tH, tW) = template.shape[:2]
    image = cv2.imread(imFileToLoad)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    found = None
    # loop over the scales of the image, from full size down to 20%
    for scale in np.linspace(0.2, 1.0, 20)[::-1]:
        # resize the image according to the scale, and keep track
        # of the ratio of the resizing
        resized = imutils.resize(gray, width=int(gray.shape[1] * scale))
        r = gray.shape[1] / float(resized.shape[1])
        # if the resized image is smaller than the template, then break
        # from the loop
        if resized.shape[0] < tH or resized.shape[1] < tW:
            break
        # detect edges in the resized, grayscale image and apply template
        # matching to find the template in the image
        edged = cv2.Canny(resized, 50, 200)
        result = cv2.matchTemplate(edged, template, cv2.TM_CCOEFF)
        (_, maxVal, _, maxLoc) = cv2.minMaxLoc(result)
        # if we have found a new maximum correlation value, then update
        # the bookkeeping variable
        if found is None or maxVal > found[0]:
            found = (maxVal, maxLoc, r)
    if found is None:
        # the loop stopped at the first scale: nothing was compared
        raise ValueError("template is larger than the image")
    # unpack the bookkeeping variable and compute the (x, y) coordinates
    # of the bounding box based on the resized ratio
    (_, maxLoc, r) = found
    (startX, startY) = (int(maxLoc[0] * r), int(maxLoc[1] * r))
    (endX, endY) = (int((maxLoc[0] + tW) * r), int((maxLoc[1] + tH) * r))
    return [startX, startY, endX, endY]

## example file names - replace them with your own image and template
shot = "shot.png"
templateFileToLoad = "template.png"
img_rgb = cv2.imread(shot)
startX, startY, endX, endY = multiscaleTemplateMatching(shot, templateFileToLoad)
# draw a bounding box around the detected result and display the image
cv2.rectangle(img_rgb, (startX, startY), (endX, endY), (0, 0, 255), 2)
plt.figure(figsize=(20, 9))
# OpenCV images are BGR, matplotlib expects RGB
plt.imshow(cv2.cvtColor(img_rgb, cv2.COLOR_BGR2RGB))
plt.show()
\end { python}
\subsection { Overlapping bounding box removal: nonmaxima suppression}
\begin { python}
import numpy as np

def NMS(boxes, overlapThresh = 0.4):
    """Remove overlapping bounding boxes (non-maxima suppression).

    boxes         -- array with one row (x1, y1, x2, y2) per box: top-left
                     and bottom-right corner
    overlapThresh -- a box is removed when its intersection with another
                     remaining box covers more than this fraction of that
                     other box's area
    Returns the remaining boxes as an integer array, or an empty list when
    no boxes are given.
    """
    # Return an empty list, if no boxes given
    if len(boxes) == 0:
        return []
    x1 = boxes[:, 0]  # x coordinate of the top-left corner
    y1 = boxes[:, 1]  # y coordinate of the top-left corner
    x2 = boxes[:, 2]  # x coordinate of the bottom-right corner
    y2 = boxes[:, 3]  # y coordinate of the bottom-right corner
    # Area of every box. We add 1, because the pixel at the start
    # as well as at the end counts
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # The indices of all boxes at start. Redundant indices are removed one by one.
    indices = np.arange(len(x1))
    for i, box in enumerate(boxes):
        # All boxes that are still kept, except the current one
        temp_indices = indices[indices != i]
        # Find out the coordinates of the intersection box
        xx1 = np.maximum(box[0], boxes[temp_indices, 0])
        yy1 = np.maximum(box[1], boxes[temp_indices, 1])
        xx2 = np.minimum(box[2], boxes[temp_indices, 2])
        yy2 = np.minimum(box[3], boxes[temp_indices, 3])
        # Find out the width and the height of the intersection box
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        # compute the ratio of overlap
        overlap = (w * h) / areas[temp_indices]
        # Compare every ratio with the threshold first and only then ask
        # whether any of them is above it. np.any(overlap) > overlapThresh
        # would compare True / False with the threshold instead.
        if np.any(overlap > overlapThresh):
            indices = indices[indices != i]
    # return only the boxes at the remaining indices
    return boxes[indices].astype(int)
\end { python}
\subsection { SingleScale Multiple Template Matching}
\begin { python}
import cv2
import numpy as np
import matplotlib.pyplot as plt

def singleScaleMultipleTemplateMatching(imageFileName, templateFileName, threshold=0.75):
    """Find all occurrences of a template in an image at the original scale.

    imageFileName    -- path of the image to search in
    templateFileName -- path of the template image
    threshold        -- minimum normalized correlation for a location to count
    Returns (pick, img_rgb): the boxes that remain after non-maxima
    suppression, and a copy of the image with these boxes drawn in green.
    Uses the NMS function of the previous listing.
    """
    print("[INFO] loading images...")
    image = cv2.imread(imageFileName)
    img_rgb = image.copy()
    template = cv2.imread(templateFileName)
    (tH, tW) = template.shape[:2]
    # convert both the image and template to grayscale
    imageGray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    templateGray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    # perform template matching
    print("[INFO] performing template matching...")
    result = cv2.matchTemplate(imageGray, templateGray, cv2.TM_CCOEFF_NORMED)
    # np.where returns the row (y) indices first, then the column (x) indices
    (yCoords, xCoords) = np.where(result >= threshold)
    print("[INFO] {} matched locations *before* NMS".format(len(yCoords)))
    # one rectangle (x1, y1, x2, y2) per matched location
    rects = [(x, y, x + tW, y + tH) for (x, y) in zip(xCoords, yCoords)]
    # apply non-maxima suppression to the rectangles
    pick = NMS(np.array(rects))
    print("[INFO] {} matched locations *after* NMS".format(len(pick)))
    # loop over the final bounding boxes and draw them on the image
    for (startX, startY, endX, endY) in pick:
        # plain Python ints are accepted by every OpenCV version
        cv2.rectangle(img_rgb, (int(startX), int(startY)), (int(endX), int(endY)),
                      (0, 255, 0), 2)
    return pick, img_rgb

pick, img_rgb = singleScaleMultipleTemplateMatching("cropped2.png", "template.png")
plt.figure(figsize=(20, 9))
plt.imshow(img_rgb)
plt.show()
\end { python}
\subsection { Finding and Plotting Contours}
\begin { python}
import cv2
import numpy as np

def findAndPlotContours(fileName, blob_area_thresh=20):
    """Find the outer contours in an image and draw them on a white image.

    fileName         -- path of the image file
    blob_area_thresh -- contours whose area is not larger than this are ignored
    Returns (result, cnts): a white image of the same size with the
    contours drawn in red (BGR), and the list of the contours.
    """
    img = cv2.imread(fileName, cv2.IMREAD_COLOR)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 101, 3)
    ### following morphology open and close can be applied.
    #kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))
    #blob = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
    #kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9,9))
    #blob = cv2.morphologyEx(blob, cv2.MORPH_CLOSE, kernel)
    # invert the thresholded image
    blob = 255 - thresh
    # Get contours; findContours returns 2 or 3 values depending on the OpenCV version
    cnts = cv2.findContours(blob, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    ## select the contours with an area larger than the threshold
    cnts = [c for c in cnts if cv2.contourArea(c) > blob_area_thresh]
    #big_contour = max(cnts, key=cv2.contourArea)
    ## white buffer image of the same size and type as the input
    result = np.full_like(img, 255)
    for c in cnts:
        cv2.drawContours(result, [c], -1, (0, 0, 255), 1)
    ## return the buffer image and ALL contours (not only the last one drawn)
    return result, cnts

result, cnts = findAndPlotContours("kiz-kulesi.jpg", 20)
\end { python}
\subsection { Circle Detection}
\begin { python}
import matplotlib.pyplot as plt
import cv2

img = cv2.imread("cropped2.png", cv2.IMREAD_COLOR)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
detected_circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 20, param1 = 50, param2 = 30, minRadius = 1, maxRadius = 400)
# HoughCircles returns None when no circle is found
if detected_circles is not None:
    for pt in detected_circles[0, :]:
        # the values are floats; the drawing functions need whole pixels
        a, b, r = int(round(pt[0])), int(round(pt[1])), int(round(pt[2]))
        # Draw the circumference of the circle.
        cv2.circle(img, (a, b), r, (0, 255, 0), 2)
        # Draw a small circle (of radius 1) to show the center.
        cv2.circle(img, (a, b), 1, (0, 0, 255), 3)
plt.figure(figsize=(20, 9))
# OpenCV images are BGR, matplotlib expects RGB
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.show()
\end { python}
\subsection { Connected Components Analysis}
\begin { python}
import matplotlib.pyplot as plt
import numpy as np
import cv2

img = cv2.imread("cropped2.png", cv2.IMREAD_COLOR)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
threshold = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
# 4 is the connectivity passed to connectedComponentsWithStats
analysis = cv2.connectedComponentsWithStats(threshold, 4, cv2.CV_32S)
(totalLabels, label_ids, values, centroid) = analysis
#plt.figure(figsize = (20,9))
#plt.imshow(threshold)
# only components whose area lies between these bounds are kept
minArea, maxArea = 110, 900
output = np.zeros(gray.shape, dtype="uint8")
# Loop through each component, starting at label 1 as in the original listing
for i in range(1, totalLabels):
    area = values[i, cv2.CC_STAT_AREA]
    if minArea < area < maxArea:
        # label_ids holds the component ID of every pixel and has the
        # same dimension as the threshold image. Select the pixels of
        # component i and set them to 255 to mark them white.
        componentMask = (label_ids == i).astype("uint8") * 255
        # Creating the Final output mask
        output = cv2.bitwise_or(output, componentMask)
plt.figure(figsize=(20, 9))
plt.imshow(output)
plt.show()
\end { python}
\subsection { Fit ellipses to objects}
\begin { python}
import cv2
import matplotlib.pyplot as plt

img = cv2.imread("cropped2.png", cv2.IMREAD_COLOR)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
## size of the neighbourhood used for the adaptive threshold; it must be odd.
## The original listing used an undefined name `s` here; 11 is an example value.
blockSize = 11
gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, blockSize, 7.0)
## findContours returns 2 or 3 values depending on the OpenCV version
cnts = cv2.findContours(gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
ellipses = []
for cnt in cnts:
    ## fitEllipse needs at least 5 points
    if len(cnt) < 5:
        continue
    ellipse = cv2.fitEllipse(cnt)
    print(ellipse)
    ellipses.append(ellipse)
    ## fitEllipse returns ((center x, center y), (axis lengths), angle)
    centCoord = (int(ellipse[0][0]), int(ellipse[0][1]))
    ## cv2.ellipse expects HALF of the axis lengths
    axisLen = (int(ellipse[1][0] / 2), int(ellipse[1][1] / 2))
    angle = ellipse[2]
    ## angle filtering
    #offSet = np.min( np.fabs( [angle, angle-90, angle-180, angle-270, angle-360]) )
    #if offSet < 5:
    img = cv2.ellipse(img, centCoord, axisLen, angle, 0, 360, (0, 0, 255))
# cv2.drawContours(img,cnts,-1,(150,10,255),2)
plt.figure(figsize=(20, 9))
## OpenCV images are BGR, matplotlib expects RGB
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.show()
\end{python}
\section{NumPy}
\subsection{Fitting}
\subsubsection{Curve fitting}
\begin{python}
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
def generateQuadraticData(nDataPoints=100, a=1, b=0.0, c=0.1, noiseScale=0.1):
    """Sample noisy points from the parabola y = a*x^2 + b*x + c.

    Args:
        nDataPoints: number of samples to draw.
        a, b, c: coefficients of the quadratic.
        noiseScale: upper bound of the additive noise, which is drawn
            uniformly from [0, noiseScale).

    Returns:
        Tuple (x, y) of 1-D arrays of length nDataPoints, where x is drawn
        uniformly from [-0.5, 0.5).
    """
    x = np.random.rand(nDataPoints) - 0.5
    # The noise is one-sided (never negative), so it shifts the curve upwards.
    u = noiseScale * np.random.rand(nDataPoints)
    y = a * x**2 + b * x + c + u
    #plt.scatter(x,y)
    return x, y
x, y = generateQuadraticData()
X = x.reshape(-1, 1)  # for one feature problems
### We compare regression fits with different polynomial degrees here
regr = LinearRegression()
quadratic = PolynomialFeatures(degree=2)
cubic = PolynomialFeatures(degree=3)
X_quad = quadratic.fit_transform(X)
X_cubic = cubic.fit_transform(X)
# Evenly spaced grid over the data range on which the fitted curves are drawn.
X_fit = np.arange(X.min(), X.max(), 0.05)[:, np.newaxis]
# Degree 1: plain linear regression on the raw feature.
regr = regr.fit(X, y)
y_lin_fit = regr.predict(X_fit)
linear_r2 = r2_score(y, regr.predict(X))
# Degree 2: the same regressor, refitted on the quadratic features.
regr = regr.fit(X_quad, y)
# The transformers are already fitted above, so only transform the grid.
y_quad_fit = regr.predict(quadratic.transform(X_fit))
quadratic_r2 = r2_score(y, regr.predict(X_quad))
# Degree 3: refitted on the cubic features.
regr = regr.fit(X_cubic, y)
y_cubic_fit = regr.predict(cubic.transform(X_fit))
cubic_r2 = r2_score(y, regr.predict(X_cubic))
plt.scatter(X, y, label='training points', color='lightgray')
plt.plot(X_fit, y_lin_fit,
         label='linear (d=1), $R^2=%.2f$' % linear_r2,
         color='blue',
         lw=2,
         linestyle=':')
plt.plot(X_fit, y_quad_fit,
         label='quadratic (d=2), $R^2=%.2f$' % quadratic_r2,
         color='red',
         lw=2,
         linestyle='-')
plt.plot(X_fit, y_cubic_fit,
         label='cubic (d=3), $R^2=%.2f$' % cubic_r2,
         color='green',
         lw=2,
         linestyle='--')
plt.legend(loc='upper right')
plt.tight_layout()
plt.show()
\end{python}
\subsubsection{RANSAC curve fitting}
\begin{python}
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn import linear_model, datasets
def generateQuadraticData(noiseFraction=0.2, nDataPoints=100):
    """Sample a noise-free parabola and append uniformly random outliers.

    Args:
        noiseFraction: number of outliers, as a fraction of nDataPoints.
        nDataPoints: number of points sampled exactly on the parabola.

    Returns:
        Tuple (x, y) of 1-D arrays. The first nDataPoints entries lie on
        y = x^2 + 0.1 with x drawn uniformly from [-0.5, 0.5); the remaining
        entries are outliers with both coordinates drawn from [0, 1).
    """
    # round() before int() so that float error in the product (for example
    # 28.999999999999996) cannot silently drop one outlier.
    nNoisePoints = int(round(nDataPoints * noiseFraction))
    ## data
    a, b, c = 1, 0.0, 0.1
    x = np.random.rand(nDataPoints) - 0.5
    y = a * x**2 + b * x + c
    ## outliers
    u = np.random.rand(nNoisePoints, 2)
    x = np.concatenate((x, u[:, 0]), axis=0)
    y = np.concatenate((y, u[:, 1]), axis=0)
    # plt.scatter(x,y)
    return x, y
noiseFraction = 0.7
x, y = generateQuadraticData(noiseFraction)
## 1D data reshape.
X = x.reshape(-1, 1)  # for one feature problems
quadratic = PolynomialFeatures(degree=2)
X_quad = quadratic.fit_transform(X)
# Evaluation interval: twice the data range, so the curve is also drawn
# outside the sampled points.
X_fit = np.arange(2 * np.min(X), 2 * np.max(X), 0.05)[:, np.newaxis]
regr = linear_model.RANSACRegressor()
regr = regr.fit(X_quad, y)
# The transformer is already fitted above, so only transform the grid.
y_quad_fit = regr.predict(quadratic.transform(X_fit))
# Note: this R^2 is computed over all points, outliers included.
quadratic_r2 = r2_score(y, regr.predict(X_quad))
inlier_mask = regr.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)
plt.figure(figsize=(10, 9))
plt.plot(X_fit, y_quad_fit, label='quadratic (d=2), $R^2=%.2f$' % quadratic_r2, color='blue', lw=1, linestyle='-')
plt.scatter(X[inlier_mask], y[inlier_mask], color="green", marker=".", label="Inliers")
plt.scatter(X[outlier_mask], y[outlier_mask], color="red", marker=".", label="Outliers")
plt.legend(loc='upper right')
plt.tight_layout()
plt.show()
\end{python}
\section{Scikit-Learn}
Scikit-learn supports many machine learning models.
\subsection{Linear Regression}
\begin{python}
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Load the diabetes dataset
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)
# Use only one feature (column 2), kept as a 2-D column
diabetes_X = diabetes_X[:, np.newaxis, 2]
# Split the data into training/testing sets: the last 20 samples are the test set
diabetes_X_train = diabetes_X[:-20]
diabetes_X_test = diabetes_X[-20:]
# Split the targets into training/testing sets
diabetes_y_train = diabetes_y[:-20]
diabetes_y_test = diabetes_y[-20:]
# Create linear regression object
regr = linear_model.LinearRegression()
# Train the model using the training sets
regr.fit(diabetes_X_train, diabetes_y_train)
# Make predictions using the testing set
diabetes_y_pred = regr.predict(diabetes_X_test)
# The coefficients
print("Coefficients: \n", regr.coef_)
# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(diabetes_y_test, diabetes_y_pred))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(diabetes_y_test, diabetes_y_pred))
# Plot outputs
plt.scatter(diabetes_X_test, diabetes_y_test, color="black")
plt.plot(diabetes_X_test, diabetes_y_pred, color="blue", linewidth=3)
# Hide the axis ticks
plt.xticks(())
plt.yticks(())
plt.show()
\end{python}
%\section*{Appendix}
%\bibliographystyle{plain}
%\bibliography{references}
\end{document}