admin管理员组

文章数量:1612413

原文的CSDN 翻译

Python中的GPS轨迹聚类

开源

https://github.com/eziaowonder/Mining-Pattern

环境

geopy    numpy    matplotlib    pandas    dipy    gmplot 
Anaconda3  python3 

数据集

大致结构

Geolife GPS trajectory dataset

代码

因为代码是在 Jupyter Notebook 中编写的,所以分成了若干块。本代码仅取用用户 001 的数据作为测试。

# 数据处理
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

# Column layout of a Geolife .plt record; only 'lat' and 'lng' are used below.
names = ['lat','lng','zero','alt','days','date','time']
streams = []  # one (n_points, 2) array of [lat, lng] rows per trajectory file
index = 0  # not read by the code shown here; kept in case another cell uses it

# Trajectory folder of user 001. os.path.join keeps the path portable, and the
# trailing '' keeps the trailing separator that the original string carried.
userdata = os.path.join('data', 'Geolife Trajectories 1.3', 'Data', '001', 'Trajectory', '')
# os.listdir returns entries in arbitrary order; sort them so that streams[0]
# and everything derived from the stream order is reproducible between runs.
filelist = sorted(os.listdir(userdata))

for f in filelist:
    # Skip the 6 preamble lines of the .plt file and treat every remaining line
    # as data. The previous header=6 combined with names= made pandas consume
    # the first data row as a (replaced) header line, silently dropping it.
    df = pd.read_csv(
        os.path.join(userdata, f),
        skiprows=6,
        header=None,
        names=names,
        index_col=False,
    )
    # Keep every 12th row to thin out the trajectory.
    df_min = df.iloc[::12, :]
    lat_lng_data = np.c_[df_min['lat'].values, df_min['lng'].values]
    streams.append(lat_lng_data)

# First point (lat, lng) of the first trajectory, as a quick sanity check.
print(streams[0][0,0], streams[0][0,1])
输出
39.984198 116.319322
# 分类
import geopy.distance
from dipy.segment.metric import Metric
from dipy.segment.metric import ResampleFeature
import numpy as np
from dipy.segment.clustering import QuickBundles
# Clustering threshold passed to QuickBundles below; it is compared against the
# value returned by GPSDistance.dist, which is expressed in kilometres.
THRESHOLD = 1.5


class GPSDistance(Metric):
    """Metric giving the mean point-to-point distance between two tracks.

    Each track is first resampled to 256 points by ``ResampleFeature``; the
    distance is then the average of ``geopy.distance.distance`` over the
    paired points, in kilometres.
    """

    def __init__(self):
        resampler = ResampleFeature(nb_points=256)
        super().__init__(feature=resampler)

    def are_compatible(self, shape1, shape2):
        """Return True when both feature shapes have the same length."""
        return len(shape2) == len(shape1)

    def dist(self, v1, v2):
        """Return the mean distance in km between paired points of v1 and v2.

        Each point is indexed as ``[0]`` and ``[1]`` and passed to geopy in
        that order.
        """
        km_per_pair = [
            geopy.distance.distance([a[0], a[1]], [b[0], b[1]]).kilometers
            for a, b in zip(v1, v2)
        ]
        return np.mean(km_per_pair)


# Set up QuickBundles with the custom GPS metric and the THRESHOLD constant.
metric = GPSDistance()
qb = QuickBundles(metric=metric, threshold=THRESHOLD)

# Cluster the trajectories held in `streams` and report how many clusters resulted.
clusters = qb.cluster(streams)
print("Nb. clusters:", len(clusters))
输出
Nb. clusters: 13
# 画图
from gmplot import gmplot
import random

def randomcolor():
    """Return a random colour as an uppercase ``#RRGGBB`` hex string.

    All sixteen hex digits are eligible. The previous digit table left out
    '0', so colours such as ``#FF0000`` could never be produced, and it used
    a hard-coded index bound tied to the table length.
    """
    hex_digits = "0123456789ABCDEF"
    return "#" + "".join(random.choice(hex_digits) for _ in range(6))

# Centre the map on the first point of the first trajectory, at zoom level 12.
gmap = gmplot.GoogleMapPlotter(streams[0][0,0], streams[0][0,1], 12)

# Draw at most the first 7 clusters. Bounding the range by len(clusters)
# avoids the IndexError that a fixed range(7) raises when fewer clusters exist.
max_clusters_to_plot = 7
for clustersIndex in range(min(max_clusters_to_plot, len(clusters))):
    color = randomcolor()  # one colour shared by every trajectory in the cluster
    for i in clusters[clustersIndex].indices:
        gmap.plot(streams[i][:,0], streams[i][:,1], color, edge_width=1)

# Write the resulting map to an HTML file in the working directory.
gmap.draw("user001_map.html")

效果

threshold 为 1.5;阈值越高,分出的类越少

参考

本质上是复现 GPS Trajectories Clustering in Python

数据集的使用参考 Inferring home and work locations using GPS trajectories and DBSCAN

本文标签: 轨迹 代码 Python GPS