parent
d3438bd04c
commit
8014aef8d1
@ -0,0 +1,89 @@
|
||||
from PTTData import PTTData
|
||||
from pprint import pprint
|
||||
from datetime import datetime
|
||||
from progressbar import ProgressBar
|
||||
import json
|
||||
import csv
|
||||
import io
|
||||
|
||||
# Handle for the 'Gossiping' board data; PTTData is defined in the project.
data = PTTData('Gossiping')

# Cache shared with getMostFrequentAuthor(): the day the cached author was
# computed and the cached author itself. Both stay None until the first run.
lastUpdate: datetime = None
mostFrequentAuthor: str = None

# Default stop words: one entry per line of the resource file, with the
# surrounding whitespace (including the newline) stripped.
defaultStopWords = []
with open('resource/stopWords.txt', 'r', encoding='UTF-8') as file:
    defaultStopWords.extend(line.strip() for line in file)
|
||||
|
||||
|
||||
def getMostFrequentAuthor(title: str = None):
    """Yield the author with the most documents in ``data.pushCllc``.

    This is a generator backed by the module-level cache
    (``mostFrequentAuthor`` / ``lastUpdate``):

    * If the cache was refreshed today, the cached author is yielded and the
      generator ends without touching the database.
    * Otherwise a previously cached (stale) author, if any, is yielded first
      so the caller has a value immediately; the aggregation is then re-run
      and the fresh author is yielded.

    Args:
        title: Unused; kept so existing callers keep working.

    Yields:
        The ``_id`` (author) of the group with the highest count.

    Raises:
        ValueError: From ``max()`` when the aggregation returns no groups.
    """
    global mostFrequentAuthor, lastUpdate
    today = datetime.today().date()

    if lastUpdate == today:
        # Cache is already fresh: stop here instead of falling through and
        # re-running the aggregation when the caller keeps iterating.
        yield mostFrequentAuthor
        return

    if mostFrequentAuthor is not None:
        # Stale value first; the refreshed value follows below.
        yield mostFrequentAuthor

    authorList = data.pushCllc.aggregate(pipeline=[{'$group': {
        '_id': '$author',
        'count': {
            '$sum': 1
        }
    }}])
    topAuthor = max(authorList, key=lambda group: group['count'])['_id']

    # Stamp the cache only after the lookup succeeded, so a failed refresh is
    # not mistaken for a fresh one on the next call.
    mostFrequentAuthor = topAuthor
    lastUpdate = today
    yield mostFrequentAuthor
|
||||
|
||||
|
||||
def possegPushes(ids: list, stopwords: list, keyword: str):
    """Build a space-joined word string for each segmented push document.

    Looks up the documents in ``data.pushPossegCllc`` whose ``ID`` is in
    ``ids``. Each entry of a document's ``content`` is read as a
    ``(tag, word)`` pair; entries tagged ``'eng'``, ``'x'`` or ``'m'`` and
    words listed in ``stopwords`` are dropped.

    Args:
        ids: Values matched against the ``ID`` field.
        stopwords: Words to leave out of the result.
        keyword: When non-empty, only documents whose remaining words contain
            this exact word are returned.

    Returns:
        A list of ``{'posString': str, 'ID': ...}`` dicts in cursor order.
    """
    # Build the lookup structures once instead of scanning a list per token.
    excludedTags = {'eng', 'x', 'm'}
    stopwordSet = set(stopwords)

    result = []
    for p in data.pushPossegCllc.find({'ID': {'$in': ids}}):
        words = [
            entry[1] for entry in p['content']
            if entry[0] not in excludedTags and entry[1] not in stopwordSet
        ]
        if keyword == '' or keyword in words:
            result.append({
                'posString': ' '.join(words),
                'ID': p['ID']
            })
    return result
|
||||
|
||||
|
||||
def findAuthorPush(author: list = None, aid: list = None, keyword: str = '', stopwords: list = None):
    """Collect pushes by author and/or post id and export them as TSV + JSON.

    Args:
        author: Author names to match; ``None`` or ``['']`` disables the
            author filter.
        aid: Values matched against ``postAid``; ``None`` or ``['']`` disables
            the filter.
        keyword: Passed to ``possegPushes``; when non-empty only pushes whose
            segmented words contain it get a TSV row.
        stopwords: Words removed from the segmented text. Defaults to no stop
            words.

    Returns:
        A ``(csvString, jsonString)`` tuple. ``csvString`` is tab-separated
        with the header ``id, text, count``; every data row carries the
        constant count 3000. ``jsonString`` is a JSON array with one object
        per matched push (``title``, ``author``, ``pushes`` and, when the
        push has segmented text, ``part``).
    """
    # A None sentinel replaces the shared mutable default list.
    if stopwords is None:
        stopwords = []

    terms = {}
    if author is not None and author != ['']:
        terms['author'] = {
            '$in': author
        }
    if aid is not None and aid != ['']:
        terms['postAid'] = {
            '$in': aid
        }
    print(terms)

    pushId = []
    pushContent = {}
    for p in data.pushCllc.find(terms):
        pushId.append(p['_id'])
        pushContent[str(p['_id'])] = {
            'title': p['title'],
            'author': p['author'],
            'pushes': p['pushes']
        }

    possegResult = [['id', 'text', 'count']]
    for index, n in enumerate(possegPushes(pushId, stopwords, keyword)):
        key = str(n['ID'])
        if key in pushContent:
            # Attach the segmented text so the client can map a TSV row back
            # to its push.
            pushContent[key]['part'] = str(n['posString'])
            possegResult.append([index, n['posString'], 3000])

    jsonString = json.dumps(
        list(pushContent.values()), indent=4, ensure_ascii=False)
    with io.StringIO() as f:
        csv.writer(f, delimiter='\t').writerows(possegResult)
        csvString = f.getvalue()
    return (csvString, jsonString)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: dump the (tsv, json) export for a single author.
    exported = findAuthorPush(['gwenwoo'])
    pprint(exported)
|
@ -0,0 +1,329 @@
|
||||
// Start bootstrapping right away. `init` is a function declaration, so it is
// hoisted and callable before the state below has been assigned.
init()

// Page-level state shared by the handlers in this file.
var tsvPath = '', // path handed to d3.tsv when buildSentetree gets no TSV string
    titlePath = '',
    defaultStartDate,
    defaultEndDate,
    totalPosts,
    startDate, // written by setDate
    endDate, // written by setDate
    wordPushList, // parsed JSON push list from the latest server response
    randId, // suffix of the progress event names (see closeEventListner)
    globKeyword = '',
    stopwords = [] // stop words sent along with every request
|
||||
|
||||
/**
 * Bootstrap the page.
 *
 * Requests the initial data set from `ptt_push/init`, stores the returned
 * push list and stop words, pre-fills `#idBox` with the returned author and
 * draws the first graph. All UI handlers are registered once the DOM is
 * ready, so the element-bound ones also work when this script runs before
 * the elements exist.
 */
function init() {
    $.ajax({
        type: 'POST',
        url: 'ptt_push/init',
        dataType: 'json',
        success: function(data) {
            console.log(data)
            // Declared locally; it was previously an implicit global.
            const tsvString = data.Result.tsv
            wordPushList = JSON.parse(data.Result.json)
            stopwords = data.Result.stopwords
            console.log(wordPushList)
            $('#idBox').val(data.Result.author)
            buildSentetree(tsvString)
        }
    })

    $(document).ready(function() {
        // Enter submits the query instead of triggering the default action.
        $(window).keydown(function(event) {
            if (event.keyCode == 13) {
                event.preventDefault()
                sendRequest()
            }
        })

        // Keep the node tooltip glued to the mouse pointer.
        $(window).on('mousemove', function(e) {
            $('#nodeTitle').css({
                left: e.pageX,
                top: e.pageY
            })
        })

        // Only allow scrolling of the title list while it is hovered.
        $('#titleListContainer').hover(
            function() { // mouseenter
                $(this).css('overflow', 'auto')
            },
            function() { // mouseleave
                $(this).css('overflow', 'hidden')
            }
        )

        // A click on the backdrop itself (not on its children) closes the overlay.
        $('#titleListLayer').click(function(e) {
            if ($('#titleListLayer').is(e.target)) {
                hideTitles()
            }
        })
        $('#stopWordEditorLayer').click(function(e) {
            if ($('#stopWordEditorLayer').is(e.target)) {
                hideStopWordEditor()
            }
        })
    })
}
|
||||
|
||||
/**
 * Drop every stop word: clear the rendered list in `#sweContainer` and
 * replace the shared `stopwords` array with a new empty one.
 */
function clearStopWord() {
    const emptied = []
    stopwords = emptied
    const listElement = $('#sweContainer')
    listElement.html('')
}
|
||||
|
||||
/**
 * Add the space-separated words typed into `#newStopWord` to `stopwords`.
 *
 * Each new word is appended to the list in `#sweContainer` with a "×" button
 * that removes it again. Words that already exist are not duplicated; the
 * list is scrolled to the word in either case. The input box is cleared
 * afterwards.
 */
function addStopWord() {
    const container = document.getElementById('sweContainer')
    const candidates = $('#newStopWord').val().split(' ')

    for (const candidate of candidates) {
        if (candidate === '') {
            continue // produced by consecutive or leading/trailing spaces
        }
        if (!stopwords.includes(candidate)) {
            stopwords.push(candidate)
            const removeButton = $('<span>')
                .attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right')
                .click(function() {
                    // The row position doubles as the index into `stopwords`.
                    const index = $(this).parent().index()
                    stopwords.splice(index, 1)
                    $('#sweContainer li').eq(index).remove()
                })
                .append("×")
            $('#sweContainer').append(
                $('<li>').attr('class', 'w3-display-container')
                    // .text() keeps typed markup from being parsed as HTML.
                    .append($('<span>').text(candidate))
                    .append(removeButton)
            )
        }
        // Scroll to the word's row; skipped if list and array are out of sync.
        const row = container.children[stopwords.indexOf(candidate)]
        if (row) {
            container.scrollTop = row.offsetTop
        }
    }
    $('#newStopWord').val('')
}
|
||||
|
||||
/**
 * Open the stop-word editor overlay.
 *
 * While the editor is open the Enter key adds the typed stop word(s) instead
 * of sending a query. The list in `#sweContainer` is rebuilt from
 * `stopwords`, one row per word with a "×" button that removes it.
 */
function showStopwordEditor() {
    console.log(stopwords)

    // Swap the window-level Enter handler: Enter now adds a stop word.
    $(window).off('keydown')
    $(window).keydown(function(event) {
        if (event.keyCode == 13) {
            addStopWord()
        }
    })

    $('#sweContainer').empty()
    for (const word of stopwords) {
        const removeButton = $('<span>')
            .attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right')
            .click(function() {
                // The row position doubles as the index into `stopwords`.
                const index = $(this).parent().index()
                console.log(stopwords[index])
                stopwords.splice(index, 1)
                console.log(stopwords)
                $('#sweContainer li').eq(index).remove()
            })
            .append("×")
        $('#sweContainer').append(
            $('<li>').attr('class', 'w3-display-container')
                // .text() keeps the word from being parsed as HTML.
                .append($('<span>').text(word))
                .append(removeButton)
        )
    }
    $('#stopWordEditorLayer').removeClass('hidden')
}
|
||||
|
||||
/**
 * Close the stop-word editor overlay and give the Enter key back to the
 * query form: Enter once again cancels the default action and sends the
 * request.
 */
function hideStopWordEditor() {
    const ENTER_KEY = 13
    $(window)
        .off('keydown')
        .on('keydown', function(event) {
            if (event.keyCode != ENTER_KEY) {
                return
            }
            event.preventDefault()
            sendRequest()
        })
    $('#stopWordEditorLayer').addClass('hidden')
}
|
||||
|
||||
/**
 * Offer the current stop words as a `stopwords.txt` download, one word per
 * line. `download` is expected to be provided by another script.
 */
function downloadStopWord() {
    // Declared locally; it was previously an implicit global.
    const stopWordString = stopwords.join('\n')
    download(stopWordString, 'stopwords.txt', 'text/plain')
}
|
||||
|
||||
|
||||
/**
 * Toggle the progress popup layer, reset its message and progress bar, and
 * detach the progress event listeners.
 */
function hidePopup() {
    const popupLayer = $('#infoWindowLayer')
    popupLayer.toggleClass('hidden')

    // Reset the progress read-out for the next run.
    $('#progressInfo').html('')
    $('#progBarInner').css('width', '0%')

    closeEventListner()
}
|
||||
|
||||
/**
 * Write a date range into the `startDate` / `endDate` inputs and remember it
 * in the page-level `startDate` / `endDate` variables.
 *
 * @param _startDate value for the start-date input
 * @param _endDate value for the end-date input
 */
function setDate(_startDate, _endDate) {
    const startInput = document.getElementById('startDate')
    const endInput = document.getElementById('endDate')
    startInput.value = _startDate
    endInput.value = _endDate

    startDate = _startDate
    endDate = _endDate
}
|
||||
|
||||
/**
 * Progress event handler: show the server's comment and advance the bar.
 *
 * @param event event whose `data` is a JSON string with `comment` and
 *        `progress` (used as a percentage width) fields
 */
function getProgressing(event) {
    // Declared locally; it was previously an implicit global named `data`.
    const update = JSON.parse(event.data)
    $('#progressInfo').html(update.comment)
    $('#progBarInner').css('width', update.progress + '%')
}
|
||||
|
||||
/**
 * Completion event handler: redraw the graph from the event payload and
 * close the progress popup.
 *
 * @param event event whose `data` is the JSON-encoded result handed to
 *        `changeGraph`
 */
function getProgressFinished(event) {
    // Declared locally; it was previously an implicit global named `data`.
    const result = JSON.parse(event.data)
    changeGraph(result)
    hidePopup()
}
|
||||
|
||||
/**
 * Detach both progress handlers from `progListener`. The event names carry
 * the current `randId` as a suffix.
 */
function closeEventListner() {
    const handlersByEvent = {
        progressing: getProgressing,
        progressFinished: getProgressFinished
    }
    for (const eventName of Object.keys(handlersByEvent)) {
        progListener.removeEventListener(eventName + randId, handlersByEvent[eventName])
    }
}
|
||||
|
||||
/**
 * Send the current query form to `ptt_push/addRequest` and redraw the graph
 * from the response.
 *
 * The payload carries the author box, the title/aid box, the keyword, the
 * current stop words and the part-of-speech checkboxes. Nothing is sent when
 * both the author box and the title box are empty.
 */
function sendRequest() {
    if ($('#idBox').val() == '' && $('#titleBox').val() == '') {
        // NOTE(review): this message looks truncated (it ends on "or");
        // confirm the intended wording.
        window.alert('請至少填寫一個鄉民id或是')
        // Stop here: previously the empty query was still posted after the alert.
        return
    }

    // Declared locally; `content` was previously an implicit global.
    const content = JSON.stringify({
        author: $('#idBox').val(),
        aid: $('#titleBox').val(),
        keyword: $('#keywordBox').val(),
        stopwords: stopwords,
        pos: {
            noun: $('#noun').is(':checked'),
            verb: $('#verb').is(':checked'),
            adj: $('#adj').is(':checked'),
            adv: $('#adv').is(':checked'),
            pron: $('#pron').is(':checked'),
            aux: $('#aux').is(':checked'),
            other: $('#other').is(':checked')
        }
    })
    console.log(content)

    $.ajax({
        type: 'POST',
        url: 'ptt_push/addRequest',
        data: content,
        contentType: 'application/json',
        success: function(data) {
            console.log(data)
            // Keep the push list in sync with the graph that is about to be drawn.
            wordPushList = JSON.parse(data.Result.json)
            console.log(wordPushList)
            changeGraph(data.Result)
        }
    })
}
|
||||
|
||||
/**
 * Replace the current graph with one built from a new result.
 *
 * @param data result object with a `tsv` string (graph input) and a `json`
 *        string (the push list matching that graph)
 */
function changeGraph(data) {
    console.log(data)
    const tsvString = data.tsv
    // Store the parsed push list instead of discarding it, so node clicks and
    // tooltips resolve against the data of the graph being drawn, also when
    // the caller did not update `wordPushList` itself.
    wordPushList = JSON.parse(data.json)

    destroyCurrentGraph()
    d3.select('#graph').append('div').attr('id', 'vis')
    buildSentetree(tsvString)
}
|
||||
|
||||
/**
 * Remove every element matching `#vis` from the page.
 */
function destroyCurrentGraph() {
    const currentGraph = d3.selectAll('#vis')
    currentGraph.remove()
}
|
||||
|
||||
/**
 * Hide the title-list overlay by adding the `hidden` class to it.
 */
function hideTitles() {
    const overlay = $('#titleListLayer')
    overlay.addClass('hidden')
}
|
||||
|
||||
/**
 * Build the SentenTree visualisation inside `#vis`.
 *
 * @param tsvString TSV text with `id`, `text` and `count` columns. When it
 *        is undefined the data is loaded from `tsvPath` via `d3.tsv`.
 *
 * Clicking a node copies its entity into `#keywordBox` and lists every
 * matching push in the title overlay. Hovering a node shows a tooltip with
 * up to five matching pushes and highlights the node's neighbours.
 */
function buildSentetree(tsvString) {
    console.log("Build.")
    var model;
    var tree;
    // Display marks selected by `push.type - 1`.
    const pushMarks = ['推', '噓', '→']

    if (typeof tsvString === 'undefined') {
        d3.tsv(tsvPath, buildTree)
    } else {
        // There is no load error on this path. The original passed the
        // undeclared identifier `_`, which throws unless some library
        // happens to define it.
        buildTree(null, d3.tsvParse(tsvString))
    }

    // Entries of wordPushList whose segmented text equals `rawText`.
    function pushesForText(rawText) {
        return wordPushList.filter(function(entry) {
            return entry.part == rawText
        })
    }

    // nodeClick: fill the title overlay with every push behind the node.
    function showTitleList(node) {
        $("#keywordBox").val(node.data.entity)
        $('#titleListLayer').removeClass('hidden')

        const rawTexts = node.data.seq.DBs.map(function(n) {
            return n.rawText
        })
        const uniqueTexts = rawTexts.filter(function(text, i) {
            return rawTexts.indexOf(text) == i
        })
        console.log(uniqueTexts)

        let titleList = []
        for (const text of uniqueTexts) {
            titleList = titleList.concat(pushesForText(text))
        }
        console.log(titleList)

        // Titles and push contents are scraped user text, so they are
        // inserted with .text() rather than parsed as HTML.
        $('#titleListKeyword').text(node.data.entity)
        $('#titleListKeywordInfo').html('')
        $('#titleListContainer').empty()
        for (const entry of titleList) {
            const link = $('<a>').append(
                $('<h4>').text(entry.title)
            )
            for (const push of entry.pushes) {
                link.append(
                    $('<span>').attr('style', 'margin: 0px 10px')
                        .text(pushMarks[push.type - 1] + ' ' + push.author + ': ' + push.content)
                        .append('<br>')
                )
            }
            $('#titleListContainer').append(
                $('<li>').attr('class', 'w3-panel').append(link)
            )
        }
    }

    // nodeMouseenter: tooltip with up to five pushes from the node's top entries.
    function showNodeTooltip(node) {
        console.log(node)
        const titles = []
        node.data.topEntries.forEach(function(topEntry) {
            console.log(topEntry)
            for (const match of pushesForText(topEntry.rawText)) {
                if (titles.indexOf(match) < 0 && titles.length < 5) {
                    titles.push(match)
                }
            }
        })
        console.log(titles)

        // NOTE(review): `nodeTitleContent` is not declared in this file; it
        // presumably resolves to the element with that id through the
        // browser's named-element globals. Confirm.
        const tooltip = $(nodeTitleContent).empty()
        titles.forEach(function(entry, idx) {
            // Print a title once for a run of consecutive pushes sharing it.
            if (idx === 0 || entry.title != titles[idx - 1].title) {
                tooltip.append(document.createTextNode(entry.title)).append('<br>')
            }
            const hit = entry.pushes.filter(function(push) {
                return push.content.includes(node.data.entity)
            })[0]
            // The entity may not occur verbatim in any raw push; skip the
            // line instead of throwing and leaving the tooltip hidden.
            if (hit) {
                tooltip.append(document.createTextNode(hit.content)).append('<br>')
            }
        })

        $('#nodeTitle').removeClass('hidden')
        tree.highlightNeighbors(node)
    }

    // d3.tsv-style callback: builds the model and wires up the visualisation.
    function buildTree(error, rawdata) {
        if (error || !rawdata) {
            console.error(error)
            return
        }
        // Parsed TSV values are strings; SentenTree gets `count` as a number.
        const data = rawdata.map(d => Object.assign({}, d, { count: +d.count }));
        model = new SentenTree.SentenTreeBuilder()
            .tokenize(SentenTree.tokenizer.tokenizeBySpace)
            .transformToken(token => (/score(d|s)?/.test(token) ? 'score' : token))
            .buildModel(data, {
                maxSupportRatio: 0.8,
                minSupportRatio: 0.001
            });
        tree = new SentenTree.SentenTreeVis('#vis', {
            fontSize: [15, 40],
            gapBetweenGraph: 10
        });
        tree.data(model.getRenderedGraphs(5))
            .on('nodeClick', showTitleList)
            .on('nodeMouseenter', showNodeTooltip)
            .on('nodeMouseleave', node => {
                $('#nodeTitle').addClass('hidden')
                tree.clearHighlightNeighbors()
            }).on('layoutStart', layout => {
                console.log(layout)
            }).on('linkMouseenter', link => {
                console.log(link)
            })

        // Scale the graph to the width of #graph, capped at 2x.
        new ResizeSensor(jQuery('#d3kitRoot'), function() {
            const scale = Math.min(2, ($('#graph').outerWidth()) / ($('#d3kitRoot').outerWidth() + 60))
            $('#vis').css({
                transform: "scale(" + scale + ")",
                'transform-origin': 'top left'
            });
        })
    }
}
|
Loading…
Reference in new issue