diff --git a/app.py b/app.py index 0471be9..2285074 100755 --- a/app.py +++ b/app.py @@ -3,6 +3,7 @@ import threading import random import string import dataHandlerPTT as ptt +import dataHandlerPTTPush as pttPush import generalText as gen from queue import Queue from datetime import datetime @@ -58,6 +59,37 @@ def index(): return redirect('/ptt') +@app.route('/ptt_push') +def ptt_push(): + return render_template('ptt_push.html', title='推文 Sententree') + + +@app.route('/ptt_push/init', methods=['POST']) +def pttPushInit(): + author = next(pttPush.getMostFrequentAuthor()) + pushes = pttPush.findAuthorPush( + author=[author], stopwords=pttPush.defaultStopWords) + result: dict = { + 'author': author, + 'stopwords': pttPush.defaultStopWords, + 'tsv': pushes[0], + 'json': pushes[1] + } + return jsonify(Result=result) + + +@app.route('/ptt_push/addRequest', methods=['POST']) +def pttPushAddRequest(): + pushes = pttPush.findAuthorPush(author=request.json['author'].split( + ' '), aid=request.json['aid'].split(' '), keyword=request.json['keyword'], stopwords=request.json['stopwords']) + result = { + 'keyword': request.json['keyword'], + 'tsv': pushes[0], + 'json': pushes[1] + } + return jsonify(Result=result) + + @app.route('/ptt') def pttSententree(): return render_template('ptt.html', title="PTT Sententree") @@ -79,18 +111,6 @@ def updateContent(): }) -@app.route('/askProgressHandler', methods=['POST']) -def askProgressHandler(): - key = request.json['key'] - randId = ''.join(random.choices( - string.ascii_uppercase + string.ascii_lowercase, k=15)) - threading.Thread(target=ptt.progressListener, - args=(key, eventQueue, randId,)).start() - return jsonify(Result={ - 'id': randId - }) - - @app.route('/addRequest', methods=['POST']) def addRequest(): content = request.json @@ -126,4 +146,4 @@ def initPage(): if __name__ == "__main__": - app.run(debug=False, port=4980, host='0.0.0.0', threaded=True) + app.run(debug=True, port=4998, host='0.0.0.0', threaded=True) diff --git a/dataHandlerPTTPush.py b/dataHandlerPTTPush.py new file mode 100644 index 0000000..faa19c2 --- /dev/null +++ b/dataHandlerPTTPush.py @@ -0,0 +1,89 @@ +from PTTData import PTTData +from pprint import pprint +from datetime import datetime +from progressbar import ProgressBar +import json +import csv +import io + +data = PTTData('Gossiping') +lastUpdate: datetime = None +mostFrequentAuthor: str = None + +defaultStopWords = [] +with open('resource/stopWords.txt', 'r', encoding='UTF-8') as file: + for word in file.readlines(): + word = word.strip() + defaultStopWords.append(word) + + +def getMostFrequentAuthor(title: str = None): + global mostFrequentAuthor, lastUpdate + if (lastUpdate == datetime.today().date()): + yield mostFrequentAuthor + elif (mostFrequentAuthor != None): + yield mostFrequentAuthor + authorList = data.pushCllc.aggregate(pipeline=[{'$group': { + '_id': '$author', + 'count': { + '$sum': 1 + } + }}]) + lastUpdate = datetime.today().date() + mostFrequentAuthor = max(authorList, key=lambda x: x['count'])['_id'] + yield mostFrequentAuthor + return + + +def possegPushes(ids: list, stopwords: list, keyword: str): + possegs = data.pushPossegCllc.find({'ID': {'$in': ids}}) + result = [] + for index, p in enumerate(possegs): + words = [i[1] for i in p['content'] if i[0] not in [ + 'eng', 'x', 'm'] and i[1] not in stopwords] + if(keyword == '' or keyword in words): + result.append({ + 'posString': ' '.join(words), + 'ID': p['ID'] + }) + return result + + +def findAuthorPush(author: list = None, aid: list = None, keyword: str = '', stopwords: list = []): + terms = {} + if (author != [''] and author != None): + terms['author'] = { + '$in': author + } + if (aid != [''] and aid != None): + terms['postAid'] = { + '$in': aid + } + print(terms) + pushes = data.pushCllc.find(terms) + pushId = [] + pushContent = {} + for p in pushes: + pushId.append(p['_id']) + pushContent[str(p['_id'])] = { + 'title': p['title'], + 'author': p['author'], + 'pushes': p['pushes'] + } + possegList = possegPushes(pushId, stopwords, keyword) + possegResult = [['id', 'text', 'count']] + for index, n in enumerate(possegList): + if(str(n['ID']) in pushContent.keys()): + pushContent[str(n['ID'])]['part'] = str(n['posString']) + possegResult.append([index, n['posString'], 3000]) + jsonString = json.dumps( + [i for i in pushContent.values()], indent=4, ensure_ascii=False) + with io.StringIO() as f: + writer = csv.writer(f, delimiter='\t') + writer.writerows(possegResult) + csvString = f.getvalue() + return (csvString, jsonString) + + +if __name__ == "__main__": + pprint(findAuthorPush(['gwenwoo'])) diff --git a/static/css/main.css b/static/css/main.css index 1d9a6c9..ebde5f4 100755 --- a/static/css/main.css +++ b/static/css/main.css @@ -309,6 +309,17 @@ input[type="date" i] { animation: blinker 1s linear infinite; } +.searchBox { + padding: 7px; + align-content: center; + border-radius: 3px; + border-style: solid; + border-width: 1px; + border-color: lightslategray; + margin-left: 5px; + margin-right: 5px; +} + @keyframes blinker { 50% { color: red; diff --git a/static/js/ptt.js b/static/js/ptt.js index fef3876..e911c58 100755 --- a/static/js/ptt.js +++ b/static/js/ptt.js @@ -308,9 +308,9 @@ function buildSentetree(tsvString) { .on('nodeMouseenter', node => { console.log(node) titles = node.data.topEntries.map(function(x) { - return wordTitleList[x.rawText] - }) - //console.log(titles) + return wordTitleList[x.rawText] + }) + console.log(titles) infoStr = '' for (index in titles) { if (index == 0) { diff --git a/static/js/pttPush.js b/static/js/pttPush.js new file mode 100644 index 0000000..fd3046c --- /dev/null +++ b/static/js/pttPush.js @@ -0,0 +1,329 @@ +init() +var tsvPath = '' +var titlePath = '' +var defaultStartDate +var defaultEndDate +var totalPosts +var startDate +var endDate +var wordPushList +var randId +var globKeyword = '' +var stopwords = [] + +function init() { + $.ajax({ + type: 'POST', + url: 'ptt_push/init', + dataType: 'json', + success: function(data) { + console.log(data) + tsvString = data.Result.tsv + wordPushList = JSON.parse(data.Result.json) + stopwords = data.Result.stopwords + console.log(wordPushList) + $('#idBox').val(data.Result.author) + buildSentetree(tsvString) + } + }) + $(document).ready(function() { + $(window).keydown(function(event) { + if (event.keyCode == 13) { + event.preventDefault() + sendRequest() + } + }); + }); + $(window).on('mousemove', function(e) { + $('#nodeTitle').css({ + left: e.pageX, + top: e.pageY + }) + }) + $('#titleListContainer').hover( + function() { // Run on hover/mouseenter + $(this).css('overflow', 'auto') + }, + function() { // Run on mouseleave + $(this).css('overflow', 'hidden') + } + ) + $('#titleListLayer').click(function(e) { + if ($('#titleListLayer').is(e.target)) { + hideTitles() + } + }) + $('#stopWordEditorLayer').click(function(e) { + if ($('#stopWordEditorLayer').is(e.target)) { + hideStopWordEditor() + } + }) +} + +function clearStopWord() { + stopwords = [] + $('#sweContainer').html('') +} + +function addStopWord() { + newswRaw = $('#newStopWord').val() + newswList = newswRaw.split(' ') + for (newsw of newswList) { + if (newsw != '') { + if (stopwords.includes(newsw)) { + + } else { + stopwords.push(newsw) + $('#sweContainer').append($('
  • ').attr('class', 'w3-display-container').append($('').append(newsw)).append($('').attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right').click(function(e) { + var index = $(this).parent().index() + console.log(stopwords[index]) + stopwords.splice(index, 1) + console.log(stopwords) + $('#sweContainer li').eq(index).remove() + }).append("×"))) + console.log(document.getElementById('sweContainer').children[stopwords.indexOf(newsw)]) + } + document.getElementById("sweContainer").scrollTop = document.getElementById('sweContainer').children[stopwords.indexOf(newsw)].offsetTop + } + } + $('#newStopWord').val('') +} + +function showStopwordEditor() { + console.log(stopwords) + $(window).unbind('keydown') + $(window).keydown(function(event) { + if (event.keyCode == 13) { + addStopWord() + } + }) + $('#sweContainer').empty() + for (word of stopwords) { + $('#sweContainer').append($('
  • ').attr('class', 'w3-display-container').append($('').append(word)).append($('').attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right').click(function(e) { + var index = $(this).parent().index() + console.log(stopwords[index]) + stopwords.splice(index, 1) + console.log(stopwords) + $('#sweContainer li').eq(index).remove() + }).append("×"))) + } + $('#stopWordEditorLayer').removeClass('hidden') +} + +function hideStopWordEditor() { + $(window).unbind('keydown') + $(window).keydown(function(event) { + if (event.keyCode == 13) { + event.preventDefault() + sendRequest() + } + }) + $('#stopWordEditorLayer').addClass('hidden') +} + +function downloadStopWord() { + stopWordString = stopwords.join('\n') + download(stopWordString, 'stopwords.txt', 'text/plain') +} + + +function hidePopup() { + $('#infoWindowLayer').toggleClass('hidden') + $('#progressInfo').html('') + $('#progBarInner').css('width', 0 + '%') + closeEventListner() +} + +function setDate(_startDate, _endDate) { + document.getElementById('startDate').value = _startDate + document.getElementById("endDate").value = _endDate + startDate = _startDate + endDate = _endDate +} + +function getProgressing(event) { + data = JSON.parse(event.data) + $('#progressInfo').html(data.comment) + $('#progBarInner').css('width', data.progress + '%') +} + +function getProgressFinished(event) { + data = JSON.parse(event.data) + changeGraph(data) + hidePopup() +} + +function closeEventListner() { + progListener.removeEventListener('progressing' + randId, getProgressing) + progListener.removeEventListener('progressFinished' + randId, getProgressFinished) +} + +function sendRequest() { + if ($('#idBox').val() == '' && $('#titleBox').val() == '') { + window.alert('請至少填寫一個鄉民id或是') + } + content = JSON.stringify({ + author: $('#idBox').val(), + aid: $('#titleBox').val(), + keyword: $('#keywordBox').val(), + stopwords: stopwords, + pos: { + noun: $('#noun').is(':checked'), + verb: $('#verb').is(':checked'), + adj: $('#adj').is(':checked'), + adv: $('#adv').is(':checked'), + pron: $('#pron').is(':checked'), + aux: $('#aux').is(':checked'), + other: $('#other').is(':checked') + } + }) + console.log(content) + $.ajax({ + type: 'POST', + url: 'ptt_push/addRequest', + data: content, + contentType: 'application/json', + success: function(data) { + console.log(data) + tsvString = data.Result.tsv + wordPushList = JSON.parse(data.Result.json) + console.log(wordPushList) + changeGraph(data.Result) + } + }) +} + +function changeGraph(data) { + console.log(data) + let tsvString = data.tsv + let json = JSON.parse(data.json) + destroyCurrentGraph() + d3.select('#graph').append('div').attr('id', 'vis') + buildSentetree(tsvString) +} + +function destroyCurrentGraph() { + d3.selectAll('#vis').remove() +} + +function hideTitles() { + $('#titleListLayer').addClass('hidden') +} + +function buildSentetree(tsvString) { + console.log("Build.") + var model; + var tree; + var data; + if (typeof tsvString === 'undefined') { + d3.tsv(tsvPath, buildTree) + } else { + data = d3.tsvParse(tsvString) + buildTree(_, data) + } + + function buildTree(error, rawdata) { + const data = rawdata.map(d => Object.assign({}, d, { count: +d.count })); + model = new SentenTree.SentenTreeBuilder() + .tokenize(SentenTree.tokenizer.tokenizeBySpace) + .transformToken(token => (/score(d|s)?/.test(token) ? 'score' : token)) + .buildModel(data, { + maxSupportRatio: 0.8, + minSupportRatio: 0.001 + }); + tree = new SentenTree.SentenTreeVis('#vis', { + fontSize: [15, 40], + gapBetweenGraph: 10 + }); + tree.data(model.getRenderedGraphs(5)) + .on('nodeClick', node => { + $("#keywordBox").val(node.data.entity) + $('#titleListLayer').removeClass('hidden') + seqList = node.data.seq.DBs.map(function(n) { + return n.rawText + }) + seqList = seqList.filter(function(v, i) { + return seqList.indexOf(v) == i + }) + titleList = [] + console.log(seqList) + for (s of seqList) { + titleTemp = wordPushList.filter(function(n) { + return n.part == s + }) + titleList = titleList.concat(titleTemp) + } + console.log(titleList) + info = wordPushList[node.data.entity] + $('#titleListKeyword').html(node.data.entity) + $('#titleListKeywordInfo').html('') + + $('#titleListContainer').empty() + for (i of titleList) { + let link = $('').append( + $('

    ').html(i.title) + ) + for (p of i.pushes) { + link.append( + $('').attr('style', 'margin: 0px 10px').html((['推', '噓', '→'])[p.type - 1] + ' ' + p.author + ': ' + p.content + '
    ') + ) + } + $('#titleListContainer').append( + $('
  • ').attr('class', 'w3-panel').append( + link + ) + ) + } + }) + .on('nodeMouseenter', node => { + console.log(node) + let titles = [] + node.data.topEntries.forEach(function(x) { + console.log(x) + let result = wordPushList.filter(function(y) { + return y.part == x.rawText + }) + for (r of result) { + if (titles.indexOf(r) < 0 && titles.length < 5) { + titles.push(r) + } + } + }) + console.log(titles) + infoStr = '' + for (index in titles) { + if (index == 0) { + infoStr += titles[index].title + '
    ' + } else { + if (titles[index].title != titles[index - 1].title) { + infoStr += titles[index].title + '
    ' + } + } + pos = titles[index].part.indexOf(node.data.entity) + infoStr += titles[index].pushes.filter(function(x) { + return x.content.includes(node.data.entity) + })[0].content + '
    ' + } + $(nodeTitleContent).html(infoStr) + $('#nodeTitle').removeClass('hidden') + tree.highlightNeighbors(node) + }) + .on('nodeMouseleave', node => { + $('#nodeTitle').addClass('hidden') + tree.clearHighlightNeighbors() + }).on('layoutStart', layout => { + console.log(layout) + }).on('linkMouseenter', link => { + console.log(link) + }) + new ResizeSensor(jQuery('#d3kitRoot'), function() { + var scale, origin; + scale = Math.min(2, ($('#graph').outerWidth()) / ($('#d3kitRoot').outerWidth() + 60)) + + $('#vis').css({ + transform: "scale(" + scale + ")", + 'transform-origin': 'top left' + }); + }) + } +} \ No newline at end of file diff --git a/templates/generalTxt.html b/templates/generalTxt.html index b2d84ae..a990d6f 100644 --- a/templates/generalTxt.html +++ b/templates/generalTxt.html @@ -27,6 +27,7 @@
    +
    diff --git a/templates/ptt.html b/templates/ptt.html index 6dd3fa3..e41df79 100755 --- a/templates/ptt.html +++ b/templates/ptt.html @@ -11,6 +11,7 @@
    +
    diff --git a/templates/ptt_push.html b/templates/ptt_push.html new file mode 100644 index 0000000..e7c12eb --- /dev/null +++ b/templates/ptt_push.html @@ -0,0 +1,90 @@ + + + + + {{ title }} + + + + + + +
    + + + +
    + + + + +
    +

    {{title}}

    +

    SentenTree https://github.com/twitter/SentenTree

    +

    同時使用關鍵詞和詞性搜尋的時候,必須選擇所設關鍵詞本身的詞性,否則會搜尋不到結果。

    +

    點選圖上的單詞可以查看單詞的資訊,觀看原始文章,也會快速切換關鍵字。

    +

    若搜尋到的文章超過50篇,圖表僅會顯示推文數最多的前50篇文章。

    +
    +
    + + + + + + +
    +
    +
    +
    + 選擇詞性 + 名詞 + 動詞 + 形容詞 + 副詞 + 代詞 + 助詞 + 其他詞性 +
    +
    +
    +
    +
    +
    +
    + + + + + + + + + + \ No newline at end of file