修改停用詞的處理方法

dev/addPush
Zovjsra 4 years ago
parent 8014aef8d1
commit ed2e045958

@ -124,7 +124,7 @@ def filterPOS(content, aid):
stopwords = content['stopwords']
else:
stopwords = defaultStopWords
stopped = [i for i in cutted if i not in stopwords]
stopped = [i for i in cutted] # 不在server端刪除停用詞
return stopped
@ -217,7 +217,7 @@ def findResult(content):
filtered.append(i)
titles['info']['posts'] = len(filtered)
filtered = [i for i in sorted(
filtered, key=lambda x: x['pushes'], reverse=True)[:50]]
filtered, key=lambda x: x['pushes'], reverse=True)[:(30 if (content['keyword'] == "") else 100)]]
print('到第一步為止生成花費', int(time()-timeStart), '')
counter = 0
total = len(filtered)
@ -299,7 +299,7 @@ def getDefault(startDate, endDate):
filtered.append(i)
titles['info']['posts'] = len(filtered)
filtered = [i for i in sorted(
filtered, key=lambda x: x['pushes'], reverse=True)[:50]]
filtered, key=lambda x: x['pushes'], reverse=True)[:30]]
counter = 0
total = len(postContents)
content = {

@ -40,7 +40,7 @@ def possegPushes(ids: list, stopwords: list, keyword: str):
result = []
for index, p in enumerate(possegs):
words = [i[1] for i in p['content'] if i[0] not in [
'eng', 'x', 'm'] and i[1] not in stopwords]
'eng', 'x', 'm']]
if(keyword == '' or keyword in words):
result.append({
'posString': ' '.join(words),

@ -19,3 +19,52 @@
新聞
標題
內文
可以
沒有
就是
自己
大家
我們
知道
網址
備註
連結
所以

@ -10,6 +10,7 @@ var wordTitleList
var randId
var globKeyword = ''
var stopwords = []
var tsvString
function init() {
$.ajax({
@ -35,7 +36,7 @@ function init() {
$('#graphInfo').empty()
$('#graphInfo').attr('style', 'margin: 10px;').append('總文章數:' + json.info.posts + ',' + keywordCountString)
totalPosts = json.info.posts
buildSentetree(tsvString)
buildSentetree()
}
})
$(document).ready(function() {
@ -102,7 +103,6 @@ function addStopWord() {
}
function showStopwordEditor() {
console.log(stopwords)
$(window).unbind('keydown')
$(window).keydown(function(event) {
if (event.keyCode == 13) {
@ -175,7 +175,7 @@ function sendRequest() {
startDate: $('#startDate').val(),
endDate: $('#endDate').val(),
keyword: $('#keywordBox').val(),
stopwords: stopwords,
stopwords: [],
pos: {
noun: $('#noun').is(':checked'),
verb: $('#verb').is(':checked'),
@ -216,19 +216,21 @@ function changeGraph(data) {
$('#graphInfo').attr('style', 'margin: 10px;').append('總文章數:' + json.info.posts + keywordCountString)
totalPosts = json.info.posts
destroyCurrentGraph()
d3.select('#graph').append('div').attr('id', 'vis')
buildSentetree(tsvString)
buildSentetree()
}
/**
 * Reset the visualization container.
 *
 * Removes every element matching `#vis`, then appends a new empty
 * `<div id="vis">` under `#graph` so a new graph can be rendered into it.
 */
function destroyCurrentGraph() {
    const staleContainers = d3.selectAll('#vis');
    staleContainers.remove();

    const freshContainer = d3.select('#graph').append('div');
    freshContainer.attr('id', 'vis');
}
/**
 * Hide the title-list overlay and detach the handlers bound to its
 * per-word action buttons (`#setToKeyword`, `#addToStopwords`), so that
 * handlers attached for one word are not left on the buttons afterwards.
 */
function hideTitles() {
    $('#titleListLayer').addClass('hidden');
    ['#setToKeyword', '#addToStopwords'].forEach((buttonSelector) => {
        $(buttonSelector).unbind();
    });
}
function buildSentetree(tsvString) {
function buildSentetree() {
console.log("Build.")
var model;
var tree;
@ -253,10 +255,9 @@ function buildSentetree(tsvString) {
fontSize: [15, 40],
gapBetweenGraph: 10
});
console.log(tree)
tree.data(model.getRenderedGraphs(2))
.on('nodeClick', node => {
$("#keywordBox").val(node.data.entity)
$('#titleListLayer').removeClass('hidden')
seqList = node.data.seq.DBs.map(function(n) {
return n.rawText
})
@ -273,6 +274,27 @@ function buildSentetree(tsvString) {
info = wordTitleList[node.data.entity]
$('#titleListKeyword').html(node.data.entity)
$('#titleListKeywordInfo').html('')
// Configure the stop-word toggle button for the clicked word: offer to add
// the word to the client-side `stopwords` list when it is absent, or to
// remove it when it is already present. Either action rebuilds the graph
// and closes the title list.
if (stopwords.indexOf(node.data.entity) < 0) {
    $("#addToStopwords").html('設為停用詞').css('background-color', '#379').click(() => {
        stopwords.push(node.data.entity)
        destroyCurrentGraph()
        buildSentetree()
        hideTitles()
    })
} else {
    $("#addToStopwords").html('從停用詞移除').css('background-color', '#933').click(() => {
        // Array.prototype.pop() ignores its argument and always removes the
        // LAST element, which would drop an unrelated stop word. Look the
        // word up at click time and remove exactly that entry.
        const stopwordIndex = stopwords.indexOf(node.data.entity)
        if (stopwordIndex >= 0) {
            stopwords.splice(stopwordIndex, 1)
        }
        destroyCurrentGraph()
        buildSentetree()
        hideTitles()
    })
}
$('#setToKeyword').click(() => {
$('#keywordBox').val(node.data.entity)
sendRequest()
hideTitles()
})
$('#titleListLayer').removeClass('hidden')
$.ajax({
type: 'POST',
url: '/ptt/keywordFrequency',

File diff suppressed because it is too large Load Diff

@ -19143,7 +19143,7 @@
words.forEach(function(w) {
var value = fdist[w];
if (value < maxSupport && value > maxc) {
if (value < maxSupport && value > maxc && (isNotRoot || stopwords.indexOf(itemset[w]) < 0)) {
maxw = +w;
maxc = value;
}
@ -19185,7 +19185,7 @@
}
}
return { word: word, pos: pos, count: count, s0: s0, s1: s1 };
return { word: word, pos: pos, count: stopwords.indexOf(itemset[word]) < 0 ? count : minSupport, s0: s0, s1: s1 };
}
function expandSeqTree(rootSeq, graphs, expandCnt, minSupport, maxSupport, terms, itemset) {
@ -19344,6 +19344,7 @@
var graphs = [];
var visibleGroups = expandSeqTree(this.rootSeq, graphs, DEFAULT_NODE_COUNT, minSupport, maxSupport, this.terms, itemset);
this.graphs = graphs.filter(function(g) {
return g.nodes.length > 2;
}).slice(0, 10);
@ -19376,9 +19377,11 @@
key: 'getRenderedGraphs',
value: function getRenderedGraphs(limit) {
var graphs = arguments.length === 1 ? this.graphs.slice(0, limit) : this.graphs;
console.log("slice")
var renderedGraphs = graphs.map(function(g) {
return g.toRenderedGraph();
});
console.log("toRenderedGraph")
var globalFreqRange = [(0, _lodash.min)(renderedGraphs.map(function(g) {
return g.freqRange[0];
})), (0, _lodash.max)(renderedGraphs.map(function(g) {
@ -37424,7 +37427,6 @@
}).filter(function(entry) {
return entry.tokens.length > 0;
});
return new _TokenizedDataset2.default(tokenizedEntries);
}
}, {
@ -37855,13 +37857,18 @@
heap.push(n);
});
let counter = 1;
while (heap.size() > 0) {
console.log(`in while ${counter++}`)
var parent = heap.pop();
console.log(heap)
if (parent.merged) {
continue;
}
var groups = [];
console.log(parent.data.id)
if (parent.leftLinks.length > 1) {
var lNodes = parent.leftLinks.map(function(l) {
return l.source;
@ -37875,6 +37882,7 @@
});
groups = groups.concat(this.groupMergeableNodes(rNodes));
}
console.log(groups)
if (groups.length > 0) {
var newNodes = groups.map(function(group) {
@ -38102,6 +38110,7 @@
var RenderedGraph = function() {
function RenderedGraph(rawGraph) {
console.log(arguments)
var _ref = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {},
_ref$bundle = _ref.bundle,
bundle = _ref$bundle === undefined ? true : _ref$bundle,
@ -38134,8 +38143,10 @@
this.assignNodeIds(nodes);
console.log(bundle)
if (bundle) {
var bundled = new _GraphBundler2.default(nodes, links).bundle();
console.log(bundled)
this.nodes = bundled.nodes;
this.links = bundled.links;
this.assignNodeIds(this.nodes);

@ -83,7 +83,7 @@
"visualization"
],
"license": "Apache-2.0",
"main": "dist/SentenTree.min.js",
"main": "dist/SentenTree.js",
"name": "sententree",
"repository": {
"type": "git",

@ -132,9 +132,9 @@ export default class RenderedGraph {
const constraints = this.baseConstraints
.concat(this.links.map(l => l.toConstraint()));
return this.options.highFrequencyOnTop
? constraints.concat(flatMap(this.nodes, n => n.computeOrderConstraints()))
: constraints;
return this.options.highFrequencyOnTop ?
constraints.concat(flatMap(this.nodes, n => n.computeOrderConstraints())) :
constraints;
}
toGroupConstraint() {

@ -35,9 +35,9 @@ function growSeq(seq, terms, minSupport, maxSupport, itemset) {
let maxc = 0;
const isNotRoot = len > 0;
const words = isNotRoot
? Object.keys(fdist)
: Object.keys(fdist).filter(w => !itemset[w].startsWith('#'));
const words = isNotRoot ?
Object.keys(fdist) :
Object.keys(fdist).filter(w => !itemset[w].startsWith('#'));
words.forEach(w => {
const value = fdist[w];
@ -62,7 +62,7 @@ function growSeq(seq, terms, minSupport, maxSupport, itemset) {
s0 = { size: 0, DBs: [] };
s1 = { size: 0, DBs: [] };
const words = seq.words;
for (let ti = 0; ti < seq.DBs.length; ti ++) {
for (let ti = 0; ti < seq.DBs.length; ti++) {
const t = seq.DBs[ti];
const l = pos === 0 ? 0 : t.seqIndices[pos - 1] + 1;
const r = pos === words.length ? t.tokens.length : t.seqIndices[pos];
@ -261,9 +261,9 @@ export default class SentenTreeModel {
}
getRenderedGraphs(limit) {
const graphs = arguments.length === 1
? this.graphs.slice(0, limit)
: this.graphs;
const graphs = arguments.length === 1 ?
this.graphs.slice(0, limit) :
this.graphs;
const renderedGraphs = graphs.map(g => g.toRenderedGraph());
const globalFreqRange = [
min(renderedGraphs.map(g => g.freqRange[0])),

@ -41,6 +41,8 @@
<ul id="titleListContainer" class="w3-ul w3-hoverable"></ul>
<div id="backButton" style="margin: 20px 0px;">
<button class="general-button" type="button" id="confirm" style="background-color: #379; margin: 0px 20px" onclick="hideTitles()">返回</button>
<button class="general-button" type="button" id="setToKeyword" style="background-color: #379; margin: 0px 20px">設為關鍵詞</button>
<button class="general-button" type='button' id='addToStopwords' style='background-color: #379;margin: 0px 40px;position: absolute;right: 0px;'></button>
</div>
</div>
</div>

Loading…
Cancel
Save